maxframe 1.3.0__cp310-cp310-macosx_10_9_universal2.whl → 2.0.0__cp310-cp310-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (644)
  1. maxframe/_utils.cpython-310-darwin.so +0 -0
  2. maxframe/_utils.pyi +21 -0
  3. maxframe/_utils.pyx +4 -3
  4. maxframe/codegen/__init__.py +27 -0
  5. maxframe/{codegen.py → codegen/core.py} +49 -43
  6. maxframe/codegen/spe/__init__.py +16 -0
  7. maxframe/codegen/spe/core.py +307 -0
  8. maxframe/codegen/spe/dataframe/__init__.py +37 -0
  9. maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
  10. maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
  11. maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
  12. maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
  13. maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
  14. maxframe/codegen/spe/dataframe/datasource.py +181 -0
  15. maxframe/codegen/spe/dataframe/datastore.py +204 -0
  16. maxframe/codegen/spe/dataframe/extensions.py +63 -0
  17. maxframe/codegen/spe/dataframe/fetch.py +26 -0
  18. maxframe/codegen/spe/dataframe/groupby.py +224 -0
  19. maxframe/codegen/spe/dataframe/indexing.py +238 -0
  20. maxframe/codegen/spe/dataframe/merge.py +73 -0
  21. maxframe/codegen/spe/dataframe/misc.py +286 -0
  22. maxframe/codegen/spe/dataframe/missing.py +64 -0
  23. maxframe/codegen/spe/dataframe/reduction.py +160 -0
  24. maxframe/codegen/spe/dataframe/sort.py +83 -0
  25. maxframe/codegen/spe/dataframe/statistics.py +46 -0
  26. maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
  27. maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
  29. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
  30. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
  31. maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
  32. maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
  33. maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
  34. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
  35. maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
  36. maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
  37. maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
  38. maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
  39. maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
  40. maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
  41. maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
  42. maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
  43. maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
  44. maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
  45. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
  46. maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
  47. maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
  48. maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
  49. maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
  50. maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
  51. maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
  52. maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
  53. maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
  54. maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
  55. maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
  56. maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
  57. maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
  58. maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
  59. maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
  60. maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
  61. maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
  62. maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
  63. maxframe/codegen/spe/dataframe/tseries.py +46 -0
  64. maxframe/codegen/spe/dataframe/udf.py +62 -0
  65. maxframe/codegen/spe/dataframe/value_counts.py +31 -0
  66. maxframe/codegen/spe/dataframe/window.py +65 -0
  67. maxframe/codegen/spe/learn/__init__.py +15 -0
  68. maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
  69. maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
  70. maxframe/codegen/spe/learn/contrib/models.py +41 -0
  71. maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
  72. maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
  73. maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
  74. maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
  75. maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
  76. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
  77. maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
  78. maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
  79. maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
  80. maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
  81. maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
  82. maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
  83. maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
  84. maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
  85. maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
  86. maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
  87. maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
  88. maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
  89. maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
  90. maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
  91. maxframe/codegen/spe/learn/utils/__init__.py +15 -0
  92. maxframe/codegen/spe/learn/utils/checks.py +55 -0
  93. maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
  94. maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
  95. maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
  96. maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
  97. maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
  98. maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
  99. maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
  100. maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
  101. maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
  102. maxframe/codegen/spe/learn/utils/validation.py +35 -0
  103. maxframe/codegen/spe/objects.py +26 -0
  104. maxframe/codegen/spe/remote.py +29 -0
  105. maxframe/codegen/spe/tensor/__init__.py +28 -0
  106. maxframe/codegen/spe/tensor/arithmetic.py +95 -0
  107. maxframe/codegen/spe/tensor/core.py +41 -0
  108. maxframe/codegen/spe/tensor/datasource.py +165 -0
  109. maxframe/codegen/spe/tensor/extensions.py +35 -0
  110. maxframe/codegen/spe/tensor/fetch.py +26 -0
  111. maxframe/codegen/spe/tensor/indexing.py +63 -0
  112. maxframe/codegen/spe/tensor/linalg.py +63 -0
  113. maxframe/codegen/spe/tensor/merge.py +31 -0
  114. maxframe/codegen/spe/tensor/misc.py +121 -0
  115. maxframe/codegen/spe/tensor/random.py +29 -0
  116. maxframe/codegen/spe/tensor/reduction.py +39 -0
  117. maxframe/codegen/spe/tensor/reshape.py +26 -0
  118. maxframe/codegen/spe/tensor/sort.py +42 -0
  119. maxframe/codegen/spe/tensor/special.py +35 -0
  120. maxframe/codegen/spe/tensor/statistics.py +24 -0
  121. maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
  122. maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
  123. maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
  124. maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
  125. maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
  126. maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
  127. maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
  128. maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
  129. maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
  130. maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
  131. maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
  132. maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
  133. maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
  134. maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
  135. maxframe/codegen/spe/tests/__init__.py +13 -0
  136. maxframe/codegen/spe/tests/test_remote.py +29 -0
  137. maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
  138. maxframe/codegen/spe/utils.py +54 -0
  139. maxframe/codegen/tests/__init__.py +13 -0
  140. maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
  141. maxframe/config/__init__.py +1 -1
  142. maxframe/config/config.py +50 -23
  143. maxframe/config/tests/test_config.py +4 -12
  144. maxframe/config/validators.py +5 -0
  145. maxframe/conftest.py +38 -10
  146. maxframe/core/__init__.py +1 -0
  147. maxframe/core/context.py +110 -0
  148. maxframe/core/entity/__init__.py +1 -0
  149. maxframe/core/entity/core.py +0 -7
  150. maxframe/core/entity/objects.py +19 -5
  151. maxframe/core/entity/output_types.py +11 -0
  152. maxframe/core/entity/tests/test_objects.py +11 -12
  153. maxframe/core/entity/tileables.py +3 -1
  154. maxframe/core/entity/utils.py +15 -0
  155. maxframe/core/graph/__init__.py +6 -1
  156. maxframe/core/graph/builder/base.py +5 -1
  157. maxframe/core/graph/core.cpython-310-darwin.so +0 -0
  158. maxframe/core/graph/core.pyx +17 -6
  159. maxframe/core/graph/entity.py +18 -6
  160. maxframe/core/operator/__init__.py +8 -3
  161. maxframe/core/operator/base.py +35 -12
  162. maxframe/core/operator/core.py +37 -14
  163. maxframe/core/operator/fetch.py +5 -18
  164. maxframe/core/operator/objects.py +0 -20
  165. maxframe/core/operator/shuffle.py +6 -72
  166. maxframe/dataframe/__init__.py +1 -0
  167. maxframe/dataframe/accessors/datetime_/core.py +7 -4
  168. maxframe/dataframe/accessors/string_/core.py +9 -6
  169. maxframe/dataframe/arithmetic/core.py +31 -20
  170. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
  171. maxframe/dataframe/core.py +98 -91
  172. maxframe/dataframe/datasource/core.py +8 -1
  173. maxframe/dataframe/datasource/date_range.py +8 -0
  174. maxframe/dataframe/datasource/from_index.py +9 -5
  175. maxframe/dataframe/datasource/from_records.py +9 -2
  176. maxframe/dataframe/datasource/from_tensor.py +32 -21
  177. maxframe/dataframe/datasource/read_csv.py +8 -2
  178. maxframe/dataframe/datasource/read_odps_query.py +109 -19
  179. maxframe/dataframe/datasource/read_odps_table.py +20 -5
  180. maxframe/dataframe/datasource/read_parquet.py +8 -3
  181. maxframe/dataframe/datasource/tests/test_datasource.py +80 -1
  182. maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
  183. maxframe/dataframe/datastore/to_csv.py +7 -3
  184. maxframe/dataframe/datastore/to_odps.py +42 -6
  185. maxframe/dataframe/extensions/__init__.py +6 -1
  186. maxframe/dataframe/extensions/apply_chunk.py +96 -136
  187. maxframe/dataframe/extensions/flatjson.py +3 -2
  188. maxframe/dataframe/extensions/flatmap.py +15 -7
  189. maxframe/dataframe/fetch/core.py +12 -1
  190. maxframe/dataframe/groupby/__init__.py +7 -0
  191. maxframe/dataframe/groupby/aggregation.py +62 -9
  192. maxframe/dataframe/groupby/apply.py +50 -74
  193. maxframe/dataframe/groupby/apply_chunk.py +393 -0
  194. maxframe/dataframe/groupby/core.py +80 -17
  195. maxframe/dataframe/groupby/extensions.py +26 -0
  196. maxframe/dataframe/groupby/fill.py +9 -4
  197. maxframe/dataframe/groupby/sample.py +7 -7
  198. maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
  199. maxframe/dataframe/groupby/transform.py +57 -54
  200. maxframe/dataframe/indexing/align.py +7 -6
  201. maxframe/dataframe/indexing/getitem.py +9 -8
  202. maxframe/dataframe/indexing/iloc.py +28 -23
  203. maxframe/dataframe/indexing/insert.py +7 -3
  204. maxframe/dataframe/indexing/loc.py +9 -8
  205. maxframe/dataframe/indexing/reindex.py +36 -30
  206. maxframe/dataframe/indexing/rename_axis.py +18 -10
  207. maxframe/dataframe/indexing/reset_index.py +0 -2
  208. maxframe/dataframe/indexing/sample.py +13 -9
  209. maxframe/dataframe/indexing/set_axis.py +9 -6
  210. maxframe/dataframe/indexing/setitem.py +8 -5
  211. maxframe/dataframe/indexing/where.py +12 -9
  212. maxframe/dataframe/merge/__init__.py +0 -1
  213. maxframe/dataframe/merge/concat.py +10 -31
  214. maxframe/dataframe/merge/merge.py +2 -24
  215. maxframe/dataframe/misc/__init__.py +6 -0
  216. maxframe/dataframe/misc/_duplicate.py +7 -3
  217. maxframe/dataframe/misc/apply.py +106 -139
  218. maxframe/dataframe/misc/astype.py +3 -2
  219. maxframe/dataframe/misc/case_when.py +11 -7
  220. maxframe/dataframe/misc/cut.py +11 -10
  221. maxframe/dataframe/misc/describe.py +7 -3
  222. maxframe/dataframe/misc/drop.py +13 -11
  223. maxframe/dataframe/misc/eval.py +0 -2
  224. maxframe/dataframe/misc/get_dummies.py +78 -49
  225. maxframe/dataframe/misc/isin.py +13 -10
  226. maxframe/dataframe/misc/map.py +21 -6
  227. maxframe/dataframe/misc/melt.py +8 -1
  228. maxframe/dataframe/misc/pivot.py +232 -0
  229. maxframe/dataframe/misc/pivot_table.py +52 -40
  230. maxframe/dataframe/misc/rechunk.py +59 -0
  231. maxframe/dataframe/misc/shift.py +7 -4
  232. maxframe/dataframe/misc/stack.py +5 -3
  233. maxframe/dataframe/misc/tests/test_misc.py +167 -1
  234. maxframe/dataframe/misc/transform.py +63 -65
  235. maxframe/dataframe/misc/value_counts.py +7 -4
  236. maxframe/dataframe/missing/dropna.py +16 -7
  237. maxframe/dataframe/missing/fillna.py +18 -10
  238. maxframe/dataframe/missing/replace.py +10 -6
  239. maxframe/dataframe/missing/tests/test_missing.py +2 -2
  240. maxframe/dataframe/operators.py +1 -27
  241. maxframe/dataframe/reduction/aggregation.py +128 -3
  242. maxframe/dataframe/reduction/core.py +20 -6
  243. maxframe/dataframe/reduction/median.py +1 -1
  244. maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
  245. maxframe/dataframe/reduction/unique.py +53 -7
  246. maxframe/dataframe/statistics/corr.py +9 -6
  247. maxframe/dataframe/statistics/quantile.py +9 -6
  248. maxframe/dataframe/tseries/to_datetime.py +6 -4
  249. maxframe/dataframe/utils.py +219 -31
  250. maxframe/dataframe/window/rolling.py +7 -4
  251. maxframe/env.py +1 -0
  252. maxframe/errors.py +9 -0
  253. maxframe/extension.py +13 -2
  254. maxframe/io/objects/core.py +67 -51
  255. maxframe/io/objects/tensor.py +73 -17
  256. maxframe/io/objects/tests/test_object_io.py +10 -55
  257. maxframe/io/odpsio/arrow.py +15 -2
  258. maxframe/io/odpsio/schema.py +43 -13
  259. maxframe/io/odpsio/tableio.py +63 -11
  260. maxframe/io/odpsio/tests/test_arrow.py +1 -2
  261. maxframe/io/odpsio/tests/test_schema.py +114 -1
  262. maxframe/io/odpsio/tests/test_tableio.py +42 -0
  263. maxframe/io/odpsio/tests/test_volumeio.py +21 -58
  264. maxframe/io/odpsio/volumeio.py +23 -8
  265. maxframe/learn/__init__.py +2 -2
  266. maxframe/learn/contrib/__init__.py +2 -2
  267. maxframe/learn/contrib/graph/connected_components.py +2 -1
  268. maxframe/learn/contrib/lightgbm/__init__.py +33 -0
  269. maxframe/learn/contrib/lightgbm/_predict.py +138 -0
  270. maxframe/learn/contrib/lightgbm/_train.py +163 -0
  271. maxframe/learn/contrib/lightgbm/callback.py +114 -0
  272. maxframe/learn/contrib/lightgbm/classifier.py +199 -0
  273. maxframe/learn/contrib/lightgbm/core.py +372 -0
  274. maxframe/learn/contrib/lightgbm/dataset.py +153 -0
  275. maxframe/learn/contrib/lightgbm/regressor.py +29 -0
  276. maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
  277. maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
  278. maxframe/learn/contrib/llm/models/dashscope.py +34 -0
  279. maxframe/learn/contrib/llm/models/managed.py +15 -0
  280. maxframe/learn/contrib/llm/multi_modal.py +92 -0
  281. maxframe/learn/contrib/llm/text.py +21 -5
  282. maxframe/learn/contrib/models.py +38 -9
  283. maxframe/learn/contrib/utils.py +55 -0
  284. maxframe/learn/contrib/xgboost/callback.py +86 -0
  285. maxframe/learn/contrib/xgboost/classifier.py +26 -30
  286. maxframe/learn/contrib/xgboost/core.py +54 -42
  287. maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
  288. maxframe/learn/contrib/xgboost/predict.py +13 -8
  289. maxframe/learn/contrib/xgboost/regressor.py +28 -27
  290. maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
  291. maxframe/learn/contrib/xgboost/train.py +59 -16
  292. maxframe/learn/core.py +252 -0
  293. maxframe/learn/datasets/__init__.py +20 -0
  294. maxframe/learn/datasets/samples_generator.py +628 -0
  295. maxframe/learn/linear_model/__init__.py +15 -0
  296. maxframe/learn/linear_model/_base.py +163 -0
  297. maxframe/learn/linear_model/_lin_reg.py +175 -0
  298. maxframe/learn/metrics/__init__.py +25 -0
  299. maxframe/learn/metrics/_check_targets.py +95 -0
  300. maxframe/learn/metrics/_classification.py +1121 -0
  301. maxframe/learn/metrics/_regression.py +256 -0
  302. maxframe/learn/model_selection/__init__.py +15 -0
  303. maxframe/learn/model_selection/_split.py +451 -0
  304. maxframe/learn/model_selection/tests/__init__.py +13 -0
  305. maxframe/learn/model_selection/tests/test_split.py +156 -0
  306. maxframe/learn/preprocessing/__init__.py +16 -0
  307. maxframe/learn/preprocessing/_data/__init__.py +17 -0
  308. maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
  309. maxframe/learn/preprocessing/_data/normalize.py +127 -0
  310. maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
  311. maxframe/learn/preprocessing/_data/utils.py +79 -0
  312. maxframe/learn/preprocessing/_label/__init__.py +16 -0
  313. maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
  314. maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
  315. maxframe/learn/utils/__init__.py +4 -0
  316. maxframe/learn/utils/_encode.py +314 -0
  317. maxframe/learn/utils/checks.py +161 -0
  318. maxframe/learn/utils/core.py +33 -0
  319. maxframe/learn/utils/extmath.py +176 -0
  320. maxframe/learn/utils/multiclass.py +292 -0
  321. maxframe/learn/utils/shuffle.py +114 -0
  322. maxframe/learn/utils/sparsefuncs.py +87 -0
  323. maxframe/learn/utils/validation.py +775 -0
  324. maxframe/lib/__init__.py +0 -2
  325. maxframe/lib/compat.py +145 -0
  326. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  327. maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
  328. maxframe/lib/sparse/__init__.py +10 -15
  329. maxframe/lib/sparse/array.py +45 -33
  330. maxframe/lib/sparse/core.py +0 -2
  331. maxframe/lib/sparse/linalg.py +31 -0
  332. maxframe/lib/sparse/matrix.py +5 -2
  333. maxframe/lib/sparse/tests/__init__.py +0 -2
  334. maxframe/lib/sparse/tests/test_sparse.py +53 -53
  335. maxframe/lib/sparse/vector.py +0 -2
  336. maxframe/mixin.py +59 -2
  337. maxframe/opcodes.py +13 -5
  338. maxframe/protocol.py +67 -14
  339. maxframe/remote/core.py +16 -14
  340. maxframe/remote/run_script.py +6 -3
  341. maxframe/serialization/__init__.py +2 -0
  342. maxframe/serialization/core.cpython-310-darwin.so +0 -0
  343. maxframe/serialization/core.pxd +3 -0
  344. maxframe/serialization/core.pyi +3 -1
  345. maxframe/serialization/core.pyx +82 -4
  346. maxframe/serialization/pandas.py +5 -1
  347. maxframe/serialization/serializables/core.py +6 -5
  348. maxframe/serialization/serializables/field.py +2 -2
  349. maxframe/serialization/serializables/tests/test_field_type.py +3 -5
  350. maxframe/serialization/tests/test_serial.py +27 -0
  351. maxframe/session.py +4 -71
  352. maxframe/sperunner.py +165 -0
  353. maxframe/tensor/__init__.py +35 -2
  354. maxframe/tensor/arithmetic/__init__.py +2 -4
  355. maxframe/tensor/arithmetic/abs.py +0 -2
  356. maxframe/tensor/arithmetic/absolute.py +0 -2
  357. maxframe/tensor/arithmetic/add.py +34 -4
  358. maxframe/tensor/arithmetic/angle.py +0 -2
  359. maxframe/tensor/arithmetic/arccos.py +1 -4
  360. maxframe/tensor/arithmetic/arccosh.py +1 -3
  361. maxframe/tensor/arithmetic/arcsin.py +0 -2
  362. maxframe/tensor/arithmetic/arcsinh.py +0 -2
  363. maxframe/tensor/arithmetic/arctan.py +0 -2
  364. maxframe/tensor/arithmetic/arctan2.py +0 -2
  365. maxframe/tensor/arithmetic/arctanh.py +0 -2
  366. maxframe/tensor/arithmetic/around.py +0 -2
  367. maxframe/tensor/arithmetic/bitand.py +0 -2
  368. maxframe/tensor/arithmetic/bitor.py +1 -3
  369. maxframe/tensor/arithmetic/bitxor.py +1 -3
  370. maxframe/tensor/arithmetic/cbrt.py +0 -2
  371. maxframe/tensor/arithmetic/ceil.py +0 -2
  372. maxframe/tensor/arithmetic/clip.py +13 -13
  373. maxframe/tensor/arithmetic/conj.py +0 -2
  374. maxframe/tensor/arithmetic/copysign.py +0 -2
  375. maxframe/tensor/arithmetic/core.py +47 -39
  376. maxframe/tensor/arithmetic/cos.py +1 -3
  377. maxframe/tensor/arithmetic/cosh.py +0 -2
  378. maxframe/tensor/arithmetic/deg2rad.py +0 -2
  379. maxframe/tensor/arithmetic/degrees.py +0 -2
  380. maxframe/tensor/arithmetic/divide.py +0 -2
  381. maxframe/tensor/arithmetic/equal.py +0 -2
  382. maxframe/tensor/arithmetic/exp.py +1 -3
  383. maxframe/tensor/arithmetic/exp2.py +0 -2
  384. maxframe/tensor/arithmetic/expm1.py +0 -2
  385. maxframe/tensor/arithmetic/fabs.py +0 -2
  386. maxframe/tensor/arithmetic/fix.py +0 -2
  387. maxframe/tensor/arithmetic/float_power.py +0 -2
  388. maxframe/tensor/arithmetic/floor.py +0 -2
  389. maxframe/tensor/arithmetic/floordiv.py +0 -2
  390. maxframe/tensor/arithmetic/fmax.py +0 -2
  391. maxframe/tensor/arithmetic/fmin.py +0 -2
  392. maxframe/tensor/arithmetic/fmod.py +0 -2
  393. maxframe/tensor/arithmetic/frexp.py +6 -2
  394. maxframe/tensor/arithmetic/greater.py +0 -2
  395. maxframe/tensor/arithmetic/greater_equal.py +0 -2
  396. maxframe/tensor/arithmetic/hypot.py +0 -2
  397. maxframe/tensor/arithmetic/i0.py +1 -3
  398. maxframe/tensor/arithmetic/imag.py +0 -2
  399. maxframe/tensor/arithmetic/invert.py +1 -3
  400. maxframe/tensor/arithmetic/isclose.py +0 -2
  401. maxframe/tensor/arithmetic/iscomplex.py +0 -2
  402. maxframe/tensor/arithmetic/isfinite.py +1 -3
  403. maxframe/tensor/arithmetic/isinf.py +0 -2
  404. maxframe/tensor/arithmetic/isnan.py +0 -2
  405. maxframe/tensor/arithmetic/isreal.py +0 -2
  406. maxframe/tensor/arithmetic/ldexp.py +0 -2
  407. maxframe/tensor/arithmetic/less.py +0 -2
  408. maxframe/tensor/arithmetic/less_equal.py +0 -2
  409. maxframe/tensor/arithmetic/log.py +1 -3
  410. maxframe/tensor/arithmetic/log10.py +1 -3
  411. maxframe/tensor/arithmetic/log1p.py +1 -3
  412. maxframe/tensor/arithmetic/log2.py +1 -3
  413. maxframe/tensor/arithmetic/logaddexp.py +0 -2
  414. maxframe/tensor/arithmetic/logaddexp2.py +0 -2
  415. maxframe/tensor/arithmetic/logical_and.py +0 -2
  416. maxframe/tensor/arithmetic/logical_not.py +1 -3
  417. maxframe/tensor/arithmetic/logical_or.py +0 -2
  418. maxframe/tensor/arithmetic/logical_xor.py +0 -2
  419. maxframe/tensor/arithmetic/lshift.py +0 -2
  420. maxframe/tensor/arithmetic/maximum.py +0 -2
  421. maxframe/tensor/arithmetic/minimum.py +0 -2
  422. maxframe/tensor/arithmetic/mod.py +0 -2
  423. maxframe/tensor/arithmetic/modf.py +6 -2
  424. maxframe/tensor/arithmetic/multiply.py +37 -4
  425. maxframe/tensor/arithmetic/nan_to_num.py +0 -2
  426. maxframe/tensor/arithmetic/negative.py +0 -2
  427. maxframe/tensor/arithmetic/nextafter.py +0 -2
  428. maxframe/tensor/arithmetic/not_equal.py +0 -2
  429. maxframe/tensor/arithmetic/positive.py +0 -2
  430. maxframe/tensor/arithmetic/power.py +0 -2
  431. maxframe/tensor/arithmetic/rad2deg.py +0 -2
  432. maxframe/tensor/arithmetic/radians.py +0 -2
  433. maxframe/tensor/arithmetic/real.py +0 -2
  434. maxframe/tensor/arithmetic/reciprocal.py +5 -3
  435. maxframe/tensor/arithmetic/rint.py +1 -3
  436. maxframe/tensor/arithmetic/rshift.py +0 -2
  437. maxframe/tensor/arithmetic/setimag.py +0 -2
  438. maxframe/tensor/arithmetic/setreal.py +0 -2
  439. maxframe/tensor/arithmetic/sign.py +0 -2
  440. maxframe/tensor/arithmetic/signbit.py +0 -2
  441. maxframe/tensor/arithmetic/sin.py +0 -2
  442. maxframe/tensor/arithmetic/sinc.py +1 -3
  443. maxframe/tensor/arithmetic/sinh.py +0 -2
  444. maxframe/tensor/arithmetic/spacing.py +0 -2
  445. maxframe/tensor/arithmetic/sqrt.py +0 -2
  446. maxframe/tensor/arithmetic/square.py +0 -2
  447. maxframe/tensor/arithmetic/subtract.py +4 -2
  448. maxframe/tensor/arithmetic/tan.py +0 -2
  449. maxframe/tensor/arithmetic/tanh.py +0 -2
  450. maxframe/tensor/arithmetic/tests/__init__.py +0 -2
  451. maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
  452. maxframe/tensor/arithmetic/truediv.py +0 -2
  453. maxframe/tensor/arithmetic/trunc.py +0 -2
  454. maxframe/tensor/arithmetic/utils.py +32 -6
  455. maxframe/tensor/array_utils.py +3 -25
  456. maxframe/tensor/core.py +6 -6
  457. maxframe/tensor/datasource/__init__.py +10 -2
  458. maxframe/tensor/datasource/arange.py +0 -2
  459. maxframe/tensor/datasource/array.py +3 -22
  460. maxframe/tensor/datasource/core.py +15 -10
  461. maxframe/tensor/datasource/diag.py +140 -0
  462. maxframe/tensor/datasource/diagflat.py +69 -0
  463. maxframe/tensor/datasource/empty.py +0 -2
  464. maxframe/tensor/datasource/eye.py +95 -0
  465. maxframe/tensor/datasource/from_dataframe.py +0 -2
  466. maxframe/tensor/datasource/from_dense.py +0 -17
  467. maxframe/tensor/datasource/from_sparse.py +0 -2
  468. maxframe/tensor/datasource/full.py +0 -2
  469. maxframe/tensor/datasource/identity.py +54 -0
  470. maxframe/tensor/datasource/indices.py +115 -0
  471. maxframe/tensor/datasource/linspace.py +140 -0
  472. maxframe/tensor/datasource/meshgrid.py +135 -0
  473. maxframe/tensor/datasource/ones.py +8 -3
  474. maxframe/tensor/datasource/tests/test_datasource.py +32 -1
  475. maxframe/tensor/datasource/tri_array.py +107 -0
  476. maxframe/tensor/datasource/zeros.py +7 -3
  477. maxframe/tensor/extensions/__init__.py +31 -0
  478. maxframe/tensor/extensions/accessor.py +25 -0
  479. maxframe/tensor/extensions/apply_chunk.py +137 -0
  480. maxframe/tensor/indexing/__init__.py +1 -1
  481. maxframe/tensor/indexing/choose.py +8 -6
  482. maxframe/tensor/indexing/compress.py +0 -2
  483. maxframe/tensor/indexing/extract.py +0 -2
  484. maxframe/tensor/indexing/fill_diagonal.py +9 -6
  485. maxframe/tensor/indexing/flatnonzero.py +1 -3
  486. maxframe/tensor/indexing/getitem.py +10 -43
  487. maxframe/tensor/indexing/nonzero.py +2 -4
  488. maxframe/tensor/indexing/setitem.py +19 -9
  489. maxframe/tensor/indexing/slice.py +6 -3
  490. maxframe/tensor/indexing/take.py +0 -2
  491. maxframe/tensor/indexing/tests/__init__.py +0 -2
  492. maxframe/tensor/indexing/tests/test_indexing.py +0 -2
  493. maxframe/tensor/indexing/unravel_index.py +6 -6
  494. maxframe/tensor/lib/__init__.py +16 -0
  495. maxframe/tensor/lib/index_tricks.py +404 -0
  496. maxframe/tensor/linalg/__init__.py +36 -0
  497. maxframe/tensor/linalg/dot.py +145 -0
  498. maxframe/tensor/linalg/inner.py +36 -0
  499. maxframe/tensor/linalg/inv.py +83 -0
  500. maxframe/tensor/linalg/lu.py +115 -0
  501. maxframe/tensor/linalg/matmul.py +225 -0
  502. maxframe/tensor/linalg/qr.py +124 -0
  503. maxframe/tensor/linalg/solve_triangular.py +103 -0
  504. maxframe/tensor/linalg/svd.py +167 -0
  505. maxframe/tensor/linalg/tensordot.py +213 -0
  506. maxframe/tensor/linalg/vdot.py +73 -0
  507. maxframe/tensor/merge/__init__.py +4 -0
  508. maxframe/tensor/merge/append.py +74 -0
  509. maxframe/tensor/merge/column_stack.py +63 -0
  510. maxframe/tensor/merge/concatenate.py +3 -2
  511. maxframe/tensor/merge/dstack.py +71 -0
  512. maxframe/tensor/merge/hstack.py +70 -0
  513. maxframe/tensor/merge/stack.py +0 -2
  514. maxframe/tensor/merge/tests/test_merge.py +0 -2
  515. maxframe/tensor/misc/__init__.py +18 -5
  516. maxframe/tensor/misc/astype.py +10 -8
  517. maxframe/tensor/misc/broadcast_to.py +1 -1
  518. maxframe/tensor/misc/copy.py +64 -0
  519. maxframe/tensor/misc/diff.py +115 -0
  520. maxframe/tensor/misc/flatten.py +63 -0
  521. maxframe/tensor/misc/in1d.py +94 -0
  522. maxframe/tensor/misc/isin.py +130 -0
  523. maxframe/tensor/misc/ndim.py +53 -0
  524. maxframe/tensor/misc/ravel.py +0 -2
  525. maxframe/tensor/misc/repeat.py +129 -0
  526. maxframe/tensor/misc/searchsorted.py +147 -0
  527. maxframe/tensor/misc/setdiff1d.py +58 -0
  528. maxframe/tensor/misc/squeeze.py +117 -0
  529. maxframe/tensor/misc/swapaxes.py +113 -0
  530. maxframe/tensor/misc/tests/test_misc.py +0 -2
  531. maxframe/tensor/misc/transpose.py +8 -4
  532. maxframe/tensor/misc/trapezoid.py +123 -0
  533. maxframe/tensor/misc/unique.py +0 -1
  534. maxframe/tensor/misc/where.py +10 -8
  535. maxframe/tensor/operators.py +0 -34
  536. maxframe/tensor/random/__init__.py +3 -5
  537. maxframe/tensor/random/binomial.py +0 -2
  538. maxframe/tensor/random/bytes.py +0 -2
  539. maxframe/tensor/random/chisquare.py +0 -2
  540. maxframe/tensor/random/choice.py +9 -8
  541. maxframe/tensor/random/core.py +20 -5
  542. maxframe/tensor/random/dirichlet.py +0 -2
  543. maxframe/tensor/random/exponential.py +0 -2
  544. maxframe/tensor/random/f.py +2 -4
  545. maxframe/tensor/random/gamma.py +0 -2
  546. maxframe/tensor/random/geometric.py +0 -2
  547. maxframe/tensor/random/gumbel.py +0 -2
  548. maxframe/tensor/random/hypergeometric.py +0 -2
  549. maxframe/tensor/random/laplace.py +2 -4
  550. maxframe/tensor/random/logistic.py +0 -2
  551. maxframe/tensor/random/lognormal.py +0 -2
  552. maxframe/tensor/random/logseries.py +0 -2
  553. maxframe/tensor/random/multinomial.py +0 -2
  554. maxframe/tensor/random/multivariate_normal.py +0 -2
  555. maxframe/tensor/random/negative_binomial.py +0 -2
  556. maxframe/tensor/random/noncentral_chisquare.py +0 -2
  557. maxframe/tensor/random/noncentral_f.py +1 -3
  558. maxframe/tensor/random/normal.py +0 -2
  559. maxframe/tensor/random/pareto.py +0 -2
  560. maxframe/tensor/random/permutation.py +6 -3
  561. maxframe/tensor/random/poisson.py +0 -2
  562. maxframe/tensor/random/power.py +0 -2
  563. maxframe/tensor/random/rand.py +0 -2
  564. maxframe/tensor/random/randint.py +0 -2
  565. maxframe/tensor/random/randn.py +0 -2
  566. maxframe/tensor/random/random_integers.py +0 -2
  567. maxframe/tensor/random/random_sample.py +0 -2
  568. maxframe/tensor/random/rayleigh.py +0 -2
  569. maxframe/tensor/random/standard_cauchy.py +0 -2
  570. maxframe/tensor/random/standard_exponential.py +0 -2
  571. maxframe/tensor/random/standard_gamma.py +0 -2
  572. maxframe/tensor/random/standard_normal.py +0 -2
  573. maxframe/tensor/random/standard_t.py +0 -2
  574. maxframe/tensor/random/tests/__init__.py +0 -2
  575. maxframe/tensor/random/tests/test_random.py +0 -2
  576. maxframe/tensor/random/triangular.py +0 -2
  577. maxframe/tensor/random/uniform.py +0 -2
  578. maxframe/tensor/random/vonmises.py +0 -2
  579. maxframe/tensor/random/wald.py +0 -2
  580. maxframe/tensor/random/weibull.py +0 -2
  581. maxframe/tensor/random/zipf.py +0 -2
  582. maxframe/tensor/reduction/__init__.py +0 -2
  583. maxframe/tensor/reduction/all.py +0 -2
  584. maxframe/tensor/reduction/allclose.py +0 -2
  585. maxframe/tensor/reduction/any.py +0 -2
  586. maxframe/tensor/reduction/argmax.py +1 -3
  587. maxframe/tensor/reduction/argmin.py +1 -3
  588. maxframe/tensor/reduction/array_equal.py +0 -2
  589. maxframe/tensor/reduction/core.py +0 -2
  590. maxframe/tensor/reduction/count_nonzero.py +0 -2
  591. maxframe/tensor/reduction/cumprod.py +0 -2
  592. maxframe/tensor/reduction/cumsum.py +0 -2
  593. maxframe/tensor/reduction/max.py +0 -2
  594. maxframe/tensor/reduction/mean.py +0 -2
  595. maxframe/tensor/reduction/min.py +0 -2
  596. maxframe/tensor/reduction/nanargmax.py +0 -2
  597. maxframe/tensor/reduction/nanargmin.py +0 -2
  598. maxframe/tensor/reduction/nancumprod.py +0 -2
  599. maxframe/tensor/reduction/nancumsum.py +0 -2
  600. maxframe/tensor/reduction/nanmax.py +0 -2
  601. maxframe/tensor/reduction/nanmean.py +0 -2
  602. maxframe/tensor/reduction/nanmin.py +0 -2
  603. maxframe/tensor/reduction/nanprod.py +0 -2
  604. maxframe/tensor/reduction/nanstd.py +0 -2
  605. maxframe/tensor/reduction/nansum.py +0 -2
  606. maxframe/tensor/reduction/nanvar.py +0 -2
  607. maxframe/tensor/reduction/prod.py +0 -2
  608. maxframe/tensor/reduction/std.py +0 -2
  609. maxframe/tensor/reduction/sum.py +0 -2
  610. maxframe/tensor/reduction/tests/test_reduction.py +1 -4
  611. maxframe/tensor/reduction/var.py +0 -2
  612. maxframe/tensor/reshape/__init__.py +0 -2
  613. maxframe/tensor/reshape/reshape.py +6 -5
  614. maxframe/tensor/reshape/tests/__init__.py +0 -2
  615. maxframe/tensor/reshape/tests/test_reshape.py +0 -2
  616. maxframe/tensor/sort/__init__.py +16 -0
  617. maxframe/tensor/sort/argsort.py +150 -0
  618. maxframe/tensor/sort/sort.py +295 -0
  619. maxframe/tensor/special/__init__.py +37 -0
  620. maxframe/tensor/special/core.py +38 -0
  621. maxframe/tensor/special/misc.py +142 -0
  622. maxframe/tensor/special/statistical.py +56 -0
  623. maxframe/tensor/statistics/__init__.py +5 -0
  624. maxframe/tensor/statistics/average.py +143 -0
  625. maxframe/tensor/statistics/bincount.py +133 -0
  626. maxframe/tensor/statistics/quantile.py +10 -8
  627. maxframe/tensor/ufunc/__init__.py +0 -2
  628. maxframe/tensor/ufunc/ufunc.py +0 -2
  629. maxframe/tensor/utils.py +21 -3
  630. maxframe/tests/test_protocol.py +3 -3
  631. maxframe/tests/test_utils.py +210 -1
  632. maxframe/tests/utils.py +59 -1
  633. maxframe/udf.py +76 -6
  634. maxframe/utils.py +418 -17
  635. {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/METADATA +5 -1
  636. maxframe-2.0.0.dist-info/RECORD +939 -0
  637. {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/WHEEL +1 -1
  638. maxframe_client/clients/framedriver.py +19 -3
  639. maxframe_client/fetcher.py +113 -6
  640. maxframe_client/session/odps.py +173 -38
  641. maxframe_client/session/task.py +3 -1
  642. maxframe_client/tests/test_session.py +41 -5
  643. maxframe-1.3.0.dist-info/RECORD +0 -705
  644. {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/top_level.txt +0 -0
@@ -13,14 +13,15 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from collections import namedtuple
16
+ from typing import List
16
17
 
17
18
  import pandas as pd
18
19
 
19
20
  from ... import opcodes
20
- from ...core import ENTITY_TYPE, Entity, OutputType
21
+ from ...core import ENTITY_TYPE, Entity, EntityData, OutputType
21
22
  from ...core.operator import MapReduceOperator
22
23
  from ...serialization.serializables import AnyField, BoolField, Int32Field
23
- from ...utils import lazy_import, no_default, pd_release_version
24
+ from ...utils import lazy_import, no_default
24
25
  from ..core import SERIES_TYPE
25
26
  from ..initializer import Series as asseries
26
27
  from ..operators import DataFrameOperatorMixin
@@ -28,15 +29,13 @@ from ..utils import build_df, build_series, parse_index
28
29
 
29
30
  cudf = lazy_import("cudf")
30
31
 
31
- _GROUP_KEYS_NO_DEFAULT = pd_release_version[:2] == (1, 5)
32
- _default_group_keys = no_default if _GROUP_KEYS_NO_DEFAULT else True
33
-
34
32
 
35
33
  NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])
36
34
 
37
35
 
38
- class DataFrameGroupByOperator(MapReduceOperator, DataFrameOperatorMixin):
36
+ class DataFrameGroupByOp(MapReduceOperator, DataFrameOperatorMixin):
39
37
  _op_type_ = opcodes.GROUPBY
38
+ _legacy_name = "DataFrameGroupByOperator"
40
39
 
41
40
  by = AnyField(
42
41
  "by",
@@ -96,8 +95,6 @@ class DataFrameGroupByOperator(MapReduceOperator, DataFrameOperatorMixin):
96
95
 
97
96
  new_kw = self.groupby_params
98
97
  new_kw.update(kwargs)
99
- if new_kw.get("level"):
100
- new_kw["level"] = 0
101
98
  if isinstance(new_kw["by"], list):
102
99
  new_by = []
103
100
  for v in new_kw["by"]:
@@ -115,17 +112,18 @@ class DataFrameGroupByOperator(MapReduceOperator, DataFrameOperatorMixin):
115
112
  new_kw["by"] = new_by
116
113
  return mock_obj.groupby(**new_kw)
117
114
 
118
- def _set_inputs(self, inputs):
119
- super()._set_inputs(inputs)
120
- inputs_iter = iter(self._inputs[1:])
115
+ @classmethod
116
+ def _set_inputs(cls, op: "DataFrameGroupByOp", inputs: List[EntityData]):
117
+ super()._set_inputs(op, inputs)
118
+ inputs_iter = iter(op._inputs[1:])
121
119
  if len(inputs) > 1:
122
120
  by = []
123
- for k in self.by:
121
+ for k in op.by:
124
122
  if isinstance(k, SERIES_TYPE):
125
123
  by.append(next(inputs_iter))
126
124
  else:
127
125
  by.append(k)
128
- self.by = by
126
+ op.by = by
129
127
 
130
128
  def __call__(self, df):
131
129
  params = df.params.copy()
@@ -153,9 +151,74 @@ class DataFrameGroupByOperator(MapReduceOperator, DataFrameOperatorMixin):
153
151
  return self.new_tileable(inputs, **params)
154
152
 
155
153
 
156
- def groupby(
157
- df, by=None, level=None, as_index=True, sort=True, group_keys=_default_group_keys
158
- ):
154
+ DataFrameGroupByOperator = DataFrameGroupByOp
155
+
156
+
157
+ def groupby(df, by=None, level=None, as_index=True, sort=True, group_keys=True):
158
+ """
159
+ Group DataFrame using a mapper or by a Series of columns.
160
+
161
+ A groupby operation involves some combination of splitting the
162
+ object, applying a function, and combining the results. This can be
163
+ used to group large amounts of data and compute operations on these
164
+ groups.
165
+
166
+ Parameters
167
+ ----------
168
+ by : mapping, function, label, or list of labels
169
+ Used to determine the groups for the groupby.
170
+ If ``by`` is a function, it's called on each value of the object's
171
+ index. If a dict or Series is passed, the Series or dict VALUES
172
+ will be used to determine the groups (the Series' values are first
173
+ aligned; see ``.align()`` method). If an ndarray is passed, the
174
+ values are used as-is to determine the groups. A label or list of
175
+ labels may be passed to group by the columns in ``self``. Notice
176
+ that a tuple is interpreted as a (single) key.
177
+ as_index : bool, default True
178
+ For aggregated output, return object with group labels as the
179
+ index. Only relevant for DataFrame input. as_index=False is
180
+ effectively "SQL-style" grouped output.
181
+ sort : bool, default True
182
+ Sort group keys. Get better performance by turning this off.
183
+ Note this does not influence the order of observations within each
184
+ group. Groupby preserves the order of rows within each group.
185
+ group_keys : bool
186
+ When calling apply, add group keys to index to identify pieces.
187
+
188
+ Notes
189
+ -----
190
+ MaxFrame only supports groupby with axis=0.
191
+ Default value of `group_keys` will be decided given the version of local
192
+ pandas library, which is True since pandas 2.0.
193
+
194
+ Returns
195
+ -------
196
+ DataFrameGroupBy
197
+ Returns a groupby object that contains information about the groups.
198
+
199
+ See Also
200
+ --------
201
+ resample : Convenience method for frequency conversion and resampling
202
+ of time series.
203
+
204
+ Examples
205
+ --------
206
+ >>> import maxframe.dataframe as md
207
+ >>> df = md.DataFrame({'Animal': ['Falcon', 'Falcon',
208
+ ... 'Parrot', 'Parrot'],
209
+ ... 'Max Speed': [380., 370., 24., 26.]})
210
+ >>> df.execute()
211
+ Animal Max Speed
212
+ 0 Falcon 380.0
213
+ 1 Falcon 370.0
214
+ 2 Parrot 24.0
215
+ 3 Parrot 26.0
216
+ >>> df.groupby(['Animal']).mean().execute()
217
+ Max Speed
218
+ Animal
219
+ Falcon 375.0
220
+ Parrot 25.0
221
+ """
159
222
  if not as_index and df.op.output_types[0] == OutputType.series:
160
223
  raise TypeError("as_index=False only valid with DataFrame")
161
224
 
@@ -168,7 +231,7 @@ def groupby(
168
231
  by = [by]
169
232
  elif df.ndim > 1 and by is not None and not isinstance(by, list):
170
233
  by = [by]
171
- op = DataFrameGroupByOperator(
234
+ op = DataFrameGroupByOp(
172
235
  by=by,
173
236
  level=level,
174
237
  as_index=as_index,
@@ -0,0 +1,26 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ...core import BaseMaxFrameAccessor
16
+ from ..core import DataFrameGroupBy, SeriesGroupBy
17
+
18
+
19
+ class DataFrameGroupByMaxFrameAccessor(BaseMaxFrameAccessor):
20
+ obj: "DataFrameGroupBy"
21
+ _api_count: int = 0
22
+
23
+
24
+ class SeriesGroupByMaxFrameAccessor(BaseMaxFrameAccessor):
25
+ obj: "SeriesGroupBy"
26
+ _api_count: int = 0
@@ -21,8 +21,9 @@ from ..operators import DataFrameOperator, DataFrameOperatorMixin
21
21
  from ..utils import parse_index
22
22
 
23
23
 
24
- class GroupByFillOperator(DataFrameOperator, DataFrameOperatorMixin):
24
+ class GroupByFill(DataFrameOperator, DataFrameOperatorMixin):
25
25
  _op_module_ = "dataframe.groupby"
26
+ _legacy_name = "GroupByFillOperator"
26
27
 
27
28
  value = AnyField("value", default=None)
28
29
  method = StringField("method", default=None)
@@ -77,21 +78,25 @@ class GroupByFillOperator(DataFrameOperator, DataFrameOperatorMixin):
77
78
  return self.new_tileable([groupby], **kw)
78
79
 
79
80
 
80
- class GroupByFFill(GroupByFillOperator):
81
+ class GroupByFFill(GroupByFill):
81
82
  _op_type_ = opcodes.FILL_NA
82
83
  _func_name = "ffill"
83
84
 
84
85
 
85
- class GroupByBFill(GroupByFillOperator):
86
+ class GroupByBFill(GroupByFill):
86
87
  _op_type = opcodes.FILL_NA
87
88
  _func_name = "bfill"
88
89
 
89
90
 
90
- class GroupByFillNa(GroupByFillOperator):
91
+ class GroupByFillNa(GroupByFill):
91
92
  _op_type = opcodes.FILL_NA
92
93
  _func_name = "fillna"
93
94
 
94
95
 
96
+ # keep for import compatibility
97
+ GroupByFillOperator = GroupByFill
98
+
99
+
95
100
  def ffill(groupby, limit=None):
96
101
  """
97
102
  Forward fill the values.
@@ -13,13 +13,13 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import copy
16
- from typing import Optional, Sequence, Union
16
+ from typing import List, Optional, Sequence, Union
17
17
 
18
18
  import numpy as np
19
19
  import pandas as pd
20
20
 
21
21
  from ... import opcodes
22
- from ...core import ENTITY_TYPE, OutputType, get_output_types
22
+ from ...core import ENTITY_TYPE, EntityData, OutputType, get_output_types
23
23
  from ...serialization.serializables import (
24
24
  BoolField,
25
25
  DictField,
@@ -48,7 +48,6 @@ class GroupBySample(DataFrameOperator, DataFrameOperatorMixin):
48
48
  seed = Int32Field("seed", default=None)
49
49
  _random_state = RandomStateField("random_state", default=None)
50
50
  errors = StringField("errors", default=None)
51
-
52
51
  # for chunks
53
52
  # num of instances for chunks
54
53
  input_nsplits = NDArrayField("input_nsplits", default=None)
@@ -60,12 +59,13 @@ class GroupBySample(DataFrameOperator, DataFrameOperatorMixin):
60
59
  def random_state(self):
61
60
  return self._random_state
62
61
 
63
- def _set_inputs(self, inputs):
64
- super()._set_inputs(inputs)
62
+ @classmethod
63
+ def _set_inputs(cls, op: "GroupBySample", inputs: List[EntityData]):
64
+ super()._set_inputs(op, inputs)
65
65
  input_iter = iter(inputs)
66
66
  next(input_iter)
67
- if isinstance(self.weights, ENTITY_TYPE):
68
- self.weights = next(input_iter)
67
+ if isinstance(op.weights, ENTITY_TYPE):
68
+ op.weights = next(input_iter)
69
69
 
70
70
  def __call__(self, groupby):
71
71
  df = groupby
@@ -23,7 +23,7 @@ from .... import opcodes
23
23
  from ....core import OutputType
24
24
  from ...core import DataFrame, DataFrameGroupBy, SeriesGroupBy
25
25
  from ..aggregation import DataFrameGroupByAgg
26
- from ..core import DataFrameGroupByOperator
26
+ from ..core import DataFrameGroupByOp
27
27
  from ..getitem import GroupByIndex
28
28
 
29
29
 
@@ -39,7 +39,7 @@ def test_groupby():
39
39
 
40
40
  grouped = mdf.groupby("b")
41
41
  assert isinstance(grouped, DataFrameGroupBy)
42
- assert isinstance(grouped.op, DataFrameGroupByOperator)
42
+ assert isinstance(grouped.op, DataFrameGroupByOp)
43
43
  assert list(grouped.key_dtypes.index) == ["b"]
44
44
 
45
45
  series = pd.Series([3, 4, 5, 3, 5, 4, 1, 2, 3])
@@ -47,7 +47,7 @@ def test_groupby():
47
47
  grouped = ms.groupby(lambda x: x + 1)
48
48
 
49
49
  assert isinstance(grouped, SeriesGroupBy)
50
- assert isinstance(grouped.op, DataFrameGroupByOperator)
50
+ assert isinstance(grouped.op, DataFrameGroupByOp)
51
51
 
52
52
  with pytest.raises(TypeError):
53
53
  ms.groupby(lambda x: x + 1, as_index=False)
@@ -12,19 +12,22 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import logging
15
+ from typing import MutableMapping, Union
16
16
 
17
17
  import numpy as np
18
- import pandas as pd
19
18
 
20
19
  from ... import opcodes
21
20
  from ...core import OutputType
22
21
  from ...serialization.serializables import AnyField, BoolField, DictField, TupleField
23
- from ...utils import quiet_stdio
22
+ from ...udf import BuiltinFunction, MarkedFunction
23
+ from ...utils import copy_if_possible
24
24
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
25
- from ..utils import copy_func_scheduling_hints, parse_index
26
-
27
- logger = logging.getLogger(__name__)
25
+ from ..utils import (
26
+ InferredDataFrameMeta,
27
+ copy_func_scheduling_hints,
28
+ infer_dataframe_return_value,
29
+ parse_index,
30
+ )
28
31
 
29
32
 
30
33
  class GroupByTransform(DataFrameOperator, DataFrameOperatorMixin):
@@ -42,68 +45,61 @@ class GroupByTransform(DataFrameOperator, DataFrameOperatorMixin):
42
45
  if hasattr(self, "func"):
43
46
  copy_func_scheduling_hints(self.func, self)
44
47
 
45
- def _infer_df_func_returns(self, in_groupby, dtypes, index):
46
- index_value, output_types, new_dtypes = None, None, None
47
-
48
- output_types = (
49
- [OutputType.dataframe]
50
- if in_groupby.op.output_types[0] == OutputType.dataframe_groupby
51
- else [OutputType.series]
52
- )
53
-
54
- try:
55
- mock_groupby = in_groupby.op.build_mock_groupby()
56
- with np.errstate(all="ignore"), quiet_stdio():
57
- if self.call_agg:
58
- infer_df = mock_groupby.agg(self.func, *self.args, **self.kwds)
59
- else:
60
- infer_df = mock_groupby.transform(
61
- self.func, *self.args, **self.kwds
62
- )
63
-
64
- # todo return proper index when sort=True is implemented
65
- index_value = parse_index(None, in_groupby.key, self.func)
66
-
67
- if isinstance(infer_df, pd.DataFrame):
68
- output_types = [OutputType.dataframe]
69
- new_dtypes = new_dtypes or infer_df.dtypes
48
+ def has_custom_code(self) -> bool:
49
+ return not isinstance(self.func, BuiltinFunction)
50
+
51
+ def _infer_df_func_returns(
52
+ self, in_groupby, dtypes, dtype=None, name=None, index=None
53
+ ) -> InferredDataFrameMeta:
54
+ def infer_func(groupby_obj):
55
+ args = copy_if_possible(self.args)
56
+ kwds = copy_if_possible(self.kwds)
57
+ if self.call_agg:
58
+ return groupby_obj.agg(self.func, *args, **kwds)
70
59
  else:
71
- output_types = [OutputType.series]
72
- new_dtypes = new_dtypes or (infer_df.name, infer_df.dtype)
73
- except: # noqa: E722 # nosec
74
- logger.info("Exception raised while inferring df_func", exc_info=True)
60
+ return groupby_obj.transform(self.func, *args, **kwds)
75
61
 
76
- self.output_types = output_types if not self.output_types else self.output_types
77
- dtypes = new_dtypes if dtypes is None else dtypes
78
- index_value = index_value if index is None else parse_index(index)
79
- return dtypes, index_value
62
+ if self.call_agg:
63
+ output_type = None
64
+ elif in_groupby.op.output_types[0] == OutputType.dataframe_groupby:
65
+ output_type = OutputType.dataframe
66
+ else:
67
+ output_type = OutputType.series
68
+
69
+ inferred_meta = infer_dataframe_return_value(
70
+ in_groupby,
71
+ infer_func,
72
+ output_type=output_type,
73
+ dtypes=dtypes,
74
+ dtype=dtype,
75
+ name=name,
76
+ index=index,
77
+ )
78
+ if inferred_meta.output_type and not self.output_types:
79
+ self.output_types = [inferred_meta.output_type]
80
+ return inferred_meta
80
81
 
81
82
  def __call__(
82
83
  self, groupby, dtypes=None, dtype=None, name=None, index=None, skip_infer=None
83
84
  ):
84
85
  in_df = groupby.inputs[0]
85
86
 
86
- if dtypes is None and dtype is not None:
87
- dtypes = (name, dtype)
88
87
  if skip_infer:
89
- dtypes, index_value = None, None
88
+ dtypes, dtype, name, index_value = None, None, None, None
90
89
  self.output_types = (
91
90
  [OutputType.dataframe]
92
91
  if groupby.op.output_types[0] == OutputType.dataframe_groupby
93
92
  else [OutputType.series]
94
93
  )
95
94
  else:
96
- dtypes, index_value = self._infer_df_func_returns(groupby, dtypes, index)
97
- for arg, desc in zip(
98
- (self.output_types, dtypes), ("output_types", "dtypes")
99
- ):
100
- if arg is None:
101
- raise TypeError(
102
- f"Cannot determine {desc} by calculating with enumerate data, "
103
- "please specify it as arguments"
104
- )
105
- if index_value is None:
106
- index_value = parse_index(None, (in_df.key, in_df.index_value.key))
95
+ inferred_meta = self._infer_df_func_returns(
96
+ groupby, dtypes=dtypes, dtype=dtype, name=name, index=index
97
+ )
98
+ inferred_meta.check_absence("output_type", "dtypes", "dtype")
99
+ dtypes = inferred_meta.dtypes
100
+ dtype = inferred_meta.dtype
101
+ name = inferred_meta.name
102
+ index_value = inferred_meta.index_value
107
103
 
108
104
  if self.output_types[0] == OutputType.dataframe:
109
105
  new_shape = (
@@ -123,7 +119,6 @@ class GroupByTransform(DataFrameOperator, DataFrameOperatorMixin):
123
119
  columns_value=columns_value,
124
120
  )
125
121
  else:
126
- name, dtype = dtypes
127
122
  new_shape = (np.nan,) if self.call_agg else groupby.shape
128
123
  return self.new_series(
129
124
  [groupby],
@@ -133,6 +128,14 @@ class GroupByTransform(DataFrameOperator, DataFrameOperatorMixin):
133
128
  index_value=index_value,
134
129
  )
135
130
 
131
+ @classmethod
132
+ def estimate_size(
133
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "GroupByTransform"
134
+ ) -> None:
135
+ if isinstance(op.func, MarkedFunction):
136
+ ctx[op.outputs[0].key] = float("inf")
137
+ super().estimate_size(ctx, op)
138
+
136
139
 
137
140
  def groupby_transform(
138
141
  groupby,
@@ -12,12 +12,12 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Any, Optional, Union
15
+ from typing import Any, List, Optional, Union
16
16
 
17
17
  import numpy as np
18
18
 
19
19
  from ... import opcodes
20
- from ...core import OutputType, get_output_types
20
+ from ...core import EntityData, OutputType, get_output_types
21
21
  from ...serialization.serializables import (
22
22
  AnyField,
23
23
  Int16Field,
@@ -53,10 +53,11 @@ class DataFrameAlign(DataFrameOperator, DataFrameOperatorMixin):
53
53
  def output_limit(self) -> int:
54
54
  return 2
55
55
 
56
- def _set_inputs(self, inputs):
57
- super()._set_inputs(inputs)
58
- self.lhs = inputs[0]
59
- self.rhs = inputs[1]
56
+ @classmethod
57
+ def _set_inputs(cls, op: "DataFrameAlign", inputs: List[EntityData]):
58
+ super()._set_inputs(op, inputs)
59
+ op.lhs = inputs[0]
60
+ op.rhs = inputs[1]
60
61
 
61
62
  def __call__(self, lhs: TileableType, rhs: TileableType):
62
63
  if self.broadcast_axis != 1 or lhs.ndim == rhs.ndim:
@@ -13,12 +13,13 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from numbers import Integral
16
+ from typing import List
16
17
 
17
18
  import numpy as np
18
19
  import pandas as pd
19
20
 
20
21
  from ... import opcodes
21
- from ...core import ENTITY_TYPE, OutputType
22
+ from ...core import ENTITY_TYPE, EntityData, OutputType
22
23
  from ...serialization.serializables import AnyField, BoolField
23
24
  from ...tensor.core import TENSOR_TYPE
24
25
  from ...tensor.datasource import tensor as astensor
@@ -66,7 +67,6 @@ class DataFrameIndex(DataFrameOperator, DataFrameOperatorMixin):
66
67
  _op_type_ = opcodes.INDEX
67
68
 
68
69
  col_names = AnyField("col_names", default=None)
69
-
70
70
  # for bool index
71
71
  mask = AnyField("mask", default=None)
72
72
  identical_index = BoolField("identical_index")
@@ -75,12 +75,13 @@ class DataFrameIndex(DataFrameOperator, DataFrameOperatorMixin):
75
75
  output_types = output_types or [OutputType.series]
76
76
  super().__init__(_output_types=output_types, **kw)
77
77
 
78
- def _set_inputs(self, inputs):
79
- super()._set_inputs(inputs)
80
- if isinstance(self.col_names, ENTITY_TYPE):
81
- self.col_names = self._inputs[0]
82
- if isinstance(self.mask, ENTITY_TYPE):
83
- self.mask = self._inputs[-1]
78
+ @classmethod
79
+ def _set_inputs(cls, op: "DataFrameIndex", inputs: List[EntityData]):
80
+ super()._set_inputs(op, inputs)
81
+ if isinstance(op.col_names, ENTITY_TYPE):
82
+ op.col_names = op._inputs[0]
83
+ if isinstance(op.mask, ENTITY_TYPE):
84
+ op.mask = op._inputs[-1]
84
85
 
85
86
  def __call__(self, df):
86
87
  if self.col_names is not None:
@@ -13,6 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from numbers import Integral
16
+ from typing import List
16
17
 
17
18
  import numpy as np
18
19
  import pandas as pd
@@ -21,7 +22,7 @@ from pandas.core.indexing import IndexingError
21
22
 
22
23
  from ... import opcodes
23
24
  from ...config import options
24
- from ...core import ENTITY_TYPE, OutputType
25
+ from ...core import ENTITY_TYPE, EntityData, OutputType
25
26
  from ...serialization.serializables import AnyField, KeyField, ListField
26
27
  from ...tensor import asarray
27
28
  from ...tensor.indexing.core import calc_shape
@@ -181,26 +182,29 @@ class DataFrameIlocGetItem(DataFrameOperator, HeadTailOptimizedOperatorMixin):
181
182
  _input = KeyField("input")
182
183
  indexes = ListField("indexes", default=None)
183
184
 
184
- def __init__(self, gpu=None, sparse=False, output_types=None, **kw):
185
- super().__init__(gpu=gpu, sparse=sparse, _output_types=output_types, **kw)
185
+ def __init__(self, gpu=None, sparse=False, **kw):
186
+ if kw.get("output_types"):
187
+ kw["_output_types"] = kw.pop("output_types")
188
+ super().__init__(gpu=gpu, sparse=sparse, **kw)
186
189
  if not self.output_types:
187
- self.output_types = [OutputType.dataframe]
190
+ self._output_types = [OutputType.dataframe]
188
191
 
189
192
  @property
190
193
  def input(self):
191
194
  return self._input
192
195
 
193
- def _set_inputs(self, inputs):
194
- super()._set_inputs(inputs)
195
- inputs_iter = iter(self._inputs)
196
- self._input = next(inputs_iter)
196
+ @classmethod
197
+ def _set_inputs(cls, op: "DataFrameIlocGetItem", inputs: List[EntityData]):
198
+ super()._set_inputs(op, inputs)
199
+ inputs_iter = iter(op._inputs)
200
+ op._input = next(inputs_iter)
197
201
  indexes = []
198
- for index in self.indexes:
202
+ for index in op.indexes:
199
203
  if isinstance(index, ENTITY_TYPE):
200
204
  indexes.append(next(inputs_iter))
201
205
  else:
202
206
  indexes.append(index)
203
- self.indexes = indexes
207
+ op.indexes = indexes
204
208
 
205
209
  def __call__(self, df):
206
210
  # Note [Fancy Index of Numpy and Pandas]
@@ -222,7 +226,6 @@ class DataFrameIlocGetItem(DataFrameOperator, HeadTailOptimizedOperatorMixin):
222
226
  inputs = [df] + [
223
227
  index for index in self.indexes if isinstance(index, ENTITY_TYPE)
224
228
  ]
225
-
226
229
  # NB: pandas only compresses the result to series when index on one of axis is integral
227
230
  if isinstance(self.indexes[1], Integral):
228
231
  shape = shape0
@@ -274,19 +277,20 @@ class SeriesIlocGetItem(DataFrameOperator, HeadTailOptimizedOperatorMixin):
274
277
  def input(self):
275
278
  return self._input
276
279
 
277
- def _set_inputs(self, inputs):
278
- super()._set_inputs(inputs)
280
+ @classmethod
281
+ def _set_inputs(cls, op: "SeriesIlocGetItem", inputs: List[EntityData]):
282
+ super()._set_inputs(op, inputs)
279
283
 
280
- inputs_iter = iter(self._inputs)
281
- self._input = next(inputs_iter)
284
+ inputs_iter = iter(op._inputs)
285
+ op._input = next(inputs_iter)
282
286
 
283
287
  indexes = []
284
- for index in self.indexes:
288
+ for index in op.indexes:
285
289
  if isinstance(index, ENTITY_TYPE):
286
290
  indexes.append(next(inputs_iter))
287
291
  else:
288
292
  indexes.append(index)
289
- self.indexes = indexes
293
+ op.indexes = indexes
290
294
 
291
295
  def __call__(self, series):
292
296
  if isinstance(self.indexes[0], Integral):
@@ -322,19 +326,20 @@ class IndexIlocGetItem(DataFrameOperator, DataFrameOperatorMixin):
322
326
  def input(self):
323
327
  return self._input
324
328
 
325
- def _set_inputs(self, inputs):
326
- super()._set_inputs(inputs)
329
+ @classmethod
330
+ def _set_inputs(cls, op: "IndexIlocGetItem", inputs: List[EntityData]):
331
+ super()._set_inputs(op, inputs)
327
332
 
328
- inputs_iter = iter(self._inputs)
329
- self._input = next(inputs_iter)
333
+ inputs_iter = iter(op._inputs)
334
+ op._input = next(inputs_iter)
330
335
 
331
336
  indexes = []
332
- for index in self.indexes:
337
+ for index in op.indexes:
333
338
  if isinstance(index, ENTITY_TYPE):
334
339
  indexes.append(next(inputs_iter))
335
340
  else:
336
341
  indexes.append(index)
337
- self.indexes = indexes
342
+ op.indexes = indexes
338
343
 
339
344
  def __call__(self, idx):
340
345
  if isinstance(self.indexes[0], Integral):
@@ -12,9 +12,12 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from typing import List
16
+
15
17
  import pandas as pd
16
18
 
17
19
  from ... import opcodes
20
+ from ...core import EntityData
18
21
  from ...serialization.serializables import AnyField, BoolField, Int64Field
19
22
  from ...tensor.core import TENSOR_TYPE
20
23
  from ..core import SERIES_TYPE
@@ -30,10 +33,11 @@ class DataFrameInsert(DataFrameOperator, DataFrameOperatorMixin):
30
33
  value = AnyField("value")
31
34
  allow_duplicates = BoolField("allow_duplicates")
32
35
 
33
- def _set_inputs(self, inputs):
34
- super()._set_inputs(inputs)
36
+ @classmethod
37
+ def _set_inputs(cls, op: "DataFrameInsert", inputs: List[EntityData]):
38
+ super()._set_inputs(op, inputs)
35
39
  if len(inputs) > 1:
36
- self._value = self._inputs[-1]
40
+ op._value = op._inputs[-1]
37
41
 
38
42
  def __call__(self, df):
39
43
  inputs = [df]