maxframe 1.3.0__cp38-cp38-win32.whl → 2.0.0b1__cp38-cp38-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (643)
  1. maxframe/_utils.cp38-win32.pyd +0 -0
  2. maxframe/_utils.pyi +21 -0
  3. maxframe/_utils.pyx +4 -3
  4. maxframe/codegen/__init__.py +27 -0
  5. maxframe/{codegen.py → codegen/core.py} +49 -43
  6. maxframe/codegen/spe/__init__.py +16 -0
  7. maxframe/codegen/spe/core.py +307 -0
  8. maxframe/codegen/spe/dataframe/__init__.py +37 -0
  9. maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
  10. maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
  11. maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
  12. maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
  13. maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
  14. maxframe/codegen/spe/dataframe/datasource.py +181 -0
  15. maxframe/codegen/spe/dataframe/datastore.py +204 -0
  16. maxframe/codegen/spe/dataframe/extensions.py +63 -0
  17. maxframe/codegen/spe/dataframe/fetch.py +26 -0
  18. maxframe/codegen/spe/dataframe/groupby.py +224 -0
  19. maxframe/codegen/spe/dataframe/indexing.py +238 -0
  20. maxframe/codegen/spe/dataframe/merge.py +73 -0
  21. maxframe/codegen/spe/dataframe/misc.py +286 -0
  22. maxframe/codegen/spe/dataframe/missing.py +64 -0
  23. maxframe/codegen/spe/dataframe/reduction.py +160 -0
  24. maxframe/codegen/spe/dataframe/sort.py +83 -0
  25. maxframe/codegen/spe/dataframe/statistics.py +46 -0
  26. maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
  27. maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
  29. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
  30. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
  31. maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
  32. maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
  33. maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
  34. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
  35. maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
  36. maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
  37. maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
  38. maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
  39. maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
  40. maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
  41. maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
  42. maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
  43. maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
  44. maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
  45. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
  46. maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
  47. maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
  48. maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
  49. maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
  50. maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
  51. maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
  52. maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
  53. maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
  54. maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
  55. maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
  56. maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
  57. maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
  58. maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
  59. maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
  60. maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
  61. maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
  62. maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
  63. maxframe/codegen/spe/dataframe/tseries.py +46 -0
  64. maxframe/codegen/spe/dataframe/udf.py +62 -0
  65. maxframe/codegen/spe/dataframe/value_counts.py +31 -0
  66. maxframe/codegen/spe/dataframe/window.py +65 -0
  67. maxframe/codegen/spe/learn/__init__.py +15 -0
  68. maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
  69. maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
  70. maxframe/codegen/spe/learn/contrib/models.py +41 -0
  71. maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
  72. maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
  73. maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
  74. maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
  75. maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
  76. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
  77. maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
  78. maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
  79. maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
  80. maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
  81. maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
  82. maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
  83. maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
  84. maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
  85. maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
  86. maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
  87. maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
  88. maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
  89. maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
  90. maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
  91. maxframe/codegen/spe/learn/utils/__init__.py +15 -0
  92. maxframe/codegen/spe/learn/utils/checks.py +55 -0
  93. maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
  94. maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
  95. maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
  96. maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
  97. maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
  98. maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
  99. maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
  100. maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
  101. maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
  102. maxframe/codegen/spe/learn/utils/validation.py +35 -0
  103. maxframe/codegen/spe/objects.py +26 -0
  104. maxframe/codegen/spe/remote.py +29 -0
  105. maxframe/codegen/spe/tensor/__init__.py +28 -0
  106. maxframe/codegen/spe/tensor/arithmetic.py +95 -0
  107. maxframe/codegen/spe/tensor/core.py +41 -0
  108. maxframe/codegen/spe/tensor/datasource.py +165 -0
  109. maxframe/codegen/spe/tensor/extensions.py +35 -0
  110. maxframe/codegen/spe/tensor/fetch.py +26 -0
  111. maxframe/codegen/spe/tensor/indexing.py +63 -0
  112. maxframe/codegen/spe/tensor/linalg.py +63 -0
  113. maxframe/codegen/spe/tensor/merge.py +31 -0
  114. maxframe/codegen/spe/tensor/misc.py +121 -0
  115. maxframe/codegen/spe/tensor/random.py +29 -0
  116. maxframe/codegen/spe/tensor/reduction.py +39 -0
  117. maxframe/codegen/spe/tensor/reshape.py +26 -0
  118. maxframe/codegen/spe/tensor/sort.py +42 -0
  119. maxframe/codegen/spe/tensor/special.py +35 -0
  120. maxframe/codegen/spe/tensor/statistics.py +24 -0
  121. maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
  122. maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
  123. maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
  124. maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
  125. maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
  126. maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
  127. maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
  128. maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
  129. maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
  130. maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
  131. maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
  132. maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
  133. maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
  134. maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
  135. maxframe/codegen/spe/tests/__init__.py +13 -0
  136. maxframe/codegen/spe/tests/test_remote.py +29 -0
  137. maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
  138. maxframe/codegen/spe/utils.py +54 -0
  139. maxframe/codegen/tests/__init__.py +13 -0
  140. maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
  141. maxframe/config/__init__.py +1 -1
  142. maxframe/config/config.py +50 -23
  143. maxframe/config/tests/test_config.py +4 -12
  144. maxframe/config/validators.py +5 -0
  145. maxframe/conftest.py +38 -10
  146. maxframe/core/__init__.py +1 -0
  147. maxframe/core/context.py +110 -0
  148. maxframe/core/entity/__init__.py +1 -0
  149. maxframe/core/entity/core.py +0 -7
  150. maxframe/core/entity/objects.py +19 -5
  151. maxframe/core/entity/output_types.py +11 -0
  152. maxframe/core/entity/tests/test_objects.py +11 -12
  153. maxframe/core/entity/tileables.py +3 -1
  154. maxframe/core/entity/utils.py +15 -0
  155. maxframe/core/graph/__init__.py +6 -1
  156. maxframe/core/graph/builder/base.py +5 -1
  157. maxframe/core/graph/core.cp38-win32.pyd +0 -0
  158. maxframe/core/graph/core.pyx +17 -6
  159. maxframe/core/graph/entity.py +18 -6
  160. maxframe/core/operator/__init__.py +8 -3
  161. maxframe/core/operator/base.py +35 -12
  162. maxframe/core/operator/core.py +37 -14
  163. maxframe/core/operator/fetch.py +5 -18
  164. maxframe/core/operator/objects.py +0 -20
  165. maxframe/core/operator/shuffle.py +6 -72
  166. maxframe/dataframe/__init__.py +1 -0
  167. maxframe/dataframe/accessors/datetime_/core.py +7 -4
  168. maxframe/dataframe/accessors/string_/core.py +9 -6
  169. maxframe/dataframe/arithmetic/core.py +31 -20
  170. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
  171. maxframe/dataframe/core.py +98 -91
  172. maxframe/dataframe/datasource/core.py +8 -1
  173. maxframe/dataframe/datasource/date_range.py +8 -0
  174. maxframe/dataframe/datasource/from_index.py +9 -5
  175. maxframe/dataframe/datasource/from_records.py +9 -2
  176. maxframe/dataframe/datasource/from_tensor.py +32 -21
  177. maxframe/dataframe/datasource/read_csv.py +8 -2
  178. maxframe/dataframe/datasource/read_odps_query.py +33 -3
  179. maxframe/dataframe/datasource/read_odps_table.py +20 -5
  180. maxframe/dataframe/datasource/read_parquet.py +8 -3
  181. maxframe/dataframe/datasource/tests/test_datasource.py +33 -0
  182. maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
  183. maxframe/dataframe/datastore/to_csv.py +7 -3
  184. maxframe/dataframe/datastore/to_odps.py +42 -6
  185. maxframe/dataframe/extensions/__init__.py +6 -1
  186. maxframe/dataframe/extensions/apply_chunk.py +96 -136
  187. maxframe/dataframe/extensions/flatjson.py +3 -2
  188. maxframe/dataframe/extensions/flatmap.py +15 -7
  189. maxframe/dataframe/fetch/core.py +12 -1
  190. maxframe/dataframe/groupby/__init__.py +7 -0
  191. maxframe/dataframe/groupby/aggregation.py +62 -9
  192. maxframe/dataframe/groupby/apply.py +50 -74
  193. maxframe/dataframe/groupby/apply_chunk.py +393 -0
  194. maxframe/dataframe/groupby/core.py +80 -17
  195. maxframe/dataframe/groupby/extensions.py +26 -0
  196. maxframe/dataframe/groupby/fill.py +9 -4
  197. maxframe/dataframe/groupby/sample.py +7 -7
  198. maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
  199. maxframe/dataframe/groupby/transform.py +57 -54
  200. maxframe/dataframe/indexing/align.py +7 -6
  201. maxframe/dataframe/indexing/getitem.py +9 -8
  202. maxframe/dataframe/indexing/iloc.py +28 -23
  203. maxframe/dataframe/indexing/insert.py +7 -3
  204. maxframe/dataframe/indexing/loc.py +9 -8
  205. maxframe/dataframe/indexing/reindex.py +36 -30
  206. maxframe/dataframe/indexing/rename_axis.py +18 -10
  207. maxframe/dataframe/indexing/reset_index.py +0 -2
  208. maxframe/dataframe/indexing/sample.py +13 -9
  209. maxframe/dataframe/indexing/set_axis.py +9 -6
  210. maxframe/dataframe/indexing/setitem.py +8 -5
  211. maxframe/dataframe/indexing/where.py +12 -9
  212. maxframe/dataframe/merge/__init__.py +0 -1
  213. maxframe/dataframe/merge/concat.py +10 -31
  214. maxframe/dataframe/merge/merge.py +2 -24
  215. maxframe/dataframe/misc/__init__.py +6 -0
  216. maxframe/dataframe/misc/_duplicate.py +7 -3
  217. maxframe/dataframe/misc/apply.py +106 -139
  218. maxframe/dataframe/misc/astype.py +3 -2
  219. maxframe/dataframe/misc/case_when.py +11 -7
  220. maxframe/dataframe/misc/cut.py +11 -10
  221. maxframe/dataframe/misc/describe.py +7 -3
  222. maxframe/dataframe/misc/drop.py +13 -11
  223. maxframe/dataframe/misc/eval.py +0 -2
  224. maxframe/dataframe/misc/get_dummies.py +78 -49
  225. maxframe/dataframe/misc/isin.py +13 -10
  226. maxframe/dataframe/misc/map.py +21 -6
  227. maxframe/dataframe/misc/melt.py +8 -1
  228. maxframe/dataframe/misc/pivot.py +232 -0
  229. maxframe/dataframe/misc/pivot_table.py +52 -40
  230. maxframe/dataframe/misc/rechunk.py +59 -0
  231. maxframe/dataframe/misc/shift.py +7 -4
  232. maxframe/dataframe/misc/stack.py +5 -3
  233. maxframe/dataframe/misc/tests/test_misc.py +167 -1
  234. maxframe/dataframe/misc/transform.py +63 -65
  235. maxframe/dataframe/misc/value_counts.py +7 -4
  236. maxframe/dataframe/missing/dropna.py +16 -7
  237. maxframe/dataframe/missing/fillna.py +18 -10
  238. maxframe/dataframe/missing/replace.py +10 -6
  239. maxframe/dataframe/missing/tests/test_missing.py +2 -2
  240. maxframe/dataframe/operators.py +1 -27
  241. maxframe/dataframe/reduction/aggregation.py +128 -3
  242. maxframe/dataframe/reduction/core.py +20 -6
  243. maxframe/dataframe/reduction/median.py +1 -1
  244. maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
  245. maxframe/dataframe/reduction/unique.py +53 -7
  246. maxframe/dataframe/statistics/corr.py +9 -6
  247. maxframe/dataframe/statistics/quantile.py +9 -6
  248. maxframe/dataframe/tseries/to_datetime.py +6 -4
  249. maxframe/dataframe/utils.py +219 -31
  250. maxframe/dataframe/window/rolling.py +7 -4
  251. maxframe/env.py +1 -0
  252. maxframe/errors.py +9 -0
  253. maxframe/extension.py +13 -2
  254. maxframe/io/objects/core.py +67 -51
  255. maxframe/io/objects/tensor.py +73 -17
  256. maxframe/io/objects/tests/test_object_io.py +8 -55
  257. maxframe/io/odpsio/arrow.py +15 -2
  258. maxframe/io/odpsio/schema.py +43 -13
  259. maxframe/io/odpsio/tableio.py +63 -11
  260. maxframe/io/odpsio/tests/test_arrow.py +1 -2
  261. maxframe/io/odpsio/tests/test_schema.py +114 -1
  262. maxframe/io/odpsio/tests/test_tableio.py +42 -0
  263. maxframe/io/odpsio/tests/test_volumeio.py +22 -48
  264. maxframe/learn/__init__.py +2 -2
  265. maxframe/learn/contrib/__init__.py +2 -2
  266. maxframe/learn/contrib/graph/connected_components.py +2 -1
  267. maxframe/learn/contrib/lightgbm/__init__.py +33 -0
  268. maxframe/learn/contrib/lightgbm/_predict.py +138 -0
  269. maxframe/learn/contrib/lightgbm/_train.py +163 -0
  270. maxframe/learn/contrib/lightgbm/callback.py +114 -0
  271. maxframe/learn/contrib/lightgbm/classifier.py +199 -0
  272. maxframe/learn/contrib/lightgbm/core.py +372 -0
  273. maxframe/learn/contrib/lightgbm/dataset.py +153 -0
  274. maxframe/learn/contrib/lightgbm/regressor.py +29 -0
  275. maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
  276. maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
  277. maxframe/learn/contrib/llm/models/dashscope.py +34 -0
  278. maxframe/learn/contrib/llm/models/managed.py +15 -0
  279. maxframe/learn/contrib/llm/multi_modal.py +92 -0
  280. maxframe/learn/contrib/llm/text.py +21 -5
  281. maxframe/learn/contrib/models.py +38 -9
  282. maxframe/learn/contrib/utils.py +55 -0
  283. maxframe/learn/contrib/xgboost/callback.py +86 -0
  284. maxframe/learn/contrib/xgboost/classifier.py +26 -30
  285. maxframe/learn/contrib/xgboost/core.py +53 -42
  286. maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
  287. maxframe/learn/contrib/xgboost/predict.py +13 -8
  288. maxframe/learn/contrib/xgboost/regressor.py +28 -27
  289. maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
  290. maxframe/learn/contrib/xgboost/train.py +59 -16
  291. maxframe/learn/core.py +252 -0
  292. maxframe/learn/datasets/__init__.py +20 -0
  293. maxframe/learn/datasets/samples_generator.py +628 -0
  294. maxframe/learn/linear_model/__init__.py +15 -0
  295. maxframe/learn/linear_model/_base.py +163 -0
  296. maxframe/learn/linear_model/_lin_reg.py +175 -0
  297. maxframe/learn/metrics/__init__.py +25 -0
  298. maxframe/learn/metrics/_check_targets.py +95 -0
  299. maxframe/learn/metrics/_classification.py +1121 -0
  300. maxframe/learn/metrics/_regression.py +256 -0
  301. maxframe/learn/model_selection/__init__.py +15 -0
  302. maxframe/learn/model_selection/_split.py +451 -0
  303. maxframe/learn/model_selection/tests/__init__.py +13 -0
  304. maxframe/learn/model_selection/tests/test_split.py +156 -0
  305. maxframe/learn/preprocessing/__init__.py +16 -0
  306. maxframe/learn/preprocessing/_data/__init__.py +17 -0
  307. maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
  308. maxframe/learn/preprocessing/_data/normalize.py +127 -0
  309. maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
  310. maxframe/learn/preprocessing/_data/utils.py +79 -0
  311. maxframe/learn/preprocessing/_label/__init__.py +16 -0
  312. maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
  313. maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
  314. maxframe/learn/utils/__init__.py +4 -0
  315. maxframe/learn/utils/_encode.py +314 -0
  316. maxframe/learn/utils/checks.py +161 -0
  317. maxframe/learn/utils/core.py +33 -0
  318. maxframe/learn/utils/extmath.py +176 -0
  319. maxframe/learn/utils/multiclass.py +292 -0
  320. maxframe/learn/utils/shuffle.py +114 -0
  321. maxframe/learn/utils/sparsefuncs.py +87 -0
  322. maxframe/learn/utils/validation.py +775 -0
  323. maxframe/lib/__init__.py +0 -2
  324. maxframe/lib/compat.py +145 -0
  325. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  326. maxframe/lib/mmh3.cp38-win32.pyd +0 -0
  327. maxframe/lib/sparse/__init__.py +10 -15
  328. maxframe/lib/sparse/array.py +45 -33
  329. maxframe/lib/sparse/core.py +0 -2
  330. maxframe/lib/sparse/linalg.py +31 -0
  331. maxframe/lib/sparse/matrix.py +5 -2
  332. maxframe/lib/sparse/tests/__init__.py +0 -2
  333. maxframe/lib/sparse/tests/test_sparse.py +53 -53
  334. maxframe/lib/sparse/vector.py +0 -2
  335. maxframe/mixin.py +59 -2
  336. maxframe/opcodes.py +13 -5
  337. maxframe/protocol.py +67 -14
  338. maxframe/remote/core.py +16 -14
  339. maxframe/remote/run_script.py +6 -3
  340. maxframe/serialization/__init__.py +2 -0
  341. maxframe/serialization/core.cp38-win32.pyd +0 -0
  342. maxframe/serialization/core.pxd +3 -0
  343. maxframe/serialization/core.pyi +3 -1
  344. maxframe/serialization/core.pyx +82 -4
  345. maxframe/serialization/pandas.py +5 -1
  346. maxframe/serialization/serializables/core.py +6 -5
  347. maxframe/serialization/serializables/field.py +2 -2
  348. maxframe/serialization/serializables/tests/test_field_type.py +3 -5
  349. maxframe/serialization/tests/test_serial.py +27 -0
  350. maxframe/session.py +4 -71
  351. maxframe/sperunner.py +165 -0
  352. maxframe/tensor/__init__.py +35 -2
  353. maxframe/tensor/arithmetic/__init__.py +2 -4
  354. maxframe/tensor/arithmetic/abs.py +0 -2
  355. maxframe/tensor/arithmetic/absolute.py +0 -2
  356. maxframe/tensor/arithmetic/add.py +34 -4
  357. maxframe/tensor/arithmetic/angle.py +0 -2
  358. maxframe/tensor/arithmetic/arccos.py +1 -4
  359. maxframe/tensor/arithmetic/arccosh.py +1 -3
  360. maxframe/tensor/arithmetic/arcsin.py +0 -2
  361. maxframe/tensor/arithmetic/arcsinh.py +0 -2
  362. maxframe/tensor/arithmetic/arctan.py +0 -2
  363. maxframe/tensor/arithmetic/arctan2.py +0 -2
  364. maxframe/tensor/arithmetic/arctanh.py +0 -2
  365. maxframe/tensor/arithmetic/around.py +0 -2
  366. maxframe/tensor/arithmetic/bitand.py +0 -2
  367. maxframe/tensor/arithmetic/bitor.py +1 -3
  368. maxframe/tensor/arithmetic/bitxor.py +1 -3
  369. maxframe/tensor/arithmetic/cbrt.py +0 -2
  370. maxframe/tensor/arithmetic/ceil.py +0 -2
  371. maxframe/tensor/arithmetic/clip.py +13 -13
  372. maxframe/tensor/arithmetic/conj.py +0 -2
  373. maxframe/tensor/arithmetic/copysign.py +0 -2
  374. maxframe/tensor/arithmetic/core.py +47 -39
  375. maxframe/tensor/arithmetic/cos.py +1 -3
  376. maxframe/tensor/arithmetic/cosh.py +0 -2
  377. maxframe/tensor/arithmetic/deg2rad.py +0 -2
  378. maxframe/tensor/arithmetic/degrees.py +0 -2
  379. maxframe/tensor/arithmetic/divide.py +0 -2
  380. maxframe/tensor/arithmetic/equal.py +0 -2
  381. maxframe/tensor/arithmetic/exp.py +1 -3
  382. maxframe/tensor/arithmetic/exp2.py +0 -2
  383. maxframe/tensor/arithmetic/expm1.py +0 -2
  384. maxframe/tensor/arithmetic/fabs.py +0 -2
  385. maxframe/tensor/arithmetic/fix.py +0 -2
  386. maxframe/tensor/arithmetic/float_power.py +0 -2
  387. maxframe/tensor/arithmetic/floor.py +0 -2
  388. maxframe/tensor/arithmetic/floordiv.py +0 -2
  389. maxframe/tensor/arithmetic/fmax.py +0 -2
  390. maxframe/tensor/arithmetic/fmin.py +0 -2
  391. maxframe/tensor/arithmetic/fmod.py +0 -2
  392. maxframe/tensor/arithmetic/frexp.py +6 -2
  393. maxframe/tensor/arithmetic/greater.py +0 -2
  394. maxframe/tensor/arithmetic/greater_equal.py +0 -2
  395. maxframe/tensor/arithmetic/hypot.py +0 -2
  396. maxframe/tensor/arithmetic/i0.py +1 -3
  397. maxframe/tensor/arithmetic/imag.py +0 -2
  398. maxframe/tensor/arithmetic/invert.py +1 -3
  399. maxframe/tensor/arithmetic/isclose.py +0 -2
  400. maxframe/tensor/arithmetic/iscomplex.py +0 -2
  401. maxframe/tensor/arithmetic/isfinite.py +1 -3
  402. maxframe/tensor/arithmetic/isinf.py +0 -2
  403. maxframe/tensor/arithmetic/isnan.py +0 -2
  404. maxframe/tensor/arithmetic/isreal.py +0 -2
  405. maxframe/tensor/arithmetic/ldexp.py +0 -2
  406. maxframe/tensor/arithmetic/less.py +0 -2
  407. maxframe/tensor/arithmetic/less_equal.py +0 -2
  408. maxframe/tensor/arithmetic/log.py +1 -3
  409. maxframe/tensor/arithmetic/log10.py +1 -3
  410. maxframe/tensor/arithmetic/log1p.py +1 -3
  411. maxframe/tensor/arithmetic/log2.py +1 -3
  412. maxframe/tensor/arithmetic/logaddexp.py +0 -2
  413. maxframe/tensor/arithmetic/logaddexp2.py +0 -2
  414. maxframe/tensor/arithmetic/logical_and.py +0 -2
  415. maxframe/tensor/arithmetic/logical_not.py +1 -3
  416. maxframe/tensor/arithmetic/logical_or.py +0 -2
  417. maxframe/tensor/arithmetic/logical_xor.py +0 -2
  418. maxframe/tensor/arithmetic/lshift.py +0 -2
  419. maxframe/tensor/arithmetic/maximum.py +0 -2
  420. maxframe/tensor/arithmetic/minimum.py +0 -2
  421. maxframe/tensor/arithmetic/mod.py +0 -2
  422. maxframe/tensor/arithmetic/modf.py +6 -2
  423. maxframe/tensor/arithmetic/multiply.py +37 -4
  424. maxframe/tensor/arithmetic/nan_to_num.py +0 -2
  425. maxframe/tensor/arithmetic/negative.py +0 -2
  426. maxframe/tensor/arithmetic/nextafter.py +0 -2
  427. maxframe/tensor/arithmetic/not_equal.py +0 -2
  428. maxframe/tensor/arithmetic/positive.py +0 -2
  429. maxframe/tensor/arithmetic/power.py +0 -2
  430. maxframe/tensor/arithmetic/rad2deg.py +0 -2
  431. maxframe/tensor/arithmetic/radians.py +0 -2
  432. maxframe/tensor/arithmetic/real.py +0 -2
  433. maxframe/tensor/arithmetic/reciprocal.py +5 -3
  434. maxframe/tensor/arithmetic/rint.py +1 -3
  435. maxframe/tensor/arithmetic/rshift.py +0 -2
  436. maxframe/tensor/arithmetic/setimag.py +0 -2
  437. maxframe/tensor/arithmetic/setreal.py +0 -2
  438. maxframe/tensor/arithmetic/sign.py +0 -2
  439. maxframe/tensor/arithmetic/signbit.py +0 -2
  440. maxframe/tensor/arithmetic/sin.py +0 -2
  441. maxframe/tensor/arithmetic/sinc.py +1 -3
  442. maxframe/tensor/arithmetic/sinh.py +0 -2
  443. maxframe/tensor/arithmetic/spacing.py +0 -2
  444. maxframe/tensor/arithmetic/sqrt.py +0 -2
  445. maxframe/tensor/arithmetic/square.py +0 -2
  446. maxframe/tensor/arithmetic/subtract.py +4 -2
  447. maxframe/tensor/arithmetic/tan.py +0 -2
  448. maxframe/tensor/arithmetic/tanh.py +0 -2
  449. maxframe/tensor/arithmetic/tests/__init__.py +0 -2
  450. maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
  451. maxframe/tensor/arithmetic/truediv.py +0 -2
  452. maxframe/tensor/arithmetic/trunc.py +0 -2
  453. maxframe/tensor/arithmetic/utils.py +32 -6
  454. maxframe/tensor/array_utils.py +3 -25
  455. maxframe/tensor/core.py +6 -6
  456. maxframe/tensor/datasource/__init__.py +10 -2
  457. maxframe/tensor/datasource/arange.py +0 -2
  458. maxframe/tensor/datasource/array.py +3 -22
  459. maxframe/tensor/datasource/core.py +15 -10
  460. maxframe/tensor/datasource/diag.py +140 -0
  461. maxframe/tensor/datasource/diagflat.py +69 -0
  462. maxframe/tensor/datasource/empty.py +0 -2
  463. maxframe/tensor/datasource/eye.py +95 -0
  464. maxframe/tensor/datasource/from_dataframe.py +0 -2
  465. maxframe/tensor/datasource/from_dense.py +0 -17
  466. maxframe/tensor/datasource/from_sparse.py +0 -2
  467. maxframe/tensor/datasource/full.py +0 -2
  468. maxframe/tensor/datasource/identity.py +54 -0
  469. maxframe/tensor/datasource/indices.py +115 -0
  470. maxframe/tensor/datasource/linspace.py +140 -0
  471. maxframe/tensor/datasource/meshgrid.py +135 -0
  472. maxframe/tensor/datasource/ones.py +8 -3
  473. maxframe/tensor/datasource/tests/test_datasource.py +32 -1
  474. maxframe/tensor/datasource/tri_array.py +107 -0
  475. maxframe/tensor/datasource/zeros.py +7 -3
  476. maxframe/tensor/extensions/__init__.py +31 -0
  477. maxframe/tensor/extensions/accessor.py +25 -0
  478. maxframe/tensor/extensions/apply_chunk.py +137 -0
  479. maxframe/tensor/indexing/__init__.py +1 -1
  480. maxframe/tensor/indexing/choose.py +8 -6
  481. maxframe/tensor/indexing/compress.py +0 -2
  482. maxframe/tensor/indexing/extract.py +0 -2
  483. maxframe/tensor/indexing/fill_diagonal.py +9 -6
  484. maxframe/tensor/indexing/flatnonzero.py +1 -3
  485. maxframe/tensor/indexing/getitem.py +10 -43
  486. maxframe/tensor/indexing/nonzero.py +2 -4
  487. maxframe/tensor/indexing/setitem.py +19 -9
  488. maxframe/tensor/indexing/slice.py +6 -3
  489. maxframe/tensor/indexing/take.py +0 -2
  490. maxframe/tensor/indexing/tests/__init__.py +0 -2
  491. maxframe/tensor/indexing/tests/test_indexing.py +0 -2
  492. maxframe/tensor/indexing/unravel_index.py +6 -6
  493. maxframe/tensor/lib/__init__.py +16 -0
  494. maxframe/tensor/lib/index_tricks.py +404 -0
  495. maxframe/tensor/linalg/__init__.py +36 -0
  496. maxframe/tensor/linalg/dot.py +145 -0
  497. maxframe/tensor/linalg/inner.py +36 -0
  498. maxframe/tensor/linalg/inv.py +83 -0
  499. maxframe/tensor/linalg/lu.py +115 -0
  500. maxframe/tensor/linalg/matmul.py +225 -0
  501. maxframe/tensor/linalg/qr.py +124 -0
  502. maxframe/tensor/linalg/solve_triangular.py +103 -0
  503. maxframe/tensor/linalg/svd.py +167 -0
  504. maxframe/tensor/linalg/tensordot.py +213 -0
  505. maxframe/tensor/linalg/vdot.py +73 -0
  506. maxframe/tensor/merge/__init__.py +4 -0
  507. maxframe/tensor/merge/append.py +74 -0
  508. maxframe/tensor/merge/column_stack.py +63 -0
  509. maxframe/tensor/merge/concatenate.py +3 -2
  510. maxframe/tensor/merge/dstack.py +71 -0
  511. maxframe/tensor/merge/hstack.py +70 -0
  512. maxframe/tensor/merge/stack.py +0 -2
  513. maxframe/tensor/merge/tests/test_merge.py +0 -2
  514. maxframe/tensor/misc/__init__.py +18 -5
  515. maxframe/tensor/misc/astype.py +10 -8
  516. maxframe/tensor/misc/broadcast_to.py +1 -1
  517. maxframe/tensor/misc/copy.py +64 -0
  518. maxframe/tensor/misc/diff.py +115 -0
  519. maxframe/tensor/misc/flatten.py +63 -0
  520. maxframe/tensor/misc/in1d.py +94 -0
  521. maxframe/tensor/misc/isin.py +130 -0
  522. maxframe/tensor/misc/ndim.py +53 -0
  523. maxframe/tensor/misc/ravel.py +0 -2
  524. maxframe/tensor/misc/repeat.py +129 -0
  525. maxframe/tensor/misc/searchsorted.py +147 -0
  526. maxframe/tensor/misc/setdiff1d.py +58 -0
  527. maxframe/tensor/misc/squeeze.py +117 -0
  528. maxframe/tensor/misc/swapaxes.py +113 -0
  529. maxframe/tensor/misc/tests/test_misc.py +0 -2
  530. maxframe/tensor/misc/transpose.py +8 -4
  531. maxframe/tensor/misc/trapezoid.py +123 -0
  532. maxframe/tensor/misc/unique.py +0 -1
  533. maxframe/tensor/misc/where.py +10 -8
  534. maxframe/tensor/operators.py +0 -34
  535. maxframe/tensor/random/__init__.py +3 -5
  536. maxframe/tensor/random/binomial.py +0 -2
  537. maxframe/tensor/random/bytes.py +0 -2
  538. maxframe/tensor/random/chisquare.py +0 -2
  539. maxframe/tensor/random/choice.py +9 -8
  540. maxframe/tensor/random/core.py +20 -5
  541. maxframe/tensor/random/dirichlet.py +0 -2
  542. maxframe/tensor/random/exponential.py +0 -2
  543. maxframe/tensor/random/f.py +2 -4
  544. maxframe/tensor/random/gamma.py +0 -2
  545. maxframe/tensor/random/geometric.py +0 -2
  546. maxframe/tensor/random/gumbel.py +0 -2
  547. maxframe/tensor/random/hypergeometric.py +0 -2
  548. maxframe/tensor/random/laplace.py +2 -4
  549. maxframe/tensor/random/logistic.py +0 -2
  550. maxframe/tensor/random/lognormal.py +0 -2
  551. maxframe/tensor/random/logseries.py +0 -2
  552. maxframe/tensor/random/multinomial.py +0 -2
  553. maxframe/tensor/random/multivariate_normal.py +0 -2
  554. maxframe/tensor/random/negative_binomial.py +0 -2
  555. maxframe/tensor/random/noncentral_chisquare.py +0 -2
  556. maxframe/tensor/random/noncentral_f.py +1 -3
  557. maxframe/tensor/random/normal.py +0 -2
  558. maxframe/tensor/random/pareto.py +0 -2
  559. maxframe/tensor/random/permutation.py +6 -3
  560. maxframe/tensor/random/poisson.py +0 -2
  561. maxframe/tensor/random/power.py +0 -2
  562. maxframe/tensor/random/rand.py +0 -2
  563. maxframe/tensor/random/randint.py +0 -2
  564. maxframe/tensor/random/randn.py +0 -2
  565. maxframe/tensor/random/random_integers.py +0 -2
  566. maxframe/tensor/random/random_sample.py +0 -2
  567. maxframe/tensor/random/rayleigh.py +0 -2
  568. maxframe/tensor/random/standard_cauchy.py +0 -2
  569. maxframe/tensor/random/standard_exponential.py +0 -2
  570. maxframe/tensor/random/standard_gamma.py +0 -2
  571. maxframe/tensor/random/standard_normal.py +0 -2
  572. maxframe/tensor/random/standard_t.py +0 -2
  573. maxframe/tensor/random/tests/__init__.py +0 -2
  574. maxframe/tensor/random/tests/test_random.py +0 -2
  575. maxframe/tensor/random/triangular.py +0 -2
  576. maxframe/tensor/random/uniform.py +0 -2
  577. maxframe/tensor/random/vonmises.py +0 -2
  578. maxframe/tensor/random/wald.py +0 -2
  579. maxframe/tensor/random/weibull.py +0 -2
  580. maxframe/tensor/random/zipf.py +0 -2
  581. maxframe/tensor/reduction/__init__.py +0 -2
  582. maxframe/tensor/reduction/all.py +0 -2
  583. maxframe/tensor/reduction/allclose.py +0 -2
  584. maxframe/tensor/reduction/any.py +0 -2
  585. maxframe/tensor/reduction/argmax.py +1 -3
  586. maxframe/tensor/reduction/argmin.py +1 -3
  587. maxframe/tensor/reduction/array_equal.py +0 -2
  588. maxframe/tensor/reduction/core.py +0 -2
  589. maxframe/tensor/reduction/count_nonzero.py +0 -2
  590. maxframe/tensor/reduction/cumprod.py +0 -2
  591. maxframe/tensor/reduction/cumsum.py +0 -2
  592. maxframe/tensor/reduction/max.py +0 -2
  593. maxframe/tensor/reduction/mean.py +0 -2
  594. maxframe/tensor/reduction/min.py +0 -2
  595. maxframe/tensor/reduction/nanargmax.py +0 -2
  596. maxframe/tensor/reduction/nanargmin.py +0 -2
  597. maxframe/tensor/reduction/nancumprod.py +0 -2
  598. maxframe/tensor/reduction/nancumsum.py +0 -2
  599. maxframe/tensor/reduction/nanmax.py +0 -2
  600. maxframe/tensor/reduction/nanmean.py +0 -2
  601. maxframe/tensor/reduction/nanmin.py +0 -2
  602. maxframe/tensor/reduction/nanprod.py +0 -2
  603. maxframe/tensor/reduction/nanstd.py +0 -2
  604. maxframe/tensor/reduction/nansum.py +0 -2
  605. maxframe/tensor/reduction/nanvar.py +0 -2
  606. maxframe/tensor/reduction/prod.py +0 -2
  607. maxframe/tensor/reduction/std.py +0 -2
  608. maxframe/tensor/reduction/sum.py +0 -2
  609. maxframe/tensor/reduction/tests/test_reduction.py +1 -4
  610. maxframe/tensor/reduction/var.py +0 -2
  611. maxframe/tensor/reshape/__init__.py +0 -2
  612. maxframe/tensor/reshape/reshape.py +6 -5
  613. maxframe/tensor/reshape/tests/__init__.py +0 -2
  614. maxframe/tensor/reshape/tests/test_reshape.py +0 -2
  615. maxframe/tensor/sort/__init__.py +16 -0
  616. maxframe/tensor/sort/argsort.py +150 -0
  617. maxframe/tensor/sort/sort.py +295 -0
  618. maxframe/tensor/special/__init__.py +37 -0
  619. maxframe/tensor/special/core.py +38 -0
  620. maxframe/tensor/special/misc.py +142 -0
  621. maxframe/tensor/special/statistical.py +56 -0
  622. maxframe/tensor/statistics/__init__.py +5 -0
  623. maxframe/tensor/statistics/average.py +143 -0
  624. maxframe/tensor/statistics/bincount.py +133 -0
  625. maxframe/tensor/statistics/quantile.py +10 -8
  626. maxframe/tensor/ufunc/__init__.py +0 -2
  627. maxframe/tensor/ufunc/ufunc.py +0 -2
  628. maxframe/tensor/utils.py +21 -3
  629. maxframe/tests/test_protocol.py +3 -3
  630. maxframe/tests/test_utils.py +210 -1
  631. maxframe/tests/utils.py +67 -1
  632. maxframe/udf.py +76 -6
  633. maxframe/utils.py +418 -17
  634. {maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/METADATA +5 -1
  635. maxframe-2.0.0b1.dist-info/RECORD +939 -0
  636. maxframe_client/clients/framedriver.py +19 -3
  637. maxframe_client/fetcher.py +113 -6
  638. maxframe_client/session/odps.py +173 -38
  639. maxframe_client/session/task.py +3 -1
  640. maxframe_client/tests/test_session.py +41 -5
  641. maxframe-1.3.0.dist-info/RECORD +0 -705
  642. {maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/WHEEL +0 -0
  643. {maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/top_level.txt +0 -0
@@ -12,8 +12,9 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from typing import MutableMapping, Union
16
+
15
17
  import numpy as np
16
- import pandas as pd
17
18
 
18
19
  from ... import opcodes
19
20
  from ...core import OutputType
@@ -26,12 +27,13 @@ from ...serialization.serializables import (
26
27
  StringField,
27
28
  TupleField,
28
29
  )
29
- from ...utils import get_func_token, quiet_stdio, tokenize
30
+ from ...udf import BuiltinFunction, MarkedFunction
31
+ from ...utils import copy_if_possible, get_func_token, make_dtype, make_dtypes, tokenize
30
32
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
31
33
  from ..utils import (
34
+ InferredDataFrameMeta,
32
35
  copy_func_scheduling_hints,
33
- make_dtype,
34
- make_dtypes,
36
+ infer_dataframe_return_value,
35
37
  parse_index,
36
38
  validate_output_types,
37
39
  )
@@ -56,6 +58,7 @@ class GroupByApply(
56
58
  args = TupleField("args", default_factory=tuple)
57
59
  kwds = DictField("kwds", default_factory=dict)
58
60
  maybe_agg = BoolField("maybe_agg", default=None)
61
+
59
62
  logic_key = StringField("logic_key", default=None)
60
63
  func_key = AnyField("func_key", default=None)
61
64
  need_clean_up_func = BoolField("need_clean_up_func", default=False)
@@ -65,6 +68,9 @@ class GroupByApply(
65
68
  if hasattr(self, "func"):
66
69
  copy_func_scheduling_hints(self.func, self)
67
70
 
71
+ def has_custom_code(self) -> bool:
72
+ return not isinstance(self.func, BuiltinFunction)
73
+
68
74
  def _update_key(self):
69
75
  values = [v for v in self._values_ if v is not self.func] + [
70
76
  get_func_token(self.func)
@@ -73,96 +79,66 @@ class GroupByApply(
73
79
  return self
74
80
 
75
81
  def _infer_df_func_returns(
76
- self, in_groupby, in_df, dtypes=None, dtype=None, name=None, index=None
77
- ):
78
- index_value, output_type, new_dtypes = None, None, None
79
-
80
- if self.output_types is not None and (dtypes is not None or dtype is not None):
81
- ret_dtypes = dtypes if dtypes is not None else (dtype, name)
82
- ret_index_value = parse_index(index) if index is not None else None
83
- return ret_dtypes, ret_index_value
84
-
85
- try:
86
- infer_df = in_groupby.op.build_mock_groupby().apply(
87
- self.func, *self.args, **self.kwds
88
- )
89
-
90
- if len(infer_df) <= 2:
91
- # we create mock df with 4 rows, 2 groups
92
- # if return df has 2 rows, we assume that
93
- # it's an aggregation operation
94
- self.maybe_agg = True
95
-
96
- # todo return proper index when sort=True is implemented
97
- index_value = parse_index(infer_df.index[:0], in_df.key, self.func)
98
-
99
- # for backward compatibility
100
- dtype = dtype if dtype is not None else dtypes
101
- if isinstance(infer_df, pd.DataFrame):
102
- output_type = output_type or OutputType.dataframe
103
- new_dtypes = new_dtypes or infer_df.dtypes
104
- elif isinstance(infer_df, pd.Series):
105
- output_type = output_type or OutputType.series
106
- new_dtypes = new_dtypes or (
107
- name or infer_df.name,
108
- dtype or infer_df.dtype,
109
- )
110
- else:
111
- output_type = OutputType.series
112
- new_dtypes = (name, dtype or pd.Series(infer_df).dtype)
113
- except: # noqa: E722 # nosec
114
- pass
115
-
82
+ self, in_groupby, dtypes=None, dtype=None, name=None, index=None
83
+ ) -> InferredDataFrameMeta:
84
+ def infer_func(groupby_obj):
85
+ args = copy_if_possible(self.args)
86
+ kwds = copy_if_possible(self.kwds)
87
+ return groupby_obj.apply(self.func, *args, **kwds)
88
+
89
+ output_type = self.output_types[0] if self.output_types else None
90
+ inferred_meta = infer_dataframe_return_value(
91
+ in_groupby,
92
+ infer_func,
93
+ dtypes=dtypes,
94
+ dtype=dtype,
95
+ name=name,
96
+ index=index,
97
+ output_type=output_type,
98
+ )
116
99
  self.output_types = (
117
- [output_type]
118
- if not self.output_types and output_type
100
+ [inferred_meta.output_type]
101
+ if not self.output_types and inferred_meta.output_type
119
102
  else self.output_types
120
103
  )
121
- dtypes = new_dtypes if dtypes is None else dtypes
122
- index_value = index_value if index is None else parse_index(index)
123
- return dtypes, index_value
104
+ self.maybe_agg = inferred_meta.maybe_agg
105
+ return inferred_meta
124
106
 
125
107
  def __call__(self, groupby, dtypes=None, dtype=None, name=None, index=None):
126
- in_df = groupby
127
108
  if self.output_types and self.output_types[0] == OutputType.df_or_series:
128
109
  return self.new_df_or_series([groupby])
129
- while in_df.op.output_types[0] not in (OutputType.dataframe, OutputType.series):
130
- in_df = in_df.inputs[0]
131
-
132
- with quiet_stdio():
133
- dtypes, index_value = self._infer_df_func_returns(
134
- groupby, in_df, dtypes, dtype=dtype, name=name, index=index
135
- )
136
- if index_value is None:
137
- index_value = parse_index(None, (in_df.key, in_df.index_value.key))
138
- for arg, desc in zip((self.output_types, dtypes), ("output_types", "dtypes")):
139
- if arg is None:
140
- raise TypeError(
141
- f"Cannot determine {desc} by calculating with enumerate data, "
142
- "please specify it as arguments"
143
- )
144
110
 
111
+ inferred_meta = self._infer_df_func_returns(
112
+ groupby, dtypes=dtypes, dtype=dtype, name=name, index=index
113
+ )
114
+ inferred_meta.check_absence("output_type", "dtypes", "dtype")
145
115
  if self.output_types[0] == OutputType.dataframe:
146
- new_shape = (np.nan, len(dtypes))
116
+ new_shape = (np.nan, len(inferred_meta.dtypes))
147
117
  return self.new_dataframe(
148
118
  [groupby],
149
119
  shape=new_shape,
150
- dtypes=dtypes,
151
- index_value=index_value,
152
- columns_value=parse_index(dtypes.index, store_data=True),
120
+ dtypes=inferred_meta.dtypes,
121
+ index_value=inferred_meta.index_value,
122
+ columns_value=parse_index(inferred_meta.dtypes.index, store_data=True),
153
123
  )
154
124
  else:
155
- name = name or dtypes[0]
156
- dtype = dtype or dtypes[1]
157
125
  new_shape = (np.nan,)
158
126
  return self.new_series(
159
127
  [groupby],
160
- name=name,
128
+ name=inferred_meta.name,
161
129
  shape=new_shape,
162
- dtype=dtype,
163
- index_value=index_value,
130
+ dtype=inferred_meta.dtype,
131
+ index_value=inferred_meta.index_value,
164
132
  )
165
133
 
134
+ @classmethod
135
+ def estimate_size(
136
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "GroupByApply"
137
+ ) -> None:
138
+ if isinstance(op.func, MarkedFunction):
139
+ ctx[op.outputs[0].key] = float("inf")
140
+ super().estimate_size(ctx, op)
141
+
166
142
 
167
143
  def groupby_apply(
168
144
  groupby,
@@ -0,0 +1,393 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Any, Callable, Dict, List, MutableMapping, Tuple, Union
16
+
17
+ import numpy as np
18
+ import pandas as pd
19
+
20
+ from ... import opcodes
21
+ from ...core import OutputType
22
+ from ...lib.version import parse as parse_version
23
+ from ...serialization.serializables import (
24
+ DictField,
25
+ FunctionField,
26
+ Int32Field,
27
+ TupleField,
28
+ )
29
+ from ...udf import BuiltinFunction, MarkedFunction
30
+ from ...utils import copy_if_possible
31
+ from ..core import (
32
+ DATAFRAME_GROUPBY_TYPE,
33
+ GROUPBY_TYPE,
34
+ DataFrameGroupBy,
35
+ IndexValue,
36
+ SeriesGroupBy,
37
+ )
38
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
39
+ from ..utils import (
40
+ InferredDataFrameMeta,
41
+ build_empty_df,
42
+ copy_func_scheduling_hints,
43
+ infer_dataframe_return_value,
44
+ make_column_list,
45
+ make_dtype,
46
+ make_dtypes,
47
+ parse_index,
48
+ validate_output_types,
49
+ )
50
+
51
+ _need_enforce_group_keys = parse_version(pd.__version__) < parse_version("1.5.0")
52
+
53
+
54
+ class GroupByApplyChunk(DataFrameOperatorMixin, DataFrameOperator):
55
+ _op_type_ = opcodes.APPLY_CHUNK
56
+ _op_module_ = "dataframe.groupby"
57
+
58
+ func = FunctionField("func")
59
+ batch_rows = Int32Field("batch_rows", default=None)
60
+ args = TupleField("args", default=None)
61
+ kwargs = DictField("kwargs", default=None)
62
+
63
+ groupby_params = DictField("groupby_params", default=None)
64
+
65
+ def __init__(self, output_type=None, **kw):
66
+ if output_type:
67
+ kw["_output_types"] = [output_type]
68
+ super().__init__(**kw)
69
+ if hasattr(self, "func"):
70
+ copy_func_scheduling_hints(self.func, self)
71
+
72
+ def has_custom_code(self) -> bool:
73
+ return not isinstance(self.func, BuiltinFunction)
74
+
75
+ def _call_dataframe(self, df, dtypes, dtype, name, index_value, element_wise):
76
+ # return dataframe
77
+ if self.output_types[0] == OutputType.dataframe:
78
+ dtypes = make_dtypes(dtypes)
79
+ # apply_chunk will use generate new range index for results
80
+ return self.new_dataframe(
81
+ [df],
82
+ shape=df.shape if element_wise else (np.nan, len(dtypes)),
83
+ index_value=index_value,
84
+ columns_value=parse_index(dtypes.index, store_data=True),
85
+ dtypes=dtypes,
86
+ )
87
+
88
+ # return series
89
+ return self.new_series(
90
+ [df], shape=(np.nan,), name=name, dtype=dtype, index_value=index_value
91
+ )
92
+
93
+ def _call_series(self, series, dtypes, dtype, name, index_value, element_wise):
94
+ if self.output_types[0] == OutputType.series:
95
+ shape = series.shape if element_wise else (np.nan,)
96
+ return self.new_series(
97
+ [series],
98
+ dtype=dtype,
99
+ shape=shape,
100
+ index_value=index_value,
101
+ name=name,
102
+ )
103
+
104
+ dtypes = make_dtypes(dtypes)
105
+ return self.new_dataframe(
106
+ [series],
107
+ shape=(np.nan, len(dtypes)),
108
+ index_value=index_value,
109
+ columns_value=parse_index(dtypes.index, store_data=True),
110
+ dtypes=dtypes,
111
+ )
112
+
113
+ def __call__(
114
+ self,
115
+ groupby: Union[DataFrameGroupBy, SeriesGroupBy],
116
+ dtypes: Union[Tuple[str, Any], Dict[str, Any]] = None,
117
+ dtype: Any = None,
118
+ name: Any = None,
119
+ output_type=None,
120
+ index=None,
121
+ ):
122
+ input_df = groupby.inputs[0]
123
+ if isinstance(input_df, GROUPBY_TYPE):
124
+ input_df = input_df.inputs[0]
125
+
126
+ # if skip_infer, directly build a frame
127
+ if self.output_types and self.output_types[0] == OutputType.df_or_series:
128
+ return self.new_df_or_series([input_df])
129
+
130
+ # infer return index and dtypes
131
+ inferred_meta = self._infer_batch_func_returns(
132
+ groupby,
133
+ output_type=output_type,
134
+ dtypes=dtypes,
135
+ dtype=dtype,
136
+ name=name,
137
+ index=index,
138
+ )
139
+
140
+ if inferred_meta.index_value is None:
141
+ inferred_meta.index_value = parse_index(
142
+ None, (groupby.key, groupby.index_value.key, self.func)
143
+ )
144
+ inferred_meta.check_absence("output_type", "dtypes", "dtype")
145
+
146
+ if isinstance(groupby, DATAFRAME_GROUPBY_TYPE):
147
+ return self._call_dataframe(
148
+ input_df,
149
+ dtypes=inferred_meta.dtypes,
150
+ dtype=inferred_meta.dtype,
151
+ name=inferred_meta.name,
152
+ index_value=inferred_meta.index_value,
153
+ element_wise=inferred_meta.elementwise,
154
+ )
155
+
156
+ return self._call_series(
157
+ input_df,
158
+ dtypes=inferred_meta.dtypes,
159
+ dtype=inferred_meta.dtype,
160
+ name=inferred_meta.name,
161
+ index_value=inferred_meta.index_value,
162
+ element_wise=inferred_meta.elementwise,
163
+ )
164
+
165
+ def _infer_batch_func_returns(
166
+ self,
167
+ input_groupby: Union[DataFrameGroupBy, SeriesGroupBy],
168
+ output_type: OutputType,
169
+ dtypes: Union[pd.Series, List[Any], Dict[str, Any]] = None,
170
+ dtype: Any = None,
171
+ name: Any = None,
172
+ index: Union[pd.Index, IndexValue] = None,
173
+ elementwise: bool = None,
174
+ ) -> InferredDataFrameMeta:
175
+ def infer_func(groupby_obj):
176
+ args = copy_if_possible(self.args or ())
177
+ kwargs = copy_if_possible(self.kwargs or {})
178
+
179
+ in_obj = input_groupby
180
+ while isinstance(in_obj, GROUPBY_TYPE):
181
+ in_obj = in_obj.inputs[0]
182
+
183
+ by_cols = make_column_list(groupby_params.get("by"), in_obj.dtypes) or []
184
+ if not groupby_params.get("selection"):
185
+ selection = [
186
+ c for c in input_groupby.inputs[0].dtypes.index if c not in by_cols
187
+ ]
188
+ groupby_obj = groupby_obj[selection]
189
+ res = groupby_obj.apply(self.func, *args, **kwargs)
190
+ if _need_enforce_group_keys and groupby_params.get("group_keys"):
191
+ by_levels = (
192
+ make_column_list(groupby_params.get("level"), in_obj.index.names)
193
+ or []
194
+ )
195
+
196
+ input_df = input_groupby
197
+ while isinstance(input_df, GROUPBY_TYPE):
198
+ input_df = input_df.inputs[0]
199
+
200
+ idx_df = res.index.to_frame()
201
+ if by_cols:
202
+ idx_names = by_cols + list(res.index.names)
203
+ mock_idx_df = build_empty_df(
204
+ input_df.dtypes[by_cols], index=idx_df.index
205
+ )
206
+ else:
207
+ idx_names = by_levels + list(res.index.names)
208
+ if len(in_obj.index.names) > 1:
209
+ idx_dtypes = in_obj.index_value.value.dtypes
210
+ else:
211
+ idx_dtypes = pd.Series(
212
+ [in_obj.index.dtype], index=[in_obj.index.name]
213
+ )
214
+ mock_idx_df = build_empty_df(
215
+ idx_dtypes[by_levels], index=idx_df.index
216
+ )
217
+ idx_df = pd.concat([mock_idx_df, idx_df], axis=1)
218
+ res.index = pd.MultiIndex.from_frame(idx_df, names=idx_names)
219
+ return res
220
+
221
+ groupby_params = input_groupby.op.groupby_params
222
+ inferred_meta = infer_dataframe_return_value(
223
+ input_groupby,
224
+ infer_func,
225
+ output_type=output_type,
226
+ dtypes=dtypes,
227
+ dtype=dtype,
228
+ name=name,
229
+ index=index,
230
+ elementwise=elementwise,
231
+ )
232
+
233
+ # merge specified and inferred index, dtypes, output_type
234
+ # elementwise used to decide shape
235
+ self.output_types = (
236
+ [inferred_meta.output_type]
237
+ if not self.output_types and inferred_meta.output_type
238
+ else self.output_types
239
+ )
240
+ if self.output_types:
241
+ inferred_meta.output_type = self.output_types[0]
242
+ inferred_meta.dtypes = dtypes if dtypes is not None else inferred_meta.dtypes
243
+ if index is not None:
244
+ inferred_meta.index_value = (
245
+ parse_index(index)
246
+ if index is not input_groupby.index_value
247
+ else input_groupby.index_value
248
+ )
249
+ else:
250
+ inferred_meta.index_value = inferred_meta.index_value
251
+ inferred_meta.elementwise = elementwise or inferred_meta.elementwise
252
+ return inferred_meta
253
+
254
+ @classmethod
255
+ def estimate_size(
256
+ cls,
257
+ ctx: MutableMapping[str, Union[int, float]],
258
+ op: "GroupByApplyChunk",
259
+ ) -> None:
260
+ if isinstance(op.func, MarkedFunction):
261
+ ctx[op.outputs[0].key] = float("inf")
262
+ super().estimate_size(ctx, op)
263
+
264
+
265
+ def df_groupby_apply_chunk(
266
+ dataframe_groupby,
267
+ func: Union[str, Callable],
268
+ batch_rows=None,
269
+ dtypes=None,
270
+ dtype=None,
271
+ name=None,
272
+ output_type=None,
273
+ index=None,
274
+ skip_infer=False,
275
+ args=(),
276
+ **kwargs,
277
+ ):
278
+ """
279
+ Apply function `func` group-wise and combine the results together.
280
+ The pandas DataFrame given to the function is a chunk of the input
281
+ dataframe, consider as a batch rows.
282
+
283
+ The function passed to `apply` must take a dataframe as its first
284
+ argument and return a DataFrame, Series or scalar. `apply` will
285
+ then take care of combining the results back together into a single
286
+ dataframe or series. `apply` is therefore a highly flexible
287
+ grouping method.
288
+
289
+ Don't expect to receive all rows of the DataFrame in the function,
290
+ as it depends on the implementation of MaxFrame and the internal
291
+ running state of MaxCompute.
292
+
293
+ Parameters
294
+ ----------
295
+ func : callable
296
+ A callable that takes a dataframe as its first argument, and
297
+ returns a dataframe, a series or a scalar. In addition the
298
+ callable may take positional and keyword arguments.
299
+
300
+ batch_rows : int
301
+ Specify expected number of rows in a batch, as well as the len of
302
+ function input dataframe. When the remaining data is insufficient,
303
+ it may be less than this number.
304
+
305
+ output_type : {'dataframe', 'series'}, default None
306
+ Specify type of returned object. See `Notes` for more details.
307
+
308
+ dtypes : Series, default None
309
+ Specify dtypes of returned DataFrames. See `Notes` for more details.
310
+
311
+ dtype : numpy.dtype, default None
312
+ Specify dtype of returned Series. See `Notes` for more details.
313
+
314
+ name : str, default None
315
+ Specify name of returned Series. See `Notes` for more details.
316
+
317
+ index : Index, default None
318
+ Specify index of returned object. See `Notes` for more details.
319
+
320
+ skip_infer: bool, default False
321
+ Whether infer dtypes when dtypes or output_type is not specified.
322
+
323
+ args, kwargs : tuple and dict
324
+ Optional positional and keyword arguments to pass to `func`.
325
+
326
+ Returns
327
+ -------
328
+ applied : Series or DataFrame
329
+
330
+ See Also
331
+ --------
332
+ Series.apply : Apply a function to a Series.
333
+ DataFrame.apply : Apply a function to each row or column of a DataFrame.
334
+ DataFrame.mf.apply_chunk : Apply a function to row batches of a DataFrame.
335
+
336
+ Notes
337
+ -----
338
+ When deciding output dtypes and shape of the return value, MaxFrame will
339
+ try applying ``func`` onto a mock grouped object, and the apply call
340
+ may fail. When this happens, you need to specify the type of apply
341
+ call (DataFrame or Series) in output_type.
342
+
343
+ * For DataFrame output, you need to specify a list or a pandas Series
344
+ as ``dtypes`` of output DataFrame. ``index`` of output can also be
345
+ specified.
346
+ * For Series output, you need to specify ``dtype`` and ``name`` of
347
+ output Series.
348
+
349
+ MaxFrame adopts expected behavior of pandas>=3.0 by ignoring group columns
350
+ in user function input. If you still need a group column for your function
351
+ input, try selecting it right after `groupby` results, for instance,
352
+ ``df.groupby("A")[["A", "B", "C"]].mf.apply_batch(func)`` will pass data of
353
+ column A into ``func``.
354
+ """
355
+ if not isinstance(func, Callable):
356
+ raise TypeError("function must be a callable object")
357
+
358
+ if batch_rows is not None:
359
+ if not isinstance(batch_rows, int):
360
+ raise TypeError("batch_rows must be an integer")
361
+ elif batch_rows <= 0:
362
+ raise ValueError("batch_rows must be greater than 0")
363
+
364
+ if dtype is not None:
365
+ dtype = make_dtype(dtype)
366
+
367
+ output_types = kwargs.pop("output_types", None)
368
+ object_type = kwargs.pop("object_type", None)
369
+ output_types = validate_output_types(
370
+ output_type=output_type, output_types=output_types, object_type=object_type
371
+ )
372
+ output_type = output_types[0] if output_types else None
373
+ if skip_infer and output_type is None:
374
+ output_type = OutputType.df_or_series
375
+
376
+ # bind args and kwargs
377
+ op = GroupByApplyChunk(
378
+ func=func,
379
+ batch_rows=batch_rows,
380
+ output_type=output_type,
381
+ args=args,
382
+ kwargs=kwargs,
383
+ groupby_params=dataframe_groupby.op.groupby_params,
384
+ )
385
+
386
+ return op(
387
+ dataframe_groupby,
388
+ dtypes=dtypes,
389
+ dtype=dtype,
390
+ name=name,
391
+ index=index,
392
+ output_type=output_type,
393
+ )