maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (443)
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win32.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +9 -8
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +38 -1
  15. maxframe/codegen/spe/dataframe/misc.py +11 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +32 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +39 -18
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  29. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  30. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  31. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  32. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  33. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  34. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  35. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  36. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  37. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  38. maxframe/codegen/spe/tensor/__init__.py +3 -0
  39. maxframe/codegen/spe/tensor/datasource.py +1 -0
  40. maxframe/codegen/spe/tensor/fft.py +74 -0
  41. maxframe/codegen/spe/tensor/linalg.py +29 -2
  42. maxframe/codegen/spe/tensor/misc.py +79 -25
  43. maxframe/codegen/spe/tensor/spatial.py +45 -0
  44. maxframe/codegen/spe/tensor/statistics.py +44 -0
  45. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  46. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  47. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  48. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  49. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  50. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  51. maxframe/codegen/spe/utils.py +2 -0
  52. maxframe/config/config.py +73 -9
  53. maxframe/config/tests/test_validators.py +13 -1
  54. maxframe/config/validators.py +49 -0
  55. maxframe/conftest.py +54 -17
  56. maxframe/core/accessor.py +2 -2
  57. maxframe/core/base.py +2 -1
  58. maxframe/core/entity/core.py +5 -0
  59. maxframe/core/entity/tileables.py +3 -1
  60. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  61. maxframe/core/graph/entity.py +8 -3
  62. maxframe/core/mode.py +6 -1
  63. maxframe/core/operator/base.py +9 -2
  64. maxframe/core/operator/core.py +10 -2
  65. maxframe/core/operator/utils.py +13 -0
  66. maxframe/dataframe/__init__.py +12 -5
  67. maxframe/dataframe/accessors/__init__.py +1 -1
  68. maxframe/dataframe/accessors/compat.py +45 -0
  69. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  70. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  71. maxframe/dataframe/accessors/dict_/core.py +48 -0
  72. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  73. maxframe/dataframe/accessors/dict_/length.py +7 -16
  74. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  75. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  76. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  77. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  78. maxframe/dataframe/accessors/list_/core.py +48 -0
  79. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  80. maxframe/dataframe/accessors/list_/length.py +7 -16
  81. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  82. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  83. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  84. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  85. maxframe/dataframe/accessors/struct_/core.py +43 -0
  86. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  87. maxframe/dataframe/accessors/struct_/field.py +123 -0
  88. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  89. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  90. maxframe/dataframe/arithmetic/__init__.py +18 -4
  91. maxframe/dataframe/arithmetic/between.py +106 -0
  92. maxframe/dataframe/arithmetic/dot.py +237 -0
  93. maxframe/dataframe/arithmetic/maximum.py +33 -0
  94. maxframe/dataframe/arithmetic/minimum.py +33 -0
  95. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  96. maxframe/dataframe/core.py +161 -224
  97. maxframe/dataframe/datasource/__init__.py +18 -0
  98. maxframe/dataframe/datasource/core.py +6 -0
  99. maxframe/dataframe/datasource/direct.py +57 -0
  100. maxframe/dataframe/datasource/from_dict.py +124 -0
  101. maxframe/dataframe/datasource/from_index.py +1 -1
  102. maxframe/dataframe/datasource/from_records.py +77 -0
  103. maxframe/dataframe/datasource/from_tensor.py +109 -41
  104. maxframe/dataframe/datasource/read_csv.py +21 -14
  105. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  106. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  107. maxframe/dataframe/datasource/read_parquet.py +38 -39
  108. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  109. maxframe/dataframe/datastore/__init__.py +11 -1
  110. maxframe/dataframe/datastore/direct.py +268 -0
  111. maxframe/dataframe/datastore/to_csv.py +29 -41
  112. maxframe/dataframe/datastore/to_odps.py +36 -4
  113. maxframe/dataframe/extensions/__init__.py +20 -4
  114. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  115. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  116. maxframe/dataframe/extensions/collect_kv.py +126 -0
  117. maxframe/dataframe/extensions/extract_kv.py +177 -0
  118. maxframe/dataframe/extensions/flatjson.py +2 -1
  119. maxframe/dataframe/extensions/map_reduce.py +263 -0
  120. maxframe/dataframe/extensions/rebalance.py +62 -0
  121. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  122. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  123. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  124. maxframe/dataframe/groupby/__init__.py +17 -2
  125. maxframe/dataframe/groupby/aggregation.py +86 -49
  126. maxframe/dataframe/groupby/apply.py +1 -1
  127. maxframe/dataframe/groupby/apply_chunk.py +19 -5
  128. maxframe/dataframe/groupby/core.py +116 -16
  129. maxframe/dataframe/groupby/cum.py +4 -25
  130. maxframe/dataframe/groupby/expanding.py +264 -0
  131. maxframe/dataframe/groupby/fill.py +1 -1
  132. maxframe/dataframe/groupby/getitem.py +12 -5
  133. maxframe/dataframe/groupby/head.py +11 -1
  134. maxframe/dataframe/groupby/rank.py +136 -0
  135. maxframe/dataframe/groupby/rolling.py +206 -0
  136. maxframe/dataframe/groupby/shift.py +114 -0
  137. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  138. maxframe/dataframe/indexing/__init__.py +22 -2
  139. maxframe/dataframe/indexing/droplevel.py +195 -0
  140. maxframe/dataframe/indexing/filter.py +169 -0
  141. maxframe/dataframe/indexing/get_level_values.py +76 -0
  142. maxframe/dataframe/indexing/iat.py +45 -0
  143. maxframe/dataframe/indexing/iloc.py +152 -12
  144. maxframe/dataframe/indexing/insert.py +46 -18
  145. maxframe/dataframe/indexing/loc.py +287 -7
  146. maxframe/dataframe/indexing/reindex.py +14 -5
  147. maxframe/dataframe/indexing/rename.py +6 -0
  148. maxframe/dataframe/indexing/rename_axis.py +2 -2
  149. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  150. maxframe/dataframe/indexing/reset_index.py +33 -6
  151. maxframe/dataframe/indexing/sample.py +8 -0
  152. maxframe/dataframe/indexing/setitem.py +3 -3
  153. maxframe/dataframe/indexing/swaplevel.py +185 -0
  154. maxframe/dataframe/indexing/take.py +99 -0
  155. maxframe/dataframe/indexing/truncate.py +140 -0
  156. maxframe/dataframe/indexing/where.py +0 -11
  157. maxframe/dataframe/indexing/xs.py +148 -0
  158. maxframe/dataframe/merge/__init__.py +15 -1
  159. maxframe/dataframe/merge/append.py +97 -98
  160. maxframe/dataframe/merge/combine.py +244 -0
  161. maxframe/dataframe/merge/combine_first.py +120 -0
  162. maxframe/dataframe/merge/compare.py +387 -0
  163. maxframe/dataframe/merge/concat.py +183 -0
  164. maxframe/dataframe/merge/update.py +271 -0
  165. maxframe/dataframe/misc/__init__.py +28 -11
  166. maxframe/dataframe/misc/_duplicate.py +10 -4
  167. maxframe/dataframe/misc/apply.py +1 -1
  168. maxframe/dataframe/misc/check_unique.py +82 -0
  169. maxframe/dataframe/misc/clip.py +145 -0
  170. maxframe/dataframe/misc/describe.py +175 -9
  171. maxframe/dataframe/misc/drop.py +31 -0
  172. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  173. maxframe/dataframe/misc/duplicated.py +2 -2
  174. maxframe/dataframe/misc/get_dummies.py +5 -1
  175. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  176. maxframe/dataframe/misc/isin.py +2 -2
  177. maxframe/dataframe/misc/map.py +125 -18
  178. maxframe/dataframe/misc/repeat.py +159 -0
  179. maxframe/dataframe/misc/tests/test_misc.py +48 -3
  180. maxframe/dataframe/misc/to_numeric.py +3 -0
  181. maxframe/dataframe/misc/transform.py +12 -5
  182. maxframe/dataframe/misc/transpose.py +13 -1
  183. maxframe/dataframe/misc/valid_index.py +115 -0
  184. maxframe/dataframe/misc/value_counts.py +38 -4
  185. maxframe/dataframe/missing/checkna.py +14 -6
  186. maxframe/dataframe/missing/dropna.py +5 -0
  187. maxframe/dataframe/missing/fillna.py +1 -1
  188. maxframe/dataframe/missing/replace.py +7 -4
  189. maxframe/dataframe/reduction/__init__.py +35 -16
  190. maxframe/dataframe/reduction/aggregation.py +43 -14
  191. maxframe/dataframe/reduction/all.py +2 -2
  192. maxframe/dataframe/reduction/any.py +2 -2
  193. maxframe/dataframe/reduction/argmax.py +103 -0
  194. maxframe/dataframe/reduction/argmin.py +103 -0
  195. maxframe/dataframe/reduction/core.py +80 -24
  196. maxframe/dataframe/reduction/count.py +13 -9
  197. maxframe/dataframe/reduction/cov.py +166 -0
  198. maxframe/dataframe/reduction/cummax.py +2 -2
  199. maxframe/dataframe/reduction/cummin.py +2 -2
  200. maxframe/dataframe/reduction/cumprod.py +2 -2
  201. maxframe/dataframe/reduction/cumsum.py +2 -2
  202. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  203. maxframe/dataframe/reduction/idxmax.py +185 -0
  204. maxframe/dataframe/reduction/idxmin.py +185 -0
  205. maxframe/dataframe/reduction/kurtosis.py +37 -30
  206. maxframe/dataframe/reduction/max.py +2 -2
  207. maxframe/dataframe/reduction/mean.py +9 -7
  208. maxframe/dataframe/reduction/median.py +2 -2
  209. maxframe/dataframe/reduction/min.py +2 -2
  210. maxframe/dataframe/reduction/mode.py +144 -0
  211. maxframe/dataframe/reduction/nunique.py +19 -11
  212. maxframe/dataframe/reduction/prod.py +18 -13
  213. maxframe/dataframe/reduction/reduction_size.py +2 -2
  214. maxframe/dataframe/reduction/sem.py +13 -9
  215. maxframe/dataframe/reduction/skew.py +31 -27
  216. maxframe/dataframe/reduction/str_concat.py +10 -7
  217. maxframe/dataframe/reduction/sum.py +18 -14
  218. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  219. maxframe/dataframe/reduction/unique.py +20 -3
  220. maxframe/dataframe/reduction/var.py +16 -12
  221. maxframe/dataframe/reshape/__init__.py +38 -0
  222. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  223. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  224. maxframe/dataframe/reshape/unstack.py +114 -0
  225. maxframe/dataframe/sort/__init__.py +16 -1
  226. maxframe/dataframe/sort/argsort.py +68 -0
  227. maxframe/dataframe/sort/core.py +2 -1
  228. maxframe/dataframe/sort/nlargest.py +238 -0
  229. maxframe/dataframe/sort/nsmallest.py +228 -0
  230. maxframe/dataframe/sort/rank.py +147 -0
  231. maxframe/dataframe/statistics/__init__.py +3 -3
  232. maxframe/dataframe/statistics/corr.py +1 -0
  233. maxframe/dataframe/statistics/quantile.py +2 -2
  234. maxframe/dataframe/tests/test_typing.py +104 -0
  235. maxframe/dataframe/tests/test_utils.py +66 -2
  236. maxframe/dataframe/tseries/__init__.py +19 -0
  237. maxframe/dataframe/tseries/at_time.py +61 -0
  238. maxframe/dataframe/tseries/between_time.py +122 -0
  239. maxframe/dataframe/typing_.py +185 -0
  240. maxframe/dataframe/utils.py +125 -52
  241. maxframe/dataframe/window/aggregation.py +8 -4
  242. maxframe/dataframe/window/core.py +14 -1
  243. maxframe/dataframe/window/ewm.py +1 -3
  244. maxframe/dataframe/window/expanding.py +37 -35
  245. maxframe/dataframe/window/rolling.py +49 -39
  246. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  247. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  248. maxframe/env.py +7 -4
  249. maxframe/errors.py +2 -2
  250. maxframe/io/odpsio/schema.py +9 -3
  251. maxframe/io/odpsio/tableio.py +7 -2
  252. maxframe/io/odpsio/tests/test_schema.py +198 -83
  253. maxframe/learn/__init__.py +10 -2
  254. maxframe/learn/cluster/__init__.py +15 -0
  255. maxframe/learn/cluster/_kmeans.py +782 -0
  256. maxframe/learn/contrib/llm/core.py +18 -7
  257. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  258. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  259. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  260. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  261. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  262. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  263. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  264. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  265. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  266. maxframe/learn/contrib/llm/models/managed.py +76 -11
  267. maxframe/learn/contrib/llm/models/openai.py +72 -0
  268. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  269. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  270. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  271. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  272. maxframe/learn/contrib/llm/text.py +348 -42
  273. maxframe/learn/contrib/models.py +4 -1
  274. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  275. maxframe/learn/contrib/xgboost/core.py +113 -4
  276. maxframe/learn/contrib/xgboost/predict.py +4 -2
  277. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  278. maxframe/learn/contrib/xgboost/train.py +7 -2
  279. maxframe/learn/core.py +66 -0
  280. maxframe/learn/linear_model/_base.py +58 -1
  281. maxframe/learn/linear_model/_lin_reg.py +1 -1
  282. maxframe/learn/metrics/__init__.py +6 -0
  283. maxframe/learn/metrics/_classification.py +145 -0
  284. maxframe/learn/metrics/_ranking.py +477 -0
  285. maxframe/learn/metrics/_scorer.py +60 -0
  286. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  287. maxframe/learn/metrics/pairwise/core.py +77 -0
  288. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  289. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  290. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  291. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  292. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  293. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  294. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  295. maxframe/learn/metrics/tests/__init__.py +13 -0
  296. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  297. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  298. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  299. maxframe/learn/utils/__init__.py +2 -1
  300. maxframe/learn/utils/checks.py +1 -2
  301. maxframe/learn/utils/core.py +59 -0
  302. maxframe/learn/utils/extmath.py +79 -9
  303. maxframe/learn/utils/odpsio.py +262 -0
  304. maxframe/learn/utils/validation.py +2 -2
  305. maxframe/lib/compat.py +40 -0
  306. maxframe/lib/dtypes_extension/__init__.py +16 -1
  307. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  308. maxframe/lib/dtypes_extension/blob.py +304 -0
  309. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  310. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  311. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  312. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  313. maxframe/lib/filesystem/_oss_lib/common.py +124 -50
  314. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  315. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  316. maxframe/lib/filesystem/base.py +1 -1
  317. maxframe/lib/filesystem/core.py +1 -1
  318. maxframe/lib/filesystem/oss.py +115 -46
  319. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  320. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  321. maxframe/lib/wrapped_pickle.py +10 -0
  322. maxframe/opcodes.py +41 -15
  323. maxframe/protocol.py +12 -0
  324. maxframe/remote/core.py +4 -0
  325. maxframe/serialization/__init__.py +11 -2
  326. maxframe/serialization/arrow.py +38 -13
  327. maxframe/serialization/blob.py +32 -0
  328. maxframe/serialization/core.cp37-win32.pyd +0 -0
  329. maxframe/serialization/core.pyx +39 -1
  330. maxframe/serialization/exception.py +2 -4
  331. maxframe/serialization/numpy.py +11 -0
  332. maxframe/serialization/pandas.py +46 -9
  333. maxframe/serialization/serializables/core.py +2 -2
  334. maxframe/serialization/tests/test_serial.py +31 -4
  335. maxframe/tensor/__init__.py +38 -8
  336. maxframe/tensor/arithmetic/__init__.py +19 -10
  337. maxframe/tensor/arithmetic/core.py +2 -2
  338. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  339. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
  340. maxframe/tensor/core.py +6 -2
  341. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  342. maxframe/tensor/extensions/__init__.py +2 -0
  343. maxframe/tensor/extensions/apply_chunk.py +3 -3
  344. maxframe/tensor/extensions/rebalance.py +65 -0
  345. maxframe/tensor/fft/__init__.py +32 -0
  346. maxframe/tensor/fft/core.py +168 -0
  347. maxframe/tensor/fft/fft.py +112 -0
  348. maxframe/tensor/fft/fft2.py +118 -0
  349. maxframe/tensor/fft/fftfreq.py +80 -0
  350. maxframe/tensor/fft/fftn.py +123 -0
  351. maxframe/tensor/fft/fftshift.py +79 -0
  352. maxframe/tensor/fft/hfft.py +112 -0
  353. maxframe/tensor/fft/ifft.py +114 -0
  354. maxframe/tensor/fft/ifft2.py +115 -0
  355. maxframe/tensor/fft/ifftn.py +123 -0
  356. maxframe/tensor/fft/ifftshift.py +73 -0
  357. maxframe/tensor/fft/ihfft.py +93 -0
  358. maxframe/tensor/fft/irfft.py +118 -0
  359. maxframe/tensor/fft/irfft2.py +62 -0
  360. maxframe/tensor/fft/irfftn.py +114 -0
  361. maxframe/tensor/fft/rfft.py +116 -0
  362. maxframe/tensor/fft/rfft2.py +63 -0
  363. maxframe/tensor/fft/rfftfreq.py +87 -0
  364. maxframe/tensor/fft/rfftn.py +113 -0
  365. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  366. maxframe/tensor/linalg/__init__.py +7 -0
  367. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  368. maxframe/tensor/linalg/cholesky.py +117 -0
  369. maxframe/tensor/linalg/einsum.py +339 -0
  370. maxframe/tensor/linalg/lstsq.py +100 -0
  371. maxframe/tensor/linalg/matrix_norm.py +75 -0
  372. maxframe/tensor/linalg/norm.py +249 -0
  373. maxframe/tensor/linalg/solve.py +72 -0
  374. maxframe/tensor/linalg/solve_triangular.py +2 -2
  375. maxframe/tensor/linalg/vector_norm.py +113 -0
  376. maxframe/tensor/misc/__init__.py +24 -1
  377. maxframe/tensor/misc/argwhere.py +72 -0
  378. maxframe/tensor/misc/array_split.py +46 -0
  379. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  380. maxframe/tensor/misc/copyto.py +130 -0
  381. maxframe/tensor/misc/delete.py +104 -0
  382. maxframe/tensor/misc/dsplit.py +68 -0
  383. maxframe/tensor/misc/ediff1d.py +74 -0
  384. maxframe/tensor/misc/expand_dims.py +85 -0
  385. maxframe/tensor/misc/flip.py +90 -0
  386. maxframe/tensor/misc/fliplr.py +64 -0
  387. maxframe/tensor/misc/flipud.py +68 -0
  388. maxframe/tensor/misc/hsplit.py +85 -0
  389. maxframe/tensor/misc/insert.py +139 -0
  390. maxframe/tensor/misc/moveaxis.py +83 -0
  391. maxframe/tensor/misc/result_type.py +88 -0
  392. maxframe/tensor/misc/roll.py +124 -0
  393. maxframe/tensor/misc/rollaxis.py +77 -0
  394. maxframe/tensor/misc/shape.py +89 -0
  395. maxframe/tensor/misc/split.py +190 -0
  396. maxframe/tensor/misc/tile.py +109 -0
  397. maxframe/tensor/misc/vsplit.py +74 -0
  398. maxframe/tensor/reduction/array_equal.py +2 -1
  399. maxframe/tensor/sort/__init__.py +2 -0
  400. maxframe/tensor/sort/argpartition.py +98 -0
  401. maxframe/tensor/sort/partition.py +228 -0
  402. maxframe/tensor/spatial/__init__.py +15 -0
  403. maxframe/tensor/spatial/distance/__init__.py +17 -0
  404. maxframe/tensor/spatial/distance/cdist.py +421 -0
  405. maxframe/tensor/spatial/distance/pdist.py +398 -0
  406. maxframe/tensor/spatial/distance/squareform.py +153 -0
  407. maxframe/tensor/special/__init__.py +159 -21
  408. maxframe/tensor/special/airy.py +55 -0
  409. maxframe/tensor/special/bessel.py +199 -0
  410. maxframe/tensor/special/core.py +65 -4
  411. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  412. maxframe/tensor/special/ellip_harm.py +55 -0
  413. maxframe/tensor/special/err_fresnel.py +223 -0
  414. maxframe/tensor/special/gamma_funcs.py +303 -0
  415. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  416. maxframe/tensor/special/info_theory.py +189 -0
  417. maxframe/tensor/special/misc.py +21 -0
  418. maxframe/tensor/statistics/__init__.py +6 -0
  419. maxframe/tensor/statistics/corrcoef.py +77 -0
  420. maxframe/tensor/statistics/cov.py +222 -0
  421. maxframe/tensor/statistics/digitize.py +126 -0
  422. maxframe/tensor/statistics/histogram.py +520 -0
  423. maxframe/tensor/statistics/median.py +85 -0
  424. maxframe/tensor/statistics/ptp.py +89 -0
  425. maxframe/tensor/utils.py +3 -3
  426. maxframe/tests/test_udf.py +61 -0
  427. maxframe/tests/test_utils.py +51 -6
  428. maxframe/tests/utils.py +0 -2
  429. maxframe/typing_.py +2 -0
  430. maxframe/udf.py +130 -9
  431. maxframe/utils.py +254 -27
  432. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
  433. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
  434. maxframe_client/fetcher.py +35 -4
  435. maxframe_client/session/odps.py +7 -2
  436. maxframe_client/session/task.py +8 -1
  437. maxframe_client/tests/test_fetcher.py +76 -3
  438. maxframe_client/tests/test_session.py +28 -1
  439. maxframe/dataframe/arrays.py +0 -864
  440. maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  441. maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  442. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  443. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,135 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import functools
16
+ from collections import defaultdict
17
+
18
+ import pandas as pd
19
+ import pytest
20
+
21
+ from .... import dataframe as md
22
+ from ...groupby.apply_chunk import GroupByApplyChunk
23
+ from .. import DataFrameApplyChunk
24
+
25
+
26
+ @pytest.fixture
27
+ def df1():
28
+ return md.DataFrame(
29
+ {
30
+ "name": ["name key", "name", "key", "name", "key name"],
31
+ "id": [4, 2, 4, 3, 3],
32
+ "fid": [5.3, 3.5, 4.2, 2.2, 4.1],
33
+ }
34
+ )
35
+
36
+
37
+ def test_map_reduce_with_map_only(df1):
38
+ func = functools.partial(lambda x: x)
39
+
40
+ with pytest.raises(ValueError):
41
+ df1.mf.map_reduce(func, reducer_dtypes={"col": "string"})
42
+
43
+ mapped = df1.mf.map_reduce(func)
44
+ assert isinstance(mapped.op, DataFrameApplyChunk)
45
+ assert mapped.op.func is func
46
+
47
+ map_combined = df1.mf.map_reduce(
48
+ func, combiner=func, mapper_dtypes=df1.dtypes, mapper_index=df1.index
49
+ )
50
+ assert isinstance(map_combined.op, DataFrameApplyChunk)
51
+ assert map_combined.op.func.__name__ == "CombinedMapper"
52
+
53
+
54
+ def test_mapper_with_combiner(df1):
55
+ class BaseFunc:
56
+ def __init__(self):
57
+ self._word_to_count = defaultdict(lambda: 0)
58
+
59
+ def _collect_df(self):
60
+ word_to_count = self._word_to_count.copy()
61
+ self._word_to_count.clear()
62
+ return pd.DataFrame(
63
+ [list(tp) for tp in word_to_count.items()],
64
+ columns=["word", "count"],
65
+ )
66
+
67
+ def close(self):
68
+ print(f"Close {type(self)}")
69
+
70
+ class MapperCls(BaseFunc):
71
+ def __call__(self, batch, end=False):
72
+ for words in batch["name"]:
73
+ for w in words.split():
74
+ self._word_to_count[w] += 1
75
+ if end:
76
+ return self._collect_df()
77
+
78
+ class CombinerCls(BaseFunc):
79
+ def __call__(self, batch, end=False):
80
+ for _, row in batch.iterrows():
81
+ self._word_to_count[row["word"]] = row["count"]
82
+ if end:
83
+ return self._collect_df()
84
+
85
+ map_combined = df1.mf.map_reduce(
86
+ MapperCls,
87
+ combiner=CombinerCls,
88
+ group_cols="word",
89
+ mapper_dtypes={"word": "str", "count": "int"},
90
+ mapper_index=df1.index,
91
+ )
92
+
93
+ raw = df1.op.data
94
+ combiner = map_combined.op.func()
95
+ ret1 = combiner(raw.iloc[:3], end=True)
96
+ ret2 = combiner(raw.iloc[3:], end=True)
97
+ close_ret = combiner.close()
98
+ expected1 = pd.DataFrame([["key", 2], ["name", 2]], columns=["word", "count"])
99
+ expected2 = pd.DataFrame([["key", 1], ["name", 2]], columns=["word", "count"])
100
+ assert close_ret is None
101
+ pd.testing.assert_frame_equal(ret1.reset_index(drop=True), expected1)
102
+ pd.testing.assert_frame_equal(ret2.reset_index(drop=True), expected2)
103
+
104
+
105
+ def test_map_reduce_with_reduce_only(df1):
106
+ func = functools.partial(lambda x: x)
107
+
108
+ with pytest.raises(ValueError):
109
+ df1.mf.map_reduce(reducer=func, mapper_dtypes={"col": "string"})
110
+
111
+ reduced = df1.mf.map_reduce(reducer=func, group_cols="name")
112
+ assert isinstance(reduced.op, GroupByApplyChunk)
113
+ assert reduced.op.func is func
114
+ assert reduced.op.groupby_params["by"] == ["name"]
115
+
116
+
117
+ def test_map_reduce_with_both_funcs(df1):
118
+ map_func = functools.partial(lambda x: x)
119
+
120
+ class ReducerCls:
121
+ def __call__(self, batch):
122
+ return batch
123
+
124
+ reduced = df1.mf.map_reduce(
125
+ mapper=map_func,
126
+ reducer=ReducerCls,
127
+ group_cols="name",
128
+ reducer_dtypes=df1.dtypes,
129
+ reducer_index=df1.index,
130
+ )
131
+ assert isinstance(reduced.op, GroupByApplyChunk)
132
+ assert reduced.op.func is ReducerCls
133
+ assert reduced.op.groupby_params["by"] == ["name"]
134
+ assert isinstance(reduced.inputs[0].op, DataFrameApplyChunk)
135
+ assert reduced.inputs[0].op.func is map_func
@@ -14,7 +14,9 @@
14
14
 
15
15
  # noinspection PyUnresolvedReferences
16
16
  from ..core import DataFrameGroupBy, GroupBy, SeriesGroupBy
17
- from .core import NamedAgg
17
+ from .core import _make_named_agg_compat
18
+ from .expanding import ExpandingGroupby
19
+ from .rolling import RollingGroupby
18
20
 
19
21
 
20
22
  def _install():
@@ -24,12 +26,15 @@ def _install():
24
26
  from .apply import groupby_apply
25
27
  from .apply_chunk import df_groupby_apply_chunk
26
28
  from .core import groupby
27
- from .cum import cumcount, cummax, cummin, cumprod, cumsum
29
+ from .expanding import cumcount, cummax, cummin, cumprod, cumsum, expanding
28
30
  from .extensions import DataFrameGroupByMaxFrameAccessor
29
31
  from .fill import bfill, ffill, fillna
30
32
  from .getitem import df_groupby_getitem
31
33
  from .head import head
34
+ from .rank import rank
35
+ from .rolling import rolling
32
36
  from .sample import groupby_sample
37
+ from .shift import shift
33
38
  from .transform import groupby_transform
34
39
 
35
40
  for cls in DATAFRAME_TYPE:
@@ -69,6 +74,12 @@ def _install():
69
74
  setattr(cls, "cumprod", cumprod)
70
75
  setattr(cls, "cumsum", cumsum)
71
76
 
77
+ setattr(cls, "expanding", expanding)
78
+ setattr(cls, "rolling", rolling)
79
+
80
+ setattr(cls, "shift", shift)
81
+ setattr(cls, "rank", rank)
82
+
72
83
  setattr(cls, "head", head)
73
84
 
74
85
  setattr(cls, "sample", groupby_sample)
@@ -88,3 +99,7 @@ def _install():
88
99
 
89
100
  _install()
90
101
  del _install
102
+
103
+
104
+ __getattr__ = _make_named_agg_compat
105
+ del _make_named_agg_compat
@@ -20,16 +20,21 @@ import numpy as np
20
20
  import pandas as pd
21
21
 
22
22
  from ... import opcodes
23
- from ...core import ENTITY_TYPE, EntityData, OutputType
23
+ from ...config import options
24
+ from ...core import ENTITY_TYPE, EntityData, OutputType, enter_mode
25
+ from ...serialization import PickleContainer
24
26
  from ...serialization.serializables import (
25
27
  AnyField,
28
+ BoolField,
26
29
  DictField,
30
+ Int8Field,
27
31
  Int32Field,
28
32
  Int64Field,
29
33
  ListField,
30
34
  StringField,
31
35
  )
32
- from ...utils import lazy_import, pd_release_version
36
+ from ...udf import BuiltinFunction
37
+ from ...utils import find_objects, get_pd_option, lazy_import, pd_release_version
33
38
  from ..core import GROUPBY_TYPE
34
39
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
35
40
  from ..reduction.aggregation import (
@@ -46,19 +51,7 @@ logger = logging.getLogger(__name__)
46
51
  CV_THRESHOLD = 0.2
47
52
  MEAN_RATIO_THRESHOLD = 2 / 3
48
53
  _support_get_group_without_as_index = pd_release_version[:2] > (1, 0)
49
-
50
-
51
- class SizeRecorder:
52
- def __init__(self):
53
- self._raw_records = []
54
- self._agg_records = []
55
-
56
- def record(self, raw_record: int, agg_record: int):
57
- self._raw_records.append(raw_record)
58
- self._agg_records.append(agg_record)
59
-
60
- def get(self):
61
- return self._raw_records, self._agg_records
54
+ _support_multi_index_as_index = pd_release_version[:2] > (2, 0)
62
55
 
63
56
 
64
57
  _agg_functions = {
@@ -86,24 +79,28 @@ _series_col_name = "col_name"
86
79
 
87
80
  def _patch_groupby_kurt():
88
81
  try:
89
- from pandas.core.groupby import DataFrameGroupBy, SeriesGroupBy
90
-
91
- if not hasattr(DataFrameGroupBy, "kurt"): # pragma: no branch
92
-
93
- def _kurt_by_frame(a, *args, **kwargs):
94
- data = a.to_frame().kurt(*args, **kwargs).iloc[0]
95
- if is_cudf(data): # pragma: no cover
96
- data = data.copy()
97
- return data
98
-
99
- def _group_kurt(x, *args, **kwargs):
100
- if kwargs.get("numeric_only") is not None:
101
- return x.agg(functools.partial(_kurt_by_frame, *args, **kwargs))
102
- else:
103
- return x.agg(functools.partial(pd.Series.kurt, *args, **kwargs))
104
-
105
- DataFrameGroupBy.kurt = DataFrameGroupBy.kurtosis = _group_kurt
106
- SeriesGroupBy.kurt = SeriesGroupBy.kurtosis = _group_kurt
82
+ try:
83
+ from pandas.api.typing import DataFrameGroupBy, SeriesGroupBy
84
+ except ImportError:
85
+ from pandas.core.groupby import DataFrameGroupBy, SeriesGroupBy
86
+
87
+ if hasattr(DataFrameGroupBy, "kurt"): # pragma: no branch
88
+ return
89
+
90
+ def _kurt_by_frame(a, *args, **kwargs):
91
+ data = a.to_frame().kurt(*args, **kwargs).iloc[0]
92
+ if is_cudf(data): # pragma: no cover
93
+ data = data.copy()
94
+ return data
95
+
96
+ def _group_kurt(x, *args, **kwargs):
97
+ if kwargs.get("numeric_only") is not None:
98
+ return x.agg(functools.partial(_kurt_by_frame, *args, **kwargs))
99
+ else:
100
+ return x.agg(functools.partial(pd.Series.kurt, *args, **kwargs))
101
+
102
+ DataFrameGroupBy.kurt = DataFrameGroupBy.kurtosis = _group_kurt
103
+ SeriesGroupBy.kurt = SeriesGroupBy.kurtosis = _group_kurt
107
104
  except (AttributeError, ImportError): # pragma: no cover
108
105
  pass
109
106
 
@@ -119,7 +116,10 @@ def build_mock_agg_result(
119
116
  **raw_func_kw,
120
117
  ):
121
118
  try:
122
- agg_result = groupby.op.build_mock_groupby().aggregate(raw_func, **raw_func_kw)
119
+ with enter_mode(mock=True):
120
+ agg_result = groupby.op.build_mock_groupby().aggregate(
121
+ raw_func, **raw_func_kw
122
+ )
123
123
  except ValueError:
124
124
  if (
125
125
  groupby_params.get("as_index") or _support_get_group_without_as_index
@@ -137,23 +137,43 @@ def build_mock_agg_result(
137
137
  class DataFrameGroupByAgg(DataFrameOperator, DataFrameOperatorMixin):
138
138
  _op_type_ = opcodes.GROUPBY_AGG
139
139
 
140
- raw_func = AnyField("raw_func")
141
- raw_func_kw = DictField("raw_func_kw")
142
- func = AnyField("func")
140
+ raw_func = AnyField("raw_func", default=None)
141
+ raw_func_kw = DictField("raw_func_kw", default=None)
142
+ func = AnyField("func", default=None)
143
143
  func_rename = ListField("func_rename", default=None)
144
144
 
145
- raw_groupby_params = DictField("raw_groupby_params")
146
- groupby_params = DictField("groupby_params")
145
+ raw_groupby_params = DictField("raw_groupby_params", default=None)
146
+ groupby_params = DictField("groupby_params", default=None)
147
147
 
148
- method = StringField("method")
148
+ method = StringField("method", default=None)
149
149
 
150
150
  # for chunk
151
- chunk_store_limit = Int64Field("chunk_store_limit")
152
- pre_funcs = ListField("pre_funcs")
153
- agg_funcs = ListField("agg_funcs")
154
- post_funcs = ListField("post_funcs")
155
- index_levels = Int32Field("index_levels")
156
- size_recorder_name = StringField("size_recorder_name")
151
+ chunk_store_limit = Int64Field("chunk_store_limit", default=None)
152
+ pre_funcs = ListField("pre_funcs", default=None)
153
+ agg_funcs = ListField("agg_funcs", default=None)
154
+ post_funcs = ListField("post_funcs", default=None)
155
+ index_levels = Int32Field("index_levels", default=None)
156
+ size_recorder_name = StringField("size_recorder_name", default=None)
157
+ combine_size = Int32Field("combine_size", default=None)
158
+
159
+ use_inf_as_na = BoolField("use_inf_as_na", default=None)
160
+ input_ndim = Int8Field("input_ndim", default=1)
161
+ append_level = BoolField("append_level", default=False)
162
+
163
def has_custom_code(self) -> bool:
    """Tell whether this aggregation carries non-builtin callables.

    Returns
    -------
    bool
        True when a callable found under the ``"by"`` entry of
        ``groupby_params`` is not a ``BuiltinFunction``, or when any entry
        of ``agg_funcs`` has a truthy ``custom_reduction`` that is not a
        ``BuiltinFunction``. False otherwise.
    """
    # ``groupby_params`` is declared with ``default=None``; guard it in the
    # same way ``agg_funcs`` is guarded below so an operator built without
    # groupby parameters does not fail with AttributeError here.
    groupby_params = self.groupby_params or {}
    callable_bys = find_objects(
        groupby_params.get("by"), types=PickleContainer, checker=callable
    )
    if callable_bys and any(
        not isinstance(fun, BuiltinFunction) for fun in callable_bys
    ):
        return True

    return any(
        fun.custom_reduction
        and not isinstance(fun.custom_reduction, BuiltinFunction)
        for fun in self.agg_funcs or ()
    )
157
177
 
158
178
  @classmethod
159
179
  def _set_inputs(cls, op: "DataFrameGroupByAgg", inputs: List[EntityData]):
@@ -193,7 +213,9 @@ class DataFrameGroupByAgg(DataFrameOperator, DataFrameOperatorMixin):
193
213
 
194
214
  def _fix_as_index(self, result_index: pd.Index):
195
215
  # make sure if as_index=False takes effect
196
- if isinstance(result_index, pd.MultiIndex):
216
+ if not _support_multi_index_as_index and isinstance(
217
+ result_index, pd.MultiIndex
218
+ ):
197
219
  # if MultiIndex, as_index=False definitely takes no effect
198
220
  self.groupby_params["as_index"] = True
199
221
  elif result_index.name is not None:
@@ -217,12 +239,17 @@ class DataFrameGroupByAgg(DataFrameOperator, DataFrameOperatorMixin):
217
239
  agg_df.index, groupby.key, groupby.index_value.key
218
240
  )
219
241
 
242
+ self.input_ndim = 2
243
+
220
244
  # make sure if as_index=False takes effect
221
245
  self._fix_as_index(agg_df.index)
222
246
 
223
247
  # determine num of indices to group in intermediate steps
224
248
  self.index_levels = self._get_index_levels(groupby, agg_df.index)
225
249
 
250
+ # if True, name of agg funcs will be appended as the last level
251
+ self.append_level = agg_df.dtypes.index.nlevels > input_df.dtypes.index.nlevels
252
+
226
253
  inputs = self._get_inputs([input_df])
227
254
  return self.new_dataframe(
228
255
  inputs,
@@ -247,6 +274,8 @@ class DataFrameGroupByAgg(DataFrameOperator, DataFrameOperatorMixin):
247
274
 
248
275
  inputs = self._get_inputs([in_series])
249
276
 
277
+ self.input_ndim = 1
278
+
250
279
  # determine num of indices to group in intermediate steps
251
280
  self.index_levels = self._get_index_levels(groupby, agg_result.index)
252
281
 
@@ -351,9 +380,10 @@ def agg(groupby, func=None, method="auto", *args, **kwargs):
351
380
  1 1 2 0.590715
352
381
  2 3 4 0.704907
353
382
 
354
- To control the output names with different aggregations per column, pandas supports “named aggregation”
383
+ To control the output names with different aggregations per column,
384
+ MaxFrame supports “named aggregation”
355
385
 
356
- >>> from maxframe.dataframe.groupby import NamedAgg
386
+ >>> from maxframe.dataframe import NamedAgg
357
387
  >>> df.groupby("A").agg(
358
388
  ... b_min=NamedAgg(column="B", aggfunc="min"),
359
389
  ... c_sum=NamedAgg(column="C", aggfunc="sum")).execute()
@@ -376,6 +406,10 @@ def agg(groupby, func=None, method="auto", *args, **kwargs):
376
406
  f"Method {method} is not available, please specify 'tree' or 'shuffle"
377
407
  )
378
408
 
409
+ combine_size = (
410
+ kwargs.pop("combine_size", None) or options.dpe.reduction.combine_size
411
+ )
412
+
379
413
  if not is_funcs_aggregate(func, ndim=groupby.ndim):
380
414
  # pass index to transform, otherwise it will lose name info for index
381
415
  agg_result = build_mock_agg_result(
@@ -400,5 +434,8 @@ def agg(groupby, func=None, method="auto", *args, **kwargs):
400
434
  method=method,
401
435
  raw_groupby_params=groupby.op.groupby_params,
402
436
  groupby_params=groupby.op.groupby_params,
437
+ combine_size=combine_size,
438
+ chunk_store_limit=options.chunk_store_limit,
439
+ use_inf_as_na=get_pd_option("mode.use_inf_as_na", False),
403
440
  )
404
441
  return agg_op(groupby)
@@ -60,7 +60,7 @@ class GroupByApply(
60
60
  maybe_agg = BoolField("maybe_agg", default=None)
61
61
 
62
62
  logic_key = StringField("logic_key", default=None)
63
- func_key = AnyField("func_key", default=None)
63
+ func_ref = AnyField("func_ref", default=None)
64
64
  need_clean_up_func = BoolField("need_clean_up_func", default=False)
65
65
 
66
66
  def __init__(self, output_types=None, **kw):
@@ -22,15 +22,18 @@ from ...core import OutputType
22
22
  from ...lib.version import parse as parse_version
23
23
  from ...serialization.serializables import (
24
24
  DictField,
25
+ FieldTypes,
25
26
  FunctionField,
26
27
  Int32Field,
28
+ ListField,
27
29
  TupleField,
28
30
  )
29
31
  from ...udf import BuiltinFunction, MarkedFunction
30
- from ...utils import copy_if_possible
32
+ from ...utils import copy_if_possible, make_dtype, make_dtypes
31
33
  from ..core import (
32
34
  DATAFRAME_GROUPBY_TYPE,
33
35
  GROUPBY_TYPE,
36
+ INDEX_TYPE,
34
37
  DataFrameGroupBy,
35
38
  IndexValue,
36
39
  SeriesGroupBy,
@@ -42,8 +45,6 @@ from ..utils import (
42
45
  copy_func_scheduling_hints,
43
46
  infer_dataframe_return_value,
44
47
  make_column_list,
45
- make_dtype,
46
- make_dtypes,
47
48
  parse_index,
48
49
  validate_output_types,
49
50
  )
@@ -61,6 +62,8 @@ class GroupByApplyChunk(DataFrameOperatorMixin, DataFrameOperator):
61
62
  kwargs = DictField("kwargs", default=None)
62
63
 
63
64
  groupby_params = DictField("groupby_params", default=None)
65
+ order_cols = ListField("order_cols", default=None)
66
+ ascending = ListField("ascending", FieldTypes.bool, default_factory=lambda: [True])
64
67
 
65
68
  def __init__(self, output_type=None, **kw):
66
69
  if output_type:
@@ -240,14 +243,14 @@ class GroupByApplyChunk(DataFrameOperatorMixin, DataFrameOperator):
240
243
  if self.output_types:
241
244
  inferred_meta.output_type = self.output_types[0]
242
245
  inferred_meta.dtypes = dtypes if dtypes is not None else inferred_meta.dtypes
246
+ if isinstance(index, INDEX_TYPE):
247
+ index = index.index_value
243
248
  if index is not None:
244
249
  inferred_meta.index_value = (
245
250
  parse_index(index)
246
251
  if index is not input_groupby.index_value
247
252
  else input_groupby.index_value
248
253
  )
249
- else:
250
- inferred_meta.index_value = inferred_meta.index_value
251
254
  inferred_meta.elementwise = elementwise or inferred_meta.elementwise
252
255
  return inferred_meta
253
256
 
@@ -272,6 +275,8 @@ def df_groupby_apply_chunk(
272
275
  output_type=None,
273
276
  index=None,
274
277
  skip_infer=False,
278
+ order_cols=None,
279
+ ascending=True,
275
280
  args=(),
276
281
  **kwargs,
277
282
  ):
@@ -373,6 +378,13 @@ def df_groupby_apply_chunk(
373
378
  if skip_infer and output_type is None:
374
379
  output_type = OutputType.df_or_series
375
380
 
381
+ if order_cols and not isinstance(order_cols, list):
382
+ order_cols = [order_cols]
383
+ if not isinstance(ascending, list):
384
+ ascending = [ascending]
385
+ elif len(order_cols) != len(ascending):
386
+ raise ValueError("order_cols and ascending must have same length")
387
+
376
388
  # bind args and kwargs
377
389
  op = GroupByApplyChunk(
378
390
  func=func,
@@ -380,6 +392,8 @@ def df_groupby_apply_chunk(
380
392
  output_type=output_type,
381
393
  args=args,
382
394
  kwargs=kwargs,
395
+ order_cols=order_cols,
396
+ ascending=ascending,
383
397
  groupby_params=dataframe_groupby.op.groupby_params,
384
398
  )
385
399
 
@@ -12,30 +12,37 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from collections import namedtuple
16
- from typing import List
15
+ import os
16
+ import warnings
17
+ from typing import Any, Dict, List
17
18
 
18
19
  import pandas as pd
19
20
 
20
21
  from ... import opcodes
21
22
  from ...core import ENTITY_TYPE, Entity, EntityData, OutputType
22
23
  from ...core.operator import MapReduceOperator
23
- from ...serialization.serializables import AnyField, BoolField, Int32Field
24
- from ...utils import lazy_import, no_default
25
- from ..core import SERIES_TYPE
24
+ from ...env import MAXFRAME_INSIDE_TASK
25
+ from ...serialization import PickleContainer
26
+ from ...serialization.serializables import AnyField, BoolField, DictField, Int32Field
27
+ from ...udf import BuiltinFunction
28
+ from ...utils import find_objects, lazy_import, no_default
29
+ from ..core import GROUPBY_TYPE, SERIES_TYPE
26
30
  from ..initializer import Series as asseries
27
- from ..operators import DataFrameOperatorMixin
28
- from ..utils import build_df, build_series, parse_index
31
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
32
+ from ..utils import (
33
+ build_df,
34
+ build_series,
35
+ call_groupby_with_params,
36
+ make_column_list,
37
+ parse_index,
38
+ )
29
39
 
30
40
  cudf = lazy_import("cudf")
31
41
 
32
42
 
33
- NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])
34
-
35
-
36
43
  class DataFrameGroupByOp(MapReduceOperator, DataFrameOperatorMixin):
37
44
  _op_type_ = opcodes.GROUPBY
38
- _legacy_name = "DataFrameGroupByOperator"
45
+ _legacy_name = "DataFrameGroupByOperator" # since v2.0.0
39
46
 
40
47
  by = AnyField(
41
48
  "by",
@@ -61,6 +68,12 @@ class DataFrameGroupByOp(MapReduceOperator, DataFrameOperatorMixin):
61
68
  output_types = [OutputType.series_groupby]
62
69
  self.output_types = output_types
63
70
 
71
def has_custom_code(self) -> bool:
    """Report whether any callable found in ``by`` is not a builtin function.

    Returns False when ``find_objects`` yields nothing for ``self.by``.
    """
    found = find_objects(self.by, types=PickleContainer, checker=callable)
    # an empty / falsy search result means there is nothing to inspect
    for candidate in found or ():
        if not isinstance(candidate, BuiltinFunction):
            return True
    return False
76
+
64
77
  @property
65
78
  def is_dataframe_obj(self):
66
79
  return self.output_types[0] in (
@@ -93,8 +106,8 @@ class DataFrameGroupByOp(MapReduceOperator, DataFrameOperatorMixin):
93
106
  ensure_string=True,
94
107
  )
95
108
 
96
- new_kw = self.groupby_params
97
- new_kw.update(kwargs)
109
+ new_kw = self.groupby_params.copy()
110
+ new_kw.update({k: v for k, v in kwargs.items()})
98
111
  if isinstance(new_kw["by"], list):
99
112
  new_by = []
100
113
  for v in new_kw["by"]:
@@ -110,7 +123,7 @@ class DataFrameGroupByOp(MapReduceOperator, DataFrameOperatorMixin):
110
123
  else:
111
124
  new_by.append(v)
112
125
  new_kw["by"] = new_by
113
- return mock_obj.groupby(**new_kw)
126
+ return call_groupby_with_params(mock_obj, new_kw)
114
127
 
115
128
  @classmethod
116
129
  def _set_inputs(cls, op: "DataFrameGroupByOp", inputs: List[EntityData]):
@@ -118,8 +131,8 @@ class DataFrameGroupByOp(MapReduceOperator, DataFrameOperatorMixin):
118
131
  inputs_iter = iter(op._inputs[1:])
119
132
  if len(inputs) > 1:
120
133
  by = []
121
- for k in op.by:
122
- if isinstance(k, SERIES_TYPE):
134
+ for k in op.by or ():
135
+ if isinstance(k, ENTITY_TYPE):
123
136
  by.append(next(inputs_iter))
124
137
  else:
125
138
  by.append(k)
@@ -240,3 +253,90 @@ def groupby(df, by=None, level=None, as_index=True, sort=True, group_keys=True):
240
253
  output_types=output_types,
241
254
  )
242
255
  return op(df)
256
+
257
+
258
class BaseGroupByWindowOp(DataFrameOperatorMixin, DataFrameOperator):
    """Base operator for window-style computations applied on a groupby.

    Subclasses must implement ``_calc_mock_result_df``; its result decides
    whether the operator outputs a DataFrame or a Series and which dtypes
    the output carries.
    """

    _op_module_ = "dataframe.groupby"

    # NOTE(review): presumably the parameters of the source groupby; this
    # class itself only reads ``in_groupby.op.groupby_params`` -- confirm.
    groupby_params = DictField("groupby_params", default=None)
    # ``get_sort_cols_to_asc`` reads the "order_cols" and "ascending" entries
    window_params = DictField("window_params", default=None)

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    def _calc_mock_result_df(self, mock_groupby):
        """Compute the mock result on ``mock_groupby``; subclasses override."""
        raise NotImplementedError

    def get_sort_cols_to_asc(self) -> Dict[Any, bool]:
        """Map every ordering column to its ascending flag.

        When fewer flags than columns are stored, the first flag is applied
        to all columns. An unset ``window_params`` yields an empty mapping.
        """
        # ``window_params`` defaults to None, so guard before calling ``.get``
        window_params = self.window_params or {}
        order_cols = window_params.get("order_cols") or []
        # NOTE(review): a bare ``False`` stored under "ascending" would be
        # replaced by ``[True]`` here; assumes callers store a list -- confirm.
        asc_list = window_params.get("ascending") or [True]
        if len(asc_list) < len(order_cols):
            asc_list = [asc_list[0]] * len(order_cols)
        return dict(zip(order_cols, asc_list))

    def _calc_out_dtypes(self, in_groupby):
        """Infer output dtypes by running the operator on a mock groupby.

        Side effect: sets ``self.output_types`` to dataframe or series
        according to the type of the mock result.

        Returns the result ``dtypes`` for a DataFrame result, otherwise a
        ``(name, dtype)`` tuple.
        """
        in_obj = in_groupby
        groupby_params = in_groupby.op.groupby_params
        # walk up through chained groupby objects to the first non-groupby input
        while isinstance(in_obj, GROUPBY_TYPE):
            in_obj = in_obj.inputs[0]

        if in_groupby.ndim == 1:
            selection = None
        else:
            by_cols = (
                make_column_list(groupby_params.get("by"), in_groupby.dtypes) or []
            )
            selection = groupby_params.get("selection")
            if not selection:
                # no explicit selection: take every input column not used as a key
                selection = [c for c in in_obj.dtypes.index if c not in by_cols]

        mock_groupby = in_groupby.op.build_mock_groupby(
            group_keys=False, selection=selection
        )

        result_df = self._calc_mock_result_df(mock_groupby)

        if isinstance(result_df, pd.DataFrame):
            self.output_types = [OutputType.dataframe]
            return result_df.dtypes
        else:
            self.output_types = [OutputType.series]
            return result_df.name, result_df.dtype

    def __call__(self, groupby):
        """Create the output tileable for ``groupby``.

        The output takes the params of the nearest DataFrame / Series
        ancestor of ``groupby`` and overrides dtype-related fields and shape
        with values inferred by ``_calc_out_dtypes``.
        """
        in_df = groupby
        # find the nearest ancestor whose output is a DataFrame or a Series
        while in_df.op.output_types[0] not in (OutputType.dataframe, OutputType.series):
            in_df = in_df.inputs[0]

        # must run before reading ``self.output_types`` below, as it sets it
        out_dtypes = self._calc_out_dtypes(groupby)

        kw = in_df.params.copy()
        if self.output_types[0] == OutputType.dataframe:
            kw.update(
                dict(
                    columns_value=parse_index(out_dtypes.index, store_data=True),
                    dtypes=out_dtypes,
                    shape=(groupby.shape[0], len(out_dtypes)),
                )
            )
        else:
            name, dtype = out_dtypes
            kw.update(dtype=dtype, name=name, shape=(groupby.shape[0],))
        return self.new_tileable([in_df], **kw)
326
+
327
+
328
def _make_named_agg_compat(name):  # pragma: no cover
    """Module ``__getattr__`` hook keeping the legacy ``NamedAgg`` import alive.

    Parameters
    ----------
    name : str
        Attribute name being looked up on the module.

    Returns
    -------
    The ``NamedAgg`` object imported from ``..reduction``.

    Raises
    ------
    AttributeError
        If ``name`` is anything other than ``"NamedAgg"``.
    """
    if name != "NamedAgg":
        raise AttributeError(f"module {__name__} has no attribute {name}")

    # Import only for the one supported name, so lookups of any other
    # attribute fail fast with AttributeError instead of triggering (and
    # possibly failing inside) the import of the reduction package.
    from ..reduction import NamedAgg

    if MAXFRAME_INSIDE_TASK not in os.environ:
        warnings.warn(
            "Please import NamedAgg from maxframe.dataframe",
            DeprecationWarning,
            # attribute the warning to the code performing the legacy import
            stacklevel=2,
        )
    return NamedAgg
340
+
341
+
342
+ __getattr__ = _make_named_agg_compat