maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (443)
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win32.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +9 -8
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +38 -1
  15. maxframe/codegen/spe/dataframe/misc.py +11 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +32 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +39 -18
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  29. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  30. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  31. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  32. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  33. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  34. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  35. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  36. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  37. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  38. maxframe/codegen/spe/tensor/__init__.py +3 -0
  39. maxframe/codegen/spe/tensor/datasource.py +1 -0
  40. maxframe/codegen/spe/tensor/fft.py +74 -0
  41. maxframe/codegen/spe/tensor/linalg.py +29 -2
  42. maxframe/codegen/spe/tensor/misc.py +79 -25
  43. maxframe/codegen/spe/tensor/spatial.py +45 -0
  44. maxframe/codegen/spe/tensor/statistics.py +44 -0
  45. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  46. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  47. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  48. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  49. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  50. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  51. maxframe/codegen/spe/utils.py +2 -0
  52. maxframe/config/config.py +73 -9
  53. maxframe/config/tests/test_validators.py +13 -1
  54. maxframe/config/validators.py +49 -0
  55. maxframe/conftest.py +54 -17
  56. maxframe/core/accessor.py +2 -2
  57. maxframe/core/base.py +2 -1
  58. maxframe/core/entity/core.py +5 -0
  59. maxframe/core/entity/tileables.py +3 -1
  60. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  61. maxframe/core/graph/entity.py +8 -3
  62. maxframe/core/mode.py +6 -1
  63. maxframe/core/operator/base.py +9 -2
  64. maxframe/core/operator/core.py +10 -2
  65. maxframe/core/operator/utils.py +13 -0
  66. maxframe/dataframe/__init__.py +12 -5
  67. maxframe/dataframe/accessors/__init__.py +1 -1
  68. maxframe/dataframe/accessors/compat.py +45 -0
  69. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  70. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  71. maxframe/dataframe/accessors/dict_/core.py +48 -0
  72. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  73. maxframe/dataframe/accessors/dict_/length.py +7 -16
  74. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  75. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  76. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  77. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  78. maxframe/dataframe/accessors/list_/core.py +48 -0
  79. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  80. maxframe/dataframe/accessors/list_/length.py +7 -16
  81. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  82. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  83. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  84. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  85. maxframe/dataframe/accessors/struct_/core.py +43 -0
  86. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  87. maxframe/dataframe/accessors/struct_/field.py +123 -0
  88. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  89. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  90. maxframe/dataframe/arithmetic/__init__.py +18 -4
  91. maxframe/dataframe/arithmetic/between.py +106 -0
  92. maxframe/dataframe/arithmetic/dot.py +237 -0
  93. maxframe/dataframe/arithmetic/maximum.py +33 -0
  94. maxframe/dataframe/arithmetic/minimum.py +33 -0
  95. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  96. maxframe/dataframe/core.py +161 -224
  97. maxframe/dataframe/datasource/__init__.py +18 -0
  98. maxframe/dataframe/datasource/core.py +6 -0
  99. maxframe/dataframe/datasource/direct.py +57 -0
  100. maxframe/dataframe/datasource/from_dict.py +124 -0
  101. maxframe/dataframe/datasource/from_index.py +1 -1
  102. maxframe/dataframe/datasource/from_records.py +77 -0
  103. maxframe/dataframe/datasource/from_tensor.py +109 -41
  104. maxframe/dataframe/datasource/read_csv.py +21 -14
  105. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  106. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  107. maxframe/dataframe/datasource/read_parquet.py +38 -39
  108. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  109. maxframe/dataframe/datastore/__init__.py +11 -1
  110. maxframe/dataframe/datastore/direct.py +268 -0
  111. maxframe/dataframe/datastore/to_csv.py +29 -41
  112. maxframe/dataframe/datastore/to_odps.py +36 -4
  113. maxframe/dataframe/extensions/__init__.py +20 -4
  114. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  115. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  116. maxframe/dataframe/extensions/collect_kv.py +126 -0
  117. maxframe/dataframe/extensions/extract_kv.py +177 -0
  118. maxframe/dataframe/extensions/flatjson.py +2 -1
  119. maxframe/dataframe/extensions/map_reduce.py +263 -0
  120. maxframe/dataframe/extensions/rebalance.py +62 -0
  121. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  122. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  123. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  124. maxframe/dataframe/groupby/__init__.py +17 -2
  125. maxframe/dataframe/groupby/aggregation.py +86 -49
  126. maxframe/dataframe/groupby/apply.py +1 -1
  127. maxframe/dataframe/groupby/apply_chunk.py +19 -5
  128. maxframe/dataframe/groupby/core.py +116 -16
  129. maxframe/dataframe/groupby/cum.py +4 -25
  130. maxframe/dataframe/groupby/expanding.py +264 -0
  131. maxframe/dataframe/groupby/fill.py +1 -1
  132. maxframe/dataframe/groupby/getitem.py +12 -5
  133. maxframe/dataframe/groupby/head.py +11 -1
  134. maxframe/dataframe/groupby/rank.py +136 -0
  135. maxframe/dataframe/groupby/rolling.py +206 -0
  136. maxframe/dataframe/groupby/shift.py +114 -0
  137. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  138. maxframe/dataframe/indexing/__init__.py +22 -2
  139. maxframe/dataframe/indexing/droplevel.py +195 -0
  140. maxframe/dataframe/indexing/filter.py +169 -0
  141. maxframe/dataframe/indexing/get_level_values.py +76 -0
  142. maxframe/dataframe/indexing/iat.py +45 -0
  143. maxframe/dataframe/indexing/iloc.py +152 -12
  144. maxframe/dataframe/indexing/insert.py +46 -18
  145. maxframe/dataframe/indexing/loc.py +287 -7
  146. maxframe/dataframe/indexing/reindex.py +14 -5
  147. maxframe/dataframe/indexing/rename.py +6 -0
  148. maxframe/dataframe/indexing/rename_axis.py +2 -2
  149. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  150. maxframe/dataframe/indexing/reset_index.py +33 -6
  151. maxframe/dataframe/indexing/sample.py +8 -0
  152. maxframe/dataframe/indexing/setitem.py +3 -3
  153. maxframe/dataframe/indexing/swaplevel.py +185 -0
  154. maxframe/dataframe/indexing/take.py +99 -0
  155. maxframe/dataframe/indexing/truncate.py +140 -0
  156. maxframe/dataframe/indexing/where.py +0 -11
  157. maxframe/dataframe/indexing/xs.py +148 -0
  158. maxframe/dataframe/merge/__init__.py +15 -1
  159. maxframe/dataframe/merge/append.py +97 -98
  160. maxframe/dataframe/merge/combine.py +244 -0
  161. maxframe/dataframe/merge/combine_first.py +120 -0
  162. maxframe/dataframe/merge/compare.py +387 -0
  163. maxframe/dataframe/merge/concat.py +183 -0
  164. maxframe/dataframe/merge/update.py +271 -0
  165. maxframe/dataframe/misc/__init__.py +28 -11
  166. maxframe/dataframe/misc/_duplicate.py +10 -4
  167. maxframe/dataframe/misc/apply.py +1 -1
  168. maxframe/dataframe/misc/check_unique.py +82 -0
  169. maxframe/dataframe/misc/clip.py +145 -0
  170. maxframe/dataframe/misc/describe.py +175 -9
  171. maxframe/dataframe/misc/drop.py +31 -0
  172. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  173. maxframe/dataframe/misc/duplicated.py +2 -2
  174. maxframe/dataframe/misc/get_dummies.py +5 -1
  175. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  176. maxframe/dataframe/misc/isin.py +2 -2
  177. maxframe/dataframe/misc/map.py +125 -18
  178. maxframe/dataframe/misc/repeat.py +159 -0
  179. maxframe/dataframe/misc/tests/test_misc.py +48 -3
  180. maxframe/dataframe/misc/to_numeric.py +3 -0
  181. maxframe/dataframe/misc/transform.py +12 -5
  182. maxframe/dataframe/misc/transpose.py +13 -1
  183. maxframe/dataframe/misc/valid_index.py +115 -0
  184. maxframe/dataframe/misc/value_counts.py +38 -4
  185. maxframe/dataframe/missing/checkna.py +14 -6
  186. maxframe/dataframe/missing/dropna.py +5 -0
  187. maxframe/dataframe/missing/fillna.py +1 -1
  188. maxframe/dataframe/missing/replace.py +7 -4
  189. maxframe/dataframe/reduction/__init__.py +35 -16
  190. maxframe/dataframe/reduction/aggregation.py +43 -14
  191. maxframe/dataframe/reduction/all.py +2 -2
  192. maxframe/dataframe/reduction/any.py +2 -2
  193. maxframe/dataframe/reduction/argmax.py +103 -0
  194. maxframe/dataframe/reduction/argmin.py +103 -0
  195. maxframe/dataframe/reduction/core.py +80 -24
  196. maxframe/dataframe/reduction/count.py +13 -9
  197. maxframe/dataframe/reduction/cov.py +166 -0
  198. maxframe/dataframe/reduction/cummax.py +2 -2
  199. maxframe/dataframe/reduction/cummin.py +2 -2
  200. maxframe/dataframe/reduction/cumprod.py +2 -2
  201. maxframe/dataframe/reduction/cumsum.py +2 -2
  202. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  203. maxframe/dataframe/reduction/idxmax.py +185 -0
  204. maxframe/dataframe/reduction/idxmin.py +185 -0
  205. maxframe/dataframe/reduction/kurtosis.py +37 -30
  206. maxframe/dataframe/reduction/max.py +2 -2
  207. maxframe/dataframe/reduction/mean.py +9 -7
  208. maxframe/dataframe/reduction/median.py +2 -2
  209. maxframe/dataframe/reduction/min.py +2 -2
  210. maxframe/dataframe/reduction/mode.py +144 -0
  211. maxframe/dataframe/reduction/nunique.py +19 -11
  212. maxframe/dataframe/reduction/prod.py +18 -13
  213. maxframe/dataframe/reduction/reduction_size.py +2 -2
  214. maxframe/dataframe/reduction/sem.py +13 -9
  215. maxframe/dataframe/reduction/skew.py +31 -27
  216. maxframe/dataframe/reduction/str_concat.py +10 -7
  217. maxframe/dataframe/reduction/sum.py +18 -14
  218. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  219. maxframe/dataframe/reduction/unique.py +20 -3
  220. maxframe/dataframe/reduction/var.py +16 -12
  221. maxframe/dataframe/reshape/__init__.py +38 -0
  222. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  223. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  224. maxframe/dataframe/reshape/unstack.py +114 -0
  225. maxframe/dataframe/sort/__init__.py +16 -1
  226. maxframe/dataframe/sort/argsort.py +68 -0
  227. maxframe/dataframe/sort/core.py +2 -1
  228. maxframe/dataframe/sort/nlargest.py +238 -0
  229. maxframe/dataframe/sort/nsmallest.py +228 -0
  230. maxframe/dataframe/sort/rank.py +147 -0
  231. maxframe/dataframe/statistics/__init__.py +3 -3
  232. maxframe/dataframe/statistics/corr.py +1 -0
  233. maxframe/dataframe/statistics/quantile.py +2 -2
  234. maxframe/dataframe/tests/test_typing.py +104 -0
  235. maxframe/dataframe/tests/test_utils.py +66 -2
  236. maxframe/dataframe/tseries/__init__.py +19 -0
  237. maxframe/dataframe/tseries/at_time.py +61 -0
  238. maxframe/dataframe/tseries/between_time.py +122 -0
  239. maxframe/dataframe/typing_.py +185 -0
  240. maxframe/dataframe/utils.py +125 -52
  241. maxframe/dataframe/window/aggregation.py +8 -4
  242. maxframe/dataframe/window/core.py +14 -1
  243. maxframe/dataframe/window/ewm.py +1 -3
  244. maxframe/dataframe/window/expanding.py +37 -35
  245. maxframe/dataframe/window/rolling.py +49 -39
  246. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  247. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  248. maxframe/env.py +7 -4
  249. maxframe/errors.py +2 -2
  250. maxframe/io/odpsio/schema.py +9 -3
  251. maxframe/io/odpsio/tableio.py +7 -2
  252. maxframe/io/odpsio/tests/test_schema.py +198 -83
  253. maxframe/learn/__init__.py +10 -2
  254. maxframe/learn/cluster/__init__.py +15 -0
  255. maxframe/learn/cluster/_kmeans.py +782 -0
  256. maxframe/learn/contrib/llm/core.py +18 -7
  257. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  258. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  259. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  260. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  261. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  262. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  263. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  264. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  265. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  266. maxframe/learn/contrib/llm/models/managed.py +76 -11
  267. maxframe/learn/contrib/llm/models/openai.py +72 -0
  268. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  269. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  270. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  271. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  272. maxframe/learn/contrib/llm/text.py +348 -42
  273. maxframe/learn/contrib/models.py +4 -1
  274. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  275. maxframe/learn/contrib/xgboost/core.py +113 -4
  276. maxframe/learn/contrib/xgboost/predict.py +4 -2
  277. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  278. maxframe/learn/contrib/xgboost/train.py +7 -2
  279. maxframe/learn/core.py +66 -0
  280. maxframe/learn/linear_model/_base.py +58 -1
  281. maxframe/learn/linear_model/_lin_reg.py +1 -1
  282. maxframe/learn/metrics/__init__.py +6 -0
  283. maxframe/learn/metrics/_classification.py +145 -0
  284. maxframe/learn/metrics/_ranking.py +477 -0
  285. maxframe/learn/metrics/_scorer.py +60 -0
  286. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  287. maxframe/learn/metrics/pairwise/core.py +77 -0
  288. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  289. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  290. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  291. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  292. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  293. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  294. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  295. maxframe/learn/metrics/tests/__init__.py +13 -0
  296. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  297. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  298. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  299. maxframe/learn/utils/__init__.py +2 -1
  300. maxframe/learn/utils/checks.py +1 -2
  301. maxframe/learn/utils/core.py +59 -0
  302. maxframe/learn/utils/extmath.py +79 -9
  303. maxframe/learn/utils/odpsio.py +262 -0
  304. maxframe/learn/utils/validation.py +2 -2
  305. maxframe/lib/compat.py +40 -0
  306. maxframe/lib/dtypes_extension/__init__.py +16 -1
  307. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  308. maxframe/lib/dtypes_extension/blob.py +304 -0
  309. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  310. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  311. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  312. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  313. maxframe/lib/filesystem/_oss_lib/common.py +124 -50
  314. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  315. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  316. maxframe/lib/filesystem/base.py +1 -1
  317. maxframe/lib/filesystem/core.py +1 -1
  318. maxframe/lib/filesystem/oss.py +115 -46
  319. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  320. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  321. maxframe/lib/wrapped_pickle.py +10 -0
  322. maxframe/opcodes.py +41 -15
  323. maxframe/protocol.py +12 -0
  324. maxframe/remote/core.py +4 -0
  325. maxframe/serialization/__init__.py +11 -2
  326. maxframe/serialization/arrow.py +38 -13
  327. maxframe/serialization/blob.py +32 -0
  328. maxframe/serialization/core.cp37-win32.pyd +0 -0
  329. maxframe/serialization/core.pyx +39 -1
  330. maxframe/serialization/exception.py +2 -4
  331. maxframe/serialization/numpy.py +11 -0
  332. maxframe/serialization/pandas.py +46 -9
  333. maxframe/serialization/serializables/core.py +2 -2
  334. maxframe/serialization/tests/test_serial.py +31 -4
  335. maxframe/tensor/__init__.py +38 -8
  336. maxframe/tensor/arithmetic/__init__.py +19 -10
  337. maxframe/tensor/arithmetic/core.py +2 -2
  338. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  339. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
  340. maxframe/tensor/core.py +6 -2
  341. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  342. maxframe/tensor/extensions/__init__.py +2 -0
  343. maxframe/tensor/extensions/apply_chunk.py +3 -3
  344. maxframe/tensor/extensions/rebalance.py +65 -0
  345. maxframe/tensor/fft/__init__.py +32 -0
  346. maxframe/tensor/fft/core.py +168 -0
  347. maxframe/tensor/fft/fft.py +112 -0
  348. maxframe/tensor/fft/fft2.py +118 -0
  349. maxframe/tensor/fft/fftfreq.py +80 -0
  350. maxframe/tensor/fft/fftn.py +123 -0
  351. maxframe/tensor/fft/fftshift.py +79 -0
  352. maxframe/tensor/fft/hfft.py +112 -0
  353. maxframe/tensor/fft/ifft.py +114 -0
  354. maxframe/tensor/fft/ifft2.py +115 -0
  355. maxframe/tensor/fft/ifftn.py +123 -0
  356. maxframe/tensor/fft/ifftshift.py +73 -0
  357. maxframe/tensor/fft/ihfft.py +93 -0
  358. maxframe/tensor/fft/irfft.py +118 -0
  359. maxframe/tensor/fft/irfft2.py +62 -0
  360. maxframe/tensor/fft/irfftn.py +114 -0
  361. maxframe/tensor/fft/rfft.py +116 -0
  362. maxframe/tensor/fft/rfft2.py +63 -0
  363. maxframe/tensor/fft/rfftfreq.py +87 -0
  364. maxframe/tensor/fft/rfftn.py +113 -0
  365. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  366. maxframe/tensor/linalg/__init__.py +7 -0
  367. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  368. maxframe/tensor/linalg/cholesky.py +117 -0
  369. maxframe/tensor/linalg/einsum.py +339 -0
  370. maxframe/tensor/linalg/lstsq.py +100 -0
  371. maxframe/tensor/linalg/matrix_norm.py +75 -0
  372. maxframe/tensor/linalg/norm.py +249 -0
  373. maxframe/tensor/linalg/solve.py +72 -0
  374. maxframe/tensor/linalg/solve_triangular.py +2 -2
  375. maxframe/tensor/linalg/vector_norm.py +113 -0
  376. maxframe/tensor/misc/__init__.py +24 -1
  377. maxframe/tensor/misc/argwhere.py +72 -0
  378. maxframe/tensor/misc/array_split.py +46 -0
  379. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  380. maxframe/tensor/misc/copyto.py +130 -0
  381. maxframe/tensor/misc/delete.py +104 -0
  382. maxframe/tensor/misc/dsplit.py +68 -0
  383. maxframe/tensor/misc/ediff1d.py +74 -0
  384. maxframe/tensor/misc/expand_dims.py +85 -0
  385. maxframe/tensor/misc/flip.py +90 -0
  386. maxframe/tensor/misc/fliplr.py +64 -0
  387. maxframe/tensor/misc/flipud.py +68 -0
  388. maxframe/tensor/misc/hsplit.py +85 -0
  389. maxframe/tensor/misc/insert.py +139 -0
  390. maxframe/tensor/misc/moveaxis.py +83 -0
  391. maxframe/tensor/misc/result_type.py +88 -0
  392. maxframe/tensor/misc/roll.py +124 -0
  393. maxframe/tensor/misc/rollaxis.py +77 -0
  394. maxframe/tensor/misc/shape.py +89 -0
  395. maxframe/tensor/misc/split.py +190 -0
  396. maxframe/tensor/misc/tile.py +109 -0
  397. maxframe/tensor/misc/vsplit.py +74 -0
  398. maxframe/tensor/reduction/array_equal.py +2 -1
  399. maxframe/tensor/sort/__init__.py +2 -0
  400. maxframe/tensor/sort/argpartition.py +98 -0
  401. maxframe/tensor/sort/partition.py +228 -0
  402. maxframe/tensor/spatial/__init__.py +15 -0
  403. maxframe/tensor/spatial/distance/__init__.py +17 -0
  404. maxframe/tensor/spatial/distance/cdist.py +421 -0
  405. maxframe/tensor/spatial/distance/pdist.py +398 -0
  406. maxframe/tensor/spatial/distance/squareform.py +153 -0
  407. maxframe/tensor/special/__init__.py +159 -21
  408. maxframe/tensor/special/airy.py +55 -0
  409. maxframe/tensor/special/bessel.py +199 -0
  410. maxframe/tensor/special/core.py +65 -4
  411. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  412. maxframe/tensor/special/ellip_harm.py +55 -0
  413. maxframe/tensor/special/err_fresnel.py +223 -0
  414. maxframe/tensor/special/gamma_funcs.py +303 -0
  415. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  416. maxframe/tensor/special/info_theory.py +189 -0
  417. maxframe/tensor/special/misc.py +21 -0
  418. maxframe/tensor/statistics/__init__.py +6 -0
  419. maxframe/tensor/statistics/corrcoef.py +77 -0
  420. maxframe/tensor/statistics/cov.py +222 -0
  421. maxframe/tensor/statistics/digitize.py +126 -0
  422. maxframe/tensor/statistics/histogram.py +520 -0
  423. maxframe/tensor/statistics/median.py +85 -0
  424. maxframe/tensor/statistics/ptp.py +89 -0
  425. maxframe/tensor/utils.py +3 -3
  426. maxframe/tests/test_udf.py +61 -0
  427. maxframe/tests/test_utils.py +51 -6
  428. maxframe/tests/utils.py +0 -2
  429. maxframe/typing_.py +2 -0
  430. maxframe/udf.py +130 -9
  431. maxframe/utils.py +254 -27
  432. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
  433. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
  434. maxframe_client/fetcher.py +35 -4
  435. maxframe_client/session/odps.py +7 -2
  436. maxframe_client/session/task.py +8 -1
  437. maxframe_client/tests/test_fetcher.py +76 -3
  438. maxframe_client/tests/test_session.py +28 -1
  439. maxframe/dataframe/arrays.py +0 -864
  440. maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  441. maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  442. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  443. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -12,17 +12,13 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import List
16
-
17
15
  from ... import opcodes
18
- from ...core import EntityData
19
16
  from ...serialization.serializables import (
20
17
  AnyField,
21
18
  BoolField,
22
19
  DictField,
23
20
  Int32Field,
24
21
  Int64Field,
25
- KeyField,
26
22
  ListField,
27
23
  StringField,
28
24
  )
@@ -33,27 +29,26 @@ from .core import DataFrameDataStore
33
29
  class DataFrameToCSV(DataFrameDataStore):
34
30
  _op_type_ = opcodes.TO_CSV
35
31
 
36
- input = KeyField("input")
37
- path = AnyField("path")
38
- sep = StringField("sep")
39
- na_rep = StringField("na_rep")
40
- float_format = StringField("float_format")
41
- columns = ListField("columns")
42
- header = AnyField("header")
43
- index = BoolField("index")
44
- index_label = AnyField("index_label")
45
- mode = StringField("mode")
46
- encoding = StringField("encoding")
47
- compression = AnyField("compression")
48
- quoting = Int32Field("quoting")
49
- quotechar = StringField("quotechar")
50
- line_terminator = StringField("line_terminator")
51
- chunksize = Int64Field("chunksize")
52
- date_format = StringField("date_format")
53
- doublequote = BoolField("doublequote")
54
- escapechar = StringField("escapechar")
55
- decimal = StringField("decimal")
56
- storage_options = DictField("storage_options")
32
+ path = AnyField("path", default=None)
33
+ sep = StringField("sep", default=None)
34
+ na_rep = StringField("na_rep", default=None)
35
+ float_format = StringField("float_format", default=None)
36
+ columns = ListField("columns", default=None)
37
+ header = AnyField("header", default=None)
38
+ index = BoolField("index", default=None)
39
+ index_label = AnyField("index_label", default=None)
40
+ mode = StringField("mode", default=None)
41
+ encoding = StringField("encoding", default=None)
42
+ compression = AnyField("compression", default=None)
43
+ quoting = Int32Field("quoting", default=None)
44
+ quotechar = StringField("quotechar", default=None)
45
+ line_terminator = StringField("line_terminator", default=None)
46
+ chunksize = Int64Field("chunksize", default=None)
47
+ date_format = StringField("date_format", default=None)
48
+ doublequote = BoolField("doublequote", default=None)
49
+ escapechar = StringField("escapechar", default=None)
50
+ decimal = StringField("decimal", default=None)
51
+ storage_options = DictField("storage_options", default=None)
57
52
 
58
53
  def __init__(self, output_types=None, **kw):
59
54
  super().__init__(_output_types=output_types, **kw)
@@ -63,19 +58,6 @@ class DataFrameToCSV(DataFrameDataStore):
63
58
  # if wildcard in path, write csv into multiple files
64
59
  return "*" not in self.path
65
60
 
66
- @property
67
- def output_stat(self):
68
- return self.output_stat
69
-
70
- @property
71
- def output_limit(self):
72
- return 1 if not self.output_stat else 2
73
-
74
- @classmethod
75
- def _set_inputs(cls, op: "DataFrameToCSV", inputs: List[EntityData]):
76
- super()._set_inputs(op, inputs)
77
- op._input = op._inputs[0]
78
-
79
61
  def __call__(self, df):
80
62
  index_value = parse_index(df.index_value.to_pandas()[:0], df)
81
63
  if df.ndim == 2:
@@ -110,13 +92,14 @@ def to_csv(
110
92
  compression="infer",
111
93
  quoting=None,
112
94
  quotechar='"',
113
- line_terminator=None,
95
+ lineterminator=None,
114
96
  chunksize=None,
115
97
  date_format=None,
116
98
  doublequote=True,
117
99
  escapechar=None,
118
100
  decimal=".",
119
101
  storage_options=None,
102
+ **kw,
120
103
  ):
121
104
  r"""
122
105
  Write object to a comma-separated values (csv) file.
@@ -169,7 +152,7 @@ def to_csv(
169
152
  will treat them as non-numeric.
170
153
  quotechar : str, default '\"'
171
154
  String of length 1. Character used to quote fields.
172
- line_terminator : str, optional
155
+ lineterminator : str, optional
173
156
  The newline character or character sequence to use in the output
174
157
  file. Defaults to `os.linesep`, which depends on the OS in which
175
158
  this method is called ('\n' for linux, '\r\n' for Windows, i.e.).
@@ -203,6 +186,11 @@ def to_csv(
203
186
  ... 'weapon': ['sai', 'bo staff']})
204
187
  >>> df.to_csv('out.csv', index=False).execute()
205
188
  """
189
+ lineterminator = lineterminator or kw.pop("line_terminator", None)
190
+ if kw:
191
+ raise TypeError(
192
+ f"to_csv() got an unexpected keyword argument '{next(iter(kw))}'"
193
+ )
206
194
 
207
195
  if mode != "w": # pragma: no cover
208
196
  raise NotImplementedError("only support to_csv with mode 'w' for now")
@@ -220,7 +208,7 @@ def to_csv(
220
208
  compression=compression,
221
209
  quoting=quoting,
222
210
  quotechar=quotechar,
223
- line_terminator=line_terminator,
211
+ line_terminator=lineterminator,
224
212
  chunksize=chunksize,
225
213
  date_format=date_format,
226
214
  doublequote=doublequote,
@@ -56,10 +56,17 @@ class DataFrameToODPSTable(DataFrameDataStore):
56
56
  index_label = ListField("index_label", FieldTypes.string, default=None)
57
57
  lifecycle = Int64Field("lifecycle", default=None)
58
58
  table_properties = DictField("table_properties", default=None)
59
+ primary_key = ListField("primary_key", FieldTypes.string, default=None)
60
+ use_generated_table_meta = BoolField("use_generated_table_meta", default=False)
59
61
 
60
62
  def __init__(self, **kw):
61
63
  super().__init__(_output_types=[OutputType.dataframe], **kw)
62
64
 
65
+ def check_inputs(self, inputs: List[TileableType]):
66
+ if self.use_generated_table_meta:
67
+ return None
68
+ return super().check_inputs(inputs)
69
+
63
70
  def __call__(self, x):
64
71
  shape = (0,) * len(x.shape)
65
72
  index_value = parse_index(x.index_value.to_pandas()[:0], x.key, "index")
@@ -100,11 +107,12 @@ def to_odps_table(
100
107
  partition: Optional[str] = None,
101
108
  partition_col: Union[None, str, List[str]] = None,
102
109
  overwrite: bool = False,
103
- unknown_as_string: Optional[bool] = None,
110
+ unknown_as_string: Optional[bool] = True,
104
111
  index: bool = True,
105
112
  index_label: Union[None, str, List[str]] = None,
106
113
  lifecycle: Optional[int] = None,
107
114
  table_properties: Optional[dict] = None,
115
+ primary_key: Union[None, str, List[str]] = None,
108
116
  ):
109
117
  """
110
118
  Write DataFrame object into a MaxCompute (ODPS) table.
@@ -145,6 +153,10 @@ def to_odps_table(
145
153
  Specify lifecycle of the output table.
146
154
  table_properties: Optional[dict]
147
155
  Specify properties of the output table.
156
+ primary_key: Union[None, str, List[str]]
157
+ If provided and target table does not exist, target table
158
+ will be a delta table with columns specified in this argument
159
+ as primary key.
148
160
 
149
161
  Returns
150
162
  -------
@@ -201,12 +213,14 @@ def to_odps_table(
201
213
  index_table_intersect = index_cols & table_cols
202
214
  if index_table_intersect:
203
215
  raise ValueError(
204
- f"Index column(s) {index_table_intersect} conflict with column(s) of the input dataframe."
216
+ f"Index column(s) {index_table_intersect} conflict with "
217
+ f"column(s) of the input dataframe."
205
218
  )
206
219
  index_partition_intersect = index_cols & partition_col_set
207
220
  if index_partition_intersect:
208
221
  raise ValueError(
209
- f"Index column(s) {index_partition_intersect} conflict with partition column(s)."
222
+ f"Index column(s) {index_partition_intersect} conflict "
223
+ f"with partition column(s)."
210
224
  )
211
225
 
212
226
  if partition_col:
@@ -217,6 +231,23 @@ def to_odps_table(
217
231
  " is not the data column(s) of the input dataframe."
218
232
  )
219
233
 
234
+ table_properties = table_properties or {}
235
+ if primary_key is not None:
236
+ table_properties["transactional"] = "true"
237
+ if odps_entry.exist_table(table):
238
+ table_obj = odps_entry.get_table(table)
239
+ if table_obj.is_transactional:
240
+ table_properties = table_properties or {}
241
+ table_properties["transactional"] = "true"
242
+ primary_key = primary_key or table_obj.primary_key or ()
243
+ if set(primary_key) != set(table_obj.primary_key or ()):
244
+ raise ValueError(
245
+ f"Primary keys between existing table {table} and "
246
+ f"provided arguments are not same."
247
+ )
248
+ if primary_key and not isinstance(primary_key, (list, tuple)):
249
+ primary_key = [primary_key]
250
+
220
251
  op = DataFrameToODPSTable(
221
252
  dtypes=df.dtypes,
222
253
  table_name=table,
@@ -227,6 +258,7 @@ def to_odps_table(
227
258
  index=index,
228
259
  index_label=index_label,
229
260
  lifecycle=lifecycle or options.session.table_lifecycle,
230
- table_properties=table_properties,
261
+ table_properties=table_properties or None,
262
+ primary_key=primary_key or None,
231
263
  )
232
264
  return op(df)
@@ -24,20 +24,36 @@ from .apply_chunk import (
24
24
  df_apply_chunk,
25
25
  series_apply_chunk,
26
26
  )
27
+ from .cartesian_chunk import cartesian_chunk
28
+ from .collect_kv import collect_kv
29
+ from .extract_kv import extract_kv
27
30
  from .flatjson import series_flatjson
28
31
  from .flatmap import df_flatmap, series_flatmap
32
+ from .map_reduce import map_reduce
33
+ from .rebalance import DataFrameRebalance, rebalance
29
34
  from .reshuffle import DataFrameReshuffle, df_reshuffle
30
35
 
31
36
 
32
37
  def _install():
33
38
  from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
34
39
 
35
- DataFrameMaxFrameAccessor._register("reshuffle", df_reshuffle)
36
- DataFrameMaxFrameAccessor._register("flatmap", df_flatmap)
37
40
  DataFrameMaxFrameAccessor._register("apply_chunk", df_apply_chunk)
38
- SeriesMaxFrameAccessor._register("flatmap", series_flatmap)
39
- SeriesMaxFrameAccessor._register("flatjson", series_flatjson)
41
+ DataFrameMaxFrameAccessor._register("cartesian_chunk", cartesian_chunk)
42
+ DataFrameMaxFrameAccessor._register("collect_kv", collect_kv)
43
+ DataFrameMaxFrameAccessor._register("extract_kv", extract_kv)
44
+ DataFrameMaxFrameAccessor._register("flatmap", df_flatmap)
45
+ DataFrameMaxFrameAccessor._register("map_reduce", map_reduce)
46
+ DataFrameMaxFrameAccessor._register("rebalance", rebalance)
47
+ DataFrameMaxFrameAccessor._register("reshuffle", df_reshuffle)
48
+
40
49
  SeriesMaxFrameAccessor._register("apply_chunk", series_apply_chunk)
50
+ SeriesMaxFrameAccessor._register("cartesian_chunk", cartesian_chunk)
51
+ SeriesMaxFrameAccessor._register("extract_kv", extract_kv)
52
+ SeriesMaxFrameAccessor._register("flatjson", series_flatjson)
53
+ SeriesMaxFrameAccessor._register("flatmap", series_flatmap)
54
+ SeriesMaxFrameAccessor._register("rebalance", rebalance)
55
+
56
+ IndexMaxFrameAccessor._register("rebalance", rebalance)
41
57
 
42
58
  if DataFrameMaxFrameAccessor._api_count:
43
59
  for t in DATAFRAME_TYPE:
@@ -26,9 +26,10 @@ from ...serialization.serializables import (
26
26
  Int32Field,
27
27
  TupleField,
28
28
  )
29
+ from ...typing_ import TileableType
29
30
  from ...udf import BuiltinFunction, MarkedFunction
30
31
  from ...utils import copy_if_possible, make_dtype, make_dtypes
31
- from ..core import DATAFRAME_TYPE, DataFrame, IndexValue, Series
32
+ from ..core import DATAFRAME_TYPE, INDEX_TYPE, DataFrame, IndexValue, Series
32
33
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
33
34
  from ..utils import (
34
35
  InferredDataFrameMeta,
@@ -43,7 +44,7 @@ from ..utils import (
43
44
 
44
45
  class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
45
46
  _op_type_ = opcodes.APPLY_CHUNK
46
- _legacy_name = "DataFrameApplyChunkOperator"
47
+ _legacy_name = "DataFrameApplyChunkOperator" # since v2.0.0
47
48
 
48
49
  func = FunctionField("func")
49
50
  batch_rows = Int32Field("batch_rows", default=None)
@@ -60,16 +61,26 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
60
61
  def has_custom_code(self) -> bool:
61
62
  return not isinstance(self.func, BuiltinFunction)
62
63
 
64
+ def check_inputs(self, inputs: List[TileableType]):
65
+ # for apply_chunk we allow calls on non-deterministic tileables
66
+ pass
67
+
63
68
  def _call_dataframe(self, df, dtypes, dtype, name, index_value, element_wise):
64
69
  # return dataframe
65
70
  if self.output_types[0] == OutputType.dataframe:
66
71
  dtypes = make_dtypes(dtypes)
72
+ if dtypes is not None:
73
+ shape = df.shape if element_wise else (np.nan, len(dtypes))
74
+ cols_value = parse_index(dtypes.index, store_data=True)
75
+ else:
76
+ shape = (np.nan, np.nan)
77
+ cols_value = None
67
78
  # apply_chunk will generate a new range index for results
68
79
  return self.new_dataframe(
69
80
  [df],
70
- shape=df.shape if element_wise else (np.nan, len(dtypes)),
81
+ shape=shape,
71
82
  index_value=index_value,
72
- columns_value=parse_index(dtypes.index, store_data=True),
83
+ columns_value=cols_value,
73
84
  dtypes=dtypes,
74
85
  )
75
86
 
@@ -106,11 +117,17 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
106
117
  name: Any = None,
107
118
  output_type=None,
108
119
  index=None,
120
+ skip_infer=False,
109
121
  ):
110
122
  args = self.args or ()
111
123
  kwargs = self.kwargs or {}
112
124
  # if not dtypes and not skip_infer:
113
- packed_func = get_packed_func(df_or_series, self.func, *args, **kwargs)
125
+ try:
126
+ packed_func = get_packed_func(df_or_series, self.func, *args, **kwargs)
127
+ except:
128
+ if not skip_infer:
129
+ raise
130
+ packed_func = self.func
114
131
 
115
132
  # if skip_infer, directly build a frame
116
133
  if self.output_types and self.output_types[0] == OutputType.df_or_series:
@@ -125,13 +142,15 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
125
142
  dtype=dtype,
126
143
  name=name,
127
144
  index=index,
145
+ skip_infer=skip_infer,
128
146
  )
129
147
 
130
148
  if inferred_meta.index_value is None:
131
149
  inferred_meta.index_value = parse_index(
132
150
  None, (df_or_series.key, df_or_series.index_value.key, self.func)
133
151
  )
134
- inferred_meta.check_absence("output_type", "dtypes", "dtype")
152
+ if not skip_infer:
153
+ inferred_meta.check_absence("output_type", "dtypes", "dtype")
135
154
 
136
155
  if isinstance(df_or_series, DATAFRAME_TYPE):
137
156
  return self._call_dataframe(
@@ -163,6 +182,7 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
163
182
  name: Any = None,
164
183
  index: Union[pd.Index, IndexValue] = None,
165
184
  elementwise: bool = None,
185
+ skip_infer: bool = False,
166
186
  **kwargs,
167
187
  ) -> InferredDataFrameMeta:
168
188
  inferred_meta = infer_dataframe_return_value(
@@ -174,7 +194,10 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
174
194
  name=name,
175
195
  index=index,
176
196
  elementwise=elementwise,
197
+ skip_infer=skip_infer,
177
198
  )
199
+ if skip_infer:
200
+ return inferred_meta
178
201
 
179
202
  # merge specified and inferred index, dtypes, output_type
180
203
  # elementwise used to decide shape
@@ -186,6 +209,8 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
186
209
  if self.output_types:
187
210
  inferred_meta.output_type = self.output_types[0]
188
211
  inferred_meta.dtypes = dtypes if dtypes is not None else inferred_meta.dtypes
212
+ if isinstance(index, INDEX_TYPE):
213
+ index = index.index_value
189
214
  if index is not None:
190
215
  inferred_meta.index_value = (
191
216
  parse_index(index)
@@ -458,6 +483,7 @@ def df_apply_chunk(
458
483
  name=name,
459
484
  index=index,
460
485
  output_type=output_type,
486
+ skip_infer=skip_infer,
461
487
  )
462
488
 
463
489
 
@@ -0,0 +1,153 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import List
16
+
17
+ import numpy as np
18
+ import pandas as pd
19
+
20
+ from ... import opcodes
21
+ from ...core import EntityData, OutputType
22
+ from ...serialization.serializables import (
23
+ DictField,
24
+ FunctionField,
25
+ KeyField,
26
+ TupleField,
27
+ )
28
+ from ...udf import BuiltinFunction
29
+ from ...utils import quiet_stdio
30
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
31
+ from ..utils import (
32
+ build_df,
33
+ build_empty_df,
34
+ build_series,
35
+ parse_index,
36
+ validate_output_types,
37
+ )
38
+
39
+
40
class DataFrameCartesianChunk(DataFrameOperator, DataFrameOperatorMixin):
    """
    Operator that applies a user function to two inputs (``left``, ``right``).

    The function is invoked as ``func(left, right, *args, **kwargs)``.
    NOTE(review): presumably it is run once per pair of chunks of both
    inputs, as the operator name suggests -- the execution side is not
    visible here, confirm against the backend.
    """

    _op_type_ = opcodes.CARTESIAN_CHUNK

    # the two input tileables, re-bound from ``op.inputs`` in ``_set_inputs``
    left = KeyField("left")
    right = KeyField("right")
    # user function plus the extra positional / keyword arguments passed to it
    func = FunctionField("func")
    args = TupleField("args")
    kwargs = DictField("kwargs")

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)
        # fall back to a memory scale of 2.0 when the caller did not set one
        if self.memory_scale is None:
            self.memory_scale = 2.0

    @classmethod
    def _set_inputs(cls, op: "DataFrameCartesianChunk", inputs: List[EntityData]):
        super()._set_inputs(op, inputs)
        # the first two inputs are always the left and right operands
        op.left, op.right = op.inputs[:2]

    @staticmethod
    def _build_test_obj(obj):
        """Build a 2-row mock DataFrame (2-d input) or Series for ``obj``."""
        return (
            build_df(obj, size=2)
            if obj.ndim == 2
            else build_series(obj, size=2, name=obj.name)
        )

    def has_custom_code(self) -> bool:
        """Return True unless ``func`` is a ``BuiltinFunction``."""
        return not isinstance(self.func, BuiltinFunction)

    def __call__(self, left, right, index=None, dtypes=None):
        """
        Create the output tileable for ``func(left, right, ...)``.

        Parameters
        ----------
        left, right
            Input tileables.
        index
            Index used to build the output index value. When None, the
            index of the inferred (or fallback) object is used.
        dtypes
            Dtypes of the output DataFrame. Takes precedence over inferred
            dtypes and is required when inference fails for DataFrame output.

        Returns
        -------
        A DataFrame, a Series, or a ``df_or_series`` placeholder when the
        declared output type is ``OutputType.df_or_series``.

        Raises
        ------
        TypeError
            When running ``func`` on the mock inputs fails and the output
            meta cannot be derived from ``output_type`` / ``dtypes``.
        """
        output_type = self._output_types[0] if self._output_types else None

        if output_type == OutputType.df_or_series:
            # Meta is resolved later, so return before building any mock
            # objects: no inference work is needed on this path.
            return self.new_df_or_series([left, right])

        # Guard against unset args / kwargs, like DataFrameApplyChunk does.
        args = self.args or ()
        kwargs = self.kwargs or {}

        test_left = self._build_test_obj(left)
        test_right = self._build_test_obj(right)

        # try run to infer meta
        try:
            with np.errstate(all="ignore"), quiet_stdio():
                obj = self.func(test_left, test_right, *args, **kwargs)
        except:  # noqa: E722  # nosec  # pylint: disable=bare-except
            # Inference is best effort: fall back to the declared meta.
            if output_type == OutputType.series:
                obj = pd.Series([], dtype=np.dtype(object))
            elif output_type == OutputType.dataframe and dtypes is not None:
                obj = build_empty_df(dtypes)
            else:
                raise TypeError(
                    "Cannot determine `output_type`, "
                    "you have to specify it as `dataframe` or `series`, "
                    "for dataframe, `dtypes` is required as well "
                    "if output_type='dataframe'"
                )

        if getattr(obj, "ndim", 0) == 1 or output_type == OutputType.series:
            # an explicit `shape` keyword overrides the unknown-length default
            shape = kwargs.pop("shape", (np.nan,))
            if index is None:
                index = obj.index
            index_value = parse_index(index, left, right, self.func, args, kwargs)
            return self.new_series(
                [left, right],
                dtype=obj.dtype,
                shape=shape,
                index_value=index_value,
                name=obj.name,
            )
        else:
            dtypes = dtypes if dtypes is not None else obj.dtypes
            # dataframe: row count is unknown until execution
            shape = (np.nan, len(dtypes))
            columns_value = parse_index(dtypes.index, store_data=True)
            if index is None:
                index = obj.index
            index_value = parse_index(index, left, right, self.func, args, kwargs)
            return self.new_dataframe(
                [left, right],
                shape=shape,
                dtypes=dtypes,
                index_value=index_value,
                columns_value=columns_value,
            )
126
+
127
+
128
def cartesian_chunk(left, right, func, skip_infer=False, args=(), **kwargs):
    """
    Build a ``DataFrameCartesianChunk`` operator for ``func`` and call it.

    Parameters
    ----------
    left, right
        Input objects handed to the operator.
    func
        Function stored on the operator.
    skip_infer : bool, default False
        When no output type is declared, mark the result as
        ``OutputType.df_or_series`` instead of leaving it undeclared.
    args : tuple
        Extra positional arguments stored on the operator.
    **kwargs
        ``output_type`` / ``output_types`` / ``object_type``, ``index``,
        ``dtypes`` and ``memory_scale`` are consumed here; every remaining
        keyword is stored on the operator as ``kwargs``.

    Returns
    -------
    The object returned by calling the operator on ``left`` and ``right``.
    """
    declared_types = validate_output_types(
        output_type=kwargs.pop("output_type", None),
        output_types=kwargs.pop("output_types", None),
        object_type=kwargs.pop("object_type", None),
    )

    # Only the first declared output type is kept.
    first_type = declared_types[0] if declared_types else None
    if first_type:
        declared_types = [first_type]
    elif skip_infer:
        declared_types = [OutputType.df_or_series]

    # These keywords configure the call itself and must not reach the operator.
    result_index = kwargs.pop("index", None)
    result_dtypes = kwargs.pop("dtypes", None)
    scale = kwargs.pop("memory_scale", None)

    chunk_op = DataFrameCartesianChunk(
        left=left,
        right=right,
        func=func,
        args=args,
        kwargs=kwargs,
        output_types=declared_types,
        memory_scale=scale,
    )
    return chunk_op(left, right, index=result_index, dtypes=result_dtypes)
@@ -0,0 +1,126 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+
18
+ from ... import opcodes
19
+ from ...serialization.serializables import AnyField, StringField
20
+ from ...utils import no_default
21
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
22
+ from ..utils import make_column_list
23
+
24
+
25
class DataFrameCollectKv(DataFrameOperator, DataFrameOperatorMixin):
    """
    Operator replacing a set of columns with a single object-dtype column.

    The output keeps every column that is not selected and appends one
    column named ``kv_col``.
    """

    _op_type_ = opcodes.COLLECT_KV

    # columns to collect; None selects every column of the input
    columns = AnyField("columns", default=None)
    # delimiters stored for the execution side (not used in this class)
    kv_delim = StringField("kv_delim", default=None)
    item_delim = StringField("item_delim", default=None)
    # name of the appended column
    kv_col = StringField("kv_col", default=None)

    def __call__(self, df):
        """Return the output DataFrame tileable for input ``df``."""
        selected = self.columns
        if selected is None:
            selected = list(df.dtypes.index)
        elif not isinstance(selected, list):
            selected = [selected]

        # Selected columns missing from the input are ignored here.
        kept_dtypes = df.dtypes.drop(selected, errors="ignore")
        kv_dtype = pd.Series([np.dtype("object")], index=[self.kv_col])
        out_dtypes = pd.concat([kept_dtypes, kv_dtype])

        # NOTE(review): columns_value is a raw pandas Index here, while
        # DataFrameCartesianChunk passes
        # parse_index(dtypes.index, store_data=True) -- confirm intended.
        return self.new_dataframe(
            [df],
            shape=(df.shape[0], len(out_dtypes)),
            dtypes=out_dtypes,
            index_value=df.index_value,
            columns_value=out_dtypes.index,
        )
50
+
51
+
52
def collect_kv(
    data,
    columns=None,
    kv_delim="=",
    item_delim=",",
    kv_col="kv_col",
):
    """
    Merge values in specified columns into a key-value represented column.

    Parameters
    ----------
    columns : list, default None
        The columns to be merged.
    kv_delim : str, default '='
        Delimiter between key and value.
    item_delim : str, default ','
        Delimiter between key-value pairs.
    kv_col : str, default 'kv_col'
        Name of the new key-value column.

    Returns
    -------
    DataFrame
        Converted data frame.

    Raises
    ------
    ValueError
        If a specified column is not a column of the input.

    See Also
    --------
    DataFrame.mf.extract_kv

    Examples
    --------
    >>> import numpy as np
    >>> import maxframe.dataframe as md

    >>> df = md.DataFrame({"name": ["name1", "name2", "name3", "name4", "name5"],
    ...                    "k1": [1.0, np.nan, 7.1, np.nan, np.nan],
    ...                    "k2": [3.0, 3.0, np.nan, 1.2, 1.0],
    ...                    "k3": [np.nan, 5.1, np.nan, 1.5, np.nan],
    ...                    "k5": [10.0, np.nan, np.nan, np.nan, np.nan],
    ...                    "k7": [np.nan, np.nan, 8.2, np.nan, np.nan],
    ...                    "k9": [np.nan, np.nan, np.nan, np.nan, 1.1]})
    >>> df.execute()
        name   k1   k2   k3    k5   k7   k9
    0  name1  1.0  3.0  NaN  10.0  NaN  NaN
    1  name2  NaN  3.0  5.1   NaN  NaN  NaN
    2  name3  7.1  NaN  NaN   NaN  8.2  NaN
    3  name4  NaN  1.2  1.5   NaN  NaN  NaN
    4  name5  NaN  1.0  NaN   NaN  NaN  1.1

    The columns to merge are given by ``columns``. ``kv_delim`` separates
    a key from its value ('=' by default), ``item_delim`` separates the
    key-value pairs (',' by default), and ``kv_col`` names the new column
    ('kv_col' by default).

    >>> df.mf.collect_kv(columns=['k1', 'k2', 'k3', 'k5', 'k7', 'k9']).execute()
        name                 kv_col
    0  name1  k1=1.0,k2=3.0,k5=10.0
    1  name2          k2=3.0,k3=5.1
    2  name3          k1=7.1,k7=8.2
    3  name4          k2=1.2,k3=1.5
    4  name5          k2=1.0,k9=1.1
    """
    # Reject the first requested column that the input does not have.
    for col in make_column_list(columns, data.dtypes) or []:
        if col not in data.dtypes.index:
            raise ValueError(f"Column {col} specified is not a valid column.")

    # The operator receives the caller's original `columns` value.
    kv_op = DataFrameCollectKv(
        columns=columns,
        kv_delim=kv_delim,
        item_delim=item_delim,
        kv_col=kv_col,
    )
    return kv_op(data)