maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (443) hide show
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win32.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +9 -8
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +38 -1
  15. maxframe/codegen/spe/dataframe/misc.py +11 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +32 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +39 -18
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  29. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  30. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  31. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  32. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  33. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  34. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  35. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  36. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  37. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  38. maxframe/codegen/spe/tensor/__init__.py +3 -0
  39. maxframe/codegen/spe/tensor/datasource.py +1 -0
  40. maxframe/codegen/spe/tensor/fft.py +74 -0
  41. maxframe/codegen/spe/tensor/linalg.py +29 -2
  42. maxframe/codegen/spe/tensor/misc.py +79 -25
  43. maxframe/codegen/spe/tensor/spatial.py +45 -0
  44. maxframe/codegen/spe/tensor/statistics.py +44 -0
  45. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  46. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  47. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  48. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  49. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  50. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  51. maxframe/codegen/spe/utils.py +2 -0
  52. maxframe/config/config.py +73 -9
  53. maxframe/config/tests/test_validators.py +13 -1
  54. maxframe/config/validators.py +49 -0
  55. maxframe/conftest.py +54 -17
  56. maxframe/core/accessor.py +2 -2
  57. maxframe/core/base.py +2 -1
  58. maxframe/core/entity/core.py +5 -0
  59. maxframe/core/entity/tileables.py +3 -1
  60. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  61. maxframe/core/graph/entity.py +8 -3
  62. maxframe/core/mode.py +6 -1
  63. maxframe/core/operator/base.py +9 -2
  64. maxframe/core/operator/core.py +10 -2
  65. maxframe/core/operator/utils.py +13 -0
  66. maxframe/dataframe/__init__.py +12 -5
  67. maxframe/dataframe/accessors/__init__.py +1 -1
  68. maxframe/dataframe/accessors/compat.py +45 -0
  69. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  70. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  71. maxframe/dataframe/accessors/dict_/core.py +48 -0
  72. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  73. maxframe/dataframe/accessors/dict_/length.py +7 -16
  74. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  75. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  76. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  77. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  78. maxframe/dataframe/accessors/list_/core.py +48 -0
  79. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  80. maxframe/dataframe/accessors/list_/length.py +7 -16
  81. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  82. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  83. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  84. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  85. maxframe/dataframe/accessors/struct_/core.py +43 -0
  86. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  87. maxframe/dataframe/accessors/struct_/field.py +123 -0
  88. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  89. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  90. maxframe/dataframe/arithmetic/__init__.py +18 -4
  91. maxframe/dataframe/arithmetic/between.py +106 -0
  92. maxframe/dataframe/arithmetic/dot.py +237 -0
  93. maxframe/dataframe/arithmetic/maximum.py +33 -0
  94. maxframe/dataframe/arithmetic/minimum.py +33 -0
  95. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  96. maxframe/dataframe/core.py +161 -224
  97. maxframe/dataframe/datasource/__init__.py +18 -0
  98. maxframe/dataframe/datasource/core.py +6 -0
  99. maxframe/dataframe/datasource/direct.py +57 -0
  100. maxframe/dataframe/datasource/from_dict.py +124 -0
  101. maxframe/dataframe/datasource/from_index.py +1 -1
  102. maxframe/dataframe/datasource/from_records.py +77 -0
  103. maxframe/dataframe/datasource/from_tensor.py +109 -41
  104. maxframe/dataframe/datasource/read_csv.py +21 -14
  105. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  106. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  107. maxframe/dataframe/datasource/read_parquet.py +38 -39
  108. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  109. maxframe/dataframe/datastore/__init__.py +11 -1
  110. maxframe/dataframe/datastore/direct.py +268 -0
  111. maxframe/dataframe/datastore/to_csv.py +29 -41
  112. maxframe/dataframe/datastore/to_odps.py +36 -4
  113. maxframe/dataframe/extensions/__init__.py +20 -4
  114. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  115. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  116. maxframe/dataframe/extensions/collect_kv.py +126 -0
  117. maxframe/dataframe/extensions/extract_kv.py +177 -0
  118. maxframe/dataframe/extensions/flatjson.py +2 -1
  119. maxframe/dataframe/extensions/map_reduce.py +263 -0
  120. maxframe/dataframe/extensions/rebalance.py +62 -0
  121. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  122. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  123. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  124. maxframe/dataframe/groupby/__init__.py +17 -2
  125. maxframe/dataframe/groupby/aggregation.py +86 -49
  126. maxframe/dataframe/groupby/apply.py +1 -1
  127. maxframe/dataframe/groupby/apply_chunk.py +19 -5
  128. maxframe/dataframe/groupby/core.py +116 -16
  129. maxframe/dataframe/groupby/cum.py +4 -25
  130. maxframe/dataframe/groupby/expanding.py +264 -0
  131. maxframe/dataframe/groupby/fill.py +1 -1
  132. maxframe/dataframe/groupby/getitem.py +12 -5
  133. maxframe/dataframe/groupby/head.py +11 -1
  134. maxframe/dataframe/groupby/rank.py +136 -0
  135. maxframe/dataframe/groupby/rolling.py +206 -0
  136. maxframe/dataframe/groupby/shift.py +114 -0
  137. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  138. maxframe/dataframe/indexing/__init__.py +22 -2
  139. maxframe/dataframe/indexing/droplevel.py +195 -0
  140. maxframe/dataframe/indexing/filter.py +169 -0
  141. maxframe/dataframe/indexing/get_level_values.py +76 -0
  142. maxframe/dataframe/indexing/iat.py +45 -0
  143. maxframe/dataframe/indexing/iloc.py +152 -12
  144. maxframe/dataframe/indexing/insert.py +46 -18
  145. maxframe/dataframe/indexing/loc.py +287 -7
  146. maxframe/dataframe/indexing/reindex.py +14 -5
  147. maxframe/dataframe/indexing/rename.py +6 -0
  148. maxframe/dataframe/indexing/rename_axis.py +2 -2
  149. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  150. maxframe/dataframe/indexing/reset_index.py +33 -6
  151. maxframe/dataframe/indexing/sample.py +8 -0
  152. maxframe/dataframe/indexing/setitem.py +3 -3
  153. maxframe/dataframe/indexing/swaplevel.py +185 -0
  154. maxframe/dataframe/indexing/take.py +99 -0
  155. maxframe/dataframe/indexing/truncate.py +140 -0
  156. maxframe/dataframe/indexing/where.py +0 -11
  157. maxframe/dataframe/indexing/xs.py +148 -0
  158. maxframe/dataframe/merge/__init__.py +15 -1
  159. maxframe/dataframe/merge/append.py +97 -98
  160. maxframe/dataframe/merge/combine.py +244 -0
  161. maxframe/dataframe/merge/combine_first.py +120 -0
  162. maxframe/dataframe/merge/compare.py +387 -0
  163. maxframe/dataframe/merge/concat.py +183 -0
  164. maxframe/dataframe/merge/update.py +271 -0
  165. maxframe/dataframe/misc/__init__.py +28 -11
  166. maxframe/dataframe/misc/_duplicate.py +10 -4
  167. maxframe/dataframe/misc/apply.py +1 -1
  168. maxframe/dataframe/misc/check_unique.py +82 -0
  169. maxframe/dataframe/misc/clip.py +145 -0
  170. maxframe/dataframe/misc/describe.py +175 -9
  171. maxframe/dataframe/misc/drop.py +31 -0
  172. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  173. maxframe/dataframe/misc/duplicated.py +2 -2
  174. maxframe/dataframe/misc/get_dummies.py +5 -1
  175. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  176. maxframe/dataframe/misc/isin.py +2 -2
  177. maxframe/dataframe/misc/map.py +125 -18
  178. maxframe/dataframe/misc/repeat.py +159 -0
  179. maxframe/dataframe/misc/tests/test_misc.py +48 -3
  180. maxframe/dataframe/misc/to_numeric.py +3 -0
  181. maxframe/dataframe/misc/transform.py +12 -5
  182. maxframe/dataframe/misc/transpose.py +13 -1
  183. maxframe/dataframe/misc/valid_index.py +115 -0
  184. maxframe/dataframe/misc/value_counts.py +38 -4
  185. maxframe/dataframe/missing/checkna.py +14 -6
  186. maxframe/dataframe/missing/dropna.py +5 -0
  187. maxframe/dataframe/missing/fillna.py +1 -1
  188. maxframe/dataframe/missing/replace.py +7 -4
  189. maxframe/dataframe/reduction/__init__.py +35 -16
  190. maxframe/dataframe/reduction/aggregation.py +43 -14
  191. maxframe/dataframe/reduction/all.py +2 -2
  192. maxframe/dataframe/reduction/any.py +2 -2
  193. maxframe/dataframe/reduction/argmax.py +103 -0
  194. maxframe/dataframe/reduction/argmin.py +103 -0
  195. maxframe/dataframe/reduction/core.py +80 -24
  196. maxframe/dataframe/reduction/count.py +13 -9
  197. maxframe/dataframe/reduction/cov.py +166 -0
  198. maxframe/dataframe/reduction/cummax.py +2 -2
  199. maxframe/dataframe/reduction/cummin.py +2 -2
  200. maxframe/dataframe/reduction/cumprod.py +2 -2
  201. maxframe/dataframe/reduction/cumsum.py +2 -2
  202. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  203. maxframe/dataframe/reduction/idxmax.py +185 -0
  204. maxframe/dataframe/reduction/idxmin.py +185 -0
  205. maxframe/dataframe/reduction/kurtosis.py +37 -30
  206. maxframe/dataframe/reduction/max.py +2 -2
  207. maxframe/dataframe/reduction/mean.py +9 -7
  208. maxframe/dataframe/reduction/median.py +2 -2
  209. maxframe/dataframe/reduction/min.py +2 -2
  210. maxframe/dataframe/reduction/mode.py +144 -0
  211. maxframe/dataframe/reduction/nunique.py +19 -11
  212. maxframe/dataframe/reduction/prod.py +18 -13
  213. maxframe/dataframe/reduction/reduction_size.py +2 -2
  214. maxframe/dataframe/reduction/sem.py +13 -9
  215. maxframe/dataframe/reduction/skew.py +31 -27
  216. maxframe/dataframe/reduction/str_concat.py +10 -7
  217. maxframe/dataframe/reduction/sum.py +18 -14
  218. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  219. maxframe/dataframe/reduction/unique.py +20 -3
  220. maxframe/dataframe/reduction/var.py +16 -12
  221. maxframe/dataframe/reshape/__init__.py +38 -0
  222. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  223. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  224. maxframe/dataframe/reshape/unstack.py +114 -0
  225. maxframe/dataframe/sort/__init__.py +16 -1
  226. maxframe/dataframe/sort/argsort.py +68 -0
  227. maxframe/dataframe/sort/core.py +2 -1
  228. maxframe/dataframe/sort/nlargest.py +238 -0
  229. maxframe/dataframe/sort/nsmallest.py +228 -0
  230. maxframe/dataframe/sort/rank.py +147 -0
  231. maxframe/dataframe/statistics/__init__.py +3 -3
  232. maxframe/dataframe/statistics/corr.py +1 -0
  233. maxframe/dataframe/statistics/quantile.py +2 -2
  234. maxframe/dataframe/tests/test_typing.py +104 -0
  235. maxframe/dataframe/tests/test_utils.py +66 -2
  236. maxframe/dataframe/tseries/__init__.py +19 -0
  237. maxframe/dataframe/tseries/at_time.py +61 -0
  238. maxframe/dataframe/tseries/between_time.py +122 -0
  239. maxframe/dataframe/typing_.py +185 -0
  240. maxframe/dataframe/utils.py +125 -52
  241. maxframe/dataframe/window/aggregation.py +8 -4
  242. maxframe/dataframe/window/core.py +14 -1
  243. maxframe/dataframe/window/ewm.py +1 -3
  244. maxframe/dataframe/window/expanding.py +37 -35
  245. maxframe/dataframe/window/rolling.py +49 -39
  246. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  247. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  248. maxframe/env.py +7 -4
  249. maxframe/errors.py +2 -2
  250. maxframe/io/odpsio/schema.py +9 -3
  251. maxframe/io/odpsio/tableio.py +7 -2
  252. maxframe/io/odpsio/tests/test_schema.py +198 -83
  253. maxframe/learn/__init__.py +10 -2
  254. maxframe/learn/cluster/__init__.py +15 -0
  255. maxframe/learn/cluster/_kmeans.py +782 -0
  256. maxframe/learn/contrib/llm/core.py +18 -7
  257. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  258. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  259. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  260. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  261. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  262. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  263. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  264. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  265. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  266. maxframe/learn/contrib/llm/models/managed.py +76 -11
  267. maxframe/learn/contrib/llm/models/openai.py +72 -0
  268. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  269. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  270. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  271. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  272. maxframe/learn/contrib/llm/text.py +348 -42
  273. maxframe/learn/contrib/models.py +4 -1
  274. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  275. maxframe/learn/contrib/xgboost/core.py +113 -4
  276. maxframe/learn/contrib/xgboost/predict.py +4 -2
  277. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  278. maxframe/learn/contrib/xgboost/train.py +7 -2
  279. maxframe/learn/core.py +66 -0
  280. maxframe/learn/linear_model/_base.py +58 -1
  281. maxframe/learn/linear_model/_lin_reg.py +1 -1
  282. maxframe/learn/metrics/__init__.py +6 -0
  283. maxframe/learn/metrics/_classification.py +145 -0
  284. maxframe/learn/metrics/_ranking.py +477 -0
  285. maxframe/learn/metrics/_scorer.py +60 -0
  286. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  287. maxframe/learn/metrics/pairwise/core.py +77 -0
  288. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  289. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  290. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  291. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  292. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  293. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  294. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  295. maxframe/learn/metrics/tests/__init__.py +13 -0
  296. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  297. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  298. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  299. maxframe/learn/utils/__init__.py +2 -1
  300. maxframe/learn/utils/checks.py +1 -2
  301. maxframe/learn/utils/core.py +59 -0
  302. maxframe/learn/utils/extmath.py +79 -9
  303. maxframe/learn/utils/odpsio.py +262 -0
  304. maxframe/learn/utils/validation.py +2 -2
  305. maxframe/lib/compat.py +40 -0
  306. maxframe/lib/dtypes_extension/__init__.py +16 -1
  307. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  308. maxframe/lib/dtypes_extension/blob.py +304 -0
  309. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  310. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  311. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  312. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  313. maxframe/lib/filesystem/_oss_lib/common.py +124 -50
  314. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  315. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  316. maxframe/lib/filesystem/base.py +1 -1
  317. maxframe/lib/filesystem/core.py +1 -1
  318. maxframe/lib/filesystem/oss.py +115 -46
  319. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  320. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  321. maxframe/lib/wrapped_pickle.py +10 -0
  322. maxframe/opcodes.py +41 -15
  323. maxframe/protocol.py +12 -0
  324. maxframe/remote/core.py +4 -0
  325. maxframe/serialization/__init__.py +11 -2
  326. maxframe/serialization/arrow.py +38 -13
  327. maxframe/serialization/blob.py +32 -0
  328. maxframe/serialization/core.cp37-win32.pyd +0 -0
  329. maxframe/serialization/core.pyx +39 -1
  330. maxframe/serialization/exception.py +2 -4
  331. maxframe/serialization/numpy.py +11 -0
  332. maxframe/serialization/pandas.py +46 -9
  333. maxframe/serialization/serializables/core.py +2 -2
  334. maxframe/serialization/tests/test_serial.py +31 -4
  335. maxframe/tensor/__init__.py +38 -8
  336. maxframe/tensor/arithmetic/__init__.py +19 -10
  337. maxframe/tensor/arithmetic/core.py +2 -2
  338. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  339. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
  340. maxframe/tensor/core.py +6 -2
  341. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  342. maxframe/tensor/extensions/__init__.py +2 -0
  343. maxframe/tensor/extensions/apply_chunk.py +3 -3
  344. maxframe/tensor/extensions/rebalance.py +65 -0
  345. maxframe/tensor/fft/__init__.py +32 -0
  346. maxframe/tensor/fft/core.py +168 -0
  347. maxframe/tensor/fft/fft.py +112 -0
  348. maxframe/tensor/fft/fft2.py +118 -0
  349. maxframe/tensor/fft/fftfreq.py +80 -0
  350. maxframe/tensor/fft/fftn.py +123 -0
  351. maxframe/tensor/fft/fftshift.py +79 -0
  352. maxframe/tensor/fft/hfft.py +112 -0
  353. maxframe/tensor/fft/ifft.py +114 -0
  354. maxframe/tensor/fft/ifft2.py +115 -0
  355. maxframe/tensor/fft/ifftn.py +123 -0
  356. maxframe/tensor/fft/ifftshift.py +73 -0
  357. maxframe/tensor/fft/ihfft.py +93 -0
  358. maxframe/tensor/fft/irfft.py +118 -0
  359. maxframe/tensor/fft/irfft2.py +62 -0
  360. maxframe/tensor/fft/irfftn.py +114 -0
  361. maxframe/tensor/fft/rfft.py +116 -0
  362. maxframe/tensor/fft/rfft2.py +63 -0
  363. maxframe/tensor/fft/rfftfreq.py +87 -0
  364. maxframe/tensor/fft/rfftn.py +113 -0
  365. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  366. maxframe/tensor/linalg/__init__.py +7 -0
  367. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  368. maxframe/tensor/linalg/cholesky.py +117 -0
  369. maxframe/tensor/linalg/einsum.py +339 -0
  370. maxframe/tensor/linalg/lstsq.py +100 -0
  371. maxframe/tensor/linalg/matrix_norm.py +75 -0
  372. maxframe/tensor/linalg/norm.py +249 -0
  373. maxframe/tensor/linalg/solve.py +72 -0
  374. maxframe/tensor/linalg/solve_triangular.py +2 -2
  375. maxframe/tensor/linalg/vector_norm.py +113 -0
  376. maxframe/tensor/misc/__init__.py +24 -1
  377. maxframe/tensor/misc/argwhere.py +72 -0
  378. maxframe/tensor/misc/array_split.py +46 -0
  379. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  380. maxframe/tensor/misc/copyto.py +130 -0
  381. maxframe/tensor/misc/delete.py +104 -0
  382. maxframe/tensor/misc/dsplit.py +68 -0
  383. maxframe/tensor/misc/ediff1d.py +74 -0
  384. maxframe/tensor/misc/expand_dims.py +85 -0
  385. maxframe/tensor/misc/flip.py +90 -0
  386. maxframe/tensor/misc/fliplr.py +64 -0
  387. maxframe/tensor/misc/flipud.py +68 -0
  388. maxframe/tensor/misc/hsplit.py +85 -0
  389. maxframe/tensor/misc/insert.py +139 -0
  390. maxframe/tensor/misc/moveaxis.py +83 -0
  391. maxframe/tensor/misc/result_type.py +88 -0
  392. maxframe/tensor/misc/roll.py +124 -0
  393. maxframe/tensor/misc/rollaxis.py +77 -0
  394. maxframe/tensor/misc/shape.py +89 -0
  395. maxframe/tensor/misc/split.py +190 -0
  396. maxframe/tensor/misc/tile.py +109 -0
  397. maxframe/tensor/misc/vsplit.py +74 -0
  398. maxframe/tensor/reduction/array_equal.py +2 -1
  399. maxframe/tensor/sort/__init__.py +2 -0
  400. maxframe/tensor/sort/argpartition.py +98 -0
  401. maxframe/tensor/sort/partition.py +228 -0
  402. maxframe/tensor/spatial/__init__.py +15 -0
  403. maxframe/tensor/spatial/distance/__init__.py +17 -0
  404. maxframe/tensor/spatial/distance/cdist.py +421 -0
  405. maxframe/tensor/spatial/distance/pdist.py +398 -0
  406. maxframe/tensor/spatial/distance/squareform.py +153 -0
  407. maxframe/tensor/special/__init__.py +159 -21
  408. maxframe/tensor/special/airy.py +55 -0
  409. maxframe/tensor/special/bessel.py +199 -0
  410. maxframe/tensor/special/core.py +65 -4
  411. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  412. maxframe/tensor/special/ellip_harm.py +55 -0
  413. maxframe/tensor/special/err_fresnel.py +223 -0
  414. maxframe/tensor/special/gamma_funcs.py +303 -0
  415. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  416. maxframe/tensor/special/info_theory.py +189 -0
  417. maxframe/tensor/special/misc.py +21 -0
  418. maxframe/tensor/statistics/__init__.py +6 -0
  419. maxframe/tensor/statistics/corrcoef.py +77 -0
  420. maxframe/tensor/statistics/cov.py +222 -0
  421. maxframe/tensor/statistics/digitize.py +126 -0
  422. maxframe/tensor/statistics/histogram.py +520 -0
  423. maxframe/tensor/statistics/median.py +85 -0
  424. maxframe/tensor/statistics/ptp.py +89 -0
  425. maxframe/tensor/utils.py +3 -3
  426. maxframe/tests/test_udf.py +61 -0
  427. maxframe/tests/test_utils.py +51 -6
  428. maxframe/tests/utils.py +0 -2
  429. maxframe/typing_.py +2 -0
  430. maxframe/udf.py +130 -9
  431. maxframe/utils.py +254 -27
  432. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
  433. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
  434. maxframe_client/fetcher.py +35 -4
  435. maxframe_client/session/odps.py +7 -2
  436. maxframe_client/session/task.py +8 -1
  437. maxframe_client/tests/test_fetcher.py +76 -3
  438. maxframe_client/tests/test_session.py +28 -1
  439. maxframe/dataframe/arrays.py +0 -864
  440. /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  441. /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  442. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  443. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,177 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import List
16
+
17
+ import numpy as np
18
+ import pandas as pd
19
+
20
+ from ... import opcodes
21
+ from ...core import EntityData, OutputType
22
+ from ...serialization.serializables import AnyField, KeyField, StringField
23
+ from ...utils import make_dtype, no_default
24
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
25
+ from ..utils import make_column_list
26
+
27
+
28
class DataFrameExtractKv(DataFrameOperator, DataFrameOperatorMixin):
    """
    Operator expanding key-value formatted string columns into standalone
    columns.

    The output columns depend on the keys found in the data, so
    ``__call__`` first builds an auxiliary tileable with the distinct
    ``<column>_<key>`` names and passes it as the last input of the
    operator.
    """

    _op_type_ = opcodes.EXTRACT_KV

    # labels of the columns holding key-value strings
    columns = AnyField("columns", default=None)
    # delimiter between a key and its value
    kv_delim = StringField("kv_delim", default="=")
    # delimiter between key-value pairs
    item_delim = StringField("item_delim", default=",")
    # dtype of the generated value columns
    dtype = AnyField("dtype", default=None)
    # value used for missing key-value pairs
    fill_value = AnyField("fill_value", default=None)
    # "raise" makes malformed pairs raise ValueError while collecting keys
    errors = StringField("errors", default="raise")
    # intermediate agg data
    agg_results = KeyField("agg_results", default=None)

    def __init__(self, kv_delim="=", item_delim=",", **kw):
        super().__init__(kv_delim=kv_delim, item_delim=item_delim, **kw)
        self.output_types = [OutputType.dataframe]

    @classmethod
    def _set_inputs(cls, op: "DataFrameExtractKv", inputs: List[EntityData]):
        super()._set_inputs(op, inputs)
        if op.agg_results is not None:
            # the collected keys are always appended as the last input
            op.agg_results = inputs[-1]

    def __call__(self, df):
        """
        Build the output DataFrame for ``df``.

        The result keeps the index of ``df``; its column count, dtypes and
        column labels are left unknown because they depend on the keys
        present in the data.
        """
        shape = (df.shape[0], np.nan)
        # NOTE(review): copied to a local presumably so that the function
        # below does not capture the operator itself -- confirm
        errors_arg = self.errors

        def get_keys(row, cols, kv_delim, item_delim):
            for col in cols:
                value = row[col]
                # skip every kind of missing value (None, NaN, pd.NA), not
                # only None, as none of them can be split
                if not isinstance(value, str) and pd.isna(value):
                    continue
                for pair in value.split(item_delim):
                    result = pair.split(kv_delim, 1)
                    if len(result) == 2:
                        yield f"{col}_{result[0]}"
                    elif errors_arg == "raise":
                        raise ValueError(f"Malformed data {pair} in column '{col}'.")

        all_keys = df.mf.flatmap(
            get_keys,
            dtypes=pd.Series([str], index=["keys_cols"]),
            cols=self.columns,
            kv_delim=self.kv_delim,
            item_delim=self.item_delim,
        )
        # distinct output column names, sorted
        self.agg_results = all_keys.drop_duplicates().sort_values(by="keys_cols")
        inputs = [df, self.agg_results]
        return self.new_dataframe(
            inputs,
            shape=shape,
            dtypes=None,
            index_value=df.index_value,
            columns_value=None,
        )
84
+
85
+
86
def extract_kv(
    data,
    columns=None,
    kv_delim="=",
    item_delim=",",
    dtype="float",
    fill_value=None,
    errors="raise",
):
    """
    Extract values in key-value represented columns into standalone columns.
    New column names will be the name of the key-value column followed by
    an underscore and the key.

    Parameters
    ----------
    data : DataFrame
        The data frame holding the key-value columns.
    columns : list, default None
        The key-value columns to be extracted. If None, all columns of
        ``data`` are used.
    kv_delim : str, default '='
        Delimiter between key and value.
    item_delim : str, default ','
        Delimiter between key-value pairs.
    dtype : str, default 'float'
        Type of value columns to generate.
    fill_value : object, default None
        Default value for missing key-value pairs.
    errors : {'ignore', 'raise'}, default 'raise'
        * If 'raise', then invalid parsing will raise an exception.
        * If 'ignore', then malformed key-value pairs are skipped when
          collecting the output keys.

    Returns
    -------
    DataFrame
        extracted data frame

    Raises
    ------
    ValueError
        If a specified column does not exist in ``data`` or is not of
        string type.

    See Also
    --------
    DataFrame.mf.collect_kv

    Examples
    --------
    >>> import numpy as np
    >>> import maxframe.dataframe as md

    >>> df = md.DataFrame({"name": ["name1", "name2", "name3", "name4", "name5"],
    ...                    "kv": ["k1=1.0,k2=3.0,k5=10.0",
    ...                           "k2=3.0,k3=5.1",
    ...                           "k1=7.1,k7=8.2",
    ...                           "k2=1.2,k3=1.5",
    ...                           "k2=1.0,k9=1.1"]})
    >>> df.execute()
        name                     kv
    0  name1  k1=1.0,k2=3.0,k5=10.0
    1  name2          k2=3.0,k3=5.1
    2  name3          k1=7.1,k7=8.2
    3  name4          k2=1.2,k3=1.5
    4  name5          k2=1.0,k9=1.1

    The field names to be expanded are specified by columns
    kv_delim is to delimit the key and value and '=' is default
    item_delim is to delimit the Key-Value pairs, ',' is default
    The output field name is the original field name connect with the key by "_"
    fill_value is used to fill missing values, None is default

    >>> df.mf.extract_kv(columns=['kv'], kv_delim='=', item_delim=',').execute()
        name  kv_k1  kv_k2  kv_k3  kv_k5  kv_k7  kv_k9
    0  name1    1.0    3.0    NaN   10.0    NaN    NaN
    1  name2    NaN    3.0    5.1    NaN    NaN    NaN
    2  name3    7.1    NaN    NaN    NaN    8.2    NaN
    3  name4    NaN    1.2    1.5    NaN    NaN    NaN
    4  name5    NaN    1.0    NaN    NaN    NaN    1.1
    """
    if columns is None:
        columns = data.dtypes.index.tolist()
    columns_list = make_column_list(columns, data.dtypes)
    non_exist_key = next(
        (c for c in columns_list if c not in data.dtypes.index), no_default
    )
    if non_exist_key is not no_default:
        raise ValueError(f"Column {non_exist_key} specified is not a valid column.")
    for col in columns_list:
        if str(data.dtypes[col]) not in ("object", "string"):
            raise ValueError(f"Column '{col}' must be of string type.")
    op = DataFrameExtractKv(
        # hand over the normalized list that was validated above, so that a
        # single column name is not iterated character by character later
        columns=columns_list,
        kv_delim=kv_delim,
        item_delim=item_delim,
        dtype=make_dtype(dtype),
        fill_value=fill_value,
        errors=errors,
    )
    return op(data)
@@ -39,12 +39,13 @@ class SeriesFlatJSONOperator(DataFrameOperator, DataFrameOperatorMixin):
39
39
  name=name,
40
40
  dtype=make_dtype(dtype),
41
41
  )
42
+ dtypes = make_dtypes(dtypes)
42
43
  return self.new_dataframe(
43
44
  [series],
44
45
  shape=(series.shape[0], len(dtypes)),
45
46
  index_value=series.index_value,
46
47
  columns_value=parse_index(dtypes.index, store_data=True),
47
- dtypes=make_dtypes(dtypes),
48
+ dtypes=dtypes,
48
49
  )
49
50
 
50
51
 
@@ -0,0 +1,263 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import inspect
16
+ from typing import Any, Callable, List, Optional, Union
17
+
18
+ import pandas as pd
19
+
20
+
21
def _has_end_arg(func) -> bool:
    """Tell whether ``func`` explicitly declares a parameter named ``end``.

    The names reported by ``inspect.getfullargspec`` as ``args`` and as
    ``kwonlyargs`` are searched. A catch-all ``**kwargs`` does not count as
    declaring ``end``.
    """
    spec = inspect.getfullargspec(func)
    declared_names = (*spec.args, *spec.kwonlyargs)
    return "end" in declared_names
24
+
25
+
26
def _gen_combined_mapper(
    mapper: Callable,
    combiner: Callable,
    group_cols: List[Any],
    order_cols: List[Any],
    ascending: Union[bool, List[bool]] = True,
):
    """Create a mapper class whose output is post-processed by ``combiner``.

    The returned class calls ``mapper`` on every batch, optionally sorts the
    result by ``order_cols``, groups it by ``group_cols`` (all result columns
    when ``group_cols`` is empty) and applies ``combiner`` to every group.
    ``mapper`` and ``combiner`` may be plain callables or classes; classes are
    instantiated once per instance of the returned class.
    """

    class CombinedMapper:
        def __init__(self):
            # classes are instantiated, anything else is used as the callable
            self.f = mapper() if isinstance(mapper, type) else mapper
            self.combiner = combiner() if isinstance(combiner, type) else combiner

        def _combine_mapper_result(self, mapper_result, end=False):
            # a mapper may emit nothing for a batch
            if mapper_result is None:
                return None

            if order_cols:
                ordered = mapper_result.sort_values(order_cols, ascending=ascending)
            else:
                ordered = mapper_result

            # ``end`` is forwarded only to combiners that declare it
            extra_kw = {}
            if _has_end_arg(self.combiner):
                extra_kw["end"] = end

            keys = group_cols or list(ordered.columns)
            grouped = ordered.groupby(keys, group_keys=False)
            return grouped[list(ordered.columns)].apply(self.combiner, **extra_kw)

        def __call__(self, batch, end=False):
            # ``end`` is forwarded only to mappers that declare it
            if _has_end_arg(self.f):
                mapped = self.f(batch, end=end)
            else:
                mapped = self.f(batch)
            return self._combine_mapper_result(mapped, end=end)

        def close(self) -> None:
            # mapper first, then combiner; ``close`` is optional on both
            for worker in (self.f, self.combiner):
                if hasattr(worker, "close"):
                    worker.close()

    return CombinedMapper
70
+
71
+
72
def map_reduce(
    df,
    mapper: Optional[Callable] = None,
    reducer: Optional[Callable] = None,
    group_cols: Optional[List[Any]] = None,
    *,
    order_cols: List[Any] = None,
    ascending: Union[bool, List[bool]] = True,
    combiner: Callable = None,
    batch_rows: Optional[int] = 1024,
    mapper_dtypes: pd.Series = None,
    mapper_index: pd.Index = None,
    mapper_batch_rows: Optional[int] = None,
    reducer_dtypes: pd.Series = None,
    reducer_index: pd.Index = None,
    reducer_batch_rows: Optional[int] = None,
    ignore_index: bool = False,
):
    """
    Map-reduce API over certain DataFrames. This function is roughly
    a shortcut for

    .. code-block:: python

        df.mf.apply_chunk(mapper).groupby(group_cols).mf.apply_chunk(reducer)

    Parameters
    ----------
    df : DataFrame
        Input DataFrame.
    mapper : function or type
        Mapper function or class. If absent, the input DataFrame is passed
        to the reduce stage unchanged.
    reducer : function or type
        Reducer function or class. If absent, the output of the map stage
        is returned.
    group_cols : str or list[str]
        The keys to group after mapper. If absent, all columns in the mapped
        DataFrame will be used.
    order_cols : str or list[str]
        The columns to sort after groupby.
    ascending : bool or list[bool] or None
        Whether columns should be in ascending order or not, only effective when
        `order_cols` are specified. If a list of booleans is passed, orders of
        every column in `order_cols` are specified.
    combiner : function or class
        Combiner function or class. Should accept and return the same schema
        as mapper outputs. Requires `mapper` to be specified.
    batch_rows : int or None
        Rows in batches for mappers and reducers. Ignored if `mapper_batch_rows`
        specified for mappers or `reducer_batch_rows` specified for reducers.
        1024 by default.
    mapper_dtypes : pd.Series or dict or None
        Output dtypes of mapper stage.
    mapper_index : pd.Index or None
        Index of DataFrame returned by mappers.
    mapper_batch_rows : int or None
        Rows in batches for mappers. If specified, `batch_rows` will be ignored
        for mappers.
    reducer_dtypes : pd.Series or dict or None
        Output dtypes of reducer stage.
    reducer_index : pd.Index or None
        Index of DataFrame returned by reducers.
    reducer_batch_rows : int or None
        Rows in batches for reducers. If specified, `batch_rows` will be ignored
        for reducers.
    ignore_index : bool
        If true, indexes generated at mapper or reducer functions will be ignored.

    Returns
    -------
    output: DataFrame
        Result DataFrame after map and reduce.

    Raises
    ------
    ValueError
        If `mapper_dtypes` or `mapper_index` is given without `mapper`, if
        `reducer_dtypes` or `reducer_index` is given without `reducer`, or if
        `combiner` is given without `mapper`.

    Examples
    --------

    We first define a DataFrame with a column of several words.

    >>> from collections import defaultdict
    >>> import pandas as pd
    >>> import maxframe.dataframe as md
    >>> from maxframe.udf import with_running_options
    >>> df = md.DataFrame(
    ...     {
    ...         "name": ["name key", "name", "key", "name", "key name"],
    ...         "id": [4, 2, 4, 3, 3],
    ...         "fid": [5.3, 3.5, 4.2, 2.2, 4.1],
    ...     }
    ... )

    Then we write a mapper function which accepts batches in the DataFrame
    and returns counts of words in every row.

    >>> def mapper(batch):
    ...     word_to_count = defaultdict(lambda: 0)
    ...     for words in batch["name"]:
    ...         for w in words.split():
    ...             word_to_count[w] += 1
    ...     return pd.DataFrame(
    ...         [list(tp) for tp in word_to_count.items()], columns=["word", "count"]
    ...     )

    After that we write a reducer function which aggregates records with
    the same word. Running options such as CPU specifications can be supplied
    as well.

    >>> @with_running_options(cpu=2)
    ... class TestReducer:
    ...     def __init__(self):
    ...         self._word_to_count = defaultdict(lambda: 0)
    ...
    ...     def __call__(self, batch, end=False):
    ...         word = None
    ...         for _, row in batch.iterrows():
    ...             word = row.iloc[0]
    ...             self._word_to_count[row.iloc[0]] += row.iloc[1]
    ...         if end:
    ...             return pd.DataFrame(
    ...                 [[word, self._word_to_count[word]]], columns=["word", "count"]
    ...             )
    ...
    ...     def close(self):
    ...         # you can do several cleanups here
    ...         print("close")

    Finally we can call `map_reduce` with mappers and reducers specified above.

    >>> res = df.mf.map_reduce(
    ...     mapper,
    ...     TestReducer,
    ...     group_cols=["word"],
    ...     mapper_dtypes={"word": "str", "count": "int"},
    ...     mapper_index=pd.Index([0]),
    ...     reducer_dtypes={"word": "str", "count": "int"},
    ...     reducer_index=pd.Index([0]),
    ...     ignore_index=True,
    ... )
    >>> res.execute().fetch()
       word  count
    0   key      3
    1  name      4

    See Also
    --------
    DataFrame.mf.apply_chunk, DataFrame.groupby.mf.apply_chunk
    """
    # stage-specific batch sizes take precedence over the shared default
    mapper_batch_rows = mapper_batch_rows or batch_rows
    reducer_batch_rows = reducer_batch_rows or batch_rows

    def _reject_stage_options(stage, **options):
        # options of a stage are meaningless when the stage itself is absent
        for arg_name, value in options.items():
            if value is not None:
                raise ValueError(f"Cannot specify {arg_name} when {stage} is None")

    if mapper is None:
        _reject_stage_options(
            "mapper", mapper_dtypes=mapper_dtypes, mapper_index=mapper_index
        )
        if combiner is not None:
            raise ValueError("Combiner cannot be set when mapper is None")
        mapped = df
    else:
        if combiner is not None:
            # fold the combiner into the mapper so both run in the same chunk
            mapper = _gen_combined_mapper(
                mapper, combiner, group_cols, order_cols, ascending=ascending
            )
        mapped = df.mf.apply_chunk(
            mapper,
            batch_rows=mapper_batch_rows,
            dtypes=mapper_dtypes,
            output_type="dataframe",
            index=mapper_index,
        )

    if reducer is None:
        _reject_stage_options(
            "reducer", reducer_dtypes=reducer_dtypes, reducer_index=reducer_index
        )
        res = mapped
    else:
        # Default grouping keys come from the MAPPED frame: a mapper may emit
        # a schema different from the input, and the input's column names
        # need not exist after mapping.
        group_cols = group_cols or list(mapped.dtypes.index)
        res = mapped.groupby(group_cols, group_keys=False)[
            list(mapped.dtypes.index)
        ].mf.apply_chunk(
            reducer,
            batch_rows=reducer_batch_rows,
            dtypes=reducer_dtypes,
            output_type="dataframe",
            index=reducer_index,
            order_cols=order_cols,
            ascending=ascending,
        )

    if ignore_index:
        return res.reset_index(drop=True)
    return res
@@ -0,0 +1,62 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ... import opcodes
16
+ from ...serialization.serializables import Float64Field, Int64Field
17
+ from ...tensor.extensions.rebalance import RebalanceMixin
18
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
19
+ from ..utils import validate_axis
20
+
21
+
22
class DataFrameRebalance(RebalanceMixin, DataFrameOperatorMixin, DataFrameOperator):
    """Operator carrying the arguments of a rebalance request.

    Instances are built by ``rebalance`` in this module from its ``axis``,
    ``factor`` and ``num_partitions`` arguments. The operator's actual
    behavior comes from ``RebalanceMixin`` and the DataFrame operator base
    classes, which are defined elsewhere.
    """

    # opcode identifying this operator
    _op_type_ = opcodes.REBALANCE

    # axis to rebalance along
    axis = Int64Field("axis")
    # multiplier applied to the number of input chunks; ``None`` when unset
    factor = Float64Field("factor", default=None)
    # upper bound on the number of chunks after rebalancing
    # NOTE(review): unlike ``factor`` no ``default=None`` is declared here,
    # although ``rebalance`` passes ``None`` when only ``factor`` is given;
    # confirm the field's implicit default is intended.
    num_partitions = Int64Field("num_partitions")

    def __init__(self, output_types=None, **kw):
        # expose ``output_types`` under the ``_output_types`` keyword the
        # base initializer is called with
        super().__init__(_output_types=output_types, **kw)
31
+
32
+
33
def rebalance(df_or_series, axis=0, factor=None, num_partitions=None):
    """
    Make data more balanced across entire cluster.

    Exactly one of ``factor`` and ``num_partitions`` must be given.

    Parameters
    ----------
    axis : int
        The axis to rebalance.
    factor : float
        Specified so that number of chunks after balance is
        total number of input chunks * factor.
    num_partitions : int
        Specified so the number of chunks are at most
        num_partitions.

    Returns
    -------
    Series or DataFrame
        Result of DataFrame or Series after rebalanced.

    Raises
    ------
    ValueError
        If neither or both of ``factor`` and ``num_partitions`` are given.
    """
    axis = validate_axis(axis, df_or_series)

    # the two sizing options are mutually exclusive and one is required
    given = [option is not None for option in (num_partitions, factor)]
    if not any(given):
        raise ValueError("Need to specify num_partitions or factor")
    if all(given):
        raise ValueError(
            "num_partitions and factor cannot be specified at the same time"
        )

    rebalance_op = DataFrameRebalance(
        axis=axis, factor=factor, num_partitions=num_partitions
    )
    return rebalance_op(df_or_series)
@@ -91,15 +91,22 @@ def test_apply_chunk_infer_dtypes_and_index(df1, df2, df3):
91
91
  assert result.index_value is df2.index_value
92
92
  assert result.dtypes.equals(df2.dtypes)
93
93
 
94
+ def process(data, param, k) -> pd.DataFrame[df2.dtypes]:
95
+ return data * param * k
96
+
97
+ result = df2.mf.apply_chunk(process, batch_rows=3, args=(4,), k=1)
98
+ assert result.index_value is df2.index_value
99
+ assert result.dtypes.equals(df2.dtypes)
100
+
94
101
  # mark functions
95
102
  from ....udf import with_python_requirements, with_resources
96
103
 
97
104
  @with_resources("empty.txt")
98
105
  @with_python_requirements("numpy")
99
- def process(data, k):
106
+ def process(data, k) -> pd.DataFrame[df1.dtypes]:
100
107
  return data
101
108
 
102
- result = df1.mf.apply_chunk(process, batch_rows=3, output_type="dataframe", k=1)
109
+ result = df1.mf.apply_chunk(process, batch_rows=3, k=1)
103
110
  assert result.index_value is df1.index_value
104
111
  assert result.dtypes.equals(df1.dtypes)
105
112
  assert isinstance(result.op.func, MarkedFunction)
@@ -60,6 +60,40 @@ def df3():
60
60
  )
61
61
 
62
62
 
63
@pytest.fixture
def df4():
    """Frame with two name columns, a numeric column and two key-value string columns."""
    kv_strings = [
        "k1=1.1,k2=3.1,k3=1.0",
        "k1=7.1,k4=8.2",
        "k5=1.2,k7=1.5",
        "k3=1.1,k9=1",
    ]
    vk_strings = [
        "v1=1.1,v2=1.2",
        "v3=1.1,v4=1.2",
        "v5=1.1,v6=1.2",
        "v7=1.1,v8=1.2",
    ]
    raw = {
        "name1": list("abcd"),
        "name2": list("abcd"),
        "num": [1, 2, 3, 4],
        "kv": kv_strings,
        "vk": vk_strings,
    }
    return DataFrame(raw)
79
+
80
+
81
@pytest.fixture
def df5():
    """Frame with two name columns and six sparsely populated float columns."""
    names = [f"name{i}" for i in range(1, 6)]
    raw = {"name1": list(names), "name2": list(names)}
    raw.update(
        k1=[1.0, None, 7.1, None, None],
        k2=[3.0, 3.0, None, 1.2, 1.0],
        k3=[None, 5.1, None, 1.5, None],
        k5=[10.0, None, None, None, None],
        k7=[None, None, 8.2, None, None],
        k9=[None, None, None, None, 1.1],
    )
    return DataFrame(raw)
95
+
96
+
63
97
  def test_flatmap(df1, df2, df3):
64
98
  def f(x, keys):
65
99
  if x["a"] in keys:
@@ -142,3 +176,23 @@ def test_flatjson():
142
176
  )
143
177
  with pytest.raises(ValueError):
144
178
  s1.mf.flatjson(["$.a"])
179
+
180
+
181
def test_extract_kv(df4):
    """Check output metadata and argument validation of ``mf.extract_kv``."""
    # The fixture stores entries as "key=value" joined by ",", so "=" is the
    # key-value delimiter and "," the item delimiter (previously swapped).
    extract_kv_df = df4.mf.extract_kv(
        columns=["kv", "vk"], kv_delim="=", item_delim=","
    )
    # the expected column count is NaN, i.e. not a fixed number
    assert extract_kv_df.shape == (4, np.nan)
    assert extract_kv_df.index_value.key == df4.index_value.key

    # "name" is not a column of the fixture
    with pytest.raises(ValueError):
        df4.mf.extract_kv(columns=["name"])
    # "num" exists but holds integers, not strings
    with pytest.raises(ValueError):
        df4.mf.extract_kv(columns=["num"])
191
+
192
+
193
+ def test_collect_kv(df5):
194
+ collect_kv_df = df5.mf.collect_kv(columns=["k1", "k2", "k3", "k5", "k7", "k9"])
195
+ assert collect_kv_df.shape == (5, 3)
196
+ assert collect_kv_df.index_value.key == df5.index_value.key
197
+ with pytest.raises(ValueError):
198
+ df5.mf.collect_kv(columns=["num"])