maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (443)
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win32.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +9 -8
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +38 -1
  15. maxframe/codegen/spe/dataframe/misc.py +11 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +32 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +39 -18
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  29. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  30. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  31. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  32. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  33. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  34. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  35. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  36. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  37. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  38. maxframe/codegen/spe/tensor/__init__.py +3 -0
  39. maxframe/codegen/spe/tensor/datasource.py +1 -0
  40. maxframe/codegen/spe/tensor/fft.py +74 -0
  41. maxframe/codegen/spe/tensor/linalg.py +29 -2
  42. maxframe/codegen/spe/tensor/misc.py +79 -25
  43. maxframe/codegen/spe/tensor/spatial.py +45 -0
  44. maxframe/codegen/spe/tensor/statistics.py +44 -0
  45. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  46. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  47. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  48. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  49. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  50. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  51. maxframe/codegen/spe/utils.py +2 -0
  52. maxframe/config/config.py +73 -9
  53. maxframe/config/tests/test_validators.py +13 -1
  54. maxframe/config/validators.py +49 -0
  55. maxframe/conftest.py +54 -17
  56. maxframe/core/accessor.py +2 -2
  57. maxframe/core/base.py +2 -1
  58. maxframe/core/entity/core.py +5 -0
  59. maxframe/core/entity/tileables.py +3 -1
  60. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  61. maxframe/core/graph/entity.py +8 -3
  62. maxframe/core/mode.py +6 -1
  63. maxframe/core/operator/base.py +9 -2
  64. maxframe/core/operator/core.py +10 -2
  65. maxframe/core/operator/utils.py +13 -0
  66. maxframe/dataframe/__init__.py +12 -5
  67. maxframe/dataframe/accessors/__init__.py +1 -1
  68. maxframe/dataframe/accessors/compat.py +45 -0
  69. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  70. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  71. maxframe/dataframe/accessors/dict_/core.py +48 -0
  72. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  73. maxframe/dataframe/accessors/dict_/length.py +7 -16
  74. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  75. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  76. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  77. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  78. maxframe/dataframe/accessors/list_/core.py +48 -0
  79. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  80. maxframe/dataframe/accessors/list_/length.py +7 -16
  81. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  82. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  83. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  84. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  85. maxframe/dataframe/accessors/struct_/core.py +43 -0
  86. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  87. maxframe/dataframe/accessors/struct_/field.py +123 -0
  88. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  89. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  90. maxframe/dataframe/arithmetic/__init__.py +18 -4
  91. maxframe/dataframe/arithmetic/between.py +106 -0
  92. maxframe/dataframe/arithmetic/dot.py +237 -0
  93. maxframe/dataframe/arithmetic/maximum.py +33 -0
  94. maxframe/dataframe/arithmetic/minimum.py +33 -0
  95. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  96. maxframe/dataframe/core.py +161 -224
  97. maxframe/dataframe/datasource/__init__.py +18 -0
  98. maxframe/dataframe/datasource/core.py +6 -0
  99. maxframe/dataframe/datasource/direct.py +57 -0
  100. maxframe/dataframe/datasource/from_dict.py +124 -0
  101. maxframe/dataframe/datasource/from_index.py +1 -1
  102. maxframe/dataframe/datasource/from_records.py +77 -0
  103. maxframe/dataframe/datasource/from_tensor.py +109 -41
  104. maxframe/dataframe/datasource/read_csv.py +21 -14
  105. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  106. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  107. maxframe/dataframe/datasource/read_parquet.py +38 -39
  108. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  109. maxframe/dataframe/datastore/__init__.py +11 -1
  110. maxframe/dataframe/datastore/direct.py +268 -0
  111. maxframe/dataframe/datastore/to_csv.py +29 -41
  112. maxframe/dataframe/datastore/to_odps.py +36 -4
  113. maxframe/dataframe/extensions/__init__.py +20 -4
  114. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  115. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  116. maxframe/dataframe/extensions/collect_kv.py +126 -0
  117. maxframe/dataframe/extensions/extract_kv.py +177 -0
  118. maxframe/dataframe/extensions/flatjson.py +2 -1
  119. maxframe/dataframe/extensions/map_reduce.py +263 -0
  120. maxframe/dataframe/extensions/rebalance.py +62 -0
  121. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  122. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  123. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  124. maxframe/dataframe/groupby/__init__.py +17 -2
  125. maxframe/dataframe/groupby/aggregation.py +86 -49
  126. maxframe/dataframe/groupby/apply.py +1 -1
  127. maxframe/dataframe/groupby/apply_chunk.py +19 -5
  128. maxframe/dataframe/groupby/core.py +116 -16
  129. maxframe/dataframe/groupby/cum.py +4 -25
  130. maxframe/dataframe/groupby/expanding.py +264 -0
  131. maxframe/dataframe/groupby/fill.py +1 -1
  132. maxframe/dataframe/groupby/getitem.py +12 -5
  133. maxframe/dataframe/groupby/head.py +11 -1
  134. maxframe/dataframe/groupby/rank.py +136 -0
  135. maxframe/dataframe/groupby/rolling.py +206 -0
  136. maxframe/dataframe/groupby/shift.py +114 -0
  137. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  138. maxframe/dataframe/indexing/__init__.py +22 -2
  139. maxframe/dataframe/indexing/droplevel.py +195 -0
  140. maxframe/dataframe/indexing/filter.py +169 -0
  141. maxframe/dataframe/indexing/get_level_values.py +76 -0
  142. maxframe/dataframe/indexing/iat.py +45 -0
  143. maxframe/dataframe/indexing/iloc.py +152 -12
  144. maxframe/dataframe/indexing/insert.py +46 -18
  145. maxframe/dataframe/indexing/loc.py +287 -7
  146. maxframe/dataframe/indexing/reindex.py +14 -5
  147. maxframe/dataframe/indexing/rename.py +6 -0
  148. maxframe/dataframe/indexing/rename_axis.py +2 -2
  149. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  150. maxframe/dataframe/indexing/reset_index.py +33 -6
  151. maxframe/dataframe/indexing/sample.py +8 -0
  152. maxframe/dataframe/indexing/setitem.py +3 -3
  153. maxframe/dataframe/indexing/swaplevel.py +185 -0
  154. maxframe/dataframe/indexing/take.py +99 -0
  155. maxframe/dataframe/indexing/truncate.py +140 -0
  156. maxframe/dataframe/indexing/where.py +0 -11
  157. maxframe/dataframe/indexing/xs.py +148 -0
  158. maxframe/dataframe/merge/__init__.py +15 -1
  159. maxframe/dataframe/merge/append.py +97 -98
  160. maxframe/dataframe/merge/combine.py +244 -0
  161. maxframe/dataframe/merge/combine_first.py +120 -0
  162. maxframe/dataframe/merge/compare.py +387 -0
  163. maxframe/dataframe/merge/concat.py +183 -0
  164. maxframe/dataframe/merge/update.py +271 -0
  165. maxframe/dataframe/misc/__init__.py +28 -11
  166. maxframe/dataframe/misc/_duplicate.py +10 -4
  167. maxframe/dataframe/misc/apply.py +1 -1
  168. maxframe/dataframe/misc/check_unique.py +82 -0
  169. maxframe/dataframe/misc/clip.py +145 -0
  170. maxframe/dataframe/misc/describe.py +175 -9
  171. maxframe/dataframe/misc/drop.py +31 -0
  172. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  173. maxframe/dataframe/misc/duplicated.py +2 -2
  174. maxframe/dataframe/misc/get_dummies.py +5 -1
  175. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  176. maxframe/dataframe/misc/isin.py +2 -2
  177. maxframe/dataframe/misc/map.py +125 -18
  178. maxframe/dataframe/misc/repeat.py +159 -0
  179. maxframe/dataframe/misc/tests/test_misc.py +48 -3
  180. maxframe/dataframe/misc/to_numeric.py +3 -0
  181. maxframe/dataframe/misc/transform.py +12 -5
  182. maxframe/dataframe/misc/transpose.py +13 -1
  183. maxframe/dataframe/misc/valid_index.py +115 -0
  184. maxframe/dataframe/misc/value_counts.py +38 -4
  185. maxframe/dataframe/missing/checkna.py +14 -6
  186. maxframe/dataframe/missing/dropna.py +5 -0
  187. maxframe/dataframe/missing/fillna.py +1 -1
  188. maxframe/dataframe/missing/replace.py +7 -4
  189. maxframe/dataframe/reduction/__init__.py +35 -16
  190. maxframe/dataframe/reduction/aggregation.py +43 -14
  191. maxframe/dataframe/reduction/all.py +2 -2
  192. maxframe/dataframe/reduction/any.py +2 -2
  193. maxframe/dataframe/reduction/argmax.py +103 -0
  194. maxframe/dataframe/reduction/argmin.py +103 -0
  195. maxframe/dataframe/reduction/core.py +80 -24
  196. maxframe/dataframe/reduction/count.py +13 -9
  197. maxframe/dataframe/reduction/cov.py +166 -0
  198. maxframe/dataframe/reduction/cummax.py +2 -2
  199. maxframe/dataframe/reduction/cummin.py +2 -2
  200. maxframe/dataframe/reduction/cumprod.py +2 -2
  201. maxframe/dataframe/reduction/cumsum.py +2 -2
  202. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  203. maxframe/dataframe/reduction/idxmax.py +185 -0
  204. maxframe/dataframe/reduction/idxmin.py +185 -0
  205. maxframe/dataframe/reduction/kurtosis.py +37 -30
  206. maxframe/dataframe/reduction/max.py +2 -2
  207. maxframe/dataframe/reduction/mean.py +9 -7
  208. maxframe/dataframe/reduction/median.py +2 -2
  209. maxframe/dataframe/reduction/min.py +2 -2
  210. maxframe/dataframe/reduction/mode.py +144 -0
  211. maxframe/dataframe/reduction/nunique.py +19 -11
  212. maxframe/dataframe/reduction/prod.py +18 -13
  213. maxframe/dataframe/reduction/reduction_size.py +2 -2
  214. maxframe/dataframe/reduction/sem.py +13 -9
  215. maxframe/dataframe/reduction/skew.py +31 -27
  216. maxframe/dataframe/reduction/str_concat.py +10 -7
  217. maxframe/dataframe/reduction/sum.py +18 -14
  218. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  219. maxframe/dataframe/reduction/unique.py +20 -3
  220. maxframe/dataframe/reduction/var.py +16 -12
  221. maxframe/dataframe/reshape/__init__.py +38 -0
  222. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  223. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  224. maxframe/dataframe/reshape/unstack.py +114 -0
  225. maxframe/dataframe/sort/__init__.py +16 -1
  226. maxframe/dataframe/sort/argsort.py +68 -0
  227. maxframe/dataframe/sort/core.py +2 -1
  228. maxframe/dataframe/sort/nlargest.py +238 -0
  229. maxframe/dataframe/sort/nsmallest.py +228 -0
  230. maxframe/dataframe/sort/rank.py +147 -0
  231. maxframe/dataframe/statistics/__init__.py +3 -3
  232. maxframe/dataframe/statistics/corr.py +1 -0
  233. maxframe/dataframe/statistics/quantile.py +2 -2
  234. maxframe/dataframe/tests/test_typing.py +104 -0
  235. maxframe/dataframe/tests/test_utils.py +66 -2
  236. maxframe/dataframe/tseries/__init__.py +19 -0
  237. maxframe/dataframe/tseries/at_time.py +61 -0
  238. maxframe/dataframe/tseries/between_time.py +122 -0
  239. maxframe/dataframe/typing_.py +185 -0
  240. maxframe/dataframe/utils.py +125 -52
  241. maxframe/dataframe/window/aggregation.py +8 -4
  242. maxframe/dataframe/window/core.py +14 -1
  243. maxframe/dataframe/window/ewm.py +1 -3
  244. maxframe/dataframe/window/expanding.py +37 -35
  245. maxframe/dataframe/window/rolling.py +49 -39
  246. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  247. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  248. maxframe/env.py +7 -4
  249. maxframe/errors.py +2 -2
  250. maxframe/io/odpsio/schema.py +9 -3
  251. maxframe/io/odpsio/tableio.py +7 -2
  252. maxframe/io/odpsio/tests/test_schema.py +198 -83
  253. maxframe/learn/__init__.py +10 -2
  254. maxframe/learn/cluster/__init__.py +15 -0
  255. maxframe/learn/cluster/_kmeans.py +782 -0
  256. maxframe/learn/contrib/llm/core.py +18 -7
  257. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  258. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  259. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  260. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  261. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  262. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  263. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  264. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  265. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  266. maxframe/learn/contrib/llm/models/managed.py +76 -11
  267. maxframe/learn/contrib/llm/models/openai.py +72 -0
  268. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  269. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  270. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  271. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  272. maxframe/learn/contrib/llm/text.py +348 -42
  273. maxframe/learn/contrib/models.py +4 -1
  274. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  275. maxframe/learn/contrib/xgboost/core.py +113 -4
  276. maxframe/learn/contrib/xgboost/predict.py +4 -2
  277. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  278. maxframe/learn/contrib/xgboost/train.py +7 -2
  279. maxframe/learn/core.py +66 -0
  280. maxframe/learn/linear_model/_base.py +58 -1
  281. maxframe/learn/linear_model/_lin_reg.py +1 -1
  282. maxframe/learn/metrics/__init__.py +6 -0
  283. maxframe/learn/metrics/_classification.py +145 -0
  284. maxframe/learn/metrics/_ranking.py +477 -0
  285. maxframe/learn/metrics/_scorer.py +60 -0
  286. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  287. maxframe/learn/metrics/pairwise/core.py +77 -0
  288. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  289. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  290. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  291. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  292. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  293. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  294. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  295. maxframe/learn/metrics/tests/__init__.py +13 -0
  296. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  297. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  298. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  299. maxframe/learn/utils/__init__.py +2 -1
  300. maxframe/learn/utils/checks.py +1 -2
  301. maxframe/learn/utils/core.py +59 -0
  302. maxframe/learn/utils/extmath.py +79 -9
  303. maxframe/learn/utils/odpsio.py +262 -0
  304. maxframe/learn/utils/validation.py +2 -2
  305. maxframe/lib/compat.py +40 -0
  306. maxframe/lib/dtypes_extension/__init__.py +16 -1
  307. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  308. maxframe/lib/dtypes_extension/blob.py +304 -0
  309. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  310. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  311. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  312. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  313. maxframe/lib/filesystem/_oss_lib/common.py +124 -50
  314. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  315. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  316. maxframe/lib/filesystem/base.py +1 -1
  317. maxframe/lib/filesystem/core.py +1 -1
  318. maxframe/lib/filesystem/oss.py +115 -46
  319. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  320. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  321. maxframe/lib/wrapped_pickle.py +10 -0
  322. maxframe/opcodes.py +41 -15
  323. maxframe/protocol.py +12 -0
  324. maxframe/remote/core.py +4 -0
  325. maxframe/serialization/__init__.py +11 -2
  326. maxframe/serialization/arrow.py +38 -13
  327. maxframe/serialization/blob.py +32 -0
  328. maxframe/serialization/core.cp37-win32.pyd +0 -0
  329. maxframe/serialization/core.pyx +39 -1
  330. maxframe/serialization/exception.py +2 -4
  331. maxframe/serialization/numpy.py +11 -0
  332. maxframe/serialization/pandas.py +46 -9
  333. maxframe/serialization/serializables/core.py +2 -2
  334. maxframe/serialization/tests/test_serial.py +31 -4
  335. maxframe/tensor/__init__.py +38 -8
  336. maxframe/tensor/arithmetic/__init__.py +19 -10
  337. maxframe/tensor/arithmetic/core.py +2 -2
  338. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  339. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
  340. maxframe/tensor/core.py +6 -2
  341. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  342. maxframe/tensor/extensions/__init__.py +2 -0
  343. maxframe/tensor/extensions/apply_chunk.py +3 -3
  344. maxframe/tensor/extensions/rebalance.py +65 -0
  345. maxframe/tensor/fft/__init__.py +32 -0
  346. maxframe/tensor/fft/core.py +168 -0
  347. maxframe/tensor/fft/fft.py +112 -0
  348. maxframe/tensor/fft/fft2.py +118 -0
  349. maxframe/tensor/fft/fftfreq.py +80 -0
  350. maxframe/tensor/fft/fftn.py +123 -0
  351. maxframe/tensor/fft/fftshift.py +79 -0
  352. maxframe/tensor/fft/hfft.py +112 -0
  353. maxframe/tensor/fft/ifft.py +114 -0
  354. maxframe/tensor/fft/ifft2.py +115 -0
  355. maxframe/tensor/fft/ifftn.py +123 -0
  356. maxframe/tensor/fft/ifftshift.py +73 -0
  357. maxframe/tensor/fft/ihfft.py +93 -0
  358. maxframe/tensor/fft/irfft.py +118 -0
  359. maxframe/tensor/fft/irfft2.py +62 -0
  360. maxframe/tensor/fft/irfftn.py +114 -0
  361. maxframe/tensor/fft/rfft.py +116 -0
  362. maxframe/tensor/fft/rfft2.py +63 -0
  363. maxframe/tensor/fft/rfftfreq.py +87 -0
  364. maxframe/tensor/fft/rfftn.py +113 -0
  365. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  366. maxframe/tensor/linalg/__init__.py +7 -0
  367. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  368. maxframe/tensor/linalg/cholesky.py +117 -0
  369. maxframe/tensor/linalg/einsum.py +339 -0
  370. maxframe/tensor/linalg/lstsq.py +100 -0
  371. maxframe/tensor/linalg/matrix_norm.py +75 -0
  372. maxframe/tensor/linalg/norm.py +249 -0
  373. maxframe/tensor/linalg/solve.py +72 -0
  374. maxframe/tensor/linalg/solve_triangular.py +2 -2
  375. maxframe/tensor/linalg/vector_norm.py +113 -0
  376. maxframe/tensor/misc/__init__.py +24 -1
  377. maxframe/tensor/misc/argwhere.py +72 -0
  378. maxframe/tensor/misc/array_split.py +46 -0
  379. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  380. maxframe/tensor/misc/copyto.py +130 -0
  381. maxframe/tensor/misc/delete.py +104 -0
  382. maxframe/tensor/misc/dsplit.py +68 -0
  383. maxframe/tensor/misc/ediff1d.py +74 -0
  384. maxframe/tensor/misc/expand_dims.py +85 -0
  385. maxframe/tensor/misc/flip.py +90 -0
  386. maxframe/tensor/misc/fliplr.py +64 -0
  387. maxframe/tensor/misc/flipud.py +68 -0
  388. maxframe/tensor/misc/hsplit.py +85 -0
  389. maxframe/tensor/misc/insert.py +139 -0
  390. maxframe/tensor/misc/moveaxis.py +83 -0
  391. maxframe/tensor/misc/result_type.py +88 -0
  392. maxframe/tensor/misc/roll.py +124 -0
  393. maxframe/tensor/misc/rollaxis.py +77 -0
  394. maxframe/tensor/misc/shape.py +89 -0
  395. maxframe/tensor/misc/split.py +190 -0
  396. maxframe/tensor/misc/tile.py +109 -0
  397. maxframe/tensor/misc/vsplit.py +74 -0
  398. maxframe/tensor/reduction/array_equal.py +2 -1
  399. maxframe/tensor/sort/__init__.py +2 -0
  400. maxframe/tensor/sort/argpartition.py +98 -0
  401. maxframe/tensor/sort/partition.py +228 -0
  402. maxframe/tensor/spatial/__init__.py +15 -0
  403. maxframe/tensor/spatial/distance/__init__.py +17 -0
  404. maxframe/tensor/spatial/distance/cdist.py +421 -0
  405. maxframe/tensor/spatial/distance/pdist.py +398 -0
  406. maxframe/tensor/spatial/distance/squareform.py +153 -0
  407. maxframe/tensor/special/__init__.py +159 -21
  408. maxframe/tensor/special/airy.py +55 -0
  409. maxframe/tensor/special/bessel.py +199 -0
  410. maxframe/tensor/special/core.py +65 -4
  411. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  412. maxframe/tensor/special/ellip_harm.py +55 -0
  413. maxframe/tensor/special/err_fresnel.py +223 -0
  414. maxframe/tensor/special/gamma_funcs.py +303 -0
  415. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  416. maxframe/tensor/special/info_theory.py +189 -0
  417. maxframe/tensor/special/misc.py +21 -0
  418. maxframe/tensor/statistics/__init__.py +6 -0
  419. maxframe/tensor/statistics/corrcoef.py +77 -0
  420. maxframe/tensor/statistics/cov.py +222 -0
  421. maxframe/tensor/statistics/digitize.py +126 -0
  422. maxframe/tensor/statistics/histogram.py +520 -0
  423. maxframe/tensor/statistics/median.py +85 -0
  424. maxframe/tensor/statistics/ptp.py +89 -0
  425. maxframe/tensor/utils.py +3 -3
  426. maxframe/tests/test_udf.py +61 -0
  427. maxframe/tests/test_utils.py +51 -6
  428. maxframe/tests/utils.py +0 -2
  429. maxframe/typing_.py +2 -0
  430. maxframe/udf.py +130 -9
  431. maxframe/utils.py +254 -27
  432. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
  433. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
  434. maxframe_client/fetcher.py +35 -4
  435. maxframe_client/session/odps.py +7 -2
  436. maxframe_client/session/task.py +8 -1
  437. maxframe_client/tests/test_fetcher.py +76 -3
  438. maxframe_client/tests/test_session.py +28 -1
  439. maxframe/dataframe/arrays.py +0 -864
  440. /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  441. /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  442. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  443. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -28,20 +28,25 @@ from ...tensor.utils import calc_sliced_size, filter_inputs
28
28
  from ...utils import is_full_slice, lazy_import, pd_release_version
29
29
  from ..core import DATAFRAME_TYPE, IndexValue
30
30
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
31
- from ..utils import parse_index
31
+ from ..utils import parse_index, validate_axis
32
32
  from .iloc import DataFrameIlocSetItem
33
33
 
34
34
  cudf = lazy_import("cudf")
35
35
  with_slice_locs_kind = pd_release_version < (1, 4, 0)
36
36
 
37
37
 
38
- def process_loc_indexes(inp, indexes, fetch_index: bool = True):
38
+ def process_loc_indexes(inp, indexes, fetch_index: bool = True, axis=None):
39
39
  ndim = inp.ndim
40
40
 
41
41
  if not isinstance(indexes, tuple):
42
42
  indexes = (indexes,)
43
+ if axis is not None and inp.axes[axis].nlevels > 1:
44
+ indexes = (indexes,)
43
45
  if len(indexes) < ndim:
44
- indexes += (slice(None),) * (ndim - len(indexes))
46
+ if axis == 0 or axis is None:
47
+ indexes += (slice(None),) * (ndim - len(indexes))
48
+ else:
49
+ indexes = (slice(None),) * (ndim - len(indexes)) + indexes
45
50
  if len(indexes) > ndim:
46
51
  raise IndexingError("Too many indexers")
47
52
 
@@ -67,8 +72,9 @@ def process_loc_indexes(inp, indexes, fetch_index: bool = True):
67
72
 
68
73
 
69
74
  class DataFrameLoc:
70
- def __init__(self, obj):
75
+ def __init__(self, obj, axis=None):
71
76
  self._obj = obj
77
+ self._axis = axis
72
78
 
73
79
  def _use_iloc(self, indexes):
74
80
  # for RangeIndex from 0, use iloc instead of loc
@@ -105,7 +111,7 @@ class DataFrameLoc:
105
111
  return True, None
106
112
 
107
113
  def __getitem__(self, indexes):
108
- indexes = process_loc_indexes(self._obj, indexes)
114
+ indexes = process_loc_indexes(self._obj, indexes, axis=self._axis)
109
115
 
110
116
  use_iloc, new_indexes = self._use_iloc(indexes)
111
117
  if use_iloc:
@@ -120,7 +126,9 @@ class DataFrameLoc:
120
126
  raise NotImplementedError("Only scalar value is supported to set by loc")
121
127
  if not isinstance(self._obj, DATAFRAME_TYPE):
122
128
  raise NotImplementedError("Only DataFrame is supported to set by loc")
123
- indexes = process_loc_indexes(self._obj, indexes, fetch_index=False)
129
+ indexes = process_loc_indexes(
130
+ self._obj, indexes, fetch_index=False, axis=self._axis
131
+ )
124
132
  use_iloc, new_indexes = self._use_iloc(indexes)
125
133
  if use_iloc:
126
134
  op = DataFrameIlocSetItem(indexes=new_indexes, value=value)
@@ -137,9 +145,13 @@ class DataFrameLoc:
137
145
  ret = op([self._obj] + indices_tileable)
138
146
  self._obj.data = ret.data
139
147
 
148
+ def __call__(self, axis):
149
+ axis = validate_axis(axis, self._obj)
150
+ return DataFrameLoc(self._obj, axis)
151
+
140
152
 
141
153
  class DataFrameLocSetItem(DataFrameOperator, DataFrameOperatorMixin):
142
- _op_type_ = opcodes.DATAFRAME_ILOC_SETITEM
154
+ _op_type_ = opcodes.DATAFRAME_LOC_SETITEM
143
155
 
144
156
  indexes = ListField("indexes", default=None)
145
157
  value = AnyField("value", default=None)
@@ -336,6 +348,17 @@ class DataFrameLocGetItem(DataFrameOperator, DataFrameOperatorMixin):
336
348
  else:
337
349
  # append None to indicate returning Series
338
350
  param["shape"] = None
351
+ elif isinstance(index, tuple):
352
+ has_ranges = any(
353
+ isinstance(i, (slice, np.ndarray))
354
+ or (hasattr(i, "dtype") and index.ndim == 1)
355
+ for i in index
356
+ )
357
+ if has_ranges:
358
+ param["shape"] = np.nan
359
+ param["index_value"] = parse_index(pd_index, inp, index)
360
+ else:
361
+ param["shape"] = None
339
362
  else:
340
363
  param["shape"] = None
341
364
  return param
@@ -411,4 +434,261 @@ class DataFrameLocGetItem(DataFrameOperator, DataFrameOperatorMixin):
411
434
 
412
435
 
413
436
  def loc(a):
437
+ """
438
+ Access a group of rows and columns by label(s) or a boolean array.
439
+
440
+ ``.loc[]`` is primarily label based, but may also be used with a
441
+ boolean array.
442
+
443
+ Allowed inputs are:
444
+
445
+ - A single label, e.g. ``5`` or ``'a'``, (note that ``5`` is
446
+ interpreted as a *label* of the index, and **never** as an
447
+ integer position along the index).
448
+ - A list or array of labels, e.g. ``['a', 'b', 'c']``.
449
+ - A slice object with labels, e.g. ``'a':'f'``.
450
+
451
+ .. warning:: Note that contrary to usual python slices, **both** the
452
+ start and the stop are included
453
+
454
+ - A boolean array of the same length as the axis being sliced,
455
+ e.g. ``[True, False, True]``.
456
+ - An alignable boolean Series. The index of the key will be aligned before
457
+ masking.
458
+ - An alignable Index. The Index of the returned selection will be the input.
459
+ - A ``callable`` function with one argument (the calling Series or
460
+ DataFrame) and that returns valid output for indexing (one of the above)
461
+
462
+ See more at :ref:`Selection by Label <indexing.label>`.
463
+
464
+ Raises
465
+ ------
466
+ KeyError
467
+ If any items are not found.
468
+ IndexingError
469
+ If an indexed key is passed and its index is unalignable to the frame index.
470
+
471
+ See Also
472
+ --------
473
+ DataFrame.at : Access a single value for a row/column label pair.
474
+ DataFrame.iloc : Access group of rows and columns by integer position(s).
475
+ DataFrame.xs : Returns a cross-section (row(s) or column(s)) from the
476
+ Series/DataFrame.
477
+ Series.loc : Access group of values using labels.
478
+
479
+ Examples
480
+ --------
481
+ **Getting values**
482
+
483
+ >>> import maxframe.dataframe as md
484
+ >>> df = md.DataFrame([[1, 2], [4, 5], [7, 8]],
485
+ ... index=['cobra', 'viper', 'sidewinder'],
486
+ ... columns=['max_speed', 'shield'])
487
+ >>> df.execute()
488
+ max_speed shield
489
+ cobra 1 2
490
+ viper 4 5
491
+ sidewinder 7 8
492
+
493
+ Single label. Note this returns the row as a Series.
494
+
495
+ >>> df.loc['viper'].execute()
496
+ max_speed 4
497
+ shield 5
498
+ Name: viper, dtype: int64
499
+
500
+ List of labels. Note using ``[[]]`` returns a DataFrame.
501
+
502
+ >>> df.loc[['viper', 'sidewinder']].execute()
503
+ max_speed shield
504
+ viper 4 5
505
+ sidewinder 7 8
506
+
507
+ Single label for row and column
508
+
509
+ >>> df.loc['cobra', 'shield'].execute()
510
+ 2
511
+
512
+ Slice with labels for row and single label for column. As mentioned
513
+ above, note that both the start and stop of the slice are included.
514
+
515
+ >>> df.loc['cobra':'viper', 'max_speed'].execute()
516
+ cobra 1
517
+ viper 4
518
+ Name: max_speed, dtype: int64
519
+
520
+ Boolean list with the same length as the row axis
521
+
522
+ >>> df.loc[[False, False, True]].execute()
523
+ max_speed shield
524
+ sidewinder 7 8
525
+
526
+ Alignable boolean Series:
527
+
528
+ >>> df.loc[md.Series([False, True, False],
529
+ ... index=['viper', 'sidewinder', 'cobra'])].execute()
530
+ max_speed shield
531
+ sidewinder 7 8
532
+
533
+ Index (same behavior as ``df.reindex``)
534
+
535
+ >>> df.loc[md.Index(["cobra", "viper"], name="foo")].execute()
536
+ max_speed shield
537
+ foo
538
+ cobra 1 2
539
+ viper 4 5
540
+
541
+ Conditional that returns a boolean Series
542
+
543
+ >>> df.loc[df['shield'] > 6].execute()
544
+ max_speed shield
545
+ sidewinder 7 8
546
+
547
+ Conditional that returns a boolean Series with column labels specified
548
+
549
+ >>> df.loc[df['shield'] > 6, ['max_speed']].execute()
550
+ max_speed
551
+ sidewinder 7
552
+
553
+ Callable that returns a boolean Series
554
+
555
+ >>> df.loc[lambda df: df['shield'] == 8].execute()
556
+ max_speed shield
557
+ sidewinder 7 8
558
+
559
+ **Setting values**
560
+
561
+ Set value for all items matching the list of labels
562
+
563
+ >>> df.loc[['viper', 'sidewinder'], ['shield']] = 50
564
+ >>> df.execute()
565
+ max_speed shield
566
+ cobra 1 2
567
+ viper 4 50
568
+ sidewinder 7 50
569
+
570
+ Set value for an entire row
571
+
572
+ >>> df.loc['cobra'] = 10
573
+ >>> df.execute()
574
+ max_speed shield
575
+ cobra 10 10
576
+ viper 4 50
577
+ sidewinder 7 50
578
+
579
+ Set value for an entire column
580
+
581
+ >>> df.loc[:, 'max_speed'] = 30
582
+ >>> df.execute()
583
+ max_speed shield
584
+ cobra 30 10
585
+ viper 30 50
586
+ sidewinder 30 50
587
+
588
+ Set value for rows matching callable condition
589
+
590
+ >>> df.loc[df['shield'] > 35] = 0
591
+ >>> df.execute()
592
+ max_speed shield
593
+ cobra 30 10
594
+ viper 0 0
595
+ sidewinder 0 0
596
+
597
+ **Getting values on a DataFrame with an index that has integer labels**
598
+
599
+ Another example using integers for the index
600
+
601
+ >>> df = md.DataFrame([[1, 2], [4, 5], [7, 8]],
602
+ ... index=[7, 8, 9], columns=['max_speed', 'shield'])
603
+ >>> df.execute()
604
+ max_speed shield
605
+ 7 1 2
606
+ 8 4 5
607
+ 9 7 8
608
+
609
+ Slice with integer labels for rows. As mentioned above, note that both
610
+ the start and stop of the slice are included.
611
+
612
+ >>> df.loc[7:9].execute()
613
+ max_speed shield
614
+ 7 1 2
615
+ 8 4 5
616
+ 9 7 8
617
+
618
+ **Getting values with a MultiIndex**
619
+
620
+ A number of examples using a DataFrame with a MultiIndex
621
+
622
+ >>> tuples = [
623
+ ... ('cobra', 'mark i'), ('cobra', 'mark ii'),
624
+ ... ('sidewinder', 'mark i'), ('sidewinder', 'mark ii'),
625
+ ... ('viper', 'mark ii'), ('viper', 'mark iii')
626
+ ... ]
627
+ >>> index = md.MultiIndex.from_tuples(tuples)
628
+ >>> values = [[12, 2], [0, 4], [10, 20],
629
+ ... [1, 4], [7, 1], [16, 36]]
630
+ >>> df = md.DataFrame(values, columns=['max_speed', 'shield'], index=index)
631
+ >>> df.execute()
632
+ max_speed shield
633
+ cobra mark i 12 2
634
+ mark ii 0 4
635
+ sidewinder mark i 10 20
636
+ mark ii 1 4
637
+ viper mark ii 7 1
638
+ mark iii 16 36
639
+
640
+ Single label. Note this returns a DataFrame with a single index.
641
+
642
+ >>> df.loc['cobra'].execute()
643
+ max_speed shield
644
+ mark i 12 2
645
+ mark ii 0 4
646
+
647
+ Single index tuple. Note this returns a Series.
648
+
649
+ >>> df.loc[('cobra', 'mark ii')].execute()
650
+ max_speed 0
651
+ shield 4
652
+ Name: (cobra, mark ii), dtype: int64
653
+
654
+ Single label for row and column. Similar to passing in a tuple, this
655
+ returns a Series.
656
+
657
+ >>> df.loc['cobra', 'mark i'].execute()
658
+ max_speed 12
659
+ shield 2
660
+ Name: (cobra, mark i), dtype: int64
661
+
662
+ Single tuple. Note using ``[[]]`` returns a DataFrame.
663
+
664
+ >>> df.loc[[('cobra', 'mark ii')]].execute()
665
+ max_speed shield
666
+ cobra mark ii 0 4
667
+
668
+ Single tuple for the index with a single label for the column
669
+
670
+ >>> df.loc[('cobra', 'mark i'), 'shield'].execute()
671
+ 2
672
+
673
+ Slice from index tuple to single label
674
+
675
+ >>> df.loc[('cobra', 'mark i'):'viper'].execute()
676
+ max_speed shield
677
+ cobra mark i 12 2
678
+ mark ii 0 4
679
+ sidewinder mark i 10 20
680
+ mark ii 1 4
681
+ viper mark ii 7 1
682
+ mark iii 16 36
683
+
684
+ Slice from index tuple to index tuple
685
+
686
+ >>> df.loc[('cobra', 'mark i'):('viper', 'mark ii')].execute()
687
+ max_speed shield
688
+ cobra mark i 12 2
689
+ mark ii 0 4
690
+ sidewinder mark i 10 20
691
+ mark ii 1 4
692
+ viper mark ii 7 1
693
+ """
414
694
  return DataFrameLoc(a)
@@ -16,6 +16,8 @@ from typing import List
16
16
 
17
17
  import numpy as np
18
18
 
19
+ from ...core.operator import OperatorStage
20
+
19
21
  try:
20
22
  import scipy.sparse as sps
21
23
  except ImportError: # pragma: no cover
@@ -31,7 +33,7 @@ from ...serialization.serializables import (
31
33
  StringField,
32
34
  )
33
35
  from ...tensor import tensor as astensor
34
- from ...utils import lazy_import, pd_release_version
36
+ from ...utils import is_full_slice, lazy_import, pd_release_version
35
37
  from ..core import INDEX_TYPE
36
38
  from ..core import Index as DataFrameIndexType
37
39
  from ..initializer import Index as asindex
@@ -92,12 +94,19 @@ class DataFrameReindex(DataFrameOperator, DataFrameOperatorMixin):
92
94
  @classmethod
93
95
  def _set_inputs(cls, op: "DataFrameReindex", inputs: List[EntityData]):
94
96
  super()._set_inputs(op, inputs)
95
- inputs_iter = iter(op._inputs)
97
+ if getattr(op, "indexes", None):
98
+ op.index, op.columns = [
99
+ None if is_full_slice(idx) else idx for idx in list(op.indexes) + [None]
100
+ ][:2]
101
+ inputs_iter = iter(inputs)
96
102
  op._input = next(inputs_iter)
97
103
  if op.index is not None and isinstance(op.index, ENTITY_TYPE):
98
104
  op.index = next(inputs_iter)
99
- if op.fill_value is not None and isinstance(op.fill_value, ENTITY_TYPE):
100
- op.fill_value = next(inputs_iter)
105
+ if op.fill_value is not None:
106
+ if op.stage == OperatorStage.agg:
107
+ op.fill_value = None
108
+ elif isinstance(op.fill_value, ENTITY_TYPE):
109
+ op.fill_value = next(inputs_iter)
101
110
 
102
111
  def __call__(self, df_or_series):
103
112
  inputs = [df_or_series]
@@ -363,7 +372,7 @@ def reindex(
363
372
  axes_kwargs = dict(index=index, columns=columns, axis=axis)
364
373
  axes = validate_axis_style_args(
365
374
  df_or_series,
366
- (labels,),
375
+ (labels,) if labels is not None else (),
367
376
  {k: v for k, v in axes_kwargs.items() if v is not None},
368
377
  "labels",
369
378
  "reindex",
@@ -16,6 +16,7 @@ import warnings
16
16
 
17
17
  from ... import opcodes
18
18
  from ...core import get_output_types
19
+ from ...serialization import PickleContainer
19
20
  from ...serialization.serializables import AnyField, StringField
20
21
  from ..core import INDEX_TYPE, SERIES_TYPE
21
22
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
@@ -34,6 +35,11 @@ class DataFrameRename(DataFrameOperator, DataFrameOperatorMixin):
34
35
  def __init__(self, output_types=None, **kw):
35
36
  super().__init__(_output_types=output_types, **kw)
36
37
 
38
def has_custom_code(self) -> bool:
    """
    Tell whether this rename operator carries a pickled mapper.

    Returns
    -------
    bool
        True when ``columns_mapper`` or ``index_mapper`` is a
        ``PickleContainer`` instance, False otherwise.
    """
    # NOTE(review): a PickleContainer presumably wraps a user-supplied
    # callable mapper -- confirm against the serialization module.
    if isinstance(self.columns_mapper, PickleContainer):
        return True
    return isinstance(self.index_mapper, PickleContainer)
42
+
37
43
  def _calc_renamed_df(self, df, errors="ignore"):
38
44
  empty_df = build_df(df)
39
45
  return empty_df.rename(
@@ -85,8 +85,8 @@ def rename_axis_with_level(
85
85
  else:
86
86
  columns = mapper
87
87
  op = DataFrameRenameAxis(
88
- index=None if index is no_default else index,
89
- columns=None if columns is no_default else columns,
88
+ index=index,
89
+ columns=columns,
90
90
  copy_value=copy,
91
91
  level=level,
92
92
  axis=0 if index is not no_default else 1,
@@ -0,0 +1,143 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import pandas as pd
16
+
17
+ from ... import opcodes
18
+ from ...core import get_output_types
19
+ from ...serialization.serializables import AnyField, Int32Field
20
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
21
+ from ..utils import parse_index, validate_axis
22
+
23
+
24
class DataFrameReorderLevels(DataFrameOperator, DataFrameOperatorMixin):
    """
    Operator that rearranges the levels of a MultiIndex on one axis.

    With ``axis == 0`` the row index is reordered; with any other value
    the column index is reordered.
    """

    _op_type_ = opcodes.REORDER_LEVELS

    # New level order, forwarded unchanged to ``MultiIndex.reorder_levels``.
    order = AnyField("order")
    # Axis whose MultiIndex is reordered; 0 selects the row index.
    axis = Int32Field("axis", default=0)

    def __call__(self, df_or_series):
        """
        Build the output tileable with reordered index metadata.

        Parameters
        ----------
        df_or_series
            Input object providing ``index_value`` (and ``columns_value``
            when ``axis`` is not 0) as well as ``params``.

        Returns
        -------
        The tileable created by ``new_tileable`` from the input's params,
        with ``index_value`` or ``columns_value`` replaced by the
        reordered index.

        Raises
        ------
        ValueError
            If the index on the selected axis is not a ``pd.MultiIndex``.
        """
        # The output has the same kind as the input.
        self._output_types = get_output_types(df_or_series)

        on_rows = self.axis == 0
        source_value = (
            df_or_series.index_value if on_rows else df_or_series.columns_value
        )

        # Reorder on the pandas representation of the index metadata.
        original_index = source_value.to_pandas()
        if not isinstance(original_index, pd.MultiIndex):
            raise ValueError("reorder_levels can only be used with MultiIndex")
        reordered_index = original_index.reorder_levels(self.order)

        params = df_or_series.params
        if on_rows:
            params["index_value"] = parse_index(reordered_index)
        else:
            # Column metadata is parsed with store_data=True.
            params["columns_value"] = parse_index(reordered_index, store_data=True)
        return self.new_tileable([df_or_series], **params)
51
+
52
+
53
def _reorder_levels(df_or_series, order, axis=0):
    """
    Shared implementation behind the DataFrame and Series entry points.

    The axis is first passed through ``validate_axis``; a
    ``DataFrameReorderLevels`` operator is then built and applied to
    ``df_or_series``.
    """
    checked_axis = validate_axis(axis, df_or_series)
    return DataFrameReorderLevels(order=order, axis=checked_axis)(df_or_series)
57
+
58
+
59
def df_reorder_levels(df, order, axis=0):
    """
    Rearrange the levels of a MultiIndex into the given order.

    Levels may be neither dropped nor duplicated.

    Parameters
    ----------
    order : list of int or list of str
        The new level order. Each level is referenced either by its
        position (number) or by its key (label).
    axis : {0 or 'index', 1 or 'columns'}, default 0
        The axis whose levels are reordered.

    Returns
    -------
    DataFrame

    Raises
    ------
    ValueError
        If the index on the chosen axis is not a MultiIndex.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> data = {
    ...     "class": ["Mammals", "Mammals", "Reptiles"],
    ...     "diet": ["Omnivore", "Carnivore", "Carnivore"],
    ...     "species": ["Humans", "Dogs", "Snakes"],
    ... }
    >>> df = md.DataFrame(data, columns=["class", "diet", "species"])
    >>> df = df.set_index(["class", "diet"])
    >>> df.execute()
                        species
    class    diet
    Mammals  Omnivore    Humans
             Carnivore     Dogs
    Reptiles Carnivore   Snakes

    Swap the two index levels:

    >>> df.reorder_levels(["diet", "class"]).execute()
                        species
    diet      class
    Omnivore  Mammals    Humans
    Carnivore Mammals      Dogs
              Reptiles   Snakes
    """
    reordered = _reorder_levels(df, order, axis=axis)
    return reordered
102
+
103
+
104
def series_reorder_levels(series, order):
    """
    Rearrange the index levels of a Series into the given order.

    Levels may be neither dropped nor duplicated.

    Parameters
    ----------
    order : list of int or list of str
        The new level order. Each level is referenced either by its
        number or by its key.

    Returns
    -------
    type of caller (new object)

    Raises
    ------
    ValueError
        If the index of the Series is not a MultiIndex.

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md
    >>> arrays = [mt.array(["dog", "dog", "cat", "cat", "bird", "bird"]),
    ...           mt.array(["white", "black", "white", "black", "white", "black"])]
    >>> s = md.Series([1, 2, 3, 3, 5, 2], index=arrays)
    >>> s.execute()
    dog   white    1
          black    2
    cat   white    3
          black    3
    bird  white    5
          black    2
    dtype: int64
    >>> s.reorder_levels([1, 0]).execute()
    white  dog     1
    black  dog     2
    white  cat     3
    black  cat     3
    white  bird    5
    black  bird    2
    dtype: int64
    """
    reordered = _reorder_levels(series, order)
    return reordered
@@ -18,10 +18,12 @@ import pandas as pd
18
18
  from ... import opcodes
19
19
  from ...core import OutputType
20
20
  from ...serialization.serializables import AnyField, BoolField
21
- from ...utils import no_default
21
+ from ...utils import no_default, pd_release_version
22
22
  from ..operators import DATAFRAME_TYPE, DataFrameOperator, DataFrameOperatorMixin
23
23
  from ..utils import build_empty_df, build_empty_series, parse_index
24
24
 
25
+ _reset_index_has_names = pd_release_version >= (1, 5)
26
+
25
27
 
26
28
  class DataFrameResetIndex(DataFrameOperator, DataFrameOperatorMixin):
27
29
  _op_type_ = opcodes.RESET_INDEX
@@ -29,8 +31,10 @@ class DataFrameResetIndex(DataFrameOperator, DataFrameOperatorMixin):
29
31
  level = AnyField("level", default=None)
30
32
  drop = BoolField("drop", default=False)
31
33
  name = AnyField("name", default=None)
32
- col_level = AnyField("col_level", default=0)
33
- col_fill = AnyField("col_fill", default="")
34
+ col_level = AnyField("col_level", default=None)
35
+ col_fill = AnyField("col_fill", default=None)
36
+ incremental_index = BoolField("incremental_index", default=False)
37
+ names = AnyField("names", default=None)
34
38
 
35
39
  def __init__(self, output_types=None, **kwargs):
36
40
  super().__init__(_output_types=output_types, **kwargs)
@@ -76,9 +80,26 @@ class DataFrameResetIndex(DataFrameOperator, DataFrameOperatorMixin):
76
80
  else:
77
81
  empty_df = build_empty_df(a.dtypes)
78
82
  empty_df.index = a.index_value.to_pandas()[:0]
79
- empty_df = empty_df.reset_index(
80
- level=self.level, col_level=self.col_level, col_fill=self.col_fill
81
- )
83
+
84
+ if self.names and _reset_index_has_names:
85
+ empty_df = empty_df.reset_index(
86
+ level=self.level,
87
+ col_level=self.col_level,
88
+ col_fill=self.col_fill,
89
+ names=self.names,
90
+ )
91
+ else:
92
+ empty_df = empty_df.reset_index(
93
+ level=self.level, col_level=self.col_level, col_fill=self.col_fill
94
+ )
95
+ if self.names:
96
+ names = (
97
+ [self.names] if not isinstance(self.names, list) else self.names
98
+ )
99
+ cols = list(empty_df.columns)
100
+ cols[: len(names)] = names
101
+ empty_df.columns = pd.Index(cols, name=empty_df.columns.name)
102
+
82
103
  shape = (a.shape[0], len(empty_df.columns))
83
104
  columns_value = parse_index(empty_df.columns, store_data=True)
84
105
  dtypes = empty_df.dtypes
@@ -105,6 +126,8 @@ def df_reset_index(
105
126
  inplace=False,
106
127
  col_level=0,
107
128
  col_fill="",
129
+ names=None,
130
+ incremental_index=False,
108
131
  ):
109
132
  """
110
133
  Reset the index, or a level of it.
@@ -255,6 +278,8 @@ def df_reset_index(
255
278
  drop=drop,
256
279
  col_level=col_level,
257
280
  col_fill=col_fill,
281
+ names=names,
282
+ incremental_index=incremental_index,
258
283
  output_types=[OutputType.dataframe],
259
284
  )
260
285
  ret = op(df)
@@ -270,6 +295,7 @@ def series_reset_index(
270
295
  drop=False,
271
296
  name=no_default,
272
297
  inplace=False,
298
+ incremental_index=False,
273
299
  ):
274
300
  """
275
301
  Generate a new DataFrame or Series with the index reset.
@@ -389,6 +415,7 @@ def series_reset_index(
389
415
  level=level,
390
416
  drop=drop,
391
417
  name=name,
418
+ incremental_index=incremental_index,
392
419
  output_types=[OutputType.series if drop else OutputType.dataframe],
393
420
  )
394
421
  ret = op(series)
@@ -25,6 +25,7 @@ from ...serialization.serializables import (
25
25
  Float64Field,
26
26
  Int8Field,
27
27
  Int64Field,
28
+ KeyField,
28
29
  )
29
30
  from ...tensor.random import RandomStateField
30
31
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
@@ -41,6 +42,11 @@ class DataFrameSample(DataFrameOperator, DataFrameOperatorMixin):
41
42
  axis = Int8Field("axis", default=None)
42
43
  seed = Int64Field("seed", default=None)
43
44
  random_state = RandomStateField("random_state", default=None)
45
+ always_multinomial = BoolField("always_multinomial", default=None)
46
+
47
+ # for chunks
48
+ # num of instances for chunks
49
+ chunk_samples = KeyField("chunk_samples", default=None)
44
50
 
45
51
  def __init__(self, random_state=None, seed=None, **kw):
46
52
  if random_state is None:
@@ -54,6 +60,8 @@ class DataFrameSample(DataFrameOperator, DataFrameOperatorMixin):
54
60
  next(it)
55
61
  if isinstance(op.weights, ENTITY_TYPE):
56
62
  op.weights = next(it)
63
+ if isinstance(op.chunk_samples, ENTITY_TYPE):
64
+ op.chunk_samples = next(it)
57
65
 
58
66
  def __call__(self, df):
59
67
  params = df.params