maxframe 1.3.0__cp310-cp310-win32.whl → 2.0.0__cp310-cp310-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (644)
  1. maxframe/_utils.cp310-win32.pyd +0 -0
  2. maxframe/_utils.pyi +21 -0
  3. maxframe/_utils.pyx +4 -3
  4. maxframe/codegen/__init__.py +27 -0
  5. maxframe/{codegen.py → codegen/core.py} +49 -43
  6. maxframe/codegen/spe/__init__.py +16 -0
  7. maxframe/codegen/spe/core.py +307 -0
  8. maxframe/codegen/spe/dataframe/__init__.py +37 -0
  9. maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
  10. maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
  11. maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
  12. maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
  13. maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
  14. maxframe/codegen/spe/dataframe/datasource.py +181 -0
  15. maxframe/codegen/spe/dataframe/datastore.py +204 -0
  16. maxframe/codegen/spe/dataframe/extensions.py +63 -0
  17. maxframe/codegen/spe/dataframe/fetch.py +26 -0
  18. maxframe/codegen/spe/dataframe/groupby.py +224 -0
  19. maxframe/codegen/spe/dataframe/indexing.py +238 -0
  20. maxframe/codegen/spe/dataframe/merge.py +73 -0
  21. maxframe/codegen/spe/dataframe/misc.py +286 -0
  22. maxframe/codegen/spe/dataframe/missing.py +64 -0
  23. maxframe/codegen/spe/dataframe/reduction.py +160 -0
  24. maxframe/codegen/spe/dataframe/sort.py +83 -0
  25. maxframe/codegen/spe/dataframe/statistics.py +46 -0
  26. maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
  27. maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
  29. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
  30. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
  31. maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
  32. maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
  33. maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
  34. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
  35. maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
  36. maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
  37. maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
  38. maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
  39. maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
  40. maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
  41. maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
  42. maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
  43. maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
  44. maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
  45. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
  46. maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
  47. maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
  48. maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
  49. maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
  50. maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
  51. maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
  52. maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
  53. maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
  54. maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
  55. maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
  56. maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
  57. maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
  58. maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
  59. maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
  60. maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
  61. maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
  62. maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
  63. maxframe/codegen/spe/dataframe/tseries.py +46 -0
  64. maxframe/codegen/spe/dataframe/udf.py +62 -0
  65. maxframe/codegen/spe/dataframe/value_counts.py +31 -0
  66. maxframe/codegen/spe/dataframe/window.py +65 -0
  67. maxframe/codegen/spe/learn/__init__.py +15 -0
  68. maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
  69. maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
  70. maxframe/codegen/spe/learn/contrib/models.py +41 -0
  71. maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
  72. maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
  73. maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
  74. maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
  75. maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
  76. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
  77. maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
  78. maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
  79. maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
  80. maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
  81. maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
  82. maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
  83. maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
  84. maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
  85. maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
  86. maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
  87. maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
  88. maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
  89. maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
  90. maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
  91. maxframe/codegen/spe/learn/utils/__init__.py +15 -0
  92. maxframe/codegen/spe/learn/utils/checks.py +55 -0
  93. maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
  94. maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
  95. maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
  96. maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
  97. maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
  98. maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
  99. maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
  100. maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
  101. maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
  102. maxframe/codegen/spe/learn/utils/validation.py +35 -0
  103. maxframe/codegen/spe/objects.py +26 -0
  104. maxframe/codegen/spe/remote.py +29 -0
  105. maxframe/codegen/spe/tensor/__init__.py +28 -0
  106. maxframe/codegen/spe/tensor/arithmetic.py +95 -0
  107. maxframe/codegen/spe/tensor/core.py +41 -0
  108. maxframe/codegen/spe/tensor/datasource.py +165 -0
  109. maxframe/codegen/spe/tensor/extensions.py +35 -0
  110. maxframe/codegen/spe/tensor/fetch.py +26 -0
  111. maxframe/codegen/spe/tensor/indexing.py +63 -0
  112. maxframe/codegen/spe/tensor/linalg.py +63 -0
  113. maxframe/codegen/spe/tensor/merge.py +31 -0
  114. maxframe/codegen/spe/tensor/misc.py +121 -0
  115. maxframe/codegen/spe/tensor/random.py +29 -0
  116. maxframe/codegen/spe/tensor/reduction.py +39 -0
  117. maxframe/codegen/spe/tensor/reshape.py +26 -0
  118. maxframe/codegen/spe/tensor/sort.py +42 -0
  119. maxframe/codegen/spe/tensor/special.py +35 -0
  120. maxframe/codegen/spe/tensor/statistics.py +24 -0
  121. maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
  122. maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
  123. maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
  124. maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
  125. maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
  126. maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
  127. maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
  128. maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
  129. maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
  130. maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
  131. maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
  132. maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
  133. maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
  134. maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
  135. maxframe/codegen/spe/tests/__init__.py +13 -0
  136. maxframe/codegen/spe/tests/test_remote.py +29 -0
  137. maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
  138. maxframe/codegen/spe/utils.py +54 -0
  139. maxframe/codegen/tests/__init__.py +13 -0
  140. maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
  141. maxframe/config/__init__.py +1 -1
  142. maxframe/config/config.py +50 -23
  143. maxframe/config/tests/test_config.py +4 -12
  144. maxframe/config/validators.py +5 -0
  145. maxframe/conftest.py +38 -10
  146. maxframe/core/__init__.py +1 -0
  147. maxframe/core/context.py +110 -0
  148. maxframe/core/entity/__init__.py +1 -0
  149. maxframe/core/entity/core.py +0 -7
  150. maxframe/core/entity/objects.py +19 -5
  151. maxframe/core/entity/output_types.py +11 -0
  152. maxframe/core/entity/tests/test_objects.py +11 -12
  153. maxframe/core/entity/tileables.py +3 -1
  154. maxframe/core/entity/utils.py +15 -0
  155. maxframe/core/graph/__init__.py +6 -1
  156. maxframe/core/graph/builder/base.py +5 -1
  157. maxframe/core/graph/core.cp310-win32.pyd +0 -0
  158. maxframe/core/graph/core.pyx +17 -6
  159. maxframe/core/graph/entity.py +18 -6
  160. maxframe/core/operator/__init__.py +8 -3
  161. maxframe/core/operator/base.py +35 -12
  162. maxframe/core/operator/core.py +37 -14
  163. maxframe/core/operator/fetch.py +5 -18
  164. maxframe/core/operator/objects.py +0 -20
  165. maxframe/core/operator/shuffle.py +6 -72
  166. maxframe/dataframe/__init__.py +1 -0
  167. maxframe/dataframe/accessors/datetime_/core.py +7 -4
  168. maxframe/dataframe/accessors/string_/core.py +9 -6
  169. maxframe/dataframe/arithmetic/core.py +31 -20
  170. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
  171. maxframe/dataframe/core.py +98 -91
  172. maxframe/dataframe/datasource/core.py +8 -1
  173. maxframe/dataframe/datasource/date_range.py +8 -0
  174. maxframe/dataframe/datasource/from_index.py +9 -5
  175. maxframe/dataframe/datasource/from_records.py +9 -2
  176. maxframe/dataframe/datasource/from_tensor.py +32 -21
  177. maxframe/dataframe/datasource/read_csv.py +8 -2
  178. maxframe/dataframe/datasource/read_odps_query.py +109 -19
  179. maxframe/dataframe/datasource/read_odps_table.py +20 -5
  180. maxframe/dataframe/datasource/read_parquet.py +8 -3
  181. maxframe/dataframe/datasource/tests/test_datasource.py +80 -1
  182. maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
  183. maxframe/dataframe/datastore/to_csv.py +7 -3
  184. maxframe/dataframe/datastore/to_odps.py +42 -6
  185. maxframe/dataframe/extensions/__init__.py +6 -1
  186. maxframe/dataframe/extensions/apply_chunk.py +96 -136
  187. maxframe/dataframe/extensions/flatjson.py +3 -2
  188. maxframe/dataframe/extensions/flatmap.py +15 -7
  189. maxframe/dataframe/fetch/core.py +12 -1
  190. maxframe/dataframe/groupby/__init__.py +7 -0
  191. maxframe/dataframe/groupby/aggregation.py +62 -9
  192. maxframe/dataframe/groupby/apply.py +50 -74
  193. maxframe/dataframe/groupby/apply_chunk.py +393 -0
  194. maxframe/dataframe/groupby/core.py +80 -17
  195. maxframe/dataframe/groupby/extensions.py +26 -0
  196. maxframe/dataframe/groupby/fill.py +9 -4
  197. maxframe/dataframe/groupby/sample.py +7 -7
  198. maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
  199. maxframe/dataframe/groupby/transform.py +57 -54
  200. maxframe/dataframe/indexing/align.py +7 -6
  201. maxframe/dataframe/indexing/getitem.py +9 -8
  202. maxframe/dataframe/indexing/iloc.py +28 -23
  203. maxframe/dataframe/indexing/insert.py +7 -3
  204. maxframe/dataframe/indexing/loc.py +9 -8
  205. maxframe/dataframe/indexing/reindex.py +36 -30
  206. maxframe/dataframe/indexing/rename_axis.py +18 -10
  207. maxframe/dataframe/indexing/reset_index.py +0 -2
  208. maxframe/dataframe/indexing/sample.py +13 -9
  209. maxframe/dataframe/indexing/set_axis.py +9 -6
  210. maxframe/dataframe/indexing/setitem.py +8 -5
  211. maxframe/dataframe/indexing/where.py +12 -9
  212. maxframe/dataframe/merge/__init__.py +0 -1
  213. maxframe/dataframe/merge/concat.py +10 -31
  214. maxframe/dataframe/merge/merge.py +2 -24
  215. maxframe/dataframe/misc/__init__.py +6 -0
  216. maxframe/dataframe/misc/_duplicate.py +7 -3
  217. maxframe/dataframe/misc/apply.py +106 -139
  218. maxframe/dataframe/misc/astype.py +3 -2
  219. maxframe/dataframe/misc/case_when.py +11 -7
  220. maxframe/dataframe/misc/cut.py +11 -10
  221. maxframe/dataframe/misc/describe.py +7 -3
  222. maxframe/dataframe/misc/drop.py +13 -11
  223. maxframe/dataframe/misc/eval.py +0 -2
  224. maxframe/dataframe/misc/get_dummies.py +78 -49
  225. maxframe/dataframe/misc/isin.py +13 -10
  226. maxframe/dataframe/misc/map.py +21 -6
  227. maxframe/dataframe/misc/melt.py +8 -1
  228. maxframe/dataframe/misc/pivot.py +232 -0
  229. maxframe/dataframe/misc/pivot_table.py +52 -40
  230. maxframe/dataframe/misc/rechunk.py +59 -0
  231. maxframe/dataframe/misc/shift.py +7 -4
  232. maxframe/dataframe/misc/stack.py +5 -3
  233. maxframe/dataframe/misc/tests/test_misc.py +167 -1
  234. maxframe/dataframe/misc/transform.py +63 -65
  235. maxframe/dataframe/misc/value_counts.py +7 -4
  236. maxframe/dataframe/missing/dropna.py +16 -7
  237. maxframe/dataframe/missing/fillna.py +18 -10
  238. maxframe/dataframe/missing/replace.py +10 -6
  239. maxframe/dataframe/missing/tests/test_missing.py +2 -2
  240. maxframe/dataframe/operators.py +1 -27
  241. maxframe/dataframe/reduction/aggregation.py +128 -3
  242. maxframe/dataframe/reduction/core.py +20 -6
  243. maxframe/dataframe/reduction/median.py +1 -1
  244. maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
  245. maxframe/dataframe/reduction/unique.py +53 -7
  246. maxframe/dataframe/statistics/corr.py +9 -6
  247. maxframe/dataframe/statistics/quantile.py +9 -6
  248. maxframe/dataframe/tseries/to_datetime.py +6 -4
  249. maxframe/dataframe/utils.py +219 -31
  250. maxframe/dataframe/window/rolling.py +7 -4
  251. maxframe/env.py +1 -0
  252. maxframe/errors.py +9 -0
  253. maxframe/extension.py +13 -2
  254. maxframe/io/objects/core.py +67 -51
  255. maxframe/io/objects/tensor.py +73 -17
  256. maxframe/io/objects/tests/test_object_io.py +10 -55
  257. maxframe/io/odpsio/arrow.py +15 -2
  258. maxframe/io/odpsio/schema.py +43 -13
  259. maxframe/io/odpsio/tableio.py +63 -11
  260. maxframe/io/odpsio/tests/test_arrow.py +1 -2
  261. maxframe/io/odpsio/tests/test_schema.py +114 -1
  262. maxframe/io/odpsio/tests/test_tableio.py +42 -0
  263. maxframe/io/odpsio/tests/test_volumeio.py +21 -58
  264. maxframe/io/odpsio/volumeio.py +23 -8
  265. maxframe/learn/__init__.py +2 -2
  266. maxframe/learn/contrib/__init__.py +2 -2
  267. maxframe/learn/contrib/graph/connected_components.py +2 -1
  268. maxframe/learn/contrib/lightgbm/__init__.py +33 -0
  269. maxframe/learn/contrib/lightgbm/_predict.py +138 -0
  270. maxframe/learn/contrib/lightgbm/_train.py +163 -0
  271. maxframe/learn/contrib/lightgbm/callback.py +114 -0
  272. maxframe/learn/contrib/lightgbm/classifier.py +199 -0
  273. maxframe/learn/contrib/lightgbm/core.py +372 -0
  274. maxframe/learn/contrib/lightgbm/dataset.py +153 -0
  275. maxframe/learn/contrib/lightgbm/regressor.py +29 -0
  276. maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
  277. maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
  278. maxframe/learn/contrib/llm/models/dashscope.py +34 -0
  279. maxframe/learn/contrib/llm/models/managed.py +15 -0
  280. maxframe/learn/contrib/llm/multi_modal.py +92 -0
  281. maxframe/learn/contrib/llm/text.py +21 -5
  282. maxframe/learn/contrib/models.py +38 -9
  283. maxframe/learn/contrib/utils.py +55 -0
  284. maxframe/learn/contrib/xgboost/callback.py +86 -0
  285. maxframe/learn/contrib/xgboost/classifier.py +26 -30
  286. maxframe/learn/contrib/xgboost/core.py +54 -42
  287. maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
  288. maxframe/learn/contrib/xgboost/predict.py +13 -8
  289. maxframe/learn/contrib/xgboost/regressor.py +28 -27
  290. maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
  291. maxframe/learn/contrib/xgboost/train.py +59 -16
  292. maxframe/learn/core.py +252 -0
  293. maxframe/learn/datasets/__init__.py +20 -0
  294. maxframe/learn/datasets/samples_generator.py +628 -0
  295. maxframe/learn/linear_model/__init__.py +15 -0
  296. maxframe/learn/linear_model/_base.py +163 -0
  297. maxframe/learn/linear_model/_lin_reg.py +175 -0
  298. maxframe/learn/metrics/__init__.py +25 -0
  299. maxframe/learn/metrics/_check_targets.py +95 -0
  300. maxframe/learn/metrics/_classification.py +1121 -0
  301. maxframe/learn/metrics/_regression.py +256 -0
  302. maxframe/learn/model_selection/__init__.py +15 -0
  303. maxframe/learn/model_selection/_split.py +451 -0
  304. maxframe/learn/model_selection/tests/__init__.py +13 -0
  305. maxframe/learn/model_selection/tests/test_split.py +156 -0
  306. maxframe/learn/preprocessing/__init__.py +16 -0
  307. maxframe/learn/preprocessing/_data/__init__.py +17 -0
  308. maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
  309. maxframe/learn/preprocessing/_data/normalize.py +127 -0
  310. maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
  311. maxframe/learn/preprocessing/_data/utils.py +79 -0
  312. maxframe/learn/preprocessing/_label/__init__.py +16 -0
  313. maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
  314. maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
  315. maxframe/learn/utils/__init__.py +4 -0
  316. maxframe/learn/utils/_encode.py +314 -0
  317. maxframe/learn/utils/checks.py +161 -0
  318. maxframe/learn/utils/core.py +33 -0
  319. maxframe/learn/utils/extmath.py +176 -0
  320. maxframe/learn/utils/multiclass.py +292 -0
  321. maxframe/learn/utils/shuffle.py +114 -0
  322. maxframe/learn/utils/sparsefuncs.py +87 -0
  323. maxframe/learn/utils/validation.py +775 -0
  324. maxframe/lib/__init__.py +0 -2
  325. maxframe/lib/compat.py +145 -0
  326. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  327. maxframe/lib/mmh3.cp310-win32.pyd +0 -0
  328. maxframe/lib/sparse/__init__.py +10 -15
  329. maxframe/lib/sparse/array.py +45 -33
  330. maxframe/lib/sparse/core.py +0 -2
  331. maxframe/lib/sparse/linalg.py +31 -0
  332. maxframe/lib/sparse/matrix.py +5 -2
  333. maxframe/lib/sparse/tests/__init__.py +0 -2
  334. maxframe/lib/sparse/tests/test_sparse.py +53 -53
  335. maxframe/lib/sparse/vector.py +0 -2
  336. maxframe/mixin.py +59 -2
  337. maxframe/opcodes.py +13 -5
  338. maxframe/protocol.py +67 -14
  339. maxframe/remote/core.py +16 -14
  340. maxframe/remote/run_script.py +6 -3
  341. maxframe/serialization/__init__.py +2 -0
  342. maxframe/serialization/core.cp310-win32.pyd +0 -0
  343. maxframe/serialization/core.pxd +3 -0
  344. maxframe/serialization/core.pyi +3 -1
  345. maxframe/serialization/core.pyx +82 -4
  346. maxframe/serialization/pandas.py +5 -1
  347. maxframe/serialization/serializables/core.py +6 -5
  348. maxframe/serialization/serializables/field.py +2 -2
  349. maxframe/serialization/serializables/tests/test_field_type.py +3 -5
  350. maxframe/serialization/tests/test_serial.py +27 -0
  351. maxframe/session.py +4 -71
  352. maxframe/sperunner.py +165 -0
  353. maxframe/tensor/__init__.py +35 -2
  354. maxframe/tensor/arithmetic/__init__.py +2 -4
  355. maxframe/tensor/arithmetic/abs.py +0 -2
  356. maxframe/tensor/arithmetic/absolute.py +0 -2
  357. maxframe/tensor/arithmetic/add.py +34 -4
  358. maxframe/tensor/arithmetic/angle.py +0 -2
  359. maxframe/tensor/arithmetic/arccos.py +1 -4
  360. maxframe/tensor/arithmetic/arccosh.py +1 -3
  361. maxframe/tensor/arithmetic/arcsin.py +0 -2
  362. maxframe/tensor/arithmetic/arcsinh.py +0 -2
  363. maxframe/tensor/arithmetic/arctan.py +0 -2
  364. maxframe/tensor/arithmetic/arctan2.py +0 -2
  365. maxframe/tensor/arithmetic/arctanh.py +0 -2
  366. maxframe/tensor/arithmetic/around.py +0 -2
  367. maxframe/tensor/arithmetic/bitand.py +0 -2
  368. maxframe/tensor/arithmetic/bitor.py +1 -3
  369. maxframe/tensor/arithmetic/bitxor.py +1 -3
  370. maxframe/tensor/arithmetic/cbrt.py +0 -2
  371. maxframe/tensor/arithmetic/ceil.py +0 -2
  372. maxframe/tensor/arithmetic/clip.py +13 -13
  373. maxframe/tensor/arithmetic/conj.py +0 -2
  374. maxframe/tensor/arithmetic/copysign.py +0 -2
  375. maxframe/tensor/arithmetic/core.py +47 -39
  376. maxframe/tensor/arithmetic/cos.py +1 -3
  377. maxframe/tensor/arithmetic/cosh.py +0 -2
  378. maxframe/tensor/arithmetic/deg2rad.py +0 -2
  379. maxframe/tensor/arithmetic/degrees.py +0 -2
  380. maxframe/tensor/arithmetic/divide.py +0 -2
  381. maxframe/tensor/arithmetic/equal.py +0 -2
  382. maxframe/tensor/arithmetic/exp.py +1 -3
  383. maxframe/tensor/arithmetic/exp2.py +0 -2
  384. maxframe/tensor/arithmetic/expm1.py +0 -2
  385. maxframe/tensor/arithmetic/fabs.py +0 -2
  386. maxframe/tensor/arithmetic/fix.py +0 -2
  387. maxframe/tensor/arithmetic/float_power.py +0 -2
  388. maxframe/tensor/arithmetic/floor.py +0 -2
  389. maxframe/tensor/arithmetic/floordiv.py +0 -2
  390. maxframe/tensor/arithmetic/fmax.py +0 -2
  391. maxframe/tensor/arithmetic/fmin.py +0 -2
  392. maxframe/tensor/arithmetic/fmod.py +0 -2
  393. maxframe/tensor/arithmetic/frexp.py +6 -2
  394. maxframe/tensor/arithmetic/greater.py +0 -2
  395. maxframe/tensor/arithmetic/greater_equal.py +0 -2
  396. maxframe/tensor/arithmetic/hypot.py +0 -2
  397. maxframe/tensor/arithmetic/i0.py +1 -3
  398. maxframe/tensor/arithmetic/imag.py +0 -2
  399. maxframe/tensor/arithmetic/invert.py +1 -3
  400. maxframe/tensor/arithmetic/isclose.py +0 -2
  401. maxframe/tensor/arithmetic/iscomplex.py +0 -2
  402. maxframe/tensor/arithmetic/isfinite.py +1 -3
  403. maxframe/tensor/arithmetic/isinf.py +0 -2
  404. maxframe/tensor/arithmetic/isnan.py +0 -2
  405. maxframe/tensor/arithmetic/isreal.py +0 -2
  406. maxframe/tensor/arithmetic/ldexp.py +0 -2
  407. maxframe/tensor/arithmetic/less.py +0 -2
  408. maxframe/tensor/arithmetic/less_equal.py +0 -2
  409. maxframe/tensor/arithmetic/log.py +1 -3
  410. maxframe/tensor/arithmetic/log10.py +1 -3
  411. maxframe/tensor/arithmetic/log1p.py +1 -3
  412. maxframe/tensor/arithmetic/log2.py +1 -3
  413. maxframe/tensor/arithmetic/logaddexp.py +0 -2
  414. maxframe/tensor/arithmetic/logaddexp2.py +0 -2
  415. maxframe/tensor/arithmetic/logical_and.py +0 -2
  416. maxframe/tensor/arithmetic/logical_not.py +1 -3
  417. maxframe/tensor/arithmetic/logical_or.py +0 -2
  418. maxframe/tensor/arithmetic/logical_xor.py +0 -2
  419. maxframe/tensor/arithmetic/lshift.py +0 -2
  420. maxframe/tensor/arithmetic/maximum.py +0 -2
  421. maxframe/tensor/arithmetic/minimum.py +0 -2
  422. maxframe/tensor/arithmetic/mod.py +0 -2
  423. maxframe/tensor/arithmetic/modf.py +6 -2
  424. maxframe/tensor/arithmetic/multiply.py +37 -4
  425. maxframe/tensor/arithmetic/nan_to_num.py +0 -2
  426. maxframe/tensor/arithmetic/negative.py +0 -2
  427. maxframe/tensor/arithmetic/nextafter.py +0 -2
  428. maxframe/tensor/arithmetic/not_equal.py +0 -2
  429. maxframe/tensor/arithmetic/positive.py +0 -2
  430. maxframe/tensor/arithmetic/power.py +0 -2
  431. maxframe/tensor/arithmetic/rad2deg.py +0 -2
  432. maxframe/tensor/arithmetic/radians.py +0 -2
  433. maxframe/tensor/arithmetic/real.py +0 -2
  434. maxframe/tensor/arithmetic/reciprocal.py +5 -3
  435. maxframe/tensor/arithmetic/rint.py +1 -3
  436. maxframe/tensor/arithmetic/rshift.py +0 -2
  437. maxframe/tensor/arithmetic/setimag.py +0 -2
  438. maxframe/tensor/arithmetic/setreal.py +0 -2
  439. maxframe/tensor/arithmetic/sign.py +0 -2
  440. maxframe/tensor/arithmetic/signbit.py +0 -2
  441. maxframe/tensor/arithmetic/sin.py +0 -2
  442. maxframe/tensor/arithmetic/sinc.py +1 -3
  443. maxframe/tensor/arithmetic/sinh.py +0 -2
  444. maxframe/tensor/arithmetic/spacing.py +0 -2
  445. maxframe/tensor/arithmetic/sqrt.py +0 -2
  446. maxframe/tensor/arithmetic/square.py +0 -2
  447. maxframe/tensor/arithmetic/subtract.py +4 -2
  448. maxframe/tensor/arithmetic/tan.py +0 -2
  449. maxframe/tensor/arithmetic/tanh.py +0 -2
  450. maxframe/tensor/arithmetic/tests/__init__.py +0 -2
  451. maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
  452. maxframe/tensor/arithmetic/truediv.py +0 -2
  453. maxframe/tensor/arithmetic/trunc.py +0 -2
  454. maxframe/tensor/arithmetic/utils.py +32 -6
  455. maxframe/tensor/array_utils.py +3 -25
  456. maxframe/tensor/core.py +6 -6
  457. maxframe/tensor/datasource/__init__.py +10 -2
  458. maxframe/tensor/datasource/arange.py +0 -2
  459. maxframe/tensor/datasource/array.py +3 -22
  460. maxframe/tensor/datasource/core.py +15 -10
  461. maxframe/tensor/datasource/diag.py +140 -0
  462. maxframe/tensor/datasource/diagflat.py +69 -0
  463. maxframe/tensor/datasource/empty.py +0 -2
  464. maxframe/tensor/datasource/eye.py +95 -0
  465. maxframe/tensor/datasource/from_dataframe.py +0 -2
  466. maxframe/tensor/datasource/from_dense.py +0 -17
  467. maxframe/tensor/datasource/from_sparse.py +0 -2
  468. maxframe/tensor/datasource/full.py +0 -2
  469. maxframe/tensor/datasource/identity.py +54 -0
  470. maxframe/tensor/datasource/indices.py +115 -0
  471. maxframe/tensor/datasource/linspace.py +140 -0
  472. maxframe/tensor/datasource/meshgrid.py +135 -0
  473. maxframe/tensor/datasource/ones.py +8 -3
  474. maxframe/tensor/datasource/tests/test_datasource.py +32 -1
  475. maxframe/tensor/datasource/tri_array.py +107 -0
  476. maxframe/tensor/datasource/zeros.py +7 -3
  477. maxframe/tensor/extensions/__init__.py +31 -0
  478. maxframe/tensor/extensions/accessor.py +25 -0
  479. maxframe/tensor/extensions/apply_chunk.py +137 -0
  480. maxframe/tensor/indexing/__init__.py +1 -1
  481. maxframe/tensor/indexing/choose.py +8 -6
  482. maxframe/tensor/indexing/compress.py +0 -2
  483. maxframe/tensor/indexing/extract.py +0 -2
  484. maxframe/tensor/indexing/fill_diagonal.py +9 -6
  485. maxframe/tensor/indexing/flatnonzero.py +1 -3
  486. maxframe/tensor/indexing/getitem.py +10 -43
  487. maxframe/tensor/indexing/nonzero.py +2 -4
  488. maxframe/tensor/indexing/setitem.py +19 -9
  489. maxframe/tensor/indexing/slice.py +6 -3
  490. maxframe/tensor/indexing/take.py +0 -2
  491. maxframe/tensor/indexing/tests/__init__.py +0 -2
  492. maxframe/tensor/indexing/tests/test_indexing.py +0 -2
  493. maxframe/tensor/indexing/unravel_index.py +6 -6
  494. maxframe/tensor/lib/__init__.py +16 -0
  495. maxframe/tensor/lib/index_tricks.py +404 -0
  496. maxframe/tensor/linalg/__init__.py +36 -0
  497. maxframe/tensor/linalg/dot.py +145 -0
  498. maxframe/tensor/linalg/inner.py +36 -0
  499. maxframe/tensor/linalg/inv.py +83 -0
  500. maxframe/tensor/linalg/lu.py +115 -0
  501. maxframe/tensor/linalg/matmul.py +225 -0
  502. maxframe/tensor/linalg/qr.py +124 -0
  503. maxframe/tensor/linalg/solve_triangular.py +103 -0
  504. maxframe/tensor/linalg/svd.py +167 -0
  505. maxframe/tensor/linalg/tensordot.py +213 -0
  506. maxframe/tensor/linalg/vdot.py +73 -0
  507. maxframe/tensor/merge/__init__.py +4 -0
  508. maxframe/tensor/merge/append.py +74 -0
  509. maxframe/tensor/merge/column_stack.py +63 -0
  510. maxframe/tensor/merge/concatenate.py +3 -2
  511. maxframe/tensor/merge/dstack.py +71 -0
  512. maxframe/tensor/merge/hstack.py +70 -0
  513. maxframe/tensor/merge/stack.py +0 -2
  514. maxframe/tensor/merge/tests/test_merge.py +0 -2
  515. maxframe/tensor/misc/__init__.py +18 -5
  516. maxframe/tensor/misc/astype.py +10 -8
  517. maxframe/tensor/misc/broadcast_to.py +1 -1
  518. maxframe/tensor/misc/copy.py +64 -0
  519. maxframe/tensor/misc/diff.py +115 -0
  520. maxframe/tensor/misc/flatten.py +63 -0
  521. maxframe/tensor/misc/in1d.py +94 -0
  522. maxframe/tensor/misc/isin.py +130 -0
  523. maxframe/tensor/misc/ndim.py +53 -0
  524. maxframe/tensor/misc/ravel.py +0 -2
  525. maxframe/tensor/misc/repeat.py +129 -0
  526. maxframe/tensor/misc/searchsorted.py +147 -0
  527. maxframe/tensor/misc/setdiff1d.py +58 -0
  528. maxframe/tensor/misc/squeeze.py +117 -0
  529. maxframe/tensor/misc/swapaxes.py +113 -0
  530. maxframe/tensor/misc/tests/test_misc.py +0 -2
  531. maxframe/tensor/misc/transpose.py +8 -4
  532. maxframe/tensor/misc/trapezoid.py +123 -0
  533. maxframe/tensor/misc/unique.py +0 -1
  534. maxframe/tensor/misc/where.py +10 -8
  535. maxframe/tensor/operators.py +0 -34
  536. maxframe/tensor/random/__init__.py +3 -5
  537. maxframe/tensor/random/binomial.py +0 -2
  538. maxframe/tensor/random/bytes.py +0 -2
  539. maxframe/tensor/random/chisquare.py +0 -2
  540. maxframe/tensor/random/choice.py +9 -8
  541. maxframe/tensor/random/core.py +20 -5
  542. maxframe/tensor/random/dirichlet.py +0 -2
  543. maxframe/tensor/random/exponential.py +0 -2
  544. maxframe/tensor/random/f.py +2 -4
  545. maxframe/tensor/random/gamma.py +0 -2
  546. maxframe/tensor/random/geometric.py +0 -2
  547. maxframe/tensor/random/gumbel.py +0 -2
  548. maxframe/tensor/random/hypergeometric.py +0 -2
  549. maxframe/tensor/random/laplace.py +2 -4
  550. maxframe/tensor/random/logistic.py +0 -2
  551. maxframe/tensor/random/lognormal.py +0 -2
  552. maxframe/tensor/random/logseries.py +0 -2
  553. maxframe/tensor/random/multinomial.py +0 -2
  554. maxframe/tensor/random/multivariate_normal.py +0 -2
  555. maxframe/tensor/random/negative_binomial.py +0 -2
  556. maxframe/tensor/random/noncentral_chisquare.py +0 -2
  557. maxframe/tensor/random/noncentral_f.py +1 -3
  558. maxframe/tensor/random/normal.py +0 -2
  559. maxframe/tensor/random/pareto.py +0 -2
  560. maxframe/tensor/random/permutation.py +6 -3
  561. maxframe/tensor/random/poisson.py +0 -2
  562. maxframe/tensor/random/power.py +0 -2
  563. maxframe/tensor/random/rand.py +0 -2
  564. maxframe/tensor/random/randint.py +0 -2
  565. maxframe/tensor/random/randn.py +0 -2
  566. maxframe/tensor/random/random_integers.py +0 -2
  567. maxframe/tensor/random/random_sample.py +0 -2
  568. maxframe/tensor/random/rayleigh.py +0 -2
  569. maxframe/tensor/random/standard_cauchy.py +0 -2
  570. maxframe/tensor/random/standard_exponential.py +0 -2
  571. maxframe/tensor/random/standard_gamma.py +0 -2
  572. maxframe/tensor/random/standard_normal.py +0 -2
  573. maxframe/tensor/random/standard_t.py +0 -2
  574. maxframe/tensor/random/tests/__init__.py +0 -2
  575. maxframe/tensor/random/tests/test_random.py +0 -2
  576. maxframe/tensor/random/triangular.py +0 -2
  577. maxframe/tensor/random/uniform.py +0 -2
  578. maxframe/tensor/random/vonmises.py +0 -2
  579. maxframe/tensor/random/wald.py +0 -2
  580. maxframe/tensor/random/weibull.py +0 -2
  581. maxframe/tensor/random/zipf.py +0 -2
  582. maxframe/tensor/reduction/__init__.py +0 -2
  583. maxframe/tensor/reduction/all.py +0 -2
  584. maxframe/tensor/reduction/allclose.py +0 -2
  585. maxframe/tensor/reduction/any.py +0 -2
  586. maxframe/tensor/reduction/argmax.py +1 -3
  587. maxframe/tensor/reduction/argmin.py +1 -3
  588. maxframe/tensor/reduction/array_equal.py +0 -2
  589. maxframe/tensor/reduction/core.py +0 -2
  590. maxframe/tensor/reduction/count_nonzero.py +0 -2
  591. maxframe/tensor/reduction/cumprod.py +0 -2
  592. maxframe/tensor/reduction/cumsum.py +0 -2
  593. maxframe/tensor/reduction/max.py +0 -2
  594. maxframe/tensor/reduction/mean.py +0 -2
  595. maxframe/tensor/reduction/min.py +0 -2
  596. maxframe/tensor/reduction/nanargmax.py +0 -2
  597. maxframe/tensor/reduction/nanargmin.py +0 -2
  598. maxframe/tensor/reduction/nancumprod.py +0 -2
  599. maxframe/tensor/reduction/nancumsum.py +0 -2
  600. maxframe/tensor/reduction/nanmax.py +0 -2
  601. maxframe/tensor/reduction/nanmean.py +0 -2
  602. maxframe/tensor/reduction/nanmin.py +0 -2
  603. maxframe/tensor/reduction/nanprod.py +0 -2
  604. maxframe/tensor/reduction/nanstd.py +0 -2
  605. maxframe/tensor/reduction/nansum.py +0 -2
  606. maxframe/tensor/reduction/nanvar.py +0 -2
  607. maxframe/tensor/reduction/prod.py +0 -2
  608. maxframe/tensor/reduction/std.py +0 -2
  609. maxframe/tensor/reduction/sum.py +0 -2
  610. maxframe/tensor/reduction/tests/test_reduction.py +1 -4
  611. maxframe/tensor/reduction/var.py +0 -2
  612. maxframe/tensor/reshape/__init__.py +0 -2
  613. maxframe/tensor/reshape/reshape.py +6 -5
  614. maxframe/tensor/reshape/tests/__init__.py +0 -2
  615. maxframe/tensor/reshape/tests/test_reshape.py +0 -2
  616. maxframe/tensor/sort/__init__.py +16 -0
  617. maxframe/tensor/sort/argsort.py +150 -0
  618. maxframe/tensor/sort/sort.py +295 -0
  619. maxframe/tensor/special/__init__.py +37 -0
  620. maxframe/tensor/special/core.py +38 -0
  621. maxframe/tensor/special/misc.py +142 -0
  622. maxframe/tensor/special/statistical.py +56 -0
  623. maxframe/tensor/statistics/__init__.py +5 -0
  624. maxframe/tensor/statistics/average.py +143 -0
  625. maxframe/tensor/statistics/bincount.py +133 -0
  626. maxframe/tensor/statistics/quantile.py +10 -8
  627. maxframe/tensor/ufunc/__init__.py +0 -2
  628. maxframe/tensor/ufunc/ufunc.py +0 -2
  629. maxframe/tensor/utils.py +21 -3
  630. maxframe/tests/test_protocol.py +3 -3
  631. maxframe/tests/test_utils.py +210 -1
  632. maxframe/tests/utils.py +59 -1
  633. maxframe/udf.py +76 -6
  634. maxframe/utils.py +418 -17
  635. {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/METADATA +5 -1
  636. maxframe-2.0.0.dist-info/RECORD +939 -0
  637. {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/WHEEL +1 -1
  638. maxframe_client/clients/framedriver.py +19 -3
  639. maxframe_client/fetcher.py +113 -6
  640. maxframe_client/session/odps.py +173 -38
  641. maxframe_client/session/task.py +3 -1
  642. maxframe_client/tests/test_session.py +41 -5
  643. maxframe-1.3.0.dist-info/RECORD +0 -705
  644. {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/top_level.txt +0 -0
@@ -12,11 +12,19 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from typing import List
16
+
15
17
  import numpy as np
16
18
  import pandas as pd
17
19
 
18
- from ...core import OutputType
19
- from ...serialization.serializables import AnyField, BoolField, ListField, StringField
20
+ from ...core import EntityData, OutputType
21
+ from ...serialization.serializables import (
22
+ AnyField,
23
+ BoolField,
24
+ KeyField,
25
+ ListField,
26
+ StringField,
27
+ )
20
28
  from ..datasource.dataframe import from_pandas as from_pandas_df
21
29
  from ..datasource.series import from_pandas as from_pandas_series
22
30
  from ..initializer import Series as asseries
@@ -34,55 +42,26 @@ class DataFrameGetDummies(DataFrameOperator, DataFrameOperatorMixin):
34
42
  drop_first = BoolField("drop_first", default=None)
35
43
  dtype = AnyField("dtype", default=None)
36
44
 
37
- def __init__(self, **kws):
38
- super().__init__(**kws)
45
+ agg_results = KeyField("agg_results", default=None)
46
+
47
+ def __init__(self, **kw):
48
+ super().__init__(**kw)
39
49
  self.output_types = [OutputType.dataframe]
40
50
 
51
+ @classmethod
52
+ def _set_inputs(cls, op: "DataFrameGetDummies", inputs: List[EntityData]):
53
+ super()._set_inputs(op, inputs)
54
+ if op.agg_results is not None: # pragma: no branch
55
+ op.agg_results = inputs[-1]
56
+
41
57
  def __call__(self, data):
42
- if isinstance(data, (list, tuple)):
43
- data = asseries(data)
44
- elif isinstance(data, pd.Series):
45
- data = from_pandas_series(data)
46
- elif isinstance(data, pd.DataFrame):
47
- data = from_pandas_df(data)
48
-
49
- if self.prefix is not None:
50
- if isinstance(self.prefix, list):
51
- if self.columns is not None:
52
- encoding_col_num = len(self.columns)
53
- else:
54
- encoding_col_num = 0
55
- for dtype in data.dtypes.values:
56
- if dtype.kind in _encoding_dtype_kind:
57
- encoding_col_num += 1
58
- prefix_num = len(self.prefix)
59
- if prefix_num != encoding_col_num:
60
- raise ValueError(
61
- f"Length of 'prefix' ({prefix_num}) did not match "
62
- + f"the length of the columns being encoded ({encoding_col_num})"
63
- )
64
- elif isinstance(self.prefix, dict):
65
- if self.columns is not None:
66
- encoding_col_num = len(self.columns)
67
- prefix_num = len(self.prefix)
68
- if prefix_num != encoding_col_num:
69
- raise ValueError(
70
- f"Length of 'prefix' ({prefix_num}) did not match "
71
- + f"the length of the columns being encoded ({encoding_col_num})"
72
- )
73
- columns = self.prefix.keys()
74
- for columns_columnname, prefix_columnname in zip(
75
- columns, list(self.columns)
76
- ):
77
- if columns_columnname != prefix_columnname:
78
- raise KeyError(f"{columns_columnname}")
79
- else:
80
- self.columns = list(self.prefix.keys())
81
- # Convert prefix from dict to list, to simplify tile work
82
- self.prefix = list(self.prefix.values())
83
-
84
- return self.new_dataframe(
85
- [data],
58
+ if not self.columns:
59
+ self.agg_results = data.agg(["unique"])
60
+ else:
61
+ self.agg_results = data[self.columns].agg(["unique"])
62
+
63
+ return self.new_tileable(
64
+ [data, self.agg_results],
86
65
  shape=(np.nan, np.nan),
87
66
  dtypes=None,
88
67
  index_value=data.index_value,
@@ -127,7 +106,7 @@ def get_dummies(
127
106
  drop_first : bool, default False
128
107
  Whether to get k-1 dummies out of k categorical levels by removing the
129
108
  first level.
130
- dtype : dtype, default np.uint8
109
+ dtype : dtype, default bool
131
110
  Data type for new columns. Only a single dtype is allowed.
132
111
 
133
112
  Returns
@@ -195,6 +174,56 @@ def get_dummies(
195
174
  if columns is not None and not isinstance(columns, list):
196
175
  raise TypeError("Input must be a list-like for parameter `columns`")
197
176
 
177
+ if isinstance(data, (list, tuple)):
178
+ data = asseries(data)
179
+ elif isinstance(data, pd.Series):
180
+ data = from_pandas_series(data)
181
+ elif isinstance(data, pd.DataFrame):
182
+ data = from_pandas_df(data)
183
+
184
+ dtype = dtype if dtype is not None else np.dtype(bool)
185
+
186
+ if prefix is not None:
187
+ if isinstance(prefix, list):
188
+ if columns is not None:
189
+ encoding_col_num = len(columns)
190
+ else:
191
+ encoding_col_num = 0
192
+ for dt in data.dtypes.values:
193
+ if dt.kind in _encoding_dtype_kind:
194
+ encoding_col_num += 1
195
+ prefix_num = len(prefix)
196
+ if prefix_num != encoding_col_num:
197
+ raise ValueError(
198
+ f"Length of 'prefix' ({prefix_num}) did not match "
199
+ + f"the length of the columns being encoded ({encoding_col_num})"
200
+ )
201
+ elif isinstance(prefix, dict):
202
+ if columns is not None:
203
+ encoding_col_num = len(columns)
204
+ prefix_num = len(prefix)
205
+ if prefix_num != encoding_col_num:
206
+ raise ValueError(
207
+ f"Length of 'prefix' ({prefix_num}) did not match "
208
+ + f"the length of the columns being encoded ({encoding_col_num})"
209
+ )
210
+ prefix_cols = prefix.keys()
211
+ for columns_columnname, prefix_columnname in zip(
212
+ prefix_cols, list(columns)
213
+ ):
214
+ if columns_columnname != prefix_columnname:
215
+ raise KeyError(f"{columns_columnname}")
216
+ else:
217
+ columns = list(prefix.keys())
218
+ # Convert prefix from dict to list, to simplify tile work
219
+ prefix = list(prefix.values())
220
+
221
+ if not columns and data.ndim == 2:
222
+ columns = []
223
+ for col_name, dt in data.dtypes.items():
224
+ if dt.kind in _encoding_dtype_kind:
225
+ columns.append(col_name)
226
+
198
227
  op = DataFrameGetDummies(
199
228
  prefix=prefix,
200
229
  prefix_sep=prefix_sep,
@@ -12,12 +12,14 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from typing import List
16
+
15
17
  import numpy as np
16
18
  import pandas as pd
17
19
  from pandas.api.types import is_list_like
18
20
 
19
21
  from ... import opcodes
20
- from ...core import ENTITY_TYPE
22
+ from ...core import ENTITY_TYPE, EntityData
21
23
  from ...serialization.serializables import AnyField, KeyField
22
24
  from ...tensor.core import TENSOR_TYPE
23
25
  from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
@@ -30,21 +32,22 @@ class DataFrameIsin(DataFrameOperator, DataFrameOperatorMixin):
30
32
  input = KeyField("input")
31
33
  values = AnyField("values", default=None)
32
34
 
33
- def _set_inputs(self, inputs):
34
- super()._set_inputs(inputs)
35
- inputs_iter = iter(self._inputs)
36
- self.input = next(inputs_iter)
37
- if len(self._inputs) > 1:
38
- if isinstance(self.values, dict):
35
+ @classmethod
36
+ def _set_inputs(cls, op: "DataFrameIsin", inputs: List[EntityData]):
37
+ super()._set_inputs(op, inputs)
38
+ inputs_iter = iter(op._inputs)
39
+ op.input = next(inputs_iter)
40
+ if len(op._inputs) > 1:
41
+ if isinstance(op.values, dict):
39
42
  new_values = dict()
40
- for k, v in self.values.items():
43
+ for k, v in op.values.items():
41
44
  if isinstance(v, ENTITY_TYPE):
42
45
  new_values[k] = next(inputs_iter)
43
46
  else:
44
47
  new_values[k] = v
45
- self.values = new_values
48
+ op.values = new_values
46
49
  else:
47
- self.values = self._inputs[1]
50
+ op.values = op._inputs[1]
48
51
 
49
52
  def __call__(self, elements):
50
53
  inputs = [elements]
@@ -13,14 +13,15 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import inspect
16
- from collections.abc import MutableMapping
16
+ from typing import List, MutableMapping, Union
17
17
 
18
18
  import numpy as np
19
19
  import pandas as pd
20
20
 
21
21
  from ... import opcodes
22
- from ...core import OutputType
22
+ from ...core import EntityData, OutputType
23
23
  from ...serialization.serializables import AnyField, KeyField, StringField
24
+ from ...udf import BuiltinFunction, MarkedFunction
24
25
  from ...utils import quiet_stdio
25
26
  from ..core import SERIES_TYPE
26
27
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
@@ -41,11 +42,17 @@ class DataFrameMap(DataFrameOperator, DataFrameOperatorMixin):
41
42
  if hasattr(self, "arg"):
42
43
  copy_func_scheduling_hints(self.arg, self)
43
44
 
44
- def _set_inputs(self, inputs):
45
- super()._set_inputs(inputs)
46
- self.input = self._inputs[0]
45
+ @classmethod
46
+ def _set_inputs(cls, op: "DataFrameMap", inputs: List[EntityData]):
47
+ super()._set_inputs(op, inputs)
48
+ op.input = op._inputs[0]
47
49
  if len(inputs) == 2:
48
- self.arg = self._inputs[1]
50
+ op.arg = op._inputs[1]
51
+
52
+ def has_custom_code(self) -> bool:
53
+ return not isinstance(
54
+ self.arg, (dict, SERIES_TYPE, pd.Series)
55
+ ) and not isinstance(self.arg, BuiltinFunction)
49
56
 
50
57
  def __call__(self, series, dtype, skip_infer=False):
51
58
  if dtype is None and not skip_infer:
@@ -112,6 +119,14 @@ class DataFrameMap(DataFrameOperator, DataFrameOperatorMixin):
112
119
  name=series.name,
113
120
  )
114
121
 
122
+ @classmethod
123
+ def estimate_size(
124
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameMap"
125
+ ) -> None:
126
+ if isinstance(op.arg, MarkedFunction):
127
+ ctx[op.outputs[0].key] = float("inf")
128
+ super().estimate_size(ctx, op)
129
+
115
130
 
116
131
  def series_map(
117
132
  series, arg, na_action=None, dtype=None, memory_scale=None, skip_infer=False
@@ -16,7 +16,7 @@ import numpy as np
16
16
  import pandas as pd
17
17
 
18
18
  from ... import opcodes
19
- from ...serialization.serializables import AnyField, StringField
19
+ from ...serialization.serializables import AnyField, BoolField, StringField
20
20
  from ..operators import DataFrameOperator, DataFrameOperatorMixin, OutputType
21
21
  from ..utils import build_empty_df, parse_index
22
22
 
@@ -29,6 +29,7 @@ class DataFrameMelt(DataFrameOperator, DataFrameOperatorMixin):
29
29
  var_name = StringField("var_name", default=None)
30
30
  value_name = StringField("value_name", default=None)
31
31
  col_level = AnyField("col_level", default=None)
32
+ ignore_index = BoolField("ignore_index", default=False)
32
33
 
33
34
  def __call__(self, df):
34
35
  empty_result = build_empty_df(df.dtypes).melt(
@@ -37,6 +38,7 @@ class DataFrameMelt(DataFrameOperator, DataFrameOperatorMixin):
37
38
  var_name=self.var_name,
38
39
  value_name=self.value_name,
39
40
  col_level=self.col_level,
41
+ ignore_index=self.ignore_index,
40
42
  )
41
43
  self._output_types = [OutputType.dataframe]
42
44
  return self.new_tileable(
@@ -55,6 +57,7 @@ def melt(
55
57
  var_name=None,
56
58
  value_name="value",
57
59
  col_level=None,
60
+ ignore_index=False,
58
61
  ):
59
62
  """
60
63
  Unpivot a DataFrame from wide to long format, optionally leaving identifiers set.
@@ -79,6 +82,9 @@ def melt(
79
82
  Name to use for the 'value' column.
80
83
  col_level : int or str, optional
81
84
  If columns are a MultiIndex then use this level to melt.
85
+ ignore_index : bool, default False
86
+ If True, original index is ignored. If False, the original index
87
+ is retained. Index labels will be repeated as necessary.
82
88
 
83
89
  Returns
84
90
  -------
@@ -158,5 +164,6 @@ def melt(
158
164
  var_name=var_name,
159
165
  value_name=value_name,
160
166
  col_level=col_level,
167
+ ignore_index=ignore_index,
161
168
  )
162
169
  return op(frame)
@@ -0,0 +1,232 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ from typing import List
17
+
18
+ import numpy as np
19
+ import pandas as pd
20
+ from pandas.api.types import is_list_like
21
+
22
+ from ... import opcodes
23
+ from ...core import EntityData, OutputType
24
+ from ...serialization.serializables import AnyField, KeyField
25
+ from ...utils import no_default
26
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
27
+ from ..utils import build_df, make_column_list, parse_index
28
+
29
+
30
+ class DataFramePivot(DataFrameOperator, DataFrameOperatorMixin):
31
+ _op_type_ = opcodes.PIVOT
32
+
33
+ values = AnyField("values", default=None)
34
+ index = AnyField("index", default=None)
35
+ columns = AnyField("columns", default=None)
36
+
37
+ agg_results = KeyField("agg_results", default=None)
38
+
39
+ def __init__(self, aggfunc=None, **kw):
40
+ if aggfunc is None:
41
+ aggfunc = "mean"
42
+ super().__init__(aggfunc=aggfunc, **kw)
43
+ self._output_types = [OutputType.dataframe]
44
+
45
+ @classmethod
46
+ def _set_inputs(cls, op: "DataFramePivot", inputs: List[EntityData]):
47
+ super()._set_inputs(op, inputs)
48
+ if op.agg_results is not None: # pragma: no branch
49
+ op.agg_results = inputs[-1]
50
+
51
+ def __call__(self, df):
52
+ index_list = make_column_list(self.index, df.dtypes)
53
+ columns_list = make_column_list(self.columns, df.dtypes)
54
+
55
+ if not index_list:
56
+ index_data = pd.Index([])
57
+ elif len(index_list) == 1:
58
+ index_data = pd.Index(
59
+ [], dtype=df.dtypes[index_list[0]], name=index_list[0]
60
+ )
61
+ else:
62
+ index_data = pd.MultiIndex.from_frame(build_df(df[index_list]))
63
+ index_value = parse_index(index_data, df)
64
+
65
+ self.agg_results = df[columns_list].drop_duplicates()
66
+ columns_value = dtypes = None
67
+
68
+ inputs = [df]
69
+ if self.agg_results is not None:
70
+ inputs.append(self.agg_results)
71
+ return self.new_dataframe(
72
+ inputs,
73
+ shape=(np.nan, np.nan),
74
+ dtypes=dtypes,
75
+ columns_value=columns_value,
76
+ index_value=index_value,
77
+ )
78
+
79
+
80
+ def pivot(data, columns, index=None, values=None):
81
+ """
82
+ Return reshaped DataFrame organized by given index / column values.
83
+
84
+ Reshape data (produce a "pivot" table) based on column values. Uses
85
+ unique values from specified `index` / `columns` to form axes of the
86
+ resulting DataFrame. This function does not support data
87
+ aggregation, multiple values will result in a MultiIndex in the
88
+ columns. See the :ref:`User Guide <reshaping>` for more on reshaping.
89
+
90
+ Parameters
91
+ ----------
92
+ index : str or object or a list of str, optional
93
+ Column to use to make new frame's index. If None, uses
94
+ existing index.
95
+
96
+ columns : str or object or a list of str
97
+ Column to use to make new frame's columns.
98
+
99
+ values : str, object or a list of the previous, optional
100
+ Column(s) to use for populating new frame's values. If not
101
+ specified, all remaining columns will be used and the result will
102
+ have hierarchically indexed columns.
103
+
104
+ Returns
105
+ -------
106
+ DataFrame
107
+ Returns reshaped DataFrame.
108
+
109
+ Raises
110
+ ------
111
+ ValueError:
112
+ When there are any `index`, `columns` combinations with multiple
113
+ values. Use `DataFrame.pivot_table` when you need to aggregate.
114
+
115
+ See Also
116
+ --------
117
+ DataFrame.pivot_table : Generalization of pivot that can handle
118
+ duplicate values for one index/column pair.
119
+ DataFrame.unstack : Pivot based on the index values instead of a
120
+ column.
121
+ wide_to_long : Wide panel to long format. Less flexible but more
122
+ user-friendly than melt.
123
+
124
+ Notes
125
+ -----
126
+ For finer-tuned control, see hierarchical indexing documentation along
127
+ with the related stack/unstack methods.
128
+
129
+ Examples
130
+ --------
131
+ >>> import maxframe.dataframe as md
132
+ >>> df = md.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two',
133
+ ... 'two'],
134
+ ... 'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
135
+ ... 'baz': [1, 2, 3, 4, 5, 6],
136
+ ... 'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
137
+ >>> df.execute()
138
+ foo bar baz zoo
139
+ 0 one A 1 x
140
+ 1 one B 2 y
141
+ 2 one C 3 z
142
+ 3 two A 4 q
143
+ 4 two B 5 w
144
+ 5 two C 6 t
145
+
146
+ >>> df.pivot(index='foo', columns='bar', values='baz').execute()
147
+ bar A B C
148
+ foo
149
+ one 1 2 3
150
+ two 4 5 6
151
+
152
+ >>> df.pivot(index='foo', columns='bar', values=['baz', 'zoo']).execute()
153
+ baz zoo
154
+ bar A B C A B C
155
+ foo
156
+ one 1 2 3 x y z
157
+ two 4 5 6 q w t
158
+
159
+ You could also assign a list of column names or a list of index names.
160
+
161
+ >>> df = md.DataFrame({
162
+ ... "lev1": [1, 1, 1, 2, 2, 2],
163
+ ... "lev2": [1, 1, 2, 1, 1, 2],
164
+ ... "lev3": [1, 2, 1, 2, 1, 2],
165
+ ... "lev4": [1, 2, 3, 4, 5, 6],
166
+ ... "values": [0, 1, 2, 3, 4, 5]})
167
+ >>> df.execute()
168
+ lev1 lev2 lev3 lev4 values
169
+ 0 1 1 1 1 0
170
+ 1 1 1 2 2 1
171
+ 2 1 2 1 3 2
172
+ 3 2 1 2 4 3
173
+ 4 2 1 1 5 4
174
+ 5 2 2 2 6 5
175
+
176
+ >>> df.pivot(index="lev1", columns=["lev2", "lev3"], values="values").execute()
177
+ lev2 1 2
178
+ lev3 1 2 1 2
179
+ lev1
180
+ 1 0.0 1.0 2.0 NaN
181
+ 2 4.0 3.0 NaN 5.0
182
+
183
+ >>> df.pivot(index=["lev1", "lev2"], columns=["lev3"], values="values").execute()
184
+ lev3 1 2
185
+ lev1 lev2
186
+ 1 1 0.0 1.0
187
+ 2 2.0 NaN
188
+ 2 1 4.0 3.0
189
+ 2 NaN 5.0
190
+
191
+ A ValueError is raised if there are any duplicates.
192
+
193
+ >>> df = md.DataFrame({"foo": ['one', 'one', 'two', 'two'],
194
+ ... "bar": ['A', 'A', 'B', 'C'],
195
+ ... "baz": [1, 2, 3, 4]})
196
+ >>> df.execute()
197
+ foo bar baz
198
+ 0 one A 1
199
+ 1 one A 2
200
+ 2 two B 3
201
+ 3 two C 4
202
+
203
+ Notice that the first two rows are the same for our `index`
204
+ and `columns` arguments.
205
+
206
+ >>> df.pivot(index='foo', columns='bar', values='baz').execute()
207
+ Traceback (most recent call last):
208
+ ...
209
+ ValueError: Index contains duplicate entries, cannot reshape
210
+ """
211
+ values_list = make_column_list(values, data.dtypes)
212
+ index_list = make_column_list(index, data.dtypes)
213
+ columns_list = make_column_list(columns, data.dtypes)
214
+
215
+ name_to_attr = {"values": values_list, "index": index_list, "columns": columns_list}
216
+ for key, val in name_to_attr.items():
217
+ if val is None:
218
+ continue
219
+ if not is_list_like(val):
220
+ raise ValueError(f"Need to specify {key} as a list-like object.")
221
+ non_exist_key = next((c for c in val if c not in data.dtypes.index), no_default)
222
+ if non_exist_key is not no_default:
223
+ raise ValueError(
224
+ f"Column {non_exist_key} specified in {key} is not a valid column."
225
+ )
226
+
227
+ op = DataFramePivot(
228
+ values=values,
229
+ index=index,
230
+ columns=columns,
231
+ )
232
+ return op(data)
@@ -12,16 +12,18 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from typing import List
16
+
15
17
  import numpy as np
16
18
  import pandas as pd
17
19
  from pandas.api.types import is_list_like
18
20
 
19
21
  from ... import opcodes
20
- from ...core import OutputType
21
- from ...serialization.serializables import AnyField, BoolField, StringField
22
+ from ...core import EntityData, OutputType
23
+ from ...serialization.serializables import AnyField, BoolField, KeyField, StringField
22
24
  from ...utils import no_default
23
25
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
24
- from ..utils import build_df, parse_index
26
+ from ..utils import build_df, make_column_list, parse_index
25
27
 
26
28
 
27
29
  class DataFramePivotTable(DataFrameOperator, DataFrameOperatorMixin):
@@ -37,35 +39,53 @@ class DataFramePivotTable(DataFrameOperator, DataFrameOperatorMixin):
37
39
  margins_name = StringField("margins_name", default=None)
38
40
  sort = BoolField("sort", default=False)
39
41
 
40
- def __init__(self, **kw):
41
- super().__init__(**kw)
42
- self.output_types = [OutputType.dataframe]
42
+ agg_results = KeyField("agg_results", default=None)
43
+
44
+ def __init__(self, aggfunc=None, **kw):
45
+ if aggfunc is None:
46
+ aggfunc = "mean"
47
+ super().__init__(aggfunc=aggfunc, **kw)
48
+ self._output_types = [OutputType.dataframe]
49
+
50
+ @classmethod
51
+ def _set_inputs(cls, op: "DataFramePivotTable", inputs: List[EntityData]):
52
+ super()._set_inputs(op, inputs)
53
+ if op.agg_results is not None: # pragma: no branch
54
+ op.agg_results = inputs[-1]
43
55
 
44
56
  def __call__(self, df):
45
- index_value = columns_value = dtypes = None
46
- if self.index is not None:
47
- # index is now a required field
48
- if len(self.index) == 1:
49
- index_data = pd.Index(
50
- [], dtype=df.dtypes[self.index[0]], name=self.index[0]
51
- )
52
- else:
53
- index_data = pd.MultiIndex.from_frame(build_df(df[self.index]))
54
- index_value = parse_index(index_data)
55
-
56
- if self.columns is None: # output columns can be determined
57
- sel_df = df
58
- groupby_obj = sel_df.groupby(self.index)
59
- if self.values:
60
- groupby_obj = groupby_obj[self.values]
61
- aggregated_df = groupby_obj.agg(self.aggfunc)
62
- index_value = aggregated_df.index_value
63
- columns_value = aggregated_df.columns_value
64
- dtypes = aggregated_df.dtypes
65
- else:
66
- columns_value = dtypes = None
57
+ index_list = make_column_list(self.index, df.dtypes)
58
+ columns_list = make_column_list(self.columns, df.dtypes)
59
+ values_list = make_column_list(self.values, df.dtypes)
60
+
61
+ if not index_list:
62
+ index_data = pd.Index([])
63
+ elif len(index_list) == 1:
64
+ index_data = pd.Index(
65
+ [], dtype=df.dtypes[index_list[0]], name=index_list[0]
66
+ )
67
+ else:
68
+ index_data = pd.MultiIndex.from_frame(build_df(df[index_list]))
69
+ index_value = parse_index(index_data, df)
70
+
71
+ if columns_list is None: # output columns can be determined
72
+ sel_df = df
73
+ groupby_obj = sel_df.groupby(index_list)
74
+ if values_list:
75
+ groupby_obj = groupby_obj[values_list]
76
+ aggregated_df = groupby_obj.agg(self.aggfunc)
77
+ index_value = aggregated_df.index_value
78
+ columns_value = aggregated_df.columns_value
79
+ dtypes = aggregated_df.dtypes
80
+ else:
81
+ self.agg_results = df[columns_list].drop_duplicates()
82
+ columns_value = dtypes = None
83
+
84
+ inputs = [df]
85
+ if self.agg_results is not None:
86
+ inputs.append(self.agg_results)
67
87
  return self.new_dataframe(
68
- [df],
88
+ inputs,
69
89
  shape=(np.nan, np.nan),
70
90
  dtypes=dtypes,
71
91
  columns_value=columns_value,
@@ -219,17 +239,9 @@ def pivot_table(
219
239
  "No group keys passed, need to specify at least one of index or columns"
220
240
  )
221
241
 
222
- def make_col_list(col):
223
- try:
224
- if col in data.dtypes.index:
225
- return [col]
226
- except TypeError:
227
- return col
228
- return col
229
-
230
- values_list = make_col_list(values)
231
- index_list = make_col_list(index)
232
- columns_list = make_col_list(columns)
242
+ values_list = make_column_list(values, data.dtypes)
243
+ index_list = make_column_list(index, data.dtypes)
244
+ columns_list = make_column_list(columns, data.dtypes)
233
245
 
234
246
  name_to_attr = {"values": values_list, "index": index_list, "columns": columns_list}
235
247
  for key, val in name_to_attr.items():