maxframe 1.3.0__cp38-cp38-win32.whl → 2.0.0b1__cp38-cp38-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (643)
  1. maxframe/_utils.cp38-win32.pyd +0 -0
  2. maxframe/_utils.pyi +21 -0
  3. maxframe/_utils.pyx +4 -3
  4. maxframe/codegen/__init__.py +27 -0
  5. maxframe/{codegen.py → codegen/core.py} +49 -43
  6. maxframe/codegen/spe/__init__.py +16 -0
  7. maxframe/codegen/spe/core.py +307 -0
  8. maxframe/codegen/spe/dataframe/__init__.py +37 -0
  9. maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
  10. maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
  11. maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
  12. maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
  13. maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
  14. maxframe/codegen/spe/dataframe/datasource.py +181 -0
  15. maxframe/codegen/spe/dataframe/datastore.py +204 -0
  16. maxframe/codegen/spe/dataframe/extensions.py +63 -0
  17. maxframe/codegen/spe/dataframe/fetch.py +26 -0
  18. maxframe/codegen/spe/dataframe/groupby.py +224 -0
  19. maxframe/codegen/spe/dataframe/indexing.py +238 -0
  20. maxframe/codegen/spe/dataframe/merge.py +73 -0
  21. maxframe/codegen/spe/dataframe/misc.py +286 -0
  22. maxframe/codegen/spe/dataframe/missing.py +64 -0
  23. maxframe/codegen/spe/dataframe/reduction.py +160 -0
  24. maxframe/codegen/spe/dataframe/sort.py +83 -0
  25. maxframe/codegen/spe/dataframe/statistics.py +46 -0
  26. maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
  27. maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
  29. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
  30. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
  31. maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
  32. maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
  33. maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
  34. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
  35. maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
  36. maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
  37. maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
  38. maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
  39. maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
  40. maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
  41. maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
  42. maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
  43. maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
  44. maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
  45. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
  46. maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
  47. maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
  48. maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
  49. maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
  50. maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
  51. maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
  52. maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
  53. maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
  54. maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
  55. maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
  56. maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
  57. maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
  58. maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
  59. maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
  60. maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
  61. maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
  62. maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
  63. maxframe/codegen/spe/dataframe/tseries.py +46 -0
  64. maxframe/codegen/spe/dataframe/udf.py +62 -0
  65. maxframe/codegen/spe/dataframe/value_counts.py +31 -0
  66. maxframe/codegen/spe/dataframe/window.py +65 -0
  67. maxframe/codegen/spe/learn/__init__.py +15 -0
  68. maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
  69. maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
  70. maxframe/codegen/spe/learn/contrib/models.py +41 -0
  71. maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
  72. maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
  73. maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
  74. maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
  75. maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
  76. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
  77. maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
  78. maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
  79. maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
  80. maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
  81. maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
  82. maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
  83. maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
  84. maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
  85. maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
  86. maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
  87. maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
  88. maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
  89. maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
  90. maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
  91. maxframe/codegen/spe/learn/utils/__init__.py +15 -0
  92. maxframe/codegen/spe/learn/utils/checks.py +55 -0
  93. maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
  94. maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
  95. maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
  96. maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
  97. maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
  98. maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
  99. maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
  100. maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
  101. maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
  102. maxframe/codegen/spe/learn/utils/validation.py +35 -0
  103. maxframe/codegen/spe/objects.py +26 -0
  104. maxframe/codegen/spe/remote.py +29 -0
  105. maxframe/codegen/spe/tensor/__init__.py +28 -0
  106. maxframe/codegen/spe/tensor/arithmetic.py +95 -0
  107. maxframe/codegen/spe/tensor/core.py +41 -0
  108. maxframe/codegen/spe/tensor/datasource.py +165 -0
  109. maxframe/codegen/spe/tensor/extensions.py +35 -0
  110. maxframe/codegen/spe/tensor/fetch.py +26 -0
  111. maxframe/codegen/spe/tensor/indexing.py +63 -0
  112. maxframe/codegen/spe/tensor/linalg.py +63 -0
  113. maxframe/codegen/spe/tensor/merge.py +31 -0
  114. maxframe/codegen/spe/tensor/misc.py +121 -0
  115. maxframe/codegen/spe/tensor/random.py +29 -0
  116. maxframe/codegen/spe/tensor/reduction.py +39 -0
  117. maxframe/codegen/spe/tensor/reshape.py +26 -0
  118. maxframe/codegen/spe/tensor/sort.py +42 -0
  119. maxframe/codegen/spe/tensor/special.py +35 -0
  120. maxframe/codegen/spe/tensor/statistics.py +24 -0
  121. maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
  122. maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
  123. maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
  124. maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
  125. maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
  126. maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
  127. maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
  128. maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
  129. maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
  130. maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
  131. maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
  132. maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
  133. maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
  134. maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
  135. maxframe/codegen/spe/tests/__init__.py +13 -0
  136. maxframe/codegen/spe/tests/test_remote.py +29 -0
  137. maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
  138. maxframe/codegen/spe/utils.py +54 -0
  139. maxframe/codegen/tests/__init__.py +13 -0
  140. maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
  141. maxframe/config/__init__.py +1 -1
  142. maxframe/config/config.py +50 -23
  143. maxframe/config/tests/test_config.py +4 -12
  144. maxframe/config/validators.py +5 -0
  145. maxframe/conftest.py +38 -10
  146. maxframe/core/__init__.py +1 -0
  147. maxframe/core/context.py +110 -0
  148. maxframe/core/entity/__init__.py +1 -0
  149. maxframe/core/entity/core.py +0 -7
  150. maxframe/core/entity/objects.py +19 -5
  151. maxframe/core/entity/output_types.py +11 -0
  152. maxframe/core/entity/tests/test_objects.py +11 -12
  153. maxframe/core/entity/tileables.py +3 -1
  154. maxframe/core/entity/utils.py +15 -0
  155. maxframe/core/graph/__init__.py +6 -1
  156. maxframe/core/graph/builder/base.py +5 -1
  157. maxframe/core/graph/core.cp38-win32.pyd +0 -0
  158. maxframe/core/graph/core.pyx +17 -6
  159. maxframe/core/graph/entity.py +18 -6
  160. maxframe/core/operator/__init__.py +8 -3
  161. maxframe/core/operator/base.py +35 -12
  162. maxframe/core/operator/core.py +37 -14
  163. maxframe/core/operator/fetch.py +5 -18
  164. maxframe/core/operator/objects.py +0 -20
  165. maxframe/core/operator/shuffle.py +6 -72
  166. maxframe/dataframe/__init__.py +1 -0
  167. maxframe/dataframe/accessors/datetime_/core.py +7 -4
  168. maxframe/dataframe/accessors/string_/core.py +9 -6
  169. maxframe/dataframe/arithmetic/core.py +31 -20
  170. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
  171. maxframe/dataframe/core.py +98 -91
  172. maxframe/dataframe/datasource/core.py +8 -1
  173. maxframe/dataframe/datasource/date_range.py +8 -0
  174. maxframe/dataframe/datasource/from_index.py +9 -5
  175. maxframe/dataframe/datasource/from_records.py +9 -2
  176. maxframe/dataframe/datasource/from_tensor.py +32 -21
  177. maxframe/dataframe/datasource/read_csv.py +8 -2
  178. maxframe/dataframe/datasource/read_odps_query.py +33 -3
  179. maxframe/dataframe/datasource/read_odps_table.py +20 -5
  180. maxframe/dataframe/datasource/read_parquet.py +8 -3
  181. maxframe/dataframe/datasource/tests/test_datasource.py +33 -0
  182. maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
  183. maxframe/dataframe/datastore/to_csv.py +7 -3
  184. maxframe/dataframe/datastore/to_odps.py +42 -6
  185. maxframe/dataframe/extensions/__init__.py +6 -1
  186. maxframe/dataframe/extensions/apply_chunk.py +96 -136
  187. maxframe/dataframe/extensions/flatjson.py +3 -2
  188. maxframe/dataframe/extensions/flatmap.py +15 -7
  189. maxframe/dataframe/fetch/core.py +12 -1
  190. maxframe/dataframe/groupby/__init__.py +7 -0
  191. maxframe/dataframe/groupby/aggregation.py +62 -9
  192. maxframe/dataframe/groupby/apply.py +50 -74
  193. maxframe/dataframe/groupby/apply_chunk.py +393 -0
  194. maxframe/dataframe/groupby/core.py +80 -17
  195. maxframe/dataframe/groupby/extensions.py +26 -0
  196. maxframe/dataframe/groupby/fill.py +9 -4
  197. maxframe/dataframe/groupby/sample.py +7 -7
  198. maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
  199. maxframe/dataframe/groupby/transform.py +57 -54
  200. maxframe/dataframe/indexing/align.py +7 -6
  201. maxframe/dataframe/indexing/getitem.py +9 -8
  202. maxframe/dataframe/indexing/iloc.py +28 -23
  203. maxframe/dataframe/indexing/insert.py +7 -3
  204. maxframe/dataframe/indexing/loc.py +9 -8
  205. maxframe/dataframe/indexing/reindex.py +36 -30
  206. maxframe/dataframe/indexing/rename_axis.py +18 -10
  207. maxframe/dataframe/indexing/reset_index.py +0 -2
  208. maxframe/dataframe/indexing/sample.py +13 -9
  209. maxframe/dataframe/indexing/set_axis.py +9 -6
  210. maxframe/dataframe/indexing/setitem.py +8 -5
  211. maxframe/dataframe/indexing/where.py +12 -9
  212. maxframe/dataframe/merge/__init__.py +0 -1
  213. maxframe/dataframe/merge/concat.py +10 -31
  214. maxframe/dataframe/merge/merge.py +2 -24
  215. maxframe/dataframe/misc/__init__.py +6 -0
  216. maxframe/dataframe/misc/_duplicate.py +7 -3
  217. maxframe/dataframe/misc/apply.py +106 -139
  218. maxframe/dataframe/misc/astype.py +3 -2
  219. maxframe/dataframe/misc/case_when.py +11 -7
  220. maxframe/dataframe/misc/cut.py +11 -10
  221. maxframe/dataframe/misc/describe.py +7 -3
  222. maxframe/dataframe/misc/drop.py +13 -11
  223. maxframe/dataframe/misc/eval.py +0 -2
  224. maxframe/dataframe/misc/get_dummies.py +78 -49
  225. maxframe/dataframe/misc/isin.py +13 -10
  226. maxframe/dataframe/misc/map.py +21 -6
  227. maxframe/dataframe/misc/melt.py +8 -1
  228. maxframe/dataframe/misc/pivot.py +232 -0
  229. maxframe/dataframe/misc/pivot_table.py +52 -40
  230. maxframe/dataframe/misc/rechunk.py +59 -0
  231. maxframe/dataframe/misc/shift.py +7 -4
  232. maxframe/dataframe/misc/stack.py +5 -3
  233. maxframe/dataframe/misc/tests/test_misc.py +167 -1
  234. maxframe/dataframe/misc/transform.py +63 -65
  235. maxframe/dataframe/misc/value_counts.py +7 -4
  236. maxframe/dataframe/missing/dropna.py +16 -7
  237. maxframe/dataframe/missing/fillna.py +18 -10
  238. maxframe/dataframe/missing/replace.py +10 -6
  239. maxframe/dataframe/missing/tests/test_missing.py +2 -2
  240. maxframe/dataframe/operators.py +1 -27
  241. maxframe/dataframe/reduction/aggregation.py +128 -3
  242. maxframe/dataframe/reduction/core.py +20 -6
  243. maxframe/dataframe/reduction/median.py +1 -1
  244. maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
  245. maxframe/dataframe/reduction/unique.py +53 -7
  246. maxframe/dataframe/statistics/corr.py +9 -6
  247. maxframe/dataframe/statistics/quantile.py +9 -6
  248. maxframe/dataframe/tseries/to_datetime.py +6 -4
  249. maxframe/dataframe/utils.py +219 -31
  250. maxframe/dataframe/window/rolling.py +7 -4
  251. maxframe/env.py +1 -0
  252. maxframe/errors.py +9 -0
  253. maxframe/extension.py +13 -2
  254. maxframe/io/objects/core.py +67 -51
  255. maxframe/io/objects/tensor.py +73 -17
  256. maxframe/io/objects/tests/test_object_io.py +8 -55
  257. maxframe/io/odpsio/arrow.py +15 -2
  258. maxframe/io/odpsio/schema.py +43 -13
  259. maxframe/io/odpsio/tableio.py +63 -11
  260. maxframe/io/odpsio/tests/test_arrow.py +1 -2
  261. maxframe/io/odpsio/tests/test_schema.py +114 -1
  262. maxframe/io/odpsio/tests/test_tableio.py +42 -0
  263. maxframe/io/odpsio/tests/test_volumeio.py +22 -48
  264. maxframe/learn/__init__.py +2 -2
  265. maxframe/learn/contrib/__init__.py +2 -2
  266. maxframe/learn/contrib/graph/connected_components.py +2 -1
  267. maxframe/learn/contrib/lightgbm/__init__.py +33 -0
  268. maxframe/learn/contrib/lightgbm/_predict.py +138 -0
  269. maxframe/learn/contrib/lightgbm/_train.py +163 -0
  270. maxframe/learn/contrib/lightgbm/callback.py +114 -0
  271. maxframe/learn/contrib/lightgbm/classifier.py +199 -0
  272. maxframe/learn/contrib/lightgbm/core.py +372 -0
  273. maxframe/learn/contrib/lightgbm/dataset.py +153 -0
  274. maxframe/learn/contrib/lightgbm/regressor.py +29 -0
  275. maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
  276. maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
  277. maxframe/learn/contrib/llm/models/dashscope.py +34 -0
  278. maxframe/learn/contrib/llm/models/managed.py +15 -0
  279. maxframe/learn/contrib/llm/multi_modal.py +92 -0
  280. maxframe/learn/contrib/llm/text.py +21 -5
  281. maxframe/learn/contrib/models.py +38 -9
  282. maxframe/learn/contrib/utils.py +55 -0
  283. maxframe/learn/contrib/xgboost/callback.py +86 -0
  284. maxframe/learn/contrib/xgboost/classifier.py +26 -30
  285. maxframe/learn/contrib/xgboost/core.py +53 -42
  286. maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
  287. maxframe/learn/contrib/xgboost/predict.py +13 -8
  288. maxframe/learn/contrib/xgboost/regressor.py +28 -27
  289. maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
  290. maxframe/learn/contrib/xgboost/train.py +59 -16
  291. maxframe/learn/core.py +252 -0
  292. maxframe/learn/datasets/__init__.py +20 -0
  293. maxframe/learn/datasets/samples_generator.py +628 -0
  294. maxframe/learn/linear_model/__init__.py +15 -0
  295. maxframe/learn/linear_model/_base.py +163 -0
  296. maxframe/learn/linear_model/_lin_reg.py +175 -0
  297. maxframe/learn/metrics/__init__.py +25 -0
  298. maxframe/learn/metrics/_check_targets.py +95 -0
  299. maxframe/learn/metrics/_classification.py +1121 -0
  300. maxframe/learn/metrics/_regression.py +256 -0
  301. maxframe/learn/model_selection/__init__.py +15 -0
  302. maxframe/learn/model_selection/_split.py +451 -0
  303. maxframe/learn/model_selection/tests/__init__.py +13 -0
  304. maxframe/learn/model_selection/tests/test_split.py +156 -0
  305. maxframe/learn/preprocessing/__init__.py +16 -0
  306. maxframe/learn/preprocessing/_data/__init__.py +17 -0
  307. maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
  308. maxframe/learn/preprocessing/_data/normalize.py +127 -0
  309. maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
  310. maxframe/learn/preprocessing/_data/utils.py +79 -0
  311. maxframe/learn/preprocessing/_label/__init__.py +16 -0
  312. maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
  313. maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
  314. maxframe/learn/utils/__init__.py +4 -0
  315. maxframe/learn/utils/_encode.py +314 -0
  316. maxframe/learn/utils/checks.py +161 -0
  317. maxframe/learn/utils/core.py +33 -0
  318. maxframe/learn/utils/extmath.py +176 -0
  319. maxframe/learn/utils/multiclass.py +292 -0
  320. maxframe/learn/utils/shuffle.py +114 -0
  321. maxframe/learn/utils/sparsefuncs.py +87 -0
  322. maxframe/learn/utils/validation.py +775 -0
  323. maxframe/lib/__init__.py +0 -2
  324. maxframe/lib/compat.py +145 -0
  325. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  326. maxframe/lib/mmh3.cp38-win32.pyd +0 -0
  327. maxframe/lib/sparse/__init__.py +10 -15
  328. maxframe/lib/sparse/array.py +45 -33
  329. maxframe/lib/sparse/core.py +0 -2
  330. maxframe/lib/sparse/linalg.py +31 -0
  331. maxframe/lib/sparse/matrix.py +5 -2
  332. maxframe/lib/sparse/tests/__init__.py +0 -2
  333. maxframe/lib/sparse/tests/test_sparse.py +53 -53
  334. maxframe/lib/sparse/vector.py +0 -2
  335. maxframe/mixin.py +59 -2
  336. maxframe/opcodes.py +13 -5
  337. maxframe/protocol.py +67 -14
  338. maxframe/remote/core.py +16 -14
  339. maxframe/remote/run_script.py +6 -3
  340. maxframe/serialization/__init__.py +2 -0
  341. maxframe/serialization/core.cp38-win32.pyd +0 -0
  342. maxframe/serialization/core.pxd +3 -0
  343. maxframe/serialization/core.pyi +3 -1
  344. maxframe/serialization/core.pyx +82 -4
  345. maxframe/serialization/pandas.py +5 -1
  346. maxframe/serialization/serializables/core.py +6 -5
  347. maxframe/serialization/serializables/field.py +2 -2
  348. maxframe/serialization/serializables/tests/test_field_type.py +3 -5
  349. maxframe/serialization/tests/test_serial.py +27 -0
  350. maxframe/session.py +4 -71
  351. maxframe/sperunner.py +165 -0
  352. maxframe/tensor/__init__.py +35 -2
  353. maxframe/tensor/arithmetic/__init__.py +2 -4
  354. maxframe/tensor/arithmetic/abs.py +0 -2
  355. maxframe/tensor/arithmetic/absolute.py +0 -2
  356. maxframe/tensor/arithmetic/add.py +34 -4
  357. maxframe/tensor/arithmetic/angle.py +0 -2
  358. maxframe/tensor/arithmetic/arccos.py +1 -4
  359. maxframe/tensor/arithmetic/arccosh.py +1 -3
  360. maxframe/tensor/arithmetic/arcsin.py +0 -2
  361. maxframe/tensor/arithmetic/arcsinh.py +0 -2
  362. maxframe/tensor/arithmetic/arctan.py +0 -2
  363. maxframe/tensor/arithmetic/arctan2.py +0 -2
  364. maxframe/tensor/arithmetic/arctanh.py +0 -2
  365. maxframe/tensor/arithmetic/around.py +0 -2
  366. maxframe/tensor/arithmetic/bitand.py +0 -2
  367. maxframe/tensor/arithmetic/bitor.py +1 -3
  368. maxframe/tensor/arithmetic/bitxor.py +1 -3
  369. maxframe/tensor/arithmetic/cbrt.py +0 -2
  370. maxframe/tensor/arithmetic/ceil.py +0 -2
  371. maxframe/tensor/arithmetic/clip.py +13 -13
  372. maxframe/tensor/arithmetic/conj.py +0 -2
  373. maxframe/tensor/arithmetic/copysign.py +0 -2
  374. maxframe/tensor/arithmetic/core.py +47 -39
  375. maxframe/tensor/arithmetic/cos.py +1 -3
  376. maxframe/tensor/arithmetic/cosh.py +0 -2
  377. maxframe/tensor/arithmetic/deg2rad.py +0 -2
  378. maxframe/tensor/arithmetic/degrees.py +0 -2
  379. maxframe/tensor/arithmetic/divide.py +0 -2
  380. maxframe/tensor/arithmetic/equal.py +0 -2
  381. maxframe/tensor/arithmetic/exp.py +1 -3
  382. maxframe/tensor/arithmetic/exp2.py +0 -2
  383. maxframe/tensor/arithmetic/expm1.py +0 -2
  384. maxframe/tensor/arithmetic/fabs.py +0 -2
  385. maxframe/tensor/arithmetic/fix.py +0 -2
  386. maxframe/tensor/arithmetic/float_power.py +0 -2
  387. maxframe/tensor/arithmetic/floor.py +0 -2
  388. maxframe/tensor/arithmetic/floordiv.py +0 -2
  389. maxframe/tensor/arithmetic/fmax.py +0 -2
  390. maxframe/tensor/arithmetic/fmin.py +0 -2
  391. maxframe/tensor/arithmetic/fmod.py +0 -2
  392. maxframe/tensor/arithmetic/frexp.py +6 -2
  393. maxframe/tensor/arithmetic/greater.py +0 -2
  394. maxframe/tensor/arithmetic/greater_equal.py +0 -2
  395. maxframe/tensor/arithmetic/hypot.py +0 -2
  396. maxframe/tensor/arithmetic/i0.py +1 -3
  397. maxframe/tensor/arithmetic/imag.py +0 -2
  398. maxframe/tensor/arithmetic/invert.py +1 -3
  399. maxframe/tensor/arithmetic/isclose.py +0 -2
  400. maxframe/tensor/arithmetic/iscomplex.py +0 -2
  401. maxframe/tensor/arithmetic/isfinite.py +1 -3
  402. maxframe/tensor/arithmetic/isinf.py +0 -2
  403. maxframe/tensor/arithmetic/isnan.py +0 -2
  404. maxframe/tensor/arithmetic/isreal.py +0 -2
  405. maxframe/tensor/arithmetic/ldexp.py +0 -2
  406. maxframe/tensor/arithmetic/less.py +0 -2
  407. maxframe/tensor/arithmetic/less_equal.py +0 -2
  408. maxframe/tensor/arithmetic/log.py +1 -3
  409. maxframe/tensor/arithmetic/log10.py +1 -3
  410. maxframe/tensor/arithmetic/log1p.py +1 -3
  411. maxframe/tensor/arithmetic/log2.py +1 -3
  412. maxframe/tensor/arithmetic/logaddexp.py +0 -2
  413. maxframe/tensor/arithmetic/logaddexp2.py +0 -2
  414. maxframe/tensor/arithmetic/logical_and.py +0 -2
  415. maxframe/tensor/arithmetic/logical_not.py +1 -3
  416. maxframe/tensor/arithmetic/logical_or.py +0 -2
  417. maxframe/tensor/arithmetic/logical_xor.py +0 -2
  418. maxframe/tensor/arithmetic/lshift.py +0 -2
  419. maxframe/tensor/arithmetic/maximum.py +0 -2
  420. maxframe/tensor/arithmetic/minimum.py +0 -2
  421. maxframe/tensor/arithmetic/mod.py +0 -2
  422. maxframe/tensor/arithmetic/modf.py +6 -2
  423. maxframe/tensor/arithmetic/multiply.py +37 -4
  424. maxframe/tensor/arithmetic/nan_to_num.py +0 -2
  425. maxframe/tensor/arithmetic/negative.py +0 -2
  426. maxframe/tensor/arithmetic/nextafter.py +0 -2
  427. maxframe/tensor/arithmetic/not_equal.py +0 -2
  428. maxframe/tensor/arithmetic/positive.py +0 -2
  429. maxframe/tensor/arithmetic/power.py +0 -2
  430. maxframe/tensor/arithmetic/rad2deg.py +0 -2
  431. maxframe/tensor/arithmetic/radians.py +0 -2
  432. maxframe/tensor/arithmetic/real.py +0 -2
  433. maxframe/tensor/arithmetic/reciprocal.py +5 -3
  434. maxframe/tensor/arithmetic/rint.py +1 -3
  435. maxframe/tensor/arithmetic/rshift.py +0 -2
  436. maxframe/tensor/arithmetic/setimag.py +0 -2
  437. maxframe/tensor/arithmetic/setreal.py +0 -2
  438. maxframe/tensor/arithmetic/sign.py +0 -2
  439. maxframe/tensor/arithmetic/signbit.py +0 -2
  440. maxframe/tensor/arithmetic/sin.py +0 -2
  441. maxframe/tensor/arithmetic/sinc.py +1 -3
  442. maxframe/tensor/arithmetic/sinh.py +0 -2
  443. maxframe/tensor/arithmetic/spacing.py +0 -2
  444. maxframe/tensor/arithmetic/sqrt.py +0 -2
  445. maxframe/tensor/arithmetic/square.py +0 -2
  446. maxframe/tensor/arithmetic/subtract.py +4 -2
  447. maxframe/tensor/arithmetic/tan.py +0 -2
  448. maxframe/tensor/arithmetic/tanh.py +0 -2
  449. maxframe/tensor/arithmetic/tests/__init__.py +0 -2
  450. maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
  451. maxframe/tensor/arithmetic/truediv.py +0 -2
  452. maxframe/tensor/arithmetic/trunc.py +0 -2
  453. maxframe/tensor/arithmetic/utils.py +32 -6
  454. maxframe/tensor/array_utils.py +3 -25
  455. maxframe/tensor/core.py +6 -6
  456. maxframe/tensor/datasource/__init__.py +10 -2
  457. maxframe/tensor/datasource/arange.py +0 -2
  458. maxframe/tensor/datasource/array.py +3 -22
  459. maxframe/tensor/datasource/core.py +15 -10
  460. maxframe/tensor/datasource/diag.py +140 -0
  461. maxframe/tensor/datasource/diagflat.py +69 -0
  462. maxframe/tensor/datasource/empty.py +0 -2
  463. maxframe/tensor/datasource/eye.py +95 -0
  464. maxframe/tensor/datasource/from_dataframe.py +0 -2
  465. maxframe/tensor/datasource/from_dense.py +0 -17
  466. maxframe/tensor/datasource/from_sparse.py +0 -2
  467. maxframe/tensor/datasource/full.py +0 -2
  468. maxframe/tensor/datasource/identity.py +54 -0
  469. maxframe/tensor/datasource/indices.py +115 -0
  470. maxframe/tensor/datasource/linspace.py +140 -0
  471. maxframe/tensor/datasource/meshgrid.py +135 -0
  472. maxframe/tensor/datasource/ones.py +8 -3
  473. maxframe/tensor/datasource/tests/test_datasource.py +32 -1
  474. maxframe/tensor/datasource/tri_array.py +107 -0
  475. maxframe/tensor/datasource/zeros.py +7 -3
  476. maxframe/tensor/extensions/__init__.py +31 -0
  477. maxframe/tensor/extensions/accessor.py +25 -0
  478. maxframe/tensor/extensions/apply_chunk.py +137 -0
  479. maxframe/tensor/indexing/__init__.py +1 -1
  480. maxframe/tensor/indexing/choose.py +8 -6
  481. maxframe/tensor/indexing/compress.py +0 -2
  482. maxframe/tensor/indexing/extract.py +0 -2
  483. maxframe/tensor/indexing/fill_diagonal.py +9 -6
  484. maxframe/tensor/indexing/flatnonzero.py +1 -3
  485. maxframe/tensor/indexing/getitem.py +10 -43
  486. maxframe/tensor/indexing/nonzero.py +2 -4
  487. maxframe/tensor/indexing/setitem.py +19 -9
  488. maxframe/tensor/indexing/slice.py +6 -3
  489. maxframe/tensor/indexing/take.py +0 -2
  490. maxframe/tensor/indexing/tests/__init__.py +0 -2
  491. maxframe/tensor/indexing/tests/test_indexing.py +0 -2
  492. maxframe/tensor/indexing/unravel_index.py +6 -6
  493. maxframe/tensor/lib/__init__.py +16 -0
  494. maxframe/tensor/lib/index_tricks.py +404 -0
  495. maxframe/tensor/linalg/__init__.py +36 -0
  496. maxframe/tensor/linalg/dot.py +145 -0
  497. maxframe/tensor/linalg/inner.py +36 -0
  498. maxframe/tensor/linalg/inv.py +83 -0
  499. maxframe/tensor/linalg/lu.py +115 -0
  500. maxframe/tensor/linalg/matmul.py +225 -0
  501. maxframe/tensor/linalg/qr.py +124 -0
  502. maxframe/tensor/linalg/solve_triangular.py +103 -0
  503. maxframe/tensor/linalg/svd.py +167 -0
  504. maxframe/tensor/linalg/tensordot.py +213 -0
  505. maxframe/tensor/linalg/vdot.py +73 -0
  506. maxframe/tensor/merge/__init__.py +4 -0
  507. maxframe/tensor/merge/append.py +74 -0
  508. maxframe/tensor/merge/column_stack.py +63 -0
  509. maxframe/tensor/merge/concatenate.py +3 -2
  510. maxframe/tensor/merge/dstack.py +71 -0
  511. maxframe/tensor/merge/hstack.py +70 -0
  512. maxframe/tensor/merge/stack.py +0 -2
  513. maxframe/tensor/merge/tests/test_merge.py +0 -2
  514. maxframe/tensor/misc/__init__.py +18 -5
  515. maxframe/tensor/misc/astype.py +10 -8
  516. maxframe/tensor/misc/broadcast_to.py +1 -1
  517. maxframe/tensor/misc/copy.py +64 -0
  518. maxframe/tensor/misc/diff.py +115 -0
  519. maxframe/tensor/misc/flatten.py +63 -0
  520. maxframe/tensor/misc/in1d.py +94 -0
  521. maxframe/tensor/misc/isin.py +130 -0
  522. maxframe/tensor/misc/ndim.py +53 -0
  523. maxframe/tensor/misc/ravel.py +0 -2
  524. maxframe/tensor/misc/repeat.py +129 -0
  525. maxframe/tensor/misc/searchsorted.py +147 -0
  526. maxframe/tensor/misc/setdiff1d.py +58 -0
  527. maxframe/tensor/misc/squeeze.py +117 -0
  528. maxframe/tensor/misc/swapaxes.py +113 -0
  529. maxframe/tensor/misc/tests/test_misc.py +0 -2
  530. maxframe/tensor/misc/transpose.py +8 -4
  531. maxframe/tensor/misc/trapezoid.py +123 -0
  532. maxframe/tensor/misc/unique.py +0 -1
  533. maxframe/tensor/misc/where.py +10 -8
  534. maxframe/tensor/operators.py +0 -34
  535. maxframe/tensor/random/__init__.py +3 -5
  536. maxframe/tensor/random/binomial.py +0 -2
  537. maxframe/tensor/random/bytes.py +0 -2
  538. maxframe/tensor/random/chisquare.py +0 -2
  539. maxframe/tensor/random/choice.py +9 -8
  540. maxframe/tensor/random/core.py +20 -5
  541. maxframe/tensor/random/dirichlet.py +0 -2
  542. maxframe/tensor/random/exponential.py +0 -2
  543. maxframe/tensor/random/f.py +2 -4
  544. maxframe/tensor/random/gamma.py +0 -2
  545. maxframe/tensor/random/geometric.py +0 -2
  546. maxframe/tensor/random/gumbel.py +0 -2
  547. maxframe/tensor/random/hypergeometric.py +0 -2
  548. maxframe/tensor/random/laplace.py +2 -4
  549. maxframe/tensor/random/logistic.py +0 -2
  550. maxframe/tensor/random/lognormal.py +0 -2
  551. maxframe/tensor/random/logseries.py +0 -2
  552. maxframe/tensor/random/multinomial.py +0 -2
  553. maxframe/tensor/random/multivariate_normal.py +0 -2
  554. maxframe/tensor/random/negative_binomial.py +0 -2
  555. maxframe/tensor/random/noncentral_chisquare.py +0 -2
  556. maxframe/tensor/random/noncentral_f.py +1 -3
  557. maxframe/tensor/random/normal.py +0 -2
  558. maxframe/tensor/random/pareto.py +0 -2
  559. maxframe/tensor/random/permutation.py +6 -3
  560. maxframe/tensor/random/poisson.py +0 -2
  561. maxframe/tensor/random/power.py +0 -2
  562. maxframe/tensor/random/rand.py +0 -2
  563. maxframe/tensor/random/randint.py +0 -2
  564. maxframe/tensor/random/randn.py +0 -2
  565. maxframe/tensor/random/random_integers.py +0 -2
  566. maxframe/tensor/random/random_sample.py +0 -2
  567. maxframe/tensor/random/rayleigh.py +0 -2
  568. maxframe/tensor/random/standard_cauchy.py +0 -2
  569. maxframe/tensor/random/standard_exponential.py +0 -2
  570. maxframe/tensor/random/standard_gamma.py +0 -2
  571. maxframe/tensor/random/standard_normal.py +0 -2
  572. maxframe/tensor/random/standard_t.py +0 -2
  573. maxframe/tensor/random/tests/__init__.py +0 -2
  574. maxframe/tensor/random/tests/test_random.py +0 -2
  575. maxframe/tensor/random/triangular.py +0 -2
  576. maxframe/tensor/random/uniform.py +0 -2
  577. maxframe/tensor/random/vonmises.py +0 -2
  578. maxframe/tensor/random/wald.py +0 -2
  579. maxframe/tensor/random/weibull.py +0 -2
  580. maxframe/tensor/random/zipf.py +0 -2
  581. maxframe/tensor/reduction/__init__.py +0 -2
  582. maxframe/tensor/reduction/all.py +0 -2
  583. maxframe/tensor/reduction/allclose.py +0 -2
  584. maxframe/tensor/reduction/any.py +0 -2
  585. maxframe/tensor/reduction/argmax.py +1 -3
  586. maxframe/tensor/reduction/argmin.py +1 -3
  587. maxframe/tensor/reduction/array_equal.py +0 -2
  588. maxframe/tensor/reduction/core.py +0 -2
  589. maxframe/tensor/reduction/count_nonzero.py +0 -2
  590. maxframe/tensor/reduction/cumprod.py +0 -2
  591. maxframe/tensor/reduction/cumsum.py +0 -2
  592. maxframe/tensor/reduction/max.py +0 -2
  593. maxframe/tensor/reduction/mean.py +0 -2
  594. maxframe/tensor/reduction/min.py +0 -2
  595. maxframe/tensor/reduction/nanargmax.py +0 -2
  596. maxframe/tensor/reduction/nanargmin.py +0 -2
  597. maxframe/tensor/reduction/nancumprod.py +0 -2
  598. maxframe/tensor/reduction/nancumsum.py +0 -2
  599. maxframe/tensor/reduction/nanmax.py +0 -2
  600. maxframe/tensor/reduction/nanmean.py +0 -2
  601. maxframe/tensor/reduction/nanmin.py +0 -2
  602. maxframe/tensor/reduction/nanprod.py +0 -2
  603. maxframe/tensor/reduction/nanstd.py +0 -2
  604. maxframe/tensor/reduction/nansum.py +0 -2
  605. maxframe/tensor/reduction/nanvar.py +0 -2
  606. maxframe/tensor/reduction/prod.py +0 -2
  607. maxframe/tensor/reduction/std.py +0 -2
  608. maxframe/tensor/reduction/sum.py +0 -2
  609. maxframe/tensor/reduction/tests/test_reduction.py +1 -4
  610. maxframe/tensor/reduction/var.py +0 -2
  611. maxframe/tensor/reshape/__init__.py +0 -2
  612. maxframe/tensor/reshape/reshape.py +6 -5
  613. maxframe/tensor/reshape/tests/__init__.py +0 -2
  614. maxframe/tensor/reshape/tests/test_reshape.py +0 -2
  615. maxframe/tensor/sort/__init__.py +16 -0
  616. maxframe/tensor/sort/argsort.py +150 -0
  617. maxframe/tensor/sort/sort.py +295 -0
  618. maxframe/tensor/special/__init__.py +37 -0
  619. maxframe/tensor/special/core.py +38 -0
  620. maxframe/tensor/special/misc.py +142 -0
  621. maxframe/tensor/special/statistical.py +56 -0
  622. maxframe/tensor/statistics/__init__.py +5 -0
  623. maxframe/tensor/statistics/average.py +143 -0
  624. maxframe/tensor/statistics/bincount.py +133 -0
  625. maxframe/tensor/statistics/quantile.py +10 -8
  626. maxframe/tensor/ufunc/__init__.py +0 -2
  627. maxframe/tensor/ufunc/ufunc.py +0 -2
  628. maxframe/tensor/utils.py +21 -3
  629. maxframe/tests/test_protocol.py +3 -3
  630. maxframe/tests/test_utils.py +210 -1
  631. maxframe/tests/utils.py +67 -1
  632. maxframe/udf.py +76 -6
  633. maxframe/utils.py +418 -17
  634. {maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/METADATA +5 -1
  635. maxframe-2.0.0b1.dist-info/RECORD +939 -0
  636. maxframe_client/clients/framedriver.py +19 -3
  637. maxframe_client/fetcher.py +113 -6
  638. maxframe_client/session/odps.py +173 -38
  639. maxframe_client/session/task.py +3 -1
  640. maxframe_client/tests/test_session.py +41 -5
  641. maxframe-1.3.0.dist-info/RECORD +0 -705
  642. {maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/WHEEL +0 -0
  643. {maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/top_level.txt +0 -0
@@ -13,20 +13,14 @@
13
13
  # limitations under the License.
14
14
 
15
15
  from ... import opcodes
16
- from ...serialization.serializables import (
17
- FieldTypes,
18
- Int32Field,
19
- StringField,
20
- TupleField,
21
- )
22
- from . import FetchShuffle, ShuffleFetchType
16
+ from ...serialization.serializables import Int32Field, StringField
23
17
  from .base import Operator, OperatorStage, VirtualOperator
18
+ from .core import TileableOperatorMixin
24
19
 
25
20
 
26
- class ShuffleProxy(VirtualOperator):
21
+ class ShuffleProxy(VirtualOperator, TileableOperatorMixin):
27
22
  _op_type_ = opcodes.SHUFFLE_PROXY
28
23
  n_mappers = Int32Field("n_mappers", default=0)
29
- # `n_reducers` will be updated in `MapReduceOperator._new_chunks`
30
24
  n_reducers = Int32Field("n_reducers", default=0)
31
25
 
32
26
 
@@ -37,13 +31,11 @@ class MapReduceOperator(Operator):
37
31
  """
38
32
 
39
33
  # for reducer
40
- reducer_index = TupleField("reducer_index", FieldTypes.uint64)
34
+ reducer_id = Int32Field("reducer_id")
41
35
  # Total reducer nums, which also be shuffle blocks for single mapper.
42
36
  n_reducers = Int32Field("n_reducers")
43
- # The reducer ordinal in all reducers. It's different from reducer_index,
44
- # which might be a tuple.
45
- # `reducer_ordinal` will be set in `_new_chunks`.
46
- reducer_ordinal = Int32Field("reducer_ordinal")
37
+ # preserved field to keep serialization compatible
38
+ _mr_preserved = Int32Field("mr_preserved")
47
39
  reducer_phase = StringField("reducer_phase", default=None)
48
40
 
49
41
  def __init__(self, *args, **kwargs):
@@ -51,61 +43,3 @@ class MapReduceOperator(Operator):
51
43
  if self.stage == OperatorStage.reduce:
52
44
  # for reducer, we assign worker at first
53
45
  self.scheduling_hint.reassign_worker = True
54
-
55
- def get_dependent_data_keys(self):
56
- from .fetch import FetchShuffle
57
-
58
- if self.stage == OperatorStage.reduce:
59
- inputs = self.inputs or ()
60
- deps = []
61
- for inp in inputs:
62
- if isinstance(inp.op, ShuffleProxy):
63
- deps.extend(
64
- [(chunk.key, self.reducer_index) for chunk in inp.inputs or ()]
65
- )
66
- elif isinstance(inp.op, FetchShuffle):
67
- # fetch shuffle by index doesn't store data keys, so it won't run into this function.
68
- assert inp.op.shuffle_fetch_type == ShuffleFetchType.FETCH_BY_KEY
69
- deps.extend([(k, self.reducer_index) for k in inp.op.source_keys])
70
- else:
71
- deps.append(inp.key)
72
- return deps
73
- return super().get_dependent_data_keys()
74
-
75
- def iter_mapper_keys(self, input_id=0):
76
- # key is mapper chunk key, index is mapper chunk index.
77
- input_chunk = self.inputs[input_id]
78
- if isinstance(input_chunk.op, ShuffleProxy):
79
- keys = [inp.key for inp in input_chunk.inputs]
80
- else:
81
- assert isinstance(input_chunk.op, FetchShuffle), input_chunk.op
82
- if input_chunk.op.shuffle_fetch_type == ShuffleFetchType.FETCH_BY_INDEX:
83
- # For fetch shuffle by index, all shuffle block of same reducers are
84
- # identified by their index. chunk key are not needed any more.
85
- # so just mock key here.
86
- # keep this in sync with ray executor `execute_subtask`.
87
- return list(range(input_chunk.op.n_mappers))
88
- keys = input_chunk.op.source_keys
89
- return keys
90
-
91
- def iter_mapper_data(self, ctx, input_id=0, pop=False, skip_none=False):
92
- for key in self.iter_mapper_keys(input_id):
93
- try:
94
- if pop:
95
- yield ctx.pop((key, self.reducer_index))
96
- else:
97
- yield ctx[key, self.reducer_index]
98
- except KeyError:
99
- if not skip_none: # pragma: no cover
100
- raise
101
- if not pop:
102
- ctx[key, self.reducer_index] = None
103
-
104
- def execute(self, ctx, op):
105
- """The mapper stage must ensure all mapper blocks are inserted into ctx
106
- and no blocks for some reducers are missing. This is needed by shuffle
107
- fetch by index, which shuffle block are identified by the index instead
108
- of data keys. For operators implementation simplicity, we can sort the
109
- `ctx` by key which are (chunk key, reducer index) tuple and relax the
110
- insert order requirements.
111
- """
@@ -46,6 +46,7 @@ from .misc.cut import cut
46
46
  from .misc.eval import maxframe_eval as eval # pylint: disable=redefined-builtin
47
47
  from .misc.get_dummies import get_dummies
48
48
  from .misc.melt import melt
49
+ from .misc.pivot import pivot
49
50
  from .misc.pivot_table import pivot_table
50
51
  from .misc.qcut import qcut
51
52
  from .misc.to_numeric import to_numeric
@@ -12,10 +12,12 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from typing import List
16
+
15
17
  import pandas as pd
16
18
 
17
19
  from .... import opcodes
18
- from ....core import OutputType
20
+ from ....core import EntityData, OutputType
19
21
  from ....serialization.serializables import (
20
22
  BoolField,
21
23
  DictField,
@@ -45,9 +47,10 @@ class SeriesDatetimeMethod(DataFrameOperator, DataFrameOperatorMixin):
45
47
  def input(self):
46
48
  return self._input
47
49
 
48
- def _set_inputs(self, inputs):
49
- super()._set_inputs(inputs)
50
- self._input = self._inputs[0]
50
+ @classmethod
51
+ def _set_inputs(cls, op: "SeriesDatetimeMethod", inputs: List[EntityData]):
52
+ super()._set_inputs(op, inputs)
53
+ op._input = op._inputs[0]
51
54
 
52
55
  def __call__(self, inp):
53
56
  return datetime_method_to_handlers[self.method].call(self, inp)
@@ -12,11 +12,13 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from typing import List
16
+
15
17
  import numpy as np
16
18
  import pandas as pd
17
19
 
18
20
  from .... import opcodes
19
- from ....core import OutputType
21
+ from ....core import EntityData, OutputType
20
22
  from ....serialization.serializables import DictField, KeyField, StringField, TupleField
21
23
  from ....tensor import tensor as astensor
22
24
  from ....tensor.core import TENSOR_TYPE
@@ -43,12 +45,13 @@ class SeriesStringMethod(DataFrameOperator, DataFrameOperatorMixin):
43
45
  def input(self):
44
46
  return self._input
45
47
 
46
- def _set_inputs(self, inputs):
47
- super()._set_inputs(inputs)
48
- self._input = self._inputs[0]
49
- if len(self._inputs) == 2:
48
+ @classmethod
49
+ def _set_inputs(cls, op: "SeriesStringMethod", inputs: List[EntityData]):
50
+ super()._set_inputs(op, inputs)
51
+ op._input = op._inputs[0]
52
+ if len(op._inputs) == 2:
50
53
  # for method cat
51
- self.method_kwargs["others"] = self._inputs[1]
54
+ op.method_kwargs["others"] = op._inputs[1]
52
55
 
53
56
  def __call__(self, inp):
54
57
  return string_method_to_handlers[self.method].call(self, inp)
@@ -13,14 +13,15 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import copy
16
+ from typing import List, MutableMapping, Union
16
17
 
17
18
  import numpy as np
18
19
  import pandas as pd
19
20
 
20
- from ...core import ENTITY_TYPE
21
+ from ...core import ENTITY_TYPE, EntityData
21
22
  from ...serialization.serializables import AnyField
22
23
  from ...tensor.core import TENSOR_TYPE
23
- from ...utils import classproperty, get_dtype
24
+ from ...utils import classproperty, make_dtype
24
25
  from ..core import DATAFRAME_TYPE, SERIES_TYPE
25
26
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
26
27
  from ..ufunc.tensor import TensorUfuncMixin
@@ -30,6 +31,7 @@ from ..utils import (
30
31
  infer_dtypes,
31
32
  infer_index_value,
32
33
  parse_index,
34
+ validate_axis,
33
35
  )
34
36
 
35
37
 
@@ -63,7 +65,7 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
63
65
  x2 is None or pd.api.types.is_scalar(x2) or isinstance(x2, TENSOR_TYPE)
64
66
  ):
65
67
  x2_dtype = x2.dtype if hasattr(x2, "dtype") else type(x2)
66
- x2_dtype = get_dtype(x2_dtype)
68
+ x2_dtype = make_dtype(x2_dtype)
67
69
  if hasattr(cls, "return_dtype"):
68
70
  dtype = cls.return_dtype
69
71
  else:
@@ -153,7 +155,7 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
153
155
  columns = x1.columns_value
154
156
  dtypes = x1.dtypes
155
157
  index_shape, index = np.nan, None
156
- if x1.index_value is not None and x1.index_value is not None:
158
+ if x1.index_value is not None and x2.index_value is not None:
157
159
  if x1.index_value.key == x2.index_value.key:
158
160
  dtypes = pd.Series(
159
161
  [
@@ -277,34 +279,42 @@ class DataFrameBinOpMixin(DataFrameOperatorMixin):
277
279
  raise NotImplementedError
278
280
  return self._call(x2, x1)
279
281
 
282
+ @classmethod
283
+ def estimate_size(
284
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameOperator"
285
+ ):
286
+ ctx[op.outputs[0].key] = max(ctx[inp.key] for inp in (op.inputs or ()))
287
+
280
288
 
281
- class DataFrameBinOp(DataFrameOperator, DataFrameBinOpMixin):
289
+ class DataFrameBinOp(DataFrameBinOpMixin, DataFrameOperator):
282
290
  axis = AnyField("axis", default=None)
283
291
  level = AnyField("level", default=None)
284
292
  fill_value = AnyField("fill_value", default=None)
285
293
  lhs = AnyField("lhs")
286
294
  rhs = AnyField("rhs")
287
295
 
288
- def __init__(self, output_types=None, **kw):
289
- super().__init__(_output_types=output_types, **kw)
296
+ def __init__(self, output_types=None, axis=0, **kw):
297
+ axis = validate_axis(axis)
298
+ super().__init__(_output_types=output_types, axis=axis, **kw)
290
299
 
291
- def _set_inputs(self, inputs):
292
- super()._set_inputs(inputs)
293
- if len(self._inputs) == 2:
294
- self.lhs = self._inputs[0]
295
- self.rhs = self._inputs[1]
300
+ @classmethod
301
+ def _set_inputs(cls, op: "DataFrameBinOp", inputs: List[EntityData]):
302
+ super()._set_inputs(op, inputs)
303
+ if len(op._inputs) == 2:
304
+ op.lhs = op._inputs[0]
305
+ op.rhs = op._inputs[1]
296
306
  else:
297
- if isinstance(self.lhs, ENTITY_TYPE):
298
- self.lhs = self._inputs[0]
299
- elif pd.api.types.is_scalar(self.lhs):
300
- self.rhs = self._inputs[0]
307
+ if isinstance(op.lhs, ENTITY_TYPE):
308
+ op.lhs = op._inputs[0]
309
+ elif isinstance(op.rhs, ENTITY_TYPE):
310
+ op.rhs = op._inputs[0]
301
311
 
302
312
 
303
313
  class DataFrameUnaryOpMixin(DataFrameOperatorMixin):
304
314
  __slots__ = ()
305
315
 
306
316
 
307
- class DataFrameUnaryOp(DataFrameOperator, DataFrameUnaryOpMixin):
317
+ class DataFrameUnaryOp(DataFrameUnaryOpMixin, DataFrameOperator):
308
318
  def __init__(self, output_types=None, **kw):
309
319
  super().__init__(_output_types=output_types, **kw)
310
320
 
@@ -337,9 +347,10 @@ class DataFrameUnaryOp(DataFrameOperator, DataFrameUnaryOpMixin):
337
347
 
338
348
 
339
349
  class DataFrameArithmeticTreeMixin:
340
- def _set_inputs(self, inputs):
341
- inputs = self._get_inputs_data(inputs)
342
- setattr(self, "_inputs", inputs)
350
+ @classmethod
351
+ def _set_inputs(cls, op: "DataFrameOperator", inputs: List[EntityData]):
352
+ inputs = op._get_inputs_data(inputs)
353
+ setattr(op, "_inputs", inputs)
343
354
 
344
355
 
345
356
  class DataFrameUnaryUfunc(DataFrameUnaryOp, TensorUfuncMixin):
@@ -15,6 +15,7 @@
15
15
  import datetime
16
16
  import operator
17
17
  from dataclasses import dataclass
18
+ from math import isinf
18
19
  from typing import Callable
19
20
 
20
21
  import numpy as np
@@ -22,6 +23,7 @@ import pandas as pd
22
23
  import pytest
23
24
 
24
25
  from ....core import OperatorType
26
+ from ....core.operator import estimate_size
25
27
  from ....tests.utils import assert_mf_index_dtype
26
28
  from ....utils import dataslots
27
29
  from ...core import IndexValue
@@ -185,6 +187,10 @@ def test_without_shuffle(func_name, func_opts):
185
187
  assert df3.index_value.key != df2.index_value.key
186
188
  assert df3.shape[1] == 11 # columns is recorded, so we can get it
187
189
 
190
+ result_ctx = {inp.key: 10 for inp in df3.op.inputs}
191
+ estimate_size(result_ctx, df3.op)
192
+ assert result_ctx[df3.key] >= 0 and not isinf(result_ctx[df3.key])
193
+
188
194
 
189
195
  @pytest.mark.parametrize("func_name, func_opts", binary_functions.items())
190
196
  def test_dataframe_and_series_with_align_map(func_name, func_opts):
@@ -1,5 +1,3 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
1
  # Copyright 1999-2025 Alibaba Group Holding Ltd.
4
2
  #
5
3
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -34,7 +32,7 @@ from ..core import (
34
32
  is_build_mode,
35
33
  register_output_types,
36
34
  )
37
- from ..core.entity.utils import refresh_tileable_shape
35
+ from ..core.entity.utils import fill_chunk_slices, refresh_tileable_shape
38
36
  from ..protocol import DataFrameTableMeta
39
37
  from ..serialization.serializables import (
40
38
  AnyField,
@@ -303,6 +301,10 @@ class IndexValue(Serializable):
303
301
  def names(self) -> list:
304
302
  return self._names
305
303
 
304
+ @property
305
+ def dtypes(self) -> pd.Series:
306
+ return pd.Series(self._dtypes, index=self._names)
307
+
306
308
  def to_pandas(self):
307
309
  data = getattr(self, "_data", None)
308
310
  sortorder = getattr(self, "_sortorder", None)
@@ -445,9 +447,7 @@ class DtypesValue(Serializable):
445
447
  def refresh_index_value(tileable: ENTITY_TYPE):
446
448
  index_to_index_values = dict()
447
449
  for chunk in tileable.chunks:
448
- if chunk.ndim == 1:
449
- index_to_index_values[chunk.index] = chunk.index_value
450
- elif chunk.index[1] == 0:
450
+ if chunk.ndim == 1 or chunk.index[1] == 0:
451
451
  index_to_index_values[chunk.index] = chunk.index_value
452
452
  index_value = merge_index_value(index_to_index_values, store_data=False)
453
453
  # keep key as original index_value's
@@ -637,11 +637,12 @@ class IndexData(HasShapeTileableData, _ToPandasMixin):
637
637
  def refresh_params(self):
638
638
  # refresh params when chunks updated
639
639
  refresh_tileable_shape(self)
640
- refresh_index_value(self)
641
- if self._dtype is None:
642
- self._dtype = self.chunks[0].dtype
643
- if self._name is None:
644
- self._name = self.chunks[0].name
640
+ fill_chunk_slices(self)
641
+ # refresh_index_value(self)
642
+ # if self._dtype is None:
643
+ # self._dtype = self.chunks[0].dtype
644
+ # if self._name is None:
645
+ # self._name = self.chunks[0].name
645
646
 
646
647
  def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
647
648
  pass
@@ -696,73 +697,6 @@ class IndexData(HasShapeTileableData, _ToPandasMixin):
696
697
 
697
698
  return from_index(self, dtype=dtype, extract_multi_index=extract_multi_index)
698
699
 
699
-
700
- class Index(HasShapeTileable, _ToPandasMixin):
701
- __slots__ = "_df_or_series", "_parent_key", "_axis"
702
- _allow_data_type_ = (IndexData,)
703
- type_name = "Index"
704
-
705
- def __new__(cls, data: Union[pd.Index, IndexData] = None, **_):
706
- if data is not None and not isinstance(data, pd.Index):
707
- # create corresponding Index class
708
- # according to type of index_value
709
- clz = globals()[type(data.index_value.value).__name__]
710
- else:
711
- clz = cls
712
- return object.__new__(clz)
713
-
714
- def __len__(self):
715
- return len(self._data)
716
-
717
- def __maxframe_tensor__(self, dtype=None, order="K"):
718
- return self._data.__maxframe_tensor__(dtype=dtype, order=order)
719
-
720
- def _get_df_or_series(self):
721
- obj = getattr(self, "_df_or_series", None)
722
- if obj is not None:
723
- return obj()
724
- return None
725
-
726
- def _set_df_or_series(self, df_or_series, axis):
727
- self._df_or_series = weakref.ref(df_or_series)
728
- self._parent_key = df_or_series.key
729
- self._axis = axis
730
-
731
- @property
732
- def T(self):
733
- """Return the transpose, which is by definition self."""
734
- return self
735
-
736
- @property
737
- def name(self):
738
- return self._data.name
739
-
740
- @name.setter
741
- def name(self, value):
742
- df_or_series = self._get_df_or_series()
743
- if df_or_series is not None and df_or_series.key == self._parent_key:
744
- df_or_series.rename_axis(value, axis=self._axis, inplace=True)
745
- self.data = df_or_series.axes[self._axis].data
746
- else:
747
- self.rename(value, inplace=True)
748
-
749
- @property
750
- def names(self):
751
- return self._data.names
752
-
753
- @names.setter
754
- def names(self, value):
755
- df_or_series = self._get_df_or_series()
756
- if df_or_series is not None:
757
- df_or_series.rename_axis(value, axis=self._axis, inplace=True)
758
- self.data = df_or_series.axes[self._axis].data
759
- else:
760
- self.rename(value, inplace=True)
761
-
762
- @property
763
- def values(self):
764
- return self.to_tensor()
765
-
766
700
  def to_frame(self, index: bool = True, name=None):
767
701
  """
768
702
  Create a DataFrame with a column containing the Index.
@@ -838,7 +772,7 @@ class Index(HasShapeTileable, _ToPandasMixin):
838
772
  columns = [name or self.name or 0]
839
773
  index_ = self if index else None
840
774
  return dataframe_from_tensor(
841
- self._data._to_maxframe_tensor(self, extract_multi_index=True),
775
+ self._to_maxframe_tensor(self, extract_multi_index=True),
842
776
  index=index_,
843
777
  columns=columns,
844
778
  )
@@ -867,6 +801,73 @@ class Index(HasShapeTileable, _ToPandasMixin):
867
801
  return series_from_index(self, index=index, name=name)
868
802
 
869
803
 
804
+ class Index(HasShapeTileable, _ToPandasMixin):
805
+ __slots__ = "_df_or_series", "_parent_key", "_axis"
806
+ _allow_data_type_ = (IndexData,)
807
+ type_name = "Index"
808
+
809
+ def __new__(cls, data: Union[pd.Index, IndexData] = None, **_):
810
+ if data is not None and not isinstance(data, pd.Index):
811
+ # create corresponding Index class
812
+ # according to type of index_value
813
+ clz = globals()[type(data.index_value.value).__name__]
814
+ else:
815
+ clz = cls
816
+ return object.__new__(clz)
817
+
818
+ def __len__(self):
819
+ return len(self._data)
820
+
821
+ def __maxframe_tensor__(self, dtype=None, order="K"):
822
+ return self._data.__maxframe_tensor__(dtype=dtype, order=order)
823
+
824
+ def _get_df_or_series(self):
825
+ obj = getattr(self, "_df_or_series", None)
826
+ if obj is not None:
827
+ return obj()
828
+ return None
829
+
830
+ def _set_df_or_series(self, df_or_series, axis):
831
+ self._df_or_series = weakref.ref(df_or_series)
832
+ self._parent_key = df_or_series.key
833
+ self._axis = axis
834
+
835
+ @property
836
+ def T(self):
837
+ """Return the transpose, which is by definition self."""
838
+ return self
839
+
840
+ @property
841
+ def name(self):
842
+ return self._data.name
843
+
844
+ @name.setter
845
+ def name(self, value):
846
+ df_or_series = self._get_df_or_series()
847
+ if df_or_series is not None and df_or_series.key == self._parent_key:
848
+ df_or_series.rename_axis(value, axis=self._axis, inplace=True)
849
+ self.data = df_or_series.axes[self._axis].data
850
+ else:
851
+ self.rename(value, inplace=True)
852
+
853
+ @property
854
+ def names(self):
855
+ return self._data.names
856
+
857
+ @names.setter
858
+ def names(self, value):
859
+ df_or_series = self._get_df_or_series()
860
+ if df_or_series is not None:
861
+ df_or_series.rename_axis(value, axis=self._axis, inplace=True)
862
+ self.data = df_or_series.axes[self._axis].data
863
+ else:
864
+ self.rename(value, inplace=True)
865
+
866
+ @property
867
+ def values(self):
868
+ return self.to_tensor()
869
+
870
+
870
871
  class RangeIndex(Index):
871
872
  __slots__ = ()
872
873
 
@@ -969,11 +970,12 @@ class BaseSeriesData(HasShapeTileableData, _ToPandasMixin):
969
970
  def refresh_params(self):
970
971
  # refresh params when chunks updated
971
972
  refresh_tileable_shape(self)
972
- refresh_index_value(self)
973
+ fill_chunk_slices(self)
974
+ # refresh_index_value(self)
973
975
  if self._dtype is None:
974
- self._dtype = self.chunks[0].dtype
975
- if self._name is None:
976
- self._name = self.chunks[0].name
976
+ self._dtype = getattr(self.chunks[0], "dtype", None)
977
+ # if self._name is None:
978
+ # self._name = self.chunks[0].name
977
979
 
978
980
  def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
979
981
  pass
@@ -1074,6 +1076,12 @@ class SeriesData(_BatchedFetcher, BaseSeriesData):
1074
1076
  into=into
1075
1077
  )
1076
1078
 
1079
+ def to_frame(self, name=None):
1080
+ from . import dataframe_from_tensor
1081
+
1082
+ name = name or self.name or 0
1083
+ return dataframe_from_tensor(self, columns=[name])
1084
+
1077
1085
 
1078
1086
  class Series(HasShapeTileable, _ToPandasMixin):
1079
1087
  __slots__ = ("_cache",)
@@ -1287,10 +1295,7 @@ class Series(HasShapeTileable, _ToPandasMixin):
1287
1295
  1 b
1288
1296
  2 c
1289
1297
  """
1290
- from . import dataframe_from_tensor
1291
-
1292
- name = name or self.name or 0
1293
- return dataframe_from_tensor(self, columns=[name])
1298
+ return self._data.to_frame(name=name)
1294
1299
 
1295
1300
  def between(self, left, right, inclusive="both"):
1296
1301
  """
@@ -1498,8 +1503,8 @@ class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
1498
1503
  "shape": self.shape,
1499
1504
  "dtypes": self.dtypes,
1500
1505
  "index_value": self.index_value,
1501
- "columns_value": self.columns_value,
1502
- "dtypes_value": self.dtypes_value,
1506
+ "columns_value": getattr(self, "columns_value", None),
1507
+ "dtypes_value": getattr(self, "dtypes_value", None),
1503
1508
  }
1504
1509
 
1505
1510
  def _set_params(self, new_params: Dict[str, Any]):
@@ -1531,8 +1536,9 @@ class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
1531
1536
  def refresh_params(self):
1532
1537
  # refresh params when chunks updated
1533
1538
  refresh_tileable_shape(self)
1534
- refresh_index_value(self)
1535
- refresh_dtypes(self)
1539
+ fill_chunk_slices(self)
1540
+ # refresh_index_value(self)
1541
+ # refresh_dtypes(self)
1536
1542
 
1537
1543
  def refresh_from_dtypes(self, dtypes: pd.Series) -> None:
1538
1544
  self._dtypes = dtypes
@@ -2227,6 +2233,7 @@ class CategoricalData(HasShapeTileableData, _ToPandasMixin):
2227
2233
  def refresh_params(self):
2228
2234
  # refresh params when chunks updated
2229
2235
  refresh_tileable_shape(self)
2236
+ fill_chunk_slices(self)
2230
2237
  if self._dtype is None:
2231
2238
  self._dtype = self.chunks[0].dtype
2232
2239
  if self._categories_value is None:
@@ -13,9 +13,10 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import asyncio
16
- from typing import List, Optional
16
+ from typing import List, MutableMapping, Optional, Union
17
17
 
18
18
  from ...serialization.serializables import Int64Field, StringField
19
+ from ...utils import estimate_pandas_size
19
20
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
20
21
 
21
22
 
@@ -79,3 +80,9 @@ class IncrementalIndexDatasource(HeadOptimizedDataSource):
79
80
  class PandasDataSourceOperator(DataFrameOperator):
80
81
  def get_data(self):
81
82
  return getattr(self, "data", None)
83
+
84
+ @classmethod
85
+ def estimate_size(
86
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "PandasDataSourceOperator"
87
+ ):
88
+ ctx[op.outputs[0].key] = estimate_pandas_size(op.get_data())
@@ -14,6 +14,7 @@
14
14
 
15
15
  import warnings
16
16
  from datetime import date, datetime, time
17
+ from typing import MutableMapping, Union
17
18
 
18
19
  import numpy as np
19
20
  import pandas as pd
@@ -162,6 +163,13 @@ class DataFrameDateRange(DataFrameOperator, DataFrameOperatorMixin):
162
163
  freq=self.freq,
163
164
  )
164
165
 
166
+ @classmethod
167
+ def estimate_size(
168
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameDateRange"
169
+ ): # pragma: no cover
170
+ # todo implement this to facilitate local computation
171
+ ctx[op.outputs[0].key] = float("inf")
172
+
165
173
 
166
174
  _midnight = time(0, 0)
167
175
 
@@ -12,7 +12,10 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from typing import List
16
+
15
17
  from ... import opcodes
18
+ from ...core import EntityData
16
19
  from ...serialization.serializables import AnyField, KeyField
17
20
  from ..initializer import Index
18
21
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
@@ -25,11 +28,12 @@ class SeriesFromIndex(DataFrameOperator, DataFrameOperatorMixin):
25
28
  index = KeyField("index")
26
29
  name = AnyField("name", default=None)
27
30
 
28
- def _set_inputs(self, inputs):
29
- super()._set_inputs(inputs)
30
- self.input_ = self._inputs[0]
31
- if len(self._inputs) > 1:
32
- self.index = self._inputs[1]
31
+ @classmethod
32
+ def _set_inputs(cls, op: "SeriesFromIndex", inputs: List[EntityData]):
33
+ super()._set_inputs(op, inputs)
34
+ op.input_ = op._inputs[0]
35
+ if len(op._inputs) > 1:
36
+ op.index = op._inputs[1]
33
37
 
34
38
  def __call__(self, index, new_index=None, name=None):
35
39
  inputs = [index]