maxframe 1.3.1__cp38-cp38-win_amd64.whl → 2.0.0b1__cp38-cp38-win_amd64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (639) hide show
  1. maxframe/_utils.cp38-win_amd64.pyd +0 -0
  2. maxframe/_utils.pyi +21 -0
  3. maxframe/_utils.pyx +4 -3
  4. maxframe/codegen/__init__.py +27 -0
  5. maxframe/{codegen.py → codegen/core.py} +49 -43
  6. maxframe/codegen/spe/__init__.py +16 -0
  7. maxframe/codegen/spe/core.py +307 -0
  8. maxframe/codegen/spe/dataframe/__init__.py +37 -0
  9. maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
  10. maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
  11. maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
  12. maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
  13. maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
  14. maxframe/codegen/spe/dataframe/datasource.py +181 -0
  15. maxframe/codegen/spe/dataframe/datastore.py +204 -0
  16. maxframe/codegen/spe/dataframe/extensions.py +63 -0
  17. maxframe/codegen/spe/dataframe/fetch.py +26 -0
  18. maxframe/codegen/spe/dataframe/groupby.py +224 -0
  19. maxframe/codegen/spe/dataframe/indexing.py +238 -0
  20. maxframe/codegen/spe/dataframe/merge.py +73 -0
  21. maxframe/codegen/spe/dataframe/misc.py +286 -0
  22. maxframe/codegen/spe/dataframe/missing.py +64 -0
  23. maxframe/codegen/spe/dataframe/reduction.py +160 -0
  24. maxframe/codegen/spe/dataframe/sort.py +83 -0
  25. maxframe/codegen/spe/dataframe/statistics.py +46 -0
  26. maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
  27. maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
  29. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
  30. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
  31. maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
  32. maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
  33. maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
  34. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
  35. maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
  36. maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
  37. maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
  38. maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
  39. maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
  40. maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
  41. maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
  42. maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
  43. maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
  44. maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
  45. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
  46. maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
  47. maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
  48. maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
  49. maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
  50. maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
  51. maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
  52. maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
  53. maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
  54. maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
  55. maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
  56. maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
  57. maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
  58. maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
  59. maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
  60. maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
  61. maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
  62. maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
  63. maxframe/codegen/spe/dataframe/tseries.py +46 -0
  64. maxframe/codegen/spe/dataframe/udf.py +62 -0
  65. maxframe/codegen/spe/dataframe/value_counts.py +31 -0
  66. maxframe/codegen/spe/dataframe/window.py +65 -0
  67. maxframe/codegen/spe/learn/__init__.py +15 -0
  68. maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
  69. maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
  70. maxframe/codegen/spe/learn/contrib/models.py +41 -0
  71. maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
  72. maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
  73. maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
  74. maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
  75. maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
  76. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
  77. maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
  78. maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
  79. maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
  80. maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
  81. maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
  82. maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
  83. maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
  84. maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
  85. maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
  86. maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
  87. maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
  88. maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
  89. maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
  90. maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
  91. maxframe/codegen/spe/learn/utils/__init__.py +15 -0
  92. maxframe/codegen/spe/learn/utils/checks.py +55 -0
  93. maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
  94. maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
  95. maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
  96. maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
  97. maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
  98. maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
  99. maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
  100. maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
  101. maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
  102. maxframe/codegen/spe/learn/utils/validation.py +35 -0
  103. maxframe/codegen/spe/objects.py +26 -0
  104. maxframe/codegen/spe/remote.py +29 -0
  105. maxframe/codegen/spe/tensor/__init__.py +28 -0
  106. maxframe/codegen/spe/tensor/arithmetic.py +95 -0
  107. maxframe/codegen/spe/tensor/core.py +41 -0
  108. maxframe/codegen/spe/tensor/datasource.py +165 -0
  109. maxframe/codegen/spe/tensor/extensions.py +35 -0
  110. maxframe/codegen/spe/tensor/fetch.py +26 -0
  111. maxframe/codegen/spe/tensor/indexing.py +63 -0
  112. maxframe/codegen/spe/tensor/linalg.py +63 -0
  113. maxframe/codegen/spe/tensor/merge.py +31 -0
  114. maxframe/codegen/spe/tensor/misc.py +121 -0
  115. maxframe/codegen/spe/tensor/random.py +29 -0
  116. maxframe/codegen/spe/tensor/reduction.py +39 -0
  117. maxframe/codegen/spe/tensor/reshape.py +26 -0
  118. maxframe/codegen/spe/tensor/sort.py +42 -0
  119. maxframe/codegen/spe/tensor/special.py +35 -0
  120. maxframe/codegen/spe/tensor/statistics.py +24 -0
  121. maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
  122. maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
  123. maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
  124. maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
  125. maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
  126. maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
  127. maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
  128. maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
  129. maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
  130. maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
  131. maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
  132. maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
  133. maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
  134. maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
  135. maxframe/codegen/spe/tests/__init__.py +13 -0
  136. maxframe/codegen/spe/tests/test_remote.py +29 -0
  137. maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
  138. maxframe/codegen/spe/utils.py +54 -0
  139. maxframe/codegen/tests/__init__.py +13 -0
  140. maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
  141. maxframe/config/__init__.py +1 -1
  142. maxframe/config/config.py +50 -23
  143. maxframe/config/tests/test_config.py +4 -12
  144. maxframe/config/validators.py +5 -0
  145. maxframe/conftest.py +38 -10
  146. maxframe/core/__init__.py +1 -0
  147. maxframe/core/context.py +110 -0
  148. maxframe/core/entity/__init__.py +1 -0
  149. maxframe/core/entity/core.py +0 -7
  150. maxframe/core/entity/objects.py +19 -5
  151. maxframe/core/entity/output_types.py +11 -0
  152. maxframe/core/entity/tests/test_objects.py +11 -12
  153. maxframe/core/entity/tileables.py +3 -1
  154. maxframe/core/entity/utils.py +15 -0
  155. maxframe/core/graph/__init__.py +6 -1
  156. maxframe/core/graph/builder/base.py +5 -1
  157. maxframe/core/graph/core.cp38-win_amd64.pyd +0 -0
  158. maxframe/core/graph/core.pyx +17 -6
  159. maxframe/core/graph/entity.py +18 -6
  160. maxframe/core/operator/__init__.py +8 -3
  161. maxframe/core/operator/base.py +35 -12
  162. maxframe/core/operator/core.py +37 -14
  163. maxframe/core/operator/fetch.py +5 -18
  164. maxframe/core/operator/objects.py +0 -20
  165. maxframe/core/operator/shuffle.py +6 -72
  166. maxframe/dataframe/__init__.py +1 -0
  167. maxframe/dataframe/accessors/datetime_/core.py +7 -4
  168. maxframe/dataframe/accessors/string_/core.py +9 -6
  169. maxframe/dataframe/arithmetic/core.py +31 -20
  170. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
  171. maxframe/dataframe/core.py +98 -91
  172. maxframe/dataframe/datasource/core.py +8 -1
  173. maxframe/dataframe/datasource/date_range.py +8 -0
  174. maxframe/dataframe/datasource/from_index.py +9 -5
  175. maxframe/dataframe/datasource/from_records.py +9 -2
  176. maxframe/dataframe/datasource/from_tensor.py +32 -21
  177. maxframe/dataframe/datasource/read_csv.py +8 -2
  178. maxframe/dataframe/datasource/read_odps_query.py +33 -3
  179. maxframe/dataframe/datasource/read_odps_table.py +20 -5
  180. maxframe/dataframe/datasource/read_parquet.py +8 -3
  181. maxframe/dataframe/datasource/tests/test_datasource.py +33 -0
  182. maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
  183. maxframe/dataframe/datastore/to_csv.py +7 -3
  184. maxframe/dataframe/datastore/to_odps.py +42 -6
  185. maxframe/dataframe/extensions/__init__.py +6 -1
  186. maxframe/dataframe/extensions/apply_chunk.py +96 -136
  187. maxframe/dataframe/extensions/flatjson.py +3 -2
  188. maxframe/dataframe/extensions/flatmap.py +15 -7
  189. maxframe/dataframe/fetch/core.py +12 -1
  190. maxframe/dataframe/groupby/__init__.py +7 -0
  191. maxframe/dataframe/groupby/aggregation.py +9 -8
  192. maxframe/dataframe/groupby/apply.py +50 -74
  193. maxframe/dataframe/groupby/apply_chunk.py +393 -0
  194. maxframe/dataframe/groupby/core.py +80 -17
  195. maxframe/dataframe/groupby/extensions.py +26 -0
  196. maxframe/dataframe/groupby/fill.py +9 -4
  197. maxframe/dataframe/groupby/sample.py +7 -7
  198. maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
  199. maxframe/dataframe/groupby/transform.py +57 -54
  200. maxframe/dataframe/indexing/align.py +7 -6
  201. maxframe/dataframe/indexing/getitem.py +9 -8
  202. maxframe/dataframe/indexing/iloc.py +28 -23
  203. maxframe/dataframe/indexing/insert.py +7 -3
  204. maxframe/dataframe/indexing/loc.py +9 -8
  205. maxframe/dataframe/indexing/reindex.py +36 -30
  206. maxframe/dataframe/indexing/rename_axis.py +18 -10
  207. maxframe/dataframe/indexing/reset_index.py +0 -2
  208. maxframe/dataframe/indexing/sample.py +13 -9
  209. maxframe/dataframe/indexing/set_axis.py +9 -6
  210. maxframe/dataframe/indexing/setitem.py +8 -5
  211. maxframe/dataframe/indexing/where.py +12 -9
  212. maxframe/dataframe/merge/__init__.py +0 -1
  213. maxframe/dataframe/merge/concat.py +10 -31
  214. maxframe/dataframe/merge/merge.py +2 -24
  215. maxframe/dataframe/misc/__init__.py +6 -0
  216. maxframe/dataframe/misc/_duplicate.py +7 -3
  217. maxframe/dataframe/misc/apply.py +106 -139
  218. maxframe/dataframe/misc/astype.py +3 -2
  219. maxframe/dataframe/misc/case_when.py +11 -7
  220. maxframe/dataframe/misc/cut.py +11 -10
  221. maxframe/dataframe/misc/describe.py +7 -3
  222. maxframe/dataframe/misc/drop.py +13 -11
  223. maxframe/dataframe/misc/eval.py +0 -2
  224. maxframe/dataframe/misc/get_dummies.py +78 -49
  225. maxframe/dataframe/misc/isin.py +13 -10
  226. maxframe/dataframe/misc/map.py +21 -6
  227. maxframe/dataframe/misc/melt.py +8 -1
  228. maxframe/dataframe/misc/pivot.py +232 -0
  229. maxframe/dataframe/misc/pivot_table.py +52 -40
  230. maxframe/dataframe/misc/rechunk.py +59 -0
  231. maxframe/dataframe/misc/shift.py +7 -4
  232. maxframe/dataframe/misc/stack.py +5 -3
  233. maxframe/dataframe/misc/tests/test_misc.py +167 -1
  234. maxframe/dataframe/misc/transform.py +63 -65
  235. maxframe/dataframe/misc/value_counts.py +7 -4
  236. maxframe/dataframe/missing/dropna.py +16 -7
  237. maxframe/dataframe/missing/fillna.py +18 -10
  238. maxframe/dataframe/missing/replace.py +10 -6
  239. maxframe/dataframe/missing/tests/test_missing.py +2 -2
  240. maxframe/dataframe/operators.py +1 -27
  241. maxframe/dataframe/reduction/aggregation.py +65 -3
  242. maxframe/dataframe/reduction/core.py +3 -1
  243. maxframe/dataframe/reduction/median.py +1 -1
  244. maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
  245. maxframe/dataframe/reduction/unique.py +53 -7
  246. maxframe/dataframe/statistics/corr.py +9 -6
  247. maxframe/dataframe/statistics/quantile.py +9 -6
  248. maxframe/dataframe/tseries/to_datetime.py +6 -4
  249. maxframe/dataframe/utils.py +219 -31
  250. maxframe/dataframe/window/rolling.py +7 -4
  251. maxframe/env.py +1 -0
  252. maxframe/errors.py +9 -0
  253. maxframe/extension.py +13 -2
  254. maxframe/io/objects/core.py +67 -51
  255. maxframe/io/objects/tensor.py +73 -17
  256. maxframe/io/objects/tests/test_object_io.py +8 -55
  257. maxframe/io/odpsio/arrow.py +15 -2
  258. maxframe/io/odpsio/schema.py +43 -13
  259. maxframe/io/odpsio/tableio.py +63 -11
  260. maxframe/io/odpsio/tests/test_arrow.py +1 -2
  261. maxframe/io/odpsio/tests/test_schema.py +114 -1
  262. maxframe/io/odpsio/tests/test_tableio.py +42 -0
  263. maxframe/io/odpsio/tests/test_volumeio.py +22 -48
  264. maxframe/learn/__init__.py +2 -2
  265. maxframe/learn/contrib/__init__.py +2 -2
  266. maxframe/learn/contrib/graph/connected_components.py +2 -1
  267. maxframe/learn/contrib/lightgbm/__init__.py +33 -0
  268. maxframe/learn/contrib/lightgbm/_predict.py +138 -0
  269. maxframe/learn/contrib/lightgbm/_train.py +163 -0
  270. maxframe/learn/contrib/lightgbm/callback.py +114 -0
  271. maxframe/learn/contrib/lightgbm/classifier.py +199 -0
  272. maxframe/learn/contrib/lightgbm/core.py +372 -0
  273. maxframe/learn/contrib/lightgbm/dataset.py +153 -0
  274. maxframe/learn/contrib/lightgbm/regressor.py +29 -0
  275. maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
  276. maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
  277. maxframe/learn/contrib/models.py +38 -9
  278. maxframe/learn/contrib/utils.py +55 -0
  279. maxframe/learn/contrib/xgboost/callback.py +86 -0
  280. maxframe/learn/contrib/xgboost/classifier.py +26 -30
  281. maxframe/learn/contrib/xgboost/core.py +53 -42
  282. maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
  283. maxframe/learn/contrib/xgboost/predict.py +16 -9
  284. maxframe/learn/contrib/xgboost/regressor.py +28 -27
  285. maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
  286. maxframe/learn/contrib/xgboost/train.py +59 -16
  287. maxframe/learn/core.py +252 -0
  288. maxframe/learn/datasets/__init__.py +20 -0
  289. maxframe/learn/datasets/samples_generator.py +628 -0
  290. maxframe/learn/linear_model/__init__.py +15 -0
  291. maxframe/learn/linear_model/_base.py +163 -0
  292. maxframe/learn/linear_model/_lin_reg.py +175 -0
  293. maxframe/learn/metrics/__init__.py +25 -0
  294. maxframe/learn/metrics/_check_targets.py +95 -0
  295. maxframe/learn/metrics/_classification.py +1121 -0
  296. maxframe/learn/metrics/_regression.py +256 -0
  297. maxframe/learn/model_selection/__init__.py +15 -0
  298. maxframe/learn/model_selection/_split.py +451 -0
  299. maxframe/learn/model_selection/tests/__init__.py +13 -0
  300. maxframe/learn/model_selection/tests/test_split.py +156 -0
  301. maxframe/learn/preprocessing/__init__.py +16 -0
  302. maxframe/learn/preprocessing/_data/__init__.py +17 -0
  303. maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
  304. maxframe/learn/preprocessing/_data/normalize.py +127 -0
  305. maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
  306. maxframe/learn/preprocessing/_data/utils.py +79 -0
  307. maxframe/learn/preprocessing/_label/__init__.py +16 -0
  308. maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
  309. maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
  310. maxframe/learn/utils/__init__.py +4 -0
  311. maxframe/learn/utils/_encode.py +314 -0
  312. maxframe/learn/utils/checks.py +161 -0
  313. maxframe/learn/utils/core.py +33 -0
  314. maxframe/learn/utils/extmath.py +176 -0
  315. maxframe/learn/utils/multiclass.py +292 -0
  316. maxframe/learn/utils/shuffle.py +114 -0
  317. maxframe/learn/utils/sparsefuncs.py +87 -0
  318. maxframe/learn/utils/validation.py +775 -0
  319. maxframe/lib/__init__.py +0 -2
  320. maxframe/lib/compat.py +145 -0
  321. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  322. maxframe/lib/mmh3.cp38-win_amd64.pyd +0 -0
  323. maxframe/lib/sparse/__init__.py +10 -15
  324. maxframe/lib/sparse/array.py +45 -33
  325. maxframe/lib/sparse/core.py +0 -2
  326. maxframe/lib/sparse/linalg.py +31 -0
  327. maxframe/lib/sparse/matrix.py +5 -2
  328. maxframe/lib/sparse/tests/__init__.py +0 -2
  329. maxframe/lib/sparse/tests/test_sparse.py +53 -53
  330. maxframe/lib/sparse/vector.py +0 -2
  331. maxframe/mixin.py +59 -2
  332. maxframe/opcodes.py +13 -5
  333. maxframe/protocol.py +67 -14
  334. maxframe/remote/core.py +16 -14
  335. maxframe/remote/run_script.py +6 -3
  336. maxframe/serialization/__init__.py +2 -0
  337. maxframe/serialization/core.cp38-win_amd64.pyd +0 -0
  338. maxframe/serialization/core.pxd +3 -0
  339. maxframe/serialization/core.pyi +3 -1
  340. maxframe/serialization/core.pyx +82 -4
  341. maxframe/serialization/pandas.py +5 -1
  342. maxframe/serialization/serializables/core.py +6 -5
  343. maxframe/serialization/serializables/field.py +2 -2
  344. maxframe/serialization/serializables/tests/test_field_type.py +3 -5
  345. maxframe/serialization/tests/test_serial.py +27 -0
  346. maxframe/session.py +4 -71
  347. maxframe/sperunner.py +165 -0
  348. maxframe/tensor/__init__.py +35 -2
  349. maxframe/tensor/arithmetic/__init__.py +2 -4
  350. maxframe/tensor/arithmetic/abs.py +0 -2
  351. maxframe/tensor/arithmetic/absolute.py +0 -2
  352. maxframe/tensor/arithmetic/add.py +34 -4
  353. maxframe/tensor/arithmetic/angle.py +0 -2
  354. maxframe/tensor/arithmetic/arccos.py +1 -4
  355. maxframe/tensor/arithmetic/arccosh.py +1 -3
  356. maxframe/tensor/arithmetic/arcsin.py +0 -2
  357. maxframe/tensor/arithmetic/arcsinh.py +0 -2
  358. maxframe/tensor/arithmetic/arctan.py +0 -2
  359. maxframe/tensor/arithmetic/arctan2.py +0 -2
  360. maxframe/tensor/arithmetic/arctanh.py +0 -2
  361. maxframe/tensor/arithmetic/around.py +0 -2
  362. maxframe/tensor/arithmetic/bitand.py +0 -2
  363. maxframe/tensor/arithmetic/bitor.py +1 -3
  364. maxframe/tensor/arithmetic/bitxor.py +1 -3
  365. maxframe/tensor/arithmetic/cbrt.py +0 -2
  366. maxframe/tensor/arithmetic/ceil.py +0 -2
  367. maxframe/tensor/arithmetic/clip.py +13 -13
  368. maxframe/tensor/arithmetic/conj.py +0 -2
  369. maxframe/tensor/arithmetic/copysign.py +0 -2
  370. maxframe/tensor/arithmetic/core.py +47 -39
  371. maxframe/tensor/arithmetic/cos.py +1 -3
  372. maxframe/tensor/arithmetic/cosh.py +0 -2
  373. maxframe/tensor/arithmetic/deg2rad.py +0 -2
  374. maxframe/tensor/arithmetic/degrees.py +0 -2
  375. maxframe/tensor/arithmetic/divide.py +0 -2
  376. maxframe/tensor/arithmetic/equal.py +0 -2
  377. maxframe/tensor/arithmetic/exp.py +1 -3
  378. maxframe/tensor/arithmetic/exp2.py +0 -2
  379. maxframe/tensor/arithmetic/expm1.py +0 -2
  380. maxframe/tensor/arithmetic/fabs.py +0 -2
  381. maxframe/tensor/arithmetic/fix.py +0 -2
  382. maxframe/tensor/arithmetic/float_power.py +0 -2
  383. maxframe/tensor/arithmetic/floor.py +0 -2
  384. maxframe/tensor/arithmetic/floordiv.py +0 -2
  385. maxframe/tensor/arithmetic/fmax.py +0 -2
  386. maxframe/tensor/arithmetic/fmin.py +0 -2
  387. maxframe/tensor/arithmetic/fmod.py +0 -2
  388. maxframe/tensor/arithmetic/frexp.py +6 -2
  389. maxframe/tensor/arithmetic/greater.py +0 -2
  390. maxframe/tensor/arithmetic/greater_equal.py +0 -2
  391. maxframe/tensor/arithmetic/hypot.py +0 -2
  392. maxframe/tensor/arithmetic/i0.py +1 -3
  393. maxframe/tensor/arithmetic/imag.py +0 -2
  394. maxframe/tensor/arithmetic/invert.py +1 -3
  395. maxframe/tensor/arithmetic/isclose.py +0 -2
  396. maxframe/tensor/arithmetic/iscomplex.py +0 -2
  397. maxframe/tensor/arithmetic/isfinite.py +1 -3
  398. maxframe/tensor/arithmetic/isinf.py +0 -2
  399. maxframe/tensor/arithmetic/isnan.py +0 -2
  400. maxframe/tensor/arithmetic/isreal.py +0 -2
  401. maxframe/tensor/arithmetic/ldexp.py +0 -2
  402. maxframe/tensor/arithmetic/less.py +0 -2
  403. maxframe/tensor/arithmetic/less_equal.py +0 -2
  404. maxframe/tensor/arithmetic/log.py +1 -3
  405. maxframe/tensor/arithmetic/log10.py +1 -3
  406. maxframe/tensor/arithmetic/log1p.py +1 -3
  407. maxframe/tensor/arithmetic/log2.py +1 -3
  408. maxframe/tensor/arithmetic/logaddexp.py +0 -2
  409. maxframe/tensor/arithmetic/logaddexp2.py +0 -2
  410. maxframe/tensor/arithmetic/logical_and.py +0 -2
  411. maxframe/tensor/arithmetic/logical_not.py +1 -3
  412. maxframe/tensor/arithmetic/logical_or.py +0 -2
  413. maxframe/tensor/arithmetic/logical_xor.py +0 -2
  414. maxframe/tensor/arithmetic/lshift.py +0 -2
  415. maxframe/tensor/arithmetic/maximum.py +0 -2
  416. maxframe/tensor/arithmetic/minimum.py +0 -2
  417. maxframe/tensor/arithmetic/mod.py +0 -2
  418. maxframe/tensor/arithmetic/modf.py +6 -2
  419. maxframe/tensor/arithmetic/multiply.py +37 -4
  420. maxframe/tensor/arithmetic/nan_to_num.py +0 -2
  421. maxframe/tensor/arithmetic/negative.py +0 -2
  422. maxframe/tensor/arithmetic/nextafter.py +0 -2
  423. maxframe/tensor/arithmetic/not_equal.py +0 -2
  424. maxframe/tensor/arithmetic/positive.py +0 -2
  425. maxframe/tensor/arithmetic/power.py +0 -2
  426. maxframe/tensor/arithmetic/rad2deg.py +0 -2
  427. maxframe/tensor/arithmetic/radians.py +0 -2
  428. maxframe/tensor/arithmetic/real.py +0 -2
  429. maxframe/tensor/arithmetic/reciprocal.py +5 -3
  430. maxframe/tensor/arithmetic/rint.py +1 -3
  431. maxframe/tensor/arithmetic/rshift.py +0 -2
  432. maxframe/tensor/arithmetic/setimag.py +0 -2
  433. maxframe/tensor/arithmetic/setreal.py +0 -2
  434. maxframe/tensor/arithmetic/sign.py +0 -2
  435. maxframe/tensor/arithmetic/signbit.py +0 -2
  436. maxframe/tensor/arithmetic/sin.py +0 -2
  437. maxframe/tensor/arithmetic/sinc.py +1 -3
  438. maxframe/tensor/arithmetic/sinh.py +0 -2
  439. maxframe/tensor/arithmetic/spacing.py +0 -2
  440. maxframe/tensor/arithmetic/sqrt.py +0 -2
  441. maxframe/tensor/arithmetic/square.py +0 -2
  442. maxframe/tensor/arithmetic/subtract.py +4 -2
  443. maxframe/tensor/arithmetic/tan.py +0 -2
  444. maxframe/tensor/arithmetic/tanh.py +0 -2
  445. maxframe/tensor/arithmetic/tests/__init__.py +0 -2
  446. maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
  447. maxframe/tensor/arithmetic/truediv.py +0 -2
  448. maxframe/tensor/arithmetic/trunc.py +0 -2
  449. maxframe/tensor/arithmetic/utils.py +32 -6
  450. maxframe/tensor/array_utils.py +3 -25
  451. maxframe/tensor/core.py +6 -6
  452. maxframe/tensor/datasource/__init__.py +10 -2
  453. maxframe/tensor/datasource/arange.py +0 -2
  454. maxframe/tensor/datasource/array.py +3 -22
  455. maxframe/tensor/datasource/core.py +15 -10
  456. maxframe/tensor/datasource/diag.py +140 -0
  457. maxframe/tensor/datasource/diagflat.py +69 -0
  458. maxframe/tensor/datasource/empty.py +0 -2
  459. maxframe/tensor/datasource/eye.py +95 -0
  460. maxframe/tensor/datasource/from_dataframe.py +0 -2
  461. maxframe/tensor/datasource/from_dense.py +0 -17
  462. maxframe/tensor/datasource/from_sparse.py +0 -2
  463. maxframe/tensor/datasource/full.py +0 -2
  464. maxframe/tensor/datasource/identity.py +54 -0
  465. maxframe/tensor/datasource/indices.py +115 -0
  466. maxframe/tensor/datasource/linspace.py +140 -0
  467. maxframe/tensor/datasource/meshgrid.py +135 -0
  468. maxframe/tensor/datasource/ones.py +8 -3
  469. maxframe/tensor/datasource/tests/test_datasource.py +32 -1
  470. maxframe/tensor/datasource/tri_array.py +107 -0
  471. maxframe/tensor/datasource/zeros.py +7 -3
  472. maxframe/tensor/extensions/__init__.py +31 -0
  473. maxframe/tensor/extensions/accessor.py +25 -0
  474. maxframe/tensor/extensions/apply_chunk.py +137 -0
  475. maxframe/tensor/indexing/__init__.py +1 -1
  476. maxframe/tensor/indexing/choose.py +8 -6
  477. maxframe/tensor/indexing/compress.py +0 -2
  478. maxframe/tensor/indexing/extract.py +0 -2
  479. maxframe/tensor/indexing/fill_diagonal.py +9 -6
  480. maxframe/tensor/indexing/flatnonzero.py +1 -3
  481. maxframe/tensor/indexing/getitem.py +10 -43
  482. maxframe/tensor/indexing/nonzero.py +2 -4
  483. maxframe/tensor/indexing/setitem.py +19 -9
  484. maxframe/tensor/indexing/slice.py +6 -3
  485. maxframe/tensor/indexing/take.py +0 -2
  486. maxframe/tensor/indexing/tests/__init__.py +0 -2
  487. maxframe/tensor/indexing/tests/test_indexing.py +0 -2
  488. maxframe/tensor/indexing/unravel_index.py +6 -6
  489. maxframe/tensor/lib/__init__.py +16 -0
  490. maxframe/tensor/lib/index_tricks.py +404 -0
  491. maxframe/tensor/linalg/__init__.py +36 -0
  492. maxframe/tensor/linalg/dot.py +145 -0
  493. maxframe/tensor/linalg/inner.py +36 -0
  494. maxframe/tensor/linalg/inv.py +83 -0
  495. maxframe/tensor/linalg/lu.py +115 -0
  496. maxframe/tensor/linalg/matmul.py +225 -0
  497. maxframe/tensor/linalg/qr.py +124 -0
  498. maxframe/tensor/linalg/solve_triangular.py +103 -0
  499. maxframe/tensor/linalg/svd.py +167 -0
  500. maxframe/tensor/linalg/tensordot.py +213 -0
  501. maxframe/tensor/linalg/vdot.py +73 -0
  502. maxframe/tensor/merge/__init__.py +4 -0
  503. maxframe/tensor/merge/append.py +74 -0
  504. maxframe/tensor/merge/column_stack.py +63 -0
  505. maxframe/tensor/merge/concatenate.py +3 -2
  506. maxframe/tensor/merge/dstack.py +71 -0
  507. maxframe/tensor/merge/hstack.py +70 -0
  508. maxframe/tensor/merge/stack.py +0 -2
  509. maxframe/tensor/merge/tests/test_merge.py +0 -2
  510. maxframe/tensor/misc/__init__.py +18 -5
  511. maxframe/tensor/misc/astype.py +10 -8
  512. maxframe/tensor/misc/broadcast_to.py +1 -1
  513. maxframe/tensor/misc/copy.py +64 -0
  514. maxframe/tensor/misc/diff.py +115 -0
  515. maxframe/tensor/misc/flatten.py +63 -0
  516. maxframe/tensor/misc/in1d.py +94 -0
  517. maxframe/tensor/misc/isin.py +130 -0
  518. maxframe/tensor/misc/ndim.py +53 -0
  519. maxframe/tensor/misc/ravel.py +0 -2
  520. maxframe/tensor/misc/repeat.py +129 -0
  521. maxframe/tensor/misc/searchsorted.py +147 -0
  522. maxframe/tensor/misc/setdiff1d.py +58 -0
  523. maxframe/tensor/misc/squeeze.py +117 -0
  524. maxframe/tensor/misc/swapaxes.py +113 -0
  525. maxframe/tensor/misc/tests/test_misc.py +0 -2
  526. maxframe/tensor/misc/transpose.py +8 -4
  527. maxframe/tensor/misc/trapezoid.py +123 -0
  528. maxframe/tensor/misc/unique.py +0 -1
  529. maxframe/tensor/misc/where.py +10 -8
  530. maxframe/tensor/operators.py +0 -34
  531. maxframe/tensor/random/__init__.py +3 -5
  532. maxframe/tensor/random/binomial.py +0 -2
  533. maxframe/tensor/random/bytes.py +0 -2
  534. maxframe/tensor/random/chisquare.py +0 -2
  535. maxframe/tensor/random/choice.py +9 -8
  536. maxframe/tensor/random/core.py +20 -5
  537. maxframe/tensor/random/dirichlet.py +0 -2
  538. maxframe/tensor/random/exponential.py +0 -2
  539. maxframe/tensor/random/f.py +2 -4
  540. maxframe/tensor/random/gamma.py +0 -2
  541. maxframe/tensor/random/geometric.py +0 -2
  542. maxframe/tensor/random/gumbel.py +0 -2
  543. maxframe/tensor/random/hypergeometric.py +0 -2
  544. maxframe/tensor/random/laplace.py +2 -4
  545. maxframe/tensor/random/logistic.py +0 -2
  546. maxframe/tensor/random/lognormal.py +0 -2
  547. maxframe/tensor/random/logseries.py +0 -2
  548. maxframe/tensor/random/multinomial.py +0 -2
  549. maxframe/tensor/random/multivariate_normal.py +0 -2
  550. maxframe/tensor/random/negative_binomial.py +0 -2
  551. maxframe/tensor/random/noncentral_chisquare.py +0 -2
  552. maxframe/tensor/random/noncentral_f.py +1 -3
  553. maxframe/tensor/random/normal.py +0 -2
  554. maxframe/tensor/random/pareto.py +0 -2
  555. maxframe/tensor/random/permutation.py +6 -3
  556. maxframe/tensor/random/poisson.py +0 -2
  557. maxframe/tensor/random/power.py +0 -2
  558. maxframe/tensor/random/rand.py +0 -2
  559. maxframe/tensor/random/randint.py +0 -2
  560. maxframe/tensor/random/randn.py +0 -2
  561. maxframe/tensor/random/random_integers.py +0 -2
  562. maxframe/tensor/random/random_sample.py +0 -2
  563. maxframe/tensor/random/rayleigh.py +0 -2
  564. maxframe/tensor/random/standard_cauchy.py +0 -2
  565. maxframe/tensor/random/standard_exponential.py +0 -2
  566. maxframe/tensor/random/standard_gamma.py +0 -2
  567. maxframe/tensor/random/standard_normal.py +0 -2
  568. maxframe/tensor/random/standard_t.py +0 -2
  569. maxframe/tensor/random/tests/__init__.py +0 -2
  570. maxframe/tensor/random/tests/test_random.py +0 -2
  571. maxframe/tensor/random/triangular.py +0 -2
  572. maxframe/tensor/random/uniform.py +0 -2
  573. maxframe/tensor/random/vonmises.py +0 -2
  574. maxframe/tensor/random/wald.py +0 -2
  575. maxframe/tensor/random/weibull.py +0 -2
  576. maxframe/tensor/random/zipf.py +0 -2
  577. maxframe/tensor/reduction/__init__.py +0 -2
  578. maxframe/tensor/reduction/all.py +0 -2
  579. maxframe/tensor/reduction/allclose.py +0 -2
  580. maxframe/tensor/reduction/any.py +0 -2
  581. maxframe/tensor/reduction/argmax.py +1 -3
  582. maxframe/tensor/reduction/argmin.py +1 -3
  583. maxframe/tensor/reduction/array_equal.py +0 -2
  584. maxframe/tensor/reduction/core.py +0 -2
  585. maxframe/tensor/reduction/count_nonzero.py +0 -2
  586. maxframe/tensor/reduction/cumprod.py +0 -2
  587. maxframe/tensor/reduction/cumsum.py +0 -2
  588. maxframe/tensor/reduction/max.py +0 -2
  589. maxframe/tensor/reduction/mean.py +0 -2
  590. maxframe/tensor/reduction/min.py +0 -2
  591. maxframe/tensor/reduction/nanargmax.py +0 -2
  592. maxframe/tensor/reduction/nanargmin.py +0 -2
  593. maxframe/tensor/reduction/nancumprod.py +0 -2
  594. maxframe/tensor/reduction/nancumsum.py +0 -2
  595. maxframe/tensor/reduction/nanmax.py +0 -2
  596. maxframe/tensor/reduction/nanmean.py +0 -2
  597. maxframe/tensor/reduction/nanmin.py +0 -2
  598. maxframe/tensor/reduction/nanprod.py +0 -2
  599. maxframe/tensor/reduction/nanstd.py +0 -2
  600. maxframe/tensor/reduction/nansum.py +0 -2
  601. maxframe/tensor/reduction/nanvar.py +0 -2
  602. maxframe/tensor/reduction/prod.py +0 -2
  603. maxframe/tensor/reduction/std.py +0 -2
  604. maxframe/tensor/reduction/sum.py +0 -2
  605. maxframe/tensor/reduction/tests/test_reduction.py +1 -4
  606. maxframe/tensor/reduction/var.py +0 -2
  607. maxframe/tensor/reshape/__init__.py +0 -2
  608. maxframe/tensor/reshape/reshape.py +6 -5
  609. maxframe/tensor/reshape/tests/__init__.py +0 -2
  610. maxframe/tensor/reshape/tests/test_reshape.py +0 -2
  611. maxframe/tensor/sort/__init__.py +16 -0
  612. maxframe/tensor/sort/argsort.py +150 -0
  613. maxframe/tensor/sort/sort.py +295 -0
  614. maxframe/tensor/special/__init__.py +37 -0
  615. maxframe/tensor/special/core.py +38 -0
  616. maxframe/tensor/special/misc.py +142 -0
  617. maxframe/tensor/special/statistical.py +56 -0
  618. maxframe/tensor/statistics/__init__.py +5 -0
  619. maxframe/tensor/statistics/average.py +143 -0
  620. maxframe/tensor/statistics/bincount.py +133 -0
  621. maxframe/tensor/statistics/quantile.py +10 -8
  622. maxframe/tensor/ufunc/__init__.py +0 -2
  623. maxframe/tensor/ufunc/ufunc.py +0 -2
  624. maxframe/tensor/utils.py +21 -3
  625. maxframe/tests/test_protocol.py +3 -3
  626. maxframe/tests/test_utils.py +210 -1
  627. maxframe/tests/utils.py +67 -1
  628. maxframe/udf.py +76 -6
  629. maxframe/utils.py +418 -17
  630. {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/METADATA +4 -1
  631. maxframe-2.0.0b1.dist-info/RECORD +939 -0
  632. maxframe_client/clients/framedriver.py +19 -3
  633. maxframe_client/fetcher.py +113 -6
  634. maxframe_client/session/odps.py +173 -38
  635. maxframe_client/session/task.py +3 -1
  636. maxframe_client/tests/test_session.py +41 -5
  637. maxframe-1.3.1.dist-info/RECORD +0 -705
  638. {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/WHEEL +0 -0
  639. {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,3 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
1
  # Copyright 1999-2025 Alibaba Group Holding Ltd.
4
2
  #
5
3
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,6 +12,8 @@
14
12
  # See the License for the specific language governing permissions and
15
13
  # limitations under the License.
16
14
 
15
+ from typing import MutableMapping, Union
16
+
17
17
  import numpy as np
18
18
  import pandas as pd
19
19
 
@@ -58,6 +58,13 @@ class DataFrameFromRecords(DataFrameOperator, DataFrameOperatorMixin):
58
58
  columns_value=columns_value,
59
59
  )
60
60
 
61
+ @classmethod
62
+ def estimate_size(
63
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameFromRecords"
64
+ ): # pragma: no cover
65
+ # todo implement this to facilitate local computation
66
+ ctx[op.outputs[0].key] = float("inf")
67
+
61
68
 
62
69
  def from_records(
63
70
  data,
@@ -1,5 +1,3 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
1
  # Copyright 1999-2025 Alibaba Group Holding Ltd.
4
2
  #
5
3
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,13 +13,13 @@
15
13
  # limitations under the License.
16
14
 
17
15
  from collections import OrderedDict
18
- from typing import Any, Dict, List, Union
16
+ from typing import Any, Dict, List, MutableMapping, Union
19
17
 
20
18
  import numpy as np
21
19
  import pandas as pd
22
20
 
23
21
  from ... import opcodes
24
- from ...core import ENTITY_TYPE, OutputType
22
+ from ...core import ENTITY_TYPE, EntityData, OutputType
25
23
  from ...serialization.serializables import AnyField, KeyField
26
24
  from ...tensor.core import Tensor
27
25
  from ...tensor.datasource import tensor as astensor
@@ -46,24 +44,25 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
46
44
  kwargs["_output_types"] = [OutputType.dataframe]
47
45
  super().__init__(*args, **kwargs)
48
46
 
49
- def _set_inputs(self, inputs: List[EntityType]):
50
- super()._set_inputs(inputs)
51
- inputs_iter = iter(self._inputs)
52
- if self.input is not None:
53
- if not isinstance(self.input, dict):
54
- self.input = next(inputs_iter)
47
+ @classmethod
48
+ def _set_inputs(cls, op: "DataFrameFromTensor", inputs: List[EntityData]):
49
+ super()._set_inputs(op, inputs)
50
+ inputs_iter = iter(op._inputs)
51
+ if op.input is not None:
52
+ if not isinstance(op.input, dict):
53
+ op.input = next(inputs_iter)
55
54
  else:
56
55
  # check each value for input
57
56
  new_input = OrderedDict()
58
- for k, v in self.input.items():
57
+ for k, v in op.input.items():
59
58
  if isinstance(v, ENTITY_TYPE):
60
59
  new_input[k] = next(inputs_iter)
61
60
  else:
62
61
  new_input[k] = v
63
- self.input = new_input
62
+ op.input = new_input
64
63
 
65
- if isinstance(self.index, ENTITY_TYPE):
66
- self.index = next(inputs_iter)
64
+ if isinstance(op.index, ENTITY_TYPE):
65
+ op.index = next(inputs_iter)
67
66
 
68
67
  def __call__(
69
68
  self,
@@ -138,7 +137,11 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
138
137
  )
139
138
  index_value = self._process_index(index, tileables)
140
139
  else:
141
- self.index = index = pd.RangeIndex(0, tileables[0].shape[0])
140
+ if np.isnan(tileables[0].shape[0]):
141
+ index = pd.RangeIndex(0)
142
+ else:
143
+ index = pd.RangeIndex(0, tileables[0].shape[0])
144
+ self.index = index
142
145
  index_value = parse_index(index)
143
146
 
144
147
  if columns is not None:
@@ -260,6 +263,13 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
260
263
  columns_value=columns_value,
261
264
  )
262
265
 
266
+ @classmethod
267
+ def estimate_size(
268
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameFromTensor"
269
+ ): # pragma: no cover
270
+ # todo implement this to facilitate local computation
271
+ ctx[op.outputs[0].key] = float("inf")
272
+
263
273
 
264
274
  def dataframe_from_tensor(
265
275
  tensor: Tensor,
@@ -340,12 +350,13 @@ class SeriesFromTensor(DataFrameOperator, DataFrameOperatorMixin):
340
350
  input = KeyField("input")
341
351
  index = AnyField("index")
342
352
 
343
- def _set_inputs(self, inputs: List[EntityType]):
344
- super()._set_inputs(inputs)
345
- if self.input is not None:
346
- self.input = self.inputs[0]
347
- if self.index is not None and hasattr(self.index, "key"):
348
- self.index = self.inputs[-1]
353
+ @classmethod
354
+ def _set_inputs(cls, op: "SeriesFromTensor", inputs: List[EntityData]):
355
+ super()._set_inputs(op, inputs)
356
+ if op.input is not None:
357
+ op.input = op.inputs[0]
358
+ if op.index is not None and hasattr(op.index, "key"):
359
+ op.index = op.inputs[-1]
349
360
 
350
361
  def __call__(
351
362
  self,
@@ -1,5 +1,3 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
1
  # Copyright 1999-2025 Alibaba Group Holding Ltd.
4
2
  #
5
3
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,6 +13,7 @@
15
13
  # limitations under the License.
16
14
 
17
15
  from io import BytesIO
16
+ from typing import MutableMapping, Union
18
17
  from urllib.parse import urlparse
19
18
 
20
19
  import numpy as np
@@ -129,6 +128,13 @@ class DataFrameReadCSV(
129
128
  chunk_bytes=chunk_bytes,
130
129
  )
131
130
 
131
+ @classmethod
132
+ def estimate_size(
133
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameReadCSV"
134
+ ): # pragma: no cover
135
+ # todo implement this to facilitate local computation
136
+ ctx[op.outputs[0].key] = float("inf")
137
+
132
138
 
133
139
  def read_csv(
134
140
  path,
@@ -13,9 +13,11 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import dataclasses
16
+ import io
16
17
  import logging
17
18
  import re
18
- from typing import Dict, List, Optional, Tuple, Union
19
+ import tokenize
20
+ from typing import Dict, List, MutableMapping, Optional, Tuple, Union
19
21
 
20
22
  import numpy as np
21
23
  import pandas as pd
@@ -110,7 +112,15 @@ def _split_explain_string(explain_string: str) -> List[str]:
110
112
  grouped = []
111
113
  for part in parts:
112
114
  part = part.strip("\n")
113
- if grouped and not part.startswith(" "):
115
+ part_line1 = part.split("\n", 1)[0]
116
+ # initial line of part should not start with spaces (Statistics row)
117
+ # or with quote marks
118
+ if (
119
+ grouped
120
+ and not part.startswith(" ")
121
+ and "'" not in part_line1
122
+ and '"' not in part_line1
123
+ ):
114
124
  final_parts.append("\n\n".join(grouped).strip())
115
125
  grouped = []
116
126
  grouped.append(part)
@@ -267,6 +277,15 @@ class DataFrameReadODPSQuery(
267
277
  def set_pruned_columns(self, columns, *, keep_order=None): # pragma: no cover
268
278
  self.columns = columns
269
279
 
280
+ @classmethod
281
+ def estimate_size(
282
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameReadODPSQuery"
283
+ ): # pragma: no cover
284
+ # use infinity to show that the size cannot be inferred
285
+ # todo when local catalyst is implemented,
286
+ # a more precise estimation here can be useful then.
287
+ ctx[op.outputs[0].key] = float("inf")
288
+
270
289
  def __call__(self, chunk_bytes=None, chunk_size=None):
271
290
  if is_empty(self.index_columns):
272
291
  index_value = parse_index(pd.RangeIndex(0))
@@ -302,6 +321,17 @@ class DataFrameReadODPSQuery(
302
321
  )
303
322
 
304
323
 
324
+ def _check_token_in_sql(token: str, sql: str) -> bool:
325
+ try:
326
+ names = set()
327
+ for tk_info in tokenize.tokenize(io.BytesIO(sql.encode()).readline):
328
+ if tk_info.type == tokenize.NAME:
329
+ names.add(tk_info.string)
330
+ return token in names
331
+ except: # pragma: no cover
332
+ return False
333
+
334
+
305
335
  def read_odps_query(
306
336
  query: str,
307
337
  odps_entry: ODPS = None,
@@ -382,7 +412,7 @@ def read_odps_query(
382
412
  new_columns = []
383
413
  for col in odps_schema.columns:
384
414
  anon_match = _ANONYMOUS_COL_REGEX.match(col.name)
385
- if anon_match and col.name not in query:
415
+ if anon_match and not _check_token_in_sql(col.name, query):
386
416
  new_name = anonymous_col_prefix + anon_match.group(1)
387
417
  col_renames[col.name] = new_name
388
418
  new_columns.append(Column(new_name, col.type))
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import logging
16
- from typing import List, Optional, Union
16
+ from typing import List, MutableMapping, Optional, Union
17
17
 
18
18
  import numpy as np
19
19
  import pandas as pd
@@ -34,7 +34,7 @@ from ...serialization.serializables import (
34
34
  SeriesField,
35
35
  StringField,
36
36
  )
37
- from ...utils import is_empty
37
+ from ...utils import estimate_table_size, is_empty
38
38
  from ..core import DataFrame # noqa: F401
39
39
  from ..utils import parse_index
40
40
  from .core import ColumnPruneSupportedDataSourceMixin, IncrementalIndexDatasource
@@ -46,6 +46,7 @@ class DataFrameReadODPSTable(
46
46
  IncrementalIndexDatasource,
47
47
  ColumnPruneSupportedDataSourceMixin,
48
48
  ):
49
+ __slots__ = ("_odps_entry",)
49
50
  _op_type_ = opcodes.READ_ODPS_TABLE
50
51
 
51
52
  table_name = StringField("table_name")
@@ -61,7 +62,8 @@ class DataFrameReadODPSTable(
61
62
  index_dtypes = SeriesField("index_dtypes", default=None)
62
63
 
63
64
  def __init__(self, memory_scale=None, **kw):
64
- output_type = kw.get("output_type", OutputType.dataframe)
65
+ output_type = kw.pop("output_type", OutputType.dataframe)
66
+ self._odps_entry = kw.pop("odps_entry", None)
65
67
  super(DataFrameReadODPSTable, self).__init__(
66
68
  memory_scale=memory_scale, _output_types=[output_type], **kw
67
69
  )
@@ -130,6 +132,18 @@ class DataFrameReadODPSTable(
130
132
  chunk_size=chunk_size,
131
133
  )
132
134
 
135
+ @classmethod
136
+ def estimate_size(
137
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameReadODPSTable"
138
+ ) -> None:
139
+ odps_entry = op._odps_entry or ODPS.from_global() or ODPS.from_environments()
140
+ if not odps_entry: # pragma: no cover
141
+ ctx[op.outputs[0].key] = float("inf")
142
+ return
143
+ ctx[op.outputs[0].key] = estimate_table_size(
144
+ odps_entry, op.table_name, op.partitions
145
+ )
146
+
133
147
 
134
148
  def read_odps_table(
135
149
  table_name: Union[str, Table],
@@ -212,7 +226,8 @@ def read_odps_table(
212
226
  index_dtypes = pd.Series(table_index_types, index=index_col)
213
227
 
214
228
  if columns is not None:
215
- table_col_set = set([c.lower() for c in columns])
229
+ new_columns = [c.lower() for c in columns]
230
+ table_col_set = set(new_columns)
216
231
  col_diff = sorted(table_col_set - set(table_columns))
217
232
  if col_diff:
218
233
  raise ValueError(
@@ -223,7 +238,6 @@ def read_odps_table(
223
238
  raise ValueError("Index columns and columns shall not overlap.")
224
239
 
225
240
  # reorder columns
226
- new_columns = [c for c in table_columns if c in table_col_set]
227
241
  df_types = [df_types[table_columns.index(col)] for col in new_columns]
228
242
  table_columns = new_columns
229
243
  columns = new_columns
@@ -253,6 +267,7 @@ def read_odps_table(
253
267
  last_modified_time=to_timestamp(table.last_data_modified_time),
254
268
  index_columns=index_col,
255
269
  index_dtypes=index_dtypes,
270
+ odps_entry=odps_entry,
256
271
  **kw,
257
272
  )
258
273
  return op(shape, chunk_bytes=chunk_bytes, chunk_size=chunk_size)
@@ -1,5 +1,3 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
1
  # Copyright 1999-2025 Alibaba Group Holding Ltd.
4
2
  #
5
3
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,7 +13,7 @@
15
13
  # limitations under the License.
16
14
 
17
15
  import os
18
- from typing import Dict
16
+ from typing import Dict, MutableMapping, Union
19
17
  from urllib.parse import urlparse
20
18
 
21
19
  import numpy as np
@@ -308,6 +306,13 @@ class DataFrameReadParquet(
308
306
  columns_value=columns_value,
309
307
  )
310
308
 
309
+ @classmethod
310
+ def estimate_size(
311
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameReadParquet"
312
+ ): # pragma: no cover
313
+ # todo implement this to facilitate local computation
314
+ ctx[op.outputs[0].key] = float("inf")
315
+
311
316
 
312
317
  def read_parquet(
313
318
  path,
@@ -15,6 +15,7 @@
15
15
  import os
16
16
  import uuid
17
17
  from collections import OrderedDict
18
+ from math import isinf
18
19
 
19
20
  import numpy as np
20
21
  import pandas as pd
@@ -24,6 +25,7 @@ from odps import types as odps_types
24
25
 
25
26
  from .... import tensor as mt
26
27
  from ....core import OutputType
28
+ from ....core.operator import estimate_size
27
29
  from ....tests.utils import tn
28
30
  from ....utils import lazy_import
29
31
  from ... import read_odps_query, read_odps_table
@@ -71,6 +73,10 @@ def test_from_pandas_dataframe():
71
73
  assert df.index_value.max_val == 9
72
74
  np.testing.assert_equal(df.columns_value._index_value._data, data.columns.values)
73
75
 
76
+ result_ctx = dict()
77
+ estimate_size(result_ctx, df.op)
78
+ assert result_ctx[df.key] > 0 and not isinf(result_ctx[df.key])
79
+
74
80
  data2 = data[::2]
75
81
  df2 = from_pandas_df(data2, chunk_size=4)
76
82
 
@@ -258,6 +264,10 @@ def test_from_odps_table():
258
264
  ),
259
265
  )
260
266
 
267
+ result_ctx = dict()
268
+ estimate_size(result_ctx, df.op)
269
+ assert result_ctx[df.key] >= 0 and not isinf(result_ctx[df.key])
270
+
261
271
  with pytest.raises(ValueError):
262
272
  read_odps_table(test_table, columns=["col3", "col4"])
263
273
  with pytest.raises(ValueError):
@@ -300,6 +310,7 @@ def test_from_odps_table():
300
310
  ),
301
311
  )
302
312
 
313
+ test_parted_table.create_partition("pt=20240103")
303
314
  df = read_odps_table(
304
315
  test_parted_table, columns=["col1", "col2", "pt"], partitions="pt=20240103"
305
316
  )
@@ -314,6 +325,10 @@ def test_from_odps_table():
314
325
  ),
315
326
  )
316
327
 
328
+ result_ctx = dict()
329
+ estimate_size(result_ctx, df.op)
330
+ assert result_ctx[df.key] >= 0 and not isinf(result_ctx[df.key])
331
+
317
332
  out_idx = read_odps_table(
318
333
  test_table,
319
334
  columns=[],
@@ -545,3 +560,21 @@ def test_resolve_multi_join():
545
560
  for col, (exp_nm, exp_tp) in zip(schema.columns, expected_col_types.items()):
546
561
  assert col.name == exp_nm
547
562
  assert col.type == odps_types.validate_data_type(exp_tp)
563
+
564
+
565
+ def test_resolve_break_lines():
566
+ input_path = os.path.join(
567
+ os.path.dirname(__file__), "test-data", "task-input-with-break-line.txt"
568
+ )
569
+ with open(input_path, "r") as f:
570
+ sector = f.read()
571
+
572
+ expected_col_types = {
573
+ "key": "string",
574
+ "value": "string",
575
+ }
576
+
577
+ schema = _parse_full_explain(sector)
578
+ for col, (exp_nm, exp_tp) in zip(schema.columns, expected_col_types.items()):
579
+ assert col.name == exp_nm
580
+ assert col.type == odps_types.validate_data_type(exp_tp)
@@ -12,9 +12,10 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import pandas as pd
15
16
  import pytest
16
17
 
17
- from ... import DataFrame
18
+ from ... import DataFrame, Index
18
19
  from ..to_odps import to_odps_table
19
20
 
20
21
 
@@ -23,6 +24,25 @@ def df():
23
24
  return DataFrame({"A": [1, 2], "B": [3, 4]})
24
25
 
25
26
 
27
+ @pytest.fixture
28
+ def df_with_named_index():
29
+ return DataFrame({"A": [1, 2], "B": [3, 4]}, index=Index([1, 2], name="A"))
30
+
31
+
32
+ @pytest.fixture
33
+ def df_with_named_multi_indexes():
34
+ arrays = [
35
+ ["c1", "c2"],
36
+ ["d1", "d2"],
37
+ ["e1", "e2"],
38
+ ]
39
+ multi_index = pd.MultiIndex.from_arrays(arrays, names=("C", "D", "E"))
40
+ return DataFrame(
41
+ {"A": [1, 2], "B": [3, 4]},
42
+ index=multi_index,
43
+ )
44
+
45
+
26
46
  @pytest.mark.parametrize(
27
47
  "kwargs",
28
48
  [
@@ -46,3 +66,34 @@ def test_to_odps_table_validation(df, kwargs):
46
66
  )
47
67
  def test_to_odps_table_vaild(df, kwargs):
48
68
  to_odps_table(df, "test_table", **kwargs)
69
+
70
+
71
+ def test_to_odps_table_column_conflicts(
72
+ df, df_with_named_index, df_with_named_multi_indexes
73
+ ):
74
+ to_odps_table(df.reset_index(), "test_table", index=False)
75
+ to_odps_table(df.reset_index(), "test_table", index_label="C")
76
+ with pytest.raises(ValueError):
77
+ to_odps_table(df.reset_index(), "test_table")
78
+
79
+ to_odps_table(df_with_named_index, "test_table", index=False)
80
+ to_odps_table(df_with_named_index, "test_table", index_label="C")
81
+ with pytest.raises(ValueError):
82
+ to_odps_table(df_with_named_index, "test_table")
83
+
84
+ to_odps_table(df, "test_table", partition="C='1'")
85
+ with pytest.raises(ValueError):
86
+ to_odps_table(df, "test_table", partition="A='1'")
87
+
88
+ with pytest.raises(ValueError):
89
+ to_odps_table(df, "test_table", partition="A='1'")
90
+
91
+ to_odps_table(df_with_named_multi_indexes, "test_table")
92
+ to_odps_table(
93
+ df_with_named_multi_indexes, "test_table", partition="C='1'", index=False
94
+ )
95
+ with pytest.raises(ValueError):
96
+ to_odps_table(df_with_named_multi_indexes, "test_table", partition="C='1'")
97
+
98
+ df_with_named_multi_indexes.index.names = ["C1", "D1", "E1"]
99
+ to_odps_table(df_with_named_multi_indexes, "test_table", partition="C='1'")
@@ -12,7 +12,10 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from typing import List
16
+
15
17
  from ... import opcodes
18
+ from ...core import EntityData
16
19
  from ...serialization.serializables import (
17
20
  AnyField,
18
21
  BoolField,
@@ -68,9 +71,10 @@ class DataFrameToCSV(DataFrameDataStore):
68
71
  def output_limit(self):
69
72
  return 1 if not self.output_stat else 2
70
73
 
71
- def _set_inputs(self, inputs):
72
- super()._set_inputs(inputs)
73
- self._input = self._inputs[0]
74
+ @classmethod
75
+ def _set_inputs(cls, op: "DataFrameToCSV", inputs: List[EntityData]):
76
+ super()._set_inputs(op, inputs)
77
+ op._input = op._inputs[0]
74
78
 
75
79
  def __call__(self, df):
76
80
  index_value = parse_index(df.index_value.to_pandas()[:0], df)
@@ -1,5 +1,3 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
1
  # Copyright 1999-2025 Alibaba Group Holding Ltd.
4
2
  #
5
3
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,8 +12,9 @@
14
12
  # See the License for the specific language governing permissions and
15
13
  # limitations under the License.
16
14
 
15
+ import itertools
17
16
  import logging
18
- from typing import List, Optional, Union
17
+ from typing import Any, List, Optional, Union
19
18
 
20
19
  from odps import ODPS
21
20
  from odps.models import Table as ODPSTable
@@ -75,6 +74,25 @@ class DataFrameToODPSTable(DataFrameDataStore):
75
74
  columns_value=columns_value,
76
75
  )
77
76
 
77
+ @classmethod
78
+ def get_index_mapping(
79
+ cls,
80
+ index_label: Optional[List[str]],
81
+ raw_index_levels: List[Any],
82
+ ) -> List[Any]:
83
+ def_labels = index_label or itertools.repeat(None)
84
+ def_labels = itertools.chain(def_labels, itertools.repeat(None))
85
+ names = raw_index_levels
86
+ if len(names) == 1:
87
+ default_labels = ["index"]
88
+ else:
89
+ default_labels = [f"level_{i}" for i in range(len(names))]
90
+ indexes = [
91
+ def_label or name or label
92
+ for def_label, name, label in zip(def_labels, names, default_labels)
93
+ ]
94
+ return [x.lower() for x in indexes]
95
+
78
96
 
79
97
  def to_odps_table(
80
98
  df: TileableType,
@@ -161,11 +179,14 @@ def to_odps_table(
161
179
  f"index_label needs {len(df.index.nlevels)} labels "
162
180
  f"but it only have {len(index_label)}"
163
181
  )
182
+
183
+ # check if table partition columns conflicts with dataframe columns
164
184
  table_cols = set(build_dataframe_table_meta(df).table_column_names)
185
+ partition_col_set = (
186
+ set(x.lower() for x in PartitionSpec(partition).keys()) if partition else set()
187
+ )
165
188
  if partition:
166
- partition_intersect = (
167
- set(x.lower() for x in PartitionSpec(partition).keys()) & table_cols
168
- )
189
+ partition_intersect = partition_col_set & table_cols
169
190
  if partition_intersect:
170
191
  raise ValueError(
171
192
  f"Data column(s) {partition_intersect} in the dataframe"
@@ -173,6 +194,21 @@ def to_odps_table(
173
194
  " Use 'partition_col' instead."
174
195
  )
175
196
 
197
+ if index:
198
+ index_cols = set(
199
+ DataFrameToODPSTable.get_index_mapping(index_label, df.index.names)
200
+ )
201
+ index_table_intersect = index_cols & table_cols
202
+ if index_table_intersect:
203
+ raise ValueError(
204
+ f"Index column(s) {index_table_intersect} conflict with column(s) of the input dataframe."
205
+ )
206
+ index_partition_intersect = index_cols & partition_col_set
207
+ if index_partition_intersect:
208
+ raise ValueError(
209
+ f"Index column(s) {index_partition_intersect} conflict with partition column(s)."
210
+ )
211
+
176
212
  if partition_col:
177
213
  partition_diff = set(x.lower() for x in partition_col) - table_cols
178
214
  if partition_diff:
@@ -18,7 +18,12 @@ from .accessor import (
18
18
  IndexMaxFrameAccessor,
19
19
  SeriesMaxFrameAccessor,
20
20
  )
21
- from .apply_chunk import df_apply_chunk, series_apply_chunk
21
+ from .apply_chunk import (
22
+ DataFrameApplyChunk,
23
+ DataFrameApplyChunkOperator,
24
+ df_apply_chunk,
25
+ series_apply_chunk,
26
+ )
22
27
  from .flatjson import series_flatjson
23
28
  from .flatmap import df_flatmap, series_flatmap
24
29
  from .reshuffle import DataFrameReshuffle, df_reshuffle