maxframe 1.3.1__cp39-cp39-win_amd64.whl → 2.0.0__cp39-cp39-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (640)
  1. maxframe/_utils.cp39-win_amd64.pyd +0 -0
  2. maxframe/_utils.pyi +21 -0
  3. maxframe/_utils.pyx +4 -3
  4. maxframe/codegen/__init__.py +27 -0
  5. maxframe/{codegen.py → codegen/core.py} +49 -43
  6. maxframe/codegen/spe/__init__.py +16 -0
  7. maxframe/codegen/spe/core.py +307 -0
  8. maxframe/codegen/spe/dataframe/__init__.py +37 -0
  9. maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
  10. maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
  11. maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
  12. maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
  13. maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
  14. maxframe/codegen/spe/dataframe/datasource.py +181 -0
  15. maxframe/codegen/spe/dataframe/datastore.py +204 -0
  16. maxframe/codegen/spe/dataframe/extensions.py +63 -0
  17. maxframe/codegen/spe/dataframe/fetch.py +26 -0
  18. maxframe/codegen/spe/dataframe/groupby.py +224 -0
  19. maxframe/codegen/spe/dataframe/indexing.py +238 -0
  20. maxframe/codegen/spe/dataframe/merge.py +73 -0
  21. maxframe/codegen/spe/dataframe/misc.py +286 -0
  22. maxframe/codegen/spe/dataframe/missing.py +64 -0
  23. maxframe/codegen/spe/dataframe/reduction.py +160 -0
  24. maxframe/codegen/spe/dataframe/sort.py +83 -0
  25. maxframe/codegen/spe/dataframe/statistics.py +46 -0
  26. maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
  27. maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
  29. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
  30. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
  31. maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
  32. maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
  33. maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
  34. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
  35. maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
  36. maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
  37. maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
  38. maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
  39. maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
  40. maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
  41. maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
  42. maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
  43. maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
  44. maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
  45. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
  46. maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
  47. maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
  48. maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
  49. maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
  50. maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
  51. maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
  52. maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
  53. maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
  54. maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
  55. maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
  56. maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
  57. maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
  58. maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
  59. maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
  60. maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
  61. maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
  62. maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
  63. maxframe/codegen/spe/dataframe/tseries.py +46 -0
  64. maxframe/codegen/spe/dataframe/udf.py +62 -0
  65. maxframe/codegen/spe/dataframe/value_counts.py +31 -0
  66. maxframe/codegen/spe/dataframe/window.py +65 -0
  67. maxframe/codegen/spe/learn/__init__.py +15 -0
  68. maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
  69. maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
  70. maxframe/codegen/spe/learn/contrib/models.py +41 -0
  71. maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
  72. maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
  73. maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
  74. maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
  75. maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
  76. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
  77. maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
  78. maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
  79. maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
  80. maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
  81. maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
  82. maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
  83. maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
  84. maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
  85. maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
  86. maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
  87. maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
  88. maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
  89. maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
  90. maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
  91. maxframe/codegen/spe/learn/utils/__init__.py +15 -0
  92. maxframe/codegen/spe/learn/utils/checks.py +55 -0
  93. maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
  94. maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
  95. maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
  96. maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
  97. maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
  98. maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
  99. maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
  100. maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
  101. maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
  102. maxframe/codegen/spe/learn/utils/validation.py +35 -0
  103. maxframe/codegen/spe/objects.py +26 -0
  104. maxframe/codegen/spe/remote.py +29 -0
  105. maxframe/codegen/spe/tensor/__init__.py +28 -0
  106. maxframe/codegen/spe/tensor/arithmetic.py +95 -0
  107. maxframe/codegen/spe/tensor/core.py +41 -0
  108. maxframe/codegen/spe/tensor/datasource.py +165 -0
  109. maxframe/codegen/spe/tensor/extensions.py +35 -0
  110. maxframe/codegen/spe/tensor/fetch.py +26 -0
  111. maxframe/codegen/spe/tensor/indexing.py +63 -0
  112. maxframe/codegen/spe/tensor/linalg.py +63 -0
  113. maxframe/codegen/spe/tensor/merge.py +31 -0
  114. maxframe/codegen/spe/tensor/misc.py +121 -0
  115. maxframe/codegen/spe/tensor/random.py +29 -0
  116. maxframe/codegen/spe/tensor/reduction.py +39 -0
  117. maxframe/codegen/spe/tensor/reshape.py +26 -0
  118. maxframe/codegen/spe/tensor/sort.py +42 -0
  119. maxframe/codegen/spe/tensor/special.py +35 -0
  120. maxframe/codegen/spe/tensor/statistics.py +24 -0
  121. maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
  122. maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
  123. maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
  124. maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
  125. maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
  126. maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
  127. maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
  128. maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
  129. maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
  130. maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
  131. maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
  132. maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
  133. maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
  134. maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
  135. maxframe/codegen/spe/tests/__init__.py +13 -0
  136. maxframe/codegen/spe/tests/test_remote.py +29 -0
  137. maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
  138. maxframe/codegen/spe/utils.py +54 -0
  139. maxframe/codegen/tests/__init__.py +13 -0
  140. maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
  141. maxframe/config/__init__.py +1 -1
  142. maxframe/config/config.py +50 -23
  143. maxframe/config/tests/test_config.py +4 -12
  144. maxframe/config/validators.py +5 -0
  145. maxframe/conftest.py +38 -10
  146. maxframe/core/__init__.py +1 -0
  147. maxframe/core/context.py +110 -0
  148. maxframe/core/entity/__init__.py +1 -0
  149. maxframe/core/entity/core.py +0 -7
  150. maxframe/core/entity/objects.py +19 -5
  151. maxframe/core/entity/output_types.py +11 -0
  152. maxframe/core/entity/tests/test_objects.py +11 -12
  153. maxframe/core/entity/tileables.py +3 -1
  154. maxframe/core/entity/utils.py +15 -0
  155. maxframe/core/graph/__init__.py +6 -1
  156. maxframe/core/graph/builder/base.py +5 -1
  157. maxframe/core/graph/core.cp39-win_amd64.pyd +0 -0
  158. maxframe/core/graph/core.pyx +17 -6
  159. maxframe/core/graph/entity.py +18 -6
  160. maxframe/core/operator/__init__.py +8 -3
  161. maxframe/core/operator/base.py +35 -12
  162. maxframe/core/operator/core.py +37 -14
  163. maxframe/core/operator/fetch.py +5 -18
  164. maxframe/core/operator/objects.py +0 -20
  165. maxframe/core/operator/shuffle.py +6 -72
  166. maxframe/dataframe/__init__.py +1 -0
  167. maxframe/dataframe/accessors/datetime_/core.py +7 -4
  168. maxframe/dataframe/accessors/string_/core.py +9 -6
  169. maxframe/dataframe/arithmetic/core.py +31 -20
  170. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
  171. maxframe/dataframe/core.py +98 -91
  172. maxframe/dataframe/datasource/core.py +8 -1
  173. maxframe/dataframe/datasource/date_range.py +8 -0
  174. maxframe/dataframe/datasource/from_index.py +9 -5
  175. maxframe/dataframe/datasource/from_records.py +9 -2
  176. maxframe/dataframe/datasource/from_tensor.py +32 -21
  177. maxframe/dataframe/datasource/read_csv.py +8 -2
  178. maxframe/dataframe/datasource/read_odps_query.py +109 -19
  179. maxframe/dataframe/datasource/read_odps_table.py +20 -5
  180. maxframe/dataframe/datasource/read_parquet.py +8 -3
  181. maxframe/dataframe/datasource/tests/test_datasource.py +80 -1
  182. maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
  183. maxframe/dataframe/datastore/to_csv.py +7 -3
  184. maxframe/dataframe/datastore/to_odps.py +42 -6
  185. maxframe/dataframe/extensions/__init__.py +6 -1
  186. maxframe/dataframe/extensions/apply_chunk.py +96 -136
  187. maxframe/dataframe/extensions/flatjson.py +3 -2
  188. maxframe/dataframe/extensions/flatmap.py +15 -7
  189. maxframe/dataframe/fetch/core.py +12 -1
  190. maxframe/dataframe/groupby/__init__.py +7 -0
  191. maxframe/dataframe/groupby/aggregation.py +9 -8
  192. maxframe/dataframe/groupby/apply.py +50 -74
  193. maxframe/dataframe/groupby/apply_chunk.py +393 -0
  194. maxframe/dataframe/groupby/core.py +80 -17
  195. maxframe/dataframe/groupby/extensions.py +26 -0
  196. maxframe/dataframe/groupby/fill.py +9 -4
  197. maxframe/dataframe/groupby/sample.py +7 -7
  198. maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
  199. maxframe/dataframe/groupby/transform.py +57 -54
  200. maxframe/dataframe/indexing/align.py +7 -6
  201. maxframe/dataframe/indexing/getitem.py +9 -8
  202. maxframe/dataframe/indexing/iloc.py +28 -23
  203. maxframe/dataframe/indexing/insert.py +7 -3
  204. maxframe/dataframe/indexing/loc.py +9 -8
  205. maxframe/dataframe/indexing/reindex.py +36 -30
  206. maxframe/dataframe/indexing/rename_axis.py +18 -10
  207. maxframe/dataframe/indexing/reset_index.py +0 -2
  208. maxframe/dataframe/indexing/sample.py +13 -9
  209. maxframe/dataframe/indexing/set_axis.py +9 -6
  210. maxframe/dataframe/indexing/setitem.py +8 -5
  211. maxframe/dataframe/indexing/where.py +12 -9
  212. maxframe/dataframe/merge/__init__.py +0 -1
  213. maxframe/dataframe/merge/concat.py +10 -31
  214. maxframe/dataframe/merge/merge.py +2 -24
  215. maxframe/dataframe/misc/__init__.py +6 -0
  216. maxframe/dataframe/misc/_duplicate.py +7 -3
  217. maxframe/dataframe/misc/apply.py +106 -139
  218. maxframe/dataframe/misc/astype.py +3 -2
  219. maxframe/dataframe/misc/case_when.py +11 -7
  220. maxframe/dataframe/misc/cut.py +11 -10
  221. maxframe/dataframe/misc/describe.py +7 -3
  222. maxframe/dataframe/misc/drop.py +13 -11
  223. maxframe/dataframe/misc/eval.py +0 -2
  224. maxframe/dataframe/misc/get_dummies.py +78 -49
  225. maxframe/dataframe/misc/isin.py +13 -10
  226. maxframe/dataframe/misc/map.py +21 -6
  227. maxframe/dataframe/misc/melt.py +8 -1
  228. maxframe/dataframe/misc/pivot.py +232 -0
  229. maxframe/dataframe/misc/pivot_table.py +52 -40
  230. maxframe/dataframe/misc/rechunk.py +59 -0
  231. maxframe/dataframe/misc/shift.py +7 -4
  232. maxframe/dataframe/misc/stack.py +5 -3
  233. maxframe/dataframe/misc/tests/test_misc.py +167 -1
  234. maxframe/dataframe/misc/transform.py +63 -65
  235. maxframe/dataframe/misc/value_counts.py +7 -4
  236. maxframe/dataframe/missing/dropna.py +16 -7
  237. maxframe/dataframe/missing/fillna.py +18 -10
  238. maxframe/dataframe/missing/replace.py +10 -6
  239. maxframe/dataframe/missing/tests/test_missing.py +2 -2
  240. maxframe/dataframe/operators.py +1 -27
  241. maxframe/dataframe/reduction/aggregation.py +65 -3
  242. maxframe/dataframe/reduction/core.py +3 -1
  243. maxframe/dataframe/reduction/median.py +1 -1
  244. maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
  245. maxframe/dataframe/reduction/unique.py +53 -7
  246. maxframe/dataframe/statistics/corr.py +9 -6
  247. maxframe/dataframe/statistics/quantile.py +9 -6
  248. maxframe/dataframe/tseries/to_datetime.py +6 -4
  249. maxframe/dataframe/utils.py +219 -31
  250. maxframe/dataframe/window/rolling.py +7 -4
  251. maxframe/env.py +1 -0
  252. maxframe/errors.py +9 -0
  253. maxframe/extension.py +13 -2
  254. maxframe/io/objects/core.py +67 -51
  255. maxframe/io/objects/tensor.py +73 -17
  256. maxframe/io/objects/tests/test_object_io.py +10 -55
  257. maxframe/io/odpsio/arrow.py +15 -2
  258. maxframe/io/odpsio/schema.py +43 -13
  259. maxframe/io/odpsio/tableio.py +63 -11
  260. maxframe/io/odpsio/tests/test_arrow.py +1 -2
  261. maxframe/io/odpsio/tests/test_schema.py +114 -1
  262. maxframe/io/odpsio/tests/test_tableio.py +42 -0
  263. maxframe/io/odpsio/tests/test_volumeio.py +21 -58
  264. maxframe/io/odpsio/volumeio.py +23 -8
  265. maxframe/learn/__init__.py +2 -2
  266. maxframe/learn/contrib/__init__.py +2 -2
  267. maxframe/learn/contrib/graph/connected_components.py +2 -1
  268. maxframe/learn/contrib/lightgbm/__init__.py +33 -0
  269. maxframe/learn/contrib/lightgbm/_predict.py +138 -0
  270. maxframe/learn/contrib/lightgbm/_train.py +163 -0
  271. maxframe/learn/contrib/lightgbm/callback.py +114 -0
  272. maxframe/learn/contrib/lightgbm/classifier.py +199 -0
  273. maxframe/learn/contrib/lightgbm/core.py +372 -0
  274. maxframe/learn/contrib/lightgbm/dataset.py +153 -0
  275. maxframe/learn/contrib/lightgbm/regressor.py +29 -0
  276. maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
  277. maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
  278. maxframe/learn/contrib/models.py +38 -9
  279. maxframe/learn/contrib/utils.py +55 -0
  280. maxframe/learn/contrib/xgboost/callback.py +86 -0
  281. maxframe/learn/contrib/xgboost/classifier.py +26 -30
  282. maxframe/learn/contrib/xgboost/core.py +54 -42
  283. maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
  284. maxframe/learn/contrib/xgboost/predict.py +16 -9
  285. maxframe/learn/contrib/xgboost/regressor.py +28 -27
  286. maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
  287. maxframe/learn/contrib/xgboost/train.py +59 -16
  288. maxframe/learn/core.py +252 -0
  289. maxframe/learn/datasets/__init__.py +20 -0
  290. maxframe/learn/datasets/samples_generator.py +628 -0
  291. maxframe/learn/linear_model/__init__.py +15 -0
  292. maxframe/learn/linear_model/_base.py +163 -0
  293. maxframe/learn/linear_model/_lin_reg.py +175 -0
  294. maxframe/learn/metrics/__init__.py +25 -0
  295. maxframe/learn/metrics/_check_targets.py +95 -0
  296. maxframe/learn/metrics/_classification.py +1121 -0
  297. maxframe/learn/metrics/_regression.py +256 -0
  298. maxframe/learn/model_selection/__init__.py +15 -0
  299. maxframe/learn/model_selection/_split.py +451 -0
  300. maxframe/learn/model_selection/tests/__init__.py +13 -0
  301. maxframe/learn/model_selection/tests/test_split.py +156 -0
  302. maxframe/learn/preprocessing/__init__.py +16 -0
  303. maxframe/learn/preprocessing/_data/__init__.py +17 -0
  304. maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
  305. maxframe/learn/preprocessing/_data/normalize.py +127 -0
  306. maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
  307. maxframe/learn/preprocessing/_data/utils.py +79 -0
  308. maxframe/learn/preprocessing/_label/__init__.py +16 -0
  309. maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
  310. maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
  311. maxframe/learn/utils/__init__.py +4 -0
  312. maxframe/learn/utils/_encode.py +314 -0
  313. maxframe/learn/utils/checks.py +161 -0
  314. maxframe/learn/utils/core.py +33 -0
  315. maxframe/learn/utils/extmath.py +176 -0
  316. maxframe/learn/utils/multiclass.py +292 -0
  317. maxframe/learn/utils/shuffle.py +114 -0
  318. maxframe/learn/utils/sparsefuncs.py +87 -0
  319. maxframe/learn/utils/validation.py +775 -0
  320. maxframe/lib/__init__.py +0 -2
  321. maxframe/lib/compat.py +145 -0
  322. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  323. maxframe/lib/mmh3.cp39-win_amd64.pyd +0 -0
  324. maxframe/lib/sparse/__init__.py +10 -15
  325. maxframe/lib/sparse/array.py +45 -33
  326. maxframe/lib/sparse/core.py +0 -2
  327. maxframe/lib/sparse/linalg.py +31 -0
  328. maxframe/lib/sparse/matrix.py +5 -2
  329. maxframe/lib/sparse/tests/__init__.py +0 -2
  330. maxframe/lib/sparse/tests/test_sparse.py +53 -53
  331. maxframe/lib/sparse/vector.py +0 -2
  332. maxframe/mixin.py +59 -2
  333. maxframe/opcodes.py +13 -5
  334. maxframe/protocol.py +67 -14
  335. maxframe/remote/core.py +16 -14
  336. maxframe/remote/run_script.py +6 -3
  337. maxframe/serialization/__init__.py +2 -0
  338. maxframe/serialization/core.cp39-win_amd64.pyd +0 -0
  339. maxframe/serialization/core.pxd +3 -0
  340. maxframe/serialization/core.pyi +3 -1
  341. maxframe/serialization/core.pyx +82 -4
  342. maxframe/serialization/pandas.py +5 -1
  343. maxframe/serialization/serializables/core.py +6 -5
  344. maxframe/serialization/serializables/field.py +2 -2
  345. maxframe/serialization/serializables/tests/test_field_type.py +3 -5
  346. maxframe/serialization/tests/test_serial.py +27 -0
  347. maxframe/session.py +4 -71
  348. maxframe/sperunner.py +165 -0
  349. maxframe/tensor/__init__.py +35 -2
  350. maxframe/tensor/arithmetic/__init__.py +2 -4
  351. maxframe/tensor/arithmetic/abs.py +0 -2
  352. maxframe/tensor/arithmetic/absolute.py +0 -2
  353. maxframe/tensor/arithmetic/add.py +34 -4
  354. maxframe/tensor/arithmetic/angle.py +0 -2
  355. maxframe/tensor/arithmetic/arccos.py +1 -4
  356. maxframe/tensor/arithmetic/arccosh.py +1 -3
  357. maxframe/tensor/arithmetic/arcsin.py +0 -2
  358. maxframe/tensor/arithmetic/arcsinh.py +0 -2
  359. maxframe/tensor/arithmetic/arctan.py +0 -2
  360. maxframe/tensor/arithmetic/arctan2.py +0 -2
  361. maxframe/tensor/arithmetic/arctanh.py +0 -2
  362. maxframe/tensor/arithmetic/around.py +0 -2
  363. maxframe/tensor/arithmetic/bitand.py +0 -2
  364. maxframe/tensor/arithmetic/bitor.py +1 -3
  365. maxframe/tensor/arithmetic/bitxor.py +1 -3
  366. maxframe/tensor/arithmetic/cbrt.py +0 -2
  367. maxframe/tensor/arithmetic/ceil.py +0 -2
  368. maxframe/tensor/arithmetic/clip.py +13 -13
  369. maxframe/tensor/arithmetic/conj.py +0 -2
  370. maxframe/tensor/arithmetic/copysign.py +0 -2
  371. maxframe/tensor/arithmetic/core.py +47 -39
  372. maxframe/tensor/arithmetic/cos.py +1 -3
  373. maxframe/tensor/arithmetic/cosh.py +0 -2
  374. maxframe/tensor/arithmetic/deg2rad.py +0 -2
  375. maxframe/tensor/arithmetic/degrees.py +0 -2
  376. maxframe/tensor/arithmetic/divide.py +0 -2
  377. maxframe/tensor/arithmetic/equal.py +0 -2
  378. maxframe/tensor/arithmetic/exp.py +1 -3
  379. maxframe/tensor/arithmetic/exp2.py +0 -2
  380. maxframe/tensor/arithmetic/expm1.py +0 -2
  381. maxframe/tensor/arithmetic/fabs.py +0 -2
  382. maxframe/tensor/arithmetic/fix.py +0 -2
  383. maxframe/tensor/arithmetic/float_power.py +0 -2
  384. maxframe/tensor/arithmetic/floor.py +0 -2
  385. maxframe/tensor/arithmetic/floordiv.py +0 -2
  386. maxframe/tensor/arithmetic/fmax.py +0 -2
  387. maxframe/tensor/arithmetic/fmin.py +0 -2
  388. maxframe/tensor/arithmetic/fmod.py +0 -2
  389. maxframe/tensor/arithmetic/frexp.py +6 -2
  390. maxframe/tensor/arithmetic/greater.py +0 -2
  391. maxframe/tensor/arithmetic/greater_equal.py +0 -2
  392. maxframe/tensor/arithmetic/hypot.py +0 -2
  393. maxframe/tensor/arithmetic/i0.py +1 -3
  394. maxframe/tensor/arithmetic/imag.py +0 -2
  395. maxframe/tensor/arithmetic/invert.py +1 -3
  396. maxframe/tensor/arithmetic/isclose.py +0 -2
  397. maxframe/tensor/arithmetic/iscomplex.py +0 -2
  398. maxframe/tensor/arithmetic/isfinite.py +1 -3
  399. maxframe/tensor/arithmetic/isinf.py +0 -2
  400. maxframe/tensor/arithmetic/isnan.py +0 -2
  401. maxframe/tensor/arithmetic/isreal.py +0 -2
  402. maxframe/tensor/arithmetic/ldexp.py +0 -2
  403. maxframe/tensor/arithmetic/less.py +0 -2
  404. maxframe/tensor/arithmetic/less_equal.py +0 -2
  405. maxframe/tensor/arithmetic/log.py +1 -3
  406. maxframe/tensor/arithmetic/log10.py +1 -3
  407. maxframe/tensor/arithmetic/log1p.py +1 -3
  408. maxframe/tensor/arithmetic/log2.py +1 -3
  409. maxframe/tensor/arithmetic/logaddexp.py +0 -2
  410. maxframe/tensor/arithmetic/logaddexp2.py +0 -2
  411. maxframe/tensor/arithmetic/logical_and.py +0 -2
  412. maxframe/tensor/arithmetic/logical_not.py +1 -3
  413. maxframe/tensor/arithmetic/logical_or.py +0 -2
  414. maxframe/tensor/arithmetic/logical_xor.py +0 -2
  415. maxframe/tensor/arithmetic/lshift.py +0 -2
  416. maxframe/tensor/arithmetic/maximum.py +0 -2
  417. maxframe/tensor/arithmetic/minimum.py +0 -2
  418. maxframe/tensor/arithmetic/mod.py +0 -2
  419. maxframe/tensor/arithmetic/modf.py +6 -2
  420. maxframe/tensor/arithmetic/multiply.py +37 -4
  421. maxframe/tensor/arithmetic/nan_to_num.py +0 -2
  422. maxframe/tensor/arithmetic/negative.py +0 -2
  423. maxframe/tensor/arithmetic/nextafter.py +0 -2
  424. maxframe/tensor/arithmetic/not_equal.py +0 -2
  425. maxframe/tensor/arithmetic/positive.py +0 -2
  426. maxframe/tensor/arithmetic/power.py +0 -2
  427. maxframe/tensor/arithmetic/rad2deg.py +0 -2
  428. maxframe/tensor/arithmetic/radians.py +0 -2
  429. maxframe/tensor/arithmetic/real.py +0 -2
  430. maxframe/tensor/arithmetic/reciprocal.py +5 -3
  431. maxframe/tensor/arithmetic/rint.py +1 -3
  432. maxframe/tensor/arithmetic/rshift.py +0 -2
  433. maxframe/tensor/arithmetic/setimag.py +0 -2
  434. maxframe/tensor/arithmetic/setreal.py +0 -2
  435. maxframe/tensor/arithmetic/sign.py +0 -2
  436. maxframe/tensor/arithmetic/signbit.py +0 -2
  437. maxframe/tensor/arithmetic/sin.py +0 -2
  438. maxframe/tensor/arithmetic/sinc.py +1 -3
  439. maxframe/tensor/arithmetic/sinh.py +0 -2
  440. maxframe/tensor/arithmetic/spacing.py +0 -2
  441. maxframe/tensor/arithmetic/sqrt.py +0 -2
  442. maxframe/tensor/arithmetic/square.py +0 -2
  443. maxframe/tensor/arithmetic/subtract.py +4 -2
  444. maxframe/tensor/arithmetic/tan.py +0 -2
  445. maxframe/tensor/arithmetic/tanh.py +0 -2
  446. maxframe/tensor/arithmetic/tests/__init__.py +0 -2
  447. maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
  448. maxframe/tensor/arithmetic/truediv.py +0 -2
  449. maxframe/tensor/arithmetic/trunc.py +0 -2
  450. maxframe/tensor/arithmetic/utils.py +32 -6
  451. maxframe/tensor/array_utils.py +3 -25
  452. maxframe/tensor/core.py +6 -6
  453. maxframe/tensor/datasource/__init__.py +10 -2
  454. maxframe/tensor/datasource/arange.py +0 -2
  455. maxframe/tensor/datasource/array.py +3 -22
  456. maxframe/tensor/datasource/core.py +15 -10
  457. maxframe/tensor/datasource/diag.py +140 -0
  458. maxframe/tensor/datasource/diagflat.py +69 -0
  459. maxframe/tensor/datasource/empty.py +0 -2
  460. maxframe/tensor/datasource/eye.py +95 -0
  461. maxframe/tensor/datasource/from_dataframe.py +0 -2
  462. maxframe/tensor/datasource/from_dense.py +0 -17
  463. maxframe/tensor/datasource/from_sparse.py +0 -2
  464. maxframe/tensor/datasource/full.py +0 -2
  465. maxframe/tensor/datasource/identity.py +54 -0
  466. maxframe/tensor/datasource/indices.py +115 -0
  467. maxframe/tensor/datasource/linspace.py +140 -0
  468. maxframe/tensor/datasource/meshgrid.py +135 -0
  469. maxframe/tensor/datasource/ones.py +8 -3
  470. maxframe/tensor/datasource/tests/test_datasource.py +32 -1
  471. maxframe/tensor/datasource/tri_array.py +107 -0
  472. maxframe/tensor/datasource/zeros.py +7 -3
  473. maxframe/tensor/extensions/__init__.py +31 -0
  474. maxframe/tensor/extensions/accessor.py +25 -0
  475. maxframe/tensor/extensions/apply_chunk.py +137 -0
  476. maxframe/tensor/indexing/__init__.py +1 -1
  477. maxframe/tensor/indexing/choose.py +8 -6
  478. maxframe/tensor/indexing/compress.py +0 -2
  479. maxframe/tensor/indexing/extract.py +0 -2
  480. maxframe/tensor/indexing/fill_diagonal.py +9 -6
  481. maxframe/tensor/indexing/flatnonzero.py +1 -3
  482. maxframe/tensor/indexing/getitem.py +10 -43
  483. maxframe/tensor/indexing/nonzero.py +2 -4
  484. maxframe/tensor/indexing/setitem.py +19 -9
  485. maxframe/tensor/indexing/slice.py +6 -3
  486. maxframe/tensor/indexing/take.py +0 -2
  487. maxframe/tensor/indexing/tests/__init__.py +0 -2
  488. maxframe/tensor/indexing/tests/test_indexing.py +0 -2
  489. maxframe/tensor/indexing/unravel_index.py +6 -6
  490. maxframe/tensor/lib/__init__.py +16 -0
  491. maxframe/tensor/lib/index_tricks.py +404 -0
  492. maxframe/tensor/linalg/__init__.py +36 -0
  493. maxframe/tensor/linalg/dot.py +145 -0
  494. maxframe/tensor/linalg/inner.py +36 -0
  495. maxframe/tensor/linalg/inv.py +83 -0
  496. maxframe/tensor/linalg/lu.py +115 -0
  497. maxframe/tensor/linalg/matmul.py +225 -0
  498. maxframe/tensor/linalg/qr.py +124 -0
  499. maxframe/tensor/linalg/solve_triangular.py +103 -0
  500. maxframe/tensor/linalg/svd.py +167 -0
  501. maxframe/tensor/linalg/tensordot.py +213 -0
  502. maxframe/tensor/linalg/vdot.py +73 -0
  503. maxframe/tensor/merge/__init__.py +4 -0
  504. maxframe/tensor/merge/append.py +74 -0
  505. maxframe/tensor/merge/column_stack.py +63 -0
  506. maxframe/tensor/merge/concatenate.py +3 -2
  507. maxframe/tensor/merge/dstack.py +71 -0
  508. maxframe/tensor/merge/hstack.py +70 -0
  509. maxframe/tensor/merge/stack.py +0 -2
  510. maxframe/tensor/merge/tests/test_merge.py +0 -2
  511. maxframe/tensor/misc/__init__.py +18 -5
  512. maxframe/tensor/misc/astype.py +10 -8
  513. maxframe/tensor/misc/broadcast_to.py +1 -1
  514. maxframe/tensor/misc/copy.py +64 -0
  515. maxframe/tensor/misc/diff.py +115 -0
  516. maxframe/tensor/misc/flatten.py +63 -0
  517. maxframe/tensor/misc/in1d.py +94 -0
  518. maxframe/tensor/misc/isin.py +130 -0
  519. maxframe/tensor/misc/ndim.py +53 -0
  520. maxframe/tensor/misc/ravel.py +0 -2
  521. maxframe/tensor/misc/repeat.py +129 -0
  522. maxframe/tensor/misc/searchsorted.py +147 -0
  523. maxframe/tensor/misc/setdiff1d.py +58 -0
  524. maxframe/tensor/misc/squeeze.py +117 -0
  525. maxframe/tensor/misc/swapaxes.py +113 -0
  526. maxframe/tensor/misc/tests/test_misc.py +0 -2
  527. maxframe/tensor/misc/transpose.py +8 -4
  528. maxframe/tensor/misc/trapezoid.py +123 -0
  529. maxframe/tensor/misc/unique.py +0 -1
  530. maxframe/tensor/misc/where.py +10 -8
  531. maxframe/tensor/operators.py +0 -34
  532. maxframe/tensor/random/__init__.py +3 -5
  533. maxframe/tensor/random/binomial.py +0 -2
  534. maxframe/tensor/random/bytes.py +0 -2
  535. maxframe/tensor/random/chisquare.py +0 -2
  536. maxframe/tensor/random/choice.py +9 -8
  537. maxframe/tensor/random/core.py +20 -5
  538. maxframe/tensor/random/dirichlet.py +0 -2
  539. maxframe/tensor/random/exponential.py +0 -2
  540. maxframe/tensor/random/f.py +2 -4
  541. maxframe/tensor/random/gamma.py +0 -2
  542. maxframe/tensor/random/geometric.py +0 -2
  543. maxframe/tensor/random/gumbel.py +0 -2
  544. maxframe/tensor/random/hypergeometric.py +0 -2
  545. maxframe/tensor/random/laplace.py +2 -4
  546. maxframe/tensor/random/logistic.py +0 -2
  547. maxframe/tensor/random/lognormal.py +0 -2
  548. maxframe/tensor/random/logseries.py +0 -2
  549. maxframe/tensor/random/multinomial.py +0 -2
  550. maxframe/tensor/random/multivariate_normal.py +0 -2
  551. maxframe/tensor/random/negative_binomial.py +0 -2
  552. maxframe/tensor/random/noncentral_chisquare.py +0 -2
  553. maxframe/tensor/random/noncentral_f.py +1 -3
  554. maxframe/tensor/random/normal.py +0 -2
  555. maxframe/tensor/random/pareto.py +0 -2
  556. maxframe/tensor/random/permutation.py +6 -3
  557. maxframe/tensor/random/poisson.py +0 -2
  558. maxframe/tensor/random/power.py +0 -2
  559. maxframe/tensor/random/rand.py +0 -2
  560. maxframe/tensor/random/randint.py +0 -2
  561. maxframe/tensor/random/randn.py +0 -2
  562. maxframe/tensor/random/random_integers.py +0 -2
  563. maxframe/tensor/random/random_sample.py +0 -2
  564. maxframe/tensor/random/rayleigh.py +0 -2
  565. maxframe/tensor/random/standard_cauchy.py +0 -2
  566. maxframe/tensor/random/standard_exponential.py +0 -2
  567. maxframe/tensor/random/standard_gamma.py +0 -2
  568. maxframe/tensor/random/standard_normal.py +0 -2
  569. maxframe/tensor/random/standard_t.py +0 -2
  570. maxframe/tensor/random/tests/__init__.py +0 -2
  571. maxframe/tensor/random/tests/test_random.py +0 -2
  572. maxframe/tensor/random/triangular.py +0 -2
  573. maxframe/tensor/random/uniform.py +0 -2
  574. maxframe/tensor/random/vonmises.py +0 -2
  575. maxframe/tensor/random/wald.py +0 -2
  576. maxframe/tensor/random/weibull.py +0 -2
  577. maxframe/tensor/random/zipf.py +0 -2
  578. maxframe/tensor/reduction/__init__.py +0 -2
  579. maxframe/tensor/reduction/all.py +0 -2
  580. maxframe/tensor/reduction/allclose.py +0 -2
  581. maxframe/tensor/reduction/any.py +0 -2
  582. maxframe/tensor/reduction/argmax.py +1 -3
  583. maxframe/tensor/reduction/argmin.py +1 -3
  584. maxframe/tensor/reduction/array_equal.py +0 -2
  585. maxframe/tensor/reduction/core.py +0 -2
  586. maxframe/tensor/reduction/count_nonzero.py +0 -2
  587. maxframe/tensor/reduction/cumprod.py +0 -2
  588. maxframe/tensor/reduction/cumsum.py +0 -2
  589. maxframe/tensor/reduction/max.py +0 -2
  590. maxframe/tensor/reduction/mean.py +0 -2
  591. maxframe/tensor/reduction/min.py +0 -2
  592. maxframe/tensor/reduction/nanargmax.py +0 -2
  593. maxframe/tensor/reduction/nanargmin.py +0 -2
  594. maxframe/tensor/reduction/nancumprod.py +0 -2
  595. maxframe/tensor/reduction/nancumsum.py +0 -2
  596. maxframe/tensor/reduction/nanmax.py +0 -2
  597. maxframe/tensor/reduction/nanmean.py +0 -2
  598. maxframe/tensor/reduction/nanmin.py +0 -2
  599. maxframe/tensor/reduction/nanprod.py +0 -2
  600. maxframe/tensor/reduction/nanstd.py +0 -2
  601. maxframe/tensor/reduction/nansum.py +0 -2
  602. maxframe/tensor/reduction/nanvar.py +0 -2
  603. maxframe/tensor/reduction/prod.py +0 -2
  604. maxframe/tensor/reduction/std.py +0 -2
  605. maxframe/tensor/reduction/sum.py +0 -2
  606. maxframe/tensor/reduction/tests/test_reduction.py +1 -4
  607. maxframe/tensor/reduction/var.py +0 -2
  608. maxframe/tensor/reshape/__init__.py +0 -2
  609. maxframe/tensor/reshape/reshape.py +6 -5
  610. maxframe/tensor/reshape/tests/__init__.py +0 -2
  611. maxframe/tensor/reshape/tests/test_reshape.py +0 -2
  612. maxframe/tensor/sort/__init__.py +16 -0
  613. maxframe/tensor/sort/argsort.py +150 -0
  614. maxframe/tensor/sort/sort.py +295 -0
  615. maxframe/tensor/special/__init__.py +37 -0
  616. maxframe/tensor/special/core.py +38 -0
  617. maxframe/tensor/special/misc.py +142 -0
  618. maxframe/tensor/special/statistical.py +56 -0
  619. maxframe/tensor/statistics/__init__.py +5 -0
  620. maxframe/tensor/statistics/average.py +143 -0
  621. maxframe/tensor/statistics/bincount.py +133 -0
  622. maxframe/tensor/statistics/quantile.py +10 -8
  623. maxframe/tensor/ufunc/__init__.py +0 -2
  624. maxframe/tensor/ufunc/ufunc.py +0 -2
  625. maxframe/tensor/utils.py +21 -3
  626. maxframe/tests/test_protocol.py +3 -3
  627. maxframe/tests/test_utils.py +210 -1
  628. maxframe/tests/utils.py +59 -1
  629. maxframe/udf.py +76 -6
  630. maxframe/utils.py +418 -17
  631. {maxframe-1.3.1.dist-info → maxframe-2.0.0.dist-info}/METADATA +4 -1
  632. maxframe-2.0.0.dist-info/RECORD +939 -0
  633. {maxframe-1.3.1.dist-info → maxframe-2.0.0.dist-info}/WHEEL +1 -1
  634. maxframe_client/clients/framedriver.py +19 -3
  635. maxframe_client/fetcher.py +113 -6
  636. maxframe_client/session/odps.py +173 -38
  637. maxframe_client/session/task.py +3 -1
  638. maxframe_client/tests/test_session.py +41 -5
  639. maxframe-1.3.1.dist-info/RECORD +0 -705
  640. {maxframe-1.3.1.dist-info → maxframe-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,3 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
1
  # Copyright 1999-2025 Alibaba Group Holding Ltd.
4
2
  #
5
3
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,6 +12,8 @@
14
12
  # See the License for the specific language governing permissions and
15
13
  # limitations under the License.
16
14
 
15
+ from typing import MutableMapping, Union
16
+
17
17
  import numpy as np
18
18
  import pandas as pd
19
19
 
@@ -58,6 +58,13 @@ class DataFrameFromRecords(DataFrameOperator, DataFrameOperatorMixin):
58
58
  columns_value=columns_value,
59
59
  )
60
60
 
61
+ @classmethod
62
+ def estimate_size(
63
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameFromRecords"
64
+ ): # pragma: no cover
65
+ # todo implement this to facilitate local computation
66
+ ctx[op.outputs[0].key] = float("inf")
67
+
61
68
 
62
69
  def from_records(
63
70
  data,
@@ -1,5 +1,3 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
1
  # Copyright 1999-2025 Alibaba Group Holding Ltd.
4
2
  #
5
3
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,13 +13,13 @@
15
13
  # limitations under the License.
16
14
 
17
15
  from collections import OrderedDict
18
- from typing import Any, Dict, List, Union
16
+ from typing import Any, Dict, List, MutableMapping, Union
19
17
 
20
18
  import numpy as np
21
19
  import pandas as pd
22
20
 
23
21
  from ... import opcodes
24
- from ...core import ENTITY_TYPE, OutputType
22
+ from ...core import ENTITY_TYPE, EntityData, OutputType
25
23
  from ...serialization.serializables import AnyField, KeyField
26
24
  from ...tensor.core import Tensor
27
25
  from ...tensor.datasource import tensor as astensor
@@ -46,24 +44,25 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
46
44
  kwargs["_output_types"] = [OutputType.dataframe]
47
45
  super().__init__(*args, **kwargs)
48
46
 
49
- def _set_inputs(self, inputs: List[EntityType]):
50
- super()._set_inputs(inputs)
51
- inputs_iter = iter(self._inputs)
52
- if self.input is not None:
53
- if not isinstance(self.input, dict):
54
- self.input = next(inputs_iter)
47
+ @classmethod
48
+ def _set_inputs(cls, op: "DataFrameFromTensor", inputs: List[EntityData]):
49
+ super()._set_inputs(op, inputs)
50
+ inputs_iter = iter(op._inputs)
51
+ if op.input is not None:
52
+ if not isinstance(op.input, dict):
53
+ op.input = next(inputs_iter)
55
54
  else:
56
55
  # check each value for input
57
56
  new_input = OrderedDict()
58
- for k, v in self.input.items():
57
+ for k, v in op.input.items():
59
58
  if isinstance(v, ENTITY_TYPE):
60
59
  new_input[k] = next(inputs_iter)
61
60
  else:
62
61
  new_input[k] = v
63
- self.input = new_input
62
+ op.input = new_input
64
63
 
65
- if isinstance(self.index, ENTITY_TYPE):
66
- self.index = next(inputs_iter)
64
+ if isinstance(op.index, ENTITY_TYPE):
65
+ op.index = next(inputs_iter)
67
66
 
68
67
  def __call__(
69
68
  self,
@@ -138,7 +137,11 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
138
137
  )
139
138
  index_value = self._process_index(index, tileables)
140
139
  else:
141
- self.index = index = pd.RangeIndex(0, tileables[0].shape[0])
140
+ if np.isnan(tileables[0].shape[0]):
141
+ index = pd.RangeIndex(0)
142
+ else:
143
+ index = pd.RangeIndex(0, tileables[0].shape[0])
144
+ self.index = index
142
145
  index_value = parse_index(index)
143
146
 
144
147
  if columns is not None:
@@ -260,6 +263,13 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
260
263
  columns_value=columns_value,
261
264
  )
262
265
 
266
+ @classmethod
267
+ def estimate_size(
268
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameFromTensor"
269
+ ): # pragma: no cover
270
+ # todo implement this to facilitate local computation
271
+ ctx[op.outputs[0].key] = float("inf")
272
+
263
273
 
264
274
  def dataframe_from_tensor(
265
275
  tensor: Tensor,
@@ -340,12 +350,13 @@ class SeriesFromTensor(DataFrameOperator, DataFrameOperatorMixin):
340
350
  input = KeyField("input")
341
351
  index = AnyField("index")
342
352
 
343
- def _set_inputs(self, inputs: List[EntityType]):
344
- super()._set_inputs(inputs)
345
- if self.input is not None:
346
- self.input = self.inputs[0]
347
- if self.index is not None and hasattr(self.index, "key"):
348
- self.index = self.inputs[-1]
353
+ @classmethod
354
+ def _set_inputs(cls, op: "SeriesFromTensor", inputs: List[EntityData]):
355
+ super()._set_inputs(op, inputs)
356
+ if op.input is not None:
357
+ op.input = op.inputs[0]
358
+ if op.index is not None and hasattr(op.index, "key"):
359
+ op.index = op.inputs[-1]
349
360
 
350
361
  def __call__(
351
362
  self,
@@ -1,5 +1,3 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
1
  # Copyright 1999-2025 Alibaba Group Holding Ltd.
4
2
  #
5
3
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,6 +13,7 @@
15
13
  # limitations under the License.
16
14
 
17
15
  from io import BytesIO
16
+ from typing import MutableMapping, Union
18
17
  from urllib.parse import urlparse
19
18
 
20
19
  import numpy as np
@@ -129,6 +128,13 @@ class DataFrameReadCSV(
129
128
  chunk_bytes=chunk_bytes,
130
129
  )
131
130
 
131
+ @classmethod
132
+ def estimate_size(
133
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameReadCSV"
134
+ ): # pragma: no cover
135
+ # todo implement this to facilitate local computation
136
+ ctx[op.outputs[0].key] = float("inf")
137
+
132
138
 
133
139
  def read_csv(
134
140
  path,
@@ -13,13 +13,18 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import dataclasses
16
+ import functools
17
+ import io
16
18
  import logging
17
19
  import re
18
- from typing import Dict, List, Optional, Tuple, Union
20
+ import tokenize
21
+ from typing import Dict, List, MutableMapping, Optional, Tuple, Union
19
22
 
20
23
  import numpy as np
21
24
  import pandas as pd
22
25
  from odps import ODPS
26
+ from odps.errors import ODPSError
27
+ from odps.models import TableSchema
23
28
  from odps.types import Column, OdpsSchema, validate_data_type
24
29
  from odps.utils import split_sql_by_semicolon
25
30
 
@@ -110,7 +115,15 @@ def _split_explain_string(explain_string: str) -> List[str]:
110
115
  grouped = []
111
116
  for part in parts:
112
117
  part = part.strip("\n")
113
- if grouped and not part.startswith(" "):
118
+ part_line1 = part.split("\n", 1)[0]
119
+ # initial line of part should not start with spaces (Statistics row)
120
+ # or with quote marks
121
+ if (
122
+ grouped
123
+ and not part.startswith(" ")
124
+ and "'" not in part_line1
125
+ and '"' not in part_line1
126
+ ):
114
127
  final_parts.append("\n\n".join(grouped).strip())
115
128
  grouped = []
116
129
  grouped.append(part)
@@ -235,13 +248,18 @@ def _parse_explained_schema(explain_string: str) -> OdpsSchema:
235
248
  return _parse_full_explain(explain_string)
236
249
 
237
250
 
238
- def _build_explain_sql(sql_stmt: str, no_split: bool = False) -> str:
251
+ def _build_explain_sql(
252
+ sql_stmt: str, no_split: bool = False, use_output: bool = False
253
+ ) -> str:
254
+ clause = "EXPLAIN "
255
+ if use_output:
256
+ clause += "OUTPUT "
239
257
  if no_split:
240
- return "EXPLAIN " + sql_stmt
258
+ return clause + sql_stmt
241
259
  sql_parts = split_sql_by_semicolon(sql_stmt)
242
260
  if not sql_parts:
243
261
  raise ValueError(f"Cannot explain SQL statement {sql_stmt}")
244
- sql_parts[-1] = "EXPLAIN " + sql_parts[-1]
262
+ sql_parts[-1] = clause + sql_parts[-1]
245
263
  return "\n".join(sql_parts)
246
264
 
247
265
 
@@ -267,6 +285,15 @@ class DataFrameReadODPSQuery(
267
285
  def set_pruned_columns(self, columns, *, keep_order=None): # pragma: no cover
268
286
  self.columns = columns
269
287
 
288
+ @classmethod
289
+ def estimate_size(
290
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameReadODPSQuery"
291
+ ): # pragma: no cover
292
+ # use infinity to show that the size cannot be inferred
293
+ # todo when local catalyst is implemented,
294
+ # a more precise estimation here can be useful then.
295
+ ctx[op.outputs[0].key] = float("inf")
296
+
270
297
  def __call__(self, chunk_bytes=None, chunk_size=None):
271
298
  if is_empty(self.index_columns):
272
299
  index_value = parse_index(pd.RangeIndex(0))
@@ -302,6 +329,73 @@ class DataFrameReadODPSQuery(
302
329
  )
303
330
 
304
331
 
332
+ def _check_token_in_sql(token: str, sql: str) -> bool:
333
+ try:
334
+ names = set()
335
+ for tk_info in tokenize.tokenize(io.BytesIO(sql.encode()).readline):
336
+ if tk_info.type == tokenize.NAME:
337
+ names.add(tk_info.string)
338
+ return token in names
339
+ except: # pragma: no cover
340
+ return False
341
+
342
+
343
+ def _resolve_schema_by_explain(
344
+ odps_entry: ODPS,
345
+ query: str,
346
+ no_split_sql: bool = False,
347
+ hints: Dict[str, str] = None,
348
+ use_explain_output: bool = True,
349
+ ) -> OdpsSchema:
350
+ hints = (hints or dict()).copy()
351
+ hints["odps.sql.select.output.format"] = "json"
352
+ explain_stmt = _build_explain_sql(
353
+ query, no_split=no_split_sql, use_output=use_explain_output
354
+ )
355
+ inst = odps_entry.execute_sql(explain_stmt, hints=hints)
356
+ logger.debug("Explain output instance ID: %s", inst.id)
357
+ explain_str = list(inst.get_task_results().values())[0]
358
+ if use_explain_output:
359
+ if not explain_str or "nothing to explain" in explain_str:
360
+ raise ValueError("The SQL statement should be an instant query")
361
+ return TableSchema.parse(None, explain_str)
362
+ else:
363
+ return _parse_explained_schema(explain_str)
364
+
365
+
366
+ def _resolve_query_schema(
367
+ odps_entry: ODPS,
368
+ query: str,
369
+ no_split_sql: bool = False,
370
+ hints: Dict[str, str] = None,
371
+ use_explain_output: Optional[bool] = None,
372
+ ) -> OdpsSchema:
373
+ methods = []
374
+ if use_explain_output is not False:
375
+ # None or True
376
+ methods.append(_resolve_schema_by_explain)
377
+ if use_explain_output is not True:
378
+ # None or False
379
+ methods.append(
380
+ functools.partial(_resolve_schema_by_explain, use_explain_output=False)
381
+ )
382
+ for idx, resolve_method in enumerate(methods):
383
+ try:
384
+ return resolve_method(
385
+ odps_entry, query, no_split_sql=no_split_sql, hints=hints
386
+ )
387
+ except ODPSError as ex:
388
+ msg = (
389
+ f"Failed to obtain schema from SQL explain: {ex!r}\n"
390
+ f"Explain instance ID: {ex.instance_id}"
391
+ )
392
+ if idx + 1 == len(methods) or "ODPS-0130161" not in str(ex):
393
+ exc = ValueError(msg)
394
+ raise exc.with_traceback(ex.__traceback__) from None
395
+ # will this happen?
396
+ raise ValueError("Failed to obtain schema from SQL explain") # pragma: no cover
397
+
398
+
305
399
  def read_odps_query(
306
400
  query: str,
307
401
  odps_entry: ODPS = None,
@@ -341,6 +435,8 @@ def read_odps_query(
341
435
  DataFrame read from MaxCompute (ODPS) table
342
436
  """
343
437
  no_split_sql = kw.pop("no_split_sql", False)
438
+ # if use_explain_output is None, will try two methods.
439
+ use_explain_output = kw.pop("use_explain_output", None)
344
440
 
345
441
  hints = options.sql.settings.copy() or {}
346
442
  if sql_hints:
@@ -365,24 +461,18 @@ def read_odps_query(
365
461
 
366
462
  col_renames = {}
367
463
  if not skip_schema:
368
- explain_stmt = _build_explain_sql(query, no_split=no_split_sql)
369
- inst = odps_entry.execute_sql(explain_stmt, hints=hints)
370
- logger.debug("Explain instance ID: %s", inst.id)
371
- explain_str = list(inst.get_task_results().values())[0]
372
-
373
- try:
374
- odps_schema = _parse_explained_schema(explain_str)
375
- except BaseException as ex:
376
- exc = ValueError(
377
- f"Failed to obtain schema from SQL explain: {ex!r}"
378
- f"\nExplain instance ID: {inst.id}"
379
- )
380
- raise exc.with_traceback(ex.__traceback__) from None
464
+ odps_schema = _resolve_query_schema(
465
+ odps_entry,
466
+ query,
467
+ no_split_sql=no_split_sql,
468
+ hints=hints,
469
+ use_explain_output=use_explain_output,
470
+ )
381
471
 
382
472
  new_columns = []
383
473
  for col in odps_schema.columns:
384
474
  anon_match = _ANONYMOUS_COL_REGEX.match(col.name)
385
- if anon_match and col.name not in query:
475
+ if anon_match and not _check_token_in_sql(col.name, query):
386
476
  new_name = anonymous_col_prefix + anon_match.group(1)
387
477
  col_renames[col.name] = new_name
388
478
  new_columns.append(Column(new_name, col.type))
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import logging
16
- from typing import List, Optional, Union
16
+ from typing import List, MutableMapping, Optional, Union
17
17
 
18
18
  import numpy as np
19
19
  import pandas as pd
@@ -34,7 +34,7 @@ from ...serialization.serializables import (
34
34
  SeriesField,
35
35
  StringField,
36
36
  )
37
- from ...utils import is_empty
37
+ from ...utils import estimate_table_size, is_empty
38
38
  from ..core import DataFrame # noqa: F401
39
39
  from ..utils import parse_index
40
40
  from .core import ColumnPruneSupportedDataSourceMixin, IncrementalIndexDatasource
@@ -46,6 +46,7 @@ class DataFrameReadODPSTable(
46
46
  IncrementalIndexDatasource,
47
47
  ColumnPruneSupportedDataSourceMixin,
48
48
  ):
49
+ __slots__ = ("_odps_entry",)
49
50
  _op_type_ = opcodes.READ_ODPS_TABLE
50
51
 
51
52
  table_name = StringField("table_name")
@@ -61,7 +62,8 @@ class DataFrameReadODPSTable(
61
62
  index_dtypes = SeriesField("index_dtypes", default=None)
62
63
 
63
64
  def __init__(self, memory_scale=None, **kw):
64
- output_type = kw.get("output_type", OutputType.dataframe)
65
+ output_type = kw.pop("output_type", OutputType.dataframe)
66
+ self._odps_entry = kw.pop("odps_entry", None)
65
67
  super(DataFrameReadODPSTable, self).__init__(
66
68
  memory_scale=memory_scale, _output_types=[output_type], **kw
67
69
  )
@@ -130,6 +132,18 @@ class DataFrameReadODPSTable(
130
132
  chunk_size=chunk_size,
131
133
  )
132
134
 
135
+ @classmethod
136
+ def estimate_size(
137
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameReadODPSTable"
138
+ ) -> None:
139
+ odps_entry = op._odps_entry or ODPS.from_global() or ODPS.from_environments()
140
+ if not odps_entry: # pragma: no cover
141
+ ctx[op.outputs[0].key] = float("inf")
142
+ return
143
+ ctx[op.outputs[0].key] = estimate_table_size(
144
+ odps_entry, op.table_name, op.partitions
145
+ )
146
+
133
147
 
134
148
  def read_odps_table(
135
149
  table_name: Union[str, Table],
@@ -212,7 +226,8 @@ def read_odps_table(
212
226
  index_dtypes = pd.Series(table_index_types, index=index_col)
213
227
 
214
228
  if columns is not None:
215
- table_col_set = set([c.lower() for c in columns])
229
+ new_columns = [c.lower() for c in columns]
230
+ table_col_set = set(new_columns)
216
231
  col_diff = sorted(table_col_set - set(table_columns))
217
232
  if col_diff:
218
233
  raise ValueError(
@@ -223,7 +238,6 @@ def read_odps_table(
223
238
  raise ValueError("Index columns and columns shall not overlap.")
224
239
 
225
240
  # reorder columns
226
- new_columns = [c for c in table_columns if c in table_col_set]
227
241
  df_types = [df_types[table_columns.index(col)] for col in new_columns]
228
242
  table_columns = new_columns
229
243
  columns = new_columns
@@ -253,6 +267,7 @@ def read_odps_table(
253
267
  last_modified_time=to_timestamp(table.last_data_modified_time),
254
268
  index_columns=index_col,
255
269
  index_dtypes=index_dtypes,
270
+ odps_entry=odps_entry,
256
271
  **kw,
257
272
  )
258
273
  return op(shape, chunk_bytes=chunk_bytes, chunk_size=chunk_size)
@@ -1,5 +1,3 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
1
  # Copyright 1999-2025 Alibaba Group Holding Ltd.
4
2
  #
5
3
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -15,7 +13,7 @@
15
13
  # limitations under the License.
16
14
 
17
15
  import os
18
- from typing import Dict
16
+ from typing import Dict, MutableMapping, Union
19
17
  from urllib.parse import urlparse
20
18
 
21
19
  import numpy as np
@@ -308,6 +306,13 @@ class DataFrameReadParquet(
308
306
  columns_value=columns_value,
309
307
  )
310
308
 
309
+ @classmethod
310
+ def estimate_size(
311
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameReadParquet"
312
+ ): # pragma: no cover
313
+ # todo implement this to facilitate local computation
314
+ ctx[op.outputs[0].key] = float("inf")
315
+
311
316
 
312
317
  def read_parquet(
313
318
  path,
@@ -15,15 +15,19 @@
15
15
  import os
16
16
  import uuid
17
17
  from collections import OrderedDict
18
+ from math import isinf
18
19
 
20
+ import mock
19
21
  import numpy as np
20
22
  import pandas as pd
21
23
  import pytest
22
24
  from odps import ODPS
23
25
  from odps import types as odps_types
26
+ from odps.errors import ODPSError
24
27
 
25
28
  from .... import tensor as mt
26
29
  from ....core import OutputType
30
+ from ....core.operator import estimate_size
27
31
  from ....tests.utils import tn
28
32
  from ....utils import lazy_import
29
33
  from ... import read_odps_query, read_odps_table
@@ -48,6 +52,7 @@ from ..read_odps_query import (
48
52
  ColumnSchema,
49
53
  _parse_full_explain,
50
54
  _parse_simple_explain,
55
+ _resolve_query_schema,
51
56
  _resolve_task_sector,
52
57
  )
53
58
  from ..series import from_pandas as from_pandas_series
@@ -71,6 +76,10 @@ def test_from_pandas_dataframe():
71
76
  assert df.index_value.max_val == 9
72
77
  np.testing.assert_equal(df.columns_value._index_value._data, data.columns.values)
73
78
 
79
+ result_ctx = dict()
80
+ estimate_size(result_ctx, df.op)
81
+ assert result_ctx[df.key] > 0 and not isinf(result_ctx[df.key])
82
+
74
83
  data2 = data[::2]
75
84
  df2 = from_pandas_df(data2, chunk_size=4)
76
85
 
@@ -258,6 +267,10 @@ def test_from_odps_table():
258
267
  ),
259
268
  )
260
269
 
270
+ result_ctx = dict()
271
+ estimate_size(result_ctx, df.op)
272
+ assert result_ctx[df.key] >= 0 and not isinf(result_ctx[df.key])
273
+
261
274
  with pytest.raises(ValueError):
262
275
  read_odps_table(test_table, columns=["col3", "col4"])
263
276
  with pytest.raises(ValueError):
@@ -300,6 +313,7 @@ def test_from_odps_table():
300
313
  ),
301
314
  )
302
315
 
316
+ test_parted_table.create_partition("pt=20240103")
303
317
  df = read_odps_table(
304
318
  test_parted_table, columns=["col1", "col2", "pt"], partitions="pt=20240103"
305
319
  )
@@ -314,6 +328,10 @@ def test_from_odps_table():
314
328
  ),
315
329
  )
316
330
 
331
+ result_ctx = dict()
332
+ estimate_size(result_ctx, df.op)
333
+ assert result_ctx[df.key] >= 0 and not isinf(result_ctx[df.key])
334
+
317
335
  out_idx = read_odps_table(
318
336
  test_table,
319
337
  columns=[],
@@ -345,7 +363,7 @@ def test_from_odps_query():
345
363
 
346
364
  with pytest.raises(ValueError) as err_info:
347
365
  read_odps_query(
348
- f"CREATE TABLE dummy_table_{uuid.uuid4().hex} "
366
+ f"CREATE TABLE dummy_table_{uuid.uuid4().hex} LIFECYCLE 1 "
349
367
  f"AS SELECT * FROM {table1_name}"
350
368
  )
351
369
  assert "instant query" in err_info.value.args[0]
@@ -545,3 +563,64 @@ def test_resolve_multi_join():
545
563
  for col, (exp_nm, exp_tp) in zip(schema.columns, expected_col_types.items()):
546
564
  assert col.name == exp_nm
547
565
  assert col.type == odps_types.validate_data_type(exp_tp)
566
+
567
+
568
+ def test_resolve_break_lines():
569
+ input_path = os.path.join(
570
+ os.path.dirname(__file__), "test-data", "task-input-with-break-line.txt"
571
+ )
572
+ with open(input_path, "r") as f:
573
+ sector = f.read()
574
+
575
+ expected_col_types = {
576
+ "key": "string",
577
+ "value": "string",
578
+ }
579
+
580
+ schema = _parse_full_explain(sector)
581
+ for col, (exp_nm, exp_tp) in zip(schema.columns, expected_col_types.items()):
582
+ assert col.name == exp_nm
583
+ assert col.type == odps_types.validate_data_type(exp_tp)
584
+
585
+
586
+ @pytest.mark.parametrize("use_explain_output", [None, False, True])
587
+ def test_explain_use_explain_output(use_explain_output):
588
+ class MockInstance:
589
+ @property
590
+ def id(self):
591
+ return "mock_id"
592
+
593
+ def get_task_results(self):
594
+ return {"pot": """{"columns":[{"name":"a_bigint","type":"BIGINT"}]}"""}
595
+
596
+ old_execute_sql = ODPS.execute_sql
597
+ exec_count = 0
598
+
599
+ def new_execute_sql(self, sql, *args, **kw):
600
+ nonlocal exec_count
601
+ exec_count += 1
602
+
603
+ if use_explain_output and sql.lower().startswith("explain output select"):
604
+ return MockInstance()
605
+ elif use_explain_output is None and sql.lower().startswith("explain output"):
606
+ raise ODPSError("ODPS-0130161: mock error")
607
+ return old_execute_sql(self, sql, *args, **kw)
608
+
609
+ odps_entry = ODPS.from_environments()
610
+
611
+ with mock.patch("odps.core.ODPS.execute_sql", new=new_execute_sql):
612
+ with pytest.raises(ValueError):
613
+ _resolve_query_schema(
614
+ odps_entry, "not_a_sql", use_explain_output=use_explain_output
615
+ )
616
+ assert exec_count == (2 if use_explain_output is None else 1)
617
+
618
+ exec_count = 0
619
+ schema = _resolve_query_schema(
620
+ odps_entry,
621
+ "select cast(1 as bigint) as a_bigint",
622
+ use_explain_output=use_explain_output,
623
+ )
624
+ assert schema.columns[0].name == "a_bigint"
625
+ assert schema.columns[0].type == odps_types.bigint
626
+ assert exec_count == (2 if use_explain_output is None else 1)
@@ -12,9 +12,10 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import pandas as pd
15
16
  import pytest
16
17
 
17
- from ... import DataFrame
18
+ from ... import DataFrame, Index
18
19
  from ..to_odps import to_odps_table
19
20
 
20
21
 
@@ -23,6 +24,25 @@ def df():
23
24
  return DataFrame({"A": [1, 2], "B": [3, 4]})
24
25
 
25
26
 
27
+ @pytest.fixture
28
+ def df_with_named_index():
29
+ return DataFrame({"A": [1, 2], "B": [3, 4]}, index=Index([1, 2], name="A"))
30
+
31
+
32
+ @pytest.fixture
33
+ def df_with_named_multi_indexes():
34
+ arrays = [
35
+ ["c1", "c2"],
36
+ ["d1", "d2"],
37
+ ["e1", "e2"],
38
+ ]
39
+ multi_index = pd.MultiIndex.from_arrays(arrays, names=("C", "D", "E"))
40
+ return DataFrame(
41
+ {"A": [1, 2], "B": [3, 4]},
42
+ index=multi_index,
43
+ )
44
+
45
+
26
46
  @pytest.mark.parametrize(
27
47
  "kwargs",
28
48
  [
@@ -46,3 +66,34 @@ def test_to_odps_table_validation(df, kwargs):
46
66
  )
47
67
  def test_to_odps_table_vaild(df, kwargs):
48
68
  to_odps_table(df, "test_table", **kwargs)
69
+
70
+
71
+ def test_to_odps_table_column_conflicts(
72
+ df, df_with_named_index, df_with_named_multi_indexes
73
+ ):
74
+ to_odps_table(df.reset_index(), "test_table", index=False)
75
+ to_odps_table(df.reset_index(), "test_table", index_label="C")
76
+ with pytest.raises(ValueError):
77
+ to_odps_table(df.reset_index(), "test_table")
78
+
79
+ to_odps_table(df_with_named_index, "test_table", index=False)
80
+ to_odps_table(df_with_named_index, "test_table", index_label="C")
81
+ with pytest.raises(ValueError):
82
+ to_odps_table(df_with_named_index, "test_table")
83
+
84
+ to_odps_table(df, "test_table", partition="C='1'")
85
+ with pytest.raises(ValueError):
86
+ to_odps_table(df, "test_table", partition="A='1'")
87
+
88
+ with pytest.raises(ValueError):
89
+ to_odps_table(df, "test_table", partition="A='1'")
90
+
91
+ to_odps_table(df_with_named_multi_indexes, "test_table")
92
+ to_odps_table(
93
+ df_with_named_multi_indexes, "test_table", partition="C='1'", index=False
94
+ )
95
+ with pytest.raises(ValueError):
96
+ to_odps_table(df_with_named_multi_indexes, "test_table", partition="C='1'")
97
+
98
+ df_with_named_multi_indexes.index.names = ["C1", "D1", "E1"]
99
+ to_odps_table(df_with_named_multi_indexes, "test_table", partition="C='1'")