maxframe 1.3.0__cp37-cp37m-win32.whl → 2.0.0b1__cp37-cp37m-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (643) hide show
  1. maxframe/_utils.cp37-win32.pyd +0 -0
  2. maxframe/_utils.pyi +21 -0
  3. maxframe/_utils.pyx +4 -3
  4. maxframe/codegen/__init__.py +27 -0
  5. maxframe/{codegen.py → codegen/core.py} +49 -43
  6. maxframe/codegen/spe/__init__.py +16 -0
  7. maxframe/codegen/spe/core.py +307 -0
  8. maxframe/codegen/spe/dataframe/__init__.py +37 -0
  9. maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
  10. maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
  11. maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
  12. maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
  13. maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
  14. maxframe/codegen/spe/dataframe/datasource.py +181 -0
  15. maxframe/codegen/spe/dataframe/datastore.py +204 -0
  16. maxframe/codegen/spe/dataframe/extensions.py +63 -0
  17. maxframe/codegen/spe/dataframe/fetch.py +26 -0
  18. maxframe/codegen/spe/dataframe/groupby.py +224 -0
  19. maxframe/codegen/spe/dataframe/indexing.py +238 -0
  20. maxframe/codegen/spe/dataframe/merge.py +73 -0
  21. maxframe/codegen/spe/dataframe/misc.py +286 -0
  22. maxframe/codegen/spe/dataframe/missing.py +64 -0
  23. maxframe/codegen/spe/dataframe/reduction.py +160 -0
  24. maxframe/codegen/spe/dataframe/sort.py +83 -0
  25. maxframe/codegen/spe/dataframe/statistics.py +46 -0
  26. maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
  27. maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
  29. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
  30. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
  31. maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
  32. maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
  33. maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
  34. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
  35. maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
  36. maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
  37. maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
  38. maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
  39. maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
  40. maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
  41. maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
  42. maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
  43. maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
  44. maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
  45. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
  46. maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
  47. maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
  48. maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
  49. maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
  50. maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
  51. maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
  52. maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
  53. maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
  54. maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
  55. maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
  56. maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
  57. maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
  58. maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
  59. maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
  60. maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
  61. maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
  62. maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
  63. maxframe/codegen/spe/dataframe/tseries.py +46 -0
  64. maxframe/codegen/spe/dataframe/udf.py +62 -0
  65. maxframe/codegen/spe/dataframe/value_counts.py +31 -0
  66. maxframe/codegen/spe/dataframe/window.py +65 -0
  67. maxframe/codegen/spe/learn/__init__.py +15 -0
  68. maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
  69. maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
  70. maxframe/codegen/spe/learn/contrib/models.py +41 -0
  71. maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
  72. maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
  73. maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
  74. maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
  75. maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
  76. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
  77. maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
  78. maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
  79. maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
  80. maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
  81. maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
  82. maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
  83. maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
  84. maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
  85. maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
  86. maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
  87. maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
  88. maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
  89. maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
  90. maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
  91. maxframe/codegen/spe/learn/utils/__init__.py +15 -0
  92. maxframe/codegen/spe/learn/utils/checks.py +55 -0
  93. maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
  94. maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
  95. maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
  96. maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
  97. maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
  98. maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
  99. maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
  100. maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
  101. maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
  102. maxframe/codegen/spe/learn/utils/validation.py +35 -0
  103. maxframe/codegen/spe/objects.py +26 -0
  104. maxframe/codegen/spe/remote.py +29 -0
  105. maxframe/codegen/spe/tensor/__init__.py +28 -0
  106. maxframe/codegen/spe/tensor/arithmetic.py +95 -0
  107. maxframe/codegen/spe/tensor/core.py +41 -0
  108. maxframe/codegen/spe/tensor/datasource.py +165 -0
  109. maxframe/codegen/spe/tensor/extensions.py +35 -0
  110. maxframe/codegen/spe/tensor/fetch.py +26 -0
  111. maxframe/codegen/spe/tensor/indexing.py +63 -0
  112. maxframe/codegen/spe/tensor/linalg.py +63 -0
  113. maxframe/codegen/spe/tensor/merge.py +31 -0
  114. maxframe/codegen/spe/tensor/misc.py +121 -0
  115. maxframe/codegen/spe/tensor/random.py +29 -0
  116. maxframe/codegen/spe/tensor/reduction.py +39 -0
  117. maxframe/codegen/spe/tensor/reshape.py +26 -0
  118. maxframe/codegen/spe/tensor/sort.py +42 -0
  119. maxframe/codegen/spe/tensor/special.py +35 -0
  120. maxframe/codegen/spe/tensor/statistics.py +24 -0
  121. maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
  122. maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
  123. maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
  124. maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
  125. maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
  126. maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
  127. maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
  128. maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
  129. maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
  130. maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
  131. maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
  132. maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
  133. maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
  134. maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
  135. maxframe/codegen/spe/tests/__init__.py +13 -0
  136. maxframe/codegen/spe/tests/test_remote.py +29 -0
  137. maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
  138. maxframe/codegen/spe/utils.py +54 -0
  139. maxframe/codegen/tests/__init__.py +13 -0
  140. maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
  141. maxframe/config/__init__.py +1 -1
  142. maxframe/config/config.py +50 -23
  143. maxframe/config/tests/test_config.py +4 -12
  144. maxframe/config/validators.py +5 -0
  145. maxframe/conftest.py +38 -10
  146. maxframe/core/__init__.py +1 -0
  147. maxframe/core/context.py +110 -0
  148. maxframe/core/entity/__init__.py +1 -0
  149. maxframe/core/entity/core.py +0 -7
  150. maxframe/core/entity/objects.py +19 -5
  151. maxframe/core/entity/output_types.py +11 -0
  152. maxframe/core/entity/tests/test_objects.py +11 -12
  153. maxframe/core/entity/tileables.py +3 -1
  154. maxframe/core/entity/utils.py +15 -0
  155. maxframe/core/graph/__init__.py +6 -1
  156. maxframe/core/graph/builder/base.py +5 -1
  157. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  158. maxframe/core/graph/core.pyx +17 -6
  159. maxframe/core/graph/entity.py +18 -6
  160. maxframe/core/operator/__init__.py +8 -3
  161. maxframe/core/operator/base.py +35 -12
  162. maxframe/core/operator/core.py +37 -14
  163. maxframe/core/operator/fetch.py +5 -18
  164. maxframe/core/operator/objects.py +0 -20
  165. maxframe/core/operator/shuffle.py +6 -72
  166. maxframe/dataframe/__init__.py +1 -0
  167. maxframe/dataframe/accessors/datetime_/core.py +7 -4
  168. maxframe/dataframe/accessors/string_/core.py +9 -6
  169. maxframe/dataframe/arithmetic/core.py +31 -20
  170. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
  171. maxframe/dataframe/core.py +98 -91
  172. maxframe/dataframe/datasource/core.py +8 -1
  173. maxframe/dataframe/datasource/date_range.py +8 -0
  174. maxframe/dataframe/datasource/from_index.py +9 -5
  175. maxframe/dataframe/datasource/from_records.py +9 -2
  176. maxframe/dataframe/datasource/from_tensor.py +32 -21
  177. maxframe/dataframe/datasource/read_csv.py +8 -2
  178. maxframe/dataframe/datasource/read_odps_query.py +33 -3
  179. maxframe/dataframe/datasource/read_odps_table.py +20 -5
  180. maxframe/dataframe/datasource/read_parquet.py +8 -3
  181. maxframe/dataframe/datasource/tests/test_datasource.py +33 -0
  182. maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
  183. maxframe/dataframe/datastore/to_csv.py +7 -3
  184. maxframe/dataframe/datastore/to_odps.py +42 -6
  185. maxframe/dataframe/extensions/__init__.py +6 -1
  186. maxframe/dataframe/extensions/apply_chunk.py +96 -136
  187. maxframe/dataframe/extensions/flatjson.py +3 -2
  188. maxframe/dataframe/extensions/flatmap.py +15 -7
  189. maxframe/dataframe/fetch/core.py +12 -1
  190. maxframe/dataframe/groupby/__init__.py +7 -0
  191. maxframe/dataframe/groupby/aggregation.py +62 -9
  192. maxframe/dataframe/groupby/apply.py +50 -74
  193. maxframe/dataframe/groupby/apply_chunk.py +393 -0
  194. maxframe/dataframe/groupby/core.py +80 -17
  195. maxframe/dataframe/groupby/extensions.py +26 -0
  196. maxframe/dataframe/groupby/fill.py +9 -4
  197. maxframe/dataframe/groupby/sample.py +7 -7
  198. maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
  199. maxframe/dataframe/groupby/transform.py +57 -54
  200. maxframe/dataframe/indexing/align.py +7 -6
  201. maxframe/dataframe/indexing/getitem.py +9 -8
  202. maxframe/dataframe/indexing/iloc.py +28 -23
  203. maxframe/dataframe/indexing/insert.py +7 -3
  204. maxframe/dataframe/indexing/loc.py +9 -8
  205. maxframe/dataframe/indexing/reindex.py +36 -30
  206. maxframe/dataframe/indexing/rename_axis.py +18 -10
  207. maxframe/dataframe/indexing/reset_index.py +0 -2
  208. maxframe/dataframe/indexing/sample.py +13 -9
  209. maxframe/dataframe/indexing/set_axis.py +9 -6
  210. maxframe/dataframe/indexing/setitem.py +8 -5
  211. maxframe/dataframe/indexing/where.py +12 -9
  212. maxframe/dataframe/merge/__init__.py +0 -1
  213. maxframe/dataframe/merge/concat.py +10 -31
  214. maxframe/dataframe/merge/merge.py +2 -24
  215. maxframe/dataframe/misc/__init__.py +6 -0
  216. maxframe/dataframe/misc/_duplicate.py +7 -3
  217. maxframe/dataframe/misc/apply.py +106 -139
  218. maxframe/dataframe/misc/astype.py +3 -2
  219. maxframe/dataframe/misc/case_when.py +11 -7
  220. maxframe/dataframe/misc/cut.py +11 -10
  221. maxframe/dataframe/misc/describe.py +7 -3
  222. maxframe/dataframe/misc/drop.py +13 -11
  223. maxframe/dataframe/misc/eval.py +0 -2
  224. maxframe/dataframe/misc/get_dummies.py +78 -49
  225. maxframe/dataframe/misc/isin.py +13 -10
  226. maxframe/dataframe/misc/map.py +21 -6
  227. maxframe/dataframe/misc/melt.py +8 -1
  228. maxframe/dataframe/misc/pivot.py +232 -0
  229. maxframe/dataframe/misc/pivot_table.py +52 -40
  230. maxframe/dataframe/misc/rechunk.py +59 -0
  231. maxframe/dataframe/misc/shift.py +7 -4
  232. maxframe/dataframe/misc/stack.py +5 -3
  233. maxframe/dataframe/misc/tests/test_misc.py +167 -1
  234. maxframe/dataframe/misc/transform.py +63 -65
  235. maxframe/dataframe/misc/value_counts.py +7 -4
  236. maxframe/dataframe/missing/dropna.py +16 -7
  237. maxframe/dataframe/missing/fillna.py +18 -10
  238. maxframe/dataframe/missing/replace.py +10 -6
  239. maxframe/dataframe/missing/tests/test_missing.py +2 -2
  240. maxframe/dataframe/operators.py +1 -27
  241. maxframe/dataframe/reduction/aggregation.py +128 -3
  242. maxframe/dataframe/reduction/core.py +20 -6
  243. maxframe/dataframe/reduction/median.py +1 -1
  244. maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
  245. maxframe/dataframe/reduction/unique.py +53 -7
  246. maxframe/dataframe/statistics/corr.py +9 -6
  247. maxframe/dataframe/statistics/quantile.py +9 -6
  248. maxframe/dataframe/tseries/to_datetime.py +6 -4
  249. maxframe/dataframe/utils.py +219 -31
  250. maxframe/dataframe/window/rolling.py +7 -4
  251. maxframe/env.py +1 -0
  252. maxframe/errors.py +9 -0
  253. maxframe/extension.py +13 -2
  254. maxframe/io/objects/core.py +67 -51
  255. maxframe/io/objects/tensor.py +73 -17
  256. maxframe/io/objects/tests/test_object_io.py +8 -55
  257. maxframe/io/odpsio/arrow.py +15 -2
  258. maxframe/io/odpsio/schema.py +43 -13
  259. maxframe/io/odpsio/tableio.py +63 -11
  260. maxframe/io/odpsio/tests/test_arrow.py +1 -2
  261. maxframe/io/odpsio/tests/test_schema.py +114 -1
  262. maxframe/io/odpsio/tests/test_tableio.py +42 -0
  263. maxframe/io/odpsio/tests/test_volumeio.py +22 -48
  264. maxframe/learn/__init__.py +2 -2
  265. maxframe/learn/contrib/__init__.py +2 -2
  266. maxframe/learn/contrib/graph/connected_components.py +2 -1
  267. maxframe/learn/contrib/lightgbm/__init__.py +33 -0
  268. maxframe/learn/contrib/lightgbm/_predict.py +138 -0
  269. maxframe/learn/contrib/lightgbm/_train.py +163 -0
  270. maxframe/learn/contrib/lightgbm/callback.py +114 -0
  271. maxframe/learn/contrib/lightgbm/classifier.py +199 -0
  272. maxframe/learn/contrib/lightgbm/core.py +372 -0
  273. maxframe/learn/contrib/lightgbm/dataset.py +153 -0
  274. maxframe/learn/contrib/lightgbm/regressor.py +29 -0
  275. maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
  276. maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
  277. maxframe/learn/contrib/llm/models/dashscope.py +34 -0
  278. maxframe/learn/contrib/llm/models/managed.py +15 -0
  279. maxframe/learn/contrib/llm/multi_modal.py +92 -0
  280. maxframe/learn/contrib/llm/text.py +21 -5
  281. maxframe/learn/contrib/models.py +38 -9
  282. maxframe/learn/contrib/utils.py +55 -0
  283. maxframe/learn/contrib/xgboost/callback.py +86 -0
  284. maxframe/learn/contrib/xgboost/classifier.py +26 -30
  285. maxframe/learn/contrib/xgboost/core.py +53 -42
  286. maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
  287. maxframe/learn/contrib/xgboost/predict.py +13 -8
  288. maxframe/learn/contrib/xgboost/regressor.py +28 -27
  289. maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
  290. maxframe/learn/contrib/xgboost/train.py +59 -16
  291. maxframe/learn/core.py +252 -0
  292. maxframe/learn/datasets/__init__.py +20 -0
  293. maxframe/learn/datasets/samples_generator.py +628 -0
  294. maxframe/learn/linear_model/__init__.py +15 -0
  295. maxframe/learn/linear_model/_base.py +163 -0
  296. maxframe/learn/linear_model/_lin_reg.py +175 -0
  297. maxframe/learn/metrics/__init__.py +25 -0
  298. maxframe/learn/metrics/_check_targets.py +95 -0
  299. maxframe/learn/metrics/_classification.py +1121 -0
  300. maxframe/learn/metrics/_regression.py +256 -0
  301. maxframe/learn/model_selection/__init__.py +15 -0
  302. maxframe/learn/model_selection/_split.py +451 -0
  303. maxframe/learn/model_selection/tests/__init__.py +13 -0
  304. maxframe/learn/model_selection/tests/test_split.py +156 -0
  305. maxframe/learn/preprocessing/__init__.py +16 -0
  306. maxframe/learn/preprocessing/_data/__init__.py +17 -0
  307. maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
  308. maxframe/learn/preprocessing/_data/normalize.py +127 -0
  309. maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
  310. maxframe/learn/preprocessing/_data/utils.py +79 -0
  311. maxframe/learn/preprocessing/_label/__init__.py +16 -0
  312. maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
  313. maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
  314. maxframe/learn/utils/__init__.py +4 -0
  315. maxframe/learn/utils/_encode.py +314 -0
  316. maxframe/learn/utils/checks.py +161 -0
  317. maxframe/learn/utils/core.py +33 -0
  318. maxframe/learn/utils/extmath.py +176 -0
  319. maxframe/learn/utils/multiclass.py +292 -0
  320. maxframe/learn/utils/shuffle.py +114 -0
  321. maxframe/learn/utils/sparsefuncs.py +87 -0
  322. maxframe/learn/utils/validation.py +775 -0
  323. maxframe/lib/__init__.py +0 -2
  324. maxframe/lib/compat.py +145 -0
  325. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  326. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  327. maxframe/lib/sparse/__init__.py +10 -15
  328. maxframe/lib/sparse/array.py +45 -33
  329. maxframe/lib/sparse/core.py +0 -2
  330. maxframe/lib/sparse/linalg.py +31 -0
  331. maxframe/lib/sparse/matrix.py +5 -2
  332. maxframe/lib/sparse/tests/__init__.py +0 -2
  333. maxframe/lib/sparse/tests/test_sparse.py +53 -53
  334. maxframe/lib/sparse/vector.py +0 -2
  335. maxframe/mixin.py +59 -2
  336. maxframe/opcodes.py +13 -5
  337. maxframe/protocol.py +67 -14
  338. maxframe/remote/core.py +16 -14
  339. maxframe/remote/run_script.py +6 -3
  340. maxframe/serialization/__init__.py +2 -0
  341. maxframe/serialization/core.cp37-win32.pyd +0 -0
  342. maxframe/serialization/core.pxd +3 -0
  343. maxframe/serialization/core.pyi +3 -1
  344. maxframe/serialization/core.pyx +82 -4
  345. maxframe/serialization/pandas.py +5 -1
  346. maxframe/serialization/serializables/core.py +6 -5
  347. maxframe/serialization/serializables/field.py +2 -2
  348. maxframe/serialization/serializables/tests/test_field_type.py +3 -5
  349. maxframe/serialization/tests/test_serial.py +27 -0
  350. maxframe/session.py +4 -71
  351. maxframe/sperunner.py +165 -0
  352. maxframe/tensor/__init__.py +35 -2
  353. maxframe/tensor/arithmetic/__init__.py +2 -4
  354. maxframe/tensor/arithmetic/abs.py +0 -2
  355. maxframe/tensor/arithmetic/absolute.py +0 -2
  356. maxframe/tensor/arithmetic/add.py +34 -4
  357. maxframe/tensor/arithmetic/angle.py +0 -2
  358. maxframe/tensor/arithmetic/arccos.py +1 -4
  359. maxframe/tensor/arithmetic/arccosh.py +1 -3
  360. maxframe/tensor/arithmetic/arcsin.py +0 -2
  361. maxframe/tensor/arithmetic/arcsinh.py +0 -2
  362. maxframe/tensor/arithmetic/arctan.py +0 -2
  363. maxframe/tensor/arithmetic/arctan2.py +0 -2
  364. maxframe/tensor/arithmetic/arctanh.py +0 -2
  365. maxframe/tensor/arithmetic/around.py +0 -2
  366. maxframe/tensor/arithmetic/bitand.py +0 -2
  367. maxframe/tensor/arithmetic/bitor.py +1 -3
  368. maxframe/tensor/arithmetic/bitxor.py +1 -3
  369. maxframe/tensor/arithmetic/cbrt.py +0 -2
  370. maxframe/tensor/arithmetic/ceil.py +0 -2
  371. maxframe/tensor/arithmetic/clip.py +13 -13
  372. maxframe/tensor/arithmetic/conj.py +0 -2
  373. maxframe/tensor/arithmetic/copysign.py +0 -2
  374. maxframe/tensor/arithmetic/core.py +47 -39
  375. maxframe/tensor/arithmetic/cos.py +1 -3
  376. maxframe/tensor/arithmetic/cosh.py +0 -2
  377. maxframe/tensor/arithmetic/deg2rad.py +0 -2
  378. maxframe/tensor/arithmetic/degrees.py +0 -2
  379. maxframe/tensor/arithmetic/divide.py +0 -2
  380. maxframe/tensor/arithmetic/equal.py +0 -2
  381. maxframe/tensor/arithmetic/exp.py +1 -3
  382. maxframe/tensor/arithmetic/exp2.py +0 -2
  383. maxframe/tensor/arithmetic/expm1.py +0 -2
  384. maxframe/tensor/arithmetic/fabs.py +0 -2
  385. maxframe/tensor/arithmetic/fix.py +0 -2
  386. maxframe/tensor/arithmetic/float_power.py +0 -2
  387. maxframe/tensor/arithmetic/floor.py +0 -2
  388. maxframe/tensor/arithmetic/floordiv.py +0 -2
  389. maxframe/tensor/arithmetic/fmax.py +0 -2
  390. maxframe/tensor/arithmetic/fmin.py +0 -2
  391. maxframe/tensor/arithmetic/fmod.py +0 -2
  392. maxframe/tensor/arithmetic/frexp.py +6 -2
  393. maxframe/tensor/arithmetic/greater.py +0 -2
  394. maxframe/tensor/arithmetic/greater_equal.py +0 -2
  395. maxframe/tensor/arithmetic/hypot.py +0 -2
  396. maxframe/tensor/arithmetic/i0.py +1 -3
  397. maxframe/tensor/arithmetic/imag.py +0 -2
  398. maxframe/tensor/arithmetic/invert.py +1 -3
  399. maxframe/tensor/arithmetic/isclose.py +0 -2
  400. maxframe/tensor/arithmetic/iscomplex.py +0 -2
  401. maxframe/tensor/arithmetic/isfinite.py +1 -3
  402. maxframe/tensor/arithmetic/isinf.py +0 -2
  403. maxframe/tensor/arithmetic/isnan.py +0 -2
  404. maxframe/tensor/arithmetic/isreal.py +0 -2
  405. maxframe/tensor/arithmetic/ldexp.py +0 -2
  406. maxframe/tensor/arithmetic/less.py +0 -2
  407. maxframe/tensor/arithmetic/less_equal.py +0 -2
  408. maxframe/tensor/arithmetic/log.py +1 -3
  409. maxframe/tensor/arithmetic/log10.py +1 -3
  410. maxframe/tensor/arithmetic/log1p.py +1 -3
  411. maxframe/tensor/arithmetic/log2.py +1 -3
  412. maxframe/tensor/arithmetic/logaddexp.py +0 -2
  413. maxframe/tensor/arithmetic/logaddexp2.py +0 -2
  414. maxframe/tensor/arithmetic/logical_and.py +0 -2
  415. maxframe/tensor/arithmetic/logical_not.py +1 -3
  416. maxframe/tensor/arithmetic/logical_or.py +0 -2
  417. maxframe/tensor/arithmetic/logical_xor.py +0 -2
  418. maxframe/tensor/arithmetic/lshift.py +0 -2
  419. maxframe/tensor/arithmetic/maximum.py +0 -2
  420. maxframe/tensor/arithmetic/minimum.py +0 -2
  421. maxframe/tensor/arithmetic/mod.py +0 -2
  422. maxframe/tensor/arithmetic/modf.py +6 -2
  423. maxframe/tensor/arithmetic/multiply.py +37 -4
  424. maxframe/tensor/arithmetic/nan_to_num.py +0 -2
  425. maxframe/tensor/arithmetic/negative.py +0 -2
  426. maxframe/tensor/arithmetic/nextafter.py +0 -2
  427. maxframe/tensor/arithmetic/not_equal.py +0 -2
  428. maxframe/tensor/arithmetic/positive.py +0 -2
  429. maxframe/tensor/arithmetic/power.py +0 -2
  430. maxframe/tensor/arithmetic/rad2deg.py +0 -2
  431. maxframe/tensor/arithmetic/radians.py +0 -2
  432. maxframe/tensor/arithmetic/real.py +0 -2
  433. maxframe/tensor/arithmetic/reciprocal.py +5 -3
  434. maxframe/tensor/arithmetic/rint.py +1 -3
  435. maxframe/tensor/arithmetic/rshift.py +0 -2
  436. maxframe/tensor/arithmetic/setimag.py +0 -2
  437. maxframe/tensor/arithmetic/setreal.py +0 -2
  438. maxframe/tensor/arithmetic/sign.py +0 -2
  439. maxframe/tensor/arithmetic/signbit.py +0 -2
  440. maxframe/tensor/arithmetic/sin.py +0 -2
  441. maxframe/tensor/arithmetic/sinc.py +1 -3
  442. maxframe/tensor/arithmetic/sinh.py +0 -2
  443. maxframe/tensor/arithmetic/spacing.py +0 -2
  444. maxframe/tensor/arithmetic/sqrt.py +0 -2
  445. maxframe/tensor/arithmetic/square.py +0 -2
  446. maxframe/tensor/arithmetic/subtract.py +4 -2
  447. maxframe/tensor/arithmetic/tan.py +0 -2
  448. maxframe/tensor/arithmetic/tanh.py +0 -2
  449. maxframe/tensor/arithmetic/tests/__init__.py +0 -2
  450. maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
  451. maxframe/tensor/arithmetic/truediv.py +0 -2
  452. maxframe/tensor/arithmetic/trunc.py +0 -2
  453. maxframe/tensor/arithmetic/utils.py +32 -6
  454. maxframe/tensor/array_utils.py +3 -25
  455. maxframe/tensor/core.py +6 -6
  456. maxframe/tensor/datasource/__init__.py +10 -2
  457. maxframe/tensor/datasource/arange.py +0 -2
  458. maxframe/tensor/datasource/array.py +3 -22
  459. maxframe/tensor/datasource/core.py +15 -10
  460. maxframe/tensor/datasource/diag.py +140 -0
  461. maxframe/tensor/datasource/diagflat.py +69 -0
  462. maxframe/tensor/datasource/empty.py +0 -2
  463. maxframe/tensor/datasource/eye.py +95 -0
  464. maxframe/tensor/datasource/from_dataframe.py +0 -2
  465. maxframe/tensor/datasource/from_dense.py +0 -17
  466. maxframe/tensor/datasource/from_sparse.py +0 -2
  467. maxframe/tensor/datasource/full.py +0 -2
  468. maxframe/tensor/datasource/identity.py +54 -0
  469. maxframe/tensor/datasource/indices.py +115 -0
  470. maxframe/tensor/datasource/linspace.py +140 -0
  471. maxframe/tensor/datasource/meshgrid.py +135 -0
  472. maxframe/tensor/datasource/ones.py +8 -3
  473. maxframe/tensor/datasource/tests/test_datasource.py +32 -1
  474. maxframe/tensor/datasource/tri_array.py +107 -0
  475. maxframe/tensor/datasource/zeros.py +7 -3
  476. maxframe/tensor/extensions/__init__.py +31 -0
  477. maxframe/tensor/extensions/accessor.py +25 -0
  478. maxframe/tensor/extensions/apply_chunk.py +137 -0
  479. maxframe/tensor/indexing/__init__.py +1 -1
  480. maxframe/tensor/indexing/choose.py +8 -6
  481. maxframe/tensor/indexing/compress.py +0 -2
  482. maxframe/tensor/indexing/extract.py +0 -2
  483. maxframe/tensor/indexing/fill_diagonal.py +9 -6
  484. maxframe/tensor/indexing/flatnonzero.py +1 -3
  485. maxframe/tensor/indexing/getitem.py +10 -43
  486. maxframe/tensor/indexing/nonzero.py +2 -4
  487. maxframe/tensor/indexing/setitem.py +19 -9
  488. maxframe/tensor/indexing/slice.py +6 -3
  489. maxframe/tensor/indexing/take.py +0 -2
  490. maxframe/tensor/indexing/tests/__init__.py +0 -2
  491. maxframe/tensor/indexing/tests/test_indexing.py +0 -2
  492. maxframe/tensor/indexing/unravel_index.py +6 -6
  493. maxframe/tensor/lib/__init__.py +16 -0
  494. maxframe/tensor/lib/index_tricks.py +404 -0
  495. maxframe/tensor/linalg/__init__.py +36 -0
  496. maxframe/tensor/linalg/dot.py +145 -0
  497. maxframe/tensor/linalg/inner.py +36 -0
  498. maxframe/tensor/linalg/inv.py +83 -0
  499. maxframe/tensor/linalg/lu.py +115 -0
  500. maxframe/tensor/linalg/matmul.py +225 -0
  501. maxframe/tensor/linalg/qr.py +124 -0
  502. maxframe/tensor/linalg/solve_triangular.py +103 -0
  503. maxframe/tensor/linalg/svd.py +167 -0
  504. maxframe/tensor/linalg/tensordot.py +213 -0
  505. maxframe/tensor/linalg/vdot.py +73 -0
  506. maxframe/tensor/merge/__init__.py +4 -0
  507. maxframe/tensor/merge/append.py +74 -0
  508. maxframe/tensor/merge/column_stack.py +63 -0
  509. maxframe/tensor/merge/concatenate.py +3 -2
  510. maxframe/tensor/merge/dstack.py +71 -0
  511. maxframe/tensor/merge/hstack.py +70 -0
  512. maxframe/tensor/merge/stack.py +0 -2
  513. maxframe/tensor/merge/tests/test_merge.py +0 -2
  514. maxframe/tensor/misc/__init__.py +18 -5
  515. maxframe/tensor/misc/astype.py +10 -8
  516. maxframe/tensor/misc/broadcast_to.py +1 -1
  517. maxframe/tensor/misc/copy.py +64 -0
  518. maxframe/tensor/misc/diff.py +115 -0
  519. maxframe/tensor/misc/flatten.py +63 -0
  520. maxframe/tensor/misc/in1d.py +94 -0
  521. maxframe/tensor/misc/isin.py +130 -0
  522. maxframe/tensor/misc/ndim.py +53 -0
  523. maxframe/tensor/misc/ravel.py +0 -2
  524. maxframe/tensor/misc/repeat.py +129 -0
  525. maxframe/tensor/misc/searchsorted.py +147 -0
  526. maxframe/tensor/misc/setdiff1d.py +58 -0
  527. maxframe/tensor/misc/squeeze.py +117 -0
  528. maxframe/tensor/misc/swapaxes.py +113 -0
  529. maxframe/tensor/misc/tests/test_misc.py +0 -2
  530. maxframe/tensor/misc/transpose.py +8 -4
  531. maxframe/tensor/misc/trapezoid.py +123 -0
  532. maxframe/tensor/misc/unique.py +0 -1
  533. maxframe/tensor/misc/where.py +10 -8
  534. maxframe/tensor/operators.py +0 -34
  535. maxframe/tensor/random/__init__.py +3 -5
  536. maxframe/tensor/random/binomial.py +0 -2
  537. maxframe/tensor/random/bytes.py +0 -2
  538. maxframe/tensor/random/chisquare.py +0 -2
  539. maxframe/tensor/random/choice.py +9 -8
  540. maxframe/tensor/random/core.py +20 -5
  541. maxframe/tensor/random/dirichlet.py +0 -2
  542. maxframe/tensor/random/exponential.py +0 -2
  543. maxframe/tensor/random/f.py +2 -4
  544. maxframe/tensor/random/gamma.py +0 -2
  545. maxframe/tensor/random/geometric.py +0 -2
  546. maxframe/tensor/random/gumbel.py +0 -2
  547. maxframe/tensor/random/hypergeometric.py +0 -2
  548. maxframe/tensor/random/laplace.py +2 -4
  549. maxframe/tensor/random/logistic.py +0 -2
  550. maxframe/tensor/random/lognormal.py +0 -2
  551. maxframe/tensor/random/logseries.py +0 -2
  552. maxframe/tensor/random/multinomial.py +0 -2
  553. maxframe/tensor/random/multivariate_normal.py +0 -2
  554. maxframe/tensor/random/negative_binomial.py +0 -2
  555. maxframe/tensor/random/noncentral_chisquare.py +0 -2
  556. maxframe/tensor/random/noncentral_f.py +1 -3
  557. maxframe/tensor/random/normal.py +0 -2
  558. maxframe/tensor/random/pareto.py +0 -2
  559. maxframe/tensor/random/permutation.py +6 -3
  560. maxframe/tensor/random/poisson.py +0 -2
  561. maxframe/tensor/random/power.py +0 -2
  562. maxframe/tensor/random/rand.py +0 -2
  563. maxframe/tensor/random/randint.py +0 -2
  564. maxframe/tensor/random/randn.py +0 -2
  565. maxframe/tensor/random/random_integers.py +0 -2
  566. maxframe/tensor/random/random_sample.py +0 -2
  567. maxframe/tensor/random/rayleigh.py +0 -2
  568. maxframe/tensor/random/standard_cauchy.py +0 -2
  569. maxframe/tensor/random/standard_exponential.py +0 -2
  570. maxframe/tensor/random/standard_gamma.py +0 -2
  571. maxframe/tensor/random/standard_normal.py +0 -2
  572. maxframe/tensor/random/standard_t.py +0 -2
  573. maxframe/tensor/random/tests/__init__.py +0 -2
  574. maxframe/tensor/random/tests/test_random.py +0 -2
  575. maxframe/tensor/random/triangular.py +0 -2
  576. maxframe/tensor/random/uniform.py +0 -2
  577. maxframe/tensor/random/vonmises.py +0 -2
  578. maxframe/tensor/random/wald.py +0 -2
  579. maxframe/tensor/random/weibull.py +0 -2
  580. maxframe/tensor/random/zipf.py +0 -2
  581. maxframe/tensor/reduction/__init__.py +0 -2
  582. maxframe/tensor/reduction/all.py +0 -2
  583. maxframe/tensor/reduction/allclose.py +0 -2
  584. maxframe/tensor/reduction/any.py +0 -2
  585. maxframe/tensor/reduction/argmax.py +1 -3
  586. maxframe/tensor/reduction/argmin.py +1 -3
  587. maxframe/tensor/reduction/array_equal.py +0 -2
  588. maxframe/tensor/reduction/core.py +0 -2
  589. maxframe/tensor/reduction/count_nonzero.py +0 -2
  590. maxframe/tensor/reduction/cumprod.py +0 -2
  591. maxframe/tensor/reduction/cumsum.py +0 -2
  592. maxframe/tensor/reduction/max.py +0 -2
  593. maxframe/tensor/reduction/mean.py +0 -2
  594. maxframe/tensor/reduction/min.py +0 -2
  595. maxframe/tensor/reduction/nanargmax.py +0 -2
  596. maxframe/tensor/reduction/nanargmin.py +0 -2
  597. maxframe/tensor/reduction/nancumprod.py +0 -2
  598. maxframe/tensor/reduction/nancumsum.py +0 -2
  599. maxframe/tensor/reduction/nanmax.py +0 -2
  600. maxframe/tensor/reduction/nanmean.py +0 -2
  601. maxframe/tensor/reduction/nanmin.py +0 -2
  602. maxframe/tensor/reduction/nanprod.py +0 -2
  603. maxframe/tensor/reduction/nanstd.py +0 -2
  604. maxframe/tensor/reduction/nansum.py +0 -2
  605. maxframe/tensor/reduction/nanvar.py +0 -2
  606. maxframe/tensor/reduction/prod.py +0 -2
  607. maxframe/tensor/reduction/std.py +0 -2
  608. maxframe/tensor/reduction/sum.py +0 -2
  609. maxframe/tensor/reduction/tests/test_reduction.py +1 -4
  610. maxframe/tensor/reduction/var.py +0 -2
  611. maxframe/tensor/reshape/__init__.py +0 -2
  612. maxframe/tensor/reshape/reshape.py +6 -5
  613. maxframe/tensor/reshape/tests/__init__.py +0 -2
  614. maxframe/tensor/reshape/tests/test_reshape.py +0 -2
  615. maxframe/tensor/sort/__init__.py +16 -0
  616. maxframe/tensor/sort/argsort.py +150 -0
  617. maxframe/tensor/sort/sort.py +295 -0
  618. maxframe/tensor/special/__init__.py +37 -0
  619. maxframe/tensor/special/core.py +38 -0
  620. maxframe/tensor/special/misc.py +142 -0
  621. maxframe/tensor/special/statistical.py +56 -0
  622. maxframe/tensor/statistics/__init__.py +5 -0
  623. maxframe/tensor/statistics/average.py +143 -0
  624. maxframe/tensor/statistics/bincount.py +133 -0
  625. maxframe/tensor/statistics/quantile.py +10 -8
  626. maxframe/tensor/ufunc/__init__.py +0 -2
  627. maxframe/tensor/ufunc/ufunc.py +0 -2
  628. maxframe/tensor/utils.py +21 -3
  629. maxframe/tests/test_protocol.py +3 -3
  630. maxframe/tests/test_utils.py +210 -1
  631. maxframe/tests/utils.py +67 -1
  632. maxframe/udf.py +76 -6
  633. maxframe/utils.py +418 -17
  634. {maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/METADATA +5 -1
  635. maxframe-2.0.0b1.dist-info/RECORD +939 -0
  636. maxframe_client/clients/framedriver.py +19 -3
  637. maxframe_client/fetcher.py +113 -6
  638. maxframe_client/session/odps.py +173 -38
  639. maxframe_client/session/task.py +3 -1
  640. maxframe_client/tests/test_session.py +41 -5
  641. maxframe-1.3.0.dist-info/RECORD +0 -705
  642. {maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/WHEEL +0 -0
  643. {maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/top_level.txt +0 -0
@@ -21,13 +21,15 @@ from odps import types as odps_types
21
21
  from .... import dataframe as md
22
22
  from .... import tensor as mt
23
23
  from ....core import OutputType
24
- from ....lib.dtypes_extension import dict_
24
+ from ....lib.dtypes_extension import ArrowDtype, dict_, list_
25
25
  from ....utils import pd_release_version
26
26
  from ..schema import (
27
27
  arrow_schema_to_odps_schema,
28
28
  build_dataframe_table_meta,
29
29
  build_table_column_name,
30
30
  odps_schema_to_arrow_schema,
31
+ odps_schema_to_pandas_dtypes,
32
+ pandas_dtypes_to_arrow_schema,
31
33
  pandas_to_odps_schema,
32
34
  pandas_types_to_arrow_schema,
33
35
  )
@@ -42,6 +44,8 @@ def _wrap_maxframe_obj(obj, wrap="no"):
42
44
  obj = md.Series(obj)
43
45
  elif isinstance(obj, pd.Index):
44
46
  obj = md.Index(obj)
47
+ elif isinstance(obj, np.ndarray):
48
+ obj = mt.tensor(obj)
45
49
  else:
46
50
  obj = mt.scalar(obj)
47
51
  if wrap == "data":
@@ -191,6 +195,24 @@ def test_pandas_to_odps_schema_scalar(wrap_obj):
191
195
  assert meta.pd_index_level_names == [None]
192
196
 
193
197
 
198
@pytest.mark.parametrize("wrap_obj", ["no", "yes", "data"])
def test_pandas_to_odps_schema_tensor(wrap_obj):
    """Check the ODPS schema and table meta derived from a 1-d integer tensor."""
    wrapped = _wrap_maxframe_obj(np.array([1, 2, 3]), wrap=wrap_obj)
    if wrap_obj != "no":
        # drop the raw data attached to the operator before deriving the schema
        wrapped.op.data = None

    schema, meta = pandas_to_odps_schema(wrapped, unknown_as_string=True)

    # the only table column is the generated index column
    index_column = schema.columns[0]
    assert index_column.name == "_idx_0"
    assert index_column.type.name == "bigint"

    assert meta.type == OutputType.tensor
    assert meta.table_column_names == []
    assert meta.table_index_column_names == ["_idx_0"]
    assert meta.pd_column_level_names == []
    assert meta.pd_index_level_names == [None]
214
+
215
+
194
216
  def test_odps_arrow_schema_conversion():
195
217
  odps_schema = odps_types.OdpsSchema(
196
218
  [
@@ -275,6 +297,95 @@ def test_odps_arrow_schema_conversion():
275
297
  arrow_schema_to_odps_schema(pa.schema([("col1", pa.float16())]))
276
298
 
277
299
 
300
def test_odps_pandas_schema_conversion():
    """Convert an ODPS schema to pandas dtypes and back again through arrow."""
    # expectations for the ODPS ``datetime`` column depend on the pandas major version
    pandas_2_or_later = pd_release_version[0] >= 2

    source_columns = [
        ("col1", "string"),
        ("col2", "binary"),
        ("col3", "tinyint"),
        ("col4", "smallint"),
        ("col5", "int"),
        ("col6", "bigint"),
        ("col7", "boolean"),
        ("col8", "float"),
        ("col9", "double"),
        # ("col10", "date"),
        ("col11", "datetime"),
        ("col12", "timestamp"),
        # ("col13", "decimal(10, 2)"),
        ("col14", "array<string>"),
        ("col15", "map<string, bigint>"),
        # ("col16", "struct<a1: string, a2: map<string, bigint>>"),
        # ("col17", "CHAR(15)"),
        # ("col18", "VARCHAR(15)"),
        # ("col19", "decimal"),
    ]
    odps_schema = odps_types.OdpsSchema(
        [odps_types.Column(col_name, col_type) for col_name, col_type in source_columns]
    )

    pd_dtypes = odps_schema_to_pandas_dtypes(odps_schema)

    datetime_dtype = np.dtype(
        "datetime64[ms]" if pandas_2_or_later else "datetime64[ns]"
    )
    expected_dtypes = pd.Series(
        [
            np.dtype("O"),  # string
            np.dtype("O"),  # binary
            np.dtype(np.int8),
            np.dtype(np.int16),
            np.dtype(np.int32),
            np.dtype(np.int64),
            np.dtype(np.bool_),
            np.dtype(np.float32),
            np.dtype(np.float64),
            datetime_dtype,
            np.dtype("datetime64[ns]"),
            ArrowDtype(pa.list_(pa.string())),
            ArrowDtype(pa.map_(pa.string(), pa.int64())),
        ],
        index=[col.name for col in odps_schema.columns],
    )
    pd.testing.assert_series_equal(pd_dtypes, expected_dtypes)

    round_trip_columns = [
        ("col1", "string"),
        ("col2", "string"),  # binary
        ("col3", "tinyint"),
        ("col4", "smallint"),
        ("col5", "int"),
        ("col6", "bigint"),
        ("col7", "boolean"),
        ("col8", "float"),
        ("col9", "double"),
        # ("col10", "date"),
        ("col11", "datetime" if pandas_2_or_later else "timestamp"),
        ("col12", "timestamp"),
        # ("col13", "decimal(10, 2)"),
        ("col14", "array<string>"),
        ("col15", "map<string, bigint>"),
        # ("col16", "struct<a1: string, a2: map<string, bigint>>"),
        # ("col17", "string"),
        # ("col18", "string"),
        # ("col19", "decimal(38, 18)"),
    ]
    expected_odps_schema = odps_types.OdpsSchema(
        [
            odps_types.Column(col_name, col_type)
            for col_name, col_type in round_trip_columns
        ]
    )

    arrow_schema = pandas_dtypes_to_arrow_schema(pd_dtypes, unknown_as_string=True)
    odps_schema2 = arrow_schema_to_odps_schema(arrow_schema)

    expected_cols = expected_odps_schema.columns
    actual_cols = odps_schema2.columns
    assert [col.name for col in expected_cols] == [col.name for col in actual_cols]
    assert [col.type for col in expected_cols] == [col.type for col in actual_cols]

    # float16 columns are expected to be rejected with a TypeError
    with pytest.raises(TypeError):
        arrow_schema_to_odps_schema(pa.schema([("col1", pa.float16())]))
387
+
388
+
278
389
  def test_build_column_name():
279
390
  records = dict()
280
391
  assert build_table_column_name(0, "a" * 127, records) == "a" * 127
@@ -345,8 +456,10 @@ def test_pandas_types_to_arrow_schema():
345
456
  {
346
457
  "int8": pd.Series([], dtype=np.int8),
347
458
  "map": pd.Series([], dtype=dict_(pa.string(), pa.string())),
459
+ "list": pd.Series([], dtype=list_(pa.string())),
348
460
  },
349
461
  )
350
462
  schema = pandas_types_to_arrow_schema(pd_data)
351
463
  assert schema.field("int8").type == pa.int8()
352
464
  assert schema.field("map").type == pa.map_(pa.string(), pa.string())
465
+ assert schema.field("list").type == pa.list_(pa.string())
@@ -14,15 +14,19 @@
14
14
 
15
15
  import datetime
16
16
 
17
+ import mock
17
18
  import numpy as np
18
19
  import pandas as pd
19
20
  import pyarrow as pa
20
21
  import pytest
21
22
  from odps import ODPS
23
+ from odps.errors import TableModified
24
+ from odps.models import Table
22
25
 
23
26
  from ....config import options
24
27
  from ....tests.utils import flaky, tn
25
28
  from ....utils import config_odps_default_options
29
+ from .. import TunnelTableIO
26
30
  from ..tableio import ODPSTableIO
27
31
 
28
32
 
@@ -161,3 +165,41 @@ def test_table_io_with_parts(switch_table_io):
161
165
  pd.testing.assert_frame_equal(reader.read_all().to_pandas(), expected_data)
162
166
  finally:
163
167
  tb.drop()
168
+
169
+
170
def test_tunnel_table_io_with_modified():
    """Read a partition back while the first ``Table.open_reader`` call fails.

    ``Table.open_reader`` is patched so that its first invocation raises
    ``TableModified``; every later invocation is forwarded to the real
    implementation. The data read must still equal the data written.
    """
    config_odps_default_options()

    odps_entry = ODPS.from_environments()
    table_io = TunnelTableIO(odps_entry)

    # test read and write tables with partition
    parted_table_name = tn("test_tunnel_write_modified")
    odps_entry.delete_table(parted_table_name, if_exists=True)
    column_spec = ",".join(f"{c} double" for c in "abcde")
    tb = odps_entry.create_table(
        parted_table_name, (column_spec, "pt string"), lifecycle=1
    )

    original_open_reader = Table.open_reader
    failure_injected = False

    def _open_reader_failing_once(self, *args, **kwargs):
        nonlocal failure_injected
        if failure_injected:
            return original_open_reader(self, *args, **kwargs)
        failure_injected = True
        raise TableModified("Intentional error")

    try:
        pd_data = pd.DataFrame(np.random.rand(100, 5), columns=list("abcde"))
        with table_io.open_writer(parted_table_name, "pt=test") as writer:
            writer.write(pa.Table.from_pandas(pd_data, preserve_index=False))

        # the patch must be active before the reader is opened
        with mock.patch(
            "odps.models.table.Table.open_reader", new=_open_reader_failing_once
        ):
            with table_io.open_reader(parted_table_name, "pt=test") as reader:
                fetched = reader.read_all().to_pandas()
                pd.testing.assert_frame_equal(fetched, pd_data)
    finally:
        tb.drop()
@@ -12,10 +12,12 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import contextlib
16
+
15
17
  import pytest
16
18
  from odps import ODPS
17
19
 
18
- from ....tests.utils import tn
20
+ from ....tests.utils import create_test_volume, tn
19
21
  from ..volumeio import ODPSVolumeReader, ODPSVolumeWriter
20
22
 
21
23
 
@@ -24,59 +26,31 @@ def create_volume(request, oss_config):
24
26
  test_vol_name = tn("test_vol_name_" + request.param)
25
27
  odps_entry = ODPS.from_environments()
26
28
 
27
- try:
28
- odps_entry.delete_volume(test_vol_name)
29
- except:
30
- pass
29
+ @contextlib.contextmanager
30
+ def create_parted_volume():
31
+ try:
32
+ odps_entry.delete_volume(test_vol_name)
33
+ except:
34
+ pass
35
+ try:
36
+ odps_entry.create_parted_volume(test_vol_name)
37
+ yield
38
+ finally:
39
+ try:
40
+ odps_entry.delete_volume(test_vol_name)
41
+ except BaseException:
42
+ pass
31
43
 
32
44
  oss_test_dir_name = None
33
45
  if request.param == "parted":
34
- odps_entry.create_parted_volume(test_vol_name)
46
+ ctx = create_parted_volume()
35
47
  else:
36
- oss_test_dir_name = tn("test_oss_directory")
37
- if oss_config is None:
38
- pytest.skip("Need oss and its config to run this test")
39
- (
40
- oss_access_id,
41
- oss_secret_access_key,
42
- oss_bucket_name,
43
- oss_endpoint,
44
- ) = oss_config.oss_config
45
-
46
- if "test" in oss_endpoint:
47
- # offline config
48
- test_location = "oss://%s:%s@%s/%s/%s" % (
49
- oss_access_id,
50
- oss_secret_access_key,
51
- oss_endpoint,
52
- oss_bucket_name,
53
- oss_test_dir_name,
54
- )
55
- rolearn = None
56
- else:
57
- # online config
58
- endpoint_parts = oss_endpoint.split(".", 1)
59
- if "-internal" not in endpoint_parts[0]:
60
- endpoint_parts[0] += "-internal"
61
- test_location = "oss://%s/%s/%s" % (
62
- ".".join(endpoint_parts),
63
- oss_bucket_name,
64
- oss_test_dir_name,
65
- )
66
- rolearn = oss_config.oss_rolearn
67
-
68
- oss_config.oss_bucket.put_object(oss_test_dir_name + "/", b"")
69
- odps_entry.create_external_volume(
70
- test_vol_name, location=test_location, rolearn=rolearn
71
- )
48
+ ctx = create_test_volume(test_vol_name, oss_config)
49
+
72
50
  try:
73
- yield test_vol_name
51
+ with ctx:
52
+ yield test_vol_name
74
53
  finally:
75
- try:
76
- odps_entry.delete_volume(test_vol_name)
77
- except BaseException:
78
- pass
79
-
80
54
  if oss_test_dir_name is not None:
81
55
  import oss2
82
56
 
@@ -12,6 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from . import contrib
15
+ from . import contrib, model_selection, preprocessing
16
16
 
17
- del contrib
17
+ del contrib, model_selection, preprocessing
@@ -12,6 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from . import graph, llm, models, pytorch
15
+ from . import graph, lightgbm, llm, pytorch
16
16
 
17
- del graph, llm, models, pytorch
17
+ del graph, lightgbm, llm, pytorch
@@ -19,8 +19,9 @@ from maxframe import opcodes
19
19
 
20
20
  from ....core import OutputType
21
21
  from ....dataframe.operators import DataFrameOperator, DataFrameOperatorMixin
22
- from ....dataframe.utils import make_dtypes, parse_index
22
+ from ....dataframe.utils import parse_index
23
23
  from ....serialization.serializables import Int32Field, StringField
24
+ from ....utils import make_dtypes
24
25
 
25
26
 
26
27
  class DataFrameConnectedComponentsOperator(DataFrameOperator, DataFrameOperatorMixin):
@@ -0,0 +1,33 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ._predict import predict
16
+ from ._train import train
17
+ from .dataset import Dataset
18
+
19
+ # isort: off
20
+ from ..utils import config_mod_getattr as _config_mod_getattr
21
+
22
# Map public estimator names to the dotted (package-relative) paths of the
# objects that implement them, and hand the mapping to the helper together
# with this module's globals.
# NOTE(review): presumably this lets the names be resolved on attribute access
# instead of importing the submodules eagerly — confirm against
# ``config_mod_getattr``.
_config_mod_getattr(
    {
        "LGBMClassifier": ".classifier.LGBMClassifier",
        "LGBMRegressor": ".regressor.LGBMRegressor",
        # "LGBMRanker": ".ranker.LGBMRanker",
    },
    globals(),
)

# remove the helper alias from the package namespace once it has been applied
del _config_mod_getattr

__all__ = ["Dataset", "LGBMClassifier", "LGBMRegressor", "predict", "train"]
@@ -0,0 +1,138 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Any, Dict, List
16
+
17
+ import numpy as np
18
+
19
+ from .... import opcodes
20
+ from ....core import OutputType
21
+ from ....core.operator import Operator, TileableOperatorMixin
22
+ from ....serialization.serializables import BoolField, DictField, Int32Field, KeyField
23
+ from ....tensor.core import TensorOrder
24
+ from ....typing_ import EntityType
25
+ from ..models import to_remote_model
26
+ from .dataset import check_data
27
+
28
+
29
class LGBMPredict(Operator, TileableOperatorMixin):
    """Operator describing a prediction made with a LightGBM booster.

    Inputs are, in order, the data to predict on and the booster. The first
    output is the prediction tensor; one extra tensor is produced for each of
    ``pred_leaf`` and ``pred_contrib`` that is enabled.
    """

    _op_type_ = opcodes.LGBM_PREDICT
    _op_module_ = "learn.contrib.lightgbm"
    # dtype used for every tensor produced by this operator
    output_dtype = np.dtype(np.float64)

    data = KeyField("data", default=None)
    booster = KeyField("booster", default=None)
    start_iteration = Int32Field("start_iteration", default=0)
    num_iterations = Int32Field("num_iterations", default=None)
    raw_score = BoolField("raw_score", default=False)
    pred_leaf = BoolField("pred_leaf", default=False)
    pred_contrib = BoolField("pred_contrib", default=False)
    validate_features = BoolField("validate_features", default=False)
    kwds = DictField("kwds", default=None)

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    def has_custom_code(self) -> bool:
        """Always report that this operator carries custom code."""
        return True

    @property
    def output_limit(self) -> int:
        """Number of outputs: the prediction plus one per enabled extra."""
        return 1 + self.pred_leaf + self.pred_contrib

    @classmethod
    def _set_inputs(cls, op: "LGBMPredict", inputs: List[EntityType]):
        """Rebind ``data`` and ``booster`` to the first two entries of *inputs*."""
        super()._set_inputs(op, inputs)
        it = iter(inputs)
        op.data = next(it)
        op.booster = next(it)

    def _unknown_shape_output_kw(self) -> Dict[str, Any]:
        """Build the creation kwargs for an auxiliary 2-d output of unknown shape."""
        # ``np.float_`` was removed in NumPy 2.0; ``np.float64`` is the very
        # same dtype on NumPy 1.x, so results are unchanged there.
        return {
            "shape": (np.nan, np.nan),
            "dtype": np.dtype(np.float64),
            "order": TensorOrder.C_ORDER,
        }

    def __call__(self):
        """Create the output tileables for this prediction.

        The main output has shape ``(n_rows, num_class)`` when the booster's
        operator exposes a ``num_class`` greater than 2, otherwise
        ``(n_rows,)``. Auxiliary outputs have unknown (NaN) shapes.
        """
        num_class = getattr(self.booster.op, "num_class", None)
        if num_class is not None:
            num_class = int(num_class)

        n_rows = self.data.shape[0]
        if num_class is not None and num_class > 2:
            main_shape = (n_rows, num_class)
        else:
            main_shape = (n_rows,)

        result_kw: Dict[str, Any] = {
            "dtype": self.output_dtype,
            "order": TensorOrder.C_ORDER,
            "shape": main_shape,
        }
        kws = [result_kw]

        if self.pred_leaf:
            kws.append(self._unknown_shape_output_kw())
        if self.pred_contrib:
            kws.append(self._unknown_shape_output_kw())

        return self.new_tileables([self.data, self.booster], kws=kws)
95
+
96
+
97
def predict(
    booster,
    data,
    raw_score: bool = False,
    start_iteration: int = 0,
    num_iteration: int = None,
    pred_leaf: bool = False,
    pred_contrib: bool = False,
    validate_features: bool = False,
    **kwargs,
):
    """Build a prediction of *data* with *booster*.

    A local ``lightgbm.Booster`` is first converted with ``to_remote_model``.
    ``proba`` is taken out of ``kwargs``; whatever remains is forwarded to the
    operator as ``kwds``.

    Returns the single result when the operator yields exactly one output,
    otherwise the whole collection of results.

    Raises:
        TypeError: if *booster* is neither a ``Booster``/``BoosterData`` nor a
            ``lightgbm.Booster``.
    """
    import lightgbm

    from .core import Booster, BoosterData

    if isinstance(booster, lightgbm.Booster):
        booster = to_remote_model(booster, model_cls=Booster)
    elif not isinstance(booster, (Booster, BoosterData)):
        raise TypeError(
            f"model has to be a lightgbm.Booster, got {type(booster)} instead"
        )

    predict_proba = kwargs.pop("proba", False)
    checked_data = check_data(data)

    # NOTE(review): the operator declares a field called ``num_iterations``
    # while ``num_iteration`` is passed here — confirm which name the
    # executing side reads.
    predict_op = LGBMPredict(
        data=checked_data,
        booster=booster,
        output_types=[OutputType.tensor],
        proba=predict_proba,
        raw_score=raw_score,
        start_iteration=start_iteration,
        num_iteration=num_iteration,
        pred_leaf=pred_leaf,
        pred_contrib=pred_contrib,
        validate_features=validate_features,
        kwds=kwargs,
    )
    outputs = predict_op()
    return outputs[0] if len(outputs) == 1 else outputs
@@ -0,0 +1,163 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import itertools
16
+ import logging
17
+ from typing import List
18
+
19
+ from .... import opcodes
20
+ from ....core import OutputType
21
+ from ....core.operator import ObjectOperator, ObjectOperatorMixin
22
+ from ....serialization.serializables import (
23
+ AnyField,
24
+ BoolField,
25
+ DictField,
26
+ FieldTypes,
27
+ FunctionField,
28
+ Int32Field,
29
+ KeyField,
30
+ ListField,
31
+ StringField,
32
+ )
33
+ from ....typing_ import EntityType
34
+ from ..models import to_remote_model
35
+ from ..utils import TrainingCallback
36
+ from .core import Booster, BoosterData, LGBMScikitLearnBase
37
+
38
+ logger = logging.getLogger(__name__)
39
+
40
+
41
class LGBMTrain(ObjectOperator, ObjectOperatorMixin):
    """Operator describing the training of a LightGBM booster.

    Inputs are, in order: the training set, the validation sets (if any) and
    the initial model (if any). When validation sets are present the operator
    has two outputs, otherwise one.
    """

    _op_type_ = opcodes.LGBM_TRAIN
    _op_module_ = "learn.contrib.lightgbm"

    params = DictField("params", key_type=FieldTypes.string, default_factory=dict)
    train_set = KeyField("train_set", default=None)
    num_boost_round = Int32Field("num_boost_round", default=None)
    valid_sets = ListField("valid_sets", FieldTypes.key, default_factory=list)
    valid_names = ListField("valid_names", FieldTypes.string, default_factory=list)
    feval = FunctionField("feval", default=None)
    init_model = KeyField("init_model", default=None)
    feature_name = AnyField("feature_name", default=None)
    categorical_feature = AnyField("categorical_feature", default=None)
    keep_training_booster = BoolField("keep_training_booster", default=False)
    # callbacks are passed through ``TrainingCallback.from_local`` on serialization
    callbacks = ListField(
        "callbacks",
        field_type=FunctionField.field_type,
        default=None,
        on_serialize=TrainingCallback.from_local,
    )
    tree_learner = StringField("tree_learner", default=None)
    timeout = Int32Field("timeout", default=None)
    # indicating shape of the predicted data of the model
    num_class = Int32Field("num_class", default=None)

    def __init__(self, gpu=None, **kw):
        super().__init__(gpu=gpu, **kw)
        # NOTE(review): the nesting below is reconstructed from a flattened
        # listing; the second output type is assumed to be appended only when
        # the defaults are being filled in — confirm against the original file.
        if self.output_types is None:
            self.output_types = [OutputType.object]
            if self.has_evals_result:
                self.output_types.append(OutputType.object)

    @classmethod
    def _set_inputs(cls, op: "LGBMTrain", inputs: List[EntityType]):
        """Rebind the input-backed fields of *op* from ``op.inputs``."""
        super()._set_inputs(op, inputs)
        input_it = iter(op.inputs)
        op.train_set = next(input_it)
        # consume exactly as many inputs as there were validation sets
        op.valid_sets = list(itertools.islice(input_it, len(op.valid_sets or [])))
        if op.init_model is not None:
            op.init_model = next(input_it)

    def __call__(self, evals_result):
        """Create the output tileables and return the first one."""
        inputs = [self.train_set]
        if self.has_evals_result:
            inputs.extend(self.valid_sets)
        if self.init_model is not None:
            inputs.append(self.init_model)
        # first entry describes the first output; the second entry is empty
        kws = [{"object_class": Booster}, {}]
        return self.new_tileables(inputs, kws=kws, evals_result=evals_result)[0]

    @property
    def output_limit(self):
        """Two outputs when validation sets are present, otherwise one."""
        return 2 if self.has_evals_result else 1

    def has_custom_code(self) -> bool:
        """Tell whether any callback is, or reports, custom code.

        Returns ``False`` when there are no callbacks. Otherwise returns
        ``True`` if any callback is not a ``TrainingCallback`` or its own
        ``has_custom_code()`` is true.
        """
        if not self.callbacks:
            return False
        return any(
            not isinstance(cb, TrainingCallback) or cb.has_custom_code()
            for cb in self.callbacks
        )

    @property
    def has_evals_result(self) -> bool:
        """``True`` when at least one validation set is configured."""
        return bool(self.valid_sets)
106
+
107
+
108
def _get_lgbm_booster(init_model):
    """Turn *init_model* into a booster usable as an operator input.

    Estimators (``LGBMScikitLearnBase`` or ``lightgbm.LGBMModel``) are
    replaced by their ``booster_`` attribute first. A ``Booster`` or
    ``BoosterData`` is returned unchanged, and a ``lightgbm.Booster`` is
    converted with ``to_remote_model``.

    Raises:
        ValueError: if the (unwrapped) model is of none of the types above.
    """
    import lightgbm

    candidate = init_model
    if isinstance(candidate, (LGBMScikitLearnBase, lightgbm.LGBMModel)):
        # estimators carry the booster in ``booster_``
        candidate = candidate.booster_

    if isinstance(candidate, (Booster, BoosterData)):
        return candidate
    if isinstance(candidate, lightgbm.Booster):
        return to_remote_model(candidate, model_cls=Booster)
    raise ValueError(f"Cannot use {type(candidate)} as init_model")
119
+
120
+
121
def train(
    params,
    train_set,
    num_boost_round=100,
    valid_sets=None,
    valid_names=None,
    feval=None,
    init_model=None,
    keep_training_booster=False,
    callbacks=None,
    num_class=2,
    evals_result=None,
    **kwargs,
):
    """Build a LightGBM training operator and return its first output.

    ``timeout`` (default 120), ``session`` and ``run_kwargs`` are taken out of
    ``kwargs``; the remaining keyword arguments are forwarded to ``LGBMTrain``.
    When ``valid_sets`` is non-empty the result is executed before being
    returned.

    Raises:
        TypeError: if both ``valid_sets`` and ``valid_names`` are given and
            ``valid_names`` is not a list.
        ValueError: if ``valid_names`` and ``valid_sets`` differ in length.
    """
    if evals_result is None:
        evals_result = dict()

    timeout = kwargs.pop("timeout", 120)
    session = kwargs.pop("session", None)
    run_kwargs = kwargs.pop("run_kwargs", dict())

    # names are only validated when both names and validation sets are supplied
    if valid_sets and valid_names:
        if not isinstance(valid_names, list):
            raise TypeError("valid_names must be a list of strings")
        if len(valid_names) != len(valid_sets):
            raise ValueError("size of valid_names must match size of valid_sets")

    if init_model is not None:
        init_model = _get_lgbm_booster(init_model)

    train_op = LGBMTrain(
        params=params,
        train_set=train_set,
        num_boost_round=num_boost_round,
        valid_sets=valid_sets,
        valid_names=valid_names,
        feval=feval,
        init_model=init_model,
        keep_training_booster=keep_training_booster,
        callbacks=callbacks,
        num_class=num_class,
        evals_result=evals_result,
        timeout=timeout,
        **kwargs,
    )
    data = train_op(evals_result)

    if valid_sets:
        data.execute(session=session, **run_kwargs)
    return data