maxframe 1.3.0__cp310-cp310-win32.whl → 2.0.0b1__cp310-cp310-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (643)
  1. maxframe/_utils.cp310-win32.pyd +0 -0
  2. maxframe/_utils.pyi +21 -0
  3. maxframe/_utils.pyx +4 -3
  4. maxframe/codegen/__init__.py +27 -0
  5. maxframe/{codegen.py → codegen/core.py} +49 -43
  6. maxframe/codegen/spe/__init__.py +16 -0
  7. maxframe/codegen/spe/core.py +307 -0
  8. maxframe/codegen/spe/dataframe/__init__.py +37 -0
  9. maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
  10. maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
  11. maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
  12. maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
  13. maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
  14. maxframe/codegen/spe/dataframe/datasource.py +181 -0
  15. maxframe/codegen/spe/dataframe/datastore.py +204 -0
  16. maxframe/codegen/spe/dataframe/extensions.py +63 -0
  17. maxframe/codegen/spe/dataframe/fetch.py +26 -0
  18. maxframe/codegen/spe/dataframe/groupby.py +224 -0
  19. maxframe/codegen/spe/dataframe/indexing.py +238 -0
  20. maxframe/codegen/spe/dataframe/merge.py +73 -0
  21. maxframe/codegen/spe/dataframe/misc.py +286 -0
  22. maxframe/codegen/spe/dataframe/missing.py +64 -0
  23. maxframe/codegen/spe/dataframe/reduction.py +160 -0
  24. maxframe/codegen/spe/dataframe/sort.py +83 -0
  25. maxframe/codegen/spe/dataframe/statistics.py +46 -0
  26. maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
  27. maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
  29. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
  30. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
  31. maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
  32. maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
  33. maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
  34. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
  35. maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
  36. maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
  37. maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
  38. maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
  39. maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
  40. maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
  41. maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
  42. maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
  43. maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
  44. maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
  45. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
  46. maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
  47. maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
  48. maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
  49. maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
  50. maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
  51. maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
  52. maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
  53. maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
  54. maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
  55. maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
  56. maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
  57. maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
  58. maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
  59. maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
  60. maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
  61. maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
  62. maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
  63. maxframe/codegen/spe/dataframe/tseries.py +46 -0
  64. maxframe/codegen/spe/dataframe/udf.py +62 -0
  65. maxframe/codegen/spe/dataframe/value_counts.py +31 -0
  66. maxframe/codegen/spe/dataframe/window.py +65 -0
  67. maxframe/codegen/spe/learn/__init__.py +15 -0
  68. maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
  69. maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
  70. maxframe/codegen/spe/learn/contrib/models.py +41 -0
  71. maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
  72. maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
  73. maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
  74. maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
  75. maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
  76. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
  77. maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
  78. maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
  79. maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
  80. maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
  81. maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
  82. maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
  83. maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
  84. maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
  85. maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
  86. maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
  87. maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
  88. maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
  89. maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
  90. maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
  91. maxframe/codegen/spe/learn/utils/__init__.py +15 -0
  92. maxframe/codegen/spe/learn/utils/checks.py +55 -0
  93. maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
  94. maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
  95. maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
  96. maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
  97. maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
  98. maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
  99. maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
  100. maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
  101. maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
  102. maxframe/codegen/spe/learn/utils/validation.py +35 -0
  103. maxframe/codegen/spe/objects.py +26 -0
  104. maxframe/codegen/spe/remote.py +29 -0
  105. maxframe/codegen/spe/tensor/__init__.py +28 -0
  106. maxframe/codegen/spe/tensor/arithmetic.py +95 -0
  107. maxframe/codegen/spe/tensor/core.py +41 -0
  108. maxframe/codegen/spe/tensor/datasource.py +165 -0
  109. maxframe/codegen/spe/tensor/extensions.py +35 -0
  110. maxframe/codegen/spe/tensor/fetch.py +26 -0
  111. maxframe/codegen/spe/tensor/indexing.py +63 -0
  112. maxframe/codegen/spe/tensor/linalg.py +63 -0
  113. maxframe/codegen/spe/tensor/merge.py +31 -0
  114. maxframe/codegen/spe/tensor/misc.py +121 -0
  115. maxframe/codegen/spe/tensor/random.py +29 -0
  116. maxframe/codegen/spe/tensor/reduction.py +39 -0
  117. maxframe/codegen/spe/tensor/reshape.py +26 -0
  118. maxframe/codegen/spe/tensor/sort.py +42 -0
  119. maxframe/codegen/spe/tensor/special.py +35 -0
  120. maxframe/codegen/spe/tensor/statistics.py +24 -0
  121. maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
  122. maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
  123. maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
  124. maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
  125. maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
  126. maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
  127. maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
  128. maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
  129. maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
  130. maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
  131. maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
  132. maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
  133. maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
  134. maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
  135. maxframe/codegen/spe/tests/__init__.py +13 -0
  136. maxframe/codegen/spe/tests/test_remote.py +29 -0
  137. maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
  138. maxframe/codegen/spe/utils.py +54 -0
  139. maxframe/codegen/tests/__init__.py +13 -0
  140. maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
  141. maxframe/config/__init__.py +1 -1
  142. maxframe/config/config.py +50 -23
  143. maxframe/config/tests/test_config.py +4 -12
  144. maxframe/config/validators.py +5 -0
  145. maxframe/conftest.py +38 -10
  146. maxframe/core/__init__.py +1 -0
  147. maxframe/core/context.py +110 -0
  148. maxframe/core/entity/__init__.py +1 -0
  149. maxframe/core/entity/core.py +0 -7
  150. maxframe/core/entity/objects.py +19 -5
  151. maxframe/core/entity/output_types.py +11 -0
  152. maxframe/core/entity/tests/test_objects.py +11 -12
  153. maxframe/core/entity/tileables.py +3 -1
  154. maxframe/core/entity/utils.py +15 -0
  155. maxframe/core/graph/__init__.py +6 -1
  156. maxframe/core/graph/builder/base.py +5 -1
  157. maxframe/core/graph/core.cp310-win32.pyd +0 -0
  158. maxframe/core/graph/core.pyx +17 -6
  159. maxframe/core/graph/entity.py +18 -6
  160. maxframe/core/operator/__init__.py +8 -3
  161. maxframe/core/operator/base.py +35 -12
  162. maxframe/core/operator/core.py +37 -14
  163. maxframe/core/operator/fetch.py +5 -18
  164. maxframe/core/operator/objects.py +0 -20
  165. maxframe/core/operator/shuffle.py +6 -72
  166. maxframe/dataframe/__init__.py +1 -0
  167. maxframe/dataframe/accessors/datetime_/core.py +7 -4
  168. maxframe/dataframe/accessors/string_/core.py +9 -6
  169. maxframe/dataframe/arithmetic/core.py +31 -20
  170. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
  171. maxframe/dataframe/core.py +98 -91
  172. maxframe/dataframe/datasource/core.py +8 -1
  173. maxframe/dataframe/datasource/date_range.py +8 -0
  174. maxframe/dataframe/datasource/from_index.py +9 -5
  175. maxframe/dataframe/datasource/from_records.py +9 -2
  176. maxframe/dataframe/datasource/from_tensor.py +32 -21
  177. maxframe/dataframe/datasource/read_csv.py +8 -2
  178. maxframe/dataframe/datasource/read_odps_query.py +33 -3
  179. maxframe/dataframe/datasource/read_odps_table.py +20 -5
  180. maxframe/dataframe/datasource/read_parquet.py +8 -3
  181. maxframe/dataframe/datasource/tests/test_datasource.py +33 -0
  182. maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
  183. maxframe/dataframe/datastore/to_csv.py +7 -3
  184. maxframe/dataframe/datastore/to_odps.py +42 -6
  185. maxframe/dataframe/extensions/__init__.py +6 -1
  186. maxframe/dataframe/extensions/apply_chunk.py +96 -136
  187. maxframe/dataframe/extensions/flatjson.py +3 -2
  188. maxframe/dataframe/extensions/flatmap.py +15 -7
  189. maxframe/dataframe/fetch/core.py +12 -1
  190. maxframe/dataframe/groupby/__init__.py +7 -0
  191. maxframe/dataframe/groupby/aggregation.py +62 -9
  192. maxframe/dataframe/groupby/apply.py +50 -74
  193. maxframe/dataframe/groupby/apply_chunk.py +393 -0
  194. maxframe/dataframe/groupby/core.py +80 -17
  195. maxframe/dataframe/groupby/extensions.py +26 -0
  196. maxframe/dataframe/groupby/fill.py +9 -4
  197. maxframe/dataframe/groupby/sample.py +7 -7
  198. maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
  199. maxframe/dataframe/groupby/transform.py +57 -54
  200. maxframe/dataframe/indexing/align.py +7 -6
  201. maxframe/dataframe/indexing/getitem.py +9 -8
  202. maxframe/dataframe/indexing/iloc.py +28 -23
  203. maxframe/dataframe/indexing/insert.py +7 -3
  204. maxframe/dataframe/indexing/loc.py +9 -8
  205. maxframe/dataframe/indexing/reindex.py +36 -30
  206. maxframe/dataframe/indexing/rename_axis.py +18 -10
  207. maxframe/dataframe/indexing/reset_index.py +0 -2
  208. maxframe/dataframe/indexing/sample.py +13 -9
  209. maxframe/dataframe/indexing/set_axis.py +9 -6
  210. maxframe/dataframe/indexing/setitem.py +8 -5
  211. maxframe/dataframe/indexing/where.py +12 -9
  212. maxframe/dataframe/merge/__init__.py +0 -1
  213. maxframe/dataframe/merge/concat.py +10 -31
  214. maxframe/dataframe/merge/merge.py +2 -24
  215. maxframe/dataframe/misc/__init__.py +6 -0
  216. maxframe/dataframe/misc/_duplicate.py +7 -3
  217. maxframe/dataframe/misc/apply.py +106 -139
  218. maxframe/dataframe/misc/astype.py +3 -2
  219. maxframe/dataframe/misc/case_when.py +11 -7
  220. maxframe/dataframe/misc/cut.py +11 -10
  221. maxframe/dataframe/misc/describe.py +7 -3
  222. maxframe/dataframe/misc/drop.py +13 -11
  223. maxframe/dataframe/misc/eval.py +0 -2
  224. maxframe/dataframe/misc/get_dummies.py +78 -49
  225. maxframe/dataframe/misc/isin.py +13 -10
  226. maxframe/dataframe/misc/map.py +21 -6
  227. maxframe/dataframe/misc/melt.py +8 -1
  228. maxframe/dataframe/misc/pivot.py +232 -0
  229. maxframe/dataframe/misc/pivot_table.py +52 -40
  230. maxframe/dataframe/misc/rechunk.py +59 -0
  231. maxframe/dataframe/misc/shift.py +7 -4
  232. maxframe/dataframe/misc/stack.py +5 -3
  233. maxframe/dataframe/misc/tests/test_misc.py +167 -1
  234. maxframe/dataframe/misc/transform.py +63 -65
  235. maxframe/dataframe/misc/value_counts.py +7 -4
  236. maxframe/dataframe/missing/dropna.py +16 -7
  237. maxframe/dataframe/missing/fillna.py +18 -10
  238. maxframe/dataframe/missing/replace.py +10 -6
  239. maxframe/dataframe/missing/tests/test_missing.py +2 -2
  240. maxframe/dataframe/operators.py +1 -27
  241. maxframe/dataframe/reduction/aggregation.py +128 -3
  242. maxframe/dataframe/reduction/core.py +20 -6
  243. maxframe/dataframe/reduction/median.py +1 -1
  244. maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
  245. maxframe/dataframe/reduction/unique.py +53 -7
  246. maxframe/dataframe/statistics/corr.py +9 -6
  247. maxframe/dataframe/statistics/quantile.py +9 -6
  248. maxframe/dataframe/tseries/to_datetime.py +6 -4
  249. maxframe/dataframe/utils.py +219 -31
  250. maxframe/dataframe/window/rolling.py +7 -4
  251. maxframe/env.py +1 -0
  252. maxframe/errors.py +9 -0
  253. maxframe/extension.py +13 -2
  254. maxframe/io/objects/core.py +67 -51
  255. maxframe/io/objects/tensor.py +73 -17
  256. maxframe/io/objects/tests/test_object_io.py +8 -55
  257. maxframe/io/odpsio/arrow.py +15 -2
  258. maxframe/io/odpsio/schema.py +43 -13
  259. maxframe/io/odpsio/tableio.py +63 -11
  260. maxframe/io/odpsio/tests/test_arrow.py +1 -2
  261. maxframe/io/odpsio/tests/test_schema.py +114 -1
  262. maxframe/io/odpsio/tests/test_tableio.py +42 -0
  263. maxframe/io/odpsio/tests/test_volumeio.py +22 -48
  264. maxframe/learn/__init__.py +2 -2
  265. maxframe/learn/contrib/__init__.py +2 -2
  266. maxframe/learn/contrib/graph/connected_components.py +2 -1
  267. maxframe/learn/contrib/lightgbm/__init__.py +33 -0
  268. maxframe/learn/contrib/lightgbm/_predict.py +138 -0
  269. maxframe/learn/contrib/lightgbm/_train.py +163 -0
  270. maxframe/learn/contrib/lightgbm/callback.py +114 -0
  271. maxframe/learn/contrib/lightgbm/classifier.py +199 -0
  272. maxframe/learn/contrib/lightgbm/core.py +372 -0
  273. maxframe/learn/contrib/lightgbm/dataset.py +153 -0
  274. maxframe/learn/contrib/lightgbm/regressor.py +29 -0
  275. maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
  276. maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
  277. maxframe/learn/contrib/llm/models/dashscope.py +34 -0
  278. maxframe/learn/contrib/llm/models/managed.py +15 -0
  279. maxframe/learn/contrib/llm/multi_modal.py +92 -0
  280. maxframe/learn/contrib/llm/text.py +21 -5
  281. maxframe/learn/contrib/models.py +38 -9
  282. maxframe/learn/contrib/utils.py +55 -0
  283. maxframe/learn/contrib/xgboost/callback.py +86 -0
  284. maxframe/learn/contrib/xgboost/classifier.py +26 -30
  285. maxframe/learn/contrib/xgboost/core.py +53 -42
  286. maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
  287. maxframe/learn/contrib/xgboost/predict.py +13 -8
  288. maxframe/learn/contrib/xgboost/regressor.py +28 -27
  289. maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
  290. maxframe/learn/contrib/xgboost/train.py +59 -16
  291. maxframe/learn/core.py +252 -0
  292. maxframe/learn/datasets/__init__.py +20 -0
  293. maxframe/learn/datasets/samples_generator.py +628 -0
  294. maxframe/learn/linear_model/__init__.py +15 -0
  295. maxframe/learn/linear_model/_base.py +163 -0
  296. maxframe/learn/linear_model/_lin_reg.py +175 -0
  297. maxframe/learn/metrics/__init__.py +25 -0
  298. maxframe/learn/metrics/_check_targets.py +95 -0
  299. maxframe/learn/metrics/_classification.py +1121 -0
  300. maxframe/learn/metrics/_regression.py +256 -0
  301. maxframe/learn/model_selection/__init__.py +15 -0
  302. maxframe/learn/model_selection/_split.py +451 -0
  303. maxframe/learn/model_selection/tests/__init__.py +13 -0
  304. maxframe/learn/model_selection/tests/test_split.py +156 -0
  305. maxframe/learn/preprocessing/__init__.py +16 -0
  306. maxframe/learn/preprocessing/_data/__init__.py +17 -0
  307. maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
  308. maxframe/learn/preprocessing/_data/normalize.py +127 -0
  309. maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
  310. maxframe/learn/preprocessing/_data/utils.py +79 -0
  311. maxframe/learn/preprocessing/_label/__init__.py +16 -0
  312. maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
  313. maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
  314. maxframe/learn/utils/__init__.py +4 -0
  315. maxframe/learn/utils/_encode.py +314 -0
  316. maxframe/learn/utils/checks.py +161 -0
  317. maxframe/learn/utils/core.py +33 -0
  318. maxframe/learn/utils/extmath.py +176 -0
  319. maxframe/learn/utils/multiclass.py +292 -0
  320. maxframe/learn/utils/shuffle.py +114 -0
  321. maxframe/learn/utils/sparsefuncs.py +87 -0
  322. maxframe/learn/utils/validation.py +775 -0
  323. maxframe/lib/__init__.py +0 -2
  324. maxframe/lib/compat.py +145 -0
  325. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  326. maxframe/lib/mmh3.cp310-win32.pyd +0 -0
  327. maxframe/lib/sparse/__init__.py +10 -15
  328. maxframe/lib/sparse/array.py +45 -33
  329. maxframe/lib/sparse/core.py +0 -2
  330. maxframe/lib/sparse/linalg.py +31 -0
  331. maxframe/lib/sparse/matrix.py +5 -2
  332. maxframe/lib/sparse/tests/__init__.py +0 -2
  333. maxframe/lib/sparse/tests/test_sparse.py +53 -53
  334. maxframe/lib/sparse/vector.py +0 -2
  335. maxframe/mixin.py +59 -2
  336. maxframe/opcodes.py +13 -5
  337. maxframe/protocol.py +67 -14
  338. maxframe/remote/core.py +16 -14
  339. maxframe/remote/run_script.py +6 -3
  340. maxframe/serialization/__init__.py +2 -0
  341. maxframe/serialization/core.cp310-win32.pyd +0 -0
  342. maxframe/serialization/core.pxd +3 -0
  343. maxframe/serialization/core.pyi +3 -1
  344. maxframe/serialization/core.pyx +82 -4
  345. maxframe/serialization/pandas.py +5 -1
  346. maxframe/serialization/serializables/core.py +6 -5
  347. maxframe/serialization/serializables/field.py +2 -2
  348. maxframe/serialization/serializables/tests/test_field_type.py +3 -5
  349. maxframe/serialization/tests/test_serial.py +27 -0
  350. maxframe/session.py +4 -71
  351. maxframe/sperunner.py +165 -0
  352. maxframe/tensor/__init__.py +35 -2
  353. maxframe/tensor/arithmetic/__init__.py +2 -4
  354. maxframe/tensor/arithmetic/abs.py +0 -2
  355. maxframe/tensor/arithmetic/absolute.py +0 -2
  356. maxframe/tensor/arithmetic/add.py +34 -4
  357. maxframe/tensor/arithmetic/angle.py +0 -2
  358. maxframe/tensor/arithmetic/arccos.py +1 -4
  359. maxframe/tensor/arithmetic/arccosh.py +1 -3
  360. maxframe/tensor/arithmetic/arcsin.py +0 -2
  361. maxframe/tensor/arithmetic/arcsinh.py +0 -2
  362. maxframe/tensor/arithmetic/arctan.py +0 -2
  363. maxframe/tensor/arithmetic/arctan2.py +0 -2
  364. maxframe/tensor/arithmetic/arctanh.py +0 -2
  365. maxframe/tensor/arithmetic/around.py +0 -2
  366. maxframe/tensor/arithmetic/bitand.py +0 -2
  367. maxframe/tensor/arithmetic/bitor.py +1 -3
  368. maxframe/tensor/arithmetic/bitxor.py +1 -3
  369. maxframe/tensor/arithmetic/cbrt.py +0 -2
  370. maxframe/tensor/arithmetic/ceil.py +0 -2
  371. maxframe/tensor/arithmetic/clip.py +13 -13
  372. maxframe/tensor/arithmetic/conj.py +0 -2
  373. maxframe/tensor/arithmetic/copysign.py +0 -2
  374. maxframe/tensor/arithmetic/core.py +47 -39
  375. maxframe/tensor/arithmetic/cos.py +1 -3
  376. maxframe/tensor/arithmetic/cosh.py +0 -2
  377. maxframe/tensor/arithmetic/deg2rad.py +0 -2
  378. maxframe/tensor/arithmetic/degrees.py +0 -2
  379. maxframe/tensor/arithmetic/divide.py +0 -2
  380. maxframe/tensor/arithmetic/equal.py +0 -2
  381. maxframe/tensor/arithmetic/exp.py +1 -3
  382. maxframe/tensor/arithmetic/exp2.py +0 -2
  383. maxframe/tensor/arithmetic/expm1.py +0 -2
  384. maxframe/tensor/arithmetic/fabs.py +0 -2
  385. maxframe/tensor/arithmetic/fix.py +0 -2
  386. maxframe/tensor/arithmetic/float_power.py +0 -2
  387. maxframe/tensor/arithmetic/floor.py +0 -2
  388. maxframe/tensor/arithmetic/floordiv.py +0 -2
  389. maxframe/tensor/arithmetic/fmax.py +0 -2
  390. maxframe/tensor/arithmetic/fmin.py +0 -2
  391. maxframe/tensor/arithmetic/fmod.py +0 -2
  392. maxframe/tensor/arithmetic/frexp.py +6 -2
  393. maxframe/tensor/arithmetic/greater.py +0 -2
  394. maxframe/tensor/arithmetic/greater_equal.py +0 -2
  395. maxframe/tensor/arithmetic/hypot.py +0 -2
  396. maxframe/tensor/arithmetic/i0.py +1 -3
  397. maxframe/tensor/arithmetic/imag.py +0 -2
  398. maxframe/tensor/arithmetic/invert.py +1 -3
  399. maxframe/tensor/arithmetic/isclose.py +0 -2
  400. maxframe/tensor/arithmetic/iscomplex.py +0 -2
  401. maxframe/tensor/arithmetic/isfinite.py +1 -3
  402. maxframe/tensor/arithmetic/isinf.py +0 -2
  403. maxframe/tensor/arithmetic/isnan.py +0 -2
  404. maxframe/tensor/arithmetic/isreal.py +0 -2
  405. maxframe/tensor/arithmetic/ldexp.py +0 -2
  406. maxframe/tensor/arithmetic/less.py +0 -2
  407. maxframe/tensor/arithmetic/less_equal.py +0 -2
  408. maxframe/tensor/arithmetic/log.py +1 -3
  409. maxframe/tensor/arithmetic/log10.py +1 -3
  410. maxframe/tensor/arithmetic/log1p.py +1 -3
  411. maxframe/tensor/arithmetic/log2.py +1 -3
  412. maxframe/tensor/arithmetic/logaddexp.py +0 -2
  413. maxframe/tensor/arithmetic/logaddexp2.py +0 -2
  414. maxframe/tensor/arithmetic/logical_and.py +0 -2
  415. maxframe/tensor/arithmetic/logical_not.py +1 -3
  416. maxframe/tensor/arithmetic/logical_or.py +0 -2
  417. maxframe/tensor/arithmetic/logical_xor.py +0 -2
  418. maxframe/tensor/arithmetic/lshift.py +0 -2
  419. maxframe/tensor/arithmetic/maximum.py +0 -2
  420. maxframe/tensor/arithmetic/minimum.py +0 -2
  421. maxframe/tensor/arithmetic/mod.py +0 -2
  422. maxframe/tensor/arithmetic/modf.py +6 -2
  423. maxframe/tensor/arithmetic/multiply.py +37 -4
  424. maxframe/tensor/arithmetic/nan_to_num.py +0 -2
  425. maxframe/tensor/arithmetic/negative.py +0 -2
  426. maxframe/tensor/arithmetic/nextafter.py +0 -2
  427. maxframe/tensor/arithmetic/not_equal.py +0 -2
  428. maxframe/tensor/arithmetic/positive.py +0 -2
  429. maxframe/tensor/arithmetic/power.py +0 -2
  430. maxframe/tensor/arithmetic/rad2deg.py +0 -2
  431. maxframe/tensor/arithmetic/radians.py +0 -2
  432. maxframe/tensor/arithmetic/real.py +0 -2
  433. maxframe/tensor/arithmetic/reciprocal.py +5 -3
  434. maxframe/tensor/arithmetic/rint.py +1 -3
  435. maxframe/tensor/arithmetic/rshift.py +0 -2
  436. maxframe/tensor/arithmetic/setimag.py +0 -2
  437. maxframe/tensor/arithmetic/setreal.py +0 -2
  438. maxframe/tensor/arithmetic/sign.py +0 -2
  439. maxframe/tensor/arithmetic/signbit.py +0 -2
  440. maxframe/tensor/arithmetic/sin.py +0 -2
  441. maxframe/tensor/arithmetic/sinc.py +1 -3
  442. maxframe/tensor/arithmetic/sinh.py +0 -2
  443. maxframe/tensor/arithmetic/spacing.py +0 -2
  444. maxframe/tensor/arithmetic/sqrt.py +0 -2
  445. maxframe/tensor/arithmetic/square.py +0 -2
  446. maxframe/tensor/arithmetic/subtract.py +4 -2
  447. maxframe/tensor/arithmetic/tan.py +0 -2
  448. maxframe/tensor/arithmetic/tanh.py +0 -2
  449. maxframe/tensor/arithmetic/tests/__init__.py +0 -2
  450. maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
  451. maxframe/tensor/arithmetic/truediv.py +0 -2
  452. maxframe/tensor/arithmetic/trunc.py +0 -2
  453. maxframe/tensor/arithmetic/utils.py +32 -6
  454. maxframe/tensor/array_utils.py +3 -25
  455. maxframe/tensor/core.py +6 -6
  456. maxframe/tensor/datasource/__init__.py +10 -2
  457. maxframe/tensor/datasource/arange.py +0 -2
  458. maxframe/tensor/datasource/array.py +3 -22
  459. maxframe/tensor/datasource/core.py +15 -10
  460. maxframe/tensor/datasource/diag.py +140 -0
  461. maxframe/tensor/datasource/diagflat.py +69 -0
  462. maxframe/tensor/datasource/empty.py +0 -2
  463. maxframe/tensor/datasource/eye.py +95 -0
  464. maxframe/tensor/datasource/from_dataframe.py +0 -2
  465. maxframe/tensor/datasource/from_dense.py +0 -17
  466. maxframe/tensor/datasource/from_sparse.py +0 -2
  467. maxframe/tensor/datasource/full.py +0 -2
  468. maxframe/tensor/datasource/identity.py +54 -0
  469. maxframe/tensor/datasource/indices.py +115 -0
  470. maxframe/tensor/datasource/linspace.py +140 -0
  471. maxframe/tensor/datasource/meshgrid.py +135 -0
  472. maxframe/tensor/datasource/ones.py +8 -3
  473. maxframe/tensor/datasource/tests/test_datasource.py +32 -1
  474. maxframe/tensor/datasource/tri_array.py +107 -0
  475. maxframe/tensor/datasource/zeros.py +7 -3
  476. maxframe/tensor/extensions/__init__.py +31 -0
  477. maxframe/tensor/extensions/accessor.py +25 -0
  478. maxframe/tensor/extensions/apply_chunk.py +137 -0
  479. maxframe/tensor/indexing/__init__.py +1 -1
  480. maxframe/tensor/indexing/choose.py +8 -6
  481. maxframe/tensor/indexing/compress.py +0 -2
  482. maxframe/tensor/indexing/extract.py +0 -2
  483. maxframe/tensor/indexing/fill_diagonal.py +9 -6
  484. maxframe/tensor/indexing/flatnonzero.py +1 -3
  485. maxframe/tensor/indexing/getitem.py +10 -43
  486. maxframe/tensor/indexing/nonzero.py +2 -4
  487. maxframe/tensor/indexing/setitem.py +19 -9
  488. maxframe/tensor/indexing/slice.py +6 -3
  489. maxframe/tensor/indexing/take.py +0 -2
  490. maxframe/tensor/indexing/tests/__init__.py +0 -2
  491. maxframe/tensor/indexing/tests/test_indexing.py +0 -2
  492. maxframe/tensor/indexing/unravel_index.py +6 -6
  493. maxframe/tensor/lib/__init__.py +16 -0
  494. maxframe/tensor/lib/index_tricks.py +404 -0
  495. maxframe/tensor/linalg/__init__.py +36 -0
  496. maxframe/tensor/linalg/dot.py +145 -0
  497. maxframe/tensor/linalg/inner.py +36 -0
  498. maxframe/tensor/linalg/inv.py +83 -0
  499. maxframe/tensor/linalg/lu.py +115 -0
  500. maxframe/tensor/linalg/matmul.py +225 -0
  501. maxframe/tensor/linalg/qr.py +124 -0
  502. maxframe/tensor/linalg/solve_triangular.py +103 -0
  503. maxframe/tensor/linalg/svd.py +167 -0
  504. maxframe/tensor/linalg/tensordot.py +213 -0
  505. maxframe/tensor/linalg/vdot.py +73 -0
  506. maxframe/tensor/merge/__init__.py +4 -0
  507. maxframe/tensor/merge/append.py +74 -0
  508. maxframe/tensor/merge/column_stack.py +63 -0
  509. maxframe/tensor/merge/concatenate.py +3 -2
  510. maxframe/tensor/merge/dstack.py +71 -0
  511. maxframe/tensor/merge/hstack.py +70 -0
  512. maxframe/tensor/merge/stack.py +0 -2
  513. maxframe/tensor/merge/tests/test_merge.py +0 -2
  514. maxframe/tensor/misc/__init__.py +18 -5
  515. maxframe/tensor/misc/astype.py +10 -8
  516. maxframe/tensor/misc/broadcast_to.py +1 -1
  517. maxframe/tensor/misc/copy.py +64 -0
  518. maxframe/tensor/misc/diff.py +115 -0
  519. maxframe/tensor/misc/flatten.py +63 -0
  520. maxframe/tensor/misc/in1d.py +94 -0
  521. maxframe/tensor/misc/isin.py +130 -0
  522. maxframe/tensor/misc/ndim.py +53 -0
  523. maxframe/tensor/misc/ravel.py +0 -2
  524. maxframe/tensor/misc/repeat.py +129 -0
  525. maxframe/tensor/misc/searchsorted.py +147 -0
  526. maxframe/tensor/misc/setdiff1d.py +58 -0
  527. maxframe/tensor/misc/squeeze.py +117 -0
  528. maxframe/tensor/misc/swapaxes.py +113 -0
  529. maxframe/tensor/misc/tests/test_misc.py +0 -2
  530. maxframe/tensor/misc/transpose.py +8 -4
  531. maxframe/tensor/misc/trapezoid.py +123 -0
  532. maxframe/tensor/misc/unique.py +0 -1
  533. maxframe/tensor/misc/where.py +10 -8
  534. maxframe/tensor/operators.py +0 -34
  535. maxframe/tensor/random/__init__.py +3 -5
  536. maxframe/tensor/random/binomial.py +0 -2
  537. maxframe/tensor/random/bytes.py +0 -2
  538. maxframe/tensor/random/chisquare.py +0 -2
  539. maxframe/tensor/random/choice.py +9 -8
  540. maxframe/tensor/random/core.py +20 -5
  541. maxframe/tensor/random/dirichlet.py +0 -2
  542. maxframe/tensor/random/exponential.py +0 -2
  543. maxframe/tensor/random/f.py +2 -4
  544. maxframe/tensor/random/gamma.py +0 -2
  545. maxframe/tensor/random/geometric.py +0 -2
  546. maxframe/tensor/random/gumbel.py +0 -2
  547. maxframe/tensor/random/hypergeometric.py +0 -2
  548. maxframe/tensor/random/laplace.py +2 -4
  549. maxframe/tensor/random/logistic.py +0 -2
  550. maxframe/tensor/random/lognormal.py +0 -2
  551. maxframe/tensor/random/logseries.py +0 -2
  552. maxframe/tensor/random/multinomial.py +0 -2
  553. maxframe/tensor/random/multivariate_normal.py +0 -2
  554. maxframe/tensor/random/negative_binomial.py +0 -2
  555. maxframe/tensor/random/noncentral_chisquare.py +0 -2
  556. maxframe/tensor/random/noncentral_f.py +1 -3
  557. maxframe/tensor/random/normal.py +0 -2
  558. maxframe/tensor/random/pareto.py +0 -2
  559. maxframe/tensor/random/permutation.py +6 -3
  560. maxframe/tensor/random/poisson.py +0 -2
  561. maxframe/tensor/random/power.py +0 -2
  562. maxframe/tensor/random/rand.py +0 -2
  563. maxframe/tensor/random/randint.py +0 -2
  564. maxframe/tensor/random/randn.py +0 -2
  565. maxframe/tensor/random/random_integers.py +0 -2
  566. maxframe/tensor/random/random_sample.py +0 -2
  567. maxframe/tensor/random/rayleigh.py +0 -2
  568. maxframe/tensor/random/standard_cauchy.py +0 -2
  569. maxframe/tensor/random/standard_exponential.py +0 -2
  570. maxframe/tensor/random/standard_gamma.py +0 -2
  571. maxframe/tensor/random/standard_normal.py +0 -2
  572. maxframe/tensor/random/standard_t.py +0 -2
  573. maxframe/tensor/random/tests/__init__.py +0 -2
  574. maxframe/tensor/random/tests/test_random.py +0 -2
  575. maxframe/tensor/random/triangular.py +0 -2
  576. maxframe/tensor/random/uniform.py +0 -2
  577. maxframe/tensor/random/vonmises.py +0 -2
  578. maxframe/tensor/random/wald.py +0 -2
  579. maxframe/tensor/random/weibull.py +0 -2
  580. maxframe/tensor/random/zipf.py +0 -2
  581. maxframe/tensor/reduction/__init__.py +0 -2
  582. maxframe/tensor/reduction/all.py +0 -2
  583. maxframe/tensor/reduction/allclose.py +0 -2
  584. maxframe/tensor/reduction/any.py +0 -2
  585. maxframe/tensor/reduction/argmax.py +1 -3
  586. maxframe/tensor/reduction/argmin.py +1 -3
  587. maxframe/tensor/reduction/array_equal.py +0 -2
  588. maxframe/tensor/reduction/core.py +0 -2
  589. maxframe/tensor/reduction/count_nonzero.py +0 -2
  590. maxframe/tensor/reduction/cumprod.py +0 -2
  591. maxframe/tensor/reduction/cumsum.py +0 -2
  592. maxframe/tensor/reduction/max.py +0 -2
  593. maxframe/tensor/reduction/mean.py +0 -2
  594. maxframe/tensor/reduction/min.py +0 -2
  595. maxframe/tensor/reduction/nanargmax.py +0 -2
  596. maxframe/tensor/reduction/nanargmin.py +0 -2
  597. maxframe/tensor/reduction/nancumprod.py +0 -2
  598. maxframe/tensor/reduction/nancumsum.py +0 -2
  599. maxframe/tensor/reduction/nanmax.py +0 -2
  600. maxframe/tensor/reduction/nanmean.py +0 -2
  601. maxframe/tensor/reduction/nanmin.py +0 -2
  602. maxframe/tensor/reduction/nanprod.py +0 -2
  603. maxframe/tensor/reduction/nanstd.py +0 -2
  604. maxframe/tensor/reduction/nansum.py +0 -2
  605. maxframe/tensor/reduction/nanvar.py +0 -2
  606. maxframe/tensor/reduction/prod.py +0 -2
  607. maxframe/tensor/reduction/std.py +0 -2
  608. maxframe/tensor/reduction/sum.py +0 -2
  609. maxframe/tensor/reduction/tests/test_reduction.py +1 -4
  610. maxframe/tensor/reduction/var.py +0 -2
  611. maxframe/tensor/reshape/__init__.py +0 -2
  612. maxframe/tensor/reshape/reshape.py +6 -5
  613. maxframe/tensor/reshape/tests/__init__.py +0 -2
  614. maxframe/tensor/reshape/tests/test_reshape.py +0 -2
  615. maxframe/tensor/sort/__init__.py +16 -0
  616. maxframe/tensor/sort/argsort.py +150 -0
  617. maxframe/tensor/sort/sort.py +295 -0
  618. maxframe/tensor/special/__init__.py +37 -0
  619. maxframe/tensor/special/core.py +38 -0
  620. maxframe/tensor/special/misc.py +142 -0
  621. maxframe/tensor/special/statistical.py +56 -0
  622. maxframe/tensor/statistics/__init__.py +5 -0
  623. maxframe/tensor/statistics/average.py +143 -0
  624. maxframe/tensor/statistics/bincount.py +133 -0
  625. maxframe/tensor/statistics/quantile.py +10 -8
  626. maxframe/tensor/ufunc/__init__.py +0 -2
  627. maxframe/tensor/ufunc/ufunc.py +0 -2
  628. maxframe/tensor/utils.py +21 -3
  629. maxframe/tests/test_protocol.py +3 -3
  630. maxframe/tests/test_utils.py +210 -1
  631. maxframe/tests/utils.py +67 -1
  632. maxframe/udf.py +76 -6
  633. maxframe/utils.py +418 -17
  634. {maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/METADATA +5 -1
  635. maxframe-2.0.0b1.dist-info/RECORD +939 -0
  636. {maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/WHEEL +1 -1
  637. maxframe_client/clients/framedriver.py +19 -3
  638. maxframe_client/fetcher.py +113 -6
  639. maxframe_client/session/odps.py +173 -38
  640. maxframe_client/session/task.py +3 -1
  641. maxframe_client/tests/test_session.py +41 -5
  642. maxframe-1.3.0.dist-info/RECORD +0 -705
  643. {maxframe-1.3.0.dist-info → maxframe-2.0.0b1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,153 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import List
16
+
17
+ from .... import opcodes
18
+ from ....core import EntityData
19
+ from ....core.entity.output_types import get_output_types
20
+ from ....core.operator.base import Operator
21
+ from ....core.operator.core import TileableOperatorMixin
22
+ from ....dataframe.core import DATAFRAME_TYPE
23
+ from ....serialization.serializables import BoolField, KeyField, ListField
24
+ from ....serialization.serializables.field import AnyField
25
+ from ....tensor import tensor as astensor
26
+ from ....tensor.core import TENSOR_TYPE
27
+ from ....typing_ import TileableType
28
+ from ...utils import convert_to_tensor_or_dataframe
29
+
30
+
31
class ToLGBMDataset(Operator, TileableOperatorMixin):
    """
    Operator that bundles training inputs into one tileable for LightGBM.

    ``data`` is always the first input. ``label``, ``weight`` and
    ``init_score`` are appended after it, in that order, whenever they are
    not None (see ``__call__``). All other fields are stored on the operator
    unchanged.
    """

    _op_type_ = opcodes.TO_LGBM_DATASET

    # Entity-valued fields; the ones used as inputs are rebound in _set_inputs.
    data = KeyField("data", default=None)
    label = KeyField("label", default=None)
    # NOTE(review): ``reference`` is declared as a KeyField but is neither
    # appended to the inputs in ``__call__`` nor rebound in ``_set_inputs``
    # -- confirm this is intended.
    reference = KeyField("reference", default=None)
    weight = KeyField("weight", default=None)
    init_score = KeyField("init_score", default=None)
    # Plain configuration values carried along with the operator.
    group = AnyField("group", default=None)
    feature_name = ListField("feature_name", default=None)
    categorical_feature = ListField("categorical_feature", default=None)
    params = AnyField("params", default=None)
    free_raw_data = BoolField("free_raw_data", default=None)
    position = AnyField("position", default=None)
    # Whether to collocate the data, label and weight.
    collocate = BoolField("collocate", default=False)

    @property
    def output_limit(self):
        """
        Number of outputs of this operator.

        Returns 1 unless ``collocate`` is set, in which case one extra output
        is counted for each of ``label``, ``weight`` and ``init_score`` that
        is present.
        """
        if self.collocate:
            # Presence is tested by identity with None, exactly like the
            # checks in ``_set_inputs`` and ``__call__``. Relying on the
            # truthiness of an array-like entity is fragile (it may consult
            # its length or refuse to be evaluated as a boolean).
            optional_inputs = (self.label, self.weight, self.init_score)
            return 1 + sum(x is not None for x in optional_inputs)
        return 1

    @classmethod
    def _set_inputs(cls, op: "ToLGBMDataset", inputs: List[EntityData]):
        """
        Rebind the entity fields of ``op`` to the objects in ``op._inputs``.

        The positions mirror the order used by ``__call__``: data first, then
        label, then weight, with init_score taken from the last position.
        """
        super()._set_inputs(op, inputs)
        if op.data is not None:
            op.data = op._inputs[0]
        has_label = op.label is not None
        if has_label:
            op.label = op._inputs[1]
        if op.weight is not None:
            # weight directly follows data when there is no label
            weight_pos = 2 if has_label else 1
            op.weight = op._inputs[weight_pos]
        if op.init_score is not None:
            # init_score is appended last in __call__
            op.init_score = op._inputs[-1]

    @staticmethod
    def _get_kw(obj):
        """
        Collect the metadata keywords used to describe the output tileable.

        Tensor-typed objects contribute shape, dtype and order; anything else
        is read as a dataframe-like object (shape, dtypes, index_value and
        columns_value).
        """
        if isinstance(obj, TENSOR_TYPE):
            return {"shape": obj.shape, "dtype": obj.dtype, "order": obj.order}
        else:
            return {
                "shape": obj.shape,
                "dtypes": obj.dtypes,
                "index_value": obj.index_value,
                "columns_value": obj.columns_value,
            }

    def __call__(self):
        """
        Build the output tileable.

        The output copies its metadata from ``data``; the inputs are ``data``
        followed by every optional entity that is not None.
        """
        inputs = [self.data]
        kw = self._get_kw(self.data)
        if self.label is not None:
            inputs.append(self.label)
        if self.weight is not None:
            inputs.append(self.weight)
        if self.init_score is not None:
            inputs.append(self.init_score)

        return self.new_tileable(inputs, **kw)
91
+
92
+
93
def check_data(data):
    """
    Convert ``data`` with ``convert_to_tensor_or_dataframe`` and validate it.

    Returns the converted object.

    Raises
    ------
    ValueError
        If the converted object is not two-dimensional.
    """
    converted = convert_to_tensor_or_dataframe(data)
    if converted.ndim == 2:
        return converted
    raise ValueError(f"Expecting 2-d data, got: {converted.ndim}-d")
99
+
100
+
101
def check_array_like(y: TileableType, name: str) -> TileableType:
    """
    Normalize an optional array-like argument into a tensor.

    ``None`` is passed through as ``None``. Any other value is converted with
    ``convert_to_tensor_or_dataframe``; when the result is a dataframe only
    its first column is kept. The value is finally wrapped with ``astensor``.

    ``name`` is accepted for the caller's convenience but is not used here.
    """
    if y is not None:
        converted = convert_to_tensor_or_dataframe(y)
        if isinstance(converted, DATAFRAME_TYPE):
            # keep the first column only
            converted = converted.iloc[:, 0]
        return astensor(converted)
    return None
108
+
109
+
110
def to_lgbm_dataset(
    data,
    label=None,
    reference=None,
    weight=None,
    group=None,
    init_score=None,
    feature_name="auto",
    categorical_feature="auto",
    params=None,
    free_raw_data=True,
    position=None,
):
    """
    Build a ``ToLGBMDataset`` operator from the arguments and call it.

    ``data`` is validated with ``check_data`` (it must be 2-d). ``label``,
    ``weight`` and ``init_score`` are normalized with ``check_array_like``.
    A value of ``"auto"`` for ``feature_name`` or ``categorical_feature`` is
    stored on the operator as ``None``. All remaining arguments are handed to
    the operator unchanged.

    Raises
    ------
    ValueError
        If ``data`` is not 2-d, or ``weight`` has more than one dimension.
    """
    data = check_data(data)
    label = check_array_like(label, "label")
    weight = check_array_like(weight, "weight")
    init_score = check_array_like(init_score, "init_score")

    if weight is not None and weight.ndim > 1:
        raise ValueError("weight must be 1-dimensional")

    # ``collocate`` is not passed, so the operator keeps its default and
    # produces a single output.
    op_kwargs = dict(
        data=data,
        label=label,
        reference=reference,
        weight=weight,
        group=group,
        init_score=init_score,
        feature_name=None if feature_name == "auto" else feature_name,
        categorical_feature=(
            None if categorical_feature == "auto" else categorical_feature
        ),
        params=params,
        free_raw_data=free_raw_data,
        position=position,
        # GPU flag and output types are inherited from ``data``
        gpu=data.op.gpu,
        _output_types=get_output_types(data),
    )
    return ToLGBMDataset(**op_kwargs)()


# Alias exposing the builder function under the name ``Dataset``.
Dataset = to_lgbm_dataset
@@ -0,0 +1,29 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ..utils import make_import_error_func
16
+ from .core import LGBMScikitLearnBase
17
+
18
# lightgbm is optional: remember whether it could be imported.
try:
    import lightgbm
except ImportError:
    lightgbm = None


if lightgbm is not None:

    # Combine the shared base class with lightgbm's own regressor class.
    class LGBMRegressor(LGBMScikitLearnBase, lightgbm.LGBMRegressor):
        _default_objective = "regression"

else:
    # Without lightgbm, bind the name to the stand-in produced by
    # ``make_import_error_func``.
    LGBMRegressor = make_import_error_func("lightgbm")
@@ -0,0 +1,13 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
@@ -0,0 +1,58 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import inspect
16
+
17
+ import pytest
18
+
19
+ from ..callback import (
20
+ EarlyStoppingCallback,
21
+ LGBMTrainingCallback,
22
+ early_stopping,
23
+ reset_parameter,
24
+ )
25
+
26
+ try:
27
+ from lightgbm.callback import _EarlyStoppingCallback
28
+ except ImportError:
29
+ try:
30
+ from lightgbm.callback import early_stopping as _EarlyStoppingCallback
31
+ except ImportError:
32
+ pytestmark = pytest.mark.skip("Need lightgbm to run the test")
33
+
34
+
35
+ def test_lgbm_training_callbacks():
36
+ early_stopping_cb = early_stopping(stopping_rounds=5)
37
+ assert not early_stopping_cb.has_custom_code()
38
+
39
+ local_early_stopping_cb = early_stopping_cb.to_local()
40
+ if isinstance(_EarlyStoppingCallback, type):
41
+ assert isinstance(local_early_stopping_cb, _EarlyStoppingCallback)
42
+ assert local_early_stopping_cb.stopping_rounds == 5
43
+ else:
44
+ assert local_early_stopping_cb.__qualname__.startswith(
45
+ _EarlyStoppingCallback.__name__
46
+ )
47
+ cb_vars = inspect.getclosurevars(local_early_stopping_cb).nonlocals
48
+ assert cb_vars["stopping_rounds"] == 5
49
+
50
+ rmt_early_stopping_cb = LGBMTrainingCallback.from_local(local_early_stopping_cb)
51
+ assert isinstance(rmt_early_stopping_cb, EarlyStoppingCallback)
52
+ assert rmt_early_stopping_cb.stopping_rounds == 5
53
+
54
+ lr_sch_cb = reset_parameter(a=[0.5, 0.4, 0.1])
55
+ assert not lr_sch_cb.has_custom_code()
56
+
57
+ lr_sch_cb = reset_parameter(a=lambda x: x * 0.1)
58
+ assert lr_sch_cb.has_custom_code()
@@ -34,8 +34,25 @@ class DashScopeLLMMixin(Serializable):
34
34
 
35
35
 
36
36
  class DashScopeTextLLM(TextLLM, DashScopeLLMMixin):
37
+ """
38
+ DashScope text LLM.
39
+ """
40
+
37
41
  api_key_resource = StringField("api_key_resource", default=None)
38
42
 
43
def __init__(self, name: str, api_key_resource: str) -> None:
    """
    Initialize a DashScope text LLM.

    Parameters
    ----------
    name : str
        The LLM name to use, check DashScope for `available models <https://help.aliyun.com/zh/model-studio/getting-started/models>`_.
    api_key_resource : str
        The MaxCompute resource file name containing the DashScope API key.
    """
    # Both values are forwarded by keyword to the next initializer in the MRO.
    super().__init__(name=name, api_key_resource=api_key_resource)
55
+
39
56
  def generate(
40
57
  self,
41
58
  data,
@@ -50,8 +67,25 @@ class DashScopeTextLLM(TextLLM, DashScopeLLMMixin):
50
67
 
51
68
 
52
69
  class DashScopeMultiModalLLM(MultiModalLLM, DashScopeLLMMixin):
70
+ """
71
+ DashScope multi-modal LLM.
72
+ """
73
+
53
74
  api_key_resource = StringField("api_key_resource", default=None)
54
75
 
76
def __init__(self, name: str, api_key_resource: str) -> None:
    """
    Initialize a DashScope multi-modal LLM.

    Parameters
    ----------
    name : str
        The LLM name to use, check DashScope for `available models <https://help.aliyun.com/zh/model-studio/getting-started/models>`_.
    api_key_resource : str
        The MaxCompute resource file name containing the DashScope API key.
    """
    # Both values are forwarded by keyword to the next initializer in the MRO.
    super().__init__(name=name, api_key_resource=api_key_resource)
88
+
55
89
  def generate(
56
90
  self,
57
91
  data,
@@ -27,6 +27,21 @@ class ManagedLLMTextGenOperator(LLMTextGenOperator):
27
27
 
28
28
 
29
29
  class ManagedTextLLM(TextLLM):
30
+ """
31
+ Managed text LLM by MaxFrame.
32
+ """
33
+
34
def __init__(self, name: str) -> None:
    """
    Initialize a managed text LLM.

    Parameters
    ----------
    name : str
        The managed text LLM name to use.
    """
    # The name is forwarded by keyword to the parent initializer.
    super().__init__(name=name)
44
+
30
45
  def generate(
31
46
  self,
32
47
  data,
@@ -34,6 +34,98 @@ def generate(
34
34
  prompt_template: Dict[str, Any],
35
35
  params: Dict[str, Any] = None,
36
36
  ):
37
+ """
38
+ Generate text with a multi-modal LLM based on the given data and prompt template.
39
+
40
+ Parameters
41
+ ----------
42
+ data : DataFrame or Series
43
+ Input data used for generation. Can be maxframe DataFrame, Series that contain text to be processed.
44
+ model : MultiModalLLM
45
+ Language model instance that supports **multi-modal** inputs, used for text generation.
46
+ prompt_template : List[Dict[str, List[Dict[str, str]]]]
47
+ List of message with column names as placeholders. Each message contains a role and content. Content is a list of dict, each dict contains a text or image, the value can reference column data from input.
48
+
49
+ Here is an example of prompt template.
50
+
51
+ .. code-block:: python
52
+
53
+ [
54
+ {
55
+ "role": "<role>", # e.g. "user" or "assistant"
56
+ "content": [
57
+ {
58
+ # At least one of these fields is required
59
+ "image": "<image_data_url>", # optional
60
+ "text": "<prompt_text_template>" # optional
61
+ },
62
+ ...
63
+ ]
64
+ }
65
+ ]
66
+
67
+ Where:
68
+
69
+ - ``text`` can be a Python format string using column names from input data as parameters (e.g. ``"{column_name}"``)
70
+ - ``image`` should be a DataURL string following `RFC2397 <https://en.wikipedia.org/wiki/Data_URI_scheme>`_ standard with format.
71
+
72
+ .. code-block:: none
73
+
74
+ data:<mime_type>[;base64],<column_name>
75
+
76
+
77
+ params : Dict[str, Any], optional
78
+ Additional parameters for generation configuration, by default None.
79
+ Can include settings like temperature, max_tokens, etc.
80
+
81
+ Returns
82
+ -------
83
+ DataFrame
84
+ Generated text raw response and success status. If the success is False, the generated text will return the
85
+ error message.
86
+
87
+ Notes
88
+ -----
89
+ - The ``api_key_resource`` parameter should reference a text file resource in MaxCompute that contains only your DashScope API key.
90
+
91
+ - Using DashScope services requires enabling public network access for your MaxCompute project. This can be configured through the MaxCompute console by `enabling the Internet access feature <https://help.aliyun.com/zh/maxcompute/user-guide/network-connection-process>`_ for your project. Without this configuration, the API calls to DashScope will fail due to network connectivity issues.
92
+
93
+ Examples
94
+ --------
95
+ You can initialize a DashScope multi-modal model (such as qwen-vl-max) by providing a model name and an ``api_key_resource``.
96
+ The ``api_key_resource`` is a MaxCompute resource name that points to a text file containing a `DashScope <https://dashscope.aliyun.com/>`_ API key.
97
+
98
+ >>> from maxframe.learn.contrib.llm.models.dashscope import DashScopeMultiModalLLM
99
+ >>> import maxframe.dataframe as md
100
+ >>>
101
+ >>> model = DashScopeMultiModalLLM(
102
+ ... name="qwen-vl-max",
103
+ ... api_key_resource="<api-key-resource-name>"
104
+ ... )
105
+
106
+ We use the data URL scheme to provide multi-modal input in the prompt template; here is an example that fills in the image from a table.
107
+
108
+ Assuming you have a MaxCompute table with two columns: ``image_id`` (as the index) and ``encoded_image_data_base64`` (containing Base64 encoded image data),
109
+ you can construct a prompt message template as follows:
110
+
111
+ >>> df = md.read_odps_table("image_content", index_col="image_id")
112
+
113
+ >>> prompt_template = [
114
+ ... {
115
+ ... "role": "user",
116
+ ... "content": [
117
+ ... {
118
+ ... "image": "_image_data_base64",
119
+ ... },
120
+ ... {
121
+ ... "text": "Analyze this image in detail",
122
+ ... },
123
+ ... ],
124
+ ... },
125
+ ... ]
126
+ >>> result = model.generate(df, prompt_template)
127
+ >>> result.execute()
128
+ """
37
129
  if not isinstance(data, DATAFRAME_TYPE) and not isinstance(data, SERIES_TYPE):
38
130
  raise ValueError("data must be a maxframe dataframe or series object")
39
131
  if not isinstance(model, MultiModalLLM):
@@ -145,16 +145,16 @@ def generate(
145
145
  >>> import maxframe.dataframe as md
146
146
  >>>
147
147
  >>> # Initialize the model
148
- >>> llm = ManagedTextLLM(name="Qwen2.5-1.5B")
148
+ >>> llm = ManagedTextLLM(name="Qwen2.5-0.5B-instruct")
149
149
  >>>
150
150
  >>> # Prepare prompt template
151
151
  >>> messages = [
152
152
  ... {
153
153
  ... "role": "user",
154
- ... "content": "{query}",
154
+ ... "content": "Help answer following question: {query}",
155
155
  ... },
156
156
  ... ]
157
- >>>
157
+
158
158
  >>> # Create sample data
159
159
  >>> df = md.DataFrame({"query": ["What is machine learning?"]})
160
160
  >>>
@@ -177,7 +177,7 @@ def summary(series, model: TextLLM, index=None):
177
177
 
178
178
  Parameters
179
179
  ----------
180
- series : pandas.Series
180
+ series : Series
181
181
  A maxframe Series containing text data to be summarized.
182
182
  Each element should be a text string.
183
183
  model : TextLLM
@@ -189,6 +189,11 @@ def summary(series, model: TextLLM, index=None):
189
189
  -------
190
190
  maxframe.Series
191
191
  A maxframe Series containing the generated summaries and success status.
192
+
193
+ Notes
194
+ -----
195
+ **Preview:** This API is in preview state and may be unstable.
196
+ The interface may change in future releases.
192
197
  """
193
198
  if not isinstance(series, Series):
194
199
  raise ValueError("series must be a maxframe series object")
@@ -208,7 +213,7 @@ def translate(
208
213
  Parameters
209
214
  ----------
210
215
  series : Series
211
- A maxframe Series containing text data to be translate.
216
+ A maxframe Series containing text data to translate.
212
217
  Each element should be a text string.
213
218
  model : TextLLM
214
219
  Language model instance used for text translation.
@@ -223,6 +228,12 @@ def translate(
223
228
  -------
224
229
  maxframe.Series
225
230
  A maxframe Series containing the generated translation and success status.
231
+
232
+ Notes
233
+ -----
234
+ **Preview:** This API is in preview state and may be unstable.
235
+ The interface may change in future releases.
236
+
226
237
  """
227
238
  if not isinstance(series, Series):
228
239
  raise ValueError("series must be a maxframe series object")
@@ -268,6 +279,11 @@ def classify(
268
279
  -------
269
280
  maxframe.Series
270
281
  A maxframe Series containing the generated classification results and success status.
282
+
283
+ Notes
284
+ -----
285
+ **Preview:** This API is in preview state and may be unstable.
286
+ The interface may change in future releases.
271
287
  """
272
288
  if not isinstance(series, Series):
273
289
  raise ValueError("series must be a maxframe series object")
@@ -12,6 +12,8 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from typing import Type
16
+
15
17
  from ... import opcodes
16
18
  from ...core import ENTITY_TYPE, OutputType
17
19
  from ...core.operator import ObjectOperator, ObjectOperatorMixin
@@ -21,7 +23,30 @@ from ...serialization.serializables import (
21
23
  FunctionField,
22
24
  TupleField,
23
25
  )
26
+ from ...udf import BuiltinFunction
24
27
  from ...utils import find_objects, replace_objects
28
+ from ..core import Model, ModelData
29
+
30
+
31
class ModelWithEvalData(ModelData):
    """
    Model data that additionally keeps a dict of evaluation results.

    The dict is filled in by ``execute`` when the producing operator reports
    that it has evaluation results.
    """

    __slots__ = ("_evals_result",)

    _evals_result: dict

    def __init__(self, *args, evals_result=None, **kwargs):
        """Store ``evals_result``, or a fresh dict when none is supplied."""
        super().__init__(*args, **kwargs)
        if evals_result is None:
            evals_result = dict()
        self._evals_result = evals_result

    def execute(self, session=None, **kw):
        """
        Execute through the parent class, then refresh the evaluation results.

        The results are fetched from the operator's second output, and only
        when this object is the operator's first output and the operator's
        ``has_evals_result`` flag is set.
        """
        outcome = super().execute(session=session, **kw)
        if self.op.has_evals_result and self.key == self.op.outputs[0].key:
            fetched = self.op.outputs[1].fetch(session=session)
            self._evals_result.update(fetched)
        return outcome
46
+
47
+
48
class ModelWithEval(Model):
    # ``Model`` subclass that adds no members of its own. It is the type
    # named in the annotations of ``ModelDataSource.__call__`` and
    # ``to_remote_model``.
    pass
25
50
 
26
51
 
27
52
  class ModelDataSource(ObjectOperator, ObjectOperatorMixin):
@@ -29,7 +54,7 @@ class ModelDataSource(ObjectOperator, ObjectOperatorMixin):
29
54
 
30
55
  data = AnyField("data")
31
56
 
32
- def __call__(self, model_cls):
57
+ def __call__(self, model_cls: Type[ModelWithEval]):
33
58
  self._output_types = [OutputType.object]
34
59
  return self.new_tileable(None, object_class=model_cls)
35
60
 
@@ -48,14 +73,18 @@ class ModelApplyChunk(ObjectOperator, ObjectOperatorMixin):
48
73
  self._output_types = list(output_types)
49
74
  super().__init__(**kwargs)
50
75
 
51
- def _set_inputs(self, inputs):
52
- super()._set_inputs(inputs)
53
- old_inputs = find_objects(self.args, ENTITY_TYPE) + find_objects(
54
- self.kwargs, ENTITY_TYPE
76
def has_custom_code(self) -> bool:
    """Return True unless ``self.func`` is a ``BuiltinFunction`` instance."""
    return not isinstance(self.func, BuiltinFunction)
78
+
79
+ @classmethod
80
+ def _set_inputs(cls, op: "ModelApplyChunk", inputs):
81
+ super()._set_inputs(op, inputs)
82
+ old_inputs = find_objects(op.args, ENTITY_TYPE) + find_objects(
83
+ op.kwargs, ENTITY_TYPE
55
84
  )
56
- mapping = {o: n for o, n in zip(old_inputs, self._inputs[1:])}
57
- self.args = replace_objects(self.args, mapping)
58
- self.kwargs = replace_objects(self.kwargs, mapping)
85
+ mapping = {o: n for o, n in zip(old_inputs, op._inputs[1:])}
86
+ op.args = replace_objects(op.args, mapping)
87
+ op.kwargs = replace_objects(op.kwargs, mapping)
59
88
 
60
89
  @property
61
90
  def output_limit(self) -> int:
@@ -72,6 +101,6 @@ class ModelApplyChunk(ObjectOperator, ObjectOperatorMixin):
72
101
  return self.new_tileables(inputs, kws=output_kws)
73
102
 
74
103
 
75
- def to_remote_model(model, model_cls):
104
def to_remote_model(model, model_cls: Type[ModelWithEval]) -> ModelWithEval:
    """
    Wrap ``model`` in a ``ModelDataSource`` operator and call it.

    ``model_cls`` is passed to the operator call, which uses it as the
    ``object_class`` of the tileable it creates.
    """
    source_op = ModelDataSource(data=model)
    remote_model = source_op(model_cls)
    return remote_model
@@ -14,6 +14,8 @@
14
14
 
15
15
  import sys
16
16
 
17
+ from ...serialization.serializables import Serializable
18
+
17
19
 
18
20
  def make_import_error_func(package_name):
19
21
  def _func(*_, **__): # pragma: no cover
@@ -51,3 +53,56 @@ def config_mod_getattr(mod_dict, globals_):
51
53
  "__warningregistry__": dict(),
52
54
  }
53
55
  )
56
+
57
+
58
class TrainingCallback(Serializable):
    """
    Serializable counterpart of a local training callback object.

    A subclass names the local class it mirrors through ``_local_cls``.
    Conversion in both directions copies the attributes listed in the
    subclass's ``_FIELDS``.

    NOTE(review): ``_local_to_remote`` is read and written below but is not
    defined in this class; presumably each concrete hierarchy supplies its
    own mapping -- confirm.
    """

    _local_cls = None

    @classmethod
    def _load_local_to_remote_mapping(cls, globals_dict):
        """
        Populate ``cls._local_to_remote`` from a module's globals.

        Every value in ``globals_dict`` that is a subclass of ``cls`` with a
        non-None ``_local_cls`` is registered under that local class. Does
        nothing when the mapping already has entries.
        """
        if cls._local_to_remote:
            return
        for candidate in globals_dict.values():
            if not isinstance(candidate, type):
                continue
            if not issubclass(candidate, cls):
                continue
            if candidate._local_cls is None:
                continue
            cls._local_to_remote[candidate._local_cls] = candidate

    @classmethod
    def from_local(cls, callback_obj):
        """
        Convert a local callback, or a list/tuple of them, to remote form.

        Lists and tuples are converted element-wise into a list. An object
        whose exact type is not registered is returned unchanged.
        """
        if isinstance(callback_obj, (list, tuple)):
            return [cls.from_local(item) for item in callback_obj]

        local_type = type(callback_obj)
        if local_type not in cls._local_to_remote:
            return callback_obj

        remote_cls = cls._local_to_remote[local_type]
        missing = object()
        field_values = {}
        for attr in remote_cls._FIELDS:
            # attributes absent on the local object are simply left out
            value = getattr(callback_obj, attr, missing)
            if value is not missing:
                field_values[attr] = value
        return remote_cls(**field_values)

    def has_custom_code(self) -> bool:
        """Return False; this base implementation reports no custom code."""
        return False

    @classmethod
    def remote_to_local(cls, remote_obj):
        """
        Convert a remote callback, or a list/tuple of them, to local form.

        Lists and tuples are converted element-wise into a list. Objects that
        are not ``TrainingCallback`` instances are returned unchanged.
        """
        if isinstance(remote_obj, (list, tuple)):
            return [cls.remote_to_local(item) for item in remote_obj]
        if isinstance(remote_obj, TrainingCallback):
            return remote_obj.to_local()
        return remote_obj

    def _extract_kw(self) -> dict:
        """Return the fields of this object whose value is not None."""
        pairs = ((attr, getattr(self, attr, None)) for attr in type(self)._FIELDS)
        return {attr: value for attr, value in pairs if value is not None}

    def to_local(self):
        """Instantiate ``_local_cls`` with the non-None field values."""
        local_cls = type(self)._local_cls
        return local_cls(**self._extract_kw())

    def __call__(self, *args, **kwargs):
        """Build the local callback and invoke it with the given arguments."""
        local_callback = self.to_local()
        return local_callback(*args, **kwargs)