maxframe 1.3.1__cp310-cp310-win32.whl → 2.0.0b1__cp310-cp310-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (639)
  1. maxframe/_utils.cp310-win32.pyd +0 -0
  2. maxframe/_utils.pyi +21 -0
  3. maxframe/_utils.pyx +4 -3
  4. maxframe/codegen/__init__.py +27 -0
  5. maxframe/{codegen.py → codegen/core.py} +49 -43
  6. maxframe/codegen/spe/__init__.py +16 -0
  7. maxframe/codegen/spe/core.py +307 -0
  8. maxframe/codegen/spe/dataframe/__init__.py +37 -0
  9. maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
  10. maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
  11. maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
  12. maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
  13. maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
  14. maxframe/codegen/spe/dataframe/datasource.py +181 -0
  15. maxframe/codegen/spe/dataframe/datastore.py +204 -0
  16. maxframe/codegen/spe/dataframe/extensions.py +63 -0
  17. maxframe/codegen/spe/dataframe/fetch.py +26 -0
  18. maxframe/codegen/spe/dataframe/groupby.py +224 -0
  19. maxframe/codegen/spe/dataframe/indexing.py +238 -0
  20. maxframe/codegen/spe/dataframe/merge.py +73 -0
  21. maxframe/codegen/spe/dataframe/misc.py +286 -0
  22. maxframe/codegen/spe/dataframe/missing.py +64 -0
  23. maxframe/codegen/spe/dataframe/reduction.py +160 -0
  24. maxframe/codegen/spe/dataframe/sort.py +83 -0
  25. maxframe/codegen/spe/dataframe/statistics.py +46 -0
  26. maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
  27. maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
  29. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
  30. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
  31. maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
  32. maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
  33. maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
  34. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
  35. maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
  36. maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
  37. maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
  38. maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
  39. maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
  40. maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
  41. maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
  42. maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
  43. maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
  44. maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
  45. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
  46. maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
  47. maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
  48. maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
  49. maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
  50. maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
  51. maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
  52. maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
  53. maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
  54. maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
  55. maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
  56. maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
  57. maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
  58. maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
  59. maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
  60. maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
  61. maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
  62. maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
  63. maxframe/codegen/spe/dataframe/tseries.py +46 -0
  64. maxframe/codegen/spe/dataframe/udf.py +62 -0
  65. maxframe/codegen/spe/dataframe/value_counts.py +31 -0
  66. maxframe/codegen/spe/dataframe/window.py +65 -0
  67. maxframe/codegen/spe/learn/__init__.py +15 -0
  68. maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
  69. maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
  70. maxframe/codegen/spe/learn/contrib/models.py +41 -0
  71. maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
  72. maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
  73. maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
  74. maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
  75. maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
  76. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
  77. maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
  78. maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
  79. maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
  80. maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
  81. maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
  82. maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
  83. maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
  84. maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
  85. maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
  86. maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
  87. maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
  88. maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
  89. maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
  90. maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
  91. maxframe/codegen/spe/learn/utils/__init__.py +15 -0
  92. maxframe/codegen/spe/learn/utils/checks.py +55 -0
  93. maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
  94. maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
  95. maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
  96. maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
  97. maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
  98. maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
  99. maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
  100. maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
  101. maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
  102. maxframe/codegen/spe/learn/utils/validation.py +35 -0
  103. maxframe/codegen/spe/objects.py +26 -0
  104. maxframe/codegen/spe/remote.py +29 -0
  105. maxframe/codegen/spe/tensor/__init__.py +28 -0
  106. maxframe/codegen/spe/tensor/arithmetic.py +95 -0
  107. maxframe/codegen/spe/tensor/core.py +41 -0
  108. maxframe/codegen/spe/tensor/datasource.py +165 -0
  109. maxframe/codegen/spe/tensor/extensions.py +35 -0
  110. maxframe/codegen/spe/tensor/fetch.py +26 -0
  111. maxframe/codegen/spe/tensor/indexing.py +63 -0
  112. maxframe/codegen/spe/tensor/linalg.py +63 -0
  113. maxframe/codegen/spe/tensor/merge.py +31 -0
  114. maxframe/codegen/spe/tensor/misc.py +121 -0
  115. maxframe/codegen/spe/tensor/random.py +29 -0
  116. maxframe/codegen/spe/tensor/reduction.py +39 -0
  117. maxframe/codegen/spe/tensor/reshape.py +26 -0
  118. maxframe/codegen/spe/tensor/sort.py +42 -0
  119. maxframe/codegen/spe/tensor/special.py +35 -0
  120. maxframe/codegen/spe/tensor/statistics.py +24 -0
  121. maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
  122. maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
  123. maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
  124. maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
  125. maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
  126. maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
  127. maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
  128. maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
  129. maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
  130. maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
  131. maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
  132. maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
  133. maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
  134. maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
  135. maxframe/codegen/spe/tests/__init__.py +13 -0
  136. maxframe/codegen/spe/tests/test_remote.py +29 -0
  137. maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
  138. maxframe/codegen/spe/utils.py +54 -0
  139. maxframe/codegen/tests/__init__.py +13 -0
  140. maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
  141. maxframe/config/__init__.py +1 -1
  142. maxframe/config/config.py +50 -23
  143. maxframe/config/tests/test_config.py +4 -12
  144. maxframe/config/validators.py +5 -0
  145. maxframe/conftest.py +38 -10
  146. maxframe/core/__init__.py +1 -0
  147. maxframe/core/context.py +110 -0
  148. maxframe/core/entity/__init__.py +1 -0
  149. maxframe/core/entity/core.py +0 -7
  150. maxframe/core/entity/objects.py +19 -5
  151. maxframe/core/entity/output_types.py +11 -0
  152. maxframe/core/entity/tests/test_objects.py +11 -12
  153. maxframe/core/entity/tileables.py +3 -1
  154. maxframe/core/entity/utils.py +15 -0
  155. maxframe/core/graph/__init__.py +6 -1
  156. maxframe/core/graph/builder/base.py +5 -1
  157. maxframe/core/graph/core.cp310-win32.pyd +0 -0
  158. maxframe/core/graph/core.pyx +17 -6
  159. maxframe/core/graph/entity.py +18 -6
  160. maxframe/core/operator/__init__.py +8 -3
  161. maxframe/core/operator/base.py +35 -12
  162. maxframe/core/operator/core.py +37 -14
  163. maxframe/core/operator/fetch.py +5 -18
  164. maxframe/core/operator/objects.py +0 -20
  165. maxframe/core/operator/shuffle.py +6 -72
  166. maxframe/dataframe/__init__.py +1 -0
  167. maxframe/dataframe/accessors/datetime_/core.py +7 -4
  168. maxframe/dataframe/accessors/string_/core.py +9 -6
  169. maxframe/dataframe/arithmetic/core.py +31 -20
  170. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
  171. maxframe/dataframe/core.py +98 -91
  172. maxframe/dataframe/datasource/core.py +8 -1
  173. maxframe/dataframe/datasource/date_range.py +8 -0
  174. maxframe/dataframe/datasource/from_index.py +9 -5
  175. maxframe/dataframe/datasource/from_records.py +9 -2
  176. maxframe/dataframe/datasource/from_tensor.py +32 -21
  177. maxframe/dataframe/datasource/read_csv.py +8 -2
  178. maxframe/dataframe/datasource/read_odps_query.py +33 -3
  179. maxframe/dataframe/datasource/read_odps_table.py +20 -5
  180. maxframe/dataframe/datasource/read_parquet.py +8 -3
  181. maxframe/dataframe/datasource/tests/test_datasource.py +33 -0
  182. maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
  183. maxframe/dataframe/datastore/to_csv.py +7 -3
  184. maxframe/dataframe/datastore/to_odps.py +42 -6
  185. maxframe/dataframe/extensions/__init__.py +6 -1
  186. maxframe/dataframe/extensions/apply_chunk.py +96 -136
  187. maxframe/dataframe/extensions/flatjson.py +3 -2
  188. maxframe/dataframe/extensions/flatmap.py +15 -7
  189. maxframe/dataframe/fetch/core.py +12 -1
  190. maxframe/dataframe/groupby/__init__.py +7 -0
  191. maxframe/dataframe/groupby/aggregation.py +9 -8
  192. maxframe/dataframe/groupby/apply.py +50 -74
  193. maxframe/dataframe/groupby/apply_chunk.py +393 -0
  194. maxframe/dataframe/groupby/core.py +80 -17
  195. maxframe/dataframe/groupby/extensions.py +26 -0
  196. maxframe/dataframe/groupby/fill.py +9 -4
  197. maxframe/dataframe/groupby/sample.py +7 -7
  198. maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
  199. maxframe/dataframe/groupby/transform.py +57 -54
  200. maxframe/dataframe/indexing/align.py +7 -6
  201. maxframe/dataframe/indexing/getitem.py +9 -8
  202. maxframe/dataframe/indexing/iloc.py +28 -23
  203. maxframe/dataframe/indexing/insert.py +7 -3
  204. maxframe/dataframe/indexing/loc.py +9 -8
  205. maxframe/dataframe/indexing/reindex.py +36 -30
  206. maxframe/dataframe/indexing/rename_axis.py +18 -10
  207. maxframe/dataframe/indexing/reset_index.py +0 -2
  208. maxframe/dataframe/indexing/sample.py +13 -9
  209. maxframe/dataframe/indexing/set_axis.py +9 -6
  210. maxframe/dataframe/indexing/setitem.py +8 -5
  211. maxframe/dataframe/indexing/where.py +12 -9
  212. maxframe/dataframe/merge/__init__.py +0 -1
  213. maxframe/dataframe/merge/concat.py +10 -31
  214. maxframe/dataframe/merge/merge.py +2 -24
  215. maxframe/dataframe/misc/__init__.py +6 -0
  216. maxframe/dataframe/misc/_duplicate.py +7 -3
  217. maxframe/dataframe/misc/apply.py +106 -139
  218. maxframe/dataframe/misc/astype.py +3 -2
  219. maxframe/dataframe/misc/case_when.py +11 -7
  220. maxframe/dataframe/misc/cut.py +11 -10
  221. maxframe/dataframe/misc/describe.py +7 -3
  222. maxframe/dataframe/misc/drop.py +13 -11
  223. maxframe/dataframe/misc/eval.py +0 -2
  224. maxframe/dataframe/misc/get_dummies.py +78 -49
  225. maxframe/dataframe/misc/isin.py +13 -10
  226. maxframe/dataframe/misc/map.py +21 -6
  227. maxframe/dataframe/misc/melt.py +8 -1
  228. maxframe/dataframe/misc/pivot.py +232 -0
  229. maxframe/dataframe/misc/pivot_table.py +52 -40
  230. maxframe/dataframe/misc/rechunk.py +59 -0
  231. maxframe/dataframe/misc/shift.py +7 -4
  232. maxframe/dataframe/misc/stack.py +5 -3
  233. maxframe/dataframe/misc/tests/test_misc.py +167 -1
  234. maxframe/dataframe/misc/transform.py +63 -65
  235. maxframe/dataframe/misc/value_counts.py +7 -4
  236. maxframe/dataframe/missing/dropna.py +16 -7
  237. maxframe/dataframe/missing/fillna.py +18 -10
  238. maxframe/dataframe/missing/replace.py +10 -6
  239. maxframe/dataframe/missing/tests/test_missing.py +2 -2
  240. maxframe/dataframe/operators.py +1 -27
  241. maxframe/dataframe/reduction/aggregation.py +65 -3
  242. maxframe/dataframe/reduction/core.py +3 -1
  243. maxframe/dataframe/reduction/median.py +1 -1
  244. maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
  245. maxframe/dataframe/reduction/unique.py +53 -7
  246. maxframe/dataframe/statistics/corr.py +9 -6
  247. maxframe/dataframe/statistics/quantile.py +9 -6
  248. maxframe/dataframe/tseries/to_datetime.py +6 -4
  249. maxframe/dataframe/utils.py +219 -31
  250. maxframe/dataframe/window/rolling.py +7 -4
  251. maxframe/env.py +1 -0
  252. maxframe/errors.py +9 -0
  253. maxframe/extension.py +13 -2
  254. maxframe/io/objects/core.py +67 -51
  255. maxframe/io/objects/tensor.py +73 -17
  256. maxframe/io/objects/tests/test_object_io.py +8 -55
  257. maxframe/io/odpsio/arrow.py +15 -2
  258. maxframe/io/odpsio/schema.py +43 -13
  259. maxframe/io/odpsio/tableio.py +63 -11
  260. maxframe/io/odpsio/tests/test_arrow.py +1 -2
  261. maxframe/io/odpsio/tests/test_schema.py +114 -1
  262. maxframe/io/odpsio/tests/test_tableio.py +42 -0
  263. maxframe/io/odpsio/tests/test_volumeio.py +22 -48
  264. maxframe/learn/__init__.py +2 -2
  265. maxframe/learn/contrib/__init__.py +2 -2
  266. maxframe/learn/contrib/graph/connected_components.py +2 -1
  267. maxframe/learn/contrib/lightgbm/__init__.py +33 -0
  268. maxframe/learn/contrib/lightgbm/_predict.py +138 -0
  269. maxframe/learn/contrib/lightgbm/_train.py +163 -0
  270. maxframe/learn/contrib/lightgbm/callback.py +114 -0
  271. maxframe/learn/contrib/lightgbm/classifier.py +199 -0
  272. maxframe/learn/contrib/lightgbm/core.py +372 -0
  273. maxframe/learn/contrib/lightgbm/dataset.py +153 -0
  274. maxframe/learn/contrib/lightgbm/regressor.py +29 -0
  275. maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
  276. maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
  277. maxframe/learn/contrib/models.py +38 -9
  278. maxframe/learn/contrib/utils.py +55 -0
  279. maxframe/learn/contrib/xgboost/callback.py +86 -0
  280. maxframe/learn/contrib/xgboost/classifier.py +26 -30
  281. maxframe/learn/contrib/xgboost/core.py +53 -42
  282. maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
  283. maxframe/learn/contrib/xgboost/predict.py +16 -9
  284. maxframe/learn/contrib/xgboost/regressor.py +28 -27
  285. maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
  286. maxframe/learn/contrib/xgboost/train.py +59 -16
  287. maxframe/learn/core.py +252 -0
  288. maxframe/learn/datasets/__init__.py +20 -0
  289. maxframe/learn/datasets/samples_generator.py +628 -0
  290. maxframe/learn/linear_model/__init__.py +15 -0
  291. maxframe/learn/linear_model/_base.py +163 -0
  292. maxframe/learn/linear_model/_lin_reg.py +175 -0
  293. maxframe/learn/metrics/__init__.py +25 -0
  294. maxframe/learn/metrics/_check_targets.py +95 -0
  295. maxframe/learn/metrics/_classification.py +1121 -0
  296. maxframe/learn/metrics/_regression.py +256 -0
  297. maxframe/learn/model_selection/__init__.py +15 -0
  298. maxframe/learn/model_selection/_split.py +451 -0
  299. maxframe/learn/model_selection/tests/__init__.py +13 -0
  300. maxframe/learn/model_selection/tests/test_split.py +156 -0
  301. maxframe/learn/preprocessing/__init__.py +16 -0
  302. maxframe/learn/preprocessing/_data/__init__.py +17 -0
  303. maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
  304. maxframe/learn/preprocessing/_data/normalize.py +127 -0
  305. maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
  306. maxframe/learn/preprocessing/_data/utils.py +79 -0
  307. maxframe/learn/preprocessing/_label/__init__.py +16 -0
  308. maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
  309. maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
  310. maxframe/learn/utils/__init__.py +4 -0
  311. maxframe/learn/utils/_encode.py +314 -0
  312. maxframe/learn/utils/checks.py +161 -0
  313. maxframe/learn/utils/core.py +33 -0
  314. maxframe/learn/utils/extmath.py +176 -0
  315. maxframe/learn/utils/multiclass.py +292 -0
  316. maxframe/learn/utils/shuffle.py +114 -0
  317. maxframe/learn/utils/sparsefuncs.py +87 -0
  318. maxframe/learn/utils/validation.py +775 -0
  319. maxframe/lib/__init__.py +0 -2
  320. maxframe/lib/compat.py +145 -0
  321. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  322. maxframe/lib/mmh3.cp310-win32.pyd +0 -0
  323. maxframe/lib/sparse/__init__.py +10 -15
  324. maxframe/lib/sparse/array.py +45 -33
  325. maxframe/lib/sparse/core.py +0 -2
  326. maxframe/lib/sparse/linalg.py +31 -0
  327. maxframe/lib/sparse/matrix.py +5 -2
  328. maxframe/lib/sparse/tests/__init__.py +0 -2
  329. maxframe/lib/sparse/tests/test_sparse.py +53 -53
  330. maxframe/lib/sparse/vector.py +0 -2
  331. maxframe/mixin.py +59 -2
  332. maxframe/opcodes.py +13 -5
  333. maxframe/protocol.py +67 -14
  334. maxframe/remote/core.py +16 -14
  335. maxframe/remote/run_script.py +6 -3
  336. maxframe/serialization/__init__.py +2 -0
  337. maxframe/serialization/core.cp310-win32.pyd +0 -0
  338. maxframe/serialization/core.pxd +3 -0
  339. maxframe/serialization/core.pyi +3 -1
  340. maxframe/serialization/core.pyx +82 -4
  341. maxframe/serialization/pandas.py +5 -1
  342. maxframe/serialization/serializables/core.py +6 -5
  343. maxframe/serialization/serializables/field.py +2 -2
  344. maxframe/serialization/serializables/tests/test_field_type.py +3 -5
  345. maxframe/serialization/tests/test_serial.py +27 -0
  346. maxframe/session.py +4 -71
  347. maxframe/sperunner.py +165 -0
  348. maxframe/tensor/__init__.py +35 -2
  349. maxframe/tensor/arithmetic/__init__.py +2 -4
  350. maxframe/tensor/arithmetic/abs.py +0 -2
  351. maxframe/tensor/arithmetic/absolute.py +0 -2
  352. maxframe/tensor/arithmetic/add.py +34 -4
  353. maxframe/tensor/arithmetic/angle.py +0 -2
  354. maxframe/tensor/arithmetic/arccos.py +1 -4
  355. maxframe/tensor/arithmetic/arccosh.py +1 -3
  356. maxframe/tensor/arithmetic/arcsin.py +0 -2
  357. maxframe/tensor/arithmetic/arcsinh.py +0 -2
  358. maxframe/tensor/arithmetic/arctan.py +0 -2
  359. maxframe/tensor/arithmetic/arctan2.py +0 -2
  360. maxframe/tensor/arithmetic/arctanh.py +0 -2
  361. maxframe/tensor/arithmetic/around.py +0 -2
  362. maxframe/tensor/arithmetic/bitand.py +0 -2
  363. maxframe/tensor/arithmetic/bitor.py +1 -3
  364. maxframe/tensor/arithmetic/bitxor.py +1 -3
  365. maxframe/tensor/arithmetic/cbrt.py +0 -2
  366. maxframe/tensor/arithmetic/ceil.py +0 -2
  367. maxframe/tensor/arithmetic/clip.py +13 -13
  368. maxframe/tensor/arithmetic/conj.py +0 -2
  369. maxframe/tensor/arithmetic/copysign.py +0 -2
  370. maxframe/tensor/arithmetic/core.py +47 -39
  371. maxframe/tensor/arithmetic/cos.py +1 -3
  372. maxframe/tensor/arithmetic/cosh.py +0 -2
  373. maxframe/tensor/arithmetic/deg2rad.py +0 -2
  374. maxframe/tensor/arithmetic/degrees.py +0 -2
  375. maxframe/tensor/arithmetic/divide.py +0 -2
  376. maxframe/tensor/arithmetic/equal.py +0 -2
  377. maxframe/tensor/arithmetic/exp.py +1 -3
  378. maxframe/tensor/arithmetic/exp2.py +0 -2
  379. maxframe/tensor/arithmetic/expm1.py +0 -2
  380. maxframe/tensor/arithmetic/fabs.py +0 -2
  381. maxframe/tensor/arithmetic/fix.py +0 -2
  382. maxframe/tensor/arithmetic/float_power.py +0 -2
  383. maxframe/tensor/arithmetic/floor.py +0 -2
  384. maxframe/tensor/arithmetic/floordiv.py +0 -2
  385. maxframe/tensor/arithmetic/fmax.py +0 -2
  386. maxframe/tensor/arithmetic/fmin.py +0 -2
  387. maxframe/tensor/arithmetic/fmod.py +0 -2
  388. maxframe/tensor/arithmetic/frexp.py +6 -2
  389. maxframe/tensor/arithmetic/greater.py +0 -2
  390. maxframe/tensor/arithmetic/greater_equal.py +0 -2
  391. maxframe/tensor/arithmetic/hypot.py +0 -2
  392. maxframe/tensor/arithmetic/i0.py +1 -3
  393. maxframe/tensor/arithmetic/imag.py +0 -2
  394. maxframe/tensor/arithmetic/invert.py +1 -3
  395. maxframe/tensor/arithmetic/isclose.py +0 -2
  396. maxframe/tensor/arithmetic/iscomplex.py +0 -2
  397. maxframe/tensor/arithmetic/isfinite.py +1 -3
  398. maxframe/tensor/arithmetic/isinf.py +0 -2
  399. maxframe/tensor/arithmetic/isnan.py +0 -2
  400. maxframe/tensor/arithmetic/isreal.py +0 -2
  401. maxframe/tensor/arithmetic/ldexp.py +0 -2
  402. maxframe/tensor/arithmetic/less.py +0 -2
  403. maxframe/tensor/arithmetic/less_equal.py +0 -2
  404. maxframe/tensor/arithmetic/log.py +1 -3
  405. maxframe/tensor/arithmetic/log10.py +1 -3
  406. maxframe/tensor/arithmetic/log1p.py +1 -3
  407. maxframe/tensor/arithmetic/log2.py +1 -3
  408. maxframe/tensor/arithmetic/logaddexp.py +0 -2
  409. maxframe/tensor/arithmetic/logaddexp2.py +0 -2
  410. maxframe/tensor/arithmetic/logical_and.py +0 -2
  411. maxframe/tensor/arithmetic/logical_not.py +1 -3
  412. maxframe/tensor/arithmetic/logical_or.py +0 -2
  413. maxframe/tensor/arithmetic/logical_xor.py +0 -2
  414. maxframe/tensor/arithmetic/lshift.py +0 -2
  415. maxframe/tensor/arithmetic/maximum.py +0 -2
  416. maxframe/tensor/arithmetic/minimum.py +0 -2
  417. maxframe/tensor/arithmetic/mod.py +0 -2
  418. maxframe/tensor/arithmetic/modf.py +6 -2
  419. maxframe/tensor/arithmetic/multiply.py +37 -4
  420. maxframe/tensor/arithmetic/nan_to_num.py +0 -2
  421. maxframe/tensor/arithmetic/negative.py +0 -2
  422. maxframe/tensor/arithmetic/nextafter.py +0 -2
  423. maxframe/tensor/arithmetic/not_equal.py +0 -2
  424. maxframe/tensor/arithmetic/positive.py +0 -2
  425. maxframe/tensor/arithmetic/power.py +0 -2
  426. maxframe/tensor/arithmetic/rad2deg.py +0 -2
  427. maxframe/tensor/arithmetic/radians.py +0 -2
  428. maxframe/tensor/arithmetic/real.py +0 -2
  429. maxframe/tensor/arithmetic/reciprocal.py +5 -3
  430. maxframe/tensor/arithmetic/rint.py +1 -3
  431. maxframe/tensor/arithmetic/rshift.py +0 -2
  432. maxframe/tensor/arithmetic/setimag.py +0 -2
  433. maxframe/tensor/arithmetic/setreal.py +0 -2
  434. maxframe/tensor/arithmetic/sign.py +0 -2
  435. maxframe/tensor/arithmetic/signbit.py +0 -2
  436. maxframe/tensor/arithmetic/sin.py +0 -2
  437. maxframe/tensor/arithmetic/sinc.py +1 -3
  438. maxframe/tensor/arithmetic/sinh.py +0 -2
  439. maxframe/tensor/arithmetic/spacing.py +0 -2
  440. maxframe/tensor/arithmetic/sqrt.py +0 -2
  441. maxframe/tensor/arithmetic/square.py +0 -2
  442. maxframe/tensor/arithmetic/subtract.py +4 -2
  443. maxframe/tensor/arithmetic/tan.py +0 -2
  444. maxframe/tensor/arithmetic/tanh.py +0 -2
  445. maxframe/tensor/arithmetic/tests/__init__.py +0 -2
  446. maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
  447. maxframe/tensor/arithmetic/truediv.py +0 -2
  448. maxframe/tensor/arithmetic/trunc.py +0 -2
  449. maxframe/tensor/arithmetic/utils.py +32 -6
  450. maxframe/tensor/array_utils.py +3 -25
  451. maxframe/tensor/core.py +6 -6
  452. maxframe/tensor/datasource/__init__.py +10 -2
  453. maxframe/tensor/datasource/arange.py +0 -2
  454. maxframe/tensor/datasource/array.py +3 -22
  455. maxframe/tensor/datasource/core.py +15 -10
  456. maxframe/tensor/datasource/diag.py +140 -0
  457. maxframe/tensor/datasource/diagflat.py +69 -0
  458. maxframe/tensor/datasource/empty.py +0 -2
  459. maxframe/tensor/datasource/eye.py +95 -0
  460. maxframe/tensor/datasource/from_dataframe.py +0 -2
  461. maxframe/tensor/datasource/from_dense.py +0 -17
  462. maxframe/tensor/datasource/from_sparse.py +0 -2
  463. maxframe/tensor/datasource/full.py +0 -2
  464. maxframe/tensor/datasource/identity.py +54 -0
  465. maxframe/tensor/datasource/indices.py +115 -0
  466. maxframe/tensor/datasource/linspace.py +140 -0
  467. maxframe/tensor/datasource/meshgrid.py +135 -0
  468. maxframe/tensor/datasource/ones.py +8 -3
  469. maxframe/tensor/datasource/tests/test_datasource.py +32 -1
  470. maxframe/tensor/datasource/tri_array.py +107 -0
  471. maxframe/tensor/datasource/zeros.py +7 -3
  472. maxframe/tensor/extensions/__init__.py +31 -0
  473. maxframe/tensor/extensions/accessor.py +25 -0
  474. maxframe/tensor/extensions/apply_chunk.py +137 -0
  475. maxframe/tensor/indexing/__init__.py +1 -1
  476. maxframe/tensor/indexing/choose.py +8 -6
  477. maxframe/tensor/indexing/compress.py +0 -2
  478. maxframe/tensor/indexing/extract.py +0 -2
  479. maxframe/tensor/indexing/fill_diagonal.py +9 -6
  480. maxframe/tensor/indexing/flatnonzero.py +1 -3
  481. maxframe/tensor/indexing/getitem.py +10 -43
  482. maxframe/tensor/indexing/nonzero.py +2 -4
  483. maxframe/tensor/indexing/setitem.py +19 -9
  484. maxframe/tensor/indexing/slice.py +6 -3
  485. maxframe/tensor/indexing/take.py +0 -2
  486. maxframe/tensor/indexing/tests/__init__.py +0 -2
  487. maxframe/tensor/indexing/tests/test_indexing.py +0 -2
  488. maxframe/tensor/indexing/unravel_index.py +6 -6
  489. maxframe/tensor/lib/__init__.py +16 -0
  490. maxframe/tensor/lib/index_tricks.py +404 -0
  491. maxframe/tensor/linalg/__init__.py +36 -0
  492. maxframe/tensor/linalg/dot.py +145 -0
  493. maxframe/tensor/linalg/inner.py +36 -0
  494. maxframe/tensor/linalg/inv.py +83 -0
  495. maxframe/tensor/linalg/lu.py +115 -0
  496. maxframe/tensor/linalg/matmul.py +225 -0
  497. maxframe/tensor/linalg/qr.py +124 -0
  498. maxframe/tensor/linalg/solve_triangular.py +103 -0
  499. maxframe/tensor/linalg/svd.py +167 -0
  500. maxframe/tensor/linalg/tensordot.py +213 -0
  501. maxframe/tensor/linalg/vdot.py +73 -0
  502. maxframe/tensor/merge/__init__.py +4 -0
  503. maxframe/tensor/merge/append.py +74 -0
  504. maxframe/tensor/merge/column_stack.py +63 -0
  505. maxframe/tensor/merge/concatenate.py +3 -2
  506. maxframe/tensor/merge/dstack.py +71 -0
  507. maxframe/tensor/merge/hstack.py +70 -0
  508. maxframe/tensor/merge/stack.py +0 -2
  509. maxframe/tensor/merge/tests/test_merge.py +0 -2
  510. maxframe/tensor/misc/__init__.py +18 -5
  511. maxframe/tensor/misc/astype.py +10 -8
  512. maxframe/tensor/misc/broadcast_to.py +1 -1
  513. maxframe/tensor/misc/copy.py +64 -0
  514. maxframe/tensor/misc/diff.py +115 -0
  515. maxframe/tensor/misc/flatten.py +63 -0
  516. maxframe/tensor/misc/in1d.py +94 -0
  517. maxframe/tensor/misc/isin.py +130 -0
  518. maxframe/tensor/misc/ndim.py +53 -0
  519. maxframe/tensor/misc/ravel.py +0 -2
  520. maxframe/tensor/misc/repeat.py +129 -0
  521. maxframe/tensor/misc/searchsorted.py +147 -0
  522. maxframe/tensor/misc/setdiff1d.py +58 -0
  523. maxframe/tensor/misc/squeeze.py +117 -0
  524. maxframe/tensor/misc/swapaxes.py +113 -0
  525. maxframe/tensor/misc/tests/test_misc.py +0 -2
  526. maxframe/tensor/misc/transpose.py +8 -4
  527. maxframe/tensor/misc/trapezoid.py +123 -0
  528. maxframe/tensor/misc/unique.py +0 -1
  529. maxframe/tensor/misc/where.py +10 -8
  530. maxframe/tensor/operators.py +0 -34
  531. maxframe/tensor/random/__init__.py +3 -5
  532. maxframe/tensor/random/binomial.py +0 -2
  533. maxframe/tensor/random/bytes.py +0 -2
  534. maxframe/tensor/random/chisquare.py +0 -2
  535. maxframe/tensor/random/choice.py +9 -8
  536. maxframe/tensor/random/core.py +20 -5
  537. maxframe/tensor/random/dirichlet.py +0 -2
  538. maxframe/tensor/random/exponential.py +0 -2
  539. maxframe/tensor/random/f.py +2 -4
  540. maxframe/tensor/random/gamma.py +0 -2
  541. maxframe/tensor/random/geometric.py +0 -2
  542. maxframe/tensor/random/gumbel.py +0 -2
  543. maxframe/tensor/random/hypergeometric.py +0 -2
  544. maxframe/tensor/random/laplace.py +2 -4
  545. maxframe/tensor/random/logistic.py +0 -2
  546. maxframe/tensor/random/lognormal.py +0 -2
  547. maxframe/tensor/random/logseries.py +0 -2
  548. maxframe/tensor/random/multinomial.py +0 -2
  549. maxframe/tensor/random/multivariate_normal.py +0 -2
  550. maxframe/tensor/random/negative_binomial.py +0 -2
  551. maxframe/tensor/random/noncentral_chisquare.py +0 -2
  552. maxframe/tensor/random/noncentral_f.py +1 -3
  553. maxframe/tensor/random/normal.py +0 -2
  554. maxframe/tensor/random/pareto.py +0 -2
  555. maxframe/tensor/random/permutation.py +6 -3
  556. maxframe/tensor/random/poisson.py +0 -2
  557. maxframe/tensor/random/power.py +0 -2
  558. maxframe/tensor/random/rand.py +0 -2
  559. maxframe/tensor/random/randint.py +0 -2
  560. maxframe/tensor/random/randn.py +0 -2
  561. maxframe/tensor/random/random_integers.py +0 -2
  562. maxframe/tensor/random/random_sample.py +0 -2
  563. maxframe/tensor/random/rayleigh.py +0 -2
  564. maxframe/tensor/random/standard_cauchy.py +0 -2
  565. maxframe/tensor/random/standard_exponential.py +0 -2
  566. maxframe/tensor/random/standard_gamma.py +0 -2
  567. maxframe/tensor/random/standard_normal.py +0 -2
  568. maxframe/tensor/random/standard_t.py +0 -2
  569. maxframe/tensor/random/tests/__init__.py +0 -2
  570. maxframe/tensor/random/tests/test_random.py +0 -2
  571. maxframe/tensor/random/triangular.py +0 -2
  572. maxframe/tensor/random/uniform.py +0 -2
  573. maxframe/tensor/random/vonmises.py +0 -2
  574. maxframe/tensor/random/wald.py +0 -2
  575. maxframe/tensor/random/weibull.py +0 -2
  576. maxframe/tensor/random/zipf.py +0 -2
  577. maxframe/tensor/reduction/__init__.py +0 -2
  578. maxframe/tensor/reduction/all.py +0 -2
  579. maxframe/tensor/reduction/allclose.py +0 -2
  580. maxframe/tensor/reduction/any.py +0 -2
  581. maxframe/tensor/reduction/argmax.py +1 -3
  582. maxframe/tensor/reduction/argmin.py +1 -3
  583. maxframe/tensor/reduction/array_equal.py +0 -2
  584. maxframe/tensor/reduction/core.py +0 -2
  585. maxframe/tensor/reduction/count_nonzero.py +0 -2
  586. maxframe/tensor/reduction/cumprod.py +0 -2
  587. maxframe/tensor/reduction/cumsum.py +0 -2
  588. maxframe/tensor/reduction/max.py +0 -2
  589. maxframe/tensor/reduction/mean.py +0 -2
  590. maxframe/tensor/reduction/min.py +0 -2
  591. maxframe/tensor/reduction/nanargmax.py +0 -2
  592. maxframe/tensor/reduction/nanargmin.py +0 -2
  593. maxframe/tensor/reduction/nancumprod.py +0 -2
  594. maxframe/tensor/reduction/nancumsum.py +0 -2
  595. maxframe/tensor/reduction/nanmax.py +0 -2
  596. maxframe/tensor/reduction/nanmean.py +0 -2
  597. maxframe/tensor/reduction/nanmin.py +0 -2
  598. maxframe/tensor/reduction/nanprod.py +0 -2
  599. maxframe/tensor/reduction/nanstd.py +0 -2
  600. maxframe/tensor/reduction/nansum.py +0 -2
  601. maxframe/tensor/reduction/nanvar.py +0 -2
  602. maxframe/tensor/reduction/prod.py +0 -2
  603. maxframe/tensor/reduction/std.py +0 -2
  604. maxframe/tensor/reduction/sum.py +0 -2
  605. maxframe/tensor/reduction/tests/test_reduction.py +1 -4
  606. maxframe/tensor/reduction/var.py +0 -2
  607. maxframe/tensor/reshape/__init__.py +0 -2
  608. maxframe/tensor/reshape/reshape.py +6 -5
  609. maxframe/tensor/reshape/tests/__init__.py +0 -2
  610. maxframe/tensor/reshape/tests/test_reshape.py +0 -2
  611. maxframe/tensor/sort/__init__.py +16 -0
  612. maxframe/tensor/sort/argsort.py +150 -0
  613. maxframe/tensor/sort/sort.py +295 -0
  614. maxframe/tensor/special/__init__.py +37 -0
  615. maxframe/tensor/special/core.py +38 -0
  616. maxframe/tensor/special/misc.py +142 -0
  617. maxframe/tensor/special/statistical.py +56 -0
  618. maxframe/tensor/statistics/__init__.py +5 -0
  619. maxframe/tensor/statistics/average.py +143 -0
  620. maxframe/tensor/statistics/bincount.py +133 -0
  621. maxframe/tensor/statistics/quantile.py +10 -8
  622. maxframe/tensor/ufunc/__init__.py +0 -2
  623. maxframe/tensor/ufunc/ufunc.py +0 -2
  624. maxframe/tensor/utils.py +21 -3
  625. maxframe/tests/test_protocol.py +3 -3
  626. maxframe/tests/test_utils.py +210 -1
  627. maxframe/tests/utils.py +67 -1
  628. maxframe/udf.py +76 -6
  629. maxframe/utils.py +418 -17
  630. {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/METADATA +4 -1
  631. maxframe-2.0.0b1.dist-info/RECORD +939 -0
  632. {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/WHEEL +1 -1
  633. maxframe_client/clients/framedriver.py +19 -3
  634. maxframe_client/fetcher.py +113 -6
  635. maxframe_client/session/odps.py +173 -38
  636. maxframe_client/session/task.py +3 -1
  637. maxframe_client/tests/test_session.py +41 -5
  638. maxframe-1.3.1.dist-info/RECORD +0 -705
  639. {maxframe-1.3.1.dist-info → maxframe-2.0.0b1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,153 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import List
16
+
17
+ from .... import opcodes
18
+ from ....core import EntityData
19
+ from ....core.entity.output_types import get_output_types
20
+ from ....core.operator.base import Operator
21
+ from ....core.operator.core import TileableOperatorMixin
22
+ from ....dataframe.core import DATAFRAME_TYPE
23
+ from ....serialization.serializables import BoolField, KeyField, ListField
24
+ from ....serialization.serializables.field import AnyField
25
+ from ....tensor import tensor as astensor
26
+ from ....tensor.core import TENSOR_TYPE
27
+ from ....typing_ import TileableType
28
+ from ...utils import convert_to_tensor_or_dataframe
29
+
30
+
31
class ToLGBMDataset(Operator, TileableOperatorMixin):
    """Operator bundling a data tileable with its optional companion inputs.

    ``data`` is always the first input; ``label``, ``weight`` and
    ``init_score`` are appended (in that order) only when they are provided.
    The remaining fields are carried on the operator unchanged.
    """

    _op_type_ = opcodes.TO_LGBM_DATASET

    data = KeyField("data", default=None)
    label = KeyField("label", default=None)
    reference = KeyField("reference", default=None)
    weight = KeyField("weight", default=None)
    init_score = KeyField("init_score", default=None)
    group = AnyField("group", default=None)
    feature_name = ListField("feature_name", default=None)
    categorical_feature = ListField("categorical_feature", default=None)
    params = AnyField("params", default=None)
    free_raw_data = BoolField("free_raw_data", default=None)
    position = AnyField("position", default=None)
    # When set, ``output_limit`` also counts every provided one of
    # label, weight and init_score in addition to the data output.
    collocate = BoolField("collocate", default=False)

    @property
    def output_limit(self):
        """Return the number of outputs this operator produces."""
        if self.collocate:
            # Test against None instead of relying on truthiness: bool() on a
            # tileable goes through its __len__/__bool__, which may report an
            # empty object as absent or fail outright.  This matches the
            # ``is not None`` checks used by ``_set_inputs`` and ``__call__``.
            return 1 + sum(
                x is not None for x in (self.label, self.weight, self.init_score)
            )
        return 1

    @classmethod
    def _set_inputs(cls, op: "ToLGBMDataset", inputs: List[EntityData]):
        """Rebind the key fields of ``op`` to the entries of ``op._inputs``.

        The positions mirror the order in which ``__call__`` builds the
        input list: data, then label, then weight, then init_score.
        """
        super()._set_inputs(op, inputs)
        if op.data is not None:
            op.data = op._inputs[0]
        has_label = op.label is not None
        if has_label:
            op.label = op._inputs[1]
        if op.weight is not None:
            # weight directly follows label when a label is present
            i = 1 if not has_label else 2
            op.weight = op._inputs[i]
        if op.init_score is not None:
            # init_score is always appended last
            op.init_score = op._inputs[-1]

    @staticmethod
    def _get_kw(obj):
        """Collect the metadata keywords used to create an output like ``obj``."""
        if isinstance(obj, TENSOR_TYPE):
            return {"shape": obj.shape, "dtype": obj.dtype, "order": obj.order}
        else:
            return {
                "shape": obj.shape,
                "dtypes": obj.dtypes,
                "index_value": obj.index_value,
                "columns_value": obj.columns_value,
            }

    def __call__(self):
        """Create the output tileable, using the metadata of ``data``."""
        inputs = [self.data]
        kw = self._get_kw(self.data)
        if self.label is not None:
            inputs.append(self.label)
        if self.weight is not None:
            inputs.append(self.weight)
        if self.init_score is not None:
            inputs.append(self.init_score)

        return self.new_tileable(inputs, **kw)
91
+
92
+
93
def check_data(data):
    """Convert ``data`` with ``convert_to_tensor_or_dataframe`` and require 2-d.

    Raises:
        ValueError: if the converted object is not two-dimensional.
    """
    converted = convert_to_tensor_or_dataframe(data)
    if converted.ndim == 2:
        return converted
    raise ValueError(f"Expecting 2-d data, got: {converted.ndim}-d")
99
+
100
+
101
def check_array_like(y: TileableType, name: str) -> TileableType:
    """Turn an optional array-like into a tensor.

    ``None`` is passed through as ``None``.  Anything else is converted with
    ``convert_to_tensor_or_dataframe``; for a DataFrame only its first column
    is kept before the result is wrapped by ``astensor``.  ``name`` is
    accepted but not used by this function.
    """
    if y is not None:
        converted = convert_to_tensor_or_dataframe(y)
        if isinstance(converted, DATAFRAME_TYPE):
            converted = converted.iloc[:, 0]
        return astensor(converted)
    return None
108
+
109
+
110
def to_lgbm_dataset(
    data,
    label=None,
    reference=None,
    weight=None,
    group=None,
    init_score=None,
    feature_name="auto",
    categorical_feature="auto",
    params=None,
    free_raw_data=True,
    position=None,
):
    """Build a ``ToLGBMDataset`` operator from the inputs and call it.

    ``data`` must be 2-d (enforced by ``check_data``); ``label``, ``weight``
    and ``init_score`` are normalized through ``check_array_like``.  The
    sentinel value ``"auto"`` for ``feature_name`` and ``categorical_feature``
    is stored on the operator as ``None``.

    Raises:
        ValueError: if ``data`` is not 2-d or ``weight`` has more than one
            dimension.
    """
    data = check_data(data)
    label = check_array_like(label, "label")
    weight = check_array_like(weight, "weight")
    init_score = check_array_like(init_score, "init_score")

    if weight is not None and weight.ndim > 1:
        raise ValueError("weight must be 1-dimensional")

    # "auto" carries no explicit choice, so it is recorded as None.
    if feature_name == "auto":
        feature_name = None
    if categorical_feature == "auto":
        categorical_feature = None

    op_kwargs = dict(
        data=data,
        label=label,
        reference=reference,
        weight=weight,
        group=group,
        init_score=init_score,
        feature_name=feature_name,
        categorical_feature=categorical_feature,
        params=params,
        free_raw_data=free_raw_data,
        position=position,
        gpu=data.op.gpu,
        _output_types=get_output_types(data),
    )
    dataset_op = ToLGBMDataset(**op_kwargs)
    return dataset_op()


# Alias exposing the builder under the name ``Dataset``.
Dataset = to_lgbm_dataset
@@ -0,0 +1,29 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ..utils import make_import_error_func
16
+ from .core import LGBMScikitLearnBase
17
+
18
+ try:
19
+ import lightgbm
20
+ except ImportError:
21
+ lightgbm = None
22
+
23
+
24
if lightgbm:

    # lightgbm imported successfully: combine the local scikit-learn base
    # with lightgbm's own regressor class.
    class LGBMRegressor(LGBMScikitLearnBase, lightgbm.LGBMRegressor):
        _default_objective = "regression"

else:
    # lightgbm is unavailable: bind the name to the placeholder produced by
    # make_import_error_func instead of a real class.
    LGBMRegressor = make_import_error_func("lightgbm")
@@ -0,0 +1,13 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
@@ -0,0 +1,58 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import inspect
16
+
17
+ import pytest
18
+
19
+ from ..callback import (
20
+ EarlyStoppingCallback,
21
+ LGBMTrainingCallback,
22
+ early_stopping,
23
+ reset_parameter,
24
+ )
25
+
26
+ try:
27
+ from lightgbm.callback import _EarlyStoppingCallback
28
+ except ImportError:
29
+ try:
30
+ from lightgbm.callback import early_stopping as _EarlyStoppingCallback
31
+ except ImportError:
32
+ pytestmark = pytest.mark.skip("Need lightgbm to run the test")
33
+
34
+
35
def test_lgbm_training_callbacks():
    """Round-trip early stopping and check custom-code detection."""
    stopper = early_stopping(stopping_rounds=5)
    assert not stopper.has_custom_code()

    local_stopper = stopper.to_local()
    if not isinstance(_EarlyStoppingCallback, type):
        # The import fallback bound the name to lightgbm's ``early_stopping``
        # function, so the local callback is inspected through its closure.
        assert local_stopper.__qualname__.startswith(
            _EarlyStoppingCallback.__name__
        )
        closure_vars = inspect.getclosurevars(local_stopper).nonlocals
        assert closure_vars["stopping_rounds"] == 5
    else:
        # The name refers to a class: check the instance and its attribute.
        assert isinstance(local_stopper, _EarlyStoppingCallback)
        assert local_stopper.stopping_rounds == 5

    remote_stopper = LGBMTrainingCallback.from_local(local_stopper)
    assert isinstance(remote_stopper, EarlyStoppingCallback)
    assert remote_stopper.stopping_rounds == 5

    # A plain list of values is not reported as custom code ...
    list_schedule = reset_parameter(a=[0.5, 0.4, 0.1])
    assert not list_schedule.has_custom_code()

    # ... whereas a user-supplied lambda is.
    lambda_schedule = reset_parameter(a=lambda x: x * 0.1)
    assert lambda_schedule.has_custom_code()
@@ -12,6 +12,8 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from typing import Type
16
+
15
17
  from ... import opcodes
16
18
  from ...core import ENTITY_TYPE, OutputType
17
19
  from ...core.operator import ObjectOperator, ObjectOperatorMixin
@@ -21,7 +23,30 @@ from ...serialization.serializables import (
21
23
  FunctionField,
22
24
  TupleField,
23
25
  )
26
+ from ...udf import BuiltinFunction
24
27
  from ...utils import find_objects, replace_objects
28
+ from ..core import Model, ModelData
29
+
30
+
31
class ModelWithEvalData(ModelData):
    """Model data that additionally keeps a dict of evaluation results."""

    __slots__ = ("_evals_result",)

    _evals_result: dict

    def __init__(self, *args, evals_result=None, **kwargs):
        super().__init__(*args, **kwargs)
        # Start from an empty dict unless the caller supplied one to fill.
        if evals_result is None:
            evals_result = dict()
        self._evals_result = evals_result

    def execute(self, session=None, **kw):
        """Execute, then merge the fetched evaluation results into the dict.

        The merge only happens when the operator reports ``has_evals_result``
        and this object is the operator's first output; the results are
        fetched from the operator's second output.
        """
        outcome = super().execute(session=session, **kw)
        op = self.op
        if op.has_evals_result and self.key == op.outputs[0].key:
            fetched = op.outputs[1].fetch(session=session)
            self._evals_result.update(fetched)
        return outcome


class ModelWithEval(Model):
    # No members of its own; everything is inherited from Model.
    pass
25
50
 
26
51
 
27
52
  class ModelDataSource(ObjectOperator, ObjectOperatorMixin):
@@ -29,7 +54,7 @@ class ModelDataSource(ObjectOperator, ObjectOperatorMixin):
29
54
 
30
55
  data = AnyField("data")
31
56
 
32
- def __call__(self, model_cls):
57
+ def __call__(self, model_cls: Type[ModelWithEval]):
33
58
  self._output_types = [OutputType.object]
34
59
  return self.new_tileable(None, object_class=model_cls)
35
60
 
@@ -48,14 +73,18 @@ class ModelApplyChunk(ObjectOperator, ObjectOperatorMixin):
48
73
  self._output_types = list(output_types)
49
74
  super().__init__(**kwargs)
50
75
 
51
- def _set_inputs(self, inputs):
52
- super()._set_inputs(inputs)
53
- old_inputs = find_objects(self.args, ENTITY_TYPE) + find_objects(
54
- self.kwargs, ENTITY_TYPE
76
+ def has_custom_code(self) -> bool:
77
+ return not isinstance(self.func, BuiltinFunction)
78
+
79
+ @classmethod
80
+ def _set_inputs(cls, op: "ModelApplyChunk", inputs):
81
+ super()._set_inputs(op, inputs)
82
+ old_inputs = find_objects(op.args, ENTITY_TYPE) + find_objects(
83
+ op.kwargs, ENTITY_TYPE
55
84
  )
56
- mapping = {o: n for o, n in zip(old_inputs, self._inputs[1:])}
57
- self.args = replace_objects(self.args, mapping)
58
- self.kwargs = replace_objects(self.kwargs, mapping)
85
+ mapping = {o: n for o, n in zip(old_inputs, op._inputs[1:])}
86
+ op.args = replace_objects(op.args, mapping)
87
+ op.kwargs = replace_objects(op.kwargs, mapping)
59
88
 
60
89
  @property
61
90
  def output_limit(self) -> int:
@@ -72,6 +101,6 @@ class ModelApplyChunk(ObjectOperator, ObjectOperatorMixin):
72
101
  return self.new_tileables(inputs, kws=output_kws)
73
102
 
74
103
 
75
- def to_remote_model(model, model_cls):
104
+ def to_remote_model(model, model_cls: Type[ModelWithEval]) -> ModelWithEval:
76
105
  op = ModelDataSource(data=model)
77
106
  return op(model_cls)
@@ -14,6 +14,8 @@
14
14
 
15
15
  import sys
16
16
 
17
+ from ...serialization.serializables import Serializable
18
+
17
19
 
18
20
  def make_import_error_func(package_name):
19
21
  def _func(*_, **__): # pragma: no cover
@@ -51,3 +53,56 @@ def config_mod_getattr(mod_dict, globals_):
51
53
  "__warningregistry__": dict(),
52
54
  }
53
55
  )
56
+
57
+
58
class TrainingCallback(Serializable):
    """Serializable stand-in for a training callback of a local library.

    A concrete subclass sets ``_local_cls`` to the local callback class it
    mirrors.  The registry ``_local_to_remote`` is read from ``cls`` but is
    not defined on this base class, so each callback family has to provide
    its own dict before ``from_local`` is used.
    """

    _local_cls = None

    @classmethod
    def _load_local_to_remote_mapping(cls, globals_dict):
        """Fill ``cls._local_to_remote`` from the classes in ``globals_dict``.

        Does nothing when the registry already has entries.  Only subclasses
        of ``cls`` whose ``_local_cls`` is set are registered.
        """
        if cls._local_to_remote:
            return
        for v in globals_dict.values():
            if isinstance(v, type) and issubclass(v, cls) and v._local_cls is not None:
                cls._local_to_remote[v._local_cls] = v

    @classmethod
    def from_local(cls, callback_obj):
        """Convert a local callback (or a list/tuple of them) to remote form.

        Objects whose exact type is not registered are returned unchanged.
        Lists and tuples are converted element-wise and returned as a list.
        """
        if isinstance(callback_obj, (list, tuple)):
            return [cls.from_local(x) for x in callback_obj]
        if type(callback_obj) not in cls._local_to_remote:
            return callback_obj

        kw = {}
        remote_cls = cls._local_to_remote[type(callback_obj)]
        for attr in remote_cls._FIELDS:
            # Copy only the fields the local object actually exposes.
            try:
                kw[attr] = getattr(callback_obj, attr)
            except AttributeError:
                pass
        return remote_cls(**kw)

    def has_custom_code(self) -> bool:
        """Return False; subclasses override this where user code may be held."""
        return False

    @classmethod
    def remote_to_local(cls, remote_obj):
        """Convert remote callbacks (or a list/tuple of them) to local ones.

        Objects that are not ``TrainingCallback`` instances are returned
        unchanged.
        """
        if isinstance(remote_obj, (list, tuple)):
            return [cls.remote_to_local(x) for x in remote_obj]
        if not isinstance(remote_obj, TrainingCallback):
            return remote_obj
        return remote_obj.to_local()

    def _extract_kw(self) -> dict:
        """Return the fields of this object whose value is not None."""
        kw = {}
        for attr in type(self)._FIELDS:
            val = getattr(self, attr, None)
            if val is not None:
                kw[attr] = val
        return kw

    def to_local(self):
        """Instantiate ``_local_cls`` with the non-None fields of this object."""
        return type(self)._local_cls(**self._extract_kw())

    def __call__(self, *args, **kwargs):
        """Build the local callback and invoke it with the given arguments."""
        return self.to_local()(*args, **kwargs)
@@ -0,0 +1,86 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Callable, Optional, Sequence, Union
16
+
17
+ from ....serialization.serializables import (
18
+ AnyField,
19
+ BoolField,
20
+ Float32Field,
21
+ Int32Field,
22
+ StringField,
23
+ )
24
+ from ....udf import BuiltinFunction
25
+ from ..utils import TrainingCallback
26
+
27
+ try:
28
+ from xgboost.callback import EarlyStopping as _EarlyStopping
29
+ from xgboost.callback import LearningRateScheduler as _LearningRateScheduler
30
+ except ImportError:
31
+ _LearningRateScheduler = _EarlyStopping = None
32
+
33
+
34
class XGBTrainingCallback(TrainingCallback):
    """Base of the callbacks in this module, holding their own registry."""

    _local_to_remote = {}

    @classmethod
    def from_local(cls, callback_obj):
        """Populate the registry from this module's globals, then convert."""
        module_names = globals()
        cls._load_local_to_remote_mapping(module_names)
        converted = super().from_local(callback_obj)
        return converted
41
+
42
+
43
class LearningRateScheduler(XGBTrainingCallback):
    """Remote counterpart of ``xgboost.callback.LearningRateScheduler``."""

    _local_cls = _LearningRateScheduler

    learning_rates = AnyField("learning_rates", default=None)

    def __init__(
        self, learning_rates: Union[Callable[[int], float], Sequence[float]], **kw
    ) -> None:
        super().__init__(learning_rates=learning_rates, **kw)

    def has_custom_code(self) -> bool:
        """Report custom code unless the rates are a tuple, list or BuiltinFunction."""
        non_custom_kinds = (tuple, list, BuiltinFunction)
        if isinstance(self.learning_rates, non_custom_kinds):
            return False
        return True
55
+
56
+
57
class EarlyStopping(XGBTrainingCallback):
    """Remote counterpart of ``xgboost.callback.EarlyStopping``."""

    _local_cls = _EarlyStopping

    rounds = Int32Field("rounds")
    metric_name = StringField("metric_name", default=None)
    data_name = StringField("data_name", default=None)
    maximize = BoolField("maximize", default=None)
    save_best = BoolField("save_best", default=None)
    min_delta = Float32Field("min_delta", default=None)

    def __init__(
        self,
        *,
        rounds: int,
        metric_name: Optional[str] = None,
        data_name: Optional[str] = None,
        maximize: Optional[bool] = None,
        save_best: Optional[bool] = False,
        min_delta: float = 0.0,
        **kw
    ) -> None:
        # The named arguments are keyword-only, so they can never also appear
        # in ``kw``; both groups are forwarded to the base initializer.
        field_values = dict(
            rounds=rounds,
            metric_name=metric_name,
            data_name=data_name,
            maximize=maximize,
            save_best=save_best,
            min_delta=min_delta,
        )
        super().__init__(**field_values, **kw)
@@ -26,9 +26,7 @@ if not xgboost:
26
26
  else:
27
27
  from xgboost.sklearn import XGBClassifierBase
28
28
 
29
- from .core import wrap_evaluation_matrices
30
29
  from .predict import predict
31
- from .train import train
32
30
 
33
31
  class XGBClassifier(XGBScikitLearnBase, XGBClassifierBase):
34
32
  """
@@ -43,6 +41,15 @@ else:
43
41
  super().__init__(**kwargs)
44
42
  self._set_model(xgb_model)
45
43
 
44
+ def get_xgb_params(self):
45
+ params = super().get_xgb_params()
46
+ if self.n_classes_ > 2:
47
+ params["objective"] = "multi:softprob"
48
+ params["num_class"] = self.n_classes_
49
+ else:
50
+ params["objective"] = "binary:logistic"
51
+ return params
52
+
46
53
  def fit(
47
54
  self,
48
55
  X,
@@ -50,43 +57,32 @@ else:
50
57
  sample_weight=None,
51
58
  base_margin=None,
52
59
  eval_set=None,
60
+ xgb_model=None,
53
61
  sample_weight_eval_set=None,
54
62
  base_margin_eval_set=None,
55
63
  num_class=None,
56
64
  **kw,
57
65
  ):
58
66
  session = kw.pop("session", None)
59
- run_kwargs = kw.pop("run_kwargs", None) or dict()
60
- dtrain, evals = wrap_evaluation_matrices(
61
- None,
67
+ run_kwargs = kw.pop("run_kwargs", dict())
68
+
69
+ if num_class is not None:
70
+ self.n_classes_ = num_class
71
+ else:
72
+ t_labels = mt.unique(y).execute(session=session, **run_kwargs)
73
+ self.n_classes_ = t_labels.shape[0]
74
+
75
+ super().fit(
62
76
  X,
63
77
  y,
64
- sample_weight,
65
- base_margin,
66
- eval_set,
67
- sample_weight_eval_set,
68
- base_margin_eval_set,
69
- )
70
- params = self.get_xgb_params()
71
- self._n_features_in = X.shape[1]
72
- self.n_classes_ = num_class or 1
73
- if self.n_classes_ > 2:
74
- params["objective"] = "multi:softprob"
75
- params["num_class"] = self.n_classes_
76
- else:
77
- params["objective"] = "binary:logistic"
78
- self.evals_result_ = dict()
79
- result = train(
80
- params,
81
- dtrain,
82
- num_boost_round=self.get_num_boosting_rounds(),
83
- evals=evals,
84
- evals_result=self.evals_result_,
85
- num_class=num_class,
86
- session=session,
87
- run_kwargs=run_kwargs,
78
+ sample_weight=sample_weight,
79
+ base_margin=base_margin,
80
+ eval_set=eval_set,
81
+ xgb_model=xgb_model,
82
+ sample_weight_eval_set=sample_weight_eval_set,
83
+ base_margin_eval_set=base_margin_eval_set,
84
+ **kw,
88
85
  )
89
- self._Booster = result
90
86
  return self
91
87
 
92
88
  def predict(self, data, **kw):