maxframe 1.3.0__cp310-cp310-macosx_10_9_universal2.whl → 2.0.0__cp310-cp310-macosx_10_9_universal2.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (644)
  1. maxframe/_utils.cpython-310-darwin.so +0 -0
  2. maxframe/_utils.pyi +21 -0
  3. maxframe/_utils.pyx +4 -3
  4. maxframe/codegen/__init__.py +27 -0
  5. maxframe/{codegen.py → codegen/core.py} +49 -43
  6. maxframe/codegen/spe/__init__.py +16 -0
  7. maxframe/codegen/spe/core.py +307 -0
  8. maxframe/codegen/spe/dataframe/__init__.py +37 -0
  9. maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
  10. maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
  11. maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
  12. maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
  13. maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
  14. maxframe/codegen/spe/dataframe/datasource.py +181 -0
  15. maxframe/codegen/spe/dataframe/datastore.py +204 -0
  16. maxframe/codegen/spe/dataframe/extensions.py +63 -0
  17. maxframe/codegen/spe/dataframe/fetch.py +26 -0
  18. maxframe/codegen/spe/dataframe/groupby.py +224 -0
  19. maxframe/codegen/spe/dataframe/indexing.py +238 -0
  20. maxframe/codegen/spe/dataframe/merge.py +73 -0
  21. maxframe/codegen/spe/dataframe/misc.py +286 -0
  22. maxframe/codegen/spe/dataframe/missing.py +64 -0
  23. maxframe/codegen/spe/dataframe/reduction.py +160 -0
  24. maxframe/codegen/spe/dataframe/sort.py +83 -0
  25. maxframe/codegen/spe/dataframe/statistics.py +46 -0
  26. maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
  27. maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
  29. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
  30. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
  31. maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
  32. maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
  33. maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
  34. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
  35. maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
  36. maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
  37. maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
  38. maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
  39. maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
  40. maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
  41. maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
  42. maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
  43. maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
  44. maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
  45. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
  46. maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
  47. maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
  48. maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
  49. maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
  50. maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
  51. maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
  52. maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
  53. maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
  54. maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
  55. maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
  56. maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
  57. maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
  58. maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
  59. maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
  60. maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
  61. maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
  62. maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
  63. maxframe/codegen/spe/dataframe/tseries.py +46 -0
  64. maxframe/codegen/spe/dataframe/udf.py +62 -0
  65. maxframe/codegen/spe/dataframe/value_counts.py +31 -0
  66. maxframe/codegen/spe/dataframe/window.py +65 -0
  67. maxframe/codegen/spe/learn/__init__.py +15 -0
  68. maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
  69. maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
  70. maxframe/codegen/spe/learn/contrib/models.py +41 -0
  71. maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
  72. maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
  73. maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
  74. maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
  75. maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
  76. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
  77. maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
  78. maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
  79. maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
  80. maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
  81. maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
  82. maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
  83. maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
  84. maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
  85. maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
  86. maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
  87. maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
  88. maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
  89. maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
  90. maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
  91. maxframe/codegen/spe/learn/utils/__init__.py +15 -0
  92. maxframe/codegen/spe/learn/utils/checks.py +55 -0
  93. maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
  94. maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
  95. maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
  96. maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
  97. maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
  98. maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
  99. maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
  100. maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
  101. maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
  102. maxframe/codegen/spe/learn/utils/validation.py +35 -0
  103. maxframe/codegen/spe/objects.py +26 -0
  104. maxframe/codegen/spe/remote.py +29 -0
  105. maxframe/codegen/spe/tensor/__init__.py +28 -0
  106. maxframe/codegen/spe/tensor/arithmetic.py +95 -0
  107. maxframe/codegen/spe/tensor/core.py +41 -0
  108. maxframe/codegen/spe/tensor/datasource.py +165 -0
  109. maxframe/codegen/spe/tensor/extensions.py +35 -0
  110. maxframe/codegen/spe/tensor/fetch.py +26 -0
  111. maxframe/codegen/spe/tensor/indexing.py +63 -0
  112. maxframe/codegen/spe/tensor/linalg.py +63 -0
  113. maxframe/codegen/spe/tensor/merge.py +31 -0
  114. maxframe/codegen/spe/tensor/misc.py +121 -0
  115. maxframe/codegen/spe/tensor/random.py +29 -0
  116. maxframe/codegen/spe/tensor/reduction.py +39 -0
  117. maxframe/codegen/spe/tensor/reshape.py +26 -0
  118. maxframe/codegen/spe/tensor/sort.py +42 -0
  119. maxframe/codegen/spe/tensor/special.py +35 -0
  120. maxframe/codegen/spe/tensor/statistics.py +24 -0
  121. maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
  122. maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
  123. maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
  124. maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
  125. maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
  126. maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
  127. maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
  128. maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
  129. maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
  130. maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
  131. maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
  132. maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
  133. maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
  134. maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
  135. maxframe/codegen/spe/tests/__init__.py +13 -0
  136. maxframe/codegen/spe/tests/test_remote.py +29 -0
  137. maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
  138. maxframe/codegen/spe/utils.py +54 -0
  139. maxframe/codegen/tests/__init__.py +13 -0
  140. maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
  141. maxframe/config/__init__.py +1 -1
  142. maxframe/config/config.py +50 -23
  143. maxframe/config/tests/test_config.py +4 -12
  144. maxframe/config/validators.py +5 -0
  145. maxframe/conftest.py +38 -10
  146. maxframe/core/__init__.py +1 -0
  147. maxframe/core/context.py +110 -0
  148. maxframe/core/entity/__init__.py +1 -0
  149. maxframe/core/entity/core.py +0 -7
  150. maxframe/core/entity/objects.py +19 -5
  151. maxframe/core/entity/output_types.py +11 -0
  152. maxframe/core/entity/tests/test_objects.py +11 -12
  153. maxframe/core/entity/tileables.py +3 -1
  154. maxframe/core/entity/utils.py +15 -0
  155. maxframe/core/graph/__init__.py +6 -1
  156. maxframe/core/graph/builder/base.py +5 -1
  157. maxframe/core/graph/core.cpython-310-darwin.so +0 -0
  158. maxframe/core/graph/core.pyx +17 -6
  159. maxframe/core/graph/entity.py +18 -6
  160. maxframe/core/operator/__init__.py +8 -3
  161. maxframe/core/operator/base.py +35 -12
  162. maxframe/core/operator/core.py +37 -14
  163. maxframe/core/operator/fetch.py +5 -18
  164. maxframe/core/operator/objects.py +0 -20
  165. maxframe/core/operator/shuffle.py +6 -72
  166. maxframe/dataframe/__init__.py +1 -0
  167. maxframe/dataframe/accessors/datetime_/core.py +7 -4
  168. maxframe/dataframe/accessors/string_/core.py +9 -6
  169. maxframe/dataframe/arithmetic/core.py +31 -20
  170. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
  171. maxframe/dataframe/core.py +98 -91
  172. maxframe/dataframe/datasource/core.py +8 -1
  173. maxframe/dataframe/datasource/date_range.py +8 -0
  174. maxframe/dataframe/datasource/from_index.py +9 -5
  175. maxframe/dataframe/datasource/from_records.py +9 -2
  176. maxframe/dataframe/datasource/from_tensor.py +32 -21
  177. maxframe/dataframe/datasource/read_csv.py +8 -2
  178. maxframe/dataframe/datasource/read_odps_query.py +109 -19
  179. maxframe/dataframe/datasource/read_odps_table.py +20 -5
  180. maxframe/dataframe/datasource/read_parquet.py +8 -3
  181. maxframe/dataframe/datasource/tests/test_datasource.py +80 -1
  182. maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
  183. maxframe/dataframe/datastore/to_csv.py +7 -3
  184. maxframe/dataframe/datastore/to_odps.py +42 -6
  185. maxframe/dataframe/extensions/__init__.py +6 -1
  186. maxframe/dataframe/extensions/apply_chunk.py +96 -136
  187. maxframe/dataframe/extensions/flatjson.py +3 -2
  188. maxframe/dataframe/extensions/flatmap.py +15 -7
  189. maxframe/dataframe/fetch/core.py +12 -1
  190. maxframe/dataframe/groupby/__init__.py +7 -0
  191. maxframe/dataframe/groupby/aggregation.py +62 -9
  192. maxframe/dataframe/groupby/apply.py +50 -74
  193. maxframe/dataframe/groupby/apply_chunk.py +393 -0
  194. maxframe/dataframe/groupby/core.py +80 -17
  195. maxframe/dataframe/groupby/extensions.py +26 -0
  196. maxframe/dataframe/groupby/fill.py +9 -4
  197. maxframe/dataframe/groupby/sample.py +7 -7
  198. maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
  199. maxframe/dataframe/groupby/transform.py +57 -54
  200. maxframe/dataframe/indexing/align.py +7 -6
  201. maxframe/dataframe/indexing/getitem.py +9 -8
  202. maxframe/dataframe/indexing/iloc.py +28 -23
  203. maxframe/dataframe/indexing/insert.py +7 -3
  204. maxframe/dataframe/indexing/loc.py +9 -8
  205. maxframe/dataframe/indexing/reindex.py +36 -30
  206. maxframe/dataframe/indexing/rename_axis.py +18 -10
  207. maxframe/dataframe/indexing/reset_index.py +0 -2
  208. maxframe/dataframe/indexing/sample.py +13 -9
  209. maxframe/dataframe/indexing/set_axis.py +9 -6
  210. maxframe/dataframe/indexing/setitem.py +8 -5
  211. maxframe/dataframe/indexing/where.py +12 -9
  212. maxframe/dataframe/merge/__init__.py +0 -1
  213. maxframe/dataframe/merge/concat.py +10 -31
  214. maxframe/dataframe/merge/merge.py +2 -24
  215. maxframe/dataframe/misc/__init__.py +6 -0
  216. maxframe/dataframe/misc/_duplicate.py +7 -3
  217. maxframe/dataframe/misc/apply.py +106 -139
  218. maxframe/dataframe/misc/astype.py +3 -2
  219. maxframe/dataframe/misc/case_when.py +11 -7
  220. maxframe/dataframe/misc/cut.py +11 -10
  221. maxframe/dataframe/misc/describe.py +7 -3
  222. maxframe/dataframe/misc/drop.py +13 -11
  223. maxframe/dataframe/misc/eval.py +0 -2
  224. maxframe/dataframe/misc/get_dummies.py +78 -49
  225. maxframe/dataframe/misc/isin.py +13 -10
  226. maxframe/dataframe/misc/map.py +21 -6
  227. maxframe/dataframe/misc/melt.py +8 -1
  228. maxframe/dataframe/misc/pivot.py +232 -0
  229. maxframe/dataframe/misc/pivot_table.py +52 -40
  230. maxframe/dataframe/misc/rechunk.py +59 -0
  231. maxframe/dataframe/misc/shift.py +7 -4
  232. maxframe/dataframe/misc/stack.py +5 -3
  233. maxframe/dataframe/misc/tests/test_misc.py +167 -1
  234. maxframe/dataframe/misc/transform.py +63 -65
  235. maxframe/dataframe/misc/value_counts.py +7 -4
  236. maxframe/dataframe/missing/dropna.py +16 -7
  237. maxframe/dataframe/missing/fillna.py +18 -10
  238. maxframe/dataframe/missing/replace.py +10 -6
  239. maxframe/dataframe/missing/tests/test_missing.py +2 -2
  240. maxframe/dataframe/operators.py +1 -27
  241. maxframe/dataframe/reduction/aggregation.py +128 -3
  242. maxframe/dataframe/reduction/core.py +20 -6
  243. maxframe/dataframe/reduction/median.py +1 -1
  244. maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
  245. maxframe/dataframe/reduction/unique.py +53 -7
  246. maxframe/dataframe/statistics/corr.py +9 -6
  247. maxframe/dataframe/statistics/quantile.py +9 -6
  248. maxframe/dataframe/tseries/to_datetime.py +6 -4
  249. maxframe/dataframe/utils.py +219 -31
  250. maxframe/dataframe/window/rolling.py +7 -4
  251. maxframe/env.py +1 -0
  252. maxframe/errors.py +9 -0
  253. maxframe/extension.py +13 -2
  254. maxframe/io/objects/core.py +67 -51
  255. maxframe/io/objects/tensor.py +73 -17
  256. maxframe/io/objects/tests/test_object_io.py +10 -55
  257. maxframe/io/odpsio/arrow.py +15 -2
  258. maxframe/io/odpsio/schema.py +43 -13
  259. maxframe/io/odpsio/tableio.py +63 -11
  260. maxframe/io/odpsio/tests/test_arrow.py +1 -2
  261. maxframe/io/odpsio/tests/test_schema.py +114 -1
  262. maxframe/io/odpsio/tests/test_tableio.py +42 -0
  263. maxframe/io/odpsio/tests/test_volumeio.py +21 -58
  264. maxframe/io/odpsio/volumeio.py +23 -8
  265. maxframe/learn/__init__.py +2 -2
  266. maxframe/learn/contrib/__init__.py +2 -2
  267. maxframe/learn/contrib/graph/connected_components.py +2 -1
  268. maxframe/learn/contrib/lightgbm/__init__.py +33 -0
  269. maxframe/learn/contrib/lightgbm/_predict.py +138 -0
  270. maxframe/learn/contrib/lightgbm/_train.py +163 -0
  271. maxframe/learn/contrib/lightgbm/callback.py +114 -0
  272. maxframe/learn/contrib/lightgbm/classifier.py +199 -0
  273. maxframe/learn/contrib/lightgbm/core.py +372 -0
  274. maxframe/learn/contrib/lightgbm/dataset.py +153 -0
  275. maxframe/learn/contrib/lightgbm/regressor.py +29 -0
  276. maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
  277. maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
  278. maxframe/learn/contrib/llm/models/dashscope.py +34 -0
  279. maxframe/learn/contrib/llm/models/managed.py +15 -0
  280. maxframe/learn/contrib/llm/multi_modal.py +92 -0
  281. maxframe/learn/contrib/llm/text.py +21 -5
  282. maxframe/learn/contrib/models.py +38 -9
  283. maxframe/learn/contrib/utils.py +55 -0
  284. maxframe/learn/contrib/xgboost/callback.py +86 -0
  285. maxframe/learn/contrib/xgboost/classifier.py +26 -30
  286. maxframe/learn/contrib/xgboost/core.py +54 -42
  287. maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
  288. maxframe/learn/contrib/xgboost/predict.py +13 -8
  289. maxframe/learn/contrib/xgboost/regressor.py +28 -27
  290. maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
  291. maxframe/learn/contrib/xgboost/train.py +59 -16
  292. maxframe/learn/core.py +252 -0
  293. maxframe/learn/datasets/__init__.py +20 -0
  294. maxframe/learn/datasets/samples_generator.py +628 -0
  295. maxframe/learn/linear_model/__init__.py +15 -0
  296. maxframe/learn/linear_model/_base.py +163 -0
  297. maxframe/learn/linear_model/_lin_reg.py +175 -0
  298. maxframe/learn/metrics/__init__.py +25 -0
  299. maxframe/learn/metrics/_check_targets.py +95 -0
  300. maxframe/learn/metrics/_classification.py +1121 -0
  301. maxframe/learn/metrics/_regression.py +256 -0
  302. maxframe/learn/model_selection/__init__.py +15 -0
  303. maxframe/learn/model_selection/_split.py +451 -0
  304. maxframe/learn/model_selection/tests/__init__.py +13 -0
  305. maxframe/learn/model_selection/tests/test_split.py +156 -0
  306. maxframe/learn/preprocessing/__init__.py +16 -0
  307. maxframe/learn/preprocessing/_data/__init__.py +17 -0
  308. maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
  309. maxframe/learn/preprocessing/_data/normalize.py +127 -0
  310. maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
  311. maxframe/learn/preprocessing/_data/utils.py +79 -0
  312. maxframe/learn/preprocessing/_label/__init__.py +16 -0
  313. maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
  314. maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
  315. maxframe/learn/utils/__init__.py +4 -0
  316. maxframe/learn/utils/_encode.py +314 -0
  317. maxframe/learn/utils/checks.py +161 -0
  318. maxframe/learn/utils/core.py +33 -0
  319. maxframe/learn/utils/extmath.py +176 -0
  320. maxframe/learn/utils/multiclass.py +292 -0
  321. maxframe/learn/utils/shuffle.py +114 -0
  322. maxframe/learn/utils/sparsefuncs.py +87 -0
  323. maxframe/learn/utils/validation.py +775 -0
  324. maxframe/lib/__init__.py +0 -2
  325. maxframe/lib/compat.py +145 -0
  326. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  327. maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
  328. maxframe/lib/sparse/__init__.py +10 -15
  329. maxframe/lib/sparse/array.py +45 -33
  330. maxframe/lib/sparse/core.py +0 -2
  331. maxframe/lib/sparse/linalg.py +31 -0
  332. maxframe/lib/sparse/matrix.py +5 -2
  333. maxframe/lib/sparse/tests/__init__.py +0 -2
  334. maxframe/lib/sparse/tests/test_sparse.py +53 -53
  335. maxframe/lib/sparse/vector.py +0 -2
  336. maxframe/mixin.py +59 -2
  337. maxframe/opcodes.py +13 -5
  338. maxframe/protocol.py +67 -14
  339. maxframe/remote/core.py +16 -14
  340. maxframe/remote/run_script.py +6 -3
  341. maxframe/serialization/__init__.py +2 -0
  342. maxframe/serialization/core.cpython-310-darwin.so +0 -0
  343. maxframe/serialization/core.pxd +3 -0
  344. maxframe/serialization/core.pyi +3 -1
  345. maxframe/serialization/core.pyx +82 -4
  346. maxframe/serialization/pandas.py +5 -1
  347. maxframe/serialization/serializables/core.py +6 -5
  348. maxframe/serialization/serializables/field.py +2 -2
  349. maxframe/serialization/serializables/tests/test_field_type.py +3 -5
  350. maxframe/serialization/tests/test_serial.py +27 -0
  351. maxframe/session.py +4 -71
  352. maxframe/sperunner.py +165 -0
  353. maxframe/tensor/__init__.py +35 -2
  354. maxframe/tensor/arithmetic/__init__.py +2 -4
  355. maxframe/tensor/arithmetic/abs.py +0 -2
  356. maxframe/tensor/arithmetic/absolute.py +0 -2
  357. maxframe/tensor/arithmetic/add.py +34 -4
  358. maxframe/tensor/arithmetic/angle.py +0 -2
  359. maxframe/tensor/arithmetic/arccos.py +1 -4
  360. maxframe/tensor/arithmetic/arccosh.py +1 -3
  361. maxframe/tensor/arithmetic/arcsin.py +0 -2
  362. maxframe/tensor/arithmetic/arcsinh.py +0 -2
  363. maxframe/tensor/arithmetic/arctan.py +0 -2
  364. maxframe/tensor/arithmetic/arctan2.py +0 -2
  365. maxframe/tensor/arithmetic/arctanh.py +0 -2
  366. maxframe/tensor/arithmetic/around.py +0 -2
  367. maxframe/tensor/arithmetic/bitand.py +0 -2
  368. maxframe/tensor/arithmetic/bitor.py +1 -3
  369. maxframe/tensor/arithmetic/bitxor.py +1 -3
  370. maxframe/tensor/arithmetic/cbrt.py +0 -2
  371. maxframe/tensor/arithmetic/ceil.py +0 -2
  372. maxframe/tensor/arithmetic/clip.py +13 -13
  373. maxframe/tensor/arithmetic/conj.py +0 -2
  374. maxframe/tensor/arithmetic/copysign.py +0 -2
  375. maxframe/tensor/arithmetic/core.py +47 -39
  376. maxframe/tensor/arithmetic/cos.py +1 -3
  377. maxframe/tensor/arithmetic/cosh.py +0 -2
  378. maxframe/tensor/arithmetic/deg2rad.py +0 -2
  379. maxframe/tensor/arithmetic/degrees.py +0 -2
  380. maxframe/tensor/arithmetic/divide.py +0 -2
  381. maxframe/tensor/arithmetic/equal.py +0 -2
  382. maxframe/tensor/arithmetic/exp.py +1 -3
  383. maxframe/tensor/arithmetic/exp2.py +0 -2
  384. maxframe/tensor/arithmetic/expm1.py +0 -2
  385. maxframe/tensor/arithmetic/fabs.py +0 -2
  386. maxframe/tensor/arithmetic/fix.py +0 -2
  387. maxframe/tensor/arithmetic/float_power.py +0 -2
  388. maxframe/tensor/arithmetic/floor.py +0 -2
  389. maxframe/tensor/arithmetic/floordiv.py +0 -2
  390. maxframe/tensor/arithmetic/fmax.py +0 -2
  391. maxframe/tensor/arithmetic/fmin.py +0 -2
  392. maxframe/tensor/arithmetic/fmod.py +0 -2
  393. maxframe/tensor/arithmetic/frexp.py +6 -2
  394. maxframe/tensor/arithmetic/greater.py +0 -2
  395. maxframe/tensor/arithmetic/greater_equal.py +0 -2
  396. maxframe/tensor/arithmetic/hypot.py +0 -2
  397. maxframe/tensor/arithmetic/i0.py +1 -3
  398. maxframe/tensor/arithmetic/imag.py +0 -2
  399. maxframe/tensor/arithmetic/invert.py +1 -3
  400. maxframe/tensor/arithmetic/isclose.py +0 -2
  401. maxframe/tensor/arithmetic/iscomplex.py +0 -2
  402. maxframe/tensor/arithmetic/isfinite.py +1 -3
  403. maxframe/tensor/arithmetic/isinf.py +0 -2
  404. maxframe/tensor/arithmetic/isnan.py +0 -2
  405. maxframe/tensor/arithmetic/isreal.py +0 -2
  406. maxframe/tensor/arithmetic/ldexp.py +0 -2
  407. maxframe/tensor/arithmetic/less.py +0 -2
  408. maxframe/tensor/arithmetic/less_equal.py +0 -2
  409. maxframe/tensor/arithmetic/log.py +1 -3
  410. maxframe/tensor/arithmetic/log10.py +1 -3
  411. maxframe/tensor/arithmetic/log1p.py +1 -3
  412. maxframe/tensor/arithmetic/log2.py +1 -3
  413. maxframe/tensor/arithmetic/logaddexp.py +0 -2
  414. maxframe/tensor/arithmetic/logaddexp2.py +0 -2
  415. maxframe/tensor/arithmetic/logical_and.py +0 -2
  416. maxframe/tensor/arithmetic/logical_not.py +1 -3
  417. maxframe/tensor/arithmetic/logical_or.py +0 -2
  418. maxframe/tensor/arithmetic/logical_xor.py +0 -2
  419. maxframe/tensor/arithmetic/lshift.py +0 -2
  420. maxframe/tensor/arithmetic/maximum.py +0 -2
  421. maxframe/tensor/arithmetic/minimum.py +0 -2
  422. maxframe/tensor/arithmetic/mod.py +0 -2
  423. maxframe/tensor/arithmetic/modf.py +6 -2
  424. maxframe/tensor/arithmetic/multiply.py +37 -4
  425. maxframe/tensor/arithmetic/nan_to_num.py +0 -2
  426. maxframe/tensor/arithmetic/negative.py +0 -2
  427. maxframe/tensor/arithmetic/nextafter.py +0 -2
  428. maxframe/tensor/arithmetic/not_equal.py +0 -2
  429. maxframe/tensor/arithmetic/positive.py +0 -2
  430. maxframe/tensor/arithmetic/power.py +0 -2
  431. maxframe/tensor/arithmetic/rad2deg.py +0 -2
  432. maxframe/tensor/arithmetic/radians.py +0 -2
  433. maxframe/tensor/arithmetic/real.py +0 -2
  434. maxframe/tensor/arithmetic/reciprocal.py +5 -3
  435. maxframe/tensor/arithmetic/rint.py +1 -3
  436. maxframe/tensor/arithmetic/rshift.py +0 -2
  437. maxframe/tensor/arithmetic/setimag.py +0 -2
  438. maxframe/tensor/arithmetic/setreal.py +0 -2
  439. maxframe/tensor/arithmetic/sign.py +0 -2
  440. maxframe/tensor/arithmetic/signbit.py +0 -2
  441. maxframe/tensor/arithmetic/sin.py +0 -2
  442. maxframe/tensor/arithmetic/sinc.py +1 -3
  443. maxframe/tensor/arithmetic/sinh.py +0 -2
  444. maxframe/tensor/arithmetic/spacing.py +0 -2
  445. maxframe/tensor/arithmetic/sqrt.py +0 -2
  446. maxframe/tensor/arithmetic/square.py +0 -2
  447. maxframe/tensor/arithmetic/subtract.py +4 -2
  448. maxframe/tensor/arithmetic/tan.py +0 -2
  449. maxframe/tensor/arithmetic/tanh.py +0 -2
  450. maxframe/tensor/arithmetic/tests/__init__.py +0 -2
  451. maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
  452. maxframe/tensor/arithmetic/truediv.py +0 -2
  453. maxframe/tensor/arithmetic/trunc.py +0 -2
  454. maxframe/tensor/arithmetic/utils.py +32 -6
  455. maxframe/tensor/array_utils.py +3 -25
  456. maxframe/tensor/core.py +6 -6
  457. maxframe/tensor/datasource/__init__.py +10 -2
  458. maxframe/tensor/datasource/arange.py +0 -2
  459. maxframe/tensor/datasource/array.py +3 -22
  460. maxframe/tensor/datasource/core.py +15 -10
  461. maxframe/tensor/datasource/diag.py +140 -0
  462. maxframe/tensor/datasource/diagflat.py +69 -0
  463. maxframe/tensor/datasource/empty.py +0 -2
  464. maxframe/tensor/datasource/eye.py +95 -0
  465. maxframe/tensor/datasource/from_dataframe.py +0 -2
  466. maxframe/tensor/datasource/from_dense.py +0 -17
  467. maxframe/tensor/datasource/from_sparse.py +0 -2
  468. maxframe/tensor/datasource/full.py +0 -2
  469. maxframe/tensor/datasource/identity.py +54 -0
  470. maxframe/tensor/datasource/indices.py +115 -0
  471. maxframe/tensor/datasource/linspace.py +140 -0
  472. maxframe/tensor/datasource/meshgrid.py +135 -0
  473. maxframe/tensor/datasource/ones.py +8 -3
  474. maxframe/tensor/datasource/tests/test_datasource.py +32 -1
  475. maxframe/tensor/datasource/tri_array.py +107 -0
  476. maxframe/tensor/datasource/zeros.py +7 -3
  477. maxframe/tensor/extensions/__init__.py +31 -0
  478. maxframe/tensor/extensions/accessor.py +25 -0
  479. maxframe/tensor/extensions/apply_chunk.py +137 -0
  480. maxframe/tensor/indexing/__init__.py +1 -1
  481. maxframe/tensor/indexing/choose.py +8 -6
  482. maxframe/tensor/indexing/compress.py +0 -2
  483. maxframe/tensor/indexing/extract.py +0 -2
  484. maxframe/tensor/indexing/fill_diagonal.py +9 -6
  485. maxframe/tensor/indexing/flatnonzero.py +1 -3
  486. maxframe/tensor/indexing/getitem.py +10 -43
  487. maxframe/tensor/indexing/nonzero.py +2 -4
  488. maxframe/tensor/indexing/setitem.py +19 -9
  489. maxframe/tensor/indexing/slice.py +6 -3
  490. maxframe/tensor/indexing/take.py +0 -2
  491. maxframe/tensor/indexing/tests/__init__.py +0 -2
  492. maxframe/tensor/indexing/tests/test_indexing.py +0 -2
  493. maxframe/tensor/indexing/unravel_index.py +6 -6
  494. maxframe/tensor/lib/__init__.py +16 -0
  495. maxframe/tensor/lib/index_tricks.py +404 -0
  496. maxframe/tensor/linalg/__init__.py +36 -0
  497. maxframe/tensor/linalg/dot.py +145 -0
  498. maxframe/tensor/linalg/inner.py +36 -0
  499. maxframe/tensor/linalg/inv.py +83 -0
  500. maxframe/tensor/linalg/lu.py +115 -0
  501. maxframe/tensor/linalg/matmul.py +225 -0
  502. maxframe/tensor/linalg/qr.py +124 -0
  503. maxframe/tensor/linalg/solve_triangular.py +103 -0
  504. maxframe/tensor/linalg/svd.py +167 -0
  505. maxframe/tensor/linalg/tensordot.py +213 -0
  506. maxframe/tensor/linalg/vdot.py +73 -0
  507. maxframe/tensor/merge/__init__.py +4 -0
  508. maxframe/tensor/merge/append.py +74 -0
  509. maxframe/tensor/merge/column_stack.py +63 -0
  510. maxframe/tensor/merge/concatenate.py +3 -2
  511. maxframe/tensor/merge/dstack.py +71 -0
  512. maxframe/tensor/merge/hstack.py +70 -0
  513. maxframe/tensor/merge/stack.py +0 -2
  514. maxframe/tensor/merge/tests/test_merge.py +0 -2
  515. maxframe/tensor/misc/__init__.py +18 -5
  516. maxframe/tensor/misc/astype.py +10 -8
  517. maxframe/tensor/misc/broadcast_to.py +1 -1
  518. maxframe/tensor/misc/copy.py +64 -0
  519. maxframe/tensor/misc/diff.py +115 -0
  520. maxframe/tensor/misc/flatten.py +63 -0
  521. maxframe/tensor/misc/in1d.py +94 -0
  522. maxframe/tensor/misc/isin.py +130 -0
  523. maxframe/tensor/misc/ndim.py +53 -0
  524. maxframe/tensor/misc/ravel.py +0 -2
  525. maxframe/tensor/misc/repeat.py +129 -0
  526. maxframe/tensor/misc/searchsorted.py +147 -0
  527. maxframe/tensor/misc/setdiff1d.py +58 -0
  528. maxframe/tensor/misc/squeeze.py +117 -0
  529. maxframe/tensor/misc/swapaxes.py +113 -0
  530. maxframe/tensor/misc/tests/test_misc.py +0 -2
  531. maxframe/tensor/misc/transpose.py +8 -4
  532. maxframe/tensor/misc/trapezoid.py +123 -0
  533. maxframe/tensor/misc/unique.py +0 -1
  534. maxframe/tensor/misc/where.py +10 -8
  535. maxframe/tensor/operators.py +0 -34
  536. maxframe/tensor/random/__init__.py +3 -5
  537. maxframe/tensor/random/binomial.py +0 -2
  538. maxframe/tensor/random/bytes.py +0 -2
  539. maxframe/tensor/random/chisquare.py +0 -2
  540. maxframe/tensor/random/choice.py +9 -8
  541. maxframe/tensor/random/core.py +20 -5
  542. maxframe/tensor/random/dirichlet.py +0 -2
  543. maxframe/tensor/random/exponential.py +0 -2
  544. maxframe/tensor/random/f.py +2 -4
  545. maxframe/tensor/random/gamma.py +0 -2
  546. maxframe/tensor/random/geometric.py +0 -2
  547. maxframe/tensor/random/gumbel.py +0 -2
  548. maxframe/tensor/random/hypergeometric.py +0 -2
  549. maxframe/tensor/random/laplace.py +2 -4
  550. maxframe/tensor/random/logistic.py +0 -2
  551. maxframe/tensor/random/lognormal.py +0 -2
  552. maxframe/tensor/random/logseries.py +0 -2
  553. maxframe/tensor/random/multinomial.py +0 -2
  554. maxframe/tensor/random/multivariate_normal.py +0 -2
  555. maxframe/tensor/random/negative_binomial.py +0 -2
  556. maxframe/tensor/random/noncentral_chisquare.py +0 -2
  557. maxframe/tensor/random/noncentral_f.py +1 -3
  558. maxframe/tensor/random/normal.py +0 -2
  559. maxframe/tensor/random/pareto.py +0 -2
  560. maxframe/tensor/random/permutation.py +6 -3
  561. maxframe/tensor/random/poisson.py +0 -2
  562. maxframe/tensor/random/power.py +0 -2
  563. maxframe/tensor/random/rand.py +0 -2
  564. maxframe/tensor/random/randint.py +0 -2
  565. maxframe/tensor/random/randn.py +0 -2
  566. maxframe/tensor/random/random_integers.py +0 -2
  567. maxframe/tensor/random/random_sample.py +0 -2
  568. maxframe/tensor/random/rayleigh.py +0 -2
  569. maxframe/tensor/random/standard_cauchy.py +0 -2
  570. maxframe/tensor/random/standard_exponential.py +0 -2
  571. maxframe/tensor/random/standard_gamma.py +0 -2
  572. maxframe/tensor/random/standard_normal.py +0 -2
  573. maxframe/tensor/random/standard_t.py +0 -2
  574. maxframe/tensor/random/tests/__init__.py +0 -2
  575. maxframe/tensor/random/tests/test_random.py +0 -2
  576. maxframe/tensor/random/triangular.py +0 -2
  577. maxframe/tensor/random/uniform.py +0 -2
  578. maxframe/tensor/random/vonmises.py +0 -2
  579. maxframe/tensor/random/wald.py +0 -2
  580. maxframe/tensor/random/weibull.py +0 -2
  581. maxframe/tensor/random/zipf.py +0 -2
  582. maxframe/tensor/reduction/__init__.py +0 -2
  583. maxframe/tensor/reduction/all.py +0 -2
  584. maxframe/tensor/reduction/allclose.py +0 -2
  585. maxframe/tensor/reduction/any.py +0 -2
  586. maxframe/tensor/reduction/argmax.py +1 -3
  587. maxframe/tensor/reduction/argmin.py +1 -3
  588. maxframe/tensor/reduction/array_equal.py +0 -2
  589. maxframe/tensor/reduction/core.py +0 -2
  590. maxframe/tensor/reduction/count_nonzero.py +0 -2
  591. maxframe/tensor/reduction/cumprod.py +0 -2
  592. maxframe/tensor/reduction/cumsum.py +0 -2
  593. maxframe/tensor/reduction/max.py +0 -2
  594. maxframe/tensor/reduction/mean.py +0 -2
  595. maxframe/tensor/reduction/min.py +0 -2
  596. maxframe/tensor/reduction/nanargmax.py +0 -2
  597. maxframe/tensor/reduction/nanargmin.py +0 -2
  598. maxframe/tensor/reduction/nancumprod.py +0 -2
  599. maxframe/tensor/reduction/nancumsum.py +0 -2
  600. maxframe/tensor/reduction/nanmax.py +0 -2
  601. maxframe/tensor/reduction/nanmean.py +0 -2
  602. maxframe/tensor/reduction/nanmin.py +0 -2
  603. maxframe/tensor/reduction/nanprod.py +0 -2
  604. maxframe/tensor/reduction/nanstd.py +0 -2
  605. maxframe/tensor/reduction/nansum.py +0 -2
  606. maxframe/tensor/reduction/nanvar.py +0 -2
  607. maxframe/tensor/reduction/prod.py +0 -2
  608. maxframe/tensor/reduction/std.py +0 -2
  609. maxframe/tensor/reduction/sum.py +0 -2
  610. maxframe/tensor/reduction/tests/test_reduction.py +1 -4
  611. maxframe/tensor/reduction/var.py +0 -2
  612. maxframe/tensor/reshape/__init__.py +0 -2
  613. maxframe/tensor/reshape/reshape.py +6 -5
  614. maxframe/tensor/reshape/tests/__init__.py +0 -2
  615. maxframe/tensor/reshape/tests/test_reshape.py +0 -2
  616. maxframe/tensor/sort/__init__.py +16 -0
  617. maxframe/tensor/sort/argsort.py +150 -0
  618. maxframe/tensor/sort/sort.py +295 -0
  619. maxframe/tensor/special/__init__.py +37 -0
  620. maxframe/tensor/special/core.py +38 -0
  621. maxframe/tensor/special/misc.py +142 -0
  622. maxframe/tensor/special/statistical.py +56 -0
  623. maxframe/tensor/statistics/__init__.py +5 -0
  624. maxframe/tensor/statistics/average.py +143 -0
  625. maxframe/tensor/statistics/bincount.py +133 -0
  626. maxframe/tensor/statistics/quantile.py +10 -8
  627. maxframe/tensor/ufunc/__init__.py +0 -2
  628. maxframe/tensor/ufunc/ufunc.py +0 -2
  629. maxframe/tensor/utils.py +21 -3
  630. maxframe/tests/test_protocol.py +3 -3
  631. maxframe/tests/test_utils.py +210 -1
  632. maxframe/tests/utils.py +59 -1
  633. maxframe/udf.py +76 -6
  634. maxframe/utils.py +418 -17
  635. {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/METADATA +5 -1
  636. maxframe-2.0.0.dist-info/RECORD +939 -0
  637. {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/WHEEL +1 -1
  638. maxframe_client/clients/framedriver.py +19 -3
  639. maxframe_client/fetcher.py +113 -6
  640. maxframe_client/session/odps.py +173 -38
  641. maxframe_client/session/task.py +3 -1
  642. maxframe_client/tests/test_session.py +41 -5
  643. maxframe-1.3.0.dist-info/RECORD +0 -705
  644. {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,503 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+
17
+ from .... import tensor as mt
18
+ from ....core import ENTITY_TYPE
19
+ from ....lib import sparse
20
+ from ...core import BaseEstimator, TransformerMixin
21
+ from ...utils.extmath import _incremental_mean_and_var
22
+ from ...utils.validation import (
23
+ FLOAT_DTYPES,
24
+ _check_sample_weight,
25
+ check_array,
26
+ check_is_fitted,
27
+ )
28
+ from .utils import _handle_zeros_in_scale, _is_constant_feature
29
+
30
+
31
class StandardScaler(TransformerMixin, BaseEstimator):
    """Standardize features by removing the mean and scaling to unit variance.

    Each feature (column) is transformed independently as:

    .. code-block:: text

        z = (x - u) / s

    where `u` is the mean of that feature over the training samples (or zero
    if `with_mean=False`) and `s` is its standard deviation over the training
    samples (or one if `with_std=False`).

    The statistics are computed on the training set during :meth:`fit` /
    :meth:`partial_fit` and stored so that later data can be scaled with
    :meth:`transform`.

    Standardization is a common requirement for machine learning estimators:
    many of them behave badly when individual features do not look more or
    less like standard normally distributed data (e.g. Gaussian with 0 mean
    and unit variance). For instance, the RBF kernel of Support Vector
    Machines and the L1 and L2 regularizers of linear models assume that all
    features are centered around 0 and have variance of the same order. A
    feature whose variance is orders of magnitude larger than the others may
    dominate the objective function and keep the estimator from learning
    from the other features as expected.

    `StandardScaler` is sensitive to outliers, and the features may scale
    differently from each other in the presence of outliers. For an example
    visualization, refer to :ref:`Compare StandardScaler with other scalers
    <plot_all_scaling_standard_scaler_section>`.

    Sparse input is not supported by this implementation: ``partial_fit``,
    ``transform`` and ``inverse_transform`` raise ``NotImplementedError``
    when the validated input is sparse.

    Read more in the :ref:`User Guide <preprocessing_scaler>`.

    Parameters
    ----------
    copy : bool, default=True
        If False, try to avoid a copy and do inplace scaling instead.
        This is not guaranteed to always work inplace; a copy may still be
        returned.

    with_mean : bool, default=True
        If True, center the data before scaling.

    with_std : bool, default=True
        If True, scale the data to unit variance (or equivalently,
        unit standard deviation).

    Attributes
    ----------
    scale_ : tensor of shape (n_features,) or None
        Per feature relative scaling of the data to achieve zero mean and
        unit variance, computed from ``mt.sqrt(var_)``. Features detected as
        (near) constant get a scaling factor of 1 so the data is left as-is.
        ``None`` when ``with_std=False``.

    mean_ : tensor of shape (n_features,) or None
        The mean value for each feature in the training set.
        Equal to ``None`` when ``with_mean=False`` and ``with_std=False``.

    var_ : tensor of shape (n_features,) or None
        The variance for each feature in the training set. Used to compute
        `scale_`. Equal to ``None`` when ``with_mean=False`` and
        ``with_std=False``.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

    n_samples_seen_ : tensor of shape (n_features,)
        The number of samples processed by the estimator for each feature.
        It starts as a zero tensor whose dtype is ``int64`` when no
        `sample_weight` is given and the dtype of `X` otherwise. It is reset
        on new calls to ``fit`` but increments across ``partial_fit`` calls.

    See Also
    --------
    scale : Equivalent function without the estimator API.

    :class:`~sklearn.decomposition.PCA` : Further removes the linear
        correlation across features with 'whiten=True'.

    Notes
    -----
    NaNs are treated as missing values: disregarded in fit, and maintained in
    transform.

    We use a biased estimator for the standard deviation, equivalent to
    `numpy.std(x, ddof=0)`. Note that the choice of `ddof` is unlikely to
    affect model performance.

    Examples
    --------
    >>> from maxframe.learn.preprocessing import StandardScaler
    >>> data = [[0, 0], [0, 0], [1, 1], [1, 1]]
    >>> scaler = StandardScaler()
    >>> print(scaler.fit(data))
    StandardScaler()
    >>> print(scaler.mean_.execute())
    [0.5 0.5]
    >>> print(scaler.transform(data).execute())
    [[-1. -1.]
     [-1. -1.]
     [ 1.  1.]
     [ 1.  1.]]
    >>> print(scaler.transform([[2, 2]]).execute())
    [[3. 3.]]
    """

    def __init__(self, *, copy=True, with_mean=True, with_std=True):
        # Only the constructor arguments are stored here; all fitted state
        # is created later by ``partial_fit``.
        self.copy = copy
        self.with_mean = with_mean
        self.with_std = with_std

    def _reset(self):
        """Drop the fitted statistics, if any.

        The ``__init__`` parameters are left untouched.
        """
        # ``partial_fit`` sets all fitted attributes together, so the
        # presence of ``scale_`` is enough to tell whether there is
        # anything to clear.
        if not hasattr(self, "scale_"):
            return
        for fitted_name in ("scale_", "n_samples_seen_", "mean_", "var_"):
            delattr(self, fitted_name)

    def fit(
        self,
        X,
        y=None,
        sample_weight=None,
        execute=False,
        session=None,
        run_kwargs=None,
    ) -> "StandardScaler":
        """Compute the mean and std to be used for later scaling.

        Any previously fitted state is discarded first, then the work is
        delegated to :meth:`partial_fit`.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The data used to compute the mean and standard deviation
            used for later scaling along the features axis.

        y : None
            Ignored.

        sample_weight : array-like of shape (n_samples,), default=None
            Individual weights for each sample.

        execute : bool, default=False
            Forwarded to :meth:`partial_fit`.

        session : default=None
            Forwarded to :meth:`partial_fit`.

        run_kwargs : dict, default=None
            Forwarded to :meth:`partial_fit`.

        Returns
        -------
        self : object
            Fitted scaler.
        """
        # Start from a clean slate so statistics of earlier fits do not leak
        # into this one.
        self._reset()
        forwarded = dict(execute=execute, session=session, run_kwargs=run_kwargs)
        return self.partial_fit(X, y, sample_weight, **forwarded)

    def partial_fit(
        self,
        X,
        y=None,
        sample_weight=None,
        execute=False,
        session=None,
        run_kwargs=None,
    ):
        """Online computation of mean and std on X for later scaling.

        All of X is processed as a single batch. This is intended for cases
        when :meth:`fit` is not feasible due to very large number of
        `n_samples` or because X is read from a continuous stream.

        The algorithm for incremental mean and std is given in Equation 1.5a,b
        in Chan, Tony F., Gene H. Golub, and Randall J. LeVeque. "Algorithms
        for computing the sample variance: Analysis and recommendations."
        The American Statistician 37.3 (1983): 242-247.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The data used to compute the mean and standard deviation
            used for later scaling along the features axis.

        y : None
            Ignored.

        sample_weight : array-like of shape (n_samples,), default=None
            Individual weights for each sample.

        execute : bool, default=False
            If True, ``self.execute`` is called before returning.

        session : default=None
            Passed as ``session`` to ``self.execute`` when `execute` is True.

        run_kwargs : dict, default=None
            Extra keyword arguments passed to ``self.execute`` when
            `execute` is True.

        Returns
        -------
        self : object
            Fitted scaler.

        Raises
        ------
        NotImplementedError
            If the validated input is sparse.
        """
        is_first_call = not hasattr(self, "n_samples_seen_")
        X = self._validate_data(
            X,
            accept_sparse=("csr", "csc"),
            dtype=FLOAT_DTYPES,
            force_all_finite="allow-nan",
            reset=is_first_call,
        )
        n_features = X.shape[1]

        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)

        # The mean is updated even when ``with_mean=False`` because the
        # incremental variance computation depends on it.

        # The sample counter is kept per feature; it counts in int64 unless
        # weights are in use, in which case it accumulates in X's dtype.
        counter_dtype = np.int64 if sample_weight is None else X.dtype
        if not hasattr(self, "n_samples_seen_"):
            self.n_samples_seen_ = mt.zeros(n_features, dtype=counter_dtype)

        if sparse.issparse(X):
            raise NotImplementedError("Scaling on sparse tensors is not supported")

        # First pass: seed the running statistics.
        if not hasattr(self, "scale_"):
            self.mean_ = 0.0
            self.var_ = 0.0 if self.with_std else None

        if self.with_mean or self.with_std:
            updated_stats = _incremental_mean_and_var(
                X,
                self.mean_,
                self.var_,
                self.n_samples_seen_,
                sample_weight=sample_weight,
            )
            self.mean_, self.var_, self.n_samples_seen_ = updated_stats
        else:
            # Nothing to learn except how many non-NaN samples were seen.
            self.mean_ = None
            self.var_ = None
            self.n_samples_seen_ += X.shape[0] - mt.isnan(X).sum(axis=0)

        # NOTE: ``n_samples_seen_`` stays a per-feature array here; it is not
        # collapsed to a single integer when all features share one count.

        if not self.with_std:
            self.scale_ = None
        else:
            # Near constant features are detected on the raw variances,
            # before taking the square root.
            constant_mask = _is_constant_feature(
                self.var_, self.mean_, self.n_samples_seen_
            )
            self.scale_ = _handle_zeros_in_scale(
                mt.sqrt(self.var_), copy=False, constant_mask=constant_mask
            )

        if execute:
            self.execute(session=session, **(run_kwargs or {}))
        return self

    def transform(self, X, copy=None, execute=False, session=None, run_kwargs=None):
        """Perform standardization by centering and scaling.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The data used to scale along the features axis.
        copy : bool, default=None
            Copy the input X or not. When None, the value of ``self.copy``
            is used.
        execute : bool, default=False
            If True, ``execute`` is called on the result before returning.
        session : default=None
            Passed as ``session`` to ``execute`` when `execute` is True.
        run_kwargs : dict, default=None
            Extra keyword arguments passed to ``execute`` when `execute`
            is True.

        Returns
        -------
        X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)
            Transformed array.

        Raises
        ------
        NotImplementedError
            If the validated input is sparse.
        """
        check_is_fitted(self)

        if copy is None:
            copy = self.copy
        X = self._validate_data(
            X,
            reset=False,
            accept_sparse="csr",
            copy=copy,
            dtype=FLOAT_DTYPES,
            force_all_finite="allow-nan",
        )

        if sparse.issparse(X):
            raise NotImplementedError("Scaling on sparse tensors is not supported")

        # Center first, then scale: z = (x - mean) / scale.
        if self.with_mean:
            X -= self.mean_
        if self.with_std:
            X /= self.scale_
        if execute:
            X.execute(session=session, **(run_kwargs or {}))
        return X

    def inverse_transform(
        self, X, copy=None, execute=False, session=None, run_kwargs=None
    ):
        """Scale back the data to the original representation.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The data used to scale along the features axis.
        copy : bool, default=None
            Copy the input X or not. When None, the value of ``self.copy``
            is used.
        execute : bool, default=False
            If True, ``execute`` is called on the result before returning.
        session : default=None
            Passed as ``session`` to ``execute`` when `execute` is True.
        run_kwargs : dict, default=None
            Extra keyword arguments passed to ``execute`` when `execute`
            is True.

        Returns
        -------
        X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)
            Transformed array.

        Raises
        ------
        NotImplementedError
            If the validated input is sparse.
        """
        check_is_fitted(self)

        if copy is None:
            copy = self.copy
        X = check_array(
            X,
            accept_sparse="csr",
            copy=copy,
            dtype=FLOAT_DTYPES,
            force_all_finite="allow-nan",
        )

        if sparse.issparse(X):
            raise NotImplementedError("Scaling on sparse tensors is not supported")

        # Undo ``transform`` in reverse order: x = z * scale + mean.
        if self.with_std:
            X *= self.scale_
        if self.with_mean:
            X += self.mean_
        if execute:
            X.execute(session=session, **(run_kwargs or {}))
        return X
398
+
399
+
400
def scale(X, *, axis=0, with_mean=True, with_std=True, copy=True):
    """Standardize a dataset along any axis.

    Center to the mean and component wise scale to unit variance. The work
    is done by fitting a :class:`StandardScaler` on `X` and transforming `X`
    with it.

    Read more in the :ref:`User Guide <preprocessing_scaler>`.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The data to center and scale. Input that is not already an entity
        is converted with ``mt.tensor``. A 1-D input is handled as a single
        column and returned as 1-D again.

    axis : {0, 1}, default=0
        Axis used to compute the means and standard deviations along. If 0,
        independently standardize each feature, otherwise (if 1) standardize
        each sample.

    with_mean : bool, default=True
        If True, center the data before scaling.

    with_std : bool, default=True
        If True, scale the data to unit variance (or equivalently,
        unit standard deviation).

    copy : bool, default=True
        If False, try to avoid a copy and scale in place.
        This is not guaranteed to always work in place; a copy may still be
        returned even with copy=False.

    Returns
    -------
    X_tr : {ndarray, sparse matrix} of shape (n_samples, n_features)
        The transformed data.

    See Also
    --------
    StandardScaler : Performs scaling to unit variance using the Transformer
        API (e.g. as part of a preprocessing
        :class:`~sklearn.pipeline.Pipeline`).

    Notes
    -----
    NOTE(review): the scikit-learn guidance about passing scipy.sparse
    CSC matrices with `with_mean=False` probably does not apply here,
    because the underlying `StandardScaler` raises ``NotImplementedError``
    for sparse tensors -- confirm before relying on sparse input.

    NaNs are treated as missing values: disregarded to compute the statistics,
    and maintained during the data transformation.

    We use a biased estimator for the standard deviation, equivalent to
    `numpy.std(x, ddof=0)`. Note that the choice of `ddof` is unlikely to
    affect model performance.

    For a comparison of the different scalers, transformers, and normalizers,
    see: :ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`.

    .. warning:: Risk of data leak

        Do not use :func:`~sklearn.preprocessing.scale` unless you know
        what you are doing. A common mistake is to apply it to the entire data
        *before* splitting into training and test sets. This will bias the
        model evaluation because information would have leaked from the test
        set to the training set.
        In general, we recommend using
        :class:`~sklearn.preprocessing.StandardScaler` within a
        :ref:`Pipeline <pipeline>` in order to prevent most risks of data
        leaking: `pipe = make_pipeline(StandardScaler(), LogisticRegression())`.

    Examples
    --------
    >>> from maxframe.learn.preprocessing import scale
    >>> X = [[-2, 1, 2], [-1, 0, 1]]
    >>> scale(X, axis=0).execute()  # scaling each column independently
    array([[-1.,  1.,  1.],
           [ 1., -1., -1.]])
    >>> scale(X, axis=1).execute()  # scaling each row independently
    array([[-1.37...,  0.39...,  0.98...],
           [-1.22...,  0.     ,  1.22...]])
    """
    if not isinstance(X, ENTITY_TYPE):
        X = mt.tensor(X)

    # Remember the original layout so it can be restored on the way out.
    input_was_1d = X.ndim == 1
    scale_rows = axis == 1

    if input_was_1d:
        # Treat a vector as a single feature column.
        X = X.reshape((X.shape[0], 1))
    if scale_rows:
        # The scaler always works per column, so rows become columns.
        X = X.T

    transformed = StandardScaler(
        with_mean=with_mean, with_std=with_std, copy=copy
    ).fit_transform(X)

    if scale_rows:
        transformed = transformed.T
    if input_was_1d:
        transformed = transformed.reshape(transformed.shape[0])
    return transformed
@@ -0,0 +1,79 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+
17
+ from .... import tensor as mt
18
+ from ....tensor.core import TENSOR_TYPE
19
+
20
+
21
+ def _handle_zeros_in_scale(scale, copy=True, constant_mask=None):
22
+ """Set scales of near constant features to 1.
23
+
24
+ The goal is to avoid division by very small or zero values.
25
+
26
+ Near constant features are detected automatically by identifying
27
+ scales close to machine precision unless they are precomputed by
28
+ the caller and passed with the `constant_mask` kwarg.
29
+
30
+ Typically for standard scaling, the scales are the standard
31
+ deviation while near constant features are better detected on the
32
+ computed variances which are closer to machine precision by
33
+ construction.
34
+ """
35
+
36
+ # if we are fitting on 1D arrays, scale might be a scalar
37
+ if np.isscalar(scale): # pragma: no cover
38
+ if scale == 0.0:
39
+ scale = 1.0
40
+ return scale
41
+ elif hasattr(scale, "ndim") and scale.ndim == 0: # pragma: no cover
42
+ # scalar that is tensor
43
+ return mt.where(scale == 0.0, 1.0, scale)
44
+ elif isinstance(scale, (np.ndarray, TENSOR_TYPE)):
45
+ if copy:
46
+ # New array to avoid side-effects
47
+ scale = scale.copy()
48
+
49
+ if constant_mask is None:
50
+ constant_mask = scale < 10 * np.finfo(scale.dtype).eps
51
+ scale[constant_mask] = 1.0
52
+ return scale
53
+
54
+
55
def _get_real_sample_size(tileable):
    """Return the size of the first axis of `tileable` as a tensor.

    When ``tileable.shape[0]`` is known (not NaN) it is simply wrapped with
    ``mt.asarray``. Otherwise the size is computed lazily: the first element
    along every trailing axis is selected, the first-axis length of each
    chunk is taken via ``.mf.apply_chunk`` and the lengths are summed.
    """

    def get_real_shape(s):
        # Length of the chunk handed in by ``apply_chunk``.
        return s.shape[0]

    first_dim = tileable.shape[0]
    if np.isnan(first_dim):
        # Keep the whole first axis, index 0 on all remaining axes.
        selector = [slice(None)] + [0] * (tileable.ndim - 1)
        per_chunk_sizes = tileable[selector].mf.apply_chunk(
            get_real_shape, shape=(np.nan,)
        )
        return per_chunk_sizes.sum()

    return mt.asarray(tileable.shape[0])
64
+
65
+
66
+ def _is_constant_feature(var, mean, n_samples):
67
+ """Detect if a feature is indistinguishable from a constant feature.
68
+
69
+ The detection is based on its computed variance and on the theoretical
70
+ error bounds of the '2 pass algorithm' for variance computation.
71
+
72
+ See "Algorithms for computing the sample variance: analysis and
73
+ recommendations", by Chan, Golub, and LeVeque.
74
+ """
75
+ # In scikit-learn, variance is always computed using float64 accumulators.
76
+ eps = np.finfo(np.float64).eps
77
+
78
+ upper_bound = n_samples * eps * var + (n_samples * mean * eps) ** 2
79
+ return var <= upper_bound
@@ -0,0 +1,16 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ._label_binarizer import LabelBinarizer, _label_binarize, label_binarize
16
+ from ._label_encoder import LabelEncoder