maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (443) hide show
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win32.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +9 -8
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +38 -1
  15. maxframe/codegen/spe/dataframe/misc.py +11 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +32 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +39 -18
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  29. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  30. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  31. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  32. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  33. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  34. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  35. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  36. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  37. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  38. maxframe/codegen/spe/tensor/__init__.py +3 -0
  39. maxframe/codegen/spe/tensor/datasource.py +1 -0
  40. maxframe/codegen/spe/tensor/fft.py +74 -0
  41. maxframe/codegen/spe/tensor/linalg.py +29 -2
  42. maxframe/codegen/spe/tensor/misc.py +79 -25
  43. maxframe/codegen/spe/tensor/spatial.py +45 -0
  44. maxframe/codegen/spe/tensor/statistics.py +44 -0
  45. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  46. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  47. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  48. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  49. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  50. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  51. maxframe/codegen/spe/utils.py +2 -0
  52. maxframe/config/config.py +73 -9
  53. maxframe/config/tests/test_validators.py +13 -1
  54. maxframe/config/validators.py +49 -0
  55. maxframe/conftest.py +54 -17
  56. maxframe/core/accessor.py +2 -2
  57. maxframe/core/base.py +2 -1
  58. maxframe/core/entity/core.py +5 -0
  59. maxframe/core/entity/tileables.py +3 -1
  60. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  61. maxframe/core/graph/entity.py +8 -3
  62. maxframe/core/mode.py +6 -1
  63. maxframe/core/operator/base.py +9 -2
  64. maxframe/core/operator/core.py +10 -2
  65. maxframe/core/operator/utils.py +13 -0
  66. maxframe/dataframe/__init__.py +12 -5
  67. maxframe/dataframe/accessors/__init__.py +1 -1
  68. maxframe/dataframe/accessors/compat.py +45 -0
  69. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  70. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  71. maxframe/dataframe/accessors/dict_/core.py +48 -0
  72. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  73. maxframe/dataframe/accessors/dict_/length.py +7 -16
  74. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  75. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  76. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  77. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  78. maxframe/dataframe/accessors/list_/core.py +48 -0
  79. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  80. maxframe/dataframe/accessors/list_/length.py +7 -16
  81. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  82. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  83. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  84. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  85. maxframe/dataframe/accessors/struct_/core.py +43 -0
  86. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  87. maxframe/dataframe/accessors/struct_/field.py +123 -0
  88. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  89. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  90. maxframe/dataframe/arithmetic/__init__.py +18 -4
  91. maxframe/dataframe/arithmetic/between.py +106 -0
  92. maxframe/dataframe/arithmetic/dot.py +237 -0
  93. maxframe/dataframe/arithmetic/maximum.py +33 -0
  94. maxframe/dataframe/arithmetic/minimum.py +33 -0
  95. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  96. maxframe/dataframe/core.py +161 -224
  97. maxframe/dataframe/datasource/__init__.py +18 -0
  98. maxframe/dataframe/datasource/core.py +6 -0
  99. maxframe/dataframe/datasource/direct.py +57 -0
  100. maxframe/dataframe/datasource/from_dict.py +124 -0
  101. maxframe/dataframe/datasource/from_index.py +1 -1
  102. maxframe/dataframe/datasource/from_records.py +77 -0
  103. maxframe/dataframe/datasource/from_tensor.py +109 -41
  104. maxframe/dataframe/datasource/read_csv.py +21 -14
  105. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  106. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  107. maxframe/dataframe/datasource/read_parquet.py +38 -39
  108. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  109. maxframe/dataframe/datastore/__init__.py +11 -1
  110. maxframe/dataframe/datastore/direct.py +268 -0
  111. maxframe/dataframe/datastore/to_csv.py +29 -41
  112. maxframe/dataframe/datastore/to_odps.py +36 -4
  113. maxframe/dataframe/extensions/__init__.py +20 -4
  114. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  115. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  116. maxframe/dataframe/extensions/collect_kv.py +126 -0
  117. maxframe/dataframe/extensions/extract_kv.py +177 -0
  118. maxframe/dataframe/extensions/flatjson.py +2 -1
  119. maxframe/dataframe/extensions/map_reduce.py +263 -0
  120. maxframe/dataframe/extensions/rebalance.py +62 -0
  121. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  122. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  123. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  124. maxframe/dataframe/groupby/__init__.py +17 -2
  125. maxframe/dataframe/groupby/aggregation.py +86 -49
  126. maxframe/dataframe/groupby/apply.py +1 -1
  127. maxframe/dataframe/groupby/apply_chunk.py +19 -5
  128. maxframe/dataframe/groupby/core.py +116 -16
  129. maxframe/dataframe/groupby/cum.py +4 -25
  130. maxframe/dataframe/groupby/expanding.py +264 -0
  131. maxframe/dataframe/groupby/fill.py +1 -1
  132. maxframe/dataframe/groupby/getitem.py +12 -5
  133. maxframe/dataframe/groupby/head.py +11 -1
  134. maxframe/dataframe/groupby/rank.py +136 -0
  135. maxframe/dataframe/groupby/rolling.py +206 -0
  136. maxframe/dataframe/groupby/shift.py +114 -0
  137. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  138. maxframe/dataframe/indexing/__init__.py +22 -2
  139. maxframe/dataframe/indexing/droplevel.py +195 -0
  140. maxframe/dataframe/indexing/filter.py +169 -0
  141. maxframe/dataframe/indexing/get_level_values.py +76 -0
  142. maxframe/dataframe/indexing/iat.py +45 -0
  143. maxframe/dataframe/indexing/iloc.py +152 -12
  144. maxframe/dataframe/indexing/insert.py +46 -18
  145. maxframe/dataframe/indexing/loc.py +287 -7
  146. maxframe/dataframe/indexing/reindex.py +14 -5
  147. maxframe/dataframe/indexing/rename.py +6 -0
  148. maxframe/dataframe/indexing/rename_axis.py +2 -2
  149. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  150. maxframe/dataframe/indexing/reset_index.py +33 -6
  151. maxframe/dataframe/indexing/sample.py +8 -0
  152. maxframe/dataframe/indexing/setitem.py +3 -3
  153. maxframe/dataframe/indexing/swaplevel.py +185 -0
  154. maxframe/dataframe/indexing/take.py +99 -0
  155. maxframe/dataframe/indexing/truncate.py +140 -0
  156. maxframe/dataframe/indexing/where.py +0 -11
  157. maxframe/dataframe/indexing/xs.py +148 -0
  158. maxframe/dataframe/merge/__init__.py +15 -1
  159. maxframe/dataframe/merge/append.py +97 -98
  160. maxframe/dataframe/merge/combine.py +244 -0
  161. maxframe/dataframe/merge/combine_first.py +120 -0
  162. maxframe/dataframe/merge/compare.py +387 -0
  163. maxframe/dataframe/merge/concat.py +183 -0
  164. maxframe/dataframe/merge/update.py +271 -0
  165. maxframe/dataframe/misc/__init__.py +28 -11
  166. maxframe/dataframe/misc/_duplicate.py +10 -4
  167. maxframe/dataframe/misc/apply.py +1 -1
  168. maxframe/dataframe/misc/check_unique.py +82 -0
  169. maxframe/dataframe/misc/clip.py +145 -0
  170. maxframe/dataframe/misc/describe.py +175 -9
  171. maxframe/dataframe/misc/drop.py +31 -0
  172. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  173. maxframe/dataframe/misc/duplicated.py +2 -2
  174. maxframe/dataframe/misc/get_dummies.py +5 -1
  175. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  176. maxframe/dataframe/misc/isin.py +2 -2
  177. maxframe/dataframe/misc/map.py +125 -18
  178. maxframe/dataframe/misc/repeat.py +159 -0
  179. maxframe/dataframe/misc/tests/test_misc.py +48 -3
  180. maxframe/dataframe/misc/to_numeric.py +3 -0
  181. maxframe/dataframe/misc/transform.py +12 -5
  182. maxframe/dataframe/misc/transpose.py +13 -1
  183. maxframe/dataframe/misc/valid_index.py +115 -0
  184. maxframe/dataframe/misc/value_counts.py +38 -4
  185. maxframe/dataframe/missing/checkna.py +14 -6
  186. maxframe/dataframe/missing/dropna.py +5 -0
  187. maxframe/dataframe/missing/fillna.py +1 -1
  188. maxframe/dataframe/missing/replace.py +7 -4
  189. maxframe/dataframe/reduction/__init__.py +35 -16
  190. maxframe/dataframe/reduction/aggregation.py +43 -14
  191. maxframe/dataframe/reduction/all.py +2 -2
  192. maxframe/dataframe/reduction/any.py +2 -2
  193. maxframe/dataframe/reduction/argmax.py +103 -0
  194. maxframe/dataframe/reduction/argmin.py +103 -0
  195. maxframe/dataframe/reduction/core.py +80 -24
  196. maxframe/dataframe/reduction/count.py +13 -9
  197. maxframe/dataframe/reduction/cov.py +166 -0
  198. maxframe/dataframe/reduction/cummax.py +2 -2
  199. maxframe/dataframe/reduction/cummin.py +2 -2
  200. maxframe/dataframe/reduction/cumprod.py +2 -2
  201. maxframe/dataframe/reduction/cumsum.py +2 -2
  202. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  203. maxframe/dataframe/reduction/idxmax.py +185 -0
  204. maxframe/dataframe/reduction/idxmin.py +185 -0
  205. maxframe/dataframe/reduction/kurtosis.py +37 -30
  206. maxframe/dataframe/reduction/max.py +2 -2
  207. maxframe/dataframe/reduction/mean.py +9 -7
  208. maxframe/dataframe/reduction/median.py +2 -2
  209. maxframe/dataframe/reduction/min.py +2 -2
  210. maxframe/dataframe/reduction/mode.py +144 -0
  211. maxframe/dataframe/reduction/nunique.py +19 -11
  212. maxframe/dataframe/reduction/prod.py +18 -13
  213. maxframe/dataframe/reduction/reduction_size.py +2 -2
  214. maxframe/dataframe/reduction/sem.py +13 -9
  215. maxframe/dataframe/reduction/skew.py +31 -27
  216. maxframe/dataframe/reduction/str_concat.py +10 -7
  217. maxframe/dataframe/reduction/sum.py +18 -14
  218. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  219. maxframe/dataframe/reduction/unique.py +20 -3
  220. maxframe/dataframe/reduction/var.py +16 -12
  221. maxframe/dataframe/reshape/__init__.py +38 -0
  222. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  223. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  224. maxframe/dataframe/reshape/unstack.py +114 -0
  225. maxframe/dataframe/sort/__init__.py +16 -1
  226. maxframe/dataframe/sort/argsort.py +68 -0
  227. maxframe/dataframe/sort/core.py +2 -1
  228. maxframe/dataframe/sort/nlargest.py +238 -0
  229. maxframe/dataframe/sort/nsmallest.py +228 -0
  230. maxframe/dataframe/sort/rank.py +147 -0
  231. maxframe/dataframe/statistics/__init__.py +3 -3
  232. maxframe/dataframe/statistics/corr.py +1 -0
  233. maxframe/dataframe/statistics/quantile.py +2 -2
  234. maxframe/dataframe/tests/test_typing.py +104 -0
  235. maxframe/dataframe/tests/test_utils.py +66 -2
  236. maxframe/dataframe/tseries/__init__.py +19 -0
  237. maxframe/dataframe/tseries/at_time.py +61 -0
  238. maxframe/dataframe/tseries/between_time.py +122 -0
  239. maxframe/dataframe/typing_.py +185 -0
  240. maxframe/dataframe/utils.py +125 -52
  241. maxframe/dataframe/window/aggregation.py +8 -4
  242. maxframe/dataframe/window/core.py +14 -1
  243. maxframe/dataframe/window/ewm.py +1 -3
  244. maxframe/dataframe/window/expanding.py +37 -35
  245. maxframe/dataframe/window/rolling.py +49 -39
  246. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  247. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  248. maxframe/env.py +7 -4
  249. maxframe/errors.py +2 -2
  250. maxframe/io/odpsio/schema.py +9 -3
  251. maxframe/io/odpsio/tableio.py +7 -2
  252. maxframe/io/odpsio/tests/test_schema.py +198 -83
  253. maxframe/learn/__init__.py +10 -2
  254. maxframe/learn/cluster/__init__.py +15 -0
  255. maxframe/learn/cluster/_kmeans.py +782 -0
  256. maxframe/learn/contrib/llm/core.py +18 -7
  257. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  258. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  259. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  260. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  261. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  262. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  263. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  264. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  265. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  266. maxframe/learn/contrib/llm/models/managed.py +76 -11
  267. maxframe/learn/contrib/llm/models/openai.py +72 -0
  268. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  269. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  270. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  271. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  272. maxframe/learn/contrib/llm/text.py +348 -42
  273. maxframe/learn/contrib/models.py +4 -1
  274. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  275. maxframe/learn/contrib/xgboost/core.py +113 -4
  276. maxframe/learn/contrib/xgboost/predict.py +4 -2
  277. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  278. maxframe/learn/contrib/xgboost/train.py +7 -2
  279. maxframe/learn/core.py +66 -0
  280. maxframe/learn/linear_model/_base.py +58 -1
  281. maxframe/learn/linear_model/_lin_reg.py +1 -1
  282. maxframe/learn/metrics/__init__.py +6 -0
  283. maxframe/learn/metrics/_classification.py +145 -0
  284. maxframe/learn/metrics/_ranking.py +477 -0
  285. maxframe/learn/metrics/_scorer.py +60 -0
  286. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  287. maxframe/learn/metrics/pairwise/core.py +77 -0
  288. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  289. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  290. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  291. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  292. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  293. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  294. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  295. maxframe/learn/metrics/tests/__init__.py +13 -0
  296. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  297. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  298. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  299. maxframe/learn/utils/__init__.py +2 -1
  300. maxframe/learn/utils/checks.py +1 -2
  301. maxframe/learn/utils/core.py +59 -0
  302. maxframe/learn/utils/extmath.py +79 -9
  303. maxframe/learn/utils/odpsio.py +262 -0
  304. maxframe/learn/utils/validation.py +2 -2
  305. maxframe/lib/compat.py +40 -0
  306. maxframe/lib/dtypes_extension/__init__.py +16 -1
  307. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  308. maxframe/lib/dtypes_extension/blob.py +304 -0
  309. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  310. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  311. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  312. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  313. maxframe/lib/filesystem/_oss_lib/common.py +124 -50
  314. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  315. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  316. maxframe/lib/filesystem/base.py +1 -1
  317. maxframe/lib/filesystem/core.py +1 -1
  318. maxframe/lib/filesystem/oss.py +115 -46
  319. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  320. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  321. maxframe/lib/wrapped_pickle.py +10 -0
  322. maxframe/opcodes.py +41 -15
  323. maxframe/protocol.py +12 -0
  324. maxframe/remote/core.py +4 -0
  325. maxframe/serialization/__init__.py +11 -2
  326. maxframe/serialization/arrow.py +38 -13
  327. maxframe/serialization/blob.py +32 -0
  328. maxframe/serialization/core.cp37-win32.pyd +0 -0
  329. maxframe/serialization/core.pyx +39 -1
  330. maxframe/serialization/exception.py +2 -4
  331. maxframe/serialization/numpy.py +11 -0
  332. maxframe/serialization/pandas.py +46 -9
  333. maxframe/serialization/serializables/core.py +2 -2
  334. maxframe/serialization/tests/test_serial.py +31 -4
  335. maxframe/tensor/__init__.py +38 -8
  336. maxframe/tensor/arithmetic/__init__.py +19 -10
  337. maxframe/tensor/arithmetic/core.py +2 -2
  338. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  339. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
  340. maxframe/tensor/core.py +6 -2
  341. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  342. maxframe/tensor/extensions/__init__.py +2 -0
  343. maxframe/tensor/extensions/apply_chunk.py +3 -3
  344. maxframe/tensor/extensions/rebalance.py +65 -0
  345. maxframe/tensor/fft/__init__.py +32 -0
  346. maxframe/tensor/fft/core.py +168 -0
  347. maxframe/tensor/fft/fft.py +112 -0
  348. maxframe/tensor/fft/fft2.py +118 -0
  349. maxframe/tensor/fft/fftfreq.py +80 -0
  350. maxframe/tensor/fft/fftn.py +123 -0
  351. maxframe/tensor/fft/fftshift.py +79 -0
  352. maxframe/tensor/fft/hfft.py +112 -0
  353. maxframe/tensor/fft/ifft.py +114 -0
  354. maxframe/tensor/fft/ifft2.py +115 -0
  355. maxframe/tensor/fft/ifftn.py +123 -0
  356. maxframe/tensor/fft/ifftshift.py +73 -0
  357. maxframe/tensor/fft/ihfft.py +93 -0
  358. maxframe/tensor/fft/irfft.py +118 -0
  359. maxframe/tensor/fft/irfft2.py +62 -0
  360. maxframe/tensor/fft/irfftn.py +114 -0
  361. maxframe/tensor/fft/rfft.py +116 -0
  362. maxframe/tensor/fft/rfft2.py +63 -0
  363. maxframe/tensor/fft/rfftfreq.py +87 -0
  364. maxframe/tensor/fft/rfftn.py +113 -0
  365. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  366. maxframe/tensor/linalg/__init__.py +7 -0
  367. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  368. maxframe/tensor/linalg/cholesky.py +117 -0
  369. maxframe/tensor/linalg/einsum.py +339 -0
  370. maxframe/tensor/linalg/lstsq.py +100 -0
  371. maxframe/tensor/linalg/matrix_norm.py +75 -0
  372. maxframe/tensor/linalg/norm.py +249 -0
  373. maxframe/tensor/linalg/solve.py +72 -0
  374. maxframe/tensor/linalg/solve_triangular.py +2 -2
  375. maxframe/tensor/linalg/vector_norm.py +113 -0
  376. maxframe/tensor/misc/__init__.py +24 -1
  377. maxframe/tensor/misc/argwhere.py +72 -0
  378. maxframe/tensor/misc/array_split.py +46 -0
  379. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  380. maxframe/tensor/misc/copyto.py +130 -0
  381. maxframe/tensor/misc/delete.py +104 -0
  382. maxframe/tensor/misc/dsplit.py +68 -0
  383. maxframe/tensor/misc/ediff1d.py +74 -0
  384. maxframe/tensor/misc/expand_dims.py +85 -0
  385. maxframe/tensor/misc/flip.py +90 -0
  386. maxframe/tensor/misc/fliplr.py +64 -0
  387. maxframe/tensor/misc/flipud.py +68 -0
  388. maxframe/tensor/misc/hsplit.py +85 -0
  389. maxframe/tensor/misc/insert.py +139 -0
  390. maxframe/tensor/misc/moveaxis.py +83 -0
  391. maxframe/tensor/misc/result_type.py +88 -0
  392. maxframe/tensor/misc/roll.py +124 -0
  393. maxframe/tensor/misc/rollaxis.py +77 -0
  394. maxframe/tensor/misc/shape.py +89 -0
  395. maxframe/tensor/misc/split.py +190 -0
  396. maxframe/tensor/misc/tile.py +109 -0
  397. maxframe/tensor/misc/vsplit.py +74 -0
  398. maxframe/tensor/reduction/array_equal.py +2 -1
  399. maxframe/tensor/sort/__init__.py +2 -0
  400. maxframe/tensor/sort/argpartition.py +98 -0
  401. maxframe/tensor/sort/partition.py +228 -0
  402. maxframe/tensor/spatial/__init__.py +15 -0
  403. maxframe/tensor/spatial/distance/__init__.py +17 -0
  404. maxframe/tensor/spatial/distance/cdist.py +421 -0
  405. maxframe/tensor/spatial/distance/pdist.py +398 -0
  406. maxframe/tensor/spatial/distance/squareform.py +153 -0
  407. maxframe/tensor/special/__init__.py +159 -21
  408. maxframe/tensor/special/airy.py +55 -0
  409. maxframe/tensor/special/bessel.py +199 -0
  410. maxframe/tensor/special/core.py +65 -4
  411. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  412. maxframe/tensor/special/ellip_harm.py +55 -0
  413. maxframe/tensor/special/err_fresnel.py +223 -0
  414. maxframe/tensor/special/gamma_funcs.py +303 -0
  415. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  416. maxframe/tensor/special/info_theory.py +189 -0
  417. maxframe/tensor/special/misc.py +21 -0
  418. maxframe/tensor/statistics/__init__.py +6 -0
  419. maxframe/tensor/statistics/corrcoef.py +77 -0
  420. maxframe/tensor/statistics/cov.py +222 -0
  421. maxframe/tensor/statistics/digitize.py +126 -0
  422. maxframe/tensor/statistics/histogram.py +520 -0
  423. maxframe/tensor/statistics/median.py +85 -0
  424. maxframe/tensor/statistics/ptp.py +89 -0
  425. maxframe/tensor/utils.py +3 -3
  426. maxframe/tests/test_udf.py +61 -0
  427. maxframe/tests/test_utils.py +51 -6
  428. maxframe/tests/utils.py +0 -2
  429. maxframe/typing_.py +2 -0
  430. maxframe/udf.py +130 -9
  431. maxframe/utils.py +254 -27
  432. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
  433. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
  434. maxframe_client/fetcher.py +35 -4
  435. maxframe_client/session/odps.py +7 -2
  436. maxframe_client/session/task.py +8 -1
  437. maxframe_client/tests/test_fetcher.py +76 -3
  438. maxframe_client/tests/test_session.py +28 -1
  439. maxframe/dataframe/arrays.py +0 -864
  440. /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  441. /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  442. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  443. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,782 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import warnings
16
+ from typing import List
17
+
18
+ import numpy as np
19
+
20
+ from ...tensor.core import TENSOR_TYPE
21
+
22
+ try:
23
+ from sklearn.exceptions import ConvergenceWarning
24
+ except ImportError:
25
+ ConvergenceWarning = RuntimeWarning
26
+
27
+ from ... import opcodes
28
+ from ... import tensor as mt
29
+ from ...core import ENTITY_TYPE, EntityData, ExecutableTuple, OutputType
30
+ from ...core.operator import Operator
31
+ from ...serialization.serializables import (
32
+ AnyField,
33
+ BoolField,
34
+ Float64Field,
35
+ Int32Field,
36
+ KeyField,
37
+ StringField,
38
+ )
39
+ from ...tensor.random import RandomStateField
40
+ from ..core import BaseEstimator, ClusterMixin, LearnOperatorMixin, TransformerMixin
41
+ from ..metrics.pairwise import euclidean_distances
42
+ from ..utils.validation import _check_sample_weight, check_array, check_is_fitted
43
+
44
+
45
def _validate_center_shape(X, n_centers, centers):
    """Ensure explicit initial centers agree with ``n_centers`` and with ``X``.

    Parameters
    ----------
    X : object exposing ``shape``
        Input data; ``X.shape[1]`` is read as the feature count and may be
        NaN, in which case the feature comparison is skipped.
    n_centers : int
        Expected number of centers.
    centers : object exposing ``__len__`` and ``shape``
        Candidate initial centers.

    Raises
    ------
    ValueError
        If ``len(centers)`` differs from ``n_centers``, or if the feature
        count of ``X`` is not NaN and differs from ``centers.shape[1]``.
    """
    if len(centers) != n_centers:
        count_message = (
            "The shape of the initial centers (%s) "
            "does not match the number of clusters %i"
        )
        raise ValueError(count_message % (centers.shape, n_centers))

    if np.isnan(X.shape[1]):
        # Feature count of X is not known: nothing to compare against.
        return

    center_features = centers.shape[1]
    data_features = X.shape[1]
    if center_features != data_features:
        feature_message = (
            "The number of features of the initial centers %s "
            "does not match the number of features of the data %s."
        )
        raise ValueError(feature_message % (center_features, data_features))
58
+
59
+
60
class KMeansFitOp(LearnOperatorMixin, Operator):
    """Operator describing a K-Means fit.

    Calling the operator wires ``X`` (plus ``sample_weight`` and ``init`` when
    they are entities) as inputs and declares four outputs: cluster centers,
    labels, inertia and the number of iterations.
    """

    _op_type_ = opcodes.KMEANS_FIT

    # Entity inputs.
    X = KeyField("X")
    sample_weight = KeyField("sample_weight", default=None)
    # Estimator hyper-parameters; every field defaults to None.
    n_clusters = Int32Field("n_clusters", default=None)
    # Either a strategy name or explicit centers (array-like / tensor).
    init = AnyField("init", default=None)
    n_init = Int32Field("n_init", default=None)
    max_iter = Int32Field("max_iter", default=None)
    tol = Float64Field("tol", default=None)
    verbose = Int32Field("verbose", default=None)
    random_state = RandomStateField("random_state", default=None)
    copy_x = BoolField("copy_x", default=None)
    algorithm = StringField("algorithm", default=None)
    oversampling_factor = Int32Field("oversampling_factor", default=None)
    init_iter = Int32Field("init_iter", default=None)

    @property
    def output_limit(self) -> int:
        """Number of outputs; matches the four specs built in ``__call__``."""
        return 4

    @classmethod
    def _set_inputs(cls, op: "KMeansFitOp", inputs: List[EntityData]):
        """Rebind entity-valued fields of ``op`` from its input list.

        Inputs are consumed in the order ``__call__`` appends them: ``X``,
        then ``sample_weight`` and ``init`` only when the corresponding field
        currently holds an entity.
        """
        super()._set_inputs(op, inputs)
        input_iter = iter(op._inputs)
        op.X = next(input_iter)
        if isinstance(op.sample_weight, ENTITY_TYPE):
            op.sample_weight = next(input_iter)
        if isinstance(op.init, ENTITY_TYPE):
            op.init = next(input_iter)

    def __call__(self, X, sample_weight=None, init=None):
        """Create the four output tileables for fitting on ``X``.

        Parameters
        ----------
        X : entity
            Training data; ``X.shape`` is used to derive output shapes.
        sample_weight : entity or other, optional
            Added to the inputs only when it is an entity.
        init : entity or other, optional
            Added to the inputs only when it is an entity.

        Returns
        -------
        The result of ``new_tileables`` for the four output specs
        (cluster centers, labels, inertia, n_iter).
        """
        self.X = X
        self.sample_weight = sample_weight
        # NOTE(review): ``init`` is not stored on ``self`` here, while
        # ``_set_inputs`` decides whether to consume an input based on
        # ``op.init``. This presumably relies on the caller constructing the
        # operator with the same ``init`` -- confirm against the caller.

        inputs = [X]
        if isinstance(sample_weight, ENTITY_TYPE):
            inputs.append(sample_weight)
        if isinstance(init, ENTITY_TYPE):
            inputs.append(init)

        self._output_types = [OutputType.tensor] * 2 + [OutputType.scalar] * 2
        kws = [
            {
                "dtype": np.dtype(float),
                "shape": (np.nan, X.shape[1]),
            },  # cluster_centers
            {"dtype": np.dtype(int), "shape": (X.shape[0],)},  # labels
            {"dtype": np.dtype(float), "shape": ()},  # inertia
            {"dtype": np.dtype(int), "shape": ()},  # n_iter
        ]
        return self.new_tileables(inputs, kws=kws)

    def _check_params(self):
        """Validate hyper-parameters and normalize ``algorithm`` / ``n_init``.

        Side effects: ``algorithm`` is rewritten to ``"full"`` when it is
        ``"auto"`` or when ``"elkan"`` is requested for a single cluster, and
        ``n_init`` is forced to 1 when ``init`` exposes ``__array__``.

        Raises
        ------
        ValueError
            For non-positive ``n_init`` / ``max_iter``, fewer samples than
            clusters (when the sample count is known), an unknown
            ``algorithm``, or an unsupported ``init``.
        NotImplementedError
            If ``init`` is callable.
        """
        # n_init
        if self.n_init <= 0:
            raise ValueError(f"n_init should be > 0, got {self.n_init} instead.")

        # max_iter
        if self.max_iter <= 0:
            raise ValueError(f"max_iter should be > 0, got {self.max_iter} instead.")

        # n_clusters: only checked when the number of samples is known
        if not np.isnan(self.X.shape[0]) and self.X.shape[0] < self.n_clusters:
            raise ValueError(
                f"n_samples={self.X.shape[0]} should be >= n_clusters={self.n_clusters}."
            )

        # algorithm
        if self.algorithm not in ("auto", "full", "elkan"):
            raise ValueError(
                f"Algorithm must be 'auto', 'full' or 'elkan', "
                f"got {self.algorithm} instead."
            )

        if self.algorithm == "auto":
            # note:
            # Different from scikit-learn,
            # for now, full seems more efficient when data is large,
            # elkan needs to be tuned more
            # old: algorithm = "full" if self.n_clusters == 1 else "elkan"
            self.algorithm = "full"
        if self.algorithm == "elkan" and self.n_clusters == 1:
            warnings.warn(
                "algorithm='elkan' doesn't make sense for a single "
                "cluster. Using 'full' instead.",
                RuntimeWarning,
            )
            self.algorithm = "full"

        # init
        # fixme remove when callable init supported
        if callable(self.init):
            raise NotImplementedError("Callable init param not implemented by now")

        # The callable() clause below cannot be hit while the guard above is
        # in place; it is kept for when callable init becomes supported.
        if not (
            hasattr(self.init, "__array__")
            or isinstance(self.init, TENSOR_TYPE)
            or callable(self.init)
            or (
                isinstance(self.init, str)
                and self.init in ["k-means++", "k-means||", "random"]
            )
        ):
            # The option name is spelled exactly as the accepted literal.
            raise ValueError(
                f"init should be either 'k-means++', 'k-means||', 'random', "
                f"a tensor, a ndarray or a "
                f"callable, got '{self.init}' instead."
            )

        if hasattr(self.init, "__array__") and self.n_init != 1:
            warnings.warn(
                f"Explicit initial center position passed: performing only"
                f" one init in {self.__class__.__name__} instead of "
                f"n_init={self.n_init}.",
                RuntimeWarning,
                stacklevel=2,
            )
            self.n_init = 1
179
+
180
+
181
class KMeansPredictOp(LearnOperatorMixin, Operator):
    """Operator describing a K-Means prediction.

    Takes cluster centers, data ``X`` and an optional ``sample_weight`` as
    inputs and declares two outputs: an integer tensor with one entry per row
    of ``X`` and a float scalar (presumably labels and inertia -- confirm
    against the executor).
    """

    _op_type_ = opcodes.KMEANS_PREDICT

    cluster_centers = KeyField("cluster_centers")
    X = KeyField("X")
    sample_weight = KeyField("sample_weight", default=None)

    @property
    def output_limit(self) -> int:
        """Number of outputs; matches the two specs built in ``__call__``."""
        return 2

    @classmethod
    def _set_inputs(cls, op: "KMeansPredictOp", inputs: List[EntityData]):
        """Rebind the key fields of ``op`` from its positional inputs."""
        super()._set_inputs(op, inputs)
        # Positions mirror the order in which __call__ assembles the inputs.
        op.cluster_centers, op.X = op.inputs[0], op.inputs[1]
        weight_given = op.sample_weight is not None
        if weight_given:
            op.sample_weight = op.inputs[2]

    def __call__(self, cluster_centers, X, sample_weight=None):
        """Create the two output tileables for predicting on ``X``.

        ``sample_weight`` is appended to the inputs only when it is not None.
        """
        self.cluster_centers, self.X = cluster_centers, X
        self.sample_weight = sample_weight

        optional_inputs = [] if sample_weight is None else [sample_weight]
        inputs = [cluster_centers, X] + optional_inputs

        self._output_types = [OutputType.tensor, OutputType.scalar]
        per_row_spec = {"dtype": np.dtype(int), "shape": (X.shape[0],)}
        scalar_spec = {"dtype": np.dtype(float), "shape": ()}
        return self.new_tileables(inputs, kws=[per_row_spec, scalar_spec])
215
+
216
+
217
class KMeans(TransformerMixin, ClusterMixin, BaseEstimator):
    """K-Means clustering.

    Read more in the :ref:`User Guide <k_means>`.

    Parameters
    ----------

    n_clusters : int, default=8
        The number of clusters to form as well as the number of
        centroids to generate.

    init : {'k-means++', 'k-means||', 'random'} or tensor of shape \
            (n_clusters, n_features), default='k-means||'
        Method for initialization, defaults to 'k-means||':

        'k-means++' : selects initial cluster centers for k-means
        clustering in a smart way to speed up convergence. See section
        Notes in k_init for more details.

        'k-means||': scalable k-means++.

        'random': choose k observations (rows) at random from data for
        the initial centroids.

        If a tensor is passed, it should be of shape (n_clusters, n_features)
        and gives the initial centers.

    n_init : int, default=1
        Number of times the k-means algorithm will be run with different
        centroid seeds. The final results will be the best output of
        n_init consecutive runs in terms of inertia.

    max_iter : int, default=300
        Maximum number of iterations of the k-means algorithm for a
        single run.

    tol : float, default=1e-4
        Relative tolerance with regards to inertia to declare convergence.

    verbose : int, default=0
        Verbosity mode.

    random_state : int, RandomState instance, default=None
        Determines random number generation for centroid initialization. Use
        an int to make the randomness deterministic.
        See :term:`Glossary <random_state>`.

    copy_x : bool, default=True
        When pre-computing distances it is more numerically accurate to center
        the data first. If copy_x is True (default), then the original data is
        not modified, ensuring X is C-contiguous. If False, the original data
        is modified, and put back before the function returns, but small
        numerical differences may be introduced by subtracting and then adding
        the data mean, in this case it will also not ensure that data is
        C-contiguous which may cause a significant slowdown.

    algorithm : {"auto", "full", "elkan"}, default="auto"
        K-means algorithm to use. The classical EM-style algorithm is "full".
        The "elkan" variation is more efficient by using the triangle
        inequality, but currently doesn't support sparse data. "auto" chooses
        "elkan" for dense data and "full" for sparse data.

    oversampling_factor: int, default=2
        Only works for k-means||, used in each iteration of k-means||.

    init_iter: int, default=5
        Only works for k-means||, indicates how many iterations are required.

    Attributes
    ----------
    cluster_centers_ : tensor of shape (n_clusters, n_features)
        Coordinates of cluster centers. If the algorithm stops before fully
        converging (see ``tol`` and ``max_iter``), these will not be
        consistent with ``labels_``.

    labels_ : tensor of shape (n_samples,)
        Labels of each point

    inertia_ : float
        Sum of squared distances of samples to their closest cluster center.

    n_iter_ : int
        Number of iterations run.

    See Also
    --------

    MiniBatchKMeans
        Alternative online implementation that does incremental updates
        of the centers positions using mini-batches.
        For large scale learning (say n_samples > 10k) MiniBatchKMeans is
        probably much faster than the default batch implementation.

    Notes
    -----
    The k-means problem is solved using either Lloyd's or Elkan's algorithm.

    The average complexity is given by O(k n T), where n is the number of
    samples and T is the number of iterations.

    The worst case complexity is given by O(n^(k+2/p)) with
    n = n_samples, p = n_features. (D. Arthur and S. Vassilvitskii,
    'How slow is the k-means method?' SoCG2006)

    In practice, the k-means algorithm is very fast (one of the fastest
    clustering algorithms available), but it falls in local minima. That's why
    it can be useful to restart it several times.

    If the algorithm stops before fully converging (because of ``tol`` or
    ``max_iter``), ``labels_`` and ``cluster_centers_`` will not be consistent,
    i.e. the ``cluster_centers_`` will not be the means of the points in each
    cluster. Also, the estimator will reassign ``labels_`` after the last
    iteration to make ``labels_`` consistent with ``predict`` on the training
    set.

    Examples
    --------

    >>> from maxframe.learn.cluster import KMeans
    >>> import maxframe.tensor as mt
    >>> X = mt.array([[1, 2], [1, 4], [1, 0],
    ...               [10, 2], [10, 4], [10, 0]])
    >>> kmeans = KMeans(n_clusters=2, random_state=0, init='k-means++').fit(X).execute()
    >>> kmeans.labels_
    array([1, 1, 1, 0, 0, 0], dtype=int32)
    >>> kmeans.predict([[0, 0], [12, 3]]).execute()
    array([1, 0], dtype=int32)
    >>> kmeans.cluster_centers_
    array([[10.,  2.],
           [ 1.,  2.]])
    """

    def __init__(
        self,
        n_clusters=8,
        init="k-means||",
        n_init=1,
        max_iter=300,
        tol=1e-4,
        verbose=0,
        random_state=None,
        copy_x=True,
        algorithm="auto",
        oversampling_factor=2,
        init_iter=5,
    ):
        # Anything that is not already a RandomState (an int seed or None)
        # is wrapped, so the estimator always stores a RandomState instance.
        if not isinstance(random_state, np.random.RandomState):
            random_state = np.random.RandomState(random_state)

        self.n_clusters = n_clusters
        self.init = init
        self.n_init = n_init
        self.max_iter = max_iter
        self.tol = tol
        self.verbose = verbose
        self.random_state = random_state
        self.copy_x = copy_x
        self.algorithm = algorithm
        self.oversampling_factor = oversampling_factor
        self.init_iter = init_iter

    def _check_test_data(self, X):
        """Validate ``X`` and check its feature count against the fitted centers.

        Raises
        ------
        ValueError
            If the number of columns of ``X`` differs from the number of
            columns of ``cluster_centers_``.
        """
        X = check_array(
            X,
            accept_sparse=True,
            dtype=[np.float64, np.float32],
            order="C",
            accept_large_sparse=False,
        )
        # Two-element unpacking also rejects inputs that are not 2-D.
        _, n_features = X.shape
        expected_n_features = self.cluster_centers_.shape[1]
        if n_features != expected_n_features:  # pragma: no cover
            raise ValueError(
                f"Incorrect number of features. Got {n_features} features, "
                f"expected {expected_n_features}"
            )

        return X

    def fit(
        self,
        X,
        y=None,
        sample_weight=None,
        execute=False,
        session=None,
        run_kwargs=None,
    ):
        """Compute k-means clustering.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training instances to cluster. It must be noted that the data
            will be converted to C ordering, which will cause a memory
            copy if the given data is not C-contiguous.
            If a sparse matrix is passed, a copy will be made if it's not in
            CSR format.

        y : Ignored
            Not used, present here for API consistency by convention.

        sample_weight : array-like of shape (n_samples,), default=None
            The weights for each observation in X. If None, all observations
            are assigned equal weight.

        execute : bool, default=False
            If True, ``self.execute`` is called with ``session`` and
            ``run_kwargs`` before returning.

        session : optional
            Forwarded to ``self.execute`` when ``execute`` is True.

        run_kwargs : dict, optional
            Forwarded to ``self.execute`` when ``execute`` is True.

        Returns
        -------
        self
            Fitted estimator.
        """
        # When the column count is known, re-wrap X so that the chunk size on
        # axis 1 equals the full number of columns.
        n_columns = mt.tensor(X).shape[1]
        if not np.isnan(n_columns):
            X = mt.tensor(X, chunk_size={1: n_columns})

        X = self._validate_data(
            X,
            accept_sparse=True,
            dtype=[np.float64, np.float32],
            order="C",
            copy=self.copy_x,
            accept_large_sparse=False,
        )
        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)

        # Hyper-parameters forwarded unchanged from the estimator to the op.
        forwarded = (
            "n_clusters",
            "max_iter",
            "tol",
            "n_init",
            "verbose",
            "random_state",
            "copy_x",
            "algorithm",
            "oversampling_factor",
            "init_iter",
        )
        op = KMeansFitOp(
            X=X,
            init=self.init,
            sample_weight=sample_weight,
            **{name: getattr(self, name) for name in forwarded},
        )

        # check params beforehand to raise errors early; the op may have
        # adjusted ``algorithm`` / ``n_init``, so mirror them on the estimator
        op._check_params()
        self.algorithm = op.algorithm
        self.n_init = op.n_init

        # Validate init array
        initial_centers = self.init
        if hasattr(initial_centers, "__array__"):
            initial_centers = check_array(
                initial_centers, dtype=X.dtype.type, copy=True, order="C"
            )
            _validate_center_shape(X, self.n_clusters, initial_centers)

        centers, labels, inertia, n_iter = op(
            X, sample_weight=sample_weight, init=initial_centers
        )
        self.cluster_centers_ = centers
        self.labels_ = labels
        self.inertia_ = inertia
        self.n_iter_ = n_iter

        if execute:
            self.execute(session=session, run_kwargs=run_kwargs)
        return self

    def fit_predict(
        self,
        X,
        y=None,
        execute=False,
        sample_weight=None,
        session=None,
        run_kwargs=None,
    ):
        """Compute cluster centers and predict cluster index for each sample.

        Convenience method; returns the ``labels_`` attribute produced by
        ``fit``.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            New data to transform.

        y : Ignored
            Not used, present here for API consistency by convention.

        execute : bool, default=False
            Forwarded to ``fit``.

        sample_weight : array-like of shape (n_samples,), default=None
            The weights for each observation in X. If None, all observations
            are assigned equal weight.

        session : optional
            Forwarded to ``fit``.

        run_kwargs : dict, optional
            Forwarded to ``fit``.

        Returns
        -------
        labels : ndarray of shape (n_samples,)
            Index of the cluster each sample belongs to.
        """
        fitted = self.fit(
            X,
            execute=execute,
            sample_weight=sample_weight,
            session=session,
            run_kwargs=run_kwargs,
        )
        return fitted.labels_

    def fit_transform(
        self, X, y=None, sample_weight=None, session=None, run_kwargs=None
    ):
        """Compute clustering and transform X to cluster-distance space.

        Fits the estimator on ``X`` and then computes the distances from ``X``
        to the fitted centers; the distance computation is executed before
        returning.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            New data to transform.

        y : Ignored
            Not used, present here for API consistency by convention.

        sample_weight : array-like of shape (n_samples,), default=None
            The weights for each observation in X. If None, all observations
            are assigned equal weight.

        session : optional
            Session used to execute the distance computation.

        run_kwargs : dict, optional
            Extra keyword arguments for the execution.

        Returns
        -------
        X_new : array of shape (n_samples, n_clusters)
            X transformed in the new space.
        """
        # Currently, this just skips a copy of the data if it is not in
        # np.array or CSR format already.
        # XXX This skips _check_test_data, which may change the dtype;
        # we should refactor the input validation.
        self.fit(X, sample_weight=sample_weight)
        distances = self._transform(X, session=session, run_kwargs=run_kwargs)
        return distances

    def transform(self, X, session=None, run_kwargs=None):
        """Transform X to a cluster-distance space.

        In the new space, each dimension is the distance to the cluster
        centers. Note that even if X is sparse, the array returned by
        `transform` will typically be dense.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            New data to transform.

        session : optional
            Session used to execute the distance computation.

        run_kwargs : dict, optional
            Extra keyword arguments for the execution.

        Returns
        -------
        X_new : tensor of shape (n_samples, n_clusters)
            X transformed in the new space.
        """
        check_is_fitted(self)

        validated = self._check_test_data(X)
        return self._transform(validated, session=session, run_kwargs=run_kwargs)

    def _transform(self, X, session=None, run_kwargs=None):
        """guts of transform method; no input validation"""
        distances = euclidean_distances(X, self.cluster_centers_)
        return distances.execute(session=session, **(run_kwargs or {}))

    def _predict_outputs(self, X, sample_weight, execute, session, run_kwargs):
        """Shared pipeline of ``predict`` and ``score``.

        Checks that the estimator is fitted, validates ``X``, applies a
        ``KMeansPredictOp`` and returns its two outputs as an
        ``ExecutableTuple`` (executed first when ``execute`` is True).
        """
        check_is_fitted(self)

        X = self._check_test_data(X)

        predict_op = KMeansPredictOp()
        outputs = ExecutableTuple(predict_op(self.cluster_centers_, X, sample_weight))
        if execute:
            outputs = outputs.execute(session=session, **(run_kwargs or {}))
        return outputs

    def predict(
        self, X, sample_weight=None, execute=False, session=None, run_kwargs=None
    ):
        """Predict the closest cluster each sample in X belongs to.

        In the vector quantization literature, `cluster_centers_` is called
        the code book and each value returned by `predict` is the index of
        the closest code in the code book.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            New data to predict.

        sample_weight : array-like of shape (n_samples,), default=None
            The weights for each observation in X. If None, all observations
            are assigned equal weight.

        execute : bool, default=False
            If True, the outputs are executed before being returned.

        session : optional
            Session used when ``execute`` is True.

        run_kwargs : dict, optional
            Extra keyword arguments for the execution.

        Returns
        -------
        labels : tensor of shape (n_samples,)
            Index of the cluster each sample belongs to.
        """
        outputs = self._predict_outputs(X, sample_weight, execute, session, run_kwargs)
        # The first output of the predict operator holds the labels.
        return outputs[0]

    def score(
        self,
        X,
        y=None,
        execute=False,
        sample_weight=None,
        session=None,
        run_kwargs=None,
    ):
        """Opposite of the value of X on the K-means objective.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            New data.

        y : Ignored
            Not used, present here for API consistency by convention.

        execute : bool, default=False
            If True, the outputs are executed before being returned.

        sample_weight : array-like of shape (n_samples,), default=None
            The weights for each observation in X. If None, all observations
            are assigned equal weight.

        session : optional
            Session used when ``execute`` is True.

        run_kwargs : dict, optional
            Extra keyword arguments for the execution.

        Returns
        -------
        score : float
            Opposite of the value of X on the K-means objective.
        """
        outputs = self._predict_outputs(X, sample_weight, execute, session, run_kwargs)
        # The second output of the predict operator is the scalar score.
        return outputs[1]
650
+
651
+
652
def k_means(
    X,
    n_clusters,
    sample_weight=None,
    init="k-means||",
    n_init=10,
    max_iter=300,
    verbose=False,
    tol=1e-4,
    random_state=None,
    copy_x=True,
    algorithm="auto",
    oversampling_factor=2,
    init_iter=5,
    return_n_iter=False,
):
    """K-means clustering algorithm.

    Functional wrapper that builds a ``KMeans`` estimator from the given
    parameters, fits it on ``X`` and returns the fitted attributes.

    Parameters
    ----------
    X : Tensor, shape (n_samples, n_features)
        The observations to cluster. It must be noted that the data
        will be converted to C ordering, which will cause a memory copy
        if the given data is not C-contiguous.

    n_clusters : int
        The number of clusters to form as well as the number of
        centroids to generate.

    sample_weight : array-like, shape (n_samples,), optional
        The weights for each observation in X. If None, all observations
        are assigned equal weight (default: None)

    init : {'k-means++', 'k-means||', 'random', or tensor, or a callable}, optional
        Method for initialization, default to 'k-means||':

        'k-means++' : selects initial cluster centers for k-means
        clustering in a smart way to speed up convergence. See section
        Notes in k_init for more details.

        'k-means||': scalable k-means++.

        'random': choose k observations (rows) at random from data for
        the initial centroids.

        If an ndarray is passed, it should be of shape (n_clusters, n_features)
        and gives the initial centers.

        If a callable is passed, it should take arguments X, k and
        a random state and return an initialization.

    n_init : int, optional, default: 10
        Number of times the k-means algorithm will be run with different
        centroid seeds. The final results will be the best output of
        n_init consecutive runs in terms of inertia.

    max_iter : int, optional, default 300
        Maximum number of iterations of the k-means algorithm to run.

    verbose : boolean, optional
        Verbosity mode.

    tol : float, optional
        The relative increment in the results before declaring convergence.

    random_state : int, RandomState instance or None (default)
        Determines random number generation for centroid initialization. Use
        an int to make the randomness deterministic.
        See :term:`Glossary <random_state>`.

    copy_x : bool, optional
        When pre-computing distances it is more numerically accurate to center
        the data first. If copy_x is True (default), then the original data is
        not modified, ensuring X is C-contiguous. If False, the original data
        is modified, and put back before the function returns, but small
        numerical differences may be introduced by subtracting and then adding
        the data mean, in this case it will also not ensure that data is
        C-contiguous which may cause a significant slowdown.

    algorithm : "auto", "full" or "elkan", default="auto"
        K-means algorithm to use. The classical EM-style algorithm is "full".
        The "elkan" variation is more efficient by using the triangle
        inequality, but currently doesn't support sparse data. "auto" chooses
        "elkan" for dense data and "full" for sparse data.

    oversampling_factor: int, default=2
        Only works for k-means||, used in each iteration of k-means||.

    init_iter: int, default=5
        Only works for k-means||, indicates how many iterations are required.

    return_n_iter : bool, optional
        Whether or not to return the number of iterations.

    Returns
    -------
    centroid : float ndarray with shape (k, n_features)
        Centroids found at the last iteration of k-means.

    label : integer ndarray with shape (n_samples,)
        label[i] is the code or index of the centroid the
        i'th observation is closest to.

    inertia : float
        The final value of the inertia criterion (sum of squared distances to
        the closest centroid for all observations in the training set).

    best_n_iter : int
        Number of iterations corresponding to the best results.
        Returned only if `return_n_iter` is set to True.
    """

    estimator = KMeans(
        n_clusters=n_clusters,
        init=init,
        n_init=n_init,
        max_iter=max_iter,
        verbose=verbose,
        tol=tol,
        random_state=random_state,
        copy_x=copy_x,
        algorithm=algorithm,
        oversampling_factor=oversampling_factor,
        init_iter=init_iter,
    )
    estimator.fit(X, sample_weight=sample_weight)

    results = (estimator.cluster_centers_, estimator.labels_, estimator.inertia_)
    if return_n_iter:
        # The iteration count is only appended on request.
        results += (estimator.n_iter_,)
    return ExecutableTuple(results)