maxframe 2.0.0b2__cp39-cp39-macosx_10_9_universal2.whl → 2.2.0__cp39-cp39-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (391)
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cpython-39-darwin.so +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +6 -6
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +34 -1
  15. maxframe/codegen/spe/dataframe/misc.py +9 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +14 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +30 -17
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  28. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  29. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  30. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  31. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  32. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  33. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  34. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  35. maxframe/codegen/spe/tensor/__init__.py +3 -0
  36. maxframe/codegen/spe/tensor/fft.py +74 -0
  37. maxframe/codegen/spe/tensor/linalg.py +29 -2
  38. maxframe/codegen/spe/tensor/misc.py +79 -25
  39. maxframe/codegen/spe/tensor/spatial.py +45 -0
  40. maxframe/codegen/spe/tensor/statistics.py +44 -0
  41. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  42. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  43. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  44. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  45. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  46. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  47. maxframe/codegen/spe/utils.py +2 -0
  48. maxframe/config/config.py +70 -9
  49. maxframe/config/tests/test_validators.py +13 -1
  50. maxframe/config/validators.py +49 -0
  51. maxframe/conftest.py +44 -17
  52. maxframe/core/accessor.py +2 -2
  53. maxframe/core/entity/core.py +5 -0
  54. maxframe/core/entity/tileables.py +1 -1
  55. maxframe/core/graph/core.cpython-39-darwin.so +0 -0
  56. maxframe/core/graph/entity.py +1 -2
  57. maxframe/core/operator/base.py +9 -2
  58. maxframe/core/operator/core.py +10 -2
  59. maxframe/core/operator/utils.py +13 -0
  60. maxframe/dataframe/__init__.py +10 -3
  61. maxframe/dataframe/accessors/__init__.py +1 -1
  62. maxframe/dataframe/accessors/compat.py +45 -0
  63. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  64. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  65. maxframe/dataframe/accessors/dict_/core.py +48 -0
  66. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  67. maxframe/dataframe/accessors/dict_/length.py +7 -16
  68. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  69. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  70. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  71. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  72. maxframe/dataframe/accessors/list_/core.py +48 -0
  73. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  74. maxframe/dataframe/accessors/list_/length.py +7 -16
  75. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  76. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  77. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  78. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  79. maxframe/dataframe/accessors/struct_/core.py +43 -0
  80. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  81. maxframe/dataframe/accessors/struct_/field.py +123 -0
  82. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  83. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  84. maxframe/dataframe/arithmetic/__init__.py +14 -4
  85. maxframe/dataframe/arithmetic/between.py +106 -0
  86. maxframe/dataframe/arithmetic/dot.py +237 -0
  87. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  88. maxframe/dataframe/core.py +63 -118
  89. maxframe/dataframe/datasource/__init__.py +18 -0
  90. maxframe/dataframe/datasource/from_dict.py +124 -0
  91. maxframe/dataframe/datasource/from_index.py +1 -1
  92. maxframe/dataframe/datasource/from_records.py +77 -0
  93. maxframe/dataframe/datasource/from_tensor.py +109 -41
  94. maxframe/dataframe/datasource/read_csv.py +2 -3
  95. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  96. maxframe/dataframe/datastore/__init__.py +5 -1
  97. maxframe/dataframe/datastore/to_csv.py +29 -41
  98. maxframe/dataframe/datastore/to_odps.py +30 -4
  99. maxframe/dataframe/extensions/__init__.py +20 -4
  100. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  101. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  102. maxframe/dataframe/extensions/collect_kv.py +126 -0
  103. maxframe/dataframe/extensions/extract_kv.py +177 -0
  104. maxframe/dataframe/extensions/map_reduce.py +263 -0
  105. maxframe/dataframe/extensions/rebalance.py +62 -0
  106. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  107. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  108. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  109. maxframe/dataframe/groupby/__init__.py +12 -1
  110. maxframe/dataframe/groupby/aggregation.py +78 -45
  111. maxframe/dataframe/groupby/apply.py +1 -1
  112. maxframe/dataframe/groupby/apply_chunk.py +18 -2
  113. maxframe/dataframe/groupby/core.py +96 -12
  114. maxframe/dataframe/groupby/cum.py +4 -25
  115. maxframe/dataframe/groupby/expanding.py +264 -0
  116. maxframe/dataframe/groupby/fill.py +1 -1
  117. maxframe/dataframe/groupby/getitem.py +12 -5
  118. maxframe/dataframe/groupby/head.py +11 -1
  119. maxframe/dataframe/groupby/rank.py +136 -0
  120. maxframe/dataframe/groupby/rolling.py +206 -0
  121. maxframe/dataframe/groupby/shift.py +114 -0
  122. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  123. maxframe/dataframe/indexing/__init__.py +20 -1
  124. maxframe/dataframe/indexing/droplevel.py +195 -0
  125. maxframe/dataframe/indexing/filter.py +169 -0
  126. maxframe/dataframe/indexing/get_level_values.py +76 -0
  127. maxframe/dataframe/indexing/iat.py +45 -0
  128. maxframe/dataframe/indexing/iloc.py +152 -12
  129. maxframe/dataframe/indexing/insert.py +1 -1
  130. maxframe/dataframe/indexing/loc.py +287 -7
  131. maxframe/dataframe/indexing/reindex.py +14 -5
  132. maxframe/dataframe/indexing/rename.py +6 -0
  133. maxframe/dataframe/indexing/rename_axis.py +2 -2
  134. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  135. maxframe/dataframe/indexing/reset_index.py +33 -6
  136. maxframe/dataframe/indexing/sample.py +8 -0
  137. maxframe/dataframe/indexing/setitem.py +3 -3
  138. maxframe/dataframe/indexing/swaplevel.py +185 -0
  139. maxframe/dataframe/indexing/take.py +99 -0
  140. maxframe/dataframe/indexing/truncate.py +140 -0
  141. maxframe/dataframe/indexing/where.py +0 -11
  142. maxframe/dataframe/indexing/xs.py +148 -0
  143. maxframe/dataframe/merge/__init__.py +12 -1
  144. maxframe/dataframe/merge/append.py +97 -98
  145. maxframe/dataframe/merge/combine_first.py +120 -0
  146. maxframe/dataframe/merge/compare.py +387 -0
  147. maxframe/dataframe/merge/concat.py +183 -0
  148. maxframe/dataframe/merge/update.py +271 -0
  149. maxframe/dataframe/misc/__init__.py +16 -10
  150. maxframe/dataframe/misc/_duplicate.py +10 -4
  151. maxframe/dataframe/misc/apply.py +1 -1
  152. maxframe/dataframe/misc/check_unique.py +51 -0
  153. maxframe/dataframe/misc/clip.py +145 -0
  154. maxframe/dataframe/misc/describe.py +175 -9
  155. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  156. maxframe/dataframe/misc/duplicated.py +2 -2
  157. maxframe/dataframe/misc/get_dummies.py +5 -1
  158. maxframe/dataframe/misc/isin.py +2 -2
  159. maxframe/dataframe/misc/map.py +94 -0
  160. maxframe/dataframe/misc/tests/test_misc.py +13 -2
  161. maxframe/dataframe/misc/to_numeric.py +3 -0
  162. maxframe/dataframe/misc/transform.py +12 -5
  163. maxframe/dataframe/misc/transpose.py +13 -1
  164. maxframe/dataframe/misc/valid_index.py +115 -0
  165. maxframe/dataframe/misc/value_counts.py +38 -4
  166. maxframe/dataframe/missing/checkna.py +13 -6
  167. maxframe/dataframe/missing/dropna.py +5 -0
  168. maxframe/dataframe/missing/fillna.py +1 -1
  169. maxframe/dataframe/missing/replace.py +7 -4
  170. maxframe/dataframe/reduction/__init__.py +29 -15
  171. maxframe/dataframe/reduction/aggregation.py +38 -9
  172. maxframe/dataframe/reduction/all.py +2 -2
  173. maxframe/dataframe/reduction/any.py +2 -2
  174. maxframe/dataframe/reduction/argmax.py +100 -0
  175. maxframe/dataframe/reduction/argmin.py +100 -0
  176. maxframe/dataframe/reduction/core.py +65 -18
  177. maxframe/dataframe/reduction/count.py +13 -9
  178. maxframe/dataframe/reduction/cov.py +166 -0
  179. maxframe/dataframe/reduction/cummax.py +2 -2
  180. maxframe/dataframe/reduction/cummin.py +2 -2
  181. maxframe/dataframe/reduction/cumprod.py +2 -2
  182. maxframe/dataframe/reduction/cumsum.py +2 -2
  183. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  184. maxframe/dataframe/reduction/idxmax.py +185 -0
  185. maxframe/dataframe/reduction/idxmin.py +185 -0
  186. maxframe/dataframe/reduction/kurtosis.py +37 -30
  187. maxframe/dataframe/reduction/max.py +2 -2
  188. maxframe/dataframe/reduction/mean.py +9 -7
  189. maxframe/dataframe/reduction/median.py +2 -2
  190. maxframe/dataframe/reduction/min.py +2 -2
  191. maxframe/dataframe/reduction/nunique.py +9 -8
  192. maxframe/dataframe/reduction/prod.py +18 -13
  193. maxframe/dataframe/reduction/reduction_size.py +2 -2
  194. maxframe/dataframe/reduction/sem.py +13 -9
  195. maxframe/dataframe/reduction/skew.py +31 -27
  196. maxframe/dataframe/reduction/str_concat.py +10 -7
  197. maxframe/dataframe/reduction/sum.py +18 -14
  198. maxframe/dataframe/reduction/unique.py +20 -3
  199. maxframe/dataframe/reduction/var.py +16 -12
  200. maxframe/dataframe/reshape/__init__.py +38 -0
  201. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  202. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  203. maxframe/dataframe/reshape/unstack.py +114 -0
  204. maxframe/dataframe/sort/__init__.py +8 -0
  205. maxframe/dataframe/sort/argsort.py +62 -0
  206. maxframe/dataframe/sort/core.py +1 -0
  207. maxframe/dataframe/sort/nlargest.py +238 -0
  208. maxframe/dataframe/sort/nsmallest.py +228 -0
  209. maxframe/dataframe/statistics/__init__.py +3 -3
  210. maxframe/dataframe/statistics/corr.py +1 -0
  211. maxframe/dataframe/statistics/quantile.py +2 -2
  212. maxframe/dataframe/tests/test_typing.py +104 -0
  213. maxframe/dataframe/tests/test_utils.py +66 -2
  214. maxframe/dataframe/typing_.py +185 -0
  215. maxframe/dataframe/utils.py +95 -26
  216. maxframe/dataframe/window/aggregation.py +8 -4
  217. maxframe/dataframe/window/core.py +14 -1
  218. maxframe/dataframe/window/ewm.py +1 -3
  219. maxframe/dataframe/window/expanding.py +37 -35
  220. maxframe/dataframe/window/rolling.py +49 -39
  221. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  222. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  223. maxframe/env.py +7 -4
  224. maxframe/errors.py +2 -2
  225. maxframe/io/odpsio/schema.py +9 -3
  226. maxframe/io/odpsio/tableio.py +7 -2
  227. maxframe/io/odpsio/tests/test_schema.py +198 -83
  228. maxframe/learn/__init__.py +10 -2
  229. maxframe/learn/cluster/__init__.py +15 -0
  230. maxframe/learn/cluster/_kmeans.py +782 -0
  231. maxframe/learn/contrib/llm/core.py +2 -0
  232. maxframe/learn/contrib/xgboost/core.py +86 -1
  233. maxframe/learn/contrib/xgboost/train.py +5 -2
  234. maxframe/learn/core.py +66 -0
  235. maxframe/learn/linear_model/_base.py +58 -1
  236. maxframe/learn/linear_model/_lin_reg.py +1 -1
  237. maxframe/learn/metrics/__init__.py +6 -0
  238. maxframe/learn/metrics/_classification.py +145 -0
  239. maxframe/learn/metrics/_ranking.py +477 -0
  240. maxframe/learn/metrics/_scorer.py +60 -0
  241. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  242. maxframe/learn/metrics/pairwise/core.py +77 -0
  243. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  244. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  245. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  246. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  247. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  248. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  249. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  250. maxframe/learn/metrics/tests/__init__.py +13 -0
  251. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  252. maxframe/learn/utils/__init__.py +1 -1
  253. maxframe/learn/utils/checks.py +1 -2
  254. maxframe/learn/utils/core.py +59 -0
  255. maxframe/learn/utils/extmath.py +37 -0
  256. maxframe/learn/utils/odpsio.py +193 -0
  257. maxframe/learn/utils/validation.py +2 -2
  258. maxframe/lib/compat.py +40 -0
  259. maxframe/lib/dtypes_extension/__init__.py +16 -1
  260. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  261. maxframe/lib/dtypes_extension/blob.py +304 -0
  262. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  263. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  264. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  265. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  266. maxframe/lib/filesystem/_oss_lib/common.py +122 -50
  267. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  268. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  269. maxframe/lib/filesystem/base.py +1 -1
  270. maxframe/lib/filesystem/core.py +1 -1
  271. maxframe/lib/filesystem/oss.py +115 -46
  272. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  273. maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
  274. maxframe/lib/wrapped_pickle.py +10 -0
  275. maxframe/opcodes.py +33 -15
  276. maxframe/protocol.py +12 -0
  277. maxframe/serialization/__init__.py +11 -2
  278. maxframe/serialization/arrow.py +38 -13
  279. maxframe/serialization/blob.py +32 -0
  280. maxframe/serialization/core.cpython-39-darwin.so +0 -0
  281. maxframe/serialization/core.pyx +39 -1
  282. maxframe/serialization/exception.py +2 -4
  283. maxframe/serialization/numpy.py +11 -0
  284. maxframe/serialization/pandas.py +46 -9
  285. maxframe/serialization/serializables/core.py +2 -2
  286. maxframe/serialization/tests/test_serial.py +29 -2
  287. maxframe/tensor/__init__.py +38 -8
  288. maxframe/tensor/arithmetic/__init__.py +19 -10
  289. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  290. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
  291. maxframe/tensor/core.py +3 -2
  292. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  293. maxframe/tensor/extensions/__init__.py +2 -0
  294. maxframe/tensor/extensions/apply_chunk.py +3 -3
  295. maxframe/tensor/extensions/rebalance.py +65 -0
  296. maxframe/tensor/fft/__init__.py +32 -0
  297. maxframe/tensor/fft/core.py +168 -0
  298. maxframe/tensor/fft/fft.py +112 -0
  299. maxframe/tensor/fft/fft2.py +118 -0
  300. maxframe/tensor/fft/fftfreq.py +80 -0
  301. maxframe/tensor/fft/fftn.py +123 -0
  302. maxframe/tensor/fft/fftshift.py +79 -0
  303. maxframe/tensor/fft/hfft.py +112 -0
  304. maxframe/tensor/fft/ifft.py +114 -0
  305. maxframe/tensor/fft/ifft2.py +115 -0
  306. maxframe/tensor/fft/ifftn.py +123 -0
  307. maxframe/tensor/fft/ifftshift.py +73 -0
  308. maxframe/tensor/fft/ihfft.py +93 -0
  309. maxframe/tensor/fft/irfft.py +118 -0
  310. maxframe/tensor/fft/irfft2.py +62 -0
  311. maxframe/tensor/fft/irfftn.py +114 -0
  312. maxframe/tensor/fft/rfft.py +116 -0
  313. maxframe/tensor/fft/rfft2.py +63 -0
  314. maxframe/tensor/fft/rfftfreq.py +87 -0
  315. maxframe/tensor/fft/rfftn.py +113 -0
  316. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  317. maxframe/tensor/linalg/__init__.py +7 -0
  318. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  319. maxframe/tensor/linalg/cholesky.py +117 -0
  320. maxframe/tensor/linalg/einsum.py +339 -0
  321. maxframe/tensor/linalg/lstsq.py +100 -0
  322. maxframe/tensor/linalg/matrix_norm.py +75 -0
  323. maxframe/tensor/linalg/norm.py +249 -0
  324. maxframe/tensor/linalg/solve.py +72 -0
  325. maxframe/tensor/linalg/solve_triangular.py +2 -2
  326. maxframe/tensor/linalg/vector_norm.py +113 -0
  327. maxframe/tensor/misc/__init__.py +24 -1
  328. maxframe/tensor/misc/argwhere.py +72 -0
  329. maxframe/tensor/misc/array_split.py +46 -0
  330. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  331. maxframe/tensor/misc/copyto.py +130 -0
  332. maxframe/tensor/misc/delete.py +104 -0
  333. maxframe/tensor/misc/dsplit.py +68 -0
  334. maxframe/tensor/misc/ediff1d.py +74 -0
  335. maxframe/tensor/misc/expand_dims.py +85 -0
  336. maxframe/tensor/misc/flip.py +90 -0
  337. maxframe/tensor/misc/fliplr.py +64 -0
  338. maxframe/tensor/misc/flipud.py +68 -0
  339. maxframe/tensor/misc/hsplit.py +85 -0
  340. maxframe/tensor/misc/insert.py +139 -0
  341. maxframe/tensor/misc/moveaxis.py +83 -0
  342. maxframe/tensor/misc/result_type.py +88 -0
  343. maxframe/tensor/misc/roll.py +124 -0
  344. maxframe/tensor/misc/rollaxis.py +77 -0
  345. maxframe/tensor/misc/shape.py +89 -0
  346. maxframe/tensor/misc/split.py +190 -0
  347. maxframe/tensor/misc/tile.py +109 -0
  348. maxframe/tensor/misc/vsplit.py +74 -0
  349. maxframe/tensor/reduction/array_equal.py +2 -1
  350. maxframe/tensor/sort/__init__.py +2 -0
  351. maxframe/tensor/sort/argpartition.py +98 -0
  352. maxframe/tensor/sort/partition.py +228 -0
  353. maxframe/tensor/spatial/__init__.py +15 -0
  354. maxframe/tensor/spatial/distance/__init__.py +17 -0
  355. maxframe/tensor/spatial/distance/cdist.py +421 -0
  356. maxframe/tensor/spatial/distance/pdist.py +398 -0
  357. maxframe/tensor/spatial/distance/squareform.py +153 -0
  358. maxframe/tensor/special/__init__.py +159 -21
  359. maxframe/tensor/special/airy.py +55 -0
  360. maxframe/tensor/special/bessel.py +199 -0
  361. maxframe/tensor/special/core.py +65 -4
  362. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  363. maxframe/tensor/special/ellip_harm.py +55 -0
  364. maxframe/tensor/special/err_fresnel.py +223 -0
  365. maxframe/tensor/special/gamma_funcs.py +303 -0
  366. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  367. maxframe/tensor/special/info_theory.py +189 -0
  368. maxframe/tensor/special/misc.py +21 -0
  369. maxframe/tensor/statistics/__init__.py +6 -0
  370. maxframe/tensor/statistics/corrcoef.py +77 -0
  371. maxframe/tensor/statistics/cov.py +222 -0
  372. maxframe/tensor/statistics/digitize.py +126 -0
  373. maxframe/tensor/statistics/histogram.py +520 -0
  374. maxframe/tensor/statistics/median.py +85 -0
  375. maxframe/tensor/statistics/ptp.py +89 -0
  376. maxframe/tensor/utils.py +3 -3
  377. maxframe/tests/test_utils.py +43 -1
  378. maxframe/tests/utils.py +0 -2
  379. maxframe/typing_.py +2 -0
  380. maxframe/udf.py +27 -2
  381. maxframe/utils.py +193 -19
  382. {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
  383. {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
  384. maxframe_client/fetcher.py +35 -4
  385. maxframe_client/session/odps.py +7 -2
  386. maxframe_client/tests/test_fetcher.py +76 -3
  387. maxframe_client/tests/test_session.py +4 -1
  388. /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  389. /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  390. {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
  391. {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
maxframe/learn/contrib/llm/core.py CHANGED
@@ -23,6 +23,7 @@ from ....core.operator.core import TileableOperatorMixin
23
23
  from ....dataframe.core import SERIES_TYPE
24
24
  from ....dataframe.operators import DataFrameOperatorMixin
25
25
  from ....dataframe.utils import parse_index
26
+ from ....serialization.serializables import Int32Field
26
27
  from ....serialization.serializables.core import Serializable
27
28
  from ....serialization.serializables.field import AnyField, DictField, StringField
28
29
 
@@ -39,6 +40,7 @@ class LLMTaskOperator(Operator, DataFrameOperatorMixin):
39
40
  model = AnyField("model", default=None)
40
41
  params = DictField("params", default=None)
41
42
  running_options: Dict[str, Any] = DictField("running_options", default=None)
43
+ timeout = Int32Field("timeout", default=None)
42
44
 
43
45
  def __init__(self, output_types=None, **kw):
44
46
  if output_types is None:
maxframe/learn/contrib/xgboost/core.py CHANGED
@@ -20,6 +20,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
20
20
  import numpy as np
21
21
 
22
22
  from ....errors import TileableNotExecutedError
23
+ from ....udf import builtin_function
23
24
 
24
25
  try:
25
26
  import xgboost
@@ -27,9 +28,16 @@ except ImportError:
27
28
  xgboost = None
28
29
 
29
30
  from ....core import OutputType
31
+ from ...utils.odpsio import ToODPSModelMixin
30
32
  from ..models import ModelApplyChunk, ModelWithEval, ModelWithEvalData, to_remote_model
31
33
  from .dmatrix import DMatrix
32
34
 
35
+ _xgb_type_to_np_type = {
36
+ "float": "float32",
37
+ "int": "int32",
38
+ "i": "bool",
39
+ }
40
+
33
41
 
34
42
  class BoosterData(ModelWithEvalData):
35
43
  @staticmethod
@@ -88,6 +96,58 @@ class BoosterData(ModelWithEvalData):
88
96
  strict_shape=strict_shape,
89
97
  )
90
98
 
99
+ @staticmethod
100
+ @builtin_function
101
+ def _get_training_info(bst, evals_result, local_info):
102
+ model_infos = {
103
+ "iteration": bst.num_boosted_rounds(),
104
+ }
105
+ if evals_result:
106
+ model_infos.update(
107
+ dict(
108
+ duration_ms=evals_result.get("duration_ms"),
109
+ )
110
+ )
111
+ if bst.feature_names:
112
+ model_infos["feature_names"] = bst.feature_names
113
+ model_infos["feature_types"] = [
114
+ _xgb_type_to_np_type[x] for x in bst.feature_types
115
+ ]
116
+ model_infos.update(local_info or {})
117
+
118
+ try:
119
+ config = json.loads(bst.save_config())
120
+ stack = [config]
121
+ internal = {}
122
+ while stack:
123
+ obj = stack.pop()
124
+ for k, v in obj.items():
125
+ if k.endswith("_param"):
126
+ for p_k, p_v in v.items():
127
+ internal[p_k] = p_v
128
+ elif isinstance(v, dict):
129
+ stack.append(v)
130
+
131
+ for k, v in internal.items():
132
+ for t in (int, float, str):
133
+ try:
134
+ model_infos[k] = t(v)
135
+ break
136
+ except ValueError:
137
+ continue
138
+ except ValueError:
139
+ pass
140
+
141
+ return model_infos
142
+
143
+ def get_training_info(self, evals_result: dict = None, local_info: dict = None):
144
+ evals_result = getattr(self, "_evals_result", None) or evals_result
145
+ args = (evals_result, local_info)
146
+ op = ModelApplyChunk(
147
+ func=self._get_training_info, output_types=[OutputType.object]
148
+ )
149
+ return op(self, [{}], args=args)[0]
150
+
91
151
 
92
152
  class Booster(ModelWithEval):
93
153
  pass
@@ -97,7 +157,7 @@ if not xgboost:
97
157
  XGBScikitLearnBase = None
98
158
  else:
99
159
 
100
- class XGBScikitLearnBase(xgboost.XGBModel):
160
+ class XGBScikitLearnBase(xgboost.XGBModel, ToODPSModelMixin):
101
161
  """
102
162
  Base class for implementing scikit-learn interface
103
163
  """
@@ -181,6 +241,7 @@ else:
181
241
  **train_kw,
182
242
  )
183
243
  self._Booster = result
244
+ self.evals_result_t_ = result.op.outputs[-1]
184
245
  return self
185
246
 
186
247
  def predict(self, data, **kw):
@@ -276,6 +337,30 @@ else:
276
337
  n_features=self._n_features_in,
277
338
  )[0]
278
339
 
340
+ @property
341
+ def training_info_(self):
342
+ local_info = {}
343
+ attrs = [
344
+ "n_classes_",
345
+ "learning_rate",
346
+ ]
347
+ for attr in attrs:
348
+ if getattr(self, attr, None):
349
+ local_info[attr] = getattr(self, attr)
350
+ return self._Booster.get_training_info(
351
+ evals_result=self.evals_result_t_, local_info=local_info
352
+ )
353
+
354
+ def _get_odps_model_info(self) -> ToODPSModelMixin.ODPSModelInfo:
355
+ model_format = (
356
+ "BOOSTED_TREE_CLASSIFIER"
357
+ if hasattr(self, "predict_proba")
358
+ else "BOOSTED_TREE_REGRESSOR"
359
+ )
360
+ return ToODPSModelMixin.ODPSModelInfo(
361
+ model_format=model_format, model_params=self._Booster
362
+ )
363
+
279
364
  def wrap_evaluation_matrices(
280
365
  missing: float,
281
366
  X: Any,
maxframe/learn/contrib/xgboost/train.py CHANGED
@@ -50,7 +50,7 @@ class XGBTrain(ObjectOperator, ObjectOperatorMixin):
50
50
  dtrain = KeyField("dtrain", default=None)
51
51
  evals = ListField("evals", on_serialize=_on_serialize_evals, default=None)
52
52
  obj = FunctionField("obj", default=None)
53
- feval = FunctionField("obj", default=None)
53
+ feval = FunctionField("feval", default=None)
54
54
  maximize = BoolField("maximize", default=None)
55
55
  early_stopping_rounds = Int64Field("early_stopping_rounds", default=None)
56
56
  verbose_eval = AnyField("verbose_eval", default=None)
@@ -64,8 +64,11 @@ class XGBTrain(ObjectOperator, ObjectOperatorMixin):
64
64
  custom_metric = FunctionField("custom_metric", default=None)
65
65
  num_boost_round = Int64Field("num_boost_round", default=10)
66
66
  num_class = Int64Field("num_class", default=None)
67
+ _has_evals_result = BoolField("has_evals_result", default=False)
67
68
 
68
69
  def __init__(self, gpu=None, **kw):
70
+ if kw.get("evals_result") is not None:
71
+ kw["_has_evals_result"] = True
69
72
  super().__init__(gpu=gpu, **kw)
70
73
  if self.output_types is None:
71
74
  self.output_types = [OutputType.object]
@@ -110,7 +113,7 @@ class XGBTrain(ObjectOperator, ObjectOperatorMixin):
110
113
 
111
114
  @property
112
115
  def has_evals_result(self) -> bool:
113
- return self.evals
116
+ return self._has_evals_result or self.evals
114
117
 
115
118
 
116
119
  def _get_xgb_booster(xgb_model):
maxframe/learn/core.py CHANGED
@@ -222,6 +222,41 @@ class TransformerMixin:
222
222
  return self.fit(X, y, **fit_params).transform(X)
223
223
 
224
224
 
225
+ class ClassifierMixin:
226
+ """Mixin class for all classifiers in scikit-learn."""
227
+
228
+ _estimator_type = "classifier"
229
+
230
+ def score(self, X, y, sample_weight=None):
231
+ """
232
+ Return the mean accuracy on the given test data and labels.
233
+
234
+ In multi-label classification, this is the subset accuracy
235
+ which is a harsh metric since you require for each sample that
236
+ each label set be correctly predicted.
237
+
238
+ Parameters
239
+ ----------
240
+ X : array-like of shape (n_samples, n_features)
241
+ Test samples.
242
+
243
+ y : array-like of shape (n_samples,) or (n_samples, n_outputs)
244
+ True labels for X.
245
+
246
+ sample_weight : array-like of shape (n_samples,), default=None
247
+ Sample weights.
248
+
249
+ Returns
250
+ -------
251
+ score : Tensor
252
+ Mean accuracy of self.predict(X) wrt. y.
253
+ """
254
+ from .metrics import accuracy_score
255
+
256
+ result = accuracy_score(y, self.predict(X), sample_weight=sample_weight)
257
+ return result
258
+
259
+
225
260
  class RegressorMixin:
226
261
  """Mixin class for all regression estimators in scikit-learn."""
227
262
 
@@ -276,3 +311,34 @@ class RegressorMixin:
276
311
 
277
312
  def _more_tags(self): # noqa: R0201 # pylint: disable=no-self-use
278
313
  return {"requires_y": True}
314
+
315
+
316
+ class ClusterMixin:
317
+ """Mixin class for all cluster estimators in scikit-learn."""
318
+
319
+ _estimator_type = "clusterer"
320
+
321
+ def fit_predict(self, X, y=None):
322
+ """
323
+ Perform clustering on `X` and returns cluster labels.
324
+
325
+ Parameters
326
+ ----------
327
+ X : array-like of shape (n_samples, n_features)
328
+ Input data.
329
+
330
+ y : Ignored
331
+ Not used, present for API consistency by convention.
332
+
333
+ Returns
334
+ -------
335
+ labels : ndarray of shape (n_samples,), dtype=np.int64
336
+ Cluster labels.
337
+ """
338
+ # non-optimized default implementation; override when a better
339
+ # method is possible for a given clustering algorithm
340
+ self.fit(X)
341
+ return self.labels_
342
+
343
+ def _more_tags(self):
344
+ return {"preserves_dtype": []}
maxframe/learn/linear_model/_base.py CHANGED
@@ -17,7 +17,7 @@ from abc import ABCMeta, abstractmethod
17
17
 
18
18
  from ... import tensor as mt
19
19
  from ...tensor.datasource import tensor as astensor
20
- from ..core import BaseEstimator
20
+ from ..core import BaseEstimator, ClassifierMixin
21
21
  from ..preprocessing import normalize as f_normalize
22
22
  from ..utils.validation import FLOAT_DTYPES, check_array
23
23
 
@@ -161,3 +161,60 @@ class LinearModel(BaseEstimator, metaclass=ABCMeta):
161
161
 
162
162
  def _more_tags(self): # noqa: R0201 # pylint: disable=no-self-use
163
163
  return {"requires_y": True}
164
+
165
+
166
+ class LinearClassifierMixin(ClassifierMixin):
167
+ """Mixin for linear classifiers.
168
+
169
+ Handles prediction for sparse and dense X.
170
+ """
171
+
172
+ def decision_function(self, X):
173
+ """
174
+ Predict confidence scores for samples.
175
+
176
+ The confidence score for a sample is proportional to the signed
177
+ distance of that sample to the hyperplane.
178
+
179
+ Parameters
180
+ ----------
181
+ X : array-like or sparse matrix, shape (n_samples, n_features)
182
+ Samples.
183
+
184
+ Returns
185
+ -------
186
+ array, shape=(n_samples,) if n_classes == 2 else (n_samples, n_classes)
187
+ Confidence scores per (sample, class) combination. In the binary
188
+ case, confidence score for self.classes_[1] where >0 means this
189
+ class would be predicted.
190
+ """
191
+ check_is_fitted(self)
192
+
193
+ X = check_array(X, accept_sparse="csr")
194
+
195
+ n_features = self.coef_.shape[1]
196
+ if X.shape[1] != n_features:
197
+ raise ValueError(
198
+ "X has %d features per sample; expecting %d" % (X.shape[1], n_features)
199
+ )
200
+
201
+ scores = mt.dot(X, self.coef_.T) + self.intercept_
202
+ return scores
203
+
204
+ def predict(self, X):
205
+ """
206
+ Predict class labels for samples in X.
207
+
208
+ Parameters
209
+ ----------
210
+ X : array-like or sparse matrix, shape (n_samples, n_features)
211
+ Samples.
212
+
213
+ Returns
214
+ -------
215
+ C : array, shape [n_samples]
216
+ Predicted class label per sample.
217
+ """
218
+ scores = self.decision_function(X)
219
+ indices = scores.argmax(axis=1)
220
+ return self.classes_[indices].execute()
maxframe/learn/linear_model/_lin_reg.py CHANGED
@@ -22,7 +22,7 @@ from ._base import LinearModel, _rescale_data
22
22
  try:
23
23
  from sklearn.base import MultiOutputMixin
24
24
  except ImportError:
25
- MultiOutputMixin = object
25
+ MultiOutputMixin = type("MultiOutputMixin", (object,), {})
26
26
 
27
27
 
28
28
  class LinearRegression(MultiOutputMixin, RegressorMixin, LinearModel):
@@ -17,9 +17,15 @@ from ._classification import (
17
17
  accuracy_score,
18
18
  f1_score,
19
19
  fbeta_score,
20
+ log_loss,
20
21
  multilabel_confusion_matrix,
21
22
  precision_recall_fscore_support,
22
23
  precision_score,
23
24
  recall_score,
24
25
  )
26
+ from ._ranking import auc, roc_auc_score, roc_curve
25
27
  from ._regression import r2_score
28
+ from .pairwise import pairwise_distances
29
+
30
+ # isort: off
31
+ from ._scorer import get_scorer
@@ -33,6 +33,7 @@ from ...serialization.serializables import (
33
33
  from ...tensor.core import TensorOrder
34
34
  from ...typing_ import EntityType
35
35
  from ..core import LearnOperatorMixin
36
+ from ..utils import check_array, check_consistent_length
36
37
  from ._check_targets import _check_targets
37
38
 
38
39
 
@@ -163,6 +164,150 @@ def accuracy_score(
163
164
  return score.execute(session=session, **(run_kwargs or dict()))
164
165
 
165
166
 
167
class LogLoss(Operator, LearnOperatorMixin):
    """Operator that describes a log-loss (cross-entropy) computation.

    Calling an instance builds a scalar (``shape=()``) tensor tileable
    whose inputs are ``y_true``, ``y_pred`` and, when they are entities,
    ``sample_weight`` and ``labels``.
    """

    _op_type_ = opcodes.LOG_LOSS

    # y_true / y_pred are re-bound from ``op.inputs`` in ``_set_inputs``.
    y_true = AnyField("y_true")
    y_pred = AnyField("y_pred")
    eps = Float64Field("eps", default=1e-15)
    normalize = BoolField("normalize", default=True)
    # Optional arguments; only treated as inputs when they are entities.
    sample_weight = AnyField("sample_weight", default=None)
    labels = AnyField("labels", default=None)

    @classmethod
    def _set_inputs(cls, op: "LogLoss", inputs: List[EntityType]):
        """Re-bind entity-valued fields from ``op.inputs``.

        The consumption order must mirror the order in which ``__call__``
        appends inputs: y_true, y_pred, then sample_weight and labels if
        each of them is an entity.
        """
        super()._set_inputs(op, inputs)
        inputs_iter = iter(op.inputs)
        op.y_true = next(inputs_iter)
        op.y_pred = next(inputs_iter)
        if isinstance(op.sample_weight, ENTITY_TYPE):
            op.sample_weight = next(inputs_iter)
        if isinstance(op.labels, ENTITY_TYPE):
            op.labels = next(inputs_iter)

    def __call__(self, y_true, y_pred, sample_weight=None, labels=None):
        """Build the scalar output tileable for this operator.

        Parameters
        ----------
        y_true, y_pred
            Ground-truth labels and predicted probabilities; always passed
            as inputs of the operator.
        sample_weight, labels : optional
            Stored on the operator; additionally appended to the inputs
            when they are instances of ``ENTITY_TYPE``.

        Returns
        -------
        A tileable created via ``new_tileable`` with ``shape=()`` and a
        floating-point dtype.
        """
        self._output_types = [OutputType.tensor]
        self.sample_weight = sample_weight
        self.labels = labels
        inputs = [y_true, y_pred]
        if isinstance(self.sample_weight, ENTITY_TYPE):
            inputs.append(self.sample_weight)
        if isinstance(self.labels, ENTITY_TYPE):
            inputs.append(self.labels)

        # Log loss is a (weighted) sum or mean of -log(p) terms, so the
        # result is floating point whether or not it is normalized. Deriving
        # the dtype from y_true would yield a non-float dtype (or a
        # promotion error) for label dtypes such as strings.
        return self.new_tileable(
            inputs, dtype=np.dtype(float), shape=(), order=TensorOrder.C_ORDER
        )
206
+
207
+
208
def log_loss(
    y_true,
    y_pred,
    *,
    eps=1e-15,
    normalize=True,
    sample_weight=None,
    labels=None,
    execute=False,
    session=None,
    run_kwargs=None,
):
    r"""Log loss, aka logistic loss or cross-entropy loss.

    This is the loss function used in (multinomial) logistic regression
    and extensions of it such as neural networks, defined as the negative
    log-likelihood of a logistic model that returns ``y_pred`` probabilities
    for its training data ``y_true``.
    The log loss is only defined for two or more labels.
    For a single sample with true label :math:`y \in \{0,1\}` and
    a probability estimate :math:`p = \operatorname{Pr}(y = 1)`, the log
    loss is:

    .. math::
        L_{\log}(y, p) = -(y \log (p) + (1 - y) \log (1 - p))

    Read more in the :ref:`User Guide <log_loss>`.

    Parameters
    ----------
    y_true : array-like or label indicator matrix
        Ground truth (correct) labels for n_samples samples.

    y_pred : array-like of float, shape = (n_samples, n_classes) or (n_samples,)
        Predicted probabilities, as returned by a classifier's
        predict_proba method. If ``y_pred.shape = (n_samples,)``
        the probabilities provided are assumed to be that of the
        positive class. The labels in ``y_pred`` are assumed to be
        ordered alphabetically, as done by
        :class:`preprocessing.LabelBinarizer`.

    eps : float, default=1e-15
        Log loss is undefined for p=0 or p=1, so probabilities are
        clipped to max(eps, min(1 - eps, p)).

    normalize : bool, default=True
        If true, return the mean loss per sample.
        Otherwise, return the sum of the per-sample losses.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    labels : array-like, default=None
        If not provided, labels will be inferred from y_true. If ``labels``
        is ``None`` and ``y_pred`` has shape (n_samples,) the labels are
        assumed to be binary and are inferred from ``y_true``.

    execute : bool, default=False
        If true, call ``execute`` on the resulting tensor and return what
        it returns; otherwise return the tensor itself.

    session : optional
        Passed as ``session=`` to ``execute`` when ``execute`` is true.

    run_kwargs : dict, default=None
        Extra keyword arguments forwarded to ``execute`` when ``execute``
        is true.

    Returns
    -------
    loss : scalar tensor
        The result of ``execute`` on that tensor when ``execute=True``.

    Notes
    -----
    The logarithm used is the natural logarithm (base-e).

    Examples
    --------
    >>> from maxframe.learn.metrics import log_loss
    >>> log_loss(["spam", "ham", "ham", "spam"],
    ...          [[.1, .9], [.9, .1], [.8, .2], [.35, .65]])
    0.21616...

    References
    ----------
    C.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer,
    p. 209.
    """
    if not isinstance(y_true, (ENTITY_TYPE, np.ndarray)):
        y_true = mt.array(y_true)
    if not isinstance(y_pred, (ENTITY_TYPE, np.ndarray)):
        y_pred = mt.array(y_pred)
    # The guard must inspect sample_weight itself: y_pred has already been
    # converted above, so testing y_pred here would never wrap a plain
    # list/tuple of weights.
    if sample_weight is not None and not isinstance(
        sample_weight, (ENTITY_TYPE, np.ndarray)
    ):
        sample_weight = mt.array(sample_weight)
    if labels is not None and not isinstance(labels, (ENTITY_TYPE, np.ndarray)):
        labels = mt.array(labels)

    y_pred = check_array(y_pred, ensure_2d=False)
    y_pred, y_true, sample_weight = check_consistent_length(
        y_pred, y_true, sample_weight
    )

    op = LogLoss(eps=eps, normalize=normalize)
    res = op(
        y_true=y_true,
        y_pred=y_pred,
        sample_weight=sample_weight,
        labels=labels,
    )
    if execute:
        return res.execute(session=session, **(run_kwargs or {}))
    return res
309
+
310
+
166
311
  class MultiLabelConfusionMatrix(Operator, LearnOperatorMixin):
167
312
  _op_type_ = opcodes.MULTILABEL_CONFUSION_MATRIX
168
313