maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (443) hide show
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win32.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +9 -8
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +38 -1
  15. maxframe/codegen/spe/dataframe/misc.py +11 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +32 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +39 -18
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  29. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  30. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  31. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  32. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  33. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  34. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  35. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  36. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  37. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  38. maxframe/codegen/spe/tensor/__init__.py +3 -0
  39. maxframe/codegen/spe/tensor/datasource.py +1 -0
  40. maxframe/codegen/spe/tensor/fft.py +74 -0
  41. maxframe/codegen/spe/tensor/linalg.py +29 -2
  42. maxframe/codegen/spe/tensor/misc.py +79 -25
  43. maxframe/codegen/spe/tensor/spatial.py +45 -0
  44. maxframe/codegen/spe/tensor/statistics.py +44 -0
  45. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  46. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  47. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  48. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  49. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  50. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  51. maxframe/codegen/spe/utils.py +2 -0
  52. maxframe/config/config.py +73 -9
  53. maxframe/config/tests/test_validators.py +13 -1
  54. maxframe/config/validators.py +49 -0
  55. maxframe/conftest.py +54 -17
  56. maxframe/core/accessor.py +2 -2
  57. maxframe/core/base.py +2 -1
  58. maxframe/core/entity/core.py +5 -0
  59. maxframe/core/entity/tileables.py +3 -1
  60. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  61. maxframe/core/graph/entity.py +8 -3
  62. maxframe/core/mode.py +6 -1
  63. maxframe/core/operator/base.py +9 -2
  64. maxframe/core/operator/core.py +10 -2
  65. maxframe/core/operator/utils.py +13 -0
  66. maxframe/dataframe/__init__.py +12 -5
  67. maxframe/dataframe/accessors/__init__.py +1 -1
  68. maxframe/dataframe/accessors/compat.py +45 -0
  69. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  70. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  71. maxframe/dataframe/accessors/dict_/core.py +48 -0
  72. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  73. maxframe/dataframe/accessors/dict_/length.py +7 -16
  74. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  75. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  76. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  77. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  78. maxframe/dataframe/accessors/list_/core.py +48 -0
  79. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  80. maxframe/dataframe/accessors/list_/length.py +7 -16
  81. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  82. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  83. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  84. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  85. maxframe/dataframe/accessors/struct_/core.py +43 -0
  86. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  87. maxframe/dataframe/accessors/struct_/field.py +123 -0
  88. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  89. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  90. maxframe/dataframe/arithmetic/__init__.py +18 -4
  91. maxframe/dataframe/arithmetic/between.py +106 -0
  92. maxframe/dataframe/arithmetic/dot.py +237 -0
  93. maxframe/dataframe/arithmetic/maximum.py +33 -0
  94. maxframe/dataframe/arithmetic/minimum.py +33 -0
  95. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  96. maxframe/dataframe/core.py +161 -224
  97. maxframe/dataframe/datasource/__init__.py +18 -0
  98. maxframe/dataframe/datasource/core.py +6 -0
  99. maxframe/dataframe/datasource/direct.py +57 -0
  100. maxframe/dataframe/datasource/from_dict.py +124 -0
  101. maxframe/dataframe/datasource/from_index.py +1 -1
  102. maxframe/dataframe/datasource/from_records.py +77 -0
  103. maxframe/dataframe/datasource/from_tensor.py +109 -41
  104. maxframe/dataframe/datasource/read_csv.py +21 -14
  105. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  106. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  107. maxframe/dataframe/datasource/read_parquet.py +38 -39
  108. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  109. maxframe/dataframe/datastore/__init__.py +11 -1
  110. maxframe/dataframe/datastore/direct.py +268 -0
  111. maxframe/dataframe/datastore/to_csv.py +29 -41
  112. maxframe/dataframe/datastore/to_odps.py +36 -4
  113. maxframe/dataframe/extensions/__init__.py +20 -4
  114. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  115. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  116. maxframe/dataframe/extensions/collect_kv.py +126 -0
  117. maxframe/dataframe/extensions/extract_kv.py +177 -0
  118. maxframe/dataframe/extensions/flatjson.py +2 -1
  119. maxframe/dataframe/extensions/map_reduce.py +263 -0
  120. maxframe/dataframe/extensions/rebalance.py +62 -0
  121. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  122. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  123. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  124. maxframe/dataframe/groupby/__init__.py +17 -2
  125. maxframe/dataframe/groupby/aggregation.py +86 -49
  126. maxframe/dataframe/groupby/apply.py +1 -1
  127. maxframe/dataframe/groupby/apply_chunk.py +19 -5
  128. maxframe/dataframe/groupby/core.py +116 -16
  129. maxframe/dataframe/groupby/cum.py +4 -25
  130. maxframe/dataframe/groupby/expanding.py +264 -0
  131. maxframe/dataframe/groupby/fill.py +1 -1
  132. maxframe/dataframe/groupby/getitem.py +12 -5
  133. maxframe/dataframe/groupby/head.py +11 -1
  134. maxframe/dataframe/groupby/rank.py +136 -0
  135. maxframe/dataframe/groupby/rolling.py +206 -0
  136. maxframe/dataframe/groupby/shift.py +114 -0
  137. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  138. maxframe/dataframe/indexing/__init__.py +22 -2
  139. maxframe/dataframe/indexing/droplevel.py +195 -0
  140. maxframe/dataframe/indexing/filter.py +169 -0
  141. maxframe/dataframe/indexing/get_level_values.py +76 -0
  142. maxframe/dataframe/indexing/iat.py +45 -0
  143. maxframe/dataframe/indexing/iloc.py +152 -12
  144. maxframe/dataframe/indexing/insert.py +46 -18
  145. maxframe/dataframe/indexing/loc.py +287 -7
  146. maxframe/dataframe/indexing/reindex.py +14 -5
  147. maxframe/dataframe/indexing/rename.py +6 -0
  148. maxframe/dataframe/indexing/rename_axis.py +2 -2
  149. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  150. maxframe/dataframe/indexing/reset_index.py +33 -6
  151. maxframe/dataframe/indexing/sample.py +8 -0
  152. maxframe/dataframe/indexing/setitem.py +3 -3
  153. maxframe/dataframe/indexing/swaplevel.py +185 -0
  154. maxframe/dataframe/indexing/take.py +99 -0
  155. maxframe/dataframe/indexing/truncate.py +140 -0
  156. maxframe/dataframe/indexing/where.py +0 -11
  157. maxframe/dataframe/indexing/xs.py +148 -0
  158. maxframe/dataframe/merge/__init__.py +15 -1
  159. maxframe/dataframe/merge/append.py +97 -98
  160. maxframe/dataframe/merge/combine.py +244 -0
  161. maxframe/dataframe/merge/combine_first.py +120 -0
  162. maxframe/dataframe/merge/compare.py +387 -0
  163. maxframe/dataframe/merge/concat.py +183 -0
  164. maxframe/dataframe/merge/update.py +271 -0
  165. maxframe/dataframe/misc/__init__.py +28 -11
  166. maxframe/dataframe/misc/_duplicate.py +10 -4
  167. maxframe/dataframe/misc/apply.py +1 -1
  168. maxframe/dataframe/misc/check_unique.py +82 -0
  169. maxframe/dataframe/misc/clip.py +145 -0
  170. maxframe/dataframe/misc/describe.py +175 -9
  171. maxframe/dataframe/misc/drop.py +31 -0
  172. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  173. maxframe/dataframe/misc/duplicated.py +2 -2
  174. maxframe/dataframe/misc/get_dummies.py +5 -1
  175. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  176. maxframe/dataframe/misc/isin.py +2 -2
  177. maxframe/dataframe/misc/map.py +125 -18
  178. maxframe/dataframe/misc/repeat.py +159 -0
  179. maxframe/dataframe/misc/tests/test_misc.py +48 -3
  180. maxframe/dataframe/misc/to_numeric.py +3 -0
  181. maxframe/dataframe/misc/transform.py +12 -5
  182. maxframe/dataframe/misc/transpose.py +13 -1
  183. maxframe/dataframe/misc/valid_index.py +115 -0
  184. maxframe/dataframe/misc/value_counts.py +38 -4
  185. maxframe/dataframe/missing/checkna.py +14 -6
  186. maxframe/dataframe/missing/dropna.py +5 -0
  187. maxframe/dataframe/missing/fillna.py +1 -1
  188. maxframe/dataframe/missing/replace.py +7 -4
  189. maxframe/dataframe/reduction/__init__.py +35 -16
  190. maxframe/dataframe/reduction/aggregation.py +43 -14
  191. maxframe/dataframe/reduction/all.py +2 -2
  192. maxframe/dataframe/reduction/any.py +2 -2
  193. maxframe/dataframe/reduction/argmax.py +103 -0
  194. maxframe/dataframe/reduction/argmin.py +103 -0
  195. maxframe/dataframe/reduction/core.py +80 -24
  196. maxframe/dataframe/reduction/count.py +13 -9
  197. maxframe/dataframe/reduction/cov.py +166 -0
  198. maxframe/dataframe/reduction/cummax.py +2 -2
  199. maxframe/dataframe/reduction/cummin.py +2 -2
  200. maxframe/dataframe/reduction/cumprod.py +2 -2
  201. maxframe/dataframe/reduction/cumsum.py +2 -2
  202. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  203. maxframe/dataframe/reduction/idxmax.py +185 -0
  204. maxframe/dataframe/reduction/idxmin.py +185 -0
  205. maxframe/dataframe/reduction/kurtosis.py +37 -30
  206. maxframe/dataframe/reduction/max.py +2 -2
  207. maxframe/dataframe/reduction/mean.py +9 -7
  208. maxframe/dataframe/reduction/median.py +2 -2
  209. maxframe/dataframe/reduction/min.py +2 -2
  210. maxframe/dataframe/reduction/mode.py +144 -0
  211. maxframe/dataframe/reduction/nunique.py +19 -11
  212. maxframe/dataframe/reduction/prod.py +18 -13
  213. maxframe/dataframe/reduction/reduction_size.py +2 -2
  214. maxframe/dataframe/reduction/sem.py +13 -9
  215. maxframe/dataframe/reduction/skew.py +31 -27
  216. maxframe/dataframe/reduction/str_concat.py +10 -7
  217. maxframe/dataframe/reduction/sum.py +18 -14
  218. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  219. maxframe/dataframe/reduction/unique.py +20 -3
  220. maxframe/dataframe/reduction/var.py +16 -12
  221. maxframe/dataframe/reshape/__init__.py +38 -0
  222. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  223. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  224. maxframe/dataframe/reshape/unstack.py +114 -0
  225. maxframe/dataframe/sort/__init__.py +16 -1
  226. maxframe/dataframe/sort/argsort.py +68 -0
  227. maxframe/dataframe/sort/core.py +2 -1
  228. maxframe/dataframe/sort/nlargest.py +238 -0
  229. maxframe/dataframe/sort/nsmallest.py +228 -0
  230. maxframe/dataframe/sort/rank.py +147 -0
  231. maxframe/dataframe/statistics/__init__.py +3 -3
  232. maxframe/dataframe/statistics/corr.py +1 -0
  233. maxframe/dataframe/statistics/quantile.py +2 -2
  234. maxframe/dataframe/tests/test_typing.py +104 -0
  235. maxframe/dataframe/tests/test_utils.py +66 -2
  236. maxframe/dataframe/tseries/__init__.py +19 -0
  237. maxframe/dataframe/tseries/at_time.py +61 -0
  238. maxframe/dataframe/tseries/between_time.py +122 -0
  239. maxframe/dataframe/typing_.py +185 -0
  240. maxframe/dataframe/utils.py +125 -52
  241. maxframe/dataframe/window/aggregation.py +8 -4
  242. maxframe/dataframe/window/core.py +14 -1
  243. maxframe/dataframe/window/ewm.py +1 -3
  244. maxframe/dataframe/window/expanding.py +37 -35
  245. maxframe/dataframe/window/rolling.py +49 -39
  246. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  247. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  248. maxframe/env.py +7 -4
  249. maxframe/errors.py +2 -2
  250. maxframe/io/odpsio/schema.py +9 -3
  251. maxframe/io/odpsio/tableio.py +7 -2
  252. maxframe/io/odpsio/tests/test_schema.py +198 -83
  253. maxframe/learn/__init__.py +10 -2
  254. maxframe/learn/cluster/__init__.py +15 -0
  255. maxframe/learn/cluster/_kmeans.py +782 -0
  256. maxframe/learn/contrib/llm/core.py +18 -7
  257. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  258. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  259. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  260. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  261. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  262. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  263. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  264. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  265. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  266. maxframe/learn/contrib/llm/models/managed.py +76 -11
  267. maxframe/learn/contrib/llm/models/openai.py +72 -0
  268. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  269. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  270. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  271. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  272. maxframe/learn/contrib/llm/text.py +348 -42
  273. maxframe/learn/contrib/models.py +4 -1
  274. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  275. maxframe/learn/contrib/xgboost/core.py +113 -4
  276. maxframe/learn/contrib/xgboost/predict.py +4 -2
  277. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  278. maxframe/learn/contrib/xgboost/train.py +7 -2
  279. maxframe/learn/core.py +66 -0
  280. maxframe/learn/linear_model/_base.py +58 -1
  281. maxframe/learn/linear_model/_lin_reg.py +1 -1
  282. maxframe/learn/metrics/__init__.py +6 -0
  283. maxframe/learn/metrics/_classification.py +145 -0
  284. maxframe/learn/metrics/_ranking.py +477 -0
  285. maxframe/learn/metrics/_scorer.py +60 -0
  286. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  287. maxframe/learn/metrics/pairwise/core.py +77 -0
  288. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  289. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  290. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  291. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  292. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  293. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  294. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  295. maxframe/learn/metrics/tests/__init__.py +13 -0
  296. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  297. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  298. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  299. maxframe/learn/utils/__init__.py +2 -1
  300. maxframe/learn/utils/checks.py +1 -2
  301. maxframe/learn/utils/core.py +59 -0
  302. maxframe/learn/utils/extmath.py +79 -9
  303. maxframe/learn/utils/odpsio.py +262 -0
  304. maxframe/learn/utils/validation.py +2 -2
  305. maxframe/lib/compat.py +40 -0
  306. maxframe/lib/dtypes_extension/__init__.py +16 -1
  307. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  308. maxframe/lib/dtypes_extension/blob.py +304 -0
  309. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  310. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  311. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  312. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  313. maxframe/lib/filesystem/_oss_lib/common.py +124 -50
  314. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  315. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  316. maxframe/lib/filesystem/base.py +1 -1
  317. maxframe/lib/filesystem/core.py +1 -1
  318. maxframe/lib/filesystem/oss.py +115 -46
  319. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  320. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  321. maxframe/lib/wrapped_pickle.py +10 -0
  322. maxframe/opcodes.py +41 -15
  323. maxframe/protocol.py +12 -0
  324. maxframe/remote/core.py +4 -0
  325. maxframe/serialization/__init__.py +11 -2
  326. maxframe/serialization/arrow.py +38 -13
  327. maxframe/serialization/blob.py +32 -0
  328. maxframe/serialization/core.cp37-win32.pyd +0 -0
  329. maxframe/serialization/core.pyx +39 -1
  330. maxframe/serialization/exception.py +2 -4
  331. maxframe/serialization/numpy.py +11 -0
  332. maxframe/serialization/pandas.py +46 -9
  333. maxframe/serialization/serializables/core.py +2 -2
  334. maxframe/serialization/tests/test_serial.py +31 -4
  335. maxframe/tensor/__init__.py +38 -8
  336. maxframe/tensor/arithmetic/__init__.py +19 -10
  337. maxframe/tensor/arithmetic/core.py +2 -2
  338. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  339. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
  340. maxframe/tensor/core.py +6 -2
  341. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  342. maxframe/tensor/extensions/__init__.py +2 -0
  343. maxframe/tensor/extensions/apply_chunk.py +3 -3
  344. maxframe/tensor/extensions/rebalance.py +65 -0
  345. maxframe/tensor/fft/__init__.py +32 -0
  346. maxframe/tensor/fft/core.py +168 -0
  347. maxframe/tensor/fft/fft.py +112 -0
  348. maxframe/tensor/fft/fft2.py +118 -0
  349. maxframe/tensor/fft/fftfreq.py +80 -0
  350. maxframe/tensor/fft/fftn.py +123 -0
  351. maxframe/tensor/fft/fftshift.py +79 -0
  352. maxframe/tensor/fft/hfft.py +112 -0
  353. maxframe/tensor/fft/ifft.py +114 -0
  354. maxframe/tensor/fft/ifft2.py +115 -0
  355. maxframe/tensor/fft/ifftn.py +123 -0
  356. maxframe/tensor/fft/ifftshift.py +73 -0
  357. maxframe/tensor/fft/ihfft.py +93 -0
  358. maxframe/tensor/fft/irfft.py +118 -0
  359. maxframe/tensor/fft/irfft2.py +62 -0
  360. maxframe/tensor/fft/irfftn.py +114 -0
  361. maxframe/tensor/fft/rfft.py +116 -0
  362. maxframe/tensor/fft/rfft2.py +63 -0
  363. maxframe/tensor/fft/rfftfreq.py +87 -0
  364. maxframe/tensor/fft/rfftn.py +113 -0
  365. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  366. maxframe/tensor/linalg/__init__.py +7 -0
  367. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  368. maxframe/tensor/linalg/cholesky.py +117 -0
  369. maxframe/tensor/linalg/einsum.py +339 -0
  370. maxframe/tensor/linalg/lstsq.py +100 -0
  371. maxframe/tensor/linalg/matrix_norm.py +75 -0
  372. maxframe/tensor/linalg/norm.py +249 -0
  373. maxframe/tensor/linalg/solve.py +72 -0
  374. maxframe/tensor/linalg/solve_triangular.py +2 -2
  375. maxframe/tensor/linalg/vector_norm.py +113 -0
  376. maxframe/tensor/misc/__init__.py +24 -1
  377. maxframe/tensor/misc/argwhere.py +72 -0
  378. maxframe/tensor/misc/array_split.py +46 -0
  379. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  380. maxframe/tensor/misc/copyto.py +130 -0
  381. maxframe/tensor/misc/delete.py +104 -0
  382. maxframe/tensor/misc/dsplit.py +68 -0
  383. maxframe/tensor/misc/ediff1d.py +74 -0
  384. maxframe/tensor/misc/expand_dims.py +85 -0
  385. maxframe/tensor/misc/flip.py +90 -0
  386. maxframe/tensor/misc/fliplr.py +64 -0
  387. maxframe/tensor/misc/flipud.py +68 -0
  388. maxframe/tensor/misc/hsplit.py +85 -0
  389. maxframe/tensor/misc/insert.py +139 -0
  390. maxframe/tensor/misc/moveaxis.py +83 -0
  391. maxframe/tensor/misc/result_type.py +88 -0
  392. maxframe/tensor/misc/roll.py +124 -0
  393. maxframe/tensor/misc/rollaxis.py +77 -0
  394. maxframe/tensor/misc/shape.py +89 -0
  395. maxframe/tensor/misc/split.py +190 -0
  396. maxframe/tensor/misc/tile.py +109 -0
  397. maxframe/tensor/misc/vsplit.py +74 -0
  398. maxframe/tensor/reduction/array_equal.py +2 -1
  399. maxframe/tensor/sort/__init__.py +2 -0
  400. maxframe/tensor/sort/argpartition.py +98 -0
  401. maxframe/tensor/sort/partition.py +228 -0
  402. maxframe/tensor/spatial/__init__.py +15 -0
  403. maxframe/tensor/spatial/distance/__init__.py +17 -0
  404. maxframe/tensor/spatial/distance/cdist.py +421 -0
  405. maxframe/tensor/spatial/distance/pdist.py +398 -0
  406. maxframe/tensor/spatial/distance/squareform.py +153 -0
  407. maxframe/tensor/special/__init__.py +159 -21
  408. maxframe/tensor/special/airy.py +55 -0
  409. maxframe/tensor/special/bessel.py +199 -0
  410. maxframe/tensor/special/core.py +65 -4
  411. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  412. maxframe/tensor/special/ellip_harm.py +55 -0
  413. maxframe/tensor/special/err_fresnel.py +223 -0
  414. maxframe/tensor/special/gamma_funcs.py +303 -0
  415. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  416. maxframe/tensor/special/info_theory.py +189 -0
  417. maxframe/tensor/special/misc.py +21 -0
  418. maxframe/tensor/statistics/__init__.py +6 -0
  419. maxframe/tensor/statistics/corrcoef.py +77 -0
  420. maxframe/tensor/statistics/cov.py +222 -0
  421. maxframe/tensor/statistics/digitize.py +126 -0
  422. maxframe/tensor/statistics/histogram.py +520 -0
  423. maxframe/tensor/statistics/median.py +85 -0
  424. maxframe/tensor/statistics/ptp.py +89 -0
  425. maxframe/tensor/utils.py +3 -3
  426. maxframe/tests/test_udf.py +61 -0
  427. maxframe/tests/test_utils.py +51 -6
  428. maxframe/tests/utils.py +0 -2
  429. maxframe/typing_.py +2 -0
  430. maxframe/udf.py +130 -9
  431. maxframe/utils.py +254 -27
  432. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
  433. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
  434. maxframe_client/fetcher.py +35 -4
  435. maxframe_client/session/odps.py +7 -2
  436. maxframe_client/session/task.py +8 -1
  437. maxframe_client/tests/test_fetcher.py +76 -3
  438. maxframe_client/tests/test_session.py +28 -1
  439. maxframe/dataframe/arrays.py +0 -864
  440. /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  441. /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  442. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  443. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,121 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import List
16
+
17
+ import numpy as np
18
+
19
+ from .... import opcodes
20
+ from ....config import options
21
+ from ....core import EntityData
22
+ from ....core.operator import OperatorStage
23
+ from ....serialization.serializables import (
24
+ AnyField,
25
+ BoolField,
26
+ DictField,
27
+ Int64Field,
28
+ KeyField,
29
+ )
30
+ from ....tensor.core import TensorOrder
31
+ from .core import PairwiseDistances
32
+
33
+
34
class PairwiseDistancesTopk(PairwiseDistances):
    """Operator declaring the top-``k`` part of a pairwise distance matrix.

    Calling an instance with ``X`` and ``Y`` declares a float64 "distance"
    tensor of shape ``(X.shape[0], min(Y.shape[0], k))`` and, when
    ``return_index`` is truthy, an int64 "index" tensor of the same shape.
    """

    _op_type_ = opcodes.PAIRWISE_DISTANCES_TOPK

    # Input operands; reset to None in the agg stage (see ``_set_inputs``).
    x = KeyField("x")
    y = KeyField("y")
    # Upper bound for the second dimension of the outputs.
    k = Int64Field("k", default=None)
    # Metric and extra keyword arguments forwarded to ``pairwise_distances``.
    metric = AnyField("metric", default=None)
    metric_kwargs = DictField("metric_kwargs", default=None)
    # Whether an index tensor is declared next to the distance tensor.
    return_index = BoolField("return_index", default=None)
    # NOTE(review): not read inside this class; presumably consumed by the
    # execution backend -- confirm.
    working_memory = AnyField("working_memory", default=None)

    @property
    def output_limit(self):
        """Number of outputs: 2 when ``return_index`` is truthy, otherwise 1."""
        return 2 if self.return_index else 1

    @classmethod
    def _set_inputs(cls, op: "PairwiseDistancesTopk", inputs: List[EntityData]):
        """Bind ``op.x`` / ``op.y`` to the first two inputs outside the agg stage."""
        super()._set_inputs(op, inputs)
        if op.stage != OperatorStage.agg:
            op.x, op.y = inputs[:2]
        else:
            op.x = op.y = None

    def __call__(self, X, Y):
        """Declare the output tensor(s) for inputs ``X`` and ``Y``.

        Returns a list ``[distance, index]`` when ``return_index`` is truthy,
        otherwise the single distance tensor.
        """
        from .pairwise import pairwise_distances

        # ``metric_kwargs`` defaults to None, which cannot be unpacked with
        # ``**``; fall back to an empty mapping in that case.
        metric_kwargs = self.metric_kwargs or {}

        # leverage pairwise_distances for checks
        d = pairwise_distances(X, Y, metric=self.metric, **metric_kwargs)

        # never ask for more entries than Y has rows
        if self.k > Y.shape[0]:
            self.k = Y.shape[0]

        # continue with the inputs as recorded on the pairwise_distances op
        X, Y = d.op.inputs

        shape = (X.shape[0], min(Y.shape[0], self.k))
        kws = [
            {
                "shape": shape,
                "order": TensorOrder.C_ORDER,
                "dtype": np.dtype(np.float64),
                "_type_": "distance",
            },
        ]
        if self.return_index:
            kws.append(
                {
                    "shape": shape,
                    "order": TensorOrder.C_ORDER,
                    "dtype": np.dtype(np.int64),
                    "_type_": "index",
                }
            )
            return self.new_tensors([X, Y], kws=kws)
        else:
            return self.new_tensors([X, Y], kws=kws)[0]
91
+
92
+
93
def pairwise_distances_topk(
    X,
    Y=None,
    k=None,
    metric="euclidean",
    return_index=True,
    axis=1,
    working_memory=None,
    **kwds
):
    """Build a ``PairwiseDistancesTopk`` operator and apply it to ``X`` and ``Y``.

    Parameters
    ----------
    X : first input.
    Y : second input; ``X`` is used when omitted.
    k : required upper bound on the number of entries kept; must not be
        negative.
    metric : metric forwarded to the operator, "euclidean" by default.
    return_index : whether an index tensor is returned next to the distances.
    axis : when 0, ``X`` and ``Y`` are swapped before the operator is built.
    working_memory : forwarded to the operator; falls back to
        ``options.learn.working_memory`` when None.
    **kwds : extra keyword arguments stored as the operator's ``metric_kwargs``.

    Returns
    -------
    The result of calling the operator on ``(X, Y)``.

    Raises
    ------
    ValueError
        If ``k`` is None or negative.
    """
    if k is None:  # pragma: no cover
        raise ValueError("`k` has to be specified")
    if k < 0:
        # a negative k would end up as a negative output dimension
        raise ValueError("`k` has to be non-negative")

    if Y is None:
        Y = X
    if axis == 0:
        X, Y = Y, X
    if working_memory is None:
        working_memory = options.learn.working_memory
    op = PairwiseDistancesTopk(
        x=X,
        y=Y,
        k=k,
        metric=metric,
        metric_kwargs=kwds,
        return_index=return_index,
        working_memory=working_memory,
    )
    return op(X, Y)
@@ -0,0 +1,51 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from .... import tensor as mt
16
+ from .core import PairwiseDistances
17
+ from .euclidean import euclidean_distances
18
+
19
+
20
def rbf_kernel(X, Y=None, gamma=None):
    """
    Compute the rbf (gaussian) kernel between X and Y::

        K(x, y) = exp(-gamma ||x-y||^2)

    evaluated for every pair of rows x in X and y in Y.

    Read more in the :ref:`User Guide <rbf_kernel>`.

    Parameters
    ----------
    X : tensor of shape (n_samples_X, n_features)

    Y : tensor of shape (n_samples_Y, n_features)

    gamma : float, default None
        When None, 1.0 / n_features is used.

    Returns
    -------
    kernel_matrix : tensor of shape (n_samples_X, n_samples_Y)
    """

    X, Y = PairwiseDistances.check_pairwise_arrays(X, Y)

    # derive the default coefficient from the number of features
    coef = 1.0 / X.shape[1] if gamma is None else gamma

    kernel = euclidean_distances(X, Y, squared=True)
    kernel *= -coef
    return mt.exp(kernel)
@@ -0,0 +1,13 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
@@ -0,0 +1,26 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import pytest
16
+ from sklearn.metrics import r2_score
17
+
18
+ from .. import get_scorer
19
+
20
+
21
def test_get_scorer():
    """Check that get_scorer rejects an unknown name and resolves known scorers."""
    # an unregistered scorer name is rejected
    with pytest.raises(ValueError):
        get_scorer("unknown")

    # both a scorer name and a metric callable resolve to something usable
    for scoring in ("r2", r2_score):
        assert get_scorer(scoring) is not None
@@ -106,10 +106,11 @@ class MinMaxScaler(TransformerMixin, BaseEstimator):
106
106
  <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
107
107
  """
108
108
 
109
- def __init__(self, feature_range=(0, 1), copy=True, clip=False):
109
+ def __init__(self, feature_range=(0, 1), copy=True, clip=False, validate=True):
110
110
  self.feature_range = feature_range
111
111
  self.copy = copy
112
112
  self.clip = clip
113
+ self.validate = validate
113
114
 
114
115
  def _reset(self): # pragma: no cover
115
116
  """Reset internal data-dependent state of the scaler, if necessary.
@@ -186,13 +187,14 @@ class MinMaxScaler(TransformerMixin, BaseEstimator):
186
187
  )
187
188
 
188
189
  first_pass = not hasattr(self, "n_samples_seen_")
189
- X = self._validate_data(
190
- X,
191
- reset=first_pass,
192
- estimator=self,
193
- dtype=FLOAT_DTYPES,
194
- force_all_finite="allow-nan",
195
- )
190
+ if self.validate:
191
+ X = self._validate_data(
192
+ X,
193
+ reset=first_pass,
194
+ estimator=self,
195
+ dtype=FLOAT_DTYPES,
196
+ force_all_finite="allow-nan",
197
+ )
196
198
 
197
199
  if isinstance(X, (DATAFRAME_TYPE, SERIES_TYPE, INDEX_TYPE)):
198
200
  data_min = X.min(axis=0)
@@ -239,13 +241,14 @@ class MinMaxScaler(TransformerMixin, BaseEstimator):
239
241
  """
240
242
  check_is_fitted(self)
241
243
 
242
- X = self._validate_data(
243
- X,
244
- copy=self.copy,
245
- dtype=FLOAT_DTYPES,
246
- force_all_finite="allow-nan",
247
- reset=False,
248
- )
244
+ if self.validate:
245
+ X = self._validate_data(
246
+ X,
247
+ copy=self.copy,
248
+ dtype=FLOAT_DTYPES,
249
+ force_all_finite="allow-nan",
250
+ reset=False,
251
+ )
249
252
 
250
253
  X *= self.scale_
251
254
  X += self.min_
@@ -290,6 +293,7 @@ def minmax_scale(
290
293
  *,
291
294
  axis=0,
292
295
  copy=True,
296
+ validate=True,
293
297
  execute=False,
294
298
  session=None,
295
299
  run_kwargs=None
@@ -368,21 +372,28 @@ def minmax_scale(
368
372
  """ # noqa
369
373
  # Unlike the scaler object, this function allows 1d input.
370
374
  # If copy is required, it will be done inside the scaler object.
371
- X = check_array(
372
- X, copy=False, ensure_2d=False, dtype=FLOAT_DTYPES, force_all_finite="allow-nan"
373
- )
374
- original_ndim = X.ndim
375
+ if validate:
376
+ X = check_array(
377
+ X,
378
+ copy=False,
379
+ ensure_2d=False,
380
+ dtype=FLOAT_DTYPES,
381
+ force_all_finite="allow-nan",
382
+ )
383
+ original_ndim = X.ndim
375
384
 
376
- if original_ndim == 1:
377
- X = X.reshape(X.shape[0], 1)
385
+ if original_ndim == 1:
386
+ X = X.reshape(X.shape[0], 1)
387
+ else:
388
+ original_ndim = X.ndim
378
389
 
379
- s = MinMaxScaler(feature_range=feature_range, copy=copy)
390
+ s = MinMaxScaler(feature_range=feature_range, copy=copy, validate=validate)
380
391
  if axis == 0:
381
392
  X = s.fit_transform(X)
382
393
  else:
383
394
  X = s.fit_transform(X.T).T
384
395
 
385
- if original_ndim == 1:
396
+ if validate and original_ndim == 1:
386
397
  X = X.ravel()
387
398
 
388
399
  if not execute:
@@ -156,10 +156,11 @@ class StandardScaler(TransformerMixin, BaseEstimator):
156
156
  [[3. 3.]]
157
157
  """
158
158
 
159
- def __init__(self, *, copy=True, with_mean=True, with_std=True):
159
+ def __init__(self, *, copy=True, with_mean=True, with_std=True, validate=True):
160
160
  self.with_mean = with_mean
161
161
  self.with_std = with_std
162
162
  self.copy = copy
163
+ self.validate = validate
163
164
 
164
165
  def _reset(self):
165
166
  """Reset internal data-dependent state of the scaler, if necessary.
@@ -246,14 +247,15 @@ class StandardScaler(TransformerMixin, BaseEstimator):
246
247
  Fitted scaler.
247
248
  """
248
249
  first_call = not hasattr(self, "n_samples_seen_")
249
- X = self._validate_data(
250
- X,
251
- accept_sparse=("csr", "csc"),
252
- dtype=FLOAT_DTYPES,
253
- force_all_finite="allow-nan",
254
- reset=first_call,
255
- )
256
- n_features = X.shape[1]
250
+ if self.validate:
251
+ X = self._validate_data(
252
+ X,
253
+ accept_sparse=("csr", "csc"),
254
+ dtype=FLOAT_DTYPES,
255
+ force_all_finite="allow-nan",
256
+ reset=first_call,
257
+ )
258
+ n_features = X.shape[1] if X.ndim == 2 else 1
257
259
 
258
260
  if sample_weight is not None:
259
261
  sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)
@@ -267,7 +269,9 @@ class StandardScaler(TransformerMixin, BaseEstimator):
267
269
  # incr_mean_variance_axis and _incremental_variance_axis
268
270
  dtype = np.int64 if sample_weight is None else X.dtype
269
271
  if not hasattr(self, "n_samples_seen_"):
270
- self.n_samples_seen_ = mt.zeros(n_features, dtype=dtype)
272
+ self.n_samples_seen_ = (
273
+ mt.zeros(n_features, dtype=dtype) if X.ndim == 2 else 0
274
+ )
271
275
  # elif np.size(self.n_samples_seen_) == 1:
272
276
  # self.n_samples_seen_ = np.repeat(self.n_samples_seen_, X.shape[1])
273
277
  # self.n_samples_seen_ = self.n_samples_seen_.astype(dtype, copy=False)
@@ -309,9 +313,11 @@ class StandardScaler(TransformerMixin, BaseEstimator):
309
313
  constant_mask = _is_constant_feature(
310
314
  self.var_, self.mean_, self.n_samples_seen_
311
315
  )
312
- self.scale_ = _handle_zeros_in_scale(
313
- mt.sqrt(self.var_), copy=False, constant_mask=constant_mask
314
- )
316
+ self.scale_ = mt.sqrt(self.var_)
317
+ if self.validate:
318
+ self.scale_ = _handle_zeros_in_scale(
319
+ self.scale_, copy=False, constant_mask=constant_mask
320
+ )
315
321
  else:
316
322
  self.scale_ = None
317
323
 
@@ -337,14 +343,15 @@ class StandardScaler(TransformerMixin, BaseEstimator):
337
343
  check_is_fitted(self)
338
344
 
339
345
  copy = copy if copy is not None else self.copy
340
- X = self._validate_data(
341
- X,
342
- reset=False,
343
- accept_sparse="csr",
344
- copy=copy,
345
- dtype=FLOAT_DTYPES,
346
- force_all_finite="allow-nan",
347
- )
346
+ if self.validate:
347
+ X = self._validate_data(
348
+ X,
349
+ reset=False,
350
+ accept_sparse="csr",
351
+ copy=copy,
352
+ dtype=FLOAT_DTYPES,
353
+ force_all_finite="allow-nan",
354
+ )
348
355
 
349
356
  if sparse.issparse(X):
350
357
  raise NotImplementedError("Scaling on sparse tensors is not supported")
@@ -397,7 +404,7 @@ class StandardScaler(TransformerMixin, BaseEstimator):
397
404
  return X
398
405
 
399
406
 
400
- def scale(X, *, axis=0, with_mean=True, with_std=True, copy=True):
407
+ def scale(X, *, axis=0, with_mean=True, with_std=True, copy=True, validate=True):
401
408
  """Standardize a dataset along any axis.
402
409
 
403
410
  Center to the mean and component wise scale to unit variance.
@@ -488,16 +495,18 @@ def scale(X, *, axis=0, with_mean=True, with_std=True, copy=True):
488
495
  X = mt.tensor(X)
489
496
 
490
497
  ndim = X.ndim
491
- if ndim == 1:
498
+ if validate and ndim == 1:
492
499
  X = X.reshape((X.shape[0], 1))
493
500
  if axis == 1:
494
501
  X = X.T
495
502
 
496
- scaler = StandardScaler(with_mean=with_mean, with_std=with_std, copy=copy)
503
+ scaler = StandardScaler(
504
+ with_mean=with_mean, with_std=with_std, copy=copy, validate=validate
505
+ )
497
506
  transformed = scaler.fit_transform(X)
498
507
 
499
508
  if axis == 1:
500
509
  transformed = transformed.T
501
- if ndim == 1:
510
+ if validate and ndim == 1:
502
511
  transformed = transformed.reshape(transformed.shape[0])
503
512
  return transformed
@@ -14,6 +14,7 @@
14
14
 
15
15
  from .core import convert_to_tensor_or_dataframe
16
16
  from .multiclass import check_classification_targets
17
+ from .odpsio import read_odps_model
17
18
  from .shuffle import shuffle
18
19
  from .sparsefuncs import count_nonzero
19
- from .validation import check_consistent_length
20
+ from .validation import check_array, check_consistent_length
@@ -20,7 +20,7 @@ from ... import opcodes
20
20
  from ... import tensor as mt
21
21
  from ...config import options
22
22
  from ...core import ENTITY_TYPE, EntityData, OutputType, get_output_types
23
- from ...core.operator import Operator, OperatorStage
23
+ from ...core.operator import Operator
24
24
  from ...serialization.serializables import (
25
25
  BoolField,
26
26
  DataTypeField,
@@ -56,7 +56,6 @@ class CheckBase(Operator, LearnOperatorMixin):
56
56
  # output input if value not specified
57
57
  self.value = value = value if value is not None else x
58
58
  self.output_types = get_output_types(value)
59
- self.stage = OperatorStage.agg
60
59
  return self.new_tileable([x, value], kws=[value.params])
61
60
 
62
61
 
@@ -14,9 +14,19 @@
14
14
 
15
15
  import math
16
16
  import numbers
17
+ import warnings
17
18
 
19
+ import numpy as np
18
20
  import pandas as pd
19
21
 
22
+ from ...utils import parse_readable_size
23
+
24
+ try:
25
+ from sklearn import get_config as sklearn_get_config
26
+ except ImportError:
27
+ sklearn_get_config = None
28
+
29
+ from ...config import options
20
30
  from ...dataframe import DataFrame, Series
21
31
  from ...dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
22
32
  from ...tensor import tensor as astensor
@@ -60,3 +70,52 @@ def is_scalar_nan(x):
60
70
  False
61
71
  """
62
72
  return isinstance(x, numbers.Real) and math.isnan(x)
73
+
74
+
75
def get_chunk_n_rows(row_bytes, max_n_rows=None, working_memory=None):
    """Calculate how many rows can be processed within ``working_memory``.

    Parameters
    ----------
    row_bytes : int
        The expected number of bytes of memory that will be consumed
        during the processing of each row.
    max_n_rows : int, optional
        The maximum return value.
    working_memory : int, float or other size spec, optional
        Memory budget. A plain real number is interpreted as MiB. Any other
        value is passed to ``parse_readable_size`` and the first element of
        its result is used as a byte count (presumably a readable size
        string -- confirm against ``parse_readable_size``).
        When None (default), ``options.learn.working_memory`` is used; if
        that is also None, ``sklearn.get_config()['working_memory']`` is
        used when scikit-learn is importable, otherwise 1024 MiB.

    Returns
    -------
    int
        Number of rows fitting in the budget, capped by ``max_n_rows`` when
        given and never less than 1.

    Warns
    -----
    Issues a UserWarning if ``row_bytes`` exceeds ``working_memory`` MiB.
    """
    import numbers

    if working_memory is None:  # pragma: no cover
        working_memory = options.learn.working_memory
        if working_memory is None:
            if sklearn_get_config is not None:
                working_memory = sklearn_get_config()["working_memory"]
            else:
                working_memory = 1024

    # Numbers (int, float, NumPy scalars) are MiB, as documented; everything
    # else is delegated to the size parser, which yields bytes.
    if isinstance(working_memory, numbers.Real):
        budget_bytes = working_memory * 2**20
    else:
        budget_bytes = parse_readable_size(working_memory)[0]

    chunk_n_rows = int(budget_bytes // row_bytes)
    if max_n_rows is not None:
        chunk_n_rows = min(chunk_n_rows, max_n_rows)
    if chunk_n_rows < 1:  # pragma: no cover
        # Report the budget in MiB so that it matches the unit in the message
        # (the budget itself is tracked in bytes at this point).
        warnings.warn(
            "Could not adhere to working_memory config. "
            "Currently %.0fMiB, %.0fMiB required."
            % (budget_bytes / 2**20, np.ceil(row_bytes * 2**-20))
        )
        chunk_n_rows = 1
    return chunk_n_rows
@@ -15,6 +15,9 @@
15
15
  import numpy as np
16
16
 
17
17
  from ... import tensor as mt
18
+ from ...core import ENTITY_TYPE
19
+ from ...dataframe.core import DATAFRAME_TYPE, SERIES_TYPE
20
+ from ...tensor.datasource import TensorZeros
18
21
 
19
22
 
20
23
  # Use at least float64 for the accumulating functions to avoid precision issue
@@ -42,13 +45,30 @@ def _safe_accumulator_op(op, x, *args, **kwargs):
42
45
  -------
43
46
  result : The output of the accumulator function passed to this function
44
47
  """
45
- if np.issubdtype(x.dtype, np.floating) and x.dtype.itemsize < 8:
48
+ if (
49
+ hasattr(x, "dtype")
50
+ and np.issubdtype(x.dtype, np.floating)
51
+ and x.dtype.itemsize < 8
52
+ ):
46
53
  result = op(x, *args, **kwargs, dtype=np.float64)
47
54
  else:
48
55
  result = op(x, *args, **kwargs)
49
56
  return result
50
57
 
51
58
 
59
def logsumexp_real(a, axis=None, keepdims=False):
    """Simplified logsumexp for real arrays without biases.

    Computes ``log(sum(exp(a)))`` along ``axis`` by subtracting the maximum
    before exponentiating and adding it back afterwards.

    Parameters
    ----------
    a : array_like
        Input values; converted with ``mt.tensor``.
    axis : None, int or tuple of ints, optional
        Axis or axes to reduce over. None (default) reduces over all axes.
    keepdims : bool, optional
        If True, the reduced axes are kept with length one.

    Returns
    -------
    tensor
        The log-sum-exp of ``a`` along ``axis``.

    Notes
    -----
    No special handling is applied when the maximum is not finite.
    """
    from ... import tensor as mt

    x = mt.tensor(a)
    # Reductions keep their dimensions so the shift broadcasts against ``x``.
    x_max = mt.amax(x, axis=axis, keepdims=True)
    shifted_exp = mt.exp(x - x_max)
    ret = mt.log(mt.sum(shifted_exp, axis=axis, keepdims=True)) + x_max
    if keepdims:
        return ret
    # Drop exactly the axes that were reduced; a fixed axis=1 is only
    # correct for the 2-D, axis=1 case.
    return mt.squeeze(ret, axis=axis)
70
+
71
+
52
72
  def _incremental_mean_and_var(
53
73
  X, last_mean, last_variance, last_sample_count, sample_weight=None
54
74
  ):
@@ -104,16 +124,31 @@ def _incremental_mean_and_var(
104
124
  `utils.sparsefuncs.incr_mean_variance_axis` and
105
125
  `utils.sparsefuncs_fast.incr_mean_variance_axis0`
106
126
  """
127
+ has_last_sample = isinstance(last_sample_count, ENTITY_TYPE) and not isinstance(
128
+ last_sample_count.op, TensorZeros
129
+ )
130
+ is_df_type = isinstance(X, (DATAFRAME_TYPE, SERIES_TYPE))
131
+
107
132
  # old = stats until now
108
133
  # new = the current increment
109
134
  # updated = the aggregated stats
110
- last_sum = last_mean * last_sample_count
135
+ last_sum = last_mean * last_sample_count if has_last_sample else 0
111
136
  X_nan_mask = mt.isnan(X)
112
137
  # if mt.any(X_nan_mask):
113
138
  # sum_op = mt.nansum
114
139
  # else:
115
140
  # sum_op = mt.sum
116
- sum_op = mt.nansum
141
+
142
+ def df_sum(val, **kw):
143
+ if "dtype" in kw:
144
+ val = val.astype(kw.pop("dtype"))
145
+ return val.sum(**kw)
146
+
147
+ if is_df_type:
148
+ sum_op = df_sum
149
+ else:
150
+ sum_op = mt.nansum
151
+
117
152
  if sample_weight is not None:
118
153
  # equivalent to np.nansum(X * sample_weight, axis=0)
119
154
  # safer because np.float64(X*W) != np.float64(X)*np.float64(W)
@@ -125,10 +160,16 @@ def _incremental_mean_and_var(
125
160
  )
126
161
  else:
127
162
  new_sum = _safe_accumulator_op(sum_op, X, axis=0)
128
- n_samples = X.shape[0]
129
- new_sample_count = n_samples - mt.sum(X_nan_mask, axis=0)
163
+ if is_df_type:
164
+ new_sample_count = X.count()
165
+ else:
166
+ n_samples = X.shape[0]
167
+ new_sample_count = n_samples - mt.sum(X_nan_mask, axis=0)
130
168
 
131
- updated_sample_count = last_sample_count + new_sample_count
169
+ if not has_last_sample:
170
+ updated_sample_count = new_sample_count
171
+ else:
172
+ updated_sample_count = last_sample_count + new_sample_count
132
173
 
133
174
  updated_mean = (last_sum + new_sum) / updated_sample_count
134
175
 
@@ -157,7 +198,9 @@ def _incremental_mean_and_var(
157
198
  # and recommendations", by Chan, Golub, and LeVeque.
158
199
  new_unnormalized_variance -= correction**2 / new_sample_count
159
200
 
160
- last_unnormalized_variance = last_variance * last_sample_count
201
+ last_unnormalized_variance = (
202
+ last_variance * last_sample_count if has_last_sample else 0
203
+ )
161
204
 
162
205
  with mt.errstate(divide="ignore", invalid="ignore"):
163
206
  last_over_new_count = last_sample_count / new_sample_count
@@ -169,8 +212,35 @@ def _incremental_mean_and_var(
169
212
  * (last_sum / last_over_new_count - new_sum) ** 2
170
213
  )
171
214
 
172
- zeros = last_sample_count == 0
173
- updated_unnormalized_variance[zeros] = new_unnormalized_variance[zeros]
215
+ if not has_last_sample:
216
+ updated_unnormalized_variance = new_unnormalized_variance
217
+ else:
218
+ zeros = last_sample_count == 0
219
+ updated_unnormalized_variance[zeros] = new_unnormalized_variance[zeros]
174
220
  updated_variance = updated_unnormalized_variance / updated_sample_count
175
221
 
176
222
  return updated_mean, updated_variance, updated_sample_count
223
+
224
+
225
def row_norms(X, squared=False):
    """Compute the Euclidean norm (optionally squared) of every row of X.

    The input is used as given; nothing is validated.

    Parameters
    ----------
    X : array_like
        The input tensor
    squared : bool, optional (default = False)
        When True the squared norms are returned and no square root is taken.

    Returns
    -------
    array_like
        One value per row: the Euclidean norm, or its square if
        ``squared`` is True.
    """
    squared_norms = (X**2).sum(axis=1)
    return squared_norms if squared else mt.sqrt(squared_norms)