maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (443) hide show
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win32.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +9 -8
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +38 -1
  15. maxframe/codegen/spe/dataframe/misc.py +11 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +32 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +39 -18
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  29. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  30. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  31. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  32. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  33. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  34. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  35. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  36. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  37. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  38. maxframe/codegen/spe/tensor/__init__.py +3 -0
  39. maxframe/codegen/spe/tensor/datasource.py +1 -0
  40. maxframe/codegen/spe/tensor/fft.py +74 -0
  41. maxframe/codegen/spe/tensor/linalg.py +29 -2
  42. maxframe/codegen/spe/tensor/misc.py +79 -25
  43. maxframe/codegen/spe/tensor/spatial.py +45 -0
  44. maxframe/codegen/spe/tensor/statistics.py +44 -0
  45. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  46. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  47. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  48. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  49. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  50. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  51. maxframe/codegen/spe/utils.py +2 -0
  52. maxframe/config/config.py +73 -9
  53. maxframe/config/tests/test_validators.py +13 -1
  54. maxframe/config/validators.py +49 -0
  55. maxframe/conftest.py +54 -17
  56. maxframe/core/accessor.py +2 -2
  57. maxframe/core/base.py +2 -1
  58. maxframe/core/entity/core.py +5 -0
  59. maxframe/core/entity/tileables.py +3 -1
  60. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  61. maxframe/core/graph/entity.py +8 -3
  62. maxframe/core/mode.py +6 -1
  63. maxframe/core/operator/base.py +9 -2
  64. maxframe/core/operator/core.py +10 -2
  65. maxframe/core/operator/utils.py +13 -0
  66. maxframe/dataframe/__init__.py +12 -5
  67. maxframe/dataframe/accessors/__init__.py +1 -1
  68. maxframe/dataframe/accessors/compat.py +45 -0
  69. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  70. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  71. maxframe/dataframe/accessors/dict_/core.py +48 -0
  72. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  73. maxframe/dataframe/accessors/dict_/length.py +7 -16
  74. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  75. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  76. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  77. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  78. maxframe/dataframe/accessors/list_/core.py +48 -0
  79. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  80. maxframe/dataframe/accessors/list_/length.py +7 -16
  81. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  82. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  83. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  84. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  85. maxframe/dataframe/accessors/struct_/core.py +43 -0
  86. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  87. maxframe/dataframe/accessors/struct_/field.py +123 -0
  88. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  89. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  90. maxframe/dataframe/arithmetic/__init__.py +18 -4
  91. maxframe/dataframe/arithmetic/between.py +106 -0
  92. maxframe/dataframe/arithmetic/dot.py +237 -0
  93. maxframe/dataframe/arithmetic/maximum.py +33 -0
  94. maxframe/dataframe/arithmetic/minimum.py +33 -0
  95. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  96. maxframe/dataframe/core.py +161 -224
  97. maxframe/dataframe/datasource/__init__.py +18 -0
  98. maxframe/dataframe/datasource/core.py +6 -0
  99. maxframe/dataframe/datasource/direct.py +57 -0
  100. maxframe/dataframe/datasource/from_dict.py +124 -0
  101. maxframe/dataframe/datasource/from_index.py +1 -1
  102. maxframe/dataframe/datasource/from_records.py +77 -0
  103. maxframe/dataframe/datasource/from_tensor.py +109 -41
  104. maxframe/dataframe/datasource/read_csv.py +21 -14
  105. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  106. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  107. maxframe/dataframe/datasource/read_parquet.py +38 -39
  108. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  109. maxframe/dataframe/datastore/__init__.py +11 -1
  110. maxframe/dataframe/datastore/direct.py +268 -0
  111. maxframe/dataframe/datastore/to_csv.py +29 -41
  112. maxframe/dataframe/datastore/to_odps.py +36 -4
  113. maxframe/dataframe/extensions/__init__.py +20 -4
  114. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  115. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  116. maxframe/dataframe/extensions/collect_kv.py +126 -0
  117. maxframe/dataframe/extensions/extract_kv.py +177 -0
  118. maxframe/dataframe/extensions/flatjson.py +2 -1
  119. maxframe/dataframe/extensions/map_reduce.py +263 -0
  120. maxframe/dataframe/extensions/rebalance.py +62 -0
  121. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  122. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  123. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  124. maxframe/dataframe/groupby/__init__.py +17 -2
  125. maxframe/dataframe/groupby/aggregation.py +86 -49
  126. maxframe/dataframe/groupby/apply.py +1 -1
  127. maxframe/dataframe/groupby/apply_chunk.py +19 -5
  128. maxframe/dataframe/groupby/core.py +116 -16
  129. maxframe/dataframe/groupby/cum.py +4 -25
  130. maxframe/dataframe/groupby/expanding.py +264 -0
  131. maxframe/dataframe/groupby/fill.py +1 -1
  132. maxframe/dataframe/groupby/getitem.py +12 -5
  133. maxframe/dataframe/groupby/head.py +11 -1
  134. maxframe/dataframe/groupby/rank.py +136 -0
  135. maxframe/dataframe/groupby/rolling.py +206 -0
  136. maxframe/dataframe/groupby/shift.py +114 -0
  137. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  138. maxframe/dataframe/indexing/__init__.py +22 -2
  139. maxframe/dataframe/indexing/droplevel.py +195 -0
  140. maxframe/dataframe/indexing/filter.py +169 -0
  141. maxframe/dataframe/indexing/get_level_values.py +76 -0
  142. maxframe/dataframe/indexing/iat.py +45 -0
  143. maxframe/dataframe/indexing/iloc.py +152 -12
  144. maxframe/dataframe/indexing/insert.py +46 -18
  145. maxframe/dataframe/indexing/loc.py +287 -7
  146. maxframe/dataframe/indexing/reindex.py +14 -5
  147. maxframe/dataframe/indexing/rename.py +6 -0
  148. maxframe/dataframe/indexing/rename_axis.py +2 -2
  149. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  150. maxframe/dataframe/indexing/reset_index.py +33 -6
  151. maxframe/dataframe/indexing/sample.py +8 -0
  152. maxframe/dataframe/indexing/setitem.py +3 -3
  153. maxframe/dataframe/indexing/swaplevel.py +185 -0
  154. maxframe/dataframe/indexing/take.py +99 -0
  155. maxframe/dataframe/indexing/truncate.py +140 -0
  156. maxframe/dataframe/indexing/where.py +0 -11
  157. maxframe/dataframe/indexing/xs.py +148 -0
  158. maxframe/dataframe/merge/__init__.py +15 -1
  159. maxframe/dataframe/merge/append.py +97 -98
  160. maxframe/dataframe/merge/combine.py +244 -0
  161. maxframe/dataframe/merge/combine_first.py +120 -0
  162. maxframe/dataframe/merge/compare.py +387 -0
  163. maxframe/dataframe/merge/concat.py +183 -0
  164. maxframe/dataframe/merge/update.py +271 -0
  165. maxframe/dataframe/misc/__init__.py +28 -11
  166. maxframe/dataframe/misc/_duplicate.py +10 -4
  167. maxframe/dataframe/misc/apply.py +1 -1
  168. maxframe/dataframe/misc/check_unique.py +82 -0
  169. maxframe/dataframe/misc/clip.py +145 -0
  170. maxframe/dataframe/misc/describe.py +175 -9
  171. maxframe/dataframe/misc/drop.py +31 -0
  172. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  173. maxframe/dataframe/misc/duplicated.py +2 -2
  174. maxframe/dataframe/misc/get_dummies.py +5 -1
  175. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  176. maxframe/dataframe/misc/isin.py +2 -2
  177. maxframe/dataframe/misc/map.py +125 -18
  178. maxframe/dataframe/misc/repeat.py +159 -0
  179. maxframe/dataframe/misc/tests/test_misc.py +48 -3
  180. maxframe/dataframe/misc/to_numeric.py +3 -0
  181. maxframe/dataframe/misc/transform.py +12 -5
  182. maxframe/dataframe/misc/transpose.py +13 -1
  183. maxframe/dataframe/misc/valid_index.py +115 -0
  184. maxframe/dataframe/misc/value_counts.py +38 -4
  185. maxframe/dataframe/missing/checkna.py +14 -6
  186. maxframe/dataframe/missing/dropna.py +5 -0
  187. maxframe/dataframe/missing/fillna.py +1 -1
  188. maxframe/dataframe/missing/replace.py +7 -4
  189. maxframe/dataframe/reduction/__init__.py +35 -16
  190. maxframe/dataframe/reduction/aggregation.py +43 -14
  191. maxframe/dataframe/reduction/all.py +2 -2
  192. maxframe/dataframe/reduction/any.py +2 -2
  193. maxframe/dataframe/reduction/argmax.py +103 -0
  194. maxframe/dataframe/reduction/argmin.py +103 -0
  195. maxframe/dataframe/reduction/core.py +80 -24
  196. maxframe/dataframe/reduction/count.py +13 -9
  197. maxframe/dataframe/reduction/cov.py +166 -0
  198. maxframe/dataframe/reduction/cummax.py +2 -2
  199. maxframe/dataframe/reduction/cummin.py +2 -2
  200. maxframe/dataframe/reduction/cumprod.py +2 -2
  201. maxframe/dataframe/reduction/cumsum.py +2 -2
  202. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  203. maxframe/dataframe/reduction/idxmax.py +185 -0
  204. maxframe/dataframe/reduction/idxmin.py +185 -0
  205. maxframe/dataframe/reduction/kurtosis.py +37 -30
  206. maxframe/dataframe/reduction/max.py +2 -2
  207. maxframe/dataframe/reduction/mean.py +9 -7
  208. maxframe/dataframe/reduction/median.py +2 -2
  209. maxframe/dataframe/reduction/min.py +2 -2
  210. maxframe/dataframe/reduction/mode.py +144 -0
  211. maxframe/dataframe/reduction/nunique.py +19 -11
  212. maxframe/dataframe/reduction/prod.py +18 -13
  213. maxframe/dataframe/reduction/reduction_size.py +2 -2
  214. maxframe/dataframe/reduction/sem.py +13 -9
  215. maxframe/dataframe/reduction/skew.py +31 -27
  216. maxframe/dataframe/reduction/str_concat.py +10 -7
  217. maxframe/dataframe/reduction/sum.py +18 -14
  218. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  219. maxframe/dataframe/reduction/unique.py +20 -3
  220. maxframe/dataframe/reduction/var.py +16 -12
  221. maxframe/dataframe/reshape/__init__.py +38 -0
  222. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  223. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  224. maxframe/dataframe/reshape/unstack.py +114 -0
  225. maxframe/dataframe/sort/__init__.py +16 -1
  226. maxframe/dataframe/sort/argsort.py +68 -0
  227. maxframe/dataframe/sort/core.py +2 -1
  228. maxframe/dataframe/sort/nlargest.py +238 -0
  229. maxframe/dataframe/sort/nsmallest.py +228 -0
  230. maxframe/dataframe/sort/rank.py +147 -0
  231. maxframe/dataframe/statistics/__init__.py +3 -3
  232. maxframe/dataframe/statistics/corr.py +1 -0
  233. maxframe/dataframe/statistics/quantile.py +2 -2
  234. maxframe/dataframe/tests/test_typing.py +104 -0
  235. maxframe/dataframe/tests/test_utils.py +66 -2
  236. maxframe/dataframe/tseries/__init__.py +19 -0
  237. maxframe/dataframe/tseries/at_time.py +61 -0
  238. maxframe/dataframe/tseries/between_time.py +122 -0
  239. maxframe/dataframe/typing_.py +185 -0
  240. maxframe/dataframe/utils.py +125 -52
  241. maxframe/dataframe/window/aggregation.py +8 -4
  242. maxframe/dataframe/window/core.py +14 -1
  243. maxframe/dataframe/window/ewm.py +1 -3
  244. maxframe/dataframe/window/expanding.py +37 -35
  245. maxframe/dataframe/window/rolling.py +49 -39
  246. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  247. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  248. maxframe/env.py +7 -4
  249. maxframe/errors.py +2 -2
  250. maxframe/io/odpsio/schema.py +9 -3
  251. maxframe/io/odpsio/tableio.py +7 -2
  252. maxframe/io/odpsio/tests/test_schema.py +198 -83
  253. maxframe/learn/__init__.py +10 -2
  254. maxframe/learn/cluster/__init__.py +15 -0
  255. maxframe/learn/cluster/_kmeans.py +782 -0
  256. maxframe/learn/contrib/llm/core.py +18 -7
  257. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  258. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  259. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  260. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  261. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  262. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  263. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  264. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  265. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  266. maxframe/learn/contrib/llm/models/managed.py +76 -11
  267. maxframe/learn/contrib/llm/models/openai.py +72 -0
  268. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  269. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  270. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  271. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  272. maxframe/learn/contrib/llm/text.py +348 -42
  273. maxframe/learn/contrib/models.py +4 -1
  274. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  275. maxframe/learn/contrib/xgboost/core.py +113 -4
  276. maxframe/learn/contrib/xgboost/predict.py +4 -2
  277. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  278. maxframe/learn/contrib/xgboost/train.py +7 -2
  279. maxframe/learn/core.py +66 -0
  280. maxframe/learn/linear_model/_base.py +58 -1
  281. maxframe/learn/linear_model/_lin_reg.py +1 -1
  282. maxframe/learn/metrics/__init__.py +6 -0
  283. maxframe/learn/metrics/_classification.py +145 -0
  284. maxframe/learn/metrics/_ranking.py +477 -0
  285. maxframe/learn/metrics/_scorer.py +60 -0
  286. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  287. maxframe/learn/metrics/pairwise/core.py +77 -0
  288. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  289. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  290. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  291. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  292. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  293. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  294. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  295. maxframe/learn/metrics/tests/__init__.py +13 -0
  296. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  297. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  298. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  299. maxframe/learn/utils/__init__.py +2 -1
  300. maxframe/learn/utils/checks.py +1 -2
  301. maxframe/learn/utils/core.py +59 -0
  302. maxframe/learn/utils/extmath.py +79 -9
  303. maxframe/learn/utils/odpsio.py +262 -0
  304. maxframe/learn/utils/validation.py +2 -2
  305. maxframe/lib/compat.py +40 -0
  306. maxframe/lib/dtypes_extension/__init__.py +16 -1
  307. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  308. maxframe/lib/dtypes_extension/blob.py +304 -0
  309. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  310. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  311. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  312. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  313. maxframe/lib/filesystem/_oss_lib/common.py +124 -50
  314. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  315. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  316. maxframe/lib/filesystem/base.py +1 -1
  317. maxframe/lib/filesystem/core.py +1 -1
  318. maxframe/lib/filesystem/oss.py +115 -46
  319. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  320. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  321. maxframe/lib/wrapped_pickle.py +10 -0
  322. maxframe/opcodes.py +41 -15
  323. maxframe/protocol.py +12 -0
  324. maxframe/remote/core.py +4 -0
  325. maxframe/serialization/__init__.py +11 -2
  326. maxframe/serialization/arrow.py +38 -13
  327. maxframe/serialization/blob.py +32 -0
  328. maxframe/serialization/core.cp37-win32.pyd +0 -0
  329. maxframe/serialization/core.pyx +39 -1
  330. maxframe/serialization/exception.py +2 -4
  331. maxframe/serialization/numpy.py +11 -0
  332. maxframe/serialization/pandas.py +46 -9
  333. maxframe/serialization/serializables/core.py +2 -2
  334. maxframe/serialization/tests/test_serial.py +31 -4
  335. maxframe/tensor/__init__.py +38 -8
  336. maxframe/tensor/arithmetic/__init__.py +19 -10
  337. maxframe/tensor/arithmetic/core.py +2 -2
  338. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  339. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
  340. maxframe/tensor/core.py +6 -2
  341. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  342. maxframe/tensor/extensions/__init__.py +2 -0
  343. maxframe/tensor/extensions/apply_chunk.py +3 -3
  344. maxframe/tensor/extensions/rebalance.py +65 -0
  345. maxframe/tensor/fft/__init__.py +32 -0
  346. maxframe/tensor/fft/core.py +168 -0
  347. maxframe/tensor/fft/fft.py +112 -0
  348. maxframe/tensor/fft/fft2.py +118 -0
  349. maxframe/tensor/fft/fftfreq.py +80 -0
  350. maxframe/tensor/fft/fftn.py +123 -0
  351. maxframe/tensor/fft/fftshift.py +79 -0
  352. maxframe/tensor/fft/hfft.py +112 -0
  353. maxframe/tensor/fft/ifft.py +114 -0
  354. maxframe/tensor/fft/ifft2.py +115 -0
  355. maxframe/tensor/fft/ifftn.py +123 -0
  356. maxframe/tensor/fft/ifftshift.py +73 -0
  357. maxframe/tensor/fft/ihfft.py +93 -0
  358. maxframe/tensor/fft/irfft.py +118 -0
  359. maxframe/tensor/fft/irfft2.py +62 -0
  360. maxframe/tensor/fft/irfftn.py +114 -0
  361. maxframe/tensor/fft/rfft.py +116 -0
  362. maxframe/tensor/fft/rfft2.py +63 -0
  363. maxframe/tensor/fft/rfftfreq.py +87 -0
  364. maxframe/tensor/fft/rfftn.py +113 -0
  365. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  366. maxframe/tensor/linalg/__init__.py +7 -0
  367. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  368. maxframe/tensor/linalg/cholesky.py +117 -0
  369. maxframe/tensor/linalg/einsum.py +339 -0
  370. maxframe/tensor/linalg/lstsq.py +100 -0
  371. maxframe/tensor/linalg/matrix_norm.py +75 -0
  372. maxframe/tensor/linalg/norm.py +249 -0
  373. maxframe/tensor/linalg/solve.py +72 -0
  374. maxframe/tensor/linalg/solve_triangular.py +2 -2
  375. maxframe/tensor/linalg/vector_norm.py +113 -0
  376. maxframe/tensor/misc/__init__.py +24 -1
  377. maxframe/tensor/misc/argwhere.py +72 -0
  378. maxframe/tensor/misc/array_split.py +46 -0
  379. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  380. maxframe/tensor/misc/copyto.py +130 -0
  381. maxframe/tensor/misc/delete.py +104 -0
  382. maxframe/tensor/misc/dsplit.py +68 -0
  383. maxframe/tensor/misc/ediff1d.py +74 -0
  384. maxframe/tensor/misc/expand_dims.py +85 -0
  385. maxframe/tensor/misc/flip.py +90 -0
  386. maxframe/tensor/misc/fliplr.py +64 -0
  387. maxframe/tensor/misc/flipud.py +68 -0
  388. maxframe/tensor/misc/hsplit.py +85 -0
  389. maxframe/tensor/misc/insert.py +139 -0
  390. maxframe/tensor/misc/moveaxis.py +83 -0
  391. maxframe/tensor/misc/result_type.py +88 -0
  392. maxframe/tensor/misc/roll.py +124 -0
  393. maxframe/tensor/misc/rollaxis.py +77 -0
  394. maxframe/tensor/misc/shape.py +89 -0
  395. maxframe/tensor/misc/split.py +190 -0
  396. maxframe/tensor/misc/tile.py +109 -0
  397. maxframe/tensor/misc/vsplit.py +74 -0
  398. maxframe/tensor/reduction/array_equal.py +2 -1
  399. maxframe/tensor/sort/__init__.py +2 -0
  400. maxframe/tensor/sort/argpartition.py +98 -0
  401. maxframe/tensor/sort/partition.py +228 -0
  402. maxframe/tensor/spatial/__init__.py +15 -0
  403. maxframe/tensor/spatial/distance/__init__.py +17 -0
  404. maxframe/tensor/spatial/distance/cdist.py +421 -0
  405. maxframe/tensor/spatial/distance/pdist.py +398 -0
  406. maxframe/tensor/spatial/distance/squareform.py +153 -0
  407. maxframe/tensor/special/__init__.py +159 -21
  408. maxframe/tensor/special/airy.py +55 -0
  409. maxframe/tensor/special/bessel.py +199 -0
  410. maxframe/tensor/special/core.py +65 -4
  411. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  412. maxframe/tensor/special/ellip_harm.py +55 -0
  413. maxframe/tensor/special/err_fresnel.py +223 -0
  414. maxframe/tensor/special/gamma_funcs.py +303 -0
  415. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  416. maxframe/tensor/special/info_theory.py +189 -0
  417. maxframe/tensor/special/misc.py +21 -0
  418. maxframe/tensor/statistics/__init__.py +6 -0
  419. maxframe/tensor/statistics/corrcoef.py +77 -0
  420. maxframe/tensor/statistics/cov.py +222 -0
  421. maxframe/tensor/statistics/digitize.py +126 -0
  422. maxframe/tensor/statistics/histogram.py +520 -0
  423. maxframe/tensor/statistics/median.py +85 -0
  424. maxframe/tensor/statistics/ptp.py +89 -0
  425. maxframe/tensor/utils.py +3 -3
  426. maxframe/tests/test_udf.py +61 -0
  427. maxframe/tests/test_utils.py +51 -6
  428. maxframe/tests/utils.py +0 -2
  429. maxframe/typing_.py +2 -0
  430. maxframe/udf.py +130 -9
  431. maxframe/utils.py +254 -27
  432. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
  433. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
  434. maxframe_client/fetcher.py +35 -4
  435. maxframe_client/session/odps.py +7 -2
  436. maxframe_client/session/task.py +8 -1
  437. maxframe_client/tests/test_fetcher.py +76 -3
  438. maxframe_client/tests/test_session.py +28 -1
  439. maxframe/dataframe/arrays.py +0 -864
  440. maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  441. maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  442. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  443. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,271 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ... import opcodes
16
+ from ...core import get_output_types
17
+ from ...serialization.serializables import BoolField, StringField
18
+ from ...udf import BuiltinFunction
19
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
20
+ from ..utils import parse_index
21
+
22
+
23
class DataFrameUpdate(DataFrameOperator, DataFrameOperatorMixin):
    """Operator that backs ``DataFrame.update`` and ``Series.update``.

    The operator stores the ``join``, ``overwrite``, ``filter_func`` and
    ``errors`` options.  Calling it produces a new DataFrame or Series whose
    shape, dtypes and column / name metadata are copied from the object being
    updated.
    """

    _op_type_ = opcodes.DATAFRAME_UPDATE

    join = StringField("join", default=None)
    overwrite = BoolField("overwrite", default=None)
    # NOTE(review): ``df_update`` documents ``filter_func`` as a callable, yet
    # the field is declared as a BoolField -- confirm that this field type
    # can carry (and serialize) a function object.
    filter_func = BoolField("filter_func", default=None)
    errors = StringField("errors", default=None)

    def __init__(self, output_types=None, **kwargs):
        super().__init__(_output_types=output_types, **kwargs)

    def has_custom_code(self) -> bool:
        """Return True only when a non-builtin ``filter_func`` is attached."""
        # Without the ``None`` guard, an update that carries no filter
        # function at all would be reported as containing custom code.
        return self.filter_func is not None and not isinstance(
            self.filter_func, BuiltinFunction
        )

    def __call__(self, df_or_series, other):
        """Build the updated DataFrame / Series.

        Parameters
        ----------
        df_or_series : DataFrame or Series
            Object whose values are to be updated.
        other : DataFrame or Series
            Object supplying the new values.

        Returns
        -------
        DataFrame or Series
            A new object carrying the metadata of ``df_or_series``.
        """
        self._output_types = get_output_types(df_or_series)

        # Both inputs and every option are handed to ``parse_index`` together
        # with the source index (presumably so that the resulting index value
        # is keyed on them -- TODO confirm against ``parse_index``).
        index_value = parse_index(
            df_or_series.index_value.to_pandas(),
            df_or_series,
            other,
            self.join,
            self.overwrite,
            self.filter_func,
            self.errors,
        )
        inputs = [df_or_series, other]

        # The result keeps the shape and labels of the object being updated;
        # the caller is responsible for emulating in-place semantics.
        if df_or_series.ndim == 2:
            return self.new_dataframe(
                inputs,
                shape=df_or_series.shape,
                dtypes=df_or_series.dtypes,
                index_value=index_value,
                columns_value=df_or_series.columns_value,
            )
        return self.new_series(
            inputs,
            shape=df_or_series.shape,
            dtype=df_or_series.dtype,
            index_value=index_value,
            name=df_or_series.name,
        )
70
+
71
+
72
def _update(
    df_or_series, other, join="left", overwrite=True, filter_func=None, errors="ignore"
):
    """Update ``df_or_series`` in place with values taken from ``other``.

    Shared implementation behind ``df_update`` and ``series_update``.

    Parameters
    ----------
    df_or_series : DataFrame or Series
        Object to modify.  Its ``data`` attribute is replaced by the data of
        the operator result.
    other : DataFrame or Series
        Object supplying the new values.
    join : {'left'}, default 'left'
        Only ``'left'`` is supported.
    overwrite : bool, default True
        Forwarded to the ``DataFrameUpdate`` operator.
    filter_func : callable, optional
        Forwarded to the ``DataFrameUpdate`` operator.
    errors : {'raise', 'ignore'}, default 'ignore'
        Forwarded to the ``DataFrameUpdate`` operator.

    Returns
    -------
    None

    Raises
    ------
    NotImplementedError
        If ``join`` is not ``'left'``.
    ValueError
        If ``errors`` is neither ``'ignore'`` nor ``'raise'``.
    """
    # Validate eagerly so that the documented exceptions surface when the
    # call is made instead of being deferred (or silently skipped).
    if join != "left":
        raise NotImplementedError("Only left join is supported")
    if errors not in ("ignore", "raise"):
        raise ValueError("The parameter errors must be either 'ignore' or 'raise'")

    op = DataFrameUpdate(
        join=join,
        overwrite=overwrite,
        filter_func=filter_func,
        errors=errors,
    )
    result = op(df_or_series, other)
    # Emulate in-place semantics: point the caller's object at the result.
    df_or_series.data = result.data
83
+
84
+
85
def df_update(
    df, other, join="left", overwrite=True, filter_func=None, errors="ignore"
):
    """
    Update a DataFrame in place with the non-NA values of another DataFrame.

    Rows and columns are aligned on their labels. Nothing is returned.

    Parameters
    ----------
    other : DataFrame, or object coercible into a DataFrame
        Should share at least one index/column label with the original
        DataFrame. When a Series is given, its name attribute must be
        set; the name is used as the column to align with the original
        DataFrame.
    join : {'left'}, default 'left'
        Only left join is implemented, which keeps the index and columns
        of the original object.
    overwrite : bool, default True
        Controls how non-NA values for overlapping keys are handled:

        * True: values from `other` overwrite the original DataFrame's
          values.
        * False: only values that are NA in the original DataFrame are
          updated.

    filter_func : callable(1d-array) -> bool 1d-array, optional
        Allows replacing values other than NA. Should return True for
        the values that are to be updated.
    errors : {'raise', 'ignore'}, default 'ignore'
        With 'raise', a ValueError is raised when the DataFrame and
        `other` both hold non-NA data in the same place.

    Returns
    -------
    None
        The calling object is changed directly.

    Raises
    ------
    ValueError
        * When `errors='raise'` and there's overlapping non-NA data.
        * When `errors` is not either `'ignore'` or `'raise'`
    NotImplementedError
        * If `join != 'left'`

    See Also
    --------
    dict.update : Similar method for dictionaries.
    DataFrame.merge : For column(s)-on-column(s) operations.

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame({'A': [1, 2, 3],
    ...                    'B': [400, 500, 600]})
    >>> new_df = md.DataFrame({'B': [4, 5, 6],
    ...                        'C': [7, 8, 9]})
    >>> df.update(new_df)
    >>> df.execute()
       A  B
    0  1  4
    1  2  5
    2  3  6

    The update never makes the DataFrame longer; only values at matching
    index/column labels are replaced.

    >>> df = md.DataFrame({'A': ['a', 'b', 'c'],
    ...                    'B': ['x', 'y', 'z']})
    >>> new_df = md.DataFrame({'B': ['d', 'e', 'f', 'g', 'h', 'i']})
    >>> df.update(new_df)
    >>> df.execute()
       A  B
    0  a  d
    1  b  e
    2  c  f

    >>> df = md.DataFrame({'A': ['a', 'b', 'c'],
    ...                    'B': ['x', 'y', 'z']})
    >>> new_df = md.DataFrame({'B': ['d', 'f']}, index=[0, 2])
    >>> df.update(new_df)
    >>> df.execute()
       A  B
    0  a  d
    1  b  y
    2  c  f

    A Series must have its name attribute set.

    >>> df = md.DataFrame({'A': ['a', 'b', 'c'],
    ...                    'B': ['x', 'y', 'z']})
    >>> new_column = md.Series(['d', 'e', 'f'], name='B')
    >>> df.update(new_column)
    >>> df.execute()
       A  B
    0  a  d
    1  b  e
    2  c  f

    Where `other` holds NaNs, the corresponding values of the original
    dataframe are left untouched.

    >>> df = md.DataFrame({'A': [1, 2, 3],
    ...                    'B': [400., 500., 600.]})
    >>> new_df = md.DataFrame({'B': [4, mt.nan, 6]})
    >>> df.update(new_df)
    >>> df.execute()
       A      B
    0  1    4.0
    1  2  500.0
    2  3    6.0
    """
    # ``_update`` mutates ``df`` and yields nothing, so there is no value
    # to hand back to the caller.
    _update(
        df,
        other,
        join=join,
        overwrite=overwrite,
        filter_func=filter_func,
        errors=errors,
    )
200
+
201
+
202
def series_update(series, other):
    """
    Update a Series in place with the values of another Series.

    Only non-NA values of the passed Series are applied. Values are
    aligned on index.

    Parameters
    ----------
    other : Series, or object coercible into Series

    Examples
    --------
    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md
    >>> s = md.Series([1, 2, 3])
    >>> s.update(md.Series([4, 5, 6]))
    >>> s.execute()
    0    4
    1    5
    2    6
    dtype: int64

    >>> s = md.Series(['a', 'b', 'c'])
    >>> s.update(md.Series(['d', 'e'], index=[0, 2]))
    >>> s.execute()
    0    d
    1    b
    2    e
    dtype: object

    >>> s = md.Series([1, 2, 3])
    >>> s.update(md.Series([4, 5, 6, 7, 8]))
    >>> s.execute()
    0    4
    1    5
    2    6
    dtype: int64

    Where ``other`` holds NaNs, the corresponding values of the original
    Series are left untouched.

    >>> s = md.Series([1, 2, 3])
    >>> s.update(md.Series([4, mt.nan, 6]))
    >>> s.execute()
    0    4
    1    2
    2    6
    dtype: int64

    ``other`` may also be a non-Series object that is coercible into a
    Series

    >>> s = md.Series([1, 2, 3])
    >>> s.update([4, mt.nan, 6])
    >>> s.execute()
    0    4
    1    2
    2    6
    dtype: int64

    >>> s = md.Series([1, 2, 3])
    >>> s.update({1: 9})
    >>> s.execute()
    0    1
    1    9
    2    3
    dtype: int64
    """
    # All remaining options keep the defaults declared by ``_update``; the
    # helper mutates ``series`` and yields nothing.
    _update(series, other)
@@ -21,10 +21,12 @@ from .check_monotonic import (
21
21
  is_monotonic_decreasing,
22
22
  is_monotonic_increasing,
23
23
  )
24
+ from .check_unique import index_is_unique, series_is_unique
25
+ from .clip import clip
24
26
  from .cut import cut
25
27
  from .describe import describe
26
28
  from .diff import df_diff, series_diff
27
- from .drop import df_drop, df_pop, index_drop, series_drop
29
+ from .drop import df_drop, df_pop, index_drop, series_drop, series_pop
28
30
  from .drop_duplicates import (
29
31
  df_drop_duplicates,
30
32
  index_drop_duplicates,
@@ -33,21 +35,20 @@ from .drop_duplicates import (
33
35
  from .duplicated import df_duplicated, index_duplicated, series_duplicated
34
36
  from .eval import df_eval, df_query
35
37
  from .explode import df_explode, series_explode
38
+ from .infer_dtypes import convert_dtypes, infer_objects
36
39
  from .isin import df_isin, series_isin
37
- from .map import index_map, series_map
38
- from .melt import melt
40
+ from .map import df_map, index_map, series_map
39
41
  from .memory_usage import df_memory_usage, index_memory_usage, series_memory_usage
40
42
  from .pct_change import pct_change
41
- from .pivot import pivot
42
- from .pivot_table import pivot_table
43
43
  from .qcut import qcut
44
44
  from .rechunk import rechunk
45
+ from .repeat import index_repeat, series_repeat
45
46
  from .select_dtypes import select_dtypes
46
47
  from .shift import shift, tshift
47
- from .stack import stack
48
48
  from .transform import df_transform, series_transform
49
49
  from .transpose import transpose
50
- from .value_counts import value_counts
50
+ from .valid_index import first_valid_index, last_valid_index
51
+ from .value_counts import df_value_counts, value_counts
51
52
 
52
53
 
53
54
  def _install():
@@ -55,7 +56,10 @@ def _install():
55
56
 
56
57
  for t in DATAFRAME_TYPE:
57
58
  setattr(t, "apply", df_apply)
59
+ setattr(t, "applymap", df_map)
58
60
  setattr(t, "astype", astype)
61
+ setattr(t, "clip", clip)
62
+ setattr(t, "convert_dtypes", convert_dtypes)
59
63
  setattr(t, "describe", describe)
60
64
  setattr(
61
65
  t, "__delitem__", lambda df, items: df_drop(df, items, axis=1, inplace=True)
@@ -66,41 +70,50 @@ def _install():
66
70
  setattr(t, "drop", df_drop)
67
71
  setattr(t, "eval", df_eval)
68
72
  setattr(t, "explode", df_explode)
73
+ setattr(t, "first_valid_index", first_valid_index)
74
+ setattr(t, "infer_objects", infer_objects)
69
75
  setattr(t, "isin", df_isin)
70
- setattr(t, "melt", melt)
76
+ setattr(t, "last_valid_index", last_valid_index)
77
+ setattr(t, "map", df_map)
71
78
  setattr(t, "memory_usage", df_memory_usage)
72
79
  setattr(t, "pct_change", pct_change)
73
- setattr(t, "pivot", pivot)
74
- setattr(t, "pivot_table", pivot_table)
75
80
  setattr(t, "pop", df_pop)
76
81
  setattr(t, "query", df_query)
77
82
  setattr(t, "rechunk", rechunk)
78
83
  setattr(t, "select_dtypes", select_dtypes)
79
84
  setattr(t, "shift", shift)
80
- setattr(t, "stack", stack)
81
85
  setattr(t, "transform", df_transform)
82
86
  setattr(t, "transpose", transpose)
83
87
  setattr(t, "tshift", tshift)
88
+ setattr(t, "value_counts", df_value_counts)
84
89
 
85
90
  for t in SERIES_TYPE:
86
91
  setattr(t, "apply", series_apply)
87
92
  setattr(t, "astype", astype)
88
93
  setattr(t, "case_when", case_when)
89
94
  setattr(t, "check_monotonic", check_monotonic)
95
+ setattr(t, "clip", clip)
96
+ setattr(t, "convert_dtypes", convert_dtypes)
90
97
  setattr(t, "describe", describe)
91
98
  setattr(t, "diff", series_diff)
92
99
  setattr(t, "drop", series_drop)
93
100
  setattr(t, "drop_duplicates", series_drop_duplicates)
94
101
  setattr(t, "duplicated", series_duplicated)
95
102
  setattr(t, "explode", series_explode)
103
+ setattr(t, "first_valid_index", first_valid_index)
104
+ setattr(t, "infer_objects", infer_objects)
96
105
  setattr(t, "is_monotonic", property(fget=is_monotonic))
97
106
  setattr(t, "is_monotonic_decreasing", property(fget=is_monotonic_decreasing))
98
107
  setattr(t, "is_monotonic_increasing", property(fget=is_monotonic_increasing))
99
108
  setattr(t, "isin", series_isin)
109
+ setattr(t, "is_unique", property(fget=series_is_unique))
110
+ setattr(t, "last_valid_index", last_valid_index)
100
111
  setattr(t, "map", series_map)
101
112
  setattr(t, "memory_usage", series_memory_usage)
102
113
  setattr(t, "pct_change", pct_change)
114
+ setattr(t, "pop", series_pop)
103
115
  setattr(t, "rechunk", rechunk)
116
+ setattr(t, "repeat", series_repeat)
104
117
  setattr(t, "shift", shift)
105
118
  setattr(t, "transform", series_transform)
106
119
  setattr(t, "tshift", tshift)
@@ -109,15 +122,19 @@ def _install():
109
122
  for t in INDEX_TYPE:
110
123
  setattr(t, "astype", index_astype)
111
124
  setattr(t, "check_monotonic", check_monotonic)
125
+ setattr(t, "clip", clip)
112
126
  setattr(t, "drop", index_drop)
113
127
  setattr(t, "drop_duplicates", index_drop_duplicates)
114
128
  setattr(t, "duplicated", index_duplicated)
129
+ setattr(t, "has_duplicates", property(fget=lambda x: not index_is_unique(x)))
115
130
  setattr(t, "is_monotonic", property(fget=is_monotonic))
116
131
  setattr(t, "is_monotonic_increasing", property(fget=is_monotonic_increasing))
117
132
  setattr(t, "is_monotonic_decreasing", property(fget=is_monotonic_decreasing))
133
+ setattr(t, "is_unique", property(fget=index_is_unique))
118
134
  setattr(t, "map", index_map)
119
135
  setattr(t, "memory_usage", index_memory_usage)
120
136
  setattr(t, "rechunk", rechunk)
137
+ setattr(t, "repeat", index_repeat)
121
138
  setattr(t, "value_counts", value_counts)
122
139
 
123
140
 
@@ -17,28 +17,34 @@ from typing import List
17
17
  import pandas as pd
18
18
  from pandas.api.types import is_list_like
19
19
 
20
- from ...core import EntityData
20
+ from ...core import ENTITY_TYPE, EntityData
21
21
  from ...core.operator import MapReduceOperator
22
22
  from ...serialization.serializables import AnyField, KeyField, StringField
23
23
  from ..operators import DataFrameOperatorMixin
24
24
 
25
25
 
26
- class DuplicateOperand(MapReduceOperator, DataFrameOperatorMixin):
26
+ class BaseDuplicateOp(MapReduceOperator, DataFrameOperatorMixin):
27
+ _legacy_name = "DuplicateOperand" # since 2.2.0
28
+
27
29
  input = KeyField("input")
28
30
  subset = AnyField("subset", default=None)
29
31
  keep = AnyField("keep", default="first")
30
32
  method = StringField("method", default=None)
31
33
 
32
34
  @classmethod
33
- def _set_inputs(cls, op: "DuplicateOperand", inputs: List[EntityData]):
35
+ def _set_inputs(cls, op: "BaseDuplicateOp", inputs: List[EntityData]):
34
36
  super()._set_inputs(op, inputs)
35
37
  op.input = op._inputs[0]
36
38
 
37
39
 
40
+ # keep for import compatibility
41
+ DuplicateOperand = BaseDuplicateOp
42
+
43
+
38
44
  def validate_subset(df, subset):
39
45
  if subset is None:
40
46
  return subset
41
- if not is_list_like(subset):
47
+ if not is_list_like(subset) or isinstance(subset, ENTITY_TYPE):
42
48
  subset = [subset]
43
49
  else:
44
50
  subset = list(subset)
@@ -64,7 +64,7 @@ class DataFrameApply(
64
64
  DataFrameOperator, DataFrameOperatorMixin, ApplyOperandLogicKeyGeneratorMixin
65
65
  ):
66
66
  _op_type_ = opcodes.APPLY
67
- _legacy_name = "ApplyOperator"
67
+ _legacy_name = "ApplyOperator" # since v2.0.0
68
68
 
69
69
  func = FunctionField("func")
70
70
  axis = AnyField("axis", default=0)
@@ -0,0 +1,82 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import pandas as pd
16
+
17
+ from ...udf import builtin_function
18
+
19
+
20
@builtin_function
def _tailor_unique(series_or_idx):
    """
    Return the input untouched when its values are unique, otherwise an
    empty slice of it.

    A pandas Series is sliced positionally through ``.iloc``; any other
    object (e.g. an Index) is sliced directly.
    """
    if series_or_idx.is_unique:
        return series_or_idx
    if isinstance(series_or_idx, pd.Series):
        return series_or_idx.iloc[:0]
    return series_or_idx[:0]
28
+
29
+
30
def _is_unique(series_or_index):
    """
    Build a lazy check of whether all values of the input are distinct.

    The input is first run through ``_tailor_unique`` via
    ``mf.apply_chunk`` so that a piece holding duplicates collapses to
    an empty one. The distinct count of what remains is then compared
    with the total length of the input.

    Returns
    -------
    The result of ``mt.equal`` on the distinct count and the length;
    it has to be executed to obtain a concrete boolean.
    """
    from ... import tensor as mt

    tailored = series_or_index.mf.apply_chunk(
        _tailor_unique, dtype=series_or_index.dtype
    )
    # Count a missing value as a value of its own. With the default
    # ``dropna=True`` a single NaN is excluded from the distinct count,
    # which then falls short of the length and makes an otherwise
    # unique input look non-unique (pandas treats it as unique).
    distinct_count = tailored.nunique(dropna=False)
    return mt.equal(distinct_count, mt.shape(series_or_index)[0])
39
+
40
+
41
def series_is_unique(series):
    """
    Tell whether every value in the Series occurs only once.

    Returns
    -------
    bool
        Produced lazily; call ``.execute()`` on the result to obtain
        the concrete value, as in the examples below.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s = md.Series([1, 2, 3])
    >>> s.is_unique.execute()
    True

    >>> s = md.Series([1, 2, 3, 1])
    >>> s.is_unique.execute()
    False
    """
    uniqueness = _is_unique(series)
    return uniqueness
61
+
62
+
63
def index_is_unique(index):
    """
    Tell whether every value in the index occurs only once.

    The index is converted to a Series and that Series' ``is_unique``
    attribute is returned.

    Returns
    -------
    bool

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> index = md.Index([1, 2, 3])
    >>> index.is_unique.execute()
    True

    >>> index = md.Index([1, 2, 3, 1])
    >>> index.is_unique.execute()
    False
    """
    as_series = index.to_series()
    return as_series.is_unique
@@ -0,0 +1,145 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import List
16
+
17
+ from pandas.api.types import is_list_like
18
+
19
+ from ... import opcodes
20
+ from ...core import ENTITY_TYPE, get_output_types
21
+ from ...serialization.serializables import Int8Field, TupleField
22
+ from ...typing_ import EntityType
23
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
24
+ from ..utils import validate_axis
25
+
26
+
27
class DataFrameClip(DataFrameOperatorMixin, DataFrameOperator):
    """
    Operator created by ``clip``.

    Besides the object being clipped, any bound that is itself a
    maxframe entity is registered as an additional operator input.
    """

    _op_type_ = opcodes.CLIP

    # (lower, upper) pair; each item may be a plain value or an entity
    bounds = TupleField("bounds", default=None)
    # axis passed through from ``clip``; None when not specified
    axis = Int8Field("axis", default=None)

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    @classmethod
    def _set_inputs(cls, op: "DataFrameClip", inputs: List[EntityType]):
        """
        Re-bind entity bounds to the entries of ``inputs``.

        ``inputs[0]`` is the clipped object; the following entries are
        consumed, in order, by the bounds that are entities.
        """
        super()._set_inputs(op, inputs)
        if op.bounds is None:
            # ``bounds`` defaults to None; nothing to re-bind in that case.
            return

        bounds = list(op.bounds)
        if len(inputs) > 1:
            remaining = iter(inputs[1:])
            bounds = [
                next(remaining) if isinstance(bound, ENTITY_TYPE) else bound
                for bound in bounds
            ]
        op.bounds = tuple(bounds)

    def __call__(self, df):
        """Create the output tileable with the same params as ``df``."""
        self._output_types = get_output_types(df)
        # Tolerate an unset ``bounds`` (field default is None).
        bound_inputs = [
            bd for bd in (self.bounds or ()) if isinstance(bd, ENTITY_TYPE)
        ]
        return self.new_tileable([df] + bound_inputs, **df.params)
52
+
53
+
54
def clip(df, lower=None, upper=None, *, axis=None, inplace=False):
    """
    Trim values at input threshold(s).

    Assigns values outside boundary to boundary values. Thresholds
    can be singular values or array like, and in the latter case
    the clipping is performed element-wise in the specified axis.

    Parameters
    ----------
    lower : float or array-like, default None
        Minimum threshold value. All values below this
        threshold will be set to it. If None, no lower clipping is performed.
    upper : float or array-like, default None
        Maximum threshold value. All values above this
        threshold will be set to it. If None, no upper clipping is performed.
    axis : int or str axis name, optional
        Align object with lower and upper along the given axis.
        Must be given for two-dimensional objects when ``lower`` or
        ``upper`` is array-like; defaults to 0 for one-dimensional ones.
    inplace : bool, default False
        Whether to perform the operation in place on the data.

    Returns
    -------
    Series or DataFrame or None
        Same type as calling object with the values outside the
        clip boundaries replaced or None if ``inplace=True``.

    Raises
    ------
    ValueError
        If ``lower`` or ``upper`` is array-like, ``axis`` is not given
        and the calling object is not one-dimensional.

    See Also
    --------
    Series.clip : Trim values at input threshold in series.
    DataFrame.clip : Trim values at input threshold in dataframe.
    numpy.clip : Clip (limit) the values in an array.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> data = {'col_0': [9, -3, 0, -1, 5], 'col_1': [-2, -7, 6, 8, -5]}
    >>> df = md.DataFrame(data)
    >>> df.execute()
       col_0  col_1
    0      9     -2
    1     -3     -7
    2      0      6
    3     -1      8
    4      5     -5

    Clips per column using lower and upper thresholds:

    >>> df.clip(lower=-4, upper=7).execute()
       col_0  col_1
    0      7     -2
    1     -3     -4
    2      0      6
    3     -1      7
    4      5     -4

    Clips using specific lower and upper thresholds per column element:

    >>> t = md.Series([2, -4, -1, 6, 3])
    >>> t.execute()
    0    2
    1   -4
    2   -1
    3    6
    4    3
    dtype: int64

    >>> df.clip(lower=t, upper=t + 4, axis=0).execute()
       col_0  col_1
    0      6      2
    1     -3     -4
    2      0      3
    3      6      8
    4      5      3
    """
    axis = validate_axis(axis, df) if axis is not None else None

    # Array-like bounds have to be aligned along an axis; only for
    # one-dimensional objects can that axis be chosen automatically.
    has_array_bound = any(
        isinstance(x, ENTITY_TYPE) or is_list_like(x) for x in (lower, upper)
    )
    if axis is None and has_array_bound:
        if df.ndim == 1:
            axis = 0
        else:
            raise ValueError("Must specify axis=0 or 1")

    op = DataFrameClip(bounds=(lower, upper), axis=axis)
    out = op(df)
    if inplace:
        # Rebind the caller's object to the clipped data and, as
        # documented, return nothing.
        df.data = out.data
        return None
    return out