maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (443) hide show
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win32.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +9 -8
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +38 -1
  15. maxframe/codegen/spe/dataframe/misc.py +11 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +32 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +39 -18
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  29. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  30. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  31. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  32. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  33. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  34. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  35. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  36. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  37. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  38. maxframe/codegen/spe/tensor/__init__.py +3 -0
  39. maxframe/codegen/spe/tensor/datasource.py +1 -0
  40. maxframe/codegen/spe/tensor/fft.py +74 -0
  41. maxframe/codegen/spe/tensor/linalg.py +29 -2
  42. maxframe/codegen/spe/tensor/misc.py +79 -25
  43. maxframe/codegen/spe/tensor/spatial.py +45 -0
  44. maxframe/codegen/spe/tensor/statistics.py +44 -0
  45. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  46. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  47. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  48. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  49. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  50. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  51. maxframe/codegen/spe/utils.py +2 -0
  52. maxframe/config/config.py +73 -9
  53. maxframe/config/tests/test_validators.py +13 -1
  54. maxframe/config/validators.py +49 -0
  55. maxframe/conftest.py +54 -17
  56. maxframe/core/accessor.py +2 -2
  57. maxframe/core/base.py +2 -1
  58. maxframe/core/entity/core.py +5 -0
  59. maxframe/core/entity/tileables.py +3 -1
  60. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  61. maxframe/core/graph/entity.py +8 -3
  62. maxframe/core/mode.py +6 -1
  63. maxframe/core/operator/base.py +9 -2
  64. maxframe/core/operator/core.py +10 -2
  65. maxframe/core/operator/utils.py +13 -0
  66. maxframe/dataframe/__init__.py +12 -5
  67. maxframe/dataframe/accessors/__init__.py +1 -1
  68. maxframe/dataframe/accessors/compat.py +45 -0
  69. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  70. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  71. maxframe/dataframe/accessors/dict_/core.py +48 -0
  72. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  73. maxframe/dataframe/accessors/dict_/length.py +7 -16
  74. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  75. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  76. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  77. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  78. maxframe/dataframe/accessors/list_/core.py +48 -0
  79. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  80. maxframe/dataframe/accessors/list_/length.py +7 -16
  81. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  82. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  83. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  84. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  85. maxframe/dataframe/accessors/struct_/core.py +43 -0
  86. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  87. maxframe/dataframe/accessors/struct_/field.py +123 -0
  88. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  89. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  90. maxframe/dataframe/arithmetic/__init__.py +18 -4
  91. maxframe/dataframe/arithmetic/between.py +106 -0
  92. maxframe/dataframe/arithmetic/dot.py +237 -0
  93. maxframe/dataframe/arithmetic/maximum.py +33 -0
  94. maxframe/dataframe/arithmetic/minimum.py +33 -0
  95. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  96. maxframe/dataframe/core.py +161 -224
  97. maxframe/dataframe/datasource/__init__.py +18 -0
  98. maxframe/dataframe/datasource/core.py +6 -0
  99. maxframe/dataframe/datasource/direct.py +57 -0
  100. maxframe/dataframe/datasource/from_dict.py +124 -0
  101. maxframe/dataframe/datasource/from_index.py +1 -1
  102. maxframe/dataframe/datasource/from_records.py +77 -0
  103. maxframe/dataframe/datasource/from_tensor.py +109 -41
  104. maxframe/dataframe/datasource/read_csv.py +21 -14
  105. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  106. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  107. maxframe/dataframe/datasource/read_parquet.py +38 -39
  108. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  109. maxframe/dataframe/datastore/__init__.py +11 -1
  110. maxframe/dataframe/datastore/direct.py +268 -0
  111. maxframe/dataframe/datastore/to_csv.py +29 -41
  112. maxframe/dataframe/datastore/to_odps.py +36 -4
  113. maxframe/dataframe/extensions/__init__.py +20 -4
  114. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  115. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  116. maxframe/dataframe/extensions/collect_kv.py +126 -0
  117. maxframe/dataframe/extensions/extract_kv.py +177 -0
  118. maxframe/dataframe/extensions/flatjson.py +2 -1
  119. maxframe/dataframe/extensions/map_reduce.py +263 -0
  120. maxframe/dataframe/extensions/rebalance.py +62 -0
  121. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  122. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  123. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  124. maxframe/dataframe/groupby/__init__.py +17 -2
  125. maxframe/dataframe/groupby/aggregation.py +86 -49
  126. maxframe/dataframe/groupby/apply.py +1 -1
  127. maxframe/dataframe/groupby/apply_chunk.py +19 -5
  128. maxframe/dataframe/groupby/core.py +116 -16
  129. maxframe/dataframe/groupby/cum.py +4 -25
  130. maxframe/dataframe/groupby/expanding.py +264 -0
  131. maxframe/dataframe/groupby/fill.py +1 -1
  132. maxframe/dataframe/groupby/getitem.py +12 -5
  133. maxframe/dataframe/groupby/head.py +11 -1
  134. maxframe/dataframe/groupby/rank.py +136 -0
  135. maxframe/dataframe/groupby/rolling.py +206 -0
  136. maxframe/dataframe/groupby/shift.py +114 -0
  137. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  138. maxframe/dataframe/indexing/__init__.py +22 -2
  139. maxframe/dataframe/indexing/droplevel.py +195 -0
  140. maxframe/dataframe/indexing/filter.py +169 -0
  141. maxframe/dataframe/indexing/get_level_values.py +76 -0
  142. maxframe/dataframe/indexing/iat.py +45 -0
  143. maxframe/dataframe/indexing/iloc.py +152 -12
  144. maxframe/dataframe/indexing/insert.py +46 -18
  145. maxframe/dataframe/indexing/loc.py +287 -7
  146. maxframe/dataframe/indexing/reindex.py +14 -5
  147. maxframe/dataframe/indexing/rename.py +6 -0
  148. maxframe/dataframe/indexing/rename_axis.py +2 -2
  149. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  150. maxframe/dataframe/indexing/reset_index.py +33 -6
  151. maxframe/dataframe/indexing/sample.py +8 -0
  152. maxframe/dataframe/indexing/setitem.py +3 -3
  153. maxframe/dataframe/indexing/swaplevel.py +185 -0
  154. maxframe/dataframe/indexing/take.py +99 -0
  155. maxframe/dataframe/indexing/truncate.py +140 -0
  156. maxframe/dataframe/indexing/where.py +0 -11
  157. maxframe/dataframe/indexing/xs.py +148 -0
  158. maxframe/dataframe/merge/__init__.py +15 -1
  159. maxframe/dataframe/merge/append.py +97 -98
  160. maxframe/dataframe/merge/combine.py +244 -0
  161. maxframe/dataframe/merge/combine_first.py +120 -0
  162. maxframe/dataframe/merge/compare.py +387 -0
  163. maxframe/dataframe/merge/concat.py +183 -0
  164. maxframe/dataframe/merge/update.py +271 -0
  165. maxframe/dataframe/misc/__init__.py +28 -11
  166. maxframe/dataframe/misc/_duplicate.py +10 -4
  167. maxframe/dataframe/misc/apply.py +1 -1
  168. maxframe/dataframe/misc/check_unique.py +82 -0
  169. maxframe/dataframe/misc/clip.py +145 -0
  170. maxframe/dataframe/misc/describe.py +175 -9
  171. maxframe/dataframe/misc/drop.py +31 -0
  172. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  173. maxframe/dataframe/misc/duplicated.py +2 -2
  174. maxframe/dataframe/misc/get_dummies.py +5 -1
  175. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  176. maxframe/dataframe/misc/isin.py +2 -2
  177. maxframe/dataframe/misc/map.py +125 -18
  178. maxframe/dataframe/misc/repeat.py +159 -0
  179. maxframe/dataframe/misc/tests/test_misc.py +48 -3
  180. maxframe/dataframe/misc/to_numeric.py +3 -0
  181. maxframe/dataframe/misc/transform.py +12 -5
  182. maxframe/dataframe/misc/transpose.py +13 -1
  183. maxframe/dataframe/misc/valid_index.py +115 -0
  184. maxframe/dataframe/misc/value_counts.py +38 -4
  185. maxframe/dataframe/missing/checkna.py +14 -6
  186. maxframe/dataframe/missing/dropna.py +5 -0
  187. maxframe/dataframe/missing/fillna.py +1 -1
  188. maxframe/dataframe/missing/replace.py +7 -4
  189. maxframe/dataframe/reduction/__init__.py +35 -16
  190. maxframe/dataframe/reduction/aggregation.py +43 -14
  191. maxframe/dataframe/reduction/all.py +2 -2
  192. maxframe/dataframe/reduction/any.py +2 -2
  193. maxframe/dataframe/reduction/argmax.py +103 -0
  194. maxframe/dataframe/reduction/argmin.py +103 -0
  195. maxframe/dataframe/reduction/core.py +80 -24
  196. maxframe/dataframe/reduction/count.py +13 -9
  197. maxframe/dataframe/reduction/cov.py +166 -0
  198. maxframe/dataframe/reduction/cummax.py +2 -2
  199. maxframe/dataframe/reduction/cummin.py +2 -2
  200. maxframe/dataframe/reduction/cumprod.py +2 -2
  201. maxframe/dataframe/reduction/cumsum.py +2 -2
  202. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  203. maxframe/dataframe/reduction/idxmax.py +185 -0
  204. maxframe/dataframe/reduction/idxmin.py +185 -0
  205. maxframe/dataframe/reduction/kurtosis.py +37 -30
  206. maxframe/dataframe/reduction/max.py +2 -2
  207. maxframe/dataframe/reduction/mean.py +9 -7
  208. maxframe/dataframe/reduction/median.py +2 -2
  209. maxframe/dataframe/reduction/min.py +2 -2
  210. maxframe/dataframe/reduction/mode.py +144 -0
  211. maxframe/dataframe/reduction/nunique.py +19 -11
  212. maxframe/dataframe/reduction/prod.py +18 -13
  213. maxframe/dataframe/reduction/reduction_size.py +2 -2
  214. maxframe/dataframe/reduction/sem.py +13 -9
  215. maxframe/dataframe/reduction/skew.py +31 -27
  216. maxframe/dataframe/reduction/str_concat.py +10 -7
  217. maxframe/dataframe/reduction/sum.py +18 -14
  218. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  219. maxframe/dataframe/reduction/unique.py +20 -3
  220. maxframe/dataframe/reduction/var.py +16 -12
  221. maxframe/dataframe/reshape/__init__.py +38 -0
  222. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  223. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  224. maxframe/dataframe/reshape/unstack.py +114 -0
  225. maxframe/dataframe/sort/__init__.py +16 -1
  226. maxframe/dataframe/sort/argsort.py +68 -0
  227. maxframe/dataframe/sort/core.py +2 -1
  228. maxframe/dataframe/sort/nlargest.py +238 -0
  229. maxframe/dataframe/sort/nsmallest.py +228 -0
  230. maxframe/dataframe/sort/rank.py +147 -0
  231. maxframe/dataframe/statistics/__init__.py +3 -3
  232. maxframe/dataframe/statistics/corr.py +1 -0
  233. maxframe/dataframe/statistics/quantile.py +2 -2
  234. maxframe/dataframe/tests/test_typing.py +104 -0
  235. maxframe/dataframe/tests/test_utils.py +66 -2
  236. maxframe/dataframe/tseries/__init__.py +19 -0
  237. maxframe/dataframe/tseries/at_time.py +61 -0
  238. maxframe/dataframe/tseries/between_time.py +122 -0
  239. maxframe/dataframe/typing_.py +185 -0
  240. maxframe/dataframe/utils.py +125 -52
  241. maxframe/dataframe/window/aggregation.py +8 -4
  242. maxframe/dataframe/window/core.py +14 -1
  243. maxframe/dataframe/window/ewm.py +1 -3
  244. maxframe/dataframe/window/expanding.py +37 -35
  245. maxframe/dataframe/window/rolling.py +49 -39
  246. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  247. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  248. maxframe/env.py +7 -4
  249. maxframe/errors.py +2 -2
  250. maxframe/io/odpsio/schema.py +9 -3
  251. maxframe/io/odpsio/tableio.py +7 -2
  252. maxframe/io/odpsio/tests/test_schema.py +198 -83
  253. maxframe/learn/__init__.py +10 -2
  254. maxframe/learn/cluster/__init__.py +15 -0
  255. maxframe/learn/cluster/_kmeans.py +782 -0
  256. maxframe/learn/contrib/llm/core.py +18 -7
  257. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  258. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  259. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  260. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  261. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  262. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  263. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  264. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  265. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  266. maxframe/learn/contrib/llm/models/managed.py +76 -11
  267. maxframe/learn/contrib/llm/models/openai.py +72 -0
  268. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  269. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  270. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  271. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  272. maxframe/learn/contrib/llm/text.py +348 -42
  273. maxframe/learn/contrib/models.py +4 -1
  274. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  275. maxframe/learn/contrib/xgboost/core.py +113 -4
  276. maxframe/learn/contrib/xgboost/predict.py +4 -2
  277. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  278. maxframe/learn/contrib/xgboost/train.py +7 -2
  279. maxframe/learn/core.py +66 -0
  280. maxframe/learn/linear_model/_base.py +58 -1
  281. maxframe/learn/linear_model/_lin_reg.py +1 -1
  282. maxframe/learn/metrics/__init__.py +6 -0
  283. maxframe/learn/metrics/_classification.py +145 -0
  284. maxframe/learn/metrics/_ranking.py +477 -0
  285. maxframe/learn/metrics/_scorer.py +60 -0
  286. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  287. maxframe/learn/metrics/pairwise/core.py +77 -0
  288. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  289. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  290. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  291. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  292. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  293. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  294. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  295. maxframe/learn/metrics/tests/__init__.py +13 -0
  296. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  297. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  298. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  299. maxframe/learn/utils/__init__.py +2 -1
  300. maxframe/learn/utils/checks.py +1 -2
  301. maxframe/learn/utils/core.py +59 -0
  302. maxframe/learn/utils/extmath.py +79 -9
  303. maxframe/learn/utils/odpsio.py +262 -0
  304. maxframe/learn/utils/validation.py +2 -2
  305. maxframe/lib/compat.py +40 -0
  306. maxframe/lib/dtypes_extension/__init__.py +16 -1
  307. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  308. maxframe/lib/dtypes_extension/blob.py +304 -0
  309. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  310. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  311. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  312. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  313. maxframe/lib/filesystem/_oss_lib/common.py +124 -50
  314. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  315. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  316. maxframe/lib/filesystem/base.py +1 -1
  317. maxframe/lib/filesystem/core.py +1 -1
  318. maxframe/lib/filesystem/oss.py +115 -46
  319. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  320. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  321. maxframe/lib/wrapped_pickle.py +10 -0
  322. maxframe/opcodes.py +41 -15
  323. maxframe/protocol.py +12 -0
  324. maxframe/remote/core.py +4 -0
  325. maxframe/serialization/__init__.py +11 -2
  326. maxframe/serialization/arrow.py +38 -13
  327. maxframe/serialization/blob.py +32 -0
  328. maxframe/serialization/core.cp37-win32.pyd +0 -0
  329. maxframe/serialization/core.pyx +39 -1
  330. maxframe/serialization/exception.py +2 -4
  331. maxframe/serialization/numpy.py +11 -0
  332. maxframe/serialization/pandas.py +46 -9
  333. maxframe/serialization/serializables/core.py +2 -2
  334. maxframe/serialization/tests/test_serial.py +31 -4
  335. maxframe/tensor/__init__.py +38 -8
  336. maxframe/tensor/arithmetic/__init__.py +19 -10
  337. maxframe/tensor/arithmetic/core.py +2 -2
  338. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  339. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
  340. maxframe/tensor/core.py +6 -2
  341. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  342. maxframe/tensor/extensions/__init__.py +2 -0
  343. maxframe/tensor/extensions/apply_chunk.py +3 -3
  344. maxframe/tensor/extensions/rebalance.py +65 -0
  345. maxframe/tensor/fft/__init__.py +32 -0
  346. maxframe/tensor/fft/core.py +168 -0
  347. maxframe/tensor/fft/fft.py +112 -0
  348. maxframe/tensor/fft/fft2.py +118 -0
  349. maxframe/tensor/fft/fftfreq.py +80 -0
  350. maxframe/tensor/fft/fftn.py +123 -0
  351. maxframe/tensor/fft/fftshift.py +79 -0
  352. maxframe/tensor/fft/hfft.py +112 -0
  353. maxframe/tensor/fft/ifft.py +114 -0
  354. maxframe/tensor/fft/ifft2.py +115 -0
  355. maxframe/tensor/fft/ifftn.py +123 -0
  356. maxframe/tensor/fft/ifftshift.py +73 -0
  357. maxframe/tensor/fft/ihfft.py +93 -0
  358. maxframe/tensor/fft/irfft.py +118 -0
  359. maxframe/tensor/fft/irfft2.py +62 -0
  360. maxframe/tensor/fft/irfftn.py +114 -0
  361. maxframe/tensor/fft/rfft.py +116 -0
  362. maxframe/tensor/fft/rfft2.py +63 -0
  363. maxframe/tensor/fft/rfftfreq.py +87 -0
  364. maxframe/tensor/fft/rfftn.py +113 -0
  365. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  366. maxframe/tensor/linalg/__init__.py +7 -0
  367. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  368. maxframe/tensor/linalg/cholesky.py +117 -0
  369. maxframe/tensor/linalg/einsum.py +339 -0
  370. maxframe/tensor/linalg/lstsq.py +100 -0
  371. maxframe/tensor/linalg/matrix_norm.py +75 -0
  372. maxframe/tensor/linalg/norm.py +249 -0
  373. maxframe/tensor/linalg/solve.py +72 -0
  374. maxframe/tensor/linalg/solve_triangular.py +2 -2
  375. maxframe/tensor/linalg/vector_norm.py +113 -0
  376. maxframe/tensor/misc/__init__.py +24 -1
  377. maxframe/tensor/misc/argwhere.py +72 -0
  378. maxframe/tensor/misc/array_split.py +46 -0
  379. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  380. maxframe/tensor/misc/copyto.py +130 -0
  381. maxframe/tensor/misc/delete.py +104 -0
  382. maxframe/tensor/misc/dsplit.py +68 -0
  383. maxframe/tensor/misc/ediff1d.py +74 -0
  384. maxframe/tensor/misc/expand_dims.py +85 -0
  385. maxframe/tensor/misc/flip.py +90 -0
  386. maxframe/tensor/misc/fliplr.py +64 -0
  387. maxframe/tensor/misc/flipud.py +68 -0
  388. maxframe/tensor/misc/hsplit.py +85 -0
  389. maxframe/tensor/misc/insert.py +139 -0
  390. maxframe/tensor/misc/moveaxis.py +83 -0
  391. maxframe/tensor/misc/result_type.py +88 -0
  392. maxframe/tensor/misc/roll.py +124 -0
  393. maxframe/tensor/misc/rollaxis.py +77 -0
  394. maxframe/tensor/misc/shape.py +89 -0
  395. maxframe/tensor/misc/split.py +190 -0
  396. maxframe/tensor/misc/tile.py +109 -0
  397. maxframe/tensor/misc/vsplit.py +74 -0
  398. maxframe/tensor/reduction/array_equal.py +2 -1
  399. maxframe/tensor/sort/__init__.py +2 -0
  400. maxframe/tensor/sort/argpartition.py +98 -0
  401. maxframe/tensor/sort/partition.py +228 -0
  402. maxframe/tensor/spatial/__init__.py +15 -0
  403. maxframe/tensor/spatial/distance/__init__.py +17 -0
  404. maxframe/tensor/spatial/distance/cdist.py +421 -0
  405. maxframe/tensor/spatial/distance/pdist.py +398 -0
  406. maxframe/tensor/spatial/distance/squareform.py +153 -0
  407. maxframe/tensor/special/__init__.py +159 -21
  408. maxframe/tensor/special/airy.py +55 -0
  409. maxframe/tensor/special/bessel.py +199 -0
  410. maxframe/tensor/special/core.py +65 -4
  411. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  412. maxframe/tensor/special/ellip_harm.py +55 -0
  413. maxframe/tensor/special/err_fresnel.py +223 -0
  414. maxframe/tensor/special/gamma_funcs.py +303 -0
  415. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  416. maxframe/tensor/special/info_theory.py +189 -0
  417. maxframe/tensor/special/misc.py +21 -0
  418. maxframe/tensor/statistics/__init__.py +6 -0
  419. maxframe/tensor/statistics/corrcoef.py +77 -0
  420. maxframe/tensor/statistics/cov.py +222 -0
  421. maxframe/tensor/statistics/digitize.py +126 -0
  422. maxframe/tensor/statistics/histogram.py +520 -0
  423. maxframe/tensor/statistics/median.py +85 -0
  424. maxframe/tensor/statistics/ptp.py +89 -0
  425. maxframe/tensor/utils.py +3 -3
  426. maxframe/tests/test_udf.py +61 -0
  427. maxframe/tests/test_utils.py +51 -6
  428. maxframe/tests/utils.py +0 -2
  429. maxframe/typing_.py +2 -0
  430. maxframe/udf.py +130 -9
  431. maxframe/utils.py +254 -27
  432. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
  433. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
  434. maxframe_client/fetcher.py +35 -4
  435. maxframe_client/session/odps.py +7 -2
  436. maxframe_client/session/task.py +8 -1
  437. maxframe_client/tests/test_fetcher.py +76 -3
  438. maxframe_client/tests/test_session.py +28 -1
  439. maxframe/dataframe/arrays.py +0 -864
  440. /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  441. /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  442. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  443. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -12,6 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import os
15
16
  import time
16
17
  from io import BytesIO
17
18
 
@@ -21,7 +22,7 @@ import pytest
21
22
  from .. import oss
22
23
  from .._oss_lib import glob as og
23
24
  from .._oss_lib.common import OSSFileEntry
24
- from ..oss import build_oss_path
25
+ from ..oss import HostEnforceType, _rewrite_internal_endpoint, build_oss_path
25
26
 
26
27
 
27
28
  class OSSObjInfo:
@@ -54,19 +55,25 @@ class MockObject:
54
55
 
55
56
 
56
57
  class SideEffectBucket:
58
+ cached_ctx = {}
59
+
57
60
  def __init__(self, *_, **__):
58
- self.obj_dict = {
59
- "file.csv": "id1,id2,id3\n1,2,3\n",
60
- "dir/": "",
61
- "dir/file1.csv": "2",
62
- "dir/file2.csv": "3",
63
- "dir/subdir/": "",
64
- "dir/subdir/file3.csv": "s4",
65
- "dir/subdir/file4.csv": "s5",
66
- "dir2/": "",
67
- "dir2/file6.csv": "6",
68
- "dir2/file7.csv": "7",
69
- }
61
+ cur_test = os.environ["PYTEST_CURRENT_TEST"]
62
+ if cur_test in self.cached_ctx:
63
+ self.obj_dict = self.cached_ctx[cur_test]
64
+ else:
65
+ self.obj_dict = self.cached_ctx[cur_test] = {
66
+ "file.csv": "id1,id2,id3\n1,2,3\n",
67
+ "dir/": "",
68
+ "dir/file1.csv": "2",
69
+ "dir/file2.csv": "3",
70
+ "dir/subdir/": "",
71
+ "dir/subdir/file3.csv": "s4",
72
+ "dir/subdir/file4.csv": "s5",
73
+ "dir2/": "",
74
+ "dir2/file6.csv": "6",
75
+ "dir2/file7.csv": "7",
76
+ }
70
77
 
71
78
  def get_object_meta(self, key):
72
79
  return ObjectMeta(key, self.obj_dict)
@@ -77,6 +84,17 @@ class SideEffectBucket:
77
84
  def get_object(self, key, byte_range):
78
85
  return MockObject(self.obj_dict, key, byte_range)
79
86
 
87
+ def copy_object(self, bucket, src_key, dst_key):
88
+ self.obj_dict[dst_key] = self.obj_dict[src_key]
89
+
90
+ def delete_object(self, key):
91
+ from oss2.exceptions import NoSuchKey
92
+
93
+ try:
94
+ del self.obj_dict[key]
95
+ except KeyError:
96
+ raise NoSuchKey(404, {}, key, {})
97
+
80
98
 
81
99
  class SideEffectObjIter:
82
100
  def __init__(self, *args, **kwargs):
@@ -96,28 +114,29 @@ def test_oss_filesystem(fake_obj_iter, fake_oss_bucket):
96
114
  access_key_secret = "your_access_key_secret"
97
115
  end_point = "your_endpoint"
98
116
 
99
- file_path = f"oss://bucket/file.csv"
100
- dir_path = f"oss://bucket/dir/"
101
- dir_path_content_magic = f"oss://bucket/dir*/"
117
+ file_path = f"oss://your_endpoint/bucket/file.csv"
118
+ new_file_path = f"oss://your_endpoint/bucket/file1.csv"
119
+ dir_path = f"oss://your_endpoint/bucket/dir/"
120
+ dir_path_content_magic = f"oss://your_endpoint/bucket/dir*/"
102
121
  other_scheme_path = f"scheme://netloc/path"
103
- not_exist_file_path = f"oss://bucket/not_exist.csv"
122
+ not_exist_file_path = f"oss://your_endpoint/bucket/not_exist.csv"
104
123
 
105
124
  fake_file_path = build_oss_path(
106
- file_path, access_key_id, access_key_secret, end_point
125
+ file_path, end_point, access_key_id, access_key_secret
126
+ )
127
+ fake_new_file_path = build_oss_path(
128
+ new_file_path, end_point, access_key_id, access_key_secret
107
129
  )
108
130
  fake_dir_path = build_oss_path(
109
- dir_path, access_key_id, access_key_secret, end_point
131
+ dir_path, end_point, access_key_id, access_key_secret
110
132
  )
111
133
  fake_dir_path_contains_magic = build_oss_path(
112
- dir_path_content_magic, access_key_id, access_key_secret, end_point
113
- )
114
- fake_other_scheme_path = build_oss_path(
115
- other_scheme_path, access_key_id, access_key_secret, end_point
134
+ dir_path_content_magic, end_point, access_key_id, access_key_secret
116
135
  )
117
136
  fake_not_exist_file_path = build_oss_path(
118
- not_exist_file_path, access_key_id, access_key_secret, end_point
137
+ not_exist_file_path, end_point, access_key_id, access_key_secret
119
138
  )
120
- fs = oss.OSSFileSystem.get_instance()
139
+ fs = oss.OSSFileSystem()
121
140
 
122
141
  # Test OSSFileSystem.
123
142
  assert len(fs.ls(fake_dir_path)) == 4
@@ -131,21 +150,15 @@ def test_oss_filesystem(fake_obj_iter, fake_oss_bucket):
131
150
  assert fs.stat(fake_dir_path)["type"] == "directory"
132
151
  assert fs.glob(fake_dir_path) == [fake_dir_path]
133
152
 
134
- with pytest.raises(ValueError) as e:
135
- fs.exists(fake_other_scheme_path)
136
- msg1 = e.value.args[0]
137
- assert (
138
- msg1 == f"Except scheme oss, but got scheme: "
139
- f"scheme in path: {fake_other_scheme_path}"
140
- )
153
+ msg1 = f"Except scheme oss, but got scheme: scheme in path: {other_scheme_path}"
154
+ with pytest.raises(ValueError, match=msg1):
155
+ fs.exists(other_scheme_path)
141
156
 
142
- with pytest.raises(RuntimeError) as e:
157
+ with pytest.raises(ValueError, match="No credentials provided"):
143
158
  fs.exists(file_path)
144
- msg2 = e.value.args[0]
145
- assert msg2 == "Please use build_oss_path to add OSS info"
146
159
 
147
160
  with pytest.raises(OSError):
148
- print(fs.ls(fake_file_path))
161
+ fs.ls(fake_file_path)
149
162
 
150
163
  assert len(fs.glob(fake_file_path)) == 1
151
164
  assert len(fs.glob(fake_dir_path + "*", recursive=True)) == 4
@@ -180,3 +193,28 @@ def test_oss_filesystem(fake_obj_iter, fake_oss_bucket):
180
193
 
181
194
  fe = OSSFileEntry(fake_file_path)
182
195
  assert fe.path == fake_file_path
196
+
197
+ fs.rename(fake_file_path, fake_new_file_path)
198
+ assert not fs.exists(fake_file_path)
199
+ assert fs.exists(fake_new_file_path)
200
+
201
+ with pytest.raises(FileNotFoundError):
202
+ fs.delete(fake_not_exist_file_path)
203
+
204
+
205
+ def test_host_rewrite():
206
+ assert "cn-shanghai.oss.service.com" == _rewrite_internal_endpoint(
207
+ "cn-shanghai.oss.service.com", HostEnforceType.force_external
208
+ )
209
+ assert "cn-shanghai.oss.service.com" == _rewrite_internal_endpoint(
210
+ "cn-shanghai-internal.oss.service.com", HostEnforceType.force_external
211
+ )
212
+ assert "cn-shanghai-internal.oss.service.com" == _rewrite_internal_endpoint(
213
+ "cn-shanghai.oss.service.com", HostEnforceType.force_internal
214
+ )
215
+ assert "cn-shanghai-internal.oss.service.com" == _rewrite_internal_endpoint(
216
+ "cn-shanghai-internal.oss.service.com", HostEnforceType.force_internal
217
+ )
218
+ assert "1.2.3.4" == _rewrite_internal_endpoint(
219
+ "1.2.3.4", HostEnforceType.force_internal
220
+ )
Binary file
@@ -75,6 +75,16 @@ class Unpickler(pickle_mod.Unpickler):
75
75
  raise ValueError("Unpickle is forbidden here")
76
76
  return super().load()
77
77
 
78
+ def find_class(self, module, name):
79
+ try:
80
+ return super().find_class(module, name)
81
+ except ImportError:
82
+ # workaround for pickle incompatibility since numpy>=2.0
83
+ if not module.startswith("numpy._core"):
84
+ raise
85
+ module = module.replace("numpy._core", "numpy.core")
86
+ return super().find_class(module, name)
87
+
78
88
 
79
89
  @functools.wraps(pickle_mod.load)
80
90
  def load(file, **kwargs):
maxframe/opcodes.py CHANGED
@@ -271,6 +271,9 @@ SEM = 352
271
271
  STR_CONCAT = 353
272
272
  MAD = 354
273
273
  MEDIAN = 355
274
+ IDXMAX = 357
275
+ IDXMIN = 358
276
+ MODE = 359
274
277
 
275
278
  # tensor operator
276
279
  RESHAPE = 401
@@ -389,6 +392,15 @@ ALIGN = 741
389
392
  CASE_WHEN = 742
390
393
  PIVOT = 743
391
394
  PIVOT_TABLE = 744
395
+ TO_NUMERIC = 745
396
+ DATAFRAME_FILTER = 746
397
+ REORDER_LEVELS = 747
398
+ DATAFRAME_COMPARE = 748
399
+ DROPLEVEL = 749
400
+ DATAFRAME_UPDATE = 750
401
+ DATAFRAME_COMBINE = 751
402
+ DATAFRAME_INFER_DTYPES = 752
403
+ BETWEEN_TIME = 753
392
404
 
393
405
  FUSE = 801
394
406
 
@@ -400,6 +412,9 @@ MANAGED_MULTI_MODAL_GENERATION = 813
400
412
  LLM_TEXT_SUMMARIZE_TASK = 814
401
413
  LLM_TEXT_TRANSLATE_TASK = 815
402
414
  LLM_TEXT_CLASSIFY_TASK = 816
415
+ LLM_TEXT_EXTRACT_TASK = 817
416
+ LLM_TEXT_EMBEDDING_TASK = 818
417
+ OPENAI_COMPATIBLE_TEXT_GENERATION = 819
403
418
 
404
419
  # table like input for tensor
405
420
  TABLE_COO = 1003
@@ -417,6 +432,7 @@ DATAFRAME_ILOC_GETITEM = 2021
417
432
  DATAFRAME_ILOC_SETITEM = 2022
418
433
  DATAFRAME_LOC_GETITEM = 2023
419
434
  DATAFRAME_LOC_SETITEM = 2024
435
+ GET_LEVEL_VALUES = 2025
420
436
 
421
437
  # merge
422
438
  DATAFRAME_MERGE = 2010
@@ -426,7 +442,7 @@ DATAFRAME_SHUFFLE_MERGE_ALIGN = 2011
426
442
  DATAFRAME_BLOOM_FILTER = 2014
427
443
 
428
444
  # append
429
- APPEND = 2015
445
+ APPEND = 2015 # deprecated since v2.2
430
446
 
431
447
  # reset index
432
448
  RESET_INDEX = 2028
@@ -439,15 +455,14 @@ GROUPBY_AGG = 2033
439
455
  GROUPBY_CONCAT = 2034
440
456
  GROUPBY_HEAD = 2035
441
457
  GROUPBY_SAMPLE_ILOC = 2036
442
- GROUPBY_SORT_REGULAR_SAMPLE = 2037
443
- GROUPBY_SORT_PIVOT = 2038
444
458
  GROUPBY_SORT_SHUFFLE = 2039
445
459
 
446
460
  # parallel sorting by regular sampling
447
- PSRS_SORT_REGULAR_SMAPLE = 2040
461
+ PSRS_SORT_REGULAR_SAMPLE = 2040
448
462
  PSRS_CONCAT_PIVOT = 2041
449
463
  PSRS_SHUFFLE = 2042
450
464
  PSRS_ALIGN = 2043
465
+ PSRS_RANK_SHUFFLE = 2044
451
466
  # partition
452
467
  CALC_PARTITIONS_INFO = 2046
453
468
  PARTITION_MERGED = 2047
@@ -455,6 +470,7 @@ PARTITION_MERGED = 2047
455
470
  # dataframe sort
456
471
  SORT_VALUES = 2050
457
472
  SORT_INDEX = 2051
473
+ RANK = 2052
458
474
 
459
475
  # window
460
476
  ROLLING_AGG = 2060
@@ -475,6 +491,8 @@ TO_ODPS_TABLE = 20112
475
491
  READ_ODPS_VOLUME = 20113
476
492
  TO_ODPS_VOLUME = 20114
477
493
  READ_ODPS_QUERY = 20115
494
+ READ_ODPS_MODEL = 20116 # reserved
495
+ TO_ODPS_MODEL = 20117
478
496
 
479
497
  TO_CSV_STAT = 2102
480
498
 
@@ -506,9 +524,10 @@ FAISS_TRAIN_SAMPLED_INDEX = 2235
506
524
  FAISS_QUERY = 2236
507
525
  PROXIMA_SIMPLE_BUILDER = 2238
508
526
  PROXIMA_SIMPLE_SEARCHER = 2239
509
- KNEIGHBORS_GRAPH = 2237
527
+ K_NEIGHBORS_GRAPH = 2237
510
528
 
511
529
  # cluster
530
+ # k-means related
512
531
  KMEANS_PLUS_PLUS_INIT = 2250
513
532
  KMEANS_SCALABLE_PLUS_PLUS_INIT = 2251
514
533
  KMEANS_ELKAN_INIT_BOUNDS = 2252
@@ -517,7 +536,12 @@ KMEANS_ELKAN_POSTPROCESS = 2254
517
536
  KMEANS_LLOYD_UPDATE = 2255
518
537
  KMEANS_LLOYD_POSTPROCESS = 2256
519
538
  KMEANS_INERTIA = 2257
520
- KMEANS_RELOCASTE_EMPTY_CLUSTERS = 2258
539
+ KMEANS_RELOCATE_EMPTY_CLUSTERS = 2258
540
+ KMEANS_FIT = 2259
541
+ KMEANS_PREDICT = 2260
542
+
543
+ # linear models
544
+ LOGISTIC_REGRESSION_FIT = 2270
521
545
 
522
546
  # XGBoost
523
547
  XGBOOST_TRAIN = 3001
@@ -580,6 +604,10 @@ RUN_SCRIPT = 5002
580
604
  # learn metrics
581
605
  MULTILABEL_CONFUSION_MATRIX = 5201
582
606
  PRECISION_RECALL_F_SCORE_SUPPORT = 5202
607
+ AUC = 5203
608
+ ROC_CURVE = 5204
609
+ ROC_AUC_SCORE = 5205
610
+ LOG_LOSS = 5206
583
611
 
584
612
  CHOLESKY_FUSE = 999988
585
613
 
@@ -588,23 +616,21 @@ DATAFRAME_RESHUFFLE = 10001
588
616
  FLATMAP = 10002
589
617
  FLATJSON = 10003
590
618
  APPLY_CHUNK = 10004
619
+ EXTRACT_KV = 10013
620
+ COLLECT_KV = 10014
621
+
622
+ SERIES_DICT_METHOD = 10011
623
+ SERIES_LIST_METHOD = 10012
624
+ SERIES_STRUCT_METHOD = 10015
591
625
 
626
+ # placeholders for compatibility, DO NOT REMOVE
592
627
  SERIES_DICT_GETITEM = 10005
593
628
  SERIES_DICT_SETITEM = 10006
594
629
  SERIES_DICT_LENGTH = 10007
595
630
  SERIES_DICT_REMOVE = 10008
596
631
  SERIES_DICT_CONTAINS = 10009
597
- SERIES_DICT_FLATTEN = 10010
598
-
599
632
  SERIES_LIST_GETITEM = 10020
600
- SERIES_LIST_SETITEM = 10021
601
- SERIES_LIST_CONTAINS = 10022
602
633
  SERIES_LIST_LENGTH = 10023
603
- SERIES_LIST_INSERT = 10024
604
- SERIES_LIST_EXTEND = 10025
605
- SERIES_LIST_POP = 10026
606
- SERIES_LIST_SORT = 10027
607
- SERIES_LIST_FLATTEN = 10028
608
634
 
609
635
  # MaxFrame internal operators
610
636
  DATAFRAME_PROJECTION_SAME_INDEX_MERGE = 100001
maxframe/protocol.py CHANGED
@@ -39,6 +39,7 @@ from .serialization.serializables import (
39
39
  SeriesField,
40
40
  StringField,
41
41
  )
42
+ from .utils import combine_error_message_and_traceback
42
43
 
43
44
  pickling_support.install()
44
45
 
@@ -244,6 +245,9 @@ class ErrorInfo(JsonSerializable):
244
245
  "raw_error_source", ErrorSource, FieldTypes.int8, default=None
245
246
  )
246
247
  raw_error_data: Optional[Exception] = AnyField("raw_error_data", default=None)
248
+ displayed_error_message: Optional[str] = StringField(
249
+ "displayed_error_message", default=None
250
+ )
247
251
 
248
252
  @classmethod
249
253
  def from_exception(cls, exc: Exception):
@@ -282,6 +286,7 @@ class ErrorInfo(JsonSerializable):
282
286
  "error_messages": self.error_messages,
283
287
  "error_tracebacks": self.error_tracebacks,
284
288
  "raw_error_source": self.raw_error_source.value,
289
+ "displayed_error_message": self.displayed_error_message,
285
290
  }
286
291
  err_data_bufs = None
287
292
  if isinstance(self.raw_error_data, (PickleContainer, RemoteException)):
@@ -299,6 +304,13 @@ class ErrorInfo(JsonSerializable):
299
304
  ]
300
305
  return ret
301
306
 
307
+ def get_displayed_error_message(self) -> str:
308
+ if self.displayed_error_message is not None:
309
+ return self.displayed_error_message
310
+ return combine_error_message_and_traceback(
311
+ self.error_messages, self.error_tracebacks
312
+ )
313
+
302
314
 
303
315
  class DagInfo(JsonSerializable):
304
316
  session_id: str = StringField("session_id", default=None)
maxframe/remote/core.py CHANGED
@@ -27,6 +27,7 @@ from ..serialization.serializables import (
27
27
  ListField,
28
28
  )
29
29
  from ..tensor.core import TENSOR_TYPE
30
+ from ..typing_ import TileableType
30
31
  from ..udf import BuiltinFunction
31
32
  from ..utils import find_objects, replace_objects
32
33
 
@@ -59,6 +60,9 @@ class RemoteFunction(ObjectOperatorMixin, ObjectOperator):
59
60
  def has_custom_code(self) -> bool:
60
61
  return not isinstance(self.function, BuiltinFunction)
61
62
 
63
+ def check_inputs(self, inputs: List[TileableType]):
64
+ return
65
+
62
66
  @classmethod
63
67
  def _set_inputs(cls, op: "RemoteFunction", inputs: List[EntityData]):
64
68
  raw_inputs = getattr(op, "_inputs", None)
@@ -12,7 +12,16 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from . import arrow, exception, maxframe_objects, numpy, pandas, scipy, serializables
15
+ from . import (
16
+ arrow,
17
+ blob,
18
+ exception,
19
+ maxframe_objects,
20
+ numpy,
21
+ pandas,
22
+ scipy,
23
+ serializables,
24
+ )
16
25
  from .core import (
17
26
  PickleContainer,
18
27
  PickleHookOptions,
@@ -27,4 +36,4 @@ from .core import (
27
36
  )
28
37
  from .exception import RemoteException
29
38
 
30
- del arrow, numpy, scipy, pandas, maxframe_objects, exception
39
+ del arrow, blob, exception, maxframe_objects, numpy, pandas, scipy
@@ -14,7 +14,7 @@
14
14
 
15
15
  from typing import Any, Dict, List, Union
16
16
 
17
- from ..utils import arrow_type_from_str
17
+ from ..utils import arrow_type_from_str, extract_class_name
18
18
  from .core import Serializer, buffered
19
19
 
20
20
  try:
@@ -30,30 +30,54 @@ except ImportError: # pragma: no cover
30
30
 
31
31
  _TYPE_CHAR_ARROW_ARRAY = "A"
32
32
  _TYPE_CHAR_ARROW_CHUNKED_ARRAY = "C"
33
+ _TYPE_CHAR_ARROW_REDUCED = "R"
34
+
35
+
36
+ class ArrowDataTypeSerializer(Serializer):
37
+ def serial(self, obj: pa.DataType, context):
38
+ return [str(obj)], [], True
39
+
40
+ def deserial(self, serialized, context, subs):
41
+ return arrow_type_from_str(serialized[0])
33
42
 
34
43
 
35
44
  class ArrowArraySerializer(Serializer):
36
45
  @buffered
37
46
  def serial(self, obj: PA_ARRAY_TYPES, context: Dict):
38
- data_type = str(obj.type)
39
- if isinstance(obj, pa.Array):
40
- array_type = _TYPE_CHAR_ARROW_ARRAY
41
- buffers = obj.buffers()
42
- sizes = len(obj)
43
- elif isinstance(obj, pa.ChunkedArray):
44
- array_type = _TYPE_CHAR_ARROW_CHUNKED_ARRAY
45
- buffers = [c.buffers() for c in obj.chunks]
46
- sizes = [len(c) for c in obj.chunks]
47
- else: # pragma: no cover
47
+ if not isinstance(obj, (pa.Array, pa.ChunkedArray)):
48
48
  raise NotImplementedError(f"Array type {type(obj)} not supported")
49
- return [array_type, data_type, sizes], buffers, True
49
+
50
+ if obj.type.num_fields == 0:
51
+ # use legacy serialization in case arrow changes deserializer method
52
+ data_type = str(obj.type)
53
+ if isinstance(obj, pa.Array):
54
+ array_type = _TYPE_CHAR_ARROW_ARRAY
55
+ buffers = obj.buffers()
56
+ sizes = len(obj)
57
+ else: # ChunkedArray
58
+ array_type = _TYPE_CHAR_ARROW_CHUNKED_ARRAY
59
+ buffers = [c.buffers() for c in obj.chunks]
60
+ sizes = [len(c) for c in obj.chunks]
61
+ return [array_type, data_type, sizes], buffers, False
62
+
63
+ meth, extracted = obj.__reduce__()
64
+ meth_name = extract_class_name(meth)
65
+ return [_TYPE_CHAR_ARROW_REDUCED, meth_name, None], list(extracted), False
50
66
 
51
67
  def deserial(self, serialized: List, context: Dict, subs: List):
52
68
  array_type, data_type_str, sizes = serialized[:3]
53
- data_type = arrow_type_from_str(data_type_str)
69
+ if array_type == _TYPE_CHAR_ARROW_REDUCED:
70
+ if data_type_str == "pyarrow.lib#chunked_array":
71
+ return pa.chunked_array(*subs)
72
+ elif data_type_str == "pyarrow.lib#_restore_array":
73
+ return pa.lib._restore_array(*subs)
74
+ else:
75
+ raise NotImplementedError(f"Unknown array type: {array_type}")
54
76
  if array_type == _TYPE_CHAR_ARROW_ARRAY:
77
+ data_type = arrow_type_from_str(data_type_str)
55
78
  return pa.Array.from_buffers(data_type, sizes, subs)
56
79
  elif array_type == _TYPE_CHAR_ARROW_CHUNKED_ARRAY:
80
+ data_type = arrow_type_from_str(data_type_str)
57
81
  chunks = [
58
82
  pa.Array.from_buffers(data_type, size, bufs)
59
83
  for size, bufs in zip(sizes, subs)
@@ -89,6 +113,7 @@ class ArrowBatchSerializer(Serializer):
89
113
 
90
114
 
91
115
  if pa is not None: # pragma: no branch
116
+ ArrowDataTypeSerializer.register(pa.DataType)
92
117
  ArrowArraySerializer.register(pa.Array)
93
118
  ArrowArraySerializer.register(pa.ChunkedArray)
94
119
  ArrowBatchSerializer.register(pa.Table)
@@ -0,0 +1,32 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Dict
16
+
17
+ from ..lib.dtypes_extension.blob import AbstractExternalBlob, SolidBlob
18
+ from .core import Serializer
19
+
20
+
21
+ class ExternalBlobSerializer(Serializer):
22
+ def serial(self, obj: AbstractExternalBlob, context: Dict):
23
+ _, vals = obj.__reduce__()
24
+ return [type(obj).__name__], list(vals), False
25
+
26
+ def deserial(self, serialized, context, subs):
27
+ cls_name = serialized[0]
28
+ cls = AbstractExternalBlob.get_cls_by_name(cls_name)
29
+ return cls(*subs)
30
+
31
+
32
+ ExternalBlobSerializer.register(SolidBlob)
Binary file
@@ -52,6 +52,11 @@ try:
52
52
  except (ImportError, AttributeError):
53
53
  pass
54
54
 
55
+ try:
56
+ import pyarrow as pa
57
+ except ImportError:
58
+ pa = None
59
+
55
60
  try:
56
61
  import pytz
57
62
  from pytz import BaseTzInfo as PyTZ_BaseTzInfo
@@ -95,6 +100,8 @@ cdef:
95
100
  int SLICE_SERIALIZER = 13
96
101
  int REGEX_SERIALIZER = 14
97
102
  int NO_DEFAULT_SERIALIZER = 15
103
+ int ARROW_BUFFER_SERIALIZER = 16
104
+ int RANGE_SERIALIZER = 17
98
105
  int PLACEHOLDER_SERIALIZER = 4096
99
106
 
100
107
 
@@ -874,12 +881,28 @@ cdef class SliceSerializer(Serializer):
874
881
  serializer_id = SLICE_SERIALIZER
875
882
 
876
883
  cpdef serial(self, object obj: slice, dict context):
877
- return [obj.start, obj.stop, obj.step], [], True
884
+ cdef list elems = [obj.start, obj.stop, obj.step]
885
+ for x in elems:
886
+ if x is not None and not isinstance(x, int):
887
+ return [], elems, False
888
+ return elems, [], True
878
889
 
879
890
  cpdef deserial(self, list serialized, dict context, list subs):
891
+ if len(serialized) == 0:
892
+ return slice(subs[0], subs[1], subs[2])
880
893
  return slice(*serialized[:3])
881
894
 
882
895
 
896
+ cdef class RangeSerializer(Serializer):
897
+ serializer_id = RANGE_SERIALIZER
898
+
899
+ cpdef serial(self, object obj: range, dict context):
900
+ return [obj.start, obj.stop, obj.step], [], True
901
+
902
+ cpdef deserial(self, list serialized, dict context, list subs):
903
+ return range(*serialized[:3])
904
+
905
+
883
906
  cdef class RegexSerializer(Serializer):
884
907
  serializer_id = REGEX_SERIALIZER
885
908
 
@@ -906,6 +929,18 @@ cdef class NoDefaultSerializer(Serializer):
906
929
  return no_default
907
930
 
908
931
 
932
+ cdef class ArrowBufferSerializer(Serializer):
933
+ serializer_id = ARROW_BUFFER_SERIALIZER
934
+
935
+ cpdef serial(self, object obj, dict context):
936
+ return [], [obj], True
937
+
938
+ cpdef deserial(self, list obj, dict context, list subs):
939
+ if not isinstance(subs[0], pa.Buffer):
940
+ return pa.py_buffer(subs[0])
941
+ return subs[0]
942
+
943
+
909
944
  cdef class Placeholder:
910
945
  """
911
946
  Placeholder object to reduce duplicated serialization
@@ -959,8 +994,11 @@ DtypeSerializer.register(np.dtype)
959
994
  DtypeSerializer.register(ExtensionDtype)
960
995
  ComplexSerializer.register(complex)
961
996
  SliceSerializer.register(slice)
997
+ RangeSerializer.register(range)
962
998
  RegexSerializer.register(re.Pattern)
963
999
  NoDefaultSerializer.register(NoDefault)
1000
+ if pa is not None:
1001
+ ArrowBufferSerializer.register(pa.Buffer)
964
1002
  PlaceholderSerializer.register(Placeholder)
965
1003
 
966
1004
 
@@ -18,6 +18,7 @@ from typing import Dict, List
18
18
 
19
19
  from ..errors import MaxFrameError
20
20
  from ..lib import wrapped_pickle as pickle
21
+ from ..utils import combine_error_message_and_traceback
21
22
  from .core import Serializer, buffered, pickle_buffers, unpickle_buffers
22
23
 
23
24
  logger = logging.getLogger(__name__)
@@ -53,10 +54,7 @@ class RemoteException(MaxFrameError):
53
54
  return unpickle_buffers(self.buffers) if self.buffers else self
54
55
 
55
56
  def __str__(self):
56
- tbs = []
57
- for msg, tb in zip(self.messages, self.tracebacks):
58
- tbs.append("".join([msg + "\n"] + tb))
59
- return "\nCaused by:\n".join(tbs)
57
+ return combine_error_message_and_traceback(self.messages, self.tracebacks)
60
58
 
61
59
 
62
60
  class ExceptionSerializer(Serializer):
@@ -95,5 +95,16 @@ class NDArraySerializer(Serializer):
95
95
  return val
96
96
 
97
97
 
98
+ class RandomStateSerializer(Serializer):
99
+ def serial(self, obj: np.random.RandomState, context: Dict):
100
+ return [], [obj.get_state()], False
101
+
102
+ def deserial(self, serialized, context: Dict, subs: List):
103
+ rs = np.random.RandomState()
104
+ rs.set_state(subs[0])
105
+ return rs
106
+
107
+
98
108
  NDArraySerializer.register(np.generic)
99
109
  NDArraySerializer.register(np.ndarray)
110
+ RandomStateSerializer.register(np.random.RandomState)