maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (443)
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win32.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +9 -8
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +38 -1
  15. maxframe/codegen/spe/dataframe/misc.py +11 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +32 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +39 -18
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  29. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  30. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  31. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  32. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  33. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  34. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  35. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  36. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  37. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  38. maxframe/codegen/spe/tensor/__init__.py +3 -0
  39. maxframe/codegen/spe/tensor/datasource.py +1 -0
  40. maxframe/codegen/spe/tensor/fft.py +74 -0
  41. maxframe/codegen/spe/tensor/linalg.py +29 -2
  42. maxframe/codegen/spe/tensor/misc.py +79 -25
  43. maxframe/codegen/spe/tensor/spatial.py +45 -0
  44. maxframe/codegen/spe/tensor/statistics.py +44 -0
  45. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  46. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  47. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  48. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  49. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  50. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  51. maxframe/codegen/spe/utils.py +2 -0
  52. maxframe/config/config.py +73 -9
  53. maxframe/config/tests/test_validators.py +13 -1
  54. maxframe/config/validators.py +49 -0
  55. maxframe/conftest.py +54 -17
  56. maxframe/core/accessor.py +2 -2
  57. maxframe/core/base.py +2 -1
  58. maxframe/core/entity/core.py +5 -0
  59. maxframe/core/entity/tileables.py +3 -1
  60. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  61. maxframe/core/graph/entity.py +8 -3
  62. maxframe/core/mode.py +6 -1
  63. maxframe/core/operator/base.py +9 -2
  64. maxframe/core/operator/core.py +10 -2
  65. maxframe/core/operator/utils.py +13 -0
  66. maxframe/dataframe/__init__.py +12 -5
  67. maxframe/dataframe/accessors/__init__.py +1 -1
  68. maxframe/dataframe/accessors/compat.py +45 -0
  69. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  70. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  71. maxframe/dataframe/accessors/dict_/core.py +48 -0
  72. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  73. maxframe/dataframe/accessors/dict_/length.py +7 -16
  74. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  75. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  76. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  77. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  78. maxframe/dataframe/accessors/list_/core.py +48 -0
  79. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  80. maxframe/dataframe/accessors/list_/length.py +7 -16
  81. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  82. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  83. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  84. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  85. maxframe/dataframe/accessors/struct_/core.py +43 -0
  86. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  87. maxframe/dataframe/accessors/struct_/field.py +123 -0
  88. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  89. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  90. maxframe/dataframe/arithmetic/__init__.py +18 -4
  91. maxframe/dataframe/arithmetic/between.py +106 -0
  92. maxframe/dataframe/arithmetic/dot.py +237 -0
  93. maxframe/dataframe/arithmetic/maximum.py +33 -0
  94. maxframe/dataframe/arithmetic/minimum.py +33 -0
  95. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  96. maxframe/dataframe/core.py +161 -224
  97. maxframe/dataframe/datasource/__init__.py +18 -0
  98. maxframe/dataframe/datasource/core.py +6 -0
  99. maxframe/dataframe/datasource/direct.py +57 -0
  100. maxframe/dataframe/datasource/from_dict.py +124 -0
  101. maxframe/dataframe/datasource/from_index.py +1 -1
  102. maxframe/dataframe/datasource/from_records.py +77 -0
  103. maxframe/dataframe/datasource/from_tensor.py +109 -41
  104. maxframe/dataframe/datasource/read_csv.py +21 -14
  105. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  106. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  107. maxframe/dataframe/datasource/read_parquet.py +38 -39
  108. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  109. maxframe/dataframe/datastore/__init__.py +11 -1
  110. maxframe/dataframe/datastore/direct.py +268 -0
  111. maxframe/dataframe/datastore/to_csv.py +29 -41
  112. maxframe/dataframe/datastore/to_odps.py +36 -4
  113. maxframe/dataframe/extensions/__init__.py +20 -4
  114. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  115. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  116. maxframe/dataframe/extensions/collect_kv.py +126 -0
  117. maxframe/dataframe/extensions/extract_kv.py +177 -0
  118. maxframe/dataframe/extensions/flatjson.py +2 -1
  119. maxframe/dataframe/extensions/map_reduce.py +263 -0
  120. maxframe/dataframe/extensions/rebalance.py +62 -0
  121. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  122. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  123. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  124. maxframe/dataframe/groupby/__init__.py +17 -2
  125. maxframe/dataframe/groupby/aggregation.py +86 -49
  126. maxframe/dataframe/groupby/apply.py +1 -1
  127. maxframe/dataframe/groupby/apply_chunk.py +19 -5
  128. maxframe/dataframe/groupby/core.py +116 -16
  129. maxframe/dataframe/groupby/cum.py +4 -25
  130. maxframe/dataframe/groupby/expanding.py +264 -0
  131. maxframe/dataframe/groupby/fill.py +1 -1
  132. maxframe/dataframe/groupby/getitem.py +12 -5
  133. maxframe/dataframe/groupby/head.py +11 -1
  134. maxframe/dataframe/groupby/rank.py +136 -0
  135. maxframe/dataframe/groupby/rolling.py +206 -0
  136. maxframe/dataframe/groupby/shift.py +114 -0
  137. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  138. maxframe/dataframe/indexing/__init__.py +22 -2
  139. maxframe/dataframe/indexing/droplevel.py +195 -0
  140. maxframe/dataframe/indexing/filter.py +169 -0
  141. maxframe/dataframe/indexing/get_level_values.py +76 -0
  142. maxframe/dataframe/indexing/iat.py +45 -0
  143. maxframe/dataframe/indexing/iloc.py +152 -12
  144. maxframe/dataframe/indexing/insert.py +46 -18
  145. maxframe/dataframe/indexing/loc.py +287 -7
  146. maxframe/dataframe/indexing/reindex.py +14 -5
  147. maxframe/dataframe/indexing/rename.py +6 -0
  148. maxframe/dataframe/indexing/rename_axis.py +2 -2
  149. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  150. maxframe/dataframe/indexing/reset_index.py +33 -6
  151. maxframe/dataframe/indexing/sample.py +8 -0
  152. maxframe/dataframe/indexing/setitem.py +3 -3
  153. maxframe/dataframe/indexing/swaplevel.py +185 -0
  154. maxframe/dataframe/indexing/take.py +99 -0
  155. maxframe/dataframe/indexing/truncate.py +140 -0
  156. maxframe/dataframe/indexing/where.py +0 -11
  157. maxframe/dataframe/indexing/xs.py +148 -0
  158. maxframe/dataframe/merge/__init__.py +15 -1
  159. maxframe/dataframe/merge/append.py +97 -98
  160. maxframe/dataframe/merge/combine.py +244 -0
  161. maxframe/dataframe/merge/combine_first.py +120 -0
  162. maxframe/dataframe/merge/compare.py +387 -0
  163. maxframe/dataframe/merge/concat.py +183 -0
  164. maxframe/dataframe/merge/update.py +271 -0
  165. maxframe/dataframe/misc/__init__.py +28 -11
  166. maxframe/dataframe/misc/_duplicate.py +10 -4
  167. maxframe/dataframe/misc/apply.py +1 -1
  168. maxframe/dataframe/misc/check_unique.py +82 -0
  169. maxframe/dataframe/misc/clip.py +145 -0
  170. maxframe/dataframe/misc/describe.py +175 -9
  171. maxframe/dataframe/misc/drop.py +31 -0
  172. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  173. maxframe/dataframe/misc/duplicated.py +2 -2
  174. maxframe/dataframe/misc/get_dummies.py +5 -1
  175. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  176. maxframe/dataframe/misc/isin.py +2 -2
  177. maxframe/dataframe/misc/map.py +125 -18
  178. maxframe/dataframe/misc/repeat.py +159 -0
  179. maxframe/dataframe/misc/tests/test_misc.py +48 -3
  180. maxframe/dataframe/misc/to_numeric.py +3 -0
  181. maxframe/dataframe/misc/transform.py +12 -5
  182. maxframe/dataframe/misc/transpose.py +13 -1
  183. maxframe/dataframe/misc/valid_index.py +115 -0
  184. maxframe/dataframe/misc/value_counts.py +38 -4
  185. maxframe/dataframe/missing/checkna.py +14 -6
  186. maxframe/dataframe/missing/dropna.py +5 -0
  187. maxframe/dataframe/missing/fillna.py +1 -1
  188. maxframe/dataframe/missing/replace.py +7 -4
  189. maxframe/dataframe/reduction/__init__.py +35 -16
  190. maxframe/dataframe/reduction/aggregation.py +43 -14
  191. maxframe/dataframe/reduction/all.py +2 -2
  192. maxframe/dataframe/reduction/any.py +2 -2
  193. maxframe/dataframe/reduction/argmax.py +103 -0
  194. maxframe/dataframe/reduction/argmin.py +103 -0
  195. maxframe/dataframe/reduction/core.py +80 -24
  196. maxframe/dataframe/reduction/count.py +13 -9
  197. maxframe/dataframe/reduction/cov.py +166 -0
  198. maxframe/dataframe/reduction/cummax.py +2 -2
  199. maxframe/dataframe/reduction/cummin.py +2 -2
  200. maxframe/dataframe/reduction/cumprod.py +2 -2
  201. maxframe/dataframe/reduction/cumsum.py +2 -2
  202. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  203. maxframe/dataframe/reduction/idxmax.py +185 -0
  204. maxframe/dataframe/reduction/idxmin.py +185 -0
  205. maxframe/dataframe/reduction/kurtosis.py +37 -30
  206. maxframe/dataframe/reduction/max.py +2 -2
  207. maxframe/dataframe/reduction/mean.py +9 -7
  208. maxframe/dataframe/reduction/median.py +2 -2
  209. maxframe/dataframe/reduction/min.py +2 -2
  210. maxframe/dataframe/reduction/mode.py +144 -0
  211. maxframe/dataframe/reduction/nunique.py +19 -11
  212. maxframe/dataframe/reduction/prod.py +18 -13
  213. maxframe/dataframe/reduction/reduction_size.py +2 -2
  214. maxframe/dataframe/reduction/sem.py +13 -9
  215. maxframe/dataframe/reduction/skew.py +31 -27
  216. maxframe/dataframe/reduction/str_concat.py +10 -7
  217. maxframe/dataframe/reduction/sum.py +18 -14
  218. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  219. maxframe/dataframe/reduction/unique.py +20 -3
  220. maxframe/dataframe/reduction/var.py +16 -12
  221. maxframe/dataframe/reshape/__init__.py +38 -0
  222. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  223. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  224. maxframe/dataframe/reshape/unstack.py +114 -0
  225. maxframe/dataframe/sort/__init__.py +16 -1
  226. maxframe/dataframe/sort/argsort.py +68 -0
  227. maxframe/dataframe/sort/core.py +2 -1
  228. maxframe/dataframe/sort/nlargest.py +238 -0
  229. maxframe/dataframe/sort/nsmallest.py +228 -0
  230. maxframe/dataframe/sort/rank.py +147 -0
  231. maxframe/dataframe/statistics/__init__.py +3 -3
  232. maxframe/dataframe/statistics/corr.py +1 -0
  233. maxframe/dataframe/statistics/quantile.py +2 -2
  234. maxframe/dataframe/tests/test_typing.py +104 -0
  235. maxframe/dataframe/tests/test_utils.py +66 -2
  236. maxframe/dataframe/tseries/__init__.py +19 -0
  237. maxframe/dataframe/tseries/at_time.py +61 -0
  238. maxframe/dataframe/tseries/between_time.py +122 -0
  239. maxframe/dataframe/typing_.py +185 -0
  240. maxframe/dataframe/utils.py +125 -52
  241. maxframe/dataframe/window/aggregation.py +8 -4
  242. maxframe/dataframe/window/core.py +14 -1
  243. maxframe/dataframe/window/ewm.py +1 -3
  244. maxframe/dataframe/window/expanding.py +37 -35
  245. maxframe/dataframe/window/rolling.py +49 -39
  246. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  247. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  248. maxframe/env.py +7 -4
  249. maxframe/errors.py +2 -2
  250. maxframe/io/odpsio/schema.py +9 -3
  251. maxframe/io/odpsio/tableio.py +7 -2
  252. maxframe/io/odpsio/tests/test_schema.py +198 -83
  253. maxframe/learn/__init__.py +10 -2
  254. maxframe/learn/cluster/__init__.py +15 -0
  255. maxframe/learn/cluster/_kmeans.py +782 -0
  256. maxframe/learn/contrib/llm/core.py +18 -7
  257. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  258. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  259. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  260. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  261. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  262. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  263. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  264. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  265. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  266. maxframe/learn/contrib/llm/models/managed.py +76 -11
  267. maxframe/learn/contrib/llm/models/openai.py +72 -0
  268. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  269. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  270. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  271. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  272. maxframe/learn/contrib/llm/text.py +348 -42
  273. maxframe/learn/contrib/models.py +4 -1
  274. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  275. maxframe/learn/contrib/xgboost/core.py +113 -4
  276. maxframe/learn/contrib/xgboost/predict.py +4 -2
  277. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  278. maxframe/learn/contrib/xgboost/train.py +7 -2
  279. maxframe/learn/core.py +66 -0
  280. maxframe/learn/linear_model/_base.py +58 -1
  281. maxframe/learn/linear_model/_lin_reg.py +1 -1
  282. maxframe/learn/metrics/__init__.py +6 -0
  283. maxframe/learn/metrics/_classification.py +145 -0
  284. maxframe/learn/metrics/_ranking.py +477 -0
  285. maxframe/learn/metrics/_scorer.py +60 -0
  286. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  287. maxframe/learn/metrics/pairwise/core.py +77 -0
  288. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  289. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  290. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  291. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  292. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  293. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  294. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  295. maxframe/learn/metrics/tests/__init__.py +13 -0
  296. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  297. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  298. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  299. maxframe/learn/utils/__init__.py +2 -1
  300. maxframe/learn/utils/checks.py +1 -2
  301. maxframe/learn/utils/core.py +59 -0
  302. maxframe/learn/utils/extmath.py +79 -9
  303. maxframe/learn/utils/odpsio.py +262 -0
  304. maxframe/learn/utils/validation.py +2 -2
  305. maxframe/lib/compat.py +40 -0
  306. maxframe/lib/dtypes_extension/__init__.py +16 -1
  307. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  308. maxframe/lib/dtypes_extension/blob.py +304 -0
  309. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  310. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  311. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  312. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  313. maxframe/lib/filesystem/_oss_lib/common.py +124 -50
  314. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  315. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  316. maxframe/lib/filesystem/base.py +1 -1
  317. maxframe/lib/filesystem/core.py +1 -1
  318. maxframe/lib/filesystem/oss.py +115 -46
  319. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  320. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  321. maxframe/lib/wrapped_pickle.py +10 -0
  322. maxframe/opcodes.py +41 -15
  323. maxframe/protocol.py +12 -0
  324. maxframe/remote/core.py +4 -0
  325. maxframe/serialization/__init__.py +11 -2
  326. maxframe/serialization/arrow.py +38 -13
  327. maxframe/serialization/blob.py +32 -0
  328. maxframe/serialization/core.cp37-win32.pyd +0 -0
  329. maxframe/serialization/core.pyx +39 -1
  330. maxframe/serialization/exception.py +2 -4
  331. maxframe/serialization/numpy.py +11 -0
  332. maxframe/serialization/pandas.py +46 -9
  333. maxframe/serialization/serializables/core.py +2 -2
  334. maxframe/serialization/tests/test_serial.py +31 -4
  335. maxframe/tensor/__init__.py +38 -8
  336. maxframe/tensor/arithmetic/__init__.py +19 -10
  337. maxframe/tensor/arithmetic/core.py +2 -2
  338. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  339. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
  340. maxframe/tensor/core.py +6 -2
  341. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  342. maxframe/tensor/extensions/__init__.py +2 -0
  343. maxframe/tensor/extensions/apply_chunk.py +3 -3
  344. maxframe/tensor/extensions/rebalance.py +65 -0
  345. maxframe/tensor/fft/__init__.py +32 -0
  346. maxframe/tensor/fft/core.py +168 -0
  347. maxframe/tensor/fft/fft.py +112 -0
  348. maxframe/tensor/fft/fft2.py +118 -0
  349. maxframe/tensor/fft/fftfreq.py +80 -0
  350. maxframe/tensor/fft/fftn.py +123 -0
  351. maxframe/tensor/fft/fftshift.py +79 -0
  352. maxframe/tensor/fft/hfft.py +112 -0
  353. maxframe/tensor/fft/ifft.py +114 -0
  354. maxframe/tensor/fft/ifft2.py +115 -0
  355. maxframe/tensor/fft/ifftn.py +123 -0
  356. maxframe/tensor/fft/ifftshift.py +73 -0
  357. maxframe/tensor/fft/ihfft.py +93 -0
  358. maxframe/tensor/fft/irfft.py +118 -0
  359. maxframe/tensor/fft/irfft2.py +62 -0
  360. maxframe/tensor/fft/irfftn.py +114 -0
  361. maxframe/tensor/fft/rfft.py +116 -0
  362. maxframe/tensor/fft/rfft2.py +63 -0
  363. maxframe/tensor/fft/rfftfreq.py +87 -0
  364. maxframe/tensor/fft/rfftn.py +113 -0
  365. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  366. maxframe/tensor/linalg/__init__.py +7 -0
  367. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  368. maxframe/tensor/linalg/cholesky.py +117 -0
  369. maxframe/tensor/linalg/einsum.py +339 -0
  370. maxframe/tensor/linalg/lstsq.py +100 -0
  371. maxframe/tensor/linalg/matrix_norm.py +75 -0
  372. maxframe/tensor/linalg/norm.py +249 -0
  373. maxframe/tensor/linalg/solve.py +72 -0
  374. maxframe/tensor/linalg/solve_triangular.py +2 -2
  375. maxframe/tensor/linalg/vector_norm.py +113 -0
  376. maxframe/tensor/misc/__init__.py +24 -1
  377. maxframe/tensor/misc/argwhere.py +72 -0
  378. maxframe/tensor/misc/array_split.py +46 -0
  379. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  380. maxframe/tensor/misc/copyto.py +130 -0
  381. maxframe/tensor/misc/delete.py +104 -0
  382. maxframe/tensor/misc/dsplit.py +68 -0
  383. maxframe/tensor/misc/ediff1d.py +74 -0
  384. maxframe/tensor/misc/expand_dims.py +85 -0
  385. maxframe/tensor/misc/flip.py +90 -0
  386. maxframe/tensor/misc/fliplr.py +64 -0
  387. maxframe/tensor/misc/flipud.py +68 -0
  388. maxframe/tensor/misc/hsplit.py +85 -0
  389. maxframe/tensor/misc/insert.py +139 -0
  390. maxframe/tensor/misc/moveaxis.py +83 -0
  391. maxframe/tensor/misc/result_type.py +88 -0
  392. maxframe/tensor/misc/roll.py +124 -0
  393. maxframe/tensor/misc/rollaxis.py +77 -0
  394. maxframe/tensor/misc/shape.py +89 -0
  395. maxframe/tensor/misc/split.py +190 -0
  396. maxframe/tensor/misc/tile.py +109 -0
  397. maxframe/tensor/misc/vsplit.py +74 -0
  398. maxframe/tensor/reduction/array_equal.py +2 -1
  399. maxframe/tensor/sort/__init__.py +2 -0
  400. maxframe/tensor/sort/argpartition.py +98 -0
  401. maxframe/tensor/sort/partition.py +228 -0
  402. maxframe/tensor/spatial/__init__.py +15 -0
  403. maxframe/tensor/spatial/distance/__init__.py +17 -0
  404. maxframe/tensor/spatial/distance/cdist.py +421 -0
  405. maxframe/tensor/spatial/distance/pdist.py +398 -0
  406. maxframe/tensor/spatial/distance/squareform.py +153 -0
  407. maxframe/tensor/special/__init__.py +159 -21
  408. maxframe/tensor/special/airy.py +55 -0
  409. maxframe/tensor/special/bessel.py +199 -0
  410. maxframe/tensor/special/core.py +65 -4
  411. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  412. maxframe/tensor/special/ellip_harm.py +55 -0
  413. maxframe/tensor/special/err_fresnel.py +223 -0
  414. maxframe/tensor/special/gamma_funcs.py +303 -0
  415. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  416. maxframe/tensor/special/info_theory.py +189 -0
  417. maxframe/tensor/special/misc.py +21 -0
  418. maxframe/tensor/statistics/__init__.py +6 -0
  419. maxframe/tensor/statistics/corrcoef.py +77 -0
  420. maxframe/tensor/statistics/cov.py +222 -0
  421. maxframe/tensor/statistics/digitize.py +126 -0
  422. maxframe/tensor/statistics/histogram.py +520 -0
  423. maxframe/tensor/statistics/median.py +85 -0
  424. maxframe/tensor/statistics/ptp.py +89 -0
  425. maxframe/tensor/utils.py +3 -3
  426. maxframe/tests/test_udf.py +61 -0
  427. maxframe/tests/test_utils.py +51 -6
  428. maxframe/tests/utils.py +0 -2
  429. maxframe/typing_.py +2 -0
  430. maxframe/udf.py +130 -9
  431. maxframe/utils.py +254 -27
  432. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
  433. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
  434. maxframe_client/fetcher.py +35 -4
  435. maxframe_client/session/odps.py +7 -2
  436. maxframe_client/session/task.py +8 -1
  437. maxframe_client/tests/test_fetcher.py +76 -3
  438. maxframe_client/tests/test_session.py +28 -1
  439. maxframe/dataframe/arrays.py +0 -864
  440. maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  441. maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  442. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  443. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,124 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ...core import ENTITY_TYPE
16
+ from ...utils import find_objects, no_default
17
+ from ..utils import validate_axis
18
+
19
+
20
def dataframe_from_dict(data, orient="columns", dtype=None, columns=None):
    """
    Construct DataFrame from dict of array-like or dicts.

    Creates DataFrame object from dictionary by columns or by index
    allowing dtype specification.

    Parameters
    ----------
    data : dict
        Of the form {field : array-like} or {field : dict}.
    orient : {'columns', 'index', 'tight'}, default 'columns'
        The "orientation" of the data. If the keys of the passed dict
        should be the columns of the resulting DataFrame, pass 'columns'
        (default). Otherwise if the keys should be rows, pass 'index'.
        If 'tight', assume a dict with keys ['index', 'columns', 'data',
        'index_names', 'column_names'].

    dtype : dtype, default None
        Data type to force after DataFrame construction, otherwise infer.
    columns : list, default None
        Column labels to use when ``orient='index'``. For plain
        (non-entity) input the value is forwarded to
        ``pandas.DataFrame.from_dict``, which rejects it for
        ``orient='columns'``. It is not used when ``orient='tight'``.

    Returns
    -------
    DataFrame

    See Also
    --------
    DataFrame.from_records : DataFrame from structured ndarray, sequence
        of tuples or dicts, or DataFrame.
    DataFrame : DataFrame object creation using constructor.
    DataFrame.to_dict : Convert the DataFrame to a dictionary.

    Examples
    --------
    By default the keys of the dict become the DataFrame columns:

    >>> import maxframe.dataframe as md
    >>> data = {'col_1': [3, 2, 1, 0], 'col_2': ['a', 'b', 'c', 'd']}
    >>> md.DataFrame.from_dict(data).execute()
       col_1 col_2
    0      3     a
    1      2     b
    2      1     c
    3      0     d

    Specify ``orient='index'`` to create the DataFrame using dictionary
    keys as rows:

    >>> data = {'row_1': [3, 2, 1, 0], 'row_2': ['a', 'b', 'c', 'd']}
    >>> md.DataFrame.from_dict(data, orient='index').execute()
           0  1  2  3
    row_1  3  2  1  0
    row_2  a  b  c  d

    When using the 'index' orientation, the column names can be
    specified manually:

    >>> md.DataFrame.from_dict(data, orient='index',
    ...                        columns=['A', 'B', 'C', 'D']).execute()
           A  B  C  D
    row_1  3  2  1  0
    row_2  a  b  c  d

    Specify ``orient='tight'`` to create the DataFrame using a 'tight'
    format:

    >>> data = {'index': [('a', 'b'), ('a', 'c')],
    ...         'columns': [('x', 1), ('y', 2)],
    ...         'data': [[1, 3], [2, 4]],
    ...         'index_names': ['n1', 'n2'],
    ...         'column_names': ['z1', 'z2']}
    >>> md.DataFrame.from_dict(data, orient='tight').execute()
    z1     x  y
    z2     1  2
    n1 n2
    a  b   1  3
       c   2  4
    """
    # Imported lazily inside the function, as in the original block.
    from ..initializer import DataFrame as DataFrameInit
    from .from_tensor import dataframe_from_1d_tileables

    if orient != "tight" and not find_objects(data, ENTITY_TYPE):
        # Plain data: nothing in ``data`` is a MaxFrame entity.
        if orient == "columns" and columns is None:
            # Default orientation: the initializer can take the dict as-is.
            res = DataFrameInit(data)
        else:
            # The initializer alone would treat the keys as columns and drop
            # ``orient`` / ``columns``.  Let pandas apply them (and reject
            # invalid combinations) before wrapping the result.
            import pandas as pd

            local_df = pd.DataFrame.from_dict(data, orient=orient, columns=columns)
            res = DataFrameInit(local_df)
    elif orient == "tight":
        # init directly from the 'tight' layout
        init_kw = {
            "index": data.get("index"),
            "columns": data.get("columns"),
        }
        df = DataFrameInit(data["data"], **init_kw)
        # ``no_default`` marks an axis whose names were not supplied.
        rename_kw = {
            "index": data.get("index_names", no_default),
            "columns": data.get("column_names", no_default),
        }
        res = df.rename_axis(**rename_kw)
    else:
        # Some values are entities: assemble from 1-d tileables along the
        # axis named by ``orient``.
        axis = validate_axis(orient)
        res = dataframe_from_1d_tileables(data, columns=columns, axis=axis)

    if dtype is not None:
        res = res.astype(dtype)
    return res
@@ -51,7 +51,7 @@ class SeriesFromIndex(DataFrameOperator, DataFrameOperatorMixin):
51
51
 
52
52
 
53
53
  def series_from_index(ind, index=None, name=None):
54
- name = name or ind.name or 0
54
+ name = name or ind.name
55
55
  if index is not None:
56
56
  index = Index(index)
57
57
  op = SeriesFromIndex(input_=ind, index=index, name=name)
@@ -77,6 +77,83 @@ def from_records(
77
77
  sparse=False,
78
78
  **kw
79
79
  ):
80
+ """
81
+ Convert structured or record ndarray to DataFrame.
82
+
83
+ Creates a DataFrame object from a structured ndarray, sequence of
84
+ tuples or dicts, or DataFrame.
85
+
86
+ Parameters
87
+ ----------
88
+ data : structured ndarray, sequence of tuples or dicts, or DataFrame
89
+ Structured input data.
90
+
91
+ .. deprecated:: 2.1.0
92
+ Passing a DataFrame is deprecated.
93
+ index : str, list of fields, array-like
94
+ Field of array to use as the index, alternately a specific set of
95
+ input labels to use.
96
+ exclude : sequence, default None
97
+ Columns or fields to exclude.
98
+ columns : sequence, default None
99
+ Column names to use. If the passed data do not have names
100
+ associated with them, this argument provides names for the
101
+ columns. Otherwise this argument indicates the order of the columns
102
+ in the result (any names not found in the data will become all-NA
103
+ columns).
104
+ coerce_float : bool, default False
105
+ Attempt to convert values of non-string, non-numeric objects (like
106
+ decimal.Decimal) to floating point, useful for SQL result sets.
107
+ nrows : int, default None
108
+ Number of rows to read if data is an iterator.
109
+
110
+ Returns
111
+ -------
112
+ DataFrame
113
+
114
+ See Also
115
+ --------
116
+ DataFrame.from_dict : DataFrame from dict of array-like or dicts.
117
+ DataFrame : DataFrame object creation using constructor.
118
+
119
+ Examples
120
+ --------
121
+ Data can be provided as a structured ndarray:
122
+
123
+ >>> import maxframe.tensor as mt
124
+ >>> import maxframe.dataframe as md
125
+ >>> data = mt.array([(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')],
126
+ ... dtype=[('col_1', 'i4'), ('col_2', 'U1')])
127
+ >>> md.DataFrame.from_records(data).execute()
128
+ col_1 col_2
129
+ 0 3 a
130
+ 1 2 b
131
+ 2 1 c
132
+ 3 0 d
133
+
134
+ Data can be provided as a list of dicts:
135
+
136
+ >>> data = [{'col_1': 3, 'col_2': 'a'},
137
+ ... {'col_1': 2, 'col_2': 'b'},
138
+ ... {'col_1': 1, 'col_2': 'c'},
139
+ ... {'col_1': 0, 'col_2': 'd'}]
140
+ >>> md.DataFrame.from_records(data).execute()
141
+ col_1 col_2
142
+ 0 3 a
143
+ 1 2 b
144
+ 2 1 c
145
+ 3 0 d
146
+
147
+ Data can be provided as a list of tuples with corresponding columns:
148
+
149
+ >>> data = [(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')]
150
+ >>> md.DataFrame.from_records(data, columns=['col_1', 'col_2']).execute()
151
+ col_1 col_2
152
+ 0 3 a
153
+ 1 2 b
154
+ 2 1 c
155
+ 3 0 d
156
+ """
80
157
  if isinstance(data, np.ndarray):
81
158
  from .dataframe import from_pandas
82
159
 
@@ -39,6 +39,7 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
39
39
  input = AnyField("input")
40
40
  index = AnyField("index")
41
41
  columns = AnyField("columns")
42
+ axis = AnyField("axis")
42
43
 
43
44
  def __init__(self, *args, **kwargs):
44
45
  kwargs["_output_types"] = [OutputType.dataframe]
@@ -120,46 +121,82 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
120
121
  if isinstance(tileable, ENTITY_TYPE):
121
122
  tileables.append(tileable)
122
123
 
123
- if index is not None:
124
- tileable_size = tileables[0].shape[0]
125
- if hasattr(index, "shape"):
126
- index_size = index.shape[0]
124
+ if self.axis == 0:
125
+ if index is not None:
126
+ raise NotImplementedError("Cannot accept index when axis=0")
127
127
  else:
128
- index_size = len(index)
129
- if (
130
- not pd.isna(tileable_size)
131
- and not pd.isna(index_size)
132
- and tileable_size != index_size
133
- ):
134
- raise ValueError(
135
- f"index {index} should have the same shape "
136
- f"with tensor: {tileable_size}"
137
- )
138
- index_value = self._process_index(index, tileables)
128
+ index = pd.Index(list(input_1d_tileables.keys()))
129
+ index_value = parse_index(index, store_data=True)
130
+ self.index = index
131
+
132
+ if columns is not None:
133
+ tileable_size = tileables[0].shape[0] if tileables else 0
134
+ if not isinstance(columns, pd.Index):
135
+ columns = self.columns = pd.Index(columns)
136
+ column_size = columns.shape[0]
137
+ if (
138
+ not pd.isna(tileable_size)
139
+ and not pd.isna(column_size)
140
+ and tileable_size != column_size
141
+ ):
142
+ raise ValueError(
143
+ f"columns {columns} should have the same shape "
144
+ f"with tensor: {tileable_size}"
145
+ )
146
+ columns_value = self._process_index(columns, tileables)
147
+ else:
148
+ if not tileables or np.isnan(tileables[0].shape[0]):
149
+ columns = columns_value = None
150
+ else:
151
+ columns = pd.RangeIndex(0, tileables[0].shape[0])
152
+ columns_value = parse_index(columns, store_data=True)
153
+ self.columns = columns
154
+
155
+ shape = (len(input_1d_tileables), shape[0] if shape else 0)
139
156
  else:
140
- if np.isnan(tileables[0].shape[0]):
141
- index = pd.RangeIndex(0)
157
+ if index is not None:
158
+ tileable_size = tileables[0].shape[0] if tileables else 0
159
+ if hasattr(index, "shape"):
160
+ index_size = index.shape[0]
161
+ else:
162
+ index_size = len(index)
163
+ if (
164
+ not pd.isna(tileable_size)
165
+ and not pd.isna(index_size)
166
+ and tileable_size != index_size
167
+ ):
168
+ raise ValueError(
169
+ f"index {index} should have the same shape "
170
+ f"with tensor: {tileable_size}"
171
+ )
172
+ index_value = self._process_index(index, tileables)
142
173
  else:
143
- index = pd.RangeIndex(0, tileables[0].shape[0])
144
- self.index = index
145
- index_value = parse_index(index)
174
+ if not tileables or np.isnan(tileables[0].shape[0]):
175
+ index = pd.RangeIndex(0)
176
+ else:
177
+ index = pd.RangeIndex(0, tileables[0].shape[0])
178
+ self.index = index
179
+ index_value = parse_index(index)
146
180
 
147
- if columns is not None:
148
- if len(input_1d_tileables) != len(columns):
149
- raise ValueError(
150
- f"columns {columns} should have size {len(input_1d_tileables)}"
181
+ if columns is not None:
182
+ if len(input_1d_tileables) != len(columns):
183
+ raise ValueError(
184
+ f"columns {columns} should have size {len(input_1d_tileables)}"
185
+ )
186
+ if not isinstance(columns, pd.Index):
187
+ if isinstance(columns, ENTITY_TYPE):
188
+ raise NotImplementedError(
189
+ "The columns value cannot be a tileable"
190
+ )
191
+ columns = pd.Index(columns)
192
+ columns_value = parse_index(columns, store_data=True)
193
+ else:
194
+ columns_value = parse_index(
195
+ pd.RangeIndex(0, len(input_1d_tileables)), store_data=True
151
196
  )
152
- if not isinstance(columns, pd.Index):
153
- if isinstance(columns, ENTITY_TYPE):
154
- raise NotImplementedError("The columns value cannot be a tileable")
155
- columns = pd.Index(columns)
156
- columns_value = parse_index(columns, store_data=True)
157
- else:
158
- columns_value = parse_index(
159
- pd.RangeIndex(0, len(input_1d_tileables)), store_data=True
160
- )
161
197
 
162
- shape = (shape[0], len(input_1d_tileables))
198
+ shape = (shape[0] if shape else 0, len(input_1d_tileables))
199
+
163
200
  return self.new_dataframe(
164
201
  tileables,
165
202
  shape,
@@ -278,6 +315,9 @@ def dataframe_from_tensor(
278
315
  gpu: bool = None,
279
316
  sparse: bool = False,
280
317
  ):
318
+ if isinstance(columns, list) and columns and isinstance(columns[0], tuple):
319
+ columns = pd.MultiIndex.from_tuples(columns)
320
+
281
321
  if tensor is not None:
282
322
  if tensor.ndim > 2 or tensor.ndim <= 0:
283
323
  raise TypeError(
@@ -299,6 +339,8 @@ def dataframe_from_tensor(
299
339
  dtypes = pd.Series([], index=pd.Index([], dtype=object))
300
340
  if index is not None and not isinstance(index, ENTITY_TYPE):
301
341
  index = pd.Index(index)
342
+ if isinstance(index[0], tuple):
343
+ index = pd.MultiIndex.from_tuples(index)
302
344
  op = DataFrameFromTensor(
303
345
  input=tensor, index=index, columns=columns, gpu=gpu, sparse=sparse
304
346
  )
@@ -311,7 +353,10 @@ def dataframe_from_1d_tileables(
311
353
  columns: Union[pd.Index, list] = None,
312
354
  gpu: bool = None,
313
355
  sparse: bool = False,
356
+ axis: int = 1,
314
357
  ):
358
+ from pandas.core.dtypes.cast import find_common_type
359
+
315
360
  data = dict()
316
361
  for k, v in d.items():
317
362
  if isinstance(v, (list, tuple)) and any(
@@ -322,9 +367,9 @@ def dataframe_from_1d_tileables(
322
367
  data[k] = v
323
368
  d = data
324
369
  if columns is not None:
325
- tileables = [d.get(c) for c in columns]
370
+ tileables = [d.get(c) for c in columns] if axis == 1 else list(d.values())
326
371
  else:
327
- columns = list(d.keys())
372
+ columns = list(d.keys()) if axis == 1 else None
328
373
  tileables = list(d.values())
329
374
 
330
375
  gpu = (
@@ -332,14 +377,37 @@ def dataframe_from_1d_tileables(
332
377
  if gpu is None
333
378
  else gpu
334
379
  )
335
- dtypes = pd.Series(
336
- [t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype for t in tileables],
337
- index=columns,
338
- )
380
+
381
+ if axis == 0:
382
+ col_num = (
383
+ tileables[0].shape[0]
384
+ if hasattr(tileables[0], "shape")
385
+ else len(tileables[0])
386
+ )
387
+ if pd.isna(col_num):
388
+ dtypes = None
389
+ else:
390
+ common_dtype = find_common_type(
391
+ [
392
+ t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype
393
+ for t in tileables
394
+ ]
395
+ )
396
+ dtypes = pd.Series(
397
+ [common_dtype] * col_num,
398
+ index=columns if columns is not None else pd.RangeIndex(col_num),
399
+ )
400
+ else:
401
+ dtypes = pd.Series(
402
+ [t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype for t in tileables],
403
+ index=columns,
404
+ )
405
+
339
406
  if index is not None and not isinstance(index, ENTITY_TYPE):
340
407
  index = pd.Index(index)
408
+
341
409
  op = DataFrameFromTensor(
342
- input=d, index=index, columns=columns, gpu=gpu, sparse=sparse
410
+ input=d, index=index, columns=columns, gpu=gpu, sparse=sparse, axis=axis
343
411
  )
344
412
  return op(d, index, columns, dtypes)
345
413
 
@@ -38,8 +38,12 @@ from ...serialization.serializables import (
38
38
  StringField,
39
39
  )
40
40
  from ...utils import lazy_import, parse_readable_size
41
- from ..utils import parse_index, to_arrow_dtypes
42
- from .core import ColumnPruneSupportedDataSourceMixin, IncrementalIndexDatasource
41
+ from ..utils import parse_index, to_arrow_dtypes, validate_dtype_backend
42
+ from .core import (
43
+ ColumnPruneSupportedDataSourceMixin,
44
+ DtypeBackendCompatibleMixin,
45
+ IncrementalIndexDatasource,
46
+ )
43
47
 
44
48
  cudf = lazy_import("cudf")
45
49
 
@@ -88,6 +92,7 @@ def _find_chunk_start_end(f, offset, size):
88
92
  class DataFrameReadCSV(
89
93
  IncrementalIndexDatasource,
90
94
  ColumnPruneSupportedDataSourceMixin,
95
+ DtypeBackendCompatibleMixin,
91
96
  ):
92
97
  _op_type_ = opcodes.READ_CSV
93
98
 
@@ -101,7 +106,7 @@ class DataFrameReadCSV(
101
106
  offset = Int64Field("offset")
102
107
  size = Int64Field("size")
103
108
  incremental_index = BoolField("incremental_index")
104
- use_arrow_dtype = BoolField("use_arrow_dtype")
109
+ dtype_backend = StringField("dtype_backend", default=None)
105
110
  keep_usecols_order = BoolField("keep_usecols_order", default=None)
106
111
  storage_options = DictField("storage_options")
107
112
  merge_small_files = BoolField("merge_small_files")
@@ -151,7 +156,7 @@ def read_csv(
151
156
  head_bytes="100k",
152
157
  head_lines=None,
153
158
  incremental_index: bool = True,
154
- use_arrow_dtype: bool = None,
159
+ dtype_backend: str = None,
155
160
  storage_options: dict = None,
156
161
  memory_scale: int = None,
157
162
  merge_small_files: bool = True,
@@ -419,8 +424,8 @@ def read_csv(
419
424
  incremental_index: bool, default True
420
425
  If index_col not specified, ensure range index incremental,
421
426
  gain a slightly better performance if setting False.
422
- use_arrow_dtype: bool, default None
423
- If True, use arrow dtype to store columns.
427
+ dtype_backend: {'numpy', 'pyarrow'}, default None
428
+ Back-end data type applied to the resultant DataFrame (still experimental). If None, falls back to ``options.dataframe.dtype_backend``.
424
429
  storage_options: dict, optional
425
430
  Options for storage connection.
426
431
  merge_small_files: bool, default True
@@ -441,13 +446,12 @@ def read_csv(
441
446
  Examples
442
447
  --------
443
448
  >>> import maxframe.dataframe as md
444
- >>> from maxframe.lib.filesystem.oss import build_oss_path
445
449
  >>> md.read_csv('data.csv') # doctest: +SKIP
446
450
  >>> # read from HDFS
447
451
  >>> md.read_csv('hdfs://localhost:8020/test.csv') # doctest: +SKIP
448
452
  >>> # read from OSS
449
- >>> auth_path = build_oss_path(file_path, access_key_id, access_key_secret, end_point)
450
- >>> md.read_csv(auth_path)
453
+ >>> md.read_csv('oss://oss-cn-hangzhou-internal.aliyuncs.com/bucket/test.csv',
454
+ ... storage_options={'role_arn': 'acs:ram::xxxxxx:role/aliyunodpsdefaultrole'}) # doctest: +SKIP
451
455
  """
452
456
  # infer dtypes and columns
453
457
  if isinstance(path, (list, tuple)):
@@ -510,7 +514,7 @@ def read_csv(
510
514
  compression=compression,
511
515
  gpu=gpu,
512
516
  incremental_index=incremental_index,
513
- use_arrow_dtype=use_arrow_dtype,
517
+ dtype_backend=dtype_backend,
514
518
  storage_options=storage_options,
515
519
  memory_scale=memory_scale,
516
520
  merge_small_files=merge_small_files,
@@ -519,10 +523,13 @@ def read_csv(
519
523
  )
520
524
  chunk_bytes = chunk_bytes or options.chunk_store_limit
521
525
  dtypes = mini_df.dtypes
522
- if use_arrow_dtype is None:
523
- use_arrow_dtype = options.dataframe.use_arrow_dtype
524
- if not gpu and use_arrow_dtype:
525
- dtypes = to_arrow_dtypes(dtypes, test_df=mini_df)
526
+
527
+ dtype_backend = validate_dtype_backend(
528
+ dtype_backend or options.dataframe.dtype_backend
529
+ )
530
+
531
+ if not gpu and dtype_backend == "pyarrow":
532
+ dtypes = to_arrow_dtypes(dtypes)
526
533
  ret = op(
527
534
  index_value=index_value,
528
535
  columns_value=columns_value,
@@ -29,7 +29,7 @@ from odps.types import Column, OdpsSchema, validate_data_type
29
29
  from odps.utils import split_sql_by_semicolon
30
30
 
31
31
  from ... import opcodes
32
- from ...config import options
32
+ from ...config import option_context, options
33
33
  from ...core import OutputType
34
34
  from ...core.graph import DAG
35
35
  from ...io.odpsio import odps_schema_to_pandas_dtypes
@@ -44,8 +44,12 @@ from ...serialization.serializables import (
44
44
  StringField,
45
45
  )
46
46
  from ...utils import is_empty
47
- from ..utils import parse_index
48
- from .core import ColumnPruneSupportedDataSourceMixin, IncrementalIndexDatasource
47
+ from ..utils import parse_index, validate_dtype_backend
48
+ from .core import (
49
+ ColumnPruneSupportedDataSourceMixin,
50
+ DtypeBackendCompatibleMixin,
51
+ IncrementalIndexDatasource,
52
+ )
49
53
 
50
54
  logger = logging.getLogger(__name__)
51
55
 
@@ -266,6 +270,7 @@ def _build_explain_sql(
266
270
  class DataFrameReadODPSQuery(
267
271
  IncrementalIndexDatasource,
268
272
  ColumnPruneSupportedDataSourceMixin,
273
+ DtypeBackendCompatibleMixin,
269
274
  ):
270
275
  _op_type_ = opcodes.READ_ODPS_QUERY
271
276
 
@@ -273,12 +278,16 @@ class DataFrameReadODPSQuery(
273
278
  dtypes = SeriesField("dtypes", default=None)
274
279
  columns = AnyField("columns", default=None)
275
280
  nrows = Int64Field("nrows", default=None)
276
- use_arrow_dtype = BoolField("use_arrow_dtype", default=None)
281
+ dtype_backend = StringField("dtype_backend", default=None)
277
282
  string_as_binary = BoolField("string_as_binary", default=None)
278
283
  index_columns = ListField("index_columns", FieldTypes.string, default=None)
279
284
  index_dtypes = SeriesField("index_dtypes", default=None)
280
285
  column_renames = DictField("column_renames", default=None)
281
286
 
287
+ def __init__(self, dtype_backend=None, **kw):
288
+ dtype_backend = validate_dtype_backend(dtype_backend)
289
+ super().__init__(dtype_backend=dtype_backend, **kw)
290
+
282
291
  def get_columns(self):
283
292
  return self.columns or list(self.dtypes.index)
284
293
 
@@ -404,6 +413,7 @@ def read_odps_query(
404
413
  sql_hints: Dict[str, str] = None,
405
414
  anonymous_col_prefix: str = _DEFAULT_ANONYMOUS_COL_PREFIX,
406
415
  skip_schema: bool = False,
416
+ dtype_backend: str = None,
407
417
  **kw,
408
418
  ):
409
419
  """
@@ -428,6 +438,8 @@ def read_odps_query(
428
438
  Skip resolving output schema before execution. Once this is configured,
429
439
  the output DataFrame cannot be inputs of other DataFrame operators
430
440
  before execution.
441
+ dtype_backend: {'numpy', 'pyarrow'}, default None
442
+ Back-end data type applied to the resultant DataFrame (still experimental). If None, falls back to ``options.dataframe.dtype_backend``.
431
443
 
432
444
  Returns
433
445
  -------
@@ -459,6 +471,14 @@ def read_odps_query(
459
471
  if odps_entry is None:
460
472
  raise ValueError("Missing odps_entry parameter")
461
473
 
474
+ if "use_arrow_dtype" in kw:
475
+ dtype_backend = dtype_backend or validate_dtype_backend(
476
+ kw.pop("use_arrow_dtype")
477
+ )
478
+ dtype_backend = validate_dtype_backend(
479
+ dtype_backend or options.dataframe.dtype_backend
480
+ )
481
+
462
482
  col_renames = {}
463
483
  if not skip_schema:
464
484
  odps_schema = _resolve_query_schema(
@@ -479,7 +499,9 @@ def read_odps_query(
479
499
  else:
480
500
  new_columns.append(col)
481
501
 
482
- dtypes = odps_schema_to_pandas_dtypes(OdpsSchema(new_columns))
502
+ with option_context():
503
+ options.dataframe.dtype_backend = dtype_backend
504
+ dtypes = odps_schema_to_pandas_dtypes(OdpsSchema(new_columns))
483
505
  else:
484
506
  dtypes = None
485
507
 
@@ -500,10 +522,11 @@ def read_odps_query(
500
522
 
501
523
  chunk_bytes = kw.pop("chunk_bytes", None)
502
524
  chunk_size = kw.pop("chunk_size", None)
525
+
503
526
  op = DataFrameReadODPSQuery(
504
527
  query=query,
505
528
  dtypes=dtypes,
506
- use_arrow_dtype=kw.pop("use_arrow_dtype", True),
529
+ dtype_backend=dtype_backend,
507
530
  string_as_binary=string_as_binary,
508
531
  index_columns=index_col,
509
532
  index_dtypes=index_dtypes,