maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (443)
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win32.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +9 -8
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +38 -1
  15. maxframe/codegen/spe/dataframe/misc.py +11 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +32 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +39 -18
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  29. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  30. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  31. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  32. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  33. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  34. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  35. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  36. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  37. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  38. maxframe/codegen/spe/tensor/__init__.py +3 -0
  39. maxframe/codegen/spe/tensor/datasource.py +1 -0
  40. maxframe/codegen/spe/tensor/fft.py +74 -0
  41. maxframe/codegen/spe/tensor/linalg.py +29 -2
  42. maxframe/codegen/spe/tensor/misc.py +79 -25
  43. maxframe/codegen/spe/tensor/spatial.py +45 -0
  44. maxframe/codegen/spe/tensor/statistics.py +44 -0
  45. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  46. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  47. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  48. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  49. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  50. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  51. maxframe/codegen/spe/utils.py +2 -0
  52. maxframe/config/config.py +73 -9
  53. maxframe/config/tests/test_validators.py +13 -1
  54. maxframe/config/validators.py +49 -0
  55. maxframe/conftest.py +54 -17
  56. maxframe/core/accessor.py +2 -2
  57. maxframe/core/base.py +2 -1
  58. maxframe/core/entity/core.py +5 -0
  59. maxframe/core/entity/tileables.py +3 -1
  60. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  61. maxframe/core/graph/entity.py +8 -3
  62. maxframe/core/mode.py +6 -1
  63. maxframe/core/operator/base.py +9 -2
  64. maxframe/core/operator/core.py +10 -2
  65. maxframe/core/operator/utils.py +13 -0
  66. maxframe/dataframe/__init__.py +12 -5
  67. maxframe/dataframe/accessors/__init__.py +1 -1
  68. maxframe/dataframe/accessors/compat.py +45 -0
  69. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  70. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  71. maxframe/dataframe/accessors/dict_/core.py +48 -0
  72. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  73. maxframe/dataframe/accessors/dict_/length.py +7 -16
  74. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  75. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  76. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  77. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  78. maxframe/dataframe/accessors/list_/core.py +48 -0
  79. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  80. maxframe/dataframe/accessors/list_/length.py +7 -16
  81. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  82. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  83. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  84. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  85. maxframe/dataframe/accessors/struct_/core.py +43 -0
  86. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  87. maxframe/dataframe/accessors/struct_/field.py +123 -0
  88. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  89. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  90. maxframe/dataframe/arithmetic/__init__.py +18 -4
  91. maxframe/dataframe/arithmetic/between.py +106 -0
  92. maxframe/dataframe/arithmetic/dot.py +237 -0
  93. maxframe/dataframe/arithmetic/maximum.py +33 -0
  94. maxframe/dataframe/arithmetic/minimum.py +33 -0
  95. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  96. maxframe/dataframe/core.py +161 -224
  97. maxframe/dataframe/datasource/__init__.py +18 -0
  98. maxframe/dataframe/datasource/core.py +6 -0
  99. maxframe/dataframe/datasource/direct.py +57 -0
  100. maxframe/dataframe/datasource/from_dict.py +124 -0
  101. maxframe/dataframe/datasource/from_index.py +1 -1
  102. maxframe/dataframe/datasource/from_records.py +77 -0
  103. maxframe/dataframe/datasource/from_tensor.py +109 -41
  104. maxframe/dataframe/datasource/read_csv.py +21 -14
  105. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  106. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  107. maxframe/dataframe/datasource/read_parquet.py +38 -39
  108. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  109. maxframe/dataframe/datastore/__init__.py +11 -1
  110. maxframe/dataframe/datastore/direct.py +268 -0
  111. maxframe/dataframe/datastore/to_csv.py +29 -41
  112. maxframe/dataframe/datastore/to_odps.py +36 -4
  113. maxframe/dataframe/extensions/__init__.py +20 -4
  114. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  115. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  116. maxframe/dataframe/extensions/collect_kv.py +126 -0
  117. maxframe/dataframe/extensions/extract_kv.py +177 -0
  118. maxframe/dataframe/extensions/flatjson.py +2 -1
  119. maxframe/dataframe/extensions/map_reduce.py +263 -0
  120. maxframe/dataframe/extensions/rebalance.py +62 -0
  121. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  122. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  123. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  124. maxframe/dataframe/groupby/__init__.py +17 -2
  125. maxframe/dataframe/groupby/aggregation.py +86 -49
  126. maxframe/dataframe/groupby/apply.py +1 -1
  127. maxframe/dataframe/groupby/apply_chunk.py +19 -5
  128. maxframe/dataframe/groupby/core.py +116 -16
  129. maxframe/dataframe/groupby/cum.py +4 -25
  130. maxframe/dataframe/groupby/expanding.py +264 -0
  131. maxframe/dataframe/groupby/fill.py +1 -1
  132. maxframe/dataframe/groupby/getitem.py +12 -5
  133. maxframe/dataframe/groupby/head.py +11 -1
  134. maxframe/dataframe/groupby/rank.py +136 -0
  135. maxframe/dataframe/groupby/rolling.py +206 -0
  136. maxframe/dataframe/groupby/shift.py +114 -0
  137. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  138. maxframe/dataframe/indexing/__init__.py +22 -2
  139. maxframe/dataframe/indexing/droplevel.py +195 -0
  140. maxframe/dataframe/indexing/filter.py +169 -0
  141. maxframe/dataframe/indexing/get_level_values.py +76 -0
  142. maxframe/dataframe/indexing/iat.py +45 -0
  143. maxframe/dataframe/indexing/iloc.py +152 -12
  144. maxframe/dataframe/indexing/insert.py +46 -18
  145. maxframe/dataframe/indexing/loc.py +287 -7
  146. maxframe/dataframe/indexing/reindex.py +14 -5
  147. maxframe/dataframe/indexing/rename.py +6 -0
  148. maxframe/dataframe/indexing/rename_axis.py +2 -2
  149. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  150. maxframe/dataframe/indexing/reset_index.py +33 -6
  151. maxframe/dataframe/indexing/sample.py +8 -0
  152. maxframe/dataframe/indexing/setitem.py +3 -3
  153. maxframe/dataframe/indexing/swaplevel.py +185 -0
  154. maxframe/dataframe/indexing/take.py +99 -0
  155. maxframe/dataframe/indexing/truncate.py +140 -0
  156. maxframe/dataframe/indexing/where.py +0 -11
  157. maxframe/dataframe/indexing/xs.py +148 -0
  158. maxframe/dataframe/merge/__init__.py +15 -1
  159. maxframe/dataframe/merge/append.py +97 -98
  160. maxframe/dataframe/merge/combine.py +244 -0
  161. maxframe/dataframe/merge/combine_first.py +120 -0
  162. maxframe/dataframe/merge/compare.py +387 -0
  163. maxframe/dataframe/merge/concat.py +183 -0
  164. maxframe/dataframe/merge/update.py +271 -0
  165. maxframe/dataframe/misc/__init__.py +28 -11
  166. maxframe/dataframe/misc/_duplicate.py +10 -4
  167. maxframe/dataframe/misc/apply.py +1 -1
  168. maxframe/dataframe/misc/check_unique.py +82 -0
  169. maxframe/dataframe/misc/clip.py +145 -0
  170. maxframe/dataframe/misc/describe.py +175 -9
  171. maxframe/dataframe/misc/drop.py +31 -0
  172. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  173. maxframe/dataframe/misc/duplicated.py +2 -2
  174. maxframe/dataframe/misc/get_dummies.py +5 -1
  175. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  176. maxframe/dataframe/misc/isin.py +2 -2
  177. maxframe/dataframe/misc/map.py +125 -18
  178. maxframe/dataframe/misc/repeat.py +159 -0
  179. maxframe/dataframe/misc/tests/test_misc.py +48 -3
  180. maxframe/dataframe/misc/to_numeric.py +3 -0
  181. maxframe/dataframe/misc/transform.py +12 -5
  182. maxframe/dataframe/misc/transpose.py +13 -1
  183. maxframe/dataframe/misc/valid_index.py +115 -0
  184. maxframe/dataframe/misc/value_counts.py +38 -4
  185. maxframe/dataframe/missing/checkna.py +14 -6
  186. maxframe/dataframe/missing/dropna.py +5 -0
  187. maxframe/dataframe/missing/fillna.py +1 -1
  188. maxframe/dataframe/missing/replace.py +7 -4
  189. maxframe/dataframe/reduction/__init__.py +35 -16
  190. maxframe/dataframe/reduction/aggregation.py +43 -14
  191. maxframe/dataframe/reduction/all.py +2 -2
  192. maxframe/dataframe/reduction/any.py +2 -2
  193. maxframe/dataframe/reduction/argmax.py +103 -0
  194. maxframe/dataframe/reduction/argmin.py +103 -0
  195. maxframe/dataframe/reduction/core.py +80 -24
  196. maxframe/dataframe/reduction/count.py +13 -9
  197. maxframe/dataframe/reduction/cov.py +166 -0
  198. maxframe/dataframe/reduction/cummax.py +2 -2
  199. maxframe/dataframe/reduction/cummin.py +2 -2
  200. maxframe/dataframe/reduction/cumprod.py +2 -2
  201. maxframe/dataframe/reduction/cumsum.py +2 -2
  202. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  203. maxframe/dataframe/reduction/idxmax.py +185 -0
  204. maxframe/dataframe/reduction/idxmin.py +185 -0
  205. maxframe/dataframe/reduction/kurtosis.py +37 -30
  206. maxframe/dataframe/reduction/max.py +2 -2
  207. maxframe/dataframe/reduction/mean.py +9 -7
  208. maxframe/dataframe/reduction/median.py +2 -2
  209. maxframe/dataframe/reduction/min.py +2 -2
  210. maxframe/dataframe/reduction/mode.py +144 -0
  211. maxframe/dataframe/reduction/nunique.py +19 -11
  212. maxframe/dataframe/reduction/prod.py +18 -13
  213. maxframe/dataframe/reduction/reduction_size.py +2 -2
  214. maxframe/dataframe/reduction/sem.py +13 -9
  215. maxframe/dataframe/reduction/skew.py +31 -27
  216. maxframe/dataframe/reduction/str_concat.py +10 -7
  217. maxframe/dataframe/reduction/sum.py +18 -14
  218. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  219. maxframe/dataframe/reduction/unique.py +20 -3
  220. maxframe/dataframe/reduction/var.py +16 -12
  221. maxframe/dataframe/reshape/__init__.py +38 -0
  222. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  223. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  224. maxframe/dataframe/reshape/unstack.py +114 -0
  225. maxframe/dataframe/sort/__init__.py +16 -1
  226. maxframe/dataframe/sort/argsort.py +68 -0
  227. maxframe/dataframe/sort/core.py +2 -1
  228. maxframe/dataframe/sort/nlargest.py +238 -0
  229. maxframe/dataframe/sort/nsmallest.py +228 -0
  230. maxframe/dataframe/sort/rank.py +147 -0
  231. maxframe/dataframe/statistics/__init__.py +3 -3
  232. maxframe/dataframe/statistics/corr.py +1 -0
  233. maxframe/dataframe/statistics/quantile.py +2 -2
  234. maxframe/dataframe/tests/test_typing.py +104 -0
  235. maxframe/dataframe/tests/test_utils.py +66 -2
  236. maxframe/dataframe/tseries/__init__.py +19 -0
  237. maxframe/dataframe/tseries/at_time.py +61 -0
  238. maxframe/dataframe/tseries/between_time.py +122 -0
  239. maxframe/dataframe/typing_.py +185 -0
  240. maxframe/dataframe/utils.py +125 -52
  241. maxframe/dataframe/window/aggregation.py +8 -4
  242. maxframe/dataframe/window/core.py +14 -1
  243. maxframe/dataframe/window/ewm.py +1 -3
  244. maxframe/dataframe/window/expanding.py +37 -35
  245. maxframe/dataframe/window/rolling.py +49 -39
  246. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  247. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  248. maxframe/env.py +7 -4
  249. maxframe/errors.py +2 -2
  250. maxframe/io/odpsio/schema.py +9 -3
  251. maxframe/io/odpsio/tableio.py +7 -2
  252. maxframe/io/odpsio/tests/test_schema.py +198 -83
  253. maxframe/learn/__init__.py +10 -2
  254. maxframe/learn/cluster/__init__.py +15 -0
  255. maxframe/learn/cluster/_kmeans.py +782 -0
  256. maxframe/learn/contrib/llm/core.py +18 -7
  257. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  258. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  259. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  260. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  261. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  262. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  263. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  264. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  265. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  266. maxframe/learn/contrib/llm/models/managed.py +76 -11
  267. maxframe/learn/contrib/llm/models/openai.py +72 -0
  268. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  269. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  270. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  271. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  272. maxframe/learn/contrib/llm/text.py +348 -42
  273. maxframe/learn/contrib/models.py +4 -1
  274. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  275. maxframe/learn/contrib/xgboost/core.py +113 -4
  276. maxframe/learn/contrib/xgboost/predict.py +4 -2
  277. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  278. maxframe/learn/contrib/xgboost/train.py +7 -2
  279. maxframe/learn/core.py +66 -0
  280. maxframe/learn/linear_model/_base.py +58 -1
  281. maxframe/learn/linear_model/_lin_reg.py +1 -1
  282. maxframe/learn/metrics/__init__.py +6 -0
  283. maxframe/learn/metrics/_classification.py +145 -0
  284. maxframe/learn/metrics/_ranking.py +477 -0
  285. maxframe/learn/metrics/_scorer.py +60 -0
  286. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  287. maxframe/learn/metrics/pairwise/core.py +77 -0
  288. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  289. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  290. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  291. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  292. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  293. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  294. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  295. maxframe/learn/metrics/tests/__init__.py +13 -0
  296. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  297. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  298. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  299. maxframe/learn/utils/__init__.py +2 -1
  300. maxframe/learn/utils/checks.py +1 -2
  301. maxframe/learn/utils/core.py +59 -0
  302. maxframe/learn/utils/extmath.py +79 -9
  303. maxframe/learn/utils/odpsio.py +262 -0
  304. maxframe/learn/utils/validation.py +2 -2
  305. maxframe/lib/compat.py +40 -0
  306. maxframe/lib/dtypes_extension/__init__.py +16 -1
  307. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  308. maxframe/lib/dtypes_extension/blob.py +304 -0
  309. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  310. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  311. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  312. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  313. maxframe/lib/filesystem/_oss_lib/common.py +124 -50
  314. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  315. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  316. maxframe/lib/filesystem/base.py +1 -1
  317. maxframe/lib/filesystem/core.py +1 -1
  318. maxframe/lib/filesystem/oss.py +115 -46
  319. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  320. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  321. maxframe/lib/wrapped_pickle.py +10 -0
  322. maxframe/opcodes.py +41 -15
  323. maxframe/protocol.py +12 -0
  324. maxframe/remote/core.py +4 -0
  325. maxframe/serialization/__init__.py +11 -2
  326. maxframe/serialization/arrow.py +38 -13
  327. maxframe/serialization/blob.py +32 -0
  328. maxframe/serialization/core.cp37-win32.pyd +0 -0
  329. maxframe/serialization/core.pyx +39 -1
  330. maxframe/serialization/exception.py +2 -4
  331. maxframe/serialization/numpy.py +11 -0
  332. maxframe/serialization/pandas.py +46 -9
  333. maxframe/serialization/serializables/core.py +2 -2
  334. maxframe/serialization/tests/test_serial.py +31 -4
  335. maxframe/tensor/__init__.py +38 -8
  336. maxframe/tensor/arithmetic/__init__.py +19 -10
  337. maxframe/tensor/arithmetic/core.py +2 -2
  338. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  339. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
  340. maxframe/tensor/core.py +6 -2
  341. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  342. maxframe/tensor/extensions/__init__.py +2 -0
  343. maxframe/tensor/extensions/apply_chunk.py +3 -3
  344. maxframe/tensor/extensions/rebalance.py +65 -0
  345. maxframe/tensor/fft/__init__.py +32 -0
  346. maxframe/tensor/fft/core.py +168 -0
  347. maxframe/tensor/fft/fft.py +112 -0
  348. maxframe/tensor/fft/fft2.py +118 -0
  349. maxframe/tensor/fft/fftfreq.py +80 -0
  350. maxframe/tensor/fft/fftn.py +123 -0
  351. maxframe/tensor/fft/fftshift.py +79 -0
  352. maxframe/tensor/fft/hfft.py +112 -0
  353. maxframe/tensor/fft/ifft.py +114 -0
  354. maxframe/tensor/fft/ifft2.py +115 -0
  355. maxframe/tensor/fft/ifftn.py +123 -0
  356. maxframe/tensor/fft/ifftshift.py +73 -0
  357. maxframe/tensor/fft/ihfft.py +93 -0
  358. maxframe/tensor/fft/irfft.py +118 -0
  359. maxframe/tensor/fft/irfft2.py +62 -0
  360. maxframe/tensor/fft/irfftn.py +114 -0
  361. maxframe/tensor/fft/rfft.py +116 -0
  362. maxframe/tensor/fft/rfft2.py +63 -0
  363. maxframe/tensor/fft/rfftfreq.py +87 -0
  364. maxframe/tensor/fft/rfftn.py +113 -0
  365. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  366. maxframe/tensor/linalg/__init__.py +7 -0
  367. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  368. maxframe/tensor/linalg/cholesky.py +117 -0
  369. maxframe/tensor/linalg/einsum.py +339 -0
  370. maxframe/tensor/linalg/lstsq.py +100 -0
  371. maxframe/tensor/linalg/matrix_norm.py +75 -0
  372. maxframe/tensor/linalg/norm.py +249 -0
  373. maxframe/tensor/linalg/solve.py +72 -0
  374. maxframe/tensor/linalg/solve_triangular.py +2 -2
  375. maxframe/tensor/linalg/vector_norm.py +113 -0
  376. maxframe/tensor/misc/__init__.py +24 -1
  377. maxframe/tensor/misc/argwhere.py +72 -0
  378. maxframe/tensor/misc/array_split.py +46 -0
  379. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  380. maxframe/tensor/misc/copyto.py +130 -0
  381. maxframe/tensor/misc/delete.py +104 -0
  382. maxframe/tensor/misc/dsplit.py +68 -0
  383. maxframe/tensor/misc/ediff1d.py +74 -0
  384. maxframe/tensor/misc/expand_dims.py +85 -0
  385. maxframe/tensor/misc/flip.py +90 -0
  386. maxframe/tensor/misc/fliplr.py +64 -0
  387. maxframe/tensor/misc/flipud.py +68 -0
  388. maxframe/tensor/misc/hsplit.py +85 -0
  389. maxframe/tensor/misc/insert.py +139 -0
  390. maxframe/tensor/misc/moveaxis.py +83 -0
  391. maxframe/tensor/misc/result_type.py +88 -0
  392. maxframe/tensor/misc/roll.py +124 -0
  393. maxframe/tensor/misc/rollaxis.py +77 -0
  394. maxframe/tensor/misc/shape.py +89 -0
  395. maxframe/tensor/misc/split.py +190 -0
  396. maxframe/tensor/misc/tile.py +109 -0
  397. maxframe/tensor/misc/vsplit.py +74 -0
  398. maxframe/tensor/reduction/array_equal.py +2 -1
  399. maxframe/tensor/sort/__init__.py +2 -0
  400. maxframe/tensor/sort/argpartition.py +98 -0
  401. maxframe/tensor/sort/partition.py +228 -0
  402. maxframe/tensor/spatial/__init__.py +15 -0
  403. maxframe/tensor/spatial/distance/__init__.py +17 -0
  404. maxframe/tensor/spatial/distance/cdist.py +421 -0
  405. maxframe/tensor/spatial/distance/pdist.py +398 -0
  406. maxframe/tensor/spatial/distance/squareform.py +153 -0
  407. maxframe/tensor/special/__init__.py +159 -21
  408. maxframe/tensor/special/airy.py +55 -0
  409. maxframe/tensor/special/bessel.py +199 -0
  410. maxframe/tensor/special/core.py +65 -4
  411. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  412. maxframe/tensor/special/ellip_harm.py +55 -0
  413. maxframe/tensor/special/err_fresnel.py +223 -0
  414. maxframe/tensor/special/gamma_funcs.py +303 -0
  415. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  416. maxframe/tensor/special/info_theory.py +189 -0
  417. maxframe/tensor/special/misc.py +21 -0
  418. maxframe/tensor/statistics/__init__.py +6 -0
  419. maxframe/tensor/statistics/corrcoef.py +77 -0
  420. maxframe/tensor/statistics/cov.py +222 -0
  421. maxframe/tensor/statistics/digitize.py +126 -0
  422. maxframe/tensor/statistics/histogram.py +520 -0
  423. maxframe/tensor/statistics/median.py +85 -0
  424. maxframe/tensor/statistics/ptp.py +89 -0
  425. maxframe/tensor/utils.py +3 -3
  426. maxframe/tests/test_udf.py +61 -0
  427. maxframe/tests/test_utils.py +51 -6
  428. maxframe/tests/utils.py +0 -2
  429. maxframe/typing_.py +2 -0
  430. maxframe/udf.py +130 -9
  431. maxframe/utils.py +254 -27
  432. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
  433. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
  434. maxframe_client/fetcher.py +35 -4
  435. maxframe_client/session/odps.py +7 -2
  436. maxframe_client/session/task.py +8 -1
  437. maxframe_client/tests/test_fetcher.py +76 -3
  438. maxframe_client/tests/test_session.py +28 -1
  439. maxframe/dataframe/arrays.py +0 -864
  440. /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  441. /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  442. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  443. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -22,7 +22,7 @@ from odps.models import Table
22
22
  from odps.utils import to_timestamp
23
23
 
24
24
  from ... import opcodes
25
- from ...config import options
25
+ from ...config import option_context, options
26
26
  from ...core import OutputType
27
27
  from ...io.odpsio import odps_schema_to_pandas_dtypes
28
28
  from ...serialization.serializables import (
@@ -36,8 +36,12 @@ from ...serialization.serializables import (
36
36
  )
37
37
  from ...utils import estimate_table_size, is_empty
38
38
  from ..core import DataFrame # noqa: F401
39
- from ..utils import parse_index
40
- from .core import ColumnPruneSupportedDataSourceMixin, IncrementalIndexDatasource
39
+ from ..utils import parse_index, validate_dtype_backend
40
+ from .core import (
41
+ ColumnPruneSupportedDataSourceMixin,
42
+ DtypeBackendCompatibleMixin,
43
+ IncrementalIndexDatasource,
44
+ )
41
45
 
42
46
  logger = logging.getLogger(__name__)
43
47
 
@@ -45,6 +49,7 @@ logger = logging.getLogger(__name__)
45
49
  class DataFrameReadODPSTable(
46
50
  IncrementalIndexDatasource,
47
51
  ColumnPruneSupportedDataSourceMixin,
52
+ DtypeBackendCompatibleMixin,
48
53
  ):
49
54
  __slots__ = ("_odps_entry",)
50
55
  _op_type_ = opcodes.READ_ODPS_TABLE
@@ -54,18 +59,22 @@ class DataFrameReadODPSTable(
54
59
  dtypes = SeriesField("dtypes", default=None)
55
60
  columns = AnyField("columns", default=None)
56
61
  nrows = Int64Field("nrows", default=None)
57
- use_arrow_dtype = BoolField("use_arrow_dtype", default=None)
62
+ dtype_backend = StringField("dtype_backend", default=None)
58
63
  string_as_binary = BoolField("string_as_binary", default=None)
59
64
  append_partitions = BoolField("append_partitions", default=None)
60
65
  last_modified_time = Int64Field("last_modified_time", default=None)
61
66
  index_columns = ListField("index_columns", FieldTypes.string, default=None)
62
67
  index_dtypes = SeriesField("index_dtypes", default=None)
63
68
 
64
- def __init__(self, memory_scale=None, **kw):
69
+ def __init__(self, memory_scale=None, dtype_backend=None, **kw):
65
70
  output_type = kw.pop("output_type", OutputType.dataframe)
66
71
  self._odps_entry = kw.pop("odps_entry", None)
72
+ dtype_backend = validate_dtype_backend(dtype_backend)
67
73
  super(DataFrameReadODPSTable, self).__init__(
68
- memory_scale=memory_scale, _output_types=[output_type], **kw
74
+ memory_scale=memory_scale,
75
+ dtype_backend=dtype_backend,
76
+ _output_types=[output_type],
77
+ **kw,
69
78
  )
70
79
 
71
80
  @property
@@ -153,6 +162,7 @@ def read_odps_table(
153
162
  odps_entry: ODPS = None,
154
163
  string_as_binary: bool = None,
155
164
  append_partitions: bool = False,
165
+ dtype_backend: str = None,
156
166
  **kw,
157
167
  ):
158
168
  """
@@ -176,6 +186,8 @@ def read_odps_table(
176
186
  append_partitions: bool
177
187
  If True, will add all partition columns as selected columns when
178
188
  `columns` is not specified,
189
+ dtype_backend: {'numpy', 'pyarrow'}, default 'numpy'
190
+ Back-end data type applied to the resultant DataFrame (still experimental).
179
191
 
180
192
  Returns
181
193
  -------
@@ -202,9 +214,20 @@ def read_odps_table(
202
214
  else table.table_schema.simple_columns
203
215
  )
204
216
  table_columns = [c.name.lower() for c in cols]
205
- table_dtypes = odps_schema_to_pandas_dtypes(
206
- table.table_schema, with_partitions=True
217
+
218
+ if "use_arrow_dtype" in kw:
219
+ dtype_backend = dtype_backend or validate_dtype_backend(
220
+ kw.pop("use_arrow_dtype")
221
+ )
222
+ dtype_backend = validate_dtype_backend(
223
+ dtype_backend or options.dataframe.dtype_backend
207
224
  )
225
+
226
+ with option_context():
227
+ options.dataframe.dtype_backend = dtype_backend
228
+ table_dtypes = odps_schema_to_pandas_dtypes(
229
+ table.table_schema, with_partitions=True
230
+ )
208
231
  df_types = [table_dtypes[c] for c in table_columns]
209
232
 
210
233
  if isinstance(index_col, str):
@@ -246,7 +269,6 @@ def read_odps_table(
246
269
  dtypes = pd.Series(df_types, index=table_columns)
247
270
  chunk_bytes = kw.pop("chunk_bytes", None)
248
271
  chunk_size = kw.pop("chunk_size", None)
249
- use_arrow_dtype = kw.pop("use_arrow_dtype", True)
250
272
 
251
273
  partitions = partitions or kw.get("partition")
252
274
  if isinstance(partitions, str):
@@ -261,7 +283,7 @@ def read_odps_table(
261
283
  partitions=partitions,
262
284
  dtypes=dtypes,
263
285
  columns=columns,
264
- use_arrow_dtype=use_arrow_dtype,
286
+ dtype_backend=dtype_backend,
265
287
  string_as_binary=string_as_binary,
266
288
  append_partitions=append_partitions,
267
289
  last_modified_time=to_timestamp(table.last_data_modified_time),
@@ -32,6 +32,7 @@ except ImportError:
32
32
 
33
33
  from ... import opcodes
34
34
  from ...config import options
35
+ from ...lib.dtypes_extension import ArrowDtype
35
36
  from ...lib.filesystem import FileSystem, get_fs, glob, open_file
36
37
  from ...serialization.serializables import (
37
38
  AnyField,
@@ -43,10 +44,13 @@ from ...serialization.serializables import (
43
44
  StringField,
44
45
  )
45
46
  from ...utils import lazy_import
46
- from ..arrays import ArrowStringDtype
47
47
  from ..operators import OutputType
48
48
  from ..utils import parse_index, to_arrow_dtypes
49
- from .core import ColumnPruneSupportedDataSourceMixin, IncrementalIndexDatasource
49
+ from .core import (
50
+ ColumnPruneSupportedDataSourceMixin,
51
+ DtypeBackendCompatibleMixin,
52
+ IncrementalIndexDatasource,
53
+ )
50
54
 
51
55
  PARQUET_MEMORY_SCALE = 15
52
56
  STRING_FIELD_OVERHEAD = 50
@@ -89,13 +93,11 @@ class ParquetEngine:
89
93
  def read_dtypes(self, f, **kwargs):
90
94
  raise NotImplementedError
91
95
 
92
- def read_to_pandas(
93
- self, f, columns=None, nrows=None, use_arrow_dtype=None, **kwargs
94
- ):
96
+ def read_to_pandas(self, f, columns=None, nrows=None, dtype_backend=None, **kwargs):
95
97
  raise NotImplementedError
96
98
 
97
99
  def read_group_to_pandas(
98
- self, f, group_index, columns=None, nrows=None, use_arrow_dtype=None, **kwargs
100
+ self, f, group_index, columns=None, nrows=None, dtype_backend=None, **kwargs
99
101
  ):
100
102
  raise NotImplementedError
101
103
 
@@ -106,11 +108,11 @@ class ParquetEngine:
106
108
  partition_keys: Dict,
107
109
  columns=None,
108
110
  nrows=None,
109
- use_arrow_dtype=None,
111
+ dtype_backend=None,
110
112
  **kwargs,
111
113
  ):
112
114
  raw_df = self.read_to_pandas(
113
- f, columns=columns, nrows=nrows, use_arrow_dtype=use_arrow_dtype, **kwargs
115
+ f, columns=columns, nrows=nrows, dtype_backend=dtype_backend, **kwargs
114
116
  )
115
117
  for col, value in partition_keys.items():
116
118
  dictionary = partitions[col]
@@ -169,28 +171,26 @@ class ArrowEngine(ParquetEngine):
169
171
  return file.schema_arrow.empty_table().to_pandas().dtypes
170
172
 
171
173
  @classmethod
172
- def _table_to_pandas(cls, t, nrows=None, use_arrow_dtype=None):
174
+ def _table_to_pandas(cls, t, nrows=None, dtype_backend=None):
173
175
  if nrows is not None:
174
176
  t = t.slice(0, nrows)
175
- if use_arrow_dtype:
176
- df = t.to_pandas(types_mapper={pa.string(): ArrowStringDtype()}.get)
177
+ if dtype_backend == "pyarrow":
178
+ df = t.to_pandas(types_mapper={pa.string(): ArrowDtype(pa.string())}.get)
177
179
  else:
178
180
  df = t.to_pandas()
179
181
  return df
180
182
 
181
- def read_to_pandas(
182
- self, f, columns=None, nrows=None, use_arrow_dtype=None, **kwargs
183
- ):
183
+ def read_to_pandas(self, f, columns=None, nrows=None, dtype_backend=None, **kwargs):
184
184
  file = pq.ParquetFile(f)
185
185
  t = file.read(columns=columns, **kwargs)
186
- return self._table_to_pandas(t, nrows=nrows, use_arrow_dtype=use_arrow_dtype)
186
+ return self._table_to_pandas(t, nrows=nrows, dtype_backend=dtype_backend)
187
187
 
188
188
  def read_group_to_pandas(
189
- self, f, group_index, columns=None, nrows=None, use_arrow_dtype=None, **kwargs
189
+ self, f, group_index, columns=None, nrows=None, dtype_backend=None, **kwargs
190
190
  ):
191
191
  file = pq.ParquetFile(f)
192
192
  t = file.read_row_group(group_index, columns=columns, **kwargs)
193
- return self._table_to_pandas(t, nrows=nrows, use_arrow_dtype=use_arrow_dtype)
193
+ return self._table_to_pandas(t, nrows=nrows, dtype_backend=dtype_backend)
194
194
 
195
195
 
196
196
  class FastpaquetEngine(ParquetEngine):
@@ -203,14 +203,12 @@ class FastpaquetEngine(ParquetEngine):
203
203
  dtypes_dict = file._dtypes()
204
204
  return pd.Series(dict((c, dtypes_dict[c]) for c in file.columns))
205
205
 
206
- def read_to_pandas(
207
- self, f, columns=None, nrows=None, use_arrow_dtype=None, **kwargs
208
- ):
206
+ def read_to_pandas(self, f, columns=None, nrows=None, dtype_backend=None, **kwargs):
209
207
  file = fastparquet.ParquetFile(f)
210
208
  df = file.to_pandas(columns, **kwargs)
211
209
  if nrows is not None:
212
210
  df = df.head(nrows)
213
- if use_arrow_dtype:
211
+ if dtype_backend == "pyarrow":
214
212
  df = df.astype(to_arrow_dtypes(df.dtypes).to_dict())
215
213
  return df
216
214
 
@@ -265,29 +263,30 @@ class CudfEngine:
265
263
  class DataFrameReadParquet(
266
264
  IncrementalIndexDatasource,
267
265
  ColumnPruneSupportedDataSourceMixin,
266
+ DtypeBackendCompatibleMixin,
268
267
  ):
269
268
  _op_type_ = opcodes.READ_PARQUET
270
269
 
271
270
  path = AnyField("path")
272
271
  engine = StringField("engine")
273
272
  columns = ListField("columns")
274
- use_arrow_dtype = BoolField("use_arrow_dtype")
275
- groups_as_chunks = BoolField("groups_as_chunks")
276
- group_index = Int32Field("group_index")
277
- read_kwargs = DictField("read_kwargs")
278
- incremental_index = BoolField("incremental_index")
279
- storage_options = DictField("storage_options")
280
- is_partitioned = BoolField("is_partitioned")
281
- merge_small_files = BoolField("merge_small_files")
282
- merge_small_file_options = DictField("merge_small_file_options")
273
+ dtype_backend = StringField("dtype_backend", default=None)
274
+ groups_as_chunks = BoolField("groups_as_chunks", default=None)
275
+ group_index = Int32Field("group_index", default=None)
276
+ read_kwargs = DictField("read_kwargs", default=None)
277
+ incremental_index = BoolField("incremental_index", default=None)
278
+ storage_options = DictField("storage_options", default=None)
279
+ is_partitioned = BoolField("is_partitioned", default=None)
280
+ merge_small_files = BoolField("merge_small_files", default=None)
281
+ merge_small_file_options = DictField("merge_small_file_options", default=None)
283
282
  # for chunk
284
283
  partitions = DictField("partitions", default=None)
285
284
  partition_keys = DictField("partition_keys", default=None)
286
285
  num_group_rows = Int64Field("num_group_rows", default=None)
287
286
  # as read meta may be too time-consuming when number of files is large,
288
287
  # thus we only read first file to get row number and raw file size
289
- first_chunk_row_num = Int64Field("first_chunk_row_num")
290
- first_chunk_raw_bytes = Int64Field("first_chunk_raw_bytes")
288
+ first_chunk_row_num = Int64Field("first_chunk_row_num", default=None)
289
+ first_chunk_raw_bytes = Int64Field("first_chunk_raw_bytes", default=None)
291
290
 
292
291
  def get_columns(self):
293
292
  return self.columns
@@ -319,7 +318,7 @@ def read_parquet(
319
318
  engine: str = "auto",
320
319
  columns: list = None,
321
320
  groups_as_chunks: bool = False,
322
- use_arrow_dtype: bool = None,
321
+ dtype_backend: str = None,
323
322
  incremental_index: bool = False,
324
323
  storage_options: dict = None,
325
324
  memory_scale: int = None,
@@ -356,8 +355,8 @@ def read_parquet(
356
355
  incremental_index: bool, default False
357
356
  If index_col not specified, ensure range index incremental,
358
357
  gain a slightly better performance if setting False.
359
- use_arrow_dtype: bool, default None
360
- If True, use arrow dtype to store columns.
358
+ dtype_backend: {'numpy', 'pyarrow'}, default 'numpy'
359
+ Back-end data type applied to the resultant DataFrame (still experimental).
361
360
  storage_options: dict, optional
362
361
  Options for storage connection.
363
362
  memory_scale: int, optional
@@ -401,9 +400,9 @@ def read_parquet(
401
400
  if columns:
402
401
  dtypes = dtypes[columns]
403
402
 
404
- if use_arrow_dtype is None:
405
- use_arrow_dtype = options.dataframe.use_arrow_dtype
406
- if use_arrow_dtype:
403
+ if dtype_backend is None:
404
+ dtype_backend = options.dataframe.dtype_backend
405
+ if dtype_backend == "pyarrow":
407
406
  dtypes = to_arrow_dtypes(dtypes)
408
407
 
409
408
  index_value = parse_index(pd.RangeIndex(-1))
@@ -413,7 +412,7 @@ def read_parquet(
413
412
  engine=engine_type,
414
413
  columns=columns,
415
414
  groups_as_chunks=groups_as_chunks,
416
- use_arrow_dtype=use_arrow_dtype,
415
+ dtype_backend=dtype_backend,
417
416
  read_kwargs=kwargs,
418
417
  incremental_index=incremental_index,
419
418
  storage_options=storage_options,
@@ -184,6 +184,23 @@ def test_from_tensor():
184
184
  df = dataframe_from_1d_tileables(d)
185
185
  pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.RangeIndex(2))
186
186
 
187
+ # test axis parameter for dataframe_from_1d_tileables
188
+ d = OrderedDict(
189
+ [("a", mt.tensor(np.random.rand(4))), ("b", mt.tensor(np.random.rand(4)))]
190
+ )
191
+
192
+ # axis=1 (default behavior) - keys become columns
193
+ df = dataframe_from_1d_tileables(d, axis=1)
194
+ assert df.shape == (4, 2)
195
+ pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.Index(["a", "b"]))
196
+ pd.testing.assert_index_equal(df.index_value.to_pandas(), pd.RangeIndex(4))
197
+
198
+ # axis=0 - keys become index (rows)
199
+ df = dataframe_from_1d_tileables(d, axis=0)
200
+ assert df.shape == (2, 4)
201
+ pd.testing.assert_index_equal(df.index_value.to_pandas(), pd.Index(["a", "b"]))
202
+ pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.RangeIndex(4))
203
+
187
204
  series = series_from_tensor(mt.random.rand(4))
188
205
  pd.testing.assert_index_equal(series.index_value.to_pandas(), pd.RangeIndex(4))
189
206
 
@@ -207,6 +224,26 @@ def test_from_tensor():
207
224
  with pytest.raises(ValueError):
208
225
  dataframe_from_tensor(mt.random.rand(4, 3), columns=["a", "b"])
209
226
 
227
+ # 1-d tensors should have same shape
228
+ with pytest.raises(ValueError):
229
+ dataframe_from_1d_tileables(
230
+ OrderedDict(
231
+ [(0, mt.tensor(np.random.rand(3))), (1, mt.tensor(np.random.rand(2)))]
232
+ )
233
+ )
234
+
235
+ # index has wrong shape
236
+ with pytest.raises(ValueError):
237
+ dataframe_from_1d_tileables(
238
+ {0: mt.tensor(np.random.rand(3))}, index=mt.tensor(np.random.rand(2))
239
+ )
240
+
241
+ # columns have wrong shape
242
+ with pytest.raises(ValueError):
243
+ dataframe_from_1d_tileables(
244
+ {0: mt.tensor(np.random.rand(3))}, columns=["a", "b"]
245
+ )
246
+
210
247
  # index should be 1-d
211
248
  with pytest.raises(ValueError):
212
249
  dataframe_from_tensor(
@@ -12,14 +12,24 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from .direct import df_to_dict, series_to_dict, series_to_list, to_clipboard
16
+ from .to_csv import to_csv
15
17
  from .to_odps import to_odps_table
16
18
 
17
19
 
18
20
  def _install():
19
- from ..core import DATAFRAME_TYPE
21
+ from ..core import DATAFRAME_TYPE, SERIES_TYPE
20
22
 
21
23
  for t in DATAFRAME_TYPE:
24
+ t.to_clipboard = to_clipboard
25
+ t.to_csv = to_csv
26
+ t.to_dict = df_to_dict
22
27
  t.to_odps_table = to_odps_table
28
+ for t in SERIES_TYPE:
29
+ t.to_clipboard = to_clipboard
30
+ t.to_csv = to_csv
31
+ t.to_dict = series_to_dict
32
+ t.to_list = series_to_list
23
33
 
24
34
 
25
35
  _install()
@@ -0,0 +1,268 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ...utils import pd_release_version
16
+
17
+ _to_dict_has_index = pd_release_version[0] >= 2
18
+
19
+
20
+ def df_to_dict(
21
+ df, orient="dict", into=dict, index=True, batch_size=10000, session=None
22
+ ):
23
+ """
24
+ Convert the DataFrame to a dictionary.
25
+
26
+ The type of the key-value pairs can be customized with the parameters
27
+ (see below).
28
+
29
+ Parameters
30
+ ----------
31
+ orient : str {'dict', 'list', 'series', 'split', 'tight', 'records', 'index'}
32
+ Determines the type of the values of the dictionary.
33
+
34
+ - 'dict' (default) : dict like {column -> {index -> value}}
35
+ - 'list' : dict like {column -> [values]}
36
+ - 'series' : dict like {column -> Series(values)}
37
+ - 'split' : dict like
38
+ {'index' -> [index], 'columns' -> [columns], 'data' -> [values]}
39
+ - 'tight' : dict like
40
+ {'index' -> [index], 'columns' -> [columns], 'data' -> [values],
41
+ 'index_names' -> [index.names], 'column_names' -> [column.names]}
42
+ - 'records' : list like
43
+ [{column -> value}, ... , {column -> value}]
44
+ - 'index' : dict like {index -> {column -> value}}
45
+
46
+ into : class, default dict
47
+ The collections.abc.MutableMapping subclass used for all Mappings
48
+ in the return value. Can be the actual class or an empty
49
+ instance of the mapping type you want. If you want a
50
+ collections.defaultdict, you must pass it initialized.
51
+
52
+ index : bool, default True
53
+ Whether to include the index item (and index_names item if `orient`
54
+ is 'tight') in the returned dictionary. Can only be ``False``
55
+ when `orient` is 'split' or 'tight'.
56
+
57
+ Returns
58
+ -------
59
+ dict, list or collections.abc.MutableMapping
60
+ Return a collections.abc.MutableMapping object representing the
61
+ DataFrame. The resulting transformation depends on the `orient`
62
+ parameter.
63
+
64
+ See Also
65
+ --------
66
+ DataFrame.from_dict: Create a DataFrame from a dictionary.
67
+ DataFrame.to_json: Convert a DataFrame to JSON format.
68
+
69
+ Examples
70
+ --------
71
+ >>> import maxframe.dataframe as md
72
+ >>> df = md.DataFrame({'col1': [1, 2],
73
+ ... 'col2': [0.5, 0.75]},
74
+ ... index=['row1', 'row2'])
75
+ >>> df.execute()
76
+ col1 col2
77
+ row1 1 0.50
78
+ row2 2 0.75
79
+ >>> df.to_dict()
80
+ {'col1': {'row1': 1, 'row2': 2}, 'col2': {'row1': 0.5, 'row2': 0.75}}
81
+
82
+ You can specify the return orientation.
83
+
84
+ >>> df.to_dict('series')
85
+ {'col1': row1 1
86
+ row2 2
87
+ Name: col1, dtype: int64,
88
+ 'col2': row1 0.50
89
+ row2 0.75
90
+ Name: col2, dtype: float64}
91
+
92
+ >>> df.to_dict('split')
93
+ {'index': ['row1', 'row2'], 'columns': ['col1', 'col2'],
94
+ 'data': [[1, 0.5], [2, 0.75]]}
95
+
96
+ >>> df.to_dict('records')
97
+ [{'col1': 1, 'col2': 0.5}, {'col1': 2, 'col2': 0.75}]
98
+
99
+ >>> df.to_dict('index')
100
+ {'row1': {'col1': 1, 'col2': 0.5}, 'row2': {'col1': 2, 'col2': 0.75}}
101
+
102
+ >>> df.to_dict('tight')
103
+ {'index': ['row1', 'row2'], 'columns': ['col1', 'col2'],
104
+ 'data': [[1, 0.5], [2, 0.75]], 'index_names': [None], 'column_names': [None]}
105
+
106
+ You can also specify the mapping type.
107
+
108
+ >>> from collections import OrderedDict, defaultdict
109
+ >>> df.to_dict(into=OrderedDict)
110
+ OrderedDict([('col1', OrderedDict([('row1', 1), ('row2', 2)])),
111
+ ('col2', OrderedDict([('row1', 0.5), ('row2', 0.75)]))])
112
+
113
+ If you want a `defaultdict`, you need to initialize it:
114
+
115
+ >>> dd = defaultdict(list)
116
+ >>> df.to_dict('records', into=dd)
117
+ [defaultdict(<class 'list'>, {'col1': 1, 'col2': 0.5}),
118
+ defaultdict(<class 'list'>, {'col1': 2, 'col2': 0.75})]
119
+ """
120
+ fetch_kwargs = dict(batch_size=batch_size)
121
+ to_dict_kw = dict(orient=orient, into=into)
122
+ if _to_dict_has_index:
123
+ to_dict_kw["index"] = index
124
+ return df.to_pandas(session=session, fetch_kwargs=fetch_kwargs).to_dict(
125
+ **to_dict_kw
126
+ )
127
+
128
+
129
+ def series_to_dict(series, into=dict, batch_size=10000, session=None):
130
+ """
131
+ Convert Series to {label -> value} dict or dict-like object.
132
+
133
+ Parameters
134
+ ----------
135
+ into : class, default dict
136
+ The collections.abc.Mapping subclass to use as the return
137
+ object. Can be the actual class or an empty
138
+ instance of the mapping type you want. If you want a
139
+ collections.defaultdict, you must pass it initialized.
140
+
141
+ Returns
142
+ -------
143
+ collections.abc.Mapping
144
+ Key-value representation of Series.
145
+
146
+ Examples
147
+ --------
148
+ >>> import maxframe.dataframe as md
149
+ >>> s = md.Series([1, 2, 3, 4])
150
+ >>> s.to_dict()
151
+ {0: 1, 1: 2, 2: 3, 3: 4}
152
+ >>> from collections import OrderedDict, defaultdict
153
+ >>> s.to_dict(OrderedDict)
154
+ OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
155
+ >>> dd = defaultdict(list)
156
+ >>> s.to_dict(dd)
157
+ defaultdict(<class 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
158
+ """
159
+ fetch_kwargs = dict(batch_size=batch_size)
160
+ return series.to_pandas(session=session, fetch_kwargs=fetch_kwargs).to_dict(
161
+ into=into
162
+ )
163
+
164
+
165
+ def series_to_list(series, batch_size=10000, session=None):
166
+ """
167
+ Return a list of the values.
168
+
169
+ These are each a scalar type, which is a Python scalar
170
+ (for str, int, float) or a pandas scalar
171
+ (for Timestamp/Timedelta/Interval/Period)
172
+
173
+ Returns
174
+ -------
175
+ list
176
+
177
+ See Also
178
+ --------
179
+ numpy.ndarray.tolist : Return the array as an a.ndim-levels deep
180
+ nested list of Python scalars.
181
+
182
+ Examples
183
+ --------
184
+ For Series
185
+
186
+ >>> import maxframe.dataframe as md
187
+ >>> s = md.Series([1, 2, 3])
188
+ >>> s.to_list()
189
+ [1, 2, 3]
190
+
191
+ For Index:
192
+
193
+ >>> idx = md.Index([1, 2, 3])
194
+ >>> idx.execute()
195
+ Index([1, 2, 3], dtype='int64')
196
+
197
+ >>> idx.to_list()
198
+ [1, 2, 3]
199
+ """
200
+ fetch_kwargs = dict(batch_size=batch_size)
201
+ return series.to_pandas(session=session, fetch_kwargs=fetch_kwargs).to_list()
202
+
203
+
204
+ def to_clipboard(
205
+ obj, *, excel=True, sep=None, batch_size=10000, session=None, **kwargs
206
+ ):
207
+ """
208
+ Copy object to the system clipboard.
209
+
210
+ Write a text representation of object to the system clipboard.
211
+ This can be pasted into Excel, for example.
212
+
213
+ Parameters
214
+ ----------
215
+ excel : bool, default True
216
+ Produce output in a csv format for easy pasting into excel.
217
+
218
+ - True, use the provided separator for csv pasting.
219
+ - False, write a string representation of the object to the clipboard.
220
+
221
+ sep : str, default ``'\t'``
222
+ Field delimiter.
223
+ **kwargs
224
+ These parameters will be passed to DataFrame.to_csv.
225
+
226
+ See Also
227
+ --------
228
+ DataFrame.to_csv : Write a DataFrame to a comma-separated values
229
+ (csv) file.
230
+ read_clipboard : Read text from clipboard and pass to read_csv.
231
+
232
+ Notes
233
+ -----
234
+ Requirements for your platform.
235
+
236
+ - Linux : `xclip`, or `xsel` (with `PyQt4` modules)
237
+ - Windows : none
238
+ - macOS : none
239
+
240
+ This method uses the processes developed for the package `pyperclip`. A
241
+ solution to render any output string format is given in the examples.
242
+
243
+ Examples
244
+ --------
245
+ Copy the contents of a DataFrame to the clipboard.
246
+
247
+ >>> import maxframe.dataframe as md
248
+ >>> df = md.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['A', 'B', 'C'])
249
+
250
+ >>> df.to_clipboard(sep=',') # doctest: +SKIP
251
+ ... # Wrote the following to the system clipboard:
252
+ ... # ,A,B,C
253
+ ... # 0,1,2,3
254
+ ... # 1,4,5,6
255
+
256
+ We can omit the index by passing the keyword `index` and setting
257
+ it to false.
258
+
259
+ >>> df.to_clipboard(sep=',', index=False) # doctest: +SKIP
260
+ ... # Wrote the following to the system clipboard:
261
+ ... # A,B,C
262
+ ... # 1,2,3
263
+ ... # 4,5,6
264
+ """
265
+ fetch_kwargs = dict(batch_size=batch_size)
266
+ return obj.to_pandas(session=session, fetch_kwargs=fetch_kwargs).to_clipboard(
267
+ excel=excel, sep=sep, **kwargs
268
+ )