maxframe 2.0.0b2__cp38-cp38-win_amd64.whl → 2.2.0__cp38-cp38-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (391)
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp38-win_amd64.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +6 -6
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +34 -1
  15. maxframe/codegen/spe/dataframe/misc.py +9 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +14 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +30 -17
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  28. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  29. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  30. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  31. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  32. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  33. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  34. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  35. maxframe/codegen/spe/tensor/__init__.py +3 -0
  36. maxframe/codegen/spe/tensor/fft.py +74 -0
  37. maxframe/codegen/spe/tensor/linalg.py +29 -2
  38. maxframe/codegen/spe/tensor/misc.py +79 -25
  39. maxframe/codegen/spe/tensor/spatial.py +45 -0
  40. maxframe/codegen/spe/tensor/statistics.py +44 -0
  41. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  42. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  43. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  44. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  45. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  46. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  47. maxframe/codegen/spe/utils.py +2 -0
  48. maxframe/config/config.py +70 -9
  49. maxframe/config/tests/test_validators.py +13 -1
  50. maxframe/config/validators.py +49 -0
  51. maxframe/conftest.py +44 -17
  52. maxframe/core/accessor.py +2 -2
  53. maxframe/core/entity/core.py +5 -0
  54. maxframe/core/entity/tileables.py +1 -1
  55. maxframe/core/graph/core.cp38-win_amd64.pyd +0 -0
  56. maxframe/core/graph/entity.py +1 -2
  57. maxframe/core/operator/base.py +9 -2
  58. maxframe/core/operator/core.py +10 -2
  59. maxframe/core/operator/utils.py +13 -0
  60. maxframe/dataframe/__init__.py +10 -3
  61. maxframe/dataframe/accessors/__init__.py +1 -1
  62. maxframe/dataframe/accessors/compat.py +45 -0
  63. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  64. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  65. maxframe/dataframe/accessors/dict_/core.py +48 -0
  66. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  67. maxframe/dataframe/accessors/dict_/length.py +7 -16
  68. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  69. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  70. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  71. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  72. maxframe/dataframe/accessors/list_/core.py +48 -0
  73. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  74. maxframe/dataframe/accessors/list_/length.py +7 -16
  75. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  76. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  77. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  78. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  79. maxframe/dataframe/accessors/struct_/core.py +43 -0
  80. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  81. maxframe/dataframe/accessors/struct_/field.py +123 -0
  82. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  83. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  84. maxframe/dataframe/arithmetic/__init__.py +14 -4
  85. maxframe/dataframe/arithmetic/between.py +106 -0
  86. maxframe/dataframe/arithmetic/dot.py +237 -0
  87. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  88. maxframe/dataframe/core.py +63 -118
  89. maxframe/dataframe/datasource/__init__.py +18 -0
  90. maxframe/dataframe/datasource/from_dict.py +124 -0
  91. maxframe/dataframe/datasource/from_index.py +1 -1
  92. maxframe/dataframe/datasource/from_records.py +77 -0
  93. maxframe/dataframe/datasource/from_tensor.py +109 -41
  94. maxframe/dataframe/datasource/read_csv.py +2 -3
  95. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  96. maxframe/dataframe/datastore/__init__.py +5 -1
  97. maxframe/dataframe/datastore/to_csv.py +29 -41
  98. maxframe/dataframe/datastore/to_odps.py +30 -4
  99. maxframe/dataframe/extensions/__init__.py +20 -4
  100. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  101. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  102. maxframe/dataframe/extensions/collect_kv.py +126 -0
  103. maxframe/dataframe/extensions/extract_kv.py +177 -0
  104. maxframe/dataframe/extensions/map_reduce.py +263 -0
  105. maxframe/dataframe/extensions/rebalance.py +62 -0
  106. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  107. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  108. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  109. maxframe/dataframe/groupby/__init__.py +12 -1
  110. maxframe/dataframe/groupby/aggregation.py +78 -45
  111. maxframe/dataframe/groupby/apply.py +1 -1
  112. maxframe/dataframe/groupby/apply_chunk.py +18 -2
  113. maxframe/dataframe/groupby/core.py +96 -12
  114. maxframe/dataframe/groupby/cum.py +4 -25
  115. maxframe/dataframe/groupby/expanding.py +264 -0
  116. maxframe/dataframe/groupby/fill.py +1 -1
  117. maxframe/dataframe/groupby/getitem.py +12 -5
  118. maxframe/dataframe/groupby/head.py +11 -1
  119. maxframe/dataframe/groupby/rank.py +136 -0
  120. maxframe/dataframe/groupby/rolling.py +206 -0
  121. maxframe/dataframe/groupby/shift.py +114 -0
  122. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  123. maxframe/dataframe/indexing/__init__.py +20 -1
  124. maxframe/dataframe/indexing/droplevel.py +195 -0
  125. maxframe/dataframe/indexing/filter.py +169 -0
  126. maxframe/dataframe/indexing/get_level_values.py +76 -0
  127. maxframe/dataframe/indexing/iat.py +45 -0
  128. maxframe/dataframe/indexing/iloc.py +152 -12
  129. maxframe/dataframe/indexing/insert.py +1 -1
  130. maxframe/dataframe/indexing/loc.py +287 -7
  131. maxframe/dataframe/indexing/reindex.py +14 -5
  132. maxframe/dataframe/indexing/rename.py +6 -0
  133. maxframe/dataframe/indexing/rename_axis.py +2 -2
  134. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  135. maxframe/dataframe/indexing/reset_index.py +33 -6
  136. maxframe/dataframe/indexing/sample.py +8 -0
  137. maxframe/dataframe/indexing/setitem.py +3 -3
  138. maxframe/dataframe/indexing/swaplevel.py +185 -0
  139. maxframe/dataframe/indexing/take.py +99 -0
  140. maxframe/dataframe/indexing/truncate.py +140 -0
  141. maxframe/dataframe/indexing/where.py +0 -11
  142. maxframe/dataframe/indexing/xs.py +148 -0
  143. maxframe/dataframe/merge/__init__.py +12 -1
  144. maxframe/dataframe/merge/append.py +97 -98
  145. maxframe/dataframe/merge/combine_first.py +120 -0
  146. maxframe/dataframe/merge/compare.py +387 -0
  147. maxframe/dataframe/merge/concat.py +183 -0
  148. maxframe/dataframe/merge/update.py +271 -0
  149. maxframe/dataframe/misc/__init__.py +16 -10
  150. maxframe/dataframe/misc/_duplicate.py +10 -4
  151. maxframe/dataframe/misc/apply.py +1 -1
  152. maxframe/dataframe/misc/check_unique.py +51 -0
  153. maxframe/dataframe/misc/clip.py +145 -0
  154. maxframe/dataframe/misc/describe.py +175 -9
  155. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  156. maxframe/dataframe/misc/duplicated.py +2 -2
  157. maxframe/dataframe/misc/get_dummies.py +5 -1
  158. maxframe/dataframe/misc/isin.py +2 -2
  159. maxframe/dataframe/misc/map.py +94 -0
  160. maxframe/dataframe/misc/tests/test_misc.py +13 -2
  161. maxframe/dataframe/misc/to_numeric.py +3 -0
  162. maxframe/dataframe/misc/transform.py +12 -5
  163. maxframe/dataframe/misc/transpose.py +13 -1
  164. maxframe/dataframe/misc/valid_index.py +115 -0
  165. maxframe/dataframe/misc/value_counts.py +38 -4
  166. maxframe/dataframe/missing/checkna.py +13 -6
  167. maxframe/dataframe/missing/dropna.py +5 -0
  168. maxframe/dataframe/missing/fillna.py +1 -1
  169. maxframe/dataframe/missing/replace.py +7 -4
  170. maxframe/dataframe/reduction/__init__.py +29 -15
  171. maxframe/dataframe/reduction/aggregation.py +38 -9
  172. maxframe/dataframe/reduction/all.py +2 -2
  173. maxframe/dataframe/reduction/any.py +2 -2
  174. maxframe/dataframe/reduction/argmax.py +100 -0
  175. maxframe/dataframe/reduction/argmin.py +100 -0
  176. maxframe/dataframe/reduction/core.py +65 -18
  177. maxframe/dataframe/reduction/count.py +13 -9
  178. maxframe/dataframe/reduction/cov.py +166 -0
  179. maxframe/dataframe/reduction/cummax.py +2 -2
  180. maxframe/dataframe/reduction/cummin.py +2 -2
  181. maxframe/dataframe/reduction/cumprod.py +2 -2
  182. maxframe/dataframe/reduction/cumsum.py +2 -2
  183. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  184. maxframe/dataframe/reduction/idxmax.py +185 -0
  185. maxframe/dataframe/reduction/idxmin.py +185 -0
  186. maxframe/dataframe/reduction/kurtosis.py +37 -30
  187. maxframe/dataframe/reduction/max.py +2 -2
  188. maxframe/dataframe/reduction/mean.py +9 -7
  189. maxframe/dataframe/reduction/median.py +2 -2
  190. maxframe/dataframe/reduction/min.py +2 -2
  191. maxframe/dataframe/reduction/nunique.py +9 -8
  192. maxframe/dataframe/reduction/prod.py +18 -13
  193. maxframe/dataframe/reduction/reduction_size.py +2 -2
  194. maxframe/dataframe/reduction/sem.py +13 -9
  195. maxframe/dataframe/reduction/skew.py +31 -27
  196. maxframe/dataframe/reduction/str_concat.py +10 -7
  197. maxframe/dataframe/reduction/sum.py +18 -14
  198. maxframe/dataframe/reduction/unique.py +20 -3
  199. maxframe/dataframe/reduction/var.py +16 -12
  200. maxframe/dataframe/reshape/__init__.py +38 -0
  201. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  202. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  203. maxframe/dataframe/reshape/unstack.py +114 -0
  204. maxframe/dataframe/sort/__init__.py +8 -0
  205. maxframe/dataframe/sort/argsort.py +62 -0
  206. maxframe/dataframe/sort/core.py +1 -0
  207. maxframe/dataframe/sort/nlargest.py +238 -0
  208. maxframe/dataframe/sort/nsmallest.py +228 -0
  209. maxframe/dataframe/statistics/__init__.py +3 -3
  210. maxframe/dataframe/statistics/corr.py +1 -0
  211. maxframe/dataframe/statistics/quantile.py +2 -2
  212. maxframe/dataframe/tests/test_typing.py +104 -0
  213. maxframe/dataframe/tests/test_utils.py +66 -2
  214. maxframe/dataframe/typing_.py +185 -0
  215. maxframe/dataframe/utils.py +95 -26
  216. maxframe/dataframe/window/aggregation.py +8 -4
  217. maxframe/dataframe/window/core.py +14 -1
  218. maxframe/dataframe/window/ewm.py +1 -3
  219. maxframe/dataframe/window/expanding.py +37 -35
  220. maxframe/dataframe/window/rolling.py +49 -39
  221. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  222. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  223. maxframe/env.py +7 -4
  224. maxframe/errors.py +2 -2
  225. maxframe/io/odpsio/schema.py +9 -3
  226. maxframe/io/odpsio/tableio.py +7 -2
  227. maxframe/io/odpsio/tests/test_schema.py +198 -83
  228. maxframe/learn/__init__.py +10 -2
  229. maxframe/learn/cluster/__init__.py +15 -0
  230. maxframe/learn/cluster/_kmeans.py +782 -0
  231. maxframe/learn/contrib/llm/core.py +2 -0
  232. maxframe/learn/contrib/xgboost/core.py +86 -1
  233. maxframe/learn/contrib/xgboost/train.py +5 -2
  234. maxframe/learn/core.py +66 -0
  235. maxframe/learn/linear_model/_base.py +58 -1
  236. maxframe/learn/linear_model/_lin_reg.py +1 -1
  237. maxframe/learn/metrics/__init__.py +6 -0
  238. maxframe/learn/metrics/_classification.py +145 -0
  239. maxframe/learn/metrics/_ranking.py +477 -0
  240. maxframe/learn/metrics/_scorer.py +60 -0
  241. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  242. maxframe/learn/metrics/pairwise/core.py +77 -0
  243. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  244. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  245. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  246. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  247. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  248. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  249. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  250. maxframe/learn/metrics/tests/__init__.py +13 -0
  251. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  252. maxframe/learn/utils/__init__.py +1 -1
  253. maxframe/learn/utils/checks.py +1 -2
  254. maxframe/learn/utils/core.py +59 -0
  255. maxframe/learn/utils/extmath.py +37 -0
  256. maxframe/learn/utils/odpsio.py +193 -0
  257. maxframe/learn/utils/validation.py +2 -2
  258. maxframe/lib/compat.py +40 -0
  259. maxframe/lib/dtypes_extension/__init__.py +16 -1
  260. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  261. maxframe/lib/dtypes_extension/blob.py +304 -0
  262. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  263. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  264. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  265. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  266. maxframe/lib/filesystem/_oss_lib/common.py +122 -50
  267. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  268. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  269. maxframe/lib/filesystem/base.py +1 -1
  270. maxframe/lib/filesystem/core.py +1 -1
  271. maxframe/lib/filesystem/oss.py +115 -46
  272. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  273. maxframe/lib/mmh3.cp38-win_amd64.pyd +0 -0
  274. maxframe/lib/wrapped_pickle.py +10 -0
  275. maxframe/opcodes.py +33 -15
  276. maxframe/protocol.py +12 -0
  277. maxframe/serialization/__init__.py +11 -2
  278. maxframe/serialization/arrow.py +38 -13
  279. maxframe/serialization/blob.py +32 -0
  280. maxframe/serialization/core.cp38-win_amd64.pyd +0 -0
  281. maxframe/serialization/core.pyx +39 -1
  282. maxframe/serialization/exception.py +2 -4
  283. maxframe/serialization/numpy.py +11 -0
  284. maxframe/serialization/pandas.py +46 -9
  285. maxframe/serialization/serializables/core.py +2 -2
  286. maxframe/serialization/tests/test_serial.py +29 -2
  287. maxframe/tensor/__init__.py +38 -8
  288. maxframe/tensor/arithmetic/__init__.py +19 -10
  289. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  290. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
  291. maxframe/tensor/core.py +3 -2
  292. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  293. maxframe/tensor/extensions/__init__.py +2 -0
  294. maxframe/tensor/extensions/apply_chunk.py +3 -3
  295. maxframe/tensor/extensions/rebalance.py +65 -0
  296. maxframe/tensor/fft/__init__.py +32 -0
  297. maxframe/tensor/fft/core.py +168 -0
  298. maxframe/tensor/fft/fft.py +112 -0
  299. maxframe/tensor/fft/fft2.py +118 -0
  300. maxframe/tensor/fft/fftfreq.py +80 -0
  301. maxframe/tensor/fft/fftn.py +123 -0
  302. maxframe/tensor/fft/fftshift.py +79 -0
  303. maxframe/tensor/fft/hfft.py +112 -0
  304. maxframe/tensor/fft/ifft.py +114 -0
  305. maxframe/tensor/fft/ifft2.py +115 -0
  306. maxframe/tensor/fft/ifftn.py +123 -0
  307. maxframe/tensor/fft/ifftshift.py +73 -0
  308. maxframe/tensor/fft/ihfft.py +93 -0
  309. maxframe/tensor/fft/irfft.py +118 -0
  310. maxframe/tensor/fft/irfft2.py +62 -0
  311. maxframe/tensor/fft/irfftn.py +114 -0
  312. maxframe/tensor/fft/rfft.py +116 -0
  313. maxframe/tensor/fft/rfft2.py +63 -0
  314. maxframe/tensor/fft/rfftfreq.py +87 -0
  315. maxframe/tensor/fft/rfftn.py +113 -0
  316. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  317. maxframe/tensor/linalg/__init__.py +7 -0
  318. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  319. maxframe/tensor/linalg/cholesky.py +117 -0
  320. maxframe/tensor/linalg/einsum.py +339 -0
  321. maxframe/tensor/linalg/lstsq.py +100 -0
  322. maxframe/tensor/linalg/matrix_norm.py +75 -0
  323. maxframe/tensor/linalg/norm.py +249 -0
  324. maxframe/tensor/linalg/solve.py +72 -0
  325. maxframe/tensor/linalg/solve_triangular.py +2 -2
  326. maxframe/tensor/linalg/vector_norm.py +113 -0
  327. maxframe/tensor/misc/__init__.py +24 -1
  328. maxframe/tensor/misc/argwhere.py +72 -0
  329. maxframe/tensor/misc/array_split.py +46 -0
  330. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  331. maxframe/tensor/misc/copyto.py +130 -0
  332. maxframe/tensor/misc/delete.py +104 -0
  333. maxframe/tensor/misc/dsplit.py +68 -0
  334. maxframe/tensor/misc/ediff1d.py +74 -0
  335. maxframe/tensor/misc/expand_dims.py +85 -0
  336. maxframe/tensor/misc/flip.py +90 -0
  337. maxframe/tensor/misc/fliplr.py +64 -0
  338. maxframe/tensor/misc/flipud.py +68 -0
  339. maxframe/tensor/misc/hsplit.py +85 -0
  340. maxframe/tensor/misc/insert.py +139 -0
  341. maxframe/tensor/misc/moveaxis.py +83 -0
  342. maxframe/tensor/misc/result_type.py +88 -0
  343. maxframe/tensor/misc/roll.py +124 -0
  344. maxframe/tensor/misc/rollaxis.py +77 -0
  345. maxframe/tensor/misc/shape.py +89 -0
  346. maxframe/tensor/misc/split.py +190 -0
  347. maxframe/tensor/misc/tile.py +109 -0
  348. maxframe/tensor/misc/vsplit.py +74 -0
  349. maxframe/tensor/reduction/array_equal.py +2 -1
  350. maxframe/tensor/sort/__init__.py +2 -0
  351. maxframe/tensor/sort/argpartition.py +98 -0
  352. maxframe/tensor/sort/partition.py +228 -0
  353. maxframe/tensor/spatial/__init__.py +15 -0
  354. maxframe/tensor/spatial/distance/__init__.py +17 -0
  355. maxframe/tensor/spatial/distance/cdist.py +421 -0
  356. maxframe/tensor/spatial/distance/pdist.py +398 -0
  357. maxframe/tensor/spatial/distance/squareform.py +153 -0
  358. maxframe/tensor/special/__init__.py +159 -21
  359. maxframe/tensor/special/airy.py +55 -0
  360. maxframe/tensor/special/bessel.py +199 -0
  361. maxframe/tensor/special/core.py +65 -4
  362. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  363. maxframe/tensor/special/ellip_harm.py +55 -0
  364. maxframe/tensor/special/err_fresnel.py +223 -0
  365. maxframe/tensor/special/gamma_funcs.py +303 -0
  366. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  367. maxframe/tensor/special/info_theory.py +189 -0
  368. maxframe/tensor/special/misc.py +21 -0
  369. maxframe/tensor/statistics/__init__.py +6 -0
  370. maxframe/tensor/statistics/corrcoef.py +77 -0
  371. maxframe/tensor/statistics/cov.py +222 -0
  372. maxframe/tensor/statistics/digitize.py +126 -0
  373. maxframe/tensor/statistics/histogram.py +520 -0
  374. maxframe/tensor/statistics/median.py +85 -0
  375. maxframe/tensor/statistics/ptp.py +89 -0
  376. maxframe/tensor/utils.py +3 -3
  377. maxframe/tests/test_utils.py +43 -1
  378. maxframe/tests/utils.py +0 -2
  379. maxframe/typing_.py +2 -0
  380. maxframe/udf.py +27 -2
  381. maxframe/utils.py +193 -19
  382. {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
  383. {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
  384. maxframe_client/fetcher.py +35 -4
  385. maxframe_client/session/odps.py +7 -2
  386. maxframe_client/tests/test_fetcher.py +76 -3
  387. maxframe_client/tests/test_session.py +4 -1
  388. /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  389. /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  390. {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
  391. {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
@@ -51,7 +51,7 @@ class SeriesFromIndex(DataFrameOperator, DataFrameOperatorMixin):
51
51
 
52
52
 
53
53
  def series_from_index(ind, index=None, name=None):
54
- name = name or ind.name or 0
54
+ name = name or ind.name
55
55
  if index is not None:
56
56
  index = Index(index)
57
57
  op = SeriesFromIndex(input_=ind, index=index, name=name)
@@ -77,6 +77,83 @@ def from_records(
77
77
  sparse=False,
78
78
  **kw
79
79
  ):
80
+ """
81
+ Convert structured or record ndarray to DataFrame.
82
+
83
+ Creates a DataFrame object from a structured ndarray, sequence of
84
+ tuples or dicts, or DataFrame.
85
+
86
+ Parameters
87
+ ----------
88
+ data : structured ndarray, sequence of tuples or dicts, or DataFrame
89
+ Structured input data.
90
+
91
+ .. deprecated:: 2.1.0
92
+ Passing a DataFrame is deprecated.
93
+ index : str, list of fields, array-like
94
+ Field of array to use as the index, alternately a specific set of
95
+ input labels to use.
96
+ exclude : sequence, default None
97
+ Columns or fields to exclude.
98
+ columns : sequence, default None
99
+ Column names to use. If the passed data do not have names
100
+ associated with them, this argument provides names for the
101
+ columns. Otherwise this argument indicates the order of the columns
102
+ in the result (any names not found in the data will become all-NA
103
+ columns).
104
+ coerce_float : bool, default False
105
+ Attempt to convert values of non-string, non-numeric objects (like
106
+ decimal.Decimal) to floating point, useful for SQL result sets.
107
+ nrows : int, default None
108
+ Number of rows to read if data is an iterator.
109
+
110
+ Returns
111
+ -------
112
+ DataFrame
113
+
114
+ See Also
115
+ --------
116
+ DataFrame.from_dict : DataFrame from dict of array-like or dicts.
117
+ DataFrame : DataFrame object creation using constructor.
118
+
119
+ Examples
120
+ --------
121
+ Data can be provided as a structured ndarray:
122
+
123
+ >>> import maxframe.tensor as mt
124
+ >>> import maxframe.dataframe as md
125
+ >>> data = mt.array([(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')],
126
+ ... dtype=[('col_1', 'i4'), ('col_2', 'U1')])
127
+ >>> md.DataFrame.from_records(data).execute()
128
+ col_1 col_2
129
+ 0 3 a
130
+ 1 2 b
131
+ 2 1 c
132
+ 3 0 d
133
+
134
+ Data can be provided as a list of dicts:
135
+
136
+ >>> data = [{'col_1': 3, 'col_2': 'a'},
137
+ ... {'col_1': 2, 'col_2': 'b'},
138
+ ... {'col_1': 1, 'col_2': 'c'},
139
+ ... {'col_1': 0, 'col_2': 'd'}]
140
+ >>> md.DataFrame.from_records(data).execute()
141
+ col_1 col_2
142
+ 0 3 a
143
+ 1 2 b
144
+ 2 1 c
145
+ 3 0 d
146
+
147
+ Data can be provided as a list of tuples with corresponding columns:
148
+
149
+ >>> data = [(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')]
150
+ >>> md.DataFrame.from_records(data, columns=['col_1', 'col_2']).execute()
151
+ col_1 col_2
152
+ 0 3 a
153
+ 1 2 b
154
+ 2 1 c
155
+ 3 0 d
156
+ """
80
157
  if isinstance(data, np.ndarray):
81
158
  from .dataframe import from_pandas
82
159
 
@@ -39,6 +39,7 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
39
39
  input = AnyField("input")
40
40
  index = AnyField("index")
41
41
  columns = AnyField("columns")
42
+ axis = AnyField("axis")
42
43
 
43
44
  def __init__(self, *args, **kwargs):
44
45
  kwargs["_output_types"] = [OutputType.dataframe]
@@ -120,46 +121,82 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
120
121
  if isinstance(tileable, ENTITY_TYPE):
121
122
  tileables.append(tileable)
122
123
 
123
- if index is not None:
124
- tileable_size = tileables[0].shape[0]
125
- if hasattr(index, "shape"):
126
- index_size = index.shape[0]
124
+ if self.axis == 0:
125
+ if index is not None:
126
+ raise NotImplementedError("Cannot accept index when axis=0")
127
127
  else:
128
- index_size = len(index)
129
- if (
130
- not pd.isna(tileable_size)
131
- and not pd.isna(index_size)
132
- and tileable_size != index_size
133
- ):
134
- raise ValueError(
135
- f"index {index} should have the same shape "
136
- f"with tensor: {tileable_size}"
137
- )
138
- index_value = self._process_index(index, tileables)
128
+ index = pd.Index(list(input_1d_tileables.keys()))
129
+ index_value = parse_index(index, store_data=True)
130
+ self.index = index
131
+
132
+ if columns is not None:
133
+ tileable_size = tileables[0].shape[0] if tileables else 0
134
+ if not isinstance(columns, pd.Index):
135
+ columns = self.columns = pd.Index(columns)
136
+ column_size = columns.shape[0]
137
+ if (
138
+ not pd.isna(tileable_size)
139
+ and not pd.isna(column_size)
140
+ and tileable_size != column_size
141
+ ):
142
+ raise ValueError(
143
+ f"columns {columns} should have the same shape "
144
+ f"with tensor: {tileable_size}"
145
+ )
146
+ columns_value = self._process_index(columns, tileables)
147
+ else:
148
+ if not tileables or np.isnan(tileables[0].shape[0]):
149
+ columns = columns_value = None
150
+ else:
151
+ columns = pd.RangeIndex(0, tileables[0].shape[0])
152
+ columns_value = parse_index(columns, store_data=True)
153
+ self.columns = columns
154
+
155
+ shape = (len(input_1d_tileables), shape[0] if shape else 0)
139
156
  else:
140
- if np.isnan(tileables[0].shape[0]):
141
- index = pd.RangeIndex(0)
157
+ if index is not None:
158
+ tileable_size = tileables[0].shape[0] if tileables else 0
159
+ if hasattr(index, "shape"):
160
+ index_size = index.shape[0]
161
+ else:
162
+ index_size = len(index)
163
+ if (
164
+ not pd.isna(tileable_size)
165
+ and not pd.isna(index_size)
166
+ and tileable_size != index_size
167
+ ):
168
+ raise ValueError(
169
+ f"index {index} should have the same shape "
170
+ f"with tensor: {tileable_size}"
171
+ )
172
+ index_value = self._process_index(index, tileables)
142
173
  else:
143
- index = pd.RangeIndex(0, tileables[0].shape[0])
144
- self.index = index
145
- index_value = parse_index(index)
174
+ if not tileables or np.isnan(tileables[0].shape[0]):
175
+ index = pd.RangeIndex(0)
176
+ else:
177
+ index = pd.RangeIndex(0, tileables[0].shape[0])
178
+ self.index = index
179
+ index_value = parse_index(index)
146
180
 
147
- if columns is not None:
148
- if len(input_1d_tileables) != len(columns):
149
- raise ValueError(
150
- f"columns {columns} should have size {len(input_1d_tileables)}"
181
+ if columns is not None:
182
+ if len(input_1d_tileables) != len(columns):
183
+ raise ValueError(
184
+ f"columns {columns} should have size {len(input_1d_tileables)}"
185
+ )
186
+ if not isinstance(columns, pd.Index):
187
+ if isinstance(columns, ENTITY_TYPE):
188
+ raise NotImplementedError(
189
+ "The columns value cannot be a tileable"
190
+ )
191
+ columns = pd.Index(columns)
192
+ columns_value = parse_index(columns, store_data=True)
193
+ else:
194
+ columns_value = parse_index(
195
+ pd.RangeIndex(0, len(input_1d_tileables)), store_data=True
151
196
  )
152
- if not isinstance(columns, pd.Index):
153
- if isinstance(columns, ENTITY_TYPE):
154
- raise NotImplementedError("The columns value cannot be a tileable")
155
- columns = pd.Index(columns)
156
- columns_value = parse_index(columns, store_data=True)
157
- else:
158
- columns_value = parse_index(
159
- pd.RangeIndex(0, len(input_1d_tileables)), store_data=True
160
- )
161
197
 
162
- shape = (shape[0], len(input_1d_tileables))
198
+ shape = (shape[0] if shape else 0, len(input_1d_tileables))
199
+
163
200
  return self.new_dataframe(
164
201
  tileables,
165
202
  shape,
@@ -278,6 +315,9 @@ def dataframe_from_tensor(
278
315
  gpu: bool = None,
279
316
  sparse: bool = False,
280
317
  ):
318
+ if isinstance(columns, list) and columns and isinstance(columns[0], tuple):
319
+ columns = pd.MultiIndex.from_tuples(columns)
320
+
281
321
  if tensor is not None:
282
322
  if tensor.ndim > 2 or tensor.ndim <= 0:
283
323
  raise TypeError(
@@ -299,6 +339,8 @@ def dataframe_from_tensor(
299
339
  dtypes = pd.Series([], index=pd.Index([], dtype=object))
300
340
  if index is not None and not isinstance(index, ENTITY_TYPE):
301
341
  index = pd.Index(index)
342
+ if isinstance(index[0], tuple):
343
+ index = pd.MultiIndex.from_tuples(index)
302
344
  op = DataFrameFromTensor(
303
345
  input=tensor, index=index, columns=columns, gpu=gpu, sparse=sparse
304
346
  )
@@ -311,7 +353,10 @@ def dataframe_from_1d_tileables(
311
353
  columns: Union[pd.Index, list] = None,
312
354
  gpu: bool = None,
313
355
  sparse: bool = False,
356
+ axis: int = 1,
314
357
  ):
358
+ from pandas.core.dtypes.cast import find_common_type
359
+
315
360
  data = dict()
316
361
  for k, v in d.items():
317
362
  if isinstance(v, (list, tuple)) and any(
@@ -322,9 +367,9 @@ def dataframe_from_1d_tileables(
322
367
  data[k] = v
323
368
  d = data
324
369
  if columns is not None:
325
- tileables = [d.get(c) for c in columns]
370
+ tileables = [d.get(c) for c in columns] if axis == 1 else list(d.values())
326
371
  else:
327
- columns = list(d.keys())
372
+ columns = list(d.keys()) if axis == 1 else None
328
373
  tileables = list(d.values())
329
374
 
330
375
  gpu = (
@@ -332,14 +377,37 @@ def dataframe_from_1d_tileables(
332
377
  if gpu is None
333
378
  else gpu
334
379
  )
335
- dtypes = pd.Series(
336
- [t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype for t in tileables],
337
- index=columns,
338
- )
380
+
381
+ if axis == 0:
382
+ col_num = (
383
+ tileables[0].shape[0]
384
+ if hasattr(tileables[0], "shape")
385
+ else len(tileables[0])
386
+ )
387
+ if pd.isna(col_num):
388
+ dtypes = None
389
+ else:
390
+ common_dtype = find_common_type(
391
+ [
392
+ t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype
393
+ for t in tileables
394
+ ]
395
+ )
396
+ dtypes = pd.Series(
397
+ [common_dtype] * col_num,
398
+ index=columns if columns is not None else pd.RangeIndex(col_num),
399
+ )
400
+ else:
401
+ dtypes = pd.Series(
402
+ [t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype for t in tileables],
403
+ index=columns,
404
+ )
405
+
339
406
  if index is not None and not isinstance(index, ENTITY_TYPE):
340
407
  index = pd.Index(index)
408
+
341
409
  op = DataFrameFromTensor(
342
- input=d, index=index, columns=columns, gpu=gpu, sparse=sparse
410
+ input=d, index=index, columns=columns, gpu=gpu, sparse=sparse, axis=axis
343
411
  )
344
412
  return op(d, index, columns, dtypes)
345
413
 
@@ -441,13 +441,12 @@ def read_csv(
441
441
  Examples
442
442
  --------
443
443
  >>> import maxframe.dataframe as md
444
- >>> from maxframe.lib.filesystem.oss import build_oss_path
445
444
  >>> md.read_csv('data.csv') # doctest: +SKIP
446
445
  >>> # read from HDFS
447
446
  >>> md.read_csv('hdfs://localhost:8020/test.csv') # doctest: +SKIP
448
447
  >>> # read from OSS
449
- >>> auth_path = build_oss_path(file_path, access_key_id, access_key_secret, end_point)
450
- >>> md.read_csv(auth_path)
448
+ >>> md.read_csv('oss://oss-cn-hangzhou-internal.aliyuncs.com/bucket/test.csv',
449
+ ...     storage_options={'role_arn': 'acs:ram::xxxxxx:role/aliyunodpsdefaultrole'})  # doctest: +SKIP
451
450
  """
452
451
  # infer dtypes and columns
453
452
  if isinstance(path, (list, tuple)):
@@ -184,6 +184,23 @@ def test_from_tensor():
184
184
  df = dataframe_from_1d_tileables(d)
185
185
  pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.RangeIndex(2))
186
186
 
187
+ # test axis parameter for dataframe_from_1d_tileables
188
+ d = OrderedDict(
189
+ [("a", mt.tensor(np.random.rand(4))), ("b", mt.tensor(np.random.rand(4)))]
190
+ )
191
+
192
+ # axis=1 (default behavior) - keys become columns
193
+ df = dataframe_from_1d_tileables(d, axis=1)
194
+ assert df.shape == (4, 2)
195
+ pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.Index(["a", "b"]))
196
+ pd.testing.assert_index_equal(df.index_value.to_pandas(), pd.RangeIndex(4))
197
+
198
+ # axis=0 - keys become index (rows)
199
+ df = dataframe_from_1d_tileables(d, axis=0)
200
+ assert df.shape == (2, 4)
201
+ pd.testing.assert_index_equal(df.index_value.to_pandas(), pd.Index(["a", "b"]))
202
+ pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.RangeIndex(4))
203
+
187
204
  series = series_from_tensor(mt.random.rand(4))
188
205
  pd.testing.assert_index_equal(series.index_value.to_pandas(), pd.RangeIndex(4))
189
206
 
@@ -207,6 +224,26 @@ def test_from_tensor():
207
224
  with pytest.raises(ValueError):
208
225
  dataframe_from_tensor(mt.random.rand(4, 3), columns=["a", "b"])
209
226
 
227
+ # 1-d tensors should have same shape
228
+ with pytest.raises(ValueError):
229
+ dataframe_from_1d_tileables(
230
+ OrderedDict(
231
+ [(0, mt.tensor(np.random.rand(3))), (1, mt.tensor(np.random.rand(2)))]
232
+ )
233
+ )
234
+
235
+ # index has wrong shape
236
+ with pytest.raises(ValueError):
237
+ dataframe_from_1d_tileables(
238
+ {0: mt.tensor(np.random.rand(3))}, index=mt.tensor(np.random.rand(2))
239
+ )
240
+
241
+ # columns have wrong shape
242
+ with pytest.raises(ValueError):
243
+ dataframe_from_1d_tileables(
244
+ {0: mt.tensor(np.random.rand(3))}, columns=["a", "b"]
245
+ )
246
+
210
247
  # index should be 1-d
211
248
  with pytest.raises(ValueError):
212
249
  dataframe_from_tensor(
@@ -12,14 +12,18 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from .to_csv import to_csv
15
16
  from .to_odps import to_odps_table
16
17
 
17
18
 
18
19
  def _install():
19
- from ..core import DATAFRAME_TYPE
20
+ from ..core import DATAFRAME_TYPE, SERIES_TYPE
20
21
 
21
22
  for t in DATAFRAME_TYPE:
23
+ t.to_csv = to_csv
22
24
  t.to_odps_table = to_odps_table
25
+ for t in SERIES_TYPE:
26
+ t.to_csv = to_csv
23
27
 
24
28
 
25
29
  _install()
@@ -12,17 +12,13 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import List
16
-
17
15
  from ... import opcodes
18
- from ...core import EntityData
19
16
  from ...serialization.serializables import (
20
17
  AnyField,
21
18
  BoolField,
22
19
  DictField,
23
20
  Int32Field,
24
21
  Int64Field,
25
- KeyField,
26
22
  ListField,
27
23
  StringField,
28
24
  )
@@ -33,27 +29,26 @@ from .core import DataFrameDataStore
33
29
  class DataFrameToCSV(DataFrameDataStore):
34
30
  _op_type_ = opcodes.TO_CSV
35
31
 
36
- input = KeyField("input")
37
- path = AnyField("path")
38
- sep = StringField("sep")
39
- na_rep = StringField("na_rep")
40
- float_format = StringField("float_format")
41
- columns = ListField("columns")
42
- header = AnyField("header")
43
- index = BoolField("index")
44
- index_label = AnyField("index_label")
45
- mode = StringField("mode")
46
- encoding = StringField("encoding")
47
- compression = AnyField("compression")
48
- quoting = Int32Field("quoting")
49
- quotechar = StringField("quotechar")
50
- line_terminator = StringField("line_terminator")
51
- chunksize = Int64Field("chunksize")
52
- date_format = StringField("date_format")
53
- doublequote = BoolField("doublequote")
54
- escapechar = StringField("escapechar")
55
- decimal = StringField("decimal")
56
- storage_options = DictField("storage_options")
32
+ path = AnyField("path", default=None)
33
+ sep = StringField("sep", default=None)
34
+ na_rep = StringField("na_rep", default=None)
35
+ float_format = StringField("float_format", default=None)
36
+ columns = ListField("columns", default=None)
37
+ header = AnyField("header", default=None)
38
+ index = BoolField("index", default=None)
39
+ index_label = AnyField("index_label", default=None)
40
+ mode = StringField("mode", default=None)
41
+ encoding = StringField("encoding", default=None)
42
+ compression = AnyField("compression", default=None)
43
+ quoting = Int32Field("quoting", default=None)
44
+ quotechar = StringField("quotechar", default=None)
45
+ line_terminator = StringField("line_terminator", default=None)
46
+ chunksize = Int64Field("chunksize", default=None)
47
+ date_format = StringField("date_format", default=None)
48
+ doublequote = BoolField("doublequote", default=None)
49
+ escapechar = StringField("escapechar", default=None)
50
+ decimal = StringField("decimal", default=None)
51
+ storage_options = DictField("storage_options", default=None)
57
52
 
58
53
  def __init__(self, output_types=None, **kw):
59
54
  super().__init__(_output_types=output_types, **kw)
@@ -63,19 +58,6 @@ class DataFrameToCSV(DataFrameDataStore):
63
58
  # if wildcard in path, write csv into multiple files
64
59
  return "*" not in self.path
65
60
 
66
- @property
67
- def output_stat(self):
68
- return self.output_stat
69
-
70
- @property
71
- def output_limit(self):
72
- return 1 if not self.output_stat else 2
73
-
74
- @classmethod
75
- def _set_inputs(cls, op: "DataFrameToCSV", inputs: List[EntityData]):
76
- super()._set_inputs(op, inputs)
77
- op._input = op._inputs[0]
78
-
79
61
  def __call__(self, df):
80
62
  index_value = parse_index(df.index_value.to_pandas()[:0], df)
81
63
  if df.ndim == 2:
@@ -110,13 +92,14 @@ def to_csv(
110
92
  compression="infer",
111
93
  quoting=None,
112
94
  quotechar='"',
113
- line_terminator=None,
95
+ lineterminator=None,
114
96
  chunksize=None,
115
97
  date_format=None,
116
98
  doublequote=True,
117
99
  escapechar=None,
118
100
  decimal=".",
119
101
  storage_options=None,
102
+ **kw,
120
103
  ):
121
104
  r"""
122
105
  Write object to a comma-separated values (csv) file.
@@ -169,7 +152,7 @@ def to_csv(
169
152
  will treat them as non-numeric.
170
153
  quotechar : str, default '\"'
171
154
  String of length 1. Character used to quote fields.
172
- line_terminator : str, optional
155
+ lineterminator : str, optional
173
156
  The newline character or character sequence to use in the output
174
157
  file. Defaults to `os.linesep`, which depends on the OS in which
175
158
  this method is called (e.g. '\n' for Linux, '\r\n' for Windows).
@@ -203,6 +186,11 @@ def to_csv(
203
186
  ... 'weapon': ['sai', 'bo staff']})
204
187
  >>> df.to_csv('out.csv', index=False).execute()
205
188
  """
189
+ lineterminator = lineterminator or kw.pop("line_terminator", None)
190
+ if kw:
191
+ raise TypeError(
192
+ f"to_csv() got an unexpected keyword argument '{next(iter(kw))}'"
193
+ )
206
194
 
207
195
  if mode != "w": # pragma: no cover
208
196
  raise NotImplementedError("only support to_csv with mode 'w' for now")
@@ -220,7 +208,7 @@ def to_csv(
220
208
  compression=compression,
221
209
  quoting=quoting,
222
210
  quotechar=quotechar,
223
- line_terminator=line_terminator,
211
+ line_terminator=lineterminator,
224
212
  chunksize=chunksize,
225
213
  date_format=date_format,
226
214
  doublequote=doublequote,
@@ -56,6 +56,7 @@ class DataFrameToODPSTable(DataFrameDataStore):
56
56
  index_label = ListField("index_label", FieldTypes.string, default=None)
57
57
  lifecycle = Int64Field("lifecycle", default=None)
58
58
  table_properties = DictField("table_properties", default=None)
59
+ primary_key = ListField("primary_key", FieldTypes.string, default=None)
59
60
 
60
61
  def __init__(self, **kw):
61
62
  super().__init__(_output_types=[OutputType.dataframe], **kw)
@@ -100,11 +101,12 @@ def to_odps_table(
100
101
  partition: Optional[str] = None,
101
102
  partition_col: Union[None, str, List[str]] = None,
102
103
  overwrite: bool = False,
103
- unknown_as_string: Optional[bool] = None,
104
+ unknown_as_string: Optional[bool] = True,
104
105
  index: bool = True,
105
106
  index_label: Union[None, str, List[str]] = None,
106
107
  lifecycle: Optional[int] = None,
107
108
  table_properties: Optional[dict] = None,
109
+ primary_key: Union[None, str, List[str]] = None,
108
110
  ):
109
111
  """
110
112
  Write DataFrame object into a MaxCompute (ODPS) table.
@@ -145,6 +147,10 @@ def to_odps_table(
145
147
  Specify lifecycle of the output table.
146
148
  table_properties: Optional[dict]
147
149
  Specify properties of the output table.
150
+ primary_key: Union[None, str, List[str]]
151
+ If provided and the target table does not exist, the target table
152
+ will be a delta table with the columns specified in this argument
153
+ as its primary key.
148
154
 
149
155
  Returns
150
156
  -------
@@ -201,12 +207,14 @@ def to_odps_table(
201
207
  index_table_intersect = index_cols & table_cols
202
208
  if index_table_intersect:
203
209
  raise ValueError(
204
- f"Index column(s) {index_table_intersect} conflict with column(s) of the input dataframe."
210
+ f"Index column(s) {index_table_intersect} conflict with "
211
+ f"column(s) of the input dataframe."
205
212
  )
206
213
  index_partition_intersect = index_cols & partition_col_set
207
214
  if index_partition_intersect:
208
215
  raise ValueError(
209
- f"Index column(s) {index_partition_intersect} conflict with partition column(s)."
216
+ f"Index column(s) {index_partition_intersect} conflict "
217
+ f"with partition column(s)."
210
218
  )
211
219
 
212
220
  if partition_col:
@@ -217,6 +225,23 @@ def to_odps_table(
217
225
  " is not the data column(s) of the input dataframe."
218
226
  )
219
227
 
228
+ table_properties = table_properties or {}
229
+ if primary_key is not None:
230
+ table_properties["transactional"] = "true"
231
+ if odps_entry.exist_table(table):
232
+ table_obj = odps_entry.get_table(table)
233
+ if table_obj.is_transactional:
234
+ table_properties = table_properties or {}
235
+ table_properties["transactional"] = "true"
236
+ primary_key = primary_key or table_obj.primary_key or ()
237
+ if set(primary_key) != set(table_obj.primary_key or ()):
238
+ raise ValueError(
239
+ f"Primary keys between existing table {table} and "
240
+ f"provided arguments are not same."
241
+ )
242
+ if primary_key and not isinstance(primary_key, (list, tuple)):
243
+ primary_key = [primary_key]
244
+
220
245
  op = DataFrameToODPSTable(
221
246
  dtypes=df.dtypes,
222
247
  table_name=table,
@@ -227,6 +252,7 @@ def to_odps_table(
227
252
  index=index,
228
253
  index_label=index_label,
229
254
  lifecycle=lifecycle or options.session.table_lifecycle,
230
- table_properties=table_properties,
255
+ table_properties=table_properties or None,
256
+ primary_key=primary_key or None,
231
257
  )
232
258
  return op(df)
@@ -24,20 +24,36 @@ from .apply_chunk import (
24
24
  df_apply_chunk,
25
25
  series_apply_chunk,
26
26
  )
27
+ from .cartesian_chunk import cartesian_chunk
28
+ from .collect_kv import collect_kv
29
+ from .extract_kv import extract_kv
27
30
  from .flatjson import series_flatjson
28
31
  from .flatmap import df_flatmap, series_flatmap
32
+ from .map_reduce import map_reduce
33
+ from .rebalance import DataFrameRebalance, rebalance
29
34
  from .reshuffle import DataFrameReshuffle, df_reshuffle
30
35
 
31
36
 
32
37
  def _install():
33
38
  from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
34
39
 
35
- DataFrameMaxFrameAccessor._register("reshuffle", df_reshuffle)
36
- DataFrameMaxFrameAccessor._register("flatmap", df_flatmap)
37
40
  DataFrameMaxFrameAccessor._register("apply_chunk", df_apply_chunk)
38
- SeriesMaxFrameAccessor._register("flatmap", series_flatmap)
39
- SeriesMaxFrameAccessor._register("flatjson", series_flatjson)
41
+ DataFrameMaxFrameAccessor._register("cartesian_chunk", cartesian_chunk)
42
+ DataFrameMaxFrameAccessor._register("collect_kv", collect_kv)
43
+ DataFrameMaxFrameAccessor._register("extract_kv", extract_kv)
44
+ DataFrameMaxFrameAccessor._register("flatmap", df_flatmap)
45
+ DataFrameMaxFrameAccessor._register("map_reduce", map_reduce)
46
+ DataFrameMaxFrameAccessor._register("rebalance", rebalance)
47
+ DataFrameMaxFrameAccessor._register("reshuffle", df_reshuffle)
48
+
40
49
  SeriesMaxFrameAccessor._register("apply_chunk", series_apply_chunk)
50
+ SeriesMaxFrameAccessor._register("cartesian_chunk", cartesian_chunk)
51
+ SeriesMaxFrameAccessor._register("extract_kv", extract_kv)
52
+ SeriesMaxFrameAccessor._register("flatjson", series_flatjson)
53
+ SeriesMaxFrameAccessor._register("flatmap", series_flatmap)
54
+ SeriesMaxFrameAccessor._register("rebalance", rebalance)
55
+
56
+ IndexMaxFrameAccessor._register("rebalance", rebalance)
41
57
 
42
58
  if DataFrameMaxFrameAccessor._api_count:
43
59
  for t in DATAFRAME_TYPE: