maxframe 2.0.0b1__cp37-cp37m-win_amd64.whl → 2.2.0__cp37-cp37m-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (395)
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win_amd64.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +6 -6
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +34 -1
  15. maxframe/codegen/spe/dataframe/misc.py +9 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +14 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +30 -17
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  28. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  29. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  30. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  31. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  32. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  33. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  34. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  35. maxframe/codegen/spe/tensor/__init__.py +3 -0
  36. maxframe/codegen/spe/tensor/fft.py +74 -0
  37. maxframe/codegen/spe/tensor/linalg.py +29 -2
  38. maxframe/codegen/spe/tensor/misc.py +79 -25
  39. maxframe/codegen/spe/tensor/spatial.py +45 -0
  40. maxframe/codegen/spe/tensor/statistics.py +44 -0
  41. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  42. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  43. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  44. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  45. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  46. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  47. maxframe/codegen/spe/utils.py +2 -0
  48. maxframe/config/config.py +70 -9
  49. maxframe/config/tests/test_validators.py +13 -1
  50. maxframe/config/validators.py +49 -0
  51. maxframe/conftest.py +44 -17
  52. maxframe/core/accessor.py +2 -2
  53. maxframe/core/entity/core.py +5 -0
  54. maxframe/core/entity/tileables.py +1 -1
  55. maxframe/core/graph/core.cp37-win_amd64.pyd +0 -0
  56. maxframe/core/graph/entity.py +1 -2
  57. maxframe/core/operator/base.py +9 -2
  58. maxframe/core/operator/core.py +10 -2
  59. maxframe/core/operator/utils.py +13 -0
  60. maxframe/dataframe/__init__.py +10 -3
  61. maxframe/dataframe/accessors/__init__.py +1 -1
  62. maxframe/dataframe/accessors/compat.py +45 -0
  63. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  64. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  65. maxframe/dataframe/accessors/dict_/core.py +48 -0
  66. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  67. maxframe/dataframe/accessors/dict_/length.py +7 -16
  68. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  69. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  70. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  71. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  72. maxframe/dataframe/accessors/list_/core.py +48 -0
  73. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  74. maxframe/dataframe/accessors/list_/length.py +7 -16
  75. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  76. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  77. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  78. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  79. maxframe/dataframe/accessors/struct_/core.py +43 -0
  80. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  81. maxframe/dataframe/accessors/struct_/field.py +123 -0
  82. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  83. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  84. maxframe/dataframe/arithmetic/__init__.py +14 -4
  85. maxframe/dataframe/arithmetic/between.py +106 -0
  86. maxframe/dataframe/arithmetic/dot.py +237 -0
  87. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  88. maxframe/dataframe/core.py +63 -118
  89. maxframe/dataframe/datasource/__init__.py +18 -0
  90. maxframe/dataframe/datasource/from_dict.py +124 -0
  91. maxframe/dataframe/datasource/from_index.py +1 -1
  92. maxframe/dataframe/datasource/from_records.py +77 -0
  93. maxframe/dataframe/datasource/from_tensor.py +109 -41
  94. maxframe/dataframe/datasource/read_csv.py +2 -3
  95. maxframe/dataframe/datasource/read_odps_query.py +76 -16
  96. maxframe/dataframe/datasource/tests/test_datasource.py +84 -1
  97. maxframe/dataframe/datastore/__init__.py +5 -1
  98. maxframe/dataframe/datastore/to_csv.py +29 -41
  99. maxframe/dataframe/datastore/to_odps.py +30 -4
  100. maxframe/dataframe/extensions/__init__.py +20 -4
  101. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  102. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  103. maxframe/dataframe/extensions/collect_kv.py +126 -0
  104. maxframe/dataframe/extensions/extract_kv.py +177 -0
  105. maxframe/dataframe/extensions/map_reduce.py +263 -0
  106. maxframe/dataframe/extensions/rebalance.py +62 -0
  107. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  108. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  109. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  110. maxframe/dataframe/groupby/__init__.py +12 -1
  111. maxframe/dataframe/groupby/aggregation.py +78 -45
  112. maxframe/dataframe/groupby/apply.py +1 -1
  113. maxframe/dataframe/groupby/apply_chunk.py +18 -2
  114. maxframe/dataframe/groupby/core.py +96 -12
  115. maxframe/dataframe/groupby/cum.py +4 -25
  116. maxframe/dataframe/groupby/expanding.py +264 -0
  117. maxframe/dataframe/groupby/fill.py +1 -1
  118. maxframe/dataframe/groupby/getitem.py +12 -5
  119. maxframe/dataframe/groupby/head.py +11 -1
  120. maxframe/dataframe/groupby/rank.py +136 -0
  121. maxframe/dataframe/groupby/rolling.py +206 -0
  122. maxframe/dataframe/groupby/shift.py +114 -0
  123. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  124. maxframe/dataframe/indexing/__init__.py +20 -1
  125. maxframe/dataframe/indexing/droplevel.py +195 -0
  126. maxframe/dataframe/indexing/filter.py +169 -0
  127. maxframe/dataframe/indexing/get_level_values.py +76 -0
  128. maxframe/dataframe/indexing/iat.py +45 -0
  129. maxframe/dataframe/indexing/iloc.py +152 -12
  130. maxframe/dataframe/indexing/insert.py +1 -1
  131. maxframe/dataframe/indexing/loc.py +287 -7
  132. maxframe/dataframe/indexing/reindex.py +14 -5
  133. maxframe/dataframe/indexing/rename.py +6 -0
  134. maxframe/dataframe/indexing/rename_axis.py +2 -2
  135. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  136. maxframe/dataframe/indexing/reset_index.py +33 -6
  137. maxframe/dataframe/indexing/sample.py +8 -0
  138. maxframe/dataframe/indexing/setitem.py +3 -3
  139. maxframe/dataframe/indexing/swaplevel.py +185 -0
  140. maxframe/dataframe/indexing/take.py +99 -0
  141. maxframe/dataframe/indexing/truncate.py +140 -0
  142. maxframe/dataframe/indexing/where.py +0 -11
  143. maxframe/dataframe/indexing/xs.py +148 -0
  144. maxframe/dataframe/merge/__init__.py +12 -1
  145. maxframe/dataframe/merge/append.py +97 -98
  146. maxframe/dataframe/merge/combine_first.py +120 -0
  147. maxframe/dataframe/merge/compare.py +387 -0
  148. maxframe/dataframe/merge/concat.py +183 -0
  149. maxframe/dataframe/merge/update.py +271 -0
  150. maxframe/dataframe/misc/__init__.py +16 -10
  151. maxframe/dataframe/misc/_duplicate.py +10 -4
  152. maxframe/dataframe/misc/apply.py +1 -1
  153. maxframe/dataframe/misc/check_unique.py +51 -0
  154. maxframe/dataframe/misc/clip.py +145 -0
  155. maxframe/dataframe/misc/describe.py +175 -9
  156. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  157. maxframe/dataframe/misc/duplicated.py +2 -2
  158. maxframe/dataframe/misc/get_dummies.py +5 -1
  159. maxframe/dataframe/misc/isin.py +2 -2
  160. maxframe/dataframe/misc/map.py +94 -0
  161. maxframe/dataframe/misc/tests/test_misc.py +13 -2
  162. maxframe/dataframe/misc/to_numeric.py +3 -0
  163. maxframe/dataframe/misc/transform.py +12 -5
  164. maxframe/dataframe/misc/transpose.py +13 -1
  165. maxframe/dataframe/misc/valid_index.py +115 -0
  166. maxframe/dataframe/misc/value_counts.py +38 -4
  167. maxframe/dataframe/missing/checkna.py +13 -6
  168. maxframe/dataframe/missing/dropna.py +5 -0
  169. maxframe/dataframe/missing/fillna.py +1 -1
  170. maxframe/dataframe/missing/replace.py +7 -4
  171. maxframe/dataframe/reduction/__init__.py +29 -15
  172. maxframe/dataframe/reduction/aggregation.py +38 -9
  173. maxframe/dataframe/reduction/all.py +2 -2
  174. maxframe/dataframe/reduction/any.py +2 -2
  175. maxframe/dataframe/reduction/argmax.py +100 -0
  176. maxframe/dataframe/reduction/argmin.py +100 -0
  177. maxframe/dataframe/reduction/core.py +65 -18
  178. maxframe/dataframe/reduction/count.py +13 -9
  179. maxframe/dataframe/reduction/cov.py +166 -0
  180. maxframe/dataframe/reduction/cummax.py +2 -2
  181. maxframe/dataframe/reduction/cummin.py +2 -2
  182. maxframe/dataframe/reduction/cumprod.py +2 -2
  183. maxframe/dataframe/reduction/cumsum.py +2 -2
  184. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  185. maxframe/dataframe/reduction/idxmax.py +185 -0
  186. maxframe/dataframe/reduction/idxmin.py +185 -0
  187. maxframe/dataframe/reduction/kurtosis.py +37 -30
  188. maxframe/dataframe/reduction/max.py +2 -2
  189. maxframe/dataframe/reduction/mean.py +9 -7
  190. maxframe/dataframe/reduction/median.py +2 -2
  191. maxframe/dataframe/reduction/min.py +2 -2
  192. maxframe/dataframe/reduction/nunique.py +9 -8
  193. maxframe/dataframe/reduction/prod.py +18 -13
  194. maxframe/dataframe/reduction/reduction_size.py +2 -2
  195. maxframe/dataframe/reduction/sem.py +13 -9
  196. maxframe/dataframe/reduction/skew.py +31 -27
  197. maxframe/dataframe/reduction/str_concat.py +10 -7
  198. maxframe/dataframe/reduction/sum.py +18 -14
  199. maxframe/dataframe/reduction/unique.py +20 -3
  200. maxframe/dataframe/reduction/var.py +16 -12
  201. maxframe/dataframe/reshape/__init__.py +38 -0
  202. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  203. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  204. maxframe/dataframe/reshape/unstack.py +114 -0
  205. maxframe/dataframe/sort/__init__.py +8 -0
  206. maxframe/dataframe/sort/argsort.py +62 -0
  207. maxframe/dataframe/sort/core.py +1 -0
  208. maxframe/dataframe/sort/nlargest.py +238 -0
  209. maxframe/dataframe/sort/nsmallest.py +228 -0
  210. maxframe/dataframe/statistics/__init__.py +3 -3
  211. maxframe/dataframe/statistics/corr.py +1 -0
  212. maxframe/dataframe/statistics/quantile.py +2 -2
  213. maxframe/dataframe/tests/test_typing.py +104 -0
  214. maxframe/dataframe/tests/test_utils.py +66 -2
  215. maxframe/dataframe/typing_.py +185 -0
  216. maxframe/dataframe/utils.py +95 -26
  217. maxframe/dataframe/window/aggregation.py +8 -4
  218. maxframe/dataframe/window/core.py +14 -1
  219. maxframe/dataframe/window/ewm.py +1 -3
  220. maxframe/dataframe/window/expanding.py +37 -35
  221. maxframe/dataframe/window/rolling.py +49 -39
  222. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  223. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  224. maxframe/env.py +7 -4
  225. maxframe/errors.py +2 -2
  226. maxframe/io/objects/tests/test_object_io.py +4 -2
  227. maxframe/io/odpsio/schema.py +9 -3
  228. maxframe/io/odpsio/tableio.py +7 -2
  229. maxframe/io/odpsio/tests/test_schema.py +198 -83
  230. maxframe/io/odpsio/tests/test_volumeio.py +4 -15
  231. maxframe/io/odpsio/volumeio.py +23 -8
  232. maxframe/learn/__init__.py +10 -2
  233. maxframe/learn/cluster/__init__.py +15 -0
  234. maxframe/learn/cluster/_kmeans.py +782 -0
  235. maxframe/learn/contrib/llm/core.py +2 -0
  236. maxframe/learn/contrib/xgboost/core.py +87 -1
  237. maxframe/learn/contrib/xgboost/train.py +5 -2
  238. maxframe/learn/core.py +66 -0
  239. maxframe/learn/linear_model/_base.py +58 -1
  240. maxframe/learn/linear_model/_lin_reg.py +1 -1
  241. maxframe/learn/metrics/__init__.py +6 -0
  242. maxframe/learn/metrics/_classification.py +145 -0
  243. maxframe/learn/metrics/_ranking.py +477 -0
  244. maxframe/learn/metrics/_scorer.py +60 -0
  245. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  246. maxframe/learn/metrics/pairwise/core.py +77 -0
  247. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  248. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  249. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  250. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  251. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  252. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  253. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  254. maxframe/learn/metrics/tests/__init__.py +13 -0
  255. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  256. maxframe/learn/utils/__init__.py +1 -1
  257. maxframe/learn/utils/checks.py +1 -2
  258. maxframe/learn/utils/core.py +59 -0
  259. maxframe/learn/utils/extmath.py +37 -0
  260. maxframe/learn/utils/odpsio.py +193 -0
  261. maxframe/learn/utils/validation.py +2 -2
  262. maxframe/lib/compat.py +40 -0
  263. maxframe/lib/dtypes_extension/__init__.py +16 -1
  264. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  265. maxframe/lib/dtypes_extension/blob.py +304 -0
  266. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  267. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  268. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  269. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  270. maxframe/lib/filesystem/_oss_lib/common.py +122 -50
  271. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  272. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  273. maxframe/lib/filesystem/base.py +1 -1
  274. maxframe/lib/filesystem/core.py +1 -1
  275. maxframe/lib/filesystem/oss.py +115 -46
  276. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  277. maxframe/lib/mmh3.cp37-win_amd64.pyd +0 -0
  278. maxframe/lib/wrapped_pickle.py +10 -0
  279. maxframe/opcodes.py +33 -15
  280. maxframe/protocol.py +12 -0
  281. maxframe/serialization/__init__.py +11 -2
  282. maxframe/serialization/arrow.py +38 -13
  283. maxframe/serialization/blob.py +32 -0
  284. maxframe/serialization/core.cp37-win_amd64.pyd +0 -0
  285. maxframe/serialization/core.pyx +39 -1
  286. maxframe/serialization/exception.py +2 -4
  287. maxframe/serialization/numpy.py +11 -0
  288. maxframe/serialization/pandas.py +46 -9
  289. maxframe/serialization/serializables/core.py +2 -2
  290. maxframe/serialization/tests/test_serial.py +29 -2
  291. maxframe/tensor/__init__.py +38 -8
  292. maxframe/tensor/arithmetic/__init__.py +19 -10
  293. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  294. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
  295. maxframe/tensor/core.py +3 -2
  296. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  297. maxframe/tensor/extensions/__init__.py +2 -0
  298. maxframe/tensor/extensions/apply_chunk.py +3 -3
  299. maxframe/tensor/extensions/rebalance.py +65 -0
  300. maxframe/tensor/fft/__init__.py +32 -0
  301. maxframe/tensor/fft/core.py +168 -0
  302. maxframe/tensor/fft/fft.py +112 -0
  303. maxframe/tensor/fft/fft2.py +118 -0
  304. maxframe/tensor/fft/fftfreq.py +80 -0
  305. maxframe/tensor/fft/fftn.py +123 -0
  306. maxframe/tensor/fft/fftshift.py +79 -0
  307. maxframe/tensor/fft/hfft.py +112 -0
  308. maxframe/tensor/fft/ifft.py +114 -0
  309. maxframe/tensor/fft/ifft2.py +115 -0
  310. maxframe/tensor/fft/ifftn.py +123 -0
  311. maxframe/tensor/fft/ifftshift.py +73 -0
  312. maxframe/tensor/fft/ihfft.py +93 -0
  313. maxframe/tensor/fft/irfft.py +118 -0
  314. maxframe/tensor/fft/irfft2.py +62 -0
  315. maxframe/tensor/fft/irfftn.py +114 -0
  316. maxframe/tensor/fft/rfft.py +116 -0
  317. maxframe/tensor/fft/rfft2.py +63 -0
  318. maxframe/tensor/fft/rfftfreq.py +87 -0
  319. maxframe/tensor/fft/rfftn.py +113 -0
  320. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  321. maxframe/tensor/linalg/__init__.py +7 -0
  322. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  323. maxframe/tensor/linalg/cholesky.py +117 -0
  324. maxframe/tensor/linalg/einsum.py +339 -0
  325. maxframe/tensor/linalg/lstsq.py +100 -0
  326. maxframe/tensor/linalg/matrix_norm.py +75 -0
  327. maxframe/tensor/linalg/norm.py +249 -0
  328. maxframe/tensor/linalg/solve.py +72 -0
  329. maxframe/tensor/linalg/solve_triangular.py +2 -2
  330. maxframe/tensor/linalg/vector_norm.py +113 -0
  331. maxframe/tensor/misc/__init__.py +24 -1
  332. maxframe/tensor/misc/argwhere.py +72 -0
  333. maxframe/tensor/misc/array_split.py +46 -0
  334. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  335. maxframe/tensor/misc/copyto.py +130 -0
  336. maxframe/tensor/misc/delete.py +104 -0
  337. maxframe/tensor/misc/dsplit.py +68 -0
  338. maxframe/tensor/misc/ediff1d.py +74 -0
  339. maxframe/tensor/misc/expand_dims.py +85 -0
  340. maxframe/tensor/misc/flip.py +90 -0
  341. maxframe/tensor/misc/fliplr.py +64 -0
  342. maxframe/tensor/misc/flipud.py +68 -0
  343. maxframe/tensor/misc/hsplit.py +85 -0
  344. maxframe/tensor/misc/insert.py +139 -0
  345. maxframe/tensor/misc/moveaxis.py +83 -0
  346. maxframe/tensor/misc/result_type.py +88 -0
  347. maxframe/tensor/misc/roll.py +124 -0
  348. maxframe/tensor/misc/rollaxis.py +77 -0
  349. maxframe/tensor/misc/shape.py +89 -0
  350. maxframe/tensor/misc/split.py +190 -0
  351. maxframe/tensor/misc/tile.py +109 -0
  352. maxframe/tensor/misc/vsplit.py +74 -0
  353. maxframe/tensor/reduction/array_equal.py +2 -1
  354. maxframe/tensor/sort/__init__.py +2 -0
  355. maxframe/tensor/sort/argpartition.py +98 -0
  356. maxframe/tensor/sort/partition.py +228 -0
  357. maxframe/tensor/spatial/__init__.py +15 -0
  358. maxframe/tensor/spatial/distance/__init__.py +17 -0
  359. maxframe/tensor/spatial/distance/cdist.py +421 -0
  360. maxframe/tensor/spatial/distance/pdist.py +398 -0
  361. maxframe/tensor/spatial/distance/squareform.py +153 -0
  362. maxframe/tensor/special/__init__.py +159 -21
  363. maxframe/tensor/special/airy.py +55 -0
  364. maxframe/tensor/special/bessel.py +199 -0
  365. maxframe/tensor/special/core.py +65 -4
  366. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  367. maxframe/tensor/special/ellip_harm.py +55 -0
  368. maxframe/tensor/special/err_fresnel.py +223 -0
  369. maxframe/tensor/special/gamma_funcs.py +303 -0
  370. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  371. maxframe/tensor/special/info_theory.py +189 -0
  372. maxframe/tensor/special/misc.py +21 -0
  373. maxframe/tensor/statistics/__init__.py +6 -0
  374. maxframe/tensor/statistics/corrcoef.py +77 -0
  375. maxframe/tensor/statistics/cov.py +222 -0
  376. maxframe/tensor/statistics/digitize.py +126 -0
  377. maxframe/tensor/statistics/histogram.py +520 -0
  378. maxframe/tensor/statistics/median.py +85 -0
  379. maxframe/tensor/statistics/ptp.py +89 -0
  380. maxframe/tensor/utils.py +3 -3
  381. maxframe/tests/test_utils.py +43 -1
  382. maxframe/tests/utils.py +3 -13
  383. maxframe/typing_.py +2 -0
  384. maxframe/udf.py +27 -2
  385. maxframe/utils.py +193 -19
  386. {maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
  387. {maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/RECORD +395 -240
  388. maxframe_client/fetcher.py +35 -4
  389. maxframe_client/session/odps.py +7 -2
  390. maxframe_client/tests/test_fetcher.py +76 -3
  391. maxframe_client/tests/test_session.py +4 -1
  392. /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  393. /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  394. {maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
  395. {maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
@@ -51,7 +51,7 @@ class SeriesFromIndex(DataFrameOperator, DataFrameOperatorMixin):
51
51
 
52
52
 
53
53
  def series_from_index(ind, index=None, name=None):
54
- name = name or ind.name or 0
54
+ name = name or ind.name
55
55
  if index is not None:
56
56
  index = Index(index)
57
57
  op = SeriesFromIndex(input_=ind, index=index, name=name)
@@ -77,6 +77,83 @@ def from_records(
77
77
  sparse=False,
78
78
  **kw
79
79
  ):
80
+ """
81
+ Convert structured or record ndarray to DataFrame.
82
+
83
+ Creates a DataFrame object from a structured ndarray, sequence of
84
+ tuples or dicts, or DataFrame.
85
+
86
+ Parameters
87
+ ----------
88
+ data : structured ndarray, sequence of tuples or dicts, or DataFrame
89
+ Structured input data.
90
+
91
+ .. deprecated:: 2.1.0
92
+ Passing a DataFrame is deprecated.
93
+ index : str, list of fields, array-like
94
+ Field of array to use as the index, alternately a specific set of
95
+ input labels to use.
96
+ exclude : sequence, default None
97
+ Columns or fields to exclude.
98
+ columns : sequence, default None
99
+ Column names to use. If the passed data do not have names
100
+ associated with them, this argument provides names for the
101
+ columns. Otherwise this argument indicates the order of the columns
102
+ in the result (any names not found in the data will become all-NA
103
+ columns).
104
+ coerce_float : bool, default False
105
+ Attempt to convert values of non-string, non-numeric objects (like
106
+ decimal.Decimal) to floating point, useful for SQL result sets.
107
+ nrows : int, default None
108
+ Number of rows to read if data is an iterator.
109
+
110
+ Returns
111
+ -------
112
+ DataFrame
113
+
114
+ See Also
115
+ --------
116
+ DataFrame.from_dict : DataFrame from dict of array-like or dicts.
117
+ DataFrame : DataFrame object creation using constructor.
118
+
119
+ Examples
120
+ --------
121
+ Data can be provided as a structured ndarray:
122
+
123
+ >>> import maxframe.tensor as mt
124
+ >>> import maxframe.dataframe as md
125
+ >>> data = mt.array([(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')],
126
+ ... dtype=[('col_1', 'i4'), ('col_2', 'U1')])
127
+ >>> md.DataFrame.from_records(data).execute()
128
+ col_1 col_2
129
+ 0 3 a
130
+ 1 2 b
131
+ 2 1 c
132
+ 3 0 d
133
+
134
+ Data can be provided as a list of dicts:
135
+
136
+ >>> data = [{'col_1': 3, 'col_2': 'a'},
137
+ ... {'col_1': 2, 'col_2': 'b'},
138
+ ... {'col_1': 1, 'col_2': 'c'},
139
+ ... {'col_1': 0, 'col_2': 'd'}]
140
+ >>> md.DataFrame.from_records(data).execute()
141
+ col_1 col_2
142
+ 0 3 a
143
+ 1 2 b
144
+ 2 1 c
145
+ 3 0 d
146
+
147
+ Data can be provided as a list of tuples with corresponding columns:
148
+
149
+ >>> data = [(3, 'a'), (2, 'b'), (1, 'c'), (0, 'd')]
150
+ >>> md.DataFrame.from_records(data, columns=['col_1', 'col_2']).execute()
151
+ col_1 col_2
152
+ 0 3 a
153
+ 1 2 b
154
+ 2 1 c
155
+ 3 0 d
156
+ """
80
157
  if isinstance(data, np.ndarray):
81
158
  from .dataframe import from_pandas
82
159
 
@@ -39,6 +39,7 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
39
39
  input = AnyField("input")
40
40
  index = AnyField("index")
41
41
  columns = AnyField("columns")
42
+ axis = AnyField("axis")
42
43
 
43
44
  def __init__(self, *args, **kwargs):
44
45
  kwargs["_output_types"] = [OutputType.dataframe]
@@ -120,46 +121,82 @@ class DataFrameFromTensor(DataFrameOperator, DataFrameOperatorMixin):
120
121
  if isinstance(tileable, ENTITY_TYPE):
121
122
  tileables.append(tileable)
122
123
 
123
- if index is not None:
124
- tileable_size = tileables[0].shape[0]
125
- if hasattr(index, "shape"):
126
- index_size = index.shape[0]
124
+ if self.axis == 0:
125
+ if index is not None:
126
+ raise NotImplementedError("Cannot accept index when axis=0")
127
127
  else:
128
- index_size = len(index)
129
- if (
130
- not pd.isna(tileable_size)
131
- and not pd.isna(index_size)
132
- and tileable_size != index_size
133
- ):
134
- raise ValueError(
135
- f"index {index} should have the same shape "
136
- f"with tensor: {tileable_size}"
137
- )
138
- index_value = self._process_index(index, tileables)
128
+ index = pd.Index(list(input_1d_tileables.keys()))
129
+ index_value = parse_index(index, store_data=True)
130
+ self.index = index
131
+
132
+ if columns is not None:
133
+ tileable_size = tileables[0].shape[0] if tileables else 0
134
+ if not isinstance(columns, pd.Index):
135
+ columns = self.columns = pd.Index(columns)
136
+ column_size = columns.shape[0]
137
+ if (
138
+ not pd.isna(tileable_size)
139
+ and not pd.isna(column_size)
140
+ and tileable_size != column_size
141
+ ):
142
+ raise ValueError(
143
+ f"columns {columns} should have the same shape "
144
+ f"with tensor: {tileable_size}"
145
+ )
146
+ columns_value = self._process_index(columns, tileables)
147
+ else:
148
+ if not tileables or np.isnan(tileables[0].shape[0]):
149
+ columns = columns_value = None
150
+ else:
151
+ columns = pd.RangeIndex(0, tileables[0].shape[0])
152
+ columns_value = parse_index(columns, store_data=True)
153
+ self.columns = columns
154
+
155
+ shape = (len(input_1d_tileables), shape[0] if shape else 0)
139
156
  else:
140
- if np.isnan(tileables[0].shape[0]):
141
- index = pd.RangeIndex(0)
157
+ if index is not None:
158
+ tileable_size = tileables[0].shape[0] if tileables else 0
159
+ if hasattr(index, "shape"):
160
+ index_size = index.shape[0]
161
+ else:
162
+ index_size = len(index)
163
+ if (
164
+ not pd.isna(tileable_size)
165
+ and not pd.isna(index_size)
166
+ and tileable_size != index_size
167
+ ):
168
+ raise ValueError(
169
+ f"index {index} should have the same shape "
170
+ f"with tensor: {tileable_size}"
171
+ )
172
+ index_value = self._process_index(index, tileables)
142
173
  else:
143
- index = pd.RangeIndex(0, tileables[0].shape[0])
144
- self.index = index
145
- index_value = parse_index(index)
174
+ if not tileables or np.isnan(tileables[0].shape[0]):
175
+ index = pd.RangeIndex(0)
176
+ else:
177
+ index = pd.RangeIndex(0, tileables[0].shape[0])
178
+ self.index = index
179
+ index_value = parse_index(index)
146
180
 
147
- if columns is not None:
148
- if len(input_1d_tileables) != len(columns):
149
- raise ValueError(
150
- f"columns {columns} should have size {len(input_1d_tileables)}"
181
+ if columns is not None:
182
+ if len(input_1d_tileables) != len(columns):
183
+ raise ValueError(
184
+ f"columns {columns} should have size {len(input_1d_tileables)}"
185
+ )
186
+ if not isinstance(columns, pd.Index):
187
+ if isinstance(columns, ENTITY_TYPE):
188
+ raise NotImplementedError(
189
+ "The columns value cannot be a tileable"
190
+ )
191
+ columns = pd.Index(columns)
192
+ columns_value = parse_index(columns, store_data=True)
193
+ else:
194
+ columns_value = parse_index(
195
+ pd.RangeIndex(0, len(input_1d_tileables)), store_data=True
151
196
  )
152
- if not isinstance(columns, pd.Index):
153
- if isinstance(columns, ENTITY_TYPE):
154
- raise NotImplementedError("The columns value cannot be a tileable")
155
- columns = pd.Index(columns)
156
- columns_value = parse_index(columns, store_data=True)
157
- else:
158
- columns_value = parse_index(
159
- pd.RangeIndex(0, len(input_1d_tileables)), store_data=True
160
- )
161
197
 
162
- shape = (shape[0], len(input_1d_tileables))
198
+ shape = (shape[0] if shape else 0, len(input_1d_tileables))
199
+
163
200
  return self.new_dataframe(
164
201
  tileables,
165
202
  shape,
@@ -278,6 +315,9 @@ def dataframe_from_tensor(
278
315
  gpu: bool = None,
279
316
  sparse: bool = False,
280
317
  ):
318
+ if isinstance(columns, list) and columns and isinstance(columns[0], tuple):
319
+ columns = pd.MultiIndex.from_tuples(columns)
320
+
281
321
  if tensor is not None:
282
322
  if tensor.ndim > 2 or tensor.ndim <= 0:
283
323
  raise TypeError(
@@ -299,6 +339,8 @@ def dataframe_from_tensor(
299
339
  dtypes = pd.Series([], index=pd.Index([], dtype=object))
300
340
  if index is not None and not isinstance(index, ENTITY_TYPE):
301
341
  index = pd.Index(index)
342
+ if isinstance(index[0], tuple):
343
+ index = pd.MultiIndex.from_tuples(index)
302
344
  op = DataFrameFromTensor(
303
345
  input=tensor, index=index, columns=columns, gpu=gpu, sparse=sparse
304
346
  )
@@ -311,7 +353,10 @@ def dataframe_from_1d_tileables(
311
353
  columns: Union[pd.Index, list] = None,
312
354
  gpu: bool = None,
313
355
  sparse: bool = False,
356
+ axis: int = 1,
314
357
  ):
358
+ from pandas.core.dtypes.cast import find_common_type
359
+
315
360
  data = dict()
316
361
  for k, v in d.items():
317
362
  if isinstance(v, (list, tuple)) and any(
@@ -322,9 +367,9 @@ def dataframe_from_1d_tileables(
322
367
  data[k] = v
323
368
  d = data
324
369
  if columns is not None:
325
- tileables = [d.get(c) for c in columns]
370
+ tileables = [d.get(c) for c in columns] if axis == 1 else list(d.values())
326
371
  else:
327
- columns = list(d.keys())
372
+ columns = list(d.keys()) if axis == 1 else None
328
373
  tileables = list(d.values())
329
374
 
330
375
  gpu = (
@@ -332,14 +377,37 @@ def dataframe_from_1d_tileables(
332
377
  if gpu is None
333
378
  else gpu
334
379
  )
335
- dtypes = pd.Series(
336
- [t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype for t in tileables],
337
- index=columns,
338
- )
380
+
381
+ if axis == 0:
382
+ col_num = (
383
+ tileables[0].shape[0]
384
+ if hasattr(tileables[0], "shape")
385
+ else len(tileables[0])
386
+ )
387
+ if pd.isna(col_num):
388
+ dtypes = None
389
+ else:
390
+ common_dtype = find_common_type(
391
+ [
392
+ t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype
393
+ for t in tileables
394
+ ]
395
+ )
396
+ dtypes = pd.Series(
397
+ [common_dtype] * col_num,
398
+ index=columns if columns is not None else pd.RangeIndex(col_num),
399
+ )
400
+ else:
401
+ dtypes = pd.Series(
402
+ [t.dtype if hasattr(t, "dtype") else pd.Series(t).dtype for t in tileables],
403
+ index=columns,
404
+ )
405
+
339
406
  if index is not None and not isinstance(index, ENTITY_TYPE):
340
407
  index = pd.Index(index)
408
+
341
409
  op = DataFrameFromTensor(
342
- input=d, index=index, columns=columns, gpu=gpu, sparse=sparse
410
+ input=d, index=index, columns=columns, gpu=gpu, sparse=sparse, axis=axis
343
411
  )
344
412
  return op(d, index, columns, dtypes)
345
413
 
@@ -441,13 +441,12 @@ def read_csv(
441
441
  Examples
442
442
  --------
443
443
  >>> import maxframe.dataframe as md
444
- >>> from maxframe.lib.filesystem.oss import build_oss_path
445
444
  >>> md.read_csv('data.csv') # doctest: +SKIP
446
445
  >>> # read from HDFS
447
446
  >>> md.read_csv('hdfs://localhost:8020/test.csv') # doctest: +SKIP
448
447
  >>> # read from OSS
449
- >>> auth_path = build_oss_path(file_path, access_key_id, access_key_secret, end_point)
450
- >>> md.read_csv(auth_path)
448
+ >>> md.read_csv('oss://oss-cn-hangzhou-internal.aliyuncs.com/bucket/test.csv',
449
+ >>> storage_options={'role_arn': 'acs:ram::xxxxxx:role/aliyunodpsdefaultrole'})
451
450
  """
452
451
  # infer dtypes and columns
453
452
  if isinstance(path, (list, tuple)):
@@ -13,6 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import dataclasses
16
+ import functools
16
17
  import io
17
18
  import logging
18
19
  import re
@@ -22,6 +23,8 @@ from typing import Dict, List, MutableMapping, Optional, Tuple, Union
22
23
  import numpy as np
23
24
  import pandas as pd
24
25
  from odps import ODPS
26
+ from odps.errors import ODPSError
27
+ from odps.models import TableSchema
25
28
  from odps.types import Column, OdpsSchema, validate_data_type
26
29
  from odps.utils import split_sql_by_semicolon
27
30
 
@@ -245,13 +248,18 @@ def _parse_explained_schema(explain_string: str) -> OdpsSchema:
245
248
  return _parse_full_explain(explain_string)
246
249
 
247
250
 
248
- def _build_explain_sql(sql_stmt: str, no_split: bool = False) -> str:
251
+ def _build_explain_sql(
252
+ sql_stmt: str, no_split: bool = False, use_output: bool = False
253
+ ) -> str:
254
+ clause = "EXPLAIN "
255
+ if use_output:
256
+ clause += "OUTPUT "
249
257
  if no_split:
250
- return "EXPLAIN " + sql_stmt
258
+ return clause + sql_stmt
251
259
  sql_parts = split_sql_by_semicolon(sql_stmt)
252
260
  if not sql_parts:
253
261
  raise ValueError(f"Cannot explain SQL statement {sql_stmt}")
254
- sql_parts[-1] = "EXPLAIN " + sql_parts[-1]
262
+ sql_parts[-1] = clause + sql_parts[-1]
255
263
  return "\n".join(sql_parts)
256
264
 
257
265
 
@@ -332,6 +340,62 @@ def _check_token_in_sql(token: str, sql: str) -> bool:
332
340
  return False
333
341
 
334
342
 
343
+ def _resolve_schema_by_explain(
344
+ odps_entry: ODPS,
345
+ query: str,
346
+ no_split_sql: bool = False,
347
+ hints: Dict[str, str] = None,
348
+ use_explain_output: bool = True,
349
+ ) -> OdpsSchema:
350
+ hints = (hints or dict()).copy()
351
+ hints["odps.sql.select.output.format"] = "json"
352
+ explain_stmt = _build_explain_sql(
353
+ query, no_split=no_split_sql, use_output=use_explain_output
354
+ )
355
+ inst = odps_entry.execute_sql(explain_stmt, hints=hints)
356
+ logger.debug("Explain output instance ID: %s", inst.id)
357
+ explain_str = list(inst.get_task_results().values())[0]
358
+ if use_explain_output:
359
+ if not explain_str or "nothing to explain" in explain_str:
360
+ raise ValueError("The SQL statement should be an instant query")
361
+ return TableSchema.parse(None, explain_str)
362
+ else:
363
+ return _parse_explained_schema(explain_str)
364
+
365
+
366
+ def _resolve_query_schema(
367
+ odps_entry: ODPS,
368
+ query: str,
369
+ no_split_sql: bool = False,
370
+ hints: Dict[str, str] = None,
371
+ use_explain_output: Optional[bool] = None,
372
+ ) -> OdpsSchema:
373
+ methods = []
374
+ if use_explain_output is not False:
375
+ # None or True
376
+ methods.append(_resolve_schema_by_explain)
377
+ if use_explain_output is not True:
378
+ # None or False
379
+ methods.append(
380
+ functools.partial(_resolve_schema_by_explain, use_explain_output=False)
381
+ )
382
+ for idx, resolve_method in enumerate(methods):
383
+ try:
384
+ return resolve_method(
385
+ odps_entry, query, no_split_sql=no_split_sql, hints=hints
386
+ )
387
+ except ODPSError as ex:
388
+ msg = (
389
+ f"Failed to obtain schema from SQL explain: {ex!r}\n"
390
+ f"Explain instance ID: {ex.instance_id}"
391
+ )
392
+ if idx + 1 == len(methods) or "ODPS-0130161" not in str(ex):
393
+ exc = ValueError(msg)
394
+ raise exc.with_traceback(ex.__traceback__) from None
395
+ # will this happen?
396
+ raise ValueError("Failed to obtain schema from SQL explain") # pragma: no cover
397
+
398
+
335
399
  def read_odps_query(
336
400
  query: str,
337
401
  odps_entry: ODPS = None,
@@ -371,6 +435,8 @@ def read_odps_query(
371
435
  DataFrame read from MaxCompute (ODPS) table
372
436
  """
373
437
  no_split_sql = kw.pop("no_split_sql", False)
438
+ # if use_explain_output is None, will try two methods.
439
+ use_explain_output = kw.pop("use_explain_output", None)
374
440
 
375
441
  hints = options.sql.settings.copy() or {}
376
442
  if sql_hints:
@@ -395,19 +461,13 @@ def read_odps_query(
395
461
 
396
462
  col_renames = {}
397
463
  if not skip_schema:
398
- explain_stmt = _build_explain_sql(query, no_split=no_split_sql)
399
- inst = odps_entry.execute_sql(explain_stmt, hints=hints)
400
- logger.debug("Explain instance ID: %s", inst.id)
401
- explain_str = list(inst.get_task_results().values())[0]
402
-
403
- try:
404
- odps_schema = _parse_explained_schema(explain_str)
405
- except BaseException as ex:
406
- exc = ValueError(
407
- f"Failed to obtain schema from SQL explain: {ex!r}"
408
- f"\nExplain instance ID: {inst.id}"
409
- )
410
- raise exc.with_traceback(ex.__traceback__) from None
464
+ odps_schema = _resolve_query_schema(
465
+ odps_entry,
466
+ query,
467
+ no_split_sql=no_split_sql,
468
+ hints=hints,
469
+ use_explain_output=use_explain_output,
470
+ )
411
471
 
412
472
  new_columns = []
413
473
  for col in odps_schema.columns:
@@ -17,11 +17,13 @@ import uuid
17
17
  from collections import OrderedDict
18
18
  from math import isinf
19
19
 
20
+ import mock
20
21
  import numpy as np
21
22
  import pandas as pd
22
23
  import pytest
23
24
  from odps import ODPS
24
25
  from odps import types as odps_types
26
+ from odps.errors import ODPSError
25
27
 
26
28
  from .... import tensor as mt
27
29
  from ....core import OutputType
@@ -50,6 +52,7 @@ from ..read_odps_query import (
50
52
  ColumnSchema,
51
53
  _parse_full_explain,
52
54
  _parse_simple_explain,
55
+ _resolve_query_schema,
53
56
  _resolve_task_sector,
54
57
  )
55
58
  from ..series import from_pandas as from_pandas_series
@@ -181,6 +184,23 @@ def test_from_tensor():
181
184
  df = dataframe_from_1d_tileables(d)
182
185
  pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.RangeIndex(2))
183
186
 
187
+ # test axis parameter for dataframe_from_1d_tileables
188
+ d = OrderedDict(
189
+ [("a", mt.tensor(np.random.rand(4))), ("b", mt.tensor(np.random.rand(4)))]
190
+ )
191
+
192
+ # axis=1 (default behavior) - keys become columns
193
+ df = dataframe_from_1d_tileables(d, axis=1)
194
+ assert df.shape == (4, 2)
195
+ pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.Index(["a", "b"]))
196
+ pd.testing.assert_index_equal(df.index_value.to_pandas(), pd.RangeIndex(4))
197
+
198
+ # axis=0 - keys become index (rows)
199
+ df = dataframe_from_1d_tileables(d, axis=0)
200
+ assert df.shape == (2, 4)
201
+ pd.testing.assert_index_equal(df.index_value.to_pandas(), pd.Index(["a", "b"]))
202
+ pd.testing.assert_index_equal(df.columns_value.to_pandas(), pd.RangeIndex(4))
203
+
184
204
  series = series_from_tensor(mt.random.rand(4))
185
205
  pd.testing.assert_index_equal(series.index_value.to_pandas(), pd.RangeIndex(4))
186
206
 
@@ -204,6 +224,26 @@ def test_from_tensor():
204
224
  with pytest.raises(ValueError):
205
225
  dataframe_from_tensor(mt.random.rand(4, 3), columns=["a", "b"])
206
226
 
227
+ # 1-d tensors should have same shape
228
+ with pytest.raises(ValueError):
229
+ dataframe_from_1d_tileables(
230
+ OrderedDict(
231
+ [(0, mt.tensor(np.random.rand(3))), (1, mt.tensor(np.random.rand(2)))]
232
+ )
233
+ )
234
+
235
+ # index has wrong shape
236
+ with pytest.raises(ValueError):
237
+ dataframe_from_1d_tileables(
238
+ {0: mt.tensor(np.random.rand(3))}, index=mt.tensor(np.random.rand(2))
239
+ )
240
+
241
+ # columns have wrong shape
242
+ with pytest.raises(ValueError):
243
+ dataframe_from_1d_tileables(
244
+ {0: mt.tensor(np.random.rand(3))}, columns=["a", "b"]
245
+ )
246
+
207
247
  # index should be 1-d
208
248
  with pytest.raises(ValueError):
209
249
  dataframe_from_tensor(
@@ -360,7 +400,7 @@ def test_from_odps_query():
360
400
 
361
401
  with pytest.raises(ValueError) as err_info:
362
402
  read_odps_query(
363
- f"CREATE TABLE dummy_table_{uuid.uuid4().hex} "
403
+ f"CREATE TABLE dummy_table_{uuid.uuid4().hex} LIFECYCLE 1 "
364
404
  f"AS SELECT * FROM {table1_name}"
365
405
  )
366
406
  assert "instant query" in err_info.value.args[0]
@@ -578,3 +618,46 @@ def test_resolve_break_lines():
578
618
  for col, (exp_nm, exp_tp) in zip(schema.columns, expected_col_types.items()):
579
619
  assert col.name == exp_nm
580
620
  assert col.type == odps_types.validate_data_type(exp_tp)
621
+
622
+
623
+ @pytest.mark.parametrize("use_explain_output", [None, False, True])
624
+ def test_explain_use_explain_output(use_explain_output):
625
+ class MockInstance:
626
+ @property
627
+ def id(self):
628
+ return "mock_id"
629
+
630
+ def get_task_results(self):
631
+ return {"pot": """{"columns":[{"name":"a_bigint","type":"BIGINT"}]}"""}
632
+
633
+ old_execute_sql = ODPS.execute_sql
634
+ exec_count = 0
635
+
636
+ def new_execute_sql(self, sql, *args, **kw):
637
+ nonlocal exec_count
638
+ exec_count += 1
639
+
640
+ if use_explain_output and sql.lower().startswith("explain output select"):
641
+ return MockInstance()
642
+ elif use_explain_output is None and sql.lower().startswith("explain output"):
643
+ raise ODPSError("ODPS-0130161: mock error")
644
+ return old_execute_sql(self, sql, *args, **kw)
645
+
646
+ odps_entry = ODPS.from_environments()
647
+
648
+ with mock.patch("odps.core.ODPS.execute_sql", new=new_execute_sql):
649
+ with pytest.raises(ValueError):
650
+ _resolve_query_schema(
651
+ odps_entry, "not_a_sql", use_explain_output=use_explain_output
652
+ )
653
+ assert exec_count == (2 if use_explain_output is None else 1)
654
+
655
+ exec_count = 0
656
+ schema = _resolve_query_schema(
657
+ odps_entry,
658
+ "select cast(1 as bigint) as a_bigint",
659
+ use_explain_output=use_explain_output,
660
+ )
661
+ assert schema.columns[0].name == "a_bigint"
662
+ assert schema.columns[0].type == odps_types.bigint
663
+ assert exec_count == (2 if use_explain_output is None else 1)
@@ -12,14 +12,18 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from .to_csv import to_csv
15
16
  from .to_odps import to_odps_table
16
17
 
17
18
 
18
19
  def _install():
19
- from ..core import DATAFRAME_TYPE
20
+ from ..core import DATAFRAME_TYPE, SERIES_TYPE
20
21
 
21
22
  for t in DATAFRAME_TYPE:
23
+ t.to_csv = to_csv
22
24
  t.to_odps_table = to_odps_table
25
+ for t in SERIES_TYPE:
26
+ t.to_csv = to_csv
23
27
 
24
28
 
25
29
  _install()