maxframe 0.1.0b5__cp310-cp310-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (647)
  1. maxframe/__init__.py +32 -0
  2. maxframe/_utils.cpython-310-darwin.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyx +547 -0
  5. maxframe/codegen.py +528 -0
  6. maxframe/config/__init__.py +15 -0
  7. maxframe/config/config.py +443 -0
  8. maxframe/config/tests/__init__.py +13 -0
  9. maxframe/config/tests/test_config.py +103 -0
  10. maxframe/config/tests/test_validators.py +34 -0
  11. maxframe/config/validators.py +57 -0
  12. maxframe/conftest.py +139 -0
  13. maxframe/core/__init__.py +65 -0
  14. maxframe/core/base.py +156 -0
  15. maxframe/core/entity/__init__.py +44 -0
  16. maxframe/core/entity/chunks.py +68 -0
  17. maxframe/core/entity/core.py +152 -0
  18. maxframe/core/entity/executable.py +337 -0
  19. maxframe/core/entity/fuse.py +73 -0
  20. maxframe/core/entity/objects.py +100 -0
  21. maxframe/core/entity/output_types.py +90 -0
  22. maxframe/core/entity/tileables.py +438 -0
  23. maxframe/core/entity/utils.py +24 -0
  24. maxframe/core/graph/__init__.py +17 -0
  25. maxframe/core/graph/builder/__init__.py +16 -0
  26. maxframe/core/graph/builder/base.py +86 -0
  27. maxframe/core/graph/builder/chunk.py +430 -0
  28. maxframe/core/graph/builder/tileable.py +34 -0
  29. maxframe/core/graph/builder/utils.py +41 -0
  30. maxframe/core/graph/core.cpython-310-darwin.so +0 -0
  31. maxframe/core/graph/core.pyx +467 -0
  32. maxframe/core/graph/entity.py +171 -0
  33. maxframe/core/graph/tests/__init__.py +13 -0
  34. maxframe/core/graph/tests/test_graph.py +205 -0
  35. maxframe/core/mode.py +96 -0
  36. maxframe/core/operator/__init__.py +34 -0
  37. maxframe/core/operator/base.py +450 -0
  38. maxframe/core/operator/core.py +276 -0
  39. maxframe/core/operator/fetch.py +53 -0
  40. maxframe/core/operator/fuse.py +29 -0
  41. maxframe/core/operator/objects.py +72 -0
  42. maxframe/core/operator/shuffle.py +111 -0
  43. maxframe/core/operator/tests/__init__.py +13 -0
  44. maxframe/core/operator/tests/test_core.py +64 -0
  45. maxframe/core/tests/__init__.py +13 -0
  46. maxframe/core/tests/test_mode.py +75 -0
  47. maxframe/dataframe/__init__.py +81 -0
  48. maxframe/dataframe/arithmetic/__init__.py +359 -0
  49. maxframe/dataframe/arithmetic/abs.py +33 -0
  50. maxframe/dataframe/arithmetic/add.py +60 -0
  51. maxframe/dataframe/arithmetic/arccos.py +28 -0
  52. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  53. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  54. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  55. maxframe/dataframe/arithmetic/arctan.py +28 -0
  56. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  57. maxframe/dataframe/arithmetic/around.py +152 -0
  58. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  59. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  60. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  61. maxframe/dataframe/arithmetic/ceil.py +28 -0
  62. maxframe/dataframe/arithmetic/core.py +342 -0
  63. maxframe/dataframe/arithmetic/cos.py +28 -0
  64. maxframe/dataframe/arithmetic/cosh.py +28 -0
  65. maxframe/dataframe/arithmetic/degrees.py +28 -0
  66. maxframe/dataframe/arithmetic/docstring.py +442 -0
  67. maxframe/dataframe/arithmetic/equal.py +56 -0
  68. maxframe/dataframe/arithmetic/exp.py +28 -0
  69. maxframe/dataframe/arithmetic/exp2.py +28 -0
  70. maxframe/dataframe/arithmetic/expm1.py +28 -0
  71. maxframe/dataframe/arithmetic/floor.py +28 -0
  72. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  73. maxframe/dataframe/arithmetic/greater.py +57 -0
  74. maxframe/dataframe/arithmetic/greater_equal.py +57 -0
  75. maxframe/dataframe/arithmetic/invert.py +33 -0
  76. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  77. maxframe/dataframe/arithmetic/less.py +57 -0
  78. maxframe/dataframe/arithmetic/less_equal.py +57 -0
  79. maxframe/dataframe/arithmetic/log.py +28 -0
  80. maxframe/dataframe/arithmetic/log10.py +28 -0
  81. maxframe/dataframe/arithmetic/log2.py +28 -0
  82. maxframe/dataframe/arithmetic/mod.py +60 -0
  83. maxframe/dataframe/arithmetic/multiply.py +60 -0
  84. maxframe/dataframe/arithmetic/negative.py +33 -0
  85. maxframe/dataframe/arithmetic/not_equal.py +56 -0
  86. maxframe/dataframe/arithmetic/power.py +68 -0
  87. maxframe/dataframe/arithmetic/radians.py +28 -0
  88. maxframe/dataframe/arithmetic/sin.py +28 -0
  89. maxframe/dataframe/arithmetic/sinh.py +28 -0
  90. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  91. maxframe/dataframe/arithmetic/subtract.py +64 -0
  92. maxframe/dataframe/arithmetic/tan.py +28 -0
  93. maxframe/dataframe/arithmetic/tanh.py +28 -0
  94. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  95. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
  96. maxframe/dataframe/arithmetic/truediv.py +64 -0
  97. maxframe/dataframe/arithmetic/trunc.py +28 -0
  98. maxframe/dataframe/arrays.py +864 -0
  99. maxframe/dataframe/core.py +2417 -0
  100. maxframe/dataframe/datasource/__init__.py +15 -0
  101. maxframe/dataframe/datasource/core.py +81 -0
  102. maxframe/dataframe/datasource/dataframe.py +59 -0
  103. maxframe/dataframe/datasource/date_range.py +504 -0
  104. maxframe/dataframe/datasource/from_index.py +54 -0
  105. maxframe/dataframe/datasource/from_records.py +107 -0
  106. maxframe/dataframe/datasource/from_tensor.py +419 -0
  107. maxframe/dataframe/datasource/index.py +117 -0
  108. maxframe/dataframe/datasource/read_csv.py +528 -0
  109. maxframe/dataframe/datasource/read_odps_query.py +299 -0
  110. maxframe/dataframe/datasource/read_odps_table.py +253 -0
  111. maxframe/dataframe/datasource/read_parquet.py +421 -0
  112. maxframe/dataframe/datasource/series.py +55 -0
  113. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  114. maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
  115. maxframe/dataframe/datastore/__init__.py +26 -0
  116. maxframe/dataframe/datastore/core.py +19 -0
  117. maxframe/dataframe/datastore/to_csv.py +227 -0
  118. maxframe/dataframe/datastore/to_odps.py +162 -0
  119. maxframe/dataframe/extensions/__init__.py +41 -0
  120. maxframe/dataframe/extensions/accessor.py +50 -0
  121. maxframe/dataframe/extensions/reshuffle.py +83 -0
  122. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  123. maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
  124. maxframe/dataframe/fetch/__init__.py +15 -0
  125. maxframe/dataframe/fetch/core.py +86 -0
  126. maxframe/dataframe/groupby/__init__.py +82 -0
  127. maxframe/dataframe/groupby/aggregation.py +350 -0
  128. maxframe/dataframe/groupby/apply.py +251 -0
  129. maxframe/dataframe/groupby/core.py +179 -0
  130. maxframe/dataframe/groupby/cum.py +124 -0
  131. maxframe/dataframe/groupby/fill.py +141 -0
  132. maxframe/dataframe/groupby/getitem.py +92 -0
  133. maxframe/dataframe/groupby/head.py +105 -0
  134. maxframe/dataframe/groupby/sample.py +214 -0
  135. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  136. maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
  137. maxframe/dataframe/groupby/transform.py +255 -0
  138. maxframe/dataframe/indexing/__init__.py +84 -0
  139. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  140. maxframe/dataframe/indexing/align.py +349 -0
  141. maxframe/dataframe/indexing/at.py +83 -0
  142. maxframe/dataframe/indexing/getitem.py +204 -0
  143. maxframe/dataframe/indexing/iat.py +37 -0
  144. maxframe/dataframe/indexing/iloc.py +566 -0
  145. maxframe/dataframe/indexing/insert.py +86 -0
  146. maxframe/dataframe/indexing/loc.py +411 -0
  147. maxframe/dataframe/indexing/reindex.py +526 -0
  148. maxframe/dataframe/indexing/rename.py +462 -0
  149. maxframe/dataframe/indexing/rename_axis.py +209 -0
  150. maxframe/dataframe/indexing/reset_index.py +402 -0
  151. maxframe/dataframe/indexing/sample.py +221 -0
  152. maxframe/dataframe/indexing/set_axis.py +194 -0
  153. maxframe/dataframe/indexing/set_index.py +61 -0
  154. maxframe/dataframe/indexing/setitem.py +130 -0
  155. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  156. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  157. maxframe/dataframe/indexing/where.py +308 -0
  158. maxframe/dataframe/initializer.py +288 -0
  159. maxframe/dataframe/merge/__init__.py +32 -0
  160. maxframe/dataframe/merge/append.py +121 -0
  161. maxframe/dataframe/merge/concat.py +325 -0
  162. maxframe/dataframe/merge/merge.py +593 -0
  163. maxframe/dataframe/merge/tests/__init__.py +13 -0
  164. maxframe/dataframe/merge/tests/test_merge.py +215 -0
  165. maxframe/dataframe/misc/__init__.py +134 -0
  166. maxframe/dataframe/misc/_duplicate.py +46 -0
  167. maxframe/dataframe/misc/accessor.py +276 -0
  168. maxframe/dataframe/misc/apply.py +692 -0
  169. maxframe/dataframe/misc/astype.py +236 -0
  170. maxframe/dataframe/misc/case_when.py +141 -0
  171. maxframe/dataframe/misc/check_monotonic.py +84 -0
  172. maxframe/dataframe/misc/cut.py +383 -0
  173. maxframe/dataframe/misc/datetimes.py +79 -0
  174. maxframe/dataframe/misc/describe.py +108 -0
  175. maxframe/dataframe/misc/diff.py +210 -0
  176. maxframe/dataframe/misc/drop.py +440 -0
  177. maxframe/dataframe/misc/drop_duplicates.py +248 -0
  178. maxframe/dataframe/misc/duplicated.py +292 -0
  179. maxframe/dataframe/misc/eval.py +728 -0
  180. maxframe/dataframe/misc/explode.py +171 -0
  181. maxframe/dataframe/misc/get_dummies.py +208 -0
  182. maxframe/dataframe/misc/isin.py +217 -0
  183. maxframe/dataframe/misc/map.py +236 -0
  184. maxframe/dataframe/misc/melt.py +162 -0
  185. maxframe/dataframe/misc/memory_usage.py +248 -0
  186. maxframe/dataframe/misc/pct_change.py +150 -0
  187. maxframe/dataframe/misc/pivot_table.py +262 -0
  188. maxframe/dataframe/misc/qcut.py +104 -0
  189. maxframe/dataframe/misc/select_dtypes.py +104 -0
  190. maxframe/dataframe/misc/shift.py +256 -0
  191. maxframe/dataframe/misc/stack.py +238 -0
  192. maxframe/dataframe/misc/string_.py +221 -0
  193. maxframe/dataframe/misc/tests/__init__.py +13 -0
  194. maxframe/dataframe/misc/tests/test_misc.py +468 -0
  195. maxframe/dataframe/misc/to_numeric.py +178 -0
  196. maxframe/dataframe/misc/transform.py +361 -0
  197. maxframe/dataframe/misc/transpose.py +136 -0
  198. maxframe/dataframe/misc/value_counts.py +182 -0
  199. maxframe/dataframe/missing/__init__.py +53 -0
  200. maxframe/dataframe/missing/checkna.py +223 -0
  201. maxframe/dataframe/missing/dropna.py +280 -0
  202. maxframe/dataframe/missing/fillna.py +275 -0
  203. maxframe/dataframe/missing/replace.py +439 -0
  204. maxframe/dataframe/missing/tests/__init__.py +13 -0
  205. maxframe/dataframe/missing/tests/test_missing.py +89 -0
  206. maxframe/dataframe/operators.py +273 -0
  207. maxframe/dataframe/plotting/__init__.py +40 -0
  208. maxframe/dataframe/plotting/core.py +78 -0
  209. maxframe/dataframe/plotting/tests/__init__.py +13 -0
  210. maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
  211. maxframe/dataframe/reduction/__init__.py +107 -0
  212. maxframe/dataframe/reduction/aggregation.py +344 -0
  213. maxframe/dataframe/reduction/all.py +78 -0
  214. maxframe/dataframe/reduction/any.py +78 -0
  215. maxframe/dataframe/reduction/core.py +837 -0
  216. maxframe/dataframe/reduction/count.py +59 -0
  217. maxframe/dataframe/reduction/cummax.py +30 -0
  218. maxframe/dataframe/reduction/cummin.py +30 -0
  219. maxframe/dataframe/reduction/cumprod.py +30 -0
  220. maxframe/dataframe/reduction/cumsum.py +30 -0
  221. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  222. maxframe/dataframe/reduction/kurtosis.py +104 -0
  223. maxframe/dataframe/reduction/max.py +65 -0
  224. maxframe/dataframe/reduction/mean.py +61 -0
  225. maxframe/dataframe/reduction/min.py +65 -0
  226. maxframe/dataframe/reduction/nunique.py +141 -0
  227. maxframe/dataframe/reduction/prod.py +76 -0
  228. maxframe/dataframe/reduction/reduction_size.py +36 -0
  229. maxframe/dataframe/reduction/sem.py +69 -0
  230. maxframe/dataframe/reduction/skew.py +89 -0
  231. maxframe/dataframe/reduction/std.py +53 -0
  232. maxframe/dataframe/reduction/str_concat.py +48 -0
  233. maxframe/dataframe/reduction/sum.py +77 -0
  234. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  235. maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
  236. maxframe/dataframe/reduction/unique.py +90 -0
  237. maxframe/dataframe/reduction/var.py +72 -0
  238. maxframe/dataframe/sort/__init__.py +34 -0
  239. maxframe/dataframe/sort/core.py +36 -0
  240. maxframe/dataframe/sort/sort_index.py +153 -0
  241. maxframe/dataframe/sort/sort_values.py +311 -0
  242. maxframe/dataframe/sort/tests/__init__.py +13 -0
  243. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  244. maxframe/dataframe/statistics/__init__.py +33 -0
  245. maxframe/dataframe/statistics/corr.py +280 -0
  246. maxframe/dataframe/statistics/quantile.py +341 -0
  247. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  248. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  249. maxframe/dataframe/tests/__init__.py +13 -0
  250. maxframe/dataframe/tests/test_initializer.py +29 -0
  251. maxframe/dataframe/tseries/__init__.py +13 -0
  252. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  253. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  254. maxframe/dataframe/tseries/to_datetime.py +297 -0
  255. maxframe/dataframe/ufunc/__init__.py +27 -0
  256. maxframe/dataframe/ufunc/tensor.py +54 -0
  257. maxframe/dataframe/ufunc/ufunc.py +52 -0
  258. maxframe/dataframe/utils.py +1267 -0
  259. maxframe/dataframe/window/__init__.py +29 -0
  260. maxframe/dataframe/window/aggregation.py +96 -0
  261. maxframe/dataframe/window/core.py +69 -0
  262. maxframe/dataframe/window/ewm.py +249 -0
  263. maxframe/dataframe/window/expanding.py +147 -0
  264. maxframe/dataframe/window/rolling.py +376 -0
  265. maxframe/dataframe/window/tests/__init__.py +13 -0
  266. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  267. maxframe/dataframe/window/tests/test_expanding.py +66 -0
  268. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  269. maxframe/env.py +33 -0
  270. maxframe/errors.py +21 -0
  271. maxframe/extension.py +81 -0
  272. maxframe/learn/__init__.py +17 -0
  273. maxframe/learn/contrib/__init__.py +17 -0
  274. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  275. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  276. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  277. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  278. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  279. maxframe/learn/contrib/utils.py +52 -0
  280. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  281. maxframe/learn/contrib/xgboost/classifier.py +86 -0
  282. maxframe/learn/contrib/xgboost/core.py +156 -0
  283. maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
  284. maxframe/learn/contrib/xgboost/predict.py +138 -0
  285. maxframe/learn/contrib/xgboost/regressor.py +78 -0
  286. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  287. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  288. maxframe/learn/contrib/xgboost/train.py +121 -0
  289. maxframe/learn/utils/__init__.py +15 -0
  290. maxframe/learn/utils/core.py +29 -0
  291. maxframe/lib/__init__.py +15 -0
  292. maxframe/lib/aio/__init__.py +27 -0
  293. maxframe/lib/aio/_runners.py +162 -0
  294. maxframe/lib/aio/_threads.py +35 -0
  295. maxframe/lib/aio/base.py +82 -0
  296. maxframe/lib/aio/file.py +85 -0
  297. maxframe/lib/aio/isolation.py +100 -0
  298. maxframe/lib/aio/lru.py +242 -0
  299. maxframe/lib/aio/parallelism.py +37 -0
  300. maxframe/lib/aio/tests/__init__.py +13 -0
  301. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  302. maxframe/lib/compression.py +55 -0
  303. maxframe/lib/cython/__init__.py +13 -0
  304. maxframe/lib/cython/libcpp.pxd +30 -0
  305. maxframe/lib/filesystem/__init__.py +21 -0
  306. maxframe/lib/filesystem/_glob.py +173 -0
  307. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  308. maxframe/lib/filesystem/_oss_lib/common.py +198 -0
  309. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  310. maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
  311. maxframe/lib/filesystem/arrow.py +236 -0
  312. maxframe/lib/filesystem/base.py +263 -0
  313. maxframe/lib/filesystem/core.py +95 -0
  314. maxframe/lib/filesystem/fsmap.py +164 -0
  315. maxframe/lib/filesystem/hdfs.py +31 -0
  316. maxframe/lib/filesystem/local.py +112 -0
  317. maxframe/lib/filesystem/oss.py +157 -0
  318. maxframe/lib/filesystem/tests/__init__.py +13 -0
  319. maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
  320. maxframe/lib/filesystem/tests/test_oss.py +182 -0
  321. maxframe/lib/functools_compat.py +81 -0
  322. maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
  323. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  324. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  325. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  326. maxframe/lib/sparse/__init__.py +861 -0
  327. maxframe/lib/sparse/array.py +1604 -0
  328. maxframe/lib/sparse/core.py +92 -0
  329. maxframe/lib/sparse/matrix.py +241 -0
  330. maxframe/lib/sparse/tests/__init__.py +15 -0
  331. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  332. maxframe/lib/sparse/vector.py +150 -0
  333. maxframe/lib/tblib/LICENSE +20 -0
  334. maxframe/lib/tblib/__init__.py +327 -0
  335. maxframe/lib/tblib/cpython.py +83 -0
  336. maxframe/lib/tblib/decorators.py +44 -0
  337. maxframe/lib/tblib/pickling_support.py +90 -0
  338. maxframe/lib/tests/__init__.py +13 -0
  339. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  340. maxframe/lib/version.py +620 -0
  341. maxframe/lib/wrapped_pickle.py +139 -0
  342. maxframe/mixin.py +100 -0
  343. maxframe/odpsio/__init__.py +21 -0
  344. maxframe/odpsio/arrow.py +91 -0
  345. maxframe/odpsio/schema.py +364 -0
  346. maxframe/odpsio/tableio.py +322 -0
  347. maxframe/odpsio/tests/__init__.py +13 -0
  348. maxframe/odpsio/tests/test_arrow.py +88 -0
  349. maxframe/odpsio/tests/test_schema.py +297 -0
  350. maxframe/odpsio/tests/test_tableio.py +136 -0
  351. maxframe/odpsio/tests/test_volumeio.py +90 -0
  352. maxframe/odpsio/volumeio.py +95 -0
  353. maxframe/opcodes.py +590 -0
  354. maxframe/protocol.py +415 -0
  355. maxframe/remote/__init__.py +18 -0
  356. maxframe/remote/core.py +210 -0
  357. maxframe/remote/run_script.py +121 -0
  358. maxframe/serialization/__init__.py +26 -0
  359. maxframe/serialization/arrow.py +95 -0
  360. maxframe/serialization/core.cpython-310-darwin.so +0 -0
  361. maxframe/serialization/core.pxd +44 -0
  362. maxframe/serialization/core.pyi +61 -0
  363. maxframe/serialization/core.pyx +1094 -0
  364. maxframe/serialization/exception.py +86 -0
  365. maxframe/serialization/maxframe_objects.py +39 -0
  366. maxframe/serialization/numpy.py +91 -0
  367. maxframe/serialization/pandas.py +202 -0
  368. maxframe/serialization/scipy.py +71 -0
  369. maxframe/serialization/serializables/__init__.py +55 -0
  370. maxframe/serialization/serializables/core.py +262 -0
  371. maxframe/serialization/serializables/field.py +624 -0
  372. maxframe/serialization/serializables/field_type.py +589 -0
  373. maxframe/serialization/serializables/tests/__init__.py +13 -0
  374. maxframe/serialization/serializables/tests/test_field_type.py +121 -0
  375. maxframe/serialization/serializables/tests/test_serializable.py +250 -0
  376. maxframe/serialization/tests/__init__.py +13 -0
  377. maxframe/serialization/tests/test_serial.py +412 -0
  378. maxframe/session.py +1310 -0
  379. maxframe/tensor/__init__.py +183 -0
  380. maxframe/tensor/arithmetic/__init__.py +315 -0
  381. maxframe/tensor/arithmetic/abs.py +68 -0
  382. maxframe/tensor/arithmetic/absolute.py +68 -0
  383. maxframe/tensor/arithmetic/add.py +82 -0
  384. maxframe/tensor/arithmetic/angle.py +72 -0
  385. maxframe/tensor/arithmetic/arccos.py +104 -0
  386. maxframe/tensor/arithmetic/arccosh.py +91 -0
  387. maxframe/tensor/arithmetic/arcsin.py +94 -0
  388. maxframe/tensor/arithmetic/arcsinh.py +86 -0
  389. maxframe/tensor/arithmetic/arctan.py +106 -0
  390. maxframe/tensor/arithmetic/arctan2.py +128 -0
  391. maxframe/tensor/arithmetic/arctanh.py +86 -0
  392. maxframe/tensor/arithmetic/around.py +114 -0
  393. maxframe/tensor/arithmetic/bitand.py +95 -0
  394. maxframe/tensor/arithmetic/bitor.py +102 -0
  395. maxframe/tensor/arithmetic/bitxor.py +95 -0
  396. maxframe/tensor/arithmetic/cbrt.py +66 -0
  397. maxframe/tensor/arithmetic/ceil.py +71 -0
  398. maxframe/tensor/arithmetic/clip.py +165 -0
  399. maxframe/tensor/arithmetic/conj.py +74 -0
  400. maxframe/tensor/arithmetic/copysign.py +78 -0
  401. maxframe/tensor/arithmetic/core.py +544 -0
  402. maxframe/tensor/arithmetic/cos.py +85 -0
  403. maxframe/tensor/arithmetic/cosh.py +72 -0
  404. maxframe/tensor/arithmetic/deg2rad.py +72 -0
  405. maxframe/tensor/arithmetic/degrees.py +77 -0
  406. maxframe/tensor/arithmetic/divide.py +114 -0
  407. maxframe/tensor/arithmetic/equal.py +76 -0
  408. maxframe/tensor/arithmetic/exp.py +106 -0
  409. maxframe/tensor/arithmetic/exp2.py +67 -0
  410. maxframe/tensor/arithmetic/expm1.py +79 -0
  411. maxframe/tensor/arithmetic/fabs.py +74 -0
  412. maxframe/tensor/arithmetic/fix.py +69 -0
  413. maxframe/tensor/arithmetic/float_power.py +103 -0
  414. maxframe/tensor/arithmetic/floor.py +77 -0
  415. maxframe/tensor/arithmetic/floordiv.py +94 -0
  416. maxframe/tensor/arithmetic/fmax.py +105 -0
  417. maxframe/tensor/arithmetic/fmin.py +106 -0
  418. maxframe/tensor/arithmetic/fmod.py +99 -0
  419. maxframe/tensor/arithmetic/frexp.py +92 -0
  420. maxframe/tensor/arithmetic/greater.py +77 -0
  421. maxframe/tensor/arithmetic/greater_equal.py +69 -0
  422. maxframe/tensor/arithmetic/hypot.py +77 -0
  423. maxframe/tensor/arithmetic/i0.py +89 -0
  424. maxframe/tensor/arithmetic/imag.py +67 -0
  425. maxframe/tensor/arithmetic/invert.py +110 -0
  426. maxframe/tensor/arithmetic/isclose.py +115 -0
  427. maxframe/tensor/arithmetic/iscomplex.py +64 -0
  428. maxframe/tensor/arithmetic/isfinite.py +106 -0
  429. maxframe/tensor/arithmetic/isinf.py +103 -0
  430. maxframe/tensor/arithmetic/isnan.py +82 -0
  431. maxframe/tensor/arithmetic/isreal.py +63 -0
  432. maxframe/tensor/arithmetic/ldexp.py +99 -0
  433. maxframe/tensor/arithmetic/less.py +69 -0
  434. maxframe/tensor/arithmetic/less_equal.py +69 -0
  435. maxframe/tensor/arithmetic/log.py +92 -0
  436. maxframe/tensor/arithmetic/log10.py +85 -0
  437. maxframe/tensor/arithmetic/log1p.py +95 -0
  438. maxframe/tensor/arithmetic/log2.py +85 -0
  439. maxframe/tensor/arithmetic/logaddexp.py +80 -0
  440. maxframe/tensor/arithmetic/logaddexp2.py +78 -0
  441. maxframe/tensor/arithmetic/logical_and.py +81 -0
  442. maxframe/tensor/arithmetic/logical_not.py +74 -0
  443. maxframe/tensor/arithmetic/logical_or.py +82 -0
  444. maxframe/tensor/arithmetic/logical_xor.py +88 -0
  445. maxframe/tensor/arithmetic/lshift.py +82 -0
  446. maxframe/tensor/arithmetic/maximum.py +108 -0
  447. maxframe/tensor/arithmetic/minimum.py +108 -0
  448. maxframe/tensor/arithmetic/mod.py +104 -0
  449. maxframe/tensor/arithmetic/modf.py +83 -0
  450. maxframe/tensor/arithmetic/multiply.py +81 -0
  451. maxframe/tensor/arithmetic/nan_to_num.py +99 -0
  452. maxframe/tensor/arithmetic/negative.py +65 -0
  453. maxframe/tensor/arithmetic/nextafter.py +68 -0
  454. maxframe/tensor/arithmetic/not_equal.py +72 -0
  455. maxframe/tensor/arithmetic/positive.py +47 -0
  456. maxframe/tensor/arithmetic/power.py +106 -0
  457. maxframe/tensor/arithmetic/rad2deg.py +71 -0
  458. maxframe/tensor/arithmetic/radians.py +77 -0
  459. maxframe/tensor/arithmetic/real.py +70 -0
  460. maxframe/tensor/arithmetic/reciprocal.py +76 -0
  461. maxframe/tensor/arithmetic/rint.py +68 -0
  462. maxframe/tensor/arithmetic/rshift.py +81 -0
  463. maxframe/tensor/arithmetic/setimag.py +29 -0
  464. maxframe/tensor/arithmetic/setreal.py +29 -0
  465. maxframe/tensor/arithmetic/sign.py +81 -0
  466. maxframe/tensor/arithmetic/signbit.py +65 -0
  467. maxframe/tensor/arithmetic/sin.py +98 -0
  468. maxframe/tensor/arithmetic/sinc.py +102 -0
  469. maxframe/tensor/arithmetic/sinh.py +93 -0
  470. maxframe/tensor/arithmetic/spacing.py +72 -0
  471. maxframe/tensor/arithmetic/sqrt.py +81 -0
  472. maxframe/tensor/arithmetic/square.py +69 -0
  473. maxframe/tensor/arithmetic/subtract.py +81 -0
  474. maxframe/tensor/arithmetic/tan.py +88 -0
  475. maxframe/tensor/arithmetic/tanh.py +92 -0
  476. maxframe/tensor/arithmetic/tests/__init__.py +15 -0
  477. maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
  478. maxframe/tensor/arithmetic/truediv.py +104 -0
  479. maxframe/tensor/arithmetic/trunc.py +72 -0
  480. maxframe/tensor/arithmetic/utils.py +65 -0
  481. maxframe/tensor/array_utils.py +186 -0
  482. maxframe/tensor/base/__init__.py +34 -0
  483. maxframe/tensor/base/astype.py +119 -0
  484. maxframe/tensor/base/atleast_1d.py +74 -0
  485. maxframe/tensor/base/broadcast_to.py +89 -0
  486. maxframe/tensor/base/ravel.py +92 -0
  487. maxframe/tensor/base/tests/__init__.py +13 -0
  488. maxframe/tensor/base/tests/test_base.py +114 -0
  489. maxframe/tensor/base/transpose.py +125 -0
  490. maxframe/tensor/base/unique.py +205 -0
  491. maxframe/tensor/base/where.py +127 -0
  492. maxframe/tensor/core.py +724 -0
  493. maxframe/tensor/datasource/__init__.py +32 -0
  494. maxframe/tensor/datasource/arange.py +156 -0
  495. maxframe/tensor/datasource/array.py +415 -0
  496. maxframe/tensor/datasource/core.py +109 -0
  497. maxframe/tensor/datasource/empty.py +169 -0
  498. maxframe/tensor/datasource/from_dataframe.py +70 -0
  499. maxframe/tensor/datasource/from_dense.py +54 -0
  500. maxframe/tensor/datasource/from_sparse.py +47 -0
  501. maxframe/tensor/datasource/full.py +186 -0
  502. maxframe/tensor/datasource/ones.py +173 -0
  503. maxframe/tensor/datasource/scalar.py +40 -0
  504. maxframe/tensor/datasource/tests/__init__.py +13 -0
  505. maxframe/tensor/datasource/tests/test_datasource.py +278 -0
  506. maxframe/tensor/datasource/zeros.py +188 -0
  507. maxframe/tensor/fetch/__init__.py +15 -0
  508. maxframe/tensor/fetch/core.py +54 -0
  509. maxframe/tensor/indexing/__init__.py +47 -0
  510. maxframe/tensor/indexing/choose.py +196 -0
  511. maxframe/tensor/indexing/compress.py +124 -0
  512. maxframe/tensor/indexing/core.py +190 -0
  513. maxframe/tensor/indexing/extract.py +71 -0
  514. maxframe/tensor/indexing/fill_diagonal.py +183 -0
  515. maxframe/tensor/indexing/flatnonzero.py +60 -0
  516. maxframe/tensor/indexing/getitem.py +175 -0
  517. maxframe/tensor/indexing/nonzero.py +120 -0
  518. maxframe/tensor/indexing/setitem.py +132 -0
  519. maxframe/tensor/indexing/slice.py +29 -0
  520. maxframe/tensor/indexing/take.py +130 -0
  521. maxframe/tensor/indexing/tests/__init__.py +15 -0
  522. maxframe/tensor/indexing/tests/test_indexing.py +234 -0
  523. maxframe/tensor/indexing/unravel_index.py +103 -0
  524. maxframe/tensor/merge/__init__.py +15 -0
  525. maxframe/tensor/merge/stack.py +132 -0
  526. maxframe/tensor/merge/tests/__init__.py +13 -0
  527. maxframe/tensor/merge/tests/test_merge.py +52 -0
  528. maxframe/tensor/operators.py +123 -0
  529. maxframe/tensor/random/__init__.py +168 -0
  530. maxframe/tensor/random/beta.py +87 -0
  531. maxframe/tensor/random/binomial.py +137 -0
  532. maxframe/tensor/random/bytes.py +39 -0
  533. maxframe/tensor/random/chisquare.py +110 -0
  534. maxframe/tensor/random/choice.py +186 -0
  535. maxframe/tensor/random/core.py +234 -0
  536. maxframe/tensor/random/dirichlet.py +123 -0
  537. maxframe/tensor/random/exponential.py +94 -0
  538. maxframe/tensor/random/f.py +135 -0
  539. maxframe/tensor/random/gamma.py +128 -0
  540. maxframe/tensor/random/geometric.py +93 -0
  541. maxframe/tensor/random/gumbel.py +167 -0
  542. maxframe/tensor/random/hypergeometric.py +148 -0
  543. maxframe/tensor/random/laplace.py +133 -0
  544. maxframe/tensor/random/logistic.py +129 -0
  545. maxframe/tensor/random/lognormal.py +159 -0
  546. maxframe/tensor/random/logseries.py +122 -0
  547. maxframe/tensor/random/multinomial.py +133 -0
  548. maxframe/tensor/random/multivariate_normal.py +192 -0
  549. maxframe/tensor/random/negative_binomial.py +125 -0
  550. maxframe/tensor/random/noncentral_chisquare.py +132 -0
  551. maxframe/tensor/random/noncentral_f.py +126 -0
  552. maxframe/tensor/random/normal.py +143 -0
  553. maxframe/tensor/random/pareto.py +140 -0
  554. maxframe/tensor/random/permutation.py +104 -0
  555. maxframe/tensor/random/poisson.py +111 -0
  556. maxframe/tensor/random/power.py +142 -0
  557. maxframe/tensor/random/rand.py +82 -0
  558. maxframe/tensor/random/randint.py +121 -0
  559. maxframe/tensor/random/randn.py +96 -0
  560. maxframe/tensor/random/random_integers.py +123 -0
  561. maxframe/tensor/random/random_sample.py +86 -0
  562. maxframe/tensor/random/rayleigh.py +110 -0
  563. maxframe/tensor/random/shuffle.py +61 -0
  564. maxframe/tensor/random/standard_cauchy.py +105 -0
  565. maxframe/tensor/random/standard_exponential.py +72 -0
  566. maxframe/tensor/random/standard_gamma.py +120 -0
  567. maxframe/tensor/random/standard_normal.py +74 -0
  568. maxframe/tensor/random/standard_t.py +135 -0
  569. maxframe/tensor/random/tests/__init__.py +15 -0
  570. maxframe/tensor/random/tests/test_random.py +167 -0
  571. maxframe/tensor/random/triangular.py +119 -0
  572. maxframe/tensor/random/uniform.py +131 -0
  573. maxframe/tensor/random/vonmises.py +131 -0
  574. maxframe/tensor/random/wald.py +114 -0
  575. maxframe/tensor/random/weibull.py +140 -0
  576. maxframe/tensor/random/zipf.py +122 -0
  577. maxframe/tensor/rechunk/__init__.py +26 -0
  578. maxframe/tensor/rechunk/rechunk.py +43 -0
  579. maxframe/tensor/reduction/__init__.py +66 -0
  580. maxframe/tensor/reduction/all.py +103 -0
  581. maxframe/tensor/reduction/allclose.py +88 -0
  582. maxframe/tensor/reduction/any.py +105 -0
  583. maxframe/tensor/reduction/argmax.py +103 -0
  584. maxframe/tensor/reduction/argmin.py +103 -0
  585. maxframe/tensor/reduction/array_equal.py +64 -0
  586. maxframe/tensor/reduction/core.py +168 -0
  587. maxframe/tensor/reduction/count_nonzero.py +81 -0
  588. maxframe/tensor/reduction/cumprod.py +97 -0
  589. maxframe/tensor/reduction/cumsum.py +101 -0
  590. maxframe/tensor/reduction/max.py +120 -0
  591. maxframe/tensor/reduction/mean.py +123 -0
  592. maxframe/tensor/reduction/min.py +120 -0
  593. maxframe/tensor/reduction/nanargmax.py +82 -0
  594. maxframe/tensor/reduction/nanargmin.py +76 -0
  595. maxframe/tensor/reduction/nancumprod.py +91 -0
  596. maxframe/tensor/reduction/nancumsum.py +94 -0
  597. maxframe/tensor/reduction/nanmax.py +111 -0
  598. maxframe/tensor/reduction/nanmean.py +106 -0
  599. maxframe/tensor/reduction/nanmin.py +111 -0
  600. maxframe/tensor/reduction/nanprod.py +94 -0
  601. maxframe/tensor/reduction/nanstd.py +126 -0
  602. maxframe/tensor/reduction/nansum.py +115 -0
  603. maxframe/tensor/reduction/nanvar.py +149 -0
  604. maxframe/tensor/reduction/prod.py +130 -0
  605. maxframe/tensor/reduction/std.py +134 -0
  606. maxframe/tensor/reduction/sum.py +125 -0
  607. maxframe/tensor/reduction/tests/__init__.py +13 -0
  608. maxframe/tensor/reduction/tests/test_reduction.py +181 -0
  609. maxframe/tensor/reduction/var.py +176 -0
  610. maxframe/tensor/reshape/__init__.py +17 -0
  611. maxframe/tensor/reshape/reshape.py +188 -0
  612. maxframe/tensor/reshape/tests/__init__.py +15 -0
  613. maxframe/tensor/reshape/tests/test_reshape.py +37 -0
  614. maxframe/tensor/statistics/__init__.py +13 -0
  615. maxframe/tensor/statistics/percentile.py +175 -0
  616. maxframe/tensor/statistics/quantile.py +288 -0
  617. maxframe/tensor/ufunc/__init__.py +26 -0
  618. maxframe/tensor/ufunc/ufunc.py +200 -0
  619. maxframe/tensor/utils.py +718 -0
  620. maxframe/tests/__init__.py +13 -0
  621. maxframe/tests/test_codegen.py +69 -0
  622. maxframe/tests/test_protocol.py +144 -0
  623. maxframe/tests/test_utils.py +376 -0
  624. maxframe/tests/utils.py +164 -0
  625. maxframe/typing_.py +37 -0
  626. maxframe/udf.py +134 -0
  627. maxframe/utils.py +1114 -0
  628. maxframe-0.1.0b5.dist-info/METADATA +104 -0
  629. maxframe-0.1.0b5.dist-info/RECORD +647 -0
  630. maxframe-0.1.0b5.dist-info/WHEEL +5 -0
  631. maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
  632. maxframe_client/__init__.py +17 -0
  633. maxframe_client/clients/__init__.py +13 -0
  634. maxframe_client/clients/framedriver.py +118 -0
  635. maxframe_client/clients/spe.py +104 -0
  636. maxframe_client/conftest.py +15 -0
  637. maxframe_client/fetcher.py +264 -0
  638. maxframe_client/session/__init__.py +22 -0
  639. maxframe_client/session/consts.py +36 -0
  640. maxframe_client/session/graph.py +119 -0
  641. maxframe_client/session/odps.py +482 -0
  642. maxframe_client/session/task.py +280 -0
  643. maxframe_client/session/tests/__init__.py +13 -0
  644. maxframe_client/session/tests/test_task.py +85 -0
  645. maxframe_client/tests/__init__.py +13 -0
  646. maxframe_client/tests/test_fetcher.py +89 -0
  647. maxframe_client/tests/test_session.py +255 -0
@@ -0,0 +1,364 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import string
16
+ from collections import defaultdict
17
+ from typing import Any, Dict, Tuple
18
+
19
+ import pandas as pd
20
+ import pyarrow as pa
21
+ from odps import types as odps_types
22
+ from pandas.api import types as pd_types
23
+
24
+ from ..core import TILEABLE_TYPE, OutputType
25
+ from ..protocol import DataFrameTableMeta
26
+ from ..tensor.core import TENSOR_TYPE
27
+
28
# Prefix prepended to the tileable key when naming tables in
# build_dataframe_table_meta.
_TEMP_TABLE_PREFIX = "tmp_mf_"

# Primitive arrow types and the MaxCompute types they are stored as.
_arrow_to_odps_types = {
    pa.string(): odps_types.string,
    pa.binary(): odps_types.binary,
    pa.int8(): odps_types.tinyint,
    pa.int16(): odps_types.smallint,
    pa.int32(): odps_types.int_,
    pa.int64(): odps_types.bigint,
    pa.bool_(): odps_types.boolean,
    pa.float32(): odps_types.float_,
    pa.float64(): odps_types.double,
    pa.date32(): odps_types.date,
    pa.timestamp("ms"): odps_types.datetime,
    pa.timestamp("ns"): odps_types.timestamp,
}

# Reverse lookup. The forward mapping is one-to-one, so inverting it yields
# the same entries in the same order as spelling them out by hand.
_odps_type_to_arrow = {
    odps_type: arrow_type for arrow_type, odps_type in _arrow_to_odps_types.items()
}
59
+
60
+
61
def arrow_type_to_odps_type(
    arrow_type: pa.DataType, col_name: str, unknown_as_string: bool = False
) -> odps_types.DataType:
    """
    Convert an arrow data type into the matching MaxCompute type

    Parameters
    ----------
    arrow_type:
        arrow type to convert
    col_name:
        name of the column, only used in the error message
    unknown_as_string:
        if True, types without a known mapping are returned as string
        instead of raising

    Returns
    -------
    converted MaxCompute type

    Raises
    ------
    TypeError
        when the type has no mapping and ``unknown_as_string`` is False
    """

    def _convert_nested(inner_type: pa.DataType) -> odps_types.DataType:
        # element types of nested types share column name and fallback policy
        return arrow_type_to_odps_type(inner_type, col_name, unknown_as_string)

    if arrow_type in _arrow_to_odps_types:
        return _arrow_to_odps_types[arrow_type]
    if isinstance(arrow_type, pa.ListType):
        return odps_types.Array(_convert_nested(arrow_type.value_type))
    if isinstance(arrow_type, pa.MapType):
        return odps_types.Map(
            _convert_nested(arrow_type.key_type),
            _convert_nested(arrow_type.item_type),
        )
    if isinstance(arrow_type, pa.StructType):
        struct_fields = (arrow_type[pos] for pos in range(arrow_type.num_fields))
        return odps_types.Struct(
            {fld.name: _convert_nested(fld.type) for fld in struct_fields}
        )
    if isinstance(arrow_type, pa.Decimal128Type):
        return odps_types.Decimal(arrow_type.precision, arrow_type.scale)

    if unknown_as_string:
        return odps_types.string
    raise TypeError(
        "Unknown type {}, column name is {},"
        "specify `unknown_as_string=True` "
        "or `as_type` to set column dtype".format(arrow_type, col_name)
    )
94
+
95
+
96
def arrow_schema_to_odps_schema(
    arrow_schema: pa.Schema, unknown_as_string: bool = False
) -> odps_types.OdpsSchema:
    """
    Build a MaxCompute schema with one column per field of an arrow schema

    Parameters
    ----------
    arrow_schema:
        arrow schema to convert
    unknown_as_string:
        passed to arrow_type_to_odps_type for every column

    Returns
    -------
    MaxCompute schema whose columns keep the names and order of the input
    """
    name_type_pairs = zip(arrow_schema.names, arrow_schema.types)
    return odps_types.OdpsSchema(
        [
            odps_types.Column(
                name,
                arrow_type_to_odps_type(
                    arrow_type, name, unknown_as_string=unknown_as_string
                ),
            )
            for name, arrow_type in name_type_pairs
        ]
    )
107
+
108
+
109
def odps_type_to_arrow_type(
    odps_type: odps_types.DataType, col_name: str
) -> pa.DataType:
    """
    Convert a MaxCompute data type into the matching arrow type

    Parameters
    ----------
    odps_type:
        MaxCompute type to convert
    col_name:
        name of the column, only used in the error message

    Returns
    -------
    converted arrow type

    Raises
    ------
    TypeError
        when the MaxCompute type is not supported
    """
    if odps_type in _odps_type_to_arrow:
        return _odps_type_to_arrow[odps_type]

    if isinstance(odps_type, odps_types.Array):
        return pa.list_(odps_type_to_arrow_type(odps_type.value_type, col_name))
    if isinstance(odps_type, odps_types.Map):
        return pa.map_(
            odps_type_to_arrow_type(odps_type.key_type, col_name),
            odps_type_to_arrow_type(odps_type.value_type, col_name),
        )
    if isinstance(odps_type, odps_types.Struct):
        return pa.struct(
            [
                (field_name, odps_type_to_arrow_type(field_type, col_name))
                for field_name, field_type in odps_type.field_types.items()
            ]
        )
    if isinstance(odps_type, odps_types.Decimal):
        precision = odps_type.precision or odps_types.Decimal._max_precision
        # An explicit scale of 0 is a real scale. Only a scale that was
        # never given falls back to the default; ``scale or default`` would
        # silently turn decimal(p, 0) into decimal(p, max_scale).
        scale = odps_type.scale
        if scale is None:
            scale = odps_types.Decimal._max_scale
        return pa.decimal128(precision, scale)
    if isinstance(odps_type, (odps_types.Varchar, odps_types.Char)):
        return pa.string()

    raise TypeError(
        "Unsupported type {}, column name is {}".format(odps_type, col_name)
    )
140
+
141
+
142
def odps_schema_to_arrow_schema(
    odps_schema: odps_types.OdpsSchema, with_partitions: bool = False
) -> pa.Schema:
    """
    Build an arrow schema from the columns of a MaxCompute schema

    Parameters
    ----------
    odps_schema:
        MaxCompute schema to convert
    with_partitions:
        if True, use ``odps_schema.columns``, otherwise
        ``odps_schema.simple_columns``

    Returns
    -------
    arrow schema with one field per selected column
    """
    if with_partitions:
        selected_cols = odps_schema.columns
    else:
        selected_cols = odps_schema.simple_columns

    return pa.schema(
        [
            pa.field(column.name, odps_type_to_arrow_type(column.type, column.name))
            for column in selected_cols
        ]
    )
153
+
154
+
155
def odps_schema_to_pandas_dtypes(
    odps_schema: odps_types.OdpsSchema, with_partitions: bool = False
) -> pd.Series:
    """
    Get pandas dtypes for the columns of a MaxCompute schema

    The schema is converted to arrow first, and the dtypes are read from
    an empty arrow table of that schema converted to pandas.
    """
    empty_table = odps_schema_to_arrow_schema(
        odps_schema, with_partitions=with_partitions
    ).empty_table()
    empty_frame = empty_table.to_pandas()
    return empty_frame.dtypes
162
+
163
+
164
def _is_scalar_object(df_obj: Any) -> bool:
    """Return True for tensors with shape ``()`` and for pandas scalars."""
    if isinstance(df_obj, TENSOR_TYPE) and df_obj.shape == ():
        return True
    return pd_types.is_scalar(df_obj)
168
+
169
+
170
def _scalar_as_index(df_obj: Any) -> pd.Index:
    """Return an empty pandas index carrying the dtype of a scalar object."""
    if not isinstance(df_obj, TILEABLE_TYPE):
        # let pandas infer the dtype from the value, then drop the value
        return pd.Index([df_obj])[:0]
    return pd.Index([], dtype=df_obj.dtype)
175
+
176
+
177
def pandas_to_odps_schema(
    df_obj: Any,
    unknown_as_string: bool = False,
    ignore_index=False,
) -> Tuple[odps_types.OdpsSchema, DataFrameTableMeta]:
    """
    Compute the MaxCompute schema and table meta for a dataframe-like object

    An empty pandas object with the same index, columns and dtypes is
    built from ``df_obj`` and passed through ``pandas_to_arrow``; the
    resulting arrow schema is then converted to a MaxCompute schema.

    Parameters
    ----------
    df_obj:
        pandas or MaxFrame DataFrame / Series / Index, or a scalar
    unknown_as_string:
        passed to arrow_schema_to_odps_schema
    ignore_index:
        passed to pandas_to_arrow

    Returns
    -------
    tuple of the MaxCompute schema and the table meta from pandas_to_arrow
    """
    from .. import dataframe as md
    from .arrow import pandas_to_arrow

    # zero-length index with the same type as the index of the input
    if _is_scalar_object(df_obj):
        empty_index = None
    elif hasattr(df_obj, "index_value"):
        empty_index = df_obj.index_value.to_pandas()[:0]
    elif isinstance(df_obj, pd.Index):
        empty_index = df_obj[:0]
    else:
        empty_index = df_obj.index[:0]

    # column labels, when the object has any
    if hasattr(df_obj, "columns_value"):
        empty_columns = df_obj.dtypes.index
    else:
        empty_columns = getattr(df_obj, "columns", None)

    if isinstance(df_obj, (md.DataFrame, pd.DataFrame)):
        blank_frame = pd.DataFrame([], columns=empty_columns, index=empty_index)
        empty_df_obj = blank_frame.astype(df_obj.dtypes)
    elif isinstance(df_obj, (md.Series, pd.Series)):
        blank_series = pd.Series([], name=df_obj.name, index=empty_index)
        empty_df_obj = blank_series.astype(df_obj.dtype)
    elif isinstance(df_obj, (md.Index, pd.Index)):
        empty_df_obj = empty_index
    else:
        empty_df_obj = df_obj

    arrow_data, table_meta = pandas_to_arrow(empty_df_obj, ignore_index=ignore_index)
    odps_schema = arrow_schema_to_odps_schema(
        arrow_data.schema, unknown_as_string=unknown_as_string
    )
    return odps_schema, table_meta
221
+
222
+
223
def build_table_column_name(
    col_idx: int, pd_col_name: Any, records: Dict[str, str]
) -> str:
    """
    Convert column name to MaxCompute acceptable names

    Names are lower-cased. A name is kept only when it is 1 to 128
    characters long, starts with an ASCII letter or underscore and holds
    nothing but ASCII letters, digits and underscores; otherwise
    ``_column_<col_idx>`` is used instead.

    Parameters
    ----------
    col_idx:
        index of the column
    pd_col_name:
        column name in pandas
    records:
        record for existing columns, updated in place with the new mapping

    Returns
    -------
    converted column name
    """
    try:
        return records[pd_col_name]
    except KeyError:
        pass

    leading_chars = set(string.ascii_letters) | {"_"}
    allowed_chars = leading_chars | set(string.digits)

    def _is_col_name_legal(name: str):
        if not 1 <= len(name) <= 128:
            return False
        if name[0] not in leading_chars:
            return False
        return all(ch in allowed_chars for ch in name)

    if isinstance(pd_col_name, tuple):
        # multi-level column labels are flattened with underscores
        raw_name = "_".join(str(part) for part in pd_col_name)
    elif isinstance(pd_col_name, str):
        raw_name = pd_col_name
    else:
        raw_name = str(pd_col_name)

    lowered = raw_name.lower()
    col_name = lowered if _is_col_name_legal(lowered) else f"_column_{col_idx}"

    records[pd_col_name] = col_name
    return col_name
271
+
272
+
273
def build_dataframe_table_meta(
    df_obj: Any, ignore_index: bool = False
) -> DataFrameTableMeta:
    """
    Build the table meta describing how a dataframe-like object is stored

    Parameters
    ----------
    df_obj:
        pandas or MaxFrame DataFrame / Series / Index, or a scalar
    ignore_index:
        if True, no index columns are recorded; only allowed for
        DataFrame and Series objects

    Returns
    -------
    DataFrameTableMeta holding the table name, the data and index column
    names and the pandas dtypes of columns and index
    """
    from .. import dataframe as md

    if isinstance(df_obj, (md.DataFrame, pd.DataFrame)):
        obj_type = OutputType.dataframe
    elif isinstance(df_obj, (md.Series, pd.Series)):
        obj_type = OutputType.series
    elif isinstance(df_obj, (md.Index, pd.Index)):
        obj_type = OutputType.index
    elif _is_scalar_object(df_obj):
        obj_type = OutputType.scalar
    else:  # pragma: no cover
        raise TypeError(f"Cannot accept type {type(df_obj)}")

    assert not ignore_index or obj_type in (OutputType.dataframe, OutputType.series)

    if obj_type == OutputType.dataframe:
        pd_dtypes = df_obj.dtypes
        column_index_names = list(pd_dtypes.index.names)
        index_obj = df_obj.index
    elif obj_type == OutputType.series:
        pd_dtypes = pd.Series([df_obj.dtype], index=[df_obj.name])
        column_index_names = [None]
        index_obj = df_obj.index
    else:
        # index and scalar objects carry no data columns
        pd_dtypes = pd.Series([])
        column_index_names = []
        if obj_type == OutputType.scalar:
            index_obj = _scalar_as_index(df_obj)
        else:
            index_obj = df_obj

    table_name = None
    if isinstance(df_obj, TILEABLE_TYPE):
        table_name = _TEMP_TABLE_PREFIX + str(df_obj.key)

    if obj_type == OutputType.series and df_obj.name is None:
        # use special table column name for series
        pd_col_names = ["_data"]
    else:
        pd_col_names = pd_dtypes.index

    name_cache = dict()
    sql_columns = [
        build_table_column_name(pos, label, name_cache)
        for pos, label in enumerate(pd_col_names)
    ]
    occurrences = defaultdict(int)
    for sql_name in sql_columns:
        occurrences[sql_name] += 1

    # Names that occur more than once get a numeric suffix; suffixes that
    # would clash with another converted name are skipped.
    next_suffix = defaultdict(int)
    final_sql_columns = []
    for sql_name in sql_columns:
        if occurrences[sql_name] <= 1:
            final_sql_columns.append(sql_name)
            continue
        while True:
            candidate = f"{sql_name}_{next_suffix[sql_name]}"
            next_suffix[sql_name] += 1
            if candidate not in occurrences:
                break
        final_sql_columns.append(candidate)

    pd_index_val = index_obj
    if hasattr(index_obj, "index_value"):
        pd_index_val = index_obj.index_value.to_pandas()

    if hasattr(pd_index_val, "dtypes"):
        index_dtype_values = pd_index_val.dtypes.values
    else:
        index_dtype_values = [pd_index_val.dtype]
    index_dtypes = pd.Series(index_dtype_values, index=pd_index_val.names)

    if ignore_index:
        table_index_column_names = []
        pd_index_dtypes = pd.Series([], index=[])
    else:
        table_index_column_names = [
            f"_idx_{level}" for level in range(len(index_obj.names))
        ]
        pd_index_dtypes = index_dtypes

    return DataFrameTableMeta(
        table_name=table_name,
        type=obj_type,
        table_column_names=final_sql_columns,
        table_index_column_names=table_index_column_names,
        pd_column_dtypes=pd_dtypes,
        pd_column_level_names=column_index_names,
        pd_index_dtypes=pd_index_dtypes,
    )
@@ -0,0 +1,322 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ import time
17
+ from abc import ABC, abstractmethod
18
+ from contextlib import contextmanager
19
+ from typing import List, Optional, Union
20
+
21
+ import pyarrow as pa
22
+ from odps import ODPS
23
+ from odps.apis.storage_api import (
24
+ StorageApiArrowClient,
25
+ TableBatchScanResponse,
26
+ TableBatchWriteResponse,
27
+ )
28
+ from odps.types import PartitionSpec
29
+
30
+ from ..env import ODPS_STORAGE_API_ENDPOINT
31
+ from .schema import odps_schema_to_arrow_schema
32
+
33
# Accepted partition arguments: a list of partition strings, a single
# partition string, or None.
PartitionsType = Optional[Union[List[str], str]]

# Batch size passed as the default ``row_batch_size`` of table readers.
_DEFAULT_ROW_BATCH_SIZE = 4096
36
+
37
+
38
class MCTableIO(ABC):
    """Abstract base for objects that open readers and writers on tables."""

    def __init__(self, odps: ODPS):
        # entry object kept for subclasses
        self._odps = odps

    @abstractmethod
    def open_reader(
        self,
        full_table_name: str,
        partitions: PartitionsType = None,
        columns: Optional[List[str]] = None,
        start: Optional[int] = None,
        count: Optional[int] = None,
    ):
        """Open a reader on the given table. Must be overridden."""
        raise NotImplementedError

    @abstractmethod
    def open_writer(
        self,
        full_table_name: str,
        partition: Optional[str] = None,
        overwrite: bool = True,
    ):
        """Open a writer on the given table. Must be overridden."""
        raise NotImplementedError
61
+
62
+
63
class HaloTableArrowReader:
    """
    Reader that pulls arrow batches from a storage API read session

    When ``start`` is given the table is read by row ranges of at most
    ``row_batch_size`` rows until ``count`` rows have been requested.
    Otherwise splits are read one after another until ``split_count`` of
    the scan response is reached.
    """

    def __init__(
        self,
        client: StorageApiArrowClient,
        scan_info: TableBatchScanResponse,
        schema: pa.Schema,
        start: Optional[int] = None,
        count: Optional[int] = None,
        row_batch_size: Optional[int] = None,
    ):
        self._client = client
        self._scan_info = scan_info

        self._cur_split_id = -1
        self._cur_reader = None

        self._schema = schema

        self._start = start
        self._count = count
        # number of rows already requested in range mode
        self._cursor = 0
        # The batch size is used in arithmetic when reading by range, so a
        # missing value falls back to the module default instead of None.
        if row_batch_size is None:
            row_batch_size = _DEFAULT_ROW_BATCH_SIZE
        self._row_batch_size = row_batch_size

    @property
    def count(self) -> Optional[int]:
        """Number of rows to read in range mode, or None."""
        return self._count

    def _open_next_reader(self):
        """Open the reader of the next split or row range.

        Sets ``self._cur_reader`` to None once nothing is left to read.
        """
        from odps.apis.storage_api import ReadRowsRequest

        if 0 <= self._scan_info.split_count <= self._cur_split_id + 1:
            # scan by split
            self._cur_reader = None
            return
        elif self._count is not None and self._cursor >= self._count:
            # scan by range
            self._cur_reader = None
            return

        read_rows_kw = {}
        if self._start is not None:
            read_rows_kw["row_index"] = self._start + self._cursor
            read_rows_kw["row_count"] = min(
                self._row_batch_size, self._count - self._cursor
            )
            self._cursor = min(self._count, self._cursor + self._row_batch_size)

        req = ReadRowsRequest(
            session_id=self._scan_info.session_id,
            split_index=self._cur_split_id + 1,
            **read_rows_kw,
        )
        self._cur_reader = self._client.read_rows_arrow(req)
        self._cur_split_id += 1

    def read(self):
        """Return the next batch, or None when all data has been read."""
        if self._cur_reader is None:
            self._open_next_reader()
        while self._cur_reader is not None:
            batch = self._cur_reader.read()
            if batch is not None:
                return batch
            # current reader is exhausted, move on to the next one
            self._open_next_reader()
        return None

    def read_all(self) -> pa.Table:
        """Read all remaining batches into a single arrow table."""
        batches = []
        while True:
            batch = self.read()
            if batch is None:
                break
            batches.append(batch)
        if not batches:
            return self._schema.empty_table()
        return pa.Table.from_batches(batches)
140
+
141
+
142
class HaloTableArrowWriter:
    """Writer that sends arrow data into a storage API write session."""

    def __init__(
        self, client: StorageApiArrowClient, write_info: TableBatchWriteResponse
    ):
        self._client = client
        self._write_info = write_info

        # created in open()
        self._writer = None

    def open(self):
        """Create the underlying writer for the write session."""
        from odps.apis.storage_api import WriteRowsRequest

        request = WriteRowsRequest(self._write_info.session_id)
        self._writer = self._client.write_rows_arrow(request)

    def write(self, batch):
        """Write a batch; an arrow table is written batch by batch."""
        pieces = batch.to_batches() if isinstance(batch, pa.Table) else [batch]
        for piece in pieces:
            self._writer.write(piece)

    def close(self):
        """Finish writing and return the commit message.

        Raises IOError with the commit message when finishing fails.
        """
        commit_msg, is_success = self._writer.finish()
        if is_success:
            return commit_msg
        raise IOError(commit_msg)
170
+
171
+
172
class HaloTableIO(MCTableIO):
    """Table IO implemented with the storage API arrow client."""

    # read once, when the class is created
    _storage_api_endpoint = os.getenv(ODPS_STORAGE_API_ENDPOINT)
    # seconds to wait between two polls of a committing write session
    _commit_poll_interval = 0.1

    @staticmethod
    def _convert_partitions(partitions: PartitionsType) -> Optional[List[str]]:
        """Convert partition arguments into a list of ``k=v/k=v`` strings.

        None gives an empty list; a single partition gives a list of one.
        """
        if partitions is None:
            return []
        elif isinstance(partitions, (str, PartitionSpec)):
            partitions = [partitions]
        return [
            "/".join(f"{k}={v}" for k, v in PartitionSpec(pt).items())
            for pt in partitions
        ]

    @staticmethod
    def _resolve_row_range(
        start: Optional[int],
        stop: Optional[int],
        record_count: int,
        reverse_range: bool,
    ):
        """Turn slice-like bounds into ``(first_row, row_count)``.

        Negative bounds given by the caller count from the end of the
        table. Missing bounds default to the whole table in the direction
        of the range.
        """
        # Only caller-supplied bounds are normalized. The default stop of a
        # reverse range is -1, meaning "before row 0"; normalizing it as
        # well would turn it into the last row and produce an empty or
        # negative range.
        if start is not None and start < 0:
            start += record_count
        if stop is not None and stop < 0:
            stop += record_count

        if reverse_range:
            start = record_count - 1 if start is None else start
            stop = -1 if stop is None else stop
            return stop + 1, start - stop

        start = 0 if start is None else start
        stop = record_count if stop is None else stop
        return start, stop - start

    def get_table_record_count(
        self, full_table_name: str, partitions: PartitionsType = None
    ):
        """Return ``record_count`` of a new read session on the table."""
        from odps.apis.storage_api import SplitOptions, TableBatchScanRequest

        table = self._odps.get_table(full_table_name)
        client = StorageApiArrowClient(
            self._odps, table, rest_endpoint=self._storage_api_endpoint
        )

        split_option = SplitOptions.SplitMode.SIZE

        scan_kw = {
            "required_partitions": self._convert_partitions(partitions),
            "split_options": SplitOptions.get_default_options(split_option),
        }

        # todo add more options for partition column handling
        req = TableBatchScanRequest(**scan_kw)
        resp = client.create_read_session(req)
        return resp.record_count

    @contextmanager
    def open_reader(
        self,
        full_table_name: str,
        partitions: PartitionsType = None,
        columns: Optional[List[str]] = None,
        partition_columns: Union[None, bool, List[str]] = None,
        start: Optional[int] = None,
        stop: Optional[int] = None,
        reverse_range: bool = False,
        row_batch_size: int = _DEFAULT_ROW_BATCH_SIZE,
    ):
        """
        Open a reader on a table, optionally limited to a row range

        Parameters
        ----------
        full_table_name:
            name of the table passed to ``get_table``
        partitions:
            partitions to read
        columns:
            data columns to read, all simple columns when empty
        partition_columns:
            partition columns to read; True selects all of them
        start, stop:
            slice-like row bounds; negative values count from the end
        reverse_range:
            if True, start and stop describe a range walked backwards
        row_batch_size:
            number of rows requested per read when reading by range

        Yields
        ------
        HaloTableArrowReader on the created read session
        """
        from odps.apis.storage_api import (
            SessionRequest,
            SplitOptions,
            Status,
            TableBatchScanRequest,
        )

        table = self._odps.get_table(full_table_name)
        client = StorageApiArrowClient(
            self._odps, table, rest_endpoint=self._storage_api_endpoint
        )

        read_by_range = start is not None or stop is not None
        if read_by_range:
            split_option = SplitOptions.SplitMode.ROW_OFFSET
        else:
            split_option = SplitOptions.SplitMode.SIZE

        scan_kw = {
            "required_partitions": self._convert_partitions(partitions),
            "split_options": SplitOptions.get_default_options(split_option),
        }
        columns = columns or [c.name for c in table.table_schema.simple_columns]
        scan_kw["required_data_columns"] = columns
        if partition_columns is True:
            scan_kw["required_partition_columns"] = [
                c.name for c in table.table_schema.partitions
            ]
        else:
            scan_kw["required_partition_columns"] = partition_columns

        # todo add more options for partition column handling
        req = TableBatchScanRequest(**scan_kw)
        resp = client.create_read_session(req)

        session_id = resp.session_id
        status = resp.status
        while status == Status.WAIT:
            # wait before asking again, so no time is lost once the
            # session has left the waiting state
            time.sleep(1.0)
            resp = client.get_read_session(SessionRequest(session_id))
            status = resp.status

        assert status == Status.OK

        count = None
        if read_by_range:
            start, count = self._resolve_row_range(
                start, stop, resp.record_count, reverse_range
            )

        yield HaloTableArrowReader(
            client,
            resp,
            schema=odps_schema_to_arrow_schema(table.table_schema),
            start=start,
            count=count,
            row_batch_size=row_batch_size,
        )

    @contextmanager
    def open_writer(
        self,
        full_table_name: str,
        partition: Optional[str] = None,
        overwrite: bool = True,
    ):
        """
        Open a writer on a table and commit the session on normal exit

        Parameters
        ----------
        full_table_name:
            name of the table passed to ``get_table``
        partition:
            partition to write into
        overwrite:
            passed to the write session request

        Yields
        ------
        opened HaloTableArrowWriter
        """
        from odps.apis.storage_api import (
            SessionRequest,
            SessionStatus,
            TableBatchWriteRequest,
        )

        table = self._odps.get_table(full_table_name)
        client = StorageApiArrowClient(
            self._odps, table, rest_endpoint=self._storage_api_endpoint
        )

        part_strs = self._convert_partitions(partition)
        part_str = part_strs[0] if part_strs else None
        req = TableBatchWriteRequest(partition_spec=part_str, overwrite=overwrite)
        resp = client.create_write_session(req)

        session_id = resp.session_id
        writer = HaloTableArrowWriter(client, resp)
        writer.open()

        yield writer

        commit_msg = writer.close()
        resp = client.commit_write_session(
            SessionRequest(session_id=session_id), [commit_msg]
        )
        while resp.session_status == SessionStatus.COMMITTING:
            # pause between polls instead of querying in a tight loop
            time.sleep(self._commit_poll_interval)
            resp = client.get_write_session(SessionRequest(session_id=session_id))
        assert resp.session_status == SessionStatus.COMMITTED
@@ -0,0 +1,13 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.