maxframe 0.1.0b5__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (647) hide show
  1. maxframe/__init__.py +32 -0
  2. maxframe/_utils.cpython-311-darwin.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyx +547 -0
  5. maxframe/codegen.py +528 -0
  6. maxframe/config/__init__.py +15 -0
  7. maxframe/config/config.py +443 -0
  8. maxframe/config/tests/__init__.py +13 -0
  9. maxframe/config/tests/test_config.py +103 -0
  10. maxframe/config/tests/test_validators.py +34 -0
  11. maxframe/config/validators.py +57 -0
  12. maxframe/conftest.py +139 -0
  13. maxframe/core/__init__.py +65 -0
  14. maxframe/core/base.py +156 -0
  15. maxframe/core/entity/__init__.py +44 -0
  16. maxframe/core/entity/chunks.py +68 -0
  17. maxframe/core/entity/core.py +152 -0
  18. maxframe/core/entity/executable.py +337 -0
  19. maxframe/core/entity/fuse.py +73 -0
  20. maxframe/core/entity/objects.py +100 -0
  21. maxframe/core/entity/output_types.py +90 -0
  22. maxframe/core/entity/tileables.py +438 -0
  23. maxframe/core/entity/utils.py +24 -0
  24. maxframe/core/graph/__init__.py +17 -0
  25. maxframe/core/graph/builder/__init__.py +16 -0
  26. maxframe/core/graph/builder/base.py +86 -0
  27. maxframe/core/graph/builder/chunk.py +430 -0
  28. maxframe/core/graph/builder/tileable.py +34 -0
  29. maxframe/core/graph/builder/utils.py +41 -0
  30. maxframe/core/graph/core.cpython-311-darwin.so +0 -0
  31. maxframe/core/graph/core.pyx +467 -0
  32. maxframe/core/graph/entity.py +171 -0
  33. maxframe/core/graph/tests/__init__.py +13 -0
  34. maxframe/core/graph/tests/test_graph.py +205 -0
  35. maxframe/core/mode.py +96 -0
  36. maxframe/core/operator/__init__.py +34 -0
  37. maxframe/core/operator/base.py +450 -0
  38. maxframe/core/operator/core.py +276 -0
  39. maxframe/core/operator/fetch.py +53 -0
  40. maxframe/core/operator/fuse.py +29 -0
  41. maxframe/core/operator/objects.py +72 -0
  42. maxframe/core/operator/shuffle.py +111 -0
  43. maxframe/core/operator/tests/__init__.py +13 -0
  44. maxframe/core/operator/tests/test_core.py +64 -0
  45. maxframe/core/tests/__init__.py +13 -0
  46. maxframe/core/tests/test_mode.py +75 -0
  47. maxframe/dataframe/__init__.py +81 -0
  48. maxframe/dataframe/arithmetic/__init__.py +359 -0
  49. maxframe/dataframe/arithmetic/abs.py +33 -0
  50. maxframe/dataframe/arithmetic/add.py +60 -0
  51. maxframe/dataframe/arithmetic/arccos.py +28 -0
  52. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  53. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  54. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  55. maxframe/dataframe/arithmetic/arctan.py +28 -0
  56. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  57. maxframe/dataframe/arithmetic/around.py +152 -0
  58. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  59. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  60. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  61. maxframe/dataframe/arithmetic/ceil.py +28 -0
  62. maxframe/dataframe/arithmetic/core.py +342 -0
  63. maxframe/dataframe/arithmetic/cos.py +28 -0
  64. maxframe/dataframe/arithmetic/cosh.py +28 -0
  65. maxframe/dataframe/arithmetic/degrees.py +28 -0
  66. maxframe/dataframe/arithmetic/docstring.py +442 -0
  67. maxframe/dataframe/arithmetic/equal.py +56 -0
  68. maxframe/dataframe/arithmetic/exp.py +28 -0
  69. maxframe/dataframe/arithmetic/exp2.py +28 -0
  70. maxframe/dataframe/arithmetic/expm1.py +28 -0
  71. maxframe/dataframe/arithmetic/floor.py +28 -0
  72. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  73. maxframe/dataframe/arithmetic/greater.py +57 -0
  74. maxframe/dataframe/arithmetic/greater_equal.py +57 -0
  75. maxframe/dataframe/arithmetic/invert.py +33 -0
  76. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  77. maxframe/dataframe/arithmetic/less.py +57 -0
  78. maxframe/dataframe/arithmetic/less_equal.py +57 -0
  79. maxframe/dataframe/arithmetic/log.py +28 -0
  80. maxframe/dataframe/arithmetic/log10.py +28 -0
  81. maxframe/dataframe/arithmetic/log2.py +28 -0
  82. maxframe/dataframe/arithmetic/mod.py +60 -0
  83. maxframe/dataframe/arithmetic/multiply.py +60 -0
  84. maxframe/dataframe/arithmetic/negative.py +33 -0
  85. maxframe/dataframe/arithmetic/not_equal.py +56 -0
  86. maxframe/dataframe/arithmetic/power.py +68 -0
  87. maxframe/dataframe/arithmetic/radians.py +28 -0
  88. maxframe/dataframe/arithmetic/sin.py +28 -0
  89. maxframe/dataframe/arithmetic/sinh.py +28 -0
  90. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  91. maxframe/dataframe/arithmetic/subtract.py +64 -0
  92. maxframe/dataframe/arithmetic/tan.py +28 -0
  93. maxframe/dataframe/arithmetic/tanh.py +28 -0
  94. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  95. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
  96. maxframe/dataframe/arithmetic/truediv.py +64 -0
  97. maxframe/dataframe/arithmetic/trunc.py +28 -0
  98. maxframe/dataframe/arrays.py +864 -0
  99. maxframe/dataframe/core.py +2417 -0
  100. maxframe/dataframe/datasource/__init__.py +15 -0
  101. maxframe/dataframe/datasource/core.py +81 -0
  102. maxframe/dataframe/datasource/dataframe.py +59 -0
  103. maxframe/dataframe/datasource/date_range.py +504 -0
  104. maxframe/dataframe/datasource/from_index.py +54 -0
  105. maxframe/dataframe/datasource/from_records.py +107 -0
  106. maxframe/dataframe/datasource/from_tensor.py +419 -0
  107. maxframe/dataframe/datasource/index.py +117 -0
  108. maxframe/dataframe/datasource/read_csv.py +528 -0
  109. maxframe/dataframe/datasource/read_odps_query.py +299 -0
  110. maxframe/dataframe/datasource/read_odps_table.py +253 -0
  111. maxframe/dataframe/datasource/read_parquet.py +421 -0
  112. maxframe/dataframe/datasource/series.py +55 -0
  113. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  114. maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
  115. maxframe/dataframe/datastore/__init__.py +26 -0
  116. maxframe/dataframe/datastore/core.py +19 -0
  117. maxframe/dataframe/datastore/to_csv.py +227 -0
  118. maxframe/dataframe/datastore/to_odps.py +162 -0
  119. maxframe/dataframe/extensions/__init__.py +41 -0
  120. maxframe/dataframe/extensions/accessor.py +50 -0
  121. maxframe/dataframe/extensions/reshuffle.py +83 -0
  122. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  123. maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
  124. maxframe/dataframe/fetch/__init__.py +15 -0
  125. maxframe/dataframe/fetch/core.py +86 -0
  126. maxframe/dataframe/groupby/__init__.py +82 -0
  127. maxframe/dataframe/groupby/aggregation.py +350 -0
  128. maxframe/dataframe/groupby/apply.py +251 -0
  129. maxframe/dataframe/groupby/core.py +179 -0
  130. maxframe/dataframe/groupby/cum.py +124 -0
  131. maxframe/dataframe/groupby/fill.py +141 -0
  132. maxframe/dataframe/groupby/getitem.py +92 -0
  133. maxframe/dataframe/groupby/head.py +105 -0
  134. maxframe/dataframe/groupby/sample.py +214 -0
  135. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  136. maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
  137. maxframe/dataframe/groupby/transform.py +255 -0
  138. maxframe/dataframe/indexing/__init__.py +84 -0
  139. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  140. maxframe/dataframe/indexing/align.py +349 -0
  141. maxframe/dataframe/indexing/at.py +83 -0
  142. maxframe/dataframe/indexing/getitem.py +204 -0
  143. maxframe/dataframe/indexing/iat.py +37 -0
  144. maxframe/dataframe/indexing/iloc.py +566 -0
  145. maxframe/dataframe/indexing/insert.py +86 -0
  146. maxframe/dataframe/indexing/loc.py +411 -0
  147. maxframe/dataframe/indexing/reindex.py +526 -0
  148. maxframe/dataframe/indexing/rename.py +462 -0
  149. maxframe/dataframe/indexing/rename_axis.py +209 -0
  150. maxframe/dataframe/indexing/reset_index.py +402 -0
  151. maxframe/dataframe/indexing/sample.py +221 -0
  152. maxframe/dataframe/indexing/set_axis.py +194 -0
  153. maxframe/dataframe/indexing/set_index.py +61 -0
  154. maxframe/dataframe/indexing/setitem.py +130 -0
  155. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  156. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  157. maxframe/dataframe/indexing/where.py +308 -0
  158. maxframe/dataframe/initializer.py +288 -0
  159. maxframe/dataframe/merge/__init__.py +32 -0
  160. maxframe/dataframe/merge/append.py +121 -0
  161. maxframe/dataframe/merge/concat.py +325 -0
  162. maxframe/dataframe/merge/merge.py +593 -0
  163. maxframe/dataframe/merge/tests/__init__.py +13 -0
  164. maxframe/dataframe/merge/tests/test_merge.py +215 -0
  165. maxframe/dataframe/misc/__init__.py +134 -0
  166. maxframe/dataframe/misc/_duplicate.py +46 -0
  167. maxframe/dataframe/misc/accessor.py +276 -0
  168. maxframe/dataframe/misc/apply.py +692 -0
  169. maxframe/dataframe/misc/astype.py +236 -0
  170. maxframe/dataframe/misc/case_when.py +141 -0
  171. maxframe/dataframe/misc/check_monotonic.py +84 -0
  172. maxframe/dataframe/misc/cut.py +383 -0
  173. maxframe/dataframe/misc/datetimes.py +79 -0
  174. maxframe/dataframe/misc/describe.py +108 -0
  175. maxframe/dataframe/misc/diff.py +210 -0
  176. maxframe/dataframe/misc/drop.py +440 -0
  177. maxframe/dataframe/misc/drop_duplicates.py +248 -0
  178. maxframe/dataframe/misc/duplicated.py +292 -0
  179. maxframe/dataframe/misc/eval.py +728 -0
  180. maxframe/dataframe/misc/explode.py +171 -0
  181. maxframe/dataframe/misc/get_dummies.py +208 -0
  182. maxframe/dataframe/misc/isin.py +217 -0
  183. maxframe/dataframe/misc/map.py +236 -0
  184. maxframe/dataframe/misc/melt.py +162 -0
  185. maxframe/dataframe/misc/memory_usage.py +248 -0
  186. maxframe/dataframe/misc/pct_change.py +150 -0
  187. maxframe/dataframe/misc/pivot_table.py +262 -0
  188. maxframe/dataframe/misc/qcut.py +104 -0
  189. maxframe/dataframe/misc/select_dtypes.py +104 -0
  190. maxframe/dataframe/misc/shift.py +256 -0
  191. maxframe/dataframe/misc/stack.py +238 -0
  192. maxframe/dataframe/misc/string_.py +221 -0
  193. maxframe/dataframe/misc/tests/__init__.py +13 -0
  194. maxframe/dataframe/misc/tests/test_misc.py +468 -0
  195. maxframe/dataframe/misc/to_numeric.py +178 -0
  196. maxframe/dataframe/misc/transform.py +361 -0
  197. maxframe/dataframe/misc/transpose.py +136 -0
  198. maxframe/dataframe/misc/value_counts.py +182 -0
  199. maxframe/dataframe/missing/__init__.py +53 -0
  200. maxframe/dataframe/missing/checkna.py +223 -0
  201. maxframe/dataframe/missing/dropna.py +280 -0
  202. maxframe/dataframe/missing/fillna.py +275 -0
  203. maxframe/dataframe/missing/replace.py +439 -0
  204. maxframe/dataframe/missing/tests/__init__.py +13 -0
  205. maxframe/dataframe/missing/tests/test_missing.py +89 -0
  206. maxframe/dataframe/operators.py +273 -0
  207. maxframe/dataframe/plotting/__init__.py +40 -0
  208. maxframe/dataframe/plotting/core.py +78 -0
  209. maxframe/dataframe/plotting/tests/__init__.py +13 -0
  210. maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
  211. maxframe/dataframe/reduction/__init__.py +107 -0
  212. maxframe/dataframe/reduction/aggregation.py +344 -0
  213. maxframe/dataframe/reduction/all.py +78 -0
  214. maxframe/dataframe/reduction/any.py +78 -0
  215. maxframe/dataframe/reduction/core.py +837 -0
  216. maxframe/dataframe/reduction/count.py +59 -0
  217. maxframe/dataframe/reduction/cummax.py +30 -0
  218. maxframe/dataframe/reduction/cummin.py +30 -0
  219. maxframe/dataframe/reduction/cumprod.py +30 -0
  220. maxframe/dataframe/reduction/cumsum.py +30 -0
  221. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  222. maxframe/dataframe/reduction/kurtosis.py +104 -0
  223. maxframe/dataframe/reduction/max.py +65 -0
  224. maxframe/dataframe/reduction/mean.py +61 -0
  225. maxframe/dataframe/reduction/min.py +65 -0
  226. maxframe/dataframe/reduction/nunique.py +141 -0
  227. maxframe/dataframe/reduction/prod.py +76 -0
  228. maxframe/dataframe/reduction/reduction_size.py +36 -0
  229. maxframe/dataframe/reduction/sem.py +69 -0
  230. maxframe/dataframe/reduction/skew.py +89 -0
  231. maxframe/dataframe/reduction/std.py +53 -0
  232. maxframe/dataframe/reduction/str_concat.py +48 -0
  233. maxframe/dataframe/reduction/sum.py +77 -0
  234. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  235. maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
  236. maxframe/dataframe/reduction/unique.py +90 -0
  237. maxframe/dataframe/reduction/var.py +72 -0
  238. maxframe/dataframe/sort/__init__.py +34 -0
  239. maxframe/dataframe/sort/core.py +36 -0
  240. maxframe/dataframe/sort/sort_index.py +153 -0
  241. maxframe/dataframe/sort/sort_values.py +311 -0
  242. maxframe/dataframe/sort/tests/__init__.py +13 -0
  243. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  244. maxframe/dataframe/statistics/__init__.py +33 -0
  245. maxframe/dataframe/statistics/corr.py +280 -0
  246. maxframe/dataframe/statistics/quantile.py +341 -0
  247. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  248. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  249. maxframe/dataframe/tests/__init__.py +13 -0
  250. maxframe/dataframe/tests/test_initializer.py +29 -0
  251. maxframe/dataframe/tseries/__init__.py +13 -0
  252. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  253. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  254. maxframe/dataframe/tseries/to_datetime.py +297 -0
  255. maxframe/dataframe/ufunc/__init__.py +27 -0
  256. maxframe/dataframe/ufunc/tensor.py +54 -0
  257. maxframe/dataframe/ufunc/ufunc.py +52 -0
  258. maxframe/dataframe/utils.py +1267 -0
  259. maxframe/dataframe/window/__init__.py +29 -0
  260. maxframe/dataframe/window/aggregation.py +96 -0
  261. maxframe/dataframe/window/core.py +69 -0
  262. maxframe/dataframe/window/ewm.py +249 -0
  263. maxframe/dataframe/window/expanding.py +147 -0
  264. maxframe/dataframe/window/rolling.py +376 -0
  265. maxframe/dataframe/window/tests/__init__.py +13 -0
  266. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  267. maxframe/dataframe/window/tests/test_expanding.py +66 -0
  268. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  269. maxframe/env.py +33 -0
  270. maxframe/errors.py +21 -0
  271. maxframe/extension.py +81 -0
  272. maxframe/learn/__init__.py +17 -0
  273. maxframe/learn/contrib/__init__.py +17 -0
  274. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  275. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  276. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  277. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  278. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  279. maxframe/learn/contrib/utils.py +52 -0
  280. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  281. maxframe/learn/contrib/xgboost/classifier.py +86 -0
  282. maxframe/learn/contrib/xgboost/core.py +156 -0
  283. maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
  284. maxframe/learn/contrib/xgboost/predict.py +138 -0
  285. maxframe/learn/contrib/xgboost/regressor.py +78 -0
  286. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  287. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  288. maxframe/learn/contrib/xgboost/train.py +121 -0
  289. maxframe/learn/utils/__init__.py +15 -0
  290. maxframe/learn/utils/core.py +29 -0
  291. maxframe/lib/__init__.py +15 -0
  292. maxframe/lib/aio/__init__.py +27 -0
  293. maxframe/lib/aio/_runners.py +162 -0
  294. maxframe/lib/aio/_threads.py +35 -0
  295. maxframe/lib/aio/base.py +82 -0
  296. maxframe/lib/aio/file.py +85 -0
  297. maxframe/lib/aio/isolation.py +100 -0
  298. maxframe/lib/aio/lru.py +242 -0
  299. maxframe/lib/aio/parallelism.py +37 -0
  300. maxframe/lib/aio/tests/__init__.py +13 -0
  301. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  302. maxframe/lib/compression.py +55 -0
  303. maxframe/lib/cython/__init__.py +13 -0
  304. maxframe/lib/cython/libcpp.pxd +30 -0
  305. maxframe/lib/filesystem/__init__.py +21 -0
  306. maxframe/lib/filesystem/_glob.py +173 -0
  307. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  308. maxframe/lib/filesystem/_oss_lib/common.py +198 -0
  309. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  310. maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
  311. maxframe/lib/filesystem/arrow.py +236 -0
  312. maxframe/lib/filesystem/base.py +263 -0
  313. maxframe/lib/filesystem/core.py +95 -0
  314. maxframe/lib/filesystem/fsmap.py +164 -0
  315. maxframe/lib/filesystem/hdfs.py +31 -0
  316. maxframe/lib/filesystem/local.py +112 -0
  317. maxframe/lib/filesystem/oss.py +157 -0
  318. maxframe/lib/filesystem/tests/__init__.py +13 -0
  319. maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
  320. maxframe/lib/filesystem/tests/test_oss.py +182 -0
  321. maxframe/lib/functools_compat.py +81 -0
  322. maxframe/lib/mmh3.cpython-311-darwin.so +0 -0
  323. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  324. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  325. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  326. maxframe/lib/sparse/__init__.py +861 -0
  327. maxframe/lib/sparse/array.py +1604 -0
  328. maxframe/lib/sparse/core.py +92 -0
  329. maxframe/lib/sparse/matrix.py +241 -0
  330. maxframe/lib/sparse/tests/__init__.py +15 -0
  331. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  332. maxframe/lib/sparse/vector.py +150 -0
  333. maxframe/lib/tblib/LICENSE +20 -0
  334. maxframe/lib/tblib/__init__.py +327 -0
  335. maxframe/lib/tblib/cpython.py +83 -0
  336. maxframe/lib/tblib/decorators.py +44 -0
  337. maxframe/lib/tblib/pickling_support.py +90 -0
  338. maxframe/lib/tests/__init__.py +13 -0
  339. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  340. maxframe/lib/version.py +620 -0
  341. maxframe/lib/wrapped_pickle.py +139 -0
  342. maxframe/mixin.py +100 -0
  343. maxframe/odpsio/__init__.py +21 -0
  344. maxframe/odpsio/arrow.py +91 -0
  345. maxframe/odpsio/schema.py +364 -0
  346. maxframe/odpsio/tableio.py +322 -0
  347. maxframe/odpsio/tests/__init__.py +13 -0
  348. maxframe/odpsio/tests/test_arrow.py +88 -0
  349. maxframe/odpsio/tests/test_schema.py +297 -0
  350. maxframe/odpsio/tests/test_tableio.py +136 -0
  351. maxframe/odpsio/tests/test_volumeio.py +90 -0
  352. maxframe/odpsio/volumeio.py +95 -0
  353. maxframe/opcodes.py +590 -0
  354. maxframe/protocol.py +415 -0
  355. maxframe/remote/__init__.py +18 -0
  356. maxframe/remote/core.py +210 -0
  357. maxframe/remote/run_script.py +121 -0
  358. maxframe/serialization/__init__.py +26 -0
  359. maxframe/serialization/arrow.py +95 -0
  360. maxframe/serialization/core.cpython-311-darwin.so +0 -0
  361. maxframe/serialization/core.pxd +44 -0
  362. maxframe/serialization/core.pyi +61 -0
  363. maxframe/serialization/core.pyx +1094 -0
  364. maxframe/serialization/exception.py +86 -0
  365. maxframe/serialization/maxframe_objects.py +39 -0
  366. maxframe/serialization/numpy.py +91 -0
  367. maxframe/serialization/pandas.py +202 -0
  368. maxframe/serialization/scipy.py +71 -0
  369. maxframe/serialization/serializables/__init__.py +55 -0
  370. maxframe/serialization/serializables/core.py +262 -0
  371. maxframe/serialization/serializables/field.py +624 -0
  372. maxframe/serialization/serializables/field_type.py +589 -0
  373. maxframe/serialization/serializables/tests/__init__.py +13 -0
  374. maxframe/serialization/serializables/tests/test_field_type.py +121 -0
  375. maxframe/serialization/serializables/tests/test_serializable.py +250 -0
  376. maxframe/serialization/tests/__init__.py +13 -0
  377. maxframe/serialization/tests/test_serial.py +412 -0
  378. maxframe/session.py +1310 -0
  379. maxframe/tensor/__init__.py +183 -0
  380. maxframe/tensor/arithmetic/__init__.py +315 -0
  381. maxframe/tensor/arithmetic/abs.py +68 -0
  382. maxframe/tensor/arithmetic/absolute.py +68 -0
  383. maxframe/tensor/arithmetic/add.py +82 -0
  384. maxframe/tensor/arithmetic/angle.py +72 -0
  385. maxframe/tensor/arithmetic/arccos.py +104 -0
  386. maxframe/tensor/arithmetic/arccosh.py +91 -0
  387. maxframe/tensor/arithmetic/arcsin.py +94 -0
  388. maxframe/tensor/arithmetic/arcsinh.py +86 -0
  389. maxframe/tensor/arithmetic/arctan.py +106 -0
  390. maxframe/tensor/arithmetic/arctan2.py +128 -0
  391. maxframe/tensor/arithmetic/arctanh.py +86 -0
  392. maxframe/tensor/arithmetic/around.py +114 -0
  393. maxframe/tensor/arithmetic/bitand.py +95 -0
  394. maxframe/tensor/arithmetic/bitor.py +102 -0
  395. maxframe/tensor/arithmetic/bitxor.py +95 -0
  396. maxframe/tensor/arithmetic/cbrt.py +66 -0
  397. maxframe/tensor/arithmetic/ceil.py +71 -0
  398. maxframe/tensor/arithmetic/clip.py +165 -0
  399. maxframe/tensor/arithmetic/conj.py +74 -0
  400. maxframe/tensor/arithmetic/copysign.py +78 -0
  401. maxframe/tensor/arithmetic/core.py +544 -0
  402. maxframe/tensor/arithmetic/cos.py +85 -0
  403. maxframe/tensor/arithmetic/cosh.py +72 -0
  404. maxframe/tensor/arithmetic/deg2rad.py +72 -0
  405. maxframe/tensor/arithmetic/degrees.py +77 -0
  406. maxframe/tensor/arithmetic/divide.py +114 -0
  407. maxframe/tensor/arithmetic/equal.py +76 -0
  408. maxframe/tensor/arithmetic/exp.py +106 -0
  409. maxframe/tensor/arithmetic/exp2.py +67 -0
  410. maxframe/tensor/arithmetic/expm1.py +79 -0
  411. maxframe/tensor/arithmetic/fabs.py +74 -0
  412. maxframe/tensor/arithmetic/fix.py +69 -0
  413. maxframe/tensor/arithmetic/float_power.py +103 -0
  414. maxframe/tensor/arithmetic/floor.py +77 -0
  415. maxframe/tensor/arithmetic/floordiv.py +94 -0
  416. maxframe/tensor/arithmetic/fmax.py +105 -0
  417. maxframe/tensor/arithmetic/fmin.py +106 -0
  418. maxframe/tensor/arithmetic/fmod.py +99 -0
  419. maxframe/tensor/arithmetic/frexp.py +92 -0
  420. maxframe/tensor/arithmetic/greater.py +77 -0
  421. maxframe/tensor/arithmetic/greater_equal.py +69 -0
  422. maxframe/tensor/arithmetic/hypot.py +77 -0
  423. maxframe/tensor/arithmetic/i0.py +89 -0
  424. maxframe/tensor/arithmetic/imag.py +67 -0
  425. maxframe/tensor/arithmetic/invert.py +110 -0
  426. maxframe/tensor/arithmetic/isclose.py +115 -0
  427. maxframe/tensor/arithmetic/iscomplex.py +64 -0
  428. maxframe/tensor/arithmetic/isfinite.py +106 -0
  429. maxframe/tensor/arithmetic/isinf.py +103 -0
  430. maxframe/tensor/arithmetic/isnan.py +82 -0
  431. maxframe/tensor/arithmetic/isreal.py +63 -0
  432. maxframe/tensor/arithmetic/ldexp.py +99 -0
  433. maxframe/tensor/arithmetic/less.py +69 -0
  434. maxframe/tensor/arithmetic/less_equal.py +69 -0
  435. maxframe/tensor/arithmetic/log.py +92 -0
  436. maxframe/tensor/arithmetic/log10.py +85 -0
  437. maxframe/tensor/arithmetic/log1p.py +95 -0
  438. maxframe/tensor/arithmetic/log2.py +85 -0
  439. maxframe/tensor/arithmetic/logaddexp.py +80 -0
  440. maxframe/tensor/arithmetic/logaddexp2.py +78 -0
  441. maxframe/tensor/arithmetic/logical_and.py +81 -0
  442. maxframe/tensor/arithmetic/logical_not.py +74 -0
  443. maxframe/tensor/arithmetic/logical_or.py +82 -0
  444. maxframe/tensor/arithmetic/logical_xor.py +88 -0
  445. maxframe/tensor/arithmetic/lshift.py +82 -0
  446. maxframe/tensor/arithmetic/maximum.py +108 -0
  447. maxframe/tensor/arithmetic/minimum.py +108 -0
  448. maxframe/tensor/arithmetic/mod.py +104 -0
  449. maxframe/tensor/arithmetic/modf.py +83 -0
  450. maxframe/tensor/arithmetic/multiply.py +81 -0
  451. maxframe/tensor/arithmetic/nan_to_num.py +99 -0
  452. maxframe/tensor/arithmetic/negative.py +65 -0
  453. maxframe/tensor/arithmetic/nextafter.py +68 -0
  454. maxframe/tensor/arithmetic/not_equal.py +72 -0
  455. maxframe/tensor/arithmetic/positive.py +47 -0
  456. maxframe/tensor/arithmetic/power.py +106 -0
  457. maxframe/tensor/arithmetic/rad2deg.py +71 -0
  458. maxframe/tensor/arithmetic/radians.py +77 -0
  459. maxframe/tensor/arithmetic/real.py +70 -0
  460. maxframe/tensor/arithmetic/reciprocal.py +76 -0
  461. maxframe/tensor/arithmetic/rint.py +68 -0
  462. maxframe/tensor/arithmetic/rshift.py +81 -0
  463. maxframe/tensor/arithmetic/setimag.py +29 -0
  464. maxframe/tensor/arithmetic/setreal.py +29 -0
  465. maxframe/tensor/arithmetic/sign.py +81 -0
  466. maxframe/tensor/arithmetic/signbit.py +65 -0
  467. maxframe/tensor/arithmetic/sin.py +98 -0
  468. maxframe/tensor/arithmetic/sinc.py +102 -0
  469. maxframe/tensor/arithmetic/sinh.py +93 -0
  470. maxframe/tensor/arithmetic/spacing.py +72 -0
  471. maxframe/tensor/arithmetic/sqrt.py +81 -0
  472. maxframe/tensor/arithmetic/square.py +69 -0
  473. maxframe/tensor/arithmetic/subtract.py +81 -0
  474. maxframe/tensor/arithmetic/tan.py +88 -0
  475. maxframe/tensor/arithmetic/tanh.py +92 -0
  476. maxframe/tensor/arithmetic/tests/__init__.py +15 -0
  477. maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
  478. maxframe/tensor/arithmetic/truediv.py +104 -0
  479. maxframe/tensor/arithmetic/trunc.py +72 -0
  480. maxframe/tensor/arithmetic/utils.py +65 -0
  481. maxframe/tensor/array_utils.py +186 -0
  482. maxframe/tensor/base/__init__.py +34 -0
  483. maxframe/tensor/base/astype.py +119 -0
  484. maxframe/tensor/base/atleast_1d.py +74 -0
  485. maxframe/tensor/base/broadcast_to.py +89 -0
  486. maxframe/tensor/base/ravel.py +92 -0
  487. maxframe/tensor/base/tests/__init__.py +13 -0
  488. maxframe/tensor/base/tests/test_base.py +114 -0
  489. maxframe/tensor/base/transpose.py +125 -0
  490. maxframe/tensor/base/unique.py +205 -0
  491. maxframe/tensor/base/where.py +127 -0
  492. maxframe/tensor/core.py +724 -0
  493. maxframe/tensor/datasource/__init__.py +32 -0
  494. maxframe/tensor/datasource/arange.py +156 -0
  495. maxframe/tensor/datasource/array.py +415 -0
  496. maxframe/tensor/datasource/core.py +109 -0
  497. maxframe/tensor/datasource/empty.py +169 -0
  498. maxframe/tensor/datasource/from_dataframe.py +70 -0
  499. maxframe/tensor/datasource/from_dense.py +54 -0
  500. maxframe/tensor/datasource/from_sparse.py +47 -0
  501. maxframe/tensor/datasource/full.py +186 -0
  502. maxframe/tensor/datasource/ones.py +173 -0
  503. maxframe/tensor/datasource/scalar.py +40 -0
  504. maxframe/tensor/datasource/tests/__init__.py +13 -0
  505. maxframe/tensor/datasource/tests/test_datasource.py +278 -0
  506. maxframe/tensor/datasource/zeros.py +188 -0
  507. maxframe/tensor/fetch/__init__.py +15 -0
  508. maxframe/tensor/fetch/core.py +54 -0
  509. maxframe/tensor/indexing/__init__.py +47 -0
  510. maxframe/tensor/indexing/choose.py +196 -0
  511. maxframe/tensor/indexing/compress.py +124 -0
  512. maxframe/tensor/indexing/core.py +190 -0
  513. maxframe/tensor/indexing/extract.py +71 -0
  514. maxframe/tensor/indexing/fill_diagonal.py +183 -0
  515. maxframe/tensor/indexing/flatnonzero.py +60 -0
  516. maxframe/tensor/indexing/getitem.py +175 -0
  517. maxframe/tensor/indexing/nonzero.py +120 -0
  518. maxframe/tensor/indexing/setitem.py +132 -0
  519. maxframe/tensor/indexing/slice.py +29 -0
  520. maxframe/tensor/indexing/take.py +130 -0
  521. maxframe/tensor/indexing/tests/__init__.py +15 -0
  522. maxframe/tensor/indexing/tests/test_indexing.py +234 -0
  523. maxframe/tensor/indexing/unravel_index.py +103 -0
  524. maxframe/tensor/merge/__init__.py +15 -0
  525. maxframe/tensor/merge/stack.py +132 -0
  526. maxframe/tensor/merge/tests/__init__.py +13 -0
  527. maxframe/tensor/merge/tests/test_merge.py +52 -0
  528. maxframe/tensor/operators.py +123 -0
  529. maxframe/tensor/random/__init__.py +168 -0
  530. maxframe/tensor/random/beta.py +87 -0
  531. maxframe/tensor/random/binomial.py +137 -0
  532. maxframe/tensor/random/bytes.py +39 -0
  533. maxframe/tensor/random/chisquare.py +110 -0
  534. maxframe/tensor/random/choice.py +186 -0
  535. maxframe/tensor/random/core.py +234 -0
  536. maxframe/tensor/random/dirichlet.py +123 -0
  537. maxframe/tensor/random/exponential.py +94 -0
  538. maxframe/tensor/random/f.py +135 -0
  539. maxframe/tensor/random/gamma.py +128 -0
  540. maxframe/tensor/random/geometric.py +93 -0
  541. maxframe/tensor/random/gumbel.py +167 -0
  542. maxframe/tensor/random/hypergeometric.py +148 -0
  543. maxframe/tensor/random/laplace.py +133 -0
  544. maxframe/tensor/random/logistic.py +129 -0
  545. maxframe/tensor/random/lognormal.py +159 -0
  546. maxframe/tensor/random/logseries.py +122 -0
  547. maxframe/tensor/random/multinomial.py +133 -0
  548. maxframe/tensor/random/multivariate_normal.py +192 -0
  549. maxframe/tensor/random/negative_binomial.py +125 -0
  550. maxframe/tensor/random/noncentral_chisquare.py +132 -0
  551. maxframe/tensor/random/noncentral_f.py +126 -0
  552. maxframe/tensor/random/normal.py +143 -0
  553. maxframe/tensor/random/pareto.py +140 -0
  554. maxframe/tensor/random/permutation.py +104 -0
  555. maxframe/tensor/random/poisson.py +111 -0
  556. maxframe/tensor/random/power.py +142 -0
  557. maxframe/tensor/random/rand.py +82 -0
  558. maxframe/tensor/random/randint.py +121 -0
  559. maxframe/tensor/random/randn.py +96 -0
  560. maxframe/tensor/random/random_integers.py +123 -0
  561. maxframe/tensor/random/random_sample.py +86 -0
  562. maxframe/tensor/random/rayleigh.py +110 -0
  563. maxframe/tensor/random/shuffle.py +61 -0
  564. maxframe/tensor/random/standard_cauchy.py +105 -0
  565. maxframe/tensor/random/standard_exponential.py +72 -0
  566. maxframe/tensor/random/standard_gamma.py +120 -0
  567. maxframe/tensor/random/standard_normal.py +74 -0
  568. maxframe/tensor/random/standard_t.py +135 -0
  569. maxframe/tensor/random/tests/__init__.py +15 -0
  570. maxframe/tensor/random/tests/test_random.py +167 -0
  571. maxframe/tensor/random/triangular.py +119 -0
  572. maxframe/tensor/random/uniform.py +131 -0
  573. maxframe/tensor/random/vonmises.py +131 -0
  574. maxframe/tensor/random/wald.py +114 -0
  575. maxframe/tensor/random/weibull.py +140 -0
  576. maxframe/tensor/random/zipf.py +122 -0
  577. maxframe/tensor/rechunk/__init__.py +26 -0
  578. maxframe/tensor/rechunk/rechunk.py +43 -0
  579. maxframe/tensor/reduction/__init__.py +66 -0
  580. maxframe/tensor/reduction/all.py +103 -0
  581. maxframe/tensor/reduction/allclose.py +88 -0
  582. maxframe/tensor/reduction/any.py +105 -0
  583. maxframe/tensor/reduction/argmax.py +103 -0
  584. maxframe/tensor/reduction/argmin.py +103 -0
  585. maxframe/tensor/reduction/array_equal.py +64 -0
  586. maxframe/tensor/reduction/core.py +168 -0
  587. maxframe/tensor/reduction/count_nonzero.py +81 -0
  588. maxframe/tensor/reduction/cumprod.py +97 -0
  589. maxframe/tensor/reduction/cumsum.py +101 -0
  590. maxframe/tensor/reduction/max.py +120 -0
  591. maxframe/tensor/reduction/mean.py +123 -0
  592. maxframe/tensor/reduction/min.py +120 -0
  593. maxframe/tensor/reduction/nanargmax.py +82 -0
  594. maxframe/tensor/reduction/nanargmin.py +76 -0
  595. maxframe/tensor/reduction/nancumprod.py +91 -0
  596. maxframe/tensor/reduction/nancumsum.py +94 -0
  597. maxframe/tensor/reduction/nanmax.py +111 -0
  598. maxframe/tensor/reduction/nanmean.py +106 -0
  599. maxframe/tensor/reduction/nanmin.py +111 -0
  600. maxframe/tensor/reduction/nanprod.py +94 -0
  601. maxframe/tensor/reduction/nanstd.py +126 -0
  602. maxframe/tensor/reduction/nansum.py +115 -0
  603. maxframe/tensor/reduction/nanvar.py +149 -0
  604. maxframe/tensor/reduction/prod.py +130 -0
  605. maxframe/tensor/reduction/std.py +134 -0
  606. maxframe/tensor/reduction/sum.py +125 -0
  607. maxframe/tensor/reduction/tests/__init__.py +13 -0
  608. maxframe/tensor/reduction/tests/test_reduction.py +181 -0
  609. maxframe/tensor/reduction/var.py +176 -0
  610. maxframe/tensor/reshape/__init__.py +17 -0
  611. maxframe/tensor/reshape/reshape.py +188 -0
  612. maxframe/tensor/reshape/tests/__init__.py +15 -0
  613. maxframe/tensor/reshape/tests/test_reshape.py +37 -0
  614. maxframe/tensor/statistics/__init__.py +13 -0
  615. maxframe/tensor/statistics/percentile.py +175 -0
  616. maxframe/tensor/statistics/quantile.py +288 -0
  617. maxframe/tensor/ufunc/__init__.py +26 -0
  618. maxframe/tensor/ufunc/ufunc.py +200 -0
  619. maxframe/tensor/utils.py +718 -0
  620. maxframe/tests/__init__.py +13 -0
  621. maxframe/tests/test_codegen.py +69 -0
  622. maxframe/tests/test_protocol.py +144 -0
  623. maxframe/tests/test_utils.py +376 -0
  624. maxframe/tests/utils.py +164 -0
  625. maxframe/typing_.py +37 -0
  626. maxframe/udf.py +134 -0
  627. maxframe/utils.py +1114 -0
  628. maxframe-0.1.0b5.dist-info/METADATA +104 -0
  629. maxframe-0.1.0b5.dist-info/RECORD +647 -0
  630. maxframe-0.1.0b5.dist-info/WHEEL +5 -0
  631. maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
  632. maxframe_client/__init__.py +17 -0
  633. maxframe_client/clients/__init__.py +13 -0
  634. maxframe_client/clients/framedriver.py +118 -0
  635. maxframe_client/clients/spe.py +104 -0
  636. maxframe_client/conftest.py +15 -0
  637. maxframe_client/fetcher.py +264 -0
  638. maxframe_client/session/__init__.py +22 -0
  639. maxframe_client/session/consts.py +36 -0
  640. maxframe_client/session/graph.py +119 -0
  641. maxframe_client/session/odps.py +482 -0
  642. maxframe_client/session/task.py +280 -0
  643. maxframe_client/session/tests/__init__.py +13 -0
  644. maxframe_client/session/tests/test_task.py +85 -0
  645. maxframe_client/tests/__init__.py +13 -0
  646. maxframe_client/tests/test_fetcher.py +89 -0
  647. maxframe_client/tests/test_session.py +255 -0
@@ -0,0 +1,88 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+
18
+ from ..arrow import arrow_to_pandas, pandas_to_arrow
19
+
20
+
21
def test_dataframe_convert():
    """Round-trip a DataFrame through pandas_to_arrow / arrow_to_pandas."""
    upper_cols = list("ABCDE")
    lower_cols = list("abcde")

    source = pd.DataFrame(np.random.rand(100, 5), columns=upper_cols)
    table, table_meta = pandas_to_arrow(source)
    # The index goes first as "_idx_0"; data column names come back lower-cased.
    assert table.column_names == ["_idx_0", "a", "b", "c", "d", "e"]

    index_from_arrow = pd.Index(table.columns[0])
    pd.testing.assert_index_equal(index_from_arrow, source.index)

    # Compare the data columns after restoring the original labels.
    values_from_arrow = table.select(lower_cols).to_pandas()
    pd.testing.assert_frame_equal(
        values_from_arrow.set_axis(upper_cols, axis=1),
        source.reset_index(drop=True),
    )

    restored = arrow_to_pandas(table, table_meta)
    pd.testing.assert_frame_equal(source, restored)

    # test DataFrame with MultiIndex as columns
    multi_level_labels = [("A", "A"), ("A", "B"), ("B", "A"), ("B", "B"), ("B", "C")]
    source.columns = pd.MultiIndex.from_tuples(multi_level_labels)
    table, table_meta = pandas_to_arrow(source)
    restored = arrow_to_pandas(table, table_meta)
    pd.testing.assert_frame_equal(source, restored)
42
+
43
+
44
def test_series_convert():
    """Round-trip a named Series through pandas_to_arrow / arrow_to_pandas."""
    source = pd.Series(np.random.rand(100), name="series_name")
    table, table_meta = pandas_to_arrow(source)
    assert table.column_names == ["_idx_0", "series_name"]

    # First Arrow column carries the index, the named column carries the values.
    index_from_arrow = pd.Index(table.columns[0])
    pd.testing.assert_index_equal(index_from_arrow, source.index)

    values_from_arrow = table.select(["series_name"]).to_pandas().iloc[:, 0]
    pd.testing.assert_series_equal(
        values_from_arrow,
        source.reset_index(drop=True),
    )

    restored = arrow_to_pandas(table, table_meta)
    pd.testing.assert_series_equal(source, restored)
57
+
58
+
59
def test_index_convert():
    """Round-trip a flat Index and a MultiIndex through the Arrow converters."""
    source = pd.Index(np.random.rand(100), name="idx_name")
    table, table_meta = pandas_to_arrow(source)
    assert table.column_names == ["_idx_0"]

    # The test re-attaches the index name by hand when rebuilding from the
    # Arrow column.
    rebuilt = pd.Index(table.columns[0], name="idx_name")
    pd.testing.assert_index_equal(rebuilt, source)

    restored = arrow_to_pandas(table, table_meta)
    pd.testing.assert_index_equal(source, restored)

    # test MultiIndex
    level_values = [
        np.random.choice(list("ABC"), 100),
        np.random.randint(0, 10, 100),
    ]
    source = pd.MultiIndex.from_arrays(level_values)
    table, table_meta = pandas_to_arrow(source)
    restored = arrow_to_pandas(table, table_meta)
    pd.testing.assert_index_equal(source, restored)
78
+
79
+
80
def test_scalar_convert():
    """Round-trip a plain float scalar through the Arrow converters."""
    original_value = 12.3456
    table, table_meta = pandas_to_arrow(original_value)
    # The scalar is expected in a single column named like an index column.
    assert table.column_names == ["_idx_0"]

    first_cell = table[0][0]
    assert first_cell.as_py() == original_value

    restored_value = arrow_to_pandas(table, table_meta)
    assert original_value == restored_value
@@ -0,0 +1,297 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+ import pyarrow as pa
18
+ import pytest
19
+ from odps import types as odps_types
20
+
21
+ from ... import dataframe as md
22
+ from ... import tensor as mt
23
+ from ...core import OutputType
24
+ from ..schema import (
25
+ arrow_schema_to_odps_schema,
26
+ build_dataframe_table_meta,
27
+ build_table_column_name,
28
+ odps_schema_to_arrow_schema,
29
+ pandas_to_odps_schema,
30
+ )
31
+
32
+
33
+ def _wrap_maxframe_obj(obj, wrap="no"):
34
+ if wrap == "no":
35
+ return obj
36
+ if isinstance(obj, pd.DataFrame):
37
+ obj = md.DataFrame(obj)
38
+ elif isinstance(obj, pd.Series):
39
+ obj = md.Series(obj)
40
+ elif isinstance(obj, pd.Index):
41
+ obj = md.Index(obj)
42
+ else:
43
+ obj = mt.scalar(obj)
44
+ if wrap == "data":
45
+ return obj.data
46
+ return obj
47
+
48
+
49
@pytest.mark.parametrize("wrap_obj", ["no", "yes", "data"])
def test_pandas_to_odps_schema_dataframe(wrap_obj):
    """Check schema and meta produced by pandas_to_odps_schema for DataFrames.

    Covers a default index, ``ignore_index=True`` and MultiIndex rows/columns.
    """

    def column_names(odps_schema):
        return [col.name for col in odps_schema.columns]

    def column_types(odps_schema):
        return [col.type.name for col in odps_schema.columns]

    data = pd.DataFrame(np.random.rand(100, 5), columns=list("ABCDE"))

    # Default behaviour: the index becomes a leading "_idx_0" bigint column.
    test_df = _wrap_maxframe_obj(data, wrap=wrap_obj)
    schema, meta = pandas_to_odps_schema(test_df)
    lowered = list(test_df.dtypes.index.str.lower())
    n_cols = len(test_df.columns)
    assert column_names(schema) == ["_idx_0"] + lowered
    assert column_types(schema) == ["bigint"] + ["double"] * n_cols
    assert meta.type == OutputType.dataframe
    assert meta.table_column_names == lowered
    assert meta.table_index_column_names == ["_idx_0"]
    assert meta.pd_column_level_names == [None]
    assert meta.pd_index_level_names == [None]

    # ignore_index=True: no index columns are expected in schema or meta.
    test_df = _wrap_maxframe_obj(data, wrap=wrap_obj)
    schema, meta = pandas_to_odps_schema(test_df, ignore_index=True)
    lowered = list(test_df.dtypes.index.str.lower())
    n_cols = len(test_df.columns)
    assert column_names(schema) == lowered
    assert column_types(schema) == ["double"] * n_cols
    assert meta.type == OutputType.dataframe
    assert meta.table_column_names == lowered
    assert meta.table_index_column_names == []
    assert meta.pd_column_level_names == [None]
    assert meta.pd_index_level_names == []

    # MultiIndex on both axes: column tuples are expected joined with "_".
    data.columns = pd.MultiIndex.from_tuples(
        [("A", "A"), ("A", "B"), ("A", "C"), ("B", "A"), ("B", "B")],
        names=["c1", "c2"],
    )
    data.index = pd.MultiIndex.from_arrays(
        [np.random.choice(list("ABC"), 100), np.random.randint(0, 10, 100)]
    )
    test_df = _wrap_maxframe_obj(data, wrap=wrap_obj)
    schema, meta = pandas_to_odps_schema(test_df, unknown_as_string=True)
    joined = list(test_df.dtypes.index.map("_".join).str.lower())
    n_cols = len(test_df.columns)
    assert column_names(schema) == ["_idx_0", "_idx_1"] + joined
    assert column_types(schema) == ["string", "bigint"] + ["double"] * n_cols
    assert meta.type == OutputType.dataframe
    assert meta.table_column_names == joined
    assert meta.table_index_column_names == ["_idx_0", "_idx_1"]
    assert meta.pd_column_level_names == ["c1", "c2"]
    assert meta.pd_index_level_names == [None, None]
98
+
99
+
100
@pytest.mark.parametrize("wrap_obj", ["no", "yes", "data"])
def test_pandas_to_odps_schema_series(wrap_obj):
    """Check schema and meta produced by pandas_to_odps_schema for Series."""

    def column_names(odps_schema):
        return [col.name for col in odps_schema.columns]

    def column_types(odps_schema):
        return [col.type.name for col in odps_schema.columns]

    data = pd.Series(np.random.rand(100))

    # Unnamed series: the value column is expected to be called "_data".
    test_s = _wrap_maxframe_obj(data, wrap=wrap_obj)
    schema, meta = pandas_to_odps_schema(test_s)
    assert column_names(schema) == ["_idx_0", "_data"]
    assert column_types(schema) == ["bigint", "double"]
    assert meta.type == OutputType.series
    assert meta.table_column_names == ["_data"]
    assert meta.table_index_column_names == ["_idx_0"]
    assert meta.pd_column_names == [None]
    assert meta.pd_column_level_names == [None]
    assert meta.pd_index_level_names == [None]

    # ignore_index=True: only the value column is expected.
    schema, meta = pandas_to_odps_schema(test_s, ignore_index=True)
    assert column_names(schema) == ["_data"]
    assert column_types(schema) == ["double"]
    assert meta.type == OutputType.series
    assert meta.table_column_names == ["_data"]
    assert meta.table_index_column_names == []
    assert meta.pd_column_level_names == [None]
    assert meta.pd_index_level_names == []

    # Named series with a named two-level MultiIndex.
    index_levels = [
        np.random.choice(list("ABC"), 100),
        np.random.randint(0, 10, 100),
    ]
    data.index = pd.MultiIndex.from_arrays(index_levels, names=["c1", "c2"])
    data.name = "col_name"
    test_s = _wrap_maxframe_obj(data, wrap=wrap_obj)
    schema, meta = pandas_to_odps_schema(test_s, unknown_as_string=True)
    assert column_names(schema) == ["_idx_0", "_idx_1", "col_name"]
    assert column_types(schema) == ["string", "bigint", "double"]
    assert meta.type == OutputType.series
    assert meta.table_column_names == ["col_name"]
    assert meta.table_index_column_names == ["_idx_0", "_idx_1"]
    assert meta.pd_column_names == ["col_name"]
    assert meta.pd_column_level_names == [None]
    assert meta.pd_index_level_names == ["c1", "c2"]
139
+
140
+
141
@pytest.mark.parametrize("wrap_obj", ["no", "yes", "data"])
def test_pandas_to_odps_schema_index(wrap_obj):
    """Check schema and meta produced by pandas_to_odps_schema for indexes."""

    def column_names(odps_schema):
        return [col.name for col in odps_schema.columns]

    def column_types(odps_schema):
        return [col.type.name for col in odps_schema.columns]

    # Flat integer index.
    data = pd.Index(np.random.randint(0, 100, 100))
    test_idx = _wrap_maxframe_obj(data, wrap=wrap_obj)
    schema, meta = pandas_to_odps_schema(test_idx, unknown_as_string=True)
    assert column_names(schema) == ["_idx_0"]
    assert column_types(schema) == ["bigint"]
    assert meta.type == OutputType.index
    assert meta.table_column_names == []
    assert meta.table_index_column_names == ["_idx_0"]
    assert meta.pd_column_level_names == []
    assert meta.pd_index_level_names == [None]

    # Combining an index input with ignore_index=True must fail an assertion.
    with pytest.raises(AssertionError):
        pandas_to_odps_schema(test_idx, unknown_as_string=True, ignore_index=True)

    # Named two-level MultiIndex.
    index_levels = [
        np.random.choice(list("ABC"), 100),
        np.random.randint(0, 10, 100),
    ]
    data = pd.MultiIndex.from_arrays(index_levels, names=["c1", "c2"])
    test_idx = _wrap_maxframe_obj(data, wrap=wrap_obj)
    schema, meta = pandas_to_odps_schema(test_idx, unknown_as_string=True)
    assert column_names(schema) == ["_idx_0", "_idx_1"]
    assert column_types(schema) == ["string", "bigint"]
    assert meta.type == OutputType.index
    assert meta.table_column_names == []
    assert meta.table_index_column_names == ["_idx_0", "_idx_1"]
    assert meta.pd_column_level_names == []
    assert meta.pd_index_level_names == ["c1", "c2"]
171
+
172
+
173
@pytest.mark.parametrize("wrap_obj", ["no", "yes", "data"])
def test_pandas_to_odps_schema_scalar(wrap_obj):
    """Check schema and meta produced by pandas_to_odps_schema for a scalar."""
    data = 1234.56

    test_scalar = _wrap_maxframe_obj(data, wrap=wrap_obj)
    if wrap_obj != "no":
        # Clear the concrete value held by the op on wrapped scalars
        # (presumably so the schema must come from metadata only - TODO confirm).
        test_scalar.op.data = None

    schema, meta = pandas_to_odps_schema(test_scalar, unknown_as_string=True)
    first_column = schema.columns[0]
    assert first_column.name == "_idx_0"
    assert first_column.type.name == "double"
    assert meta.type == OutputType.scalar
    assert meta.table_column_names == []
    assert meta.table_index_column_names == ["_idx_0"]
    assert meta.pd_column_level_names == []
    assert meta.pd_index_level_names == [None]

    # Combining a scalar input with ignore_index=True must fail an assertion.
    with pytest.raises(AssertionError):
        pandas_to_odps_schema(test_scalar, unknown_as_string=True, ignore_index=True)
191
+
192
+
193
def test_odps_arrow_schema_conversion():
    """Convert an ODPS schema to Arrow and back, checking every column type."""
    # Each row: (ODPS type, expected Arrow type, ODPS type expected after the
    # Arrow schema is converted back).  CHAR / VARCHAR come back as "string".
    conversions = [
        ("string", pa.string(), "string"),
        ("binary", pa.binary(), "binary"),
        ("tinyint", pa.int8(), "tinyint"),
        ("smallint", pa.int16(), "smallint"),
        ("int", pa.int32(), "int"),
        ("bigint", pa.int64(), "bigint"),
        ("boolean", pa.bool_(), "boolean"),
        ("float", pa.float32(), "float"),
        ("double", pa.float64(), "double"),
        ("date", pa.date32(), "date"),
        ("datetime", pa.timestamp("ms"), "datetime"),
        ("timestamp", pa.timestamp("ns"), "timestamp"),
        ("decimal(10, 2)", pa.decimal128(10, 2), "decimal(10, 2)"),
        ("array<string>", pa.list_(pa.string()), "array<string>"),
        (
            "map<string, bigint>",
            pa.map_(pa.string(), pa.int64()),
            "map<string, bigint>",
        ),
        (
            "struct<a1: string, a2: map<string, bigint>>",
            pa.struct([("a1", pa.string()), ("a2", pa.map_(pa.string(), pa.int64()))]),
            "struct<a1: string, a2: map<string, bigint>>",
        ),
        ("CHAR(15)", pa.string(), "string"),
        ("VARCHAR(15)", pa.string(), "string"),
    ]
    col_names = [f"col{i}" for i in range(1, 19)]

    odps_schema = odps_types.OdpsSchema(
        [
            odps_types.Column(name, odps_type)
            for name, (odps_type, _, _) in zip(col_names, conversions)
        ]
    )
    arrow_schema = odps_schema_to_arrow_schema(odps_schema)
    assert arrow_schema.names == col_names
    assert arrow_schema.types == [arrow_type for _, arrow_type, _ in conversions]

    expected_odps_schema = odps_types.OdpsSchema(
        [
            odps_types.Column(name, back_type)
            for name, (_, _, back_type) in zip(col_names, conversions)
        ]
    )

    odps_schema2 = arrow_schema_to_odps_schema(arrow_schema)
    expected_columns = expected_odps_schema.columns
    actual_columns = odps_schema2.columns
    assert [c.name for c in expected_columns] == [c.name for c in actual_columns]
    assert [c.type for c in expected_columns] == [c.type for c in actual_columns]

    # Types without a counterpart must raise TypeError in either direction.
    with pytest.raises(TypeError):
        arrow_schema_to_odps_schema(pa.schema([("col1", pa.float16())]))
    with pytest.raises(TypeError):
        odps_schema_to_arrow_schema(
            odps_types.OdpsSchema([odps_types.Column("col1", "json")])
        )
276
+
277
+
278
def test_build_column_name():
    """Check the names build_table_column_name derives from pandas labels."""
    # (column position, pandas label, expected table column name).  Per the
    # expectations below, the quoted label and the 256-character label fall
    # back to "_column_<position>".
    cases = [
        (0, "a" * 127, "a" * 127),
        (1, "_abc123", "_abc123"),
        (2, "_abc'123", "_column_2"),
        (3, "a" * 256, "_column_3"),
        (4, ("A", 1), "a_1"),
    ]
    # One records dict is shared across all calls, in order.
    records = dict()
    for position, label, expected in cases:
        assert build_table_column_name(position, label, records) == expected
285
+
286
+
287
@pytest.mark.parametrize("wrap_obj", ["no", "yes", "data"])
def test_build_table_meta(wrap_obj):
    """Check table column names when labels repeat or differ only by case."""
    duplicated_labels = ["A", "A", "A_0", "A_1", "a_1", "B", "C"]
    data = pd.DataFrame(np.random.rand(100, 7), columns=duplicated_labels)

    test_df = _wrap_maxframe_obj(data, wrap=wrap_obj)
    table_meta = build_dataframe_table_meta(test_df)

    # Expected result: lower-cased names, with numeric suffixes on the ones
    # that would otherwise clash.
    assert table_meta.table_column_names == [
        "a_2",
        "a_3",
        "a_0",
        "a_1_0",
        "a_1_1",
        "b",
        "c",
    ]
@@ -0,0 +1,136 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+ import pyarrow as pa
18
+ from odps import ODPS
19
+
20
+ from ...tests.utils import flaky, tn
21
+ from ...utils import config_odps_default_options
22
+ from ..tableio import HaloTableIO
23
+
24
+
25
@flaky(max_runs=3)
def test_empty_table_io():
    """Reading a freshly created, empty table must yield zero rows."""
    config_odps_default_options()

    odps_entry = ODPS.from_environments()
    table_io = HaloTableIO(odps_entry)

    # test read from empty table
    table_name = tn("test_empty_table_halo_read")
    odps_entry.delete_table(table_name, if_exists=True)
    table = odps_entry.create_table(table_name, "col1 string", lifecycle=1)

    try:
        with table_io.open_reader(table_name) as reader:
            fetched = reader.read_all()
            assert len(fetched) == 0
    finally:
        # Always drop the table, even when the assertion fails.
        table.drop()
42
+
43
+
44
@flaky(max_runs=3)
def test_table_io_without_parts():
    """Write a frame to a non-partitioned table and read it back unchanged."""
    config_odps_default_options()

    odps_entry = ODPS.from_environments()
    table_io = HaloTableIO(odps_entry)

    # test read and write tables without partition
    table_name = tn("test_no_part_halo_write")
    odps_entry.delete_table(table_name, if_exists=True)
    column_spec = ",".join(f"{c} double" for c in "abcde")
    table = odps_entry.create_table(table_name, column_spec, lifecycle=1)

    try:
        source = pd.DataFrame(np.random.rand(100, 5), columns=list("abcde"))
        with table_io.open_writer(table_name) as writer:
            writer.write(pa.Table.from_pandas(source, preserve_index=False))
        with table_io.open_reader(table_name) as reader:
            fetched = reader.read_all().to_pandas()
            pd.testing.assert_frame_equal(fetched, source)
    finally:
        # Always drop the table, even when a step above fails.
        table.drop()
66
+
67
+
68
@flaky(max_runs=3)
def test_table_io_with_range_reader():
    """Read row ranges (forward and reversed) from a non-partitioned table.

    The table gets its own name instead of sharing one with
    ``test_table_io_without_parts``; with a shared name, the two tests could
    delete or drop each other's table when executed concurrently.
    """
    config_odps_default_options()

    o = ODPS.from_environments()
    halo_table_io = HaloTableIO(o)

    # test ranged reads on a table without partition
    range_table_name = tn("test_range_reader_halo_read")
    o.delete_table(range_table_name, if_exists=True)
    tb = o.create_table(
        range_table_name, ",".join(f"{c} double" for c in "abcde"), lifecycle=1
    )

    try:
        pd_data = pd.DataFrame(np.random.rand(100, 5), columns=list("abcde"))
        with halo_table_io.open_writer(range_table_name) as writer:
            writer.write(pa.Table.from_pandas(pd_data, preserve_index=False))

        # start=None with stop=100 is expected to return all 100 rows.
        with halo_table_io.open_reader(
            range_table_name, start=None, stop=100, row_batch_size=10
        ) as reader:
            pd.testing.assert_frame_equal(reader.read_all().to_pandas(), pd_data)

        # With reverse_range=True, start=-2 / stop=-52 is expected to return
        # the 50 rows selected by iloc[-51:-1], in their original order.
        with halo_table_io.open_reader(
            range_table_name,
            start=-2,
            stop=-52,
            reverse_range=True,
            row_batch_size=10,
        ) as reader:
            pd.testing.assert_frame_equal(
                reader.read_all().to_pandas(),
                pd_data.iloc[-51:-1].reset_index(drop=True),
            )
    finally:
        # Always drop the table, even when a step above fails.
        tb.drop()
105
+
106
+
107
@flaky(max_runs=3)
def test_table_io_with_parts():
    """Write to one partition and read it back, with and without the partition column."""
    config_odps_default_options()

    odps_entry = ODPS.from_environments()
    table_io = HaloTableIO(odps_entry)

    # test read and write tables with partition
    table_name = tn("test_parted_halo_write")
    odps_entry.delete_table(table_name, if_exists=True)
    data_columns = ",".join(f"{c} double" for c in "abcde")
    # Schema is passed as a (data columns, partition columns) pair.
    table = odps_entry.create_table(
        table_name,
        (data_columns, "pt string"),
        lifecycle=1,
    )

    try:
        source = pd.DataFrame(np.random.rand(100, 5), columns=list("abcde"))
        with table_io.open_writer(table_name, "pt=test") as writer:
            writer.write(pa.Table.from_pandas(source, preserve_index=False))

        with table_io.open_reader(table_name, "pt=test") as reader:
            fetched = reader.read_all().to_pandas()
            pd.testing.assert_frame_equal(fetched, source)

        with table_io.open_reader(
            table_name, "pt=test", partition_columns=True
        ) as reader:
            # With partition_columns=True an extra "pt" column is expected.
            with_partition = source.copy()
            with_partition["pt"] = "test"
            fetched = reader.read_all().to_pandas()
            pd.testing.assert_frame_equal(fetched, with_partition)
    finally:
        # Always drop the table, even when a step above fails.
        table.drop()
@@ -0,0 +1,90 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import pytest
16
+ from odps import ODPS
17
+
18
+ from ...tests.utils import tn
19
+ from ..volumeio import ODPSVolumeReader, ODPSVolumeWriter
20
+
21
+
22
@pytest.fixture
def create_volume(request, oss_config):
    """Create a test volume, yield its name and clean it up afterwards.

    ``request.param`` selects the kind of volume: ``"parted"`` creates a
    partitioned volume; any other value creates an external volume located
    in an OSS directory, which requires ``oss_config`` (the test is skipped
    when it is ``None``).

    Cleanup is best-effort and catches ``Exception`` only, so that
    ``KeyboardInterrupt``, ``SystemExit`` and ``GeneratorExit`` are not
    swallowed.
    """
    test_vol_name = tn("test_vol_name_" + request.param)
    odps_entry = ODPS.from_environments()

    # Remove a possible leftover from a previous run.
    try:
        odps_entry.delete_volume(test_vol_name)
    except Exception:
        pass

    oss_test_dir_name = None
    if request.param == "parted":
        odps_entry.create_parted_volume(test_vol_name)
    else:
        oss_test_dir_name = tn("test_oss_directory")
        if oss_config is None:
            pytest.skip("Need oss and its config to run this test")
        (
            oss_access_id,
            oss_secret_access_key,
            oss_bucket_name,
            oss_endpoint,
        ) = oss_config.oss_config
        test_location = "oss://%s:%s@%s/%s/%s" % (
            oss_access_id,
            oss_secret_access_key,
            oss_endpoint,
            oss_bucket_name,
            oss_test_dir_name,
        )
        # Create the directory placeholder object before pointing the volume at it.
        oss_config.oss_bucket.put_object(oss_test_dir_name + "/", b"")
        odps_entry.create_external_volume(test_vol_name, location=test_location)
    try:
        yield test_vol_name
    finally:
        try:
            odps_entry.delete_volume(test_vol_name)
        except Exception:
            pass

        if oss_test_dir_name is not None:
            import oss2

            keys = [
                obj.key
                for obj in oss2.ObjectIterator(oss_config.oss_bucket, oss_test_dir_name)
            ]
            # Skip the request when nothing is listed; oss2 is believed to
            # reject an empty key list - TODO confirm.
            if keys:
                oss_config.oss_bucket.batch_delete_objects(keys)
70
+
71
+
72
@pytest.mark.parametrize("create_volume", ["parted", "external"], indirect=True)
def test_read_write_volume(create_volume):
    """Write two files through a write session and read them back."""
    test_vol_dir = "test_vol_dir"
    odps_entry = ODPS.from_environments()

    session_owner = ODPSVolumeWriter(odps_entry, create_volume, test_vol_dir)
    write_session_id = session_owner.create_write_session()

    # A second writer instance reuses the session id created by the first.
    file_writer = ODPSVolumeWriter(odps_entry, create_volume, test_vol_dir)
    payloads = {"file1": b"content1", "file2": b"content2"}
    for file_name, content in payloads.items():
        file_writer.write_file(file_name, content, write_session_id)
    file_writer.commit(list(payloads), write_session_id)

    reader = ODPSVolumeReader(odps_entry, create_volume, test_vol_dir)
    for file_name, content in payloads.items():
        assert reader.read_file(file_name) == content

    assert ["file1", "file2"] == sorted(reader.list_files())