maxframe 0.1.0b5__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (647)
  1. maxframe/__init__.py +32 -0
  2. maxframe/_utils.cpython-311-darwin.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyx +547 -0
  5. maxframe/codegen.py +528 -0
  6. maxframe/config/__init__.py +15 -0
  7. maxframe/config/config.py +443 -0
  8. maxframe/config/tests/__init__.py +13 -0
  9. maxframe/config/tests/test_config.py +103 -0
  10. maxframe/config/tests/test_validators.py +34 -0
  11. maxframe/config/validators.py +57 -0
  12. maxframe/conftest.py +139 -0
  13. maxframe/core/__init__.py +65 -0
  14. maxframe/core/base.py +156 -0
  15. maxframe/core/entity/__init__.py +44 -0
  16. maxframe/core/entity/chunks.py +68 -0
  17. maxframe/core/entity/core.py +152 -0
  18. maxframe/core/entity/executable.py +337 -0
  19. maxframe/core/entity/fuse.py +73 -0
  20. maxframe/core/entity/objects.py +100 -0
  21. maxframe/core/entity/output_types.py +90 -0
  22. maxframe/core/entity/tileables.py +438 -0
  23. maxframe/core/entity/utils.py +24 -0
  24. maxframe/core/graph/__init__.py +17 -0
  25. maxframe/core/graph/builder/__init__.py +16 -0
  26. maxframe/core/graph/builder/base.py +86 -0
  27. maxframe/core/graph/builder/chunk.py +430 -0
  28. maxframe/core/graph/builder/tileable.py +34 -0
  29. maxframe/core/graph/builder/utils.py +41 -0
  30. maxframe/core/graph/core.cpython-311-darwin.so +0 -0
  31. maxframe/core/graph/core.pyx +467 -0
  32. maxframe/core/graph/entity.py +171 -0
  33. maxframe/core/graph/tests/__init__.py +13 -0
  34. maxframe/core/graph/tests/test_graph.py +205 -0
  35. maxframe/core/mode.py +96 -0
  36. maxframe/core/operator/__init__.py +34 -0
  37. maxframe/core/operator/base.py +450 -0
  38. maxframe/core/operator/core.py +276 -0
  39. maxframe/core/operator/fetch.py +53 -0
  40. maxframe/core/operator/fuse.py +29 -0
  41. maxframe/core/operator/objects.py +72 -0
  42. maxframe/core/operator/shuffle.py +111 -0
  43. maxframe/core/operator/tests/__init__.py +13 -0
  44. maxframe/core/operator/tests/test_core.py +64 -0
  45. maxframe/core/tests/__init__.py +13 -0
  46. maxframe/core/tests/test_mode.py +75 -0
  47. maxframe/dataframe/__init__.py +81 -0
  48. maxframe/dataframe/arithmetic/__init__.py +359 -0
  49. maxframe/dataframe/arithmetic/abs.py +33 -0
  50. maxframe/dataframe/arithmetic/add.py +60 -0
  51. maxframe/dataframe/arithmetic/arccos.py +28 -0
  52. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  53. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  54. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  55. maxframe/dataframe/arithmetic/arctan.py +28 -0
  56. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  57. maxframe/dataframe/arithmetic/around.py +152 -0
  58. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  59. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  60. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  61. maxframe/dataframe/arithmetic/ceil.py +28 -0
  62. maxframe/dataframe/arithmetic/core.py +342 -0
  63. maxframe/dataframe/arithmetic/cos.py +28 -0
  64. maxframe/dataframe/arithmetic/cosh.py +28 -0
  65. maxframe/dataframe/arithmetic/degrees.py +28 -0
  66. maxframe/dataframe/arithmetic/docstring.py +442 -0
  67. maxframe/dataframe/arithmetic/equal.py +56 -0
  68. maxframe/dataframe/arithmetic/exp.py +28 -0
  69. maxframe/dataframe/arithmetic/exp2.py +28 -0
  70. maxframe/dataframe/arithmetic/expm1.py +28 -0
  71. maxframe/dataframe/arithmetic/floor.py +28 -0
  72. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  73. maxframe/dataframe/arithmetic/greater.py +57 -0
  74. maxframe/dataframe/arithmetic/greater_equal.py +57 -0
  75. maxframe/dataframe/arithmetic/invert.py +33 -0
  76. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  77. maxframe/dataframe/arithmetic/less.py +57 -0
  78. maxframe/dataframe/arithmetic/less_equal.py +57 -0
  79. maxframe/dataframe/arithmetic/log.py +28 -0
  80. maxframe/dataframe/arithmetic/log10.py +28 -0
  81. maxframe/dataframe/arithmetic/log2.py +28 -0
  82. maxframe/dataframe/arithmetic/mod.py +60 -0
  83. maxframe/dataframe/arithmetic/multiply.py +60 -0
  84. maxframe/dataframe/arithmetic/negative.py +33 -0
  85. maxframe/dataframe/arithmetic/not_equal.py +56 -0
  86. maxframe/dataframe/arithmetic/power.py +68 -0
  87. maxframe/dataframe/arithmetic/radians.py +28 -0
  88. maxframe/dataframe/arithmetic/sin.py +28 -0
  89. maxframe/dataframe/arithmetic/sinh.py +28 -0
  90. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  91. maxframe/dataframe/arithmetic/subtract.py +64 -0
  92. maxframe/dataframe/arithmetic/tan.py +28 -0
  93. maxframe/dataframe/arithmetic/tanh.py +28 -0
  94. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  95. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
  96. maxframe/dataframe/arithmetic/truediv.py +64 -0
  97. maxframe/dataframe/arithmetic/trunc.py +28 -0
  98. maxframe/dataframe/arrays.py +864 -0
  99. maxframe/dataframe/core.py +2417 -0
  100. maxframe/dataframe/datasource/__init__.py +15 -0
  101. maxframe/dataframe/datasource/core.py +81 -0
  102. maxframe/dataframe/datasource/dataframe.py +59 -0
  103. maxframe/dataframe/datasource/date_range.py +504 -0
  104. maxframe/dataframe/datasource/from_index.py +54 -0
  105. maxframe/dataframe/datasource/from_records.py +107 -0
  106. maxframe/dataframe/datasource/from_tensor.py +419 -0
  107. maxframe/dataframe/datasource/index.py +117 -0
  108. maxframe/dataframe/datasource/read_csv.py +528 -0
  109. maxframe/dataframe/datasource/read_odps_query.py +299 -0
  110. maxframe/dataframe/datasource/read_odps_table.py +253 -0
  111. maxframe/dataframe/datasource/read_parquet.py +421 -0
  112. maxframe/dataframe/datasource/series.py +55 -0
  113. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  114. maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
  115. maxframe/dataframe/datastore/__init__.py +26 -0
  116. maxframe/dataframe/datastore/core.py +19 -0
  117. maxframe/dataframe/datastore/to_csv.py +227 -0
  118. maxframe/dataframe/datastore/to_odps.py +162 -0
  119. maxframe/dataframe/extensions/__init__.py +41 -0
  120. maxframe/dataframe/extensions/accessor.py +50 -0
  121. maxframe/dataframe/extensions/reshuffle.py +83 -0
  122. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  123. maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
  124. maxframe/dataframe/fetch/__init__.py +15 -0
  125. maxframe/dataframe/fetch/core.py +86 -0
  126. maxframe/dataframe/groupby/__init__.py +82 -0
  127. maxframe/dataframe/groupby/aggregation.py +350 -0
  128. maxframe/dataframe/groupby/apply.py +251 -0
  129. maxframe/dataframe/groupby/core.py +179 -0
  130. maxframe/dataframe/groupby/cum.py +124 -0
  131. maxframe/dataframe/groupby/fill.py +141 -0
  132. maxframe/dataframe/groupby/getitem.py +92 -0
  133. maxframe/dataframe/groupby/head.py +105 -0
  134. maxframe/dataframe/groupby/sample.py +214 -0
  135. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  136. maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
  137. maxframe/dataframe/groupby/transform.py +255 -0
  138. maxframe/dataframe/indexing/__init__.py +84 -0
  139. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  140. maxframe/dataframe/indexing/align.py +349 -0
  141. maxframe/dataframe/indexing/at.py +83 -0
  142. maxframe/dataframe/indexing/getitem.py +204 -0
  143. maxframe/dataframe/indexing/iat.py +37 -0
  144. maxframe/dataframe/indexing/iloc.py +566 -0
  145. maxframe/dataframe/indexing/insert.py +86 -0
  146. maxframe/dataframe/indexing/loc.py +411 -0
  147. maxframe/dataframe/indexing/reindex.py +526 -0
  148. maxframe/dataframe/indexing/rename.py +462 -0
  149. maxframe/dataframe/indexing/rename_axis.py +209 -0
  150. maxframe/dataframe/indexing/reset_index.py +402 -0
  151. maxframe/dataframe/indexing/sample.py +221 -0
  152. maxframe/dataframe/indexing/set_axis.py +194 -0
  153. maxframe/dataframe/indexing/set_index.py +61 -0
  154. maxframe/dataframe/indexing/setitem.py +130 -0
  155. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  156. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  157. maxframe/dataframe/indexing/where.py +308 -0
  158. maxframe/dataframe/initializer.py +288 -0
  159. maxframe/dataframe/merge/__init__.py +32 -0
  160. maxframe/dataframe/merge/append.py +121 -0
  161. maxframe/dataframe/merge/concat.py +325 -0
  162. maxframe/dataframe/merge/merge.py +593 -0
  163. maxframe/dataframe/merge/tests/__init__.py +13 -0
  164. maxframe/dataframe/merge/tests/test_merge.py +215 -0
  165. maxframe/dataframe/misc/__init__.py +134 -0
  166. maxframe/dataframe/misc/_duplicate.py +46 -0
  167. maxframe/dataframe/misc/accessor.py +276 -0
  168. maxframe/dataframe/misc/apply.py +692 -0
  169. maxframe/dataframe/misc/astype.py +236 -0
  170. maxframe/dataframe/misc/case_when.py +141 -0
  171. maxframe/dataframe/misc/check_monotonic.py +84 -0
  172. maxframe/dataframe/misc/cut.py +383 -0
  173. maxframe/dataframe/misc/datetimes.py +79 -0
  174. maxframe/dataframe/misc/describe.py +108 -0
  175. maxframe/dataframe/misc/diff.py +210 -0
  176. maxframe/dataframe/misc/drop.py +440 -0
  177. maxframe/dataframe/misc/drop_duplicates.py +248 -0
  178. maxframe/dataframe/misc/duplicated.py +292 -0
  179. maxframe/dataframe/misc/eval.py +728 -0
  180. maxframe/dataframe/misc/explode.py +171 -0
  181. maxframe/dataframe/misc/get_dummies.py +208 -0
  182. maxframe/dataframe/misc/isin.py +217 -0
  183. maxframe/dataframe/misc/map.py +236 -0
  184. maxframe/dataframe/misc/melt.py +162 -0
  185. maxframe/dataframe/misc/memory_usage.py +248 -0
  186. maxframe/dataframe/misc/pct_change.py +150 -0
  187. maxframe/dataframe/misc/pivot_table.py +262 -0
  188. maxframe/dataframe/misc/qcut.py +104 -0
  189. maxframe/dataframe/misc/select_dtypes.py +104 -0
  190. maxframe/dataframe/misc/shift.py +256 -0
  191. maxframe/dataframe/misc/stack.py +238 -0
  192. maxframe/dataframe/misc/string_.py +221 -0
  193. maxframe/dataframe/misc/tests/__init__.py +13 -0
  194. maxframe/dataframe/misc/tests/test_misc.py +468 -0
  195. maxframe/dataframe/misc/to_numeric.py +178 -0
  196. maxframe/dataframe/misc/transform.py +361 -0
  197. maxframe/dataframe/misc/transpose.py +136 -0
  198. maxframe/dataframe/misc/value_counts.py +182 -0
  199. maxframe/dataframe/missing/__init__.py +53 -0
  200. maxframe/dataframe/missing/checkna.py +223 -0
  201. maxframe/dataframe/missing/dropna.py +280 -0
  202. maxframe/dataframe/missing/fillna.py +275 -0
  203. maxframe/dataframe/missing/replace.py +439 -0
  204. maxframe/dataframe/missing/tests/__init__.py +13 -0
  205. maxframe/dataframe/missing/tests/test_missing.py +89 -0
  206. maxframe/dataframe/operators.py +273 -0
  207. maxframe/dataframe/plotting/__init__.py +40 -0
  208. maxframe/dataframe/plotting/core.py +78 -0
  209. maxframe/dataframe/plotting/tests/__init__.py +13 -0
  210. maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
  211. maxframe/dataframe/reduction/__init__.py +107 -0
  212. maxframe/dataframe/reduction/aggregation.py +344 -0
  213. maxframe/dataframe/reduction/all.py +78 -0
  214. maxframe/dataframe/reduction/any.py +78 -0
  215. maxframe/dataframe/reduction/core.py +837 -0
  216. maxframe/dataframe/reduction/count.py +59 -0
  217. maxframe/dataframe/reduction/cummax.py +30 -0
  218. maxframe/dataframe/reduction/cummin.py +30 -0
  219. maxframe/dataframe/reduction/cumprod.py +30 -0
  220. maxframe/dataframe/reduction/cumsum.py +30 -0
  221. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  222. maxframe/dataframe/reduction/kurtosis.py +104 -0
  223. maxframe/dataframe/reduction/max.py +65 -0
  224. maxframe/dataframe/reduction/mean.py +61 -0
  225. maxframe/dataframe/reduction/min.py +65 -0
  226. maxframe/dataframe/reduction/nunique.py +141 -0
  227. maxframe/dataframe/reduction/prod.py +76 -0
  228. maxframe/dataframe/reduction/reduction_size.py +36 -0
  229. maxframe/dataframe/reduction/sem.py +69 -0
  230. maxframe/dataframe/reduction/skew.py +89 -0
  231. maxframe/dataframe/reduction/std.py +53 -0
  232. maxframe/dataframe/reduction/str_concat.py +48 -0
  233. maxframe/dataframe/reduction/sum.py +77 -0
  234. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  235. maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
  236. maxframe/dataframe/reduction/unique.py +90 -0
  237. maxframe/dataframe/reduction/var.py +72 -0
  238. maxframe/dataframe/sort/__init__.py +34 -0
  239. maxframe/dataframe/sort/core.py +36 -0
  240. maxframe/dataframe/sort/sort_index.py +153 -0
  241. maxframe/dataframe/sort/sort_values.py +311 -0
  242. maxframe/dataframe/sort/tests/__init__.py +13 -0
  243. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  244. maxframe/dataframe/statistics/__init__.py +33 -0
  245. maxframe/dataframe/statistics/corr.py +280 -0
  246. maxframe/dataframe/statistics/quantile.py +341 -0
  247. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  248. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  249. maxframe/dataframe/tests/__init__.py +13 -0
  250. maxframe/dataframe/tests/test_initializer.py +29 -0
  251. maxframe/dataframe/tseries/__init__.py +13 -0
  252. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  253. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  254. maxframe/dataframe/tseries/to_datetime.py +297 -0
  255. maxframe/dataframe/ufunc/__init__.py +27 -0
  256. maxframe/dataframe/ufunc/tensor.py +54 -0
  257. maxframe/dataframe/ufunc/ufunc.py +52 -0
  258. maxframe/dataframe/utils.py +1267 -0
  259. maxframe/dataframe/window/__init__.py +29 -0
  260. maxframe/dataframe/window/aggregation.py +96 -0
  261. maxframe/dataframe/window/core.py +69 -0
  262. maxframe/dataframe/window/ewm.py +249 -0
  263. maxframe/dataframe/window/expanding.py +147 -0
  264. maxframe/dataframe/window/rolling.py +376 -0
  265. maxframe/dataframe/window/tests/__init__.py +13 -0
  266. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  267. maxframe/dataframe/window/tests/test_expanding.py +66 -0
  268. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  269. maxframe/env.py +33 -0
  270. maxframe/errors.py +21 -0
  271. maxframe/extension.py +81 -0
  272. maxframe/learn/__init__.py +17 -0
  273. maxframe/learn/contrib/__init__.py +17 -0
  274. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  275. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  276. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  277. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  278. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  279. maxframe/learn/contrib/utils.py +52 -0
  280. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  281. maxframe/learn/contrib/xgboost/classifier.py +86 -0
  282. maxframe/learn/contrib/xgboost/core.py +156 -0
  283. maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
  284. maxframe/learn/contrib/xgboost/predict.py +138 -0
  285. maxframe/learn/contrib/xgboost/regressor.py +78 -0
  286. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  287. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  288. maxframe/learn/contrib/xgboost/train.py +121 -0
  289. maxframe/learn/utils/__init__.py +15 -0
  290. maxframe/learn/utils/core.py +29 -0
  291. maxframe/lib/__init__.py +15 -0
  292. maxframe/lib/aio/__init__.py +27 -0
  293. maxframe/lib/aio/_runners.py +162 -0
  294. maxframe/lib/aio/_threads.py +35 -0
  295. maxframe/lib/aio/base.py +82 -0
  296. maxframe/lib/aio/file.py +85 -0
  297. maxframe/lib/aio/isolation.py +100 -0
  298. maxframe/lib/aio/lru.py +242 -0
  299. maxframe/lib/aio/parallelism.py +37 -0
  300. maxframe/lib/aio/tests/__init__.py +13 -0
  301. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  302. maxframe/lib/compression.py +55 -0
  303. maxframe/lib/cython/__init__.py +13 -0
  304. maxframe/lib/cython/libcpp.pxd +30 -0
  305. maxframe/lib/filesystem/__init__.py +21 -0
  306. maxframe/lib/filesystem/_glob.py +173 -0
  307. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  308. maxframe/lib/filesystem/_oss_lib/common.py +198 -0
  309. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  310. maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
  311. maxframe/lib/filesystem/arrow.py +236 -0
  312. maxframe/lib/filesystem/base.py +263 -0
  313. maxframe/lib/filesystem/core.py +95 -0
  314. maxframe/lib/filesystem/fsmap.py +164 -0
  315. maxframe/lib/filesystem/hdfs.py +31 -0
  316. maxframe/lib/filesystem/local.py +112 -0
  317. maxframe/lib/filesystem/oss.py +157 -0
  318. maxframe/lib/filesystem/tests/__init__.py +13 -0
  319. maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
  320. maxframe/lib/filesystem/tests/test_oss.py +182 -0
  321. maxframe/lib/functools_compat.py +81 -0
  322. maxframe/lib/mmh3.cpython-311-darwin.so +0 -0
  323. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  324. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  325. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  326. maxframe/lib/sparse/__init__.py +861 -0
  327. maxframe/lib/sparse/array.py +1604 -0
  328. maxframe/lib/sparse/core.py +92 -0
  329. maxframe/lib/sparse/matrix.py +241 -0
  330. maxframe/lib/sparse/tests/__init__.py +15 -0
  331. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  332. maxframe/lib/sparse/vector.py +150 -0
  333. maxframe/lib/tblib/LICENSE +20 -0
  334. maxframe/lib/tblib/__init__.py +327 -0
  335. maxframe/lib/tblib/cpython.py +83 -0
  336. maxframe/lib/tblib/decorators.py +44 -0
  337. maxframe/lib/tblib/pickling_support.py +90 -0
  338. maxframe/lib/tests/__init__.py +13 -0
  339. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  340. maxframe/lib/version.py +620 -0
  341. maxframe/lib/wrapped_pickle.py +139 -0
  342. maxframe/mixin.py +100 -0
  343. maxframe/odpsio/__init__.py +21 -0
  344. maxframe/odpsio/arrow.py +91 -0
  345. maxframe/odpsio/schema.py +364 -0
  346. maxframe/odpsio/tableio.py +322 -0
  347. maxframe/odpsio/tests/__init__.py +13 -0
  348. maxframe/odpsio/tests/test_arrow.py +88 -0
  349. maxframe/odpsio/tests/test_schema.py +297 -0
  350. maxframe/odpsio/tests/test_tableio.py +136 -0
  351. maxframe/odpsio/tests/test_volumeio.py +90 -0
  352. maxframe/odpsio/volumeio.py +95 -0
  353. maxframe/opcodes.py +590 -0
  354. maxframe/protocol.py +415 -0
  355. maxframe/remote/__init__.py +18 -0
  356. maxframe/remote/core.py +210 -0
  357. maxframe/remote/run_script.py +121 -0
  358. maxframe/serialization/__init__.py +26 -0
  359. maxframe/serialization/arrow.py +95 -0
  360. maxframe/serialization/core.cpython-311-darwin.so +0 -0
  361. maxframe/serialization/core.pxd +44 -0
  362. maxframe/serialization/core.pyi +61 -0
  363. maxframe/serialization/core.pyx +1094 -0
  364. maxframe/serialization/exception.py +86 -0
  365. maxframe/serialization/maxframe_objects.py +39 -0
  366. maxframe/serialization/numpy.py +91 -0
  367. maxframe/serialization/pandas.py +202 -0
  368. maxframe/serialization/scipy.py +71 -0
  369. maxframe/serialization/serializables/__init__.py +55 -0
  370. maxframe/serialization/serializables/core.py +262 -0
  371. maxframe/serialization/serializables/field.py +624 -0
  372. maxframe/serialization/serializables/field_type.py +589 -0
  373. maxframe/serialization/serializables/tests/__init__.py +13 -0
  374. maxframe/serialization/serializables/tests/test_field_type.py +121 -0
  375. maxframe/serialization/serializables/tests/test_serializable.py +250 -0
  376. maxframe/serialization/tests/__init__.py +13 -0
  377. maxframe/serialization/tests/test_serial.py +412 -0
  378. maxframe/session.py +1310 -0
  379. maxframe/tensor/__init__.py +183 -0
  380. maxframe/tensor/arithmetic/__init__.py +315 -0
  381. maxframe/tensor/arithmetic/abs.py +68 -0
  382. maxframe/tensor/arithmetic/absolute.py +68 -0
  383. maxframe/tensor/arithmetic/add.py +82 -0
  384. maxframe/tensor/arithmetic/angle.py +72 -0
  385. maxframe/tensor/arithmetic/arccos.py +104 -0
  386. maxframe/tensor/arithmetic/arccosh.py +91 -0
  387. maxframe/tensor/arithmetic/arcsin.py +94 -0
  388. maxframe/tensor/arithmetic/arcsinh.py +86 -0
  389. maxframe/tensor/arithmetic/arctan.py +106 -0
  390. maxframe/tensor/arithmetic/arctan2.py +128 -0
  391. maxframe/tensor/arithmetic/arctanh.py +86 -0
  392. maxframe/tensor/arithmetic/around.py +114 -0
  393. maxframe/tensor/arithmetic/bitand.py +95 -0
  394. maxframe/tensor/arithmetic/bitor.py +102 -0
  395. maxframe/tensor/arithmetic/bitxor.py +95 -0
  396. maxframe/tensor/arithmetic/cbrt.py +66 -0
  397. maxframe/tensor/arithmetic/ceil.py +71 -0
  398. maxframe/tensor/arithmetic/clip.py +165 -0
  399. maxframe/tensor/arithmetic/conj.py +74 -0
  400. maxframe/tensor/arithmetic/copysign.py +78 -0
  401. maxframe/tensor/arithmetic/core.py +544 -0
  402. maxframe/tensor/arithmetic/cos.py +85 -0
  403. maxframe/tensor/arithmetic/cosh.py +72 -0
  404. maxframe/tensor/arithmetic/deg2rad.py +72 -0
  405. maxframe/tensor/arithmetic/degrees.py +77 -0
  406. maxframe/tensor/arithmetic/divide.py +114 -0
  407. maxframe/tensor/arithmetic/equal.py +76 -0
  408. maxframe/tensor/arithmetic/exp.py +106 -0
  409. maxframe/tensor/arithmetic/exp2.py +67 -0
  410. maxframe/tensor/arithmetic/expm1.py +79 -0
  411. maxframe/tensor/arithmetic/fabs.py +74 -0
  412. maxframe/tensor/arithmetic/fix.py +69 -0
  413. maxframe/tensor/arithmetic/float_power.py +103 -0
  414. maxframe/tensor/arithmetic/floor.py +77 -0
  415. maxframe/tensor/arithmetic/floordiv.py +94 -0
  416. maxframe/tensor/arithmetic/fmax.py +105 -0
  417. maxframe/tensor/arithmetic/fmin.py +106 -0
  418. maxframe/tensor/arithmetic/fmod.py +99 -0
  419. maxframe/tensor/arithmetic/frexp.py +92 -0
  420. maxframe/tensor/arithmetic/greater.py +77 -0
  421. maxframe/tensor/arithmetic/greater_equal.py +69 -0
  422. maxframe/tensor/arithmetic/hypot.py +77 -0
  423. maxframe/tensor/arithmetic/i0.py +89 -0
  424. maxframe/tensor/arithmetic/imag.py +67 -0
  425. maxframe/tensor/arithmetic/invert.py +110 -0
  426. maxframe/tensor/arithmetic/isclose.py +115 -0
  427. maxframe/tensor/arithmetic/iscomplex.py +64 -0
  428. maxframe/tensor/arithmetic/isfinite.py +106 -0
  429. maxframe/tensor/arithmetic/isinf.py +103 -0
  430. maxframe/tensor/arithmetic/isnan.py +82 -0
  431. maxframe/tensor/arithmetic/isreal.py +63 -0
  432. maxframe/tensor/arithmetic/ldexp.py +99 -0
  433. maxframe/tensor/arithmetic/less.py +69 -0
  434. maxframe/tensor/arithmetic/less_equal.py +69 -0
  435. maxframe/tensor/arithmetic/log.py +92 -0
  436. maxframe/tensor/arithmetic/log10.py +85 -0
  437. maxframe/tensor/arithmetic/log1p.py +95 -0
  438. maxframe/tensor/arithmetic/log2.py +85 -0
  439. maxframe/tensor/arithmetic/logaddexp.py +80 -0
  440. maxframe/tensor/arithmetic/logaddexp2.py +78 -0
  441. maxframe/tensor/arithmetic/logical_and.py +81 -0
  442. maxframe/tensor/arithmetic/logical_not.py +74 -0
  443. maxframe/tensor/arithmetic/logical_or.py +82 -0
  444. maxframe/tensor/arithmetic/logical_xor.py +88 -0
  445. maxframe/tensor/arithmetic/lshift.py +82 -0
  446. maxframe/tensor/arithmetic/maximum.py +108 -0
  447. maxframe/tensor/arithmetic/minimum.py +108 -0
  448. maxframe/tensor/arithmetic/mod.py +104 -0
  449. maxframe/tensor/arithmetic/modf.py +83 -0
  450. maxframe/tensor/arithmetic/multiply.py +81 -0
  451. maxframe/tensor/arithmetic/nan_to_num.py +99 -0
  452. maxframe/tensor/arithmetic/negative.py +65 -0
  453. maxframe/tensor/arithmetic/nextafter.py +68 -0
  454. maxframe/tensor/arithmetic/not_equal.py +72 -0
  455. maxframe/tensor/arithmetic/positive.py +47 -0
  456. maxframe/tensor/arithmetic/power.py +106 -0
  457. maxframe/tensor/arithmetic/rad2deg.py +71 -0
  458. maxframe/tensor/arithmetic/radians.py +77 -0
  459. maxframe/tensor/arithmetic/real.py +70 -0
  460. maxframe/tensor/arithmetic/reciprocal.py +76 -0
  461. maxframe/tensor/arithmetic/rint.py +68 -0
  462. maxframe/tensor/arithmetic/rshift.py +81 -0
  463. maxframe/tensor/arithmetic/setimag.py +29 -0
  464. maxframe/tensor/arithmetic/setreal.py +29 -0
  465. maxframe/tensor/arithmetic/sign.py +81 -0
  466. maxframe/tensor/arithmetic/signbit.py +65 -0
  467. maxframe/tensor/arithmetic/sin.py +98 -0
  468. maxframe/tensor/arithmetic/sinc.py +102 -0
  469. maxframe/tensor/arithmetic/sinh.py +93 -0
  470. maxframe/tensor/arithmetic/spacing.py +72 -0
  471. maxframe/tensor/arithmetic/sqrt.py +81 -0
  472. maxframe/tensor/arithmetic/square.py +69 -0
  473. maxframe/tensor/arithmetic/subtract.py +81 -0
  474. maxframe/tensor/arithmetic/tan.py +88 -0
  475. maxframe/tensor/arithmetic/tanh.py +92 -0
  476. maxframe/tensor/arithmetic/tests/__init__.py +15 -0
  477. maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
  478. maxframe/tensor/arithmetic/truediv.py +104 -0
  479. maxframe/tensor/arithmetic/trunc.py +72 -0
  480. maxframe/tensor/arithmetic/utils.py +65 -0
  481. maxframe/tensor/array_utils.py +186 -0
  482. maxframe/tensor/base/__init__.py +34 -0
  483. maxframe/tensor/base/astype.py +119 -0
  484. maxframe/tensor/base/atleast_1d.py +74 -0
  485. maxframe/tensor/base/broadcast_to.py +89 -0
  486. maxframe/tensor/base/ravel.py +92 -0
  487. maxframe/tensor/base/tests/__init__.py +13 -0
  488. maxframe/tensor/base/tests/test_base.py +114 -0
  489. maxframe/tensor/base/transpose.py +125 -0
  490. maxframe/tensor/base/unique.py +205 -0
  491. maxframe/tensor/base/where.py +127 -0
  492. maxframe/tensor/core.py +724 -0
  493. maxframe/tensor/datasource/__init__.py +32 -0
  494. maxframe/tensor/datasource/arange.py +156 -0
  495. maxframe/tensor/datasource/array.py +415 -0
  496. maxframe/tensor/datasource/core.py +109 -0
  497. maxframe/tensor/datasource/empty.py +169 -0
  498. maxframe/tensor/datasource/from_dataframe.py +70 -0
  499. maxframe/tensor/datasource/from_dense.py +54 -0
  500. maxframe/tensor/datasource/from_sparse.py +47 -0
  501. maxframe/tensor/datasource/full.py +186 -0
  502. maxframe/tensor/datasource/ones.py +173 -0
  503. maxframe/tensor/datasource/scalar.py +40 -0
  504. maxframe/tensor/datasource/tests/__init__.py +13 -0
  505. maxframe/tensor/datasource/tests/test_datasource.py +278 -0
  506. maxframe/tensor/datasource/zeros.py +188 -0
  507. maxframe/tensor/fetch/__init__.py +15 -0
  508. maxframe/tensor/fetch/core.py +54 -0
  509. maxframe/tensor/indexing/__init__.py +47 -0
  510. maxframe/tensor/indexing/choose.py +196 -0
  511. maxframe/tensor/indexing/compress.py +124 -0
  512. maxframe/tensor/indexing/core.py +190 -0
  513. maxframe/tensor/indexing/extract.py +71 -0
  514. maxframe/tensor/indexing/fill_diagonal.py +183 -0
  515. maxframe/tensor/indexing/flatnonzero.py +60 -0
  516. maxframe/tensor/indexing/getitem.py +175 -0
  517. maxframe/tensor/indexing/nonzero.py +120 -0
  518. maxframe/tensor/indexing/setitem.py +132 -0
  519. maxframe/tensor/indexing/slice.py +29 -0
  520. maxframe/tensor/indexing/take.py +130 -0
  521. maxframe/tensor/indexing/tests/__init__.py +15 -0
  522. maxframe/tensor/indexing/tests/test_indexing.py +234 -0
  523. maxframe/tensor/indexing/unravel_index.py +103 -0
  524. maxframe/tensor/merge/__init__.py +15 -0
  525. maxframe/tensor/merge/stack.py +132 -0
  526. maxframe/tensor/merge/tests/__init__.py +13 -0
  527. maxframe/tensor/merge/tests/test_merge.py +52 -0
  528. maxframe/tensor/operators.py +123 -0
  529. maxframe/tensor/random/__init__.py +168 -0
  530. maxframe/tensor/random/beta.py +87 -0
  531. maxframe/tensor/random/binomial.py +137 -0
  532. maxframe/tensor/random/bytes.py +39 -0
  533. maxframe/tensor/random/chisquare.py +110 -0
  534. maxframe/tensor/random/choice.py +186 -0
  535. maxframe/tensor/random/core.py +234 -0
  536. maxframe/tensor/random/dirichlet.py +123 -0
  537. maxframe/tensor/random/exponential.py +94 -0
  538. maxframe/tensor/random/f.py +135 -0
  539. maxframe/tensor/random/gamma.py +128 -0
  540. maxframe/tensor/random/geometric.py +93 -0
  541. maxframe/tensor/random/gumbel.py +167 -0
  542. maxframe/tensor/random/hypergeometric.py +148 -0
  543. maxframe/tensor/random/laplace.py +133 -0
  544. maxframe/tensor/random/logistic.py +129 -0
  545. maxframe/tensor/random/lognormal.py +159 -0
  546. maxframe/tensor/random/logseries.py +122 -0
  547. maxframe/tensor/random/multinomial.py +133 -0
  548. maxframe/tensor/random/multivariate_normal.py +192 -0
  549. maxframe/tensor/random/negative_binomial.py +125 -0
  550. maxframe/tensor/random/noncentral_chisquare.py +132 -0
  551. maxframe/tensor/random/noncentral_f.py +126 -0
  552. maxframe/tensor/random/normal.py +143 -0
  553. maxframe/tensor/random/pareto.py +140 -0
  554. maxframe/tensor/random/permutation.py +104 -0
  555. maxframe/tensor/random/poisson.py +111 -0
  556. maxframe/tensor/random/power.py +142 -0
  557. maxframe/tensor/random/rand.py +82 -0
  558. maxframe/tensor/random/randint.py +121 -0
  559. maxframe/tensor/random/randn.py +96 -0
  560. maxframe/tensor/random/random_integers.py +123 -0
  561. maxframe/tensor/random/random_sample.py +86 -0
  562. maxframe/tensor/random/rayleigh.py +110 -0
  563. maxframe/tensor/random/shuffle.py +61 -0
  564. maxframe/tensor/random/standard_cauchy.py +105 -0
  565. maxframe/tensor/random/standard_exponential.py +72 -0
  566. maxframe/tensor/random/standard_gamma.py +120 -0
  567. maxframe/tensor/random/standard_normal.py +74 -0
  568. maxframe/tensor/random/standard_t.py +135 -0
  569. maxframe/tensor/random/tests/__init__.py +15 -0
  570. maxframe/tensor/random/tests/test_random.py +167 -0
  571. maxframe/tensor/random/triangular.py +119 -0
  572. maxframe/tensor/random/uniform.py +131 -0
  573. maxframe/tensor/random/vonmises.py +131 -0
  574. maxframe/tensor/random/wald.py +114 -0
  575. maxframe/tensor/random/weibull.py +140 -0
  576. maxframe/tensor/random/zipf.py +122 -0
  577. maxframe/tensor/rechunk/__init__.py +26 -0
  578. maxframe/tensor/rechunk/rechunk.py +43 -0
  579. maxframe/tensor/reduction/__init__.py +66 -0
  580. maxframe/tensor/reduction/all.py +103 -0
  581. maxframe/tensor/reduction/allclose.py +88 -0
  582. maxframe/tensor/reduction/any.py +105 -0
  583. maxframe/tensor/reduction/argmax.py +103 -0
  584. maxframe/tensor/reduction/argmin.py +103 -0
  585. maxframe/tensor/reduction/array_equal.py +64 -0
  586. maxframe/tensor/reduction/core.py +168 -0
  587. maxframe/tensor/reduction/count_nonzero.py +81 -0
  588. maxframe/tensor/reduction/cumprod.py +97 -0
  589. maxframe/tensor/reduction/cumsum.py +101 -0
  590. maxframe/tensor/reduction/max.py +120 -0
  591. maxframe/tensor/reduction/mean.py +123 -0
  592. maxframe/tensor/reduction/min.py +120 -0
  593. maxframe/tensor/reduction/nanargmax.py +82 -0
  594. maxframe/tensor/reduction/nanargmin.py +76 -0
  595. maxframe/tensor/reduction/nancumprod.py +91 -0
  596. maxframe/tensor/reduction/nancumsum.py +94 -0
  597. maxframe/tensor/reduction/nanmax.py +111 -0
  598. maxframe/tensor/reduction/nanmean.py +106 -0
  599. maxframe/tensor/reduction/nanmin.py +111 -0
  600. maxframe/tensor/reduction/nanprod.py +94 -0
  601. maxframe/tensor/reduction/nanstd.py +126 -0
  602. maxframe/tensor/reduction/nansum.py +115 -0
  603. maxframe/tensor/reduction/nanvar.py +149 -0
  604. maxframe/tensor/reduction/prod.py +130 -0
  605. maxframe/tensor/reduction/std.py +134 -0
  606. maxframe/tensor/reduction/sum.py +125 -0
  607. maxframe/tensor/reduction/tests/__init__.py +13 -0
  608. maxframe/tensor/reduction/tests/test_reduction.py +181 -0
  609. maxframe/tensor/reduction/var.py +176 -0
  610. maxframe/tensor/reshape/__init__.py +17 -0
  611. maxframe/tensor/reshape/reshape.py +188 -0
  612. maxframe/tensor/reshape/tests/__init__.py +15 -0
  613. maxframe/tensor/reshape/tests/test_reshape.py +37 -0
  614. maxframe/tensor/statistics/__init__.py +13 -0
  615. maxframe/tensor/statistics/percentile.py +175 -0
  616. maxframe/tensor/statistics/quantile.py +288 -0
  617. maxframe/tensor/ufunc/__init__.py +26 -0
  618. maxframe/tensor/ufunc/ufunc.py +200 -0
  619. maxframe/tensor/utils.py +718 -0
  620. maxframe/tests/__init__.py +13 -0
  621. maxframe/tests/test_codegen.py +69 -0
  622. maxframe/tests/test_protocol.py +144 -0
  623. maxframe/tests/test_utils.py +376 -0
  624. maxframe/tests/utils.py +164 -0
  625. maxframe/typing_.py +37 -0
  626. maxframe/udf.py +134 -0
  627. maxframe/utils.py +1114 -0
  628. maxframe-0.1.0b5.dist-info/METADATA +104 -0
  629. maxframe-0.1.0b5.dist-info/RECORD +647 -0
  630. maxframe-0.1.0b5.dist-info/WHEEL +5 -0
  631. maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
  632. maxframe_client/__init__.py +17 -0
  633. maxframe_client/clients/__init__.py +13 -0
  634. maxframe_client/clients/framedriver.py +118 -0
  635. maxframe_client/clients/spe.py +104 -0
  636. maxframe_client/conftest.py +15 -0
  637. maxframe_client/fetcher.py +264 -0
  638. maxframe_client/session/__init__.py +22 -0
  639. maxframe_client/session/consts.py +36 -0
  640. maxframe_client/session/graph.py +119 -0
  641. maxframe_client/session/odps.py +482 -0
  642. maxframe_client/session/task.py +280 -0
  643. maxframe_client/session/tests/__init__.py +13 -0
  644. maxframe_client/session/tests/test_task.py +85 -0
  645. maxframe_client/tests/__init__.py +13 -0
  646. maxframe_client/tests/test_fetcher.py +89 -0
  647. maxframe_client/tests/test_session.py +255 -0
@@ -0,0 +1,299 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import dataclasses
16
+ import re
17
+ from typing import Dict, List, Optional, Tuple, Union
18
+
19
+ import numpy as np
20
+ import pandas as pd
21
+ from odps import ODPS
22
+ from odps.types import Column, OdpsSchema, validate_data_type
23
+
24
+ from ... import opcodes
25
+ from ...core import OutputType
26
+ from ...core.graph import DAG
27
+ from ...odpsio import odps_schema_to_pandas_dtypes
28
+ from ...serialization.serializables import (
29
+ AnyField,
30
+ BoolField,
31
+ FieldTypes,
32
+ Int64Field,
33
+ ListField,
34
+ SeriesField,
35
+ StringField,
36
+ )
37
+ from ..utils import parse_index
38
+ from .core import ColumnPruneSupportedDataSourceMixin, IncrementalIndexDatasource
39
+
40
+ _EXPLAIN_DEPENDS_REGEX = re.compile(r"([^\s]+) depends on: ([^\n]+)")
41
+ _EXPLAIN_JOB_REGEX = re.compile(r"(\S+) is root job")
42
+ _EXPLAIN_TASKS_HEADER_REGEX = re.compile(r"In Job ([^:]+):")
43
+ _EXPLAIN_ROOT_TASKS_REGEX = re.compile(r"root Tasks: (.+)")
44
+ _EXPLAIN_TASK_REGEX = re.compile(r"In Task ([^:]+)")
45
+ _EXPLAIN_TASK_SCHEMA_REGEX = re.compile(
46
+ r"In Task ([^:]+)[\S\s]+FS: output: ([^\n #]+)[\s\S]+schema:\s+([\S\s]+)$",
47
+ re.MULTILINE,
48
+ )
49
+ _EXPLAIN_COLUMN_REGEX = re.compile(r"([^\(]+) \(([^)]+)\)(?:| AS ([^ ]+))(?:\n|$)")
50
+
51
+
52
@dataclasses.dataclass
class DependencySector:
    """Root names and dependency edges parsed from one EXPLAIN section."""

    # names listed as roots of the section
    roots: List[str]
    # (from, to) pairs; each pair becomes an edge from the first to the second
    dependencies: List[Tuple[str, str]]

    def build_dag(self) -> DAG:
        """Build a DAG holding every root and every dependency edge."""
        graph = DAG()
        for root_name in self.roots:
            graph.add_node(root_name)
        for source, target in self.dependencies:
            # both endpoints are registered before the edge is added
            graph.add_node(source)
            graph.add_node(target)
            graph.add_edge(source, target)
        return graph
66
+
67
+
68
@dataclasses.dataclass
class JobsSector(DependencySector):
    """Job-level dependency section plus the task sections collected per job."""

    # job name -> tasks section parsed for that job
    jobs: Dict[str, "TasksSector"] = dataclasses.field(default_factory=dict)
71
+
72
+
73
@dataclasses.dataclass
class TasksSector(DependencySector):
    """Task-level dependency section belonging to a single job."""

    # name of the job the tasks belong to
    job_name: str
    # task name -> parsed task section
    tasks: Dict[str, "TaskSector"] = dataclasses.field(default_factory=dict)
77
+
78
+
79
@dataclasses.dataclass
class ColumnSchema:
    """One output column as listed in the schema part of a task section."""

    # column name as written in the EXPLAIN output
    column_name: str
    # type text found between the parentheses
    column_type: str
    # name following "AS", when one is present
    column_alias: Optional[str]
84
+
85
+
86
@dataclasses.dataclass
class TaskSector:
    """Parsed content of a single task section."""

    # name of the job owning the task
    job_name: str
    # name of the task itself
    task_name: str
    # output target of the task, None when the section declares none
    output_target: Optional[str]
    # output columns of the task; empty when no schema was found
    schema: List[ColumnSchema]
92
+
93
+
94
+ def _split_explain_string(explain_string: str) -> List[str]:
95
+ parts = explain_string.split("\n\n")
96
+ final_parts = []
97
+ grouped = []
98
+ for part in parts:
99
+ part = part.strip("\n")
100
+ if grouped and not part.startswith(" "):
101
+ final_parts.append("\n\n".join(grouped).strip())
102
+ grouped = []
103
+ grouped.append(part)
104
+ if grouped:
105
+ final_parts.append("\n\n".join(grouped).strip())
106
+ return final_parts
107
+
108
+
109
def _find_all_deps(sector: str) -> List[Tuple[str, str]]:
    """
    Collect every dependency declared in a section.

    Each "<name> depends on: <a>, <b>" entry yields one tuple per listed
    name, with the listed name first and the dependent name second.
    """
    return [
        (required.strip(), dependent)
        for dependent, required_list in _EXPLAIN_DEPENDS_REGEX.findall(sector)
        for required in required_list.split(",")
    ]
116
+
117
+
118
def _resolve_jobs_sector(sector: str) -> JobsSector:
    """Parse the section naming the root job and the job dependencies."""
    root_text = _EXPLAIN_JOB_REGEX.search(sector).group(1)
    root_names = [name.strip() for name in root_text.split(",")]
    return JobsSector(root_names, _find_all_deps(sector))
123
+
124
+
125
def _resolve_tasks_sector(sector: str) -> TasksSector:
    """Parse an "In Job <name>:" section into root tasks and task dependencies."""
    root_text = _EXPLAIN_ROOT_TASKS_REGEX.search(sector).group(1)
    root_names = [name.strip() for name in root_text.split(",")]

    job_name = _EXPLAIN_TASKS_HEADER_REGEX.search(sector).group(1)

    return TasksSector(root_names, _find_all_deps(sector), job_name)
134
+
135
+
136
def _resolve_task_sector(job_name: str, sector: str) -> TaskSector:
    """
    Parse an "In Task <name>" section.

    When the section carries no output target and schema listing, a
    TaskSector with ``output_target=None`` and an empty schema is returned.
    """
    task_name = _EXPLAIN_TASK_REGEX.match(sector).group(1)

    schema_match = _EXPLAIN_TASK_SCHEMA_REGEX.match(sector)
    if schema_match is None:
        return TaskSector(job_name, task_name, None, [])

    out_target, out_schema = schema_match.group(2, 3)
    # the alias group is an empty string when the column has no "AS" part
    columns = [
        ColumnSchema(name.strip(), type_text.strip(), alias.strip())
        for name, type_text, alias in _EXPLAIN_COLUMN_REGEX.findall(out_schema)
    ]
    return TaskSector(job_name, task_name, out_target, columns)
152
+
153
+
154
def _parse_explained_schema(explain_string: str) -> OdpsSchema:
    """
    Derive the output schema of a query from its EXPLAIN output.

    The text is split into sections; job, tasks and task sections are
    collected, then the single final task of the single final job is looked
    up and its column listing is converted into an ``OdpsSchema``.

    Parameters
    ----------
    explain_string: str
        Text returned for ``EXPLAIN <query>``.

    Returns
    -------
    schema: OdpsSchema
        Columns of the final task, named by alias when one is present.

    Raises
    ------
    ValueError
        If the sections do not appear in the expected job / tasks / task
        nesting, if more than one final job or task exists, if no schema can
        be detected, or if the final task does not output to ``Screen``.
    """
    sectors = _split_explain_string(explain_string)
    jobs_sector = tasks_sector = None

    for sector in sectors:
        if _EXPLAIN_JOB_REGEX.search(sector):
            jobs_sector = _resolve_jobs_sector(sector)
        elif _EXPLAIN_TASKS_HEADER_REGEX.search(sector):
            tasks_sector = _resolve_tasks_sector(sector)
            # explicit raise instead of assert: asserts vanish under ``-O``
            if jobs_sector is None:
                raise ValueError(
                    "Malformed EXPLAIN output: tasks found before job definitions"
                )
            jobs_sector.jobs[tasks_sector.job_name] = tasks_sector
        elif _EXPLAIN_TASK_REGEX.search(sector):
            if tasks_sector is None:
                raise ValueError(
                    "Malformed EXPLAIN output: task found before tasks definitions"
                )
            task_sector = _resolve_task_sector(tasks_sector.job_name, sector)
            tasks_sector.tasks[task_sector.task_name] = task_sector

    if jobs_sector is None:
        # without this check the code below fails with an AttributeError
        raise ValueError("Malformed EXPLAIN output: no job definitions found")

    job_dag = jobs_sector.build_dag()
    indep_job_names = list(job_dag.iter_indep(reverse=True))
    if len(indep_job_names) > 1:  # pragma: no cover
        raise ValueError("Only one final job is allowed in SQL statement")

    tasks_sector = jobs_sector.jobs[indep_job_names[0]]
    task_dag = tasks_sector.build_dag()
    indep_task_names = list(task_dag.iter_indep(reverse=True))
    if len(indep_task_names) > 1:  # pragma: no cover
        raise ValueError("Only one final task is allowed in SQL statement")

    task_sector = tasks_sector.tasks[indep_task_names[0]]
    if not task_sector.schema:  # pragma: no cover
        raise ValueError("Cannot detect output schema")
    if task_sector.output_target != "Screen":
        raise ValueError("The SQL statement should be an instant query")
    cols = [
        # an empty or missing alias falls back to the plain column name
        Column(c.column_alias or c.column_name, validate_data_type(c.column_type))
        for c in task_sector.schema
    ]
    return OdpsSchema(cols)
191
+
192
+
193
class DataFrameReadODPSQuery(
    IncrementalIndexDatasource,
    ColumnPruneSupportedDataSourceMixin,
):
    """Data source operator describing a DataFrame read from an ODPS SQL query."""

    _op_type_ = opcodes.READ_ODPS_QUERY

    # SQL statement to read from
    query = StringField("query")
    # dtypes of the data columns, indexed by column name
    dtypes = SeriesField("dtypes", default=None)
    columns = AnyField("columns", default=None)
    nrows = Int64Field("nrows", default=None)
    use_arrow_dtype = BoolField("use_arrow_dtype", default=None)
    string_as_binary = BoolField("string_as_binary", default=None)
    # names and dtypes of the columns used as index, if any
    index_columns = ListField("index_columns", FieldTypes.string, default=None)
    index_dtypes = SeriesField("index_dtypes", default=None)

    def get_columns(self):
        """Return the value of the ``columns`` field."""
        return self.columns

    def set_pruned_columns(self, columns, *, keep_order=None):  # pragma: no cover
        """Replace the ``columns`` field; ``keep_order`` is not used here."""
        self.columns = columns

    def __call__(self, chunk_bytes=None, chunk_size=None):
        """
        Create the DataFrame tileable produced by this operator.

        The index metadata is a ``RangeIndex`` when no index column is set,
        an empty ``Index`` for a single index column and an empty
        ``MultiIndex`` for several index columns.
        """
        if not self.index_columns:
            index_value = parse_index(pd.RangeIndex(0))
        elif len(self.index_columns) == 1:
            index_value = parse_index(
                pd.Index([], name=self.index_columns[0]).astype(
                    self.index_dtypes.iloc[0]
                )
            )
        else:
            idx = pd.MultiIndex.from_frame(
                pd.DataFrame([], columns=self.index_columns).astype(self.index_dtypes)
            )
            index_value = parse_index(idx)

        columns_value = parse_index(self.dtypes.index, store_data=True)
        self.output_types = [OutputType.dataframe]
        return self.new_tileable(
            [],
            None,
            # (rows, columns): the row count is unknown, one column per dtype
            # entry. The axes used to be swapped, which disagreed with the
            # (np.nan, n_columns) shape built by read_odps_table.
            shape=(np.nan, len(self.dtypes)),
            dtypes=self.dtypes,
            index_value=index_value,
            columns_value=columns_value,
            chunk_bytes=chunk_bytes,
            chunk_size=chunk_size,
        )
241
+
242
+
243
def read_odps_query(
    query: str,
    odps_entry: ODPS = None,
    index_col: Union[None, str, List[str]] = None,
    string_as_binary: bool = None,
    **kw,
):
    """
    Read data from a MaxCompute (ODPS) query into DataFrame.

    Supports specifying some columns as indexes. If not specified, RangeIndex
    will be generated.

    Parameters
    ----------
    query: str
        MaxCompute SQL statement.
    odps_entry: ODPS
        ODPS entry object used to run ``EXPLAIN`` on the statement. When not
        given, ``ODPS.from_global()`` and then ``ODPS.from_environments()``
        are tried.
    index_col: Union[None, str, List[str]]
        Columns to be specified as indexes.
    string_as_binary: bool
        Passed on to the operator unchanged.
    **kw
        ``chunk_bytes``, ``chunk_size`` and ``use_arrow_dtype`` (default True)
        are read from here.

    Returns
    -------
    result: DataFrame
        DataFrame read from MaxCompute (ODPS) table

    Raises
    ------
    ValueError
        If no ODPS entry can be resolved.
    """
    odps_entry = odps_entry or ODPS.from_global() or ODPS.from_environments()
    if odps_entry is None:
        raise ValueError("Missing odps_entry parameter")

    # the schema is inferred from the plan; the query itself is not run here
    explain_inst = odps_entry.execute_sql(f"EXPLAIN {query}")
    explain_str = list(explain_inst.get_task_results().values())[0]
    dtypes = odps_schema_to_pandas_dtypes(_parse_explained_schema(explain_str))

    index_dtypes = None
    if index_col:
        if isinstance(index_col, str):
            index_col = [index_col]
        index_names = set(index_col)
        value_cols = [name for name in dtypes.index if name not in index_names]
        # split the inferred dtypes into index part and data part
        index_dtypes = pd.Series([dtypes[name] for name in index_col], index=index_col)
        dtypes = pd.Series([dtypes[name] for name in value_cols], index=value_cols)

    chunk_bytes = kw.pop("chunk_bytes", None)
    chunk_size = kw.pop("chunk_size", None)
    use_arrow_dtype = kw.pop("use_arrow_dtype", True)
    op = DataFrameReadODPSQuery(
        query=query,
        dtypes=dtypes,
        use_arrow_dtype=use_arrow_dtype,
        string_as_binary=string_as_binary,
        index_columns=index_col,
        index_dtypes=index_dtypes,
    )
    return op(chunk_bytes=chunk_bytes, chunk_size=chunk_size)
@@ -0,0 +1,253 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import logging
16
+ from typing import List, Optional, Union
17
+
18
+ import numpy as np
19
+ import pandas as pd
20
+ from odps import ODPS
21
+ from odps.models import Table
22
+ from odps.utils import to_timestamp
23
+
24
+ from ... import opcodes
25
+ from ...core import OutputType
26
+ from ...odpsio import odps_schema_to_pandas_dtypes
27
+ from ...serialization.serializables import (
28
+ AnyField,
29
+ BoolField,
30
+ FieldTypes,
31
+ Int64Field,
32
+ ListField,
33
+ SeriesField,
34
+ StringField,
35
+ )
36
+ from ..core import DataFrame # noqa: F401
37
+ from ..utils import parse_index
38
+ from .core import ColumnPruneSupportedDataSourceMixin, IncrementalIndexDatasource
39
+
40
# module-level logger named after this module
logger = logging.getLogger(__name__)
41
+
42
+
43
class DataFrameReadODPSTable(
    IncrementalIndexDatasource,
    ColumnPruneSupportedDataSourceMixin,
):
    """Data source operator describing data read from an ODPS table."""

    _op_type_ = opcodes.READ_ODPS_TABLE

    table_name = StringField("table_name")
    partitions = ListField("partitions", field_type=FieldTypes.string, default=None)
    # dtypes of the data columns, indexed by column name
    dtypes = SeriesField("dtypes", default=None)
    columns = AnyField("columns", default=None)
    nrows = Int64Field("nrows", default=None)
    use_arrow_dtype = BoolField("use_arrow_dtype", default=None)
    string_as_binary = BoolField("string_as_binary", default=None)
    append_partitions = BoolField("append_partitions", default=None)
    last_modified_time = Int64Field("last_modified_time", default=None)
    # names and dtypes of the columns used as index, if any
    index_columns = ListField("index_columns", FieldTypes.string, default=None)
    index_dtypes = SeriesField("index_dtypes", default=None)

    def __init__(self, memory_scale=None, **kw):
        # output type defaults to dataframe unless the caller names another
        output_type = kw.get("output_type", OutputType.dataframe)
        super(DataFrameReadODPSTable, self).__init__(
            memory_scale=memory_scale, _output_types=[output_type], **kw
        )

    @property
    def partition(self):
        """Value of the ``partition_spec`` attribute, or None when absent."""
        return getattr(self, "partition_spec", None)

    def get_columns(self):
        """Return the selected columns, or all dtype column names when unset."""
        return self.columns or list(self.dtypes.index)

    def set_pruned_columns(self, columns, *, keep_order=None):  # pragma: no cover
        """Replace the ``columns`` field; ``keep_order`` is not used here."""
        self.columns = columns

    def __call__(self, shape, chunk_bytes=None, chunk_size=None):
        """
        Create the tileable produced by this operator.

        Depending on the first output type a dataframe, series or index
        tileable is created; for any other output type nothing is returned.
        """
        index_names = self.index_columns
        if not index_names:
            # unknown row count is represented by an empty RangeIndex
            row_count = 0 if np.isnan(shape[0]) else shape[0]
            index_value = parse_index(pd.RangeIndex(row_count))
        elif len(index_names) == 1:
            empty_index = pd.Index([], name=index_names[0])
            index_value = parse_index(empty_index.astype(self.index_dtypes.iloc[0]))
        else:
            empty_frame = pd.DataFrame([], columns=index_names)
            index_value = parse_index(
                pd.MultiIndex.from_frame(empty_frame.astype(self.index_dtypes))
            )

        # arguments shared by all three kinds of output
        shared_kw = dict(
            index_value=index_value,
            chunk_bytes=chunk_bytes,
            chunk_size=chunk_size,
        )
        output_type = self.output_types[0]
        if output_type == OutputType.dataframe:
            return self.new_tileable(
                [],
                None,
                shape=shape,
                dtypes=self.dtypes,
                columns_value=parse_index(self.dtypes.index, store_data=True),
                **shared_kw,
            )
        if output_type == OutputType.series:
            return self.new_tileable(
                [],
                None,
                shape=shape[:1],
                name=self.dtypes.index[0],
                dtype=self.dtypes.iloc[0],
                **shared_kw,
            )
        if output_type == OutputType.index:
            return self.new_tileable(
                [],
                None,
                shape=shape,
                name=getattr(index_value, "name", None),
                names=getattr(index_value, "names", None),
                **shared_kw,
            )
        return None
129
+
130
+
131
def read_odps_table(
    table_name: Union[str, Table],
    partitions: Union[None, str, List[str]] = None,
    columns: Optional[List[str]] = None,
    index_col: Union[None, str, List[str]] = None,
    odps_entry: ODPS = None,
    string_as_binary: bool = None,
    append_partitions: bool = False,
    **kw,
):
    """
    Read data from a MaxCompute (ODPS) table into DataFrame.

    Supports specifying some columns as indexes. If not specified, RangeIndex
    will be generated.

    Parameters
    ----------
    table_name: Union[str, Table]
        Name of the table to read from.
    partitions: Union[None, str, List[str]]
        Table partition or list of partitions to read from.
    columns: Optional[List[str]]
        Table columns to read from. You may also specify partition columns here.
        If not specified, all table columns (or include partition columns if
        `append_partitions` is True) will be included.
    index_col: Union[None, str, List[str]]
        Columns to be specified as indexes. Names are matched against the
        table columns case-insensitively.
    odps_entry: ODPS
        ODPS entry object used to look up the table. When not given,
        ``ODPS.from_global()`` and then ``ODPS.from_environments()`` are tried.
    string_as_binary: bool
        Passed on to the operator unchanged.
    append_partitions: bool
        If True, will add all partition columns as selected columns when
        `columns` is not specified.
    **kw
        ``shape``, ``chunk_bytes``, ``chunk_size`` and ``use_arrow_dtype``
        (default True) are read from here; ``partition`` is used when
        `partitions` is not given. Remaining items are passed to the operator.

    Returns
    -------
    result: DataFrame
        DataFrame read from MaxCompute (ODPS) table

    Raises
    ------
    ValueError
        If no ODPS entry can be resolved, if partitions are requested on a
        non-partitioned table, if a requested column or index column does not
        exist in the table, or if index columns and columns overlap.
    """
    odps_entry = odps_entry or ODPS.from_global() or ODPS.from_environments()
    if odps_entry is None:
        raise ValueError("Missing odps_entry parameter")
    if isinstance(table_name, Table):
        table = table_name
    else:
        table = odps_entry.get_table(table_name)

    if not table.table_schema.partitions and (
        partitions is not None or append_partitions
    ):
        raise ValueError("Cannot handle partitions on non-partition tables")

    cols = (
        table.table_schema.columns
        if append_partitions or columns is not None
        else table.table_schema.simple_columns
    )
    table_columns = [c.name.lower() for c in cols]
    table_dtypes = odps_schema_to_pandas_dtypes(
        table.table_schema, with_partitions=True
    )
    df_types = [table_dtypes[c] for c in table_columns]

    if isinstance(index_col, str):
        index_col = [index_col]
    if not index_col:
        index_dtypes = None
    else:
        # Table column names are compared in lower case, so index names are
        # normalised once here; looking them up with their original casing
        # used to fail with a bare "is not in list" error.
        index_col = [c.lower() for c in index_col]
        index_col_set = set(index_col)
        # validate regardless of whether `columns` was passed
        col_diff = sorted(index_col_set - set(table_columns))
        if col_diff:
            raise ValueError(
                f"Cannot found column {', '.join(col_diff)} specified "
                f"in index_columns argument in source table"
            )
        if columns is None:
            # every non-index column becomes a data column
            columns = [c for c in table_columns if c not in index_col_set]
        index_dtypes = pd.Series([table_dtypes[c] for c in index_col], index=index_col)

    if columns is not None:
        table_col_set = {c.lower() for c in columns}
        col_diff = sorted(table_col_set - set(table_columns))
        if col_diff:
            raise ValueError(
                f"Cannot found column {', '.join(col_diff)} specified "
                f"in columns argument in source table"
            )
        if any(col in table_col_set for col in index_col or ()):
            raise ValueError("Index columns and columns shall not overlap.")

        # reorder columns
        new_columns = [c for c in table_columns if c in table_col_set]
        df_types = [table_dtypes[col] for col in new_columns]
        table_columns = new_columns
        columns = new_columns

    shape = kw.pop("shape", None) or (np.nan, len(df_types))
    dtypes = pd.Series(df_types, index=table_columns)
    chunk_bytes = kw.pop("chunk_bytes", None)
    chunk_size = kw.pop("chunk_size", None)
    use_arrow_dtype = kw.pop("use_arrow_dtype", True)

    partitions = partitions or kw.get("partition")
    if isinstance(partitions, str):
        partitions = [partitions]

    # `columns` holds lower-cased names at this point, so compare likewise
    append_partitions = append_partitions or any(
        pt.name.lower() in (columns or ())
        for pt in (table.table_schema.partitions or ())
    )
    op = DataFrameReadODPSTable(
        table_name=table.full_table_name,
        partitions=partitions,
        dtypes=dtypes,
        columns=columns,
        use_arrow_dtype=use_arrow_dtype,
        string_as_binary=string_as_binary,
        append_partitions=append_partitions,
        last_modified_time=to_timestamp(table.last_data_modified_time),
        index_columns=index_col,
        index_dtypes=index_dtypes,
        **kw,
    )
    return op(shape, chunk_bytes=chunk_bytes, chunk_size=chunk_size)