maxframe 0.1.0b5__cp39-cp39-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (647)
  1. maxframe/__init__.py +32 -0
  2. maxframe/_utils.cpython-39-darwin.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyx +547 -0
  5. maxframe/codegen.py +528 -0
  6. maxframe/config/__init__.py +15 -0
  7. maxframe/config/config.py +443 -0
  8. maxframe/config/tests/__init__.py +13 -0
  9. maxframe/config/tests/test_config.py +103 -0
  10. maxframe/config/tests/test_validators.py +34 -0
  11. maxframe/config/validators.py +57 -0
  12. maxframe/conftest.py +139 -0
  13. maxframe/core/__init__.py +65 -0
  14. maxframe/core/base.py +156 -0
  15. maxframe/core/entity/__init__.py +44 -0
  16. maxframe/core/entity/chunks.py +68 -0
  17. maxframe/core/entity/core.py +152 -0
  18. maxframe/core/entity/executable.py +337 -0
  19. maxframe/core/entity/fuse.py +73 -0
  20. maxframe/core/entity/objects.py +100 -0
  21. maxframe/core/entity/output_types.py +90 -0
  22. maxframe/core/entity/tileables.py +438 -0
  23. maxframe/core/entity/utils.py +24 -0
  24. maxframe/core/graph/__init__.py +17 -0
  25. maxframe/core/graph/builder/__init__.py +16 -0
  26. maxframe/core/graph/builder/base.py +86 -0
  27. maxframe/core/graph/builder/chunk.py +430 -0
  28. maxframe/core/graph/builder/tileable.py +34 -0
  29. maxframe/core/graph/builder/utils.py +41 -0
  30. maxframe/core/graph/core.cpython-39-darwin.so +0 -0
  31. maxframe/core/graph/core.pyx +467 -0
  32. maxframe/core/graph/entity.py +171 -0
  33. maxframe/core/graph/tests/__init__.py +13 -0
  34. maxframe/core/graph/tests/test_graph.py +205 -0
  35. maxframe/core/mode.py +96 -0
  36. maxframe/core/operator/__init__.py +34 -0
  37. maxframe/core/operator/base.py +450 -0
  38. maxframe/core/operator/core.py +276 -0
  39. maxframe/core/operator/fetch.py +53 -0
  40. maxframe/core/operator/fuse.py +29 -0
  41. maxframe/core/operator/objects.py +72 -0
  42. maxframe/core/operator/shuffle.py +111 -0
  43. maxframe/core/operator/tests/__init__.py +13 -0
  44. maxframe/core/operator/tests/test_core.py +64 -0
  45. maxframe/core/tests/__init__.py +13 -0
  46. maxframe/core/tests/test_mode.py +75 -0
  47. maxframe/dataframe/__init__.py +81 -0
  48. maxframe/dataframe/arithmetic/__init__.py +359 -0
  49. maxframe/dataframe/arithmetic/abs.py +33 -0
  50. maxframe/dataframe/arithmetic/add.py +60 -0
  51. maxframe/dataframe/arithmetic/arccos.py +28 -0
  52. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  53. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  54. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  55. maxframe/dataframe/arithmetic/arctan.py +28 -0
  56. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  57. maxframe/dataframe/arithmetic/around.py +152 -0
  58. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  59. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  60. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  61. maxframe/dataframe/arithmetic/ceil.py +28 -0
  62. maxframe/dataframe/arithmetic/core.py +342 -0
  63. maxframe/dataframe/arithmetic/cos.py +28 -0
  64. maxframe/dataframe/arithmetic/cosh.py +28 -0
  65. maxframe/dataframe/arithmetic/degrees.py +28 -0
  66. maxframe/dataframe/arithmetic/docstring.py +442 -0
  67. maxframe/dataframe/arithmetic/equal.py +56 -0
  68. maxframe/dataframe/arithmetic/exp.py +28 -0
  69. maxframe/dataframe/arithmetic/exp2.py +28 -0
  70. maxframe/dataframe/arithmetic/expm1.py +28 -0
  71. maxframe/dataframe/arithmetic/floor.py +28 -0
  72. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  73. maxframe/dataframe/arithmetic/greater.py +57 -0
  74. maxframe/dataframe/arithmetic/greater_equal.py +57 -0
  75. maxframe/dataframe/arithmetic/invert.py +33 -0
  76. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  77. maxframe/dataframe/arithmetic/less.py +57 -0
  78. maxframe/dataframe/arithmetic/less_equal.py +57 -0
  79. maxframe/dataframe/arithmetic/log.py +28 -0
  80. maxframe/dataframe/arithmetic/log10.py +28 -0
  81. maxframe/dataframe/arithmetic/log2.py +28 -0
  82. maxframe/dataframe/arithmetic/mod.py +60 -0
  83. maxframe/dataframe/arithmetic/multiply.py +60 -0
  84. maxframe/dataframe/arithmetic/negative.py +33 -0
  85. maxframe/dataframe/arithmetic/not_equal.py +56 -0
  86. maxframe/dataframe/arithmetic/power.py +68 -0
  87. maxframe/dataframe/arithmetic/radians.py +28 -0
  88. maxframe/dataframe/arithmetic/sin.py +28 -0
  89. maxframe/dataframe/arithmetic/sinh.py +28 -0
  90. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  91. maxframe/dataframe/arithmetic/subtract.py +64 -0
  92. maxframe/dataframe/arithmetic/tan.py +28 -0
  93. maxframe/dataframe/arithmetic/tanh.py +28 -0
  94. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  95. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
  96. maxframe/dataframe/arithmetic/truediv.py +64 -0
  97. maxframe/dataframe/arithmetic/trunc.py +28 -0
  98. maxframe/dataframe/arrays.py +864 -0
  99. maxframe/dataframe/core.py +2417 -0
  100. maxframe/dataframe/datasource/__init__.py +15 -0
  101. maxframe/dataframe/datasource/core.py +81 -0
  102. maxframe/dataframe/datasource/dataframe.py +59 -0
  103. maxframe/dataframe/datasource/date_range.py +504 -0
  104. maxframe/dataframe/datasource/from_index.py +54 -0
  105. maxframe/dataframe/datasource/from_records.py +107 -0
  106. maxframe/dataframe/datasource/from_tensor.py +419 -0
  107. maxframe/dataframe/datasource/index.py +117 -0
  108. maxframe/dataframe/datasource/read_csv.py +528 -0
  109. maxframe/dataframe/datasource/read_odps_query.py +299 -0
  110. maxframe/dataframe/datasource/read_odps_table.py +253 -0
  111. maxframe/dataframe/datasource/read_parquet.py +421 -0
  112. maxframe/dataframe/datasource/series.py +55 -0
  113. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  114. maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
  115. maxframe/dataframe/datastore/__init__.py +26 -0
  116. maxframe/dataframe/datastore/core.py +19 -0
  117. maxframe/dataframe/datastore/to_csv.py +227 -0
  118. maxframe/dataframe/datastore/to_odps.py +162 -0
  119. maxframe/dataframe/extensions/__init__.py +41 -0
  120. maxframe/dataframe/extensions/accessor.py +50 -0
  121. maxframe/dataframe/extensions/reshuffle.py +83 -0
  122. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  123. maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
  124. maxframe/dataframe/fetch/__init__.py +15 -0
  125. maxframe/dataframe/fetch/core.py +86 -0
  126. maxframe/dataframe/groupby/__init__.py +82 -0
  127. maxframe/dataframe/groupby/aggregation.py +350 -0
  128. maxframe/dataframe/groupby/apply.py +251 -0
  129. maxframe/dataframe/groupby/core.py +179 -0
  130. maxframe/dataframe/groupby/cum.py +124 -0
  131. maxframe/dataframe/groupby/fill.py +141 -0
  132. maxframe/dataframe/groupby/getitem.py +92 -0
  133. maxframe/dataframe/groupby/head.py +105 -0
  134. maxframe/dataframe/groupby/sample.py +214 -0
  135. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  136. maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
  137. maxframe/dataframe/groupby/transform.py +255 -0
  138. maxframe/dataframe/indexing/__init__.py +84 -0
  139. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  140. maxframe/dataframe/indexing/align.py +349 -0
  141. maxframe/dataframe/indexing/at.py +83 -0
  142. maxframe/dataframe/indexing/getitem.py +204 -0
  143. maxframe/dataframe/indexing/iat.py +37 -0
  144. maxframe/dataframe/indexing/iloc.py +566 -0
  145. maxframe/dataframe/indexing/insert.py +86 -0
  146. maxframe/dataframe/indexing/loc.py +411 -0
  147. maxframe/dataframe/indexing/reindex.py +526 -0
  148. maxframe/dataframe/indexing/rename.py +462 -0
  149. maxframe/dataframe/indexing/rename_axis.py +209 -0
  150. maxframe/dataframe/indexing/reset_index.py +402 -0
  151. maxframe/dataframe/indexing/sample.py +221 -0
  152. maxframe/dataframe/indexing/set_axis.py +194 -0
  153. maxframe/dataframe/indexing/set_index.py +61 -0
  154. maxframe/dataframe/indexing/setitem.py +130 -0
  155. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  156. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  157. maxframe/dataframe/indexing/where.py +308 -0
  158. maxframe/dataframe/initializer.py +288 -0
  159. maxframe/dataframe/merge/__init__.py +32 -0
  160. maxframe/dataframe/merge/append.py +121 -0
  161. maxframe/dataframe/merge/concat.py +325 -0
  162. maxframe/dataframe/merge/merge.py +593 -0
  163. maxframe/dataframe/merge/tests/__init__.py +13 -0
  164. maxframe/dataframe/merge/tests/test_merge.py +215 -0
  165. maxframe/dataframe/misc/__init__.py +134 -0
  166. maxframe/dataframe/misc/_duplicate.py +46 -0
  167. maxframe/dataframe/misc/accessor.py +276 -0
  168. maxframe/dataframe/misc/apply.py +692 -0
  169. maxframe/dataframe/misc/astype.py +236 -0
  170. maxframe/dataframe/misc/case_when.py +141 -0
  171. maxframe/dataframe/misc/check_monotonic.py +84 -0
  172. maxframe/dataframe/misc/cut.py +383 -0
  173. maxframe/dataframe/misc/datetimes.py +79 -0
  174. maxframe/dataframe/misc/describe.py +108 -0
  175. maxframe/dataframe/misc/diff.py +210 -0
  176. maxframe/dataframe/misc/drop.py +440 -0
  177. maxframe/dataframe/misc/drop_duplicates.py +248 -0
  178. maxframe/dataframe/misc/duplicated.py +292 -0
  179. maxframe/dataframe/misc/eval.py +728 -0
  180. maxframe/dataframe/misc/explode.py +171 -0
  181. maxframe/dataframe/misc/get_dummies.py +208 -0
  182. maxframe/dataframe/misc/isin.py +217 -0
  183. maxframe/dataframe/misc/map.py +236 -0
  184. maxframe/dataframe/misc/melt.py +162 -0
  185. maxframe/dataframe/misc/memory_usage.py +248 -0
  186. maxframe/dataframe/misc/pct_change.py +150 -0
  187. maxframe/dataframe/misc/pivot_table.py +262 -0
  188. maxframe/dataframe/misc/qcut.py +104 -0
  189. maxframe/dataframe/misc/select_dtypes.py +104 -0
  190. maxframe/dataframe/misc/shift.py +256 -0
  191. maxframe/dataframe/misc/stack.py +238 -0
  192. maxframe/dataframe/misc/string_.py +221 -0
  193. maxframe/dataframe/misc/tests/__init__.py +13 -0
  194. maxframe/dataframe/misc/tests/test_misc.py +468 -0
  195. maxframe/dataframe/misc/to_numeric.py +178 -0
  196. maxframe/dataframe/misc/transform.py +361 -0
  197. maxframe/dataframe/misc/transpose.py +136 -0
  198. maxframe/dataframe/misc/value_counts.py +182 -0
  199. maxframe/dataframe/missing/__init__.py +53 -0
  200. maxframe/dataframe/missing/checkna.py +223 -0
  201. maxframe/dataframe/missing/dropna.py +280 -0
  202. maxframe/dataframe/missing/fillna.py +275 -0
  203. maxframe/dataframe/missing/replace.py +439 -0
  204. maxframe/dataframe/missing/tests/__init__.py +13 -0
  205. maxframe/dataframe/missing/tests/test_missing.py +89 -0
  206. maxframe/dataframe/operators.py +273 -0
  207. maxframe/dataframe/plotting/__init__.py +40 -0
  208. maxframe/dataframe/plotting/core.py +78 -0
  209. maxframe/dataframe/plotting/tests/__init__.py +13 -0
  210. maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
  211. maxframe/dataframe/reduction/__init__.py +107 -0
  212. maxframe/dataframe/reduction/aggregation.py +344 -0
  213. maxframe/dataframe/reduction/all.py +78 -0
  214. maxframe/dataframe/reduction/any.py +78 -0
  215. maxframe/dataframe/reduction/core.py +837 -0
  216. maxframe/dataframe/reduction/count.py +59 -0
  217. maxframe/dataframe/reduction/cummax.py +30 -0
  218. maxframe/dataframe/reduction/cummin.py +30 -0
  219. maxframe/dataframe/reduction/cumprod.py +30 -0
  220. maxframe/dataframe/reduction/cumsum.py +30 -0
  221. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  222. maxframe/dataframe/reduction/kurtosis.py +104 -0
  223. maxframe/dataframe/reduction/max.py +65 -0
  224. maxframe/dataframe/reduction/mean.py +61 -0
  225. maxframe/dataframe/reduction/min.py +65 -0
  226. maxframe/dataframe/reduction/nunique.py +141 -0
  227. maxframe/dataframe/reduction/prod.py +76 -0
  228. maxframe/dataframe/reduction/reduction_size.py +36 -0
  229. maxframe/dataframe/reduction/sem.py +69 -0
  230. maxframe/dataframe/reduction/skew.py +89 -0
  231. maxframe/dataframe/reduction/std.py +53 -0
  232. maxframe/dataframe/reduction/str_concat.py +48 -0
  233. maxframe/dataframe/reduction/sum.py +77 -0
  234. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  235. maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
  236. maxframe/dataframe/reduction/unique.py +90 -0
  237. maxframe/dataframe/reduction/var.py +72 -0
  238. maxframe/dataframe/sort/__init__.py +34 -0
  239. maxframe/dataframe/sort/core.py +36 -0
  240. maxframe/dataframe/sort/sort_index.py +153 -0
  241. maxframe/dataframe/sort/sort_values.py +311 -0
  242. maxframe/dataframe/sort/tests/__init__.py +13 -0
  243. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  244. maxframe/dataframe/statistics/__init__.py +33 -0
  245. maxframe/dataframe/statistics/corr.py +280 -0
  246. maxframe/dataframe/statistics/quantile.py +341 -0
  247. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  248. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  249. maxframe/dataframe/tests/__init__.py +13 -0
  250. maxframe/dataframe/tests/test_initializer.py +29 -0
  251. maxframe/dataframe/tseries/__init__.py +13 -0
  252. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  253. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  254. maxframe/dataframe/tseries/to_datetime.py +297 -0
  255. maxframe/dataframe/ufunc/__init__.py +27 -0
  256. maxframe/dataframe/ufunc/tensor.py +54 -0
  257. maxframe/dataframe/ufunc/ufunc.py +52 -0
  258. maxframe/dataframe/utils.py +1267 -0
  259. maxframe/dataframe/window/__init__.py +29 -0
  260. maxframe/dataframe/window/aggregation.py +96 -0
  261. maxframe/dataframe/window/core.py +69 -0
  262. maxframe/dataframe/window/ewm.py +249 -0
  263. maxframe/dataframe/window/expanding.py +147 -0
  264. maxframe/dataframe/window/rolling.py +376 -0
  265. maxframe/dataframe/window/tests/__init__.py +13 -0
  266. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  267. maxframe/dataframe/window/tests/test_expanding.py +66 -0
  268. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  269. maxframe/env.py +33 -0
  270. maxframe/errors.py +21 -0
  271. maxframe/extension.py +81 -0
  272. maxframe/learn/__init__.py +17 -0
  273. maxframe/learn/contrib/__init__.py +17 -0
  274. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  275. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  276. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  277. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  278. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  279. maxframe/learn/contrib/utils.py +52 -0
  280. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  281. maxframe/learn/contrib/xgboost/classifier.py +86 -0
  282. maxframe/learn/contrib/xgboost/core.py +156 -0
  283. maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
  284. maxframe/learn/contrib/xgboost/predict.py +138 -0
  285. maxframe/learn/contrib/xgboost/regressor.py +78 -0
  286. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  287. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  288. maxframe/learn/contrib/xgboost/train.py +121 -0
  289. maxframe/learn/utils/__init__.py +15 -0
  290. maxframe/learn/utils/core.py +29 -0
  291. maxframe/lib/__init__.py +15 -0
  292. maxframe/lib/aio/__init__.py +27 -0
  293. maxframe/lib/aio/_runners.py +162 -0
  294. maxframe/lib/aio/_threads.py +35 -0
  295. maxframe/lib/aio/base.py +82 -0
  296. maxframe/lib/aio/file.py +85 -0
  297. maxframe/lib/aio/isolation.py +100 -0
  298. maxframe/lib/aio/lru.py +242 -0
  299. maxframe/lib/aio/parallelism.py +37 -0
  300. maxframe/lib/aio/tests/__init__.py +13 -0
  301. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  302. maxframe/lib/compression.py +55 -0
  303. maxframe/lib/cython/__init__.py +13 -0
  304. maxframe/lib/cython/libcpp.pxd +30 -0
  305. maxframe/lib/filesystem/__init__.py +21 -0
  306. maxframe/lib/filesystem/_glob.py +173 -0
  307. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  308. maxframe/lib/filesystem/_oss_lib/common.py +198 -0
  309. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  310. maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
  311. maxframe/lib/filesystem/arrow.py +236 -0
  312. maxframe/lib/filesystem/base.py +263 -0
  313. maxframe/lib/filesystem/core.py +95 -0
  314. maxframe/lib/filesystem/fsmap.py +164 -0
  315. maxframe/lib/filesystem/hdfs.py +31 -0
  316. maxframe/lib/filesystem/local.py +112 -0
  317. maxframe/lib/filesystem/oss.py +157 -0
  318. maxframe/lib/filesystem/tests/__init__.py +13 -0
  319. maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
  320. maxframe/lib/filesystem/tests/test_oss.py +182 -0
  321. maxframe/lib/functools_compat.py +81 -0
  322. maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
  323. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  324. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  325. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  326. maxframe/lib/sparse/__init__.py +861 -0
  327. maxframe/lib/sparse/array.py +1604 -0
  328. maxframe/lib/sparse/core.py +92 -0
  329. maxframe/lib/sparse/matrix.py +241 -0
  330. maxframe/lib/sparse/tests/__init__.py +15 -0
  331. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  332. maxframe/lib/sparse/vector.py +150 -0
  333. maxframe/lib/tblib/LICENSE +20 -0
  334. maxframe/lib/tblib/__init__.py +327 -0
  335. maxframe/lib/tblib/cpython.py +83 -0
  336. maxframe/lib/tblib/decorators.py +44 -0
  337. maxframe/lib/tblib/pickling_support.py +90 -0
  338. maxframe/lib/tests/__init__.py +13 -0
  339. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  340. maxframe/lib/version.py +620 -0
  341. maxframe/lib/wrapped_pickle.py +139 -0
  342. maxframe/mixin.py +100 -0
  343. maxframe/odpsio/__init__.py +21 -0
  344. maxframe/odpsio/arrow.py +91 -0
  345. maxframe/odpsio/schema.py +364 -0
  346. maxframe/odpsio/tableio.py +322 -0
  347. maxframe/odpsio/tests/__init__.py +13 -0
  348. maxframe/odpsio/tests/test_arrow.py +88 -0
  349. maxframe/odpsio/tests/test_schema.py +297 -0
  350. maxframe/odpsio/tests/test_tableio.py +136 -0
  351. maxframe/odpsio/tests/test_volumeio.py +90 -0
  352. maxframe/odpsio/volumeio.py +95 -0
  353. maxframe/opcodes.py +590 -0
  354. maxframe/protocol.py +415 -0
  355. maxframe/remote/__init__.py +18 -0
  356. maxframe/remote/core.py +210 -0
  357. maxframe/remote/run_script.py +121 -0
  358. maxframe/serialization/__init__.py +26 -0
  359. maxframe/serialization/arrow.py +95 -0
  360. maxframe/serialization/core.cpython-39-darwin.so +0 -0
  361. maxframe/serialization/core.pxd +44 -0
  362. maxframe/serialization/core.pyi +61 -0
  363. maxframe/serialization/core.pyx +1094 -0
  364. maxframe/serialization/exception.py +86 -0
  365. maxframe/serialization/maxframe_objects.py +39 -0
  366. maxframe/serialization/numpy.py +91 -0
  367. maxframe/serialization/pandas.py +202 -0
  368. maxframe/serialization/scipy.py +71 -0
  369. maxframe/serialization/serializables/__init__.py +55 -0
  370. maxframe/serialization/serializables/core.py +262 -0
  371. maxframe/serialization/serializables/field.py +624 -0
  372. maxframe/serialization/serializables/field_type.py +589 -0
  373. maxframe/serialization/serializables/tests/__init__.py +13 -0
  374. maxframe/serialization/serializables/tests/test_field_type.py +121 -0
  375. maxframe/serialization/serializables/tests/test_serializable.py +250 -0
  376. maxframe/serialization/tests/__init__.py +13 -0
  377. maxframe/serialization/tests/test_serial.py +412 -0
  378. maxframe/session.py +1310 -0
  379. maxframe/tensor/__init__.py +183 -0
  380. maxframe/tensor/arithmetic/__init__.py +315 -0
  381. maxframe/tensor/arithmetic/abs.py +68 -0
  382. maxframe/tensor/arithmetic/absolute.py +68 -0
  383. maxframe/tensor/arithmetic/add.py +82 -0
  384. maxframe/tensor/arithmetic/angle.py +72 -0
  385. maxframe/tensor/arithmetic/arccos.py +104 -0
  386. maxframe/tensor/arithmetic/arccosh.py +91 -0
  387. maxframe/tensor/arithmetic/arcsin.py +94 -0
  388. maxframe/tensor/arithmetic/arcsinh.py +86 -0
  389. maxframe/tensor/arithmetic/arctan.py +106 -0
  390. maxframe/tensor/arithmetic/arctan2.py +128 -0
  391. maxframe/tensor/arithmetic/arctanh.py +86 -0
  392. maxframe/tensor/arithmetic/around.py +114 -0
  393. maxframe/tensor/arithmetic/bitand.py +95 -0
  394. maxframe/tensor/arithmetic/bitor.py +102 -0
  395. maxframe/tensor/arithmetic/bitxor.py +95 -0
  396. maxframe/tensor/arithmetic/cbrt.py +66 -0
  397. maxframe/tensor/arithmetic/ceil.py +71 -0
  398. maxframe/tensor/arithmetic/clip.py +165 -0
  399. maxframe/tensor/arithmetic/conj.py +74 -0
  400. maxframe/tensor/arithmetic/copysign.py +78 -0
  401. maxframe/tensor/arithmetic/core.py +544 -0
  402. maxframe/tensor/arithmetic/cos.py +85 -0
  403. maxframe/tensor/arithmetic/cosh.py +72 -0
  404. maxframe/tensor/arithmetic/deg2rad.py +72 -0
  405. maxframe/tensor/arithmetic/degrees.py +77 -0
  406. maxframe/tensor/arithmetic/divide.py +114 -0
  407. maxframe/tensor/arithmetic/equal.py +76 -0
  408. maxframe/tensor/arithmetic/exp.py +106 -0
  409. maxframe/tensor/arithmetic/exp2.py +67 -0
  410. maxframe/tensor/arithmetic/expm1.py +79 -0
  411. maxframe/tensor/arithmetic/fabs.py +74 -0
  412. maxframe/tensor/arithmetic/fix.py +69 -0
  413. maxframe/tensor/arithmetic/float_power.py +103 -0
  414. maxframe/tensor/arithmetic/floor.py +77 -0
  415. maxframe/tensor/arithmetic/floordiv.py +94 -0
  416. maxframe/tensor/arithmetic/fmax.py +105 -0
  417. maxframe/tensor/arithmetic/fmin.py +106 -0
  418. maxframe/tensor/arithmetic/fmod.py +99 -0
  419. maxframe/tensor/arithmetic/frexp.py +92 -0
  420. maxframe/tensor/arithmetic/greater.py +77 -0
  421. maxframe/tensor/arithmetic/greater_equal.py +69 -0
  422. maxframe/tensor/arithmetic/hypot.py +77 -0
  423. maxframe/tensor/arithmetic/i0.py +89 -0
  424. maxframe/tensor/arithmetic/imag.py +67 -0
  425. maxframe/tensor/arithmetic/invert.py +110 -0
  426. maxframe/tensor/arithmetic/isclose.py +115 -0
  427. maxframe/tensor/arithmetic/iscomplex.py +64 -0
  428. maxframe/tensor/arithmetic/isfinite.py +106 -0
  429. maxframe/tensor/arithmetic/isinf.py +103 -0
  430. maxframe/tensor/arithmetic/isnan.py +82 -0
  431. maxframe/tensor/arithmetic/isreal.py +63 -0
  432. maxframe/tensor/arithmetic/ldexp.py +99 -0
  433. maxframe/tensor/arithmetic/less.py +69 -0
  434. maxframe/tensor/arithmetic/less_equal.py +69 -0
  435. maxframe/tensor/arithmetic/log.py +92 -0
  436. maxframe/tensor/arithmetic/log10.py +85 -0
  437. maxframe/tensor/arithmetic/log1p.py +95 -0
  438. maxframe/tensor/arithmetic/log2.py +85 -0
  439. maxframe/tensor/arithmetic/logaddexp.py +80 -0
  440. maxframe/tensor/arithmetic/logaddexp2.py +78 -0
  441. maxframe/tensor/arithmetic/logical_and.py +81 -0
  442. maxframe/tensor/arithmetic/logical_not.py +74 -0
  443. maxframe/tensor/arithmetic/logical_or.py +82 -0
  444. maxframe/tensor/arithmetic/logical_xor.py +88 -0
  445. maxframe/tensor/arithmetic/lshift.py +82 -0
  446. maxframe/tensor/arithmetic/maximum.py +108 -0
  447. maxframe/tensor/arithmetic/minimum.py +108 -0
  448. maxframe/tensor/arithmetic/mod.py +104 -0
  449. maxframe/tensor/arithmetic/modf.py +83 -0
  450. maxframe/tensor/arithmetic/multiply.py +81 -0
  451. maxframe/tensor/arithmetic/nan_to_num.py +99 -0
  452. maxframe/tensor/arithmetic/negative.py +65 -0
  453. maxframe/tensor/arithmetic/nextafter.py +68 -0
  454. maxframe/tensor/arithmetic/not_equal.py +72 -0
  455. maxframe/tensor/arithmetic/positive.py +47 -0
  456. maxframe/tensor/arithmetic/power.py +106 -0
  457. maxframe/tensor/arithmetic/rad2deg.py +71 -0
  458. maxframe/tensor/arithmetic/radians.py +77 -0
  459. maxframe/tensor/arithmetic/real.py +70 -0
  460. maxframe/tensor/arithmetic/reciprocal.py +76 -0
  461. maxframe/tensor/arithmetic/rint.py +68 -0
  462. maxframe/tensor/arithmetic/rshift.py +81 -0
  463. maxframe/tensor/arithmetic/setimag.py +29 -0
  464. maxframe/tensor/arithmetic/setreal.py +29 -0
  465. maxframe/tensor/arithmetic/sign.py +81 -0
  466. maxframe/tensor/arithmetic/signbit.py +65 -0
  467. maxframe/tensor/arithmetic/sin.py +98 -0
  468. maxframe/tensor/arithmetic/sinc.py +102 -0
  469. maxframe/tensor/arithmetic/sinh.py +93 -0
  470. maxframe/tensor/arithmetic/spacing.py +72 -0
  471. maxframe/tensor/arithmetic/sqrt.py +81 -0
  472. maxframe/tensor/arithmetic/square.py +69 -0
  473. maxframe/tensor/arithmetic/subtract.py +81 -0
  474. maxframe/tensor/arithmetic/tan.py +88 -0
  475. maxframe/tensor/arithmetic/tanh.py +92 -0
  476. maxframe/tensor/arithmetic/tests/__init__.py +15 -0
  477. maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
  478. maxframe/tensor/arithmetic/truediv.py +104 -0
  479. maxframe/tensor/arithmetic/trunc.py +72 -0
  480. maxframe/tensor/arithmetic/utils.py +65 -0
  481. maxframe/tensor/array_utils.py +186 -0
  482. maxframe/tensor/base/__init__.py +34 -0
  483. maxframe/tensor/base/astype.py +119 -0
  484. maxframe/tensor/base/atleast_1d.py +74 -0
  485. maxframe/tensor/base/broadcast_to.py +89 -0
  486. maxframe/tensor/base/ravel.py +92 -0
  487. maxframe/tensor/base/tests/__init__.py +13 -0
  488. maxframe/tensor/base/tests/test_base.py +114 -0
  489. maxframe/tensor/base/transpose.py +125 -0
  490. maxframe/tensor/base/unique.py +205 -0
  491. maxframe/tensor/base/where.py +127 -0
  492. maxframe/tensor/core.py +724 -0
  493. maxframe/tensor/datasource/__init__.py +32 -0
  494. maxframe/tensor/datasource/arange.py +156 -0
  495. maxframe/tensor/datasource/array.py +415 -0
  496. maxframe/tensor/datasource/core.py +109 -0
  497. maxframe/tensor/datasource/empty.py +169 -0
  498. maxframe/tensor/datasource/from_dataframe.py +70 -0
  499. maxframe/tensor/datasource/from_dense.py +54 -0
  500. maxframe/tensor/datasource/from_sparse.py +47 -0
  501. maxframe/tensor/datasource/full.py +186 -0
  502. maxframe/tensor/datasource/ones.py +173 -0
  503. maxframe/tensor/datasource/scalar.py +40 -0
  504. maxframe/tensor/datasource/tests/__init__.py +13 -0
  505. maxframe/tensor/datasource/tests/test_datasource.py +278 -0
  506. maxframe/tensor/datasource/zeros.py +188 -0
  507. maxframe/tensor/fetch/__init__.py +15 -0
  508. maxframe/tensor/fetch/core.py +54 -0
  509. maxframe/tensor/indexing/__init__.py +47 -0
  510. maxframe/tensor/indexing/choose.py +196 -0
  511. maxframe/tensor/indexing/compress.py +124 -0
  512. maxframe/tensor/indexing/core.py +190 -0
  513. maxframe/tensor/indexing/extract.py +71 -0
  514. maxframe/tensor/indexing/fill_diagonal.py +183 -0
  515. maxframe/tensor/indexing/flatnonzero.py +60 -0
  516. maxframe/tensor/indexing/getitem.py +175 -0
  517. maxframe/tensor/indexing/nonzero.py +120 -0
  518. maxframe/tensor/indexing/setitem.py +132 -0
  519. maxframe/tensor/indexing/slice.py +29 -0
  520. maxframe/tensor/indexing/take.py +130 -0
  521. maxframe/tensor/indexing/tests/__init__.py +15 -0
  522. maxframe/tensor/indexing/tests/test_indexing.py +234 -0
  523. maxframe/tensor/indexing/unravel_index.py +103 -0
  524. maxframe/tensor/merge/__init__.py +15 -0
  525. maxframe/tensor/merge/stack.py +132 -0
  526. maxframe/tensor/merge/tests/__init__.py +13 -0
  527. maxframe/tensor/merge/tests/test_merge.py +52 -0
  528. maxframe/tensor/operators.py +123 -0
  529. maxframe/tensor/random/__init__.py +168 -0
  530. maxframe/tensor/random/beta.py +87 -0
  531. maxframe/tensor/random/binomial.py +137 -0
  532. maxframe/tensor/random/bytes.py +39 -0
  533. maxframe/tensor/random/chisquare.py +110 -0
  534. maxframe/tensor/random/choice.py +186 -0
  535. maxframe/tensor/random/core.py +234 -0
  536. maxframe/tensor/random/dirichlet.py +123 -0
  537. maxframe/tensor/random/exponential.py +94 -0
  538. maxframe/tensor/random/f.py +135 -0
  539. maxframe/tensor/random/gamma.py +128 -0
  540. maxframe/tensor/random/geometric.py +93 -0
  541. maxframe/tensor/random/gumbel.py +167 -0
  542. maxframe/tensor/random/hypergeometric.py +148 -0
  543. maxframe/tensor/random/laplace.py +133 -0
  544. maxframe/tensor/random/logistic.py +129 -0
  545. maxframe/tensor/random/lognormal.py +159 -0
  546. maxframe/tensor/random/logseries.py +122 -0
  547. maxframe/tensor/random/multinomial.py +133 -0
  548. maxframe/tensor/random/multivariate_normal.py +192 -0
  549. maxframe/tensor/random/negative_binomial.py +125 -0
  550. maxframe/tensor/random/noncentral_chisquare.py +132 -0
  551. maxframe/tensor/random/noncentral_f.py +126 -0
  552. maxframe/tensor/random/normal.py +143 -0
  553. maxframe/tensor/random/pareto.py +140 -0
  554. maxframe/tensor/random/permutation.py +104 -0
  555. maxframe/tensor/random/poisson.py +111 -0
  556. maxframe/tensor/random/power.py +142 -0
  557. maxframe/tensor/random/rand.py +82 -0
  558. maxframe/tensor/random/randint.py +121 -0
  559. maxframe/tensor/random/randn.py +96 -0
  560. maxframe/tensor/random/random_integers.py +123 -0
  561. maxframe/tensor/random/random_sample.py +86 -0
  562. maxframe/tensor/random/rayleigh.py +110 -0
  563. maxframe/tensor/random/shuffle.py +61 -0
  564. maxframe/tensor/random/standard_cauchy.py +105 -0
  565. maxframe/tensor/random/standard_exponential.py +72 -0
  566. maxframe/tensor/random/standard_gamma.py +120 -0
  567. maxframe/tensor/random/standard_normal.py +74 -0
  568. maxframe/tensor/random/standard_t.py +135 -0
  569. maxframe/tensor/random/tests/__init__.py +15 -0
  570. maxframe/tensor/random/tests/test_random.py +167 -0
  571. maxframe/tensor/random/triangular.py +119 -0
  572. maxframe/tensor/random/uniform.py +131 -0
  573. maxframe/tensor/random/vonmises.py +131 -0
  574. maxframe/tensor/random/wald.py +114 -0
  575. maxframe/tensor/random/weibull.py +140 -0
  576. maxframe/tensor/random/zipf.py +122 -0
  577. maxframe/tensor/rechunk/__init__.py +26 -0
  578. maxframe/tensor/rechunk/rechunk.py +43 -0
  579. maxframe/tensor/reduction/__init__.py +66 -0
  580. maxframe/tensor/reduction/all.py +103 -0
  581. maxframe/tensor/reduction/allclose.py +88 -0
  582. maxframe/tensor/reduction/any.py +105 -0
  583. maxframe/tensor/reduction/argmax.py +103 -0
  584. maxframe/tensor/reduction/argmin.py +103 -0
  585. maxframe/tensor/reduction/array_equal.py +64 -0
  586. maxframe/tensor/reduction/core.py +168 -0
  587. maxframe/tensor/reduction/count_nonzero.py +81 -0
  588. maxframe/tensor/reduction/cumprod.py +97 -0
  589. maxframe/tensor/reduction/cumsum.py +101 -0
  590. maxframe/tensor/reduction/max.py +120 -0
  591. maxframe/tensor/reduction/mean.py +123 -0
  592. maxframe/tensor/reduction/min.py +120 -0
  593. maxframe/tensor/reduction/nanargmax.py +82 -0
  594. maxframe/tensor/reduction/nanargmin.py +76 -0
  595. maxframe/tensor/reduction/nancumprod.py +91 -0
  596. maxframe/tensor/reduction/nancumsum.py +94 -0
  597. maxframe/tensor/reduction/nanmax.py +111 -0
  598. maxframe/tensor/reduction/nanmean.py +106 -0
  599. maxframe/tensor/reduction/nanmin.py +111 -0
  600. maxframe/tensor/reduction/nanprod.py +94 -0
  601. maxframe/tensor/reduction/nanstd.py +126 -0
  602. maxframe/tensor/reduction/nansum.py +115 -0
  603. maxframe/tensor/reduction/nanvar.py +149 -0
  604. maxframe/tensor/reduction/prod.py +130 -0
  605. maxframe/tensor/reduction/std.py +134 -0
  606. maxframe/tensor/reduction/sum.py +125 -0
  607. maxframe/tensor/reduction/tests/__init__.py +13 -0
  608. maxframe/tensor/reduction/tests/test_reduction.py +181 -0
  609. maxframe/tensor/reduction/var.py +176 -0
  610. maxframe/tensor/reshape/__init__.py +17 -0
  611. maxframe/tensor/reshape/reshape.py +188 -0
  612. maxframe/tensor/reshape/tests/__init__.py +15 -0
  613. maxframe/tensor/reshape/tests/test_reshape.py +37 -0
  614. maxframe/tensor/statistics/__init__.py +13 -0
  615. maxframe/tensor/statistics/percentile.py +175 -0
  616. maxframe/tensor/statistics/quantile.py +288 -0
  617. maxframe/tensor/ufunc/__init__.py +26 -0
  618. maxframe/tensor/ufunc/ufunc.py +200 -0
  619. maxframe/tensor/utils.py +718 -0
  620. maxframe/tests/__init__.py +13 -0
  621. maxframe/tests/test_codegen.py +69 -0
  622. maxframe/tests/test_protocol.py +144 -0
  623. maxframe/tests/test_utils.py +376 -0
  624. maxframe/tests/utils.py +164 -0
  625. maxframe/typing_.py +37 -0
  626. maxframe/udf.py +134 -0
  627. maxframe/utils.py +1114 -0
  628. maxframe-0.1.0b5.dist-info/METADATA +104 -0
  629. maxframe-0.1.0b5.dist-info/RECORD +647 -0
  630. maxframe-0.1.0b5.dist-info/WHEEL +5 -0
  631. maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
  632. maxframe_client/__init__.py +17 -0
  633. maxframe_client/clients/__init__.py +13 -0
  634. maxframe_client/clients/framedriver.py +118 -0
  635. maxframe_client/clients/spe.py +104 -0
  636. maxframe_client/conftest.py +15 -0
  637. maxframe_client/fetcher.py +264 -0
  638. maxframe_client/session/__init__.py +22 -0
  639. maxframe_client/session/consts.py +36 -0
  640. maxframe_client/session/graph.py +119 -0
  641. maxframe_client/session/odps.py +482 -0
  642. maxframe_client/session/task.py +280 -0
  643. maxframe_client/session/tests/__init__.py +13 -0
  644. maxframe_client/session/tests/test_task.py +85 -0
  645. maxframe_client/tests/__init__.py +13 -0
  646. maxframe_client/tests/test_fetcher.py +89 -0
  647. maxframe_client/tests/test_session.py +255 -0
@@ -0,0 +1,401 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ from collections import OrderedDict
17
+
18
+ import numpy as np
19
+ import pandas as pd
20
+ import pytest
21
+ from odps import ODPS
22
+
23
+ from .... import tensor as mt
24
+ from ....tests.utils import tn
25
+ from ....utils import lazy_import
26
+ from ... import read_odps_query, read_odps_table
27
+ from ...core import DatetimeIndex, Float64Index, IndexValue, Int64Index, MultiIndex
28
+ from ..dataframe import from_pandas as from_pandas_df
29
+ from ..date_range import date_range
30
+ from ..from_tensor import (
31
+ dataframe_from_1d_tileables,
32
+ dataframe_from_tensor,
33
+ series_from_tensor,
34
+ )
35
+ from ..index import from_pandas as from_pandas_index
36
+ from ..index import from_tileable
37
+ from ..read_odps_query import ColumnSchema, _resolve_task_sector
38
+ from ..series import from_pandas as from_pandas_series
39
+
40
+ ray = lazy_import("ray")
41
+
42
+
43
def test_from_pandas_dataframe():
    """Check the metadata ``from_pandas_df`` derives from a pandas DataFrame.

    A 10x10 frame with the default index is converted first, then a
    ``[::2]`` slice of it, and the recorded dtypes / index metadata are
    compared with the pandas originals.
    """
    data = pd.DataFrame(
        np.random.rand(10, 10), columns=["c" + str(i) for i in range(10)]
    )
    df = from_pandas_df(data, chunk_size=4)

    pd.testing.assert_series_equal(df.op.dtypes, data.dtypes)
    assert isinstance(df.index_value._index_value, IndexValue.RangeIndex)
    assert df.index_value._index_value._slice == slice(0, 10, 1)
    assert df.index_value.is_monotonic_increasing is True
    assert df.index_value.is_monotonic_decreasing is False
    assert df.index_value.is_unique is True
    assert df.index_value.min_val == 0
    assert df.index_value.max_val == 9
    np.testing.assert_equal(df.columns_value._index_value._data, data.columns.values)

    data2 = data[::2]
    df2 = from_pandas_df(data2, chunk_size=4)

    # Validate the object built from the sliced frame (df2); the earlier
    # version re-checked ``df`` here and therefore never inspected df2's dtypes.
    pd.testing.assert_series_equal(df2.op.dtypes, data2.dtypes)
    assert isinstance(df2.index_value._index_value, IndexValue.RangeIndex)
    assert df2.index_value._index_value._slice == slice(0, 10, 2)
65
+
66
+
67
def test_from_pandas_dataframe_with_multi_index():
    """A frame carrying a named pandas MultiIndex keeps its level names."""
    row_index = pd.MultiIndex.from_tuples([("k1", "v1")], names=["X", "Y"])
    values = np.random.randint(0, 100, size=(1, 3))
    raw_frame = pd.DataFrame(values, columns=["A", "B", "C"], index=row_index)

    converted = from_pandas_df(raw_frame, chunk_size=4)

    assert isinstance(converted.index, MultiIndex)
    # level names are preserved, while the single ``name`` stays unset
    assert converted.index.names == ["X", "Y"]
    assert converted.index.name is None
75
+
76
+
77
def test_from_pandas_series():
    """Check name and index metadata of a Series built by ``from_pandas_series``."""
    raw = pd.Series(np.random.rand(10), name="a")
    series = from_pandas_series(raw, chunk_size=4)

    assert series.name == raw.name

    # the default pandas index is expected to be recorded as a 0..10 range
    range_meta = series.index_value._index_value
    assert isinstance(range_meta, IndexValue.RangeIndex)
    assert range_meta._slice == slice(0, 10, 1)

    idx_value = series.index_value
    assert idx_value.is_monotonic_increasing is True
    assert idx_value.is_monotonic_decreasing is False
    assert idx_value.is_unique is True
    assert idx_value.min_val == 0
    assert idx_value.max_val == 9
89
+
90
+
91
def test_from_pandas_index():
    """A pandas date range converts to a ``DatetimeIndex`` with matching metadata."""
    pandas_index = pd.date_range("2020-1-1", periods=10, name="date")
    converted = from_pandas_index(pandas_index, chunk_size=4)

    assert isinstance(converted, DatetimeIndex)
    assert converted.name == pandas_index.name
    assert converted.dtype == pandas_index.dtype

    recorded = converted.index_value.value
    assert isinstance(recorded, IndexValue.DatetimeIndex)
99
+
100
+
101
def test_from_tileable_index():
    """Check indexes taken from DataFrame/Series objects and from a 1-d tensor."""
    # from_tileable is expected to reject this 10x4 tensor
    with pytest.raises(ValueError):
        from_tileable(mt.random.rand(10, 4))

    descending_labels = np.arange(10, 0, -1).astype(np.int64)
    pd_df = pd.DataFrame(np.random.rand(10, 4), index=descending_labels)
    pd_df.index.name = "ind"
    df = from_pandas_df(pd_df, chunk_size=6)

    # both the frame and one of its columns should expose the same kind of index
    for source in (df, df[0]):
        int_index = source.index
        assert isinstance(int_index, Int64Index)
        assert int_index.dtype == np.int64
        assert int_index.name == pd_df.index.name
        assert isinstance(int_index.index_value.value, IndexValue.Int64Index)

    float_tensor = mt.random.rand(10, chunk_size=6)
    float_index = from_tileable(float_tensor, name="new_name")

    assert isinstance(float_index, Float64Index)
    assert float_index.dtype == np.float64
    assert float_index.name == "new_name"
    assert isinstance(float_index.index_value.value, IndexValue.Float64Index)
127
+
128
+
129
def test_from_tensor():
    """Cover DataFrame/Series construction from tensors and the rejected inputs."""
    letters = list("abcdefghij")

    source = mt.random.rand(10, 10, chunk_size=5)
    frame = dataframe_from_tensor(source)
    assert isinstance(frame.index_value._index_value, IndexValue.RangeIndex)
    assert frame.dtypes[0] == source.dtype

    # a 0-d tensor cannot be turned into a DataFrame
    zero_dim = mt.array(1)
    np.testing.assert_equal(zero_dim.ndim, 0)
    with pytest.raises(TypeError):
        dataframe_from_tensor(zero_dim)

    # explicit column labels end up as the dtypes index
    frame = dataframe_from_tensor(source, columns=letters)
    pd.testing.assert_index_equal(frame.dtypes.index, pd.Index(letters))

    # series built from a 1-d tensor
    vector = mt.random.rand(10, chunk_size=4)
    series = series_from_tensor(vector, name="a")

    assert series.dtype == vector.dtype
    assert series.name == "a"
    pd.testing.assert_index_equal(series.index_value.to_pandas(), pd.RangeIndex(10))

    # frame assembled from a mapping of 1-d tensors
    column_tensors = OrderedDict()
    column_tensors[0] = mt.tensor(np.random.rand(4))
    column_tensors[1] = mt.tensor(np.random.rand(4))
    frame = dataframe_from_1d_tileables(column_tensors)
    pd.testing.assert_index_equal(frame.columns_value.to_pandas(), pd.RangeIndex(2))

    series = series_from_tensor(mt.random.rand(4))
    pd.testing.assert_index_equal(series.index_value.to_pandas(), pd.RangeIndex(4))

    # a plain list index is stored on the op as a pandas Index
    series = series_from_tensor(mt.random.rand(4), index=[1, 2, 3])
    pd.testing.assert_index_equal(series.op.index, pd.Index([1, 2, 3]))

    # a named pandas Index keeps its name
    named_index = pd.Index([1, 2, 3], name="my_index")
    series = series_from_tensor(mt.random.rand(4), index=named_index)
    pd.testing.assert_index_equal(series.op.index, pd.Index([1, 2, 3], name="my_index"))
    assert series.index_value.name == "my_index"

    # a 2-d tensor cannot become a Series
    with pytest.raises(TypeError):
        series_from_tensor(mt.ones((10, 10)))

    # index has wrong shape
    with pytest.raises(ValueError):
        dataframe_from_tensor(mt.random.rand(4, 3), index=mt.random.rand(5))

    # columns have wrong shape
    with pytest.raises(ValueError):
        dataframe_from_tensor(mt.random.rand(4, 3), columns=["a", "b"])

    # index should be 1-d
    with pytest.raises(ValueError):
        dataframe_from_tensor(
            mt.tensor(np.random.rand(3, 2)), index=mt.tensor(np.random.rand(3, 2))
        )

    # 1-d tensors should have same shape
    with pytest.raises(ValueError):
        mismatched = OrderedDict(
            [(0, mt.tensor(np.random.rand(3))), (1, mt.tensor(np.random.rand(2)))]
        )
        dataframe_from_1d_tileables(mismatched)

    # index has wrong shape
    with pytest.raises(ValueError):
        dataframe_from_1d_tileables(
            {0: mt.tensor(np.random.rand(3))}, index=mt.tensor(np.random.rand(2))
        )

    # columns have wrong shape
    with pytest.raises(ValueError):
        dataframe_from_1d_tileables(
            {0: mt.tensor(np.random.rand(3))}, columns=["a", "b"]
        )

    # index should be 1-d
    with pytest.raises(ValueError):
        series_from_tensor(mt.random.rand(4), index=mt.random.rand(4, 3))
211
+
212
+
213
def test_from_odps_table():
    """Exercise ``read_odps_table`` metadata against two scratch ODPS tables.

    One plain table and one table partitioned by ``pt`` are created, and the
    resulting objects are checked for table name, index metadata, selected
    columns and dtypes.  Both tables are removed in a ``finally`` block so
    that a failing assertion does not leave them behind on the service.
    """
    odps_entry = ODPS.from_environments()
    table_name = tn("test_from_odps_table_src")
    part_table_name = tn("test_parted_from_odps_table_src")

    # start from a clean slate in case an earlier run left the tables around
    odps_entry.delete_table(table_name, if_exists=True)
    odps_entry.delete_table(part_table_name, if_exists=True)

    try:
        test_table = odps_entry.create_table(
            table_name, "col1 string, col2 bigint, col3 double", lifecycle=1
        )
        test_parted_table = odps_entry.create_table(
            part_table_name,
            ("col1 string, col2 bigint, col3 double", "pt string"),
            lifecycle=1,
        )

        # read by table name, all columns, default index
        df = read_odps_table(table_name)
        assert df.op.table_name == test_table.full_table_name
        assert df.index_value.name is None
        assert isinstance(df.index_value.value, IndexValue.RangeIndex)
        assert df.op.get_columns() == ["col1", "col2", "col3"]
        pd.testing.assert_series_equal(
            df.dtypes,
            pd.Series(
                [np.dtype("O"), np.dtype("int64"), np.dtype("float64")],
                index=["col1", "col2", "col3"],
            ),
        )

        # col4 is not part of the table schema
        with pytest.raises(ValueError):
            read_odps_table(test_table, columns=["col3", "col4"])
        with pytest.raises(ValueError):
            read_odps_table(test_table, index_col="col4")
        # col2 is requested both as a data column and as the index
        with pytest.raises(ValueError):
            read_odps_table(test_table, columns=["col1", "col2"], index_col="col2")

        # "Col1" is expected to resolve to the lower-case schema name
        df = read_odps_table(test_table, columns=["Col1", "col2"])
        assert df.op.table_name == test_table.full_table_name
        assert df.index_value.name is None
        assert isinstance(df.index_value.value, IndexValue.RangeIndex)
        assert df.op.get_columns() == ["col1", "col2"]
        pd.testing.assert_series_equal(
            df.dtypes,
            pd.Series([np.dtype("O"), np.dtype("int64")], index=["col1", "col2"]),
        )

        # a column used as index is removed from the data columns
        df = read_odps_table(test_table, index_col="col1")
        assert df.op.table_name == test_table.full_table_name
        assert df.index_value.name == "col1"
        assert isinstance(df.index_value.value, IndexValue.Index)
        assert df.index.dtype == np.dtype("O")
        assert df.op.get_columns() == ["col2", "col3"]
        pd.testing.assert_series_equal(
            df.dtypes,
            pd.Series([np.dtype("int64"), np.dtype("float64")], index=["col2", "col3"]),
        )

        # append_partitions is rejected for the table created without partitions
        with pytest.raises(ValueError):
            read_odps_table(test_table, append_partitions=True)

        df = read_odps_table(test_parted_table, append_partitions=True)
        assert df.op.append_partitions is True
        assert df.op.get_columns() == ["col1", "col2", "col3", "pt"]
        pd.testing.assert_series_equal(
            df.dtypes,
            pd.Series(
                [np.dtype("O"), np.dtype("int64"), np.dtype("float64"), np.dtype("O")],
                index=["col1", "col2", "col3", "pt"],
            ),
        )

        # selecting the partition column explicitly together with a partition spec
        df = read_odps_table(
            test_parted_table, columns=["col1", "col2", "pt"], partitions="pt=20240103"
        )
        assert df.op.append_partitions is True
        assert df.op.partitions == ["pt=20240103"]
        assert df.op.get_columns() == ["col1", "col2", "pt"]
        pd.testing.assert_series_equal(
            df.dtypes,
            pd.Series(
                [np.dtype("O"), np.dtype("int64"), np.dtype("O")],
                index=["col1", "col2", "pt"],
            ),
        )
    finally:
        # delete by name so cleanup also works when a create_table call failed
        odps_entry.delete_table(table_name, if_exists=True)
        odps_entry.delete_table(part_table_name, if_exists=True)
300
+
301
+
302
def test_from_odps_query():
    """Exercise ``read_odps_query`` metadata against two scratch ODPS tables.

    Two single-row tables are created and queried with a plain SELECT, a
    SELECT using an index column, and a join using a two-column index.  The
    tables are removed in a ``finally`` block so a failing assertion does
    not leave them behind on the service.
    """
    odps_entry = ODPS.from_environments()
    table1_name = tn("test_from_odps_query_src1")
    table2_name = tn("test_from_odps_query_src2")
    odps_entry.delete_table(table1_name, if_exists=True)
    odps_entry.delete_table(table2_name, if_exists=True)

    try:
        test_table = odps_entry.create_table(
            table1_name, "col1 string, col2 bigint, col3 double", lifecycle=1
        )
        # need some data to produce complicated plans
        odps_entry.write_table(test_table, [["A", 10, 3.5]])
        test_table2 = odps_entry.create_table(
            table2_name, "col1 string, col2 bigint, col3 double", lifecycle=1
        )
        odps_entry.write_table(test_table2, [["A", 10, 4.5]])

        # a statement that is not a plain query is rejected with a hint
        with pytest.raises(ValueError) as err_info:
            read_odps_query(f"CREATE TABLE dummy_table AS SELECT * FROM {table1_name}")
        assert "instant query" in err_info.value.args[0]

        query1 = f"SELECT * FROM {table1_name} WHERE col1 > 10"
        df = read_odps_query(query1)
        assert df.op.query == query1
        assert df.index_value.name is None
        assert isinstance(df.index_value.value, IndexValue.RangeIndex)
        pd.testing.assert_series_equal(
            df.dtypes,
            pd.Series(
                [np.dtype("O"), np.dtype("int64"), np.dtype("float64")],
                index=["col1", "col2", "col3"],
            ),
        )

        # the index column is removed from the data columns
        df = read_odps_query(query1, index_col="col1")
        assert df.op.query == query1
        assert df.index_value.name == "col1"
        assert isinstance(df.index_value.value, IndexValue.Index)
        pd.testing.assert_series_equal(
            df.dtypes,
            pd.Series([np.dtype("int64"), np.dtype("float64")], index=["col2", "col3"]),
        )

        query2 = (
            f"SELECT t1.col1, t1.col2, t1.col3 as c31, t2.col3 as c32 "
            f"FROM {table1_name} t1 "
            f"INNER JOIN {table2_name} t2 "
            f"ON t1.col1 = t2.col1 AND t1.col2 = t2.col2"
        )
        # two index columns yield a multi-level index
        df = read_odps_query(query2, index_col=["col1", "col2"])
        assert df.op.query == query2
        assert df.index_value.names == ["col1", "col2"]
        assert isinstance(df.index_value.value, IndexValue.MultiIndex)
        pd.testing.assert_series_equal(
            df.dtypes,
            pd.Series([np.dtype("float64"), np.dtype("float64")], index=["c31", "c32"]),
        )
    finally:
        # delete by name so cleanup also works when a create_table call failed
        odps_entry.delete_table(table1_name, if_exists=True)
        odps_entry.delete_table(table2_name, if_exists=True)
361
+
362
+
363
def test_date_range():
    """Validate argument checking and result metadata of ``date_range``."""
    # a string ``periods`` is expected to be a type error
    with pytest.raises(TypeError):
        date_range("2020-1-1", periods="2")

    # start, end, periods and freq given together
    with pytest.raises(ValueError):
        date_range("2020-1-1", "2020-1-10", periods=10, freq="D")

    # NaT as the start value
    with pytest.raises(ValueError):
        date_range(pd.NaT, periods=10)

    # pandas builds the reference result from the same arguments
    expected = pd.date_range("2020-1-1", periods=9.0, name="date")
    dr = date_range("2020-1-1", periods=9.0, name="date", chunk_size=3)

    assert isinstance(dr, DatetimeIndex)
    assert dr.shape == (9,)
    assert dr.dtype == expected.dtype
    assert dr.name == expected.name

    meta = dr.index_value
    assert isinstance(meta.value, IndexValue.DatetimeIndex)
    assert meta.min_val == expected.min()
    assert meta.min_val_close is True
    assert meta.max_val == expected.max()
    assert meta.max_val_close is True
    assert meta.is_unique == expected.is_unique
    assert meta.is_monotonic_increasing == expected.is_monotonic_increasing
387
+
388
+
389
def test_resolve_task_sector():
    """Parse the bundled ``task-input.txt`` fixture and check the resolved sector."""
    fixture_dir = os.path.join(os.path.dirname(__file__), "test-data")
    with open(os.path.join(fixture_dir, "task-input.txt"), "r") as fixture:
        raw_sector = fixture.read()

    resolved = _resolve_task_sector("job0", raw_sector)

    assert resolved.job_name == "job0"
    assert resolved.task_name == "M1"
    assert resolved.output_target == "Screen"
    assert len(resolved.schema) == 78

    # spot-check the first three parsed columns
    leading_columns = (
        ColumnSchema("unnamed: 0", "bigint", ""),
        ColumnSchema("id", "bigint", "id_alias"),
        ColumnSchema("listing_url", "string", ""),
    )
    for position, column in enumerate(leading_columns):
        assert resolved.schema[position] == column
@@ -0,0 +1,26 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from .to_odps import to_odps_table
16
+
17
+
18
def _install():
    """Attach ``to_odps_table`` as a method on every class in ``DATAFRAME_TYPE``."""
    # imported here rather than at module level, as in the original layout
    from ..core import DATAFRAME_TYPE

    for frame_cls in DATAFRAME_TYPE:
        setattr(frame_cls, "to_odps_table", to_odps_table)


# run once at import time, then drop the helper from the module namespace
_install()
del _install
@@ -0,0 +1,19 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
16
+
17
+
18
class DataFrameDataStore(DataFrameOperator, DataFrameOperatorMixin):
    """Common base class for data-store operators; adds no behavior of its own."""
@@ -0,0 +1,227 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ... import opcodes
16
+ from ...serialization.serializables import (
17
+ AnyField,
18
+ BoolField,
19
+ DictField,
20
+ Int32Field,
21
+ Int64Field,
22
+ KeyField,
23
+ ListField,
24
+ StringField,
25
+ )
26
+ from ..utils import parse_index
27
+ from .core import DataFrameDataStore
28
+
29
+
30
class DataFrameToCSV(DataFrameDataStore):
    """Operator describing a CSV write of a DataFrame or Series.

    The serializable fields below carry the keyword arguments accepted by
    ``to_csv`` under the same names.  Calling the operator on an object
    returns an empty DataFrame/Series that stands for the pending write.
    """

    _op_type_ = opcodes.TO_CSV

    # the object being written
    input = KeyField("input")
    # target path; a "*" in it switches to multi-file output (see ``one_file``)
    path = AnyField("path")
    sep = StringField("sep")
    na_rep = StringField("na_rep")
    float_format = StringField("float_format")
    columns = ListField("columns")
    header = AnyField("header")
    index = BoolField("index")
    index_label = AnyField("index_label")
    mode = StringField("mode")
    encoding = StringField("encoding")
    compression = AnyField("compression")
    quoting = Int32Field("quoting")
    quotechar = StringField("quotechar")
    line_terminator = StringField("line_terminator")
    chunksize = Int64Field("chunksize")
    date_format = StringField("date_format")
    doublequote = BoolField("doublequote")
    escapechar = StringField("escapechar")
    decimal = StringField("decimal")
    storage_options = DictField("storage_options")

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    @property
    def one_file(self):
        """Whether all data goes into a single file (no wildcard in ``path``)."""
        # if wildcard in path, write csv into multiple files
        return "*" not in self.path

    @property
    def output_stat(self):
        """Whether a second, statistics output is produced; always ``False`` here.

        No field on this operator backs ``output_stat``.  The property used to
        return ``self.output_stat``, which recursed until ``RecursionError``
        and made ``output_limit`` unusable as well.
        """
        return False

    @property
    def output_limit(self):
        """Number of outputs: 1, or 2 when ``output_stat`` is set."""
        return 1 if not self.output_stat else 2

    def _set_inputs(self, inputs):
        super()._set_inputs(inputs)
        # Rebind the declared ``input`` key field; the attribute ``_input``
        # written previously is not a field of this class.
        # NOTE(review): assumes nothing outside this class reads ``_input``.
        self.input = self._inputs[0]

    def __call__(self, df):
        """Build the empty result object representing the write of ``df``."""
        # the result carries no rows, so only an empty slice of the index is kept
        index_value = parse_index(df.index_value.to_pandas()[:0], df)
        if df.ndim == 2:
            columns_value = parse_index(
                df.columns_value.to_pandas()[:0], store_data=True
            )
            return self.new_dataframe(
                [df],
                shape=(0, 0),
                dtypes=df.dtypes[:0],
                index_value=index_value,
                columns_value=columns_value,
            )
        else:
            return self.new_series(
                [df], shape=(0,), dtype=df.dtype, index_value=index_value
            )
92
+
93
+
94
def to_csv(
    df,
    path,
    sep=",",
    na_rep="",
    float_format=None,
    columns=None,
    header=True,
    index=True,
    index_label=None,
    mode="w",
    encoding=None,
    compression="infer",
    quoting=None,
    quotechar='"',
    line_terminator=None,
    chunksize=None,
    date_format=None,
    doublequote=True,
    escapechar=None,
    decimal=".",
    storage_options=None,
):
    r"""
    Write object to a comma-separated values (csv) file.

    The write is lazy: a ``DataFrameToCSV`` operator is built from the
    arguments and applied to ``df``; nothing is written until the returned
    object is executed.

    Parameters
    ----------
    path : str
        File path.
        If path is a string with wildcard e.g. '/to/path/out-*.csv',
        to_csv will try to write multiple files, for instance,
        chunk (0, 0) will write data into '/to/path/out-0.csv'.
        If path is a string without wildcard,
        all data will be written into a single file.
    sep : str, default ','
        String of length 1. Field delimiter for the output file.
    na_rep : str, default ''
        Missing data representation.
    float_format : str, default None
        Format string for floating point numbers.
    columns : sequence, optional
        Columns to write.
    header : bool or list of str, default True
        Write out the column names. If a list of strings is given it is
        assumed to be aliases for the column names.
    index : bool, default True
        Write row names (index).
    index_label : str or sequence, or False, default None
        Column label for index column(s) if desired. If None is given, and
        `header` and `index` are True, then the index names are used. A
        sequence should be given if the object uses MultiIndex. If
        False do not print fields for index names. Use index_label=False
        for easier importing in R.
    mode : str, default 'w'
        Python write mode. Only 'w' is supported; any other value raises
        ``NotImplementedError``.
    encoding : str, optional
        A string representing the encoding to use in the output file.
    compression : str or dict, default 'infer'
        If str, represents compression mode. If dict, value at 'method' is
        the compression mode. Compression mode may be any of the following
        possible values: {'infer', 'gzip', 'bz2', 'zip', 'xz', None}. If
        compression mode is 'infer' and `path` is path-like, then
        detect compression mode from the following extensions: '.gz',
        '.bz2', '.zip' or '.xz'. (otherwise no compression). If dict given
        and mode is 'zip' or inferred as 'zip', other entries passed as
        additional compression options.
    quoting : optional constant from csv module
        If you have set a `float_format` then floats are converted to
        strings and thus csv.QUOTE_NONNUMERIC will treat them as
        non-numeric.
    quotechar : str, default '\"'
        String of length 1. Character used to quote fields.
    line_terminator : str, optional
        The newline character or character sequence to use in the output
        file.
    chunksize : int or None
        Rows to write at a time.
    date_format : str, default None
        Format string for datetime objects.
    doublequote : bool, default True
        Control quoting of `quotechar` inside a field.
    escapechar : str, default None
        String of length 1. Character used to escape `sep` and `quotechar`
        when appropriate.
    decimal : str, default '.'
        Character recognized as decimal separator. E.g. use ',' for
        European data.
    storage_options : dict, optional
        Extra options stored on the operator and passed along unchanged.

    Returns
    -------
    DataFrame or Series
        An empty lazy object representing the pending write; call
        ``.execute()`` on it to perform the write.

    Raises
    ------
    NotImplementedError
        If `mode` is anything other than 'w'.

    See Also
    --------
    read_csv : Load a CSV file into a DataFrame.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame({'name': ['Raphael', 'Donatello'],
    ...                    'mask': ['red', 'purple'],
    ...                    'weapon': ['sai', 'bo staff']})
    >>> df.to_csv('out.csv', index=False).execute()
    """

    # reject unsupported write modes before any operator is built
    if mode != "w":  # pragma: no cover
        raise NotImplementedError("only support to_csv with mode 'w' for now")

    # every option is forwarded to the operator under its own name
    op_options = dict(
        path=path,
        sep=sep,
        na_rep=na_rep,
        float_format=float_format,
        columns=columns,
        header=header,
        index=index,
        index_label=index_label,
        mode=mode,
        encoding=encoding,
        compression=compression,
        quoting=quoting,
        quotechar=quotechar,
        line_terminator=line_terminator,
        chunksize=chunksize,
        date_format=date_format,
        doublequote=doublequote,
        escapechar=escapechar,
        decimal=decimal,
        storage_options=storage_options,
    )
    csv_op = DataFrameToCSV(**op_options)
    return csv_op(df)