maxframe 0.1.0b5__cp39-cp39-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (647)
  1. maxframe/__init__.py +32 -0
  2. maxframe/_utils.cpython-39-darwin.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyx +547 -0
  5. maxframe/codegen.py +528 -0
  6. maxframe/config/__init__.py +15 -0
  7. maxframe/config/config.py +443 -0
  8. maxframe/config/tests/__init__.py +13 -0
  9. maxframe/config/tests/test_config.py +103 -0
  10. maxframe/config/tests/test_validators.py +34 -0
  11. maxframe/config/validators.py +57 -0
  12. maxframe/conftest.py +139 -0
  13. maxframe/core/__init__.py +65 -0
  14. maxframe/core/base.py +156 -0
  15. maxframe/core/entity/__init__.py +44 -0
  16. maxframe/core/entity/chunks.py +68 -0
  17. maxframe/core/entity/core.py +152 -0
  18. maxframe/core/entity/executable.py +337 -0
  19. maxframe/core/entity/fuse.py +73 -0
  20. maxframe/core/entity/objects.py +100 -0
  21. maxframe/core/entity/output_types.py +90 -0
  22. maxframe/core/entity/tileables.py +438 -0
  23. maxframe/core/entity/utils.py +24 -0
  24. maxframe/core/graph/__init__.py +17 -0
  25. maxframe/core/graph/builder/__init__.py +16 -0
  26. maxframe/core/graph/builder/base.py +86 -0
  27. maxframe/core/graph/builder/chunk.py +430 -0
  28. maxframe/core/graph/builder/tileable.py +34 -0
  29. maxframe/core/graph/builder/utils.py +41 -0
  30. maxframe/core/graph/core.cpython-39-darwin.so +0 -0
  31. maxframe/core/graph/core.pyx +467 -0
  32. maxframe/core/graph/entity.py +171 -0
  33. maxframe/core/graph/tests/__init__.py +13 -0
  34. maxframe/core/graph/tests/test_graph.py +205 -0
  35. maxframe/core/mode.py +96 -0
  36. maxframe/core/operator/__init__.py +34 -0
  37. maxframe/core/operator/base.py +450 -0
  38. maxframe/core/operator/core.py +276 -0
  39. maxframe/core/operator/fetch.py +53 -0
  40. maxframe/core/operator/fuse.py +29 -0
  41. maxframe/core/operator/objects.py +72 -0
  42. maxframe/core/operator/shuffle.py +111 -0
  43. maxframe/core/operator/tests/__init__.py +13 -0
  44. maxframe/core/operator/tests/test_core.py +64 -0
  45. maxframe/core/tests/__init__.py +13 -0
  46. maxframe/core/tests/test_mode.py +75 -0
  47. maxframe/dataframe/__init__.py +81 -0
  48. maxframe/dataframe/arithmetic/__init__.py +359 -0
  49. maxframe/dataframe/arithmetic/abs.py +33 -0
  50. maxframe/dataframe/arithmetic/add.py +60 -0
  51. maxframe/dataframe/arithmetic/arccos.py +28 -0
  52. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  53. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  54. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  55. maxframe/dataframe/arithmetic/arctan.py +28 -0
  56. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  57. maxframe/dataframe/arithmetic/around.py +152 -0
  58. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  59. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  60. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  61. maxframe/dataframe/arithmetic/ceil.py +28 -0
  62. maxframe/dataframe/arithmetic/core.py +342 -0
  63. maxframe/dataframe/arithmetic/cos.py +28 -0
  64. maxframe/dataframe/arithmetic/cosh.py +28 -0
  65. maxframe/dataframe/arithmetic/degrees.py +28 -0
  66. maxframe/dataframe/arithmetic/docstring.py +442 -0
  67. maxframe/dataframe/arithmetic/equal.py +56 -0
  68. maxframe/dataframe/arithmetic/exp.py +28 -0
  69. maxframe/dataframe/arithmetic/exp2.py +28 -0
  70. maxframe/dataframe/arithmetic/expm1.py +28 -0
  71. maxframe/dataframe/arithmetic/floor.py +28 -0
  72. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  73. maxframe/dataframe/arithmetic/greater.py +57 -0
  74. maxframe/dataframe/arithmetic/greater_equal.py +57 -0
  75. maxframe/dataframe/arithmetic/invert.py +33 -0
  76. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  77. maxframe/dataframe/arithmetic/less.py +57 -0
  78. maxframe/dataframe/arithmetic/less_equal.py +57 -0
  79. maxframe/dataframe/arithmetic/log.py +28 -0
  80. maxframe/dataframe/arithmetic/log10.py +28 -0
  81. maxframe/dataframe/arithmetic/log2.py +28 -0
  82. maxframe/dataframe/arithmetic/mod.py +60 -0
  83. maxframe/dataframe/arithmetic/multiply.py +60 -0
  84. maxframe/dataframe/arithmetic/negative.py +33 -0
  85. maxframe/dataframe/arithmetic/not_equal.py +56 -0
  86. maxframe/dataframe/arithmetic/power.py +68 -0
  87. maxframe/dataframe/arithmetic/radians.py +28 -0
  88. maxframe/dataframe/arithmetic/sin.py +28 -0
  89. maxframe/dataframe/arithmetic/sinh.py +28 -0
  90. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  91. maxframe/dataframe/arithmetic/subtract.py +64 -0
  92. maxframe/dataframe/arithmetic/tan.py +28 -0
  93. maxframe/dataframe/arithmetic/tanh.py +28 -0
  94. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  95. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
  96. maxframe/dataframe/arithmetic/truediv.py +64 -0
  97. maxframe/dataframe/arithmetic/trunc.py +28 -0
  98. maxframe/dataframe/arrays.py +864 -0
  99. maxframe/dataframe/core.py +2417 -0
  100. maxframe/dataframe/datasource/__init__.py +15 -0
  101. maxframe/dataframe/datasource/core.py +81 -0
  102. maxframe/dataframe/datasource/dataframe.py +59 -0
  103. maxframe/dataframe/datasource/date_range.py +504 -0
  104. maxframe/dataframe/datasource/from_index.py +54 -0
  105. maxframe/dataframe/datasource/from_records.py +107 -0
  106. maxframe/dataframe/datasource/from_tensor.py +419 -0
  107. maxframe/dataframe/datasource/index.py +117 -0
  108. maxframe/dataframe/datasource/read_csv.py +528 -0
  109. maxframe/dataframe/datasource/read_odps_query.py +299 -0
  110. maxframe/dataframe/datasource/read_odps_table.py +253 -0
  111. maxframe/dataframe/datasource/read_parquet.py +421 -0
  112. maxframe/dataframe/datasource/series.py +55 -0
  113. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  114. maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
  115. maxframe/dataframe/datastore/__init__.py +26 -0
  116. maxframe/dataframe/datastore/core.py +19 -0
  117. maxframe/dataframe/datastore/to_csv.py +227 -0
  118. maxframe/dataframe/datastore/to_odps.py +162 -0
  119. maxframe/dataframe/extensions/__init__.py +41 -0
  120. maxframe/dataframe/extensions/accessor.py +50 -0
  121. maxframe/dataframe/extensions/reshuffle.py +83 -0
  122. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  123. maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
  124. maxframe/dataframe/fetch/__init__.py +15 -0
  125. maxframe/dataframe/fetch/core.py +86 -0
  126. maxframe/dataframe/groupby/__init__.py +82 -0
  127. maxframe/dataframe/groupby/aggregation.py +350 -0
  128. maxframe/dataframe/groupby/apply.py +251 -0
  129. maxframe/dataframe/groupby/core.py +179 -0
  130. maxframe/dataframe/groupby/cum.py +124 -0
  131. maxframe/dataframe/groupby/fill.py +141 -0
  132. maxframe/dataframe/groupby/getitem.py +92 -0
  133. maxframe/dataframe/groupby/head.py +105 -0
  134. maxframe/dataframe/groupby/sample.py +214 -0
  135. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  136. maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
  137. maxframe/dataframe/groupby/transform.py +255 -0
  138. maxframe/dataframe/indexing/__init__.py +84 -0
  139. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  140. maxframe/dataframe/indexing/align.py +349 -0
  141. maxframe/dataframe/indexing/at.py +83 -0
  142. maxframe/dataframe/indexing/getitem.py +204 -0
  143. maxframe/dataframe/indexing/iat.py +37 -0
  144. maxframe/dataframe/indexing/iloc.py +566 -0
  145. maxframe/dataframe/indexing/insert.py +86 -0
  146. maxframe/dataframe/indexing/loc.py +411 -0
  147. maxframe/dataframe/indexing/reindex.py +526 -0
  148. maxframe/dataframe/indexing/rename.py +462 -0
  149. maxframe/dataframe/indexing/rename_axis.py +209 -0
  150. maxframe/dataframe/indexing/reset_index.py +402 -0
  151. maxframe/dataframe/indexing/sample.py +221 -0
  152. maxframe/dataframe/indexing/set_axis.py +194 -0
  153. maxframe/dataframe/indexing/set_index.py +61 -0
  154. maxframe/dataframe/indexing/setitem.py +130 -0
  155. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  156. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  157. maxframe/dataframe/indexing/where.py +308 -0
  158. maxframe/dataframe/initializer.py +288 -0
  159. maxframe/dataframe/merge/__init__.py +32 -0
  160. maxframe/dataframe/merge/append.py +121 -0
  161. maxframe/dataframe/merge/concat.py +325 -0
  162. maxframe/dataframe/merge/merge.py +593 -0
  163. maxframe/dataframe/merge/tests/__init__.py +13 -0
  164. maxframe/dataframe/merge/tests/test_merge.py +215 -0
  165. maxframe/dataframe/misc/__init__.py +134 -0
  166. maxframe/dataframe/misc/_duplicate.py +46 -0
  167. maxframe/dataframe/misc/accessor.py +276 -0
  168. maxframe/dataframe/misc/apply.py +692 -0
  169. maxframe/dataframe/misc/astype.py +236 -0
  170. maxframe/dataframe/misc/case_when.py +141 -0
  171. maxframe/dataframe/misc/check_monotonic.py +84 -0
  172. maxframe/dataframe/misc/cut.py +383 -0
  173. maxframe/dataframe/misc/datetimes.py +79 -0
  174. maxframe/dataframe/misc/describe.py +108 -0
  175. maxframe/dataframe/misc/diff.py +210 -0
  176. maxframe/dataframe/misc/drop.py +440 -0
  177. maxframe/dataframe/misc/drop_duplicates.py +248 -0
  178. maxframe/dataframe/misc/duplicated.py +292 -0
  179. maxframe/dataframe/misc/eval.py +728 -0
  180. maxframe/dataframe/misc/explode.py +171 -0
  181. maxframe/dataframe/misc/get_dummies.py +208 -0
  182. maxframe/dataframe/misc/isin.py +217 -0
  183. maxframe/dataframe/misc/map.py +236 -0
  184. maxframe/dataframe/misc/melt.py +162 -0
  185. maxframe/dataframe/misc/memory_usage.py +248 -0
  186. maxframe/dataframe/misc/pct_change.py +150 -0
  187. maxframe/dataframe/misc/pivot_table.py +262 -0
  188. maxframe/dataframe/misc/qcut.py +104 -0
  189. maxframe/dataframe/misc/select_dtypes.py +104 -0
  190. maxframe/dataframe/misc/shift.py +256 -0
  191. maxframe/dataframe/misc/stack.py +238 -0
  192. maxframe/dataframe/misc/string_.py +221 -0
  193. maxframe/dataframe/misc/tests/__init__.py +13 -0
  194. maxframe/dataframe/misc/tests/test_misc.py +468 -0
  195. maxframe/dataframe/misc/to_numeric.py +178 -0
  196. maxframe/dataframe/misc/transform.py +361 -0
  197. maxframe/dataframe/misc/transpose.py +136 -0
  198. maxframe/dataframe/misc/value_counts.py +182 -0
  199. maxframe/dataframe/missing/__init__.py +53 -0
  200. maxframe/dataframe/missing/checkna.py +223 -0
  201. maxframe/dataframe/missing/dropna.py +280 -0
  202. maxframe/dataframe/missing/fillna.py +275 -0
  203. maxframe/dataframe/missing/replace.py +439 -0
  204. maxframe/dataframe/missing/tests/__init__.py +13 -0
  205. maxframe/dataframe/missing/tests/test_missing.py +89 -0
  206. maxframe/dataframe/operators.py +273 -0
  207. maxframe/dataframe/plotting/__init__.py +40 -0
  208. maxframe/dataframe/plotting/core.py +78 -0
  209. maxframe/dataframe/plotting/tests/__init__.py +13 -0
  210. maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
  211. maxframe/dataframe/reduction/__init__.py +107 -0
  212. maxframe/dataframe/reduction/aggregation.py +344 -0
  213. maxframe/dataframe/reduction/all.py +78 -0
  214. maxframe/dataframe/reduction/any.py +78 -0
  215. maxframe/dataframe/reduction/core.py +837 -0
  216. maxframe/dataframe/reduction/count.py +59 -0
  217. maxframe/dataframe/reduction/cummax.py +30 -0
  218. maxframe/dataframe/reduction/cummin.py +30 -0
  219. maxframe/dataframe/reduction/cumprod.py +30 -0
  220. maxframe/dataframe/reduction/cumsum.py +30 -0
  221. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  222. maxframe/dataframe/reduction/kurtosis.py +104 -0
  223. maxframe/dataframe/reduction/max.py +65 -0
  224. maxframe/dataframe/reduction/mean.py +61 -0
  225. maxframe/dataframe/reduction/min.py +65 -0
  226. maxframe/dataframe/reduction/nunique.py +141 -0
  227. maxframe/dataframe/reduction/prod.py +76 -0
  228. maxframe/dataframe/reduction/reduction_size.py +36 -0
  229. maxframe/dataframe/reduction/sem.py +69 -0
  230. maxframe/dataframe/reduction/skew.py +89 -0
  231. maxframe/dataframe/reduction/std.py +53 -0
  232. maxframe/dataframe/reduction/str_concat.py +48 -0
  233. maxframe/dataframe/reduction/sum.py +77 -0
  234. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  235. maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
  236. maxframe/dataframe/reduction/unique.py +90 -0
  237. maxframe/dataframe/reduction/var.py +72 -0
  238. maxframe/dataframe/sort/__init__.py +34 -0
  239. maxframe/dataframe/sort/core.py +36 -0
  240. maxframe/dataframe/sort/sort_index.py +153 -0
  241. maxframe/dataframe/sort/sort_values.py +311 -0
  242. maxframe/dataframe/sort/tests/__init__.py +13 -0
  243. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  244. maxframe/dataframe/statistics/__init__.py +33 -0
  245. maxframe/dataframe/statistics/corr.py +280 -0
  246. maxframe/dataframe/statistics/quantile.py +341 -0
  247. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  248. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  249. maxframe/dataframe/tests/__init__.py +13 -0
  250. maxframe/dataframe/tests/test_initializer.py +29 -0
  251. maxframe/dataframe/tseries/__init__.py +13 -0
  252. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  253. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  254. maxframe/dataframe/tseries/to_datetime.py +297 -0
  255. maxframe/dataframe/ufunc/__init__.py +27 -0
  256. maxframe/dataframe/ufunc/tensor.py +54 -0
  257. maxframe/dataframe/ufunc/ufunc.py +52 -0
  258. maxframe/dataframe/utils.py +1267 -0
  259. maxframe/dataframe/window/__init__.py +29 -0
  260. maxframe/dataframe/window/aggregation.py +96 -0
  261. maxframe/dataframe/window/core.py +69 -0
  262. maxframe/dataframe/window/ewm.py +249 -0
  263. maxframe/dataframe/window/expanding.py +147 -0
  264. maxframe/dataframe/window/rolling.py +376 -0
  265. maxframe/dataframe/window/tests/__init__.py +13 -0
  266. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  267. maxframe/dataframe/window/tests/test_expanding.py +66 -0
  268. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  269. maxframe/env.py +33 -0
  270. maxframe/errors.py +21 -0
  271. maxframe/extension.py +81 -0
  272. maxframe/learn/__init__.py +17 -0
  273. maxframe/learn/contrib/__init__.py +17 -0
  274. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  275. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  276. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  277. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  278. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  279. maxframe/learn/contrib/utils.py +52 -0
  280. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  281. maxframe/learn/contrib/xgboost/classifier.py +86 -0
  282. maxframe/learn/contrib/xgboost/core.py +156 -0
  283. maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
  284. maxframe/learn/contrib/xgboost/predict.py +138 -0
  285. maxframe/learn/contrib/xgboost/regressor.py +78 -0
  286. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  287. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  288. maxframe/learn/contrib/xgboost/train.py +121 -0
  289. maxframe/learn/utils/__init__.py +15 -0
  290. maxframe/learn/utils/core.py +29 -0
  291. maxframe/lib/__init__.py +15 -0
  292. maxframe/lib/aio/__init__.py +27 -0
  293. maxframe/lib/aio/_runners.py +162 -0
  294. maxframe/lib/aio/_threads.py +35 -0
  295. maxframe/lib/aio/base.py +82 -0
  296. maxframe/lib/aio/file.py +85 -0
  297. maxframe/lib/aio/isolation.py +100 -0
  298. maxframe/lib/aio/lru.py +242 -0
  299. maxframe/lib/aio/parallelism.py +37 -0
  300. maxframe/lib/aio/tests/__init__.py +13 -0
  301. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  302. maxframe/lib/compression.py +55 -0
  303. maxframe/lib/cython/__init__.py +13 -0
  304. maxframe/lib/cython/libcpp.pxd +30 -0
  305. maxframe/lib/filesystem/__init__.py +21 -0
  306. maxframe/lib/filesystem/_glob.py +173 -0
  307. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  308. maxframe/lib/filesystem/_oss_lib/common.py +198 -0
  309. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  310. maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
  311. maxframe/lib/filesystem/arrow.py +236 -0
  312. maxframe/lib/filesystem/base.py +263 -0
  313. maxframe/lib/filesystem/core.py +95 -0
  314. maxframe/lib/filesystem/fsmap.py +164 -0
  315. maxframe/lib/filesystem/hdfs.py +31 -0
  316. maxframe/lib/filesystem/local.py +112 -0
  317. maxframe/lib/filesystem/oss.py +157 -0
  318. maxframe/lib/filesystem/tests/__init__.py +13 -0
  319. maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
  320. maxframe/lib/filesystem/tests/test_oss.py +182 -0
  321. maxframe/lib/functools_compat.py +81 -0
  322. maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
  323. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  324. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  325. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  326. maxframe/lib/sparse/__init__.py +861 -0
  327. maxframe/lib/sparse/array.py +1604 -0
  328. maxframe/lib/sparse/core.py +92 -0
  329. maxframe/lib/sparse/matrix.py +241 -0
  330. maxframe/lib/sparse/tests/__init__.py +15 -0
  331. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  332. maxframe/lib/sparse/vector.py +150 -0
  333. maxframe/lib/tblib/LICENSE +20 -0
  334. maxframe/lib/tblib/__init__.py +327 -0
  335. maxframe/lib/tblib/cpython.py +83 -0
  336. maxframe/lib/tblib/decorators.py +44 -0
  337. maxframe/lib/tblib/pickling_support.py +90 -0
  338. maxframe/lib/tests/__init__.py +13 -0
  339. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  340. maxframe/lib/version.py +620 -0
  341. maxframe/lib/wrapped_pickle.py +139 -0
  342. maxframe/mixin.py +100 -0
  343. maxframe/odpsio/__init__.py +21 -0
  344. maxframe/odpsio/arrow.py +91 -0
  345. maxframe/odpsio/schema.py +364 -0
  346. maxframe/odpsio/tableio.py +322 -0
  347. maxframe/odpsio/tests/__init__.py +13 -0
  348. maxframe/odpsio/tests/test_arrow.py +88 -0
  349. maxframe/odpsio/tests/test_schema.py +297 -0
  350. maxframe/odpsio/tests/test_tableio.py +136 -0
  351. maxframe/odpsio/tests/test_volumeio.py +90 -0
  352. maxframe/odpsio/volumeio.py +95 -0
  353. maxframe/opcodes.py +590 -0
  354. maxframe/protocol.py +415 -0
  355. maxframe/remote/__init__.py +18 -0
  356. maxframe/remote/core.py +210 -0
  357. maxframe/remote/run_script.py +121 -0
  358. maxframe/serialization/__init__.py +26 -0
  359. maxframe/serialization/arrow.py +95 -0
  360. maxframe/serialization/core.cpython-39-darwin.so +0 -0
  361. maxframe/serialization/core.pxd +44 -0
  362. maxframe/serialization/core.pyi +61 -0
  363. maxframe/serialization/core.pyx +1094 -0
  364. maxframe/serialization/exception.py +86 -0
  365. maxframe/serialization/maxframe_objects.py +39 -0
  366. maxframe/serialization/numpy.py +91 -0
  367. maxframe/serialization/pandas.py +202 -0
  368. maxframe/serialization/scipy.py +71 -0
  369. maxframe/serialization/serializables/__init__.py +55 -0
  370. maxframe/serialization/serializables/core.py +262 -0
  371. maxframe/serialization/serializables/field.py +624 -0
  372. maxframe/serialization/serializables/field_type.py +589 -0
  373. maxframe/serialization/serializables/tests/__init__.py +13 -0
  374. maxframe/serialization/serializables/tests/test_field_type.py +121 -0
  375. maxframe/serialization/serializables/tests/test_serializable.py +250 -0
  376. maxframe/serialization/tests/__init__.py +13 -0
  377. maxframe/serialization/tests/test_serial.py +412 -0
  378. maxframe/session.py +1310 -0
  379. maxframe/tensor/__init__.py +183 -0
  380. maxframe/tensor/arithmetic/__init__.py +315 -0
  381. maxframe/tensor/arithmetic/abs.py +68 -0
  382. maxframe/tensor/arithmetic/absolute.py +68 -0
  383. maxframe/tensor/arithmetic/add.py +82 -0
  384. maxframe/tensor/arithmetic/angle.py +72 -0
  385. maxframe/tensor/arithmetic/arccos.py +104 -0
  386. maxframe/tensor/arithmetic/arccosh.py +91 -0
  387. maxframe/tensor/arithmetic/arcsin.py +94 -0
  388. maxframe/tensor/arithmetic/arcsinh.py +86 -0
  389. maxframe/tensor/arithmetic/arctan.py +106 -0
  390. maxframe/tensor/arithmetic/arctan2.py +128 -0
  391. maxframe/tensor/arithmetic/arctanh.py +86 -0
  392. maxframe/tensor/arithmetic/around.py +114 -0
  393. maxframe/tensor/arithmetic/bitand.py +95 -0
  394. maxframe/tensor/arithmetic/bitor.py +102 -0
  395. maxframe/tensor/arithmetic/bitxor.py +95 -0
  396. maxframe/tensor/arithmetic/cbrt.py +66 -0
  397. maxframe/tensor/arithmetic/ceil.py +71 -0
  398. maxframe/tensor/arithmetic/clip.py +165 -0
  399. maxframe/tensor/arithmetic/conj.py +74 -0
  400. maxframe/tensor/arithmetic/copysign.py +78 -0
  401. maxframe/tensor/arithmetic/core.py +544 -0
  402. maxframe/tensor/arithmetic/cos.py +85 -0
  403. maxframe/tensor/arithmetic/cosh.py +72 -0
  404. maxframe/tensor/arithmetic/deg2rad.py +72 -0
  405. maxframe/tensor/arithmetic/degrees.py +77 -0
  406. maxframe/tensor/arithmetic/divide.py +114 -0
  407. maxframe/tensor/arithmetic/equal.py +76 -0
  408. maxframe/tensor/arithmetic/exp.py +106 -0
  409. maxframe/tensor/arithmetic/exp2.py +67 -0
  410. maxframe/tensor/arithmetic/expm1.py +79 -0
  411. maxframe/tensor/arithmetic/fabs.py +74 -0
  412. maxframe/tensor/arithmetic/fix.py +69 -0
  413. maxframe/tensor/arithmetic/float_power.py +103 -0
  414. maxframe/tensor/arithmetic/floor.py +77 -0
  415. maxframe/tensor/arithmetic/floordiv.py +94 -0
  416. maxframe/tensor/arithmetic/fmax.py +105 -0
  417. maxframe/tensor/arithmetic/fmin.py +106 -0
  418. maxframe/tensor/arithmetic/fmod.py +99 -0
  419. maxframe/tensor/arithmetic/frexp.py +92 -0
  420. maxframe/tensor/arithmetic/greater.py +77 -0
  421. maxframe/tensor/arithmetic/greater_equal.py +69 -0
  422. maxframe/tensor/arithmetic/hypot.py +77 -0
  423. maxframe/tensor/arithmetic/i0.py +89 -0
  424. maxframe/tensor/arithmetic/imag.py +67 -0
  425. maxframe/tensor/arithmetic/invert.py +110 -0
  426. maxframe/tensor/arithmetic/isclose.py +115 -0
  427. maxframe/tensor/arithmetic/iscomplex.py +64 -0
  428. maxframe/tensor/arithmetic/isfinite.py +106 -0
  429. maxframe/tensor/arithmetic/isinf.py +103 -0
  430. maxframe/tensor/arithmetic/isnan.py +82 -0
  431. maxframe/tensor/arithmetic/isreal.py +63 -0
  432. maxframe/tensor/arithmetic/ldexp.py +99 -0
  433. maxframe/tensor/arithmetic/less.py +69 -0
  434. maxframe/tensor/arithmetic/less_equal.py +69 -0
  435. maxframe/tensor/arithmetic/log.py +92 -0
  436. maxframe/tensor/arithmetic/log10.py +85 -0
  437. maxframe/tensor/arithmetic/log1p.py +95 -0
  438. maxframe/tensor/arithmetic/log2.py +85 -0
  439. maxframe/tensor/arithmetic/logaddexp.py +80 -0
  440. maxframe/tensor/arithmetic/logaddexp2.py +78 -0
  441. maxframe/tensor/arithmetic/logical_and.py +81 -0
  442. maxframe/tensor/arithmetic/logical_not.py +74 -0
  443. maxframe/tensor/arithmetic/logical_or.py +82 -0
  444. maxframe/tensor/arithmetic/logical_xor.py +88 -0
  445. maxframe/tensor/arithmetic/lshift.py +82 -0
  446. maxframe/tensor/arithmetic/maximum.py +108 -0
  447. maxframe/tensor/arithmetic/minimum.py +108 -0
  448. maxframe/tensor/arithmetic/mod.py +104 -0
  449. maxframe/tensor/arithmetic/modf.py +83 -0
  450. maxframe/tensor/arithmetic/multiply.py +81 -0
  451. maxframe/tensor/arithmetic/nan_to_num.py +99 -0
  452. maxframe/tensor/arithmetic/negative.py +65 -0
  453. maxframe/tensor/arithmetic/nextafter.py +68 -0
  454. maxframe/tensor/arithmetic/not_equal.py +72 -0
  455. maxframe/tensor/arithmetic/positive.py +47 -0
  456. maxframe/tensor/arithmetic/power.py +106 -0
  457. maxframe/tensor/arithmetic/rad2deg.py +71 -0
  458. maxframe/tensor/arithmetic/radians.py +77 -0
  459. maxframe/tensor/arithmetic/real.py +70 -0
  460. maxframe/tensor/arithmetic/reciprocal.py +76 -0
  461. maxframe/tensor/arithmetic/rint.py +68 -0
  462. maxframe/tensor/arithmetic/rshift.py +81 -0
  463. maxframe/tensor/arithmetic/setimag.py +29 -0
  464. maxframe/tensor/arithmetic/setreal.py +29 -0
  465. maxframe/tensor/arithmetic/sign.py +81 -0
  466. maxframe/tensor/arithmetic/signbit.py +65 -0
  467. maxframe/tensor/arithmetic/sin.py +98 -0
  468. maxframe/tensor/arithmetic/sinc.py +102 -0
  469. maxframe/tensor/arithmetic/sinh.py +93 -0
  470. maxframe/tensor/arithmetic/spacing.py +72 -0
  471. maxframe/tensor/arithmetic/sqrt.py +81 -0
  472. maxframe/tensor/arithmetic/square.py +69 -0
  473. maxframe/tensor/arithmetic/subtract.py +81 -0
  474. maxframe/tensor/arithmetic/tan.py +88 -0
  475. maxframe/tensor/arithmetic/tanh.py +92 -0
  476. maxframe/tensor/arithmetic/tests/__init__.py +15 -0
  477. maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
  478. maxframe/tensor/arithmetic/truediv.py +104 -0
  479. maxframe/tensor/arithmetic/trunc.py +72 -0
  480. maxframe/tensor/arithmetic/utils.py +65 -0
  481. maxframe/tensor/array_utils.py +186 -0
  482. maxframe/tensor/base/__init__.py +34 -0
  483. maxframe/tensor/base/astype.py +119 -0
  484. maxframe/tensor/base/atleast_1d.py +74 -0
  485. maxframe/tensor/base/broadcast_to.py +89 -0
  486. maxframe/tensor/base/ravel.py +92 -0
  487. maxframe/tensor/base/tests/__init__.py +13 -0
  488. maxframe/tensor/base/tests/test_base.py +114 -0
  489. maxframe/tensor/base/transpose.py +125 -0
  490. maxframe/tensor/base/unique.py +205 -0
  491. maxframe/tensor/base/where.py +127 -0
  492. maxframe/tensor/core.py +724 -0
  493. maxframe/tensor/datasource/__init__.py +32 -0
  494. maxframe/tensor/datasource/arange.py +156 -0
  495. maxframe/tensor/datasource/array.py +415 -0
  496. maxframe/tensor/datasource/core.py +109 -0
  497. maxframe/tensor/datasource/empty.py +169 -0
  498. maxframe/tensor/datasource/from_dataframe.py +70 -0
  499. maxframe/tensor/datasource/from_dense.py +54 -0
  500. maxframe/tensor/datasource/from_sparse.py +47 -0
  501. maxframe/tensor/datasource/full.py +186 -0
  502. maxframe/tensor/datasource/ones.py +173 -0
  503. maxframe/tensor/datasource/scalar.py +40 -0
  504. maxframe/tensor/datasource/tests/__init__.py +13 -0
  505. maxframe/tensor/datasource/tests/test_datasource.py +278 -0
  506. maxframe/tensor/datasource/zeros.py +188 -0
  507. maxframe/tensor/fetch/__init__.py +15 -0
  508. maxframe/tensor/fetch/core.py +54 -0
  509. maxframe/tensor/indexing/__init__.py +47 -0
  510. maxframe/tensor/indexing/choose.py +196 -0
  511. maxframe/tensor/indexing/compress.py +124 -0
  512. maxframe/tensor/indexing/core.py +190 -0
  513. maxframe/tensor/indexing/extract.py +71 -0
  514. maxframe/tensor/indexing/fill_diagonal.py +183 -0
  515. maxframe/tensor/indexing/flatnonzero.py +60 -0
  516. maxframe/tensor/indexing/getitem.py +175 -0
  517. maxframe/tensor/indexing/nonzero.py +120 -0
  518. maxframe/tensor/indexing/setitem.py +132 -0
  519. maxframe/tensor/indexing/slice.py +29 -0
  520. maxframe/tensor/indexing/take.py +130 -0
  521. maxframe/tensor/indexing/tests/__init__.py +15 -0
  522. maxframe/tensor/indexing/tests/test_indexing.py +234 -0
  523. maxframe/tensor/indexing/unravel_index.py +103 -0
  524. maxframe/tensor/merge/__init__.py +15 -0
  525. maxframe/tensor/merge/stack.py +132 -0
  526. maxframe/tensor/merge/tests/__init__.py +13 -0
  527. maxframe/tensor/merge/tests/test_merge.py +52 -0
  528. maxframe/tensor/operators.py +123 -0
  529. maxframe/tensor/random/__init__.py +168 -0
  530. maxframe/tensor/random/beta.py +87 -0
  531. maxframe/tensor/random/binomial.py +137 -0
  532. maxframe/tensor/random/bytes.py +39 -0
  533. maxframe/tensor/random/chisquare.py +110 -0
  534. maxframe/tensor/random/choice.py +186 -0
  535. maxframe/tensor/random/core.py +234 -0
  536. maxframe/tensor/random/dirichlet.py +123 -0
  537. maxframe/tensor/random/exponential.py +94 -0
  538. maxframe/tensor/random/f.py +135 -0
  539. maxframe/tensor/random/gamma.py +128 -0
  540. maxframe/tensor/random/geometric.py +93 -0
  541. maxframe/tensor/random/gumbel.py +167 -0
  542. maxframe/tensor/random/hypergeometric.py +148 -0
  543. maxframe/tensor/random/laplace.py +133 -0
  544. maxframe/tensor/random/logistic.py +129 -0
  545. maxframe/tensor/random/lognormal.py +159 -0
  546. maxframe/tensor/random/logseries.py +122 -0
  547. maxframe/tensor/random/multinomial.py +133 -0
  548. maxframe/tensor/random/multivariate_normal.py +192 -0
  549. maxframe/tensor/random/negative_binomial.py +125 -0
  550. maxframe/tensor/random/noncentral_chisquare.py +132 -0
  551. maxframe/tensor/random/noncentral_f.py +126 -0
  552. maxframe/tensor/random/normal.py +143 -0
  553. maxframe/tensor/random/pareto.py +140 -0
  554. maxframe/tensor/random/permutation.py +104 -0
  555. maxframe/tensor/random/poisson.py +111 -0
  556. maxframe/tensor/random/power.py +142 -0
  557. maxframe/tensor/random/rand.py +82 -0
  558. maxframe/tensor/random/randint.py +121 -0
  559. maxframe/tensor/random/randn.py +96 -0
  560. maxframe/tensor/random/random_integers.py +123 -0
  561. maxframe/tensor/random/random_sample.py +86 -0
  562. maxframe/tensor/random/rayleigh.py +110 -0
  563. maxframe/tensor/random/shuffle.py +61 -0
  564. maxframe/tensor/random/standard_cauchy.py +105 -0
  565. maxframe/tensor/random/standard_exponential.py +72 -0
  566. maxframe/tensor/random/standard_gamma.py +120 -0
  567. maxframe/tensor/random/standard_normal.py +74 -0
  568. maxframe/tensor/random/standard_t.py +135 -0
  569. maxframe/tensor/random/tests/__init__.py +15 -0
  570. maxframe/tensor/random/tests/test_random.py +167 -0
  571. maxframe/tensor/random/triangular.py +119 -0
  572. maxframe/tensor/random/uniform.py +131 -0
  573. maxframe/tensor/random/vonmises.py +131 -0
  574. maxframe/tensor/random/wald.py +114 -0
  575. maxframe/tensor/random/weibull.py +140 -0
  576. maxframe/tensor/random/zipf.py +122 -0
  577. maxframe/tensor/rechunk/__init__.py +26 -0
  578. maxframe/tensor/rechunk/rechunk.py +43 -0
  579. maxframe/tensor/reduction/__init__.py +66 -0
  580. maxframe/tensor/reduction/all.py +103 -0
  581. maxframe/tensor/reduction/allclose.py +88 -0
  582. maxframe/tensor/reduction/any.py +105 -0
  583. maxframe/tensor/reduction/argmax.py +103 -0
  584. maxframe/tensor/reduction/argmin.py +103 -0
  585. maxframe/tensor/reduction/array_equal.py +64 -0
  586. maxframe/tensor/reduction/core.py +168 -0
  587. maxframe/tensor/reduction/count_nonzero.py +81 -0
  588. maxframe/tensor/reduction/cumprod.py +97 -0
  589. maxframe/tensor/reduction/cumsum.py +101 -0
  590. maxframe/tensor/reduction/max.py +120 -0
  591. maxframe/tensor/reduction/mean.py +123 -0
  592. maxframe/tensor/reduction/min.py +120 -0
  593. maxframe/tensor/reduction/nanargmax.py +82 -0
  594. maxframe/tensor/reduction/nanargmin.py +76 -0
  595. maxframe/tensor/reduction/nancumprod.py +91 -0
  596. maxframe/tensor/reduction/nancumsum.py +94 -0
  597. maxframe/tensor/reduction/nanmax.py +111 -0
  598. maxframe/tensor/reduction/nanmean.py +106 -0
  599. maxframe/tensor/reduction/nanmin.py +111 -0
  600. maxframe/tensor/reduction/nanprod.py +94 -0
  601. maxframe/tensor/reduction/nanstd.py +126 -0
  602. maxframe/tensor/reduction/nansum.py +115 -0
  603. maxframe/tensor/reduction/nanvar.py +149 -0
  604. maxframe/tensor/reduction/prod.py +130 -0
  605. maxframe/tensor/reduction/std.py +134 -0
  606. maxframe/tensor/reduction/sum.py +125 -0
  607. maxframe/tensor/reduction/tests/__init__.py +13 -0
  608. maxframe/tensor/reduction/tests/test_reduction.py +181 -0
  609. maxframe/tensor/reduction/var.py +176 -0
  610. maxframe/tensor/reshape/__init__.py +17 -0
  611. maxframe/tensor/reshape/reshape.py +188 -0
  612. maxframe/tensor/reshape/tests/__init__.py +15 -0
  613. maxframe/tensor/reshape/tests/test_reshape.py +37 -0
  614. maxframe/tensor/statistics/__init__.py +13 -0
  615. maxframe/tensor/statistics/percentile.py +175 -0
  616. maxframe/tensor/statistics/quantile.py +288 -0
  617. maxframe/tensor/ufunc/__init__.py +26 -0
  618. maxframe/tensor/ufunc/ufunc.py +200 -0
  619. maxframe/tensor/utils.py +718 -0
  620. maxframe/tests/__init__.py +13 -0
  621. maxframe/tests/test_codegen.py +69 -0
  622. maxframe/tests/test_protocol.py +144 -0
  623. maxframe/tests/test_utils.py +376 -0
  624. maxframe/tests/utils.py +164 -0
  625. maxframe/typing_.py +37 -0
  626. maxframe/udf.py +134 -0
  627. maxframe/utils.py +1114 -0
  628. maxframe-0.1.0b5.dist-info/METADATA +104 -0
  629. maxframe-0.1.0b5.dist-info/RECORD +647 -0
  630. maxframe-0.1.0b5.dist-info/WHEEL +5 -0
  631. maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
  632. maxframe_client/__init__.py +17 -0
  633. maxframe_client/clients/__init__.py +13 -0
  634. maxframe_client/clients/framedriver.py +118 -0
  635. maxframe_client/clients/spe.py +104 -0
  636. maxframe_client/conftest.py +15 -0
  637. maxframe_client/fetcher.py +264 -0
  638. maxframe_client/session/__init__.py +22 -0
  639. maxframe_client/session/consts.py +36 -0
  640. maxframe_client/session/graph.py +119 -0
  641. maxframe_client/session/odps.py +482 -0
  642. maxframe_client/session/task.py +280 -0
  643. maxframe_client/session/tests/__init__.py +13 -0
  644. maxframe_client/session/tests/test_task.py +85 -0
  645. maxframe_client/tests/__init__.py +13 -0
  646. maxframe_client/tests/test_fetcher.py +89 -0
  647. maxframe_client/tests/test_session.py +255 -0
@@ -0,0 +1,215 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+ import pytest
18
+
19
+ from ...core import IndexValue
20
+ from ...datasource.dataframe import from_pandas
21
+ from .. import DataFrameMerge, concat
22
+
23
+
24
def test_merge():
    """Check the operator built by ``DataFrame.merge`` for several key layouts.

    For every parameter set the resulting tileable must be backed by a
    ``DataFrameMerge`` operator, carry the requested ``how`` (pandas' default
    is ``"inner"``) and expose the same output columns pandas produces for
    the equivalent merge of the raw frames.
    """
    df1 = pd.DataFrame(
        np.arange(20).reshape((4, 5)) + 1, columns=["a", "b", "c", "d", "e"]
    )
    df2 = pd.DataFrame(np.arange(20).reshape((5, 4)) + 1, columns=["a", "b", "x", "y"])

    mdf1 = from_pandas(df1, chunk_size=2)
    mdf2 = from_pandas(df2, chunk_size=3)

    # The original list contained the "right / left_on / right_index" case
    # twice; the exact duplicate added no coverage and has been dropped.
    parameters = [
        {},
        {"how": "left", "right_on": "x", "left_index": True},
        {"how": "right", "left_on": "a", "right_index": True},
        {"how": "left", "left_on": "a", "right_on": "x"},
        {"how": "right", "on": "a"},
        {"how": "inner", "on": ["a", "b"]},
    ]

    for kw in parameters:
        df = mdf1.merge(mdf2, **kw)
        # Reference result computed by pandas on the source frames.
        expected = df1.merge(df2, **kw)

        assert isinstance(df.op, DataFrameMerge)
        assert df.op.how == kw.get("how", "inner")
        # Previously this compared ``df.columns_value`` with itself, which can
        # never fail; compare against the pandas result instead.
        pd.testing.assert_index_equal(df.columns_value.to_pandas(), expected.columns)
51
+
52
+
53
def test_merge_invalid_parameters():
    """Invalid bloom-filter arguments to ``merge`` must raise.

    Each case pairs the exception type expected from ``DataFrame.merge`` with
    the offending keyword arguments.
    """
    left_raw = pd.DataFrame(
        np.arange(20).reshape((4, 5)) + 1, columns=["a", "b", "c", "d", "e"]
    )
    right_raw = pd.DataFrame(
        np.arange(20).reshape((5, 4)) + 1, columns=["a", "b", "x", "y"]
    )

    left = from_pandas(left_raw, chunk_size=2)
    right = from_pandas(right_raw, chunk_size=3)

    invalid_cases = [
        (ValueError, {"bloom_filter": "wrong"}),
        (TypeError, {"bloom_filter_options": "wrong"}),
        (ValueError, {"bloom_filter_options": {"wrong": 1}}),
        (ValueError, {"bloom_filter_options": {"filter": "wrong"}}),
    ]

    for expected_error, merge_kwargs in invalid_cases:
        with pytest.raises(expected_error):
            left.merge(right, **merge_kwargs)
73
+
74
+
75
def test_join():
    """Check the operator built by index-based ``DataFrame.join``.

    The joined tileable must be backed by a ``DataFrameMerge`` operator, carry
    the requested ``how`` (``join`` defaults to ``"left"``) and expose the
    same output columns as ``pandas.DataFrame.join`` on the raw frames.
    """
    df1 = pd.DataFrame([[1, 3, 3], [4, 2, 6], [7, 8, 9]], index=["a1", "a2", "a3"])
    df2 = pd.DataFrame([[1, 2, 3], [1, 5, 6], [7, 8, 9]], index=["a1", "b2", "b3"]) + 1
    df2 = pd.concat([df2, df2 + 1])

    mdf1 = from_pandas(df1, chunk_size=2)
    mdf2 = from_pandas(df2, chunk_size=2)

    # The original list repeated the ``how="left"`` case verbatim; the exact
    # duplicate has been dropped.
    parameters = [
        {"lsuffix": "l_", "rsuffix": "r_"},
        {"lsuffix": "l_", "rsuffix": "r_", "how": "left"},
        {"lsuffix": "l_", "rsuffix": "r_", "how": "right"},
        {"lsuffix": "l_", "rsuffix": "r_", "how": "inner"},
    ]

    for kw in parameters:
        df = mdf1.join(mdf2, auto_merge="none", bloom_filter=False, **kw)
        # ``auto_merge`` / ``bloom_filter`` are MaxFrame-only options, so the
        # pandas reference call receives just the shared keywords.
        expected = df1.join(df2, **kw)

        assert isinstance(df.op, DataFrameMerge)
        assert df.op.how == kw.get("how", "left")
        # Previously this compared ``df.columns_value`` with itself, which can
        # never fail; compare against the pandas result instead.
        pd.testing.assert_index_equal(df.columns_value.to_pandas(), expected.columns)
99
+
100
+
101
def test_join_on():
    """Check the operator built by ``DataFrame.join`` with an ``on`` column.

    The joined tileable must be backed by a ``DataFrameMerge`` operator, carry
    the requested ``how`` (``join`` defaults to ``"left"``) and expose the
    same output columns as ``pandas.DataFrame.join`` on the raw frames.
    """
    df1 = pd.DataFrame([[1, 3, 3], [4, 2, 6], [7, 8, 9]], columns=["a1", "a2", "a3"])
    df2 = (
        pd.DataFrame([[1, 2, 3], [1, 5, 6], [7, 8, 9]], columns=["a1", "b2", "b3"]) + 1
    )
    df2 = pd.concat([df2, df2 + 1])

    mdf1 = from_pandas(df1, chunk_size=2)
    mdf2 = from_pandas(df2, chunk_size=2)

    parameters = [
        {"lsuffix": "l_", "rsuffix": "r_"},
        {"lsuffix": "l_", "rsuffix": "r_", "how": "left", "on": "a1"},
        {"lsuffix": "l_", "rsuffix": "r_", "how": "right", "on": "a2"},
        {"lsuffix": "l_", "rsuffix": "r_", "how": "inner", "on": "a2"},
        {"lsuffix": "l_", "rsuffix": "r_", "how": "outer", "on": "a2"},
    ]

    for kw in parameters:
        df = mdf1.join(mdf2, auto_merge="none", bloom_filter=False, **kw)
        # ``auto_merge`` / ``bloom_filter`` are MaxFrame-only options, so the
        # pandas reference call receives just the shared keywords.
        expected = df1.join(df2, **kw)

        assert isinstance(df.op, DataFrameMerge)
        assert df.op.how == kw.get("how", "left")
        # Previously this compared ``df.columns_value`` with itself, which can
        # never fail; compare against the pandas result instead.
        pd.testing.assert_index_equal(df.columns_value.to_pandas(), expected.columns)
127
+
128
+
129
def test_append():
    """``DataFrame.append`` stacks rows and derives the right index kind.

    Without ``ignore_index`` the result index is an ``Int64Index`` value; with
    ``ignore_index=True`` it is a fresh ``RangeIndex`` over all 20 rows.
    """
    top = pd.DataFrame(np.random.rand(10, 4), columns=list("ABCD"))
    bottom = pd.DataFrame(np.random.rand(10, 4), columns=list("ABCD"))
    expected_shape = (20, 4)

    # Default behaviour: the original indexes are kept.
    kept = from_pandas(top, chunk_size=3).append(from_pandas(bottom, chunk_size=3))
    assert kept.shape == expected_shape
    assert isinstance(kept.index_value.value, IndexValue.Int64Index)

    # ignore_index=True: the index is rebuilt as 0..19.
    rebuilt = from_pandas(top, chunk_size=3).append(
        from_pandas(bottom, chunk_size=3), ignore_index=True
    )
    assert rebuilt.shape == expected_shape
    assert isinstance(rebuilt.index_value.value, IndexValue.RangeIndex)
    pd.testing.assert_index_equal(rebuilt.index_value.to_pandas(), pd.RangeIndex(20))
147
+
148
+
149
def test_concat():
    """Exercise ``concat`` along both axes and with ``join="inner"``.

    Covers row-wise concatenation of frames with default, contiguous and
    shuffled indexes, column-wise concatenation of differently chunked frames,
    and an inner join on partially overlapping columns.
    """
    df1 = pd.DataFrame(np.random.rand(10, 4), columns=list("ABCD"))
    df2 = pd.DataFrame(np.random.rand(10, 4), columns=list("ABCD"))

    def check_rowwise(result):
        # Every row-wise case yields 20 rows with df1's dtypes.
        assert result.shape == (20, 4)
        pd.testing.assert_series_equal(result.dtypes, df1.dtypes)

    mdf1 = from_pandas(df1, chunk_size=4)
    mdf2 = from_pandas(df2, chunk_size=4)
    check_rowwise(concat([mdf1, mdf2], axis="index"))

    # Second frame continues the first frame's RangeIndex (10..19).
    df3 = pd.DataFrame(
        np.random.rand(10, 4), columns=list("ABCD"), index=pd.RangeIndex(10, 20)
    )
    mdf3 = from_pandas(df3, chunk_size=4)
    contiguous = concat([mdf1, mdf3], axis="index")
    check_rowwise(contiguous)
    pd.testing.assert_index_equal(
        contiguous.index_value.to_pandas(), pd.RangeIndex(20)
    )

    # A shuffled integer index: the result's index value converts to an
    # empty int64 index regardless of operand order.
    df4 = pd.DataFrame(
        np.random.rand(10, 4),
        columns=list("ABCD"),
        index=np.random.permutation(np.arange(10)),
    )
    mdf4 = from_pandas(df4, chunk_size=4)
    for operands in ([mdf1, mdf4], [mdf4, mdf1], [mdf4, mdf4]):
        shuffled = concat(operands, axis="index")
        check_rowwise(shuffled)
        pd.testing.assert_index_equal(
            shuffled.index_value.to_pandas(), pd.Index([], dtype=np.int64)
        )

    # Column-wise concatenation of frames with different chunk sizes.
    mdf1 = from_pandas(df1, chunk_size=3)
    mdf2 = from_pandas(df2, chunk_size=4)
    side_by_side = concat([mdf1, mdf2], axis="columns")
    assert side_by_side.shape == (10, 8)
    pd.testing.assert_series_equal(
        side_by_side.dtypes, pd.concat([df1, df2], axis="columns").dtypes
    )

    # Inner join keeps only the columns shared by both frames.
    df1 = pd.DataFrame(np.random.rand(10, 4), columns=list("ABCD"))
    df2 = pd.DataFrame(np.random.rand(10, 3), columns=list("ABC"))
    mdf1 = from_pandas(df1, chunk_size=3)
    mdf2 = from_pandas(df2, chunk_size=3)
    inner = concat([mdf1, mdf2], join="inner")
    assert inner.shape == (20, 3)
@@ -0,0 +1,134 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from .apply import df_apply, series_apply
16
+ from .astype import astype, index_astype
17
+ from .case_when import case_when
18
+ from .check_monotonic import (
19
+ check_monotonic,
20
+ is_monotonic,
21
+ is_monotonic_decreasing,
22
+ is_monotonic_increasing,
23
+ )
24
+ from .cut import cut
25
+ from .describe import describe
26
+ from .diff import df_diff, series_diff
27
+ from .drop import df_drop, df_pop, index_drop, series_drop
28
+ from .drop_duplicates import (
29
+ df_drop_duplicates,
30
+ index_drop_duplicates,
31
+ series_drop_duplicates,
32
+ )
33
+ from .duplicated import df_duplicated, index_duplicated, series_duplicated
34
+ from .eval import df_eval, df_query
35
+ from .explode import df_explode, series_explode
36
+ from .isin import df_isin, series_isin
37
+ from .map import index_map, series_map
38
+ from .melt import melt
39
+ from .memory_usage import df_memory_usage, index_memory_usage, series_memory_usage
40
+ from .pct_change import pct_change
41
+ from .pivot_table import pivot_table
42
+ from .qcut import qcut
43
+ from .select_dtypes import select_dtypes
44
+ from .shift import shift, tshift
45
+ from .stack import stack
46
+ from .transform import df_transform, series_transform
47
+ from .transpose import transpose
48
+ from .value_counts import value_counts
49
+
50
+
51
def _install():
    """Attach the functions imported by this module to the entity classes.

    Methods are set on every class in ``DATAFRAME_TYPE``, ``SERIES_TYPE`` and
    ``INDEX_TYPE`` (in that order), string/datetime handler names are
    registered on their accessor classes when not already defined there, and
    the ``str`` / ``dt`` accessors are exposed on every series class.
    """
    from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
    from .accessor import CachedAccessor, DatetimeAccessor, StringAccessor
    from .datetimes import _datetime_method_to_handlers
    from .string_ import _string_method_to_handlers

    dataframe_members = {
        "apply": df_apply,
        "astype": astype,
        "describe": describe,
        # ``del df[items]`` drops the columns in place.
        "__delitem__": lambda df, items: df_drop(df, items, axis=1, inplace=True),
        "diff": df_diff,
        "drop_duplicates": df_drop_duplicates,
        "duplicated": df_duplicated,
        "drop": df_drop,
        "eval": df_eval,
        "explode": df_explode,
        "isin": df_isin,
        "melt": melt,
        "memory_usage": df_memory_usage,
        "pct_change": pct_change,
        "pivot_table": pivot_table,
        "pop": df_pop,
        "query": df_query,
        "select_dtypes": select_dtypes,
        "shift": shift,
        "stack": stack,
        "transform": df_transform,
        "transpose": transpose,
        "tshift": tshift,
    }

    series_members = {
        "apply": series_apply,
        "astype": astype,
        "case_when": case_when,
        "check_monotonic": check_monotonic,
        "describe": describe,
        "diff": series_diff,
        "drop": series_drop,
        "drop_duplicates": series_drop_duplicates,
        "duplicated": series_duplicated,
        "explode": series_explode,
        "is_monotonic": property(fget=is_monotonic),
        "is_monotonic_decreasing": property(fget=is_monotonic_decreasing),
        "is_monotonic_increasing": property(fget=is_monotonic_increasing),
        "isin": series_isin,
        "map": series_map,
        "memory_usage": series_memory_usage,
        "pct_change": pct_change,
        "shift": shift,
        "transform": series_transform,
        "tshift": tshift,
        "value_counts": value_counts,
    }

    index_members = {
        "astype": index_astype,
        "check_monotonic": check_monotonic,
        "drop": index_drop,
        "drop_duplicates": index_drop_duplicates,
        "duplicated": index_duplicated,
        "is_monotonic": property(fget=is_monotonic),
        "is_monotonic_increasing": property(fget=is_monotonic_increasing),
        "is_monotonic_decreasing": property(fget=is_monotonic_decreasing),
        "map": index_map,
        "memory_usage": index_memory_usage,
        "value_counts": value_counts,
    }

    # Same order as before: DataFrame types, then Series, then Index.
    for entity_types, members in (
        (DATAFRAME_TYPE, dataframe_members),
        (SERIES_TYPE, series_members),
        (INDEX_TYPE, index_members),
    ):
        for entity_type in entity_types:
            for attr_name, member in members.items():
                setattr(entity_type, attr_name, member)

    # Only generate accessor members that are not hand-written on the class.
    for accessor_cls, handler_names in (
        (StringAccessor, _string_method_to_handlers),
        (DatetimeAccessor, _datetime_method_to_handlers),
    ):
        for handler_name in handler_names:
            if not hasattr(accessor_cls, handler_name):
                accessor_cls._register(handler_name)

    for series_type in SERIES_TYPE:
        series_type.str = CachedAccessor("str", StringAccessor)
        series_type.dt = CachedAccessor("dt", DatetimeAccessor)


# Run the installation once at import time and drop the helper from the
# module namespace afterwards.
_install()
del _install
@@ -0,0 +1,46 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import pandas as pd
16
+ from pandas.api.types import is_list_like
17
+
18
+ from ...core.operator import MapReduceOperator
19
+ from ...serialization.serializables import AnyField, KeyField, StringField
20
+ from ..operators import DataFrameOperatorMixin
21
+
22
+
23
class DuplicateOperand(MapReduceOperator, DataFrameOperatorMixin):
    """Map-reduce dataframe operator carrying duplicate-handling options.

    NOTE(review): presumably the shared base of the ``duplicated`` /
    ``drop_duplicates`` operators; those modules are not visible here.
    """

    # The operator's single input; refreshed in ``_set_inputs``.
    input = KeyField("input")
    # Optional column selection; ``None`` by default.
    subset = AnyField("subset", default=None)
    # Defaults to "first"; other accepted values are not visible here.
    keep = AnyField("keep", default="first")
    # Optional method name; ``None`` by default.
    method = StringField("method", default=None)

    def _set_inputs(self, inputs):
        """Store ``inputs`` via the parent class and bind ``input`` to the first one."""
        super()._set_inputs(inputs)
        self.input = self._inputs[0]
32
+
33
+
34
def validate_subset(df, subset):
    """Normalize ``subset`` to a list of column labels present in ``df``.

    Parameters
    ----------
    df
        Object exposing ``dtypes`` whose index holds the column labels.
    subset
        ``None``, a single label, or a list-like of labels.

    Returns
    -------
    list or None
        ``None`` when ``subset`` is ``None``; otherwise the labels as a list
        (a scalar label is wrapped into a one-element list).

    Raises
    ------
    KeyError
        If any label is not a column of ``df``. The error carries a
        ``pandas.Index`` of *all* missing labels, in the order given, rather
        than only the first one encountered.
    """
    if subset is None:
        return subset

    subset = list(subset) if is_list_like(subset) else [subset]

    # Membership on the dtypes Series tests its index, i.e. the column labels.
    columns = df.dtypes
    missing = [label for label in subset if label not in columns]
    if missing:
        raise KeyError(pd.Index(missing))

    return subset
@@ -0,0 +1,276 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from functools import wraps
16
+ from typing import Iterable
17
+
18
+ import pandas as pd
19
+ from pandas.api.types import (
20
+ is_datetime64_dtype,
21
+ is_datetime64tz_dtype,
22
+ is_period_dtype,
23
+ is_timedelta64_dtype,
24
+ )
25
+
26
+ from ...utils import adapt_docstring
27
+ from .datetimes import SeriesDatetimeMethod, _datetime_method_to_handlers
28
+ from .string_ import SeriesStringMethod, _string_method_to_handlers
29
+
30
+
31
class StringAccessor:
    """
    Vectorized string functions for Series and Index.

    NAs stay NA unless handled otherwise by a particular method.
    Patterned after Python's string methods, with some inspiration from
    R's stringr package.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s = md.Series(["A_Str_Series"])
    >>> s.execute()
    0    A_Str_Series
    dtype: object
    >>> s.str.split("_").execute()
    0    [A, Str, Series]
    dtype: object
    >>> s.str.replace("_", "").execute()
    0    AStrSeries
    dtype: object
    """

    def __init__(self, series):
        self._series = series

    def _call_method(self, _method_name, *args, **kwargs):
        """Apply string method ``_method_name`` to the wrapped series.

        Builds a ``SeriesStringMethod`` operator with the given positional
        and keyword arguments and calls it on the series.
        """
        op = SeriesStringMethod(
            method=_method_name, method_args=args, method_kwargs=kwargs
        )
        return op(self._series)

    @classmethod
    def _gen_func(cls, method):
        """Create an accessor method that forwards to string method ``method``.

        The wrapper copies the metadata of the matching ``pd.Series.str``
        attribute and gets an adapted version of its docstring.
        """

        @wraps(getattr(pd.Series.str, method))
        def _inner(self, *args, **kwargs):
            return self._call_method(method, *args, **kwargs)

        _inner.__doc__ = adapt_docstring(getattr(pd.Series.str, method).__doc__)
        return _inner

    def __getitem__(self, item):
        # Dispatch directly: generating a wrapper here would rebuild the
        # closure and re-adapt the pandas docstring on every indexing call.
        return self._call_method("__getitem__", item)

    def __dir__(self) -> Iterable[str]:
        """List regular attributes plus every registered string handler name."""
        s = set(super().__dir__())
        s.update(_string_method_to_handlers.keys())
        return list(s)

    @classmethod
    def _register(cls, method):
        """Install a generated forwarder for ``method`` on the accessor class."""
        setattr(cls, method, cls._gen_func(method))

    def split(self, pat=None, n=-1, expand=False):
        r"""
        Split strings around given separator/delimiter.

        Splits the string in the Series/Index from the beginning,
        at the specified delimiter string. Equivalent to :meth:`str.split`.

        Parameters
        ----------
        pat : str, optional
            String or regular expression to split on.
            If not specified, split on whitespace.
        n : int, default -1 (all)
            Limit number of splits in output.
            ``None``, 0 and -1 will be interpreted as return all splits.
        expand : bool, default False
            Expand the splitted strings into separate columns.

            * If ``True``, return DataFrame/MultiIndex expanding dimensionality.
            * If ``False``, return Series/Index, containing lists of strings.

        Returns
        -------
        Series, Index, DataFrame or MultiIndex
            Type matches caller unless ``expand=True`` (see Notes).

        See Also
        --------
        Series.str.split : Split strings around given separator/delimiter.
        Series.str.rsplit : Splits string around given separator/delimiter,
            starting from the right.
        Series.str.join : Join lists contained as elements in the Series/Index
            with passed delimiter.
        str.split : Standard library version for split.
        str.rsplit : Standard library version for rsplit.

        Notes
        -----
        The handling of the `n` keyword depends on the number of found splits:

        - If found splits > `n`,  make first `n` splits only
        - If found splits <= `n`, make all splits
        - If for a certain row the number of found splits < `n`,
          append `None` for padding up to `n` if ``expand=True``

        If using ``expand=True``, Series and Index callers return DataFrame and
        MultiIndex objects, respectively.

        Examples
        --------
        >>> import numpy as np
        >>> import maxframe.dataframe as md
        >>> s = md.Series(["this is a regular sentence",
        ...                "https://docs.python.org/3/tutorial/index.html",
        ...                np.nan])
        >>> s.execute()
        0                       this is a regular sentence
        1    https://docs.python.org/3/tutorial/index.html
        2                                              NaN
        dtype: object

        In the default setting, the string is split by whitespace.

        >>> s.str.split().execute()
        0                   [this, is, a, regular, sentence]
        1    [https://docs.python.org/3/tutorial/index.html]
        2                                                NaN
        dtype: object

        Without the `n` parameter, the outputs of `rsplit` and `split`
        are identical.

        >>> s.str.rsplit().execute()
        0                   [this, is, a, regular, sentence]
        1    [https://docs.python.org/3/tutorial/index.html]
        2                                                NaN
        dtype: object

        The `n` parameter can be used to limit the number of splits on the
        delimiter. The outputs of `split` and `rsplit` are different.

        >>> s.str.split(n=2).execute()
        0                     [this, is, a regular sentence]
        1    [https://docs.python.org/3/tutorial/index.html]
        2                                                NaN
        dtype: object

        >>> s.str.rsplit(n=2).execute()
        0                     [this is a, regular, sentence]
        1    [https://docs.python.org/3/tutorial/index.html]
        2                                                NaN
        dtype: object

        The `pat` parameter can be used to split by other characters.

        >>> s.str.split(pat="/").execute()
        0                         [this is a regular sentence]
        1    [https:, , docs.python.org, 3, tutorial, index...
        2                                                  NaN
        dtype: object

        When using ``expand=True``, the split elements will expand out into
        separate columns. If NaN is present, it is propagated throughout
        the columns during the split.

        >>> s.str.split(expand=True).execute()
                                                       0     1     2        3         4
        0                                           this    is     a  regular  sentence
        1  https://docs.python.org/3/tutorial/index.html  None  None     None      None
        2                                            NaN   NaN   NaN      NaN       NaN

        For slightly more complex use cases like splitting the html document name
        from a url, a combination of parameter settings can be used.

        >>> s.str.rsplit("/", n=1, expand=True).execute()
                                            0           1
        0          this is a regular sentence        None
        1  https://docs.python.org/3/tutorial  index.html
        2                                 NaN         NaN

        Remember to escape special characters when explicitly using regular
        expressions.

        >>> s = md.Series(["1+1=2"])
        >>> s.str.split(r"\+|=", expand=True).execute()
           0  1  2
        0  1  1  2
        """
        return self._call_method("split", pat=pat, n=n, expand=expand)

    def rsplit(self, pat=None, n=-1, expand=False):
        return self._call_method("rsplit", pat=pat, n=n, expand=expand)

    def cat(self, others=None, sep=None, na_rep=None, join="left"):
        return self._call_method(
            "cat", others=others, sep=sep, na_rep=na_rep, join=join
        )

    # Reuse the adapted pandas documentation for the explicitly defined
    # methods that carry no docstring of their own.
    rsplit.__doc__ = adapt_docstring(pd.Series.str.rsplit.__doc__)
    cat.__doc__ = adapt_docstring(pd.Series.str.cat.__doc__)
223
+
224
+
225
class DatetimeAccessor:
    """Accessor exposing datetime-like methods and properties of a series.

    Members are generated from ``pd.Series.dt`` through ``_register``; each
    one builds a ``SeriesDatetimeMethod`` operator and applies it to the
    wrapped series.
    """

    def __init__(self, series):
        """Wrap ``series`` after checking that its dtype is datetime-like.

        Raises
        ------
        AttributeError
            If the dtype is none of datetime64, tz-aware datetime64,
            timedelta64 or period.
        """
        dtype = series.dtype
        # ``is_datetime64tz_dtype`` and ``is_period_dtype`` are deprecated in
        # recent pandas releases and warn on every call; the isinstance
        # checks below are the replacement recommended by pandas.
        is_datetimelike = (
            is_datetime64_dtype(dtype)
            or isinstance(dtype, pd.DatetimeTZDtype)
            or is_timedelta64_dtype(dtype)
            or isinstance(dtype, pd.PeriodDtype)
        )
        if not is_datetimelike:
            raise AttributeError("Can only use .dt accessor with datetimelike values")
        self._series = series

    @classmethod
    def _gen_func(cls, method, is_property):
        """Create a forwarder for the ``pd.Series.dt`` member named ``method``.

        ``is_property`` is passed through to the operator. The wrapper copies
        the metadata of the pandas member and gets an adapted version of its
        docstring.
        """

        @wraps(getattr(pd.Series.dt, method))
        def _inner(self, *args, **kwargs):
            op = SeriesDatetimeMethod(
                method=method,
                is_property=is_property,
                method_args=args,
                method_kwargs=kwargs,
            )
            return op(self._series)

        _inner.__doc__ = adapt_docstring(getattr(pd.Series.dt, method).__doc__)
        return _inner

    @classmethod
    def _register(cls, method):
        """Install ``method`` on the accessor as a method or a property.

        A non-callable member of ``pd.Series.dt`` is treated as a property
        and exposed through ``property``.
        """
        is_property = not callable(getattr(pd.Series.dt, method))
        func = cls._gen_func(method, is_property)
        if is_property:
            func = property(func)
        setattr(cls, method, func)

    def __dir__(self) -> Iterable[str]:
        """List regular attributes plus every registered datetime handler name."""
        s = set(super().__dir__())
        s.update(_datetime_method_to_handlers.keys())
        return list(s)
263
+
264
+
265
class CachedAccessor:
    """Descriptor that builds an accessor once per owning object.

    The accessor instance is stored in the owner's ``_accessors`` mapping
    under ``name`` and returned from there on later lookups.
    """

    def __init__(self, name: str, accessor) -> None:
        self._name = name
        self._accessor = accessor

    def __get__(self, obj, cls):
        if obj is None:
            # Looked up on the class itself (e.g. ``Series.str``): hand back
            # the accessor class rather than an instance.
            return self._accessor

        cache = obj._accessors
        try:
            return cache[self._name]
        except KeyError:
            # First access on this object: create and remember the accessor.
            created = cache[self._name] = self._accessor(obj)
            return created