maxframe 0.1.0b5__cp39-cp39-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (647):
  1. maxframe/__init__.py +32 -0
  2. maxframe/_utils.cpython-39-darwin.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyx +547 -0
  5. maxframe/codegen.py +528 -0
  6. maxframe/config/__init__.py +15 -0
  7. maxframe/config/config.py +443 -0
  8. maxframe/config/tests/__init__.py +13 -0
  9. maxframe/config/tests/test_config.py +103 -0
  10. maxframe/config/tests/test_validators.py +34 -0
  11. maxframe/config/validators.py +57 -0
  12. maxframe/conftest.py +139 -0
  13. maxframe/core/__init__.py +65 -0
  14. maxframe/core/base.py +156 -0
  15. maxframe/core/entity/__init__.py +44 -0
  16. maxframe/core/entity/chunks.py +68 -0
  17. maxframe/core/entity/core.py +152 -0
  18. maxframe/core/entity/executable.py +337 -0
  19. maxframe/core/entity/fuse.py +73 -0
  20. maxframe/core/entity/objects.py +100 -0
  21. maxframe/core/entity/output_types.py +90 -0
  22. maxframe/core/entity/tileables.py +438 -0
  23. maxframe/core/entity/utils.py +24 -0
  24. maxframe/core/graph/__init__.py +17 -0
  25. maxframe/core/graph/builder/__init__.py +16 -0
  26. maxframe/core/graph/builder/base.py +86 -0
  27. maxframe/core/graph/builder/chunk.py +430 -0
  28. maxframe/core/graph/builder/tileable.py +34 -0
  29. maxframe/core/graph/builder/utils.py +41 -0
  30. maxframe/core/graph/core.cpython-39-darwin.so +0 -0
  31. maxframe/core/graph/core.pyx +467 -0
  32. maxframe/core/graph/entity.py +171 -0
  33. maxframe/core/graph/tests/__init__.py +13 -0
  34. maxframe/core/graph/tests/test_graph.py +205 -0
  35. maxframe/core/mode.py +96 -0
  36. maxframe/core/operator/__init__.py +34 -0
  37. maxframe/core/operator/base.py +450 -0
  38. maxframe/core/operator/core.py +276 -0
  39. maxframe/core/operator/fetch.py +53 -0
  40. maxframe/core/operator/fuse.py +29 -0
  41. maxframe/core/operator/objects.py +72 -0
  42. maxframe/core/operator/shuffle.py +111 -0
  43. maxframe/core/operator/tests/__init__.py +13 -0
  44. maxframe/core/operator/tests/test_core.py +64 -0
  45. maxframe/core/tests/__init__.py +13 -0
  46. maxframe/core/tests/test_mode.py +75 -0
  47. maxframe/dataframe/__init__.py +81 -0
  48. maxframe/dataframe/arithmetic/__init__.py +359 -0
  49. maxframe/dataframe/arithmetic/abs.py +33 -0
  50. maxframe/dataframe/arithmetic/add.py +60 -0
  51. maxframe/dataframe/arithmetic/arccos.py +28 -0
  52. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  53. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  54. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  55. maxframe/dataframe/arithmetic/arctan.py +28 -0
  56. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  57. maxframe/dataframe/arithmetic/around.py +152 -0
  58. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  59. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  60. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  61. maxframe/dataframe/arithmetic/ceil.py +28 -0
  62. maxframe/dataframe/arithmetic/core.py +342 -0
  63. maxframe/dataframe/arithmetic/cos.py +28 -0
  64. maxframe/dataframe/arithmetic/cosh.py +28 -0
  65. maxframe/dataframe/arithmetic/degrees.py +28 -0
  66. maxframe/dataframe/arithmetic/docstring.py +442 -0
  67. maxframe/dataframe/arithmetic/equal.py +56 -0
  68. maxframe/dataframe/arithmetic/exp.py +28 -0
  69. maxframe/dataframe/arithmetic/exp2.py +28 -0
  70. maxframe/dataframe/arithmetic/expm1.py +28 -0
  71. maxframe/dataframe/arithmetic/floor.py +28 -0
  72. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  73. maxframe/dataframe/arithmetic/greater.py +57 -0
  74. maxframe/dataframe/arithmetic/greater_equal.py +57 -0
  75. maxframe/dataframe/arithmetic/invert.py +33 -0
  76. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  77. maxframe/dataframe/arithmetic/less.py +57 -0
  78. maxframe/dataframe/arithmetic/less_equal.py +57 -0
  79. maxframe/dataframe/arithmetic/log.py +28 -0
  80. maxframe/dataframe/arithmetic/log10.py +28 -0
  81. maxframe/dataframe/arithmetic/log2.py +28 -0
  82. maxframe/dataframe/arithmetic/mod.py +60 -0
  83. maxframe/dataframe/arithmetic/multiply.py +60 -0
  84. maxframe/dataframe/arithmetic/negative.py +33 -0
  85. maxframe/dataframe/arithmetic/not_equal.py +56 -0
  86. maxframe/dataframe/arithmetic/power.py +68 -0
  87. maxframe/dataframe/arithmetic/radians.py +28 -0
  88. maxframe/dataframe/arithmetic/sin.py +28 -0
  89. maxframe/dataframe/arithmetic/sinh.py +28 -0
  90. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  91. maxframe/dataframe/arithmetic/subtract.py +64 -0
  92. maxframe/dataframe/arithmetic/tan.py +28 -0
  93. maxframe/dataframe/arithmetic/tanh.py +28 -0
  94. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  95. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
  96. maxframe/dataframe/arithmetic/truediv.py +64 -0
  97. maxframe/dataframe/arithmetic/trunc.py +28 -0
  98. maxframe/dataframe/arrays.py +864 -0
  99. maxframe/dataframe/core.py +2417 -0
  100. maxframe/dataframe/datasource/__init__.py +15 -0
  101. maxframe/dataframe/datasource/core.py +81 -0
  102. maxframe/dataframe/datasource/dataframe.py +59 -0
  103. maxframe/dataframe/datasource/date_range.py +504 -0
  104. maxframe/dataframe/datasource/from_index.py +54 -0
  105. maxframe/dataframe/datasource/from_records.py +107 -0
  106. maxframe/dataframe/datasource/from_tensor.py +419 -0
  107. maxframe/dataframe/datasource/index.py +117 -0
  108. maxframe/dataframe/datasource/read_csv.py +528 -0
  109. maxframe/dataframe/datasource/read_odps_query.py +299 -0
  110. maxframe/dataframe/datasource/read_odps_table.py +253 -0
  111. maxframe/dataframe/datasource/read_parquet.py +421 -0
  112. maxframe/dataframe/datasource/series.py +55 -0
  113. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  114. maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
  115. maxframe/dataframe/datastore/__init__.py +26 -0
  116. maxframe/dataframe/datastore/core.py +19 -0
  117. maxframe/dataframe/datastore/to_csv.py +227 -0
  118. maxframe/dataframe/datastore/to_odps.py +162 -0
  119. maxframe/dataframe/extensions/__init__.py +41 -0
  120. maxframe/dataframe/extensions/accessor.py +50 -0
  121. maxframe/dataframe/extensions/reshuffle.py +83 -0
  122. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  123. maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
  124. maxframe/dataframe/fetch/__init__.py +15 -0
  125. maxframe/dataframe/fetch/core.py +86 -0
  126. maxframe/dataframe/groupby/__init__.py +82 -0
  127. maxframe/dataframe/groupby/aggregation.py +350 -0
  128. maxframe/dataframe/groupby/apply.py +251 -0
  129. maxframe/dataframe/groupby/core.py +179 -0
  130. maxframe/dataframe/groupby/cum.py +124 -0
  131. maxframe/dataframe/groupby/fill.py +141 -0
  132. maxframe/dataframe/groupby/getitem.py +92 -0
  133. maxframe/dataframe/groupby/head.py +105 -0
  134. maxframe/dataframe/groupby/sample.py +214 -0
  135. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  136. maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
  137. maxframe/dataframe/groupby/transform.py +255 -0
  138. maxframe/dataframe/indexing/__init__.py +84 -0
  139. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  140. maxframe/dataframe/indexing/align.py +349 -0
  141. maxframe/dataframe/indexing/at.py +83 -0
  142. maxframe/dataframe/indexing/getitem.py +204 -0
  143. maxframe/dataframe/indexing/iat.py +37 -0
  144. maxframe/dataframe/indexing/iloc.py +566 -0
  145. maxframe/dataframe/indexing/insert.py +86 -0
  146. maxframe/dataframe/indexing/loc.py +411 -0
  147. maxframe/dataframe/indexing/reindex.py +526 -0
  148. maxframe/dataframe/indexing/rename.py +462 -0
  149. maxframe/dataframe/indexing/rename_axis.py +209 -0
  150. maxframe/dataframe/indexing/reset_index.py +402 -0
  151. maxframe/dataframe/indexing/sample.py +221 -0
  152. maxframe/dataframe/indexing/set_axis.py +194 -0
  153. maxframe/dataframe/indexing/set_index.py +61 -0
  154. maxframe/dataframe/indexing/setitem.py +130 -0
  155. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  156. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  157. maxframe/dataframe/indexing/where.py +308 -0
  158. maxframe/dataframe/initializer.py +288 -0
  159. maxframe/dataframe/merge/__init__.py +32 -0
  160. maxframe/dataframe/merge/append.py +121 -0
  161. maxframe/dataframe/merge/concat.py +325 -0
  162. maxframe/dataframe/merge/merge.py +593 -0
  163. maxframe/dataframe/merge/tests/__init__.py +13 -0
  164. maxframe/dataframe/merge/tests/test_merge.py +215 -0
  165. maxframe/dataframe/misc/__init__.py +134 -0
  166. maxframe/dataframe/misc/_duplicate.py +46 -0
  167. maxframe/dataframe/misc/accessor.py +276 -0
  168. maxframe/dataframe/misc/apply.py +692 -0
  169. maxframe/dataframe/misc/astype.py +236 -0
  170. maxframe/dataframe/misc/case_when.py +141 -0
  171. maxframe/dataframe/misc/check_monotonic.py +84 -0
  172. maxframe/dataframe/misc/cut.py +383 -0
  173. maxframe/dataframe/misc/datetimes.py +79 -0
  174. maxframe/dataframe/misc/describe.py +108 -0
  175. maxframe/dataframe/misc/diff.py +210 -0
  176. maxframe/dataframe/misc/drop.py +440 -0
  177. maxframe/dataframe/misc/drop_duplicates.py +248 -0
  178. maxframe/dataframe/misc/duplicated.py +292 -0
  179. maxframe/dataframe/misc/eval.py +728 -0
  180. maxframe/dataframe/misc/explode.py +171 -0
  181. maxframe/dataframe/misc/get_dummies.py +208 -0
  182. maxframe/dataframe/misc/isin.py +217 -0
  183. maxframe/dataframe/misc/map.py +236 -0
  184. maxframe/dataframe/misc/melt.py +162 -0
  185. maxframe/dataframe/misc/memory_usage.py +248 -0
  186. maxframe/dataframe/misc/pct_change.py +150 -0
  187. maxframe/dataframe/misc/pivot_table.py +262 -0
  188. maxframe/dataframe/misc/qcut.py +104 -0
  189. maxframe/dataframe/misc/select_dtypes.py +104 -0
  190. maxframe/dataframe/misc/shift.py +256 -0
  191. maxframe/dataframe/misc/stack.py +238 -0
  192. maxframe/dataframe/misc/string_.py +221 -0
  193. maxframe/dataframe/misc/tests/__init__.py +13 -0
  194. maxframe/dataframe/misc/tests/test_misc.py +468 -0
  195. maxframe/dataframe/misc/to_numeric.py +178 -0
  196. maxframe/dataframe/misc/transform.py +361 -0
  197. maxframe/dataframe/misc/transpose.py +136 -0
  198. maxframe/dataframe/misc/value_counts.py +182 -0
  199. maxframe/dataframe/missing/__init__.py +53 -0
  200. maxframe/dataframe/missing/checkna.py +223 -0
  201. maxframe/dataframe/missing/dropna.py +280 -0
  202. maxframe/dataframe/missing/fillna.py +275 -0
  203. maxframe/dataframe/missing/replace.py +439 -0
  204. maxframe/dataframe/missing/tests/__init__.py +13 -0
  205. maxframe/dataframe/missing/tests/test_missing.py +89 -0
  206. maxframe/dataframe/operators.py +273 -0
  207. maxframe/dataframe/plotting/__init__.py +40 -0
  208. maxframe/dataframe/plotting/core.py +78 -0
  209. maxframe/dataframe/plotting/tests/__init__.py +13 -0
  210. maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
  211. maxframe/dataframe/reduction/__init__.py +107 -0
  212. maxframe/dataframe/reduction/aggregation.py +344 -0
  213. maxframe/dataframe/reduction/all.py +78 -0
  214. maxframe/dataframe/reduction/any.py +78 -0
  215. maxframe/dataframe/reduction/core.py +837 -0
  216. maxframe/dataframe/reduction/count.py +59 -0
  217. maxframe/dataframe/reduction/cummax.py +30 -0
  218. maxframe/dataframe/reduction/cummin.py +30 -0
  219. maxframe/dataframe/reduction/cumprod.py +30 -0
  220. maxframe/dataframe/reduction/cumsum.py +30 -0
  221. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  222. maxframe/dataframe/reduction/kurtosis.py +104 -0
  223. maxframe/dataframe/reduction/max.py +65 -0
  224. maxframe/dataframe/reduction/mean.py +61 -0
  225. maxframe/dataframe/reduction/min.py +65 -0
  226. maxframe/dataframe/reduction/nunique.py +141 -0
  227. maxframe/dataframe/reduction/prod.py +76 -0
  228. maxframe/dataframe/reduction/reduction_size.py +36 -0
  229. maxframe/dataframe/reduction/sem.py +69 -0
  230. maxframe/dataframe/reduction/skew.py +89 -0
  231. maxframe/dataframe/reduction/std.py +53 -0
  232. maxframe/dataframe/reduction/str_concat.py +48 -0
  233. maxframe/dataframe/reduction/sum.py +77 -0
  234. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  235. maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
  236. maxframe/dataframe/reduction/unique.py +90 -0
  237. maxframe/dataframe/reduction/var.py +72 -0
  238. maxframe/dataframe/sort/__init__.py +34 -0
  239. maxframe/dataframe/sort/core.py +36 -0
  240. maxframe/dataframe/sort/sort_index.py +153 -0
  241. maxframe/dataframe/sort/sort_values.py +311 -0
  242. maxframe/dataframe/sort/tests/__init__.py +13 -0
  243. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  244. maxframe/dataframe/statistics/__init__.py +33 -0
  245. maxframe/dataframe/statistics/corr.py +280 -0
  246. maxframe/dataframe/statistics/quantile.py +341 -0
  247. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  248. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  249. maxframe/dataframe/tests/__init__.py +13 -0
  250. maxframe/dataframe/tests/test_initializer.py +29 -0
  251. maxframe/dataframe/tseries/__init__.py +13 -0
  252. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  253. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  254. maxframe/dataframe/tseries/to_datetime.py +297 -0
  255. maxframe/dataframe/ufunc/__init__.py +27 -0
  256. maxframe/dataframe/ufunc/tensor.py +54 -0
  257. maxframe/dataframe/ufunc/ufunc.py +52 -0
  258. maxframe/dataframe/utils.py +1267 -0
  259. maxframe/dataframe/window/__init__.py +29 -0
  260. maxframe/dataframe/window/aggregation.py +96 -0
  261. maxframe/dataframe/window/core.py +69 -0
  262. maxframe/dataframe/window/ewm.py +249 -0
  263. maxframe/dataframe/window/expanding.py +147 -0
  264. maxframe/dataframe/window/rolling.py +376 -0
  265. maxframe/dataframe/window/tests/__init__.py +13 -0
  266. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  267. maxframe/dataframe/window/tests/test_expanding.py +66 -0
  268. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  269. maxframe/env.py +33 -0
  270. maxframe/errors.py +21 -0
  271. maxframe/extension.py +81 -0
  272. maxframe/learn/__init__.py +17 -0
  273. maxframe/learn/contrib/__init__.py +17 -0
  274. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  275. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  276. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  277. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  278. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  279. maxframe/learn/contrib/utils.py +52 -0
  280. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  281. maxframe/learn/contrib/xgboost/classifier.py +86 -0
  282. maxframe/learn/contrib/xgboost/core.py +156 -0
  283. maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
  284. maxframe/learn/contrib/xgboost/predict.py +138 -0
  285. maxframe/learn/contrib/xgboost/regressor.py +78 -0
  286. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  287. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  288. maxframe/learn/contrib/xgboost/train.py +121 -0
  289. maxframe/learn/utils/__init__.py +15 -0
  290. maxframe/learn/utils/core.py +29 -0
  291. maxframe/lib/__init__.py +15 -0
  292. maxframe/lib/aio/__init__.py +27 -0
  293. maxframe/lib/aio/_runners.py +162 -0
  294. maxframe/lib/aio/_threads.py +35 -0
  295. maxframe/lib/aio/base.py +82 -0
  296. maxframe/lib/aio/file.py +85 -0
  297. maxframe/lib/aio/isolation.py +100 -0
  298. maxframe/lib/aio/lru.py +242 -0
  299. maxframe/lib/aio/parallelism.py +37 -0
  300. maxframe/lib/aio/tests/__init__.py +13 -0
  301. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  302. maxframe/lib/compression.py +55 -0
  303. maxframe/lib/cython/__init__.py +13 -0
  304. maxframe/lib/cython/libcpp.pxd +30 -0
  305. maxframe/lib/filesystem/__init__.py +21 -0
  306. maxframe/lib/filesystem/_glob.py +173 -0
  307. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  308. maxframe/lib/filesystem/_oss_lib/common.py +198 -0
  309. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  310. maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
  311. maxframe/lib/filesystem/arrow.py +236 -0
  312. maxframe/lib/filesystem/base.py +263 -0
  313. maxframe/lib/filesystem/core.py +95 -0
  314. maxframe/lib/filesystem/fsmap.py +164 -0
  315. maxframe/lib/filesystem/hdfs.py +31 -0
  316. maxframe/lib/filesystem/local.py +112 -0
  317. maxframe/lib/filesystem/oss.py +157 -0
  318. maxframe/lib/filesystem/tests/__init__.py +13 -0
  319. maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
  320. maxframe/lib/filesystem/tests/test_oss.py +182 -0
  321. maxframe/lib/functools_compat.py +81 -0
  322. maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
  323. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  324. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  325. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  326. maxframe/lib/sparse/__init__.py +861 -0
  327. maxframe/lib/sparse/array.py +1604 -0
  328. maxframe/lib/sparse/core.py +92 -0
  329. maxframe/lib/sparse/matrix.py +241 -0
  330. maxframe/lib/sparse/tests/__init__.py +15 -0
  331. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  332. maxframe/lib/sparse/vector.py +150 -0
  333. maxframe/lib/tblib/LICENSE +20 -0
  334. maxframe/lib/tblib/__init__.py +327 -0
  335. maxframe/lib/tblib/cpython.py +83 -0
  336. maxframe/lib/tblib/decorators.py +44 -0
  337. maxframe/lib/tblib/pickling_support.py +90 -0
  338. maxframe/lib/tests/__init__.py +13 -0
  339. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  340. maxframe/lib/version.py +620 -0
  341. maxframe/lib/wrapped_pickle.py +139 -0
  342. maxframe/mixin.py +100 -0
  343. maxframe/odpsio/__init__.py +21 -0
  344. maxframe/odpsio/arrow.py +91 -0
  345. maxframe/odpsio/schema.py +364 -0
  346. maxframe/odpsio/tableio.py +322 -0
  347. maxframe/odpsio/tests/__init__.py +13 -0
  348. maxframe/odpsio/tests/test_arrow.py +88 -0
  349. maxframe/odpsio/tests/test_schema.py +297 -0
  350. maxframe/odpsio/tests/test_tableio.py +136 -0
  351. maxframe/odpsio/tests/test_volumeio.py +90 -0
  352. maxframe/odpsio/volumeio.py +95 -0
  353. maxframe/opcodes.py +590 -0
  354. maxframe/protocol.py +415 -0
  355. maxframe/remote/__init__.py +18 -0
  356. maxframe/remote/core.py +210 -0
  357. maxframe/remote/run_script.py +121 -0
  358. maxframe/serialization/__init__.py +26 -0
  359. maxframe/serialization/arrow.py +95 -0
  360. maxframe/serialization/core.cpython-39-darwin.so +0 -0
  361. maxframe/serialization/core.pxd +44 -0
  362. maxframe/serialization/core.pyi +61 -0
  363. maxframe/serialization/core.pyx +1094 -0
  364. maxframe/serialization/exception.py +86 -0
  365. maxframe/serialization/maxframe_objects.py +39 -0
  366. maxframe/serialization/numpy.py +91 -0
  367. maxframe/serialization/pandas.py +202 -0
  368. maxframe/serialization/scipy.py +71 -0
  369. maxframe/serialization/serializables/__init__.py +55 -0
  370. maxframe/serialization/serializables/core.py +262 -0
  371. maxframe/serialization/serializables/field.py +624 -0
  372. maxframe/serialization/serializables/field_type.py +589 -0
  373. maxframe/serialization/serializables/tests/__init__.py +13 -0
  374. maxframe/serialization/serializables/tests/test_field_type.py +121 -0
  375. maxframe/serialization/serializables/tests/test_serializable.py +250 -0
  376. maxframe/serialization/tests/__init__.py +13 -0
  377. maxframe/serialization/tests/test_serial.py +412 -0
  378. maxframe/session.py +1310 -0
  379. maxframe/tensor/__init__.py +183 -0
  380. maxframe/tensor/arithmetic/__init__.py +315 -0
  381. maxframe/tensor/arithmetic/abs.py +68 -0
  382. maxframe/tensor/arithmetic/absolute.py +68 -0
  383. maxframe/tensor/arithmetic/add.py +82 -0
  384. maxframe/tensor/arithmetic/angle.py +72 -0
  385. maxframe/tensor/arithmetic/arccos.py +104 -0
  386. maxframe/tensor/arithmetic/arccosh.py +91 -0
  387. maxframe/tensor/arithmetic/arcsin.py +94 -0
  388. maxframe/tensor/arithmetic/arcsinh.py +86 -0
  389. maxframe/tensor/arithmetic/arctan.py +106 -0
  390. maxframe/tensor/arithmetic/arctan2.py +128 -0
  391. maxframe/tensor/arithmetic/arctanh.py +86 -0
  392. maxframe/tensor/arithmetic/around.py +114 -0
  393. maxframe/tensor/arithmetic/bitand.py +95 -0
  394. maxframe/tensor/arithmetic/bitor.py +102 -0
  395. maxframe/tensor/arithmetic/bitxor.py +95 -0
  396. maxframe/tensor/arithmetic/cbrt.py +66 -0
  397. maxframe/tensor/arithmetic/ceil.py +71 -0
  398. maxframe/tensor/arithmetic/clip.py +165 -0
  399. maxframe/tensor/arithmetic/conj.py +74 -0
  400. maxframe/tensor/arithmetic/copysign.py +78 -0
  401. maxframe/tensor/arithmetic/core.py +544 -0
  402. maxframe/tensor/arithmetic/cos.py +85 -0
  403. maxframe/tensor/arithmetic/cosh.py +72 -0
  404. maxframe/tensor/arithmetic/deg2rad.py +72 -0
  405. maxframe/tensor/arithmetic/degrees.py +77 -0
  406. maxframe/tensor/arithmetic/divide.py +114 -0
  407. maxframe/tensor/arithmetic/equal.py +76 -0
  408. maxframe/tensor/arithmetic/exp.py +106 -0
  409. maxframe/tensor/arithmetic/exp2.py +67 -0
  410. maxframe/tensor/arithmetic/expm1.py +79 -0
  411. maxframe/tensor/arithmetic/fabs.py +74 -0
  412. maxframe/tensor/arithmetic/fix.py +69 -0
  413. maxframe/tensor/arithmetic/float_power.py +103 -0
  414. maxframe/tensor/arithmetic/floor.py +77 -0
  415. maxframe/tensor/arithmetic/floordiv.py +94 -0
  416. maxframe/tensor/arithmetic/fmax.py +105 -0
  417. maxframe/tensor/arithmetic/fmin.py +106 -0
  418. maxframe/tensor/arithmetic/fmod.py +99 -0
  419. maxframe/tensor/arithmetic/frexp.py +92 -0
  420. maxframe/tensor/arithmetic/greater.py +77 -0
  421. maxframe/tensor/arithmetic/greater_equal.py +69 -0
  422. maxframe/tensor/arithmetic/hypot.py +77 -0
  423. maxframe/tensor/arithmetic/i0.py +89 -0
  424. maxframe/tensor/arithmetic/imag.py +67 -0
  425. maxframe/tensor/arithmetic/invert.py +110 -0
  426. maxframe/tensor/arithmetic/isclose.py +115 -0
  427. maxframe/tensor/arithmetic/iscomplex.py +64 -0
  428. maxframe/tensor/arithmetic/isfinite.py +106 -0
  429. maxframe/tensor/arithmetic/isinf.py +103 -0
  430. maxframe/tensor/arithmetic/isnan.py +82 -0
  431. maxframe/tensor/arithmetic/isreal.py +63 -0
  432. maxframe/tensor/arithmetic/ldexp.py +99 -0
  433. maxframe/tensor/arithmetic/less.py +69 -0
  434. maxframe/tensor/arithmetic/less_equal.py +69 -0
  435. maxframe/tensor/arithmetic/log.py +92 -0
  436. maxframe/tensor/arithmetic/log10.py +85 -0
  437. maxframe/tensor/arithmetic/log1p.py +95 -0
  438. maxframe/tensor/arithmetic/log2.py +85 -0
  439. maxframe/tensor/arithmetic/logaddexp.py +80 -0
  440. maxframe/tensor/arithmetic/logaddexp2.py +78 -0
  441. maxframe/tensor/arithmetic/logical_and.py +81 -0
  442. maxframe/tensor/arithmetic/logical_not.py +74 -0
  443. maxframe/tensor/arithmetic/logical_or.py +82 -0
  444. maxframe/tensor/arithmetic/logical_xor.py +88 -0
  445. maxframe/tensor/arithmetic/lshift.py +82 -0
  446. maxframe/tensor/arithmetic/maximum.py +108 -0
  447. maxframe/tensor/arithmetic/minimum.py +108 -0
  448. maxframe/tensor/arithmetic/mod.py +104 -0
  449. maxframe/tensor/arithmetic/modf.py +83 -0
  450. maxframe/tensor/arithmetic/multiply.py +81 -0
  451. maxframe/tensor/arithmetic/nan_to_num.py +99 -0
  452. maxframe/tensor/arithmetic/negative.py +65 -0
  453. maxframe/tensor/arithmetic/nextafter.py +68 -0
  454. maxframe/tensor/arithmetic/not_equal.py +72 -0
  455. maxframe/tensor/arithmetic/positive.py +47 -0
  456. maxframe/tensor/arithmetic/power.py +106 -0
  457. maxframe/tensor/arithmetic/rad2deg.py +71 -0
  458. maxframe/tensor/arithmetic/radians.py +77 -0
  459. maxframe/tensor/arithmetic/real.py +70 -0
  460. maxframe/tensor/arithmetic/reciprocal.py +76 -0
  461. maxframe/tensor/arithmetic/rint.py +68 -0
  462. maxframe/tensor/arithmetic/rshift.py +81 -0
  463. maxframe/tensor/arithmetic/setimag.py +29 -0
  464. maxframe/tensor/arithmetic/setreal.py +29 -0
  465. maxframe/tensor/arithmetic/sign.py +81 -0
  466. maxframe/tensor/arithmetic/signbit.py +65 -0
  467. maxframe/tensor/arithmetic/sin.py +98 -0
  468. maxframe/tensor/arithmetic/sinc.py +102 -0
  469. maxframe/tensor/arithmetic/sinh.py +93 -0
  470. maxframe/tensor/arithmetic/spacing.py +72 -0
  471. maxframe/tensor/arithmetic/sqrt.py +81 -0
  472. maxframe/tensor/arithmetic/square.py +69 -0
  473. maxframe/tensor/arithmetic/subtract.py +81 -0
  474. maxframe/tensor/arithmetic/tan.py +88 -0
  475. maxframe/tensor/arithmetic/tanh.py +92 -0
  476. maxframe/tensor/arithmetic/tests/__init__.py +15 -0
  477. maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
  478. maxframe/tensor/arithmetic/truediv.py +104 -0
  479. maxframe/tensor/arithmetic/trunc.py +72 -0
  480. maxframe/tensor/arithmetic/utils.py +65 -0
  481. maxframe/tensor/array_utils.py +186 -0
  482. maxframe/tensor/base/__init__.py +34 -0
  483. maxframe/tensor/base/astype.py +119 -0
  484. maxframe/tensor/base/atleast_1d.py +74 -0
  485. maxframe/tensor/base/broadcast_to.py +89 -0
  486. maxframe/tensor/base/ravel.py +92 -0
  487. maxframe/tensor/base/tests/__init__.py +13 -0
  488. maxframe/tensor/base/tests/test_base.py +114 -0
  489. maxframe/tensor/base/transpose.py +125 -0
  490. maxframe/tensor/base/unique.py +205 -0
  491. maxframe/tensor/base/where.py +127 -0
  492. maxframe/tensor/core.py +724 -0
  493. maxframe/tensor/datasource/__init__.py +32 -0
  494. maxframe/tensor/datasource/arange.py +156 -0
  495. maxframe/tensor/datasource/array.py +415 -0
  496. maxframe/tensor/datasource/core.py +109 -0
  497. maxframe/tensor/datasource/empty.py +169 -0
  498. maxframe/tensor/datasource/from_dataframe.py +70 -0
  499. maxframe/tensor/datasource/from_dense.py +54 -0
  500. maxframe/tensor/datasource/from_sparse.py +47 -0
  501. maxframe/tensor/datasource/full.py +186 -0
  502. maxframe/tensor/datasource/ones.py +173 -0
  503. maxframe/tensor/datasource/scalar.py +40 -0
  504. maxframe/tensor/datasource/tests/__init__.py +13 -0
  505. maxframe/tensor/datasource/tests/test_datasource.py +278 -0
  506. maxframe/tensor/datasource/zeros.py +188 -0
  507. maxframe/tensor/fetch/__init__.py +15 -0
  508. maxframe/tensor/fetch/core.py +54 -0
  509. maxframe/tensor/indexing/__init__.py +47 -0
  510. maxframe/tensor/indexing/choose.py +196 -0
  511. maxframe/tensor/indexing/compress.py +124 -0
  512. maxframe/tensor/indexing/core.py +190 -0
  513. maxframe/tensor/indexing/extract.py +71 -0
  514. maxframe/tensor/indexing/fill_diagonal.py +183 -0
  515. maxframe/tensor/indexing/flatnonzero.py +60 -0
  516. maxframe/tensor/indexing/getitem.py +175 -0
  517. maxframe/tensor/indexing/nonzero.py +120 -0
  518. maxframe/tensor/indexing/setitem.py +132 -0
  519. maxframe/tensor/indexing/slice.py +29 -0
  520. maxframe/tensor/indexing/take.py +130 -0
  521. maxframe/tensor/indexing/tests/__init__.py +15 -0
  522. maxframe/tensor/indexing/tests/test_indexing.py +234 -0
  523. maxframe/tensor/indexing/unravel_index.py +103 -0
  524. maxframe/tensor/merge/__init__.py +15 -0
  525. maxframe/tensor/merge/stack.py +132 -0
  526. maxframe/tensor/merge/tests/__init__.py +13 -0
  527. maxframe/tensor/merge/tests/test_merge.py +52 -0
  528. maxframe/tensor/operators.py +123 -0
  529. maxframe/tensor/random/__init__.py +168 -0
  530. maxframe/tensor/random/beta.py +87 -0
  531. maxframe/tensor/random/binomial.py +137 -0
  532. maxframe/tensor/random/bytes.py +39 -0
  533. maxframe/tensor/random/chisquare.py +110 -0
  534. maxframe/tensor/random/choice.py +186 -0
  535. maxframe/tensor/random/core.py +234 -0
  536. maxframe/tensor/random/dirichlet.py +123 -0
  537. maxframe/tensor/random/exponential.py +94 -0
  538. maxframe/tensor/random/f.py +135 -0
  539. maxframe/tensor/random/gamma.py +128 -0
  540. maxframe/tensor/random/geometric.py +93 -0
  541. maxframe/tensor/random/gumbel.py +167 -0
  542. maxframe/tensor/random/hypergeometric.py +148 -0
  543. maxframe/tensor/random/laplace.py +133 -0
  544. maxframe/tensor/random/logistic.py +129 -0
  545. maxframe/tensor/random/lognormal.py +159 -0
  546. maxframe/tensor/random/logseries.py +122 -0
  547. maxframe/tensor/random/multinomial.py +133 -0
  548. maxframe/tensor/random/multivariate_normal.py +192 -0
  549. maxframe/tensor/random/negative_binomial.py +125 -0
  550. maxframe/tensor/random/noncentral_chisquare.py +132 -0
  551. maxframe/tensor/random/noncentral_f.py +126 -0
  552. maxframe/tensor/random/normal.py +143 -0
  553. maxframe/tensor/random/pareto.py +140 -0
  554. maxframe/tensor/random/permutation.py +104 -0
  555. maxframe/tensor/random/poisson.py +111 -0
  556. maxframe/tensor/random/power.py +142 -0
  557. maxframe/tensor/random/rand.py +82 -0
  558. maxframe/tensor/random/randint.py +121 -0
  559. maxframe/tensor/random/randn.py +96 -0
  560. maxframe/tensor/random/random_integers.py +123 -0
  561. maxframe/tensor/random/random_sample.py +86 -0
  562. maxframe/tensor/random/rayleigh.py +110 -0
  563. maxframe/tensor/random/shuffle.py +61 -0
  564. maxframe/tensor/random/standard_cauchy.py +105 -0
  565. maxframe/tensor/random/standard_exponential.py +72 -0
  566. maxframe/tensor/random/standard_gamma.py +120 -0
  567. maxframe/tensor/random/standard_normal.py +74 -0
  568. maxframe/tensor/random/standard_t.py +135 -0
  569. maxframe/tensor/random/tests/__init__.py +15 -0
  570. maxframe/tensor/random/tests/test_random.py +167 -0
  571. maxframe/tensor/random/triangular.py +119 -0
  572. maxframe/tensor/random/uniform.py +131 -0
  573. maxframe/tensor/random/vonmises.py +131 -0
  574. maxframe/tensor/random/wald.py +114 -0
  575. maxframe/tensor/random/weibull.py +140 -0
  576. maxframe/tensor/random/zipf.py +122 -0
  577. maxframe/tensor/rechunk/__init__.py +26 -0
  578. maxframe/tensor/rechunk/rechunk.py +43 -0
  579. maxframe/tensor/reduction/__init__.py +66 -0
  580. maxframe/tensor/reduction/all.py +103 -0
  581. maxframe/tensor/reduction/allclose.py +88 -0
  582. maxframe/tensor/reduction/any.py +105 -0
  583. maxframe/tensor/reduction/argmax.py +103 -0
  584. maxframe/tensor/reduction/argmin.py +103 -0
  585. maxframe/tensor/reduction/array_equal.py +64 -0
  586. maxframe/tensor/reduction/core.py +168 -0
  587. maxframe/tensor/reduction/count_nonzero.py +81 -0
  588. maxframe/tensor/reduction/cumprod.py +97 -0
  589. maxframe/tensor/reduction/cumsum.py +101 -0
  590. maxframe/tensor/reduction/max.py +120 -0
  591. maxframe/tensor/reduction/mean.py +123 -0
  592. maxframe/tensor/reduction/min.py +120 -0
  593. maxframe/tensor/reduction/nanargmax.py +82 -0
  594. maxframe/tensor/reduction/nanargmin.py +76 -0
  595. maxframe/tensor/reduction/nancumprod.py +91 -0
  596. maxframe/tensor/reduction/nancumsum.py +94 -0
  597. maxframe/tensor/reduction/nanmax.py +111 -0
  598. maxframe/tensor/reduction/nanmean.py +106 -0
  599. maxframe/tensor/reduction/nanmin.py +111 -0
  600. maxframe/tensor/reduction/nanprod.py +94 -0
  601. maxframe/tensor/reduction/nanstd.py +126 -0
  602. maxframe/tensor/reduction/nansum.py +115 -0
  603. maxframe/tensor/reduction/nanvar.py +149 -0
  604. maxframe/tensor/reduction/prod.py +130 -0
  605. maxframe/tensor/reduction/std.py +134 -0
  606. maxframe/tensor/reduction/sum.py +125 -0
  607. maxframe/tensor/reduction/tests/__init__.py +13 -0
  608. maxframe/tensor/reduction/tests/test_reduction.py +181 -0
  609. maxframe/tensor/reduction/var.py +176 -0
  610. maxframe/tensor/reshape/__init__.py +17 -0
  611. maxframe/tensor/reshape/reshape.py +188 -0
  612. maxframe/tensor/reshape/tests/__init__.py +15 -0
  613. maxframe/tensor/reshape/tests/test_reshape.py +37 -0
  614. maxframe/tensor/statistics/__init__.py +13 -0
  615. maxframe/tensor/statistics/percentile.py +175 -0
  616. maxframe/tensor/statistics/quantile.py +288 -0
  617. maxframe/tensor/ufunc/__init__.py +26 -0
  618. maxframe/tensor/ufunc/ufunc.py +200 -0
  619. maxframe/tensor/utils.py +718 -0
  620. maxframe/tests/__init__.py +13 -0
  621. maxframe/tests/test_codegen.py +69 -0
  622. maxframe/tests/test_protocol.py +144 -0
  623. maxframe/tests/test_utils.py +376 -0
  624. maxframe/tests/utils.py +164 -0
  625. maxframe/typing_.py +37 -0
  626. maxframe/udf.py +134 -0
  627. maxframe/utils.py +1114 -0
  628. maxframe-0.1.0b5.dist-info/METADATA +104 -0
  629. maxframe-0.1.0b5.dist-info/RECORD +647 -0
  630. maxframe-0.1.0b5.dist-info/WHEEL +5 -0
  631. maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
  632. maxframe_client/__init__.py +17 -0
  633. maxframe_client/clients/__init__.py +13 -0
  634. maxframe_client/clients/framedriver.py +118 -0
  635. maxframe_client/clients/spe.py +104 -0
  636. maxframe_client/conftest.py +15 -0
  637. maxframe_client/fetcher.py +264 -0
  638. maxframe_client/session/__init__.py +22 -0
  639. maxframe_client/session/consts.py +36 -0
  640. maxframe_client/session/graph.py +119 -0
  641. maxframe_client/session/odps.py +482 -0
  642. maxframe_client/session/task.py +280 -0
  643. maxframe_client/session/tests/__init__.py +13 -0
  644. maxframe_client/session/tests/test_task.py +85 -0
  645. maxframe_client/tests/__init__.py +13 -0
  646. maxframe_client/tests/test_fetcher.py +89 -0
  647. maxframe_client/tests/test_session.py +255 -0
@@ -0,0 +1,486 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import functools
16
+ import operator
17
+ from functools import reduce
18
+ from typing import NamedTuple
19
+
20
+ import numpy as np
21
+ import pandas as pd
22
+ import pytest
23
+
24
+ from .... import dataframe as md
25
+ from ....tensor import Tensor
26
+ from ...core import DataFrame, IndexValue, OutputType, Series
27
+ from ...datasource.dataframe import from_pandas as from_pandas_df
28
+ from ...datasource.series import from_pandas as from_pandas_series
29
+ from .. import (
30
+ CustomReduction,
31
+ DataFrameAll,
32
+ DataFrameAny,
33
+ DataFrameCount,
34
+ DataFrameCummax,
35
+ DataFrameCummin,
36
+ DataFrameCumprod,
37
+ DataFrameCumsum,
38
+ DataFrameKurtosis,
39
+ DataFrameMax,
40
+ DataFrameMean,
41
+ DataFrameMin,
42
+ DataFrameNunique,
43
+ DataFrameProd,
44
+ DataFrameSem,
45
+ DataFrameSkew,
46
+ DataFrameSum,
47
+ DataFrameVar,
48
+ )
49
+ from ..aggregation import where_function
50
+ from ..core import ReductionCompiler
51
+
52
+ pytestmark = pytest.mark.pd_compat
53
+
54
+
55
class FunctionOptions(NamedTuple):
    """Per-function flags carried alongside each parametrized reduction.

    The parametrized tests in this module read ``has_skipna`` to decide
    whether ``axis``/``skipna`` keywords are passed to the method under test.
    """

    # when True, the tests call the reduction with ``skipna=False``
    has_skipna: bool = True
    # presumably: the reduction accepts ``numeric_only`` (not read in this module)
    has_numeric_only: bool = True
    # presumably: the reduction accepts ``bool_only`` (not read in this module)
    has_bool_only: bool = False
59
+
60
+
61
# Parametrization table for the plain reduction tests.  Every entry is a
# (method name, operator class expected on the result, FunctionOptions) triple;
# the third column of each row below lists only the non-default option flags.
reduction_functions = [
    (func_name, op_cls, FunctionOptions(**overrides))
    for func_name, op_cls, overrides in [
        ("sum", DataFrameSum, {}),
        ("prod", DataFrameProd, {}),
        ("min", DataFrameMin, {}),
        ("max", DataFrameMax, {}),
        ("count", DataFrameCount, {"has_skipna": False}),
        ("mean", DataFrameMean, {}),
        ("var", DataFrameVar, {}),
        ("skew", DataFrameSkew, {}),
        ("kurt", DataFrameKurtosis, {}),
        ("sem", DataFrameSem, {}),
        ("all", DataFrameAll, {"has_numeric_only": False, "has_bool_only": True}),
        ("any", DataFrameAny, {"has_numeric_only": False, "has_bool_only": True}),
    ]
]
75
+
76
+
77
@pytest.mark.parametrize("func_name,op,func_opts", reduction_functions)
def test_series_reduction(func_name, op, func_opts: FunctionOptions):
    """Reducing a series must produce a zero-dimensional tensor."""
    # string-labelled integer series, reduced with default arguments
    labelled = pd.Series(range(20), index=[str(i) for i in range(20)])
    reduce_method = getattr(from_pandas_series(labelled, chunk_size=3), func_name)
    reduced = reduce_method()

    assert isinstance(reduced, Tensor)
    assert isinstance(reduced.op, op)
    assert reduced.shape == ()

    # named float series; axis/skipna are only passed when the options allow it
    named = pd.Series(np.random.rand(25), name="a")
    call_kwargs = {"axis": "index", "skipna": False} if func_opts.has_skipna else {}
    reduce_method = getattr(from_pandas_series(named, chunk_size=7), func_name)
    reduced = reduce_method(**call_kwargs)

    assert isinstance(reduced, Tensor)
    assert reduced.shape == ()
95
+
96
+
97
@pytest.mark.parametrize("func_name,op,func_opts", reduction_functions)
def test_dataframe_reduction(func_name, op, func_opts: FunctionOptions):
    """Reducing a DataFrame must produce a series over the remaining axis."""
    str_labels = [str(i) for i in range(20)]

    # two integer columns under a string index, reduced with defaults
    raw = pd.DataFrame(
        {"a": list(range(20)), "b": list(range(20, 0, -1))}, index=str_labels
    )
    reduced = getattr(from_pandas_df(raw, chunk_size=3), func_name)()

    assert isinstance(reduced, Series)
    assert isinstance(reduced.op, op)
    assert isinstance(reduced.index_value._index_value, IndexValue.Index)
    assert reduced.shape == (2,)

    # default integer column labels: the result index is range- or int64-based
    raw = pd.DataFrame(np.random.rand(20, 10))
    reduced = getattr(from_pandas_df(raw, chunk_size=3), func_name)()
    accepted_index_types = (IndexValue.RangeIndex, IndexValue.Int64Index)

    assert isinstance(reduced, Series)
    assert isinstance(reduced.index_value._index_value, accepted_index_types)
    assert reduced.shape == (10,)

    # reducing along the columns leaves one value per row
    raw = pd.DataFrame(np.random.rand(20, 20), index=str_labels)
    row_wise = getattr(from_pandas_df(raw, chunk_size=4), func_name)
    reduced = row_wise(axis="columns")

    assert reduced.shape == (20,)

    # passing ``level`` is expected to be refused
    with pytest.raises(NotImplementedError):
        getattr(from_pandas_df(raw, chunk_size=3), func_name)(level=0, axis=1)
129
+
130
+
131
# Parametrization table for the cumulative reduction tests, in the same
# (method name, operator class, FunctionOptions) layout as the table above;
# every cumulative function uses the default options.
cum_reduction_functions = [
    (func_name, op_cls, FunctionOptions())
    for func_name, op_cls in [
        ("cummin", DataFrameCummin),
        ("cummax", DataFrameCummax),
        ("cumprod", DataFrameCumprod),
        ("cumsum", DataFrameCumsum),
    ]
]
137
+
138
+
139
@pytest.mark.parametrize("func_name,op,func_opts", cum_reduction_functions)
def test_cum_series_reduction(func_name, op, func_opts: FunctionOptions):
    """Cumulative reductions on a series must keep the series shape."""
    # Pass the values directly.  Wrapping them in ``{"a": [...]}`` while also
    # giving an index makes pandas align the dict key against that index, which
    # drops "a" and leaves a series of 20 NaN values instead of 0..19.
    data = pd.Series(range(20), index=[str(i) for i in range(20)])
    series = getattr(from_pandas_series(data, chunk_size=3), func_name)()

    assert isinstance(series, Series)
    assert series.shape == (20,)

    # named float series; axis/skipna are only passed when the options allow it
    data = pd.Series(np.random.rand(25), name="a")
    if func_opts.has_skipna:
        kwargs = dict(axis="index", skipna=False)
    else:
        kwargs = dict()
    series = getattr(from_pandas_series(data, chunk_size=7), func_name)(**kwargs)

    assert isinstance(series, Series)
    assert series.shape == (25,)
156
+
157
+
158
@pytest.mark.parametrize("func_name,op,func_opts", cum_reduction_functions)
def test_cum_dataframe_reduction(func_name, op, func_opts: FunctionOptions):
    """Cumulative reductions on a DataFrame must keep the frame's shape."""
    # two integer columns under a string index
    raw = pd.DataFrame(
        {"a": list(range(20)), "b": list(range(20, 0, -1))},
        index=[str(i) for i in range(20)],
    )
    cumulative = getattr(from_pandas_df(raw, chunk_size=3), func_name)()

    assert isinstance(cumulative, DataFrame)
    assert isinstance(cumulative.index_value._index_value, IndexValue.Index)
    assert cumulative.shape == (20, 2)

    # default row labels: the range index must be carried over
    raw = pd.DataFrame(np.random.rand(20, 10))
    cumulative = getattr(from_pandas_df(raw, chunk_size=3), func_name)()

    assert isinstance(cumulative, DataFrame)
    assert isinstance(cumulative.index_value._index_value, IndexValue.RangeIndex)
    assert cumulative.shape == (20, 10)
176
+
177
+
178
def test_nunique():
    """``nunique`` must yield a series sized by the axis that is kept."""
    raw = pd.DataFrame(
        np.random.randint(0, 6, size=(20, 10)),
        columns=["c" + str(i) for i in range(10)],
    )

    # default axis: one count per column
    per_column = from_pandas_df(raw, chunk_size=3).nunique()

    assert per_column.shape == (10,)
    assert per_column.op.output_types[0] == OutputType.series
    assert isinstance(per_column.op, DataFrameNunique)

    # axis=1 on a copy of the same data: one count per row
    raw_copy = raw.copy()
    per_row = from_pandas_df(raw_copy, chunk_size=3).nunique(axis=1)

    assert per_row.shape == (20,)
    assert per_row.op.output_types[0] == OutputType.series
    assert isinstance(per_row.op, DataFrameNunique)
197
+
198
+
199
def test_dataframe_aggregate():
    """Check the metadata of ``DataFrame.agg`` for str, list and dict specs."""
    raw = pd.DataFrame(np.random.rand(20, 19))
    n_rows, n_cols = raw.shape
    agg_funcs = [
        "sum",
        "min",
        "max",
        "mean",
        "var",
        "std",
        "all",
        "any",
        "skew",
        "kurt",
        "sem",
    ]

    # frame created without an explicit chunk size, list of functions
    df = from_pandas_df(raw)
    aggregated = df.agg(agg_funcs)
    assert aggregated.shape == (len(agg_funcs), n_cols)
    assert list(aggregated.columns_value.to_pandas()) == list(range(19))
    assert list(aggregated.index_value.to_pandas()) == agg_funcs
    assert aggregated.op.output_types[0] == OutputType.dataframe
    assert aggregated.op.func == agg_funcs

    # every remaining case runs against a frame chunked as (3, 4)
    df = from_pandas_df(raw, chunk_size=(3, 4))

    # single function name, default axis: series over the columns
    aggregated = df.agg("sum")
    assert aggregated.shape == (n_cols,)
    assert list(aggregated.index_value.to_pandas()) == list(range(n_cols))
    assert aggregated.op.output_types[0] == OutputType.series
    assert aggregated.op.func == ["sum"]

    # single function name, axis=1: series over the rows
    aggregated = df.agg("sum", axis=1)
    assert aggregated.shape == (n_rows,)
    assert list(aggregated.index_value.to_pandas()) == list(range(n_rows))
    assert aggregated.op.output_types[0] == OutputType.series

    aggregated = df.agg("var", axis=1)
    assert aggregated.shape == (n_rows,)
    assert list(aggregated.index_value.to_pandas()) == list(range(n_rows))
    assert aggregated.op.output_types[0] == OutputType.series
    assert aggregated.op.func == ["var"]

    # list of functions, default axis: functions become the row labels
    aggregated = df.agg(agg_funcs)
    assert aggregated.shape == (len(agg_funcs), n_cols)
    assert list(aggregated.columns_value.to_pandas()) == list(range(n_cols))
    assert list(aggregated.index_value.to_pandas()) == agg_funcs
    assert aggregated.op.output_types[0] == OutputType.dataframe
    assert aggregated.op.func == agg_funcs

    # list of functions, axis=1: functions become the column labels
    aggregated = df.agg(agg_funcs, axis=1)
    assert aggregated.shape == (n_rows, len(agg_funcs))
    assert list(aggregated.columns_value.to_pandas()) == agg_funcs
    assert list(aggregated.index_value.to_pandas()) == list(range(n_rows))
    assert aggregated.op.output_types[0] == OutputType.dataframe
    assert aggregated.op.func == agg_funcs

    # dict spec: the rows are the union of all requested function names
    dict_fun = {0: "sum", 2: ["var", "max"], 9: ["mean", "var", "std"]}
    all_cols = set()
    for spec in dict_fun.values():
        all_cols.update([spec] if isinstance(spec, str) else spec)
    aggregated = df.agg(dict_fun)
    assert aggregated.shape == (len(all_cols), len(dict_fun))
    assert set(aggregated.columns_value.to_pandas()) == set(dict_fun.keys())
    assert set(aggregated.index_value.to_pandas()) == all_cols
    assert aggregated.op.output_types[0] == OutputType.dataframe
    assert aggregated.op.func[0] == [dict_fun[0]]
    assert aggregated.op.func[2] == dict_fun[2]

    # keyword-style specs with plain strings are expected to raise TypeError
    with pytest.raises(TypeError):
        df.agg(sum_0="sum", mean_0="mean")
    # dict specs combined with axis=1 are expected to be refused
    with pytest.raises(NotImplementedError):
        df.agg({0: ["sum", "min", "var"], 9: ["mean", "var", "std"]}, axis=1)
274
+
275
+
276
def test_series_aggregate():
    """Check the metadata of ``Series.agg`` for str and list specs."""
    raw = pd.Series(np.random.rand(20), index=[str(i) for i in range(20)], name="a")
    agg_funcs = [
        "sum",
        "min",
        "max",
        "mean",
        "var",
        "std",
        "all",
        "any",
        "skew",
        "kurt",
        "sem",
    ]

    # series created without an explicit chunk size
    whole = from_pandas_series(raw)

    aggregated = whole.agg(agg_funcs)
    assert aggregated.shape == (len(agg_funcs),)
    assert list(aggregated.index_value.to_pandas()) == agg_funcs
    assert aggregated.op.output_types[0] == OutputType.series
    assert aggregated.op.func == agg_funcs

    # series split with chunk_size=3
    chunked = from_pandas_series(raw, chunk_size=3)

    # a single function name collapses the series to a scalar
    aggregated = chunked.agg("sum")
    assert aggregated.shape == ()
    assert aggregated.op.output_types[0] == OutputType.scalar

    aggregated = chunked.agg(agg_funcs)
    assert aggregated.shape == (len(agg_funcs),)
    assert list(aggregated.index_value.to_pandas()) == agg_funcs
    assert aggregated.op.output_types[0] == OutputType.series
    assert aggregated.op.func == agg_funcs

    # tuple-valued keyword specs are expected to raise TypeError on a series
    with pytest.raises(TypeError):
        chunked.agg(sum_0=(0, "sum"), mean_0=(0, "mean"))
314
+
315
+
316
def test_compile_function():
    """Exercise ReductionCompiler input validation and compiled output."""
    compiler = ReductionCompiler()
    ms = md.Series([1, 2, 3])
    # (function, ndim) pairs that ``add_function`` must refuse with ValueError
    rejected = [
        # no MaxFrame objects inside closures
        (functools.partial(lambda x: (x + ms).sum()), 2),
        # function should return a MaxFrame object
        (lambda x: x is not None, 2),
        # function should perform some sort of reduction in dimensionality
        (lambda x: x, 2),
        # function should only contain acceptable operators
        (lambda x: x.sort_values().max(), 1),
        (lambda x: x.max().shift(1), 2),
    ]
    for bad_func, bad_ndim in rejected:
        with pytest.raises(ValueError):
            compiler.add_function(bad_func, ndim=bad_ndim)

    # test agg for all data
    for ndim in [1, 2]:
        compiler = ReductionCompiler()
        compiler.add_function(lambda x: (x**2).count() + 1, ndim=ndim)
        compiled = compiler.compile()
        # check pre_funcs
        assert len(compiled.pre_funcs) == 1
        assert b"pow" in compiled.pre_funcs[0].func_idl
        # check agg_funcs
        assert len(compiled.agg_funcs) == 1
        only_agg = compiled.agg_funcs[0]
        assert only_agg.map_func_name == "count"
        assert only_agg.agg_func_name == "sum"
        # check post_funcs
        assert len(compiled.post_funcs) == 1
        only_post = compiled.post_funcs[0]
        assert only_post.func_name == "<lambda>"
        assert b"add" in only_post.func_idl

        # a second function added to the same compiler
        compiler.add_function(
            lambda x: -x.prod() ** 2 + (1 + (x**2).count()), ndim=ndim
        )
        compiled = compiler.compile()
        # check pre_funcs: exactly one of the two contains a power step
        assert len(compiled.pre_funcs) == 2
        has_pow = [b"pow" in pre.func_idl for pre in compiled.pre_funcs]
        assert any(has_pow)
        assert not all(has_pow)
        # check agg_funcs
        assert len(compiled.agg_funcs) == 2
        map_names = {compiled.agg_funcs[i].map_func_name for i in range(2)}
        agg_names = {compiled.agg_funcs[i].agg_func_name for i in range(2)}
        assert map_names == {"count", "prod"}
        assert agg_names == {"sum", "prod"}
        # check post_funcs
        assert len(compiled.post_funcs) == 2
        assert compiled.post_funcs[0].func_name == "<lambda_0>"
        assert b"add" in compiled.post_funcs[0].func_idl
        assert b"add" in compiled.post_funcs[1].func_idl

        compiler = ReductionCompiler()
        compiler.add_function(
            lambda x: where_function(x.all(), x.count(), 0), ndim=ndim
        )
        compiled = compiler.compile()
        # check pre_funcs
        assert len(compiled.pre_funcs) == 1
        only_pre = compiled.pre_funcs[0]
        assert only_pre.input_key == only_pre.output_key
        # check agg_funcs
        assert len(compiled.agg_funcs) == 2
        map_names = {compiled.agg_funcs[i].map_func_name for i in range(2)}
        agg_names = {compiled.agg_funcs[i].agg_func_name for i in range(2)}
        assert map_names == {"all", "count"}
        assert agg_names == {"sum", "all"}
        # check post_funcs
        assert len(compiled.post_funcs) == 1
        assert b"where" in compiled.post_funcs[0].func_idl

        # check boolean expressions
        compiler = ReductionCompiler()
        compiler.add_function(lambda x: (x == "1").sum(), ndim=ndim)
        compiled = compiler.compile()
        # check pre_funcs
        assert len(compiled.pre_funcs) == 1
        assert b"eq" in compiled.pre_funcs[0].func_idl
        # check agg_funcs
        assert len(compiled.agg_funcs) == 1
        only_agg = compiled.agg_funcs[0]
        assert only_agg.map_func_name == "sum"
        assert only_agg.agg_func_name == "sum"

    # test agg for specific columns
    compiler = ReductionCompiler()
    compiler.add_function(lambda x: 1 + x.sum(), ndim=2, cols=["a", "b"])
    compiler.add_function(lambda x: -1 + x.sum(), ndim=2, cols=["b", "c"])
    compiled = compiler.compile()
    # check pre_funcs
    assert len(compiled.pre_funcs) == 1
    assert set(compiled.pre_funcs[0].columns) == set("abc")
    # check agg_funcs
    assert len(compiled.agg_funcs) == 1
    only_agg = compiled.agg_funcs[0]
    assert only_agg.map_func_name == "sum"
    assert only_agg.agg_func_name == "sum"
    # check post_funcs
    assert len(compiled.post_funcs) == 2
    post_columns = {
        "".join(sorted(compiled.post_funcs[i].columns)) for i in range(2)
    }
    assert post_columns == {"ab", "bc"}

    # test agg for multiple columns
    compiler = ReductionCompiler()
    compiler.add_function(lambda x: x.sum(), ndim=2, cols=["a"])
    compiler.add_function(lambda x: x.sum(), ndim=2, cols=["b"])
    compiler.add_function(lambda x: x.min(), ndim=2, cols=["c"])
    compiled = compiler.compile()
    # check pre_funcs
    assert len(compiled.pre_funcs) == 1
    assert set(compiled.pre_funcs[0].columns) == set("abc")
    # check agg_funcs
    assert len(compiled.agg_funcs) == 2
    first_agg = compiled.agg_funcs[0]
    assert first_agg.map_func_name == "sum"
    assert first_agg.agg_func_name == "sum"
    # check post_funcs
    assert len(compiled.post_funcs) == 2
    assert set(compiled.post_funcs[0].columns) == set("ab")
450
+
451
+
452
def test_custom_aggregation():
    """CustomReduction instances must compile into one custom agg function."""

    class MockReduction1(CustomReduction):
        # only ``agg`` is overridden, returning a single value
        def agg(self, v1):
            return v1.sum()

    class MockReduction2(CustomReduction):
        # ``pre`` and ``agg`` both produce two values, ``post`` merges them
        def pre(self, value):
            return value + 1, value**2

        def agg(self, v1, v2):
            return v1.sum(), v2.prod()

        def post(self, v1, v2):
            return v1 + v2

    # (reduction class, output_limit expected on the compiled agg function)
    cases = [(MockReduction1, 1), (MockReduction2, 2)]

    for ndim in [1, 2]:
        for reduction_cls, expected_limit in cases:
            compiler = ReductionCompiler()
            compiler.add_function(reduction_cls(), ndim=ndim)
            compiled = compiler.compile()
            # check agg_funcs
            assert len(compiled.agg_funcs) == 1
            agg_func = compiled.agg_funcs[0]
            assert agg_func.map_func_name == "custom_reduction"
            assert agg_func.agg_func_name == "custom_reduction"
            assert isinstance(agg_func.custom_reduction, reduction_cls)
            assert agg_func.output_limit == expected_limit
@@ -0,0 +1,90 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ import numpy as np
17
+ import pandas as pd
18
+
19
+ from ... import opcodes
20
+ from ...core import ENTITY_TYPE, OutputType
21
+ from ...tensor.core import TensorOrder
22
+ from ...utils import lazy_import
23
+ from ..initializer import Series as asseries
24
+ from .core import CustomReduction, DataFrameReductionMixin, DataFrameReductionOperator
25
+
26
+ cudf = lazy_import("cudf")
27
+
28
+
29
class UniqueReduction(CustomReduction):
    """Custom reduction that de-duplicates values in ``agg`` and in ``post``."""

    def agg(self, data):  # noqa: W0221  # pylint: disable=arguments-differ
        """Return the unique values of ``data`` wrapped in a Series."""
        # pick the series implementation matching where the data lives
        if self.is_gpu():
            series_module = cudf
        else:
            series_module = pd
        # convert to series data
        distinct = data.unique()
        return series_module.Series(distinct)

    def post(self, data):  # noqa: W0221  # pylint: disable=arguments-differ
        """Return the unique values of ``data``."""
        return data.unique()
37
+
38
+
39
class DataFrameUnique(DataFrameReductionOperator, DataFrameReductionMixin):
    """Reduction operator registered under ``opcodes.UNIQUE`` as "unique"."""

    _op_type_ = opcodes.UNIQUE
    _func_name = "unique"

    @classmethod
    def get_reduction_callable(cls, op):
        """Build the ``UniqueReduction`` matching ``op``'s GPU setting."""
        on_gpu = op.is_gpu()
        return UniqueReduction(name=cls._func_name, is_gpu=on_gpu)

    def __call__(self, a):
        """Apply the operator to ``a`` and return the resulting tensor.

        Inputs that are not already entities are wrapped into a Series
        first.  The output is declared as a one-dimensional tensor whose
        length is unknown (``nan``) and whose dtype is that of the input.
        """
        source = a if isinstance(a, ENTITY_TYPE) else asseries(a)
        self.output_types = [OutputType.tensor]
        outputs = self.new_tileables(
            [source],
            shape=(np.nan,),
            dtype=source.dtype,
            order=TensorOrder.C_ORDER,
        )
        return outputs[0]
54
+
55
+
56
def unique(values, method="tree"):
    """
    Return the unique values of a one-dimensional input.

    Uniques are returned in order of appearance. This does NOT sort.

    Parameters
    ----------
    values : 1d array-like
        Data to de-duplicate.
    method : {'tree', 'shuffle'}, default 'tree'
        'tree' provides better performance, 'shuffle' is recommended if the
        number of unique values is very large.

    Returns
    -------
    The object produced by calling a ``DataFrameUnique`` operator on
    ``values``.

    See Also
    --------
    Index.unique
    Series.unique

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> import pandas as pd
    >>> md.unique(md.Series([2, 1, 3, 3])).execute()
    array([2, 1, 3])

    >>> md.unique(md.Series([2] + [1] * 5)).execute()
    array([2, 1])

    >>> md.unique(md.Series([pd.Timestamp('20160101'),
    ...                      pd.Timestamp('20160101')])).execute()
    array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')

    >>> md.unique(md.Series([pd.Timestamp('20160101', tz='US/Eastern'),
    ...                      pd.Timestamp('20160101', tz='US/Eastern')])).execute()
    array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')],
          dtype=object)
    """
    unique_op = DataFrameUnique(method=method)
    result = unique_op(values)
    return result
@@ -0,0 +1,72 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ... import opcodes
16
+ from ...core import OutputType
17
+ from ...serialization.serializables import Int32Field
18
+ from .core import DataFrameReductionMixin, DataFrameReductionOperator
19
+
20
+
21
class DataFrameVar(DataFrameReductionOperator, DataFrameReductionMixin):
    """Reduction operator registered under ``opcodes.VAR`` as "var"."""

    _op_type_ = opcodes.VAR
    _func_name = "var"

    # value subtracted from the element count in the variance divisor
    ddof = Int32Field("ddof", default=None)

    @classmethod
    def get_reduction_callable(cls, op: "DataFrameVar"):
        """Return a function computing the variance with ``op``'s settings.

        The returned callable expresses the variance through ``count``,
        ``sum`` and ``mean`` reductions of its argument, using the
        ``skipna`` and ``ddof`` values captured from ``op``.
        """
        skipna = op.skipna
        ddof = op.ddof

        def var(x):
            cnt = x.count()
            if ddof == 0:
                # mean of squares minus the squared mean
                mean_of_squares = (x**2).mean(skipna=skipna)
                squared_mean = x.mean(skipna=skipna) ** 2
                return mean_of_squares - squared_mean
            # sum of squared deviations divided by (count - ddof)
            sum_of_squares = (x**2).sum(skipna=skipna)
            squared_sum = x.sum(skipna=skipna) ** 2
            return (sum_of_squares - squared_sum / cnt) / (cnt - ddof)

        return var
40
+
41
+
42
def var_series(series, axis=None, skipna=True, level=None, ddof=1, method=None):
    """Apply a scalar-output ``DataFrameVar`` operator to ``series``.

    All keyword arguments are forwarded unchanged to the operator.
    """
    op_kwargs = {
        "axis": axis,
        "skipna": skipna,
        "level": level,
        "ddof": ddof,
        "output_types": [OutputType.scalar],
        "method": method,
    }
    return DataFrameVar(**op_kwargs)(series)
52
+
53
+
54
def var_dataframe(
    df,
    axis=None,
    skipna=True,
    level=None,
    ddof=1,
    numeric_only=None,
    method=None,
):
    """Apply a series-output ``DataFrameVar`` operator to ``df``.

    All keyword arguments are forwarded unchanged to the operator.
    """
    op_kwargs = {
        "axis": axis,
        "skipna": skipna,
        "level": level,
        "ddof": ddof,
        "numeric_only": numeric_only,
        "output_types": [OutputType.series],
        "method": method,
    }
    return DataFrameVar(**op_kwargs)(df)
@@ -0,0 +1,34 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from .sort_index import DataFrameSortIndex
16
+ from .sort_values import DataFrameSortValues
17
+
18
+
19
def _install():
    """Attach ``sort_values`` and ``sort_index`` to DataFrame and Series types."""
    from ..core import DATAFRAME_TYPE, SERIES_TYPE
    from .sort_index import sort_index
    from .sort_values import dataframe_sort_values, series_sort_values

    # each group of entity classes gets its own ``sort_values`` implementation,
    # while ``sort_index`` is shared by both groups
    bindings = (
        (DATAFRAME_TYPE, dataframe_sort_values),
        (SERIES_TYPE, series_sort_values),
    )
    for entity_types, sort_values_impl in bindings:
        for entity_cls in entity_types:
            entity_cls.sort_values = sort_values_impl
            entity_cls.sort_index = sort_index


# run the registration once at import time, then drop the helper name
_install()
del _install