maxframe 0.1.0b5__cp38-cp38-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (647)
  1. maxframe/__init__.py +32 -0
  2. maxframe/_utils.cpython-38-darwin.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyx +547 -0
  5. maxframe/codegen.py +528 -0
  6. maxframe/config/__init__.py +15 -0
  7. maxframe/config/config.py +443 -0
  8. maxframe/config/tests/__init__.py +13 -0
  9. maxframe/config/tests/test_config.py +103 -0
  10. maxframe/config/tests/test_validators.py +34 -0
  11. maxframe/config/validators.py +57 -0
  12. maxframe/conftest.py +139 -0
  13. maxframe/core/__init__.py +65 -0
  14. maxframe/core/base.py +156 -0
  15. maxframe/core/entity/__init__.py +44 -0
  16. maxframe/core/entity/chunks.py +68 -0
  17. maxframe/core/entity/core.py +152 -0
  18. maxframe/core/entity/executable.py +337 -0
  19. maxframe/core/entity/fuse.py +73 -0
  20. maxframe/core/entity/objects.py +100 -0
  21. maxframe/core/entity/output_types.py +90 -0
  22. maxframe/core/entity/tileables.py +438 -0
  23. maxframe/core/entity/utils.py +24 -0
  24. maxframe/core/graph/__init__.py +17 -0
  25. maxframe/core/graph/builder/__init__.py +16 -0
  26. maxframe/core/graph/builder/base.py +86 -0
  27. maxframe/core/graph/builder/chunk.py +430 -0
  28. maxframe/core/graph/builder/tileable.py +34 -0
  29. maxframe/core/graph/builder/utils.py +41 -0
  30. maxframe/core/graph/core.cpython-38-darwin.so +0 -0
  31. maxframe/core/graph/core.pyx +467 -0
  32. maxframe/core/graph/entity.py +171 -0
  33. maxframe/core/graph/tests/__init__.py +13 -0
  34. maxframe/core/graph/tests/test_graph.py +205 -0
  35. maxframe/core/mode.py +96 -0
  36. maxframe/core/operator/__init__.py +34 -0
  37. maxframe/core/operator/base.py +450 -0
  38. maxframe/core/operator/core.py +276 -0
  39. maxframe/core/operator/fetch.py +53 -0
  40. maxframe/core/operator/fuse.py +29 -0
  41. maxframe/core/operator/objects.py +72 -0
  42. maxframe/core/operator/shuffle.py +111 -0
  43. maxframe/core/operator/tests/__init__.py +13 -0
  44. maxframe/core/operator/tests/test_core.py +64 -0
  45. maxframe/core/tests/__init__.py +13 -0
  46. maxframe/core/tests/test_mode.py +75 -0
  47. maxframe/dataframe/__init__.py +81 -0
  48. maxframe/dataframe/arithmetic/__init__.py +359 -0
  49. maxframe/dataframe/arithmetic/abs.py +33 -0
  50. maxframe/dataframe/arithmetic/add.py +60 -0
  51. maxframe/dataframe/arithmetic/arccos.py +28 -0
  52. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  53. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  54. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  55. maxframe/dataframe/arithmetic/arctan.py +28 -0
  56. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  57. maxframe/dataframe/arithmetic/around.py +152 -0
  58. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  59. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  60. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  61. maxframe/dataframe/arithmetic/ceil.py +28 -0
  62. maxframe/dataframe/arithmetic/core.py +342 -0
  63. maxframe/dataframe/arithmetic/cos.py +28 -0
  64. maxframe/dataframe/arithmetic/cosh.py +28 -0
  65. maxframe/dataframe/arithmetic/degrees.py +28 -0
  66. maxframe/dataframe/arithmetic/docstring.py +442 -0
  67. maxframe/dataframe/arithmetic/equal.py +56 -0
  68. maxframe/dataframe/arithmetic/exp.py +28 -0
  69. maxframe/dataframe/arithmetic/exp2.py +28 -0
  70. maxframe/dataframe/arithmetic/expm1.py +28 -0
  71. maxframe/dataframe/arithmetic/floor.py +28 -0
  72. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  73. maxframe/dataframe/arithmetic/greater.py +57 -0
  74. maxframe/dataframe/arithmetic/greater_equal.py +57 -0
  75. maxframe/dataframe/arithmetic/invert.py +33 -0
  76. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  77. maxframe/dataframe/arithmetic/less.py +57 -0
  78. maxframe/dataframe/arithmetic/less_equal.py +57 -0
  79. maxframe/dataframe/arithmetic/log.py +28 -0
  80. maxframe/dataframe/arithmetic/log10.py +28 -0
  81. maxframe/dataframe/arithmetic/log2.py +28 -0
  82. maxframe/dataframe/arithmetic/mod.py +60 -0
  83. maxframe/dataframe/arithmetic/multiply.py +60 -0
  84. maxframe/dataframe/arithmetic/negative.py +33 -0
  85. maxframe/dataframe/arithmetic/not_equal.py +56 -0
  86. maxframe/dataframe/arithmetic/power.py +68 -0
  87. maxframe/dataframe/arithmetic/radians.py +28 -0
  88. maxframe/dataframe/arithmetic/sin.py +28 -0
  89. maxframe/dataframe/arithmetic/sinh.py +28 -0
  90. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  91. maxframe/dataframe/arithmetic/subtract.py +64 -0
  92. maxframe/dataframe/arithmetic/tan.py +28 -0
  93. maxframe/dataframe/arithmetic/tanh.py +28 -0
  94. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  95. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
  96. maxframe/dataframe/arithmetic/truediv.py +64 -0
  97. maxframe/dataframe/arithmetic/trunc.py +28 -0
  98. maxframe/dataframe/arrays.py +864 -0
  99. maxframe/dataframe/core.py +2417 -0
  100. maxframe/dataframe/datasource/__init__.py +15 -0
  101. maxframe/dataframe/datasource/core.py +81 -0
  102. maxframe/dataframe/datasource/dataframe.py +59 -0
  103. maxframe/dataframe/datasource/date_range.py +504 -0
  104. maxframe/dataframe/datasource/from_index.py +54 -0
  105. maxframe/dataframe/datasource/from_records.py +107 -0
  106. maxframe/dataframe/datasource/from_tensor.py +419 -0
  107. maxframe/dataframe/datasource/index.py +117 -0
  108. maxframe/dataframe/datasource/read_csv.py +528 -0
  109. maxframe/dataframe/datasource/read_odps_query.py +299 -0
  110. maxframe/dataframe/datasource/read_odps_table.py +253 -0
  111. maxframe/dataframe/datasource/read_parquet.py +421 -0
  112. maxframe/dataframe/datasource/series.py +55 -0
  113. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  114. maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
  115. maxframe/dataframe/datastore/__init__.py +26 -0
  116. maxframe/dataframe/datastore/core.py +19 -0
  117. maxframe/dataframe/datastore/to_csv.py +227 -0
  118. maxframe/dataframe/datastore/to_odps.py +162 -0
  119. maxframe/dataframe/extensions/__init__.py +41 -0
  120. maxframe/dataframe/extensions/accessor.py +50 -0
  121. maxframe/dataframe/extensions/reshuffle.py +83 -0
  122. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  123. maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
  124. maxframe/dataframe/fetch/__init__.py +15 -0
  125. maxframe/dataframe/fetch/core.py +86 -0
  126. maxframe/dataframe/groupby/__init__.py +82 -0
  127. maxframe/dataframe/groupby/aggregation.py +350 -0
  128. maxframe/dataframe/groupby/apply.py +251 -0
  129. maxframe/dataframe/groupby/core.py +179 -0
  130. maxframe/dataframe/groupby/cum.py +124 -0
  131. maxframe/dataframe/groupby/fill.py +141 -0
  132. maxframe/dataframe/groupby/getitem.py +92 -0
  133. maxframe/dataframe/groupby/head.py +105 -0
  134. maxframe/dataframe/groupby/sample.py +214 -0
  135. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  136. maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
  137. maxframe/dataframe/groupby/transform.py +255 -0
  138. maxframe/dataframe/indexing/__init__.py +84 -0
  139. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  140. maxframe/dataframe/indexing/align.py +349 -0
  141. maxframe/dataframe/indexing/at.py +83 -0
  142. maxframe/dataframe/indexing/getitem.py +204 -0
  143. maxframe/dataframe/indexing/iat.py +37 -0
  144. maxframe/dataframe/indexing/iloc.py +566 -0
  145. maxframe/dataframe/indexing/insert.py +86 -0
  146. maxframe/dataframe/indexing/loc.py +411 -0
  147. maxframe/dataframe/indexing/reindex.py +526 -0
  148. maxframe/dataframe/indexing/rename.py +462 -0
  149. maxframe/dataframe/indexing/rename_axis.py +209 -0
  150. maxframe/dataframe/indexing/reset_index.py +402 -0
  151. maxframe/dataframe/indexing/sample.py +221 -0
  152. maxframe/dataframe/indexing/set_axis.py +194 -0
  153. maxframe/dataframe/indexing/set_index.py +61 -0
  154. maxframe/dataframe/indexing/setitem.py +130 -0
  155. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  156. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  157. maxframe/dataframe/indexing/where.py +308 -0
  158. maxframe/dataframe/initializer.py +288 -0
  159. maxframe/dataframe/merge/__init__.py +32 -0
  160. maxframe/dataframe/merge/append.py +121 -0
  161. maxframe/dataframe/merge/concat.py +325 -0
  162. maxframe/dataframe/merge/merge.py +593 -0
  163. maxframe/dataframe/merge/tests/__init__.py +13 -0
  164. maxframe/dataframe/merge/tests/test_merge.py +215 -0
  165. maxframe/dataframe/misc/__init__.py +134 -0
  166. maxframe/dataframe/misc/_duplicate.py +46 -0
  167. maxframe/dataframe/misc/accessor.py +276 -0
  168. maxframe/dataframe/misc/apply.py +692 -0
  169. maxframe/dataframe/misc/astype.py +236 -0
  170. maxframe/dataframe/misc/case_when.py +141 -0
  171. maxframe/dataframe/misc/check_monotonic.py +84 -0
  172. maxframe/dataframe/misc/cut.py +383 -0
  173. maxframe/dataframe/misc/datetimes.py +79 -0
  174. maxframe/dataframe/misc/describe.py +108 -0
  175. maxframe/dataframe/misc/diff.py +210 -0
  176. maxframe/dataframe/misc/drop.py +440 -0
  177. maxframe/dataframe/misc/drop_duplicates.py +248 -0
  178. maxframe/dataframe/misc/duplicated.py +292 -0
  179. maxframe/dataframe/misc/eval.py +728 -0
  180. maxframe/dataframe/misc/explode.py +171 -0
  181. maxframe/dataframe/misc/get_dummies.py +208 -0
  182. maxframe/dataframe/misc/isin.py +217 -0
  183. maxframe/dataframe/misc/map.py +236 -0
  184. maxframe/dataframe/misc/melt.py +162 -0
  185. maxframe/dataframe/misc/memory_usage.py +248 -0
  186. maxframe/dataframe/misc/pct_change.py +150 -0
  187. maxframe/dataframe/misc/pivot_table.py +262 -0
  188. maxframe/dataframe/misc/qcut.py +104 -0
  189. maxframe/dataframe/misc/select_dtypes.py +104 -0
  190. maxframe/dataframe/misc/shift.py +256 -0
  191. maxframe/dataframe/misc/stack.py +238 -0
  192. maxframe/dataframe/misc/string_.py +221 -0
  193. maxframe/dataframe/misc/tests/__init__.py +13 -0
  194. maxframe/dataframe/misc/tests/test_misc.py +468 -0
  195. maxframe/dataframe/misc/to_numeric.py +178 -0
  196. maxframe/dataframe/misc/transform.py +361 -0
  197. maxframe/dataframe/misc/transpose.py +136 -0
  198. maxframe/dataframe/misc/value_counts.py +182 -0
  199. maxframe/dataframe/missing/__init__.py +53 -0
  200. maxframe/dataframe/missing/checkna.py +223 -0
  201. maxframe/dataframe/missing/dropna.py +280 -0
  202. maxframe/dataframe/missing/fillna.py +275 -0
  203. maxframe/dataframe/missing/replace.py +439 -0
  204. maxframe/dataframe/missing/tests/__init__.py +13 -0
  205. maxframe/dataframe/missing/tests/test_missing.py +89 -0
  206. maxframe/dataframe/operators.py +273 -0
  207. maxframe/dataframe/plotting/__init__.py +40 -0
  208. maxframe/dataframe/plotting/core.py +78 -0
  209. maxframe/dataframe/plotting/tests/__init__.py +13 -0
  210. maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
  211. maxframe/dataframe/reduction/__init__.py +107 -0
  212. maxframe/dataframe/reduction/aggregation.py +344 -0
  213. maxframe/dataframe/reduction/all.py +78 -0
  214. maxframe/dataframe/reduction/any.py +78 -0
  215. maxframe/dataframe/reduction/core.py +837 -0
  216. maxframe/dataframe/reduction/count.py +59 -0
  217. maxframe/dataframe/reduction/cummax.py +30 -0
  218. maxframe/dataframe/reduction/cummin.py +30 -0
  219. maxframe/dataframe/reduction/cumprod.py +30 -0
  220. maxframe/dataframe/reduction/cumsum.py +30 -0
  221. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  222. maxframe/dataframe/reduction/kurtosis.py +104 -0
  223. maxframe/dataframe/reduction/max.py +65 -0
  224. maxframe/dataframe/reduction/mean.py +61 -0
  225. maxframe/dataframe/reduction/min.py +65 -0
  226. maxframe/dataframe/reduction/nunique.py +141 -0
  227. maxframe/dataframe/reduction/prod.py +76 -0
  228. maxframe/dataframe/reduction/reduction_size.py +36 -0
  229. maxframe/dataframe/reduction/sem.py +69 -0
  230. maxframe/dataframe/reduction/skew.py +89 -0
  231. maxframe/dataframe/reduction/std.py +53 -0
  232. maxframe/dataframe/reduction/str_concat.py +48 -0
  233. maxframe/dataframe/reduction/sum.py +77 -0
  234. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  235. maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
  236. maxframe/dataframe/reduction/unique.py +90 -0
  237. maxframe/dataframe/reduction/var.py +72 -0
  238. maxframe/dataframe/sort/__init__.py +34 -0
  239. maxframe/dataframe/sort/core.py +36 -0
  240. maxframe/dataframe/sort/sort_index.py +153 -0
  241. maxframe/dataframe/sort/sort_values.py +311 -0
  242. maxframe/dataframe/sort/tests/__init__.py +13 -0
  243. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  244. maxframe/dataframe/statistics/__init__.py +33 -0
  245. maxframe/dataframe/statistics/corr.py +280 -0
  246. maxframe/dataframe/statistics/quantile.py +341 -0
  247. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  248. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  249. maxframe/dataframe/tests/__init__.py +13 -0
  250. maxframe/dataframe/tests/test_initializer.py +29 -0
  251. maxframe/dataframe/tseries/__init__.py +13 -0
  252. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  253. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  254. maxframe/dataframe/tseries/to_datetime.py +297 -0
  255. maxframe/dataframe/ufunc/__init__.py +27 -0
  256. maxframe/dataframe/ufunc/tensor.py +54 -0
  257. maxframe/dataframe/ufunc/ufunc.py +52 -0
  258. maxframe/dataframe/utils.py +1267 -0
  259. maxframe/dataframe/window/__init__.py +29 -0
  260. maxframe/dataframe/window/aggregation.py +96 -0
  261. maxframe/dataframe/window/core.py +69 -0
  262. maxframe/dataframe/window/ewm.py +249 -0
  263. maxframe/dataframe/window/expanding.py +147 -0
  264. maxframe/dataframe/window/rolling.py +376 -0
  265. maxframe/dataframe/window/tests/__init__.py +13 -0
  266. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  267. maxframe/dataframe/window/tests/test_expanding.py +66 -0
  268. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  269. maxframe/env.py +33 -0
  270. maxframe/errors.py +21 -0
  271. maxframe/extension.py +81 -0
  272. maxframe/learn/__init__.py +17 -0
  273. maxframe/learn/contrib/__init__.py +17 -0
  274. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  275. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  276. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  277. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  278. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  279. maxframe/learn/contrib/utils.py +52 -0
  280. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  281. maxframe/learn/contrib/xgboost/classifier.py +86 -0
  282. maxframe/learn/contrib/xgboost/core.py +156 -0
  283. maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
  284. maxframe/learn/contrib/xgboost/predict.py +138 -0
  285. maxframe/learn/contrib/xgboost/regressor.py +78 -0
  286. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  287. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  288. maxframe/learn/contrib/xgboost/train.py +121 -0
  289. maxframe/learn/utils/__init__.py +15 -0
  290. maxframe/learn/utils/core.py +29 -0
  291. maxframe/lib/__init__.py +15 -0
  292. maxframe/lib/aio/__init__.py +27 -0
  293. maxframe/lib/aio/_runners.py +162 -0
  294. maxframe/lib/aio/_threads.py +35 -0
  295. maxframe/lib/aio/base.py +82 -0
  296. maxframe/lib/aio/file.py +85 -0
  297. maxframe/lib/aio/isolation.py +100 -0
  298. maxframe/lib/aio/lru.py +242 -0
  299. maxframe/lib/aio/parallelism.py +37 -0
  300. maxframe/lib/aio/tests/__init__.py +13 -0
  301. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  302. maxframe/lib/compression.py +55 -0
  303. maxframe/lib/cython/__init__.py +13 -0
  304. maxframe/lib/cython/libcpp.pxd +30 -0
  305. maxframe/lib/filesystem/__init__.py +21 -0
  306. maxframe/lib/filesystem/_glob.py +173 -0
  307. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  308. maxframe/lib/filesystem/_oss_lib/common.py +198 -0
  309. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  310. maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
  311. maxframe/lib/filesystem/arrow.py +236 -0
  312. maxframe/lib/filesystem/base.py +263 -0
  313. maxframe/lib/filesystem/core.py +95 -0
  314. maxframe/lib/filesystem/fsmap.py +164 -0
  315. maxframe/lib/filesystem/hdfs.py +31 -0
  316. maxframe/lib/filesystem/local.py +112 -0
  317. maxframe/lib/filesystem/oss.py +157 -0
  318. maxframe/lib/filesystem/tests/__init__.py +13 -0
  319. maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
  320. maxframe/lib/filesystem/tests/test_oss.py +182 -0
  321. maxframe/lib/functools_compat.py +81 -0
  322. maxframe/lib/mmh3.cpython-38-darwin.so +0 -0
  323. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  324. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  325. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  326. maxframe/lib/sparse/__init__.py +861 -0
  327. maxframe/lib/sparse/array.py +1604 -0
  328. maxframe/lib/sparse/core.py +92 -0
  329. maxframe/lib/sparse/matrix.py +241 -0
  330. maxframe/lib/sparse/tests/__init__.py +15 -0
  331. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  332. maxframe/lib/sparse/vector.py +150 -0
  333. maxframe/lib/tblib/LICENSE +20 -0
  334. maxframe/lib/tblib/__init__.py +327 -0
  335. maxframe/lib/tblib/cpython.py +83 -0
  336. maxframe/lib/tblib/decorators.py +44 -0
  337. maxframe/lib/tblib/pickling_support.py +90 -0
  338. maxframe/lib/tests/__init__.py +13 -0
  339. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  340. maxframe/lib/version.py +620 -0
  341. maxframe/lib/wrapped_pickle.py +139 -0
  342. maxframe/mixin.py +100 -0
  343. maxframe/odpsio/__init__.py +21 -0
  344. maxframe/odpsio/arrow.py +91 -0
  345. maxframe/odpsio/schema.py +364 -0
  346. maxframe/odpsio/tableio.py +322 -0
  347. maxframe/odpsio/tests/__init__.py +13 -0
  348. maxframe/odpsio/tests/test_arrow.py +88 -0
  349. maxframe/odpsio/tests/test_schema.py +297 -0
  350. maxframe/odpsio/tests/test_tableio.py +136 -0
  351. maxframe/odpsio/tests/test_volumeio.py +90 -0
  352. maxframe/odpsio/volumeio.py +95 -0
  353. maxframe/opcodes.py +590 -0
  354. maxframe/protocol.py +415 -0
  355. maxframe/remote/__init__.py +18 -0
  356. maxframe/remote/core.py +210 -0
  357. maxframe/remote/run_script.py +121 -0
  358. maxframe/serialization/__init__.py +26 -0
  359. maxframe/serialization/arrow.py +95 -0
  360. maxframe/serialization/core.cpython-38-darwin.so +0 -0
  361. maxframe/serialization/core.pxd +44 -0
  362. maxframe/serialization/core.pyi +61 -0
  363. maxframe/serialization/core.pyx +1094 -0
  364. maxframe/serialization/exception.py +86 -0
  365. maxframe/serialization/maxframe_objects.py +39 -0
  366. maxframe/serialization/numpy.py +91 -0
  367. maxframe/serialization/pandas.py +202 -0
  368. maxframe/serialization/scipy.py +71 -0
  369. maxframe/serialization/serializables/__init__.py +55 -0
  370. maxframe/serialization/serializables/core.py +262 -0
  371. maxframe/serialization/serializables/field.py +624 -0
  372. maxframe/serialization/serializables/field_type.py +589 -0
  373. maxframe/serialization/serializables/tests/__init__.py +13 -0
  374. maxframe/serialization/serializables/tests/test_field_type.py +121 -0
  375. maxframe/serialization/serializables/tests/test_serializable.py +250 -0
  376. maxframe/serialization/tests/__init__.py +13 -0
  377. maxframe/serialization/tests/test_serial.py +412 -0
  378. maxframe/session.py +1310 -0
  379. maxframe/tensor/__init__.py +183 -0
  380. maxframe/tensor/arithmetic/__init__.py +315 -0
  381. maxframe/tensor/arithmetic/abs.py +68 -0
  382. maxframe/tensor/arithmetic/absolute.py +68 -0
  383. maxframe/tensor/arithmetic/add.py +82 -0
  384. maxframe/tensor/arithmetic/angle.py +72 -0
  385. maxframe/tensor/arithmetic/arccos.py +104 -0
  386. maxframe/tensor/arithmetic/arccosh.py +91 -0
  387. maxframe/tensor/arithmetic/arcsin.py +94 -0
  388. maxframe/tensor/arithmetic/arcsinh.py +86 -0
  389. maxframe/tensor/arithmetic/arctan.py +106 -0
  390. maxframe/tensor/arithmetic/arctan2.py +128 -0
  391. maxframe/tensor/arithmetic/arctanh.py +86 -0
  392. maxframe/tensor/arithmetic/around.py +114 -0
  393. maxframe/tensor/arithmetic/bitand.py +95 -0
  394. maxframe/tensor/arithmetic/bitor.py +102 -0
  395. maxframe/tensor/arithmetic/bitxor.py +95 -0
  396. maxframe/tensor/arithmetic/cbrt.py +66 -0
  397. maxframe/tensor/arithmetic/ceil.py +71 -0
  398. maxframe/tensor/arithmetic/clip.py +165 -0
  399. maxframe/tensor/arithmetic/conj.py +74 -0
  400. maxframe/tensor/arithmetic/copysign.py +78 -0
  401. maxframe/tensor/arithmetic/core.py +544 -0
  402. maxframe/tensor/arithmetic/cos.py +85 -0
  403. maxframe/tensor/arithmetic/cosh.py +72 -0
  404. maxframe/tensor/arithmetic/deg2rad.py +72 -0
  405. maxframe/tensor/arithmetic/degrees.py +77 -0
  406. maxframe/tensor/arithmetic/divide.py +114 -0
  407. maxframe/tensor/arithmetic/equal.py +76 -0
  408. maxframe/tensor/arithmetic/exp.py +106 -0
  409. maxframe/tensor/arithmetic/exp2.py +67 -0
  410. maxframe/tensor/arithmetic/expm1.py +79 -0
  411. maxframe/tensor/arithmetic/fabs.py +74 -0
  412. maxframe/tensor/arithmetic/fix.py +69 -0
  413. maxframe/tensor/arithmetic/float_power.py +103 -0
  414. maxframe/tensor/arithmetic/floor.py +77 -0
  415. maxframe/tensor/arithmetic/floordiv.py +94 -0
  416. maxframe/tensor/arithmetic/fmax.py +105 -0
  417. maxframe/tensor/arithmetic/fmin.py +106 -0
  418. maxframe/tensor/arithmetic/fmod.py +99 -0
  419. maxframe/tensor/arithmetic/frexp.py +92 -0
  420. maxframe/tensor/arithmetic/greater.py +77 -0
  421. maxframe/tensor/arithmetic/greater_equal.py +69 -0
  422. maxframe/tensor/arithmetic/hypot.py +77 -0
  423. maxframe/tensor/arithmetic/i0.py +89 -0
  424. maxframe/tensor/arithmetic/imag.py +67 -0
  425. maxframe/tensor/arithmetic/invert.py +110 -0
  426. maxframe/tensor/arithmetic/isclose.py +115 -0
  427. maxframe/tensor/arithmetic/iscomplex.py +64 -0
  428. maxframe/tensor/arithmetic/isfinite.py +106 -0
  429. maxframe/tensor/arithmetic/isinf.py +103 -0
  430. maxframe/tensor/arithmetic/isnan.py +82 -0
  431. maxframe/tensor/arithmetic/isreal.py +63 -0
  432. maxframe/tensor/arithmetic/ldexp.py +99 -0
  433. maxframe/tensor/arithmetic/less.py +69 -0
  434. maxframe/tensor/arithmetic/less_equal.py +69 -0
  435. maxframe/tensor/arithmetic/log.py +92 -0
  436. maxframe/tensor/arithmetic/log10.py +85 -0
  437. maxframe/tensor/arithmetic/log1p.py +95 -0
  438. maxframe/tensor/arithmetic/log2.py +85 -0
  439. maxframe/tensor/arithmetic/logaddexp.py +80 -0
  440. maxframe/tensor/arithmetic/logaddexp2.py +78 -0
  441. maxframe/tensor/arithmetic/logical_and.py +81 -0
  442. maxframe/tensor/arithmetic/logical_not.py +74 -0
  443. maxframe/tensor/arithmetic/logical_or.py +82 -0
  444. maxframe/tensor/arithmetic/logical_xor.py +88 -0
  445. maxframe/tensor/arithmetic/lshift.py +82 -0
  446. maxframe/tensor/arithmetic/maximum.py +108 -0
  447. maxframe/tensor/arithmetic/minimum.py +108 -0
  448. maxframe/tensor/arithmetic/mod.py +104 -0
  449. maxframe/tensor/arithmetic/modf.py +83 -0
  450. maxframe/tensor/arithmetic/multiply.py +81 -0
  451. maxframe/tensor/arithmetic/nan_to_num.py +99 -0
  452. maxframe/tensor/arithmetic/negative.py +65 -0
  453. maxframe/tensor/arithmetic/nextafter.py +68 -0
  454. maxframe/tensor/arithmetic/not_equal.py +72 -0
  455. maxframe/tensor/arithmetic/positive.py +47 -0
  456. maxframe/tensor/arithmetic/power.py +106 -0
  457. maxframe/tensor/arithmetic/rad2deg.py +71 -0
  458. maxframe/tensor/arithmetic/radians.py +77 -0
  459. maxframe/tensor/arithmetic/real.py +70 -0
  460. maxframe/tensor/arithmetic/reciprocal.py +76 -0
  461. maxframe/tensor/arithmetic/rint.py +68 -0
  462. maxframe/tensor/arithmetic/rshift.py +81 -0
  463. maxframe/tensor/arithmetic/setimag.py +29 -0
  464. maxframe/tensor/arithmetic/setreal.py +29 -0
  465. maxframe/tensor/arithmetic/sign.py +81 -0
  466. maxframe/tensor/arithmetic/signbit.py +65 -0
  467. maxframe/tensor/arithmetic/sin.py +98 -0
  468. maxframe/tensor/arithmetic/sinc.py +102 -0
  469. maxframe/tensor/arithmetic/sinh.py +93 -0
  470. maxframe/tensor/arithmetic/spacing.py +72 -0
  471. maxframe/tensor/arithmetic/sqrt.py +81 -0
  472. maxframe/tensor/arithmetic/square.py +69 -0
  473. maxframe/tensor/arithmetic/subtract.py +81 -0
  474. maxframe/tensor/arithmetic/tan.py +88 -0
  475. maxframe/tensor/arithmetic/tanh.py +92 -0
  476. maxframe/tensor/arithmetic/tests/__init__.py +15 -0
  477. maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
  478. maxframe/tensor/arithmetic/truediv.py +104 -0
  479. maxframe/tensor/arithmetic/trunc.py +72 -0
  480. maxframe/tensor/arithmetic/utils.py +65 -0
  481. maxframe/tensor/array_utils.py +186 -0
  482. maxframe/tensor/base/__init__.py +34 -0
  483. maxframe/tensor/base/astype.py +119 -0
  484. maxframe/tensor/base/atleast_1d.py +74 -0
  485. maxframe/tensor/base/broadcast_to.py +89 -0
  486. maxframe/tensor/base/ravel.py +92 -0
  487. maxframe/tensor/base/tests/__init__.py +13 -0
  488. maxframe/tensor/base/tests/test_base.py +114 -0
  489. maxframe/tensor/base/transpose.py +125 -0
  490. maxframe/tensor/base/unique.py +205 -0
  491. maxframe/tensor/base/where.py +127 -0
  492. maxframe/tensor/core.py +724 -0
  493. maxframe/tensor/datasource/__init__.py +32 -0
  494. maxframe/tensor/datasource/arange.py +156 -0
  495. maxframe/tensor/datasource/array.py +415 -0
  496. maxframe/tensor/datasource/core.py +109 -0
  497. maxframe/tensor/datasource/empty.py +169 -0
  498. maxframe/tensor/datasource/from_dataframe.py +70 -0
  499. maxframe/tensor/datasource/from_dense.py +54 -0
  500. maxframe/tensor/datasource/from_sparse.py +47 -0
  501. maxframe/tensor/datasource/full.py +186 -0
  502. maxframe/tensor/datasource/ones.py +173 -0
  503. maxframe/tensor/datasource/scalar.py +40 -0
  504. maxframe/tensor/datasource/tests/__init__.py +13 -0
  505. maxframe/tensor/datasource/tests/test_datasource.py +278 -0
  506. maxframe/tensor/datasource/zeros.py +188 -0
  507. maxframe/tensor/fetch/__init__.py +15 -0
  508. maxframe/tensor/fetch/core.py +54 -0
  509. maxframe/tensor/indexing/__init__.py +47 -0
  510. maxframe/tensor/indexing/choose.py +196 -0
  511. maxframe/tensor/indexing/compress.py +124 -0
  512. maxframe/tensor/indexing/core.py +190 -0
  513. maxframe/tensor/indexing/extract.py +71 -0
  514. maxframe/tensor/indexing/fill_diagonal.py +183 -0
  515. maxframe/tensor/indexing/flatnonzero.py +60 -0
  516. maxframe/tensor/indexing/getitem.py +175 -0
  517. maxframe/tensor/indexing/nonzero.py +120 -0
  518. maxframe/tensor/indexing/setitem.py +132 -0
  519. maxframe/tensor/indexing/slice.py +29 -0
  520. maxframe/tensor/indexing/take.py +130 -0
  521. maxframe/tensor/indexing/tests/__init__.py +15 -0
  522. maxframe/tensor/indexing/tests/test_indexing.py +234 -0
  523. maxframe/tensor/indexing/unravel_index.py +103 -0
  524. maxframe/tensor/merge/__init__.py +15 -0
  525. maxframe/tensor/merge/stack.py +132 -0
  526. maxframe/tensor/merge/tests/__init__.py +13 -0
  527. maxframe/tensor/merge/tests/test_merge.py +52 -0
  528. maxframe/tensor/operators.py +123 -0
  529. maxframe/tensor/random/__init__.py +168 -0
  530. maxframe/tensor/random/beta.py +87 -0
  531. maxframe/tensor/random/binomial.py +137 -0
  532. maxframe/tensor/random/bytes.py +39 -0
  533. maxframe/tensor/random/chisquare.py +110 -0
  534. maxframe/tensor/random/choice.py +186 -0
  535. maxframe/tensor/random/core.py +234 -0
  536. maxframe/tensor/random/dirichlet.py +123 -0
  537. maxframe/tensor/random/exponential.py +94 -0
  538. maxframe/tensor/random/f.py +135 -0
  539. maxframe/tensor/random/gamma.py +128 -0
  540. maxframe/tensor/random/geometric.py +93 -0
  541. maxframe/tensor/random/gumbel.py +167 -0
  542. maxframe/tensor/random/hypergeometric.py +148 -0
  543. maxframe/tensor/random/laplace.py +133 -0
  544. maxframe/tensor/random/logistic.py +129 -0
  545. maxframe/tensor/random/lognormal.py +159 -0
  546. maxframe/tensor/random/logseries.py +122 -0
  547. maxframe/tensor/random/multinomial.py +133 -0
  548. maxframe/tensor/random/multivariate_normal.py +192 -0
  549. maxframe/tensor/random/negative_binomial.py +125 -0
  550. maxframe/tensor/random/noncentral_chisquare.py +132 -0
  551. maxframe/tensor/random/noncentral_f.py +126 -0
  552. maxframe/tensor/random/normal.py +143 -0
  553. maxframe/tensor/random/pareto.py +140 -0
  554. maxframe/tensor/random/permutation.py +104 -0
  555. maxframe/tensor/random/poisson.py +111 -0
  556. maxframe/tensor/random/power.py +142 -0
  557. maxframe/tensor/random/rand.py +82 -0
  558. maxframe/tensor/random/randint.py +121 -0
  559. maxframe/tensor/random/randn.py +96 -0
  560. maxframe/tensor/random/random_integers.py +123 -0
  561. maxframe/tensor/random/random_sample.py +86 -0
  562. maxframe/tensor/random/rayleigh.py +110 -0
  563. maxframe/tensor/random/shuffle.py +61 -0
  564. maxframe/tensor/random/standard_cauchy.py +105 -0
  565. maxframe/tensor/random/standard_exponential.py +72 -0
  566. maxframe/tensor/random/standard_gamma.py +120 -0
  567. maxframe/tensor/random/standard_normal.py +74 -0
  568. maxframe/tensor/random/standard_t.py +135 -0
  569. maxframe/tensor/random/tests/__init__.py +15 -0
  570. maxframe/tensor/random/tests/test_random.py +167 -0
  571. maxframe/tensor/random/triangular.py +119 -0
  572. maxframe/tensor/random/uniform.py +131 -0
  573. maxframe/tensor/random/vonmises.py +131 -0
  574. maxframe/tensor/random/wald.py +114 -0
  575. maxframe/tensor/random/weibull.py +140 -0
  576. maxframe/tensor/random/zipf.py +122 -0
  577. maxframe/tensor/rechunk/__init__.py +26 -0
  578. maxframe/tensor/rechunk/rechunk.py +43 -0
  579. maxframe/tensor/reduction/__init__.py +66 -0
  580. maxframe/tensor/reduction/all.py +103 -0
  581. maxframe/tensor/reduction/allclose.py +88 -0
  582. maxframe/tensor/reduction/any.py +105 -0
  583. maxframe/tensor/reduction/argmax.py +103 -0
  584. maxframe/tensor/reduction/argmin.py +103 -0
  585. maxframe/tensor/reduction/array_equal.py +64 -0
  586. maxframe/tensor/reduction/core.py +168 -0
  587. maxframe/tensor/reduction/count_nonzero.py +81 -0
  588. maxframe/tensor/reduction/cumprod.py +97 -0
  589. maxframe/tensor/reduction/cumsum.py +101 -0
  590. maxframe/tensor/reduction/max.py +120 -0
  591. maxframe/tensor/reduction/mean.py +123 -0
  592. maxframe/tensor/reduction/min.py +120 -0
  593. maxframe/tensor/reduction/nanargmax.py +82 -0
  594. maxframe/tensor/reduction/nanargmin.py +76 -0
  595. maxframe/tensor/reduction/nancumprod.py +91 -0
  596. maxframe/tensor/reduction/nancumsum.py +94 -0
  597. maxframe/tensor/reduction/nanmax.py +111 -0
  598. maxframe/tensor/reduction/nanmean.py +106 -0
  599. maxframe/tensor/reduction/nanmin.py +111 -0
  600. maxframe/tensor/reduction/nanprod.py +94 -0
  601. maxframe/tensor/reduction/nanstd.py +126 -0
  602. maxframe/tensor/reduction/nansum.py +115 -0
  603. maxframe/tensor/reduction/nanvar.py +149 -0
  604. maxframe/tensor/reduction/prod.py +130 -0
  605. maxframe/tensor/reduction/std.py +134 -0
  606. maxframe/tensor/reduction/sum.py +125 -0
  607. maxframe/tensor/reduction/tests/__init__.py +13 -0
  608. maxframe/tensor/reduction/tests/test_reduction.py +181 -0
  609. maxframe/tensor/reduction/var.py +176 -0
  610. maxframe/tensor/reshape/__init__.py +17 -0
  611. maxframe/tensor/reshape/reshape.py +188 -0
  612. maxframe/tensor/reshape/tests/__init__.py +15 -0
  613. maxframe/tensor/reshape/tests/test_reshape.py +37 -0
  614. maxframe/tensor/statistics/__init__.py +13 -0
  615. maxframe/tensor/statistics/percentile.py +175 -0
  616. maxframe/tensor/statistics/quantile.py +288 -0
  617. maxframe/tensor/ufunc/__init__.py +26 -0
  618. maxframe/tensor/ufunc/ufunc.py +200 -0
  619. maxframe/tensor/utils.py +718 -0
  620. maxframe/tests/__init__.py +13 -0
  621. maxframe/tests/test_codegen.py +69 -0
  622. maxframe/tests/test_protocol.py +144 -0
  623. maxframe/tests/test_utils.py +376 -0
  624. maxframe/tests/utils.py +164 -0
  625. maxframe/typing_.py +37 -0
  626. maxframe/udf.py +134 -0
  627. maxframe/utils.py +1114 -0
  628. maxframe-0.1.0b5.dist-info/METADATA +104 -0
  629. maxframe-0.1.0b5.dist-info/RECORD +647 -0
  630. maxframe-0.1.0b5.dist-info/WHEEL +5 -0
  631. maxframe-0.1.0b5.dist-info/top_level.txt +2 -0
  632. maxframe_client/__init__.py +17 -0
  633. maxframe_client/clients/__init__.py +13 -0
  634. maxframe_client/clients/framedriver.py +118 -0
  635. maxframe_client/clients/spe.py +104 -0
  636. maxframe_client/conftest.py +15 -0
  637. maxframe_client/fetcher.py +264 -0
  638. maxframe_client/session/__init__.py +22 -0
  639. maxframe_client/session/consts.py +36 -0
  640. maxframe_client/session/graph.py +119 -0
  641. maxframe_client/session/odps.py +482 -0
  642. maxframe_client/session/task.py +280 -0
  643. maxframe_client/session/tests/__init__.py +13 -0
  644. maxframe_client/session/tests/test_task.py +85 -0
  645. maxframe_client/tests/__init__.py +13 -0
  646. maxframe_client/tests/test_fetcher.py +89 -0
  647. maxframe_client/tests/test_session.py +255 -0
@@ -0,0 +1,179 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from collections import namedtuple
16
+
17
+ import pandas as pd
18
+
19
+ from ... import opcodes
20
+ from ...core import ENTITY_TYPE, Entity, OutputType
21
+ from ...core.operator import MapReduceOperator
22
+ from ...serialization.serializables import AnyField, BoolField, Int32Field
23
+ from ...utils import lazy_import, no_default, pd_release_version
24
+ from ..core import SERIES_TYPE
25
+ from ..initializer import Series as asseries
26
+ from ..operators import DataFrameOperatorMixin
27
+ from ..utils import build_df, build_series, parse_index
28
+
29
+ cudf = lazy_import("cudf")
30
+
31
# On pandas >= 1.5.0 the ``group_keys`` default is the ``no_default`` sentinel;
# on older releases it is ``True``.
_GROUP_KEYS_NO_DEFAULT = pd_release_version >= (1, 5, 0)
if _GROUP_KEYS_NO_DEFAULT:
    _default_group_keys = no_default
else:
    _default_group_keys = True


# Simple ``(column, aggfunc)`` record.
NamedAgg = namedtuple("NamedAgg", ["column", "aggfunc"])
36
+
37
+
38
class DataFrameGroupByOperator(MapReduceOperator, DataFrameOperatorMixin):
    """Operator producing a groupby tileable from a DataFrame or Series tileable.

    The serializable fields hold the grouping arguments (``by``, ``level``,
    ``as_index``, ``sort``, ``group_keys``) that are later forwarded to the
    pandas ``groupby`` call made in :meth:`build_mock_groupby`.
    """

    _op_type_ = opcodes.GROUPBY

    # ``Entity`` values are replaced by their ``.data`` attribute on serialization.
    by = AnyField(
        "by",
        default=None,
        on_serialize=lambda x: x.data if isinstance(x, Entity) else x,
    )
    level = AnyField("level", default=None)
    as_index = BoolField("as_index", default=None)
    sort = BoolField("sort", default=None)
    group_keys = BoolField("group_keys", default=None)

    shuffle_size = Int32Field("shuffle_size", default=None)

    def __init__(self, output_types=None, **kw):
        """Create the operator, mapping plain output types to groupby types."""
        super().__init__(_output_types=output_types, **kw)
        if not output_types:
            return
        first_type = output_types[0]
        if first_type in (OutputType.dataframe, OutputType.dataframe_groupby):
            output_types = [OutputType.dataframe_groupby]
        elif first_type == OutputType.series:
            output_types = [OutputType.series_groupby]
        self.output_types = output_types

    @property
    def is_dataframe_obj(self):
        """Whether the first output type is a DataFrame or DataFrame groupby."""
        dataframe_kinds = (OutputType.dataframe_groupby, OutputType.dataframe)
        return self.output_types[0] in dataframe_kinds

    @property
    def groupby_params(self):
        """Return a new dict holding the grouping arguments of this operator."""
        return {
            "by": self.by,
            "level": self.level,
            "as_index": self.as_index,
            "sort": self.sort,
            "group_keys": self.group_keys,
        }

    def build_mock_groupby(self, **kwargs):
        """Build a pandas groupby over small mock data shaped like the input.

        ``kwargs`` override the entries of :attr:`groupby_params`. A truthy
        ``level`` is replaced by ``0``, and every entity found in a list-valued
        ``by`` is replaced by a mock pandas object built from it.
        """

        def _mock_key(entity):
            # Two concatenated pieces filled with 1 and 2 respectively.
            builder = build_df if entity.ndim == 2 else build_series
            return pd.concat(
                [
                    builder(entity, size=2, fill_value=1, name=entity.name),
                    builder(entity, size=2, fill_value=2, name=entity.name),
                ]
            )

        source = self.inputs[0]
        if self.is_dataframe_obj:
            mock_obj = build_df(
                source, size=[2, 2], fill_value=[1, 2], ensure_string=True
            )
        else:
            mock_obj = build_series(
                source,
                size=[2, 2],
                fill_value=[1, 2],
                name=source.name,
                ensure_string=True,
            )

        groupby_kw = self.groupby_params
        groupby_kw.update(kwargs)
        if groupby_kw.get("level"):
            groupby_kw["level"] = 0
        if isinstance(groupby_kw["by"], list):
            groupby_kw["by"] = [
                _mock_key(key) if isinstance(key, ENTITY_TYPE) else key
                for key in groupby_kw["by"]
            ]
        return mock_obj.groupby(**groupby_kw)

    def _set_inputs(self, inputs):
        """Store ``inputs`` and rebind series keys in ``by`` to the extra inputs."""
        super()._set_inputs(inputs)
        extra_inputs = iter(self._inputs[1:])
        if len(inputs) <= 1:
            return
        # Series-typed keys are taken, in order, from the inputs after the first.
        self.by = [
            next(extra_inputs) if isinstance(key, SERIES_TYPE) else key
            for key in self.by
        ]

    def __call__(self, df):
        """Create the groupby tileable for ``df``.

        Raises
        ------
        KeyError
            If ``df`` is two-dimensional, ``by`` is a list, and one of its
            non-series keys is not found in ``df.dtypes``.
        """
        params = df.params.copy()
        params["index_value"] = parse_index(None, df.key, df.index_value.key)

        by_is_list = isinstance(self.by, list)
        if df.ndim == 2 and by_is_list:
            key_names, key_types = [], []
            for key in self.by:
                if isinstance(key, SERIES_TYPE):
                    key_names.append(key.name)
                    key_types.append(key.dtype)
                elif key in df.dtypes:
                    key_names.append(key)
                    key_types.append(df.dtypes[key])
                else:
                    raise KeyError(key)
            params["key_dtypes"] = pd.Series(key_types, index=key_names)

        # Series keys become additional inputs after the grouped object itself.
        inputs = [df]
        if by_is_list:
            inputs.extend(key for key in self.by if isinstance(key, SERIES_TYPE))

        return self.new_tileable(inputs, **params)
154
+
155
+
156
def groupby(
    df, by=None, level=None, as_index=True, sort=True, group_keys=_default_group_keys
):
    """Group ``df`` and return the tileable built by ``DataFrameGroupByOperator``.

    A single series key (a pandas Series is first converted with ``asseries``)
    is wrapped into a one-element list. For inputs with more than one
    dimension, any other non-``None``, non-list ``by`` is wrapped the same way.
    ``group_keys`` equal to ``no_default`` is passed to the operator as ``None``.

    Raises
    ------
    TypeError
        If ``as_index`` is falsy and the first output type of ``df`` is a Series.
    """
    if not as_index and df.op.output_types[0] == OutputType.series:
        raise TypeError("as_index=False only valid with DataFrame")

    if df.ndim == 2:
        output_types = [OutputType.dataframe_groupby]
    else:
        output_types = [OutputType.series_groupby]

    if isinstance(by, (SERIES_TYPE, pd.Series)):
        by = [asseries(by) if isinstance(by, pd.Series) else by]
    elif df.ndim > 1 and by is not None and not isinstance(by, list):
        by = [by]

    op = DataFrameGroupByOperator(
        by=by,
        level=level,
        as_index=as_index,
        sort=sort,
        group_keys=None if group_keys is no_default else group_keys,
        output_types=output_types,
    )
    return op(df)
@@ -0,0 +1,124 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import pandas as pd
16
+
17
+ from ... import opcodes
18
+ from ...core import OutputType
19
+ from ...serialization.serializables import AnyField, BoolField
20
+ from ...utils import lazy_import
21
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
22
+ from ..utils import parse_index, validate_axis
23
+
24
+ cudf = lazy_import("cudf")
25
+
26
+
27
class GroupByCumReductionOperator(DataFrameOperatorMixin, DataFrameOperator):
    """Base operator for cumulative operations applied to a groupby tileable.

    Subclasses provide ``_func_name``, the name of the pandas groupby method
    that is called on a mock groupby to infer the output dtypes.
    """

    _op_module_ = "dataframe.groupby"

    axis = AnyField("axis", default=None)
    ascending = BoolField("ascending", default=None)

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    def _calc_out_dtypes(self, in_groupby):
        """Run the operation on mock data and record the resulting output type.

        Returns the dtypes of the mock result when it is a pandas DataFrame,
        otherwise a ``(name, dtype)`` tuple.
        """
        mock_groupby = in_groupby.op.build_mock_groupby()
        func_name = self._func_name

        # ``cumcount`` takes ``ascending``; the other methods take ``axis``.
        if func_name == "cumcount":
            mock_result = mock_groupby.cumcount(ascending=self.ascending)
        else:
            mock_result = getattr(mock_groupby, func_name)(axis=self.axis)

        if not isinstance(mock_result, pd.DataFrame):
            self.output_types = [OutputType.series]
            return mock_result.name, mock_result.dtype
        self.output_types = [OutputType.dataframe]
        return mock_result.dtypes

    def __call__(self, groupby):
        """Create the result tileable for ``groupby``."""
        # Walk up the first inputs until a plain DataFrame/Series is reached.
        source = groupby
        plain_types = (OutputType.dataframe, OutputType.series)
        while source.op.output_types[0] not in plain_types:
            source = source.inputs[0]

        self.axis = validate_axis(self.axis or 0, source)

        out_dtypes = self._calc_out_dtypes(groupby)

        kw = source.params.copy()
        kw["index_value"] = parse_index(pd.RangeIndex(-1), groupby.key)
        if self.output_types[0] == OutputType.dataframe:
            kw.update(
                columns_value=parse_index(out_dtypes.index, store_data=True),
                dtypes=out_dtypes,
                shape=(groupby.shape[0], len(out_dtypes)),
            )
        else:
            name, dtype = out_dtypes
            kw.update(dtype=dtype, name=name, shape=(groupby.shape[0],))
        return self.new_tileable([groupby], **kw)
75
+
76
+
77
class GroupByCummin(GroupByCumReductionOperator):
    """Cumulative operator whose mock computation calls ``cummin``."""

    _func_name = "cummin"
    _op_type_ = opcodes.CUMMIN
80
+
81
+
82
class GroupByCummax(GroupByCumReductionOperator):
    """Cumulative operator whose mock computation calls ``cummax``."""

    _func_name = "cummax"
    _op_type_ = opcodes.CUMMAX
85
+
86
+
87
class GroupByCumsum(GroupByCumReductionOperator):
    """Cumulative operator whose mock computation calls ``cumsum``."""

    _func_name = "cumsum"
    _op_type_ = opcodes.CUMSUM
90
+
91
+
92
class GroupByCumprod(GroupByCumReductionOperator):
    """Cumulative operator whose mock computation calls ``cumprod``."""

    _func_name = "cumprod"
    _op_type_ = opcodes.CUMPROD
95
+
96
+
97
class GroupByCumcount(GroupByCumReductionOperator):
    """Cumulative operator whose mock computation calls ``cumcount``."""

    _func_name = "cumcount"
    _op_type_ = opcodes.CUMCOUNT
100
+
101
+
102
def cumcount(groupby, ascending: bool = True):
    """Apply a ``GroupByCumcount`` operator built with ``ascending`` to ``groupby``."""
    return GroupByCumcount(ascending=ascending)(groupby)
105
+
106
+
107
def cummin(groupby, axis=0):
    """Apply a ``GroupByCummin`` operator built with ``axis`` to ``groupby``."""
    return GroupByCummin(axis=axis)(groupby)
110
+
111
+
112
def cummax(groupby, axis=0):
    """Apply a ``GroupByCummax`` operator built with ``axis`` to ``groupby``."""
    return GroupByCummax(axis=axis)(groupby)
115
+
116
+
117
def cumprod(groupby, axis=0):
    """Apply a ``GroupByCumprod`` operator built with ``axis`` to ``groupby``."""
    return GroupByCumprod(axis=axis)(groupby)
120
+
121
+
122
def cumsum(groupby, axis=0):
    """Apply a ``GroupByCumsum`` operator built with ``axis`` to ``groupby``."""
    return GroupByCumsum(axis=axis)(groupby)
@@ -0,0 +1,141 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import pandas as pd
16
+
17
+ from ... import opcodes
18
+ from ...core import OutputType
19
+ from ...serialization.serializables import AnyField, DictField, Int64Field, StringField
20
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
21
+ from ..utils import parse_index
22
+
23
+
24
class GroupByFillOperator(DataFrameOperator, DataFrameOperatorMixin):
    """Base operator for fill operations applied to a groupby tileable.

    Subclasses provide ``_func_name``, the name of the pandas groupby method
    that is called on a mock groupby to infer the output dtypes.
    """

    _op_module_ = "dataframe.groupby"

    value = AnyField("value", default=None)
    method = StringField("method", default=None)
    axis = AnyField("axis", default=0)
    limit = Int64Field("limit", default=None)
    downcast = DictField("downcast", default=None)

    def _calc_out_dtypes(self, in_groupby):
        """Run the fill on mock data and record the resulting output type.

        Returns the dtypes of the mock result when it is a pandas DataFrame,
        otherwise a ``(name, dtype)`` tuple.
        """
        mock_groupby = in_groupby.op.build_mock_groupby()
        func_name = self._func_name

        # ``fillna`` receives every fill argument; other methods only ``limit``.
        if func_name == "fillna":
            mock_result = mock_groupby.fillna(
                value=self.value,
                method=self.method,
                axis=self.axis,
                limit=self.limit,
                downcast=self.downcast,
            )
        else:
            mock_result = getattr(mock_groupby, func_name)(limit=self.limit)

        if not isinstance(mock_result, pd.DataFrame):
            self.output_types = [OutputType.series]
            return mock_result.name, mock_result.dtype
        self.output_types = [OutputType.dataframe]
        return mock_result.dtypes

    def __call__(self, groupby):
        """Create the result tileable for ``groupby``."""
        # Walk up the first inputs until a plain DataFrame/Series is reached.
        source = groupby
        plain_types = (OutputType.dataframe, OutputType.series)
        while source.op.output_types[0] not in plain_types:
            source = source.inputs[0]
        out_dtypes = self._calc_out_dtypes(groupby)

        kw = source.params.copy()
        kw["index_value"] = parse_index(pd.RangeIndex(-1), groupby.key)
        if self.output_types[0] == OutputType.dataframe:
            kw.update(
                columns_value=parse_index(out_dtypes.index, store_data=True),
                dtypes=out_dtypes,
                shape=(groupby.shape[0], len(out_dtypes)),
            )
        else:
            name, dtype = out_dtypes
            kw.update(dtype=dtype, name=name, shape=(groupby.shape[0],))
        return self.new_tileable([groupby], **kw)
75
+
76
+
77
class GroupByFFill(GroupByFillOperator):
    """Fill operator whose mock computation calls ``ffill``."""

    _func_name = "ffill"
    _op_type_ = opcodes.FILL_NA
80
+
81
+
82
class GroupByBFill(GroupByFillOperator):
    """Fill operator whose mock computation calls ``bfill``."""

    # Declared as ``_op_type_`` (trailing underscore), the attribute name used
    # by the sibling ``GroupByFFill``; the previous ``_op_type`` spelling
    # defined an unrelated attribute instead.
    _op_type_ = opcodes.FILL_NA
    _func_name = "bfill"
85
+
86
+
87
class GroupByFillNa(GroupByFillOperator):
    """Fill operator whose mock computation calls ``fillna``."""

    # Declared as ``_op_type_`` (trailing underscore), the attribute name used
    # by the sibling ``GroupByFFill``; the previous ``_op_type`` spelling
    # defined an unrelated attribute instead.
    _op_type_ = opcodes.FILL_NA
    _func_name = "fillna"
90
+
91
+
92
def ffill(groupby, limit=None):
    """
    Forward fill the values within each group.

    limit: int, default None
        Maximum number of values to fill

    return: Series or DataFrame
    """
    return GroupByFFill(limit=limit)(groupby)
103
+
104
+
105
def bfill(groupby, limit=None):
    """
    Backward fill the values within each group.

    limit: int, default None
        Maximum number of values to fill

    return: Series or DataFrame
    """
    return GroupByBFill(limit=limit)(groupby)
116
+
117
+
118
def fillna(groupby, value=None, method=None, axis=None, limit=None, downcast=None):
    """
    Fill NA/NaN values using the specified method.

    All arguments are stored on a ``GroupByFillNa`` operator, which is then
    applied to ``groupby``.

    value: scalar, dict, Series, or DataFrame
        Value used to fill holes (e.g. 0), or a dict/Series/DataFrame giving
        the value to use for each index (for a Series) or column (for a
        DataFrame). Entries missing from the dict/Series/DataFrame are not
        filled. This value cannot be a list.
    method: {'backfill', 'bfill', 'ffill', None}, default None
    axis: {0 or 'index', 1 or 'columns'}
    limit: int, default None
        If method is specified, the maximum number of consecutive NaN values
        to forward/backward fill
    downcast: dict, default None
        A dict of item->dtype of what to downcast if possible, or the string
        'infer', which tries to downcast to an appropriate equal type

    return: DataFrame or None
    """
    fill_args = dict(
        value=value, method=method, axis=axis, limit=limit, downcast=downcast
    )
    return GroupByFillNa(**fill_args)(groupby)
@@ -0,0 +1,92 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from collections.abc import Iterable
16
+
17
+ from ... import opcodes
18
+ from ...core import OutputType
19
+ from ...serialization.serializables import AnyField
20
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
21
+ from ..utils import parse_index
22
+
23
+
24
class GroupByIndex(DataFrameOperatorMixin, DataFrameOperator):
    """Operator selecting one or several columns from a groupby tileable."""

    _op_type_ = opcodes.INDEX
    _op_module_ = "dataframe.groupby"

    selection = AnyField("selection", default=None)

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    @property
    def groupby_params(self):
        """Grouping arguments of the input operator plus this ``selection``."""
        inherited = self.inputs[0].op.groupby_params
        inherited["selection"] = self.selection
        return inherited

    def build_mock_groupby(self, **kwargs):
        """Index the input operator's mock groupby with ``selection``."""
        parent_mock = self.inputs[0].op.build_mock_groupby(**kwargs)
        return parent_mock[self.selection]

    def __call__(self, groupby):
        """Create the sliced groupby tileable for ``groupby``."""
        # The dimensionality of the mock result decides the output kind.
        indexed = groupby.op.build_mock_groupby()[self.selection]

        if indexed.ndim == 1:
            self.output_types = [OutputType.series_groupby]
            params = {
                "shape": (groupby.shape[0],),
                "name": self.selection,
                "dtype": groupby.dtypes[self.selection],
                "index_value": groupby.index_value,
                "key_dtypes": groupby.key_dtypes,
            }
            return self.new_tileable([groupby], **params)

        self.output_types = [OutputType.dataframe_groupby]

        # Strings are iterable but denote a single column.
        if isinstance(self.selection, str) or not isinstance(
            self.selection, Iterable
        ):
            columns = [self.selection]
        else:
            columns = list(self.selection)

        params = groupby.params.copy()
        selected_dtypes = groupby.dtypes[columns]
        params["dtypes"] = selected_dtypes
        params["selection"] = self.selection
        params["shape"] = (groupby.shape[0], len(columns))
        params["columns_value"] = parse_index(selected_dtypes.index, store_data=True)

        return self.new_tileable([groupby], **params)
72
+
73
+
74
def df_groupby_getitem(df_groupby, item):
    """Select ``item`` from a DataFrame groupby via a ``GroupByIndex`` operator.

    A hashable ``item`` found in ``df_groupby.dtypes`` yields a series groupby;
    an iterable whose elements are all found there yields a DataFrame groupby.

    Raises
    ------
    NameError
        If ``item`` matches neither of the two cases above.
    IndexError
        If ``df_groupby`` already has a truthy ``selection``.
    """
    try:
        hash(item)
    except TypeError:
        is_hashable = False
    else:
        is_hashable = True

    if is_hashable and item in df_groupby.dtypes:
        output_types = [OutputType.series_groupby]
    elif isinstance(item, Iterable) and all(
        column in df_groupby.dtypes for column in item
    ):
        output_types = [OutputType.dataframe_groupby]
    else:
        raise NameError(f"Cannot slice groupby with {item!r}")

    if df_groupby.selection:
        raise IndexError(f"Column(s) {df_groupby.selection!r} already selected")

    return GroupByIndex(selection=item, output_types=output_types)(df_groupby)
@@ -0,0 +1,105 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+
18
+ from ... import opcodes
19
+ from ...core import OutputType
20
+ from ...serialization.serializables import BoolField, DictField, Int64Field
21
+ from ...utils import pd_release_version
22
+ from ..core import IndexValue
23
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
24
+ from ..utils import parse_index
25
+
26
# True when the installed pandas release is at least 1.4.0; used as the value
# of ``GroupByHead.enable_negative``.
_pandas_enable_negative = pd_release_version >= (1, 4, 0)
27
+
28
+
29
class GroupByHead(DataFrameOperator, DataFrameOperatorMixin):
    """Operator describing ``head`` applied to a groupby tileable."""

    _op_type_ = opcodes.GROUPBY_HEAD
    _op_module_ = "dataframe.groupby"

    row_count = Int64Field("row_count", default=5)
    # NOTE(review): this default dict is created once at class-definition time;
    # confirm the field machinery does not share it between instances.
    groupby_params = DictField("groupby_params", default=dict())
    enable_negative = BoolField("enable_negative", default=_pandas_enable_negative)

    def __call__(self, groupby):
        """Create the result tileable for ``groupby``.

        The result takes its params from the underlying DataFrame/Series (or
        from the selected columns of it when the groupby has a selection),
        with an unknown (NaN) row count.
        """
        # Walk up the first inputs until a plain DataFrame/Series is reached.
        df = groupby
        while df.op.output_types[0] not in (OutputType.dataframe, OutputType.series):
            df = df.inputs[0]

        selection = groupby.op.groupby_params.pop("selection", None)
        if df.ndim > 1 and selection:
            # A tuple that is not itself a column label is a list of columns.
            if isinstance(selection, tuple) and selection not in df.dtypes:
                selection = list(selection)

            result_df = df[selection]
        else:
            result_df = df

        self._output_types = (
            [OutputType.dataframe] if result_df.ndim == 2 else [OutputType.series]
        )

        # Copy before editing, as the other groupby operators do, so that the
        # params of ``result_df`` (possibly ``df`` itself) are never modified.
        params = result_df.params.copy()
        params["shape"] = (np.nan,) + result_df.shape[1:]
        if isinstance(df.index_value.value, IndexValue.RangeIndex):
            params["index_value"] = parse_index(pd.RangeIndex(-1), df.key)

        return self.new_tileable([df], **params)
61
+
62
+
63
def head(groupby, n=5):
    """
    Return first n rows of each group.

    Similar to ``.apply(lambda x: x.head(n))``, but it returns a subset of rows
    from the original Series or DataFrame with original index and order preserved
    (``as_index`` flag is ignored).

    Does not work for negative values of `n`.

    Parameters
    ----------
    groupby
        Groupby object whose grouping parameters, without ``as_index``, are
        stored on the operator.
    n : int, default 5
        Stored on the operator as ``row_count``.

    Returns
    -------
    Series or DataFrame

    See Also
    --------
    Series.groupby
    DataFrame.groupby

    Examples
    --------

    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame([[1, 2], [1, 4], [5, 6]],
    ...                   columns=['A', 'B'])
    >>> df.groupby('A').head(1).execute()
       A  B
    0  1  2
    2  5  6
    >>> df.groupby('A').head(-1).execute()
    Empty DataFrame
    Columns: [A, B]
    Index: []
    """
    # ``as_index`` is dropped from the parameters handed to the operator.
    params_without_as_index = {
        key: value
        for key, value in groupby.op.groupby_params.items()
        if key != "as_index"
    }

    head_op = GroupByHead(
        row_count=n,
        groupby_params=params_without_as_index,
        enable_negative=_pandas_enable_negative,
    )
    return head_op(groupby)