maxframe 0.1.0b5__cp38-cp38-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (647)
  1. maxframe/__init__.py +32 -0
  2. maxframe/_utils.cpython-38-darwin.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyx +547 -0
  5. maxframe/codegen.py +528 -0
  6. maxframe/config/__init__.py +15 -0
  7. maxframe/config/config.py +443 -0
  8. maxframe/config/tests/__init__.py +13 -0
  9. maxframe/config/tests/test_config.py +103 -0
  10. maxframe/config/tests/test_validators.py +34 -0
  11. maxframe/config/validators.py +57 -0
  12. maxframe/conftest.py +139 -0
  13. maxframe/core/__init__.py +65 -0
  14. maxframe/core/base.py +156 -0
  15. maxframe/core/entity/__init__.py +44 -0
  16. maxframe/core/entity/chunks.py +68 -0
  17. maxframe/core/entity/core.py +152 -0
  18. maxframe/core/entity/executable.py +337 -0
  19. maxframe/core/entity/fuse.py +73 -0
  20. maxframe/core/entity/objects.py +100 -0
  21. maxframe/core/entity/output_types.py +90 -0
  22. maxframe/core/entity/tileables.py +438 -0
  23. maxframe/core/entity/utils.py +24 -0
  24. maxframe/core/graph/__init__.py +17 -0
  25. maxframe/core/graph/builder/__init__.py +16 -0
  26. maxframe/core/graph/builder/base.py +86 -0
  27. maxframe/core/graph/builder/chunk.py +430 -0
  28. maxframe/core/graph/builder/tileable.py +34 -0
  29. maxframe/core/graph/builder/utils.py +41 -0
  30. maxframe/core/graph/core.cpython-38-darwin.so +0 -0
  31. maxframe/core/graph/core.pyx +467 -0
  32. maxframe/core/graph/entity.py +171 -0
  33. maxframe/core/graph/tests/__init__.py +13 -0
  34. maxframe/core/graph/tests/test_graph.py +205 -0
  35. maxframe/core/mode.py +96 -0
  36. maxframe/core/operator/__init__.py +34 -0
  37. maxframe/core/operator/base.py +450 -0
  38. maxframe/core/operator/core.py +276 -0
  39. maxframe/core/operator/fetch.py +53 -0
  40. maxframe/core/operator/fuse.py +29 -0
  41. maxframe/core/operator/objects.py +72 -0
  42. maxframe/core/operator/shuffle.py +111 -0
  43. maxframe/core/operator/tests/__init__.py +13 -0
  44. maxframe/core/operator/tests/test_core.py +64 -0
  45. maxframe/core/tests/__init__.py +13 -0
  46. maxframe/core/tests/test_mode.py +75 -0
  47. maxframe/dataframe/__init__.py +81 -0
  48. maxframe/dataframe/arithmetic/__init__.py +359 -0
  49. maxframe/dataframe/arithmetic/abs.py +33 -0
  50. maxframe/dataframe/arithmetic/add.py +60 -0
  51. maxframe/dataframe/arithmetic/arccos.py +28 -0
  52. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  53. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  54. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  55. maxframe/dataframe/arithmetic/arctan.py +28 -0
  56. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  57. maxframe/dataframe/arithmetic/around.py +152 -0
  58. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  59. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  60. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  61. maxframe/dataframe/arithmetic/ceil.py +28 -0
  62. maxframe/dataframe/arithmetic/core.py +342 -0
  63. maxframe/dataframe/arithmetic/cos.py +28 -0
  64. maxframe/dataframe/arithmetic/cosh.py +28 -0
  65. maxframe/dataframe/arithmetic/degrees.py +28 -0
  66. maxframe/dataframe/arithmetic/docstring.py +442 -0
  67. maxframe/dataframe/arithmetic/equal.py +56 -0
  68. maxframe/dataframe/arithmetic/exp.py +28 -0
  69. maxframe/dataframe/arithmetic/exp2.py +28 -0
  70. maxframe/dataframe/arithmetic/expm1.py +28 -0
  71. maxframe/dataframe/arithmetic/floor.py +28 -0
  72. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  73. maxframe/dataframe/arithmetic/greater.py +57 -0
  74. maxframe/dataframe/arithmetic/greater_equal.py +57 -0
  75. maxframe/dataframe/arithmetic/invert.py +33 -0
  76. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  77. maxframe/dataframe/arithmetic/less.py +57 -0
  78. maxframe/dataframe/arithmetic/less_equal.py +57 -0
  79. maxframe/dataframe/arithmetic/log.py +28 -0
  80. maxframe/dataframe/arithmetic/log10.py +28 -0
  81. maxframe/dataframe/arithmetic/log2.py +28 -0
  82. maxframe/dataframe/arithmetic/mod.py +60 -0
  83. maxframe/dataframe/arithmetic/multiply.py +60 -0
  84. maxframe/dataframe/arithmetic/negative.py +33 -0
  85. maxframe/dataframe/arithmetic/not_equal.py +56 -0
  86. maxframe/dataframe/arithmetic/power.py +68 -0
  87. maxframe/dataframe/arithmetic/radians.py +28 -0
  88. maxframe/dataframe/arithmetic/sin.py +28 -0
  89. maxframe/dataframe/arithmetic/sinh.py +28 -0
  90. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  91. maxframe/dataframe/arithmetic/subtract.py +64 -0
  92. maxframe/dataframe/arithmetic/tan.py +28 -0
  93. maxframe/dataframe/arithmetic/tanh.py +28 -0
  94. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  95. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
  96. maxframe/dataframe/arithmetic/truediv.py +64 -0
  97. maxframe/dataframe/arithmetic/trunc.py +28 -0
  98. maxframe/dataframe/arrays.py +864 -0
  99. maxframe/dataframe/core.py +2417 -0
  100. maxframe/dataframe/datasource/__init__.py +15 -0
  101. maxframe/dataframe/datasource/core.py +81 -0
  102. maxframe/dataframe/datasource/dataframe.py +59 -0
  103. maxframe/dataframe/datasource/date_range.py +504 -0
  104. maxframe/dataframe/datasource/from_index.py +54 -0
  105. maxframe/dataframe/datasource/from_records.py +107 -0
  106. maxframe/dataframe/datasource/from_tensor.py +419 -0
  107. maxframe/dataframe/datasource/index.py +117 -0
  108. maxframe/dataframe/datasource/read_csv.py +528 -0
  109. maxframe/dataframe/datasource/read_odps_query.py +299 -0
  110. maxframe/dataframe/datasource/read_odps_table.py +253 -0
  111. maxframe/dataframe/datasource/read_parquet.py +421 -0
  112. maxframe/dataframe/datasource/series.py +55 -0
  113. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  114. maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
  115. maxframe/dataframe/datastore/__init__.py +26 -0
  116. maxframe/dataframe/datastore/core.py +19 -0
  117. maxframe/dataframe/datastore/to_csv.py +227 -0
  118. maxframe/dataframe/datastore/to_odps.py +162 -0
  119. maxframe/dataframe/extensions/__init__.py +41 -0
  120. maxframe/dataframe/extensions/accessor.py +50 -0
  121. maxframe/dataframe/extensions/reshuffle.py +83 -0
  122. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  123. maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
  124. maxframe/dataframe/fetch/__init__.py +15 -0
  125. maxframe/dataframe/fetch/core.py +86 -0
  126. maxframe/dataframe/groupby/__init__.py +82 -0
  127. maxframe/dataframe/groupby/aggregation.py +350 -0
  128. maxframe/dataframe/groupby/apply.py +251 -0
  129. maxframe/dataframe/groupby/core.py +179 -0
  130. maxframe/dataframe/groupby/cum.py +124 -0
  131. maxframe/dataframe/groupby/fill.py +141 -0
  132. maxframe/dataframe/groupby/getitem.py +92 -0
  133. maxframe/dataframe/groupby/head.py +105 -0
  134. maxframe/dataframe/groupby/sample.py +214 -0
  135. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  136. maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
  137. maxframe/dataframe/groupby/transform.py +255 -0
  138. maxframe/dataframe/indexing/__init__.py +84 -0
  139. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  140. maxframe/dataframe/indexing/align.py +349 -0
  141. maxframe/dataframe/indexing/at.py +83 -0
  142. maxframe/dataframe/indexing/getitem.py +204 -0
  143. maxframe/dataframe/indexing/iat.py +37 -0
  144. maxframe/dataframe/indexing/iloc.py +566 -0
  145. maxframe/dataframe/indexing/insert.py +86 -0
  146. maxframe/dataframe/indexing/loc.py +411 -0
  147. maxframe/dataframe/indexing/reindex.py +526 -0
  148. maxframe/dataframe/indexing/rename.py +462 -0
  149. maxframe/dataframe/indexing/rename_axis.py +209 -0
  150. maxframe/dataframe/indexing/reset_index.py +402 -0
  151. maxframe/dataframe/indexing/sample.py +221 -0
  152. maxframe/dataframe/indexing/set_axis.py +194 -0
  153. maxframe/dataframe/indexing/set_index.py +61 -0
  154. maxframe/dataframe/indexing/setitem.py +130 -0
  155. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  156. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  157. maxframe/dataframe/indexing/where.py +308 -0
  158. maxframe/dataframe/initializer.py +288 -0
  159. maxframe/dataframe/merge/__init__.py +32 -0
  160. maxframe/dataframe/merge/append.py +121 -0
  161. maxframe/dataframe/merge/concat.py +325 -0
  162. maxframe/dataframe/merge/merge.py +593 -0
  163. maxframe/dataframe/merge/tests/__init__.py +13 -0
  164. maxframe/dataframe/merge/tests/test_merge.py +215 -0
  165. maxframe/dataframe/misc/__init__.py +134 -0
  166. maxframe/dataframe/misc/_duplicate.py +46 -0
  167. maxframe/dataframe/misc/accessor.py +276 -0
  168. maxframe/dataframe/misc/apply.py +692 -0
  169. maxframe/dataframe/misc/astype.py +236 -0
  170. maxframe/dataframe/misc/case_when.py +141 -0
  171. maxframe/dataframe/misc/check_monotonic.py +84 -0
  172. maxframe/dataframe/misc/cut.py +383 -0
  173. maxframe/dataframe/misc/datetimes.py +79 -0
  174. maxframe/dataframe/misc/describe.py +108 -0
  175. maxframe/dataframe/misc/diff.py +210 -0
  176. maxframe/dataframe/misc/drop.py +440 -0
  177. maxframe/dataframe/misc/drop_duplicates.py +248 -0
  178. maxframe/dataframe/misc/duplicated.py +292 -0
  179. maxframe/dataframe/misc/eval.py +728 -0
  180. maxframe/dataframe/misc/explode.py +171 -0
  181. maxframe/dataframe/misc/get_dummies.py +208 -0
  182. maxframe/dataframe/misc/isin.py +217 -0
  183. maxframe/dataframe/misc/map.py +236 -0
  184. maxframe/dataframe/misc/melt.py +162 -0
  185. maxframe/dataframe/misc/memory_usage.py +248 -0
  186. maxframe/dataframe/misc/pct_change.py +150 -0
  187. maxframe/dataframe/misc/pivot_table.py +262 -0
  188. maxframe/dataframe/misc/qcut.py +104 -0
  189. maxframe/dataframe/misc/select_dtypes.py +104 -0
  190. maxframe/dataframe/misc/shift.py +256 -0
  191. maxframe/dataframe/misc/stack.py +238 -0
  192. maxframe/dataframe/misc/string_.py +221 -0
  193. maxframe/dataframe/misc/tests/__init__.py +13 -0
  194. maxframe/dataframe/misc/tests/test_misc.py +468 -0
  195. maxframe/dataframe/misc/to_numeric.py +178 -0
  196. maxframe/dataframe/misc/transform.py +361 -0
  197. maxframe/dataframe/misc/transpose.py +136 -0
  198. maxframe/dataframe/misc/value_counts.py +182 -0
  199. maxframe/dataframe/missing/__init__.py +53 -0
  200. maxframe/dataframe/missing/checkna.py +223 -0
  201. maxframe/dataframe/missing/dropna.py +280 -0
  202. maxframe/dataframe/missing/fillna.py +275 -0
  203. maxframe/dataframe/missing/replace.py +439 -0
  204. maxframe/dataframe/missing/tests/__init__.py +13 -0
  205. maxframe/dataframe/missing/tests/test_missing.py +89 -0
  206. maxframe/dataframe/operators.py +273 -0
  207. maxframe/dataframe/plotting/__init__.py +40 -0
  208. maxframe/dataframe/plotting/core.py +78 -0
  209. maxframe/dataframe/plotting/tests/__init__.py +13 -0
  210. maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
  211. maxframe/dataframe/reduction/__init__.py +107 -0
  212. maxframe/dataframe/reduction/aggregation.py +344 -0
  213. maxframe/dataframe/reduction/all.py +78 -0
  214. maxframe/dataframe/reduction/any.py +78 -0
  215. maxframe/dataframe/reduction/core.py +837 -0
  216. maxframe/dataframe/reduction/count.py +59 -0
  217. maxframe/dataframe/reduction/cummax.py +30 -0
  218. maxframe/dataframe/reduction/cummin.py +30 -0
  219. maxframe/dataframe/reduction/cumprod.py +30 -0
  220. maxframe/dataframe/reduction/cumsum.py +30 -0
  221. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  222. maxframe/dataframe/reduction/kurtosis.py +104 -0
  223. maxframe/dataframe/reduction/max.py +65 -0
  224. maxframe/dataframe/reduction/mean.py +61 -0
  225. maxframe/dataframe/reduction/min.py +65 -0
  226. maxframe/dataframe/reduction/nunique.py +141 -0
  227. maxframe/dataframe/reduction/prod.py +76 -0
  228. maxframe/dataframe/reduction/reduction_size.py +36 -0
  229. maxframe/dataframe/reduction/sem.py +69 -0
  230. maxframe/dataframe/reduction/skew.py +89 -0
  231. maxframe/dataframe/reduction/std.py +53 -0
  232. maxframe/dataframe/reduction/str_concat.py +48 -0
  233. maxframe/dataframe/reduction/sum.py +77 -0
  234. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  235. maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
  236. maxframe/dataframe/reduction/unique.py +90 -0
  237. maxframe/dataframe/reduction/var.py +72 -0
  238. maxframe/dataframe/sort/__init__.py +34 -0
  239. maxframe/dataframe/sort/core.py +36 -0
  240. maxframe/dataframe/sort/sort_index.py +153 -0
  241. maxframe/dataframe/sort/sort_values.py +311 -0
  242. maxframe/dataframe/sort/tests/__init__.py +13 -0
  243. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  244. maxframe/dataframe/statistics/__init__.py +33 -0
  245. maxframe/dataframe/statistics/corr.py +280 -0
  246. maxframe/dataframe/statistics/quantile.py +341 -0
  247. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  248. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  249. maxframe/dataframe/tests/__init__.py +13 -0
  250. maxframe/dataframe/tests/test_initializer.py +29 -0
  251. maxframe/dataframe/tseries/__init__.py +13 -0
  252. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  253. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  254. maxframe/dataframe/tseries/to_datetime.py +297 -0
  255. maxframe/dataframe/ufunc/__init__.py +27 -0
  256. maxframe/dataframe/ufunc/tensor.py +54 -0
  257. maxframe/dataframe/ufunc/ufunc.py +52 -0
  258. maxframe/dataframe/utils.py +1267 -0
  259. maxframe/dataframe/window/__init__.py +29 -0
  260. maxframe/dataframe/window/aggregation.py +96 -0
  261. maxframe/dataframe/window/core.py +69 -0
  262. maxframe/dataframe/window/ewm.py +249 -0
  263. maxframe/dataframe/window/expanding.py +147 -0
  264. maxframe/dataframe/window/rolling.py +376 -0
  265. maxframe/dataframe/window/tests/__init__.py +13 -0
  266. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  267. maxframe/dataframe/window/tests/test_expanding.py +66 -0
  268. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  269. maxframe/env.py +33 -0
  270. maxframe/errors.py +21 -0
  271. maxframe/extension.py +81 -0
  272. maxframe/learn/__init__.py +17 -0
  273. maxframe/learn/contrib/__init__.py +17 -0
  274. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  275. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  276. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  277. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  278. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  279. maxframe/learn/contrib/utils.py +52 -0
  280. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  281. maxframe/learn/contrib/xgboost/classifier.py +86 -0
  282. maxframe/learn/contrib/xgboost/core.py +156 -0
  283. maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
  284. maxframe/learn/contrib/xgboost/predict.py +138 -0
  285. maxframe/learn/contrib/xgboost/regressor.py +78 -0
  286. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  287. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  288. maxframe/learn/contrib/xgboost/train.py +121 -0
  289. maxframe/learn/utils/__init__.py +15 -0
  290. maxframe/learn/utils/core.py +29 -0
  291. maxframe/lib/__init__.py +15 -0
  292. maxframe/lib/aio/__init__.py +27 -0
  293. maxframe/lib/aio/_runners.py +162 -0
  294. maxframe/lib/aio/_threads.py +35 -0
  295. maxframe/lib/aio/base.py +82 -0
  296. maxframe/lib/aio/file.py +85 -0
  297. maxframe/lib/aio/isolation.py +100 -0
  298. maxframe/lib/aio/lru.py +242 -0
  299. maxframe/lib/aio/parallelism.py +37 -0
  300. maxframe/lib/aio/tests/__init__.py +13 -0
  301. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  302. maxframe/lib/compression.py +55 -0
  303. maxframe/lib/cython/__init__.py +13 -0
  304. maxframe/lib/cython/libcpp.pxd +30 -0
  305. maxframe/lib/filesystem/__init__.py +21 -0
  306. maxframe/lib/filesystem/_glob.py +173 -0
  307. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  308. maxframe/lib/filesystem/_oss_lib/common.py +198 -0
  309. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  310. maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
  311. maxframe/lib/filesystem/arrow.py +236 -0
  312. maxframe/lib/filesystem/base.py +263 -0
  313. maxframe/lib/filesystem/core.py +95 -0
  314. maxframe/lib/filesystem/fsmap.py +164 -0
  315. maxframe/lib/filesystem/hdfs.py +31 -0
  316. maxframe/lib/filesystem/local.py +112 -0
  317. maxframe/lib/filesystem/oss.py +157 -0
  318. maxframe/lib/filesystem/tests/__init__.py +13 -0
  319. maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
  320. maxframe/lib/filesystem/tests/test_oss.py +182 -0
  321. maxframe/lib/functools_compat.py +81 -0
  322. maxframe/lib/mmh3.cpython-38-darwin.so +0 -0
  323. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  324. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  325. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  326. maxframe/lib/sparse/__init__.py +861 -0
  327. maxframe/lib/sparse/array.py +1604 -0
  328. maxframe/lib/sparse/core.py +92 -0
  329. maxframe/lib/sparse/matrix.py +241 -0
  330. maxframe/lib/sparse/tests/__init__.py +15 -0
  331. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  332. maxframe/lib/sparse/vector.py +150 -0
  333. maxframe/lib/tblib/LICENSE +20 -0
  334. maxframe/lib/tblib/__init__.py +327 -0
  335. maxframe/lib/tblib/cpython.py +83 -0
  336. maxframe/lib/tblib/decorators.py +44 -0
  337. maxframe/lib/tblib/pickling_support.py +90 -0
  338. maxframe/lib/tests/__init__.py +13 -0
  339. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  340. maxframe/lib/version.py +620 -0
  341. maxframe/lib/wrapped_pickle.py +139 -0
  342. maxframe/mixin.py +100 -0
  343. maxframe/odpsio/__init__.py +21 -0
  344. maxframe/odpsio/arrow.py +91 -0
  345. maxframe/odpsio/schema.py +364 -0
  346. maxframe/odpsio/tableio.py +322 -0
  347. maxframe/odpsio/tests/__init__.py +13 -0
  348. maxframe/odpsio/tests/test_arrow.py +88 -0
  349. maxframe/odpsio/tests/test_schema.py +297 -0
  350. maxframe/odpsio/tests/test_tableio.py +136 -0
  351. maxframe/odpsio/tests/test_volumeio.py +90 -0
  352. maxframe/odpsio/volumeio.py +95 -0
  353. maxframe/opcodes.py +590 -0
  354. maxframe/protocol.py +415 -0
  355. maxframe/remote/__init__.py +18 -0
  356. maxframe/remote/core.py +210 -0
  357. maxframe/remote/run_script.py +121 -0
  358. maxframe/serialization/__init__.py +26 -0
  359. maxframe/serialization/arrow.py +95 -0
  360. maxframe/serialization/core.cpython-38-darwin.so +0 -0
  361. maxframe/serialization/core.pxd +44 -0
  362. maxframe/serialization/core.pyi +61 -0
  363. maxframe/serialization/core.pyx +1094 -0
  364. maxframe/serialization/exception.py +86 -0
  365. maxframe/serialization/maxframe_objects.py +39 -0
  366. maxframe/serialization/numpy.py +91 -0
  367. maxframe/serialization/pandas.py +202 -0
  368. maxframe/serialization/scipy.py +71 -0
  369. maxframe/serialization/serializables/__init__.py +55 -0
  370. maxframe/serialization/serializables/core.py +262 -0
  371. maxframe/serialization/serializables/field.py +624 -0
  372. maxframe/serialization/serializables/field_type.py +589 -0
  373. maxframe/serialization/serializables/tests/__init__.py +13 -0
  374. maxframe/serialization/serializables/tests/test_field_type.py +121 -0
  375. maxframe/serialization/serializables/tests/test_serializable.py +250 -0
  376. maxframe/serialization/tests/__init__.py +13 -0
  377. maxframe/serialization/tests/test_serial.py +412 -0
  378. maxframe/session.py +1310 -0
  379. maxframe/tensor/__init__.py +183 -0
  380. maxframe/tensor/arithmetic/__init__.py +315 -0
  381. maxframe/tensor/arithmetic/abs.py +68 -0
  382. maxframe/tensor/arithmetic/absolute.py +68 -0
  383. maxframe/tensor/arithmetic/add.py +82 -0
  384. maxframe/tensor/arithmetic/angle.py +72 -0
  385. maxframe/tensor/arithmetic/arccos.py +104 -0
  386. maxframe/tensor/arithmetic/arccosh.py +91 -0
  387. maxframe/tensor/arithmetic/arcsin.py +94 -0
  388. maxframe/tensor/arithmetic/arcsinh.py +86 -0
  389. maxframe/tensor/arithmetic/arctan.py +106 -0
  390. maxframe/tensor/arithmetic/arctan2.py +128 -0
  391. maxframe/tensor/arithmetic/arctanh.py +86 -0
  392. maxframe/tensor/arithmetic/around.py +114 -0
  393. maxframe/tensor/arithmetic/bitand.py +95 -0
  394. maxframe/tensor/arithmetic/bitor.py +102 -0
  395. maxframe/tensor/arithmetic/bitxor.py +95 -0
  396. maxframe/tensor/arithmetic/cbrt.py +66 -0
  397. maxframe/tensor/arithmetic/ceil.py +71 -0
  398. maxframe/tensor/arithmetic/clip.py +165 -0
  399. maxframe/tensor/arithmetic/conj.py +74 -0
  400. maxframe/tensor/arithmetic/copysign.py +78 -0
  401. maxframe/tensor/arithmetic/core.py +544 -0
  402. maxframe/tensor/arithmetic/cos.py +85 -0
  403. maxframe/tensor/arithmetic/cosh.py +72 -0
  404. maxframe/tensor/arithmetic/deg2rad.py +72 -0
  405. maxframe/tensor/arithmetic/degrees.py +77 -0
  406. maxframe/tensor/arithmetic/divide.py +114 -0
  407. maxframe/tensor/arithmetic/equal.py +76 -0
  408. maxframe/tensor/arithmetic/exp.py +106 -0
  409. maxframe/tensor/arithmetic/exp2.py +67 -0
  410. maxframe/tensor/arithmetic/expm1.py +79 -0
  411. maxframe/tensor/arithmetic/fabs.py +74 -0
  412. maxframe/tensor/arithmetic/fix.py +69 -0
  413. maxframe/tensor/arithmetic/float_power.py +103 -0
  414. maxframe/tensor/arithmetic/floor.py +77 -0
  415. maxframe/tensor/arithmetic/floordiv.py +94 -0
  416. maxframe/tensor/arithmetic/fmax.py +105 -0
  417. maxframe/tensor/arithmetic/fmin.py +106 -0
  418. maxframe/tensor/arithmetic/fmod.py +99 -0
  419. maxframe/tensor/arithmetic/frexp.py +92 -0
  420. maxframe/tensor/arithmetic/greater.py +77 -0
  421. maxframe/tensor/arithmetic/greater_equal.py +69 -0
  422. maxframe/tensor/arithmetic/hypot.py +77 -0
  423. maxframe/tensor/arithmetic/i0.py +89 -0
  424. maxframe/tensor/arithmetic/imag.py +67 -0
  425. maxframe/tensor/arithmetic/invert.py +110 -0
  426. maxframe/tensor/arithmetic/isclose.py +115 -0
  427. maxframe/tensor/arithmetic/iscomplex.py +64 -0
  428. maxframe/tensor/arithmetic/isfinite.py +106 -0
  429. maxframe/tensor/arithmetic/isinf.py +103 -0
  430. maxframe/tensor/arithmetic/isnan.py +82 -0
  431. maxframe/tensor/arithmetic/isreal.py +63 -0
  432. maxframe/tensor/arithmetic/ldexp.py +99 -0
  433. maxframe/tensor/arithmetic/less.py +69 -0
  434. maxframe/tensor/arithmetic/less_equal.py +69 -0
  435. maxframe/tensor/arithmetic/log.py +92 -0
  436. maxframe/tensor/arithmetic/log10.py +85 -0
  437. maxframe/tensor/arithmetic/log1p.py +95 -0
  438. maxframe/tensor/arithmetic/log2.py +85 -0
  439. maxframe/tensor/arithmetic/logaddexp.py +80 -0
  440. maxframe/tensor/arithmetic/logaddexp2.py +78 -0
  441. maxframe/tensor/arithmetic/logical_and.py +81 -0
  442. maxframe/tensor/arithmetic/logical_not.py +74 -0
  443. maxframe/tensor/arithmetic/logical_or.py +82 -0
  444. maxframe/tensor/arithmetic/logical_xor.py +88 -0
  445. maxframe/tensor/arithmetic/lshift.py +82 -0
  446. maxframe/tensor/arithmetic/maximum.py +108 -0
  447. maxframe/tensor/arithmetic/minimum.py +108 -0
  448. maxframe/tensor/arithmetic/mod.py +104 -0
  449. maxframe/tensor/arithmetic/modf.py +83 -0
  450. maxframe/tensor/arithmetic/multiply.py +81 -0
  451. maxframe/tensor/arithmetic/nan_to_num.py +99 -0
  452. maxframe/tensor/arithmetic/negative.py +65 -0
  453. maxframe/tensor/arithmetic/nextafter.py +68 -0
  454. maxframe/tensor/arithmetic/not_equal.py +72 -0
  455. maxframe/tensor/arithmetic/positive.py +47 -0
  456. maxframe/tensor/arithmetic/power.py +106 -0
  457. maxframe/tensor/arithmetic/rad2deg.py +71 -0
  458. maxframe/tensor/arithmetic/radians.py +77 -0
  459. maxframe/tensor/arithmetic/real.py +70 -0
  460. maxframe/tensor/arithmetic/reciprocal.py +76 -0
  461. maxframe/tensor/arithmetic/rint.py +68 -0
  462. maxframe/tensor/arithmetic/rshift.py +81 -0
  463. maxframe/tensor/arithmetic/setimag.py +29 -0
  464. maxframe/tensor/arithmetic/setreal.py +29 -0
  465. maxframe/tensor/arithmetic/sign.py +81 -0
  466. maxframe/tensor/arithmetic/signbit.py +65 -0
  467. maxframe/tensor/arithmetic/sin.py +98 -0
  468. maxframe/tensor/arithmetic/sinc.py +102 -0
  469. maxframe/tensor/arithmetic/sinh.py +93 -0
  470. maxframe/tensor/arithmetic/spacing.py +72 -0
  471. maxframe/tensor/arithmetic/sqrt.py +81 -0
  472. maxframe/tensor/arithmetic/square.py +69 -0
  473. maxframe/tensor/arithmetic/subtract.py +81 -0
  474. maxframe/tensor/arithmetic/tan.py +88 -0
  475. maxframe/tensor/arithmetic/tanh.py +92 -0
  476. maxframe/tensor/arithmetic/tests/__init__.py +15 -0
  477. maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
  478. maxframe/tensor/arithmetic/truediv.py +104 -0
  479. maxframe/tensor/arithmetic/trunc.py +72 -0
  480. maxframe/tensor/arithmetic/utils.py +65 -0
  481. maxframe/tensor/array_utils.py +186 -0
  482. maxframe/tensor/base/__init__.py +34 -0
  483. maxframe/tensor/base/astype.py +119 -0
  484. maxframe/tensor/base/atleast_1d.py +74 -0
  485. maxframe/tensor/base/broadcast_to.py +89 -0
  486. maxframe/tensor/base/ravel.py +92 -0
  487. maxframe/tensor/base/tests/__init__.py +13 -0
  488. maxframe/tensor/base/tests/test_base.py +114 -0
  489. maxframe/tensor/base/transpose.py +125 -0
  490. maxframe/tensor/base/unique.py +205 -0
  491. maxframe/tensor/base/where.py +127 -0
  492. maxframe/tensor/core.py +724 -0
  493. maxframe/tensor/datasource/__init__.py +32 -0
  494. maxframe/tensor/datasource/arange.py +156 -0
  495. maxframe/tensor/datasource/array.py +415 -0
  496. maxframe/tensor/datasource/core.py +109 -0
  497. maxframe/tensor/datasource/empty.py +169 -0
  498. maxframe/tensor/datasource/from_dataframe.py +70 -0
  499. maxframe/tensor/datasource/from_dense.py +54 -0
  500. maxframe/tensor/datasource/from_sparse.py +47 -0
  501. maxframe/tensor/datasource/full.py +186 -0
  502. maxframe/tensor/datasource/ones.py +173 -0
  503. maxframe/tensor/datasource/scalar.py +40 -0
  504. maxframe/tensor/datasource/tests/__init__.py +13 -0
  505. maxframe/tensor/datasource/tests/test_datasource.py +278 -0
  506. maxframe/tensor/datasource/zeros.py +188 -0
  507. maxframe/tensor/fetch/__init__.py +15 -0
  508. maxframe/tensor/fetch/core.py +54 -0
  509. maxframe/tensor/indexing/__init__.py +47 -0
  510. maxframe/tensor/indexing/choose.py +196 -0
  511. maxframe/tensor/indexing/compress.py +124 -0
  512. maxframe/tensor/indexing/core.py +190 -0
  513. maxframe/tensor/indexing/extract.py +71 -0
  514. maxframe/tensor/indexing/fill_diagonal.py +183 -0
  515. maxframe/tensor/indexing/flatnonzero.py +60 -0
  516. maxframe/tensor/indexing/getitem.py +175 -0
  517. maxframe/tensor/indexing/nonzero.py +120 -0
  518. maxframe/tensor/indexing/setitem.py +132 -0
  519. maxframe/tensor/indexing/slice.py +29 -0
  520. maxframe/tensor/indexing/take.py +130 -0
  521. maxframe/tensor/indexing/tests/__init__.py +15 -0
  522. maxframe/tensor/indexing/tests/test_indexing.py +234 -0
  523. maxframe/tensor/indexing/unravel_index.py +103 -0
  524. maxframe/tensor/merge/__init__.py +15 -0
  525. maxframe/tensor/merge/stack.py +132 -0
  526. maxframe/tensor/merge/tests/__init__.py +13 -0
  527. maxframe/tensor/merge/tests/test_merge.py +52 -0
  528. maxframe/tensor/operators.py +123 -0
  529. maxframe/tensor/random/__init__.py +168 -0
  530. maxframe/tensor/random/beta.py +87 -0
  531. maxframe/tensor/random/binomial.py +137 -0
  532. maxframe/tensor/random/bytes.py +39 -0
  533. maxframe/tensor/random/chisquare.py +110 -0
  534. maxframe/tensor/random/choice.py +186 -0
  535. maxframe/tensor/random/core.py +234 -0
  536. maxframe/tensor/random/dirichlet.py +123 -0
  537. maxframe/tensor/random/exponential.py +94 -0
  538. maxframe/tensor/random/f.py +135 -0
  539. maxframe/tensor/random/gamma.py +128 -0
  540. maxframe/tensor/random/geometric.py +93 -0
  541. maxframe/tensor/random/gumbel.py +167 -0
  542. maxframe/tensor/random/hypergeometric.py +148 -0
  543. maxframe/tensor/random/laplace.py +133 -0
  544. maxframe/tensor/random/logistic.py +129 -0
  545. maxframe/tensor/random/lognormal.py +159 -0
  546. maxframe/tensor/random/logseries.py +122 -0
  547. maxframe/tensor/random/multinomial.py +133 -0
  548. maxframe/tensor/random/multivariate_normal.py +192 -0
  549. maxframe/tensor/random/negative_binomial.py +125 -0
  550. maxframe/tensor/random/noncentral_chisquare.py +132 -0
  551. maxframe/tensor/random/noncentral_f.py +126 -0
  552. maxframe/tensor/random/normal.py +143 -0
  553. maxframe/tensor/random/pareto.py +140 -0
  554. maxframe/tensor/random/permutation.py +104 -0
  555. maxframe/tensor/random/poisson.py +111 -0
  556. maxframe/tensor/random/power.py +142 -0
  557. maxframe/tensor/random/rand.py +82 -0
  558. maxframe/tensor/random/randint.py +121 -0
  559. maxframe/tensor/random/randn.py +96 -0
  560. maxframe/tensor/random/random_integers.py +123 -0
  561. maxframe/tensor/random/random_sample.py +86 -0
  562. maxframe/tensor/random/rayleigh.py +110 -0
  563. maxframe/tensor/random/shuffle.py +61 -0
  564. maxframe/tensor/random/standard_cauchy.py +105 -0
  565. maxframe/tensor/random/standard_exponential.py +72 -0
  566. maxframe/tensor/random/standard_gamma.py +120 -0
  567. maxframe/tensor/random/standard_normal.py +74 -0
  568. maxframe/tensor/random/standard_t.py +135 -0
  569. maxframe/tensor/random/tests/__init__.py +15 -0
  570. maxframe/tensor/random/tests/test_random.py +167 -0
  571. maxframe/tensor/random/triangular.py +119 -0
  572. maxframe/tensor/random/uniform.py +131 -0
  573. maxframe/tensor/random/vonmises.py +131 -0
  574. maxframe/tensor/random/wald.py +114 -0
  575. maxframe/tensor/random/weibull.py +140 -0
  576. maxframe/tensor/random/zipf.py +122 -0
  577. maxframe/tensor/rechunk/__init__.py +26 -0
  578. maxframe/tensor/rechunk/rechunk.py +43 -0
  579. maxframe/tensor/reduction/__init__.py +66 -0
  580. maxframe/tensor/reduction/all.py +103 -0
  581. maxframe/tensor/reduction/allclose.py +88 -0
  582. maxframe/tensor/reduction/any.py +105 -0
  583. maxframe/tensor/reduction/argmax.py +103 -0
  584. maxframe/tensor/reduction/argmin.py +103 -0
  585. maxframe/tensor/reduction/array_equal.py +64 -0
  586. maxframe/tensor/reduction/core.py +168 -0
  587. maxframe/tensor/reduction/count_nonzero.py +81 -0
  588. maxframe/tensor/reduction/cumprod.py +97 -0
  589. maxframe/tensor/reduction/cumsum.py +101 -0
  590. maxframe/tensor/reduction/max.py +120 -0
  591. maxframe/tensor/reduction/mean.py +123 -0
  592. maxframe/tensor/reduction/min.py +120 -0
  593. maxframe/tensor/reduction/nanargmax.py +82 -0
  594. maxframe/tensor/reduction/nanargmin.py +76 -0
  595. maxframe/tensor/reduction/nancumprod.py +91 -0
  596. maxframe/tensor/reduction/nancumsum.py +94 -0
  597. maxframe/tensor/reduction/nanmax.py +111 -0
  598. maxframe/tensor/reduction/nanmean.py +106 -0
  599. maxframe/tensor/reduction/nanmin.py +111 -0
  600. maxframe/tensor/reduction/nanprod.py +94 -0
  601. maxframe/tensor/reduction/nanstd.py +126 -0
  602. maxframe/tensor/reduction/nansum.py +115 -0
  603. maxframe/tensor/reduction/nanvar.py +149 -0
  604. maxframe/tensor/reduction/prod.py +130 -0
  605. maxframe/tensor/reduction/std.py +134 -0
  606. maxframe/tensor/reduction/sum.py +125 -0
  607. maxframe/tensor/reduction/tests/__init__.py +13 -0
  608. maxframe/tensor/reduction/tests/test_reduction.py +181 -0
  609. maxframe/tensor/reduction/var.py +176 -0
  610. maxframe/tensor/reshape/__init__.py +17 -0
  611. maxframe/tensor/reshape/reshape.py +188 -0
  612. maxframe/tensor/reshape/tests/__init__.py +15 -0
  613. maxframe/tensor/reshape/tests/test_reshape.py +37 -0
  614. maxframe/tensor/statistics/__init__.py +13 -0
  615. maxframe/tensor/statistics/percentile.py +175 -0
  616. maxframe/tensor/statistics/quantile.py +288 -0
  617. maxframe/tensor/ufunc/__init__.py +26 -0
  618. maxframe/tensor/ufunc/ufunc.py +200 -0
  619. maxframe/tensor/utils.py +718 -0
  620. maxframe/tests/__init__.py +13 -0
  621. maxframe/tests/test_codegen.py +69 -0
  622. maxframe/tests/test_protocol.py +144 -0
  623. maxframe/tests/test_utils.py +376 -0
  624. maxframe/tests/utils.py +164 -0
  625. maxframe/typing_.py +37 -0
  626. maxframe/udf.py +134 -0
  627. maxframe/utils.py +1114 -0
  628. maxframe-0.1.0b5.dist-info/METADATA +104 -0
  629. maxframe-0.1.0b5.dist-info/RECORD +647 -0
  630. maxframe-0.1.0b5.dist-info/WHEEL +5 -0
  631. maxframe-0.1.0b5.dist-info/top_level.txt +2 -0
  632. maxframe_client/__init__.py +17 -0
  633. maxframe_client/clients/__init__.py +13 -0
  634. maxframe_client/clients/framedriver.py +118 -0
  635. maxframe_client/clients/spe.py +104 -0
  636. maxframe_client/conftest.py +15 -0
  637. maxframe_client/fetcher.py +264 -0
  638. maxframe_client/session/__init__.py +22 -0
  639. maxframe_client/session/consts.py +36 -0
  640. maxframe_client/session/graph.py +119 -0
  641. maxframe_client/session/odps.py +482 -0
  642. maxframe_client/session/task.py +280 -0
  643. maxframe_client/session/tests/__init__.py +13 -0
  644. maxframe_client/session/tests/test_task.py +85 -0
  645. maxframe_client/tests/__init__.py +13 -0
  646. maxframe_client/tests/test_fetcher.py +89 -0
  647. maxframe_client/tests/test_session.py +255 -0
@@ -0,0 +1,350 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import functools
16
+ import logging
17
+ from typing import Callable, Dict
18
+
19
+ import numpy as np
20
+ import pandas as pd
21
+
22
+ from ... import opcodes
23
+ from ...core import ENTITY_TYPE, OutputType
24
+ from ...serialization.serializables import (
25
+ AnyField,
26
+ DictField,
27
+ Int32Field,
28
+ Int64Field,
29
+ ListField,
30
+ StringField,
31
+ )
32
+ from ...utils import lazy_import, pd_release_version
33
+ from ..core import GROUPBY_TYPE
34
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
35
+ from ..reduction.aggregation import (
36
+ compile_reduction_funcs,
37
+ is_funcs_aggregate,
38
+ normalize_reduction_funcs,
39
+ )
40
+ from ..utils import is_cudf, parse_index
41
+
42
# Optional GPU libraries obtained through the project's ``lazy_import`` helper
# (presumably the real import is deferred until first use -- TODO confirm
# against the helper's implementation).
cp = lazy_import("cupy", rename="cp")
cudf = lazy_import("cudf")

# Logger named after this module.
logger = logging.getLogger(__name__)
# NOTE(review): the two thresholds below are not referenced anywhere else in
# this module; their intended meaning cannot be determined from here.
CV_THRESHOLD = 0.2
MEAN_RATIO_THRESHOLD = 2 / 3
# True when the first two components of ``pd_release_version`` compare greater
# than (1, 0).  ``build_mock_agg_result`` consults this flag to decide whether
# a failed mock aggregation may be retried with ``as_index=True``.
_support_get_group_without_as_index = pd_release_version[:2] > (1, 0)
49
+
50
+
51
class SizeRecorder:
    """Collect paired (raw, aggregated) size observations in arrival order."""

    def __init__(self):
        # Two parallel lists: entry ``i`` of each belongs to the same call
        # of ``record``.
        self._raw_records, self._agg_records = [], []

    def record(self, raw_record: int, agg_record: int):
        """Append one raw size and its matching aggregated size."""
        for bucket, value in (
            (self._raw_records, raw_record),
            (self._agg_records, agg_record),
        ):
            bucket.append(value)

    def get(self):
        """Return ``(raw_records, agg_records)``; the lists are not copied."""
        raw_sizes, agg_sizes = self._raw_records, self._agg_records
        return raw_sizes, agg_sizes
62
+
63
+
64
# Lookup table from aggregation name to a callable that invokes the
# same-named reduction method on its argument.  "var", "std" and "sem" accept
# an optional ``ddof`` (default 1); "skew", "kurt" and "kurtosis" accept an
# optional ``bias`` (default False).  "size" is the only entry that calls a
# private method (``_reduction_size``).
# NOTE(review): ``x`` is presumably a MaxFrame DataFrame/Series rather than a
# pandas object (``_reduction_size`` and ``bias=`` are not pandas API) --
# confirm against callers.
_agg_functions = {
    "sum": lambda x: x.sum(),
    "prod": lambda x: x.prod(),
    "product": lambda x: x.product(),
    "min": lambda x: x.min(),
    "max": lambda x: x.max(),
    "all": lambda x: x.all(),
    "any": lambda x: x.any(),
    "count": lambda x: x.count(),
    "size": lambda x: x._reduction_size(),
    "mean": lambda x: x.mean(),
    "var": lambda x, ddof=1: x.var(ddof=ddof),
    "std": lambda x, ddof=1: x.std(ddof=ddof),
    "sem": lambda x, ddof=1: x.sem(ddof=ddof),
    "skew": lambda x, bias=False: x.skew(bias=bias),
    "kurt": lambda x, bias=False: x.kurt(bias=bias),
    "kurtosis": lambda x, bias=False: x.kurtosis(bias=bias),
    "nunique": lambda x: x.nunique(),
}
# NOTE(review): not referenced elsewhere in this module; purpose not visible
# from here.
_series_col_name = "col_name"
84
+
85
+
86
def _patch_groupby_kurt():
    """
    Install ``kurt`` / ``kurtosis`` on the pandas groupby classes if missing.

    Nothing is done when ``DataFrameGroupBy`` already exposes ``kurt``.
    ``AttributeError`` and ``ImportError`` raised while patching are ignored,
    leaving pandas untouched.
    """
    try:
        from pandas.core.groupby import DataFrameGroupBy, SeriesGroupBy

        if hasattr(DataFrameGroupBy, "kurt"):  # pragma: no cover
            return

        def _kurt_by_frame(a, *args, **kwargs):
            # Route through a one-column frame so frame-level keyword
            # arguments are accepted, then take the single resulting value.
            value = a.to_frame().kurt(*args, **kwargs).iloc[0]
            if is_cudf(value):  # pragma: no cover
                value = value.copy()
            return value

        def _group_kurt(x, *args, **kwargs):
            # An explicit ``numeric_only`` selects the frame-based reducer;
            # otherwise ``pd.Series.kurt`` is applied directly.
            if kwargs.get("numeric_only") is None:
                reducer = pd.Series.kurt
            else:
                reducer = _kurt_by_frame
            return x.agg(functools.partial(reducer, *args, **kwargs))

        for groupby_cls in (DataFrameGroupBy, SeriesGroupBy):
            groupby_cls.kurt = groupby_cls.kurtosis = _group_kurt
    except (AttributeError, ImportError):  # pragma: no cover
        pass


# Apply the patch once at import time, then drop the helper from the
# module namespace.
_patch_groupby_kurt()
del _patch_groupby_kurt
112
+
113
+
114
def build_mock_agg_result(
    groupby: GROUPBY_TYPE,
    groupby_params: Dict,
    raw_func: Callable,
    **raw_func_kw,
):
    """
    Run ``raw_func`` as an aggregation on the mock groupby of ``groupby``.

    The aggregation is first attempted on ``groupby.op.build_mock_groupby()``.
    If that raises ``ValueError``, it is retried on a mock groupby built with
    ``as_index=True``, but only when ``groupby_params`` has no truthy
    ``"as_index"`` and ``_support_get_group_without_as_index`` is false;
    otherwise the ``ValueError`` propagates.  The retried result is turned
    into a frame via ``to_frame()`` and all of its index level names are
    reset to ``None``.

    Returns
    -------
    The object produced by the mock aggregation.
    """
    try:
        return groupby.op.build_mock_groupby().aggregate(raw_func, **raw_func_kw)
    except ValueError:
        retry_allowed = (
            not groupby_params.get("as_index")
            and not _support_get_group_without_as_index
        )
        if not retry_allowed:  # pragma: no cover
            raise
        indexed_groupby = groupby.op.build_mock_groupby(as_index=True)
        retried = indexed_groupby.aggregate(raw_func, **raw_func_kw).to_frame()
        retried.index.names = [None] * retried.index.nlevels
        return retried
134
+
135
+
136
class DataFrameGroupByAgg(DataFrameOperator, DataFrameOperatorMixin):
    """
    Operator describing an aggregation applied to a groupby object.

    Calling the operator with a groupby runs the requested aggregation on a
    mock groupby (via ``build_mock_agg_result``) and uses the mock result to
    derive dtypes, index and columns of the DataFrame or Series it returns.
    """

    _op_type_ = opcodes.GROUPBY_AGG

    # aggregation function(s) and keyword arguments as supplied by the caller
    raw_func = AnyField("raw_func")
    raw_func_kw = DictField("raw_func_kw")
    # NOTE(review): ``func`` / ``func_rename`` are never assigned inside this
    # class; presumably populated by normalize_reduction_funcs -- confirm.
    func = AnyField("func")
    func_rename = ListField("func_rename", default=None)

    # ``groupby_params`` is mutated by ``_set_inputs`` and ``_fix_as_index``
    raw_groupby_params = DictField("raw_groupby_params")
    groupby_params = DictField("groupby_params")

    method = StringField("method")

    # for chunk
    chunk_store_limit = Int64Field("chunk_store_limit")
    pre_funcs = ListField("pre_funcs")
    agg_funcs = ListField("agg_funcs")
    post_funcs = ListField("post_funcs")
    index_levels = Int32Field("index_levels")
    size_recorder_name = StringField("size_recorder_name")

    def _set_inputs(self, inputs):
        """
        Store ``inputs`` and re-bind entity-valued ``by`` keys to them.

        When there is more than one input, every entity in
        ``groupby_params["by"]`` is replaced, in order, by the next input
        after the first one; non-entity keys are kept unchanged.
        """
        super()._set_inputs(inputs)
        inputs_iter = iter(self._inputs[1:])
        if len(self._inputs) > 1:
            by = []
            for v in self.groupby_params["by"]:
                if isinstance(v, ENTITY_TYPE):
                    by.append(next(inputs_iter))
                else:
                    by.append(v)
            self.groupby_params["by"] = by

    def _get_inputs(self, inputs):
        """
        Append entity-valued ``by`` keys to ``inputs`` and return the list.

        ``inputs`` is extended in place; nothing is appended when
        ``groupby_params["by"]`` is not a list.
        """
        if isinstance(self.groupby_params["by"], list):
            for v in self.groupby_params["by"]:
                if isinstance(v, ENTITY_TYPE):
                    inputs.append(v)
        return inputs

    def _get_index_levels(self, groupby, mock_index):
        """
        Return the number of index levels of the ``as_index=True`` result.

        Parameters
        ----------
        groupby
            Groupby object whose operator can build a mock groupby.
        mock_index : pd.Index
            Index of the mock aggregation result; used directly when
            ``as_index`` is already truthy.

        Returns
        -------
        int
            1 for a flat index, otherwise the number of MultiIndex levels.
            If the ``as_index=True`` mock aggregation fails and ``by`` is a
            list, ``len(by)`` is returned instead.

        Raises
        ------
        Exception
            Whatever the mock aggregation raised, when ``by`` is not a list.
        """
        if not self.groupby_params["as_index"]:
            try:
                as_index_agg_df = groupby.op.build_mock_groupby(
                    as_index=True
                ).aggregate(self.raw_func, **self.raw_func_kw)
            except Exception:  # nosec
                # Only ordinary errors are treated as "mock aggregation not
                # possible"; KeyboardInterrupt / SystemExit must propagate.
                # handling cases like mdf.groupby("b", as_index=False).b.agg({"c": "count"})
                if isinstance(self.groupby_params["by"], list):
                    return len(self.groupby_params["by"])
                raise  # pragma: no cover
            pd_index = as_index_agg_df.index
        else:
            pd_index = mock_index
        return 1 if not isinstance(pd_index, pd.MultiIndex) else len(pd_index.levels)

    def _fix_as_index(self, result_index: pd.Index):
        """
        Force ``as_index`` to True when the mock result shows it had no effect.

        A MultiIndex, or a flat index that carries a name, is taken as
        evidence that ``as_index=False`` was not honoured.
        """
        # make sure if as_index=False takes effect
        if isinstance(result_index, pd.MultiIndex):
            # if MultiIndex, as_index=False definitely takes no effect
            self.groupby_params["as_index"] = True
        elif result_index.name is not None:
            # if not MultiIndex and agg_df.index has a name
            # means as_index=False takes no effect
            self.groupby_params["as_index"] = True

    def _call_dataframe(self, groupby, input_df):
        """Build the DataFrame output for an aggregation over ``groupby``."""
        compile_reduction_funcs(self, input_df)
        agg_df = build_mock_agg_result(
            groupby, self.groupby_params, self.raw_func, **self.raw_func_kw
        )

        # row count is unknown until execution; column count comes from the mock
        shape = (np.nan, agg_df.shape[1])
        if isinstance(agg_df.index, pd.RangeIndex):
            # RangeIndex(-1) stands in for a range index of unknown size
            index_value = parse_index(
                pd.RangeIndex(-1), groupby.key, groupby.index_value.key
            )
        else:
            index_value = parse_index(
                agg_df.index, groupby.key, groupby.index_value.key
            )

        # make sure if as_index=False takes effect
        self._fix_as_index(agg_df.index)

        # determine num of indices to group in intermediate steps
        self.index_levels = self._get_index_levels(groupby, agg_df.index)

        inputs = self._get_inputs([input_df])
        return self.new_dataframe(
            inputs,
            shape=shape,
            dtypes=agg_df.dtypes,
            index_value=index_value,
            columns_value=parse_index(agg_df.columns, store_data=True),
        )

    def _call_series(self, groupby, in_series):
        """
        Build the output for a series-type aggregation over ``groupby``.

        Returns a DataFrame when the mock aggregation yields one, otherwise
        a Series.
        """
        compile_reduction_funcs(self, in_series)
        agg_result = build_mock_agg_result(
            groupby, self.groupby_params, self.raw_func, **self.raw_func_kw
        )

        # make sure if as_index=False takes effect
        self._fix_as_index(agg_result.index)

        index_value = parse_index(
            agg_result.index, groupby.key, groupby.index_value.key
        )

        inputs = self._get_inputs([in_series])

        # determine num of indices to group in intermediate steps
        self.index_levels = self._get_index_levels(groupby, agg_result.index)

        # update value type
        if isinstance(agg_result, pd.DataFrame):
            return self.new_dataframe(
                inputs,
                shape=(np.nan, len(agg_result.columns)),
                dtypes=agg_result.dtypes,
                index_value=index_value,
                columns_value=parse_index(agg_result.columns, store_data=True),
            )
        else:
            return self.new_series(
                inputs,
                shape=(np.nan,),
                dtype=agg_result.dtype,
                name=agg_result.name,
                index_value=index_value,
            )

    def __call__(self, groupby):
        """
        Apply the operator to ``groupby`` and return the resulting tileable.

        The output is a Series when ``raw_func`` equals ``"size"`` or when
        ``groupby`` is not a DataFrame groupby; otherwise a DataFrame.
        """
        normalize_reduction_funcs(self, ndim=groupby.ndim)
        # walk up the input chain to the underlying DataFrame / Series
        df = groupby
        while df.op.output_types[0] not in (OutputType.dataframe, OutputType.series):
            df = df.inputs[0]

        if self.raw_func == "size":
            self.output_types = [OutputType.series]
        else:
            self.output_types = (
                [OutputType.dataframe]
                if groupby.op.output_types[0] == OutputType.dataframe_groupby
                else [OutputType.series]
            )

        if self.output_types[0] == OutputType.dataframe:
            return self._call_dataframe(groupby, df)
        else:
            return self._call_series(groupby, df)
288
+
289
+
290
def agg(groupby, func=None, method="auto", *args, **kwargs):
    """
    Aggregate using one or more operations on grouped data.

    Parameters
    ----------
    groupby : MaxFrame Groupby
        Groupby data.
    func : str or list-like
        Aggregation functions.
    method : {'auto', 'shuffle', 'tree'}, default 'auto'
        'tree' method provides a better performance, 'shuffle' is recommended
        if the aggregated result is very large, 'auto' will use the 'shuffle'
        method in distributed mode and 'tree' in local mode. ``None`` is
        treated as 'auto'.
    *args
        Extra positional arguments. They are only forwarded to
        ``groupby.transform`` when ``func`` is not an aggregation.
    **kwargs
        Extra keyword arguments passed along with ``func``.

    Returns
    -------
    Series or DataFrame
        Aggregated result.

    Raises
    ------
    TypeError
        If ``groupby`` is not a groupby object.
    ValueError
        If ``method`` is not one of the supported values.
    """

    # When perform a computation on the grouped data, we won't shuffle
    # the data in the stage of groupby and do shuffle after aggregation.

    if not isinstance(groupby, GROUPBY_TYPE):
        raise TypeError(f"Input should be type of groupby, not {type(groupby)}")

    if method is None:
        method = "auto"
    if method not in ["shuffle", "tree", "auto"]:
        raise ValueError(
            f"Method {method} is not available, please specify 'tree' or 'shuffle'"
        )

    if not is_funcs_aggregate(func, ndim=groupby.ndim):
        # pass index to transform, otherwise it will lose name info for index
        agg_result = build_mock_agg_result(
            groupby, groupby.op.groupby_params, func, **kwargs
        )
        if isinstance(agg_result.index, pd.RangeIndex):
            # set -1 to represent unknown size for RangeIndex
            index_value = parse_index(
                pd.RangeIndex(-1), groupby.key, groupby.index_value.key
            )
        else:
            index_value = parse_index(
                agg_result.index, groupby.key, groupby.index_value.key
            )
        return groupby.transform(
            func, *args, _call_agg=True, index=index_value, **kwargs
        )

    # NOTE(review): both *_groupby_params fields receive the very same dict
    # object that belongs to ``groupby.op``; DataFrameGroupByAgg mutates
    # ``groupby_params`` in place, so those changes are visible through all
    # three references -- confirm this aliasing is intended.
    agg_op = DataFrameGroupByAgg(
        raw_func=func,
        raw_func_kw=kwargs,
        method=method,
        raw_groupby_params=groupby.op.groupby_params,
        groupby_params=groupby.op.groupby_params,
    )
    return agg_op(groupby)
@@ -0,0 +1,251 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+
18
+ from ... import opcodes
19
+ from ...core import OutputType
20
+ from ...core.operator import OperatorLogicKeyGeneratorMixin
21
+ from ...serialization.serializables import (
22
+ AnyField,
23
+ BoolField,
24
+ DictField,
25
+ FunctionField,
26
+ StringField,
27
+ TupleField,
28
+ )
29
+ from ...utils import get_func_token, quiet_stdio, tokenize
30
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
31
+ from ..utils import make_dtype, make_dtypes, parse_index, validate_output_types
32
+
33
+
34
class GroupByApplyLogicKeyGeneratorMixin(OperatorLogicKeyGeneratorMixin):
    """Logic-key mixin that also folds the applied function into the token values."""

    def _get_logic_key_token_values(self):
        """Return the parent's token values, plus the function token if ``func`` is set."""
        base_tokens = super()._get_logic_key_token_values()
        if not self.func:  # pragma: no cover
            return base_tokens
        func_token = get_func_token(self.func)
        return base_tokens + [func_token]
41
+
42
+
43
class GroupByApply(
    DataFrameOperator, DataFrameOperatorMixin, GroupByApplyLogicKeyGeneratorMixin
):
    """
    Operator describing ``func`` applied group-wise to a groupby object.

    When the caller does not fully describe the output, the operator applies
    ``func`` to a mock groupby to infer output type, dtypes and index.
    """

    _op_type_ = opcodes.APPLY
    _op_module_ = "dataframe.groupby"

    # user function plus the positional / keyword arguments forwarded to it
    func = FunctionField("func")
    args = TupleField("args", default_factory=tuple)
    kwds = DictField("kwds", default_factory=dict)
    # set to True when the mock result has at most two rows
    maybe_agg = BoolField("maybe_agg", default=None)
    logic_key = StringField("logic_key", default=None)
    func_key = AnyField("func_key", default=None)
    need_clean_up_func = BoolField("need_clean_up_func", default=False)

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    def _update_key(self):
        """Recompute ``_key`` using the function's token instead of the function object."""
        values = [v for v in self._values_ if v is not self.func] + [
            get_func_token(self.func)
        ]
        self._obj_set("_key", tokenize(type(self).__name__, *values))
        return self

    def _infer_df_func_returns(
        self, in_groupby, in_df, dtypes=None, dtype=None, name=None, index=None
    ):
        """
        Determine output dtypes and index value, inferring them if necessary.

        Parameters
        ----------
        in_groupby
            Groupby object whose operator can build a mock groupby.
        in_df
            Underlying DataFrame / Series; its key is used for the index token.
        dtypes, dtype, name, index
            Output description supplied by the caller; any may be None.

        Returns
        -------
        tuple
            ``(dtypes, index_value)``.  For Series output ``dtypes`` is a
            ``(name, dtype)`` pair.  Either element may be None when it could
            not be determined.  May also set ``self.output_types`` and
            ``self.maybe_agg`` as a side effect.
        """
        index_value, output_type, new_dtypes = None, None, None

        if self.output_types is not None and (dtypes is not None or dtype is not None):
            # Caller described the output: skip inference.  The pair is built
            # as (name, dtype) because ``__call__`` reads index 0 as the name
            # and index 1 as the dtype.
            ret_dtypes = dtypes if dtypes is not None else (name, dtype)
            ret_index_value = parse_index(index) if index is not None else None
            return ret_dtypes, ret_index_value

        try:
            infer_df = in_groupby.op.build_mock_groupby().apply(
                self.func, *self.args, **self.kwds
            )

            if len(infer_df) <= 2:
                # we create mock df with 4 rows, 2 groups
                # if return df has 2 rows, we assume that
                # it's an aggregation operation
                self.maybe_agg = True

            # todo return proper index when sort=True is implemented
            index_value = parse_index(infer_df.index[:0], in_df.key, self.func)

            # for backward compatibility
            dtype = dtype if dtype is not None else dtypes
            if isinstance(infer_df, pd.DataFrame):
                output_type = OutputType.dataframe
                new_dtypes = infer_df.dtypes
            elif isinstance(infer_df, pd.Series):
                output_type = OutputType.series
                new_dtypes = (
                    name or infer_df.name,
                    dtype or infer_df.dtype,
                )
            else:
                output_type = OutputType.series
                new_dtypes = (name, dtype or pd.Series(infer_df).dtype)
        except Exception:  # nosec
            # Inference is best effort: the user function may fail on mock
            # data.  ``__call__`` raises TypeError afterwards if required
            # information is still missing.  KeyboardInterrupt / SystemExit
            # are deliberately not swallowed.
            pass

        self.output_types = (
            [output_type]
            if not self.output_types and output_type
            else self.output_types
        )
        dtypes = new_dtypes if dtypes is None else dtypes
        index_value = index_value if index is None else parse_index(index)
        return dtypes, index_value

    def __call__(self, groupby, dtypes=None, dtype=None, name=None, index=None):
        """
        Apply the operator to ``groupby`` and return the resulting tileable.

        Raises
        ------
        TypeError
            If the output type or dtypes are neither supplied nor inferable.
        """
        in_df = groupby
        if self.output_types and self.output_types[0] == OutputType.df_or_series:
            # output kind is left undecided; no inference is attempted
            return self.new_df_or_series([groupby])
        # walk up the input chain to the underlying DataFrame / Series
        while in_df.op.output_types[0] not in (OutputType.dataframe, OutputType.series):
            in_df = in_df.inputs[0]

        with quiet_stdio():
            dtypes, index_value = self._infer_df_func_returns(
                groupby, in_df, dtypes, dtype=dtype, name=name, index=index
            )
        if index_value is None:
            index_value = parse_index(None, (in_df.key, in_df.index_value.key))
        for arg, desc in zip((self.output_types, dtypes), ("output_types", "dtypes")):
            if arg is None:
                raise TypeError(
                    f"Cannot determine {desc} by calculating with enumerate data, "
                    "please specify it as arguments"
                )

        if self.output_types[0] == OutputType.dataframe:
            new_shape = (np.nan, len(dtypes))
            return self.new_dataframe(
                [groupby],
                shape=new_shape,
                dtypes=dtypes,
                index_value=index_value,
                columns_value=parse_index(dtypes.index, store_data=True),
            )
        else:
            # ``is not None`` rather than truthiness: falsy names (0, "") and
            # dtype objects that evaluate as false must not be discarded.
            name = name if name is not None else dtypes[0]
            dtype = dtype if dtype is not None else dtypes[1]
            new_shape = (np.nan,)
            return self.new_series(
                [groupby],
                name=name,
                shape=new_shape,
                dtype=dtype,
                index_value=index_value,
            )
157
+
158
+
159
def groupby_apply(
    groupby,
    func,
    *args,
    output_type=None,
    dtypes=None,
    dtype=None,
    name=None,
    index=None,
    skip_infer=None,
    **kwargs,
):
    """
    Apply function `func` group-wise and combine the results together.

    `func` receives each group as its first argument and must return a
    DataFrame, a Series or a scalar; the per-group results are then combined
    into a single DataFrame or Series.

    `apply` is very flexible, but it can be considerably slower than more
    specific methods such as `agg` or `transform`, so prefer those when they
    fit the task.

    Parameters
    ----------
    groupby : MaxFrame Groupby
        Groupby data.

    func : callable
        A callable that takes a dataframe as its first argument, and
        returns a dataframe, a series or a scalar. In addition the
        callable may take positional and keyword arguments.

    output_type : {'dataframe', 'series'}, default None
        Specify type of returned object. See `Notes` for more details.

    dtypes : Series, default None
        Specify dtypes of returned DataFrames. See `Notes` for more details.

    dtype : numpy.dtype, default None
        Specify dtype of returned Series. See `Notes` for more details.

    name : str, default None
        Specify name of returned Series. See `Notes` for more details.

    index : Index, default None
        Specify index of returned object. See `Notes` for more details.

    skip_infer : bool, default None
        When truthy and no output type is given, the kind of the result is
        left undecided and no inference on mock data is attempted.

    args, kwargs : tuple and dict
        Optional positional and keyword arguments to pass to `func`.
        The keywords ``output_types`` and ``object_type`` are consumed here
        and not passed to `func`.

    Returns
    -------
    applied : Series or DataFrame

    See Also
    --------
    pipe : Apply function to the full GroupBy object instead of to each
        group.
    aggregate : Apply aggregate function to the GroupBy object.
    transform : Apply function column-by-column to the GroupBy object.
    Series.apply : Apply a function to a Series.
    DataFrame.apply : Apply a function to each row or column of a DataFrame.

    Notes
    -----
    When deciding output dtypes and shape of the return value, MaxFrame will
    try applying ``func`` onto a mock grouped object, and the apply call
    may fail. When this happens, you need to specify the type of apply
    call (DataFrame or Series) in output_type.

    * For DataFrame output, you need to specify a list or a pandas Series
      as ``dtypes`` of output DataFrame. ``index`` of output can also be
      specified.
    * For Series output, you need to specify ``dtype`` and ``name`` of
      output Series.
    """
    # These two spellings are accepted through **kwargs and must be removed
    # before the remaining keywords are handed to ``func``.
    explicit_types = kwargs.pop("output_types", None)
    legacy_object_type = kwargs.pop("object_type", None)
    resolved_types = validate_output_types(
        output_types=explicit_types,
        output_type=output_type,
        object_type=legacy_object_type,
    )
    if resolved_types is None:
        if skip_infer:
            resolved_types = [OutputType.df_or_series]

    normalized_dtypes = make_dtypes(dtypes)
    normalized_dtype = make_dtype(dtype)
    apply_op = GroupByApply(
        func=func, args=args, kwds=kwargs, output_types=resolved_types
    )
    return apply_op(
        groupby,
        dtypes=normalized_dtypes,
        dtype=normalized_dtype,
        name=name,
        index=index,
    )