maxframe 0.1.0b5__cp39-cp39-macosx_11_0_arm64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (647)
  1. maxframe/__init__.py +32 -0
  2. maxframe/_utils.cpython-39-darwin.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyx +547 -0
  5. maxframe/codegen.py +528 -0
  6. maxframe/config/__init__.py +15 -0
  7. maxframe/config/config.py +443 -0
  8. maxframe/config/tests/__init__.py +13 -0
  9. maxframe/config/tests/test_config.py +103 -0
  10. maxframe/config/tests/test_validators.py +34 -0
  11. maxframe/config/validators.py +57 -0
  12. maxframe/conftest.py +139 -0
  13. maxframe/core/__init__.py +65 -0
  14. maxframe/core/base.py +156 -0
  15. maxframe/core/entity/__init__.py +44 -0
  16. maxframe/core/entity/chunks.py +68 -0
  17. maxframe/core/entity/core.py +152 -0
  18. maxframe/core/entity/executable.py +337 -0
  19. maxframe/core/entity/fuse.py +73 -0
  20. maxframe/core/entity/objects.py +100 -0
  21. maxframe/core/entity/output_types.py +90 -0
  22. maxframe/core/entity/tileables.py +438 -0
  23. maxframe/core/entity/utils.py +24 -0
  24. maxframe/core/graph/__init__.py +17 -0
  25. maxframe/core/graph/builder/__init__.py +16 -0
  26. maxframe/core/graph/builder/base.py +86 -0
  27. maxframe/core/graph/builder/chunk.py +430 -0
  28. maxframe/core/graph/builder/tileable.py +34 -0
  29. maxframe/core/graph/builder/utils.py +41 -0
  30. maxframe/core/graph/core.cpython-39-darwin.so +0 -0
  31. maxframe/core/graph/core.pyx +467 -0
  32. maxframe/core/graph/entity.py +171 -0
  33. maxframe/core/graph/tests/__init__.py +13 -0
  34. maxframe/core/graph/tests/test_graph.py +205 -0
  35. maxframe/core/mode.py +96 -0
  36. maxframe/core/operator/__init__.py +34 -0
  37. maxframe/core/operator/base.py +450 -0
  38. maxframe/core/operator/core.py +276 -0
  39. maxframe/core/operator/fetch.py +53 -0
  40. maxframe/core/operator/fuse.py +29 -0
  41. maxframe/core/operator/objects.py +72 -0
  42. maxframe/core/operator/shuffle.py +111 -0
  43. maxframe/core/operator/tests/__init__.py +13 -0
  44. maxframe/core/operator/tests/test_core.py +64 -0
  45. maxframe/core/tests/__init__.py +13 -0
  46. maxframe/core/tests/test_mode.py +75 -0
  47. maxframe/dataframe/__init__.py +81 -0
  48. maxframe/dataframe/arithmetic/__init__.py +359 -0
  49. maxframe/dataframe/arithmetic/abs.py +33 -0
  50. maxframe/dataframe/arithmetic/add.py +60 -0
  51. maxframe/dataframe/arithmetic/arccos.py +28 -0
  52. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  53. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  54. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  55. maxframe/dataframe/arithmetic/arctan.py +28 -0
  56. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  57. maxframe/dataframe/arithmetic/around.py +152 -0
  58. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  59. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  60. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  61. maxframe/dataframe/arithmetic/ceil.py +28 -0
  62. maxframe/dataframe/arithmetic/core.py +342 -0
  63. maxframe/dataframe/arithmetic/cos.py +28 -0
  64. maxframe/dataframe/arithmetic/cosh.py +28 -0
  65. maxframe/dataframe/arithmetic/degrees.py +28 -0
  66. maxframe/dataframe/arithmetic/docstring.py +442 -0
  67. maxframe/dataframe/arithmetic/equal.py +56 -0
  68. maxframe/dataframe/arithmetic/exp.py +28 -0
  69. maxframe/dataframe/arithmetic/exp2.py +28 -0
  70. maxframe/dataframe/arithmetic/expm1.py +28 -0
  71. maxframe/dataframe/arithmetic/floor.py +28 -0
  72. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  73. maxframe/dataframe/arithmetic/greater.py +57 -0
  74. maxframe/dataframe/arithmetic/greater_equal.py +57 -0
  75. maxframe/dataframe/arithmetic/invert.py +33 -0
  76. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  77. maxframe/dataframe/arithmetic/less.py +57 -0
  78. maxframe/dataframe/arithmetic/less_equal.py +57 -0
  79. maxframe/dataframe/arithmetic/log.py +28 -0
  80. maxframe/dataframe/arithmetic/log10.py +28 -0
  81. maxframe/dataframe/arithmetic/log2.py +28 -0
  82. maxframe/dataframe/arithmetic/mod.py +60 -0
  83. maxframe/dataframe/arithmetic/multiply.py +60 -0
  84. maxframe/dataframe/arithmetic/negative.py +33 -0
  85. maxframe/dataframe/arithmetic/not_equal.py +56 -0
  86. maxframe/dataframe/arithmetic/power.py +68 -0
  87. maxframe/dataframe/arithmetic/radians.py +28 -0
  88. maxframe/dataframe/arithmetic/sin.py +28 -0
  89. maxframe/dataframe/arithmetic/sinh.py +28 -0
  90. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  91. maxframe/dataframe/arithmetic/subtract.py +64 -0
  92. maxframe/dataframe/arithmetic/tan.py +28 -0
  93. maxframe/dataframe/arithmetic/tanh.py +28 -0
  94. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  95. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
  96. maxframe/dataframe/arithmetic/truediv.py +64 -0
  97. maxframe/dataframe/arithmetic/trunc.py +28 -0
  98. maxframe/dataframe/arrays.py +864 -0
  99. maxframe/dataframe/core.py +2417 -0
  100. maxframe/dataframe/datasource/__init__.py +15 -0
  101. maxframe/dataframe/datasource/core.py +81 -0
  102. maxframe/dataframe/datasource/dataframe.py +59 -0
  103. maxframe/dataframe/datasource/date_range.py +504 -0
  104. maxframe/dataframe/datasource/from_index.py +54 -0
  105. maxframe/dataframe/datasource/from_records.py +107 -0
  106. maxframe/dataframe/datasource/from_tensor.py +419 -0
  107. maxframe/dataframe/datasource/index.py +117 -0
  108. maxframe/dataframe/datasource/read_csv.py +528 -0
  109. maxframe/dataframe/datasource/read_odps_query.py +299 -0
  110. maxframe/dataframe/datasource/read_odps_table.py +253 -0
  111. maxframe/dataframe/datasource/read_parquet.py +421 -0
  112. maxframe/dataframe/datasource/series.py +55 -0
  113. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  114. maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
  115. maxframe/dataframe/datastore/__init__.py +26 -0
  116. maxframe/dataframe/datastore/core.py +19 -0
  117. maxframe/dataframe/datastore/to_csv.py +227 -0
  118. maxframe/dataframe/datastore/to_odps.py +162 -0
  119. maxframe/dataframe/extensions/__init__.py +41 -0
  120. maxframe/dataframe/extensions/accessor.py +50 -0
  121. maxframe/dataframe/extensions/reshuffle.py +83 -0
  122. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  123. maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
  124. maxframe/dataframe/fetch/__init__.py +15 -0
  125. maxframe/dataframe/fetch/core.py +86 -0
  126. maxframe/dataframe/groupby/__init__.py +82 -0
  127. maxframe/dataframe/groupby/aggregation.py +350 -0
  128. maxframe/dataframe/groupby/apply.py +251 -0
  129. maxframe/dataframe/groupby/core.py +179 -0
  130. maxframe/dataframe/groupby/cum.py +124 -0
  131. maxframe/dataframe/groupby/fill.py +141 -0
  132. maxframe/dataframe/groupby/getitem.py +92 -0
  133. maxframe/dataframe/groupby/head.py +105 -0
  134. maxframe/dataframe/groupby/sample.py +214 -0
  135. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  136. maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
  137. maxframe/dataframe/groupby/transform.py +255 -0
  138. maxframe/dataframe/indexing/__init__.py +84 -0
  139. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  140. maxframe/dataframe/indexing/align.py +349 -0
  141. maxframe/dataframe/indexing/at.py +83 -0
  142. maxframe/dataframe/indexing/getitem.py +204 -0
  143. maxframe/dataframe/indexing/iat.py +37 -0
  144. maxframe/dataframe/indexing/iloc.py +566 -0
  145. maxframe/dataframe/indexing/insert.py +86 -0
  146. maxframe/dataframe/indexing/loc.py +411 -0
  147. maxframe/dataframe/indexing/reindex.py +526 -0
  148. maxframe/dataframe/indexing/rename.py +462 -0
  149. maxframe/dataframe/indexing/rename_axis.py +209 -0
  150. maxframe/dataframe/indexing/reset_index.py +402 -0
  151. maxframe/dataframe/indexing/sample.py +221 -0
  152. maxframe/dataframe/indexing/set_axis.py +194 -0
  153. maxframe/dataframe/indexing/set_index.py +61 -0
  154. maxframe/dataframe/indexing/setitem.py +130 -0
  155. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  156. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  157. maxframe/dataframe/indexing/where.py +308 -0
  158. maxframe/dataframe/initializer.py +288 -0
  159. maxframe/dataframe/merge/__init__.py +32 -0
  160. maxframe/dataframe/merge/append.py +121 -0
  161. maxframe/dataframe/merge/concat.py +325 -0
  162. maxframe/dataframe/merge/merge.py +593 -0
  163. maxframe/dataframe/merge/tests/__init__.py +13 -0
  164. maxframe/dataframe/merge/tests/test_merge.py +215 -0
  165. maxframe/dataframe/misc/__init__.py +134 -0
  166. maxframe/dataframe/misc/_duplicate.py +46 -0
  167. maxframe/dataframe/misc/accessor.py +276 -0
  168. maxframe/dataframe/misc/apply.py +692 -0
  169. maxframe/dataframe/misc/astype.py +236 -0
  170. maxframe/dataframe/misc/case_when.py +141 -0
  171. maxframe/dataframe/misc/check_monotonic.py +84 -0
  172. maxframe/dataframe/misc/cut.py +383 -0
  173. maxframe/dataframe/misc/datetimes.py +79 -0
  174. maxframe/dataframe/misc/describe.py +108 -0
  175. maxframe/dataframe/misc/diff.py +210 -0
  176. maxframe/dataframe/misc/drop.py +440 -0
  177. maxframe/dataframe/misc/drop_duplicates.py +248 -0
  178. maxframe/dataframe/misc/duplicated.py +292 -0
  179. maxframe/dataframe/misc/eval.py +728 -0
  180. maxframe/dataframe/misc/explode.py +171 -0
  181. maxframe/dataframe/misc/get_dummies.py +208 -0
  182. maxframe/dataframe/misc/isin.py +217 -0
  183. maxframe/dataframe/misc/map.py +236 -0
  184. maxframe/dataframe/misc/melt.py +162 -0
  185. maxframe/dataframe/misc/memory_usage.py +248 -0
  186. maxframe/dataframe/misc/pct_change.py +150 -0
  187. maxframe/dataframe/misc/pivot_table.py +262 -0
  188. maxframe/dataframe/misc/qcut.py +104 -0
  189. maxframe/dataframe/misc/select_dtypes.py +104 -0
  190. maxframe/dataframe/misc/shift.py +256 -0
  191. maxframe/dataframe/misc/stack.py +238 -0
  192. maxframe/dataframe/misc/string_.py +221 -0
  193. maxframe/dataframe/misc/tests/__init__.py +13 -0
  194. maxframe/dataframe/misc/tests/test_misc.py +468 -0
  195. maxframe/dataframe/misc/to_numeric.py +178 -0
  196. maxframe/dataframe/misc/transform.py +361 -0
  197. maxframe/dataframe/misc/transpose.py +136 -0
  198. maxframe/dataframe/misc/value_counts.py +182 -0
  199. maxframe/dataframe/missing/__init__.py +53 -0
  200. maxframe/dataframe/missing/checkna.py +223 -0
  201. maxframe/dataframe/missing/dropna.py +280 -0
  202. maxframe/dataframe/missing/fillna.py +275 -0
  203. maxframe/dataframe/missing/replace.py +439 -0
  204. maxframe/dataframe/missing/tests/__init__.py +13 -0
  205. maxframe/dataframe/missing/tests/test_missing.py +89 -0
  206. maxframe/dataframe/operators.py +273 -0
  207. maxframe/dataframe/plotting/__init__.py +40 -0
  208. maxframe/dataframe/plotting/core.py +78 -0
  209. maxframe/dataframe/plotting/tests/__init__.py +13 -0
  210. maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
  211. maxframe/dataframe/reduction/__init__.py +107 -0
  212. maxframe/dataframe/reduction/aggregation.py +344 -0
  213. maxframe/dataframe/reduction/all.py +78 -0
  214. maxframe/dataframe/reduction/any.py +78 -0
  215. maxframe/dataframe/reduction/core.py +837 -0
  216. maxframe/dataframe/reduction/count.py +59 -0
  217. maxframe/dataframe/reduction/cummax.py +30 -0
  218. maxframe/dataframe/reduction/cummin.py +30 -0
  219. maxframe/dataframe/reduction/cumprod.py +30 -0
  220. maxframe/dataframe/reduction/cumsum.py +30 -0
  221. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  222. maxframe/dataframe/reduction/kurtosis.py +104 -0
  223. maxframe/dataframe/reduction/max.py +65 -0
  224. maxframe/dataframe/reduction/mean.py +61 -0
  225. maxframe/dataframe/reduction/min.py +65 -0
  226. maxframe/dataframe/reduction/nunique.py +141 -0
  227. maxframe/dataframe/reduction/prod.py +76 -0
  228. maxframe/dataframe/reduction/reduction_size.py +36 -0
  229. maxframe/dataframe/reduction/sem.py +69 -0
  230. maxframe/dataframe/reduction/skew.py +89 -0
  231. maxframe/dataframe/reduction/std.py +53 -0
  232. maxframe/dataframe/reduction/str_concat.py +48 -0
  233. maxframe/dataframe/reduction/sum.py +77 -0
  234. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  235. maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
  236. maxframe/dataframe/reduction/unique.py +90 -0
  237. maxframe/dataframe/reduction/var.py +72 -0
  238. maxframe/dataframe/sort/__init__.py +34 -0
  239. maxframe/dataframe/sort/core.py +36 -0
  240. maxframe/dataframe/sort/sort_index.py +153 -0
  241. maxframe/dataframe/sort/sort_values.py +311 -0
  242. maxframe/dataframe/sort/tests/__init__.py +13 -0
  243. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  244. maxframe/dataframe/statistics/__init__.py +33 -0
  245. maxframe/dataframe/statistics/corr.py +280 -0
  246. maxframe/dataframe/statistics/quantile.py +341 -0
  247. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  248. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  249. maxframe/dataframe/tests/__init__.py +13 -0
  250. maxframe/dataframe/tests/test_initializer.py +29 -0
  251. maxframe/dataframe/tseries/__init__.py +13 -0
  252. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  253. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  254. maxframe/dataframe/tseries/to_datetime.py +297 -0
  255. maxframe/dataframe/ufunc/__init__.py +27 -0
  256. maxframe/dataframe/ufunc/tensor.py +54 -0
  257. maxframe/dataframe/ufunc/ufunc.py +52 -0
  258. maxframe/dataframe/utils.py +1267 -0
  259. maxframe/dataframe/window/__init__.py +29 -0
  260. maxframe/dataframe/window/aggregation.py +96 -0
  261. maxframe/dataframe/window/core.py +69 -0
  262. maxframe/dataframe/window/ewm.py +249 -0
  263. maxframe/dataframe/window/expanding.py +147 -0
  264. maxframe/dataframe/window/rolling.py +376 -0
  265. maxframe/dataframe/window/tests/__init__.py +13 -0
  266. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  267. maxframe/dataframe/window/tests/test_expanding.py +66 -0
  268. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  269. maxframe/env.py +33 -0
  270. maxframe/errors.py +21 -0
  271. maxframe/extension.py +81 -0
  272. maxframe/learn/__init__.py +17 -0
  273. maxframe/learn/contrib/__init__.py +17 -0
  274. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  275. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  276. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  277. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  278. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  279. maxframe/learn/contrib/utils.py +52 -0
  280. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  281. maxframe/learn/contrib/xgboost/classifier.py +86 -0
  282. maxframe/learn/contrib/xgboost/core.py +156 -0
  283. maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
  284. maxframe/learn/contrib/xgboost/predict.py +138 -0
  285. maxframe/learn/contrib/xgboost/regressor.py +78 -0
  286. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  287. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  288. maxframe/learn/contrib/xgboost/train.py +121 -0
  289. maxframe/learn/utils/__init__.py +15 -0
  290. maxframe/learn/utils/core.py +29 -0
  291. maxframe/lib/__init__.py +15 -0
  292. maxframe/lib/aio/__init__.py +27 -0
  293. maxframe/lib/aio/_runners.py +162 -0
  294. maxframe/lib/aio/_threads.py +35 -0
  295. maxframe/lib/aio/base.py +82 -0
  296. maxframe/lib/aio/file.py +85 -0
  297. maxframe/lib/aio/isolation.py +100 -0
  298. maxframe/lib/aio/lru.py +242 -0
  299. maxframe/lib/aio/parallelism.py +37 -0
  300. maxframe/lib/aio/tests/__init__.py +13 -0
  301. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  302. maxframe/lib/compression.py +55 -0
  303. maxframe/lib/cython/__init__.py +13 -0
  304. maxframe/lib/cython/libcpp.pxd +30 -0
  305. maxframe/lib/filesystem/__init__.py +21 -0
  306. maxframe/lib/filesystem/_glob.py +173 -0
  307. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  308. maxframe/lib/filesystem/_oss_lib/common.py +198 -0
  309. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  310. maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
  311. maxframe/lib/filesystem/arrow.py +236 -0
  312. maxframe/lib/filesystem/base.py +263 -0
  313. maxframe/lib/filesystem/core.py +95 -0
  314. maxframe/lib/filesystem/fsmap.py +164 -0
  315. maxframe/lib/filesystem/hdfs.py +31 -0
  316. maxframe/lib/filesystem/local.py +112 -0
  317. maxframe/lib/filesystem/oss.py +157 -0
  318. maxframe/lib/filesystem/tests/__init__.py +13 -0
  319. maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
  320. maxframe/lib/filesystem/tests/test_oss.py +182 -0
  321. maxframe/lib/functools_compat.py +81 -0
  322. maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
  323. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  324. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  325. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  326. maxframe/lib/sparse/__init__.py +861 -0
  327. maxframe/lib/sparse/array.py +1604 -0
  328. maxframe/lib/sparse/core.py +92 -0
  329. maxframe/lib/sparse/matrix.py +241 -0
  330. maxframe/lib/sparse/tests/__init__.py +15 -0
  331. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  332. maxframe/lib/sparse/vector.py +150 -0
  333. maxframe/lib/tblib/LICENSE +20 -0
  334. maxframe/lib/tblib/__init__.py +327 -0
  335. maxframe/lib/tblib/cpython.py +83 -0
  336. maxframe/lib/tblib/decorators.py +44 -0
  337. maxframe/lib/tblib/pickling_support.py +90 -0
  338. maxframe/lib/tests/__init__.py +13 -0
  339. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  340. maxframe/lib/version.py +620 -0
  341. maxframe/lib/wrapped_pickle.py +139 -0
  342. maxframe/mixin.py +100 -0
  343. maxframe/odpsio/__init__.py +21 -0
  344. maxframe/odpsio/arrow.py +91 -0
  345. maxframe/odpsio/schema.py +364 -0
  346. maxframe/odpsio/tableio.py +322 -0
  347. maxframe/odpsio/tests/__init__.py +13 -0
  348. maxframe/odpsio/tests/test_arrow.py +88 -0
  349. maxframe/odpsio/tests/test_schema.py +297 -0
  350. maxframe/odpsio/tests/test_tableio.py +136 -0
  351. maxframe/odpsio/tests/test_volumeio.py +90 -0
  352. maxframe/odpsio/volumeio.py +95 -0
  353. maxframe/opcodes.py +590 -0
  354. maxframe/protocol.py +415 -0
  355. maxframe/remote/__init__.py +18 -0
  356. maxframe/remote/core.py +210 -0
  357. maxframe/remote/run_script.py +121 -0
  358. maxframe/serialization/__init__.py +26 -0
  359. maxframe/serialization/arrow.py +95 -0
  360. maxframe/serialization/core.cpython-39-darwin.so +0 -0
  361. maxframe/serialization/core.pxd +44 -0
  362. maxframe/serialization/core.pyi +61 -0
  363. maxframe/serialization/core.pyx +1094 -0
  364. maxframe/serialization/exception.py +86 -0
  365. maxframe/serialization/maxframe_objects.py +39 -0
  366. maxframe/serialization/numpy.py +91 -0
  367. maxframe/serialization/pandas.py +202 -0
  368. maxframe/serialization/scipy.py +71 -0
  369. maxframe/serialization/serializables/__init__.py +55 -0
  370. maxframe/serialization/serializables/core.py +262 -0
  371. maxframe/serialization/serializables/field.py +624 -0
  372. maxframe/serialization/serializables/field_type.py +589 -0
  373. maxframe/serialization/serializables/tests/__init__.py +13 -0
  374. maxframe/serialization/serializables/tests/test_field_type.py +121 -0
  375. maxframe/serialization/serializables/tests/test_serializable.py +250 -0
  376. maxframe/serialization/tests/__init__.py +13 -0
  377. maxframe/serialization/tests/test_serial.py +412 -0
  378. maxframe/session.py +1310 -0
  379. maxframe/tensor/__init__.py +183 -0
  380. maxframe/tensor/arithmetic/__init__.py +315 -0
  381. maxframe/tensor/arithmetic/abs.py +68 -0
  382. maxframe/tensor/arithmetic/absolute.py +68 -0
  383. maxframe/tensor/arithmetic/add.py +82 -0
  384. maxframe/tensor/arithmetic/angle.py +72 -0
  385. maxframe/tensor/arithmetic/arccos.py +104 -0
  386. maxframe/tensor/arithmetic/arccosh.py +91 -0
  387. maxframe/tensor/arithmetic/arcsin.py +94 -0
  388. maxframe/tensor/arithmetic/arcsinh.py +86 -0
  389. maxframe/tensor/arithmetic/arctan.py +106 -0
  390. maxframe/tensor/arithmetic/arctan2.py +128 -0
  391. maxframe/tensor/arithmetic/arctanh.py +86 -0
  392. maxframe/tensor/arithmetic/around.py +114 -0
  393. maxframe/tensor/arithmetic/bitand.py +95 -0
  394. maxframe/tensor/arithmetic/bitor.py +102 -0
  395. maxframe/tensor/arithmetic/bitxor.py +95 -0
  396. maxframe/tensor/arithmetic/cbrt.py +66 -0
  397. maxframe/tensor/arithmetic/ceil.py +71 -0
  398. maxframe/tensor/arithmetic/clip.py +165 -0
  399. maxframe/tensor/arithmetic/conj.py +74 -0
  400. maxframe/tensor/arithmetic/copysign.py +78 -0
  401. maxframe/tensor/arithmetic/core.py +544 -0
  402. maxframe/tensor/arithmetic/cos.py +85 -0
  403. maxframe/tensor/arithmetic/cosh.py +72 -0
  404. maxframe/tensor/arithmetic/deg2rad.py +72 -0
  405. maxframe/tensor/arithmetic/degrees.py +77 -0
  406. maxframe/tensor/arithmetic/divide.py +114 -0
  407. maxframe/tensor/arithmetic/equal.py +76 -0
  408. maxframe/tensor/arithmetic/exp.py +106 -0
  409. maxframe/tensor/arithmetic/exp2.py +67 -0
  410. maxframe/tensor/arithmetic/expm1.py +79 -0
  411. maxframe/tensor/arithmetic/fabs.py +74 -0
  412. maxframe/tensor/arithmetic/fix.py +69 -0
  413. maxframe/tensor/arithmetic/float_power.py +103 -0
  414. maxframe/tensor/arithmetic/floor.py +77 -0
  415. maxframe/tensor/arithmetic/floordiv.py +94 -0
  416. maxframe/tensor/arithmetic/fmax.py +105 -0
  417. maxframe/tensor/arithmetic/fmin.py +106 -0
  418. maxframe/tensor/arithmetic/fmod.py +99 -0
  419. maxframe/tensor/arithmetic/frexp.py +92 -0
  420. maxframe/tensor/arithmetic/greater.py +77 -0
  421. maxframe/tensor/arithmetic/greater_equal.py +69 -0
  422. maxframe/tensor/arithmetic/hypot.py +77 -0
  423. maxframe/tensor/arithmetic/i0.py +89 -0
  424. maxframe/tensor/arithmetic/imag.py +67 -0
  425. maxframe/tensor/arithmetic/invert.py +110 -0
  426. maxframe/tensor/arithmetic/isclose.py +115 -0
  427. maxframe/tensor/arithmetic/iscomplex.py +64 -0
  428. maxframe/tensor/arithmetic/isfinite.py +106 -0
  429. maxframe/tensor/arithmetic/isinf.py +103 -0
  430. maxframe/tensor/arithmetic/isnan.py +82 -0
  431. maxframe/tensor/arithmetic/isreal.py +63 -0
  432. maxframe/tensor/arithmetic/ldexp.py +99 -0
  433. maxframe/tensor/arithmetic/less.py +69 -0
  434. maxframe/tensor/arithmetic/less_equal.py +69 -0
  435. maxframe/tensor/arithmetic/log.py +92 -0
  436. maxframe/tensor/arithmetic/log10.py +85 -0
  437. maxframe/tensor/arithmetic/log1p.py +95 -0
  438. maxframe/tensor/arithmetic/log2.py +85 -0
  439. maxframe/tensor/arithmetic/logaddexp.py +80 -0
  440. maxframe/tensor/arithmetic/logaddexp2.py +78 -0
  441. maxframe/tensor/arithmetic/logical_and.py +81 -0
  442. maxframe/tensor/arithmetic/logical_not.py +74 -0
  443. maxframe/tensor/arithmetic/logical_or.py +82 -0
  444. maxframe/tensor/arithmetic/logical_xor.py +88 -0
  445. maxframe/tensor/arithmetic/lshift.py +82 -0
  446. maxframe/tensor/arithmetic/maximum.py +108 -0
  447. maxframe/tensor/arithmetic/minimum.py +108 -0
  448. maxframe/tensor/arithmetic/mod.py +104 -0
  449. maxframe/tensor/arithmetic/modf.py +83 -0
  450. maxframe/tensor/arithmetic/multiply.py +81 -0
  451. maxframe/tensor/arithmetic/nan_to_num.py +99 -0
  452. maxframe/tensor/arithmetic/negative.py +65 -0
  453. maxframe/tensor/arithmetic/nextafter.py +68 -0
  454. maxframe/tensor/arithmetic/not_equal.py +72 -0
  455. maxframe/tensor/arithmetic/positive.py +47 -0
  456. maxframe/tensor/arithmetic/power.py +106 -0
  457. maxframe/tensor/arithmetic/rad2deg.py +71 -0
  458. maxframe/tensor/arithmetic/radians.py +77 -0
  459. maxframe/tensor/arithmetic/real.py +70 -0
  460. maxframe/tensor/arithmetic/reciprocal.py +76 -0
  461. maxframe/tensor/arithmetic/rint.py +68 -0
  462. maxframe/tensor/arithmetic/rshift.py +81 -0
  463. maxframe/tensor/arithmetic/setimag.py +29 -0
  464. maxframe/tensor/arithmetic/setreal.py +29 -0
  465. maxframe/tensor/arithmetic/sign.py +81 -0
  466. maxframe/tensor/arithmetic/signbit.py +65 -0
  467. maxframe/tensor/arithmetic/sin.py +98 -0
  468. maxframe/tensor/arithmetic/sinc.py +102 -0
  469. maxframe/tensor/arithmetic/sinh.py +93 -0
  470. maxframe/tensor/arithmetic/spacing.py +72 -0
  471. maxframe/tensor/arithmetic/sqrt.py +81 -0
  472. maxframe/tensor/arithmetic/square.py +69 -0
  473. maxframe/tensor/arithmetic/subtract.py +81 -0
  474. maxframe/tensor/arithmetic/tan.py +88 -0
  475. maxframe/tensor/arithmetic/tanh.py +92 -0
  476. maxframe/tensor/arithmetic/tests/__init__.py +15 -0
  477. maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
  478. maxframe/tensor/arithmetic/truediv.py +104 -0
  479. maxframe/tensor/arithmetic/trunc.py +72 -0
  480. maxframe/tensor/arithmetic/utils.py +65 -0
  481. maxframe/tensor/array_utils.py +186 -0
  482. maxframe/tensor/base/__init__.py +34 -0
  483. maxframe/tensor/base/astype.py +119 -0
  484. maxframe/tensor/base/atleast_1d.py +74 -0
  485. maxframe/tensor/base/broadcast_to.py +89 -0
  486. maxframe/tensor/base/ravel.py +92 -0
  487. maxframe/tensor/base/tests/__init__.py +13 -0
  488. maxframe/tensor/base/tests/test_base.py +114 -0
  489. maxframe/tensor/base/transpose.py +125 -0
  490. maxframe/tensor/base/unique.py +205 -0
  491. maxframe/tensor/base/where.py +127 -0
  492. maxframe/tensor/core.py +724 -0
  493. maxframe/tensor/datasource/__init__.py +32 -0
  494. maxframe/tensor/datasource/arange.py +156 -0
  495. maxframe/tensor/datasource/array.py +415 -0
  496. maxframe/tensor/datasource/core.py +109 -0
  497. maxframe/tensor/datasource/empty.py +169 -0
  498. maxframe/tensor/datasource/from_dataframe.py +70 -0
  499. maxframe/tensor/datasource/from_dense.py +54 -0
  500. maxframe/tensor/datasource/from_sparse.py +47 -0
  501. maxframe/tensor/datasource/full.py +186 -0
  502. maxframe/tensor/datasource/ones.py +173 -0
  503. maxframe/tensor/datasource/scalar.py +40 -0
  504. maxframe/tensor/datasource/tests/__init__.py +13 -0
  505. maxframe/tensor/datasource/tests/test_datasource.py +278 -0
  506. maxframe/tensor/datasource/zeros.py +188 -0
  507. maxframe/tensor/fetch/__init__.py +15 -0
  508. maxframe/tensor/fetch/core.py +54 -0
  509. maxframe/tensor/indexing/__init__.py +47 -0
  510. maxframe/tensor/indexing/choose.py +196 -0
  511. maxframe/tensor/indexing/compress.py +124 -0
  512. maxframe/tensor/indexing/core.py +190 -0
  513. maxframe/tensor/indexing/extract.py +71 -0
  514. maxframe/tensor/indexing/fill_diagonal.py +183 -0
  515. maxframe/tensor/indexing/flatnonzero.py +60 -0
  516. maxframe/tensor/indexing/getitem.py +175 -0
  517. maxframe/tensor/indexing/nonzero.py +120 -0
  518. maxframe/tensor/indexing/setitem.py +132 -0
  519. maxframe/tensor/indexing/slice.py +29 -0
  520. maxframe/tensor/indexing/take.py +130 -0
  521. maxframe/tensor/indexing/tests/__init__.py +15 -0
  522. maxframe/tensor/indexing/tests/test_indexing.py +234 -0
  523. maxframe/tensor/indexing/unravel_index.py +103 -0
  524. maxframe/tensor/merge/__init__.py +15 -0
  525. maxframe/tensor/merge/stack.py +132 -0
  526. maxframe/tensor/merge/tests/__init__.py +13 -0
  527. maxframe/tensor/merge/tests/test_merge.py +52 -0
  528. maxframe/tensor/operators.py +123 -0
  529. maxframe/tensor/random/__init__.py +168 -0
  530. maxframe/tensor/random/beta.py +87 -0
  531. maxframe/tensor/random/binomial.py +137 -0
  532. maxframe/tensor/random/bytes.py +39 -0
  533. maxframe/tensor/random/chisquare.py +110 -0
  534. maxframe/tensor/random/choice.py +186 -0
  535. maxframe/tensor/random/core.py +234 -0
  536. maxframe/tensor/random/dirichlet.py +123 -0
  537. maxframe/tensor/random/exponential.py +94 -0
  538. maxframe/tensor/random/f.py +135 -0
  539. maxframe/tensor/random/gamma.py +128 -0
  540. maxframe/tensor/random/geometric.py +93 -0
  541. maxframe/tensor/random/gumbel.py +167 -0
  542. maxframe/tensor/random/hypergeometric.py +148 -0
  543. maxframe/tensor/random/laplace.py +133 -0
  544. maxframe/tensor/random/logistic.py +129 -0
  545. maxframe/tensor/random/lognormal.py +159 -0
  546. maxframe/tensor/random/logseries.py +122 -0
  547. maxframe/tensor/random/multinomial.py +133 -0
  548. maxframe/tensor/random/multivariate_normal.py +192 -0
  549. maxframe/tensor/random/negative_binomial.py +125 -0
  550. maxframe/tensor/random/noncentral_chisquare.py +132 -0
  551. maxframe/tensor/random/noncentral_f.py +126 -0
  552. maxframe/tensor/random/normal.py +143 -0
  553. maxframe/tensor/random/pareto.py +140 -0
  554. maxframe/tensor/random/permutation.py +104 -0
  555. maxframe/tensor/random/poisson.py +111 -0
  556. maxframe/tensor/random/power.py +142 -0
  557. maxframe/tensor/random/rand.py +82 -0
  558. maxframe/tensor/random/randint.py +121 -0
  559. maxframe/tensor/random/randn.py +96 -0
  560. maxframe/tensor/random/random_integers.py +123 -0
  561. maxframe/tensor/random/random_sample.py +86 -0
  562. maxframe/tensor/random/rayleigh.py +110 -0
  563. maxframe/tensor/random/shuffle.py +61 -0
  564. maxframe/tensor/random/standard_cauchy.py +105 -0
  565. maxframe/tensor/random/standard_exponential.py +72 -0
  566. maxframe/tensor/random/standard_gamma.py +120 -0
  567. maxframe/tensor/random/standard_normal.py +74 -0
  568. maxframe/tensor/random/standard_t.py +135 -0
  569. maxframe/tensor/random/tests/__init__.py +15 -0
  570. maxframe/tensor/random/tests/test_random.py +167 -0
  571. maxframe/tensor/random/triangular.py +119 -0
  572. maxframe/tensor/random/uniform.py +131 -0
  573. maxframe/tensor/random/vonmises.py +131 -0
  574. maxframe/tensor/random/wald.py +114 -0
  575. maxframe/tensor/random/weibull.py +140 -0
  576. maxframe/tensor/random/zipf.py +122 -0
  577. maxframe/tensor/rechunk/__init__.py +26 -0
  578. maxframe/tensor/rechunk/rechunk.py +43 -0
  579. maxframe/tensor/reduction/__init__.py +66 -0
  580. maxframe/tensor/reduction/all.py +103 -0
  581. maxframe/tensor/reduction/allclose.py +88 -0
  582. maxframe/tensor/reduction/any.py +105 -0
  583. maxframe/tensor/reduction/argmax.py +103 -0
  584. maxframe/tensor/reduction/argmin.py +103 -0
  585. maxframe/tensor/reduction/array_equal.py +64 -0
  586. maxframe/tensor/reduction/core.py +168 -0
  587. maxframe/tensor/reduction/count_nonzero.py +81 -0
  588. maxframe/tensor/reduction/cumprod.py +97 -0
  589. maxframe/tensor/reduction/cumsum.py +101 -0
  590. maxframe/tensor/reduction/max.py +120 -0
  591. maxframe/tensor/reduction/mean.py +123 -0
  592. maxframe/tensor/reduction/min.py +120 -0
  593. maxframe/tensor/reduction/nanargmax.py +82 -0
  594. maxframe/tensor/reduction/nanargmin.py +76 -0
  595. maxframe/tensor/reduction/nancumprod.py +91 -0
  596. maxframe/tensor/reduction/nancumsum.py +94 -0
  597. maxframe/tensor/reduction/nanmax.py +111 -0
  598. maxframe/tensor/reduction/nanmean.py +106 -0
  599. maxframe/tensor/reduction/nanmin.py +111 -0
  600. maxframe/tensor/reduction/nanprod.py +94 -0
  601. maxframe/tensor/reduction/nanstd.py +126 -0
  602. maxframe/tensor/reduction/nansum.py +115 -0
  603. maxframe/tensor/reduction/nanvar.py +149 -0
  604. maxframe/tensor/reduction/prod.py +130 -0
  605. maxframe/tensor/reduction/std.py +134 -0
  606. maxframe/tensor/reduction/sum.py +125 -0
  607. maxframe/tensor/reduction/tests/__init__.py +13 -0
  608. maxframe/tensor/reduction/tests/test_reduction.py +181 -0
  609. maxframe/tensor/reduction/var.py +176 -0
  610. maxframe/tensor/reshape/__init__.py +17 -0
  611. maxframe/tensor/reshape/reshape.py +188 -0
  612. maxframe/tensor/reshape/tests/__init__.py +15 -0
  613. maxframe/tensor/reshape/tests/test_reshape.py +37 -0
  614. maxframe/tensor/statistics/__init__.py +13 -0
  615. maxframe/tensor/statistics/percentile.py +175 -0
  616. maxframe/tensor/statistics/quantile.py +288 -0
  617. maxframe/tensor/ufunc/__init__.py +26 -0
  618. maxframe/tensor/ufunc/ufunc.py +200 -0
  619. maxframe/tensor/utils.py +718 -0
  620. maxframe/tests/__init__.py +13 -0
  621. maxframe/tests/test_codegen.py +69 -0
  622. maxframe/tests/test_protocol.py +144 -0
  623. maxframe/tests/test_utils.py +376 -0
  624. maxframe/tests/utils.py +164 -0
  625. maxframe/typing_.py +37 -0
  626. maxframe/udf.py +134 -0
  627. maxframe/utils.py +1114 -0
  628. maxframe-0.1.0b5.dist-info/METADATA +104 -0
  629. maxframe-0.1.0b5.dist-info/RECORD +647 -0
  630. maxframe-0.1.0b5.dist-info/WHEEL +5 -0
  631. maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
  632. maxframe_client/__init__.py +17 -0
  633. maxframe_client/clients/__init__.py +13 -0
  634. maxframe_client/clients/framedriver.py +118 -0
  635. maxframe_client/clients/spe.py +104 -0
  636. maxframe_client/conftest.py +15 -0
  637. maxframe_client/fetcher.py +264 -0
  638. maxframe_client/session/__init__.py +22 -0
  639. maxframe_client/session/consts.py +36 -0
  640. maxframe_client/session/graph.py +119 -0
  641. maxframe_client/session/odps.py +482 -0
  642. maxframe_client/session/task.py +280 -0
  643. maxframe_client/session/tests/__init__.py +13 -0
  644. maxframe_client/session/tests/test_task.py +85 -0
  645. maxframe_client/tests/__init__.py +13 -0
  646. maxframe_client/tests/test_fetcher.py +89 -0
  647. maxframe_client/tests/test_session.py +255 -0
@@ -0,0 +1,214 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import copy
16
+ from typing import Optional, Sequence, Union
17
+
18
+ import numpy as np
19
+ import pandas as pd
20
+
21
+ from ... import opcodes
22
+ from ...core import ENTITY_TYPE, OutputType, get_output_types
23
+ from ...serialization.serializables import (
24
+ BoolField,
25
+ DictField,
26
+ Float32Field,
27
+ Int32Field,
28
+ Int64Field,
29
+ KeyField,
30
+ NDArrayField,
31
+ StringField,
32
+ )
33
+ from ...tensor.random import RandomStateField
34
+ from ..initializer import Series as asseries
35
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
36
+ from ..utils import parse_index
37
+
38
+
39
class GroupBySample(DataFrameOperator, DataFrameOperatorMixin):
    """Operator that samples rows from each group of a groupby object.

    Instances are created by ``groupby_sample`` and then called with the
    groupby object to build the resulting tileable.
    """

    _op_type_ = opcodes.RAND_SAMPLE
    _op_module_ = "dataframe.groupby"

    # parameters of the groupby the sample is taken from
    groupby_params = DictField("groupby_params", default=None)
    # number of rows to draw per group (``n`` of ``groupby_sample``)
    size = Int64Field("size", default=None)
    # fraction of rows to draw per group
    frac = Float32Field("frac", default=None)
    # whether the same row may be drawn more than once
    replace = BoolField("replace", default=None)
    # optional sampling weights; when it is an entity it is also an input
    weights = KeyField("weights", default=None)
    # NOTE(review): not populated by any code visible in this module
    seed = Int32Field("seed", default=None)
    _random_state = RandomStateField("random_state", default=None)
    # error handling mode forwarded from ``groupby_sample``
    errors = StringField("errors", default=None)

    # for chunks
    # num of instances for chunks
    input_nsplits = NDArrayField("input_nsplits", default=None)

    def __init__(self, random_state=None, **kw):
        # ``random_state`` is stored on the private serializable field and
        # exposed read-only through the property below.
        super().__init__(_random_state=random_state, **kw)

    @property
    def random_state(self):
        """Random state object handed to the constructor."""
        return self._random_state

    def _set_inputs(self, inputs):
        """Rebind ``weights`` to the matching input when it is an entity."""
        super()._set_inputs(inputs)
        input_iter = iter(inputs)
        # the first input is the data being sampled; weights, if any, follow
        next(input_iter)
        if isinstance(self.weights, ENTITY_TYPE):
            self.weights = next(input_iter)

    def __call__(self, groupby):
        """Build the sampled tileable for ``groupby``.

        Parameters
        ----------
        groupby
            Groupby object whose groups are sampled.

        Returns
        -------
        Tileable created by ``new_tileable`` whose row count is unknown
        (``np.nan``) and whose index value is derived from an empty slice
        of the source index.
        """
        # Walk up the input chain until the DataFrame / Series the groupby
        # was built on is reached.
        frame_types = (OutputType.dataframe, OutputType.series)
        source = groupby
        while source.op.output_types[0] not in frame_types:
            source = source.inputs[0]

        # Read the selection without removing it: popping would strip the
        # key from the upstream op's parameters if that mapping is shared
        # rather than rebuilt on each access.
        selection = groupby.op.groupby_params.get("selection")
        if source.ndim > 1 and selection:
            # a tuple that is not itself a column label denotes several columns
            if isinstance(selection, tuple) and selection not in source.dtypes:
                selection = list(selection)
            result_df = source[selection]
        else:
            result_df = source

        params = result_df.params
        params["shape"] = (
            (np.nan,) if result_df.ndim == 1 else (np.nan, result_df.shape[-1])
        )
        params["index_value"] = parse_index(result_df.index_value.to_pandas()[:0])

        input_dfs = [source]
        if isinstance(self.weights, ENTITY_TYPE):
            input_dfs.append(self.weights)

        self._output_types = get_output_types(result_df)
        return self.new_tileable(input_dfs, **params)
95
+
96
+
97
def groupby_sample(
    groupby,
    n: Optional[int] = None,
    frac: Optional[float] = None,
    replace: bool = False,
    weights: Union[Sequence, pd.Series, None] = None,
    random_state: Optional[np.random.RandomState] = None,
    errors: str = "ignore",
):
    """
    Draw a random sample of items from every group.

    Pass `random_state` to make the result reproducible.

    Parameters
    ----------
    n : int, optional
        How many items to return per group. Mutually exclusive with
        `frac`, and must not exceed the size of the smallest group unless
        `replace` is True. Defaults to one when `frac` is None.
    frac : float, optional
        Fraction of items to return. Mutually exclusive with `n`.
    replace : bool, default False
        Whether the same row may be sampled more than once.
    weights : list-like, optional
        None (the default) gives every row equal probability.
        A list-like must have the same length as the underlying DataFrame
        or Series object; its values are normalized within each group and
        used as sampling probabilities. Values must be non-negative and
        each group needs at least one positive element.
    random_state : int, array-like, BitGenerator, np.random.RandomState, optional
        An int, array-like, or BitGenerator (NumPy>=1.17) is used as the
        seed of the random number generator.
        An np.random.RandomState is used as the numpy RandomState object.
    errors : {'ignore', 'raise'}, default 'ignore'
        With 'ignore', no error is raised when `replace` is False and
        some group is smaller than `n`.

    Returns
    -------
    Series or DataFrame
        A new object of the same type as the caller, holding the items
        randomly sampled within each group of the caller object.

    See Also
    --------
    DataFrame.sample: Generate random samples from a DataFrame object.
    numpy.random.choice: Generate a random sample from a given 1-D numpy
        array.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame(
    ...     {"a": ["red"] * 2 + ["blue"] * 2 + ["black"] * 2, "b": range(6)}
    ... )
    >>> df.execute()
           a  b
    0    red  0
    1    red  1
    2   blue  2
    3   blue  3
    4  black  4
    5  black  5

    Pick one random row for each distinct value in column a. The
    `random_state` argument can be used to guarantee reproducibility:

    >>> df.groupby("a").sample(n=1, random_state=1).execute()
           a  b
    4  black  4
    2   blue  2
    1    red  1

    Use `frac` to sample fixed proportions instead of counts:

    >>> df.groupby("a")["b"].sample(frac=0.5, random_state=2).execute()
    5    5
    2    2
    0    0
    Name: b, dtype: int64

    Steer the sampling probabilities within groups through weights:

    >>> df.groupby("a").sample(
    ...     n=1,
    ...     weights=[1, 1, 1, 0, 0, 1],
    ...     random_state=1,
    ... ).execute()
           a  b
    5  black  5
    2   blue  2
    0    red  0
    """
    # work on a private copy of the groupby parameters, without ``as_index``
    params = groupby.op.groupby_params.copy()
    params.pop("as_index", None)

    # plain sequences are wrapped into a Series; entities pass through as-is
    if not (weights is None or isinstance(weights, ENTITY_TYPE)):
        weights = asseries(weights)

    # fall back to one item per group when neither size nor fraction is given
    if n is None and frac is None:
        n = 1

    # deep-copy so the caller's random state is not advanced by the operator
    if hasattr(random_state, "to_numpy"):
        state = copy.deepcopy(random_state.to_numpy())
    else:
        state = copy.deepcopy(random_state)
    if not isinstance(state, np.random.RandomState):  # pragma: no cover
        state = np.random.RandomState(state)

    sample_op = GroupBySample(
        size=n,
        frac=frac,
        replace=replace,
        weights=weights,
        random_state=state,
        groupby_params=params,
        errors=errors,
    )
    return sample_op(groupby)
@@ -0,0 +1,13 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
@@ -0,0 +1,374 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from collections import OrderedDict
16
+
17
+ import numpy as np
18
+ import pandas as pd
19
+ import pytest
20
+
21
+ from .... import dataframe as md
22
+ from .... import opcodes
23
+ from ....core import OutputType
24
+ from ...core import DataFrame, DataFrameGroupBy, SeriesGroupBy
25
+ from ..aggregation import DataFrameGroupByAgg
26
+ from ..core import DataFrameGroupByOperator
27
+ from ..getitem import GroupByIndex
28
+
29
+
30
def test_groupby():
    """Check groupby construction on DataFrame and Series inputs."""
    raw_df = pd.DataFrame(
        {"a": [3, 4, 5, 3, 5, 4, 1, 2, 3], "b": [1, 3, 4, 5, 6, 5, 4, 4, 4]}
    )
    mdf = md.DataFrame(raw_df, chunk_size=2)

    # an unknown column must be rejected, alone or inside a list of keys
    for bad_keys in ("c2", ["b", "c2"]):
        with pytest.raises(KeyError):
            mdf.groupby(bad_keys)

    df_grouped = mdf.groupby("b")
    assert isinstance(df_grouped, DataFrameGroupBy)
    assert isinstance(df_grouped.op, DataFrameGroupByOperator)
    assert list(df_grouped.key_dtypes.index) == ["b"]

    raw_series = pd.Series([3, 4, 5, 3, 5, 4, 1, 2, 3])
    ms = md.Series(raw_series, chunk_size=3)
    series_grouped = ms.groupby(lambda x: x + 1)

    assert isinstance(series_grouped, SeriesGroupBy)
    assert isinstance(series_grouped.op, DataFrameGroupByOperator)

    # as_index=False is expected to be refused for a Series groupby
    with pytest.raises(TypeError):
        ms.groupby(lambda x: x + 1, as_index=False)
54
+
55
+
56
def test_groupby_get_item():
    """Check column selection on a groupby via item and attribute access."""
    raw_df = pd.DataFrame(
        {
            "a": [3, 4, 5, 3, 5, 4, 1, 2, 3],
            "b": [1, 3, 4, 5, 6, 5, 4, 4, 4],
            "c": list("aabaaddce"),
        }
    )
    mdf = md.DataFrame(raw_df, chunk_size=3)

    # a list selection keeps a DataFrame groupby
    multi = mdf.groupby("b")[["a", "b"]]
    assert isinstance(multi, DataFrameGroupBy)
    assert isinstance(multi.op, GroupByIndex)
    assert multi.selection == ["a", "b"]
    assert list(multi.key_dtypes.index) == ["b"]

    # attribute access on a single column gives a Series groupby
    single = mdf.groupby("b").a
    assert isinstance(single, SeriesGroupBy)
    assert isinstance(single.op, GroupByIndex)
    assert single.name == "a"
    assert list(single.key_dtypes.index) == ["b"]

    # selecting again on an already-selected groupby is expected to fail
    already_selected = mdf.groupby("b")[["a", "b"]]
    with pytest.raises(IndexError):
        getattr(already_selected, "a")
80
+
81
+
82
def test_groupby_agg():
    """Check that groupby aggregations build ``DataFrameGroupByAgg`` ops."""
    choices = [2, 3, 4]
    raw = pd.DataFrame(
        {
            "a": np.random.choice(choices, size=(20,)),
            "b": np.random.choice(choices, size=(20,)),
        }
    )
    mdf = md.DataFrame(raw, chunk_size=3)
    result = mdf.groupby("a").agg("sum", method="tree")
    assert isinstance(result.op, DataFrameGroupByAgg)
    assert isinstance(result, DataFrame)
    assert result.op.method == "tree"

    raw = pd.DataFrame(
        {
            "c1": range(10),
            "c2": np.random.choice(["a", "b", "c"], (10,)),
            "c3": np.random.rand(10),
        }
    )
    mdf = md.DataFrame(raw, chunk_size=2)

    # shuffle-based aggregation, first unsorted and then with default sorting
    for grouped in (mdf.groupby("c2", sort=False), mdf.groupby("c2")):
        result = grouped.sum(method="shuffle")
        assert isinstance(result.op, DataFrameGroupByAgg)
        assert isinstance(result, DataFrame)

    # test unknown method
    with pytest.raises(ValueError):
        mdf.groupby("c2").sum(method="not_exist")
118
+
119
+
120
def test_groupby_apply():
    """Check the metadata inferred for ``groupby(...).apply`` results."""
    raw_df = pd.DataFrame(
        {
            "a": [3, 4, 5, 3, 5, 4, 1, 2, 3],
            "b": [1, 3, 4, 5, 6, 5, 4, 4, 4],
            "c": list("aabaaddce"),
        }
    )

    def apply_call_with_err(_):
        raise ValueError

    def apply_df(df):
        return df.sort_index()

    def apply_df_with_error(df):
        assert len(df) > 2
        return df.sort_index()

    def apply_series(s):
        return s.sort_index()

    def check_applied(result, shape, out_type):
        # shared shape / opcode / output-type assertions
        assert result.shape == shape
        assert result.op._op_type_ == opcodes.APPLY
        assert result.op.output_types[0] == out_type

    mdf = md.DataFrame(raw_df, chunk_size=3)
    frame_shape = (np.nan, 3)
    series_shape = (np.nan,)

    # when dtype and output_type specified, apply function
    # shall not be called
    applied = mdf.groupby("b").apply(
        apply_call_with_err, output_type="series", dtype=int
    )
    assert applied.dtype == int
    assert applied.op.output_types[0] == OutputType.series

    # without explicit metadata the failing function surfaces as TypeError
    with pytest.raises(TypeError):
        mdf.groupby("b").apply(apply_df_with_error)

    applied = mdf.groupby("b").apply(
        apply_df_with_error, output_type="dataframe", dtypes=raw_df.dtypes
    )
    pd.testing.assert_series_equal(applied.dtypes, raw_df.dtypes)
    check_applied(applied, frame_shape, OutputType.dataframe)

    applied = mdf.groupby("b").apply(apply_df)
    pd.testing.assert_series_equal(applied.dtypes, raw_df.dtypes)
    check_applied(applied, frame_shape, OutputType.dataframe)

    applied = mdf.groupby("b").apply(lambda df: df.a)
    assert applied.dtype == raw_df.a.dtype
    check_applied(applied, series_shape, OutputType.series)

    applied = mdf.groupby("b").apply(lambda df: df.a.sum())
    assert applied.op.maybe_agg is True
    # force set to pass test
    applied.op.maybe_agg = None
    assert applied.dtype == raw_df.a.dtype
    check_applied(applied, series_shape, OutputType.series)

    raw_series = pd.Series([3, 4, 5, 3, 5, 4, 1, 2, 3])

    ms1 = md.Series(raw_series, chunk_size=3)
    applied = ms1.groupby(lambda x: x % 3).apply(apply_series)
    assert applied.dtype == raw_series.dtype
    check_applied(applied, series_shape, OutputType.series)
192
+
193
+
194
def test_groupby_transform():
    """Check the metadata inferred for ``groupby(...).transform`` results."""
    raw_df = pd.DataFrame(
        {
            "a": [3, 4, 5, 3, 5, 4, 1, 2, 3],
            "b": [1, 3, 4, 5, 6, 5, 4, 4, 4],
            "c": list("aabaaddce"),
            "d": [3, 4, 5, 3, 5, 4, 1, 2, 3],
            "e": [1, 3, 4, 5, 6, 5, 4, 4, 4],
            "f": list("aabaaddce"),
        }
    )

    def transform_df(df):
        return df.sort_index()

    def transform_df_with_err(df):
        assert len(df) > 2
        return df.sort_index()

    def check_transformed(result, shape, out_type):
        # shared shape / opcode / output-type assertions
        assert result.shape == shape
        assert result.op._op_type_ == opcodes.TRANSFORM
        assert result.op.output_types[0] == out_type

    mdf = md.DataFrame(raw_df, chunk_size=3)
    non_key_columns = list("acdef")

    # a list of functions is refused unless _call_agg is set
    with pytest.raises(TypeError):
        mdf.groupby("b").transform(["cummax", "cumcount"])

    # a function failing during inference surfaces as TypeError
    with pytest.raises(TypeError):
        mdf.groupby("b").transform(transform_df_with_err)

    explicit_dtypes = raw_df.dtypes.drop("b")
    r = mdf.groupby("b").transform(transform_df_with_err, dtypes=explicit_dtypes)
    assert r.dtypes.index.tolist() == non_key_columns
    check_transformed(r, (9, 5), OutputType.dataframe)

    r = mdf.groupby("b").transform(transform_df)
    assert r.dtypes.index.tolist() == non_key_columns
    check_transformed(r, (9, 5), OutputType.dataframe)

    r = mdf.groupby("b").transform(["cummax", "cumcount"], _call_agg=True)
    check_transformed(r, (np.nan, 6), OutputType.dataframe)

    agg_dict = OrderedDict([("d", "cummax"), ("b", "cumsum")])
    r = mdf.groupby("b").transform(agg_dict, _call_agg=True)
    check_transformed(r, (np.nan, 2), OutputType.dataframe)

    agg_list = ["sum", lambda s: s.sum()]
    r = mdf.groupby("b").transform(agg_list, _call_agg=True)
    check_transformed(r, (np.nan, 10), OutputType.dataframe)

    raw_series = pd.Series([3, 4, 5, 3, 5, 4, 1, 2, 3])
    ms1 = md.Series(raw_series, chunk_size=3)

    r = ms1.groupby(lambda x: x % 3).transform(lambda x: x + 1)
    assert r.dtype == raw_series.dtype
    check_transformed(r, raw_series.shape, OutputType.series)

    r = ms1.groupby(lambda x: x % 3).transform("cummax", _call_agg=True)
    check_transformed(r, (np.nan,), OutputType.series)

    agg_list = ["cummax", "cumcount"]
    r = ms1.groupby(lambda x: x % 3).transform(agg_list, _call_agg=True)
    check_transformed(r, (np.nan, 2), OutputType.dataframe)
269
+
270
+
271
def test_groupby_cum():
    """Check output type and shape of the cumulative groupby methods."""
    raw_df = pd.DataFrame(
        {
            "a": [3, 5, 2, 7, 1, 2, 4, 6, 2, 4],
            "b": [8, 3, 4, 1, 8, 2, 2, 2, 2, 3],
            "c": [1, 8, 8, 5, 3, 5, 0, 0, 5, 4],
        }
    )
    mdf = md.DataFrame(raw_df, chunk_size=3)
    n_rows = len(raw_df)

    for name in ("cummin", "cummax", "cumprod", "cumsum"):
        # default axis: the grouping column is left out of the result
        result = getattr(mdf.groupby("b"), name)()
        assert result.op.output_types[0] == OutputType.dataframe
        assert result.shape == (n_rows, 2)

        # axis=1: all three columns are kept
        result = getattr(mdf.groupby("b"), name)(axis=1)
        assert result.op.output_types[0] == OutputType.dataframe
        assert result.shape == (n_rows, 3)

    result = mdf.groupby("b").cumcount()
    assert result.op.output_types[0] == OutputType.series
    assert result.shape == (n_rows,)

    raw_series = pd.Series([2, 2, 5, 7, 3, 7, 8, 8, 5, 6])
    ms1 = md.Series(raw_series, chunk_size=3)
    expected_shape = (len(raw_series),)

    for name in ("cummin", "cummax", "cumprod", "cumsum", "cumcount"):
        result = getattr(ms1.groupby(lambda x: x % 2), name)()
        assert result.op.output_types[0] == OutputType.series
        assert result.shape == expected_shape
301
+
302
+
303
def test_groupby_fill():
    """Check output type and shape of the groupby fill methods.

    Covers ``ffill``, ``bfill``, ``backfill`` and ``fillna`` on both a
    DataFrame groupby and a Series groupby.
    """
    df1 = pd.DataFrame(
        [
            [1, 1, 10],
            [1, 1, np.nan],
            [1, 1, np.nan],
            [1, 2, np.nan],
            [1, 2, 20],
            [1, 2, np.nan],
            [1, 3, np.nan],
            [1, 3, np.nan],
        ],
        columns=["one", "two", "three"],
    )
    mdf = md.DataFrame(df1, chunk_size=3)

    # the grouping columns are excluded from the filled result
    r = mdf.groupby(["one", "two"]).ffill()
    assert r.op.output_types[0] == OutputType.dataframe
    assert r.shape == (len(df1), 1)
    assert r.dtypes.index.tolist() == ["three"]

    r = mdf.groupby(["two"]).bfill()
    assert r.op.output_types[0] == OutputType.dataframe
    assert r.shape == (len(df1), 2)
    assert r.dtypes.index.tolist() == ["one", "three"]

    r = mdf.groupby(["two"]).backfill()
    assert r.op.output_types[0] == OutputType.dataframe
    assert r.shape == (len(df1), 2)
    assert r.dtypes.index.tolist() == ["one", "three"]

    r = mdf.groupby(["one"]).fillna(5)
    assert r.op.output_types[0] == OutputType.dataframe
    assert r.shape == (len(df1), 2)
    assert r.dtypes.index.tolist() == ["two", "three"]

    s1 = pd.Series([4, 3, 9, np.nan, np.nan, 7, 10, 8, 1, 6])
    ms1 = md.Series(s1, chunk_size=3)

    # The Series section used to appear twice, verbatim; the four calls are
    # now driven from one table so each case is stated exactly once.
    series_fill_calls = (
        ("ffill", ()),
        ("bfill", ()),
        ("backfill", ()),
        ("fillna", (5,)),
    )
    for method, args in series_fill_calls:
        r = getattr(ms1.groupby(lambda x: x % 2), method)(*args)
        assert r.op.output_types[0] == OutputType.series
        assert r.shape == (len(s1),)