maxframe 0.1.0b5__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. (Further details were linked from the original page.)

Files changed (647):
  1. maxframe/__init__.py +32 -0
  2. maxframe/_utils.cpython-311-darwin.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyx +547 -0
  5. maxframe/codegen.py +528 -0
  6. maxframe/config/__init__.py +15 -0
  7. maxframe/config/config.py +443 -0
  8. maxframe/config/tests/__init__.py +13 -0
  9. maxframe/config/tests/test_config.py +103 -0
  10. maxframe/config/tests/test_validators.py +34 -0
  11. maxframe/config/validators.py +57 -0
  12. maxframe/conftest.py +139 -0
  13. maxframe/core/__init__.py +65 -0
  14. maxframe/core/base.py +156 -0
  15. maxframe/core/entity/__init__.py +44 -0
  16. maxframe/core/entity/chunks.py +68 -0
  17. maxframe/core/entity/core.py +152 -0
  18. maxframe/core/entity/executable.py +337 -0
  19. maxframe/core/entity/fuse.py +73 -0
  20. maxframe/core/entity/objects.py +100 -0
  21. maxframe/core/entity/output_types.py +90 -0
  22. maxframe/core/entity/tileables.py +438 -0
  23. maxframe/core/entity/utils.py +24 -0
  24. maxframe/core/graph/__init__.py +17 -0
  25. maxframe/core/graph/builder/__init__.py +16 -0
  26. maxframe/core/graph/builder/base.py +86 -0
  27. maxframe/core/graph/builder/chunk.py +430 -0
  28. maxframe/core/graph/builder/tileable.py +34 -0
  29. maxframe/core/graph/builder/utils.py +41 -0
  30. maxframe/core/graph/core.cpython-311-darwin.so +0 -0
  31. maxframe/core/graph/core.pyx +467 -0
  32. maxframe/core/graph/entity.py +171 -0
  33. maxframe/core/graph/tests/__init__.py +13 -0
  34. maxframe/core/graph/tests/test_graph.py +205 -0
  35. maxframe/core/mode.py +96 -0
  36. maxframe/core/operator/__init__.py +34 -0
  37. maxframe/core/operator/base.py +450 -0
  38. maxframe/core/operator/core.py +276 -0
  39. maxframe/core/operator/fetch.py +53 -0
  40. maxframe/core/operator/fuse.py +29 -0
  41. maxframe/core/operator/objects.py +72 -0
  42. maxframe/core/operator/shuffle.py +111 -0
  43. maxframe/core/operator/tests/__init__.py +13 -0
  44. maxframe/core/operator/tests/test_core.py +64 -0
  45. maxframe/core/tests/__init__.py +13 -0
  46. maxframe/core/tests/test_mode.py +75 -0
  47. maxframe/dataframe/__init__.py +81 -0
  48. maxframe/dataframe/arithmetic/__init__.py +359 -0
  49. maxframe/dataframe/arithmetic/abs.py +33 -0
  50. maxframe/dataframe/arithmetic/add.py +60 -0
  51. maxframe/dataframe/arithmetic/arccos.py +28 -0
  52. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  53. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  54. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  55. maxframe/dataframe/arithmetic/arctan.py +28 -0
  56. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  57. maxframe/dataframe/arithmetic/around.py +152 -0
  58. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  59. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  60. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  61. maxframe/dataframe/arithmetic/ceil.py +28 -0
  62. maxframe/dataframe/arithmetic/core.py +342 -0
  63. maxframe/dataframe/arithmetic/cos.py +28 -0
  64. maxframe/dataframe/arithmetic/cosh.py +28 -0
  65. maxframe/dataframe/arithmetic/degrees.py +28 -0
  66. maxframe/dataframe/arithmetic/docstring.py +442 -0
  67. maxframe/dataframe/arithmetic/equal.py +56 -0
  68. maxframe/dataframe/arithmetic/exp.py +28 -0
  69. maxframe/dataframe/arithmetic/exp2.py +28 -0
  70. maxframe/dataframe/arithmetic/expm1.py +28 -0
  71. maxframe/dataframe/arithmetic/floor.py +28 -0
  72. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  73. maxframe/dataframe/arithmetic/greater.py +57 -0
  74. maxframe/dataframe/arithmetic/greater_equal.py +57 -0
  75. maxframe/dataframe/arithmetic/invert.py +33 -0
  76. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  77. maxframe/dataframe/arithmetic/less.py +57 -0
  78. maxframe/dataframe/arithmetic/less_equal.py +57 -0
  79. maxframe/dataframe/arithmetic/log.py +28 -0
  80. maxframe/dataframe/arithmetic/log10.py +28 -0
  81. maxframe/dataframe/arithmetic/log2.py +28 -0
  82. maxframe/dataframe/arithmetic/mod.py +60 -0
  83. maxframe/dataframe/arithmetic/multiply.py +60 -0
  84. maxframe/dataframe/arithmetic/negative.py +33 -0
  85. maxframe/dataframe/arithmetic/not_equal.py +56 -0
  86. maxframe/dataframe/arithmetic/power.py +68 -0
  87. maxframe/dataframe/arithmetic/radians.py +28 -0
  88. maxframe/dataframe/arithmetic/sin.py +28 -0
  89. maxframe/dataframe/arithmetic/sinh.py +28 -0
  90. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  91. maxframe/dataframe/arithmetic/subtract.py +64 -0
  92. maxframe/dataframe/arithmetic/tan.py +28 -0
  93. maxframe/dataframe/arithmetic/tanh.py +28 -0
  94. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  95. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
  96. maxframe/dataframe/arithmetic/truediv.py +64 -0
  97. maxframe/dataframe/arithmetic/trunc.py +28 -0
  98. maxframe/dataframe/arrays.py +864 -0
  99. maxframe/dataframe/core.py +2417 -0
  100. maxframe/dataframe/datasource/__init__.py +15 -0
  101. maxframe/dataframe/datasource/core.py +81 -0
  102. maxframe/dataframe/datasource/dataframe.py +59 -0
  103. maxframe/dataframe/datasource/date_range.py +504 -0
  104. maxframe/dataframe/datasource/from_index.py +54 -0
  105. maxframe/dataframe/datasource/from_records.py +107 -0
  106. maxframe/dataframe/datasource/from_tensor.py +419 -0
  107. maxframe/dataframe/datasource/index.py +117 -0
  108. maxframe/dataframe/datasource/read_csv.py +528 -0
  109. maxframe/dataframe/datasource/read_odps_query.py +299 -0
  110. maxframe/dataframe/datasource/read_odps_table.py +253 -0
  111. maxframe/dataframe/datasource/read_parquet.py +421 -0
  112. maxframe/dataframe/datasource/series.py +55 -0
  113. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  114. maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
  115. maxframe/dataframe/datastore/__init__.py +26 -0
  116. maxframe/dataframe/datastore/core.py +19 -0
  117. maxframe/dataframe/datastore/to_csv.py +227 -0
  118. maxframe/dataframe/datastore/to_odps.py +162 -0
  119. maxframe/dataframe/extensions/__init__.py +41 -0
  120. maxframe/dataframe/extensions/accessor.py +50 -0
  121. maxframe/dataframe/extensions/reshuffle.py +83 -0
  122. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  123. maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
  124. maxframe/dataframe/fetch/__init__.py +15 -0
  125. maxframe/dataframe/fetch/core.py +86 -0
  126. maxframe/dataframe/groupby/__init__.py +82 -0
  127. maxframe/dataframe/groupby/aggregation.py +350 -0
  128. maxframe/dataframe/groupby/apply.py +251 -0
  129. maxframe/dataframe/groupby/core.py +179 -0
  130. maxframe/dataframe/groupby/cum.py +124 -0
  131. maxframe/dataframe/groupby/fill.py +141 -0
  132. maxframe/dataframe/groupby/getitem.py +92 -0
  133. maxframe/dataframe/groupby/head.py +105 -0
  134. maxframe/dataframe/groupby/sample.py +214 -0
  135. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  136. maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
  137. maxframe/dataframe/groupby/transform.py +255 -0
  138. maxframe/dataframe/indexing/__init__.py +84 -0
  139. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  140. maxframe/dataframe/indexing/align.py +349 -0
  141. maxframe/dataframe/indexing/at.py +83 -0
  142. maxframe/dataframe/indexing/getitem.py +204 -0
  143. maxframe/dataframe/indexing/iat.py +37 -0
  144. maxframe/dataframe/indexing/iloc.py +566 -0
  145. maxframe/dataframe/indexing/insert.py +86 -0
  146. maxframe/dataframe/indexing/loc.py +411 -0
  147. maxframe/dataframe/indexing/reindex.py +526 -0
  148. maxframe/dataframe/indexing/rename.py +462 -0
  149. maxframe/dataframe/indexing/rename_axis.py +209 -0
  150. maxframe/dataframe/indexing/reset_index.py +402 -0
  151. maxframe/dataframe/indexing/sample.py +221 -0
  152. maxframe/dataframe/indexing/set_axis.py +194 -0
  153. maxframe/dataframe/indexing/set_index.py +61 -0
  154. maxframe/dataframe/indexing/setitem.py +130 -0
  155. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  156. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  157. maxframe/dataframe/indexing/where.py +308 -0
  158. maxframe/dataframe/initializer.py +288 -0
  159. maxframe/dataframe/merge/__init__.py +32 -0
  160. maxframe/dataframe/merge/append.py +121 -0
  161. maxframe/dataframe/merge/concat.py +325 -0
  162. maxframe/dataframe/merge/merge.py +593 -0
  163. maxframe/dataframe/merge/tests/__init__.py +13 -0
  164. maxframe/dataframe/merge/tests/test_merge.py +215 -0
  165. maxframe/dataframe/misc/__init__.py +134 -0
  166. maxframe/dataframe/misc/_duplicate.py +46 -0
  167. maxframe/dataframe/misc/accessor.py +276 -0
  168. maxframe/dataframe/misc/apply.py +692 -0
  169. maxframe/dataframe/misc/astype.py +236 -0
  170. maxframe/dataframe/misc/case_when.py +141 -0
  171. maxframe/dataframe/misc/check_monotonic.py +84 -0
  172. maxframe/dataframe/misc/cut.py +383 -0
  173. maxframe/dataframe/misc/datetimes.py +79 -0
  174. maxframe/dataframe/misc/describe.py +108 -0
  175. maxframe/dataframe/misc/diff.py +210 -0
  176. maxframe/dataframe/misc/drop.py +440 -0
  177. maxframe/dataframe/misc/drop_duplicates.py +248 -0
  178. maxframe/dataframe/misc/duplicated.py +292 -0
  179. maxframe/dataframe/misc/eval.py +728 -0
  180. maxframe/dataframe/misc/explode.py +171 -0
  181. maxframe/dataframe/misc/get_dummies.py +208 -0
  182. maxframe/dataframe/misc/isin.py +217 -0
  183. maxframe/dataframe/misc/map.py +236 -0
  184. maxframe/dataframe/misc/melt.py +162 -0
  185. maxframe/dataframe/misc/memory_usage.py +248 -0
  186. maxframe/dataframe/misc/pct_change.py +150 -0
  187. maxframe/dataframe/misc/pivot_table.py +262 -0
  188. maxframe/dataframe/misc/qcut.py +104 -0
  189. maxframe/dataframe/misc/select_dtypes.py +104 -0
  190. maxframe/dataframe/misc/shift.py +256 -0
  191. maxframe/dataframe/misc/stack.py +238 -0
  192. maxframe/dataframe/misc/string_.py +221 -0
  193. maxframe/dataframe/misc/tests/__init__.py +13 -0
  194. maxframe/dataframe/misc/tests/test_misc.py +468 -0
  195. maxframe/dataframe/misc/to_numeric.py +178 -0
  196. maxframe/dataframe/misc/transform.py +361 -0
  197. maxframe/dataframe/misc/transpose.py +136 -0
  198. maxframe/dataframe/misc/value_counts.py +182 -0
  199. maxframe/dataframe/missing/__init__.py +53 -0
  200. maxframe/dataframe/missing/checkna.py +223 -0
  201. maxframe/dataframe/missing/dropna.py +280 -0
  202. maxframe/dataframe/missing/fillna.py +275 -0
  203. maxframe/dataframe/missing/replace.py +439 -0
  204. maxframe/dataframe/missing/tests/__init__.py +13 -0
  205. maxframe/dataframe/missing/tests/test_missing.py +89 -0
  206. maxframe/dataframe/operators.py +273 -0
  207. maxframe/dataframe/plotting/__init__.py +40 -0
  208. maxframe/dataframe/plotting/core.py +78 -0
  209. maxframe/dataframe/plotting/tests/__init__.py +13 -0
  210. maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
  211. maxframe/dataframe/reduction/__init__.py +107 -0
  212. maxframe/dataframe/reduction/aggregation.py +344 -0
  213. maxframe/dataframe/reduction/all.py +78 -0
  214. maxframe/dataframe/reduction/any.py +78 -0
  215. maxframe/dataframe/reduction/core.py +837 -0
  216. maxframe/dataframe/reduction/count.py +59 -0
  217. maxframe/dataframe/reduction/cummax.py +30 -0
  218. maxframe/dataframe/reduction/cummin.py +30 -0
  219. maxframe/dataframe/reduction/cumprod.py +30 -0
  220. maxframe/dataframe/reduction/cumsum.py +30 -0
  221. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  222. maxframe/dataframe/reduction/kurtosis.py +104 -0
  223. maxframe/dataframe/reduction/max.py +65 -0
  224. maxframe/dataframe/reduction/mean.py +61 -0
  225. maxframe/dataframe/reduction/min.py +65 -0
  226. maxframe/dataframe/reduction/nunique.py +141 -0
  227. maxframe/dataframe/reduction/prod.py +76 -0
  228. maxframe/dataframe/reduction/reduction_size.py +36 -0
  229. maxframe/dataframe/reduction/sem.py +69 -0
  230. maxframe/dataframe/reduction/skew.py +89 -0
  231. maxframe/dataframe/reduction/std.py +53 -0
  232. maxframe/dataframe/reduction/str_concat.py +48 -0
  233. maxframe/dataframe/reduction/sum.py +77 -0
  234. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  235. maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
  236. maxframe/dataframe/reduction/unique.py +90 -0
  237. maxframe/dataframe/reduction/var.py +72 -0
  238. maxframe/dataframe/sort/__init__.py +34 -0
  239. maxframe/dataframe/sort/core.py +36 -0
  240. maxframe/dataframe/sort/sort_index.py +153 -0
  241. maxframe/dataframe/sort/sort_values.py +311 -0
  242. maxframe/dataframe/sort/tests/__init__.py +13 -0
  243. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  244. maxframe/dataframe/statistics/__init__.py +33 -0
  245. maxframe/dataframe/statistics/corr.py +280 -0
  246. maxframe/dataframe/statistics/quantile.py +341 -0
  247. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  248. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  249. maxframe/dataframe/tests/__init__.py +13 -0
  250. maxframe/dataframe/tests/test_initializer.py +29 -0
  251. maxframe/dataframe/tseries/__init__.py +13 -0
  252. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  253. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  254. maxframe/dataframe/tseries/to_datetime.py +297 -0
  255. maxframe/dataframe/ufunc/__init__.py +27 -0
  256. maxframe/dataframe/ufunc/tensor.py +54 -0
  257. maxframe/dataframe/ufunc/ufunc.py +52 -0
  258. maxframe/dataframe/utils.py +1267 -0
  259. maxframe/dataframe/window/__init__.py +29 -0
  260. maxframe/dataframe/window/aggregation.py +96 -0
  261. maxframe/dataframe/window/core.py +69 -0
  262. maxframe/dataframe/window/ewm.py +249 -0
  263. maxframe/dataframe/window/expanding.py +147 -0
  264. maxframe/dataframe/window/rolling.py +376 -0
  265. maxframe/dataframe/window/tests/__init__.py +13 -0
  266. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  267. maxframe/dataframe/window/tests/test_expanding.py +66 -0
  268. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  269. maxframe/env.py +33 -0
  270. maxframe/errors.py +21 -0
  271. maxframe/extension.py +81 -0
  272. maxframe/learn/__init__.py +17 -0
  273. maxframe/learn/contrib/__init__.py +17 -0
  274. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  275. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  276. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  277. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  278. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  279. maxframe/learn/contrib/utils.py +52 -0
  280. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  281. maxframe/learn/contrib/xgboost/classifier.py +86 -0
  282. maxframe/learn/contrib/xgboost/core.py +156 -0
  283. maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
  284. maxframe/learn/contrib/xgboost/predict.py +138 -0
  285. maxframe/learn/contrib/xgboost/regressor.py +78 -0
  286. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  287. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  288. maxframe/learn/contrib/xgboost/train.py +121 -0
  289. maxframe/learn/utils/__init__.py +15 -0
  290. maxframe/learn/utils/core.py +29 -0
  291. maxframe/lib/__init__.py +15 -0
  292. maxframe/lib/aio/__init__.py +27 -0
  293. maxframe/lib/aio/_runners.py +162 -0
  294. maxframe/lib/aio/_threads.py +35 -0
  295. maxframe/lib/aio/base.py +82 -0
  296. maxframe/lib/aio/file.py +85 -0
  297. maxframe/lib/aio/isolation.py +100 -0
  298. maxframe/lib/aio/lru.py +242 -0
  299. maxframe/lib/aio/parallelism.py +37 -0
  300. maxframe/lib/aio/tests/__init__.py +13 -0
  301. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  302. maxframe/lib/compression.py +55 -0
  303. maxframe/lib/cython/__init__.py +13 -0
  304. maxframe/lib/cython/libcpp.pxd +30 -0
  305. maxframe/lib/filesystem/__init__.py +21 -0
  306. maxframe/lib/filesystem/_glob.py +173 -0
  307. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  308. maxframe/lib/filesystem/_oss_lib/common.py +198 -0
  309. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  310. maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
  311. maxframe/lib/filesystem/arrow.py +236 -0
  312. maxframe/lib/filesystem/base.py +263 -0
  313. maxframe/lib/filesystem/core.py +95 -0
  314. maxframe/lib/filesystem/fsmap.py +164 -0
  315. maxframe/lib/filesystem/hdfs.py +31 -0
  316. maxframe/lib/filesystem/local.py +112 -0
  317. maxframe/lib/filesystem/oss.py +157 -0
  318. maxframe/lib/filesystem/tests/__init__.py +13 -0
  319. maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
  320. maxframe/lib/filesystem/tests/test_oss.py +182 -0
  321. maxframe/lib/functools_compat.py +81 -0
  322. maxframe/lib/mmh3.cpython-311-darwin.so +0 -0
  323. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  324. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  325. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  326. maxframe/lib/sparse/__init__.py +861 -0
  327. maxframe/lib/sparse/array.py +1604 -0
  328. maxframe/lib/sparse/core.py +92 -0
  329. maxframe/lib/sparse/matrix.py +241 -0
  330. maxframe/lib/sparse/tests/__init__.py +15 -0
  331. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  332. maxframe/lib/sparse/vector.py +150 -0
  333. maxframe/lib/tblib/LICENSE +20 -0
  334. maxframe/lib/tblib/__init__.py +327 -0
  335. maxframe/lib/tblib/cpython.py +83 -0
  336. maxframe/lib/tblib/decorators.py +44 -0
  337. maxframe/lib/tblib/pickling_support.py +90 -0
  338. maxframe/lib/tests/__init__.py +13 -0
  339. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  340. maxframe/lib/version.py +620 -0
  341. maxframe/lib/wrapped_pickle.py +139 -0
  342. maxframe/mixin.py +100 -0
  343. maxframe/odpsio/__init__.py +21 -0
  344. maxframe/odpsio/arrow.py +91 -0
  345. maxframe/odpsio/schema.py +364 -0
  346. maxframe/odpsio/tableio.py +322 -0
  347. maxframe/odpsio/tests/__init__.py +13 -0
  348. maxframe/odpsio/tests/test_arrow.py +88 -0
  349. maxframe/odpsio/tests/test_schema.py +297 -0
  350. maxframe/odpsio/tests/test_tableio.py +136 -0
  351. maxframe/odpsio/tests/test_volumeio.py +90 -0
  352. maxframe/odpsio/volumeio.py +95 -0
  353. maxframe/opcodes.py +590 -0
  354. maxframe/protocol.py +415 -0
  355. maxframe/remote/__init__.py +18 -0
  356. maxframe/remote/core.py +210 -0
  357. maxframe/remote/run_script.py +121 -0
  358. maxframe/serialization/__init__.py +26 -0
  359. maxframe/serialization/arrow.py +95 -0
  360. maxframe/serialization/core.cpython-311-darwin.so +0 -0
  361. maxframe/serialization/core.pxd +44 -0
  362. maxframe/serialization/core.pyi +61 -0
  363. maxframe/serialization/core.pyx +1094 -0
  364. maxframe/serialization/exception.py +86 -0
  365. maxframe/serialization/maxframe_objects.py +39 -0
  366. maxframe/serialization/numpy.py +91 -0
  367. maxframe/serialization/pandas.py +202 -0
  368. maxframe/serialization/scipy.py +71 -0
  369. maxframe/serialization/serializables/__init__.py +55 -0
  370. maxframe/serialization/serializables/core.py +262 -0
  371. maxframe/serialization/serializables/field.py +624 -0
  372. maxframe/serialization/serializables/field_type.py +589 -0
  373. maxframe/serialization/serializables/tests/__init__.py +13 -0
  374. maxframe/serialization/serializables/tests/test_field_type.py +121 -0
  375. maxframe/serialization/serializables/tests/test_serializable.py +250 -0
  376. maxframe/serialization/tests/__init__.py +13 -0
  377. maxframe/serialization/tests/test_serial.py +412 -0
  378. maxframe/session.py +1310 -0
  379. maxframe/tensor/__init__.py +183 -0
  380. maxframe/tensor/arithmetic/__init__.py +315 -0
  381. maxframe/tensor/arithmetic/abs.py +68 -0
  382. maxframe/tensor/arithmetic/absolute.py +68 -0
  383. maxframe/tensor/arithmetic/add.py +82 -0
  384. maxframe/tensor/arithmetic/angle.py +72 -0
  385. maxframe/tensor/arithmetic/arccos.py +104 -0
  386. maxframe/tensor/arithmetic/arccosh.py +91 -0
  387. maxframe/tensor/arithmetic/arcsin.py +94 -0
  388. maxframe/tensor/arithmetic/arcsinh.py +86 -0
  389. maxframe/tensor/arithmetic/arctan.py +106 -0
  390. maxframe/tensor/arithmetic/arctan2.py +128 -0
  391. maxframe/tensor/arithmetic/arctanh.py +86 -0
  392. maxframe/tensor/arithmetic/around.py +114 -0
  393. maxframe/tensor/arithmetic/bitand.py +95 -0
  394. maxframe/tensor/arithmetic/bitor.py +102 -0
  395. maxframe/tensor/arithmetic/bitxor.py +95 -0
  396. maxframe/tensor/arithmetic/cbrt.py +66 -0
  397. maxframe/tensor/arithmetic/ceil.py +71 -0
  398. maxframe/tensor/arithmetic/clip.py +165 -0
  399. maxframe/tensor/arithmetic/conj.py +74 -0
  400. maxframe/tensor/arithmetic/copysign.py +78 -0
  401. maxframe/tensor/arithmetic/core.py +544 -0
  402. maxframe/tensor/arithmetic/cos.py +85 -0
  403. maxframe/tensor/arithmetic/cosh.py +72 -0
  404. maxframe/tensor/arithmetic/deg2rad.py +72 -0
  405. maxframe/tensor/arithmetic/degrees.py +77 -0
  406. maxframe/tensor/arithmetic/divide.py +114 -0
  407. maxframe/tensor/arithmetic/equal.py +76 -0
  408. maxframe/tensor/arithmetic/exp.py +106 -0
  409. maxframe/tensor/arithmetic/exp2.py +67 -0
  410. maxframe/tensor/arithmetic/expm1.py +79 -0
  411. maxframe/tensor/arithmetic/fabs.py +74 -0
  412. maxframe/tensor/arithmetic/fix.py +69 -0
  413. maxframe/tensor/arithmetic/float_power.py +103 -0
  414. maxframe/tensor/arithmetic/floor.py +77 -0
  415. maxframe/tensor/arithmetic/floordiv.py +94 -0
  416. maxframe/tensor/arithmetic/fmax.py +105 -0
  417. maxframe/tensor/arithmetic/fmin.py +106 -0
  418. maxframe/tensor/arithmetic/fmod.py +99 -0
  419. maxframe/tensor/arithmetic/frexp.py +92 -0
  420. maxframe/tensor/arithmetic/greater.py +77 -0
  421. maxframe/tensor/arithmetic/greater_equal.py +69 -0
  422. maxframe/tensor/arithmetic/hypot.py +77 -0
  423. maxframe/tensor/arithmetic/i0.py +89 -0
  424. maxframe/tensor/arithmetic/imag.py +67 -0
  425. maxframe/tensor/arithmetic/invert.py +110 -0
  426. maxframe/tensor/arithmetic/isclose.py +115 -0
  427. maxframe/tensor/arithmetic/iscomplex.py +64 -0
  428. maxframe/tensor/arithmetic/isfinite.py +106 -0
  429. maxframe/tensor/arithmetic/isinf.py +103 -0
  430. maxframe/tensor/arithmetic/isnan.py +82 -0
  431. maxframe/tensor/arithmetic/isreal.py +63 -0
  432. maxframe/tensor/arithmetic/ldexp.py +99 -0
  433. maxframe/tensor/arithmetic/less.py +69 -0
  434. maxframe/tensor/arithmetic/less_equal.py +69 -0
  435. maxframe/tensor/arithmetic/log.py +92 -0
  436. maxframe/tensor/arithmetic/log10.py +85 -0
  437. maxframe/tensor/arithmetic/log1p.py +95 -0
  438. maxframe/tensor/arithmetic/log2.py +85 -0
  439. maxframe/tensor/arithmetic/logaddexp.py +80 -0
  440. maxframe/tensor/arithmetic/logaddexp2.py +78 -0
  441. maxframe/tensor/arithmetic/logical_and.py +81 -0
  442. maxframe/tensor/arithmetic/logical_not.py +74 -0
  443. maxframe/tensor/arithmetic/logical_or.py +82 -0
  444. maxframe/tensor/arithmetic/logical_xor.py +88 -0
  445. maxframe/tensor/arithmetic/lshift.py +82 -0
  446. maxframe/tensor/arithmetic/maximum.py +108 -0
  447. maxframe/tensor/arithmetic/minimum.py +108 -0
  448. maxframe/tensor/arithmetic/mod.py +104 -0
  449. maxframe/tensor/arithmetic/modf.py +83 -0
  450. maxframe/tensor/arithmetic/multiply.py +81 -0
  451. maxframe/tensor/arithmetic/nan_to_num.py +99 -0
  452. maxframe/tensor/arithmetic/negative.py +65 -0
  453. maxframe/tensor/arithmetic/nextafter.py +68 -0
  454. maxframe/tensor/arithmetic/not_equal.py +72 -0
  455. maxframe/tensor/arithmetic/positive.py +47 -0
  456. maxframe/tensor/arithmetic/power.py +106 -0
  457. maxframe/tensor/arithmetic/rad2deg.py +71 -0
  458. maxframe/tensor/arithmetic/radians.py +77 -0
  459. maxframe/tensor/arithmetic/real.py +70 -0
  460. maxframe/tensor/arithmetic/reciprocal.py +76 -0
  461. maxframe/tensor/arithmetic/rint.py +68 -0
  462. maxframe/tensor/arithmetic/rshift.py +81 -0
  463. maxframe/tensor/arithmetic/setimag.py +29 -0
  464. maxframe/tensor/arithmetic/setreal.py +29 -0
  465. maxframe/tensor/arithmetic/sign.py +81 -0
  466. maxframe/tensor/arithmetic/signbit.py +65 -0
  467. maxframe/tensor/arithmetic/sin.py +98 -0
  468. maxframe/tensor/arithmetic/sinc.py +102 -0
  469. maxframe/tensor/arithmetic/sinh.py +93 -0
  470. maxframe/tensor/arithmetic/spacing.py +72 -0
  471. maxframe/tensor/arithmetic/sqrt.py +81 -0
  472. maxframe/tensor/arithmetic/square.py +69 -0
  473. maxframe/tensor/arithmetic/subtract.py +81 -0
  474. maxframe/tensor/arithmetic/tan.py +88 -0
  475. maxframe/tensor/arithmetic/tanh.py +92 -0
  476. maxframe/tensor/arithmetic/tests/__init__.py +15 -0
  477. maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
  478. maxframe/tensor/arithmetic/truediv.py +104 -0
  479. maxframe/tensor/arithmetic/trunc.py +72 -0
  480. maxframe/tensor/arithmetic/utils.py +65 -0
  481. maxframe/tensor/array_utils.py +186 -0
  482. maxframe/tensor/base/__init__.py +34 -0
  483. maxframe/tensor/base/astype.py +119 -0
  484. maxframe/tensor/base/atleast_1d.py +74 -0
  485. maxframe/tensor/base/broadcast_to.py +89 -0
  486. maxframe/tensor/base/ravel.py +92 -0
  487. maxframe/tensor/base/tests/__init__.py +13 -0
  488. maxframe/tensor/base/tests/test_base.py +114 -0
  489. maxframe/tensor/base/transpose.py +125 -0
  490. maxframe/tensor/base/unique.py +205 -0
  491. maxframe/tensor/base/where.py +127 -0
  492. maxframe/tensor/core.py +724 -0
  493. maxframe/tensor/datasource/__init__.py +32 -0
  494. maxframe/tensor/datasource/arange.py +156 -0
  495. maxframe/tensor/datasource/array.py +415 -0
  496. maxframe/tensor/datasource/core.py +109 -0
  497. maxframe/tensor/datasource/empty.py +169 -0
  498. maxframe/tensor/datasource/from_dataframe.py +70 -0
  499. maxframe/tensor/datasource/from_dense.py +54 -0
  500. maxframe/tensor/datasource/from_sparse.py +47 -0
  501. maxframe/tensor/datasource/full.py +186 -0
  502. maxframe/tensor/datasource/ones.py +173 -0
  503. maxframe/tensor/datasource/scalar.py +40 -0
  504. maxframe/tensor/datasource/tests/__init__.py +13 -0
  505. maxframe/tensor/datasource/tests/test_datasource.py +278 -0
  506. maxframe/tensor/datasource/zeros.py +188 -0
  507. maxframe/tensor/fetch/__init__.py +15 -0
  508. maxframe/tensor/fetch/core.py +54 -0
  509. maxframe/tensor/indexing/__init__.py +47 -0
  510. maxframe/tensor/indexing/choose.py +196 -0
  511. maxframe/tensor/indexing/compress.py +124 -0
  512. maxframe/tensor/indexing/core.py +190 -0
  513. maxframe/tensor/indexing/extract.py +71 -0
  514. maxframe/tensor/indexing/fill_diagonal.py +183 -0
  515. maxframe/tensor/indexing/flatnonzero.py +60 -0
  516. maxframe/tensor/indexing/getitem.py +175 -0
  517. maxframe/tensor/indexing/nonzero.py +120 -0
  518. maxframe/tensor/indexing/setitem.py +132 -0
  519. maxframe/tensor/indexing/slice.py +29 -0
  520. maxframe/tensor/indexing/take.py +130 -0
  521. maxframe/tensor/indexing/tests/__init__.py +15 -0
  522. maxframe/tensor/indexing/tests/test_indexing.py +234 -0
  523. maxframe/tensor/indexing/unravel_index.py +103 -0
  524. maxframe/tensor/merge/__init__.py +15 -0
  525. maxframe/tensor/merge/stack.py +132 -0
  526. maxframe/tensor/merge/tests/__init__.py +13 -0
  527. maxframe/tensor/merge/tests/test_merge.py +52 -0
  528. maxframe/tensor/operators.py +123 -0
  529. maxframe/tensor/random/__init__.py +168 -0
  530. maxframe/tensor/random/beta.py +87 -0
  531. maxframe/tensor/random/binomial.py +137 -0
  532. maxframe/tensor/random/bytes.py +39 -0
  533. maxframe/tensor/random/chisquare.py +110 -0
  534. maxframe/tensor/random/choice.py +186 -0
  535. maxframe/tensor/random/core.py +234 -0
  536. maxframe/tensor/random/dirichlet.py +123 -0
  537. maxframe/tensor/random/exponential.py +94 -0
  538. maxframe/tensor/random/f.py +135 -0
  539. maxframe/tensor/random/gamma.py +128 -0
  540. maxframe/tensor/random/geometric.py +93 -0
  541. maxframe/tensor/random/gumbel.py +167 -0
  542. maxframe/tensor/random/hypergeometric.py +148 -0
  543. maxframe/tensor/random/laplace.py +133 -0
  544. maxframe/tensor/random/logistic.py +129 -0
  545. maxframe/tensor/random/lognormal.py +159 -0
  546. maxframe/tensor/random/logseries.py +122 -0
  547. maxframe/tensor/random/multinomial.py +133 -0
  548. maxframe/tensor/random/multivariate_normal.py +192 -0
  549. maxframe/tensor/random/negative_binomial.py +125 -0
  550. maxframe/tensor/random/noncentral_chisquare.py +132 -0
  551. maxframe/tensor/random/noncentral_f.py +126 -0
  552. maxframe/tensor/random/normal.py +143 -0
  553. maxframe/tensor/random/pareto.py +140 -0
  554. maxframe/tensor/random/permutation.py +104 -0
  555. maxframe/tensor/random/poisson.py +111 -0
  556. maxframe/tensor/random/power.py +142 -0
  557. maxframe/tensor/random/rand.py +82 -0
  558. maxframe/tensor/random/randint.py +121 -0
  559. maxframe/tensor/random/randn.py +96 -0
  560. maxframe/tensor/random/random_integers.py +123 -0
  561. maxframe/tensor/random/random_sample.py +86 -0
  562. maxframe/tensor/random/rayleigh.py +110 -0
  563. maxframe/tensor/random/shuffle.py +61 -0
  564. maxframe/tensor/random/standard_cauchy.py +105 -0
  565. maxframe/tensor/random/standard_exponential.py +72 -0
  566. maxframe/tensor/random/standard_gamma.py +120 -0
  567. maxframe/tensor/random/standard_normal.py +74 -0
  568. maxframe/tensor/random/standard_t.py +135 -0
  569. maxframe/tensor/random/tests/__init__.py +15 -0
  570. maxframe/tensor/random/tests/test_random.py +167 -0
  571. maxframe/tensor/random/triangular.py +119 -0
  572. maxframe/tensor/random/uniform.py +131 -0
  573. maxframe/tensor/random/vonmises.py +131 -0
  574. maxframe/tensor/random/wald.py +114 -0
  575. maxframe/tensor/random/weibull.py +140 -0
  576. maxframe/tensor/random/zipf.py +122 -0
  577. maxframe/tensor/rechunk/__init__.py +26 -0
  578. maxframe/tensor/rechunk/rechunk.py +43 -0
  579. maxframe/tensor/reduction/__init__.py +66 -0
  580. maxframe/tensor/reduction/all.py +103 -0
  581. maxframe/tensor/reduction/allclose.py +88 -0
  582. maxframe/tensor/reduction/any.py +105 -0
  583. maxframe/tensor/reduction/argmax.py +103 -0
  584. maxframe/tensor/reduction/argmin.py +103 -0
  585. maxframe/tensor/reduction/array_equal.py +64 -0
  586. maxframe/tensor/reduction/core.py +168 -0
  587. maxframe/tensor/reduction/count_nonzero.py +81 -0
  588. maxframe/tensor/reduction/cumprod.py +97 -0
  589. maxframe/tensor/reduction/cumsum.py +101 -0
  590. maxframe/tensor/reduction/max.py +120 -0
  591. maxframe/tensor/reduction/mean.py +123 -0
  592. maxframe/tensor/reduction/min.py +120 -0
  593. maxframe/tensor/reduction/nanargmax.py +82 -0
  594. maxframe/tensor/reduction/nanargmin.py +76 -0
  595. maxframe/tensor/reduction/nancumprod.py +91 -0
  596. maxframe/tensor/reduction/nancumsum.py +94 -0
  597. maxframe/tensor/reduction/nanmax.py +111 -0
  598. maxframe/tensor/reduction/nanmean.py +106 -0
  599. maxframe/tensor/reduction/nanmin.py +111 -0
  600. maxframe/tensor/reduction/nanprod.py +94 -0
  601. maxframe/tensor/reduction/nanstd.py +126 -0
  602. maxframe/tensor/reduction/nansum.py +115 -0
  603. maxframe/tensor/reduction/nanvar.py +149 -0
  604. maxframe/tensor/reduction/prod.py +130 -0
  605. maxframe/tensor/reduction/std.py +134 -0
  606. maxframe/tensor/reduction/sum.py +125 -0
  607. maxframe/tensor/reduction/tests/__init__.py +13 -0
  608. maxframe/tensor/reduction/tests/test_reduction.py +181 -0
  609. maxframe/tensor/reduction/var.py +176 -0
  610. maxframe/tensor/reshape/__init__.py +17 -0
  611. maxframe/tensor/reshape/reshape.py +188 -0
  612. maxframe/tensor/reshape/tests/__init__.py +15 -0
  613. maxframe/tensor/reshape/tests/test_reshape.py +37 -0
  614. maxframe/tensor/statistics/__init__.py +13 -0
  615. maxframe/tensor/statistics/percentile.py +175 -0
  616. maxframe/tensor/statistics/quantile.py +288 -0
  617. maxframe/tensor/ufunc/__init__.py +26 -0
  618. maxframe/tensor/ufunc/ufunc.py +200 -0
  619. maxframe/tensor/utils.py +718 -0
  620. maxframe/tests/__init__.py +13 -0
  621. maxframe/tests/test_codegen.py +69 -0
  622. maxframe/tests/test_protocol.py +144 -0
  623. maxframe/tests/test_utils.py +376 -0
  624. maxframe/tests/utils.py +164 -0
  625. maxframe/typing_.py +37 -0
  626. maxframe/udf.py +134 -0
  627. maxframe/utils.py +1114 -0
  628. maxframe-0.1.0b5.dist-info/METADATA +104 -0
  629. maxframe-0.1.0b5.dist-info/RECORD +647 -0
  630. maxframe-0.1.0b5.dist-info/WHEEL +5 -0
  631. maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
  632. maxframe_client/__init__.py +17 -0
  633. maxframe_client/clients/__init__.py +13 -0
  634. maxframe_client/clients/framedriver.py +118 -0
  635. maxframe_client/clients/spe.py +104 -0
  636. maxframe_client/conftest.py +15 -0
  637. maxframe_client/fetcher.py +264 -0
  638. maxframe_client/session/__init__.py +22 -0
  639. maxframe_client/session/consts.py +36 -0
  640. maxframe_client/session/graph.py +119 -0
  641. maxframe_client/session/odps.py +482 -0
  642. maxframe_client/session/task.py +280 -0
  643. maxframe_client/session/tests/__init__.py +13 -0
  644. maxframe_client/session/tests/test_task.py +85 -0
  645. maxframe_client/tests/__init__.py +13 -0
  646. maxframe_client/tests/test_fetcher.py +89 -0
  647. maxframe_client/tests/test_session.py +255 -0
@@ -0,0 +1,383 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from numbers import Integral
16
+
17
+ import numpy as np
18
+ import pandas as pd
19
+
20
+ from ... import opcodes
21
+ from ...core import ENTITY_TYPE, ExecutableTuple, OutputType
22
+ from ...serialization.serializables import (
23
+ AnyField,
24
+ BoolField,
25
+ Int32Field,
26
+ KeyField,
27
+ StringField,
28
+ )
29
+ from ...tensor import tensor as astensor
30
+ from ...tensor.core import TensorOrder
31
+ from ..core import INDEX_TYPE, SERIES_TYPE
32
+ from ..initializer import Series as asseries
33
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
34
+ from ..utils import parse_index
35
+
36
+
37
class DataFrameCut(DataFrameOperator, DataFrameOperatorMixin):
    """Operator that bins a 1-d tensor or series into discrete intervals.

    Calling the operator only builds output metadata: ``pandas.cut`` is run on
    a one-element stand-in for the input to find out which container type and
    dtype the outputs will have.
    """

    _op_type_ = opcodes.CUT

    # Data to be binned.
    _input = KeyField("input")
    # Bin specification; when it is a tileable it is also wired in as an input.
    bins = AnyField("bins", default=None)
    right = BoolField("right", default=None)
    # Bin labels; when it is a tileable it is also wired in as an input.
    labels = AnyField("labels", default=None)
    # When truthy the operator produces a second output holding the bins.
    retbins = BoolField("retbins", default=None)
    precision = Int32Field("precision", default=None)
    include_lowest = BoolField("include_lowest", default=None)
    duplicates = StringField("duplicates", default=None)
    ordered = BoolField("ordered", default=None)

    @property
    def input(self):
        """The tileable being binned."""
        return self._input

    @property
    def output_limit(self):
        """Number of outputs: 2 when ``retbins`` is set, otherwise 1."""
        return 1 if not self.retbins else 2

    def _set_inputs(self, inputs):
        """Rebind ``_input``, ``bins`` and ``labels`` to the given inputs.

        Inputs are consumed in the order ``__call__`` registers them: the data
        first, then ``bins`` and ``labels`` if each of them is a tileable.
        """
        super()._set_inputs(inputs)
        inputs_iter = iter(self._inputs)
        self._input = next(inputs_iter)
        if isinstance(self.bins, ENTITY_TYPE):
            self.bins = next(inputs_iter)
        if isinstance(self.labels, ENTITY_TYPE):
            self.labels = next(inputs_iter)

    def __call__(self, x):
        """Build the output tileables for cutting ``x``.

        Parameters
        ----------
        x : pandas.Series, tileable or array-like
            Data to bin. A pandas Series is wrapped as a series tileable and
            any other non-tileable object is converted to a tensor.

        Returns
        -------
        ExecutableTuple
            The binned result, followed by the bins when ``retbins`` is set.

        Raises
        ------
        ValueError
            If ``x`` is not 1-dimensional or has size 0.
        """
        if isinstance(x, pd.Series):
            x = asseries(x)
        elif not isinstance(x, ENTITY_TYPE):
            x = astensor(x)
        if x.ndim != 1:
            raise ValueError("Input array must be 1 dimensional")
        if x.size == 0:
            raise ValueError("Cannot cut empty array")

        inputs = [x]
        if self.labels is not None and not isinstance(self.labels, (bool, ENTITY_TYPE)):
            self.labels = np.asarray(self.labels)

        # infer dtype with a one-element stand-in of the same container kind
        x_empty = (
            pd.Series([1], dtype=x.dtype)
            if isinstance(x, SERIES_TYPE)
            else np.asarray([1], dtype=x.dtype)
        )
        if isinstance(self.bins, INDEX_TYPE):
            bins = self.bins.index_value.to_pandas()
            inputs.append(self.bins)
            bins_unknown = True
        elif isinstance(self.bins, ENTITY_TYPE):
            bins = np.asarray([2], dtype=self.bins.dtype)
            inputs.append(self.bins)
            bins_unknown = True
        else:
            bins = self.bins
            # an integer bin count means the edges depend on the actual data
            bins_unknown = isinstance(self.bins, Integral)
        if isinstance(self.labels, ENTITY_TYPE):
            bins_unknown = True
            labels = None
            inputs.append(self.labels)
        else:
            if self.labels is False or not bins_unknown:
                labels = self.labels
            else:
                labels = None

        # Forward the options that influence the inferred dtype so the
        # metadata agrees with what the same arguments produce for real data.
        # Both are skipped while unset (None) to keep pandas' own defaults.
        extra_kw = {}
        if self.precision is not None:
            extra_kw["precision"] = self.precision
        # `ordered=False` is only valid together with labels; labels may have
        # been replaced by None above when the bins are not known yet, so the
        # flag is forwarded only when labels really take part in this call.
        if labels is not None and self.ordered is not None and not self.ordered:
            extra_kw["ordered"] = False
        ret = pd.cut(
            x_empty,
            bins,
            right=self.right,
            labels=labels,
            retbins=True,
            include_lowest=self.include_lowest,
            duplicates=self.duplicates,
            **extra_kw,
        )

        kws = []
        output_types = []
        if bins_unknown and isinstance(ret[0].dtype, pd.CategoricalDtype):
            # inaccurate dtype, just create an empty one
            out_dtype = pd.CategoricalDtype()
        else:
            out_dtype = ret[0].dtype
        if isinstance(ret[0], pd.Series):
            output_types.append(OutputType.series)
            kws.append(
                {
                    "dtype": out_dtype,
                    "shape": x.shape,
                    "index_value": x.index_value,
                    "name": x.name,
                }
            )
        elif isinstance(ret[0], np.ndarray):
            output_types.append(OutputType.tensor)
            kws.append(
                {"dtype": out_dtype, "shape": x.shape, "order": TensorOrder.C_ORDER}
            )
        else:
            assert isinstance(ret[0], pd.Categorical)
            output_types.append(OutputType.categorical)
            kws.append(
                {
                    "dtype": out_dtype,
                    "shape": x.shape,
                    "categories_value": parse_index(
                        out_dtype.categories, store_data=True
                    ),
                }
            )

        if self.retbins:
            if isinstance(self.bins, (pd.IntervalIndex, INDEX_TYPE)):
                # interval bins are handed back as an index
                output_types.append(OutputType.index)
                kws.append(
                    {
                        "dtype": self.bins.dtype,
                        "shape": self.bins.shape,
                        "index_value": self.bins.index_value
                        if isinstance(self.bins, INDEX_TYPE)
                        else parse_index(self.bins, store_data=False),
                        "name": self.bins.name,
                    }
                )
            else:
                output_types.append(OutputType.tensor)
                kws.append(
                    {
                        "dtype": ret[1].dtype,
                        "shape": ret[1].shape if ret[1].size > 0 else (np.nan,),
                        "order": TensorOrder.C_ORDER,
                    }
                )

        self.output_types = output_types
        return ExecutableTuple(self.new_tileables(inputs, kws=kws))
177
+
178
+
179
def cut(
    x,
    bins,
    right: bool = True,
    labels=None,
    retbins: bool = False,
    precision: int = 3,
    include_lowest: bool = False,
    duplicates: str = "raise",
    ordered: bool = True,
):
    """
    Bin values into discrete intervals.

    Use `cut` when you need to segment and sort data values into bins. This
    function is also useful for going from a continuous variable to a
    categorical variable. For example, `cut` could convert ages to groups of
    age ranges. Supports binning into an equal number of bins, or a
    pre-specified array of bins.

    Parameters
    ----------
    x : array-like
        The input array to be binned. Must be 1-dimensional.
    bins : int, sequence of scalars, or IntervalIndex
        The criteria to bin by.

        * int : Defines the number of equal-width bins in the range of `x`. The
          range of `x` is extended by .1% on each side to include the minimum
          and maximum values of `x`.
        * sequence of scalars : Defines the bin edges allowing for non-uniform
          width. No extension of the range of `x` is done.
        * IntervalIndex : Defines the exact bins to be used. Note that
          IntervalIndex for `bins` must be non-overlapping.

    right : bool, default True
        Indicates whether `bins` includes the rightmost edge or not. If
        ``right == True`` (the default), then the `bins` ``[1, 2, 3, 4]``
        indicate (1,2], (2,3], (3,4]. This argument is ignored when
        `bins` is an IntervalIndex.
    labels : array or False, default None
        Specifies the labels for the returned bins. Must be the same length as
        the resulting bins. If False, returns only integer indicators of the
        bins. This affects the type of the output container (see below).
        This argument is ignored when `bins` is an IntervalIndex. If True,
        raises an error.
    retbins : bool, default False
        Whether to return the bins or not. Useful when bins is provided
        as a scalar.
    precision : int, default 3
        The precision at which to store and display the bins labels.
    include_lowest : bool, default False
        Whether the first interval should be left-inclusive or not.
    duplicates : {default 'raise', 'drop'}, optional
        If bin edges are not unique, raise ValueError or drop non-uniques.
    ordered : bool, default True
        Whether the labels are ordered or not. Applies to returned types
        Categorical and Series (with Categorical dtype). If True, the resulting
        categorical will be ordered. If False, the resulting categorical will be
        unordered (labels must be provided).

    Returns
    -------
    out : Categorical, Series, or Tensor
        An array-like object representing the respective bin for each value
        of `x`. The type depends on the value of `labels`.

        * None (default) : returns a Series for Series `x` or a
          Categorical for all other inputs. The values stored within
          are Interval dtype.

        * sequence of scalars : returns a Series for Series `x` or a
          Categorical for all other inputs. The values stored within
          are whatever the type in the sequence is.

        * False : returns a tensor of integers.

    bins : Tensor or IntervalIndex.
        The computed or specified bins. Only returned when `retbins=True`.
        For scalar or sequence `bins`, this is a tensor with the computed
        bins. If `duplicates='drop'` is set, non-unique bin edges are dropped
        from `bins`. For an IntervalIndex `bins`, this is equal to `bins`.

    Raises
    ------
    ValueError
        If `bins` is an integer smaller than 1.

    See Also
    --------
    qcut : Discretize variable into equal-sized buckets based on rank
        or based on sample quantiles.
    Categorical : Array type for storing data that come from a
        fixed set of values.
    Series : One-dimensional array with axis labels (including time series).
    IntervalIndex : Immutable Index implementing an ordered, sliceable set.

    Notes
    -----
    Any NA values will be NA in the result. Out of bounds values will be NA in
    the resulting Series or Categorical object.

    Examples
    --------
    Discretize into three equal-sized bins.

    >>> import maxframe.tensor as mt
    >>> import maxframe.dataframe as md

    >>> md.cut(mt.array([1, 7, 5, 4, 6, 3]), 3).execute()
    ... # doctest: +ELLIPSIS
    [(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
    Categories (3, interval[float64]): [(0.994, 3.0] < (3.0, 5.0] ...

    >>> md.cut(mt.array([1, 7, 5, 4, 6, 3]), 3, retbins=True).execute()
    ... # doctest: +ELLIPSIS
    ([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
    Categories (3, interval[float64]): [(0.994, 3.0] < (3.0, 5.0] ...
    array([0.994, 3.   , 5.   , 7.   ]))

    Discovers the same bins, but assigns them specific labels. Notice that
    the returned Categorical's categories are `labels` and it is ordered.

    >>> md.cut(mt.array([1, 7, 5, 4, 6, 3]),
    ...        3, labels=["bad", "medium", "good"]).execute()
    [bad, good, medium, medium, good, bad]
    Categories (3, object): [bad < medium < good]

    ``ordered=False`` will result in unordered categories when labels are
    passed. This parameter can be used to allow non-unique labels:

    >>> md.cut(np.array([1, 7, 5, 4, 6, 3]), 3,
    ...        labels=["B", "A", "B"], ordered=False).execute()
    ['B', 'B', 'A', 'A', 'B', 'B']
    Categories (2, object): ['A', 'B']

    ``labels=False`` implies you just want the bins back.

    >>> md.cut([0, 1, 1, 2], bins=4, labels=False).execute()
    array([0, 1, 1, 3])

    Passing a Series as an input returns a Series with categorical dtype:

    >>> s = md.Series(mt.array([2, 4, 6, 8, 10]),
    ...               index=['a', 'b', 'c', 'd', 'e'])
    >>> md.cut(s, 3).execute()
    ... # doctest: +ELLIPSIS
    a    (1.992, 4.667]
    b    (1.992, 4.667]
    c    (4.667, 7.333]
    d     (7.333, 10.0]
    e     (7.333, 10.0]
    dtype: category
    Categories (3, interval[float64]): [(1.992, 4.667] < (4.667, ...

    Passing a Series as an input returns a Series with mapping value.
    It is used to map numerically to intervals based on bins.

    >>> s = md.Series(mt.array([2, 4, 6, 8, 10]),
    ...               index=['a', 'b', 'c', 'd', 'e'])
    >>> md.cut(s, [0, 2, 4, 6, 8, 10], labels=False, retbins=True, right=False).execute()
    ... # doctest: +ELLIPSIS
    (a    0.0
     b    1.0
     c    2.0
     d    3.0
     e    NaN
     dtype: float64, array([0, 2, 4, 6, 8, 10]))

    Use the ``drop`` option when bins are not unique

    >>> md.cut(s, [0, 2, 4, 6, 10, 10], labels=False, retbins=True,
    ...        right=False, duplicates='drop').execute()
    ... # doctest: +ELLIPSIS
    (a    0.0
     b    1.0
     c    2.0
     d    3.0
     e    NaN
     dtype: float64, array([0, 2, 4, 6, 10]))

    Passing an IntervalIndex for `bins` results in those categories exactly.
    Notice that values not covered by the IntervalIndex are set to NaN. 0
    is to the left of the first bin (which is closed on the right), and 1.5
    falls between two bins.

    >>> bins = md.Index(pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)]))
    >>> md.cut([0, 0.5, 1.5, 2.5, 4.5], bins).execute()
    [NaN, (0, 1], NaN, (2, 3], (4, 5]]
    Categories (3, interval[int64]): [(0, 1] < (2, 3] < (4, 5]]
    """

    # an integer `bins` is a bin count and has to be at least one
    if isinstance(bins, Integral) and bins < 1:
        raise ValueError("`bins` should be a positive integer")

    cut_op = DataFrameCut(
        bins=bins,
        right=right,
        labels=labels,
        retbins=retbins,
        precision=precision,
        include_lowest=include_lowest,
        duplicates=duplicates,
        ordered=ordered,
    )
    outputs = cut_op(x)
    # callers that did not ask for the bins only get the binned data back
    return outputs if retbins else outputs[0]
@@ -0,0 +1,79 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import pandas as pd
16
+
17
+ from ... import opcodes
18
+ from ...core import OutputType
19
+ from ...serialization.serializables import (
20
+ BoolField,
21
+ DictField,
22
+ KeyField,
23
+ StringField,
24
+ TupleField,
25
+ )
26
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
27
+ from ..utils import build_empty_series
28
+
29
+
30
class SeriesDatetimeMethod(DataFrameOperator, DataFrameOperatorMixin):
    """Operator describing one attribute access or method call on ``Series.dt``."""

    _op_type_ = opcodes.DATETIME_METHOD

    _input = KeyField("input")
    # Name of the ``.dt`` attribute or method.
    method = StringField("method", default=None)
    # Positional / keyword arguments used when ``method`` is called.
    method_args = TupleField("method_args", default=None)
    method_kwargs = DictField("method_kwargs", default=None)
    # Truthy when ``method`` is read as an attribute instead of being called.
    is_property = BoolField("is_property", default=None)

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)
        # fall back to a series output when no output type was supplied
        if not self.output_types:
            self.output_types = [OutputType.series]

    @property
    def input(self):
        """The input of this operator."""
        return self._input

    def _set_inputs(self, inputs):
        super()._set_inputs(inputs)
        self._input = self._inputs[0]

    def __call__(self, inp):
        """Delegate to the handler registered for ``self.method``."""
        handler = _datetime_method_to_handlers[self.method]
        return handler.call(self, inp)
54
+
55
+
56
class SeriesDatetimeMethodBaseHandler:
    """Default handler that builds the output series of a ``Series.dt`` operator."""

    @classmethod
    def call(cls, op, inp):
        """Create the output series for applying ``op`` to ``inp``.

        The output dtype is inferred by evaluating the same ``.dt`` member on
        an empty series that has the input's dtype; shape, index and name are
        taken over from ``inp``.

        Parameters
        ----------
        op : operator
            Provides ``method``, ``is_property``, ``method_args`` and
            ``method_kwargs``.
        inp : series tileable
            The input series.
        """
        empty_series = build_empty_series(inp.dtype)
        member = getattr(empty_series.dt, op.method)
        if op.is_property:
            test_obj = member
        else:
            # The argument fields default to None; treat that as "no
            # arguments" rather than failing when unpacking None.
            args = op.method_args if op.method_args is not None else ()
            kwargs = op.method_kwargs if op.method_kwargs is not None else {}
            test_obj = member(*args, **kwargs)
        dtype = test_obj.dtype
        return op.new_series(
            [inp],
            shape=inp.shape,
            dtype=dtype,
            index_value=inp.index_value,
            name=inp.name,
        )
74
+
75
+
76
# Map every public name exposed on ``pd.Series.dt`` to the default handler.
_datetime_method_to_handlers = {}
for method in dir(pd.Series.dt):
    if method.startswith("_"):
        # private and dunder names are not dispatched
        continue
    _datetime_method_to_handlers[method] = SeriesDatetimeMethodBaseHandler
@@ -0,0 +1,108 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+
18
+ from ... import opcodes as OperandDef
19
+ from ...serialization.serializables import AnyField, FieldTypes, KeyField, ListField
20
+ from ..core import SERIES_TYPE
21
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
22
+ from ..utils import build_empty_df, parse_index
23
+
24
+
25
class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
    """Operator that builds the metadata of ``describe()`` for a DataFrame or Series.

    The output metadata is obtained by running pandas' ``describe`` on an
    empty object that has the same dtype(s) as the input.
    """

    _op_type_ = OperandDef.DESCRIBE

    input = KeyField("input", default=None)
    # Percentiles to report; an empty list asks for no percentile rows.
    percentiles = ListField("percentiles", FieldTypes.float64, default=None)
    include = AnyField("include", default=None)
    exclude = AnyField("exclude", default=None)

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    def _set_inputs(self, inputs):
        super()._set_inputs(inputs)
        self.input = self._inputs[0]

    def __call__(self, df_or_series):
        """Create the output tileable describing ``df_or_series``.

        Parameters
        ----------
        df_or_series : DataFrame or Series tileable
            Data to summarise.

        Returns
        -------
        A series tileable for series input, otherwise a DataFrame tileable.

        Raises
        ------
        NotImplementedError
            If the series dtype, or any dtype of the described frame, is not
            numeric.
        """
        if isinstance(df_or_series, SERIES_TYPE):
            if not np.issubdtype(df_or_series.dtype, np.number):
                raise NotImplementedError("non-numeric type is not supported for now")
            test_series = pd.Series([], dtype=df_or_series.dtype).describe(
                percentiles=self.percentiles,
                include=self.include,
                exclude=self.exclude,
            )
            return self.new_series(
                [df_or_series],
                shape=(len(test_series),),
                dtype=test_series.dtype,
                index_value=parse_index(test_series.index, store_data=True),
            )
        else:
            test_inp_df = build_empty_df(df_or_series.dtypes)
            test_df = test_inp_df.describe(
                percentiles=self.percentiles,
                include=self.include,
                exclude=self.exclude,
            )
            # An unset (None) percentiles field is passed to pandas unchanged,
            # so nothing has to be removed in that case.
            if self.percentiles is not None and len(self.percentiles) == 0:
                # specify percentiles=False
                # Note: unlike pandas that False is illegal value for percentiles,
                # MaxFrame DataFrame allows user to specify percentiles=False
                # to skip computation about percentiles
                # NOTE(review): whether pandas adds the median row for an empty
                # percentile list appears to depend on the pandas version, so
                # a missing "50%" label is tolerated - confirm.
                test_df.drop(["50%"], axis=0, inplace=True, errors="ignore")
            for dtype in test_df.dtypes:
                if not np.issubdtype(dtype, np.number):
                    raise NotImplementedError(
                        "non-numeric type is not supported for now"
                    )
            return self.new_dataframe(
                [df_or_series],
                shape=test_df.shape,
                dtypes=test_df.dtypes,
                index_value=parse_index(test_df.index, store_data=True),
                columns_value=parse_index(test_df.columns, store_data=True),
            )
80
+
81
+
82
def describe(df_or_series, percentiles=None, include=None, exclude=None):
    """Build a ``describe`` operation for a DataFrame or Series.

    Parameters
    ----------
    df_or_series : DataFrame or Series
        Data to summarise.
    percentiles : list-like of numbers, False or None
        Percentiles to report, each within [0, 1]. ``None`` selects
        ``[0.25, 0.5, 0.75]``. ``False`` requests no percentiles at all. For
        an explicit list the median (0.5) is added when missing and the
        values are sorted.
    include, exclude
        Passed on to the operator unchanged.

    Raises
    ------
    ValueError
        If a percentile lies outside [0, 1] or the percentiles contain
        duplicates.
    """
    if percentiles is None:
        percentiles = [0.25, 0.5, 0.75]
    elif percentiles is False:
        percentiles = []
    else:
        percentiles = list(percentiles)
        for pct in percentiles:
            if pct < 0 or pct > 1:
                raise ValueError(
                    "percentiles should all be in the interval [0, 1]. "
                    "Try [{0:.3f}] instead.".format(pct / 100)
                )
        # median should always be included
        if 0.5 not in percentiles:
            percentiles.append(0.5)

        # sort and check for duplicates
        pct_array = np.asarray(percentiles)
        unique_pcts = np.unique(pct_array)
        if len(unique_pcts) < len(pct_array):
            raise ValueError("percentiles cannot contain duplicates")
        percentiles = unique_pcts.tolist()

    describe_op = DataFrameDescribe(
        percentiles=percentiles, include=include, exclude=exclude
    )
    return describe_op(df_or_series)