maxframe 0.1.0b5__cp39-cp39-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (647)
  1. maxframe/__init__.py +32 -0
  2. maxframe/_utils.cpython-39-darwin.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyx +547 -0
  5. maxframe/codegen.py +528 -0
  6. maxframe/config/__init__.py +15 -0
  7. maxframe/config/config.py +443 -0
  8. maxframe/config/tests/__init__.py +13 -0
  9. maxframe/config/tests/test_config.py +103 -0
  10. maxframe/config/tests/test_validators.py +34 -0
  11. maxframe/config/validators.py +57 -0
  12. maxframe/conftest.py +139 -0
  13. maxframe/core/__init__.py +65 -0
  14. maxframe/core/base.py +156 -0
  15. maxframe/core/entity/__init__.py +44 -0
  16. maxframe/core/entity/chunks.py +68 -0
  17. maxframe/core/entity/core.py +152 -0
  18. maxframe/core/entity/executable.py +337 -0
  19. maxframe/core/entity/fuse.py +73 -0
  20. maxframe/core/entity/objects.py +100 -0
  21. maxframe/core/entity/output_types.py +90 -0
  22. maxframe/core/entity/tileables.py +438 -0
  23. maxframe/core/entity/utils.py +24 -0
  24. maxframe/core/graph/__init__.py +17 -0
  25. maxframe/core/graph/builder/__init__.py +16 -0
  26. maxframe/core/graph/builder/base.py +86 -0
  27. maxframe/core/graph/builder/chunk.py +430 -0
  28. maxframe/core/graph/builder/tileable.py +34 -0
  29. maxframe/core/graph/builder/utils.py +41 -0
  30. maxframe/core/graph/core.cpython-39-darwin.so +0 -0
  31. maxframe/core/graph/core.pyx +467 -0
  32. maxframe/core/graph/entity.py +171 -0
  33. maxframe/core/graph/tests/__init__.py +13 -0
  34. maxframe/core/graph/tests/test_graph.py +205 -0
  35. maxframe/core/mode.py +96 -0
  36. maxframe/core/operator/__init__.py +34 -0
  37. maxframe/core/operator/base.py +450 -0
  38. maxframe/core/operator/core.py +276 -0
  39. maxframe/core/operator/fetch.py +53 -0
  40. maxframe/core/operator/fuse.py +29 -0
  41. maxframe/core/operator/objects.py +72 -0
  42. maxframe/core/operator/shuffle.py +111 -0
  43. maxframe/core/operator/tests/__init__.py +13 -0
  44. maxframe/core/operator/tests/test_core.py +64 -0
  45. maxframe/core/tests/__init__.py +13 -0
  46. maxframe/core/tests/test_mode.py +75 -0
  47. maxframe/dataframe/__init__.py +81 -0
  48. maxframe/dataframe/arithmetic/__init__.py +359 -0
  49. maxframe/dataframe/arithmetic/abs.py +33 -0
  50. maxframe/dataframe/arithmetic/add.py +60 -0
  51. maxframe/dataframe/arithmetic/arccos.py +28 -0
  52. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  53. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  54. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  55. maxframe/dataframe/arithmetic/arctan.py +28 -0
  56. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  57. maxframe/dataframe/arithmetic/around.py +152 -0
  58. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  59. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  60. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  61. maxframe/dataframe/arithmetic/ceil.py +28 -0
  62. maxframe/dataframe/arithmetic/core.py +342 -0
  63. maxframe/dataframe/arithmetic/cos.py +28 -0
  64. maxframe/dataframe/arithmetic/cosh.py +28 -0
  65. maxframe/dataframe/arithmetic/degrees.py +28 -0
  66. maxframe/dataframe/arithmetic/docstring.py +442 -0
  67. maxframe/dataframe/arithmetic/equal.py +56 -0
  68. maxframe/dataframe/arithmetic/exp.py +28 -0
  69. maxframe/dataframe/arithmetic/exp2.py +28 -0
  70. maxframe/dataframe/arithmetic/expm1.py +28 -0
  71. maxframe/dataframe/arithmetic/floor.py +28 -0
  72. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  73. maxframe/dataframe/arithmetic/greater.py +57 -0
  74. maxframe/dataframe/arithmetic/greater_equal.py +57 -0
  75. maxframe/dataframe/arithmetic/invert.py +33 -0
  76. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  77. maxframe/dataframe/arithmetic/less.py +57 -0
  78. maxframe/dataframe/arithmetic/less_equal.py +57 -0
  79. maxframe/dataframe/arithmetic/log.py +28 -0
  80. maxframe/dataframe/arithmetic/log10.py +28 -0
  81. maxframe/dataframe/arithmetic/log2.py +28 -0
  82. maxframe/dataframe/arithmetic/mod.py +60 -0
  83. maxframe/dataframe/arithmetic/multiply.py +60 -0
  84. maxframe/dataframe/arithmetic/negative.py +33 -0
  85. maxframe/dataframe/arithmetic/not_equal.py +56 -0
  86. maxframe/dataframe/arithmetic/power.py +68 -0
  87. maxframe/dataframe/arithmetic/radians.py +28 -0
  88. maxframe/dataframe/arithmetic/sin.py +28 -0
  89. maxframe/dataframe/arithmetic/sinh.py +28 -0
  90. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  91. maxframe/dataframe/arithmetic/subtract.py +64 -0
  92. maxframe/dataframe/arithmetic/tan.py +28 -0
  93. maxframe/dataframe/arithmetic/tanh.py +28 -0
  94. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  95. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
  96. maxframe/dataframe/arithmetic/truediv.py +64 -0
  97. maxframe/dataframe/arithmetic/trunc.py +28 -0
  98. maxframe/dataframe/arrays.py +864 -0
  99. maxframe/dataframe/core.py +2417 -0
  100. maxframe/dataframe/datasource/__init__.py +15 -0
  101. maxframe/dataframe/datasource/core.py +81 -0
  102. maxframe/dataframe/datasource/dataframe.py +59 -0
  103. maxframe/dataframe/datasource/date_range.py +504 -0
  104. maxframe/dataframe/datasource/from_index.py +54 -0
  105. maxframe/dataframe/datasource/from_records.py +107 -0
  106. maxframe/dataframe/datasource/from_tensor.py +419 -0
  107. maxframe/dataframe/datasource/index.py +117 -0
  108. maxframe/dataframe/datasource/read_csv.py +528 -0
  109. maxframe/dataframe/datasource/read_odps_query.py +299 -0
  110. maxframe/dataframe/datasource/read_odps_table.py +253 -0
  111. maxframe/dataframe/datasource/read_parquet.py +421 -0
  112. maxframe/dataframe/datasource/series.py +55 -0
  113. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  114. maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
  115. maxframe/dataframe/datastore/__init__.py +26 -0
  116. maxframe/dataframe/datastore/core.py +19 -0
  117. maxframe/dataframe/datastore/to_csv.py +227 -0
  118. maxframe/dataframe/datastore/to_odps.py +162 -0
  119. maxframe/dataframe/extensions/__init__.py +41 -0
  120. maxframe/dataframe/extensions/accessor.py +50 -0
  121. maxframe/dataframe/extensions/reshuffle.py +83 -0
  122. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  123. maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
  124. maxframe/dataframe/fetch/__init__.py +15 -0
  125. maxframe/dataframe/fetch/core.py +86 -0
  126. maxframe/dataframe/groupby/__init__.py +82 -0
  127. maxframe/dataframe/groupby/aggregation.py +350 -0
  128. maxframe/dataframe/groupby/apply.py +251 -0
  129. maxframe/dataframe/groupby/core.py +179 -0
  130. maxframe/dataframe/groupby/cum.py +124 -0
  131. maxframe/dataframe/groupby/fill.py +141 -0
  132. maxframe/dataframe/groupby/getitem.py +92 -0
  133. maxframe/dataframe/groupby/head.py +105 -0
  134. maxframe/dataframe/groupby/sample.py +214 -0
  135. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  136. maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
  137. maxframe/dataframe/groupby/transform.py +255 -0
  138. maxframe/dataframe/indexing/__init__.py +84 -0
  139. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  140. maxframe/dataframe/indexing/align.py +349 -0
  141. maxframe/dataframe/indexing/at.py +83 -0
  142. maxframe/dataframe/indexing/getitem.py +204 -0
  143. maxframe/dataframe/indexing/iat.py +37 -0
  144. maxframe/dataframe/indexing/iloc.py +566 -0
  145. maxframe/dataframe/indexing/insert.py +86 -0
  146. maxframe/dataframe/indexing/loc.py +411 -0
  147. maxframe/dataframe/indexing/reindex.py +526 -0
  148. maxframe/dataframe/indexing/rename.py +462 -0
  149. maxframe/dataframe/indexing/rename_axis.py +209 -0
  150. maxframe/dataframe/indexing/reset_index.py +402 -0
  151. maxframe/dataframe/indexing/sample.py +221 -0
  152. maxframe/dataframe/indexing/set_axis.py +194 -0
  153. maxframe/dataframe/indexing/set_index.py +61 -0
  154. maxframe/dataframe/indexing/setitem.py +130 -0
  155. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  156. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  157. maxframe/dataframe/indexing/where.py +308 -0
  158. maxframe/dataframe/initializer.py +288 -0
  159. maxframe/dataframe/merge/__init__.py +32 -0
  160. maxframe/dataframe/merge/append.py +121 -0
  161. maxframe/dataframe/merge/concat.py +325 -0
  162. maxframe/dataframe/merge/merge.py +593 -0
  163. maxframe/dataframe/merge/tests/__init__.py +13 -0
  164. maxframe/dataframe/merge/tests/test_merge.py +215 -0
  165. maxframe/dataframe/misc/__init__.py +134 -0
  166. maxframe/dataframe/misc/_duplicate.py +46 -0
  167. maxframe/dataframe/misc/accessor.py +276 -0
  168. maxframe/dataframe/misc/apply.py +692 -0
  169. maxframe/dataframe/misc/astype.py +236 -0
  170. maxframe/dataframe/misc/case_when.py +141 -0
  171. maxframe/dataframe/misc/check_monotonic.py +84 -0
  172. maxframe/dataframe/misc/cut.py +383 -0
  173. maxframe/dataframe/misc/datetimes.py +79 -0
  174. maxframe/dataframe/misc/describe.py +108 -0
  175. maxframe/dataframe/misc/diff.py +210 -0
  176. maxframe/dataframe/misc/drop.py +440 -0
  177. maxframe/dataframe/misc/drop_duplicates.py +248 -0
  178. maxframe/dataframe/misc/duplicated.py +292 -0
  179. maxframe/dataframe/misc/eval.py +728 -0
  180. maxframe/dataframe/misc/explode.py +171 -0
  181. maxframe/dataframe/misc/get_dummies.py +208 -0
  182. maxframe/dataframe/misc/isin.py +217 -0
  183. maxframe/dataframe/misc/map.py +236 -0
  184. maxframe/dataframe/misc/melt.py +162 -0
  185. maxframe/dataframe/misc/memory_usage.py +248 -0
  186. maxframe/dataframe/misc/pct_change.py +150 -0
  187. maxframe/dataframe/misc/pivot_table.py +262 -0
  188. maxframe/dataframe/misc/qcut.py +104 -0
  189. maxframe/dataframe/misc/select_dtypes.py +104 -0
  190. maxframe/dataframe/misc/shift.py +256 -0
  191. maxframe/dataframe/misc/stack.py +238 -0
  192. maxframe/dataframe/misc/string_.py +221 -0
  193. maxframe/dataframe/misc/tests/__init__.py +13 -0
  194. maxframe/dataframe/misc/tests/test_misc.py +468 -0
  195. maxframe/dataframe/misc/to_numeric.py +178 -0
  196. maxframe/dataframe/misc/transform.py +361 -0
  197. maxframe/dataframe/misc/transpose.py +136 -0
  198. maxframe/dataframe/misc/value_counts.py +182 -0
  199. maxframe/dataframe/missing/__init__.py +53 -0
  200. maxframe/dataframe/missing/checkna.py +223 -0
  201. maxframe/dataframe/missing/dropna.py +280 -0
  202. maxframe/dataframe/missing/fillna.py +275 -0
  203. maxframe/dataframe/missing/replace.py +439 -0
  204. maxframe/dataframe/missing/tests/__init__.py +13 -0
  205. maxframe/dataframe/missing/tests/test_missing.py +89 -0
  206. maxframe/dataframe/operators.py +273 -0
  207. maxframe/dataframe/plotting/__init__.py +40 -0
  208. maxframe/dataframe/plotting/core.py +78 -0
  209. maxframe/dataframe/plotting/tests/__init__.py +13 -0
  210. maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
  211. maxframe/dataframe/reduction/__init__.py +107 -0
  212. maxframe/dataframe/reduction/aggregation.py +344 -0
  213. maxframe/dataframe/reduction/all.py +78 -0
  214. maxframe/dataframe/reduction/any.py +78 -0
  215. maxframe/dataframe/reduction/core.py +837 -0
  216. maxframe/dataframe/reduction/count.py +59 -0
  217. maxframe/dataframe/reduction/cummax.py +30 -0
  218. maxframe/dataframe/reduction/cummin.py +30 -0
  219. maxframe/dataframe/reduction/cumprod.py +30 -0
  220. maxframe/dataframe/reduction/cumsum.py +30 -0
  221. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  222. maxframe/dataframe/reduction/kurtosis.py +104 -0
  223. maxframe/dataframe/reduction/max.py +65 -0
  224. maxframe/dataframe/reduction/mean.py +61 -0
  225. maxframe/dataframe/reduction/min.py +65 -0
  226. maxframe/dataframe/reduction/nunique.py +141 -0
  227. maxframe/dataframe/reduction/prod.py +76 -0
  228. maxframe/dataframe/reduction/reduction_size.py +36 -0
  229. maxframe/dataframe/reduction/sem.py +69 -0
  230. maxframe/dataframe/reduction/skew.py +89 -0
  231. maxframe/dataframe/reduction/std.py +53 -0
  232. maxframe/dataframe/reduction/str_concat.py +48 -0
  233. maxframe/dataframe/reduction/sum.py +77 -0
  234. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  235. maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
  236. maxframe/dataframe/reduction/unique.py +90 -0
  237. maxframe/dataframe/reduction/var.py +72 -0
  238. maxframe/dataframe/sort/__init__.py +34 -0
  239. maxframe/dataframe/sort/core.py +36 -0
  240. maxframe/dataframe/sort/sort_index.py +153 -0
  241. maxframe/dataframe/sort/sort_values.py +311 -0
  242. maxframe/dataframe/sort/tests/__init__.py +13 -0
  243. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  244. maxframe/dataframe/statistics/__init__.py +33 -0
  245. maxframe/dataframe/statistics/corr.py +280 -0
  246. maxframe/dataframe/statistics/quantile.py +341 -0
  247. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  248. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  249. maxframe/dataframe/tests/__init__.py +13 -0
  250. maxframe/dataframe/tests/test_initializer.py +29 -0
  251. maxframe/dataframe/tseries/__init__.py +13 -0
  252. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  253. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  254. maxframe/dataframe/tseries/to_datetime.py +297 -0
  255. maxframe/dataframe/ufunc/__init__.py +27 -0
  256. maxframe/dataframe/ufunc/tensor.py +54 -0
  257. maxframe/dataframe/ufunc/ufunc.py +52 -0
  258. maxframe/dataframe/utils.py +1267 -0
  259. maxframe/dataframe/window/__init__.py +29 -0
  260. maxframe/dataframe/window/aggregation.py +96 -0
  261. maxframe/dataframe/window/core.py +69 -0
  262. maxframe/dataframe/window/ewm.py +249 -0
  263. maxframe/dataframe/window/expanding.py +147 -0
  264. maxframe/dataframe/window/rolling.py +376 -0
  265. maxframe/dataframe/window/tests/__init__.py +13 -0
  266. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  267. maxframe/dataframe/window/tests/test_expanding.py +66 -0
  268. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  269. maxframe/env.py +33 -0
  270. maxframe/errors.py +21 -0
  271. maxframe/extension.py +81 -0
  272. maxframe/learn/__init__.py +17 -0
  273. maxframe/learn/contrib/__init__.py +17 -0
  274. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  275. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  276. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  277. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  278. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  279. maxframe/learn/contrib/utils.py +52 -0
  280. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  281. maxframe/learn/contrib/xgboost/classifier.py +86 -0
  282. maxframe/learn/contrib/xgboost/core.py +156 -0
  283. maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
  284. maxframe/learn/contrib/xgboost/predict.py +138 -0
  285. maxframe/learn/contrib/xgboost/regressor.py +78 -0
  286. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  287. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  288. maxframe/learn/contrib/xgboost/train.py +121 -0
  289. maxframe/learn/utils/__init__.py +15 -0
  290. maxframe/learn/utils/core.py +29 -0
  291. maxframe/lib/__init__.py +15 -0
  292. maxframe/lib/aio/__init__.py +27 -0
  293. maxframe/lib/aio/_runners.py +162 -0
  294. maxframe/lib/aio/_threads.py +35 -0
  295. maxframe/lib/aio/base.py +82 -0
  296. maxframe/lib/aio/file.py +85 -0
  297. maxframe/lib/aio/isolation.py +100 -0
  298. maxframe/lib/aio/lru.py +242 -0
  299. maxframe/lib/aio/parallelism.py +37 -0
  300. maxframe/lib/aio/tests/__init__.py +13 -0
  301. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  302. maxframe/lib/compression.py +55 -0
  303. maxframe/lib/cython/__init__.py +13 -0
  304. maxframe/lib/cython/libcpp.pxd +30 -0
  305. maxframe/lib/filesystem/__init__.py +21 -0
  306. maxframe/lib/filesystem/_glob.py +173 -0
  307. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  308. maxframe/lib/filesystem/_oss_lib/common.py +198 -0
  309. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  310. maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
  311. maxframe/lib/filesystem/arrow.py +236 -0
  312. maxframe/lib/filesystem/base.py +263 -0
  313. maxframe/lib/filesystem/core.py +95 -0
  314. maxframe/lib/filesystem/fsmap.py +164 -0
  315. maxframe/lib/filesystem/hdfs.py +31 -0
  316. maxframe/lib/filesystem/local.py +112 -0
  317. maxframe/lib/filesystem/oss.py +157 -0
  318. maxframe/lib/filesystem/tests/__init__.py +13 -0
  319. maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
  320. maxframe/lib/filesystem/tests/test_oss.py +182 -0
  321. maxframe/lib/functools_compat.py +81 -0
  322. maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
  323. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  324. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  325. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  326. maxframe/lib/sparse/__init__.py +861 -0
  327. maxframe/lib/sparse/array.py +1604 -0
  328. maxframe/lib/sparse/core.py +92 -0
  329. maxframe/lib/sparse/matrix.py +241 -0
  330. maxframe/lib/sparse/tests/__init__.py +15 -0
  331. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  332. maxframe/lib/sparse/vector.py +150 -0
  333. maxframe/lib/tblib/LICENSE +20 -0
  334. maxframe/lib/tblib/__init__.py +327 -0
  335. maxframe/lib/tblib/cpython.py +83 -0
  336. maxframe/lib/tblib/decorators.py +44 -0
  337. maxframe/lib/tblib/pickling_support.py +90 -0
  338. maxframe/lib/tests/__init__.py +13 -0
  339. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  340. maxframe/lib/version.py +620 -0
  341. maxframe/lib/wrapped_pickle.py +139 -0
  342. maxframe/mixin.py +100 -0
  343. maxframe/odpsio/__init__.py +21 -0
  344. maxframe/odpsio/arrow.py +91 -0
  345. maxframe/odpsio/schema.py +364 -0
  346. maxframe/odpsio/tableio.py +322 -0
  347. maxframe/odpsio/tests/__init__.py +13 -0
  348. maxframe/odpsio/tests/test_arrow.py +88 -0
  349. maxframe/odpsio/tests/test_schema.py +297 -0
  350. maxframe/odpsio/tests/test_tableio.py +136 -0
  351. maxframe/odpsio/tests/test_volumeio.py +90 -0
  352. maxframe/odpsio/volumeio.py +95 -0
  353. maxframe/opcodes.py +590 -0
  354. maxframe/protocol.py +415 -0
  355. maxframe/remote/__init__.py +18 -0
  356. maxframe/remote/core.py +210 -0
  357. maxframe/remote/run_script.py +121 -0
  358. maxframe/serialization/__init__.py +26 -0
  359. maxframe/serialization/arrow.py +95 -0
  360. maxframe/serialization/core.cpython-39-darwin.so +0 -0
  361. maxframe/serialization/core.pxd +44 -0
  362. maxframe/serialization/core.pyi +61 -0
  363. maxframe/serialization/core.pyx +1094 -0
  364. maxframe/serialization/exception.py +86 -0
  365. maxframe/serialization/maxframe_objects.py +39 -0
  366. maxframe/serialization/numpy.py +91 -0
  367. maxframe/serialization/pandas.py +202 -0
  368. maxframe/serialization/scipy.py +71 -0
  369. maxframe/serialization/serializables/__init__.py +55 -0
  370. maxframe/serialization/serializables/core.py +262 -0
  371. maxframe/serialization/serializables/field.py +624 -0
  372. maxframe/serialization/serializables/field_type.py +589 -0
  373. maxframe/serialization/serializables/tests/__init__.py +13 -0
  374. maxframe/serialization/serializables/tests/test_field_type.py +121 -0
  375. maxframe/serialization/serializables/tests/test_serializable.py +250 -0
  376. maxframe/serialization/tests/__init__.py +13 -0
  377. maxframe/serialization/tests/test_serial.py +412 -0
  378. maxframe/session.py +1310 -0
  379. maxframe/tensor/__init__.py +183 -0
  380. maxframe/tensor/arithmetic/__init__.py +315 -0
  381. maxframe/tensor/arithmetic/abs.py +68 -0
  382. maxframe/tensor/arithmetic/absolute.py +68 -0
  383. maxframe/tensor/arithmetic/add.py +82 -0
  384. maxframe/tensor/arithmetic/angle.py +72 -0
  385. maxframe/tensor/arithmetic/arccos.py +104 -0
  386. maxframe/tensor/arithmetic/arccosh.py +91 -0
  387. maxframe/tensor/arithmetic/arcsin.py +94 -0
  388. maxframe/tensor/arithmetic/arcsinh.py +86 -0
  389. maxframe/tensor/arithmetic/arctan.py +106 -0
  390. maxframe/tensor/arithmetic/arctan2.py +128 -0
  391. maxframe/tensor/arithmetic/arctanh.py +86 -0
  392. maxframe/tensor/arithmetic/around.py +114 -0
  393. maxframe/tensor/arithmetic/bitand.py +95 -0
  394. maxframe/tensor/arithmetic/bitor.py +102 -0
  395. maxframe/tensor/arithmetic/bitxor.py +95 -0
  396. maxframe/tensor/arithmetic/cbrt.py +66 -0
  397. maxframe/tensor/arithmetic/ceil.py +71 -0
  398. maxframe/tensor/arithmetic/clip.py +165 -0
  399. maxframe/tensor/arithmetic/conj.py +74 -0
  400. maxframe/tensor/arithmetic/copysign.py +78 -0
  401. maxframe/tensor/arithmetic/core.py +544 -0
  402. maxframe/tensor/arithmetic/cos.py +85 -0
  403. maxframe/tensor/arithmetic/cosh.py +72 -0
  404. maxframe/tensor/arithmetic/deg2rad.py +72 -0
  405. maxframe/tensor/arithmetic/degrees.py +77 -0
  406. maxframe/tensor/arithmetic/divide.py +114 -0
  407. maxframe/tensor/arithmetic/equal.py +76 -0
  408. maxframe/tensor/arithmetic/exp.py +106 -0
  409. maxframe/tensor/arithmetic/exp2.py +67 -0
  410. maxframe/tensor/arithmetic/expm1.py +79 -0
  411. maxframe/tensor/arithmetic/fabs.py +74 -0
  412. maxframe/tensor/arithmetic/fix.py +69 -0
  413. maxframe/tensor/arithmetic/float_power.py +103 -0
  414. maxframe/tensor/arithmetic/floor.py +77 -0
  415. maxframe/tensor/arithmetic/floordiv.py +94 -0
  416. maxframe/tensor/arithmetic/fmax.py +105 -0
  417. maxframe/tensor/arithmetic/fmin.py +106 -0
  418. maxframe/tensor/arithmetic/fmod.py +99 -0
  419. maxframe/tensor/arithmetic/frexp.py +92 -0
  420. maxframe/tensor/arithmetic/greater.py +77 -0
  421. maxframe/tensor/arithmetic/greater_equal.py +69 -0
  422. maxframe/tensor/arithmetic/hypot.py +77 -0
  423. maxframe/tensor/arithmetic/i0.py +89 -0
  424. maxframe/tensor/arithmetic/imag.py +67 -0
  425. maxframe/tensor/arithmetic/invert.py +110 -0
  426. maxframe/tensor/arithmetic/isclose.py +115 -0
  427. maxframe/tensor/arithmetic/iscomplex.py +64 -0
  428. maxframe/tensor/arithmetic/isfinite.py +106 -0
  429. maxframe/tensor/arithmetic/isinf.py +103 -0
  430. maxframe/tensor/arithmetic/isnan.py +82 -0
  431. maxframe/tensor/arithmetic/isreal.py +63 -0
  432. maxframe/tensor/arithmetic/ldexp.py +99 -0
  433. maxframe/tensor/arithmetic/less.py +69 -0
  434. maxframe/tensor/arithmetic/less_equal.py +69 -0
  435. maxframe/tensor/arithmetic/log.py +92 -0
  436. maxframe/tensor/arithmetic/log10.py +85 -0
  437. maxframe/tensor/arithmetic/log1p.py +95 -0
  438. maxframe/tensor/arithmetic/log2.py +85 -0
  439. maxframe/tensor/arithmetic/logaddexp.py +80 -0
  440. maxframe/tensor/arithmetic/logaddexp2.py +78 -0
  441. maxframe/tensor/arithmetic/logical_and.py +81 -0
  442. maxframe/tensor/arithmetic/logical_not.py +74 -0
  443. maxframe/tensor/arithmetic/logical_or.py +82 -0
  444. maxframe/tensor/arithmetic/logical_xor.py +88 -0
  445. maxframe/tensor/arithmetic/lshift.py +82 -0
  446. maxframe/tensor/arithmetic/maximum.py +108 -0
  447. maxframe/tensor/arithmetic/minimum.py +108 -0
  448. maxframe/tensor/arithmetic/mod.py +104 -0
  449. maxframe/tensor/arithmetic/modf.py +83 -0
  450. maxframe/tensor/arithmetic/multiply.py +81 -0
  451. maxframe/tensor/arithmetic/nan_to_num.py +99 -0
  452. maxframe/tensor/arithmetic/negative.py +65 -0
  453. maxframe/tensor/arithmetic/nextafter.py +68 -0
  454. maxframe/tensor/arithmetic/not_equal.py +72 -0
  455. maxframe/tensor/arithmetic/positive.py +47 -0
  456. maxframe/tensor/arithmetic/power.py +106 -0
  457. maxframe/tensor/arithmetic/rad2deg.py +71 -0
  458. maxframe/tensor/arithmetic/radians.py +77 -0
  459. maxframe/tensor/arithmetic/real.py +70 -0
  460. maxframe/tensor/arithmetic/reciprocal.py +76 -0
  461. maxframe/tensor/arithmetic/rint.py +68 -0
  462. maxframe/tensor/arithmetic/rshift.py +81 -0
  463. maxframe/tensor/arithmetic/setimag.py +29 -0
  464. maxframe/tensor/arithmetic/setreal.py +29 -0
  465. maxframe/tensor/arithmetic/sign.py +81 -0
  466. maxframe/tensor/arithmetic/signbit.py +65 -0
  467. maxframe/tensor/arithmetic/sin.py +98 -0
  468. maxframe/tensor/arithmetic/sinc.py +102 -0
  469. maxframe/tensor/arithmetic/sinh.py +93 -0
  470. maxframe/tensor/arithmetic/spacing.py +72 -0
  471. maxframe/tensor/arithmetic/sqrt.py +81 -0
  472. maxframe/tensor/arithmetic/square.py +69 -0
  473. maxframe/tensor/arithmetic/subtract.py +81 -0
  474. maxframe/tensor/arithmetic/tan.py +88 -0
  475. maxframe/tensor/arithmetic/tanh.py +92 -0
  476. maxframe/tensor/arithmetic/tests/__init__.py +15 -0
  477. maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
  478. maxframe/tensor/arithmetic/truediv.py +104 -0
  479. maxframe/tensor/arithmetic/trunc.py +72 -0
  480. maxframe/tensor/arithmetic/utils.py +65 -0
  481. maxframe/tensor/array_utils.py +186 -0
  482. maxframe/tensor/base/__init__.py +34 -0
  483. maxframe/tensor/base/astype.py +119 -0
  484. maxframe/tensor/base/atleast_1d.py +74 -0
  485. maxframe/tensor/base/broadcast_to.py +89 -0
  486. maxframe/tensor/base/ravel.py +92 -0
  487. maxframe/tensor/base/tests/__init__.py +13 -0
  488. maxframe/tensor/base/tests/test_base.py +114 -0
  489. maxframe/tensor/base/transpose.py +125 -0
  490. maxframe/tensor/base/unique.py +205 -0
  491. maxframe/tensor/base/where.py +127 -0
  492. maxframe/tensor/core.py +724 -0
  493. maxframe/tensor/datasource/__init__.py +32 -0
  494. maxframe/tensor/datasource/arange.py +156 -0
  495. maxframe/tensor/datasource/array.py +415 -0
  496. maxframe/tensor/datasource/core.py +109 -0
  497. maxframe/tensor/datasource/empty.py +169 -0
  498. maxframe/tensor/datasource/from_dataframe.py +70 -0
  499. maxframe/tensor/datasource/from_dense.py +54 -0
  500. maxframe/tensor/datasource/from_sparse.py +47 -0
  501. maxframe/tensor/datasource/full.py +186 -0
  502. maxframe/tensor/datasource/ones.py +173 -0
  503. maxframe/tensor/datasource/scalar.py +40 -0
  504. maxframe/tensor/datasource/tests/__init__.py +13 -0
  505. maxframe/tensor/datasource/tests/test_datasource.py +278 -0
  506. maxframe/tensor/datasource/zeros.py +188 -0
  507. maxframe/tensor/fetch/__init__.py +15 -0
  508. maxframe/tensor/fetch/core.py +54 -0
  509. maxframe/tensor/indexing/__init__.py +47 -0
  510. maxframe/tensor/indexing/choose.py +196 -0
  511. maxframe/tensor/indexing/compress.py +124 -0
  512. maxframe/tensor/indexing/core.py +190 -0
  513. maxframe/tensor/indexing/extract.py +71 -0
  514. maxframe/tensor/indexing/fill_diagonal.py +183 -0
  515. maxframe/tensor/indexing/flatnonzero.py +60 -0
  516. maxframe/tensor/indexing/getitem.py +175 -0
  517. maxframe/tensor/indexing/nonzero.py +120 -0
  518. maxframe/tensor/indexing/setitem.py +132 -0
  519. maxframe/tensor/indexing/slice.py +29 -0
  520. maxframe/tensor/indexing/take.py +130 -0
  521. maxframe/tensor/indexing/tests/__init__.py +15 -0
  522. maxframe/tensor/indexing/tests/test_indexing.py +234 -0
  523. maxframe/tensor/indexing/unravel_index.py +103 -0
  524. maxframe/tensor/merge/__init__.py +15 -0
  525. maxframe/tensor/merge/stack.py +132 -0
  526. maxframe/tensor/merge/tests/__init__.py +13 -0
  527. maxframe/tensor/merge/tests/test_merge.py +52 -0
  528. maxframe/tensor/operators.py +123 -0
  529. maxframe/tensor/random/__init__.py +168 -0
  530. maxframe/tensor/random/beta.py +87 -0
  531. maxframe/tensor/random/binomial.py +137 -0
  532. maxframe/tensor/random/bytes.py +39 -0
  533. maxframe/tensor/random/chisquare.py +110 -0
  534. maxframe/tensor/random/choice.py +186 -0
  535. maxframe/tensor/random/core.py +234 -0
  536. maxframe/tensor/random/dirichlet.py +123 -0
  537. maxframe/tensor/random/exponential.py +94 -0
  538. maxframe/tensor/random/f.py +135 -0
  539. maxframe/tensor/random/gamma.py +128 -0
  540. maxframe/tensor/random/geometric.py +93 -0
  541. maxframe/tensor/random/gumbel.py +167 -0
  542. maxframe/tensor/random/hypergeometric.py +148 -0
  543. maxframe/tensor/random/laplace.py +133 -0
  544. maxframe/tensor/random/logistic.py +129 -0
  545. maxframe/tensor/random/lognormal.py +159 -0
  546. maxframe/tensor/random/logseries.py +122 -0
  547. maxframe/tensor/random/multinomial.py +133 -0
  548. maxframe/tensor/random/multivariate_normal.py +192 -0
  549. maxframe/tensor/random/negative_binomial.py +125 -0
  550. maxframe/tensor/random/noncentral_chisquare.py +132 -0
  551. maxframe/tensor/random/noncentral_f.py +126 -0
  552. maxframe/tensor/random/normal.py +143 -0
  553. maxframe/tensor/random/pareto.py +140 -0
  554. maxframe/tensor/random/permutation.py +104 -0
  555. maxframe/tensor/random/poisson.py +111 -0
  556. maxframe/tensor/random/power.py +142 -0
  557. maxframe/tensor/random/rand.py +82 -0
  558. maxframe/tensor/random/randint.py +121 -0
  559. maxframe/tensor/random/randn.py +96 -0
  560. maxframe/tensor/random/random_integers.py +123 -0
  561. maxframe/tensor/random/random_sample.py +86 -0
  562. maxframe/tensor/random/rayleigh.py +110 -0
  563. maxframe/tensor/random/shuffle.py +61 -0
  564. maxframe/tensor/random/standard_cauchy.py +105 -0
  565. maxframe/tensor/random/standard_exponential.py +72 -0
  566. maxframe/tensor/random/standard_gamma.py +120 -0
  567. maxframe/tensor/random/standard_normal.py +74 -0
  568. maxframe/tensor/random/standard_t.py +135 -0
  569. maxframe/tensor/random/tests/__init__.py +15 -0
  570. maxframe/tensor/random/tests/test_random.py +167 -0
  571. maxframe/tensor/random/triangular.py +119 -0
  572. maxframe/tensor/random/uniform.py +131 -0
  573. maxframe/tensor/random/vonmises.py +131 -0
  574. maxframe/tensor/random/wald.py +114 -0
  575. maxframe/tensor/random/weibull.py +140 -0
  576. maxframe/tensor/random/zipf.py +122 -0
  577. maxframe/tensor/rechunk/__init__.py +26 -0
  578. maxframe/tensor/rechunk/rechunk.py +43 -0
  579. maxframe/tensor/reduction/__init__.py +66 -0
  580. maxframe/tensor/reduction/all.py +103 -0
  581. maxframe/tensor/reduction/allclose.py +88 -0
  582. maxframe/tensor/reduction/any.py +105 -0
  583. maxframe/tensor/reduction/argmax.py +103 -0
  584. maxframe/tensor/reduction/argmin.py +103 -0
  585. maxframe/tensor/reduction/array_equal.py +64 -0
  586. maxframe/tensor/reduction/core.py +168 -0
  587. maxframe/tensor/reduction/count_nonzero.py +81 -0
  588. maxframe/tensor/reduction/cumprod.py +97 -0
  589. maxframe/tensor/reduction/cumsum.py +101 -0
  590. maxframe/tensor/reduction/max.py +120 -0
  591. maxframe/tensor/reduction/mean.py +123 -0
  592. maxframe/tensor/reduction/min.py +120 -0
  593. maxframe/tensor/reduction/nanargmax.py +82 -0
  594. maxframe/tensor/reduction/nanargmin.py +76 -0
  595. maxframe/tensor/reduction/nancumprod.py +91 -0
  596. maxframe/tensor/reduction/nancumsum.py +94 -0
  597. maxframe/tensor/reduction/nanmax.py +111 -0
  598. maxframe/tensor/reduction/nanmean.py +106 -0
  599. maxframe/tensor/reduction/nanmin.py +111 -0
  600. maxframe/tensor/reduction/nanprod.py +94 -0
  601. maxframe/tensor/reduction/nanstd.py +126 -0
  602. maxframe/tensor/reduction/nansum.py +115 -0
  603. maxframe/tensor/reduction/nanvar.py +149 -0
  604. maxframe/tensor/reduction/prod.py +130 -0
  605. maxframe/tensor/reduction/std.py +134 -0
  606. maxframe/tensor/reduction/sum.py +125 -0
  607. maxframe/tensor/reduction/tests/__init__.py +13 -0
  608. maxframe/tensor/reduction/tests/test_reduction.py +181 -0
  609. maxframe/tensor/reduction/var.py +176 -0
  610. maxframe/tensor/reshape/__init__.py +17 -0
  611. maxframe/tensor/reshape/reshape.py +188 -0
  612. maxframe/tensor/reshape/tests/__init__.py +15 -0
  613. maxframe/tensor/reshape/tests/test_reshape.py +37 -0
  614. maxframe/tensor/statistics/__init__.py +13 -0
  615. maxframe/tensor/statistics/percentile.py +175 -0
  616. maxframe/tensor/statistics/quantile.py +288 -0
  617. maxframe/tensor/ufunc/__init__.py +26 -0
  618. maxframe/tensor/ufunc/ufunc.py +200 -0
  619. maxframe/tensor/utils.py +718 -0
  620. maxframe/tests/__init__.py +13 -0
  621. maxframe/tests/test_codegen.py +69 -0
  622. maxframe/tests/test_protocol.py +144 -0
  623. maxframe/tests/test_utils.py +376 -0
  624. maxframe/tests/utils.py +164 -0
  625. maxframe/typing_.py +37 -0
  626. maxframe/udf.py +134 -0
  627. maxframe/utils.py +1114 -0
  628. maxframe-0.1.0b5.dist-info/METADATA +104 -0
  629. maxframe-0.1.0b5.dist-info/RECORD +647 -0
  630. maxframe-0.1.0b5.dist-info/WHEEL +5 -0
  631. maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
  632. maxframe_client/__init__.py +17 -0
  633. maxframe_client/clients/__init__.py +13 -0
  634. maxframe_client/clients/framedriver.py +118 -0
  635. maxframe_client/clients/spe.py +104 -0
  636. maxframe_client/conftest.py +15 -0
  637. maxframe_client/fetcher.py +264 -0
  638. maxframe_client/session/__init__.py +22 -0
  639. maxframe_client/session/consts.py +36 -0
  640. maxframe_client/session/graph.py +119 -0
  641. maxframe_client/session/odps.py +482 -0
  642. maxframe_client/session/task.py +280 -0
  643. maxframe_client/session/tests/__init__.py +13 -0
  644. maxframe_client/session/tests/test_task.py +85 -0
  645. maxframe_client/tests/__init__.py +13 -0
  646. maxframe_client/tests/test_fetcher.py +89 -0
  647. maxframe_client/tests/test_session.py +255 -0
@@ -0,0 +1,221 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+
18
+ from ... import opcodes
19
+ from ...core import OutputType
20
+ from ...serialization.serializables import DictField, KeyField, StringField, TupleField
21
+ from ...tensor import tensor as astensor
22
+ from ...tensor.core import TENSOR_TYPE
23
+ from ..core import SERIES_TYPE
24
+ from ..initializer import Series as asseries
25
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
26
+ from ..utils import build_empty_series, infer_index_value, parse_index
27
+
28
+
29
class SeriesStringMethod(DataFrameOperator, DataFrameOperatorMixin):
    """Operator describing a single ``Series.str`` accessor call.

    The method name and its positional / keyword arguments are stored on the
    operator; building the output entity is delegated to the handler class
    registered for that method name in ``_string_method_to_handlers``.
    """

    _op_type_ = opcodes.STRING_METHOD

    # the series the string method is applied to
    _input = KeyField("input")
    # name of the ``.str`` method, plus the arguments it is invoked with
    method = StringField("method", default=None)
    method_args = TupleField("method_args", default=None)
    method_kwargs = DictField("method_kwargs", default=None)

    def __init__(self, output_types=None, **kw):
        """Create the operator; the output type falls back to series when unset."""
        super().__init__(_output_types=output_types, **kw)
        if not self.output_types:
            self.output_types = [OutputType.series]

    @property
    def input(self):
        """The first input of the operator."""
        return self._input

    def _set_inputs(self, inputs):
        """Bind inputs; a second input is stored as the ``others`` keyword."""
        super()._set_inputs(inputs)
        bound_inputs = self._inputs
        self._input = bound_inputs[0]
        if len(bound_inputs) == 2:
            # for method cat
            self.method_kwargs["others"] = bound_inputs[1]

    def __call__(self, inp):
        """Build the output entity through the handler registered for ``method``."""
        handler = _string_method_to_handlers[self.method]
        return handler.call(self, inp)
55
+
56
+
57
class SeriesStringMethodBaseHandler:
    """Default handler: builds a series output for a ``Series.str`` method.

    The output dtype is inferred by running the same method on an empty
    pandas series of the input dtype; shape, index and name are copied from
    the input.
    """

    @classmethod
    def call(cls, op: SeriesStringMethod, inp):
        """Create the output series of ``op`` applied to ``inp``.

        ``__getitem__`` is rewritten in place on ``op`` before inference:
        a slice key becomes ``slice(start=..., stop=..., step=...)`` (``None``
        bounds are omitted) and any other key becomes ``get(key)``.

        Parameters
        ----------
        op : SeriesStringMethod
            Operator holding the method name and its arguments. It may be
            mutated as described above.
        inp
            Input series entity; ``dtype``, ``shape``, ``index_value`` and
            ``name`` are read from it.

        Returns
        -------
        The series created by ``op.new_series``.
        """
        if op.method == "__getitem__":
            key = op.method_args[0]
            if isinstance(key, slice):
                op.method = "slice"
                bounds = {"start": key.start, "stop": key.stop, "step": key.step}
                op.method_args = ()
                op.method_kwargs = {k: v for k, v in bounds.items() if v is not None}
            else:
                # ``s.str[i]`` with a non-slice key is element access, which
                # pandas also exposes as ``s.str.get(i)``. Previously this path
                # failed with AttributeError when reading ``key.start``.
                # NOTE(review): assumes the execution side handles ``get`` like
                # any other registered method -- confirm.
                op.method = "get"
                op.method_args = (key,)
                op.method_kwargs = {}

        # both fields default to None, so normalise before unpacking
        args = op.method_args or ()
        kwargs = op.method_kwargs or {}

        empty_series = build_empty_series(inp.dtype)
        dtype = getattr(empty_series.str, op.method)(*args, **kwargs).dtype
        return op.new_series(
            [inp],
            shape=inp.shape,
            dtype=dtype,
            index_value=inp.index_value,
            name=inp.name,
        )
81
+
82
+
83
class SeriesStringSplitHandler(SeriesStringMethodBaseHandler):
    """Handler for ``split`` / ``rsplit``.

    Without ``expand`` the base handler is used. With ``expand=True`` the
    output is a dataframe with ``n + 1`` columns, so a positive ``n`` is
    required to know the column count up front.
    """

    @classmethod
    def call(cls, op, inp):
        """Create the output of a split operator.

        Parameters
        ----------
        op : SeriesStringMethod
            Operator whose ``method_kwargs`` may contain ``expand`` and ``n``.
        inp
            Input series entity.

        Returns
        -------
        A series when ``expand`` is ``False`` (or absent), otherwise a
        dataframe with columns ``RangeIndex(n + 1)`` of the input dtype.

        Raises
        ------
        NotImplementedError
            If ``expand`` is set and ``n`` does not bound the number of splits.
        """
        method_kwargs = op.method_kwargs
        if method_kwargs.get("expand", False) is False:
            return super().call(op, inp)

        n = method_kwargs.get("n", -1)
        # pandas interprets None, 0 and negative values of ``n`` as "return all
        # splits", in which case the number of output columns cannot be
        # inferred without looking at the data. Previously only -1 was caught:
        # n=0 silently produced a single column and n=None raised TypeError.
        if n is None or n <= 0:  # pragma: no cover
            raise NotImplementedError("`n` needs to be specified when expand=True")

        op.output_types = [OutputType.dataframe]
        columns = pd.RangeIndex(n + 1)
        columns_value = parse_index(columns, store_data=True)
        dtypes = pd.Series([inp.dtype] * len(columns), index=columns)
        return op.new_dataframe(
            [inp],
            shape=(inp.shape[0], len(columns)),
            dtypes=dtypes,
            columns_value=columns_value,
            index_value=inp.index_value,
        )
105
+
106
+
107
class SeriesStringCatHandler(SeriesStringMethodBaseHandler):
    """Handler for ``Series.str.cat``; the output depends on ``others``.

    * ``others`` is ``None``: delegates to ``build_str_concat_object``.
    * tuple / list / ndarray / tensor: element-wise concatenation with a
      1-d tensor of matching length.
    * pandas or MaxFrame series: index-aligned concatenation (outer join only).
    * anything else is rejected.
    """

    # fixed typo in the user-facing message: "np.ndarrary" -> "np.ndarray"
    CAT_TYPE_ERROR = (
        "others must be Series, Index, DataFrame, "
        "Tensor, np.ndarray or list-like "
        "(either containing only strings or "
        "containing only objects of "
        "type Series/Index/Tensor/np.ndarray[1-dim])"
    )
    CAT_LEN_ERROR = (
        "If `others` contains arrays or lists (or other list-likes without an index), "
        "these must all be of the same length as the calling Series/Index."
    )

    @classmethod
    def call(cls, op, inp):
        """Create the output of a ``cat`` operator.

        Parameters
        ----------
        op : SeriesStringMethod
            Operator whose ``method_kwargs`` may contain ``others``, ``sep``,
            ``na_rep`` and ``join``.
        inp
            Input series entity.

        Raises
        ------
        TypeError
            If ``others`` is of an unsupported type or is not 1-dimensional.
        ValueError
            If a list-like ``others`` has a known length different from the
            input's, or if ``others`` is a string while ``sep`` is missing.
        NotImplementedError
            If ``others`` is a series and ``join`` is not ``"outer"``.
        """
        method_kwargs = op.method_kwargs
        others = method_kwargs.get("others")

        if others is None:
            # imported lazily, as in the original
            from ..reduction import build_str_concat_object

            return build_str_concat_object(
                inp,
                sep=op.method_kwargs.get("sep"),
                na_rep=op.method_kwargs.get("na_rep"),
            )
        elif isinstance(others, (tuple, list, np.ndarray, TENSOR_TYPE)):
            others = astensor(others, dtype=object)
            if others.ndim != 1:
                raise TypeError(cls.CAT_TYPE_ERROR)
            # lengths are only comparable when both are known (not NaN)
            if (
                not np.isnan(inp.shape[0])
                and not np.isnan(others.shape[0])
                and inp.shape[0] != others.shape[0]
            ):
                raise ValueError(cls.CAT_LEN_ERROR)
            inputs = [inp]
            # NOTE(review): ``astensor`` presumably always yields a tensor, which
            # would make this check always true -- kept unchanged.
            if isinstance(others, TENSOR_TYPE):
                inputs.append(others)
            return op.new_series(
                inputs,
                shape=inp.shape,
                dtype=inp.dtype,
                index_value=inp.index_value,
                name=inp.name,
            )
        elif isinstance(others, (pd.Series, SERIES_TYPE)):
            others = asseries(others)
            if op.method_kwargs.get("join") != "outer":  # pragma: no cover
                raise NotImplementedError("only outer join supported for now")
            return op.new_series(
                [inp, others],
                shape=inp.shape,
                dtype=inp.dtype,
                index_value=infer_index_value(inp.index_value, others.index_value),
                name=inp.name,
            )
        elif isinstance(others, str) and op.method_kwargs.get("sep") is None:
            raise ValueError("Did you mean to supply a `sep` keyword?")
        else:
            raise TypeError(cls.CAT_TYPE_ERROR)
168
+
169
+
170
class SeriesStringExtractHandler(SeriesStringMethodBaseHandler):
    """Handler for ``extract`` / ``extractall``.

    The method is first run on an empty pandas series (carrying an empty
    slice of the input index) to learn whether the result is 1-d or 2-d and
    which dtypes / columns it has.
    """

    @classmethod
    def call(cls, op, inp):
        """Create the series or dataframe output of an extract operator."""
        empty_index = inp.index_value.to_pandas()[:0]
        probe_input = build_empty_series(inp.dtype, index=empty_index)
        str_method = getattr(probe_input.str, op.method)
        probe = str_method(*op.method_args, **op.method_kwargs)

        # 1-d probe result: the output stays a series
        if probe.ndim == 1:
            return op.new_series(
                [inp],
                shape=inp.shape,
                dtype=probe.dtype,
                index_value=inp.index_value,
                name=inp.name,
            )

        op.output_types = [OutputType.dataframe]
        if op.method == "extractall":
            # row count is unknown; the index is derived from the probe's index
            out_index_value = parse_index(probe.index, inp)
            out_shape = (np.nan, probe.shape[1])
        else:
            out_index_value = inp.index_value
            out_shape = (inp.shape[0], probe.shape[1])
        return op.new_dataframe(
            [inp],
            shape=out_shape,
            dtypes=probe.dtypes,
            index_value=out_index_value,
            columns_value=parse_index(probe.columns, store_data=True),
        )
202
+
203
+
204
# registry: ``Series.str`` method name -> handler class
_string_method_to_handlers = {}
_not_implements = ["get_dummies"]
# methods with dedicated handlers are registered first
_string_method_to_handlers.update(
    {
        "split": SeriesStringSplitHandler,
        "rsplit": SeriesStringSplitHandler,
        "cat": SeriesStringCatHandler,
        "extract": SeriesStringExtractHandler,
        "extractall": SeriesStringExtractHandler,
    }
)
# every other non-underscore method (plus ``__getitem__``) uses the base handler
for method in dir(pd.Series.str):
    is_hidden = method.startswith("_") and method != "__getitem__"
    if is_hidden or method in _not_implements or method in _string_method_to_handlers:
        continue
    _string_method_to_handlers[method] = SeriesStringMethodBaseHandler
@@ -0,0 +1,13 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
@@ -0,0 +1,468 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+ import pytest
18
+
19
+ from .... import opcodes
20
+ from ....core import OutputType
21
+ from ....tensor.core import TENSOR_TYPE
22
+ from ... import eval as maxframe_eval
23
+ from ... import get_dummies, to_numeric
24
+ from ...arithmetic import DataFrameGreater, DataFrameLess
25
+ from ...core import CATEGORICAL_TYPE, DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
26
+ from ...datasource.dataframe import from_pandas as from_pandas_df
27
+ from ...datasource.index import from_pandas as from_pandas_index
28
+ from ...datasource.series import from_pandas as from_pandas_series
29
+ from .. import astype, cut
30
+
31
+
32
def test_transform():
    """Check the metadata inferred by ``transform`` on dataframes and series."""
    column_names = [chr(ord("A") + i) for i in range(10)]
    raw_frame = pd.DataFrame({c: [i**2 for i in range(20)] for c in column_names})
    df = from_pandas_df(raw_frame, chunk_size=5)

    index_labels = [chr(ord("A") + i) for i in range(20)]
    raw_series = pd.Series([i**2 for i in range(20)], index=index_labels)
    series = from_pandas_series(raw_series, chunk_size=5)

    def rename_fn(f, new_name):
        f.__name__ = new_name
        return f

    def check_frame(result, expected_shape, check_dtypes=True):
        # shared assertions for dataframe-typed transform results
        if check_dtypes:
            assert all(v == np.dtype("int64") for v in result.dtypes) is True
        assert result.shape == expected_shape
        assert result.op._op_type_ == opcodes.TRANSFORM
        assert result.op.output_types[0] == OutputType.dataframe

    # DATAFRAME CASES

    # test transform with infer failure
    def transform_df_with_err(v):
        assert len(v) > 2
        return v.sort_values()

    with pytest.raises(TypeError):
        df.transform(transform_df_with_err)

    r = df.transform(transform_df_with_err, dtypes=raw_frame.dtypes)
    check_frame(r, df.shape, check_dtypes=False)

    # test transform scenarios on data frames
    r = df.transform(lambda x: list(range(len(x))))
    check_frame(r, df.shape)

    r = df.transform(lambda x: list(range(len(x))), axis=1)
    check_frame(r, df.shape)

    r = df.transform(["cumsum", "cummax", lambda x: x + 1])
    check_frame(r, (df.shape[0], df.shape[1] * 3))

    r = df.transform({"A": "cumsum", "D": ["cumsum", "cummax"], "F": lambda x: x + 1})
    check_frame(r, (df.shape[0], 4))

    # test agg scenarios on series
    r = df.transform(lambda x: x.iloc[:-1], _call_agg=True)
    check_frame(r, (np.nan, df.shape[1]))

    r = df.transform(lambda x: x.iloc[:-1], axis=1, _call_agg=True)
    check_frame(r, (df.shape[0], np.nan))

    fn_list = [
        rename_fn(lambda x: x.iloc[1:].reset_index(drop=True), "f1"),
        lambda x: x.iloc[:-1].reset_index(drop=True),
    ]
    r = df.transform(fn_list, _call_agg=True)
    check_frame(r, (np.nan, df.shape[1] * 2))

    r = df.transform(lambda x: x.sum(), _call_agg=True)
    assert r.dtype == np.dtype("int64")
    assert r.shape == (df.shape[1],)
    assert r.op._op_type_ == opcodes.TRANSFORM
    assert r.op.output_types[0] == OutputType.series

    fn_dict = {
        "A": rename_fn(lambda x: x.iloc[1:].reset_index(drop=True), "f1"),
        "D": [
            rename_fn(lambda x: x.iloc[1:].reset_index(drop=True), "f1"),
            lambda x: x.iloc[:-1].reset_index(drop=True),
        ],
        "F": lambda x: x.iloc[:-1].reset_index(drop=True),
    }
    r = df.transform(fn_dict, _call_agg=True)
    check_frame(r, (np.nan, 4))

    # SERIES CASES
    # test transform scenarios on series
    r = series.transform(lambda x: x + 1)
    assert np.dtype("int64") == r.dtype
    assert r.shape == series.shape
    assert r.op._op_type_ == opcodes.TRANSFORM
    assert r.op.output_types[0] == OutputType.series
135
+
136
+
137
def test_string_method():
    """Check the metadata and argument validation of the ``.str`` accessor."""
    raw = pd.Series(["a", "b", "c"], name="s")
    series = from_pandas_series(raw, chunk_size=2)

    with pytest.raises(AttributeError):
        _ = series.str.non_exist

    contains = series.str.contains("c")
    assert contains.dtype == np.bool_
    assert contains.name == raw.name
    pd.testing.assert_index_equal(contains.index_value.to_pandas(), raw.index)
    assert contains.shape == raw.shape

    expanded = series.str.split(",", expand=True, n=1)
    assert expanded.op.output_types[0] == OutputType.dataframe
    assert expanded.shape == (3, 2)
    pd.testing.assert_index_equal(expanded.index_value.to_pandas(), raw.index)
    pd.testing.assert_index_equal(
        expanded.columns_value.to_pandas(), pd.RangeIndex(2)
    )

    # invalid ``others`` arguments for cat, with the error each must raise
    invalid_cat_cases = [
        (TypeError, [["1", "2"]]),
        (ValueError, ["1", "2"]),
        (ValueError, ","),
        (TypeError, {"1", "2", "3"}),
    ]
    for expected_error, bad_others in invalid_cat_cases:
        with pytest.raises(expected_error):
            _ = series.str.cat(bad_others)

    joined = series.str.cat(sep=",")
    assert joined.op.output_types[0] == OutputType.scalar
    assert joined.dtype == raw.dtype

    extracted = series.str.extract(r"[ab](\d)", expand=False)
    assert extracted.op.output_types[0] == OutputType.series
    assert extracted.dtype == raw.dtype

    extracted = series.str.extract(r"[ab](\d)", expand=True)
    assert extracted.op.output_types[0] == OutputType.dataframe
    assert extracted.shape == (3, 1)
    pd.testing.assert_index_equal(extracted.index_value.to_pandas(), raw.index)
    pd.testing.assert_index_equal(
        extracted.columns_value.to_pandas(), pd.RangeIndex(1)
    )

    assert "lstrip" in dir(series.str)

    # slicing through the accessor is rewritten to the ``slice`` method
    sliced = series.str[1:10:2]
    assert sliced.op.method == "slice"
    assert sliced.op.method_args == ()
    assert sliced.op.method_kwargs == {"start": 1, "stop": 10, "step": 2}
188
+
189
+
190
def test_datetime_method():
    """Check the metadata and error handling of the ``.dt`` accessor."""
    timestamps = [pd.Timestamp(day) for day in ("2020-1-1", "2020-2-1", "2020-3-1")]
    raw = pd.Series(timestamps, name="ss")
    series = from_pandas_series(raw, chunk_size=2)

    expected = raw.dt.year
    years = series.dt.year
    assert years.dtype == expected.dtype
    pd.testing.assert_index_equal(years.index_value.to_pandas(), raw.index)
    assert years.shape == raw.shape
    assert years.op.output_types[0] == OutputType.series
    assert years.name == expected.name

    # a non-datetime series has no ``dt`` accessor
    with pytest.raises(AttributeError):
        _ = from_pandas_series(pd.Series([1])).dt
    with pytest.raises(AttributeError):
        _ = series.dt.non_exist

    assert "ceil" in dir(series.dt)
210
+
211
+
212
def test_series_isin():
    """Check ``isin`` metadata and rejection of a plain string argument."""
    # one chunk in multiple chunks
    haystack = from_pandas_series(
        pd.Series([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), chunk_size=10
    )
    needles = from_pandas_series(pd.Series([2, 1, 9, 3]), chunk_size=2)

    result = haystack.isin(needles)
    assert result.dtype == np.dtype("bool")
    assert result.shape == (10,)
    assert len(result.op.inputs) == 2
    assert result.op.output_types[0] == OutputType.series

    with pytest.raises(TypeError):
        _ = haystack.isin("sth")

    with pytest.raises(TypeError):
        _ = haystack.to_frame().isin("sth")
228
+
229
+
230
def test_astype():
    """``astype`` with a dtype mapping naming unknown keys raises KeyError."""
    series = from_pandas_series(pd.Series([1, 2, 1, 2], name="a"), chunk_size=2)
    with pytest.raises(KeyError):
        astype(series, {"b": "str"})

    raw_frame = pd.DataFrame({"a": [1, 2, 1, 2], "b": ["a", "b", "a", "b"]})
    frame = from_pandas_df(raw_frame, chunk_size=2)

    with pytest.raises(KeyError):
        astype(frame, {"c": "str", "a": "str"})
241
+
242
+
243
def test_eval_query():
    """Check that unsupported or malformed ``eval`` / ``query`` input is rejected."""
    rng = np.random.RandomState(0)
    raw_frame = pd.DataFrame(
        {"a": rng.rand(100), "b": rng.rand(100), "c c": rng.rand(100)}
    )
    # the name ``df`` is referenced by the expression strings below
    df = from_pandas_df(raw_frame, chunk_size=(10, 2))

    with pytest.raises(NotImplementedError):
        maxframe_eval("df.a * 2", engine="numexpr")
    with pytest.raises(NotImplementedError):
        maxframe_eval("df.a * 2", parser="pandas")
    with pytest.raises(TypeError):
        df.eval(df)

    # NOTE(review): the interior whitespace of the two multi-line literals was
    # reconstructed -- confirm against the upstream file.
    with pytest.raises(SyntaxError):
        df.query(
            """
        a + b
        a + `c c`
        """
        )
    with pytest.raises(SyntaxError):
        df.eval(
            """
        def a():
            return v
        a()
        """
        )

    invalid_eval_cases = [
        (SyntaxError, "a + `c"),
        (KeyError, "a + c"),
        (ValueError, "p, q = a + c"),
    ]
    for expected_error, expression in invalid_eval_cases:
        with pytest.raises(expected_error):
            df.eval(expression)

    with pytest.raises(ValueError):
        df.query("p = a + c")
277
+
278
+
279
def test_cut():
    """Check ``cut`` argument validation and the type of each kind of result."""
    series = from_pandas_series(pd.Series([1.0, 2.0, 3.0, 4.0]), chunk_size=2)

    # a negative bin count, 2-d input and empty input are all rejected
    with pytest.raises(ValueError):
        _ = cut(series, -1)

    with pytest.raises(ValueError):
        _ = cut([[1, 2], [3, 4]], 3)

    with pytest.raises(ValueError):
        _ = cut([], 3)

    binned, bins = cut(series, [1.5, 2.5], retbins=True)
    assert isinstance(binned, SERIES_TYPE)
    assert isinstance(bins, TENSOR_TYPE)

    categorical = cut(series.to_tensor(), [1.5, 2.5])
    assert isinstance(categorical, CATEGORICAL_TYPE)
    assert len(categorical) == len(series)
    assert "Categorical" in repr(categorical)

    values = [0, 1, 1, 2]
    codes = cut(values, bins=4, labels=False)
    assert isinstance(codes, TENSOR_TYPE)
    expected = pd.cut([0, 1, 1, 2], bins=4, labels=False)
    assert codes.dtype == expected.dtype
304
+
305
+
306
def test_drop():
    """Check ``drop`` on dataframes, series and indexes."""
    # test dataframe drop
    rng = np.random.RandomState(0)
    column_names = ["c" + str(i + 1) for i in range(8)]
    raw_frame = pd.DataFrame(rng.randint(1000, size=(20, 8)), columns=column_names)

    frame = from_pandas_df(raw_frame, chunk_size=8)

    with pytest.raises(KeyError):
        frame.drop(columns=["c9"])
    with pytest.raises(NotImplementedError):
        frame.drop(columns=from_pandas_series(pd.Series(["c9"])))

    dropped = frame.drop(columns=["c1"])
    pd.testing.assert_index_equal(dropped.index_value.to_pandas(), raw_frame.index)

    frame = from_pandas_df(raw_frame, chunk_size=3)

    drop_columns = ["c2", "c4", "c5", "c6"]
    drop_index = [3, 6, 7]
    dropped = frame.drop(columns=drop_columns, index=drop_index)
    assert isinstance(dropped, DATAFRAME_TYPE)

    # test series drop
    raw_series = pd.Series(rng.randint(1000, size=(20,)))
    series = from_pandas_series(raw_series, chunk_size=3)

    dropped = series.drop(index=drop_index)
    assert isinstance(dropped, SERIES_TYPE)

    # test index drop
    shuffled = pd.Series(range(20))
    rng.shuffle(shuffled)
    raw_index = pd.Index(shuffled)

    idx = from_pandas_index(raw_index)

    dropped = idx.drop(drop_index)
    assert isinstance(dropped, INDEX_TYPE)
346
+
347
+
348
def test_drop_duplicates():
    """Check argument validation of ``drop_duplicates`` on dataframes and series."""
    rng = np.random.RandomState(0)
    column_names = ["c" + str(i + 1) for i in range(7)]
    raw_frame = pd.DataFrame(rng.randint(1000, size=(20, 7)), columns=column_names)
    raw_frame["c7"] = [f"s{j}" for j in range(20)]

    frame = from_pandas_df(raw_frame, chunk_size=10)
    with pytest.raises(ValueError):
        frame.drop_duplicates(keep=True)
    with pytest.raises(ValueError):
        frame.drop_duplicates(method="unknown")
    with pytest.raises(KeyError):
        frame.drop_duplicates(subset="c8")

    column = frame["c7"]
    with pytest.raises(ValueError):
        column.drop_duplicates(method="unknown")
    with pytest.raises(ValueError):
        column.drop_duplicates(keep=True)
368
+
369
+
370
def test_get_dummies():
    """Check ``get_dummies`` argument validation and its result type."""
    raw_frame = pd.DataFrame(
        {
            "a": [1.1, 2.1, 3.1],
            "b": ["5", "-6", "-7"],
            "c": [1, 2, 3],
            "d": ["2", "3", "4"],
        }
    )
    frame = from_pandas_df(raw_frame, chunk_size=2)

    # each invalid keyword combination and the error it must raise
    invalid_cases = [
        (TypeError, {"columns": "a"}),
        (ValueError, {"prefix": ["col1"]}),
        (ValueError, {"columns": ["a"], "prefix": {"a": "col1", "c": "col2"}}),
        (KeyError, {"columns": ["a", "b"], "prefix": {"a": "col1", "c": "col2"}}),
    ]
    for expected_error, kwargs in invalid_cases:
        with pytest.raises(expected_error):
            _ = get_dummies(frame, **kwargs)

    result = get_dummies(frame)
    assert isinstance(result, DATAFRAME_TYPE)
395
+
396
+
397
def test_to_numeric():
    """``to_numeric`` raises ValueError for a dataframe, nested list or empty list."""
    raw_frame = pd.DataFrame({"a": [1.0, 2, 3, -3]})
    frame = from_pandas_df(raw_frame, chunk_size=2)

    for invalid_input in (frame, [["1.0", 1]], []):
        with pytest.raises(ValueError):
            _ = to_numeric(invalid_input)
409
+
410
+
411
def test_case_when():
    """Check ``case_when`` argument validation and the inputs of its result."""
    rng = np.random.RandomState(0)
    column_names = ["c" + str(i + 1) for i in range(8)]
    raw_frame = pd.DataFrame(rng.randint(1000, size=(20, 8)), columns=column_names)
    frame = from_pandas_df(raw_frame, chunk_size=8)

    with pytest.raises(TypeError):
        frame.c1.case_when(frame.c2)
    with pytest.raises(ValueError):
        frame.c1.case_when([])
    with pytest.raises(TypeError):
        frame.c1.case_when([[]])
    with pytest.raises(ValueError):
        frame.c1.case_when([()])

    case_list = [(frame.c2 < 10, 10), (frame.c2 > 20, frame.c3)]
    result = frame.c1.case_when(case_list)
    assert len(result.inputs) == 4
    assert isinstance(result.inputs[1].op, DataFrameLess)
    assert isinstance(result.inputs[2].op, DataFrameGreater)
431
+
432
+
433
def test_pivot_table():
    """Check ``pivot_table`` argument validation and which operator it produces."""
    from ...groupby.aggregation import DataFrameGroupByAgg
    from ...misc.pivot_table import DataFramePivotTable

    raw_frame = pd.DataFrame(
        {
            "A": "foo foo foo foo foo bar bar bar bar".split(),
            "B": "one one one two two one one two two".split(),
            "C": "small large large small small large small small large".split(),
            "D": [1, 2, 2, 3, 3, 4, 5, 6, 7],
            "E": [2, 4, 5, 5, 6, 6, 8, 9, 9],
        }
    )
    frame = from_pandas_df(raw_frame, chunk_size=8)

    invalid_cases = [
        {"index": 123},
        {"index": ["F"]},
        {"values": ["D", "E"], "aggfunc": "sum"},
    ]
    for kwargs in invalid_cases:
        with pytest.raises(ValueError):
            frame.pivot_table(**kwargs)

    # the results asserted here are group-by aggregation operators
    table = frame.pivot_table(index="A")
    assert isinstance(table.op, DataFrameGroupByAgg)
    table = frame.pivot_table(index="A", values=["D", "E"], aggfunc="sum")
    assert isinstance(table.op, DataFrameGroupByAgg)

    table = frame.pivot_table(
        index=["A", "B"], values=["D", "E"], aggfunc="sum", margins=True
    )
    assert isinstance(table.op, DataFramePivotTable)

    table = frame.pivot_table(index="A", columns=["B", "C"], aggfunc="sum")
    assert isinstance(table.op, DataFramePivotTable)
    assert table.shape == (np.nan, np.nan)

    table = frame.pivot_table(index=["A", "B"], columns="C", aggfunc="sum")
    assert isinstance(table.op, DataFramePivotTable)
    assert table.shape == (np.nan, np.nan)