maxframe 0.1.0b5__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (647) hide show
  1. maxframe/__init__.py +32 -0
  2. maxframe/_utils.cpython-311-darwin.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyx +547 -0
  5. maxframe/codegen.py +528 -0
  6. maxframe/config/__init__.py +15 -0
  7. maxframe/config/config.py +443 -0
  8. maxframe/config/tests/__init__.py +13 -0
  9. maxframe/config/tests/test_config.py +103 -0
  10. maxframe/config/tests/test_validators.py +34 -0
  11. maxframe/config/validators.py +57 -0
  12. maxframe/conftest.py +139 -0
  13. maxframe/core/__init__.py +65 -0
  14. maxframe/core/base.py +156 -0
  15. maxframe/core/entity/__init__.py +44 -0
  16. maxframe/core/entity/chunks.py +68 -0
  17. maxframe/core/entity/core.py +152 -0
  18. maxframe/core/entity/executable.py +337 -0
  19. maxframe/core/entity/fuse.py +73 -0
  20. maxframe/core/entity/objects.py +100 -0
  21. maxframe/core/entity/output_types.py +90 -0
  22. maxframe/core/entity/tileables.py +438 -0
  23. maxframe/core/entity/utils.py +24 -0
  24. maxframe/core/graph/__init__.py +17 -0
  25. maxframe/core/graph/builder/__init__.py +16 -0
  26. maxframe/core/graph/builder/base.py +86 -0
  27. maxframe/core/graph/builder/chunk.py +430 -0
  28. maxframe/core/graph/builder/tileable.py +34 -0
  29. maxframe/core/graph/builder/utils.py +41 -0
  30. maxframe/core/graph/core.cpython-311-darwin.so +0 -0
  31. maxframe/core/graph/core.pyx +467 -0
  32. maxframe/core/graph/entity.py +171 -0
  33. maxframe/core/graph/tests/__init__.py +13 -0
  34. maxframe/core/graph/tests/test_graph.py +205 -0
  35. maxframe/core/mode.py +96 -0
  36. maxframe/core/operator/__init__.py +34 -0
  37. maxframe/core/operator/base.py +450 -0
  38. maxframe/core/operator/core.py +276 -0
  39. maxframe/core/operator/fetch.py +53 -0
  40. maxframe/core/operator/fuse.py +29 -0
  41. maxframe/core/operator/objects.py +72 -0
  42. maxframe/core/operator/shuffle.py +111 -0
  43. maxframe/core/operator/tests/__init__.py +13 -0
  44. maxframe/core/operator/tests/test_core.py +64 -0
  45. maxframe/core/tests/__init__.py +13 -0
  46. maxframe/core/tests/test_mode.py +75 -0
  47. maxframe/dataframe/__init__.py +81 -0
  48. maxframe/dataframe/arithmetic/__init__.py +359 -0
  49. maxframe/dataframe/arithmetic/abs.py +33 -0
  50. maxframe/dataframe/arithmetic/add.py +60 -0
  51. maxframe/dataframe/arithmetic/arccos.py +28 -0
  52. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  53. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  54. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  55. maxframe/dataframe/arithmetic/arctan.py +28 -0
  56. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  57. maxframe/dataframe/arithmetic/around.py +152 -0
  58. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  59. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  60. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  61. maxframe/dataframe/arithmetic/ceil.py +28 -0
  62. maxframe/dataframe/arithmetic/core.py +342 -0
  63. maxframe/dataframe/arithmetic/cos.py +28 -0
  64. maxframe/dataframe/arithmetic/cosh.py +28 -0
  65. maxframe/dataframe/arithmetic/degrees.py +28 -0
  66. maxframe/dataframe/arithmetic/docstring.py +442 -0
  67. maxframe/dataframe/arithmetic/equal.py +56 -0
  68. maxframe/dataframe/arithmetic/exp.py +28 -0
  69. maxframe/dataframe/arithmetic/exp2.py +28 -0
  70. maxframe/dataframe/arithmetic/expm1.py +28 -0
  71. maxframe/dataframe/arithmetic/floor.py +28 -0
  72. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  73. maxframe/dataframe/arithmetic/greater.py +57 -0
  74. maxframe/dataframe/arithmetic/greater_equal.py +57 -0
  75. maxframe/dataframe/arithmetic/invert.py +33 -0
  76. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  77. maxframe/dataframe/arithmetic/less.py +57 -0
  78. maxframe/dataframe/arithmetic/less_equal.py +57 -0
  79. maxframe/dataframe/arithmetic/log.py +28 -0
  80. maxframe/dataframe/arithmetic/log10.py +28 -0
  81. maxframe/dataframe/arithmetic/log2.py +28 -0
  82. maxframe/dataframe/arithmetic/mod.py +60 -0
  83. maxframe/dataframe/arithmetic/multiply.py +60 -0
  84. maxframe/dataframe/arithmetic/negative.py +33 -0
  85. maxframe/dataframe/arithmetic/not_equal.py +56 -0
  86. maxframe/dataframe/arithmetic/power.py +68 -0
  87. maxframe/dataframe/arithmetic/radians.py +28 -0
  88. maxframe/dataframe/arithmetic/sin.py +28 -0
  89. maxframe/dataframe/arithmetic/sinh.py +28 -0
  90. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  91. maxframe/dataframe/arithmetic/subtract.py +64 -0
  92. maxframe/dataframe/arithmetic/tan.py +28 -0
  93. maxframe/dataframe/arithmetic/tanh.py +28 -0
  94. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  95. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
  96. maxframe/dataframe/arithmetic/truediv.py +64 -0
  97. maxframe/dataframe/arithmetic/trunc.py +28 -0
  98. maxframe/dataframe/arrays.py +864 -0
  99. maxframe/dataframe/core.py +2417 -0
  100. maxframe/dataframe/datasource/__init__.py +15 -0
  101. maxframe/dataframe/datasource/core.py +81 -0
  102. maxframe/dataframe/datasource/dataframe.py +59 -0
  103. maxframe/dataframe/datasource/date_range.py +504 -0
  104. maxframe/dataframe/datasource/from_index.py +54 -0
  105. maxframe/dataframe/datasource/from_records.py +107 -0
  106. maxframe/dataframe/datasource/from_tensor.py +419 -0
  107. maxframe/dataframe/datasource/index.py +117 -0
  108. maxframe/dataframe/datasource/read_csv.py +528 -0
  109. maxframe/dataframe/datasource/read_odps_query.py +299 -0
  110. maxframe/dataframe/datasource/read_odps_table.py +253 -0
  111. maxframe/dataframe/datasource/read_parquet.py +421 -0
  112. maxframe/dataframe/datasource/series.py +55 -0
  113. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  114. maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
  115. maxframe/dataframe/datastore/__init__.py +26 -0
  116. maxframe/dataframe/datastore/core.py +19 -0
  117. maxframe/dataframe/datastore/to_csv.py +227 -0
  118. maxframe/dataframe/datastore/to_odps.py +162 -0
  119. maxframe/dataframe/extensions/__init__.py +41 -0
  120. maxframe/dataframe/extensions/accessor.py +50 -0
  121. maxframe/dataframe/extensions/reshuffle.py +83 -0
  122. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  123. maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
  124. maxframe/dataframe/fetch/__init__.py +15 -0
  125. maxframe/dataframe/fetch/core.py +86 -0
  126. maxframe/dataframe/groupby/__init__.py +82 -0
  127. maxframe/dataframe/groupby/aggregation.py +350 -0
  128. maxframe/dataframe/groupby/apply.py +251 -0
  129. maxframe/dataframe/groupby/core.py +179 -0
  130. maxframe/dataframe/groupby/cum.py +124 -0
  131. maxframe/dataframe/groupby/fill.py +141 -0
  132. maxframe/dataframe/groupby/getitem.py +92 -0
  133. maxframe/dataframe/groupby/head.py +105 -0
  134. maxframe/dataframe/groupby/sample.py +214 -0
  135. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  136. maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
  137. maxframe/dataframe/groupby/transform.py +255 -0
  138. maxframe/dataframe/indexing/__init__.py +84 -0
  139. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  140. maxframe/dataframe/indexing/align.py +349 -0
  141. maxframe/dataframe/indexing/at.py +83 -0
  142. maxframe/dataframe/indexing/getitem.py +204 -0
  143. maxframe/dataframe/indexing/iat.py +37 -0
  144. maxframe/dataframe/indexing/iloc.py +566 -0
  145. maxframe/dataframe/indexing/insert.py +86 -0
  146. maxframe/dataframe/indexing/loc.py +411 -0
  147. maxframe/dataframe/indexing/reindex.py +526 -0
  148. maxframe/dataframe/indexing/rename.py +462 -0
  149. maxframe/dataframe/indexing/rename_axis.py +209 -0
  150. maxframe/dataframe/indexing/reset_index.py +402 -0
  151. maxframe/dataframe/indexing/sample.py +221 -0
  152. maxframe/dataframe/indexing/set_axis.py +194 -0
  153. maxframe/dataframe/indexing/set_index.py +61 -0
  154. maxframe/dataframe/indexing/setitem.py +130 -0
  155. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  156. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  157. maxframe/dataframe/indexing/where.py +308 -0
  158. maxframe/dataframe/initializer.py +288 -0
  159. maxframe/dataframe/merge/__init__.py +32 -0
  160. maxframe/dataframe/merge/append.py +121 -0
  161. maxframe/dataframe/merge/concat.py +325 -0
  162. maxframe/dataframe/merge/merge.py +593 -0
  163. maxframe/dataframe/merge/tests/__init__.py +13 -0
  164. maxframe/dataframe/merge/tests/test_merge.py +215 -0
  165. maxframe/dataframe/misc/__init__.py +134 -0
  166. maxframe/dataframe/misc/_duplicate.py +46 -0
  167. maxframe/dataframe/misc/accessor.py +276 -0
  168. maxframe/dataframe/misc/apply.py +692 -0
  169. maxframe/dataframe/misc/astype.py +236 -0
  170. maxframe/dataframe/misc/case_when.py +141 -0
  171. maxframe/dataframe/misc/check_monotonic.py +84 -0
  172. maxframe/dataframe/misc/cut.py +383 -0
  173. maxframe/dataframe/misc/datetimes.py +79 -0
  174. maxframe/dataframe/misc/describe.py +108 -0
  175. maxframe/dataframe/misc/diff.py +210 -0
  176. maxframe/dataframe/misc/drop.py +440 -0
  177. maxframe/dataframe/misc/drop_duplicates.py +248 -0
  178. maxframe/dataframe/misc/duplicated.py +292 -0
  179. maxframe/dataframe/misc/eval.py +728 -0
  180. maxframe/dataframe/misc/explode.py +171 -0
  181. maxframe/dataframe/misc/get_dummies.py +208 -0
  182. maxframe/dataframe/misc/isin.py +217 -0
  183. maxframe/dataframe/misc/map.py +236 -0
  184. maxframe/dataframe/misc/melt.py +162 -0
  185. maxframe/dataframe/misc/memory_usage.py +248 -0
  186. maxframe/dataframe/misc/pct_change.py +150 -0
  187. maxframe/dataframe/misc/pivot_table.py +262 -0
  188. maxframe/dataframe/misc/qcut.py +104 -0
  189. maxframe/dataframe/misc/select_dtypes.py +104 -0
  190. maxframe/dataframe/misc/shift.py +256 -0
  191. maxframe/dataframe/misc/stack.py +238 -0
  192. maxframe/dataframe/misc/string_.py +221 -0
  193. maxframe/dataframe/misc/tests/__init__.py +13 -0
  194. maxframe/dataframe/misc/tests/test_misc.py +468 -0
  195. maxframe/dataframe/misc/to_numeric.py +178 -0
  196. maxframe/dataframe/misc/transform.py +361 -0
  197. maxframe/dataframe/misc/transpose.py +136 -0
  198. maxframe/dataframe/misc/value_counts.py +182 -0
  199. maxframe/dataframe/missing/__init__.py +53 -0
  200. maxframe/dataframe/missing/checkna.py +223 -0
  201. maxframe/dataframe/missing/dropna.py +280 -0
  202. maxframe/dataframe/missing/fillna.py +275 -0
  203. maxframe/dataframe/missing/replace.py +439 -0
  204. maxframe/dataframe/missing/tests/__init__.py +13 -0
  205. maxframe/dataframe/missing/tests/test_missing.py +89 -0
  206. maxframe/dataframe/operators.py +273 -0
  207. maxframe/dataframe/plotting/__init__.py +40 -0
  208. maxframe/dataframe/plotting/core.py +78 -0
  209. maxframe/dataframe/plotting/tests/__init__.py +13 -0
  210. maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
  211. maxframe/dataframe/reduction/__init__.py +107 -0
  212. maxframe/dataframe/reduction/aggregation.py +344 -0
  213. maxframe/dataframe/reduction/all.py +78 -0
  214. maxframe/dataframe/reduction/any.py +78 -0
  215. maxframe/dataframe/reduction/core.py +837 -0
  216. maxframe/dataframe/reduction/count.py +59 -0
  217. maxframe/dataframe/reduction/cummax.py +30 -0
  218. maxframe/dataframe/reduction/cummin.py +30 -0
  219. maxframe/dataframe/reduction/cumprod.py +30 -0
  220. maxframe/dataframe/reduction/cumsum.py +30 -0
  221. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  222. maxframe/dataframe/reduction/kurtosis.py +104 -0
  223. maxframe/dataframe/reduction/max.py +65 -0
  224. maxframe/dataframe/reduction/mean.py +61 -0
  225. maxframe/dataframe/reduction/min.py +65 -0
  226. maxframe/dataframe/reduction/nunique.py +141 -0
  227. maxframe/dataframe/reduction/prod.py +76 -0
  228. maxframe/dataframe/reduction/reduction_size.py +36 -0
  229. maxframe/dataframe/reduction/sem.py +69 -0
  230. maxframe/dataframe/reduction/skew.py +89 -0
  231. maxframe/dataframe/reduction/std.py +53 -0
  232. maxframe/dataframe/reduction/str_concat.py +48 -0
  233. maxframe/dataframe/reduction/sum.py +77 -0
  234. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  235. maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
  236. maxframe/dataframe/reduction/unique.py +90 -0
  237. maxframe/dataframe/reduction/var.py +72 -0
  238. maxframe/dataframe/sort/__init__.py +34 -0
  239. maxframe/dataframe/sort/core.py +36 -0
  240. maxframe/dataframe/sort/sort_index.py +153 -0
  241. maxframe/dataframe/sort/sort_values.py +311 -0
  242. maxframe/dataframe/sort/tests/__init__.py +13 -0
  243. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  244. maxframe/dataframe/statistics/__init__.py +33 -0
  245. maxframe/dataframe/statistics/corr.py +280 -0
  246. maxframe/dataframe/statistics/quantile.py +341 -0
  247. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  248. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  249. maxframe/dataframe/tests/__init__.py +13 -0
  250. maxframe/dataframe/tests/test_initializer.py +29 -0
  251. maxframe/dataframe/tseries/__init__.py +13 -0
  252. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  253. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  254. maxframe/dataframe/tseries/to_datetime.py +297 -0
  255. maxframe/dataframe/ufunc/__init__.py +27 -0
  256. maxframe/dataframe/ufunc/tensor.py +54 -0
  257. maxframe/dataframe/ufunc/ufunc.py +52 -0
  258. maxframe/dataframe/utils.py +1267 -0
  259. maxframe/dataframe/window/__init__.py +29 -0
  260. maxframe/dataframe/window/aggregation.py +96 -0
  261. maxframe/dataframe/window/core.py +69 -0
  262. maxframe/dataframe/window/ewm.py +249 -0
  263. maxframe/dataframe/window/expanding.py +147 -0
  264. maxframe/dataframe/window/rolling.py +376 -0
  265. maxframe/dataframe/window/tests/__init__.py +13 -0
  266. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  267. maxframe/dataframe/window/tests/test_expanding.py +66 -0
  268. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  269. maxframe/env.py +33 -0
  270. maxframe/errors.py +21 -0
  271. maxframe/extension.py +81 -0
  272. maxframe/learn/__init__.py +17 -0
  273. maxframe/learn/contrib/__init__.py +17 -0
  274. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  275. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  276. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  277. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  278. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  279. maxframe/learn/contrib/utils.py +52 -0
  280. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  281. maxframe/learn/contrib/xgboost/classifier.py +86 -0
  282. maxframe/learn/contrib/xgboost/core.py +156 -0
  283. maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
  284. maxframe/learn/contrib/xgboost/predict.py +138 -0
  285. maxframe/learn/contrib/xgboost/regressor.py +78 -0
  286. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  287. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  288. maxframe/learn/contrib/xgboost/train.py +121 -0
  289. maxframe/learn/utils/__init__.py +15 -0
  290. maxframe/learn/utils/core.py +29 -0
  291. maxframe/lib/__init__.py +15 -0
  292. maxframe/lib/aio/__init__.py +27 -0
  293. maxframe/lib/aio/_runners.py +162 -0
  294. maxframe/lib/aio/_threads.py +35 -0
  295. maxframe/lib/aio/base.py +82 -0
  296. maxframe/lib/aio/file.py +85 -0
  297. maxframe/lib/aio/isolation.py +100 -0
  298. maxframe/lib/aio/lru.py +242 -0
  299. maxframe/lib/aio/parallelism.py +37 -0
  300. maxframe/lib/aio/tests/__init__.py +13 -0
  301. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  302. maxframe/lib/compression.py +55 -0
  303. maxframe/lib/cython/__init__.py +13 -0
  304. maxframe/lib/cython/libcpp.pxd +30 -0
  305. maxframe/lib/filesystem/__init__.py +21 -0
  306. maxframe/lib/filesystem/_glob.py +173 -0
  307. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  308. maxframe/lib/filesystem/_oss_lib/common.py +198 -0
  309. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  310. maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
  311. maxframe/lib/filesystem/arrow.py +236 -0
  312. maxframe/lib/filesystem/base.py +263 -0
  313. maxframe/lib/filesystem/core.py +95 -0
  314. maxframe/lib/filesystem/fsmap.py +164 -0
  315. maxframe/lib/filesystem/hdfs.py +31 -0
  316. maxframe/lib/filesystem/local.py +112 -0
  317. maxframe/lib/filesystem/oss.py +157 -0
  318. maxframe/lib/filesystem/tests/__init__.py +13 -0
  319. maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
  320. maxframe/lib/filesystem/tests/test_oss.py +182 -0
  321. maxframe/lib/functools_compat.py +81 -0
  322. maxframe/lib/mmh3.cpython-311-darwin.so +0 -0
  323. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  324. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  325. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  326. maxframe/lib/sparse/__init__.py +861 -0
  327. maxframe/lib/sparse/array.py +1604 -0
  328. maxframe/lib/sparse/core.py +92 -0
  329. maxframe/lib/sparse/matrix.py +241 -0
  330. maxframe/lib/sparse/tests/__init__.py +15 -0
  331. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  332. maxframe/lib/sparse/vector.py +150 -0
  333. maxframe/lib/tblib/LICENSE +20 -0
  334. maxframe/lib/tblib/__init__.py +327 -0
  335. maxframe/lib/tblib/cpython.py +83 -0
  336. maxframe/lib/tblib/decorators.py +44 -0
  337. maxframe/lib/tblib/pickling_support.py +90 -0
  338. maxframe/lib/tests/__init__.py +13 -0
  339. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  340. maxframe/lib/version.py +620 -0
  341. maxframe/lib/wrapped_pickle.py +139 -0
  342. maxframe/mixin.py +100 -0
  343. maxframe/odpsio/__init__.py +21 -0
  344. maxframe/odpsio/arrow.py +91 -0
  345. maxframe/odpsio/schema.py +364 -0
  346. maxframe/odpsio/tableio.py +322 -0
  347. maxframe/odpsio/tests/__init__.py +13 -0
  348. maxframe/odpsio/tests/test_arrow.py +88 -0
  349. maxframe/odpsio/tests/test_schema.py +297 -0
  350. maxframe/odpsio/tests/test_tableio.py +136 -0
  351. maxframe/odpsio/tests/test_volumeio.py +90 -0
  352. maxframe/odpsio/volumeio.py +95 -0
  353. maxframe/opcodes.py +590 -0
  354. maxframe/protocol.py +415 -0
  355. maxframe/remote/__init__.py +18 -0
  356. maxframe/remote/core.py +210 -0
  357. maxframe/remote/run_script.py +121 -0
  358. maxframe/serialization/__init__.py +26 -0
  359. maxframe/serialization/arrow.py +95 -0
  360. maxframe/serialization/core.cpython-311-darwin.so +0 -0
  361. maxframe/serialization/core.pxd +44 -0
  362. maxframe/serialization/core.pyi +61 -0
  363. maxframe/serialization/core.pyx +1094 -0
  364. maxframe/serialization/exception.py +86 -0
  365. maxframe/serialization/maxframe_objects.py +39 -0
  366. maxframe/serialization/numpy.py +91 -0
  367. maxframe/serialization/pandas.py +202 -0
  368. maxframe/serialization/scipy.py +71 -0
  369. maxframe/serialization/serializables/__init__.py +55 -0
  370. maxframe/serialization/serializables/core.py +262 -0
  371. maxframe/serialization/serializables/field.py +624 -0
  372. maxframe/serialization/serializables/field_type.py +589 -0
  373. maxframe/serialization/serializables/tests/__init__.py +13 -0
  374. maxframe/serialization/serializables/tests/test_field_type.py +121 -0
  375. maxframe/serialization/serializables/tests/test_serializable.py +250 -0
  376. maxframe/serialization/tests/__init__.py +13 -0
  377. maxframe/serialization/tests/test_serial.py +412 -0
  378. maxframe/session.py +1310 -0
  379. maxframe/tensor/__init__.py +183 -0
  380. maxframe/tensor/arithmetic/__init__.py +315 -0
  381. maxframe/tensor/arithmetic/abs.py +68 -0
  382. maxframe/tensor/arithmetic/absolute.py +68 -0
  383. maxframe/tensor/arithmetic/add.py +82 -0
  384. maxframe/tensor/arithmetic/angle.py +72 -0
  385. maxframe/tensor/arithmetic/arccos.py +104 -0
  386. maxframe/tensor/arithmetic/arccosh.py +91 -0
  387. maxframe/tensor/arithmetic/arcsin.py +94 -0
  388. maxframe/tensor/arithmetic/arcsinh.py +86 -0
  389. maxframe/tensor/arithmetic/arctan.py +106 -0
  390. maxframe/tensor/arithmetic/arctan2.py +128 -0
  391. maxframe/tensor/arithmetic/arctanh.py +86 -0
  392. maxframe/tensor/arithmetic/around.py +114 -0
  393. maxframe/tensor/arithmetic/bitand.py +95 -0
  394. maxframe/tensor/arithmetic/bitor.py +102 -0
  395. maxframe/tensor/arithmetic/bitxor.py +95 -0
  396. maxframe/tensor/arithmetic/cbrt.py +66 -0
  397. maxframe/tensor/arithmetic/ceil.py +71 -0
  398. maxframe/tensor/arithmetic/clip.py +165 -0
  399. maxframe/tensor/arithmetic/conj.py +74 -0
  400. maxframe/tensor/arithmetic/copysign.py +78 -0
  401. maxframe/tensor/arithmetic/core.py +544 -0
  402. maxframe/tensor/arithmetic/cos.py +85 -0
  403. maxframe/tensor/arithmetic/cosh.py +72 -0
  404. maxframe/tensor/arithmetic/deg2rad.py +72 -0
  405. maxframe/tensor/arithmetic/degrees.py +77 -0
  406. maxframe/tensor/arithmetic/divide.py +114 -0
  407. maxframe/tensor/arithmetic/equal.py +76 -0
  408. maxframe/tensor/arithmetic/exp.py +106 -0
  409. maxframe/tensor/arithmetic/exp2.py +67 -0
  410. maxframe/tensor/arithmetic/expm1.py +79 -0
  411. maxframe/tensor/arithmetic/fabs.py +74 -0
  412. maxframe/tensor/arithmetic/fix.py +69 -0
  413. maxframe/tensor/arithmetic/float_power.py +103 -0
  414. maxframe/tensor/arithmetic/floor.py +77 -0
  415. maxframe/tensor/arithmetic/floordiv.py +94 -0
  416. maxframe/tensor/arithmetic/fmax.py +105 -0
  417. maxframe/tensor/arithmetic/fmin.py +106 -0
  418. maxframe/tensor/arithmetic/fmod.py +99 -0
  419. maxframe/tensor/arithmetic/frexp.py +92 -0
  420. maxframe/tensor/arithmetic/greater.py +77 -0
  421. maxframe/tensor/arithmetic/greater_equal.py +69 -0
  422. maxframe/tensor/arithmetic/hypot.py +77 -0
  423. maxframe/tensor/arithmetic/i0.py +89 -0
  424. maxframe/tensor/arithmetic/imag.py +67 -0
  425. maxframe/tensor/arithmetic/invert.py +110 -0
  426. maxframe/tensor/arithmetic/isclose.py +115 -0
  427. maxframe/tensor/arithmetic/iscomplex.py +64 -0
  428. maxframe/tensor/arithmetic/isfinite.py +106 -0
  429. maxframe/tensor/arithmetic/isinf.py +103 -0
  430. maxframe/tensor/arithmetic/isnan.py +82 -0
  431. maxframe/tensor/arithmetic/isreal.py +63 -0
  432. maxframe/tensor/arithmetic/ldexp.py +99 -0
  433. maxframe/tensor/arithmetic/less.py +69 -0
  434. maxframe/tensor/arithmetic/less_equal.py +69 -0
  435. maxframe/tensor/arithmetic/log.py +92 -0
  436. maxframe/tensor/arithmetic/log10.py +85 -0
  437. maxframe/tensor/arithmetic/log1p.py +95 -0
  438. maxframe/tensor/arithmetic/log2.py +85 -0
  439. maxframe/tensor/arithmetic/logaddexp.py +80 -0
  440. maxframe/tensor/arithmetic/logaddexp2.py +78 -0
  441. maxframe/tensor/arithmetic/logical_and.py +81 -0
  442. maxframe/tensor/arithmetic/logical_not.py +74 -0
  443. maxframe/tensor/arithmetic/logical_or.py +82 -0
  444. maxframe/tensor/arithmetic/logical_xor.py +88 -0
  445. maxframe/tensor/arithmetic/lshift.py +82 -0
  446. maxframe/tensor/arithmetic/maximum.py +108 -0
  447. maxframe/tensor/arithmetic/minimum.py +108 -0
  448. maxframe/tensor/arithmetic/mod.py +104 -0
  449. maxframe/tensor/arithmetic/modf.py +83 -0
  450. maxframe/tensor/arithmetic/multiply.py +81 -0
  451. maxframe/tensor/arithmetic/nan_to_num.py +99 -0
  452. maxframe/tensor/arithmetic/negative.py +65 -0
  453. maxframe/tensor/arithmetic/nextafter.py +68 -0
  454. maxframe/tensor/arithmetic/not_equal.py +72 -0
  455. maxframe/tensor/arithmetic/positive.py +47 -0
  456. maxframe/tensor/arithmetic/power.py +106 -0
  457. maxframe/tensor/arithmetic/rad2deg.py +71 -0
  458. maxframe/tensor/arithmetic/radians.py +77 -0
  459. maxframe/tensor/arithmetic/real.py +70 -0
  460. maxframe/tensor/arithmetic/reciprocal.py +76 -0
  461. maxframe/tensor/arithmetic/rint.py +68 -0
  462. maxframe/tensor/arithmetic/rshift.py +81 -0
  463. maxframe/tensor/arithmetic/setimag.py +29 -0
  464. maxframe/tensor/arithmetic/setreal.py +29 -0
  465. maxframe/tensor/arithmetic/sign.py +81 -0
  466. maxframe/tensor/arithmetic/signbit.py +65 -0
  467. maxframe/tensor/arithmetic/sin.py +98 -0
  468. maxframe/tensor/arithmetic/sinc.py +102 -0
  469. maxframe/tensor/arithmetic/sinh.py +93 -0
  470. maxframe/tensor/arithmetic/spacing.py +72 -0
  471. maxframe/tensor/arithmetic/sqrt.py +81 -0
  472. maxframe/tensor/arithmetic/square.py +69 -0
  473. maxframe/tensor/arithmetic/subtract.py +81 -0
  474. maxframe/tensor/arithmetic/tan.py +88 -0
  475. maxframe/tensor/arithmetic/tanh.py +92 -0
  476. maxframe/tensor/arithmetic/tests/__init__.py +15 -0
  477. maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
  478. maxframe/tensor/arithmetic/truediv.py +104 -0
  479. maxframe/tensor/arithmetic/trunc.py +72 -0
  480. maxframe/tensor/arithmetic/utils.py +65 -0
  481. maxframe/tensor/array_utils.py +186 -0
  482. maxframe/tensor/base/__init__.py +34 -0
  483. maxframe/tensor/base/astype.py +119 -0
  484. maxframe/tensor/base/atleast_1d.py +74 -0
  485. maxframe/tensor/base/broadcast_to.py +89 -0
  486. maxframe/tensor/base/ravel.py +92 -0
  487. maxframe/tensor/base/tests/__init__.py +13 -0
  488. maxframe/tensor/base/tests/test_base.py +114 -0
  489. maxframe/tensor/base/transpose.py +125 -0
  490. maxframe/tensor/base/unique.py +205 -0
  491. maxframe/tensor/base/where.py +127 -0
  492. maxframe/tensor/core.py +724 -0
  493. maxframe/tensor/datasource/__init__.py +32 -0
  494. maxframe/tensor/datasource/arange.py +156 -0
  495. maxframe/tensor/datasource/array.py +415 -0
  496. maxframe/tensor/datasource/core.py +109 -0
  497. maxframe/tensor/datasource/empty.py +169 -0
  498. maxframe/tensor/datasource/from_dataframe.py +70 -0
  499. maxframe/tensor/datasource/from_dense.py +54 -0
  500. maxframe/tensor/datasource/from_sparse.py +47 -0
  501. maxframe/tensor/datasource/full.py +186 -0
  502. maxframe/tensor/datasource/ones.py +173 -0
  503. maxframe/tensor/datasource/scalar.py +40 -0
  504. maxframe/tensor/datasource/tests/__init__.py +13 -0
  505. maxframe/tensor/datasource/tests/test_datasource.py +278 -0
  506. maxframe/tensor/datasource/zeros.py +188 -0
  507. maxframe/tensor/fetch/__init__.py +15 -0
  508. maxframe/tensor/fetch/core.py +54 -0
  509. maxframe/tensor/indexing/__init__.py +47 -0
  510. maxframe/tensor/indexing/choose.py +196 -0
  511. maxframe/tensor/indexing/compress.py +124 -0
  512. maxframe/tensor/indexing/core.py +190 -0
  513. maxframe/tensor/indexing/extract.py +71 -0
  514. maxframe/tensor/indexing/fill_diagonal.py +183 -0
  515. maxframe/tensor/indexing/flatnonzero.py +60 -0
  516. maxframe/tensor/indexing/getitem.py +175 -0
  517. maxframe/tensor/indexing/nonzero.py +120 -0
  518. maxframe/tensor/indexing/setitem.py +132 -0
  519. maxframe/tensor/indexing/slice.py +29 -0
  520. maxframe/tensor/indexing/take.py +130 -0
  521. maxframe/tensor/indexing/tests/__init__.py +15 -0
  522. maxframe/tensor/indexing/tests/test_indexing.py +234 -0
  523. maxframe/tensor/indexing/unravel_index.py +103 -0
  524. maxframe/tensor/merge/__init__.py +15 -0
  525. maxframe/tensor/merge/stack.py +132 -0
  526. maxframe/tensor/merge/tests/__init__.py +13 -0
  527. maxframe/tensor/merge/tests/test_merge.py +52 -0
  528. maxframe/tensor/operators.py +123 -0
  529. maxframe/tensor/random/__init__.py +168 -0
  530. maxframe/tensor/random/beta.py +87 -0
  531. maxframe/tensor/random/binomial.py +137 -0
  532. maxframe/tensor/random/bytes.py +39 -0
  533. maxframe/tensor/random/chisquare.py +110 -0
  534. maxframe/tensor/random/choice.py +186 -0
  535. maxframe/tensor/random/core.py +234 -0
  536. maxframe/tensor/random/dirichlet.py +123 -0
  537. maxframe/tensor/random/exponential.py +94 -0
  538. maxframe/tensor/random/f.py +135 -0
  539. maxframe/tensor/random/gamma.py +128 -0
  540. maxframe/tensor/random/geometric.py +93 -0
  541. maxframe/tensor/random/gumbel.py +167 -0
  542. maxframe/tensor/random/hypergeometric.py +148 -0
  543. maxframe/tensor/random/laplace.py +133 -0
  544. maxframe/tensor/random/logistic.py +129 -0
  545. maxframe/tensor/random/lognormal.py +159 -0
  546. maxframe/tensor/random/logseries.py +122 -0
  547. maxframe/tensor/random/multinomial.py +133 -0
  548. maxframe/tensor/random/multivariate_normal.py +192 -0
  549. maxframe/tensor/random/negative_binomial.py +125 -0
  550. maxframe/tensor/random/noncentral_chisquare.py +132 -0
  551. maxframe/tensor/random/noncentral_f.py +126 -0
  552. maxframe/tensor/random/normal.py +143 -0
  553. maxframe/tensor/random/pareto.py +140 -0
  554. maxframe/tensor/random/permutation.py +104 -0
  555. maxframe/tensor/random/poisson.py +111 -0
  556. maxframe/tensor/random/power.py +142 -0
  557. maxframe/tensor/random/rand.py +82 -0
  558. maxframe/tensor/random/randint.py +121 -0
  559. maxframe/tensor/random/randn.py +96 -0
  560. maxframe/tensor/random/random_integers.py +123 -0
  561. maxframe/tensor/random/random_sample.py +86 -0
  562. maxframe/tensor/random/rayleigh.py +110 -0
  563. maxframe/tensor/random/shuffle.py +61 -0
  564. maxframe/tensor/random/standard_cauchy.py +105 -0
  565. maxframe/tensor/random/standard_exponential.py +72 -0
  566. maxframe/tensor/random/standard_gamma.py +120 -0
  567. maxframe/tensor/random/standard_normal.py +74 -0
  568. maxframe/tensor/random/standard_t.py +135 -0
  569. maxframe/tensor/random/tests/__init__.py +15 -0
  570. maxframe/tensor/random/tests/test_random.py +167 -0
  571. maxframe/tensor/random/triangular.py +119 -0
  572. maxframe/tensor/random/uniform.py +131 -0
  573. maxframe/tensor/random/vonmises.py +131 -0
  574. maxframe/tensor/random/wald.py +114 -0
  575. maxframe/tensor/random/weibull.py +140 -0
  576. maxframe/tensor/random/zipf.py +122 -0
  577. maxframe/tensor/rechunk/__init__.py +26 -0
  578. maxframe/tensor/rechunk/rechunk.py +43 -0
  579. maxframe/tensor/reduction/__init__.py +66 -0
  580. maxframe/tensor/reduction/all.py +103 -0
  581. maxframe/tensor/reduction/allclose.py +88 -0
  582. maxframe/tensor/reduction/any.py +105 -0
  583. maxframe/tensor/reduction/argmax.py +103 -0
  584. maxframe/tensor/reduction/argmin.py +103 -0
  585. maxframe/tensor/reduction/array_equal.py +64 -0
  586. maxframe/tensor/reduction/core.py +168 -0
  587. maxframe/tensor/reduction/count_nonzero.py +81 -0
  588. maxframe/tensor/reduction/cumprod.py +97 -0
  589. maxframe/tensor/reduction/cumsum.py +101 -0
  590. maxframe/tensor/reduction/max.py +120 -0
  591. maxframe/tensor/reduction/mean.py +123 -0
  592. maxframe/tensor/reduction/min.py +120 -0
  593. maxframe/tensor/reduction/nanargmax.py +82 -0
  594. maxframe/tensor/reduction/nanargmin.py +76 -0
  595. maxframe/tensor/reduction/nancumprod.py +91 -0
  596. maxframe/tensor/reduction/nancumsum.py +94 -0
  597. maxframe/tensor/reduction/nanmax.py +111 -0
  598. maxframe/tensor/reduction/nanmean.py +106 -0
  599. maxframe/tensor/reduction/nanmin.py +111 -0
  600. maxframe/tensor/reduction/nanprod.py +94 -0
  601. maxframe/tensor/reduction/nanstd.py +126 -0
  602. maxframe/tensor/reduction/nansum.py +115 -0
  603. maxframe/tensor/reduction/nanvar.py +149 -0
  604. maxframe/tensor/reduction/prod.py +130 -0
  605. maxframe/tensor/reduction/std.py +134 -0
  606. maxframe/tensor/reduction/sum.py +125 -0
  607. maxframe/tensor/reduction/tests/__init__.py +13 -0
  608. maxframe/tensor/reduction/tests/test_reduction.py +181 -0
  609. maxframe/tensor/reduction/var.py +176 -0
  610. maxframe/tensor/reshape/__init__.py +17 -0
  611. maxframe/tensor/reshape/reshape.py +188 -0
  612. maxframe/tensor/reshape/tests/__init__.py +15 -0
  613. maxframe/tensor/reshape/tests/test_reshape.py +37 -0
  614. maxframe/tensor/statistics/__init__.py +13 -0
  615. maxframe/tensor/statistics/percentile.py +175 -0
  616. maxframe/tensor/statistics/quantile.py +288 -0
  617. maxframe/tensor/ufunc/__init__.py +26 -0
  618. maxframe/tensor/ufunc/ufunc.py +200 -0
  619. maxframe/tensor/utils.py +718 -0
  620. maxframe/tests/__init__.py +13 -0
  621. maxframe/tests/test_codegen.py +69 -0
  622. maxframe/tests/test_protocol.py +144 -0
  623. maxframe/tests/test_utils.py +376 -0
  624. maxframe/tests/utils.py +164 -0
  625. maxframe/typing_.py +37 -0
  626. maxframe/udf.py +134 -0
  627. maxframe/utils.py +1114 -0
  628. maxframe-0.1.0b5.dist-info/METADATA +104 -0
  629. maxframe-0.1.0b5.dist-info/RECORD +647 -0
  630. maxframe-0.1.0b5.dist-info/WHEEL +5 -0
  631. maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
  632. maxframe_client/__init__.py +17 -0
  633. maxframe_client/clients/__init__.py +13 -0
  634. maxframe_client/clients/framedriver.py +118 -0
  635. maxframe_client/clients/spe.py +104 -0
  636. maxframe_client/conftest.py +15 -0
  637. maxframe_client/fetcher.py +264 -0
  638. maxframe_client/session/__init__.py +22 -0
  639. maxframe_client/session/consts.py +36 -0
  640. maxframe_client/session/graph.py +119 -0
  641. maxframe_client/session/odps.py +482 -0
  642. maxframe_client/session/task.py +280 -0
  643. maxframe_client/session/tests/__init__.py +13 -0
  644. maxframe_client/session/tests/test_task.py +85 -0
  645. maxframe_client/tests/__init__.py +13 -0
  646. maxframe_client/tests/test_fetcher.py +89 -0
  647. maxframe_client/tests/test_session.py +255 -0
@@ -0,0 +1,171 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+
18
+ from ... import opcodes
19
+ from ...core import OutputType
20
+ from ...serialization.serializables import AnyField, BoolField
21
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
22
+ from ..utils import parse_index
23
+
24
+
25
class DataFrameExplode(DataFrameOperator, DataFrameOperatorMixin):
    """
    Operator that expands list-like elements of a DataFrame column (or of a
    Series) into separate rows.

    The first dimension of the output shape is always set to NaN, i.e. the
    row count is left unknown when the tileable is created.
    """

    _op_type_ = opcodes.EXPLODE

    # Label of the column to explode; stays None when no column is passed.
    column = AnyField("column", default=None)
    # NOTE(review): the serialization tag reads "ignore_field" instead of
    # "ignore_index". It is reproduced verbatim because renaming a tag may
    # change how serialized operators are exchanged -- confirm before fixing.
    ignore_index = BoolField("ignore_field", default=None)

    def __init__(self, output_types=None, **kw):
        super().__init__(_output_types=output_types, **kw)

    def _rewrite_params(self, in_obj):
        """
        Build the params of the output tileable from the input ``in_obj``.

        The input params are copied, the leading dimension of the shape is
        replaced with NaN and a new ``index_value`` is generated from the
        keys of the input and of its index.
        """
        out_params = in_obj.params.copy()

        dims = list(in_obj.shape)
        dims[0] = np.nan
        out_params["shape"] = tuple(dims)

        # With ``ignore_index`` a RangeIndex placeholder is used as the index
        # template; otherwise no template is handed to ``parse_index``.
        index_source = pd.RangeIndex(-1) if self.ignore_index else None
        out_params["index_value"] = parse_index(
            index_source, (in_obj.key, in_obj.index_value.key)
        )
        return out_params

    def __call__(self, df_or_series):
        out_params = self._rewrite_params(df_or_series)
        return self.new_tileable([df_or_series], **out_params)
52
+
53
+
54
def df_explode(df, column, ignore_index=False):
    """
    Transform each element of a list-like to a row, replicating index values.

    Parameters
    ----------
    column : str or tuple
        Column to explode.
    ignore_index : bool, default False
        If True, the resulting index will be labeled 0, 1, …, n - 1.

    Returns
    -------
    DataFrame
        Exploded lists to rows of the subset columns;
        index will be duplicated for these rows.

    Raises
    ------
    ValueError :
        if columns of the frame are not unique.

    See Also
    --------
    DataFrame.unstack : Pivot a level of the (necessarily hierarchical)
        index labels.
    DataFrame.melt : Unpivot a DataFrame from wide format to long format.
    Series.explode : Explode a Series of list-likes to long format.

    Notes
    -----
    This routine will explode list-likes including lists, tuples,
    Series, and np.ndarray. The result dtype of the subset rows will
    be object. Scalars will be returned unchanged. Empty list-likes will
    result in a np.nan for that row.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame({'A': [[1, 2, 3], 'foo', [], [3, 4]], 'B': 1})
    >>> df.execute()
               A  B
    0  [1, 2, 3]  1
    1        foo  1
    2         []  1
    3     [3, 4]  1

    >>> df.explode('A').execute()
         A  B
    0    1  1
    0    2  1
    0    3  1
    1  foo  1
    2  NaN  1
    3    3  1
    3    4  1
    """
    explode_op = DataFrameExplode(
        column=column,
        ignore_index=ignore_index,
        output_types=[OutputType.dataframe],
    )
    return explode_op(df)
116
+
117
+
118
def series_explode(series, ignore_index=False):
    """
    Transform each element of a list-like to a row.

    Parameters
    ----------
    ignore_index : bool, default False
        If True, the resulting index will be labeled 0, 1, …, n - 1.

    Returns
    -------
    Series
        Exploded lists to rows; index will be duplicated for these rows.

    See Also
    --------
    Series.str.split : Split string values on specified separator.
    Series.unstack : Unstack, a.k.a. pivot, Series with MultiIndex
        to produce DataFrame.
    DataFrame.melt : Unpivot a DataFrame from wide format to long format.
    DataFrame.explode : Explode a DataFrame from list-like
        columns to long format.

    Notes
    -----
    This routine will explode list-likes including lists, tuples,
    Series, and np.ndarray. The result dtype of the subset rows will
    be object. Scalars will be returned unchanged. Empty list-likes will
    result in a np.nan for that row.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s = md.Series([[1, 2, 3], 'foo', [], [3, 4]])
    >>> s.execute()
    0    [1, 2, 3]
    1          foo
    2           []
    3       [3, 4]
    dtype: object

    >>> s.explode().execute()
    0      1
    0      2
    0      3
    1    foo
    2    NaN
    3      3
    3      4
    dtype: object
    """
    explode_op = DataFrameExplode(
        ignore_index=ignore_index,
        output_types=[OutputType.series],
    )
    return explode_op(series)
@@ -0,0 +1,208 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+
18
+ from ...core import OutputType
19
+ from ...serialization.serializables import AnyField, BoolField, ListField, StringField
20
+ from ..datasource.dataframe import from_pandas as from_pandas_df
21
+ from ..datasource.series import from_pandas as from_pandas_series
22
+ from ..initializer import Series as asseries
23
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
24
+
25
# dtype kinds (object, byte string, unicode string) counted as columns to
# encode when `columns` is not given explicitly
_encoding_dtype_kind = ["O", "S", "U"]


def _check_prefix_length(prefix_num, encoding_col_num):
    """Raise ValueError unless there is exactly one prefix per encoded column."""
    if prefix_num != encoding_col_num:
        raise ValueError(
            f"Length of 'prefix' ({prefix_num}) did not match "
            f"the length of the columns being encoded ({encoding_col_num})"
        )


class DataFrameGetDummies(DataFrameOperator, DataFrameOperatorMixin):
    """
    Operator behind :func:`get_dummies`.

    Calling the operator validates ``prefix`` against the columns to encode
    and creates a DataFrame whose shape, dtypes and columns are all left
    unknown (NaN / None).
    """

    prefix = AnyField("prefix", default=None)
    prefix_sep = StringField("prefix_sep", default=None)
    dummy_na = BoolField("dummy_na", default=None)
    columns = ListField("columns", default=None)
    sparse = BoolField("sparse", default=None)
    drop_first = BoolField("drop_first", default=None)
    dtype = AnyField("dtype", default=None)

    def __init__(self, **kws):
        super().__init__(**kws)
        self.output_types = [OutputType.dataframe]

    def _normalize_prefix(self, data):
        """
        Validate ``self.prefix`` and turn a dict prefix into a list.

        Raises
        ------
        ValueError
            If the number of prefixes differs from the number of columns
            being encoded.
        KeyError
            If ``prefix`` is a dict lacking an entry for one of ``columns``.
        """
        prefix = self.prefix
        if isinstance(prefix, list):
            if self.columns is not None:
                encoding_col_num = len(self.columns)
            else:
                # no explicit columns: count the columns whose dtype kind
                # is one of the encodable kinds
                encoding_col_num = sum(
                    1
                    for dtype in data.dtypes.values
                    if dtype.kind in _encoding_dtype_kind
                )
            _check_prefix_length(len(prefix), encoding_col_num)
        elif isinstance(prefix, dict):
            if self.columns is None:
                self.columns = list(prefix.keys())
                self.prefix = list(prefix.values())
            else:
                _check_prefix_length(len(prefix), len(self.columns))
                # Look prefixes up by column label instead of relying on the
                # dict's key order, so a dict holding the right keys in a
                # different order is accepted and stays aligned with
                # `columns`. A missing label raises KeyError.
                self.prefix = [prefix[col] for col in self.columns]

    def __call__(self, data):
        # wrap local data into lazy objects first
        if isinstance(data, (list, tuple)):
            data = asseries(data)
        elif isinstance(data, pd.Series):
            data = from_pandas_series(data)
        elif isinstance(data, pd.DataFrame):
            data = from_pandas_df(data)

        # a dict prefix is converted to a list here, to simplify tile work
        self._normalize_prefix(data)

        return self.new_dataframe(
            [data],
            shape=(np.nan, np.nan),
            dtypes=None,
            index_value=data.index_value,
            columns_value=None,
        )
91
+
92
+
93
def get_dummies(
    data,
    prefix=None,
    prefix_sep="_",
    dummy_na=False,
    columns=None,
    sparse=False,
    drop_first=False,
    dtype=None,
):
    """
    Convert categorical variable into dummy/indicator variables.

    Parameters
    ----------
    data : array-like, Series, or DataFrame
        Data of which to get dummy indicators.
    prefix : str, list of str, or dict of str, default None
        String to append DataFrame column names.
        Pass a list with length equal to the number of columns
        when calling get_dummies on a DataFrame. Alternatively, `prefix`
        can be a dictionary mapping column names to prefixes.
    prefix_sep : str, default '_'
        If appending prefix, separator/delimiter to use. Or pass a
        list or dictionary as with `prefix`.
    dummy_na : bool, default False
        Add a column to indicate NaNs, if False NaNs are ignored.
    columns : list-like, default None
        Column names in the DataFrame to be encoded.
        If `columns` is None then all the columns with
        `object` or `category` dtype will be converted.
        Any list-like (list, tuple, Index, array, ...) is accepted and
        converted to a list.
    sparse : bool, default False
        Whether the dummy-encoded columns should be backed by
        a :class:`SparseArray` (True) or a regular NumPy array (False).
    drop_first : bool, default False
        Whether to get k-1 dummies out of k categorical levels by removing the
        first level.
    dtype : dtype, default np.uint8
        Data type for new columns. Only a single dtype is allowed.

    Returns
    -------
    DataFrame
        Dummy-coded data.

    Raises
    ------
    TypeError
        If `columns` is given but is not list-like (e.g. a single string).

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>> import maxframe.dataframe as md
    >>> s = md.Series(list('abca'))

    >>> md.get_dummies(s).execute()
       a  b  c
    0  1  0  0
    1  0  1  0
    2  0  0  1
    3  1  0  0

    >>> s1 = ['a', 'b', np.nan]

    >>> md.get_dummies(s1).execute()
       a  b
    0  1  0
    1  0  1
    2  0  0

    >>> md.get_dummies(s1, dummy_na=True).execute()
       a  b  NaN
    0  1  0    0
    1  0  1    0
    2  0  0    1

    >>> df = md.DataFrame({'A': ['a', 'b', 'a'], 'B': ['b', 'a', 'c'],
    ...                    'C': [1, 2, 3]})

    >>> md.get_dummies(df, prefix=['col1', 'col2']).execute()
       C  col1_a  col1_b  col2_a  col2_b  col2_c
    0  1       1       0       0       1       0
    1  2       0       1       1       0       0
    2  3       1       0       0       0       1

    >>> md.get_dummies(pd.Series(list('abcaa'))).execute()
       a  b  c
    0  1  0  0
    1  0  1  0
    2  0  0  1
    3  1  0  0
    4  1  0  0

    >>> md.get_dummies(pd.Series(list('abcaa')), drop_first=True).execute()
       b  c
    0  0  0
    1  1  0
    2  0  1
    3  0  0
    4  0  0

    >>> md.get_dummies(pd.Series(list('abc')), dtype=float).execute()
         a    b    c
    0  1.0  0.0  0.0
    1  0.0  1.0  0.0
    2  0.0  0.0  1.0
    """
    if columns is not None:
        # The error message promises "list-like", so accept every list-like
        # rather than only `list` instances; the operator stores `columns`
        # in a list field, hence the conversion.
        if not pd.api.types.is_list_like(columns):
            raise TypeError("Input must be a list-like for parameter `columns`")
        columns = list(columns)

    op = DataFrameGetDummies(
        prefix=prefix,
        prefix_sep=prefix_sep,
        dummy_na=dummy_na,
        columns=columns,
        sparse=sparse,
        drop_first=drop_first,
        dtype=dtype,
    )

    return op(data)
@@ -0,0 +1,217 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+ from pandas.api.types import is_list_like
18
+
19
+ from ... import opcodes
20
+ from ...core import ENTITY_TYPE
21
+ from ...serialization.serializables import AnyField, KeyField
22
+ from ...tensor.core import TENSOR_TYPE
23
+ from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
24
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
25
+
26
+
27
class DataFrameIsin(DataFrameOperator, DataFrameOperatorMixin):
    """
    Operator producing a boolean Series / DataFrame of membership tests.

    ``values`` may hold plain Python data, a single lazy entity, or a dict
    whose values may themselves be lazy entities; every entity is registered
    as an extra input of the operator.
    """

    _op_type_ = opcodes.ISIN

    input = KeyField("input")
    values = AnyField("values", default=None)

    def _set_inputs(self, inputs):
        """Re-bind ``input`` and entity-typed ``values`` to the new inputs."""
        super()._set_inputs(inputs)
        remaining = iter(self._inputs)
        self.input = next(remaining)
        if len(self._inputs) <= 1:
            # `values` holds no entity, nothing else to re-bind
            return

        if isinstance(self.values, dict):
            # entities are consumed in dict order, matching the order in
            # which __call__ appended them to the inputs
            self.values = {
                key: next(remaining) if isinstance(val, ENTITY_TYPE) else val
                for key, val in self.values.items()
            }
        else:
            self.values = self._inputs[1]

    def __call__(self, elements):
        if isinstance(self.values, ENTITY_TYPE):
            entity_values = [self.values]
        elif isinstance(self.values, dict):
            entity_values = [
                val for val in self.values.values() if isinstance(val, ENTITY_TYPE)
            ]
        else:
            entity_values = []
        inputs = [elements] + entity_values

        if elements.ndim == 1:
            return self.new_series(
                inputs,
                shape=elements.shape,
                dtype=np.dtype("bool"),
                index_value=elements.index_value,
                name=elements.name,
            )

        # every output column is boolean
        bool_dtypes = pd.Series(
            [np.dtype(bool)] * len(elements.dtypes), index=elements.dtypes.index
        )
        return self.new_dataframe(
            inputs,
            shape=elements.shape,
            index_value=elements.index_value,
            columns_value=elements.columns_value,
            dtypes=bool_dtypes,
        )
77
+
78
+
79
def series_isin(elements, values):
    """
    Whether elements in Series are contained in `values`.

    Return a boolean Series showing whether each element in the Series
    matches an element in the passed sequence of `values` exactly.

    Parameters
    ----------
    values : set or list-like
        The sequence of values to test. Passing in a single string will
        raise a ``TypeError``. Instead, turn a single string into a
        list of one element.

    Returns
    -------
    Series
        Series of booleans indicating if each element is in values.

    Raises
    ------
    TypeError
      * If `values` is a string

    See Also
    --------
    DataFrame.isin : Equivalent method on DataFrame.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s = md.Series(['lame', 'cow', 'lame', 'beetle', 'lame',
    ...                'hippo'], name='animal')
    >>> s.isin(['cow', 'lame']).execute()
    0     True
    1     True
    2     True
    3    False
    4     True
    5    False
    Name: animal, dtype: bool

    Passing a single string as ``s.isin('lame')`` will raise an error. Use
    a list of one element instead:

    >>> s.isin(['lame']).execute()
    0     True
    1    False
    2     True
    3    False
    4     True
    5    False
    Name: animal, dtype: bool
    """
    # Lazy Series / tensor / index objects are checked first so they are
    # handed to the operator untouched and never iterated by ``list()``.
    if not isinstance(values, (SERIES_TYPE, TENSOR_TYPE, INDEX_TYPE)):
        if not is_list_like(values):
            raise TypeError(
                "only list-like objects are allowed to be passed to isin(), "
                f"you passed a [{type(values)}]"
            )
        values = list(values)

    op = DataFrameIsin(values=values)
    return op(elements)
142
+
143
+
144
def df_isin(df, values):
    """
    Whether each element in the DataFrame is contained in values.

    Parameters
    ----------
    values : iterable, Series, DataFrame or dict
        The result will only be true at a location if all the
        labels match. If `values` is a Series, that's the index. If
        `values` is a dict, the keys must be the column names,
        which must match. If `values` is a DataFrame,
        then both the index and column labels must match.

    Returns
    -------
    DataFrame
        DataFrame of booleans showing whether each element in the DataFrame
        is contained in values.

    Raises
    ------
    TypeError
        If `values` is neither list-like, a dict, nor a lazy Series,
        DataFrame, tensor or index.

    See Also
    --------
    DataFrame.eq: Equality test for DataFrame.
    Series.isin: Equivalent method on Series.
    Series.str.contains: Test if pattern or regex is contained within a
        string of a Series or Index.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame({'num_legs': [2, 4], 'num_wings': [2, 0]},
    ...                   index=['falcon', 'dog'])
    >>> df.execute()
            num_legs  num_wings
    falcon         2          2
    dog            4          0

    When ``values`` is a list check whether every value in the DataFrame
    is present in the list (which animals have 0 or 2 legs or wings)

    >>> df.isin([0, 2]).execute()
            num_legs  num_wings
    falcon      True       True
    dog        False       True

    When ``values`` is a dict, we can pass values to check for each
    column separately:

    >>> df.isin({'num_wings': [0, 3]}).execute()
            num_legs  num_wings
    falcon     False      False
    dog        False       True

    When ``values`` is a Series or DataFrame the index and column must
    match. Note that 'dog' has no row in ``other``, so it does not match
    in any column.

    >>> other = md.DataFrame({'num_legs': [8, 2], 'num_wings': [0, 2]},
    ...                      index=['spider', 'falcon'])
    >>> df.isin(other).execute()
            num_legs  num_wings
    falcon      True       True
    dog        False      False
    """
    # Dicts and lazy entities are checked first so they are handed to the
    # operator untouched and never iterated by ``list()``.
    passthrough_types = (SERIES_TYPE, DATAFRAME_TYPE, TENSOR_TYPE, INDEX_TYPE, dict)
    if not isinstance(values, passthrough_types):
        if not is_list_like(values):
            raise TypeError(
                "only list-like objects or dict are allowed to be passed to isin(), "
                f"you passed a [{type(values)}]"
            )
        values = list(values)

    op = DataFrameIsin(values=values)
    return op(df)