maxframe 1.3.0__cp310-cp310-win_amd64.whl → 2.0.0__cp310-cp310-win_amd64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (644)
  1. maxframe/_utils.cp310-win_amd64.pyd +0 -0
  2. maxframe/_utils.pyi +21 -0
  3. maxframe/_utils.pyx +4 -3
  4. maxframe/codegen/__init__.py +27 -0
  5. maxframe/{codegen.py → codegen/core.py} +49 -43
  6. maxframe/codegen/spe/__init__.py +16 -0
  7. maxframe/codegen/spe/core.py +307 -0
  8. maxframe/codegen/spe/dataframe/__init__.py +37 -0
  9. maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
  10. maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
  11. maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
  12. maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
  13. maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
  14. maxframe/codegen/spe/dataframe/datasource.py +181 -0
  15. maxframe/codegen/spe/dataframe/datastore.py +204 -0
  16. maxframe/codegen/spe/dataframe/extensions.py +63 -0
  17. maxframe/codegen/spe/dataframe/fetch.py +26 -0
  18. maxframe/codegen/spe/dataframe/groupby.py +224 -0
  19. maxframe/codegen/spe/dataframe/indexing.py +238 -0
  20. maxframe/codegen/spe/dataframe/merge.py +73 -0
  21. maxframe/codegen/spe/dataframe/misc.py +286 -0
  22. maxframe/codegen/spe/dataframe/missing.py +64 -0
  23. maxframe/codegen/spe/dataframe/reduction.py +160 -0
  24. maxframe/codegen/spe/dataframe/sort.py +83 -0
  25. maxframe/codegen/spe/dataframe/statistics.py +46 -0
  26. maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
  27. maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
  29. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
  30. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
  31. maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
  32. maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
  33. maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
  34. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
  35. maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
  36. maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
  37. maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
  38. maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
  39. maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
  40. maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
  41. maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
  42. maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
  43. maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
  44. maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
  45. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
  46. maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
  47. maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
  48. maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
  49. maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
  50. maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
  51. maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
  52. maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
  53. maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
  54. maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
  55. maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
  56. maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
  57. maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
  58. maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
  59. maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
  60. maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
  61. maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
  62. maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
  63. maxframe/codegen/spe/dataframe/tseries.py +46 -0
  64. maxframe/codegen/spe/dataframe/udf.py +62 -0
  65. maxframe/codegen/spe/dataframe/value_counts.py +31 -0
  66. maxframe/codegen/spe/dataframe/window.py +65 -0
  67. maxframe/codegen/spe/learn/__init__.py +15 -0
  68. maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
  69. maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
  70. maxframe/codegen/spe/learn/contrib/models.py +41 -0
  71. maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
  72. maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
  73. maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
  74. maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
  75. maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
  76. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
  77. maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
  78. maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
  79. maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
  80. maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
  81. maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
  82. maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
  83. maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
  84. maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
  85. maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
  86. maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
  87. maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
  88. maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
  89. maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
  90. maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
  91. maxframe/codegen/spe/learn/utils/__init__.py +15 -0
  92. maxframe/codegen/spe/learn/utils/checks.py +55 -0
  93. maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
  94. maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
  95. maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
  96. maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
  97. maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
  98. maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
  99. maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
  100. maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
  101. maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
  102. maxframe/codegen/spe/learn/utils/validation.py +35 -0
  103. maxframe/codegen/spe/objects.py +26 -0
  104. maxframe/codegen/spe/remote.py +29 -0
  105. maxframe/codegen/spe/tensor/__init__.py +28 -0
  106. maxframe/codegen/spe/tensor/arithmetic.py +95 -0
  107. maxframe/codegen/spe/tensor/core.py +41 -0
  108. maxframe/codegen/spe/tensor/datasource.py +165 -0
  109. maxframe/codegen/spe/tensor/extensions.py +35 -0
  110. maxframe/codegen/spe/tensor/fetch.py +26 -0
  111. maxframe/codegen/spe/tensor/indexing.py +63 -0
  112. maxframe/codegen/spe/tensor/linalg.py +63 -0
  113. maxframe/codegen/spe/tensor/merge.py +31 -0
  114. maxframe/codegen/spe/tensor/misc.py +121 -0
  115. maxframe/codegen/spe/tensor/random.py +29 -0
  116. maxframe/codegen/spe/tensor/reduction.py +39 -0
  117. maxframe/codegen/spe/tensor/reshape.py +26 -0
  118. maxframe/codegen/spe/tensor/sort.py +42 -0
  119. maxframe/codegen/spe/tensor/special.py +35 -0
  120. maxframe/codegen/spe/tensor/statistics.py +24 -0
  121. maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
  122. maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
  123. maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
  124. maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
  125. maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
  126. maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
  127. maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
  128. maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
  129. maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
  130. maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
  131. maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
  132. maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
  133. maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
  134. maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
  135. maxframe/codegen/spe/tests/__init__.py +13 -0
  136. maxframe/codegen/spe/tests/test_remote.py +29 -0
  137. maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
  138. maxframe/codegen/spe/utils.py +54 -0
  139. maxframe/codegen/tests/__init__.py +13 -0
  140. maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
  141. maxframe/config/__init__.py +1 -1
  142. maxframe/config/config.py +50 -23
  143. maxframe/config/tests/test_config.py +4 -12
  144. maxframe/config/validators.py +5 -0
  145. maxframe/conftest.py +38 -10
  146. maxframe/core/__init__.py +1 -0
  147. maxframe/core/context.py +110 -0
  148. maxframe/core/entity/__init__.py +1 -0
  149. maxframe/core/entity/core.py +0 -7
  150. maxframe/core/entity/objects.py +19 -5
  151. maxframe/core/entity/output_types.py +11 -0
  152. maxframe/core/entity/tests/test_objects.py +11 -12
  153. maxframe/core/entity/tileables.py +3 -1
  154. maxframe/core/entity/utils.py +15 -0
  155. maxframe/core/graph/__init__.py +6 -1
  156. maxframe/core/graph/builder/base.py +5 -1
  157. maxframe/core/graph/core.cp310-win_amd64.pyd +0 -0
  158. maxframe/core/graph/core.pyx +17 -6
  159. maxframe/core/graph/entity.py +18 -6
  160. maxframe/core/operator/__init__.py +8 -3
  161. maxframe/core/operator/base.py +35 -12
  162. maxframe/core/operator/core.py +37 -14
  163. maxframe/core/operator/fetch.py +5 -18
  164. maxframe/core/operator/objects.py +0 -20
  165. maxframe/core/operator/shuffle.py +6 -72
  166. maxframe/dataframe/__init__.py +1 -0
  167. maxframe/dataframe/accessors/datetime_/core.py +7 -4
  168. maxframe/dataframe/accessors/string_/core.py +9 -6
  169. maxframe/dataframe/arithmetic/core.py +31 -20
  170. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
  171. maxframe/dataframe/core.py +98 -91
  172. maxframe/dataframe/datasource/core.py +8 -1
  173. maxframe/dataframe/datasource/date_range.py +8 -0
  174. maxframe/dataframe/datasource/from_index.py +9 -5
  175. maxframe/dataframe/datasource/from_records.py +9 -2
  176. maxframe/dataframe/datasource/from_tensor.py +32 -21
  177. maxframe/dataframe/datasource/read_csv.py +8 -2
  178. maxframe/dataframe/datasource/read_odps_query.py +109 -19
  179. maxframe/dataframe/datasource/read_odps_table.py +20 -5
  180. maxframe/dataframe/datasource/read_parquet.py +8 -3
  181. maxframe/dataframe/datasource/tests/test_datasource.py +80 -1
  182. maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
  183. maxframe/dataframe/datastore/to_csv.py +7 -3
  184. maxframe/dataframe/datastore/to_odps.py +42 -6
  185. maxframe/dataframe/extensions/__init__.py +6 -1
  186. maxframe/dataframe/extensions/apply_chunk.py +96 -136
  187. maxframe/dataframe/extensions/flatjson.py +3 -2
  188. maxframe/dataframe/extensions/flatmap.py +15 -7
  189. maxframe/dataframe/fetch/core.py +12 -1
  190. maxframe/dataframe/groupby/__init__.py +7 -0
  191. maxframe/dataframe/groupby/aggregation.py +62 -9
  192. maxframe/dataframe/groupby/apply.py +50 -74
  193. maxframe/dataframe/groupby/apply_chunk.py +393 -0
  194. maxframe/dataframe/groupby/core.py +80 -17
  195. maxframe/dataframe/groupby/extensions.py +26 -0
  196. maxframe/dataframe/groupby/fill.py +9 -4
  197. maxframe/dataframe/groupby/sample.py +7 -7
  198. maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
  199. maxframe/dataframe/groupby/transform.py +57 -54
  200. maxframe/dataframe/indexing/align.py +7 -6
  201. maxframe/dataframe/indexing/getitem.py +9 -8
  202. maxframe/dataframe/indexing/iloc.py +28 -23
  203. maxframe/dataframe/indexing/insert.py +7 -3
  204. maxframe/dataframe/indexing/loc.py +9 -8
  205. maxframe/dataframe/indexing/reindex.py +36 -30
  206. maxframe/dataframe/indexing/rename_axis.py +18 -10
  207. maxframe/dataframe/indexing/reset_index.py +0 -2
  208. maxframe/dataframe/indexing/sample.py +13 -9
  209. maxframe/dataframe/indexing/set_axis.py +9 -6
  210. maxframe/dataframe/indexing/setitem.py +8 -5
  211. maxframe/dataframe/indexing/where.py +12 -9
  212. maxframe/dataframe/merge/__init__.py +0 -1
  213. maxframe/dataframe/merge/concat.py +10 -31
  214. maxframe/dataframe/merge/merge.py +2 -24
  215. maxframe/dataframe/misc/__init__.py +6 -0
  216. maxframe/dataframe/misc/_duplicate.py +7 -3
  217. maxframe/dataframe/misc/apply.py +106 -139
  218. maxframe/dataframe/misc/astype.py +3 -2
  219. maxframe/dataframe/misc/case_when.py +11 -7
  220. maxframe/dataframe/misc/cut.py +11 -10
  221. maxframe/dataframe/misc/describe.py +7 -3
  222. maxframe/dataframe/misc/drop.py +13 -11
  223. maxframe/dataframe/misc/eval.py +0 -2
  224. maxframe/dataframe/misc/get_dummies.py +78 -49
  225. maxframe/dataframe/misc/isin.py +13 -10
  226. maxframe/dataframe/misc/map.py +21 -6
  227. maxframe/dataframe/misc/melt.py +8 -1
  228. maxframe/dataframe/misc/pivot.py +232 -0
  229. maxframe/dataframe/misc/pivot_table.py +52 -40
  230. maxframe/dataframe/misc/rechunk.py +59 -0
  231. maxframe/dataframe/misc/shift.py +7 -4
  232. maxframe/dataframe/misc/stack.py +5 -3
  233. maxframe/dataframe/misc/tests/test_misc.py +167 -1
  234. maxframe/dataframe/misc/transform.py +63 -65
  235. maxframe/dataframe/misc/value_counts.py +7 -4
  236. maxframe/dataframe/missing/dropna.py +16 -7
  237. maxframe/dataframe/missing/fillna.py +18 -10
  238. maxframe/dataframe/missing/replace.py +10 -6
  239. maxframe/dataframe/missing/tests/test_missing.py +2 -2
  240. maxframe/dataframe/operators.py +1 -27
  241. maxframe/dataframe/reduction/aggregation.py +128 -3
  242. maxframe/dataframe/reduction/core.py +20 -6
  243. maxframe/dataframe/reduction/median.py +1 -1
  244. maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
  245. maxframe/dataframe/reduction/unique.py +53 -7
  246. maxframe/dataframe/statistics/corr.py +9 -6
  247. maxframe/dataframe/statistics/quantile.py +9 -6
  248. maxframe/dataframe/tseries/to_datetime.py +6 -4
  249. maxframe/dataframe/utils.py +219 -31
  250. maxframe/dataframe/window/rolling.py +7 -4
  251. maxframe/env.py +1 -0
  252. maxframe/errors.py +9 -0
  253. maxframe/extension.py +13 -2
  254. maxframe/io/objects/core.py +67 -51
  255. maxframe/io/objects/tensor.py +73 -17
  256. maxframe/io/objects/tests/test_object_io.py +10 -55
  257. maxframe/io/odpsio/arrow.py +15 -2
  258. maxframe/io/odpsio/schema.py +43 -13
  259. maxframe/io/odpsio/tableio.py +63 -11
  260. maxframe/io/odpsio/tests/test_arrow.py +1 -2
  261. maxframe/io/odpsio/tests/test_schema.py +114 -1
  262. maxframe/io/odpsio/tests/test_tableio.py +42 -0
  263. maxframe/io/odpsio/tests/test_volumeio.py +21 -58
  264. maxframe/io/odpsio/volumeio.py +23 -8
  265. maxframe/learn/__init__.py +2 -2
  266. maxframe/learn/contrib/__init__.py +2 -2
  267. maxframe/learn/contrib/graph/connected_components.py +2 -1
  268. maxframe/learn/contrib/lightgbm/__init__.py +33 -0
  269. maxframe/learn/contrib/lightgbm/_predict.py +138 -0
  270. maxframe/learn/contrib/lightgbm/_train.py +163 -0
  271. maxframe/learn/contrib/lightgbm/callback.py +114 -0
  272. maxframe/learn/contrib/lightgbm/classifier.py +199 -0
  273. maxframe/learn/contrib/lightgbm/core.py +372 -0
  274. maxframe/learn/contrib/lightgbm/dataset.py +153 -0
  275. maxframe/learn/contrib/lightgbm/regressor.py +29 -0
  276. maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
  277. maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
  278. maxframe/learn/contrib/llm/models/dashscope.py +34 -0
  279. maxframe/learn/contrib/llm/models/managed.py +15 -0
  280. maxframe/learn/contrib/llm/multi_modal.py +92 -0
  281. maxframe/learn/contrib/llm/text.py +21 -5
  282. maxframe/learn/contrib/models.py +38 -9
  283. maxframe/learn/contrib/utils.py +55 -0
  284. maxframe/learn/contrib/xgboost/callback.py +86 -0
  285. maxframe/learn/contrib/xgboost/classifier.py +26 -30
  286. maxframe/learn/contrib/xgboost/core.py +54 -42
  287. maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
  288. maxframe/learn/contrib/xgboost/predict.py +13 -8
  289. maxframe/learn/contrib/xgboost/regressor.py +28 -27
  290. maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
  291. maxframe/learn/contrib/xgboost/train.py +59 -16
  292. maxframe/learn/core.py +252 -0
  293. maxframe/learn/datasets/__init__.py +20 -0
  294. maxframe/learn/datasets/samples_generator.py +628 -0
  295. maxframe/learn/linear_model/__init__.py +15 -0
  296. maxframe/learn/linear_model/_base.py +163 -0
  297. maxframe/learn/linear_model/_lin_reg.py +175 -0
  298. maxframe/learn/metrics/__init__.py +25 -0
  299. maxframe/learn/metrics/_check_targets.py +95 -0
  300. maxframe/learn/metrics/_classification.py +1121 -0
  301. maxframe/learn/metrics/_regression.py +256 -0
  302. maxframe/learn/model_selection/__init__.py +15 -0
  303. maxframe/learn/model_selection/_split.py +451 -0
  304. maxframe/learn/model_selection/tests/__init__.py +13 -0
  305. maxframe/learn/model_selection/tests/test_split.py +156 -0
  306. maxframe/learn/preprocessing/__init__.py +16 -0
  307. maxframe/learn/preprocessing/_data/__init__.py +17 -0
  308. maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
  309. maxframe/learn/preprocessing/_data/normalize.py +127 -0
  310. maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
  311. maxframe/learn/preprocessing/_data/utils.py +79 -0
  312. maxframe/learn/preprocessing/_label/__init__.py +16 -0
  313. maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
  314. maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
  315. maxframe/learn/utils/__init__.py +4 -0
  316. maxframe/learn/utils/_encode.py +314 -0
  317. maxframe/learn/utils/checks.py +161 -0
  318. maxframe/learn/utils/core.py +33 -0
  319. maxframe/learn/utils/extmath.py +176 -0
  320. maxframe/learn/utils/multiclass.py +292 -0
  321. maxframe/learn/utils/shuffle.py +114 -0
  322. maxframe/learn/utils/sparsefuncs.py +87 -0
  323. maxframe/learn/utils/validation.py +775 -0
  324. maxframe/lib/__init__.py +0 -2
  325. maxframe/lib/compat.py +145 -0
  326. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  327. maxframe/lib/mmh3.cp310-win_amd64.pyd +0 -0
  328. maxframe/lib/sparse/__init__.py +10 -15
  329. maxframe/lib/sparse/array.py +45 -33
  330. maxframe/lib/sparse/core.py +0 -2
  331. maxframe/lib/sparse/linalg.py +31 -0
  332. maxframe/lib/sparse/matrix.py +5 -2
  333. maxframe/lib/sparse/tests/__init__.py +0 -2
  334. maxframe/lib/sparse/tests/test_sparse.py +53 -53
  335. maxframe/lib/sparse/vector.py +0 -2
  336. maxframe/mixin.py +59 -2
  337. maxframe/opcodes.py +13 -5
  338. maxframe/protocol.py +67 -14
  339. maxframe/remote/core.py +16 -14
  340. maxframe/remote/run_script.py +6 -3
  341. maxframe/serialization/__init__.py +2 -0
  342. maxframe/serialization/core.cp310-win_amd64.pyd +0 -0
  343. maxframe/serialization/core.pxd +3 -0
  344. maxframe/serialization/core.pyi +3 -1
  345. maxframe/serialization/core.pyx +82 -4
  346. maxframe/serialization/pandas.py +5 -1
  347. maxframe/serialization/serializables/core.py +6 -5
  348. maxframe/serialization/serializables/field.py +2 -2
  349. maxframe/serialization/serializables/tests/test_field_type.py +3 -5
  350. maxframe/serialization/tests/test_serial.py +27 -0
  351. maxframe/session.py +4 -71
  352. maxframe/sperunner.py +165 -0
  353. maxframe/tensor/__init__.py +35 -2
  354. maxframe/tensor/arithmetic/__init__.py +2 -4
  355. maxframe/tensor/arithmetic/abs.py +0 -2
  356. maxframe/tensor/arithmetic/absolute.py +0 -2
  357. maxframe/tensor/arithmetic/add.py +34 -4
  358. maxframe/tensor/arithmetic/angle.py +0 -2
  359. maxframe/tensor/arithmetic/arccos.py +1 -4
  360. maxframe/tensor/arithmetic/arccosh.py +1 -3
  361. maxframe/tensor/arithmetic/arcsin.py +0 -2
  362. maxframe/tensor/arithmetic/arcsinh.py +0 -2
  363. maxframe/tensor/arithmetic/arctan.py +0 -2
  364. maxframe/tensor/arithmetic/arctan2.py +0 -2
  365. maxframe/tensor/arithmetic/arctanh.py +0 -2
  366. maxframe/tensor/arithmetic/around.py +0 -2
  367. maxframe/tensor/arithmetic/bitand.py +0 -2
  368. maxframe/tensor/arithmetic/bitor.py +1 -3
  369. maxframe/tensor/arithmetic/bitxor.py +1 -3
  370. maxframe/tensor/arithmetic/cbrt.py +0 -2
  371. maxframe/tensor/arithmetic/ceil.py +0 -2
  372. maxframe/tensor/arithmetic/clip.py +13 -13
  373. maxframe/tensor/arithmetic/conj.py +0 -2
  374. maxframe/tensor/arithmetic/copysign.py +0 -2
  375. maxframe/tensor/arithmetic/core.py +47 -39
  376. maxframe/tensor/arithmetic/cos.py +1 -3
  377. maxframe/tensor/arithmetic/cosh.py +0 -2
  378. maxframe/tensor/arithmetic/deg2rad.py +0 -2
  379. maxframe/tensor/arithmetic/degrees.py +0 -2
  380. maxframe/tensor/arithmetic/divide.py +0 -2
  381. maxframe/tensor/arithmetic/equal.py +0 -2
  382. maxframe/tensor/arithmetic/exp.py +1 -3
  383. maxframe/tensor/arithmetic/exp2.py +0 -2
  384. maxframe/tensor/arithmetic/expm1.py +0 -2
  385. maxframe/tensor/arithmetic/fabs.py +0 -2
  386. maxframe/tensor/arithmetic/fix.py +0 -2
  387. maxframe/tensor/arithmetic/float_power.py +0 -2
  388. maxframe/tensor/arithmetic/floor.py +0 -2
  389. maxframe/tensor/arithmetic/floordiv.py +0 -2
  390. maxframe/tensor/arithmetic/fmax.py +0 -2
  391. maxframe/tensor/arithmetic/fmin.py +0 -2
  392. maxframe/tensor/arithmetic/fmod.py +0 -2
  393. maxframe/tensor/arithmetic/frexp.py +6 -2
  394. maxframe/tensor/arithmetic/greater.py +0 -2
  395. maxframe/tensor/arithmetic/greater_equal.py +0 -2
  396. maxframe/tensor/arithmetic/hypot.py +0 -2
  397. maxframe/tensor/arithmetic/i0.py +1 -3
  398. maxframe/tensor/arithmetic/imag.py +0 -2
  399. maxframe/tensor/arithmetic/invert.py +1 -3
  400. maxframe/tensor/arithmetic/isclose.py +0 -2
  401. maxframe/tensor/arithmetic/iscomplex.py +0 -2
  402. maxframe/tensor/arithmetic/isfinite.py +1 -3
  403. maxframe/tensor/arithmetic/isinf.py +0 -2
  404. maxframe/tensor/arithmetic/isnan.py +0 -2
  405. maxframe/tensor/arithmetic/isreal.py +0 -2
  406. maxframe/tensor/arithmetic/ldexp.py +0 -2
  407. maxframe/tensor/arithmetic/less.py +0 -2
  408. maxframe/tensor/arithmetic/less_equal.py +0 -2
  409. maxframe/tensor/arithmetic/log.py +1 -3
  410. maxframe/tensor/arithmetic/log10.py +1 -3
  411. maxframe/tensor/arithmetic/log1p.py +1 -3
  412. maxframe/tensor/arithmetic/log2.py +1 -3
  413. maxframe/tensor/arithmetic/logaddexp.py +0 -2
  414. maxframe/tensor/arithmetic/logaddexp2.py +0 -2
  415. maxframe/tensor/arithmetic/logical_and.py +0 -2
  416. maxframe/tensor/arithmetic/logical_not.py +1 -3
  417. maxframe/tensor/arithmetic/logical_or.py +0 -2
  418. maxframe/tensor/arithmetic/logical_xor.py +0 -2
  419. maxframe/tensor/arithmetic/lshift.py +0 -2
  420. maxframe/tensor/arithmetic/maximum.py +0 -2
  421. maxframe/tensor/arithmetic/minimum.py +0 -2
  422. maxframe/tensor/arithmetic/mod.py +0 -2
  423. maxframe/tensor/arithmetic/modf.py +6 -2
  424. maxframe/tensor/arithmetic/multiply.py +37 -4
  425. maxframe/tensor/arithmetic/nan_to_num.py +0 -2
  426. maxframe/tensor/arithmetic/negative.py +0 -2
  427. maxframe/tensor/arithmetic/nextafter.py +0 -2
  428. maxframe/tensor/arithmetic/not_equal.py +0 -2
  429. maxframe/tensor/arithmetic/positive.py +0 -2
  430. maxframe/tensor/arithmetic/power.py +0 -2
  431. maxframe/tensor/arithmetic/rad2deg.py +0 -2
  432. maxframe/tensor/arithmetic/radians.py +0 -2
  433. maxframe/tensor/arithmetic/real.py +0 -2
  434. maxframe/tensor/arithmetic/reciprocal.py +5 -3
  435. maxframe/tensor/arithmetic/rint.py +1 -3
  436. maxframe/tensor/arithmetic/rshift.py +0 -2
  437. maxframe/tensor/arithmetic/setimag.py +0 -2
  438. maxframe/tensor/arithmetic/setreal.py +0 -2
  439. maxframe/tensor/arithmetic/sign.py +0 -2
  440. maxframe/tensor/arithmetic/signbit.py +0 -2
  441. maxframe/tensor/arithmetic/sin.py +0 -2
  442. maxframe/tensor/arithmetic/sinc.py +1 -3
  443. maxframe/tensor/arithmetic/sinh.py +0 -2
  444. maxframe/tensor/arithmetic/spacing.py +0 -2
  445. maxframe/tensor/arithmetic/sqrt.py +0 -2
  446. maxframe/tensor/arithmetic/square.py +0 -2
  447. maxframe/tensor/arithmetic/subtract.py +4 -2
  448. maxframe/tensor/arithmetic/tan.py +0 -2
  449. maxframe/tensor/arithmetic/tanh.py +0 -2
  450. maxframe/tensor/arithmetic/tests/__init__.py +0 -2
  451. maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
  452. maxframe/tensor/arithmetic/truediv.py +0 -2
  453. maxframe/tensor/arithmetic/trunc.py +0 -2
  454. maxframe/tensor/arithmetic/utils.py +32 -6
  455. maxframe/tensor/array_utils.py +3 -25
  456. maxframe/tensor/core.py +6 -6
  457. maxframe/tensor/datasource/__init__.py +10 -2
  458. maxframe/tensor/datasource/arange.py +0 -2
  459. maxframe/tensor/datasource/array.py +3 -22
  460. maxframe/tensor/datasource/core.py +15 -10
  461. maxframe/tensor/datasource/diag.py +140 -0
  462. maxframe/tensor/datasource/diagflat.py +69 -0
  463. maxframe/tensor/datasource/empty.py +0 -2
  464. maxframe/tensor/datasource/eye.py +95 -0
  465. maxframe/tensor/datasource/from_dataframe.py +0 -2
  466. maxframe/tensor/datasource/from_dense.py +0 -17
  467. maxframe/tensor/datasource/from_sparse.py +0 -2
  468. maxframe/tensor/datasource/full.py +0 -2
  469. maxframe/tensor/datasource/identity.py +54 -0
  470. maxframe/tensor/datasource/indices.py +115 -0
  471. maxframe/tensor/datasource/linspace.py +140 -0
  472. maxframe/tensor/datasource/meshgrid.py +135 -0
  473. maxframe/tensor/datasource/ones.py +8 -3
  474. maxframe/tensor/datasource/tests/test_datasource.py +32 -1
  475. maxframe/tensor/datasource/tri_array.py +107 -0
  476. maxframe/tensor/datasource/zeros.py +7 -3
  477. maxframe/tensor/extensions/__init__.py +31 -0
  478. maxframe/tensor/extensions/accessor.py +25 -0
  479. maxframe/tensor/extensions/apply_chunk.py +137 -0
  480. maxframe/tensor/indexing/__init__.py +1 -1
  481. maxframe/tensor/indexing/choose.py +8 -6
  482. maxframe/tensor/indexing/compress.py +0 -2
  483. maxframe/tensor/indexing/extract.py +0 -2
  484. maxframe/tensor/indexing/fill_diagonal.py +9 -6
  485. maxframe/tensor/indexing/flatnonzero.py +1 -3
  486. maxframe/tensor/indexing/getitem.py +10 -43
  487. maxframe/tensor/indexing/nonzero.py +2 -4
  488. maxframe/tensor/indexing/setitem.py +19 -9
  489. maxframe/tensor/indexing/slice.py +6 -3
  490. maxframe/tensor/indexing/take.py +0 -2
  491. maxframe/tensor/indexing/tests/__init__.py +0 -2
  492. maxframe/tensor/indexing/tests/test_indexing.py +0 -2
  493. maxframe/tensor/indexing/unravel_index.py +6 -6
  494. maxframe/tensor/lib/__init__.py +16 -0
  495. maxframe/tensor/lib/index_tricks.py +404 -0
  496. maxframe/tensor/linalg/__init__.py +36 -0
  497. maxframe/tensor/linalg/dot.py +145 -0
  498. maxframe/tensor/linalg/inner.py +36 -0
  499. maxframe/tensor/linalg/inv.py +83 -0
  500. maxframe/tensor/linalg/lu.py +115 -0
  501. maxframe/tensor/linalg/matmul.py +225 -0
  502. maxframe/tensor/linalg/qr.py +124 -0
  503. maxframe/tensor/linalg/solve_triangular.py +103 -0
  504. maxframe/tensor/linalg/svd.py +167 -0
  505. maxframe/tensor/linalg/tensordot.py +213 -0
  506. maxframe/tensor/linalg/vdot.py +73 -0
  507. maxframe/tensor/merge/__init__.py +4 -0
  508. maxframe/tensor/merge/append.py +74 -0
  509. maxframe/tensor/merge/column_stack.py +63 -0
  510. maxframe/tensor/merge/concatenate.py +3 -2
  511. maxframe/tensor/merge/dstack.py +71 -0
  512. maxframe/tensor/merge/hstack.py +70 -0
  513. maxframe/tensor/merge/stack.py +0 -2
  514. maxframe/tensor/merge/tests/test_merge.py +0 -2
  515. maxframe/tensor/misc/__init__.py +18 -5
  516. maxframe/tensor/misc/astype.py +10 -8
  517. maxframe/tensor/misc/broadcast_to.py +1 -1
  518. maxframe/tensor/misc/copy.py +64 -0
  519. maxframe/tensor/misc/diff.py +115 -0
  520. maxframe/tensor/misc/flatten.py +63 -0
  521. maxframe/tensor/misc/in1d.py +94 -0
  522. maxframe/tensor/misc/isin.py +130 -0
  523. maxframe/tensor/misc/ndim.py +53 -0
  524. maxframe/tensor/misc/ravel.py +0 -2
  525. maxframe/tensor/misc/repeat.py +129 -0
  526. maxframe/tensor/misc/searchsorted.py +147 -0
  527. maxframe/tensor/misc/setdiff1d.py +58 -0
  528. maxframe/tensor/misc/squeeze.py +117 -0
  529. maxframe/tensor/misc/swapaxes.py +113 -0
  530. maxframe/tensor/misc/tests/test_misc.py +0 -2
  531. maxframe/tensor/misc/transpose.py +8 -4
  532. maxframe/tensor/misc/trapezoid.py +123 -0
  533. maxframe/tensor/misc/unique.py +0 -1
  534. maxframe/tensor/misc/where.py +10 -8
  535. maxframe/tensor/operators.py +0 -34
  536. maxframe/tensor/random/__init__.py +3 -5
  537. maxframe/tensor/random/binomial.py +0 -2
  538. maxframe/tensor/random/bytes.py +0 -2
  539. maxframe/tensor/random/chisquare.py +0 -2
  540. maxframe/tensor/random/choice.py +9 -8
  541. maxframe/tensor/random/core.py +20 -5
  542. maxframe/tensor/random/dirichlet.py +0 -2
  543. maxframe/tensor/random/exponential.py +0 -2
  544. maxframe/tensor/random/f.py +2 -4
  545. maxframe/tensor/random/gamma.py +0 -2
  546. maxframe/tensor/random/geometric.py +0 -2
  547. maxframe/tensor/random/gumbel.py +0 -2
  548. maxframe/tensor/random/hypergeometric.py +0 -2
  549. maxframe/tensor/random/laplace.py +2 -4
  550. maxframe/tensor/random/logistic.py +0 -2
  551. maxframe/tensor/random/lognormal.py +0 -2
  552. maxframe/tensor/random/logseries.py +0 -2
  553. maxframe/tensor/random/multinomial.py +0 -2
  554. maxframe/tensor/random/multivariate_normal.py +0 -2
  555. maxframe/tensor/random/negative_binomial.py +0 -2
  556. maxframe/tensor/random/noncentral_chisquare.py +0 -2
  557. maxframe/tensor/random/noncentral_f.py +1 -3
  558. maxframe/tensor/random/normal.py +0 -2
  559. maxframe/tensor/random/pareto.py +0 -2
  560. maxframe/tensor/random/permutation.py +6 -3
  561. maxframe/tensor/random/poisson.py +0 -2
  562. maxframe/tensor/random/power.py +0 -2
  563. maxframe/tensor/random/rand.py +0 -2
  564. maxframe/tensor/random/randint.py +0 -2
  565. maxframe/tensor/random/randn.py +0 -2
  566. maxframe/tensor/random/random_integers.py +0 -2
  567. maxframe/tensor/random/random_sample.py +0 -2
  568. maxframe/tensor/random/rayleigh.py +0 -2
  569. maxframe/tensor/random/standard_cauchy.py +0 -2
  570. maxframe/tensor/random/standard_exponential.py +0 -2
  571. maxframe/tensor/random/standard_gamma.py +0 -2
  572. maxframe/tensor/random/standard_normal.py +0 -2
  573. maxframe/tensor/random/standard_t.py +0 -2
  574. maxframe/tensor/random/tests/__init__.py +0 -2
  575. maxframe/tensor/random/tests/test_random.py +0 -2
  576. maxframe/tensor/random/triangular.py +0 -2
  577. maxframe/tensor/random/uniform.py +0 -2
  578. maxframe/tensor/random/vonmises.py +0 -2
  579. maxframe/tensor/random/wald.py +0 -2
  580. maxframe/tensor/random/weibull.py +0 -2
  581. maxframe/tensor/random/zipf.py +0 -2
  582. maxframe/tensor/reduction/__init__.py +0 -2
  583. maxframe/tensor/reduction/all.py +0 -2
  584. maxframe/tensor/reduction/allclose.py +0 -2
  585. maxframe/tensor/reduction/any.py +0 -2
  586. maxframe/tensor/reduction/argmax.py +1 -3
  587. maxframe/tensor/reduction/argmin.py +1 -3
  588. maxframe/tensor/reduction/array_equal.py +0 -2
  589. maxframe/tensor/reduction/core.py +0 -2
  590. maxframe/tensor/reduction/count_nonzero.py +0 -2
  591. maxframe/tensor/reduction/cumprod.py +0 -2
  592. maxframe/tensor/reduction/cumsum.py +0 -2
  593. maxframe/tensor/reduction/max.py +0 -2
  594. maxframe/tensor/reduction/mean.py +0 -2
  595. maxframe/tensor/reduction/min.py +0 -2
  596. maxframe/tensor/reduction/nanargmax.py +0 -2
  597. maxframe/tensor/reduction/nanargmin.py +0 -2
  598. maxframe/tensor/reduction/nancumprod.py +0 -2
  599. maxframe/tensor/reduction/nancumsum.py +0 -2
  600. maxframe/tensor/reduction/nanmax.py +0 -2
  601. maxframe/tensor/reduction/nanmean.py +0 -2
  602. maxframe/tensor/reduction/nanmin.py +0 -2
  603. maxframe/tensor/reduction/nanprod.py +0 -2
  604. maxframe/tensor/reduction/nanstd.py +0 -2
  605. maxframe/tensor/reduction/nansum.py +0 -2
  606. maxframe/tensor/reduction/nanvar.py +0 -2
  607. maxframe/tensor/reduction/prod.py +0 -2
  608. maxframe/tensor/reduction/std.py +0 -2
  609. maxframe/tensor/reduction/sum.py +0 -2
  610. maxframe/tensor/reduction/tests/test_reduction.py +1 -4
  611. maxframe/tensor/reduction/var.py +0 -2
  612. maxframe/tensor/reshape/__init__.py +0 -2
  613. maxframe/tensor/reshape/reshape.py +6 -5
  614. maxframe/tensor/reshape/tests/__init__.py +0 -2
  615. maxframe/tensor/reshape/tests/test_reshape.py +0 -2
  616. maxframe/tensor/sort/__init__.py +16 -0
  617. maxframe/tensor/sort/argsort.py +150 -0
  618. maxframe/tensor/sort/sort.py +295 -0
  619. maxframe/tensor/special/__init__.py +37 -0
  620. maxframe/tensor/special/core.py +38 -0
  621. maxframe/tensor/special/misc.py +142 -0
  622. maxframe/tensor/special/statistical.py +56 -0
  623. maxframe/tensor/statistics/__init__.py +5 -0
  624. maxframe/tensor/statistics/average.py +143 -0
  625. maxframe/tensor/statistics/bincount.py +133 -0
  626. maxframe/tensor/statistics/quantile.py +10 -8
  627. maxframe/tensor/ufunc/__init__.py +0 -2
  628. maxframe/tensor/ufunc/ufunc.py +0 -2
  629. maxframe/tensor/utils.py +21 -3
  630. maxframe/tests/test_protocol.py +3 -3
  631. maxframe/tests/test_utils.py +210 -1
  632. maxframe/tests/utils.py +59 -1
  633. maxframe/udf.py +76 -6
  634. maxframe/utils.py +418 -17
  635. {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/METADATA +5 -1
  636. maxframe-2.0.0.dist-info/RECORD +939 -0
  637. {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/WHEEL +1 -1
  638. maxframe_client/clients/framedriver.py +19 -3
  639. maxframe_client/fetcher.py +113 -6
  640. maxframe_client/session/odps.py +173 -38
  641. maxframe_client/session/task.py +3 -1
  642. maxframe_client/tests/test_session.py +41 -5
  643. maxframe-1.3.0.dist-info/RECORD +0 -705
  644. {maxframe-1.3.0.dist-info → maxframe-2.0.0.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,10 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from typing import List
16
+
15
17
  from ... import opcodes
18
+ from ...core import EntityData
16
19
  from ...serialization.serializables import AnyField, Int32Field
17
20
  from ...utils import no_default
18
21
  from ..operators import SERIES_TYPE, DataFrameOperator, DataFrameOperatorMixin
@@ -28,14 +31,15 @@ class DataFrameReplace(DataFrameOperator, DataFrameOperatorMixin):
28
31
  regex = AnyField("regex", default=None)
29
32
  method = AnyField("method", default=no_default)
30
33
 
31
- def _set_inputs(self, inputs):
32
- super()._set_inputs(inputs)
34
+ @classmethod
35
+ def _set_inputs(cls, op: "DataFrameReplace", inputs: List[EntityData]):
36
+ super()._set_inputs(op, inputs)
33
37
  input_iter = iter(inputs)
34
38
  next(input_iter)
35
- if isinstance(self.to_replace, SERIES_TYPE):
36
- self.to_replace = next(input_iter)
37
- if isinstance(self.value, SERIES_TYPE):
38
- self.value = next(input_iter)
39
+ if isinstance(op.to_replace, SERIES_TYPE):
40
+ op.to_replace = next(input_iter)
41
+ if isinstance(op.value, SERIES_TYPE):
42
+ op.value = next(input_iter)
39
43
 
40
44
  def __call__(self, df_or_series):
41
45
  inputs = [df_or_series]
@@ -64,7 +64,7 @@ def test_drop_na():
64
64
  df_raw = pd.DataFrame(np.nan, index=range(0, 20), columns=list("ABCDEFGHIJ"))
65
65
  for _ in range(30):
66
66
  df_raw.iloc[random.randint(0, 19), random.randint(0, 9)] = random.randint(0, 99)
67
- for rowid in range(random.randint(1, 5)):
67
+ for _ in range(random.randint(1, 5)):
68
68
  row = random.randint(0, 19)
69
69
  for idx in range(0, 10):
70
70
  df_raw.iloc[row, idx] = random.randint(0, 99)
@@ -79,7 +79,7 @@ def test_replace():
79
79
  df_raw = pd.DataFrame(-1, index=range(0, 20), columns=list("ABCDEFGHIJ"))
80
80
  for _ in range(30):
81
81
  df_raw.iloc[random.randint(0, 19), random.randint(0, 9)] = random.randint(0, 99)
82
- for rowid in range(random.randint(1, 5)):
82
+ for _ in range(random.randint(1, 5)):
83
83
  row = random.randint(0, 19)
84
84
  for idx in range(0, 10):
85
85
  df_raw.iloc[row, idx] = random.randint(0, 99)
@@ -15,7 +15,7 @@
15
15
  import numpy as np
16
16
  import pandas as pd
17
17
 
18
- from ..core import ENTITY_TYPE, OutputType
18
+ from ..core import OutputType
19
19
  from ..core.operator import Operator, ShuffleProxy, TileableOperatorMixin
20
20
  from ..tensor.core import TENSOR_TYPE
21
21
  from ..tensor.datasource import tensor as astensor
@@ -208,32 +208,6 @@ class DataFrameOperatorMixin(TileableOperatorMixin):
208
208
  inputs, shape=shape, dtype=dtype, categories_value=categories_value, **kw
209
209
  )[0]
210
210
 
211
- @classmethod
212
- def _process_groupby_params(cls, groupby_params):
213
- new_groupby_params = groupby_params.copy()
214
- if isinstance(groupby_params["by"], list):
215
- by = []
216
- for v in groupby_params["by"]:
217
- if isinstance(v, ENTITY_TYPE):
218
- by.append(cls.concat_tileable_chunks(v).chunks[0])
219
- else:
220
- by.append(v)
221
- new_groupby_params["by"] = by
222
- return new_groupby_params
223
-
224
- @classmethod
225
- def _get_groupby_inputs(cls, groupby, groupby_params):
226
- inputs = [groupby]
227
- chunk_inputs = list(groupby.chunks)
228
- if isinstance(groupby_params["by"], list):
229
- for chunk_v, v in zip(
230
- groupby_params["by"], groupby.op.groupby_params["by"]
231
- ):
232
- if isinstance(v, ENTITY_TYPE):
233
- inputs.append(v)
234
- chunk_inputs.append(chunk_v)
235
- return inputs, chunk_inputs
236
-
237
211
  @staticmethod
238
212
  def _process_input(x):
239
213
  from .initializer import DataFrame, Series
@@ -14,6 +14,7 @@
14
14
 
15
15
  import copy
16
16
  import functools
17
+ import inspect
17
18
  import itertools
18
19
  from collections import OrderedDict
19
20
  from collections.abc import Iterable
@@ -21,10 +22,13 @@ from typing import List
21
22
 
22
23
  import numpy as np
23
24
  import pandas as pd
25
+ import pyarrow as pa
24
26
 
25
27
  from ... import opcodes
26
28
  from ... import tensor as maxframe_tensor
27
29
  from ...core import ENTITY_TYPE, OutputType, enter_mode
30
+ from ...io.odpsio.schema import pandas_dtype_to_arrow_type
31
+ from ...lib.dtypes_extension import ArrowDtype
28
32
  from ...serialization.serializables import AnyField, BoolField, DictField, ListField
29
33
  from ...typing_ import TileableType
30
34
  from ...utils import lazy_import, pd_release_version
@@ -37,6 +41,7 @@ from .core import (
37
41
  ReductionPostStep,
38
42
  ReductionPreStep,
39
43
  )
44
+ from .unique import _unique
40
45
 
41
46
  cp = lazy_import("cupy", rename="cp")
42
47
  cudf = lazy_import("cudf")
@@ -71,6 +76,7 @@ _agg_functions = {
71
76
  "kurt": lambda x, skipna=True, bias=False: x.kurt(skipna=skipna, bias=bias),
72
77
  "kurtosis": lambda x, skipna=True, bias=False: x.kurtosis(skipna=skipna, bias=bias),
73
78
  "nunique": lambda x: x.nunique(),
79
+ "unique": lambda x: _unique(x, output_list_scalar=True),
74
80
  "median": lambda x, skipna=True: x.median(skipna=skipna),
75
81
  }
76
82
 
@@ -99,6 +105,46 @@ class DataFrameAggregate(DataFrameOperator, DataFrameOperatorMixin):
99
105
  [np.number, np.bool_] if op.numeric_only else [np.bool_]
100
106
  ).dtypes
101
107
 
108
+ def _fill_df_dtypes(self, in_df, dtypes):
109
+ if all(dt != np.dtype("O") for dt in dtypes):
110
+ return dtypes
111
+
112
+ if isinstance(self.func, dict):
113
+ col_func_it = self.func.items()
114
+ else:
115
+ assert in_df.ndim == 2
116
+ col_func_it = itertools.product(in_df.dtypes.index, self.func)
117
+
118
+ col_to_dt = dict(in_df.dtypes.items())
119
+
120
+ new_dt = OrderedDict()
121
+ for (col_name, func), (out_col_name, dt) in zip(col_func_it, dtypes.items()):
122
+ if dt != np.dtype("O"):
123
+ new_dt[out_col_name] = dt
124
+ elif func == "unique":
125
+ in_dt = col_to_dt[col_name]
126
+ if in_dt == np.dtype("O"):
127
+ in_dt = pd.StringDtype()
128
+ arrow_dt = pandas_dtype_to_arrow_type(in_dt)
129
+ new_dt[out_col_name] = ArrowDtype(pa.list_(arrow_dt))
130
+ else:
131
+ # do nothing as the result might be string
132
+ new_dt[out_col_name] = dt
133
+ return pd.Series(list(new_dt.values()), index=new_dt.keys())
134
+
135
+ def _fill_series_dtype(self, in_data, dtype):
136
+ if len(self.func) != 1 or dtype != np.dtype("O") or in_data.ndim > 1:
137
+ return dtype
138
+
139
+ if self.func[0] == "unique":
140
+ in_dt = in_data.dtype
141
+ if in_dt == np.dtype("O"):
142
+ in_dt = pd.StringDtype()
143
+ arrow_dt = pandas_dtype_to_arrow_type(in_dt)
144
+ return ArrowDtype(pa.list_(arrow_dt))
145
+ else:
146
+ return dtype
147
+
102
148
  def _calc_result_shape(self, df):
103
149
  if df.ndim == 2:
104
150
  if self.numeric_only:
@@ -114,16 +160,23 @@ class DataFrameAggregate(DataFrameOperator, DataFrameOperatorMixin):
114
160
  )
115
161
 
116
162
  result_df = test_obj.agg(self.raw_func, axis=self.axis, **self.raw_func_kw)
163
+ if isinstance(result_df, pd.DataFrame):
164
+ out_dtypes = self._fill_df_dtypes(df, result_df.dtypes)
165
+ elif isinstance(result_df, pd.Series):
166
+ dtype = self._fill_series_dtype(df, result_df.dtype)
167
+ out_dtypes = pd.Series([dtype], index=[result_df.name])
168
+ else:
169
+ out_dtypes = pd.Series([np.array(result_df).dtype], index=[None])
117
170
 
118
171
  if isinstance(result_df, pd.DataFrame):
119
172
  self.output_types = [OutputType.dataframe]
120
- return result_df.dtypes, result_df.index
173
+ return out_dtypes, result_df.index
121
174
  elif isinstance(result_df, pd.Series):
122
175
  self.output_types = [OutputType.series]
123
- return pd.Series([result_df.dtype], index=[result_df.name]), result_df.index
176
+ return out_dtypes, result_df.index
124
177
  else:
125
178
  self.output_types = [OutputType.scalar]
126
- return np.array(result_df).dtype, None
179
+ return out_dtypes.iloc[0], None
127
180
 
128
181
  def __call__(self, df, output_type=None, dtypes=None, index=None):
129
182
  self._output_types = df.op.output_types
@@ -315,7 +368,79 @@ def compile_reduction_funcs(op: DataFrameAggregate, input: TileableType):
315
368
 
316
369
 
317
370
  def aggregate(df, func=None, axis=0, **kw):
371
+ """
372
+ Aggregate using one or more operations over the specified axis.
373
+
374
+ Parameters
375
+ ----------
376
+ df : DataFrame, Series
377
+ Object to aggregate.
378
+ func : list or dict
379
+ Function to use for aggregating the data.
380
+ axis : {0 or ‘index’, 1 or ‘columns’}, default 0
381
+ If 0 or ‘index’: apply function to each column. If 1 or ‘columns’: apply function to each row.
382
+ kw
383
+ Keyword arguments to pass to func.
384
+
385
+ Returns
386
+ -------
387
+ scalar, Series or DataFrame
388
+ The return can be:
389
+
390
+ * scalar : when Series.agg is called with single function
391
+ * Series : when DataFrame.agg is called with a single function
392
+ * DataFrame : when DataFrame.agg is called with several functions
393
+
394
+ Examples
395
+ --------
396
+ >>> import maxframe.dataframe as md
397
+ >>> df = md.DataFrame([[1, 2, 3],
398
+ ... [4, 5, 6],
399
+ ... [7, 8, 9],
400
+ ... [np.nan, np.nan, np.nan]],
401
+ ... columns=['A', 'B', 'C']).execute()
402
+
403
+ Aggregate these functions over the rows.
404
+
405
+ >>> df.agg(['sum', 'min']).execute()
406
+ A B C
407
+ min 1.0 2.0 3.0
408
+ sum 12.0 15.0 18.0
409
+
410
+ Different aggregations per column.
411
+
412
+ >>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']}).execute()
413
+ A B
414
+ max NaN 8.0
415
+ min 1.0 2.0
416
+ sum 12.0 NaN
417
+
418
+ Aggregate different functions over the columns and rename the index of the resulting DataFrame.
419
+
420
+ >>> df.agg(x=('A', 'max'), y=('B', 'min'), z=('C', 'mean')).execute()
421
+ A B C
422
+ x 7.0 NaN NaN
423
+ y NaN 2.0 NaN
424
+ z NaN NaN 6.0
425
+
426
+ >>> s = md.Series([1, 2, 3, 4])
427
+ >>> s.agg('min').execute()
428
+ 1
429
+
430
+ >>> s.agg(['min', 'max']).execute()
431
+ max 4
432
+ min 1
433
+ """
318
434
  axis = validate_axis(axis, df)
435
+ if func == "unique":
436
+ # workaround for direct call of unique function which
437
+ # returns a tensor directly
438
+ func = getattr(df, func)
439
+ if "axis" in inspect.getfullargspec(func).args:
440
+ kw = kw.copy()
441
+ kw["axis"] = axis
442
+ return func(**kw)
443
+
319
444
  if (
320
445
  df.ndim == 2
321
446
  and isinstance(func, dict)
@@ -226,7 +226,9 @@ class DataFrameReductionMixin(DataFrameOperatorMixin):
226
226
  # handle pandas Dtypes in the future more carefully.
227
227
  reduced_dtype = np.dtype("O")
228
228
  else:
229
- reduced_dtype = np.find_common_type(dtypes, [])
229
+ from pandas.core.dtypes.cast import find_common_type
230
+
231
+ reduced_dtype = find_common_type(dtypes)
230
232
 
231
233
  if level is not None:
232
234
  return self._call_groupby_level(df[reduced_cols], level)
@@ -404,6 +406,7 @@ class ReductionPostStep(NamedTuple):
404
406
  func_name: str
405
407
  columns: Optional[List[str]]
406
408
  func_idl: bytes
409
+ post_func_aliases: Optional[List[str]] = None
407
410
 
408
411
 
409
412
  class ReductionSteps(NamedTuple):
@@ -462,6 +465,7 @@ class ReductionCompiler:
462
465
  self._output_key_to_agg_steps = dict()
463
466
  self._output_key_to_post_steps = dict()
464
467
  self._output_key_to_post_cols = dict()
468
+ self._output_key_to_col_func_mapping = dict()
465
469
 
466
470
  @classmethod
467
471
  def _check_function_valid(cls, func):
@@ -531,6 +535,14 @@ class ReductionCompiler:
531
535
  self._output_key_to_post_steps[step.output_key] = step
532
536
  self._update_col_dict(self._output_key_to_post_cols, step.output_key, cols)
533
537
 
538
+ if cols is not None:
539
+ col_name_map = (
540
+ self._output_key_to_col_func_mapping.get(step.output_key) or {}
541
+ )
542
+ for col in cols:
543
+ col_name_map[col] = func_name
544
+ self._output_key_to_col_func_mapping[step.output_key] = col_name_map
545
+
534
546
  @staticmethod
535
547
  def _build_mock_return_object(func, input_dtype, ndim):
536
548
  from ..initializer import DataFrame as MaxDataFrame
@@ -812,11 +824,12 @@ class ReductionCompiler:
812
824
  agg_funcs.append(step)
813
825
 
814
826
  for key, step in self._output_key_to_post_steps.items():
815
- cols = self._output_key_to_post_cols[key]
816
- if cols and set(cols) == set(referred_cols):
817
- post_cols = None
818
- else:
819
- post_cols = cols
827
+ post_cols = self._output_key_to_post_cols[key]
828
+ func_renames = None
829
+ if post_cols:
830
+ col_map = self._output_key_to_col_func_mapping.get(key)
831
+ if col_map:
832
+ func_renames = [col_map[c] for c in post_cols]
820
833
 
821
834
  func_name = step.func_name
822
835
  if self._lambda_counter == 1 and step.func_name == "<lambda_0>":
@@ -831,6 +844,7 @@ class ReductionCompiler:
831
844
  func_name,
832
845
  post_cols,
833
846
  step.func_idl,
847
+ func_renames,
834
848
  )
835
849
  )
836
850
 
@@ -31,7 +31,7 @@ def median_series(df, axis=None, skipna=True, level=None, method=None):
31
31
  axis=axis,
32
32
  skipna=skipna,
33
33
  level=level,
34
- output_types=[OutputType.scalar if level is not None else OutputType.scalar],
34
+ output_types=[OutputType.series if level is not None else OutputType.scalar],
35
35
  method=method,
36
36
  )
37
37
  return op(df)
@@ -19,9 +19,11 @@ from typing import NamedTuple
19
19
 
20
20
  import numpy as np
21
21
  import pandas as pd
22
+ import pyarrow as pa
22
23
  import pytest
23
24
 
24
25
  from .... import dataframe as md
26
+ from ....lib.dtypes_extension import ArrowDtype
25
27
  from ....tensor import Tensor
26
28
  from ....tests.utils import assert_mf_index_dtype
27
29
  from ...core import DataFrame, IndexValue, OutputType, Series
@@ -29,6 +31,7 @@ from ...datasource.dataframe import from_pandas as from_pandas_df
29
31
  from ...datasource.series import from_pandas as from_pandas_series
30
32
  from .. import (
31
33
  CustomReduction,
34
+ DataFrameAggregate,
32
35
  DataFrameAll,
33
36
  DataFrameAny,
34
37
  DataFrameCount,
@@ -196,6 +199,36 @@ def test_nunique():
196
199
  assert isinstance(result2.op, DataFrameNunique)
197
200
 
198
201
 
202
+ def test_unique():
203
+ pd_df = pd.DataFrame(
204
+ {
205
+ "col1": pd.Series(np.random.choice(["a", "b", "c", "d"], 100)),
206
+ "col2": pd.Series(np.random.choice([0, 1, 2, 3], 100)),
207
+ }
208
+ )
209
+ df = from_pandas_df(pd_df, chunk_size=3)
210
+ result = df.agg(["unique"])
211
+
212
+ assert result.shape == (1, 2)
213
+ assert result.op.output_types[0] == OutputType.dataframe
214
+ assert isinstance(result.op, DataFrameAggregate)
215
+ pd.testing.assert_series_equal(
216
+ result.dtypes,
217
+ pd.Series(
218
+ [ArrowDtype(pa.list_(pa.string())), ArrowDtype(pa.list_(pa.int64()))],
219
+ index=pd_df.columns,
220
+ ),
221
+ )
222
+
223
+ pd_s = pd.Series(np.random.choice(["a", "b", "c", "d"], 100))
224
+ ms = from_pandas_series(pd_s, chunk_size=3)
225
+ result = ms.agg(["unique"])
226
+ assert result.shape == (1,)
227
+ assert result.op.output_types[0] == OutputType.series
228
+ assert isinstance(result.op, DataFrameAggregate)
229
+ assert result.dtype == ArrowDtype(pa.list_(pa.string()))
230
+
231
+
199
232
  def test_dataframe_aggregate():
200
233
  data = pd.DataFrame(np.random.rand(20, 19))
201
234
  agg_funcs = [
@@ -12,14 +12,21 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
-
16
15
  import numpy as np
17
16
  import pandas as pd
17
+ import pyarrow as pa
18
18
 
19
19
  from ... import opcodes
20
20
  from ...core import ENTITY_TYPE, OutputType
21
+ from ...io.odpsio.schema import (
22
+ pandas_dtype_to_arrow_type,
23
+ pandas_dtypes_to_arrow_schema,
24
+ )
25
+ from ...lib.dtypes_extension import ArrowDtype
26
+ from ...serialization.serializables import BoolField
21
27
  from ...tensor.core import TensorOrder
22
28
  from ...utils import lazy_import
29
+ from ..core import DATAFRAME_TYPE
23
30
  from ..initializer import Series as asseries
24
31
  from .core import CustomReduction, DataFrameReductionMixin, DataFrameReductionOperator
25
32
 
@@ -40,6 +47,12 @@ class DataFrameUnique(DataFrameReductionOperator, DataFrameReductionMixin):
40
47
  _op_type_ = opcodes.UNIQUE
41
48
  _func_name = "unique"
42
49
 
50
+ output_list_scalar = BoolField("output_list_scalar", default=False)
51
+
52
+ @property
53
+ def is_atomic(self):
54
+ return True
55
+
43
56
  @classmethod
44
57
  def get_reduction_callable(cls, op):
45
58
  return UniqueReduction(name=cls._func_name, is_gpu=op.is_gpu())
@@ -47,10 +60,43 @@ class DataFrameUnique(DataFrameReductionOperator, DataFrameReductionMixin):
47
60
  def __call__(self, a):
48
61
  if not isinstance(a, ENTITY_TYPE):
49
62
  a = asseries(a)
50
- self.output_types = [OutputType.tensor]
51
- return self.new_tileables(
52
- [a], shape=(np.nan,), dtype=a.dtype, order=TensorOrder.C_ORDER
53
- )[0]
63
+ self.axis = 0
64
+ if isinstance(a, DATAFRAME_TYPE):
65
+ assert self.output_list_scalar and self.axis == 0
66
+ pa_schema = pandas_dtypes_to_arrow_schema(a.dtypes, unknown_as_string=True)
67
+ if len(set(pa_schema.types)) == 1:
68
+ out_dtype = ArrowDtype(pa.list_(pa_schema.types[0]))
69
+ else:
70
+ out_dtype = np.dtype("O")
71
+ kw = {
72
+ "dtype": out_dtype,
73
+ "index_value": a.columns_value,
74
+ "shape": (a.shape[1],),
75
+ }
76
+ self.output_types = [OutputType.series]
77
+ return self.new_tileables([a], **kw)[0]
78
+ else:
79
+ if self.output_list_scalar:
80
+ arrow_type = pa.list_(
81
+ pandas_dtype_to_arrow_type(a.dtype, unknown_as_string=True)
82
+ )
83
+ kw = {
84
+ "dtype": ArrowDtype(arrow_type),
85
+ "shape": (),
86
+ }
87
+ self.output_types = [OutputType.scalar]
88
+ else:
89
+ kw = {
90
+ "dtype": a.dtype,
91
+ "shape": (np.nan,),
92
+ }
93
+ self.output_types = [OutputType.tensor]
94
+ return self.new_tileables([a], order=TensorOrder.C_ORDER, **kw)[0]
95
+
96
+
97
+ def _unique(values, method="tree", **kwargs):
98
+ op = DataFrameUnique(method=method, **kwargs)
99
+ return op(values)
54
100
 
55
101
 
56
102
  def unique(values, method="tree"):
@@ -62,6 +108,7 @@ def unique(values, method="tree"):
62
108
  values : 1d array-like
63
109
  method : 'shuffle' or 'tree', 'tree' method provide a better performance, 'shuffle'
64
110
  is recommended if the number of unique values is very large.
111
+
65
112
  See Also
66
113
  --------
67
114
  Index.unique
@@ -86,5 +133,4 @@ def unique(values, method="tree"):
86
133
  array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')],
87
134
  dtype=object)
88
135
  """
89
- op = DataFrameUnique(method=method)
90
- return op(values)
136
+ return _unique(values, method=method)
@@ -12,11 +12,13 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from typing import List
16
+
15
17
  import numpy as np
16
18
  import pandas as pd
17
19
 
18
20
  from ... import opcodes
19
- from ...core import ENTITY_TYPE
21
+ from ...core import ENTITY_TYPE, EntityData
20
22
  from ...serialization.serializables import AnyField, BoolField, Int32Field, KeyField
21
23
  from ...tensor.utils import filter_inputs
22
24
  from ..core import DATAFRAME_TYPE, SERIES_TYPE
@@ -33,12 +35,13 @@ class DataFrameCorr(DataFrameOperator, DataFrameOperatorMixin):
33
35
  axis = Int32Field("axis", default=None)
34
36
  drop = BoolField("drop", default=None)
35
37
 
36
- def _set_inputs(self, inputs):
37
- super()._set_inputs(inputs)
38
- inputs_iter = iter(self._inputs)
38
+ @classmethod
39
+ def _set_inputs(cls, op: "DataFrameCorr", inputs: List[EntityData]):
40
+ super()._set_inputs(op, inputs)
41
+ inputs_iter = iter(op._inputs)
39
42
  next(inputs_iter)
40
- if isinstance(self.other, ENTITY_TYPE):
41
- self.other = next(inputs_iter)
43
+ if isinstance(op.other, ENTITY_TYPE):
44
+ op.other = next(inputs_iter)
42
45
 
43
46
  def __call__(self, df_or_series):
44
47
  if isinstance(df_or_series, SERIES_TYPE):
@@ -12,12 +12,14 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from typing import List
16
+
15
17
  import numpy as np
16
18
  import pandas as pd
17
19
  from pandas.core.dtypes.cast import find_common_type
18
20
 
19
21
  from ... import opcodes
20
- from ...core import ENTITY_TYPE
22
+ from ...core import ENTITY_TYPE, EntityData
21
23
  from ...serialization.serializables import (
22
24
  AnyField,
23
25
  BoolField,
@@ -50,11 +52,12 @@ class DataFrameQuantile(DataFrameOperator, DataFrameOperatorMixin):
50
52
  def __init__(self, output_types=None, **kw):
51
53
  super().__init__(_output_types=output_types, **kw)
52
54
 
53
- def _set_inputs(self, inputs):
54
- super()._set_inputs(inputs)
55
- self.input = self._inputs[0]
56
- if isinstance(self.q, TENSOR_TYPE):
57
- self.q = self._inputs[-1]
55
+ @classmethod
56
+ def _set_inputs(cls, op: "DataFrameQuantile", inputs: List[EntityData]):
57
+ super()._set_inputs(op, inputs)
58
+ op.input = op._inputs[0]
59
+ if isinstance(op.q, TENSOR_TYPE):
60
+ op.q = op._inputs[-1]
58
61
 
59
62
  def _calc_dtype_on_axis_1(self, a, dtypes):
60
63
  quantile_dtypes = []
@@ -12,13 +12,14 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Any
15
+ from typing import Any, List
16
16
 
17
17
  import numpy as np
18
18
  import pandas as pd
19
19
  from pandas.api.types import is_dict_like, is_scalar
20
20
 
21
21
  from ... import opcodes
22
+ from ...core import EntityData
22
23
  from ...serialization.serializables import AnyField, BoolField, KeyField, StringField
23
24
  from ...tensor import tensor as astensor
24
25
  from ..core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
@@ -54,9 +55,10 @@ class DataFrameToDatetime(DataFrameOperator, DataFrameOperatorMixin):
54
55
  if k not in self._no_copy_attrs_ and k != "arg" and hasattr(self, k)
55
56
  )
56
57
 
57
- def _set_inputs(self, inputs):
58
- super()._set_inputs(inputs)
59
- self.arg = self._inputs[0]
58
+ @classmethod
59
+ def _set_inputs(cls, op: "DataFrameToDatetime", inputs: List[EntityData]):
60
+ super()._set_inputs(op, inputs)
61
+ op.arg = op._inputs[0]
60
62
 
61
63
  def __call__(self, arg):
62
64
  if is_scalar(arg):