maxframe 1.3.1__cp37-cp37m-win_amd64.whl → 2.0.0__cp37-cp37m-win_amd64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (640)
  1. maxframe/_utils.cp37-win_amd64.pyd +0 -0
  2. maxframe/_utils.pyi +21 -0
  3. maxframe/_utils.pyx +4 -3
  4. maxframe/codegen/__init__.py +27 -0
  5. maxframe/{codegen.py → codegen/core.py} +49 -43
  6. maxframe/codegen/spe/__init__.py +16 -0
  7. maxframe/codegen/spe/core.py +307 -0
  8. maxframe/codegen/spe/dataframe/__init__.py +37 -0
  9. maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
  10. maxframe/codegen/spe/dataframe/accessors/base.py +53 -0
  11. maxframe/codegen/spe/dataframe/accessors/dict_.py +194 -0
  12. maxframe/codegen/spe/dataframe/accessors/list_.py +80 -0
  13. maxframe/codegen/spe/dataframe/arithmetic.py +84 -0
  14. maxframe/codegen/spe/dataframe/datasource.py +181 -0
  15. maxframe/codegen/spe/dataframe/datastore.py +204 -0
  16. maxframe/codegen/spe/dataframe/extensions.py +63 -0
  17. maxframe/codegen/spe/dataframe/fetch.py +26 -0
  18. maxframe/codegen/spe/dataframe/groupby.py +224 -0
  19. maxframe/codegen/spe/dataframe/indexing.py +238 -0
  20. maxframe/codegen/spe/dataframe/merge.py +73 -0
  21. maxframe/codegen/spe/dataframe/misc.py +286 -0
  22. maxframe/codegen/spe/dataframe/missing.py +64 -0
  23. maxframe/codegen/spe/dataframe/reduction.py +160 -0
  24. maxframe/codegen/spe/dataframe/sort.py +83 -0
  25. maxframe/codegen/spe/dataframe/statistics.py +46 -0
  26. maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
  27. maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
  29. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +310 -0
  30. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +137 -0
  31. maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
  32. maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
  33. maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
  34. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +76 -0
  35. maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
  36. maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
  37. maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
  38. maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
  39. maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
  40. maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
  41. maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
  42. maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
  43. maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
  44. maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
  45. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +234 -0
  46. maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
  47. maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
  48. maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
  49. maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
  50. maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
  51. maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
  52. maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
  53. maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
  54. maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
  55. maxframe/codegen/spe/dataframe/tests/test_groupby.py +225 -0
  56. maxframe/codegen/spe/dataframe/tests/test_merge.py +400 -0
  57. maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
  58. maxframe/codegen/spe/dataframe/tests/test_sort.py +159 -0
  59. maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
  60. maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
  61. maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
  62. maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
  63. maxframe/codegen/spe/dataframe/tseries.py +46 -0
  64. maxframe/codegen/spe/dataframe/udf.py +62 -0
  65. maxframe/codegen/spe/dataframe/value_counts.py +31 -0
  66. maxframe/codegen/spe/dataframe/window.py +65 -0
  67. maxframe/codegen/spe/learn/__init__.py +15 -0
  68. maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
  69. maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
  70. maxframe/codegen/spe/learn/contrib/models.py +41 -0
  71. maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
  72. maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
  73. maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
  74. maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
  75. maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
  76. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +98 -0
  77. maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
  78. maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
  79. maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
  80. maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
  81. maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
  82. maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
  83. maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
  84. maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
  85. maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
  86. maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
  87. maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
  88. maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
  89. maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
  90. maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
  91. maxframe/codegen/spe/learn/utils/__init__.py +15 -0
  92. maxframe/codegen/spe/learn/utils/checks.py +55 -0
  93. maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
  94. maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
  95. maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
  96. maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
  97. maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
  98. maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
  99. maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
  100. maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
  101. maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
  102. maxframe/codegen/spe/learn/utils/validation.py +35 -0
  103. maxframe/codegen/spe/objects.py +26 -0
  104. maxframe/codegen/spe/remote.py +29 -0
  105. maxframe/codegen/spe/tensor/__init__.py +28 -0
  106. maxframe/codegen/spe/tensor/arithmetic.py +95 -0
  107. maxframe/codegen/spe/tensor/core.py +41 -0
  108. maxframe/codegen/spe/tensor/datasource.py +165 -0
  109. maxframe/codegen/spe/tensor/extensions.py +35 -0
  110. maxframe/codegen/spe/tensor/fetch.py +26 -0
  111. maxframe/codegen/spe/tensor/indexing.py +63 -0
  112. maxframe/codegen/spe/tensor/linalg.py +63 -0
  113. maxframe/codegen/spe/tensor/merge.py +31 -0
  114. maxframe/codegen/spe/tensor/misc.py +121 -0
  115. maxframe/codegen/spe/tensor/random.py +29 -0
  116. maxframe/codegen/spe/tensor/reduction.py +39 -0
  117. maxframe/codegen/spe/tensor/reshape.py +26 -0
  118. maxframe/codegen/spe/tensor/sort.py +42 -0
  119. maxframe/codegen/spe/tensor/special.py +35 -0
  120. maxframe/codegen/spe/tensor/statistics.py +24 -0
  121. maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
  122. maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
  123. maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
  124. maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
  125. maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
  126. maxframe/codegen/spe/tensor/tests/test_linalg.py +38 -0
  127. maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
  128. maxframe/codegen/spe/tensor/tests/test_misc.py +94 -0
  129. maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
  130. maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
  131. maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
  132. maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
  133. maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
  134. maxframe/codegen/spe/tensor/tests/test_statistics.py +29 -0
  135. maxframe/codegen/spe/tests/__init__.py +13 -0
  136. maxframe/codegen/spe/tests/test_remote.py +29 -0
  137. maxframe/codegen/spe/tests/test_spe_codegen.py +141 -0
  138. maxframe/codegen/spe/utils.py +54 -0
  139. maxframe/codegen/tests/__init__.py +13 -0
  140. maxframe/{tests → codegen/tests}/test_codegen.py +3 -5
  141. maxframe/config/__init__.py +1 -1
  142. maxframe/config/config.py +50 -23
  143. maxframe/config/tests/test_config.py +4 -12
  144. maxframe/config/validators.py +5 -0
  145. maxframe/conftest.py +38 -10
  146. maxframe/core/__init__.py +1 -0
  147. maxframe/core/context.py +110 -0
  148. maxframe/core/entity/__init__.py +1 -0
  149. maxframe/core/entity/core.py +0 -7
  150. maxframe/core/entity/objects.py +19 -5
  151. maxframe/core/entity/output_types.py +11 -0
  152. maxframe/core/entity/tests/test_objects.py +11 -12
  153. maxframe/core/entity/tileables.py +3 -1
  154. maxframe/core/entity/utils.py +15 -0
  155. maxframe/core/graph/__init__.py +6 -1
  156. maxframe/core/graph/builder/base.py +5 -1
  157. maxframe/core/graph/core.cp37-win_amd64.pyd +0 -0
  158. maxframe/core/graph/core.pyx +17 -6
  159. maxframe/core/graph/entity.py +18 -6
  160. maxframe/core/operator/__init__.py +8 -3
  161. maxframe/core/operator/base.py +35 -12
  162. maxframe/core/operator/core.py +37 -14
  163. maxframe/core/operator/fetch.py +5 -18
  164. maxframe/core/operator/objects.py +0 -20
  165. maxframe/core/operator/shuffle.py +6 -72
  166. maxframe/dataframe/__init__.py +1 -0
  167. maxframe/dataframe/accessors/datetime_/core.py +7 -4
  168. maxframe/dataframe/accessors/string_/core.py +9 -6
  169. maxframe/dataframe/arithmetic/core.py +31 -20
  170. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +6 -0
  171. maxframe/dataframe/core.py +98 -91
  172. maxframe/dataframe/datasource/core.py +8 -1
  173. maxframe/dataframe/datasource/date_range.py +8 -0
  174. maxframe/dataframe/datasource/from_index.py +9 -5
  175. maxframe/dataframe/datasource/from_records.py +9 -2
  176. maxframe/dataframe/datasource/from_tensor.py +32 -21
  177. maxframe/dataframe/datasource/read_csv.py +8 -2
  178. maxframe/dataframe/datasource/read_odps_query.py +109 -19
  179. maxframe/dataframe/datasource/read_odps_table.py +20 -5
  180. maxframe/dataframe/datasource/read_parquet.py +8 -3
  181. maxframe/dataframe/datasource/tests/test_datasource.py +80 -1
  182. maxframe/dataframe/datastore/tests/test_to_odps.py +52 -1
  183. maxframe/dataframe/datastore/to_csv.py +7 -3
  184. maxframe/dataframe/datastore/to_odps.py +42 -6
  185. maxframe/dataframe/extensions/__init__.py +6 -1
  186. maxframe/dataframe/extensions/apply_chunk.py +96 -136
  187. maxframe/dataframe/extensions/flatjson.py +3 -2
  188. maxframe/dataframe/extensions/flatmap.py +15 -7
  189. maxframe/dataframe/fetch/core.py +12 -1
  190. maxframe/dataframe/groupby/__init__.py +7 -0
  191. maxframe/dataframe/groupby/aggregation.py +9 -8
  192. maxframe/dataframe/groupby/apply.py +50 -74
  193. maxframe/dataframe/groupby/apply_chunk.py +393 -0
  194. maxframe/dataframe/groupby/core.py +80 -17
  195. maxframe/dataframe/groupby/extensions.py +26 -0
  196. maxframe/dataframe/groupby/fill.py +9 -4
  197. maxframe/dataframe/groupby/sample.py +7 -7
  198. maxframe/dataframe/groupby/tests/test_groupby.py +3 -3
  199. maxframe/dataframe/groupby/transform.py +57 -54
  200. maxframe/dataframe/indexing/align.py +7 -6
  201. maxframe/dataframe/indexing/getitem.py +9 -8
  202. maxframe/dataframe/indexing/iloc.py +28 -23
  203. maxframe/dataframe/indexing/insert.py +7 -3
  204. maxframe/dataframe/indexing/loc.py +9 -8
  205. maxframe/dataframe/indexing/reindex.py +36 -30
  206. maxframe/dataframe/indexing/rename_axis.py +18 -10
  207. maxframe/dataframe/indexing/reset_index.py +0 -2
  208. maxframe/dataframe/indexing/sample.py +13 -9
  209. maxframe/dataframe/indexing/set_axis.py +9 -6
  210. maxframe/dataframe/indexing/setitem.py +8 -5
  211. maxframe/dataframe/indexing/where.py +12 -9
  212. maxframe/dataframe/merge/__init__.py +0 -1
  213. maxframe/dataframe/merge/concat.py +10 -31
  214. maxframe/dataframe/merge/merge.py +2 -24
  215. maxframe/dataframe/misc/__init__.py +6 -0
  216. maxframe/dataframe/misc/_duplicate.py +7 -3
  217. maxframe/dataframe/misc/apply.py +106 -139
  218. maxframe/dataframe/misc/astype.py +3 -2
  219. maxframe/dataframe/misc/case_when.py +11 -7
  220. maxframe/dataframe/misc/cut.py +11 -10
  221. maxframe/dataframe/misc/describe.py +7 -3
  222. maxframe/dataframe/misc/drop.py +13 -11
  223. maxframe/dataframe/misc/eval.py +0 -2
  224. maxframe/dataframe/misc/get_dummies.py +78 -49
  225. maxframe/dataframe/misc/isin.py +13 -10
  226. maxframe/dataframe/misc/map.py +21 -6
  227. maxframe/dataframe/misc/melt.py +8 -1
  228. maxframe/dataframe/misc/pivot.py +232 -0
  229. maxframe/dataframe/misc/pivot_table.py +52 -40
  230. maxframe/dataframe/misc/rechunk.py +59 -0
  231. maxframe/dataframe/misc/shift.py +7 -4
  232. maxframe/dataframe/misc/stack.py +5 -3
  233. maxframe/dataframe/misc/tests/test_misc.py +167 -1
  234. maxframe/dataframe/misc/transform.py +63 -65
  235. maxframe/dataframe/misc/value_counts.py +7 -4
  236. maxframe/dataframe/missing/dropna.py +16 -7
  237. maxframe/dataframe/missing/fillna.py +18 -10
  238. maxframe/dataframe/missing/replace.py +10 -6
  239. maxframe/dataframe/missing/tests/test_missing.py +2 -2
  240. maxframe/dataframe/operators.py +1 -27
  241. maxframe/dataframe/reduction/aggregation.py +65 -3
  242. maxframe/dataframe/reduction/core.py +3 -1
  243. maxframe/dataframe/reduction/median.py +1 -1
  244. maxframe/dataframe/reduction/tests/test_reduction.py +33 -0
  245. maxframe/dataframe/reduction/unique.py +53 -7
  246. maxframe/dataframe/statistics/corr.py +9 -6
  247. maxframe/dataframe/statistics/quantile.py +9 -6
  248. maxframe/dataframe/tseries/to_datetime.py +6 -4
  249. maxframe/dataframe/utils.py +219 -31
  250. maxframe/dataframe/window/rolling.py +7 -4
  251. maxframe/env.py +1 -0
  252. maxframe/errors.py +9 -0
  253. maxframe/extension.py +13 -2
  254. maxframe/io/objects/core.py +67 -51
  255. maxframe/io/objects/tensor.py +73 -17
  256. maxframe/io/objects/tests/test_object_io.py +10 -55
  257. maxframe/io/odpsio/arrow.py +15 -2
  258. maxframe/io/odpsio/schema.py +43 -13
  259. maxframe/io/odpsio/tableio.py +63 -11
  260. maxframe/io/odpsio/tests/test_arrow.py +1 -2
  261. maxframe/io/odpsio/tests/test_schema.py +114 -1
  262. maxframe/io/odpsio/tests/test_tableio.py +42 -0
  263. maxframe/io/odpsio/tests/test_volumeio.py +21 -58
  264. maxframe/io/odpsio/volumeio.py +23 -8
  265. maxframe/learn/__init__.py +2 -2
  266. maxframe/learn/contrib/__init__.py +2 -2
  267. maxframe/learn/contrib/graph/connected_components.py +2 -1
  268. maxframe/learn/contrib/lightgbm/__init__.py +33 -0
  269. maxframe/learn/contrib/lightgbm/_predict.py +138 -0
  270. maxframe/learn/contrib/lightgbm/_train.py +163 -0
  271. maxframe/learn/contrib/lightgbm/callback.py +114 -0
  272. maxframe/learn/contrib/lightgbm/classifier.py +199 -0
  273. maxframe/learn/contrib/lightgbm/core.py +372 -0
  274. maxframe/learn/contrib/lightgbm/dataset.py +153 -0
  275. maxframe/learn/contrib/lightgbm/regressor.py +29 -0
  276. maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
  277. maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
  278. maxframe/learn/contrib/models.py +38 -9
  279. maxframe/learn/contrib/utils.py +55 -0
  280. maxframe/learn/contrib/xgboost/callback.py +86 -0
  281. maxframe/learn/contrib/xgboost/classifier.py +26 -30
  282. maxframe/learn/contrib/xgboost/core.py +54 -42
  283. maxframe/learn/contrib/xgboost/dmatrix.py +19 -12
  284. maxframe/learn/contrib/xgboost/predict.py +16 -9
  285. maxframe/learn/contrib/xgboost/regressor.py +28 -27
  286. maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
  287. maxframe/learn/contrib/xgboost/train.py +59 -16
  288. maxframe/learn/core.py +252 -0
  289. maxframe/learn/datasets/__init__.py +20 -0
  290. maxframe/learn/datasets/samples_generator.py +628 -0
  291. maxframe/learn/linear_model/__init__.py +15 -0
  292. maxframe/learn/linear_model/_base.py +163 -0
  293. maxframe/learn/linear_model/_lin_reg.py +175 -0
  294. maxframe/learn/metrics/__init__.py +25 -0
  295. maxframe/learn/metrics/_check_targets.py +95 -0
  296. maxframe/learn/metrics/_classification.py +1121 -0
  297. maxframe/learn/metrics/_regression.py +256 -0
  298. maxframe/learn/model_selection/__init__.py +15 -0
  299. maxframe/learn/model_selection/_split.py +451 -0
  300. maxframe/learn/model_selection/tests/__init__.py +13 -0
  301. maxframe/learn/model_selection/tests/test_split.py +156 -0
  302. maxframe/learn/preprocessing/__init__.py +16 -0
  303. maxframe/learn/preprocessing/_data/__init__.py +17 -0
  304. maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
  305. maxframe/learn/preprocessing/_data/normalize.py +127 -0
  306. maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
  307. maxframe/learn/preprocessing/_data/utils.py +79 -0
  308. maxframe/learn/preprocessing/_label/__init__.py +16 -0
  309. maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
  310. maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
  311. maxframe/learn/utils/__init__.py +4 -0
  312. maxframe/learn/utils/_encode.py +314 -0
  313. maxframe/learn/utils/checks.py +161 -0
  314. maxframe/learn/utils/core.py +33 -0
  315. maxframe/learn/utils/extmath.py +176 -0
  316. maxframe/learn/utils/multiclass.py +292 -0
  317. maxframe/learn/utils/shuffle.py +114 -0
  318. maxframe/learn/utils/sparsefuncs.py +87 -0
  319. maxframe/learn/utils/validation.py +775 -0
  320. maxframe/lib/__init__.py +0 -2
  321. maxframe/lib/compat.py +145 -0
  322. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  323. maxframe/lib/mmh3.cp37-win_amd64.pyd +0 -0
  324. maxframe/lib/sparse/__init__.py +10 -15
  325. maxframe/lib/sparse/array.py +45 -33
  326. maxframe/lib/sparse/core.py +0 -2
  327. maxframe/lib/sparse/linalg.py +31 -0
  328. maxframe/lib/sparse/matrix.py +5 -2
  329. maxframe/lib/sparse/tests/__init__.py +0 -2
  330. maxframe/lib/sparse/tests/test_sparse.py +53 -53
  331. maxframe/lib/sparse/vector.py +0 -2
  332. maxframe/mixin.py +59 -2
  333. maxframe/opcodes.py +13 -5
  334. maxframe/protocol.py +67 -14
  335. maxframe/remote/core.py +16 -14
  336. maxframe/remote/run_script.py +6 -3
  337. maxframe/serialization/__init__.py +2 -0
  338. maxframe/serialization/core.cp37-win_amd64.pyd +0 -0
  339. maxframe/serialization/core.pxd +3 -0
  340. maxframe/serialization/core.pyi +3 -1
  341. maxframe/serialization/core.pyx +82 -4
  342. maxframe/serialization/pandas.py +5 -1
  343. maxframe/serialization/serializables/core.py +6 -5
  344. maxframe/serialization/serializables/field.py +2 -2
  345. maxframe/serialization/serializables/tests/test_field_type.py +3 -5
  346. maxframe/serialization/tests/test_serial.py +27 -0
  347. maxframe/session.py +4 -71
  348. maxframe/sperunner.py +165 -0
  349. maxframe/tensor/__init__.py +35 -2
  350. maxframe/tensor/arithmetic/__init__.py +2 -4
  351. maxframe/tensor/arithmetic/abs.py +0 -2
  352. maxframe/tensor/arithmetic/absolute.py +0 -2
  353. maxframe/tensor/arithmetic/add.py +34 -4
  354. maxframe/tensor/arithmetic/angle.py +0 -2
  355. maxframe/tensor/arithmetic/arccos.py +1 -4
  356. maxframe/tensor/arithmetic/arccosh.py +1 -3
  357. maxframe/tensor/arithmetic/arcsin.py +0 -2
  358. maxframe/tensor/arithmetic/arcsinh.py +0 -2
  359. maxframe/tensor/arithmetic/arctan.py +0 -2
  360. maxframe/tensor/arithmetic/arctan2.py +0 -2
  361. maxframe/tensor/arithmetic/arctanh.py +0 -2
  362. maxframe/tensor/arithmetic/around.py +0 -2
  363. maxframe/tensor/arithmetic/bitand.py +0 -2
  364. maxframe/tensor/arithmetic/bitor.py +1 -3
  365. maxframe/tensor/arithmetic/bitxor.py +1 -3
  366. maxframe/tensor/arithmetic/cbrt.py +0 -2
  367. maxframe/tensor/arithmetic/ceil.py +0 -2
  368. maxframe/tensor/arithmetic/clip.py +13 -13
  369. maxframe/tensor/arithmetic/conj.py +0 -2
  370. maxframe/tensor/arithmetic/copysign.py +0 -2
  371. maxframe/tensor/arithmetic/core.py +47 -39
  372. maxframe/tensor/arithmetic/cos.py +1 -3
  373. maxframe/tensor/arithmetic/cosh.py +0 -2
  374. maxframe/tensor/arithmetic/deg2rad.py +0 -2
  375. maxframe/tensor/arithmetic/degrees.py +0 -2
  376. maxframe/tensor/arithmetic/divide.py +0 -2
  377. maxframe/tensor/arithmetic/equal.py +0 -2
  378. maxframe/tensor/arithmetic/exp.py +1 -3
  379. maxframe/tensor/arithmetic/exp2.py +0 -2
  380. maxframe/tensor/arithmetic/expm1.py +0 -2
  381. maxframe/tensor/arithmetic/fabs.py +0 -2
  382. maxframe/tensor/arithmetic/fix.py +0 -2
  383. maxframe/tensor/arithmetic/float_power.py +0 -2
  384. maxframe/tensor/arithmetic/floor.py +0 -2
  385. maxframe/tensor/arithmetic/floordiv.py +0 -2
  386. maxframe/tensor/arithmetic/fmax.py +0 -2
  387. maxframe/tensor/arithmetic/fmin.py +0 -2
  388. maxframe/tensor/arithmetic/fmod.py +0 -2
  389. maxframe/tensor/arithmetic/frexp.py +6 -2
  390. maxframe/tensor/arithmetic/greater.py +0 -2
  391. maxframe/tensor/arithmetic/greater_equal.py +0 -2
  392. maxframe/tensor/arithmetic/hypot.py +0 -2
  393. maxframe/tensor/arithmetic/i0.py +1 -3
  394. maxframe/tensor/arithmetic/imag.py +0 -2
  395. maxframe/tensor/arithmetic/invert.py +1 -3
  396. maxframe/tensor/arithmetic/isclose.py +0 -2
  397. maxframe/tensor/arithmetic/iscomplex.py +0 -2
  398. maxframe/tensor/arithmetic/isfinite.py +1 -3
  399. maxframe/tensor/arithmetic/isinf.py +0 -2
  400. maxframe/tensor/arithmetic/isnan.py +0 -2
  401. maxframe/tensor/arithmetic/isreal.py +0 -2
  402. maxframe/tensor/arithmetic/ldexp.py +0 -2
  403. maxframe/tensor/arithmetic/less.py +0 -2
  404. maxframe/tensor/arithmetic/less_equal.py +0 -2
  405. maxframe/tensor/arithmetic/log.py +1 -3
  406. maxframe/tensor/arithmetic/log10.py +1 -3
  407. maxframe/tensor/arithmetic/log1p.py +1 -3
  408. maxframe/tensor/arithmetic/log2.py +1 -3
  409. maxframe/tensor/arithmetic/logaddexp.py +0 -2
  410. maxframe/tensor/arithmetic/logaddexp2.py +0 -2
  411. maxframe/tensor/arithmetic/logical_and.py +0 -2
  412. maxframe/tensor/arithmetic/logical_not.py +1 -3
  413. maxframe/tensor/arithmetic/logical_or.py +0 -2
  414. maxframe/tensor/arithmetic/logical_xor.py +0 -2
  415. maxframe/tensor/arithmetic/lshift.py +0 -2
  416. maxframe/tensor/arithmetic/maximum.py +0 -2
  417. maxframe/tensor/arithmetic/minimum.py +0 -2
  418. maxframe/tensor/arithmetic/mod.py +0 -2
  419. maxframe/tensor/arithmetic/modf.py +6 -2
  420. maxframe/tensor/arithmetic/multiply.py +37 -4
  421. maxframe/tensor/arithmetic/nan_to_num.py +0 -2
  422. maxframe/tensor/arithmetic/negative.py +0 -2
  423. maxframe/tensor/arithmetic/nextafter.py +0 -2
  424. maxframe/tensor/arithmetic/not_equal.py +0 -2
  425. maxframe/tensor/arithmetic/positive.py +0 -2
  426. maxframe/tensor/arithmetic/power.py +0 -2
  427. maxframe/tensor/arithmetic/rad2deg.py +0 -2
  428. maxframe/tensor/arithmetic/radians.py +0 -2
  429. maxframe/tensor/arithmetic/real.py +0 -2
  430. maxframe/tensor/arithmetic/reciprocal.py +5 -3
  431. maxframe/tensor/arithmetic/rint.py +1 -3
  432. maxframe/tensor/arithmetic/rshift.py +0 -2
  433. maxframe/tensor/arithmetic/setimag.py +0 -2
  434. maxframe/tensor/arithmetic/setreal.py +0 -2
  435. maxframe/tensor/arithmetic/sign.py +0 -2
  436. maxframe/tensor/arithmetic/signbit.py +0 -2
  437. maxframe/tensor/arithmetic/sin.py +0 -2
  438. maxframe/tensor/arithmetic/sinc.py +1 -3
  439. maxframe/tensor/arithmetic/sinh.py +0 -2
  440. maxframe/tensor/arithmetic/spacing.py +0 -2
  441. maxframe/tensor/arithmetic/sqrt.py +0 -2
  442. maxframe/tensor/arithmetic/square.py +0 -2
  443. maxframe/tensor/arithmetic/subtract.py +4 -2
  444. maxframe/tensor/arithmetic/tan.py +0 -2
  445. maxframe/tensor/arithmetic/tanh.py +0 -2
  446. maxframe/tensor/arithmetic/tests/__init__.py +0 -2
  447. maxframe/tensor/arithmetic/tests/test_arithmetic.py +43 -9
  448. maxframe/tensor/arithmetic/truediv.py +0 -2
  449. maxframe/tensor/arithmetic/trunc.py +0 -2
  450. maxframe/tensor/arithmetic/utils.py +32 -6
  451. maxframe/tensor/array_utils.py +3 -25
  452. maxframe/tensor/core.py +6 -6
  453. maxframe/tensor/datasource/__init__.py +10 -2
  454. maxframe/tensor/datasource/arange.py +0 -2
  455. maxframe/tensor/datasource/array.py +3 -22
  456. maxframe/tensor/datasource/core.py +15 -10
  457. maxframe/tensor/datasource/diag.py +140 -0
  458. maxframe/tensor/datasource/diagflat.py +69 -0
  459. maxframe/tensor/datasource/empty.py +0 -2
  460. maxframe/tensor/datasource/eye.py +95 -0
  461. maxframe/tensor/datasource/from_dataframe.py +0 -2
  462. maxframe/tensor/datasource/from_dense.py +0 -17
  463. maxframe/tensor/datasource/from_sparse.py +0 -2
  464. maxframe/tensor/datasource/full.py +0 -2
  465. maxframe/tensor/datasource/identity.py +54 -0
  466. maxframe/tensor/datasource/indices.py +115 -0
  467. maxframe/tensor/datasource/linspace.py +140 -0
  468. maxframe/tensor/datasource/meshgrid.py +135 -0
  469. maxframe/tensor/datasource/ones.py +8 -3
  470. maxframe/tensor/datasource/tests/test_datasource.py +32 -1
  471. maxframe/tensor/datasource/tri_array.py +107 -0
  472. maxframe/tensor/datasource/zeros.py +7 -3
  473. maxframe/tensor/extensions/__init__.py +31 -0
  474. maxframe/tensor/extensions/accessor.py +25 -0
  475. maxframe/tensor/extensions/apply_chunk.py +137 -0
  476. maxframe/tensor/indexing/__init__.py +1 -1
  477. maxframe/tensor/indexing/choose.py +8 -6
  478. maxframe/tensor/indexing/compress.py +0 -2
  479. maxframe/tensor/indexing/extract.py +0 -2
  480. maxframe/tensor/indexing/fill_diagonal.py +9 -6
  481. maxframe/tensor/indexing/flatnonzero.py +1 -3
  482. maxframe/tensor/indexing/getitem.py +10 -43
  483. maxframe/tensor/indexing/nonzero.py +2 -4
  484. maxframe/tensor/indexing/setitem.py +19 -9
  485. maxframe/tensor/indexing/slice.py +6 -3
  486. maxframe/tensor/indexing/take.py +0 -2
  487. maxframe/tensor/indexing/tests/__init__.py +0 -2
  488. maxframe/tensor/indexing/tests/test_indexing.py +0 -2
  489. maxframe/tensor/indexing/unravel_index.py +6 -6
  490. maxframe/tensor/lib/__init__.py +16 -0
  491. maxframe/tensor/lib/index_tricks.py +404 -0
  492. maxframe/tensor/linalg/__init__.py +36 -0
  493. maxframe/tensor/linalg/dot.py +145 -0
  494. maxframe/tensor/linalg/inner.py +36 -0
  495. maxframe/tensor/linalg/inv.py +83 -0
  496. maxframe/tensor/linalg/lu.py +115 -0
  497. maxframe/tensor/linalg/matmul.py +225 -0
  498. maxframe/tensor/linalg/qr.py +124 -0
  499. maxframe/tensor/linalg/solve_triangular.py +103 -0
  500. maxframe/tensor/linalg/svd.py +167 -0
  501. maxframe/tensor/linalg/tensordot.py +213 -0
  502. maxframe/tensor/linalg/vdot.py +73 -0
  503. maxframe/tensor/merge/__init__.py +4 -0
  504. maxframe/tensor/merge/append.py +74 -0
  505. maxframe/tensor/merge/column_stack.py +63 -0
  506. maxframe/tensor/merge/concatenate.py +3 -2
  507. maxframe/tensor/merge/dstack.py +71 -0
  508. maxframe/tensor/merge/hstack.py +70 -0
  509. maxframe/tensor/merge/stack.py +0 -2
  510. maxframe/tensor/merge/tests/test_merge.py +0 -2
  511. maxframe/tensor/misc/__init__.py +18 -5
  512. maxframe/tensor/misc/astype.py +10 -8
  513. maxframe/tensor/misc/broadcast_to.py +1 -1
  514. maxframe/tensor/misc/copy.py +64 -0
  515. maxframe/tensor/misc/diff.py +115 -0
  516. maxframe/tensor/misc/flatten.py +63 -0
  517. maxframe/tensor/misc/in1d.py +94 -0
  518. maxframe/tensor/misc/isin.py +130 -0
  519. maxframe/tensor/misc/ndim.py +53 -0
  520. maxframe/tensor/misc/ravel.py +0 -2
  521. maxframe/tensor/misc/repeat.py +129 -0
  522. maxframe/tensor/misc/searchsorted.py +147 -0
  523. maxframe/tensor/misc/setdiff1d.py +58 -0
  524. maxframe/tensor/misc/squeeze.py +117 -0
  525. maxframe/tensor/misc/swapaxes.py +113 -0
  526. maxframe/tensor/misc/tests/test_misc.py +0 -2
  527. maxframe/tensor/misc/transpose.py +8 -4
  528. maxframe/tensor/misc/trapezoid.py +123 -0
  529. maxframe/tensor/misc/unique.py +0 -1
  530. maxframe/tensor/misc/where.py +10 -8
  531. maxframe/tensor/operators.py +0 -34
  532. maxframe/tensor/random/__init__.py +3 -5
  533. maxframe/tensor/random/binomial.py +0 -2
  534. maxframe/tensor/random/bytes.py +0 -2
  535. maxframe/tensor/random/chisquare.py +0 -2
  536. maxframe/tensor/random/choice.py +9 -8
  537. maxframe/tensor/random/core.py +20 -5
  538. maxframe/tensor/random/dirichlet.py +0 -2
  539. maxframe/tensor/random/exponential.py +0 -2
  540. maxframe/tensor/random/f.py +2 -4
  541. maxframe/tensor/random/gamma.py +0 -2
  542. maxframe/tensor/random/geometric.py +0 -2
  543. maxframe/tensor/random/gumbel.py +0 -2
  544. maxframe/tensor/random/hypergeometric.py +0 -2
  545. maxframe/tensor/random/laplace.py +2 -4
  546. maxframe/tensor/random/logistic.py +0 -2
  547. maxframe/tensor/random/lognormal.py +0 -2
  548. maxframe/tensor/random/logseries.py +0 -2
  549. maxframe/tensor/random/multinomial.py +0 -2
  550. maxframe/tensor/random/multivariate_normal.py +0 -2
  551. maxframe/tensor/random/negative_binomial.py +0 -2
  552. maxframe/tensor/random/noncentral_chisquare.py +0 -2
  553. maxframe/tensor/random/noncentral_f.py +1 -3
  554. maxframe/tensor/random/normal.py +0 -2
  555. maxframe/tensor/random/pareto.py +0 -2
  556. maxframe/tensor/random/permutation.py +6 -3
  557. maxframe/tensor/random/poisson.py +0 -2
  558. maxframe/tensor/random/power.py +0 -2
  559. maxframe/tensor/random/rand.py +0 -2
  560. maxframe/tensor/random/randint.py +0 -2
  561. maxframe/tensor/random/randn.py +0 -2
  562. maxframe/tensor/random/random_integers.py +0 -2
  563. maxframe/tensor/random/random_sample.py +0 -2
  564. maxframe/tensor/random/rayleigh.py +0 -2
  565. maxframe/tensor/random/standard_cauchy.py +0 -2
  566. maxframe/tensor/random/standard_exponential.py +0 -2
  567. maxframe/tensor/random/standard_gamma.py +0 -2
  568. maxframe/tensor/random/standard_normal.py +0 -2
  569. maxframe/tensor/random/standard_t.py +0 -2
  570. maxframe/tensor/random/tests/__init__.py +0 -2
  571. maxframe/tensor/random/tests/test_random.py +0 -2
  572. maxframe/tensor/random/triangular.py +0 -2
  573. maxframe/tensor/random/uniform.py +0 -2
  574. maxframe/tensor/random/vonmises.py +0 -2
  575. maxframe/tensor/random/wald.py +0 -2
  576. maxframe/tensor/random/weibull.py +0 -2
  577. maxframe/tensor/random/zipf.py +0 -2
  578. maxframe/tensor/reduction/__init__.py +0 -2
  579. maxframe/tensor/reduction/all.py +0 -2
  580. maxframe/tensor/reduction/allclose.py +0 -2
  581. maxframe/tensor/reduction/any.py +0 -2
  582. maxframe/tensor/reduction/argmax.py +1 -3
  583. maxframe/tensor/reduction/argmin.py +1 -3
  584. maxframe/tensor/reduction/array_equal.py +0 -2
  585. maxframe/tensor/reduction/core.py +0 -2
  586. maxframe/tensor/reduction/count_nonzero.py +0 -2
  587. maxframe/tensor/reduction/cumprod.py +0 -2
  588. maxframe/tensor/reduction/cumsum.py +0 -2
  589. maxframe/tensor/reduction/max.py +0 -2
  590. maxframe/tensor/reduction/mean.py +0 -2
  591. maxframe/tensor/reduction/min.py +0 -2
  592. maxframe/tensor/reduction/nanargmax.py +0 -2
  593. maxframe/tensor/reduction/nanargmin.py +0 -2
  594. maxframe/tensor/reduction/nancumprod.py +0 -2
  595. maxframe/tensor/reduction/nancumsum.py +0 -2
  596. maxframe/tensor/reduction/nanmax.py +0 -2
  597. maxframe/tensor/reduction/nanmean.py +0 -2
  598. maxframe/tensor/reduction/nanmin.py +0 -2
  599. maxframe/tensor/reduction/nanprod.py +0 -2
  600. maxframe/tensor/reduction/nanstd.py +0 -2
  601. maxframe/tensor/reduction/nansum.py +0 -2
  602. maxframe/tensor/reduction/nanvar.py +0 -2
  603. maxframe/tensor/reduction/prod.py +0 -2
  604. maxframe/tensor/reduction/std.py +0 -2
  605. maxframe/tensor/reduction/sum.py +0 -2
  606. maxframe/tensor/reduction/tests/test_reduction.py +1 -4
  607. maxframe/tensor/reduction/var.py +0 -2
  608. maxframe/tensor/reshape/__init__.py +0 -2
  609. maxframe/tensor/reshape/reshape.py +6 -5
  610. maxframe/tensor/reshape/tests/__init__.py +0 -2
  611. maxframe/tensor/reshape/tests/test_reshape.py +0 -2
  612. maxframe/tensor/sort/__init__.py +16 -0
  613. maxframe/tensor/sort/argsort.py +150 -0
  614. maxframe/tensor/sort/sort.py +295 -0
  615. maxframe/tensor/special/__init__.py +37 -0
  616. maxframe/tensor/special/core.py +38 -0
  617. maxframe/tensor/special/misc.py +142 -0
  618. maxframe/tensor/special/statistical.py +56 -0
  619. maxframe/tensor/statistics/__init__.py +5 -0
  620. maxframe/tensor/statistics/average.py +143 -0
  621. maxframe/tensor/statistics/bincount.py +133 -0
  622. maxframe/tensor/statistics/quantile.py +10 -8
  623. maxframe/tensor/ufunc/__init__.py +0 -2
  624. maxframe/tensor/ufunc/ufunc.py +0 -2
  625. maxframe/tensor/utils.py +21 -3
  626. maxframe/tests/test_protocol.py +3 -3
  627. maxframe/tests/test_utils.py +210 -1
  628. maxframe/tests/utils.py +59 -1
  629. maxframe/udf.py +76 -6
  630. maxframe/utils.py +418 -17
  631. {maxframe-1.3.1.dist-info → maxframe-2.0.0.dist-info}/METADATA +4 -1
  632. maxframe-2.0.0.dist-info/RECORD +939 -0
  633. maxframe_client/clients/framedriver.py +19 -3
  634. maxframe_client/fetcher.py +113 -6
  635. maxframe_client/session/odps.py +173 -38
  636. maxframe_client/session/task.py +3 -1
  637. maxframe_client/tests/test_session.py +41 -5
  638. maxframe-1.3.1.dist-info/RECORD +0 -705
  639. {maxframe-1.3.1.dist-info → maxframe-2.0.0.dist-info}/WHEEL +0 -0
  640. {maxframe-1.3.1.dist-info → maxframe-2.0.0.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,10 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from typing import List
16
+
15
17
  from ... import opcodes
18
+ from ...core import EntityData
16
19
  from ...serialization.serializables import (
17
20
  AnyField,
18
21
  BoolField,
@@ -68,9 +71,10 @@ class DataFrameToCSV(DataFrameDataStore):
68
71
  def output_limit(self):
69
72
  return 1 if not self.output_stat else 2
70
73
 
71
- def _set_inputs(self, inputs):
72
- super()._set_inputs(inputs)
73
- self._input = self._inputs[0]
74
+ @classmethod
75
+ def _set_inputs(cls, op: "DataFrameToCSV", inputs: List[EntityData]):
76
+ super()._set_inputs(op, inputs)
77
+ op._input = op._inputs[0]
74
78
 
75
79
  def __call__(self, df):
76
80
  index_value = parse_index(df.index_value.to_pandas()[:0], df)
@@ -1,5 +1,3 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
1
  # Copyright 1999-2025 Alibaba Group Holding Ltd.
4
2
  #
5
3
  # Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,8 +12,9 @@
14
12
  # See the License for the specific language governing permissions and
15
13
  # limitations under the License.
16
14
 
15
+ import itertools
17
16
  import logging
18
- from typing import List, Optional, Union
17
+ from typing import Any, List, Optional, Union
19
18
 
20
19
  from odps import ODPS
21
20
  from odps.models import Table as ODPSTable
@@ -75,6 +74,25 @@ class DataFrameToODPSTable(DataFrameDataStore):
75
74
  columns_value=columns_value,
76
75
  )
77
76
 
77
+ @classmethod
78
+ def get_index_mapping(
79
+ cls,
80
+ index_label: Optional[List[str]],
81
+ raw_index_levels: List[Any],
82
+ ) -> List[Any]:
83
+ def_labels = index_label or itertools.repeat(None)
84
+ def_labels = itertools.chain(def_labels, itertools.repeat(None))
85
+ names = raw_index_levels
86
+ if len(names) == 1:
87
+ default_labels = ["index"]
88
+ else:
89
+ default_labels = [f"level_{i}" for i in range(len(names))]
90
+ indexes = [
91
+ def_label or name or label
92
+ for def_label, name, label in zip(def_labels, names, default_labels)
93
+ ]
94
+ return [x.lower() for x in indexes]
95
+
78
96
 
79
97
  def to_odps_table(
80
98
  df: TileableType,
@@ -161,11 +179,14 @@ def to_odps_table(
161
179
  f"index_label needs {len(df.index.nlevels)} labels "
162
180
  f"but it only have {len(index_label)}"
163
181
  )
182
+
183
+ # check if table partition columns conflicts with dataframe columns
164
184
  table_cols = set(build_dataframe_table_meta(df).table_column_names)
185
+ partition_col_set = (
186
+ set(x.lower() for x in PartitionSpec(partition).keys()) if partition else set()
187
+ )
165
188
  if partition:
166
- partition_intersect = (
167
- set(x.lower() for x in PartitionSpec(partition).keys()) & table_cols
168
- )
189
+ partition_intersect = partition_col_set & table_cols
169
190
  if partition_intersect:
170
191
  raise ValueError(
171
192
  f"Data column(s) {partition_intersect} in the dataframe"
@@ -173,6 +194,21 @@ def to_odps_table(
173
194
  " Use 'partition_col' instead."
174
195
  )
175
196
 
197
+ if index:
198
+ index_cols = set(
199
+ DataFrameToODPSTable.get_index_mapping(index_label, df.index.names)
200
+ )
201
+ index_table_intersect = index_cols & table_cols
202
+ if index_table_intersect:
203
+ raise ValueError(
204
+ f"Index column(s) {index_table_intersect} conflict with column(s) of the input dataframe."
205
+ )
206
+ index_partition_intersect = index_cols & partition_col_set
207
+ if index_partition_intersect:
208
+ raise ValueError(
209
+ f"Index column(s) {index_partition_intersect} conflict with partition column(s)."
210
+ )
211
+
176
212
  if partition_col:
177
213
  partition_diff = set(x.lower() for x in partition_col) - table_cols
178
214
  if partition_diff:
@@ -18,7 +18,12 @@ from .accessor import (
18
18
  IndexMaxFrameAccessor,
19
19
  SeriesMaxFrameAccessor,
20
20
  )
21
- from .apply_chunk import df_apply_chunk, series_apply_chunk
21
+ from .apply_chunk import (
22
+ DataFrameApplyChunk,
23
+ DataFrameApplyChunkOperator,
24
+ df_apply_chunk,
25
+ series_apply_chunk,
26
+ )
22
27
  from .flatjson import series_flatjson
23
28
  from .flatmap import df_flatmap, series_flatmap
24
29
  from .reshuffle import DataFrameReshuffle, df_reshuffle
@@ -13,7 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import functools
16
- from typing import Any, Callable, Dict, List, Tuple, Union
16
+ from typing import Any, Callable, Dict, List, MutableMapping, Tuple, Union
17
17
 
18
18
  import numpy as np
19
19
  import pandas as pd
@@ -26,22 +26,24 @@ from ...serialization.serializables import (
26
26
  Int32Field,
27
27
  TupleField,
28
28
  )
29
- from ...utils import quiet_stdio
29
+ from ...udf import BuiltinFunction, MarkedFunction
30
+ from ...utils import copy_if_possible, make_dtype, make_dtypes
30
31
  from ..core import DATAFRAME_TYPE, DataFrame, IndexValue, Series
31
32
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
32
33
  from ..utils import (
34
+ InferredDataFrameMeta,
33
35
  build_df,
34
- build_series,
35
36
  copy_func_scheduling_hints,
36
- make_dtypes,
37
+ infer_dataframe_return_value,
37
38
  pack_func_args,
38
39
  parse_index,
39
40
  validate_output_types,
40
41
  )
41
42
 
42
43
 
43
- class DataFrameApplyChunkOperator(DataFrameOperator, DataFrameOperatorMixin):
44
+ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
44
45
  _op_type_ = opcodes.APPLY_CHUNK
46
+ _legacy_name = "DataFrameApplyChunkOperator"
45
47
 
46
48
  func = FunctionField("func")
47
49
  batch_rows = Int32Field("batch_rows", default=None)
@@ -55,7 +57,10 @@ class DataFrameApplyChunkOperator(DataFrameOperator, DataFrameOperatorMixin):
55
57
  if hasattr(self, "func"):
56
58
  copy_func_scheduling_hints(self.func, self)
57
59
 
58
- def _call_dataframe(self, df, dtypes, index_value, element_wise):
60
+ def has_custom_code(self) -> bool:
61
+ return not isinstance(self.func, BuiltinFunction)
62
+
63
+ def _call_dataframe(self, df, dtypes, dtype, name, index_value, element_wise):
59
64
  # return dataframe
60
65
  if self.output_types[0] == OutputType.dataframe:
61
66
  dtypes = make_dtypes(dtypes)
@@ -69,26 +74,13 @@ class DataFrameApplyChunkOperator(DataFrameOperator, DataFrameOperatorMixin):
69
74
  )
70
75
 
71
76
  # return series
72
- if not isinstance(dtypes, tuple):
73
- raise TypeError(
74
- "Cannot determine dtype, " "please specify `dtype` as argument"
75
- )
76
-
77
- name, dtype = dtypes
78
77
  return self.new_series(
79
78
  [df], shape=(np.nan,), name=name, dtype=dtype, index_value=index_value
80
79
  )
81
80
 
82
- def _call_series(self, series, dtypes, index_value, element_wise):
81
+ def _call_series(self, series, dtypes, dtype, name, index_value, element_wise):
83
82
  if self.output_types[0] == OutputType.series:
84
- if not isinstance(dtypes, tuple):
85
- raise TypeError(
86
- "Cannot determine dtype, " "please specify `dtype` as argument"
87
- )
88
-
89
- name, dtype = dtypes
90
83
  shape = series.shape if element_wise else (np.nan,)
91
-
92
84
  return self.new_series(
93
85
  [series],
94
86
  dtype=dtype,
@@ -110,6 +102,8 @@ class DataFrameApplyChunkOperator(DataFrameOperator, DataFrameOperatorMixin):
110
102
  self,
111
103
  df_or_series: Union[DataFrame, Series],
112
104
  dtypes: Union[Tuple[str, Any], Dict[str, Any]] = None,
105
+ dtype: Any = None,
106
+ name: Any = None,
113
107
  output_type=None,
114
108
  index=None,
115
109
  ):
@@ -123,145 +117,104 @@ class DataFrameApplyChunkOperator(DataFrameOperator, DataFrameOperatorMixin):
123
117
  return self.new_df_or_series([df_or_series])
124
118
 
125
119
  # infer return index and dtypes
126
- dtypes, index_value, elementwise = self._infer_batch_func_returns(
120
+ inferred_meta = self._infer_batch_func_returns(
127
121
  df_or_series,
128
- origin_func=self.func,
129
122
  packed_func=packed_func,
130
- given_output_type=output_type,
131
- given_dtypes=dtypes,
132
- given_index=index,
123
+ output_type=output_type,
124
+ dtypes=dtypes,
125
+ dtype=dtype,
126
+ name=name,
127
+ index=index,
133
128
  )
134
129
 
135
- if index_value is None:
136
- index_value = parse_index(
130
+ if inferred_meta.index_value is None:
131
+ inferred_meta.index_value = parse_index(
137
132
  None, (df_or_series.key, df_or_series.index_value.key, self.func)
138
133
  )
139
- for arg, desc in zip((self.output_types, dtypes), ("output_types", "dtypes")):
140
- if arg is None:
141
- raise TypeError(
142
- f"Cannot determine {desc} by calculating with enumerate data, "
143
- "please specify it as arguments"
144
- )
145
-
146
- if dtypes is None or len(dtypes) == 0:
147
- raise TypeError(
148
- "Cannot determine {dtypes} or {dtype} by calculating with enumerate data, "
149
- "please specify it as arguments"
150
- )
134
+ inferred_meta.check_absence("output_type", "dtypes", "dtype")
151
135
 
152
136
  if isinstance(df_or_series, DATAFRAME_TYPE):
153
137
  return self._call_dataframe(
154
138
  df_or_series,
155
- dtypes=dtypes,
156
- index_value=index_value,
157
- element_wise=elementwise,
139
+ dtypes=inferred_meta.dtypes,
140
+ dtype=inferred_meta.dtype,
141
+ name=inferred_meta.name,
142
+ index_value=inferred_meta.index_value,
143
+ element_wise=inferred_meta.elementwise,
158
144
  )
159
145
 
160
146
  return self._call_series(
161
147
  df_or_series,
162
- dtypes=dtypes,
163
- index_value=index_value,
164
- element_wise=elementwise,
148
+ dtypes=inferred_meta.dtypes,
149
+ dtype=inferred_meta.dtype,
150
+ name=inferred_meta.name,
151
+ index_value=inferred_meta.index_value,
152
+ element_wise=inferred_meta.elementwise,
165
153
  )
166
154
 
167
155
  def _infer_batch_func_returns(
168
156
  self,
169
157
  input_df_or_series: Union[DataFrame, Series],
170
- origin_func: Union[str, Callable, np.ufunc],
171
158
  packed_func: Union[Callable, functools.partial],
172
- given_output_type: OutputType,
173
- given_dtypes: Union[Tuple[str, Any], pd.Series, List[Any], Dict[str, Any]],
174
- given_index: Union[pd.Index, IndexValue],
175
- given_elementwise: bool = False,
159
+ output_type: OutputType,
176
160
  *args,
161
+ dtypes: Union[pd.Series, List[Any], Dict[str, Any]] = None,
162
+ dtype: Any = None,
163
+ name: Any = None,
164
+ index: Union[pd.Index, IndexValue] = None,
165
+ elementwise: bool = None,
177
166
  **kwargs,
178
- ):
179
- inferred_output_type = inferred_dtypes = inferred_index_value = None
180
- inferred_is_elementwise = False
181
-
182
- # handle numpy ufunc case
183
- if isinstance(origin_func, np.ufunc):
184
- inferred_output_type = OutputType.dataframe
185
- inferred_dtypes = None
186
- inferred_index_value = input_df_or_series.index_value
187
- inferred_is_elementwise = True
188
- elif self.output_types is not None and given_dtypes is not None:
189
- inferred_dtypes = given_dtypes
190
-
191
- # build same schema frame toto execute
192
- if isinstance(input_df_or_series, DATAFRAME_TYPE):
193
- empty_data = build_df(input_df_or_series, fill_value=1, size=1)
194
- else:
195
- empty_data = build_series(
196
- input_df_or_series, size=1, name=input_df_or_series.name
197
- )
198
-
199
- try:
200
- # execute
201
- with np.errstate(all="ignore"), quiet_stdio():
202
- infer_result = packed_func(empty_data, *args, **kwargs)
203
-
204
- # if executed successfully, get index and dtypes from returned object
205
- if inferred_index_value is None:
206
- if (
207
- infer_result is None
208
- or not hasattr(infer_result, "index")
209
- or infer_result.index is None
210
- ):
211
- inferred_index_value = parse_index(pd.RangeIndex(-1))
212
- elif infer_result.index is empty_data.index:
213
- inferred_index_value = input_df_or_series.index_value
214
- else:
215
- inferred_index_value = parse_index(infer_result.index, packed_func)
216
-
217
- if isinstance(infer_result, pd.DataFrame):
218
- if (
219
- given_output_type is not None
220
- and given_output_type != OutputType.dataframe
221
- ):
222
- raise TypeError(
223
- f'Cannot infer output_type as "series", '
224
- f'please specify `output_type` as "dataframe"'
225
- )
226
- inferred_output_type = given_output_type or OutputType.dataframe
227
- inferred_dtypes = (
228
- given_dtypes if given_dtypes is not None else infer_result.dtypes
229
- )
230
- else:
231
- if (
232
- given_output_type is not None
233
- and given_output_type == OutputType.dataframe
234
- ):
235
- raise TypeError(
236
- f'Cannot infer output_type as "dataframe", '
237
- f'please specify `output_type` as "series"'
238
- )
239
- inferred_output_type = given_output_type or OutputType.series
240
- inferred_dtypes = (infer_result.name, infer_result.dtype)
241
- except: # noqa: E722
242
- pass
167
+ ) -> InferredDataFrameMeta:
168
+ inferred_meta = infer_dataframe_return_value(
169
+ input_df_or_series,
170
+ functools.partial(packed_func, *args, **kwargs),
171
+ output_type=output_type,
172
+ dtypes=dtypes,
173
+ dtype=dtype,
174
+ name=name,
175
+ index=index,
176
+ elementwise=elementwise,
177
+ )
243
178
 
244
179
  # merge specified and inferred index, dtypes, output_type
245
180
  # elementwise used to decide shape
246
181
  self.output_types = (
247
- [inferred_output_type]
248
- if not self.output_types and inferred_output_type
182
+ [inferred_meta.output_type]
183
+ if not self.output_types and inferred_meta.output_type
249
184
  else self.output_types
250
185
  )
251
- inferred_dtypes = given_dtypes if given_dtypes is not None else inferred_dtypes
252
- if given_index is not None:
253
- inferred_index_value = (
254
- parse_index(given_index)
255
- if given_index is not input_df_or_series.index_value
186
+ if self.output_types:
187
+ inferred_meta.output_type = self.output_types[0]
188
+ inferred_meta.dtypes = dtypes if dtypes is not None else inferred_meta.dtypes
189
+ if index is not None:
190
+ inferred_meta.index_value = (
191
+ parse_index(index)
192
+ if index is not input_df_or_series.index_value
256
193
  else input_df_or_series.index_value
257
194
  )
258
- inferred_is_elementwise = given_elementwise or inferred_is_elementwise
259
- return inferred_dtypes, inferred_index_value, inferred_is_elementwise
195
+ inferred_meta.elementwise = elementwise or inferred_meta.elementwise
196
+ return inferred_meta
197
+
198
+ @classmethod
199
+ def estimate_size(
200
+ cls,
201
+ ctx: MutableMapping[str, Union[int, float]],
202
+ op: "DataFrameApplyChunk",
203
+ ) -> None:
204
+ if isinstance(op.func, MarkedFunction):
205
+ ctx[op.outputs[0].key] = float("inf")
206
+ super().estimate_size(ctx, op)
207
+
208
+
209
+ # Keep for import compatibility
210
+ DataFrameApplyChunkOperator = DataFrameApplyChunk
260
211
 
261
212
 
262
213
  def get_packed_func(df, func, *args, **kwargs) -> Any:
263
214
  stub_df = build_df(df, fill_value=1, size=1)
264
- return pack_func_args(stub_df, func, *args, **kwargs)
215
+ n_args = copy_if_possible(args)
216
+ n_kwargs = copy_if_possible(kwargs)
217
+ return pack_func_args(stub_df, func, *n_args, **n_kwargs)
265
218
 
266
219
 
267
220
  def df_apply_chunk(
@@ -477,7 +430,8 @@ def df_apply_chunk(
477
430
  elif batch_rows <= 0:
478
431
  raise ValueError("batch_rows must be greater than 0")
479
432
 
480
- dtypes = (name, dtype) if dtype is not None else dtypes
433
+ if dtype is not None:
434
+ dtype = make_dtype(dtype)
481
435
 
482
436
  output_types = kwargs.pop("output_types", None)
483
437
  object_type = kwargs.pop("object_type", None)
@@ -489,7 +443,7 @@ def df_apply_chunk(
489
443
  output_type = OutputType.df_or_series
490
444
 
491
445
  # bind args and kwargs
492
- op = DataFrameApplyChunkOperator(
446
+ op = DataFrameApplyChunk(
493
447
  func=func,
494
448
  batch_rows=batch_rows,
495
449
  output_type=output_type,
@@ -500,14 +454,17 @@ def df_apply_chunk(
500
454
  return op(
501
455
  dataframe,
502
456
  dtypes=dtypes,
457
+ dtype=dtype,
458
+ name=name,
503
459
  index=index,
460
+ output_type=output_type,
504
461
  )
505
462
 
506
463
 
507
464
  def series_apply_chunk(
508
465
  dataframe_or_series,
509
466
  func: Union[str, Callable],
510
- batch_rows,
467
+ batch_rows=None,
511
468
  dtypes=None,
512
469
  dtype=None,
513
470
  name=None,
@@ -714,11 +671,11 @@ def series_apply_chunk(
714
671
  if not isinstance(func, Callable):
715
672
  raise TypeError("function must be a callable object")
716
673
 
717
- if not isinstance(batch_rows, int):
718
- raise TypeError("batch_rows must be an integer")
719
-
720
- if batch_rows <= 0:
721
- raise ValueError("batch_rows must be greater than 0")
674
+ if batch_rows is not None:
675
+ if not isinstance(batch_rows, int):
676
+ raise TypeError("batch_rows must be an integer")
677
+ if batch_rows <= 0:
678
+ raise ValueError("batch_rows must be greater than 0")
722
679
 
723
680
  # bind args and kwargs
724
681
  output_types = kwargs.pop("output_types", None)
@@ -730,7 +687,7 @@ def series_apply_chunk(
730
687
  if skip_infer and output_type is None:
731
688
  output_type = OutputType.df_or_series
732
689
 
733
- op = DataFrameApplyChunkOperator(
690
+ op = DataFrameApplyChunk(
734
691
  func=func,
735
692
  batch_rows=batch_rows,
736
693
  output_type=output_type,
@@ -738,10 +695,13 @@ def series_apply_chunk(
738
695
  kwargs=kwargs,
739
696
  )
740
697
 
741
- dtypes = (name, dtype) if dtype is not None else dtypes
698
+ if dtype is not None:
699
+ dtype = make_dtype(dtype)
742
700
  return op(
743
701
  dataframe_or_series,
744
- dtypes=dtypes,
702
+ dtypes=make_dtypes(dtypes),
703
+ dtype=dtype,
704
+ name=name,
745
705
  output_type=output_type,
746
706
  index=index,
747
707
  )
@@ -18,9 +18,10 @@ from ... import opcodes
18
18
  from ...core import OutputType
19
19
  from ...serialization.serializables import ListField
20
20
  from ...serialization.serializables.field_type import FieldTypes
21
+ from ...utils import make_dtype, make_dtypes
21
22
  from ..core import DataFrame
22
23
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
23
- from ..utils import make_dtypes, parse_index
24
+ from ..utils import parse_index
24
25
 
25
26
 
26
27
  class SeriesFlatJSONOperator(DataFrameOperator, DataFrameOperatorMixin):
@@ -36,7 +37,7 @@ class SeriesFlatJSONOperator(DataFrameOperator, DataFrameOperatorMixin):
36
37
  shape=series.shape,
37
38
  index_value=series.index_value,
38
39
  name=name,
39
- dtype=dtype,
40
+ dtype=make_dtype(dtype),
40
41
  )
41
42
  return self.new_dataframe(
42
43
  [series],
@@ -12,7 +12,7 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from typing import Callable
15
+ from typing import Callable, MutableMapping, Union
16
16
 
17
17
  import numpy as np
18
18
  import pandas as pd
@@ -25,14 +25,11 @@ from ...serialization.serializables import (
25
25
  FunctionField,
26
26
  TupleField,
27
27
  )
28
+ from ...udf import BuiltinFunction, MarkedFunction
29
+ from ...utils import make_dtypes
28
30
  from ..core import DataFrame
29
31
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
30
- from ..utils import (
31
- copy_func_scheduling_hints,
32
- gen_unknown_index_value,
33
- make_dtypes,
34
- parse_index,
35
- )
32
+ from ..utils import copy_func_scheduling_hints, gen_unknown_index_value, parse_index
36
33
 
37
34
 
38
35
  class DataFrameFlatMapOperator(DataFrameOperator, DataFrameOperatorMixin):
@@ -48,6 +45,9 @@ class DataFrameFlatMapOperator(DataFrameOperator, DataFrameOperatorMixin):
48
45
  if hasattr(self, "func"):
49
46
  copy_func_scheduling_hints(self.func, self)
50
47
 
48
+ def has_custom_code(self) -> bool:
49
+ return not isinstance(self.func, BuiltinFunction)
50
+
51
51
  def _call_dataframe(self, df: DataFrame, dtypes: pd.Series):
52
52
  dtypes = make_dtypes(dtypes)
53
53
  index_value = gen_unknown_index_value(
@@ -101,6 +101,14 @@ class DataFrameFlatMapOperator(DataFrameOperator, DataFrameOperatorMixin):
101
101
  else:
102
102
  return self._call_series_or_index(df_or_series, dtypes=dtypes)
103
103
 
104
+ @classmethod
105
+ def estimate_size(
106
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameFlatMapOperator"
107
+ ) -> None:
108
+ if isinstance(op.func, MarkedFunction):
109
+ ctx[op.outputs[0].key] = float("inf")
110
+ super().estimate_size(ctx, op)
111
+
104
112
 
105
113
  def df_flatmap(dataframe, func: Callable, dtypes=None, raw=False, args=(), **kwargs):
106
114
  """
@@ -12,8 +12,10 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ from typing import MutableMapping, Union
16
+
15
17
  from ...core import OutputType, register_fetch_class
16
- from ...core.operator import Fetch, FetchMixin, FetchShuffle
18
+ from ...core.operator import Fetch, FetchMixin, FetchShuffle, Operator
17
19
  from ...serialization.serializables import FieldTypes, TupleField
18
20
  from ...utils import on_deserialize_shape, on_serialize_shape
19
21
  from ..operators import DataFrameOperatorMixin
@@ -61,6 +63,15 @@ class DataFrameFetch(Fetch, DataFrameFetchMixin):
61
63
  new_kws = self._extract_dataframe_or_series_kws(kws, **kw)
62
64
  return super()._new_tileables(inputs, kws=new_kws, **kw)
63
65
 
66
+ @classmethod
67
+ def estimate_size(
68
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "Operator"
69
+ ) -> None:
70
+ # use infinity to show that the size cannot be inferred
71
+ # todo when local catalyst is implemented, and it should get the estimated size
72
+ # from the source.
73
+ ctx[op.outputs[0].key] = float("inf")
74
+
64
75
 
65
76
  class DataFrameFetchShuffle(FetchShuffle, DataFrameFetchMixin):
66
77
  # required fields
@@ -18,11 +18,14 @@ from .core import NamedAgg
18
18
 
19
19
 
20
20
  def _install():
21
+ from ...core import CachedAccessor
21
22
  from ..core import DATAFRAME_GROUPBY_TYPE, DATAFRAME_TYPE, GROUPBY_TYPE, SERIES_TYPE
22
23
  from .aggregation import agg
23
24
  from .apply import groupby_apply
25
+ from .apply_chunk import df_groupby_apply_chunk
24
26
  from .core import groupby
25
27
  from .cum import cumcount, cummax, cummin, cumprod, cumsum
28
+ from .extensions import DataFrameGroupByMaxFrameAccessor
26
29
  from .fill import bfill, ffill, fillna
27
30
  from .getitem import df_groupby_getitem
28
31
  from .head import head
@@ -75,8 +78,12 @@ def _install():
75
78
  setattr(cls, "backfill", bfill)
76
79
  setattr(cls, "fillna", fillna)
77
80
 
81
+ DataFrameGroupByMaxFrameAccessor._register("apply_chunk", df_groupby_apply_chunk)
82
+
78
83
  for cls in DATAFRAME_GROUPBY_TYPE:
79
84
  setattr(cls, "__getitem__", df_groupby_getitem)
85
+ if DataFrameGroupByMaxFrameAccessor._api_count:
86
+ cls.mf = CachedAccessor("mf", DataFrameGroupByMaxFrameAccessor)
80
87
 
81
88
 
82
89
  _install()
@@ -14,13 +14,13 @@
14
14
 
15
15
  import functools
16
16
  import logging
17
- from typing import Callable, Dict
17
+ from typing import Callable, Dict, List
18
18
 
19
19
  import numpy as np
20
20
  import pandas as pd
21
21
 
22
22
  from ... import opcodes
23
- from ...core import ENTITY_TYPE, OutputType
23
+ from ...core import ENTITY_TYPE, EntityData, OutputType
24
24
  from ...serialization.serializables import (
25
25
  AnyField,
26
26
  DictField,
@@ -155,17 +155,18 @@ class DataFrameGroupByAgg(DataFrameOperator, DataFrameOperatorMixin):
155
155
  index_levels = Int32Field("index_levels")
156
156
  size_recorder_name = StringField("size_recorder_name")
157
157
 
158
- def _set_inputs(self, inputs):
159
- super()._set_inputs(inputs)
160
- inputs_iter = iter(self._inputs[1:])
161
- if len(self._inputs) > 1:
158
+ @classmethod
159
+ def _set_inputs(cls, op: "DataFrameGroupByAgg", inputs: List[EntityData]):
160
+ super()._set_inputs(op, inputs)
161
+ inputs_iter = iter(op._inputs[1:])
162
+ if len(op._inputs) > 1:
162
163
  by = []
163
- for v in self.groupby_params["by"]:
164
+ for v in op.groupby_params["by"]:
164
165
  if isinstance(v, ENTITY_TYPE):
165
166
  by.append(next(inputs_iter))
166
167
  else:
167
168
  by.append(v)
168
- self.groupby_params["by"] = by
169
+ op.groupby_params["by"] = by
169
170
 
170
171
  def _get_inputs(self, inputs):
171
172
  if isinstance(self.groupby_params["by"], list):