maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (443)
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win32.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +9 -8
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +38 -1
  15. maxframe/codegen/spe/dataframe/misc.py +11 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +32 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +39 -18
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  29. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  30. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  31. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  32. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  33. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  34. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  35. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  36. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  37. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  38. maxframe/codegen/spe/tensor/__init__.py +3 -0
  39. maxframe/codegen/spe/tensor/datasource.py +1 -0
  40. maxframe/codegen/spe/tensor/fft.py +74 -0
  41. maxframe/codegen/spe/tensor/linalg.py +29 -2
  42. maxframe/codegen/spe/tensor/misc.py +79 -25
  43. maxframe/codegen/spe/tensor/spatial.py +45 -0
  44. maxframe/codegen/spe/tensor/statistics.py +44 -0
  45. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  46. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  47. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  48. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  49. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  50. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  51. maxframe/codegen/spe/utils.py +2 -0
  52. maxframe/config/config.py +73 -9
  53. maxframe/config/tests/test_validators.py +13 -1
  54. maxframe/config/validators.py +49 -0
  55. maxframe/conftest.py +54 -17
  56. maxframe/core/accessor.py +2 -2
  57. maxframe/core/base.py +2 -1
  58. maxframe/core/entity/core.py +5 -0
  59. maxframe/core/entity/tileables.py +3 -1
  60. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  61. maxframe/core/graph/entity.py +8 -3
  62. maxframe/core/mode.py +6 -1
  63. maxframe/core/operator/base.py +9 -2
  64. maxframe/core/operator/core.py +10 -2
  65. maxframe/core/operator/utils.py +13 -0
  66. maxframe/dataframe/__init__.py +12 -5
  67. maxframe/dataframe/accessors/__init__.py +1 -1
  68. maxframe/dataframe/accessors/compat.py +45 -0
  69. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  70. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  71. maxframe/dataframe/accessors/dict_/core.py +48 -0
  72. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  73. maxframe/dataframe/accessors/dict_/length.py +7 -16
  74. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  75. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  76. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  77. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  78. maxframe/dataframe/accessors/list_/core.py +48 -0
  79. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  80. maxframe/dataframe/accessors/list_/length.py +7 -16
  81. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  82. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  83. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  84. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  85. maxframe/dataframe/accessors/struct_/core.py +43 -0
  86. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  87. maxframe/dataframe/accessors/struct_/field.py +123 -0
  88. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  89. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  90. maxframe/dataframe/arithmetic/__init__.py +18 -4
  91. maxframe/dataframe/arithmetic/between.py +106 -0
  92. maxframe/dataframe/arithmetic/dot.py +237 -0
  93. maxframe/dataframe/arithmetic/maximum.py +33 -0
  94. maxframe/dataframe/arithmetic/minimum.py +33 -0
  95. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  96. maxframe/dataframe/core.py +161 -224
  97. maxframe/dataframe/datasource/__init__.py +18 -0
  98. maxframe/dataframe/datasource/core.py +6 -0
  99. maxframe/dataframe/datasource/direct.py +57 -0
  100. maxframe/dataframe/datasource/from_dict.py +124 -0
  101. maxframe/dataframe/datasource/from_index.py +1 -1
  102. maxframe/dataframe/datasource/from_records.py +77 -0
  103. maxframe/dataframe/datasource/from_tensor.py +109 -41
  104. maxframe/dataframe/datasource/read_csv.py +21 -14
  105. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  106. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  107. maxframe/dataframe/datasource/read_parquet.py +38 -39
  108. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  109. maxframe/dataframe/datastore/__init__.py +11 -1
  110. maxframe/dataframe/datastore/direct.py +268 -0
  111. maxframe/dataframe/datastore/to_csv.py +29 -41
  112. maxframe/dataframe/datastore/to_odps.py +36 -4
  113. maxframe/dataframe/extensions/__init__.py +20 -4
  114. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  115. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  116. maxframe/dataframe/extensions/collect_kv.py +126 -0
  117. maxframe/dataframe/extensions/extract_kv.py +177 -0
  118. maxframe/dataframe/extensions/flatjson.py +2 -1
  119. maxframe/dataframe/extensions/map_reduce.py +263 -0
  120. maxframe/dataframe/extensions/rebalance.py +62 -0
  121. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  122. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  123. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  124. maxframe/dataframe/groupby/__init__.py +17 -2
  125. maxframe/dataframe/groupby/aggregation.py +86 -49
  126. maxframe/dataframe/groupby/apply.py +1 -1
  127. maxframe/dataframe/groupby/apply_chunk.py +19 -5
  128. maxframe/dataframe/groupby/core.py +116 -16
  129. maxframe/dataframe/groupby/cum.py +4 -25
  130. maxframe/dataframe/groupby/expanding.py +264 -0
  131. maxframe/dataframe/groupby/fill.py +1 -1
  132. maxframe/dataframe/groupby/getitem.py +12 -5
  133. maxframe/dataframe/groupby/head.py +11 -1
  134. maxframe/dataframe/groupby/rank.py +136 -0
  135. maxframe/dataframe/groupby/rolling.py +206 -0
  136. maxframe/dataframe/groupby/shift.py +114 -0
  137. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  138. maxframe/dataframe/indexing/__init__.py +22 -2
  139. maxframe/dataframe/indexing/droplevel.py +195 -0
  140. maxframe/dataframe/indexing/filter.py +169 -0
  141. maxframe/dataframe/indexing/get_level_values.py +76 -0
  142. maxframe/dataframe/indexing/iat.py +45 -0
  143. maxframe/dataframe/indexing/iloc.py +152 -12
  144. maxframe/dataframe/indexing/insert.py +46 -18
  145. maxframe/dataframe/indexing/loc.py +287 -7
  146. maxframe/dataframe/indexing/reindex.py +14 -5
  147. maxframe/dataframe/indexing/rename.py +6 -0
  148. maxframe/dataframe/indexing/rename_axis.py +2 -2
  149. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  150. maxframe/dataframe/indexing/reset_index.py +33 -6
  151. maxframe/dataframe/indexing/sample.py +8 -0
  152. maxframe/dataframe/indexing/setitem.py +3 -3
  153. maxframe/dataframe/indexing/swaplevel.py +185 -0
  154. maxframe/dataframe/indexing/take.py +99 -0
  155. maxframe/dataframe/indexing/truncate.py +140 -0
  156. maxframe/dataframe/indexing/where.py +0 -11
  157. maxframe/dataframe/indexing/xs.py +148 -0
  158. maxframe/dataframe/merge/__init__.py +15 -1
  159. maxframe/dataframe/merge/append.py +97 -98
  160. maxframe/dataframe/merge/combine.py +244 -0
  161. maxframe/dataframe/merge/combine_first.py +120 -0
  162. maxframe/dataframe/merge/compare.py +387 -0
  163. maxframe/dataframe/merge/concat.py +183 -0
  164. maxframe/dataframe/merge/update.py +271 -0
  165. maxframe/dataframe/misc/__init__.py +28 -11
  166. maxframe/dataframe/misc/_duplicate.py +10 -4
  167. maxframe/dataframe/misc/apply.py +1 -1
  168. maxframe/dataframe/misc/check_unique.py +82 -0
  169. maxframe/dataframe/misc/clip.py +145 -0
  170. maxframe/dataframe/misc/describe.py +175 -9
  171. maxframe/dataframe/misc/drop.py +31 -0
  172. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  173. maxframe/dataframe/misc/duplicated.py +2 -2
  174. maxframe/dataframe/misc/get_dummies.py +5 -1
  175. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  176. maxframe/dataframe/misc/isin.py +2 -2
  177. maxframe/dataframe/misc/map.py +125 -18
  178. maxframe/dataframe/misc/repeat.py +159 -0
  179. maxframe/dataframe/misc/tests/test_misc.py +48 -3
  180. maxframe/dataframe/misc/to_numeric.py +3 -0
  181. maxframe/dataframe/misc/transform.py +12 -5
  182. maxframe/dataframe/misc/transpose.py +13 -1
  183. maxframe/dataframe/misc/valid_index.py +115 -0
  184. maxframe/dataframe/misc/value_counts.py +38 -4
  185. maxframe/dataframe/missing/checkna.py +14 -6
  186. maxframe/dataframe/missing/dropna.py +5 -0
  187. maxframe/dataframe/missing/fillna.py +1 -1
  188. maxframe/dataframe/missing/replace.py +7 -4
  189. maxframe/dataframe/reduction/__init__.py +35 -16
  190. maxframe/dataframe/reduction/aggregation.py +43 -14
  191. maxframe/dataframe/reduction/all.py +2 -2
  192. maxframe/dataframe/reduction/any.py +2 -2
  193. maxframe/dataframe/reduction/argmax.py +103 -0
  194. maxframe/dataframe/reduction/argmin.py +103 -0
  195. maxframe/dataframe/reduction/core.py +80 -24
  196. maxframe/dataframe/reduction/count.py +13 -9
  197. maxframe/dataframe/reduction/cov.py +166 -0
  198. maxframe/dataframe/reduction/cummax.py +2 -2
  199. maxframe/dataframe/reduction/cummin.py +2 -2
  200. maxframe/dataframe/reduction/cumprod.py +2 -2
  201. maxframe/dataframe/reduction/cumsum.py +2 -2
  202. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  203. maxframe/dataframe/reduction/idxmax.py +185 -0
  204. maxframe/dataframe/reduction/idxmin.py +185 -0
  205. maxframe/dataframe/reduction/kurtosis.py +37 -30
  206. maxframe/dataframe/reduction/max.py +2 -2
  207. maxframe/dataframe/reduction/mean.py +9 -7
  208. maxframe/dataframe/reduction/median.py +2 -2
  209. maxframe/dataframe/reduction/min.py +2 -2
  210. maxframe/dataframe/reduction/mode.py +144 -0
  211. maxframe/dataframe/reduction/nunique.py +19 -11
  212. maxframe/dataframe/reduction/prod.py +18 -13
  213. maxframe/dataframe/reduction/reduction_size.py +2 -2
  214. maxframe/dataframe/reduction/sem.py +13 -9
  215. maxframe/dataframe/reduction/skew.py +31 -27
  216. maxframe/dataframe/reduction/str_concat.py +10 -7
  217. maxframe/dataframe/reduction/sum.py +18 -14
  218. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  219. maxframe/dataframe/reduction/unique.py +20 -3
  220. maxframe/dataframe/reduction/var.py +16 -12
  221. maxframe/dataframe/reshape/__init__.py +38 -0
  222. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  223. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  224. maxframe/dataframe/reshape/unstack.py +114 -0
  225. maxframe/dataframe/sort/__init__.py +16 -1
  226. maxframe/dataframe/sort/argsort.py +68 -0
  227. maxframe/dataframe/sort/core.py +2 -1
  228. maxframe/dataframe/sort/nlargest.py +238 -0
  229. maxframe/dataframe/sort/nsmallest.py +228 -0
  230. maxframe/dataframe/sort/rank.py +147 -0
  231. maxframe/dataframe/statistics/__init__.py +3 -3
  232. maxframe/dataframe/statistics/corr.py +1 -0
  233. maxframe/dataframe/statistics/quantile.py +2 -2
  234. maxframe/dataframe/tests/test_typing.py +104 -0
  235. maxframe/dataframe/tests/test_utils.py +66 -2
  236. maxframe/dataframe/tseries/__init__.py +19 -0
  237. maxframe/dataframe/tseries/at_time.py +61 -0
  238. maxframe/dataframe/tseries/between_time.py +122 -0
  239. maxframe/dataframe/typing_.py +185 -0
  240. maxframe/dataframe/utils.py +125 -52
  241. maxframe/dataframe/window/aggregation.py +8 -4
  242. maxframe/dataframe/window/core.py +14 -1
  243. maxframe/dataframe/window/ewm.py +1 -3
  244. maxframe/dataframe/window/expanding.py +37 -35
  245. maxframe/dataframe/window/rolling.py +49 -39
  246. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  247. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  248. maxframe/env.py +7 -4
  249. maxframe/errors.py +2 -2
  250. maxframe/io/odpsio/schema.py +9 -3
  251. maxframe/io/odpsio/tableio.py +7 -2
  252. maxframe/io/odpsio/tests/test_schema.py +198 -83
  253. maxframe/learn/__init__.py +10 -2
  254. maxframe/learn/cluster/__init__.py +15 -0
  255. maxframe/learn/cluster/_kmeans.py +782 -0
  256. maxframe/learn/contrib/llm/core.py +18 -7
  257. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  258. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  259. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  260. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  261. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  262. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  263. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  264. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  265. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  266. maxframe/learn/contrib/llm/models/managed.py +76 -11
  267. maxframe/learn/contrib/llm/models/openai.py +72 -0
  268. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  269. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  270. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  271. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  272. maxframe/learn/contrib/llm/text.py +348 -42
  273. maxframe/learn/contrib/models.py +4 -1
  274. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  275. maxframe/learn/contrib/xgboost/core.py +113 -4
  276. maxframe/learn/contrib/xgboost/predict.py +4 -2
  277. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  278. maxframe/learn/contrib/xgboost/train.py +7 -2
  279. maxframe/learn/core.py +66 -0
  280. maxframe/learn/linear_model/_base.py +58 -1
  281. maxframe/learn/linear_model/_lin_reg.py +1 -1
  282. maxframe/learn/metrics/__init__.py +6 -0
  283. maxframe/learn/metrics/_classification.py +145 -0
  284. maxframe/learn/metrics/_ranking.py +477 -0
  285. maxframe/learn/metrics/_scorer.py +60 -0
  286. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  287. maxframe/learn/metrics/pairwise/core.py +77 -0
  288. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  289. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  290. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  291. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  292. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  293. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  294. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  295. maxframe/learn/metrics/tests/__init__.py +13 -0
  296. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  297. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  298. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  299. maxframe/learn/utils/__init__.py +2 -1
  300. maxframe/learn/utils/checks.py +1 -2
  301. maxframe/learn/utils/core.py +59 -0
  302. maxframe/learn/utils/extmath.py +79 -9
  303. maxframe/learn/utils/odpsio.py +262 -0
  304. maxframe/learn/utils/validation.py +2 -2
  305. maxframe/lib/compat.py +40 -0
  306. maxframe/lib/dtypes_extension/__init__.py +16 -1
  307. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  308. maxframe/lib/dtypes_extension/blob.py +304 -0
  309. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  310. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  311. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  312. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  313. maxframe/lib/filesystem/_oss_lib/common.py +124 -50
  314. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  315. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  316. maxframe/lib/filesystem/base.py +1 -1
  317. maxframe/lib/filesystem/core.py +1 -1
  318. maxframe/lib/filesystem/oss.py +115 -46
  319. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  320. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  321. maxframe/lib/wrapped_pickle.py +10 -0
  322. maxframe/opcodes.py +41 -15
  323. maxframe/protocol.py +12 -0
  324. maxframe/remote/core.py +4 -0
  325. maxframe/serialization/__init__.py +11 -2
  326. maxframe/serialization/arrow.py +38 -13
  327. maxframe/serialization/blob.py +32 -0
  328. maxframe/serialization/core.cp37-win32.pyd +0 -0
  329. maxframe/serialization/core.pyx +39 -1
  330. maxframe/serialization/exception.py +2 -4
  331. maxframe/serialization/numpy.py +11 -0
  332. maxframe/serialization/pandas.py +46 -9
  333. maxframe/serialization/serializables/core.py +2 -2
  334. maxframe/serialization/tests/test_serial.py +31 -4
  335. maxframe/tensor/__init__.py +38 -8
  336. maxframe/tensor/arithmetic/__init__.py +19 -10
  337. maxframe/tensor/arithmetic/core.py +2 -2
  338. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  339. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
  340. maxframe/tensor/core.py +6 -2
  341. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  342. maxframe/tensor/extensions/__init__.py +2 -0
  343. maxframe/tensor/extensions/apply_chunk.py +3 -3
  344. maxframe/tensor/extensions/rebalance.py +65 -0
  345. maxframe/tensor/fft/__init__.py +32 -0
  346. maxframe/tensor/fft/core.py +168 -0
  347. maxframe/tensor/fft/fft.py +112 -0
  348. maxframe/tensor/fft/fft2.py +118 -0
  349. maxframe/tensor/fft/fftfreq.py +80 -0
  350. maxframe/tensor/fft/fftn.py +123 -0
  351. maxframe/tensor/fft/fftshift.py +79 -0
  352. maxframe/tensor/fft/hfft.py +112 -0
  353. maxframe/tensor/fft/ifft.py +114 -0
  354. maxframe/tensor/fft/ifft2.py +115 -0
  355. maxframe/tensor/fft/ifftn.py +123 -0
  356. maxframe/tensor/fft/ifftshift.py +73 -0
  357. maxframe/tensor/fft/ihfft.py +93 -0
  358. maxframe/tensor/fft/irfft.py +118 -0
  359. maxframe/tensor/fft/irfft2.py +62 -0
  360. maxframe/tensor/fft/irfftn.py +114 -0
  361. maxframe/tensor/fft/rfft.py +116 -0
  362. maxframe/tensor/fft/rfft2.py +63 -0
  363. maxframe/tensor/fft/rfftfreq.py +87 -0
  364. maxframe/tensor/fft/rfftn.py +113 -0
  365. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  366. maxframe/tensor/linalg/__init__.py +7 -0
  367. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  368. maxframe/tensor/linalg/cholesky.py +117 -0
  369. maxframe/tensor/linalg/einsum.py +339 -0
  370. maxframe/tensor/linalg/lstsq.py +100 -0
  371. maxframe/tensor/linalg/matrix_norm.py +75 -0
  372. maxframe/tensor/linalg/norm.py +249 -0
  373. maxframe/tensor/linalg/solve.py +72 -0
  374. maxframe/tensor/linalg/solve_triangular.py +2 -2
  375. maxframe/tensor/linalg/vector_norm.py +113 -0
  376. maxframe/tensor/misc/__init__.py +24 -1
  377. maxframe/tensor/misc/argwhere.py +72 -0
  378. maxframe/tensor/misc/array_split.py +46 -0
  379. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  380. maxframe/tensor/misc/copyto.py +130 -0
  381. maxframe/tensor/misc/delete.py +104 -0
  382. maxframe/tensor/misc/dsplit.py +68 -0
  383. maxframe/tensor/misc/ediff1d.py +74 -0
  384. maxframe/tensor/misc/expand_dims.py +85 -0
  385. maxframe/tensor/misc/flip.py +90 -0
  386. maxframe/tensor/misc/fliplr.py +64 -0
  387. maxframe/tensor/misc/flipud.py +68 -0
  388. maxframe/tensor/misc/hsplit.py +85 -0
  389. maxframe/tensor/misc/insert.py +139 -0
  390. maxframe/tensor/misc/moveaxis.py +83 -0
  391. maxframe/tensor/misc/result_type.py +88 -0
  392. maxframe/tensor/misc/roll.py +124 -0
  393. maxframe/tensor/misc/rollaxis.py +77 -0
  394. maxframe/tensor/misc/shape.py +89 -0
  395. maxframe/tensor/misc/split.py +190 -0
  396. maxframe/tensor/misc/tile.py +109 -0
  397. maxframe/tensor/misc/vsplit.py +74 -0
  398. maxframe/tensor/reduction/array_equal.py +2 -1
  399. maxframe/tensor/sort/__init__.py +2 -0
  400. maxframe/tensor/sort/argpartition.py +98 -0
  401. maxframe/tensor/sort/partition.py +228 -0
  402. maxframe/tensor/spatial/__init__.py +15 -0
  403. maxframe/tensor/spatial/distance/__init__.py +17 -0
  404. maxframe/tensor/spatial/distance/cdist.py +421 -0
  405. maxframe/tensor/spatial/distance/pdist.py +398 -0
  406. maxframe/tensor/spatial/distance/squareform.py +153 -0
  407. maxframe/tensor/special/__init__.py +159 -21
  408. maxframe/tensor/special/airy.py +55 -0
  409. maxframe/tensor/special/bessel.py +199 -0
  410. maxframe/tensor/special/core.py +65 -4
  411. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  412. maxframe/tensor/special/ellip_harm.py +55 -0
  413. maxframe/tensor/special/err_fresnel.py +223 -0
  414. maxframe/tensor/special/gamma_funcs.py +303 -0
  415. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  416. maxframe/tensor/special/info_theory.py +189 -0
  417. maxframe/tensor/special/misc.py +21 -0
  418. maxframe/tensor/statistics/__init__.py +6 -0
  419. maxframe/tensor/statistics/corrcoef.py +77 -0
  420. maxframe/tensor/statistics/cov.py +222 -0
  421. maxframe/tensor/statistics/digitize.py +126 -0
  422. maxframe/tensor/statistics/histogram.py +520 -0
  423. maxframe/tensor/statistics/median.py +85 -0
  424. maxframe/tensor/statistics/ptp.py +89 -0
  425. maxframe/tensor/utils.py +3 -3
  426. maxframe/tests/test_udf.py +61 -0
  427. maxframe/tests/test_utils.py +51 -6
  428. maxframe/tests/utils.py +0 -2
  429. maxframe/typing_.py +2 -0
  430. maxframe/udf.py +130 -9
  431. maxframe/utils.py +254 -27
  432. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
  433. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
  434. maxframe_client/fetcher.py +35 -4
  435. maxframe_client/session/odps.py +7 -2
  436. maxframe_client/session/task.py +8 -1
  437. maxframe_client/tests/test_fetcher.py +76 -3
  438. maxframe_client/tests/test_session.py +28 -1
  439. maxframe/dataframe/arrays.py +0 -864
  440. maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  441. maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  442. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  443. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -25,21 +25,22 @@ from typing import TYPE_CHECKING, Any, Callable, List, Optional
25
25
 
26
26
  import numpy as np
27
27
  import pandas as pd
28
- from pandas.api.types import is_string_dtype
29
28
  from pandas.core.dtypes.inference import is_dict_like, is_list_like
30
29
 
31
- from ..core import Entity, ExecutableTuple, OutputType, get_output_types
30
+ from ..config.validators import dtype_backend_validator
31
+ from ..core import ENTITY_TYPE, Entity, ExecutableTuple, OutputType, get_output_types
32
+ from ..lib.dtypes_extension import ExternalBlobDtype, SolidBlob
32
33
  from ..lib.mmh3 import hash as mmh_hash
33
34
  from ..udf import MarkedFunction
34
35
  from ..utils import (
35
36
  ModulePlaceholder,
36
37
  is_full_slice,
37
38
  lazy_import,
38
- make_dtype,
39
39
  make_dtypes,
40
40
  quiet_stdio,
41
41
  sbytes,
42
42
  tokenize,
43
+ validate_and_adjust_resource_ratio,
43
44
  )
44
45
 
45
46
  if TYPE_CHECKING:
@@ -57,7 +58,7 @@ cudf = lazy_import("cudf", rename="cudf")
57
58
  logger = logging.getLogger(__name__)
58
59
 
59
60
  try:
60
- from pandas import ArrowDtype
61
+ from ..lib.dtypes_extension import ArrowDtype
61
62
  except ImportError:
62
63
  ArrowDtype = None
63
64
 
@@ -103,9 +104,9 @@ def hash_dtypes(dtypes, size):
103
104
  return [dtypes[index] for index in hashed_indexes]
104
105
 
105
106
 
106
- def sort_dataframe_inplace(df, *axis):
107
+ def sort_dataframe_inplace(df, *axis, **kw):
107
108
  for ax in axis:
108
- df.sort_index(axis=ax, inplace=True)
109
+ df.sort_index(axis=ax, inplace=True, **kw)
109
110
  return df
110
111
 
111
112
 
@@ -456,7 +457,7 @@ def build_split_idx_to_origin_idx(splits, increase=True):
456
457
 
457
458
 
458
459
  def _generate_value(dtype, fill_value):
459
- if ArrowDtype and isinstance(dtype, pd.ArrowDtype):
460
+ if ArrowDtype and isinstance(dtype, ArrowDtype):
460
461
  return _generate_value(dtype.pyarrow_dtype, fill_value)
461
462
 
462
463
  if isinstance(dtype, pa.ListType):
@@ -470,9 +471,19 @@ def _generate_value(dtype, fill_value):
470
471
  )
471
472
  ]
472
473
 
474
+ if isinstance(dtype, pa.StructType):
475
+ result = {}
476
+ for i in range(dtype.num_fields):
477
+ field = dtype[i]
478
+ result[field.name] = _generate_value(field.type, fill_value)
479
+ return result
480
+
473
481
  if isinstance(dtype, pa.DataType):
474
482
  return _generate_value(dtype.to_pandas_dtype(), fill_value)
475
483
 
484
+ if isinstance(dtype, ExternalBlobDtype):
485
+ return SolidBlob(str(fill_value).encode())
486
+
476
487
  # special handle for datetime64 and timedelta64
477
488
  dispatch = {
478
489
  np.datetime64: pd.Timestamp,
@@ -1012,27 +1023,21 @@ def create_sa_connection(con, **kwargs):
1012
1023
  engine.dispose()
1013
1024
 
1014
1025
 
1015
- def to_arrow_dtypes(dtypes, test_df=None):
1016
- from .arrays import ArrowStringDtype
1026
+ def to_arrow_dtypes(dtypes):
1027
+ from ..io.odpsio.schema import pandas_dtypes_to_arrow_schema
1017
1028
 
1029
+ arrow_schema = pandas_dtypes_to_arrow_schema(dtypes)
1018
1030
  new_dtypes = dtypes.copy()
1019
1031
  for i in range(len(dtypes)):
1020
- dtype = dtypes.iloc[i]
1021
- if is_string_dtype(dtype):
1022
- if test_df is not None:
1023
- series = test_df.iloc[:, i]
1024
- # check value
1025
- non_na_series = series[series.notna()]
1026
- if len(non_na_series) > 0:
1027
- first_value = non_na_series.iloc[0]
1028
- if isinstance(first_value, str):
1029
- new_dtypes.iloc[i] = ArrowStringDtype()
1030
- else: # pragma: no cover
1031
- # empty, set arrow string dtype
1032
- new_dtypes.iloc[i] = ArrowStringDtype()
1033
- else:
1034
- # empty, set arrow string dtype
1035
- new_dtypes.iloc[i] = ArrowStringDtype()
1032
+ arrow_type = arrow_schema.types[i]
1033
+ dt = dtypes.iloc[i]
1034
+ if isinstance(dt, pd.api.extensions.ExtensionDtype):
1035
+ # make existing extension dtype consistent
1036
+ new_dtypes.iloc[i] = dt
1037
+ elif arrow_type == pa.string():
1038
+ new_dtypes.iloc[i] = pd.StringDtype("pyarrow")
1039
+ else:
1040
+ new_dtypes.iloc[i] = ArrowDtype(arrow_type)
1036
1041
  return new_dtypes
1037
1042
 
1038
1043
 
@@ -1305,7 +1310,7 @@ def pack_func_args(df, funcs, *args, args_bind_position=1, **kwargs) -> Any:
1305
1310
  if is_dict_like(funcs):
1306
1311
  return {k: pack_func_args(df, v, *args, **kwargs) for k, v in funcs.items()}
1307
1312
 
1308
- if is_list_like(funcs):
1313
+ if is_list_like(funcs) and not isinstance(funcs, ENTITY_TYPE):
1309
1314
  return [pack_func_args(df, v, *args, **kwargs) for v in funcs]
1310
1315
 
1311
1316
  f = get_callable_by_name(df, funcs) if isinstance(funcs, str) else funcs
@@ -1406,23 +1411,54 @@ def infer_dataframe_return_value(
1406
1411
  inherit_index=False,
1407
1412
  build_kw=None,
1408
1413
  elementwise=None,
1414
+ skip_infer=False,
1409
1415
  ) -> InferredDataFrameMeta:
1410
- from .core import GROUPBY_TYPE
1416
+ from .core import GROUPBY_TYPE, INDEX_TYPE
1417
+ from .typing_ import get_function_output_meta
1418
+
1419
+ unwrapped_func = func
1420
+ if isinstance(unwrapped_func, MarkedFunction):
1421
+ unwrapped_func = unwrapped_func.func
1422
+ while True:
1423
+ if isinstance(unwrapped_func, functools.partial):
1424
+ unwrapped_func = unwrapped_func.func
1425
+ elif hasattr(unwrapped_func, "__wrapped__"):
1426
+ unwrapped_func = unwrapped_func.__wrapped__
1427
+ else:
1428
+ break
1429
+
1430
+ func_annotation_meta = get_function_output_meta(unwrapped_func, df_obj)
1431
+ func_index_value = None
1432
+ if func_annotation_meta:
1433
+ output_type = output_type or func_annotation_meta.output_type
1434
+ dtypes = dtypes if dtypes is not None else func_annotation_meta.dtypes
1435
+ dtype = dtype if dtype is not None else func_annotation_meta.dtype
1436
+ name = name if name is not None else func_annotation_meta.name
1437
+ func_index_value = func_annotation_meta.index_value
1438
+
1439
+ if skip_infer:
1440
+ if isinstance(index, INDEX_TYPE):
1441
+ ret_index_value = index.index_value
1442
+ elif index is not None:
1443
+ ret_index_value = parse_index(index, df_obj.key)
1444
+ else:
1445
+ ret_index_value = func_index_value
1446
+
1447
+ return InferredDataFrameMeta(
1448
+ output_type=output_type,
1449
+ dtypes=dtypes,
1450
+ dtype=dtype,
1451
+ name=name,
1452
+ index_value=ret_index_value,
1453
+ )
1454
+
1455
+ if isinstance(index, INDEX_TYPE):
1456
+ index = index.index_value
1411
1457
 
1412
1458
  if elementwise is None:
1413
- unwrapped_func = func
1414
- if isinstance(unwrapped_func, MarkedFunction):
1415
- unwrapped_func = unwrapped_func.func
1416
- while True:
1417
- if isinstance(unwrapped_func, functools.partial):
1418
- unwrapped_func = unwrapped_func.func
1419
- elif hasattr(unwrapped_func, "__wrapped__"):
1420
- unwrapped_func = unwrapped_func.__wrapped__
1421
- else:
1422
- break
1423
1459
  elementwise = isinstance(unwrapped_func, np.ufunc)
1424
1460
 
1425
- ret_index_value = None
1461
+ ret_index_value = func_index_value
1426
1462
  if output_type is not None and (dtypes is not None or dtype is not None):
1427
1463
  if inherit_index:
1428
1464
  ret_index_value = df_obj.index_value
@@ -1439,7 +1475,8 @@ def infer_dataframe_return_value(
1439
1475
  elementwise=elementwise or False,
1440
1476
  )
1441
1477
 
1442
- ret_output_type = ret_dtypes = None
1478
+ ret_output_type = None
1479
+ ret_dtypes = dtypes
1443
1480
  maybe_agg = False
1444
1481
  build_kw = build_kw or {}
1445
1482
  obj_key = df_obj.key
@@ -1486,7 +1523,8 @@ def infer_dataframe_return_value(
1486
1523
  f'please specify `output_type` as "dataframe"'
1487
1524
  )
1488
1525
  ret_output_type = ret_output_type or OutputType.dataframe
1489
- ret_dtypes = ret_dtypes or infer_df_obj.dtypes
1526
+ if ret_dtypes is None:
1527
+ ret_dtypes = infer_df_obj.dtypes
1490
1528
  else:
1491
1529
  if output_type is not None and output_type == OutputType.dataframe:
1492
1530
  raise TypeError(
@@ -1506,7 +1544,7 @@ def infer_dataframe_return_value(
1506
1544
  return InferredDataFrameMeta(
1507
1545
  ret_output_type,
1508
1546
  make_dtypes(ret_dtypes),
1509
- make_dtype(dtype),
1547
+ make_dtypes(dtype),
1510
1548
  name,
1511
1549
  ret_index_value,
1512
1550
  maybe_agg,
@@ -1519,7 +1557,7 @@ def infer_dataframe_return_value(
1519
1557
  return InferredDataFrameMeta(
1520
1558
  output_type,
1521
1559
  make_dtypes(dtypes),
1522
- make_dtype(dtype),
1560
+ make_dtypes(dtype),
1523
1561
  name,
1524
1562
  ret_index_value,
1525
1563
  maybe_agg,
@@ -1530,20 +1568,37 @@ def infer_dataframe_return_value(
1530
1568
  def copy_func_scheduling_hints(func, op: "DataFrameOperator") -> None:
1531
1569
  from ..config import options
1532
1570
 
1533
- if not isinstance(func, MarkedFunction):
1534
- return
1535
- if func.expect_engine:
1536
- op.expect_engine = func.expect_engine
1571
+ expect_engine = None
1572
+ expect_gpu = None
1573
+ default_options = options.function.default_running_options or {}
1537
1574
 
1538
- expect_resources = func.expect_resources or {}
1539
- default_function_running_options = options.function.default_running_options or {}
1575
+ if isinstance(func, MarkedFunction):
1576
+ # copy from marked function
1577
+ expect_engine = func.expect_engine
1578
+ expect_resources = func.expect_resources or {}
1579
+ expect_gpu = func.gpu
1540
1580
 
1541
- for key, value in default_function_running_options.items():
1542
- if key not in expect_resources or expect_resources.get(key) is None:
1543
- expect_resources[key] = value
1581
+ # merge default options if not set
1582
+ for key, value in default_options.items():
1583
+ if key not in expect_resources or expect_resources.get(key) is None:
1584
+ expect_resources[key] = value
1585
+ else:
1586
+ # copy from default options
1587
+ expect_resources = default_options
1588
+
1589
+ # Validate and adjust resource ratio constraints on client side
1590
+ expect_resources, _ = validate_and_adjust_resource_ratio(
1591
+ expect_resources,
1592
+ max_memory_cpu_ratio=options.function.allowed_max_memory_cpu_ratio,
1593
+ adjust=True,
1594
+ )
1544
1595
 
1545
- if func.expect_resources:
1596
+ if expect_engine:
1597
+ op.expect_engine = expect_engine
1598
+ if expect_resources:
1546
1599
  op.expect_resources = expect_resources
1600
+ if expect_gpu:
1601
+ op.gpu = expect_gpu
1547
1602
 
1548
1603
 
1549
1604
  def make_column_list(col, dtypes_or_columns, level=None):
@@ -1576,3 +1631,21 @@ def make_column_list(col, dtypes_or_columns, level=None):
1576
1631
  return idx[mask]
1577
1632
  except (IndexError, TypeError, ValueError):
1578
1633
  return col
1634
+
1635
+
1636
def call_groupby_with_params(df_or_series, groupby_params: dict):
    """
    Call ``groupby`` on an object using a parameter dict.

    The optional ``"selection"`` entry is not a ``groupby`` argument; it is
    removed from a copy of the parameters and applied afterwards via
    ``__getitem__`` on the groupby result.  The caller's dict is left
    untouched.

    Parameters
    ----------
    df_or_series
        Object exposing a ``groupby(**kwargs)`` method.
    groupby_params : dict
        Keyword arguments for ``groupby`` plus an optional ``"selection"``.

    Returns
    -------
    The groupby object, narrowed to ``selection`` when one is given.
    """
    params = groupby_params.copy()
    selection = params.pop("selection", None)
    res = df_or_series.groupby(**params)

    # An absent selection is None; an empty list/tuple is also treated as
    # "nothing selected".  Any other value is a real selection, including
    # falsy labels such as 0 or "" and array-likes whose truth value is
    # ambiguous.
    no_selection = selection is None or (
        isinstance(selection, (list, tuple)) and len(selection) == 0
    )
    if no_selection:
        return res
    return res[selection]
1643
+
1644
+
1645
def validate_dtype_backend(value):
    """
    Normalize and validate a ``dtype_backend`` setting.

    Boolean inputs are mapped to ``"pyarrow"`` (``True``) or ``"numpy"``
    (``False``) before validation; other values are validated unchanged.

    Returns
    -------
    The normalized value.

    Raises
    ------
    ValueError
        If ``dtype_backend_validator`` rejects the normalized value.
    """
    backend = value
    if isinstance(backend, bool):
        # compatibility for legacy use_arrow_dtype property
        backend = "pyarrow" if backend else "numpy"

    if dtype_backend_validator(backend):
        return backend
    raise ValueError(f"Invalid dtype_backend: {backend}")
@@ -19,14 +19,14 @@ import numpy as np
19
19
  import pandas as pd
20
20
 
21
21
  from ...serialization.serializables import AnyField, BoolField, Int32Field, Int64Field
22
- from ..core import DATAFRAME_TYPE
22
+ from ..core import DATAFRAME_TYPE, ENTITY_TYPE
23
23
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
24
24
  from ..utils import build_df, build_empty_series, parse_index
25
25
 
26
26
 
27
27
  class BaseDataFrameExpandingAgg(DataFrameOperator, DataFrameOperatorMixin):
28
28
  min_periods = Int64Field("min_periods", default=None)
29
- axis = Int32Field("axis", default=None)
29
+ axis = Int32Field("axis", default=0)
30
30
  func = AnyField("func", default=None)
31
31
 
32
32
  # always treat count as valid. this behavior is cancelled in pandas 1.0
@@ -52,7 +52,7 @@ class BaseDataFrameExpandingAgg(DataFrameOperator, DataFrameOperatorMixin):
52
52
  index_value = parse_index(
53
53
  test_df.index, expanding.params, inp, store_data=False
54
54
  )
55
- self._append_index = test_df.columns.nlevels != empty_df.columns.nlevels
55
+ self.append_index = test_df.columns.nlevels != empty_df.columns.nlevels
56
56
  return self.new_dataframe(
57
57
  [inp],
58
58
  shape=(inp.shape[0], test_df.shape[1]),
@@ -92,5 +92,9 @@ class BaseDataFrameExpandingAgg(DataFrameOperator, DataFrameOperatorMixin):
92
92
  else:
93
93
  new_func[k] = v
94
94
  self.func = new_func
95
- elif isinstance(self.func, Iterable) and not isinstance(self.func, str):
95
+ elif (
96
+ isinstance(self.func, Iterable)
97
+ and not isinstance(self.func, ENTITY_TYPE)
98
+ and not isinstance(self.func, str)
99
+ ):
96
100
  self.func = list(self.func)
@@ -12,11 +12,24 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from ...serialization.serializables import KeyField, Serializable
15
+ from ...serialization.serializables import FieldTypes, KeyField, ListField, Serializable
16
16
 
17
17
 
18
18
  class Window(Serializable):
19
+ _mf_specific_fields = ["order_cols", "ascending"]
20
+
19
21
  input = KeyField("input", default=None)
22
+ order_cols = ListField("order_cols", default=None)
23
+ ascending = ListField("ascending", FieldTypes.bool, default=None)
24
+
25
def __init__(self, *, order_cols=None, ascending=True, **kwargs):
    """
    Initialize the window, normalizing the ordering arguments to lists.

    Parameters
    ----------
    order_cols
        A single column or a list of columns; a truthy non-list value is
        wrapped into a one-element list.
    ascending
        A single flag or a list of flags.  A single flag is applied to
        every ordering column; a list must have the same length as
        ``order_cols``.

    Raises
    ------
    ValueError
        If ``ascending`` is a list whose length differs from ``order_cols``.
    """
    if order_cols and not isinstance(order_cols, list):
        order_cols = [order_cols]
    if not isinstance(ascending, list):
        # Broadcast a scalar flag to every ordering column so both lists
        # always agree in length, as already enforced for list inputs.
        ascending = [ascending] * (len(order_cols) if order_cols else 1)
    elif order_cols and len(order_cols) != len(ascending):
        raise ValueError("order_cols and ascending must have same length")
    super().__init__(order_cols=order_cols, ascending=ascending, **kwargs)
20
33
 
21
34
  @property
22
35
  def params(self):
@@ -36,8 +36,6 @@ _window_has_method = pd_release_version >= (1, 4, 0)
36
36
  class DataFrameEwmAgg(BaseDataFrameExpandingAgg):
37
37
  _op_type_ = opcodes.EWM_AGG
38
38
 
39
- _exec_cache = dict()
40
-
41
39
  alpha = Float64Field("alpha")
42
40
  adjust = BoolField("adjust")
43
41
  alpha_ignore_na = BoolField("alpha_ignore_na")
@@ -234,7 +232,7 @@ def ewm(
234
232
  raise ValueError("alpha must satisfy: 0 < alpha <= 1")
235
233
 
236
234
  if alpha == 1:
237
- return obj.expanding(min_periods=min_periods, axis=axis)
235
+ return obj.expanding(min_periods=min_periods)
238
236
 
239
237
  if _default_min_period_1:
240
238
  min_periods = min_periods or 1
@@ -12,49 +12,53 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
-
16
15
  from collections import OrderedDict
17
16
 
18
17
  from ... import opcodes
19
- from ...serialization.serializables import (
20
- BoolField,
21
- Int32Field,
22
- Int64Field,
23
- StringField,
24
- )
25
- from ...utils import pd_release_version
26
- from ..utils import validate_axis
18
+ from ...serialization.serializables import BoolField, Int64Field
27
19
  from .aggregation import BaseDataFrameExpandingAgg
28
20
  from .core import Window
29
21
 
30
- _window_has_method = pd_release_version >= (1, 3, 0)
31
- _window_has_center = pd_release_version < (2, 0, 0)
32
-
33
22
 
34
23
  class DataFrameExpandingAgg(BaseDataFrameExpandingAgg):
35
24
  _op_type_ = opcodes.EXPANDING_AGG
36
25
 
37
- center = BoolField("center", default=None)
26
+ def __init__(self, *args, **kw):
27
+ # suspend MF-specific args by now
28
+ for key in Expanding._mf_specific_fields:
29
+ kw.pop(key, None)
30
+ super().__init__(*args, **kw)
38
31
 
39
32
 
40
33
  class Expanding(Window):
34
+ _mf_specific_fields = Window._mf_specific_fields + ["shift", "reverse_range"]
35
+
41
36
  min_periods = Int64Field("min_periods")
42
- axis = Int32Field("axis")
43
- center = BoolField("center")
44
- method = StringField("method", default="single")
37
+ # MF specific argument for position shift of window
38
+ shift = Int64Field("shift", default=None)
39
+ # MF specific argument for reversed window (sort of "narrowing")
40
+ reverse_range = BoolField("reverse_range", default=False)
45
41
 
46
42
  def __call__(self, df):
47
- return df.expanding(**self.params)
43
+ try:
44
+ return df.expanding(**self.params)
45
+ except TypeError:
46
+ params = (self.params or dict()).copy()
47
+ for key in self._mf_specific_fields:
48
+ params.pop(key, None)
49
+ return df.expanding(**params)
48
50
 
49
51
  @property
50
52
  def params(self):
51
53
  p = OrderedDict()
52
54
 
53
- args = ["min_periods", "center", "axis", "method"]
54
- if not _window_has_method: # pragma: no cover
55
- args = [a for a in args if a != "method"]
56
- if not _window_has_center:
57
- args = [a for a in args if a != "center"]
55
+ args = [
56
+ "min_periods",
57
+ "shift",
58
+ "reverse_range",
59
+ "order_cols",
60
+ "ascending",
61
+ ]
58
62
 
59
63
  for k in args:
60
64
  p[k] = getattr(self, k)
@@ -73,6 +77,9 @@ class Expanding(Window):
73
77
  def sum(self):
74
78
  return self.aggregate("sum")
75
79
 
80
+ def prod(self):
81
+ return self.aggregate("prod")
82
+
76
83
  def count(self):
77
84
  return self.aggregate("count")
78
85
 
@@ -85,14 +92,14 @@ class Expanding(Window):
85
92
  def mean(self):
86
93
  return self.aggregate("mean")
87
94
 
88
- def var(self):
89
- return self.aggregate("var")
95
+ def var(self, **kwargs):
96
+ return self.aggregate("var", **kwargs)
90
97
 
91
- def std(self):
92
- return self.aggregate("std")
98
+ def std(self, **kwargs):
99
+ return self.aggregate("std", **kwargs)
93
100
 
94
101
 
95
- def expanding(obj, min_periods=1, center=False, axis=0):
102
+ def expanding(obj, min_periods=1, shift=0, reverse_range=False):
96
103
  """
97
104
  Provide expanding transformations.
98
105
 
@@ -139,11 +146,6 @@ def expanding(obj, min_periods=1, center=False, axis=0):
139
146
  3 3.0
140
147
  4 7.0
141
148
  """
142
- axis = validate_axis(axis, obj)
143
-
144
- if center:
145
- raise NotImplementedError("center == True is not supported")
146
- if axis == 1:
147
- raise NotImplementedError("axis other than 0 is not supported")
148
-
149
- return Expanding(input=obj, min_periods=min_periods, center=center, axis=axis)
149
+ return Expanding(
150
+ input=obj, min_periods=min_periods, shift=shift, reverse_range=reverse_range
151
+ )
@@ -23,38 +23,42 @@ from ...serialization.serializables import (
23
23
  AnyField,
24
24
  BoolField,
25
25
  DictField,
26
+ FieldTypes,
26
27
  Int32Field,
27
28
  Int64Field,
28
29
  KeyField,
30
+ ListField,
29
31
  StringField,
30
32
  TupleField,
31
33
  )
32
- from ...utils import pd_release_version
33
34
  from ..core import DATAFRAME_TYPE
34
35
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
35
36
  from ..utils import build_empty_df, build_empty_series, parse_index, validate_axis
36
37
  from .core import Window
37
38
 
38
- _window_has_method = pd_release_version >= (1, 3, 0)
39
- _with_pandas_issue_38908 = pd_release_version == (1, 2, 0)
40
-
41
39
 
42
40
  class DataFrameRollingAgg(DataFrameOperator, DataFrameOperatorMixin):
43
41
  _op_type_ = opcodes.ROLLING_AGG
44
42
 
45
43
  input = KeyField("input")
46
- window = AnyField("window")
47
- min_periods = Int64Field("min_periods")
48
- center = BoolField("center")
49
- win_type = StringField("win_type")
50
- on = StringField("on")
51
- axis = Int32Field("axis")
52
- closed = StringField("closed")
53
- func = AnyField("func")
54
- func_args = TupleField("func_args")
55
- func_kwargs = DictField("func_kwargs")
44
+ window = AnyField("window", default=None)
45
+ min_periods = Int64Field("min_periods", default=None)
46
+ center = BoolField("center", default=None)
47
+ win_type = StringField("win_type", default=None)
48
+ on = StringField("on", default=None)
49
+ axis = Int32Field("axis", default=None)
50
+ closed = StringField("closed", default=None)
51
+ func = AnyField("func", default=None)
52
+ func_args = TupleField("func_args", default=None)
53
+ func_kwargs = DictField("func_kwargs", default=None)
54
+ # for chunks
55
+ preds = ListField("preds", FieldTypes.key, default=None)
56
+ succs = ListField("succs", FieldTypes.key, default=None)
56
57
 
57
58
  def __init__(self, output_types=None, **kw):
59
+ # suspend MF-specific args by now
60
+ for key in Rolling._mf_specific_fields:
61
+ kw.pop(key, None)
58
62
  super().__init__(_output_types=output_types, **kw)
59
63
 
60
64
  @classmethod
@@ -62,6 +66,10 @@ class DataFrameRollingAgg(DataFrameOperator, DataFrameOperatorMixin):
62
66
  super()._set_inputs(op, inputs)
63
67
  input_iter = iter(op._inputs)
64
68
  op.input = next(input_iter)
69
+ if op.preds is not None:
70
+ op.preds = [next(input_iter) for _ in op.preds]
71
+ if op.succs is not None:
72
+ op.succs = [next(input_iter) for _ in op.succs]
65
73
 
66
74
  def __call__(self, rolling):
67
75
  inp = rolling.input
@@ -74,6 +82,8 @@ class DataFrameRollingAgg(DataFrameOperator, DataFrameOperatorMixin):
74
82
  params["win_type"] = None
75
83
  if self.func != "count":
76
84
  empty_df = empty_df._get_numeric_data()
85
+ for key in Rolling._mf_specific_fields:
86
+ params.pop(key, None)
77
87
  test_df = empty_df.rolling(**params).agg(self.func)
78
88
  if self.axis == 0:
79
89
  index_value = inp.index_value
@@ -93,7 +103,10 @@ class DataFrameRollingAgg(DataFrameOperator, DataFrameOperatorMixin):
93
103
  empty_series = build_empty_series(
94
104
  inp.dtype, index=pd_index[:0], name=inp.name
95
105
  )
96
- test_obj = empty_series.rolling(**rolling.params).agg(self.func)
106
+ rolling_params = rolling.params.copy()
107
+ for k in Rolling._mf_specific_fields:
108
+ rolling_params.pop(k, None)
109
+ test_obj = empty_series.rolling(**rolling_params).agg(self.func)
97
110
  if isinstance(test_obj, pd.DataFrame):
98
111
  return self.new_dataframe(
99
112
  [inp],
@@ -113,6 +126,8 @@ class DataFrameRollingAgg(DataFrameOperator, DataFrameOperatorMixin):
113
126
 
114
127
 
115
128
  class Rolling(Window):
129
+ _mf_specific_fields = Window._mf_specific_fields + ["shift"]
130
+
116
131
  window = AnyField("window", default=None)
117
132
  min_periods = Int64Field("min_periods", default=None)
118
133
  center = BoolField("center", default=None)
@@ -120,33 +135,25 @@ class Rolling(Window):
120
135
  on = StringField("on", default=None)
121
136
  axis = Int32Field("axis", default=None)
122
137
  closed = StringField("closed", default=None)
123
- method = StringField("method", default="single")
138
+ # MF specific argument for position shift of window
139
+ shift = Int64Field("shift", default=None)
124
140
 
125
141
  @property
126
142
  def params(self):
127
143
  p = OrderedDict()
128
144
 
129
- if not _window_has_method: # pragma: no cover
130
- args = [
131
- "window",
132
- "min_periods",
133
- "center",
134
- "win_type",
135
- "axis",
136
- "on",
137
- "closed",
138
- ]
139
- else:
140
- args = [
141
- "window",
142
- "min_periods",
143
- "center",
144
- "win_type",
145
- "axis",
146
- "on",
147
- "closed",
148
- "method",
149
- ]
145
+ args = [
146
+ "window",
147
+ "min_periods",
148
+ "center",
149
+ "win_type",
150
+ "axis",
151
+ "on",
152
+ "closed",
153
+ "shift",
154
+ "order_cols",
155
+ "ascending",
156
+ ]
150
157
 
151
158
  for attr in args:
152
159
  p[attr] = getattr(self, attr)
@@ -164,8 +171,11 @@ class Rolling(Window):
164
171
  empty_obj = build_empty_series(
165
172
  self.input.dtype, index=pd_index[:0], name=self.input.name
166
173
  )
167
- pd_rolling = empty_obj.rolling(**self.params)
168
- for k in self.params:
174
+ params = (self.params or dict()).copy()
175
+ for key in self._mf_specific_fields:
176
+ params.pop(key, None)
177
+ pd_rolling = empty_obj.rolling(**params)
178
+ for k in params:
169
179
  # update value according to pandas rolling
170
180
  setattr(self, k, getattr(pd_rolling, k))
171
181
 
@@ -23,15 +23,9 @@ def test_expanding():
23
23
  df = pd.DataFrame(np.random.rand(4, 3), columns=list("abc"))
24
24
  df2 = md.DataFrame(df)
25
25
 
26
- with pytest.raises(NotImplementedError):
27
- _ = df2.expanding(3, center=True)
28
-
29
- with pytest.raises(NotImplementedError):
30
- _ = df2.expanding(3, axis=1)
31
-
32
26
  r = df2.expanding(3)
33
27
  expected = df.expanding(3)
34
- assert repr(r) == repr(expected)
28
+ assert repr(r).split(",", 1)[0] == repr(expected).split(",", 1)[0]
35
29
 
36
30
  assert "b" in dir(r)
37
31