maxframe 2.0.0b2__cp311-cp311-win_amd64.whl → 2.2.0__cp311-cp311-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (391)
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp311-win_amd64.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +6 -6
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +34 -1
  15. maxframe/codegen/spe/dataframe/misc.py +9 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +14 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +30 -17
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  28. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  29. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  30. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  31. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  32. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  33. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  34. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  35. maxframe/codegen/spe/tensor/__init__.py +3 -0
  36. maxframe/codegen/spe/tensor/fft.py +74 -0
  37. maxframe/codegen/spe/tensor/linalg.py +29 -2
  38. maxframe/codegen/spe/tensor/misc.py +79 -25
  39. maxframe/codegen/spe/tensor/spatial.py +45 -0
  40. maxframe/codegen/spe/tensor/statistics.py +44 -0
  41. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  42. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  43. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  44. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  45. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  46. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  47. maxframe/codegen/spe/utils.py +2 -0
  48. maxframe/config/config.py +70 -9
  49. maxframe/config/tests/test_validators.py +13 -1
  50. maxframe/config/validators.py +49 -0
  51. maxframe/conftest.py +44 -17
  52. maxframe/core/accessor.py +2 -2
  53. maxframe/core/entity/core.py +5 -0
  54. maxframe/core/entity/tileables.py +1 -1
  55. maxframe/core/graph/core.cp311-win_amd64.pyd +0 -0
  56. maxframe/core/graph/entity.py +1 -2
  57. maxframe/core/operator/base.py +9 -2
  58. maxframe/core/operator/core.py +10 -2
  59. maxframe/core/operator/utils.py +13 -0
  60. maxframe/dataframe/__init__.py +10 -3
  61. maxframe/dataframe/accessors/__init__.py +1 -1
  62. maxframe/dataframe/accessors/compat.py +45 -0
  63. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  64. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  65. maxframe/dataframe/accessors/dict_/core.py +48 -0
  66. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  67. maxframe/dataframe/accessors/dict_/length.py +7 -16
  68. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  69. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  70. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  71. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  72. maxframe/dataframe/accessors/list_/core.py +48 -0
  73. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  74. maxframe/dataframe/accessors/list_/length.py +7 -16
  75. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  76. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  77. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  78. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  79. maxframe/dataframe/accessors/struct_/core.py +43 -0
  80. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  81. maxframe/dataframe/accessors/struct_/field.py +123 -0
  82. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  83. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  84. maxframe/dataframe/arithmetic/__init__.py +14 -4
  85. maxframe/dataframe/arithmetic/between.py +106 -0
  86. maxframe/dataframe/arithmetic/dot.py +237 -0
  87. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  88. maxframe/dataframe/core.py +63 -118
  89. maxframe/dataframe/datasource/__init__.py +18 -0
  90. maxframe/dataframe/datasource/from_dict.py +124 -0
  91. maxframe/dataframe/datasource/from_index.py +1 -1
  92. maxframe/dataframe/datasource/from_records.py +77 -0
  93. maxframe/dataframe/datasource/from_tensor.py +109 -41
  94. maxframe/dataframe/datasource/read_csv.py +2 -3
  95. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  96. maxframe/dataframe/datastore/__init__.py +5 -1
  97. maxframe/dataframe/datastore/to_csv.py +29 -41
  98. maxframe/dataframe/datastore/to_odps.py +30 -4
  99. maxframe/dataframe/extensions/__init__.py +20 -4
  100. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  101. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  102. maxframe/dataframe/extensions/collect_kv.py +126 -0
  103. maxframe/dataframe/extensions/extract_kv.py +177 -0
  104. maxframe/dataframe/extensions/map_reduce.py +263 -0
  105. maxframe/dataframe/extensions/rebalance.py +62 -0
  106. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  107. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  108. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  109. maxframe/dataframe/groupby/__init__.py +12 -1
  110. maxframe/dataframe/groupby/aggregation.py +78 -45
  111. maxframe/dataframe/groupby/apply.py +1 -1
  112. maxframe/dataframe/groupby/apply_chunk.py +18 -2
  113. maxframe/dataframe/groupby/core.py +96 -12
  114. maxframe/dataframe/groupby/cum.py +4 -25
  115. maxframe/dataframe/groupby/expanding.py +264 -0
  116. maxframe/dataframe/groupby/fill.py +1 -1
  117. maxframe/dataframe/groupby/getitem.py +12 -5
  118. maxframe/dataframe/groupby/head.py +11 -1
  119. maxframe/dataframe/groupby/rank.py +136 -0
  120. maxframe/dataframe/groupby/rolling.py +206 -0
  121. maxframe/dataframe/groupby/shift.py +114 -0
  122. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  123. maxframe/dataframe/indexing/__init__.py +20 -1
  124. maxframe/dataframe/indexing/droplevel.py +195 -0
  125. maxframe/dataframe/indexing/filter.py +169 -0
  126. maxframe/dataframe/indexing/get_level_values.py +76 -0
  127. maxframe/dataframe/indexing/iat.py +45 -0
  128. maxframe/dataframe/indexing/iloc.py +152 -12
  129. maxframe/dataframe/indexing/insert.py +1 -1
  130. maxframe/dataframe/indexing/loc.py +287 -7
  131. maxframe/dataframe/indexing/reindex.py +14 -5
  132. maxframe/dataframe/indexing/rename.py +6 -0
  133. maxframe/dataframe/indexing/rename_axis.py +2 -2
  134. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  135. maxframe/dataframe/indexing/reset_index.py +33 -6
  136. maxframe/dataframe/indexing/sample.py +8 -0
  137. maxframe/dataframe/indexing/setitem.py +3 -3
  138. maxframe/dataframe/indexing/swaplevel.py +185 -0
  139. maxframe/dataframe/indexing/take.py +99 -0
  140. maxframe/dataframe/indexing/truncate.py +140 -0
  141. maxframe/dataframe/indexing/where.py +0 -11
  142. maxframe/dataframe/indexing/xs.py +148 -0
  143. maxframe/dataframe/merge/__init__.py +12 -1
  144. maxframe/dataframe/merge/append.py +97 -98
  145. maxframe/dataframe/merge/combine_first.py +120 -0
  146. maxframe/dataframe/merge/compare.py +387 -0
  147. maxframe/dataframe/merge/concat.py +183 -0
  148. maxframe/dataframe/merge/update.py +271 -0
  149. maxframe/dataframe/misc/__init__.py +16 -10
  150. maxframe/dataframe/misc/_duplicate.py +10 -4
  151. maxframe/dataframe/misc/apply.py +1 -1
  152. maxframe/dataframe/misc/check_unique.py +51 -0
  153. maxframe/dataframe/misc/clip.py +145 -0
  154. maxframe/dataframe/misc/describe.py +175 -9
  155. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  156. maxframe/dataframe/misc/duplicated.py +2 -2
  157. maxframe/dataframe/misc/get_dummies.py +5 -1
  158. maxframe/dataframe/misc/isin.py +2 -2
  159. maxframe/dataframe/misc/map.py +94 -0
  160. maxframe/dataframe/misc/tests/test_misc.py +13 -2
  161. maxframe/dataframe/misc/to_numeric.py +3 -0
  162. maxframe/dataframe/misc/transform.py +12 -5
  163. maxframe/dataframe/misc/transpose.py +13 -1
  164. maxframe/dataframe/misc/valid_index.py +115 -0
  165. maxframe/dataframe/misc/value_counts.py +38 -4
  166. maxframe/dataframe/missing/checkna.py +13 -6
  167. maxframe/dataframe/missing/dropna.py +5 -0
  168. maxframe/dataframe/missing/fillna.py +1 -1
  169. maxframe/dataframe/missing/replace.py +7 -4
  170. maxframe/dataframe/reduction/__init__.py +29 -15
  171. maxframe/dataframe/reduction/aggregation.py +38 -9
  172. maxframe/dataframe/reduction/all.py +2 -2
  173. maxframe/dataframe/reduction/any.py +2 -2
  174. maxframe/dataframe/reduction/argmax.py +100 -0
  175. maxframe/dataframe/reduction/argmin.py +100 -0
  176. maxframe/dataframe/reduction/core.py +65 -18
  177. maxframe/dataframe/reduction/count.py +13 -9
  178. maxframe/dataframe/reduction/cov.py +166 -0
  179. maxframe/dataframe/reduction/cummax.py +2 -2
  180. maxframe/dataframe/reduction/cummin.py +2 -2
  181. maxframe/dataframe/reduction/cumprod.py +2 -2
  182. maxframe/dataframe/reduction/cumsum.py +2 -2
  183. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  184. maxframe/dataframe/reduction/idxmax.py +185 -0
  185. maxframe/dataframe/reduction/idxmin.py +185 -0
  186. maxframe/dataframe/reduction/kurtosis.py +37 -30
  187. maxframe/dataframe/reduction/max.py +2 -2
  188. maxframe/dataframe/reduction/mean.py +9 -7
  189. maxframe/dataframe/reduction/median.py +2 -2
  190. maxframe/dataframe/reduction/min.py +2 -2
  191. maxframe/dataframe/reduction/nunique.py +9 -8
  192. maxframe/dataframe/reduction/prod.py +18 -13
  193. maxframe/dataframe/reduction/reduction_size.py +2 -2
  194. maxframe/dataframe/reduction/sem.py +13 -9
  195. maxframe/dataframe/reduction/skew.py +31 -27
  196. maxframe/dataframe/reduction/str_concat.py +10 -7
  197. maxframe/dataframe/reduction/sum.py +18 -14
  198. maxframe/dataframe/reduction/unique.py +20 -3
  199. maxframe/dataframe/reduction/var.py +16 -12
  200. maxframe/dataframe/reshape/__init__.py +38 -0
  201. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  202. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  203. maxframe/dataframe/reshape/unstack.py +114 -0
  204. maxframe/dataframe/sort/__init__.py +8 -0
  205. maxframe/dataframe/sort/argsort.py +62 -0
  206. maxframe/dataframe/sort/core.py +1 -0
  207. maxframe/dataframe/sort/nlargest.py +238 -0
  208. maxframe/dataframe/sort/nsmallest.py +228 -0
  209. maxframe/dataframe/statistics/__init__.py +3 -3
  210. maxframe/dataframe/statistics/corr.py +1 -0
  211. maxframe/dataframe/statistics/quantile.py +2 -2
  212. maxframe/dataframe/tests/test_typing.py +104 -0
  213. maxframe/dataframe/tests/test_utils.py +66 -2
  214. maxframe/dataframe/typing_.py +185 -0
  215. maxframe/dataframe/utils.py +95 -26
  216. maxframe/dataframe/window/aggregation.py +8 -4
  217. maxframe/dataframe/window/core.py +14 -1
  218. maxframe/dataframe/window/ewm.py +1 -3
  219. maxframe/dataframe/window/expanding.py +37 -35
  220. maxframe/dataframe/window/rolling.py +49 -39
  221. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  222. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  223. maxframe/env.py +7 -4
  224. maxframe/errors.py +2 -2
  225. maxframe/io/odpsio/schema.py +9 -3
  226. maxframe/io/odpsio/tableio.py +7 -2
  227. maxframe/io/odpsio/tests/test_schema.py +198 -83
  228. maxframe/learn/__init__.py +10 -2
  229. maxframe/learn/cluster/__init__.py +15 -0
  230. maxframe/learn/cluster/_kmeans.py +782 -0
  231. maxframe/learn/contrib/llm/core.py +2 -0
  232. maxframe/learn/contrib/xgboost/core.py +86 -1
  233. maxframe/learn/contrib/xgboost/train.py +5 -2
  234. maxframe/learn/core.py +66 -0
  235. maxframe/learn/linear_model/_base.py +58 -1
  236. maxframe/learn/linear_model/_lin_reg.py +1 -1
  237. maxframe/learn/metrics/__init__.py +6 -0
  238. maxframe/learn/metrics/_classification.py +145 -0
  239. maxframe/learn/metrics/_ranking.py +477 -0
  240. maxframe/learn/metrics/_scorer.py +60 -0
  241. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  242. maxframe/learn/metrics/pairwise/core.py +77 -0
  243. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  244. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  245. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  246. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  247. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  248. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  249. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  250. maxframe/learn/metrics/tests/__init__.py +13 -0
  251. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  252. maxframe/learn/utils/__init__.py +1 -1
  253. maxframe/learn/utils/checks.py +1 -2
  254. maxframe/learn/utils/core.py +59 -0
  255. maxframe/learn/utils/extmath.py +37 -0
  256. maxframe/learn/utils/odpsio.py +193 -0
  257. maxframe/learn/utils/validation.py +2 -2
  258. maxframe/lib/compat.py +40 -0
  259. maxframe/lib/dtypes_extension/__init__.py +16 -1
  260. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  261. maxframe/lib/dtypes_extension/blob.py +304 -0
  262. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  263. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  264. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  265. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  266. maxframe/lib/filesystem/_oss_lib/common.py +122 -50
  267. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  268. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  269. maxframe/lib/filesystem/base.py +1 -1
  270. maxframe/lib/filesystem/core.py +1 -1
  271. maxframe/lib/filesystem/oss.py +115 -46
  272. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  273. maxframe/lib/mmh3.cp311-win_amd64.pyd +0 -0
  274. maxframe/lib/wrapped_pickle.py +10 -0
  275. maxframe/opcodes.py +33 -15
  276. maxframe/protocol.py +12 -0
  277. maxframe/serialization/__init__.py +11 -2
  278. maxframe/serialization/arrow.py +38 -13
  279. maxframe/serialization/blob.py +32 -0
  280. maxframe/serialization/core.cp311-win_amd64.pyd +0 -0
  281. maxframe/serialization/core.pyx +39 -1
  282. maxframe/serialization/exception.py +2 -4
  283. maxframe/serialization/numpy.py +11 -0
  284. maxframe/serialization/pandas.py +46 -9
  285. maxframe/serialization/serializables/core.py +2 -2
  286. maxframe/serialization/tests/test_serial.py +29 -2
  287. maxframe/tensor/__init__.py +38 -8
  288. maxframe/tensor/arithmetic/__init__.py +19 -10
  289. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  290. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
  291. maxframe/tensor/core.py +3 -2
  292. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  293. maxframe/tensor/extensions/__init__.py +2 -0
  294. maxframe/tensor/extensions/apply_chunk.py +3 -3
  295. maxframe/tensor/extensions/rebalance.py +65 -0
  296. maxframe/tensor/fft/__init__.py +32 -0
  297. maxframe/tensor/fft/core.py +168 -0
  298. maxframe/tensor/fft/fft.py +112 -0
  299. maxframe/tensor/fft/fft2.py +118 -0
  300. maxframe/tensor/fft/fftfreq.py +80 -0
  301. maxframe/tensor/fft/fftn.py +123 -0
  302. maxframe/tensor/fft/fftshift.py +79 -0
  303. maxframe/tensor/fft/hfft.py +112 -0
  304. maxframe/tensor/fft/ifft.py +114 -0
  305. maxframe/tensor/fft/ifft2.py +115 -0
  306. maxframe/tensor/fft/ifftn.py +123 -0
  307. maxframe/tensor/fft/ifftshift.py +73 -0
  308. maxframe/tensor/fft/ihfft.py +93 -0
  309. maxframe/tensor/fft/irfft.py +118 -0
  310. maxframe/tensor/fft/irfft2.py +62 -0
  311. maxframe/tensor/fft/irfftn.py +114 -0
  312. maxframe/tensor/fft/rfft.py +116 -0
  313. maxframe/tensor/fft/rfft2.py +63 -0
  314. maxframe/tensor/fft/rfftfreq.py +87 -0
  315. maxframe/tensor/fft/rfftn.py +113 -0
  316. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  317. maxframe/tensor/linalg/__init__.py +7 -0
  318. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  319. maxframe/tensor/linalg/cholesky.py +117 -0
  320. maxframe/tensor/linalg/einsum.py +339 -0
  321. maxframe/tensor/linalg/lstsq.py +100 -0
  322. maxframe/tensor/linalg/matrix_norm.py +75 -0
  323. maxframe/tensor/linalg/norm.py +249 -0
  324. maxframe/tensor/linalg/solve.py +72 -0
  325. maxframe/tensor/linalg/solve_triangular.py +2 -2
  326. maxframe/tensor/linalg/vector_norm.py +113 -0
  327. maxframe/tensor/misc/__init__.py +24 -1
  328. maxframe/tensor/misc/argwhere.py +72 -0
  329. maxframe/tensor/misc/array_split.py +46 -0
  330. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  331. maxframe/tensor/misc/copyto.py +130 -0
  332. maxframe/tensor/misc/delete.py +104 -0
  333. maxframe/tensor/misc/dsplit.py +68 -0
  334. maxframe/tensor/misc/ediff1d.py +74 -0
  335. maxframe/tensor/misc/expand_dims.py +85 -0
  336. maxframe/tensor/misc/flip.py +90 -0
  337. maxframe/tensor/misc/fliplr.py +64 -0
  338. maxframe/tensor/misc/flipud.py +68 -0
  339. maxframe/tensor/misc/hsplit.py +85 -0
  340. maxframe/tensor/misc/insert.py +139 -0
  341. maxframe/tensor/misc/moveaxis.py +83 -0
  342. maxframe/tensor/misc/result_type.py +88 -0
  343. maxframe/tensor/misc/roll.py +124 -0
  344. maxframe/tensor/misc/rollaxis.py +77 -0
  345. maxframe/tensor/misc/shape.py +89 -0
  346. maxframe/tensor/misc/split.py +190 -0
  347. maxframe/tensor/misc/tile.py +109 -0
  348. maxframe/tensor/misc/vsplit.py +74 -0
  349. maxframe/tensor/reduction/array_equal.py +2 -1
  350. maxframe/tensor/sort/__init__.py +2 -0
  351. maxframe/tensor/sort/argpartition.py +98 -0
  352. maxframe/tensor/sort/partition.py +228 -0
  353. maxframe/tensor/spatial/__init__.py +15 -0
  354. maxframe/tensor/spatial/distance/__init__.py +17 -0
  355. maxframe/tensor/spatial/distance/cdist.py +421 -0
  356. maxframe/tensor/spatial/distance/pdist.py +398 -0
  357. maxframe/tensor/spatial/distance/squareform.py +153 -0
  358. maxframe/tensor/special/__init__.py +159 -21
  359. maxframe/tensor/special/airy.py +55 -0
  360. maxframe/tensor/special/bessel.py +199 -0
  361. maxframe/tensor/special/core.py +65 -4
  362. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  363. maxframe/tensor/special/ellip_harm.py +55 -0
  364. maxframe/tensor/special/err_fresnel.py +223 -0
  365. maxframe/tensor/special/gamma_funcs.py +303 -0
  366. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  367. maxframe/tensor/special/info_theory.py +189 -0
  368. maxframe/tensor/special/misc.py +21 -0
  369. maxframe/tensor/statistics/__init__.py +6 -0
  370. maxframe/tensor/statistics/corrcoef.py +77 -0
  371. maxframe/tensor/statistics/cov.py +222 -0
  372. maxframe/tensor/statistics/digitize.py +126 -0
  373. maxframe/tensor/statistics/histogram.py +520 -0
  374. maxframe/tensor/statistics/median.py +85 -0
  375. maxframe/tensor/statistics/ptp.py +89 -0
  376. maxframe/tensor/utils.py +3 -3
  377. maxframe/tests/test_utils.py +43 -1
  378. maxframe/tests/utils.py +0 -2
  379. maxframe/typing_.py +2 -0
  380. maxframe/udf.py +27 -2
  381. maxframe/utils.py +193 -19
  382. {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
  383. {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
  384. maxframe_client/fetcher.py +35 -4
  385. maxframe_client/session/odps.py +7 -2
  386. maxframe_client/tests/test_fetcher.py +76 -3
  387. maxframe_client/tests/test_session.py +4 -1
  388. /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  389. /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  390. {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
  391. {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
@@ -28,7 +28,8 @@ import pandas as pd
28
28
  from pandas.api.types import is_string_dtype
29
29
  from pandas.core.dtypes.inference import is_dict_like, is_list_like
30
30
 
31
- from ..core import Entity, ExecutableTuple, OutputType, get_output_types
31
+ from ..core import ENTITY_TYPE, Entity, ExecutableTuple, OutputType, get_output_types
32
+ from ..lib.dtypes_extension import ExternalBlobDtype, SolidBlob
32
33
  from ..lib.mmh3 import hash as mmh_hash
33
34
  from ..udf import MarkedFunction
34
35
  from ..utils import (
@@ -40,6 +41,7 @@ from ..utils import (
40
41
  quiet_stdio,
41
42
  sbytes,
42
43
  tokenize,
44
+ validate_and_adjust_resource_ratio,
43
45
  )
44
46
 
45
47
  if TYPE_CHECKING:
@@ -57,7 +59,7 @@ cudf = lazy_import("cudf", rename="cudf")
57
59
  logger = logging.getLogger(__name__)
58
60
 
59
61
  try:
60
- from pandas import ArrowDtype
62
+ from ..lib.dtypes_extension import ArrowDtype
61
63
  except ImportError:
62
64
  ArrowDtype = None
63
65
 
@@ -456,7 +458,7 @@ def build_split_idx_to_origin_idx(splits, increase=True):
456
458
 
457
459
 
458
460
  def _generate_value(dtype, fill_value):
459
- if ArrowDtype and isinstance(dtype, pd.ArrowDtype):
461
+ if ArrowDtype and isinstance(dtype, ArrowDtype):
460
462
  return _generate_value(dtype.pyarrow_dtype, fill_value)
461
463
 
462
464
  if isinstance(dtype, pa.ListType):
@@ -470,9 +472,19 @@ def _generate_value(dtype, fill_value):
470
472
  )
471
473
  ]
472
474
 
475
+ if isinstance(dtype, pa.StructType):
476
+ result = {}
477
+ for i in range(dtype.num_fields):
478
+ field = dtype[i]
479
+ result[field.name] = _generate_value(field.type, fill_value)
480
+ return result
481
+
473
482
  if isinstance(dtype, pa.DataType):
474
483
  return _generate_value(dtype.to_pandas_dtype(), fill_value)
475
484
 
485
+ if isinstance(dtype, ExternalBlobDtype):
486
+ return SolidBlob(str(fill_value).encode())
487
+
476
488
  # special handle for datetime64 and timedelta64
477
489
  dispatch = {
478
490
  np.datetime64: pd.Timestamp,
@@ -1305,7 +1317,7 @@ def pack_func_args(df, funcs, *args, args_bind_position=1, **kwargs) -> Any:
1305
1317
  if is_dict_like(funcs):
1306
1318
  return {k: pack_func_args(df, v, *args, **kwargs) for k, v in funcs.items()}
1307
1319
 
1308
- if is_list_like(funcs):
1320
+ if is_list_like(funcs) and not isinstance(funcs, ENTITY_TYPE):
1309
1321
  return [pack_func_args(df, v, *args, **kwargs) for v in funcs]
1310
1322
 
1311
1323
  f = get_callable_by_name(df, funcs) if isinstance(funcs, str) else funcs
@@ -1406,23 +1418,54 @@ def infer_dataframe_return_value(
1406
1418
  inherit_index=False,
1407
1419
  build_kw=None,
1408
1420
  elementwise=None,
1421
+ skip_infer=False,
1409
1422
  ) -> InferredDataFrameMeta:
1410
- from .core import GROUPBY_TYPE
1423
+ from .core import GROUPBY_TYPE, INDEX_TYPE
1424
+ from .typing_ import get_function_output_meta
1425
+
1426
+ unwrapped_func = func
1427
+ if isinstance(unwrapped_func, MarkedFunction):
1428
+ unwrapped_func = unwrapped_func.func
1429
+ while True:
1430
+ if isinstance(unwrapped_func, functools.partial):
1431
+ unwrapped_func = unwrapped_func.func
1432
+ elif hasattr(unwrapped_func, "__wrapped__"):
1433
+ unwrapped_func = unwrapped_func.__wrapped__
1434
+ else:
1435
+ break
1436
+
1437
+ func_annotation_meta = get_function_output_meta(unwrapped_func, df_obj)
1438
+ func_index_value = None
1439
+ if func_annotation_meta:
1440
+ output_type = output_type or func_annotation_meta.output_type
1441
+ dtypes = dtypes if dtypes is not None else func_annotation_meta.dtypes
1442
+ dtype = dtype if dtype is not None else func_annotation_meta.dtype
1443
+ name = name if name is not None else func_annotation_meta.name
1444
+ func_index_value = func_annotation_meta.index_value
1445
+
1446
+ if skip_infer:
1447
+ if isinstance(index, INDEX_TYPE):
1448
+ ret_index_value = index.index_value
1449
+ elif index is not None:
1450
+ ret_index_value = parse_index(index, df_obj.key)
1451
+ else:
1452
+ ret_index_value = func_index_value
1453
+
1454
+ return InferredDataFrameMeta(
1455
+ output_type=output_type,
1456
+ dtypes=dtypes,
1457
+ dtype=dtype,
1458
+ name=name,
1459
+ index_value=ret_index_value,
1460
+ )
1461
+
1462
+ if isinstance(index, INDEX_TYPE):
1463
+ index = index.index_value
1411
1464
 
1412
1465
  if elementwise is None:
1413
- unwrapped_func = func
1414
- if isinstance(unwrapped_func, MarkedFunction):
1415
- unwrapped_func = unwrapped_func.func
1416
- while True:
1417
- if isinstance(unwrapped_func, functools.partial):
1418
- unwrapped_func = unwrapped_func.func
1419
- elif hasattr(unwrapped_func, "__wrapped__"):
1420
- unwrapped_func = unwrapped_func.__wrapped__
1421
- else:
1422
- break
1423
1466
  elementwise = isinstance(unwrapped_func, np.ufunc)
1424
1467
 
1425
- ret_index_value = None
1468
+ ret_index_value = func_index_value
1426
1469
  if output_type is not None and (dtypes is not None or dtype is not None):
1427
1470
  if inherit_index:
1428
1471
  ret_index_value = df_obj.index_value
@@ -1530,20 +1573,37 @@ def infer_dataframe_return_value(
1530
1573
  def copy_func_scheduling_hints(func, op: "DataFrameOperator") -> None:
1531
1574
  from ..config import options
1532
1575
 
1533
- if not isinstance(func, MarkedFunction):
1534
- return
1535
- if func.expect_engine:
1536
- op.expect_engine = func.expect_engine
1576
+ expect_engine = None
1577
+ expect_gpu = None
1578
+ default_options = options.function.default_running_options or {}
1537
1579
 
1538
- expect_resources = func.expect_resources or {}
1539
- default_function_running_options = options.function.default_running_options or {}
1580
+ if isinstance(func, MarkedFunction):
1581
+ # copy from marked function
1582
+ expect_engine = func.expect_engine
1583
+ expect_resources = func.expect_resources or {}
1584
+ expect_gpu = func.gpu
1540
1585
 
1541
- for key, value in default_function_running_options.items():
1542
- if key not in expect_resources or expect_resources.get(key) is None:
1543
- expect_resources[key] = value
1586
+ # merge default options if not set
1587
+ for key, value in default_options.items():
1588
+ if key not in expect_resources or expect_resources.get(key) is None:
1589
+ expect_resources[key] = value
1590
+ else:
1591
+ # copy from default options
1592
+ expect_resources = default_options
1593
+
1594
+ # Validate and adjust resource ratio constraints on client side
1595
+ expect_resources, _ = validate_and_adjust_resource_ratio(
1596
+ expect_resources,
1597
+ max_memory_cpu_ratio=options.function.allowed_max_memory_cpu_ratio,
1598
+ adjust=True,
1599
+ )
1544
1600
 
1545
- if func.expect_resources:
1601
+ if expect_engine:
1602
+ op.expect_engine = expect_engine
1603
+ if expect_resources:
1546
1604
  op.expect_resources = expect_resources
1605
+ if expect_gpu:
1606
+ op.gpu = expect_gpu
1547
1607
 
1548
1608
 
1549
1609
  def make_column_list(col, dtypes_or_columns, level=None):
@@ -1576,3 +1636,12 @@ def make_column_list(col, dtypes_or_columns, level=None):
1576
1636
  return idx[mask]
1577
1637
  except (IndexError, TypeError, ValueError):
1578
1638
  return col
1639
+
1640
+
1641
+ def call_groupby_with_params(df_or_series, groupby_params: dict):
1642
+ params = groupby_params.copy()
1643
+ selection = params.pop("selection", None)
1644
+ res = df_or_series.groupby(**params)
1645
+ if selection:
1646
+ res = res[selection]
1647
+ return res
@@ -19,14 +19,14 @@ import numpy as np
19
19
  import pandas as pd
20
20
 
21
21
  from ...serialization.serializables import AnyField, BoolField, Int32Field, Int64Field
22
- from ..core import DATAFRAME_TYPE
22
+ from ..core import DATAFRAME_TYPE, ENTITY_TYPE
23
23
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
24
24
  from ..utils import build_df, build_empty_series, parse_index
25
25
 
26
26
 
27
27
  class BaseDataFrameExpandingAgg(DataFrameOperator, DataFrameOperatorMixin):
28
28
  min_periods = Int64Field("min_periods", default=None)
29
- axis = Int32Field("axis", default=None)
29
+ axis = Int32Field("axis", default=0)
30
30
  func = AnyField("func", default=None)
31
31
 
32
32
  # always treat count as valid. this behavior is cancelled in pandas 1.0
@@ -52,7 +52,7 @@ class BaseDataFrameExpandingAgg(DataFrameOperator, DataFrameOperatorMixin):
52
52
  index_value = parse_index(
53
53
  test_df.index, expanding.params, inp, store_data=False
54
54
  )
55
- self._append_index = test_df.columns.nlevels != empty_df.columns.nlevels
55
+ self.append_index = test_df.columns.nlevels != empty_df.columns.nlevels
56
56
  return self.new_dataframe(
57
57
  [inp],
58
58
  shape=(inp.shape[0], test_df.shape[1]),
@@ -92,5 +92,9 @@ class BaseDataFrameExpandingAgg(DataFrameOperator, DataFrameOperatorMixin):
92
92
  else:
93
93
  new_func[k] = v
94
94
  self.func = new_func
95
- elif isinstance(self.func, Iterable) and not isinstance(self.func, str):
95
+ elif (
96
+ isinstance(self.func, Iterable)
97
+ and not isinstance(self.func, ENTITY_TYPE)
98
+ and not isinstance(self.func, str)
99
+ ):
96
100
  self.func = list(self.func)
@@ -12,11 +12,24 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- from ...serialization.serializables import KeyField, Serializable
15
+ from ...serialization.serializables import FieldTypes, KeyField, ListField, Serializable
16
16
 
17
17
 
18
18
  class Window(Serializable):
19
+ _mf_specific_fields = ["order_cols", "ascending"]
20
+
19
21
  input = KeyField("input", default=None)
22
+ order_cols = ListField("order_cols", default=None)
23
+ ascending = ListField("ascending", FieldTypes.bool, default=None)
24
+
25
+ def __init__(self, *, order_cols=None, ascending=True, **kwargs):
26
+ if order_cols and not isinstance(order_cols, list):
27
+ order_cols = [order_cols]
28
+ if not isinstance(ascending, list):
29
+ ascending = [ascending]
30
+ elif order_cols and len(order_cols) != len(ascending):
31
+ raise ValueError("order_cols and ascending must have same length")
32
+ super().__init__(order_cols=order_cols, ascending=ascending, **kwargs)
20
33
 
21
34
  @property
22
35
  def params(self):
@@ -36,8 +36,6 @@ _window_has_method = pd_release_version >= (1, 4, 0)
36
36
  class DataFrameEwmAgg(BaseDataFrameExpandingAgg):
37
37
  _op_type_ = opcodes.EWM_AGG
38
38
 
39
- _exec_cache = dict()
40
-
41
39
  alpha = Float64Field("alpha")
42
40
  adjust = BoolField("adjust")
43
41
  alpha_ignore_na = BoolField("alpha_ignore_na")
@@ -234,7 +232,7 @@ def ewm(
234
232
  raise ValueError("alpha must satisfy: 0 < alpha <= 1")
235
233
 
236
234
  if alpha == 1:
237
- return obj.expanding(min_periods=min_periods, axis=axis)
235
+ return obj.expanding(min_periods=min_periods)
238
236
 
239
237
  if _default_min_period_1:
240
238
  min_periods = min_periods or 1
@@ -12,49 +12,53 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
-
16
15
  from collections import OrderedDict
17
16
 
18
17
  from ... import opcodes
19
- from ...serialization.serializables import (
20
- BoolField,
21
- Int32Field,
22
- Int64Field,
23
- StringField,
24
- )
25
- from ...utils import pd_release_version
26
- from ..utils import validate_axis
18
+ from ...serialization.serializables import BoolField, Int64Field
27
19
  from .aggregation import BaseDataFrameExpandingAgg
28
20
  from .core import Window
29
21
 
30
- _window_has_method = pd_release_version >= (1, 3, 0)
31
- _window_has_center = pd_release_version < (2, 0, 0)
32
-
33
22
 
34
23
  class DataFrameExpandingAgg(BaseDataFrameExpandingAgg):
35
24
  _op_type_ = opcodes.EXPANDING_AGG
36
25
 
37
- center = BoolField("center", default=None)
26
+ def __init__(self, *args, **kw):
27
+ # suspend MF-specific args by now
28
+ for key in Expanding._mf_specific_fields:
29
+ kw.pop(key, None)
30
+ super().__init__(*args, **kw)
38
31
 
39
32
 
40
33
  class Expanding(Window):
34
+ _mf_specific_fields = Window._mf_specific_fields + ["shift", "reverse_range"]
35
+
41
36
  min_periods = Int64Field("min_periods")
42
- axis = Int32Field("axis")
43
- center = BoolField("center")
44
- method = StringField("method", default="single")
37
+ # MF specific argument for position shift of window
38
+ shift = Int64Field("shift", default=None)
39
+ # MF specific argument for reversed window (sort of "narrowing")
40
+ reverse_range = BoolField("reverse_range", default=False)
45
41
 
46
42
  def __call__(self, df):
47
- return df.expanding(**self.params)
43
+ try:
44
+ return df.expanding(**self.params)
45
+ except TypeError:
46
+ params = (self.params or dict()).copy()
47
+ for key in self._mf_specific_fields:
48
+ params.pop(key, None)
49
+ return df.expanding(**params)
48
50
 
49
51
  @property
50
52
  def params(self):
51
53
  p = OrderedDict()
52
54
 
53
- args = ["min_periods", "center", "axis", "method"]
54
- if not _window_has_method: # pragma: no cover
55
- args = [a for a in args if a != "method"]
56
- if not _window_has_center:
57
- args = [a for a in args if a != "center"]
55
+ args = [
56
+ "min_periods",
57
+ "shift",
58
+ "reverse_range",
59
+ "order_cols",
60
+ "ascending",
61
+ ]
58
62
 
59
63
  for k in args:
60
64
  p[k] = getattr(self, k)
@@ -73,6 +77,9 @@ class Expanding(Window):
73
77
  def sum(self):
74
78
  return self.aggregate("sum")
75
79
 
80
+ def prod(self):
81
+ return self.aggregate("prod")
82
+
76
83
  def count(self):
77
84
  return self.aggregate("count")
78
85
 
@@ -85,14 +92,14 @@ class Expanding(Window):
85
92
  def mean(self):
86
93
  return self.aggregate("mean")
87
94
 
88
- def var(self):
89
- return self.aggregate("var")
95
+ def var(self, **kwargs):
96
+ return self.aggregate("var", **kwargs)
90
97
 
91
- def std(self):
92
- return self.aggregate("std")
98
+ def std(self, **kwargs):
99
+ return self.aggregate("std", **kwargs)
93
100
 
94
101
 
95
- def expanding(obj, min_periods=1, center=False, axis=0):
102
+ def expanding(obj, min_periods=1, shift=0, reverse_range=False):
96
103
  """
97
104
  Provide expanding transformations.
98
105
 
@@ -139,11 +146,6 @@ def expanding(obj, min_periods=1, center=False, axis=0):
139
146
  3 3.0
140
147
  4 7.0
141
148
  """
142
- axis = validate_axis(axis, obj)
143
-
144
- if center:
145
- raise NotImplementedError("center == True is not supported")
146
- if axis == 1:
147
- raise NotImplementedError("axis other than 0 is not supported")
148
-
149
- return Expanding(input=obj, min_periods=min_periods, center=center, axis=axis)
149
+ return Expanding(
150
+ input=obj, min_periods=min_periods, shift=shift, reverse_range=reverse_range
151
+ )
@@ -23,38 +23,42 @@ from ...serialization.serializables import (
23
23
  AnyField,
24
24
  BoolField,
25
25
  DictField,
26
+ FieldTypes,
26
27
  Int32Field,
27
28
  Int64Field,
28
29
  KeyField,
30
+ ListField,
29
31
  StringField,
30
32
  TupleField,
31
33
  )
32
- from ...utils import pd_release_version
33
34
  from ..core import DATAFRAME_TYPE
34
35
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
35
36
  from ..utils import build_empty_df, build_empty_series, parse_index, validate_axis
36
37
  from .core import Window
37
38
 
38
- _window_has_method = pd_release_version >= (1, 3, 0)
39
- _with_pandas_issue_38908 = pd_release_version == (1, 2, 0)
40
-
41
39
 
42
40
  class DataFrameRollingAgg(DataFrameOperator, DataFrameOperatorMixin):
43
41
  _op_type_ = opcodes.ROLLING_AGG
44
42
 
45
43
  input = KeyField("input")
46
- window = AnyField("window")
47
- min_periods = Int64Field("min_periods")
48
- center = BoolField("center")
49
- win_type = StringField("win_type")
50
- on = StringField("on")
51
- axis = Int32Field("axis")
52
- closed = StringField("closed")
53
- func = AnyField("func")
54
- func_args = TupleField("func_args")
55
- func_kwargs = DictField("func_kwargs")
44
+ window = AnyField("window", default=None)
45
+ min_periods = Int64Field("min_periods", default=None)
46
+ center = BoolField("center", default=None)
47
+ win_type = StringField("win_type", default=None)
48
+ on = StringField("on", default=None)
49
+ axis = Int32Field("axis", default=None)
50
+ closed = StringField("closed", default=None)
51
+ func = AnyField("func", default=None)
52
+ func_args = TupleField("func_args", default=None)
53
+ func_kwargs = DictField("func_kwargs", default=None)
54
+ # for chunks
55
+ preds = ListField("preds", FieldTypes.key, default=None)
56
+ succs = ListField("succs", FieldTypes.key, default=None)
56
57
 
57
58
  def __init__(self, output_types=None, **kw):
59
+ # suspend MF-specific args by now
60
+ for key in Rolling._mf_specific_fields:
61
+ kw.pop(key, None)
58
62
  super().__init__(_output_types=output_types, **kw)
59
63
 
60
64
  @classmethod
@@ -62,6 +66,10 @@ class DataFrameRollingAgg(DataFrameOperator, DataFrameOperatorMixin):
62
66
  super()._set_inputs(op, inputs)
63
67
  input_iter = iter(op._inputs)
64
68
  op.input = next(input_iter)
69
+ if op.preds is not None:
70
+ op.preds = [next(input_iter) for _ in op.preds]
71
+ if op.succs is not None:
72
+ op.succs = [next(input_iter) for _ in op.succs]
65
73
 
66
74
  def __call__(self, rolling):
67
75
  inp = rolling.input
@@ -74,6 +82,8 @@ class DataFrameRollingAgg(DataFrameOperator, DataFrameOperatorMixin):
74
82
  params["win_type"] = None
75
83
  if self.func != "count":
76
84
  empty_df = empty_df._get_numeric_data()
85
+ for key in Rolling._mf_specific_fields:
86
+ params.pop(key, None)
77
87
  test_df = empty_df.rolling(**params).agg(self.func)
78
88
  if self.axis == 0:
79
89
  index_value = inp.index_value
@@ -93,7 +103,10 @@ class DataFrameRollingAgg(DataFrameOperator, DataFrameOperatorMixin):
93
103
  empty_series = build_empty_series(
94
104
  inp.dtype, index=pd_index[:0], name=inp.name
95
105
  )
96
- test_obj = empty_series.rolling(**rolling.params).agg(self.func)
106
+ rolling_params = rolling.params.copy()
107
+ for k in Rolling._mf_specific_fields:
108
+ rolling_params.pop(k, None)
109
+ test_obj = empty_series.rolling(**rolling_params).agg(self.func)
97
110
  if isinstance(test_obj, pd.DataFrame):
98
111
  return self.new_dataframe(
99
112
  [inp],
@@ -113,6 +126,8 @@ class DataFrameRollingAgg(DataFrameOperator, DataFrameOperatorMixin):
113
126
 
114
127
 
115
128
  class Rolling(Window):
129
+ _mf_specific_fields = Window._mf_specific_fields + ["shift"]
130
+
116
131
  window = AnyField("window", default=None)
117
132
  min_periods = Int64Field("min_periods", default=None)
118
133
  center = BoolField("center", default=None)
@@ -120,33 +135,25 @@ class Rolling(Window):
120
135
  on = StringField("on", default=None)
121
136
  axis = Int32Field("axis", default=None)
122
137
  closed = StringField("closed", default=None)
123
- method = StringField("method", default="single")
138
+ # MF specific argument for position shift of window
139
+ shift = Int64Field("shift", default=None)
124
140
 
125
141
  @property
126
142
  def params(self):
127
143
  p = OrderedDict()
128
144
 
129
- if not _window_has_method: # pragma: no cover
130
- args = [
131
- "window",
132
- "min_periods",
133
- "center",
134
- "win_type",
135
- "axis",
136
- "on",
137
- "closed",
138
- ]
139
- else:
140
- args = [
141
- "window",
142
- "min_periods",
143
- "center",
144
- "win_type",
145
- "axis",
146
- "on",
147
- "closed",
148
- "method",
149
- ]
145
+ args = [
146
+ "window",
147
+ "min_periods",
148
+ "center",
149
+ "win_type",
150
+ "axis",
151
+ "on",
152
+ "closed",
153
+ "shift",
154
+ "order_cols",
155
+ "ascending",
156
+ ]
150
157
 
151
158
  for attr in args:
152
159
  p[attr] = getattr(self, attr)
@@ -164,8 +171,11 @@ class Rolling(Window):
164
171
  empty_obj = build_empty_series(
165
172
  self.input.dtype, index=pd_index[:0], name=self.input.name
166
173
  )
167
- pd_rolling = empty_obj.rolling(**self.params)
168
- for k in self.params:
174
+ params = (self.params or dict()).copy()
175
+ for key in self._mf_specific_fields:
176
+ params.pop(key, None)
177
+ pd_rolling = empty_obj.rolling(**params)
178
+ for k in params:
169
179
  # update value according to pandas rolling
170
180
  setattr(self, k, getattr(pd_rolling, k))
171
181
 
@@ -23,15 +23,9 @@ def test_expanding():
23
23
  df = pd.DataFrame(np.random.rand(4, 3), columns=list("abc"))
24
24
  df2 = md.DataFrame(df)
25
25
 
26
- with pytest.raises(NotImplementedError):
27
- _ = df2.expanding(3, center=True)
28
-
29
- with pytest.raises(NotImplementedError):
30
- _ = df2.expanding(3, axis=1)
31
-
32
26
  r = df2.expanding(3)
33
27
  expected = df.expanding(3)
34
- assert repr(r) == repr(expected)
28
+ assert repr(r).split(",", 1)[0] == repr(expected).split(",", 1)[0]
35
29
 
36
30
  assert "b" in dir(r)
37
31
 
@@ -27,7 +27,7 @@ def test_rolling():
27
27
  expected = df.rolling(
28
28
  3, min_periods=1, center=True, win_type="triang", closed="both"
29
29
  )
30
- assert repr(r) == repr(expected)
30
+ assert repr(r).split(",")[:4] == repr(expected).rsplit(",")[:4]
31
31
 
32
32
  assert "b" in dir(r)
33
33
 
maxframe/env.py CHANGED
@@ -17,12 +17,14 @@ MAXFRAME_NAMESPACE = "MAXFRAME_NAMESPACE"
17
17
 
18
18
  # Maxframe Service common envs
19
19
  MAXFRAME_HTTP_PORT_FILE = "MAXFRAME_PROXY_PORT_FILE"
20
- MAXFRAME_SERVICE_PORT = "MAXFRAME_SERVICE_PORT"
21
- MAXFRAME_SERVICE_PORT_RETRIES = "MAXFRAME_SERVICE_PORT_RETRIES"
20
+ MAXFRAME_INSIDE_TASK = "MAXFRAME_INSIDE_TASK"
21
+ MAXFRAME_SERVICE_BASE_URL = "MF_SERVICE_BASE_URL"
22
+ MAXFRAME_SERVICE_ALLOW_ORIGIN = "MAXFRAME_SERVICE_ALLOW_ORIGIN"
22
23
  MAXFRAME_SERVICE_LISTEN_ADDRESS = "MAXFRAME_SERVICE_LISTEN_ADDRESS"
23
24
  MAXFRAME_SERVICE_LOG_CONFIG_FILE = "MAXFRAME_SERVICE_LOG_CONFIG_FILE"
24
- MAXFRAME_SERVICE_ALLOW_ORIGIN = "MAXFRAME_SERVICE_ALLOW_ORIGIN"
25
- MAXFRAME_SERVICE_BASE_URL = "MF_SERVICE_BASE_URL"
25
+ MAXFRAME_SERVICE_PORT = "MAXFRAME_SERVICE_PORT"
26
+ MAXFRAME_SERVICE_PORT_RETRIES = "MAXFRAME_SERVICE_PORT_RETRIES"
27
+ MAXFRAME_USER_LOG_CONFIG_FILE = "MAXFRAME_USER_LOG_CONFIG_FILE"
26
28
 
27
29
  # ODPS envs
28
30
  ODPS_BEARER_TOKEN = "ODPS_BEARER_TOKEN"
@@ -31,4 +33,5 @@ ODPS_BEARER_TOKEN_TIMESTAMP_FILE = "ODPS_BEARER_TOKEN_TIMESTAMP_FILE"
31
33
  ODPS_PROJECT_NAME = "ODPS_PROJECT_NAME"
32
34
  ODPS_ENDPOINT = "ODPS_ENDPOINT"
33
35
  ODPS_TUNNEL_ENDPOINT = "ODPS_TUNNEL_ENDPOINT"
36
+ ODPS_NAMESPACE = "ODPS_NAMESPACE"
34
37
  ODPS_STORAGE_API_ENDPOINT = "ODPS_STORAGE_API_ENDPOINT"
maxframe/errors.py CHANGED
@@ -43,5 +43,5 @@ class SessionAlreadyClosedError(MaxFrameError):
43
43
 
44
44
 
45
45
  class EngineUnavailableError(MaxFrameIntentionalError):
46
- def __init__(self, engine_type: str):
47
- super().__init__(f"Engine {engine_type} is not ready")
46
+ def __init__(self, msg: str):
47
+ super().__init__(msg)
@@ -22,9 +22,10 @@ import pyarrow as pa
22
22
  from odps import types as odps_types
23
23
  from pandas.api import types as pd_types
24
24
 
25
+ from ...config import options
25
26
  from ...core import TILEABLE_TYPE, OutputType
26
27
  from ...dataframe.core import DATAFRAME_TYPE, INDEX_TYPE, SERIES_TYPE
27
- from ...lib.dtypes_extension import ArrowDtype
28
+ from ...lib.dtypes_extension import ArrowBlobType, ArrowDtype
28
29
  from ...protocol import DataFrameTableMeta
29
30
  from ...tensor.core import TENSOR_TYPE
30
31
  from ...utils import build_temp_table_name
@@ -65,7 +66,11 @@ _odps_type_to_arrow = {
65
66
  odps_types.timestamp_ntz: pa.timestamp("ns"),
66
67
  }
67
68
 
68
- _based_for_pandas_pa_types = (pa.ListType, pa.MapType)
69
+ if hasattr(odps_types, "blob"):
70
+ _arrow_to_odps_types[ArrowBlobType()] = odps_types.blob
71
+ _odps_type_to_arrow[odps_types.blob] = ArrowBlobType()
72
+
73
+ _based_for_pandas_pa_types = (pa.ListType, pa.MapType, pa.StructType)
69
74
 
70
75
 
71
76
  def is_based_for_pandas_dtype(arrow_type: pa.DataType) -> bool:
@@ -204,9 +209,10 @@ def odps_schema_to_pandas_dtypes(
204
209
  def arrow_table_to_pandas_dataframe(
205
210
  table: pa.Table, meta: DataFrameTableMeta = None
206
211
  ) -> pd.DataFrame:
212
+ use_arrow_backend = options.dataframe.dtype_backend == "pyarrow"
207
213
  df = table.to_pandas(
208
214
  types_mapper=lambda x: (
209
- ArrowDtype(x) if is_based_for_pandas_dtype(x) else None
215
+ ArrowDtype(x) if is_based_for_pandas_dtype(x) or use_arrow_backend else None
210
216
  ),
211
217
  ignore_metadata=True,
212
218
  )