maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (443) hide show
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win32.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +9 -8
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +38 -1
  15. maxframe/codegen/spe/dataframe/misc.py +11 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +32 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +39 -18
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  29. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  30. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  31. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  32. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  33. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  34. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  35. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  36. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  37. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  38. maxframe/codegen/spe/tensor/__init__.py +3 -0
  39. maxframe/codegen/spe/tensor/datasource.py +1 -0
  40. maxframe/codegen/spe/tensor/fft.py +74 -0
  41. maxframe/codegen/spe/tensor/linalg.py +29 -2
  42. maxframe/codegen/spe/tensor/misc.py +79 -25
  43. maxframe/codegen/spe/tensor/spatial.py +45 -0
  44. maxframe/codegen/spe/tensor/statistics.py +44 -0
  45. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  46. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  47. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  48. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  49. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  50. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  51. maxframe/codegen/spe/utils.py +2 -0
  52. maxframe/config/config.py +73 -9
  53. maxframe/config/tests/test_validators.py +13 -1
  54. maxframe/config/validators.py +49 -0
  55. maxframe/conftest.py +54 -17
  56. maxframe/core/accessor.py +2 -2
  57. maxframe/core/base.py +2 -1
  58. maxframe/core/entity/core.py +5 -0
  59. maxframe/core/entity/tileables.py +3 -1
  60. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  61. maxframe/core/graph/entity.py +8 -3
  62. maxframe/core/mode.py +6 -1
  63. maxframe/core/operator/base.py +9 -2
  64. maxframe/core/operator/core.py +10 -2
  65. maxframe/core/operator/utils.py +13 -0
  66. maxframe/dataframe/__init__.py +12 -5
  67. maxframe/dataframe/accessors/__init__.py +1 -1
  68. maxframe/dataframe/accessors/compat.py +45 -0
  69. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  70. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  71. maxframe/dataframe/accessors/dict_/core.py +48 -0
  72. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  73. maxframe/dataframe/accessors/dict_/length.py +7 -16
  74. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  75. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  76. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  77. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  78. maxframe/dataframe/accessors/list_/core.py +48 -0
  79. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  80. maxframe/dataframe/accessors/list_/length.py +7 -16
  81. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  82. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  83. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  84. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  85. maxframe/dataframe/accessors/struct_/core.py +43 -0
  86. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  87. maxframe/dataframe/accessors/struct_/field.py +123 -0
  88. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  89. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  90. maxframe/dataframe/arithmetic/__init__.py +18 -4
  91. maxframe/dataframe/arithmetic/between.py +106 -0
  92. maxframe/dataframe/arithmetic/dot.py +237 -0
  93. maxframe/dataframe/arithmetic/maximum.py +33 -0
  94. maxframe/dataframe/arithmetic/minimum.py +33 -0
  95. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  96. maxframe/dataframe/core.py +161 -224
  97. maxframe/dataframe/datasource/__init__.py +18 -0
  98. maxframe/dataframe/datasource/core.py +6 -0
  99. maxframe/dataframe/datasource/direct.py +57 -0
  100. maxframe/dataframe/datasource/from_dict.py +124 -0
  101. maxframe/dataframe/datasource/from_index.py +1 -1
  102. maxframe/dataframe/datasource/from_records.py +77 -0
  103. maxframe/dataframe/datasource/from_tensor.py +109 -41
  104. maxframe/dataframe/datasource/read_csv.py +21 -14
  105. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  106. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  107. maxframe/dataframe/datasource/read_parquet.py +38 -39
  108. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  109. maxframe/dataframe/datastore/__init__.py +11 -1
  110. maxframe/dataframe/datastore/direct.py +268 -0
  111. maxframe/dataframe/datastore/to_csv.py +29 -41
  112. maxframe/dataframe/datastore/to_odps.py +36 -4
  113. maxframe/dataframe/extensions/__init__.py +20 -4
  114. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  115. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  116. maxframe/dataframe/extensions/collect_kv.py +126 -0
  117. maxframe/dataframe/extensions/extract_kv.py +177 -0
  118. maxframe/dataframe/extensions/flatjson.py +2 -1
  119. maxframe/dataframe/extensions/map_reduce.py +263 -0
  120. maxframe/dataframe/extensions/rebalance.py +62 -0
  121. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  122. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  123. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  124. maxframe/dataframe/groupby/__init__.py +17 -2
  125. maxframe/dataframe/groupby/aggregation.py +86 -49
  126. maxframe/dataframe/groupby/apply.py +1 -1
  127. maxframe/dataframe/groupby/apply_chunk.py +19 -5
  128. maxframe/dataframe/groupby/core.py +116 -16
  129. maxframe/dataframe/groupby/cum.py +4 -25
  130. maxframe/dataframe/groupby/expanding.py +264 -0
  131. maxframe/dataframe/groupby/fill.py +1 -1
  132. maxframe/dataframe/groupby/getitem.py +12 -5
  133. maxframe/dataframe/groupby/head.py +11 -1
  134. maxframe/dataframe/groupby/rank.py +136 -0
  135. maxframe/dataframe/groupby/rolling.py +206 -0
  136. maxframe/dataframe/groupby/shift.py +114 -0
  137. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  138. maxframe/dataframe/indexing/__init__.py +22 -2
  139. maxframe/dataframe/indexing/droplevel.py +195 -0
  140. maxframe/dataframe/indexing/filter.py +169 -0
  141. maxframe/dataframe/indexing/get_level_values.py +76 -0
  142. maxframe/dataframe/indexing/iat.py +45 -0
  143. maxframe/dataframe/indexing/iloc.py +152 -12
  144. maxframe/dataframe/indexing/insert.py +46 -18
  145. maxframe/dataframe/indexing/loc.py +287 -7
  146. maxframe/dataframe/indexing/reindex.py +14 -5
  147. maxframe/dataframe/indexing/rename.py +6 -0
  148. maxframe/dataframe/indexing/rename_axis.py +2 -2
  149. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  150. maxframe/dataframe/indexing/reset_index.py +33 -6
  151. maxframe/dataframe/indexing/sample.py +8 -0
  152. maxframe/dataframe/indexing/setitem.py +3 -3
  153. maxframe/dataframe/indexing/swaplevel.py +185 -0
  154. maxframe/dataframe/indexing/take.py +99 -0
  155. maxframe/dataframe/indexing/truncate.py +140 -0
  156. maxframe/dataframe/indexing/where.py +0 -11
  157. maxframe/dataframe/indexing/xs.py +148 -0
  158. maxframe/dataframe/merge/__init__.py +15 -1
  159. maxframe/dataframe/merge/append.py +97 -98
  160. maxframe/dataframe/merge/combine.py +244 -0
  161. maxframe/dataframe/merge/combine_first.py +120 -0
  162. maxframe/dataframe/merge/compare.py +387 -0
  163. maxframe/dataframe/merge/concat.py +183 -0
  164. maxframe/dataframe/merge/update.py +271 -0
  165. maxframe/dataframe/misc/__init__.py +28 -11
  166. maxframe/dataframe/misc/_duplicate.py +10 -4
  167. maxframe/dataframe/misc/apply.py +1 -1
  168. maxframe/dataframe/misc/check_unique.py +82 -0
  169. maxframe/dataframe/misc/clip.py +145 -0
  170. maxframe/dataframe/misc/describe.py +175 -9
  171. maxframe/dataframe/misc/drop.py +31 -0
  172. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  173. maxframe/dataframe/misc/duplicated.py +2 -2
  174. maxframe/dataframe/misc/get_dummies.py +5 -1
  175. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  176. maxframe/dataframe/misc/isin.py +2 -2
  177. maxframe/dataframe/misc/map.py +125 -18
  178. maxframe/dataframe/misc/repeat.py +159 -0
  179. maxframe/dataframe/misc/tests/test_misc.py +48 -3
  180. maxframe/dataframe/misc/to_numeric.py +3 -0
  181. maxframe/dataframe/misc/transform.py +12 -5
  182. maxframe/dataframe/misc/transpose.py +13 -1
  183. maxframe/dataframe/misc/valid_index.py +115 -0
  184. maxframe/dataframe/misc/value_counts.py +38 -4
  185. maxframe/dataframe/missing/checkna.py +14 -6
  186. maxframe/dataframe/missing/dropna.py +5 -0
  187. maxframe/dataframe/missing/fillna.py +1 -1
  188. maxframe/dataframe/missing/replace.py +7 -4
  189. maxframe/dataframe/reduction/__init__.py +35 -16
  190. maxframe/dataframe/reduction/aggregation.py +43 -14
  191. maxframe/dataframe/reduction/all.py +2 -2
  192. maxframe/dataframe/reduction/any.py +2 -2
  193. maxframe/dataframe/reduction/argmax.py +103 -0
  194. maxframe/dataframe/reduction/argmin.py +103 -0
  195. maxframe/dataframe/reduction/core.py +80 -24
  196. maxframe/dataframe/reduction/count.py +13 -9
  197. maxframe/dataframe/reduction/cov.py +166 -0
  198. maxframe/dataframe/reduction/cummax.py +2 -2
  199. maxframe/dataframe/reduction/cummin.py +2 -2
  200. maxframe/dataframe/reduction/cumprod.py +2 -2
  201. maxframe/dataframe/reduction/cumsum.py +2 -2
  202. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  203. maxframe/dataframe/reduction/idxmax.py +185 -0
  204. maxframe/dataframe/reduction/idxmin.py +185 -0
  205. maxframe/dataframe/reduction/kurtosis.py +37 -30
  206. maxframe/dataframe/reduction/max.py +2 -2
  207. maxframe/dataframe/reduction/mean.py +9 -7
  208. maxframe/dataframe/reduction/median.py +2 -2
  209. maxframe/dataframe/reduction/min.py +2 -2
  210. maxframe/dataframe/reduction/mode.py +144 -0
  211. maxframe/dataframe/reduction/nunique.py +19 -11
  212. maxframe/dataframe/reduction/prod.py +18 -13
  213. maxframe/dataframe/reduction/reduction_size.py +2 -2
  214. maxframe/dataframe/reduction/sem.py +13 -9
  215. maxframe/dataframe/reduction/skew.py +31 -27
  216. maxframe/dataframe/reduction/str_concat.py +10 -7
  217. maxframe/dataframe/reduction/sum.py +18 -14
  218. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  219. maxframe/dataframe/reduction/unique.py +20 -3
  220. maxframe/dataframe/reduction/var.py +16 -12
  221. maxframe/dataframe/reshape/__init__.py +38 -0
  222. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  223. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  224. maxframe/dataframe/reshape/unstack.py +114 -0
  225. maxframe/dataframe/sort/__init__.py +16 -1
  226. maxframe/dataframe/sort/argsort.py +68 -0
  227. maxframe/dataframe/sort/core.py +2 -1
  228. maxframe/dataframe/sort/nlargest.py +238 -0
  229. maxframe/dataframe/sort/nsmallest.py +228 -0
  230. maxframe/dataframe/sort/rank.py +147 -0
  231. maxframe/dataframe/statistics/__init__.py +3 -3
  232. maxframe/dataframe/statistics/corr.py +1 -0
  233. maxframe/dataframe/statistics/quantile.py +2 -2
  234. maxframe/dataframe/tests/test_typing.py +104 -0
  235. maxframe/dataframe/tests/test_utils.py +66 -2
  236. maxframe/dataframe/tseries/__init__.py +19 -0
  237. maxframe/dataframe/tseries/at_time.py +61 -0
  238. maxframe/dataframe/tseries/between_time.py +122 -0
  239. maxframe/dataframe/typing_.py +185 -0
  240. maxframe/dataframe/utils.py +125 -52
  241. maxframe/dataframe/window/aggregation.py +8 -4
  242. maxframe/dataframe/window/core.py +14 -1
  243. maxframe/dataframe/window/ewm.py +1 -3
  244. maxframe/dataframe/window/expanding.py +37 -35
  245. maxframe/dataframe/window/rolling.py +49 -39
  246. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  247. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  248. maxframe/env.py +7 -4
  249. maxframe/errors.py +2 -2
  250. maxframe/io/odpsio/schema.py +9 -3
  251. maxframe/io/odpsio/tableio.py +7 -2
  252. maxframe/io/odpsio/tests/test_schema.py +198 -83
  253. maxframe/learn/__init__.py +10 -2
  254. maxframe/learn/cluster/__init__.py +15 -0
  255. maxframe/learn/cluster/_kmeans.py +782 -0
  256. maxframe/learn/contrib/llm/core.py +18 -7
  257. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  258. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  259. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  260. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  261. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  262. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  263. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  264. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  265. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  266. maxframe/learn/contrib/llm/models/managed.py +76 -11
  267. maxframe/learn/contrib/llm/models/openai.py +72 -0
  268. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  269. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  270. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  271. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  272. maxframe/learn/contrib/llm/text.py +348 -42
  273. maxframe/learn/contrib/models.py +4 -1
  274. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  275. maxframe/learn/contrib/xgboost/core.py +113 -4
  276. maxframe/learn/contrib/xgboost/predict.py +4 -2
  277. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  278. maxframe/learn/contrib/xgboost/train.py +7 -2
  279. maxframe/learn/core.py +66 -0
  280. maxframe/learn/linear_model/_base.py +58 -1
  281. maxframe/learn/linear_model/_lin_reg.py +1 -1
  282. maxframe/learn/metrics/__init__.py +6 -0
  283. maxframe/learn/metrics/_classification.py +145 -0
  284. maxframe/learn/metrics/_ranking.py +477 -0
  285. maxframe/learn/metrics/_scorer.py +60 -0
  286. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  287. maxframe/learn/metrics/pairwise/core.py +77 -0
  288. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  289. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  290. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  291. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  292. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  293. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  294. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  295. maxframe/learn/metrics/tests/__init__.py +13 -0
  296. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  297. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  298. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  299. maxframe/learn/utils/__init__.py +2 -1
  300. maxframe/learn/utils/checks.py +1 -2
  301. maxframe/learn/utils/core.py +59 -0
  302. maxframe/learn/utils/extmath.py +79 -9
  303. maxframe/learn/utils/odpsio.py +262 -0
  304. maxframe/learn/utils/validation.py +2 -2
  305. maxframe/lib/compat.py +40 -0
  306. maxframe/lib/dtypes_extension/__init__.py +16 -1
  307. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  308. maxframe/lib/dtypes_extension/blob.py +304 -0
  309. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  310. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  311. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  312. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  313. maxframe/lib/filesystem/_oss_lib/common.py +124 -50
  314. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  315. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  316. maxframe/lib/filesystem/base.py +1 -1
  317. maxframe/lib/filesystem/core.py +1 -1
  318. maxframe/lib/filesystem/oss.py +115 -46
  319. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  320. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  321. maxframe/lib/wrapped_pickle.py +10 -0
  322. maxframe/opcodes.py +41 -15
  323. maxframe/protocol.py +12 -0
  324. maxframe/remote/core.py +4 -0
  325. maxframe/serialization/__init__.py +11 -2
  326. maxframe/serialization/arrow.py +38 -13
  327. maxframe/serialization/blob.py +32 -0
  328. maxframe/serialization/core.cp37-win32.pyd +0 -0
  329. maxframe/serialization/core.pyx +39 -1
  330. maxframe/serialization/exception.py +2 -4
  331. maxframe/serialization/numpy.py +11 -0
  332. maxframe/serialization/pandas.py +46 -9
  333. maxframe/serialization/serializables/core.py +2 -2
  334. maxframe/serialization/tests/test_serial.py +31 -4
  335. maxframe/tensor/__init__.py +38 -8
  336. maxframe/tensor/arithmetic/__init__.py +19 -10
  337. maxframe/tensor/arithmetic/core.py +2 -2
  338. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  339. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
  340. maxframe/tensor/core.py +6 -2
  341. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  342. maxframe/tensor/extensions/__init__.py +2 -0
  343. maxframe/tensor/extensions/apply_chunk.py +3 -3
  344. maxframe/tensor/extensions/rebalance.py +65 -0
  345. maxframe/tensor/fft/__init__.py +32 -0
  346. maxframe/tensor/fft/core.py +168 -0
  347. maxframe/tensor/fft/fft.py +112 -0
  348. maxframe/tensor/fft/fft2.py +118 -0
  349. maxframe/tensor/fft/fftfreq.py +80 -0
  350. maxframe/tensor/fft/fftn.py +123 -0
  351. maxframe/tensor/fft/fftshift.py +79 -0
  352. maxframe/tensor/fft/hfft.py +112 -0
  353. maxframe/tensor/fft/ifft.py +114 -0
  354. maxframe/tensor/fft/ifft2.py +115 -0
  355. maxframe/tensor/fft/ifftn.py +123 -0
  356. maxframe/tensor/fft/ifftshift.py +73 -0
  357. maxframe/tensor/fft/ihfft.py +93 -0
  358. maxframe/tensor/fft/irfft.py +118 -0
  359. maxframe/tensor/fft/irfft2.py +62 -0
  360. maxframe/tensor/fft/irfftn.py +114 -0
  361. maxframe/tensor/fft/rfft.py +116 -0
  362. maxframe/tensor/fft/rfft2.py +63 -0
  363. maxframe/tensor/fft/rfftfreq.py +87 -0
  364. maxframe/tensor/fft/rfftn.py +113 -0
  365. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  366. maxframe/tensor/linalg/__init__.py +7 -0
  367. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  368. maxframe/tensor/linalg/cholesky.py +117 -0
  369. maxframe/tensor/linalg/einsum.py +339 -0
  370. maxframe/tensor/linalg/lstsq.py +100 -0
  371. maxframe/tensor/linalg/matrix_norm.py +75 -0
  372. maxframe/tensor/linalg/norm.py +249 -0
  373. maxframe/tensor/linalg/solve.py +72 -0
  374. maxframe/tensor/linalg/solve_triangular.py +2 -2
  375. maxframe/tensor/linalg/vector_norm.py +113 -0
  376. maxframe/tensor/misc/__init__.py +24 -1
  377. maxframe/tensor/misc/argwhere.py +72 -0
  378. maxframe/tensor/misc/array_split.py +46 -0
  379. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  380. maxframe/tensor/misc/copyto.py +130 -0
  381. maxframe/tensor/misc/delete.py +104 -0
  382. maxframe/tensor/misc/dsplit.py +68 -0
  383. maxframe/tensor/misc/ediff1d.py +74 -0
  384. maxframe/tensor/misc/expand_dims.py +85 -0
  385. maxframe/tensor/misc/flip.py +90 -0
  386. maxframe/tensor/misc/fliplr.py +64 -0
  387. maxframe/tensor/misc/flipud.py +68 -0
  388. maxframe/tensor/misc/hsplit.py +85 -0
  389. maxframe/tensor/misc/insert.py +139 -0
  390. maxframe/tensor/misc/moveaxis.py +83 -0
  391. maxframe/tensor/misc/result_type.py +88 -0
  392. maxframe/tensor/misc/roll.py +124 -0
  393. maxframe/tensor/misc/rollaxis.py +77 -0
  394. maxframe/tensor/misc/shape.py +89 -0
  395. maxframe/tensor/misc/split.py +190 -0
  396. maxframe/tensor/misc/tile.py +109 -0
  397. maxframe/tensor/misc/vsplit.py +74 -0
  398. maxframe/tensor/reduction/array_equal.py +2 -1
  399. maxframe/tensor/sort/__init__.py +2 -0
  400. maxframe/tensor/sort/argpartition.py +98 -0
  401. maxframe/tensor/sort/partition.py +228 -0
  402. maxframe/tensor/spatial/__init__.py +15 -0
  403. maxframe/tensor/spatial/distance/__init__.py +17 -0
  404. maxframe/tensor/spatial/distance/cdist.py +421 -0
  405. maxframe/tensor/spatial/distance/pdist.py +398 -0
  406. maxframe/tensor/spatial/distance/squareform.py +153 -0
  407. maxframe/tensor/special/__init__.py +159 -21
  408. maxframe/tensor/special/airy.py +55 -0
  409. maxframe/tensor/special/bessel.py +199 -0
  410. maxframe/tensor/special/core.py +65 -4
  411. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  412. maxframe/tensor/special/ellip_harm.py +55 -0
  413. maxframe/tensor/special/err_fresnel.py +223 -0
  414. maxframe/tensor/special/gamma_funcs.py +303 -0
  415. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  416. maxframe/tensor/special/info_theory.py +189 -0
  417. maxframe/tensor/special/misc.py +21 -0
  418. maxframe/tensor/statistics/__init__.py +6 -0
  419. maxframe/tensor/statistics/corrcoef.py +77 -0
  420. maxframe/tensor/statistics/cov.py +222 -0
  421. maxframe/tensor/statistics/digitize.py +126 -0
  422. maxframe/tensor/statistics/histogram.py +520 -0
  423. maxframe/tensor/statistics/median.py +85 -0
  424. maxframe/tensor/statistics/ptp.py +89 -0
  425. maxframe/tensor/utils.py +3 -3
  426. maxframe/tests/test_udf.py +61 -0
  427. maxframe/tests/test_utils.py +51 -6
  428. maxframe/tests/utils.py +0 -2
  429. maxframe/typing_.py +2 -0
  430. maxframe/udf.py +130 -9
  431. maxframe/utils.py +254 -27
  432. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
  433. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
  434. maxframe_client/fetcher.py +35 -4
  435. maxframe_client/session/odps.py +7 -2
  436. maxframe_client/session/task.py +8 -1
  437. maxframe_client/tests/test_fetcher.py +76 -3
  438. maxframe_client/tests/test_session.py +28 -1
  439. maxframe/dataframe/arrays.py +0 -864
  440. /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  441. /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  442. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  443. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -21,8 +21,8 @@ import pandas as pd
21
21
  from ... import opcodes
22
22
  from ...core import EntityData, OutputType
23
23
  from ...serialization.serializables import AnyField, KeyField, StringField
24
- from ...udf import BuiltinFunction, MarkedFunction
25
- from ...utils import quiet_stdio
24
+ from ...udf import BuiltinFunction, MarkedFunction, ODPSFunction
25
+ from ...utils import make_dtype, quiet_stdio
26
26
  from ..core import SERIES_TYPE
27
27
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
28
28
  from ..utils import build_series, copy_func_scheduling_hints
@@ -40,6 +40,7 @@ class DataFrameMap(DataFrameOperator, DataFrameOperatorMixin):
40
40
  if not self.output_types:
41
41
  self.output_types = [OutputType.series]
42
42
  if hasattr(self, "arg"):
43
+ self.arg = ODPSFunction.wrap(self.arg)
43
44
  copy_func_scheduling_hints(self.arg, self)
44
45
 
45
46
  @classmethod
@@ -55,25 +56,34 @@ class DataFrameMap(DataFrameOperator, DataFrameOperatorMixin):
55
56
  ) and not isinstance(self.arg, BuiltinFunction)
56
57
 
57
58
  def __call__(self, series, dtype, skip_infer=False):
58
- if dtype is None and not skip_infer:
59
- inferred_dtype = None
60
- if callable(self.arg):
59
+ if dtype is not None:
60
+ dtype = make_dtype(dtype)
61
+ else:
62
+ # obtain dtype from existing hints
63
+ if isinstance(self.arg, ODPSFunction):
64
+ if self.arg.result_dtype is not None:
65
+ dtype = self.arg.result_dtype
66
+ elif callable(self.arg):
61
67
  # arg is a function, try to inspect the signature
62
68
  sig = inspect.signature(self.arg)
63
69
  return_type = sig.return_annotation
64
70
  if return_type is not inspect._empty:
65
- inferred_dtype = np.dtype(return_type)
66
- else:
67
- try:
68
- with quiet_stdio():
69
- # try to infer dtype by calling the function
70
- inferred_dtype = (
71
- build_series(series)
72
- .map(self.arg, na_action=self.na_action)
73
- .dtype
74
- )
75
- except: # noqa: E722 # nosec
76
- pass
71
+ dtype = np.dtype(return_type)
72
+
73
+ err_prefix = None
74
+ if dtype is None and not skip_infer:
75
+ inferred_dtype = None
76
+ if callable(self.arg):
77
+ try:
78
+ with quiet_stdio():
79
+ # try to infer dtype by calling the function
80
+ inferred_dtype = (
81
+ build_series(series)
82
+ .map(self.arg, na_action=self.na_action)
83
+ .dtype
84
+ )
85
+ except: # noqa: E722 # nosec
86
+ pass
77
87
  else:
78
88
  if isinstance(self.arg, MutableMapping):
79
89
  inferred_dtype = pd.Series(self.arg).dtype
@@ -86,13 +96,16 @@ class DataFrameMap(DataFrameOperator, DataFrameOperatorMixin):
86
96
  # but for int, due to the nan which may occur,
87
97
  # we cannot infer the dtype
88
98
  dtype = inferred_dtype
99
+ else:
100
+ err_prefix = "int type may not be exact"
89
101
  else:
90
102
  dtype = inferred_dtype
91
103
 
92
104
  if dtype is None:
93
105
  if not skip_infer:
106
+ err_prefix = err_prefix or "cannot infer dtype"
94
107
  raise ValueError(
95
- "cannot infer dtype, it needs to be specified manually for `map`"
108
+ f"{err_prefix}, it needs to be specified manually for `map`"
96
109
  )
97
110
  else:
98
111
  dtype = np.int64 if dtype is int else dtype
@@ -251,3 +264,97 @@ def index_map(
251
264
  """
252
265
  op = DataFrameMap(arg=mapper, na_action=na_action, memory_scale=memory_scale)
253
266
  return op(idx, dtype=dtype, skip_infer=skip_infer)
267
+
268
+
269
+ def df_map(
270
+ df, func, na_action=None, dtypes=None, dtype=None, skip_infer=False, **kwargs
271
+ ):
272
+ """
273
+ Apply a function to a DataFrame elementwise.
274
+
275
+ This method applies a function that accepts and returns a scalar
276
+ to every element of a DataFrame.
277
+
278
+ Parameters
279
+ ----------
280
+ func : callable
281
+ Python function, returns a single value from a single value.
282
+ na_action : {None, 'ignore'}, default None
283
+ If 'ignore', propagate NaN values, without passing them to func.
284
+ dtypes : Series, default None
285
+ Specify dtypes of returned DataFrames.
286
+ dtype : np.dtype, default None
287
+ Specify dtypes of all columns of returned DataFrames, only
288
+ effective when dtypes is not specified.
289
+ skip_infer: bool, default False
290
+ Whether to skip inferring dtypes when neither dtypes nor dtype is specified.
291
+ **kwargs
292
+ Additional keyword arguments to pass as keyword arguments to
293
+ `func`.
294
+
295
+ Returns
296
+ -------
297
+ DataFrame
298
+ Transformed DataFrame.
299
+
300
+ See Also
301
+ --------
302
+ DataFrame.apply : Apply a function along input axis of DataFrame.
303
+ DataFrame.replace: Replace values given in `to_replace` with `value`.
304
+ Series.map : Apply a function elementwise on a Series.
305
+
306
+ Examples
307
+ --------
308
+ >>> import maxframe.dataframe as md
309
+ >>> df = md.DataFrame([[1, 2.12], [3.356, 4.567]])
310
+ >>> df.execute()
311
+ 0 1
312
+ 0 1.000 2.120
313
+ 1 3.356 4.567
314
+
315
+ >>> df.map(lambda x: len(str(x))).execute()
316
+ 0 1
317
+ 0 3 4
318
+ 1 5 5
319
+
320
+ Like Series.map, NA values can be ignored:
321
+
322
+ >>> df_copy = df.copy()
323
+ >>> df_copy.iloc[0, 0] = md.NA
324
+ >>> df_copy.map(lambda x: len(str(x)), na_action='ignore').execute()
325
+ 0 1
326
+ 0 NaN 4
327
+ 1 5.0 5
328
+
329
+ It is also possible to use `map` with functions that are not
330
+ `lambda` functions:
331
+
332
+ >>> df.map(round, ndigits=1).execute()
333
+ 0 1
334
+ 0 1.0 2.1
335
+ 1 3.4 4.6
336
+
337
+ Note that a vectorized version of `func` often exists, which will
338
+ be much faster. You could square each number elementwise.
339
+
340
+ >>> df.map(lambda x: x**2).execute()
341
+ 0 1
342
+ 0 1.000000 4.494400
343
+ 1 11.262736 20.857489
344
+
345
+ But it's better to avoid map in that case.
346
+
347
+ >>> (df ** 2).execute()
348
+ 0 1
349
+ 0 1.000000 4.494400
350
+ 1 11.262736 20.857489
351
+ """
352
+ if dtypes is None and dtype is not None:
353
+ dtypes = pd.Series([dtype] * df.shape[1], index=df.dtypes.index)
354
+
355
+ def _wrapper(row):
356
+ return row.map(func, na_action=na_action, **kwargs)
357
+
358
+ return df.apply(
359
+ _wrapper, axis=1, dtypes=dtypes, skip_infer=skip_infer, elementwise=True
360
+ )
@@ -0,0 +1,159 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import List
16
+
17
+ import numpy as np
18
+ from pandas.api.types import is_list_like
19
+
20
+ from ... import opcodes
21
+ from ...core import ENTITY_TYPE, EntityData, get_output_types
22
+ from ...serialization.serializables import AnyField
23
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
24
+ from ..utils import parse_index, validate_axis
25
+
26
+
27
+ class DataFrameRepeat(DataFrameOperator, DataFrameOperatorMixin):
28
+ _op_type_ = opcodes.REPEAT
29
+
30
+ repeats = AnyField("repeats", default=None)
31
+
32
+ def __init__(self, output_types=None, **kw):
33
+ super().__init__(_output_types=output_types, **kw)
34
+
35
+ @classmethod
36
+ def _set_inputs(cls, op: "DataFrameRepeat", inputs: List[EntityData]):
37
+ super()._set_inputs(op, inputs)
38
+ if isinstance(op.repeats, ENTITY_TYPE):
39
+ op.repeats = inputs[1]
40
+
41
+ def __call__(self, obj, repeats):
42
+ self._output_types = get_output_types(obj)
43
+ test_index = obj.index_value.to_pandas()[:0]
44
+
45
+ params = obj.params
46
+ params["index_value"] = parse_index(test_index, obj, type(self), self.repeats)
47
+ params["shape"] = (np.nan,)
48
+
49
+ inputs = [obj]
50
+ if isinstance(repeats, ENTITY_TYPE):
51
+ inputs.append(repeats)
52
+ return self.new_tileable(inputs, **params)
53
+
54
+
55
+ def _repeat(obj, repeats, axis=None):
56
+ from ...tensor.datasource import tensor
57
+
58
+ axis = validate_axis(axis or 0, obj)
59
+ if is_list_like(repeats):
60
+ repeats = tensor(repeats)
61
+ op = DataFrameRepeat(repeats=repeats, axis=axis)
62
+ return op(obj, repeats)
63
+
64
+
65
+ def series_repeat(obj, repeats, axis=None):
66
+ """
67
+ Repeat elements of a Series.
68
+
69
+ Returns a new Series where each element of the current Series
70
+ is repeated consecutively a given number of times.
71
+
72
+ Parameters
73
+ ----------
74
+ repeats : int or array of ints
75
+ The number of repetitions for each element. This should be a
76
+ non-negative integer. Repeating 0 times will return an empty
77
+ Series.
78
+ axis : None
79
+ Must be ``None``. Has no effect but is accepted for compatibility
80
+ with numpy.
81
+
82
+ Returns
83
+ -------
84
+ Series
85
+ Newly created Series with repeated elements.
86
+
87
+ See Also
88
+ --------
89
+ Index.repeat : Equivalent function for Index.
90
+ numpy.repeat : Similar method for :class:`numpy.ndarray`.
91
+
92
+ Examples
93
+ --------
94
+ >>> import maxframe.dataframe as md
95
+ >>> s = md.Series(['a', 'b', 'c'])
96
+ >>> s.execute()
97
+ 0 a
98
+ 1 b
99
+ 2 c
100
+ dtype: object
101
+ >>> s.repeat(2).execute()
102
+ 0 a
103
+ 0 a
104
+ 1 b
105
+ 1 b
106
+ 2 c
107
+ 2 c
108
+ dtype: object
109
+ >>> s.repeat([1, 2, 3]).execute()
110
+ 0 a
111
+ 1 b
112
+ 1 b
113
+ 2 c
114
+ 2 c
115
+ 2 c
116
+ dtype: object
117
+ """
118
+ return _repeat(obj, repeats, axis=axis)
119
+
120
+
121
+ def index_repeat(obj, repeats, axis=None):
122
+ """
123
+ Repeat elements of an Index.
124
+
125
+ Returns a new Index where each element of the current Index
126
+ is repeated consecutively a given number of times.
127
+
128
+ Parameters
129
+ ----------
130
+ repeats : int or array of ints
131
+ The number of repetitions for each element. This should be a
132
+ non-negative integer. Repeating 0 times will return an empty
133
+ Index.
134
+ axis : None
135
+ Must be ``None``. Has no effect but is accepted for compatibility
136
+ with numpy.
137
+
138
+ Returns
139
+ -------
140
+ repeated_index : Index
141
+ Newly created Index with repeated elements.
142
+
143
+ See Also
144
+ --------
145
+ Series.repeat : Equivalent function for Series.
146
+ numpy.repeat : Similar method for :class:`numpy.ndarray`.
147
+
148
+ Examples
149
+ --------
150
+ >>> import maxframe.dataframe as md
151
+ >>> idx = md.Index(['a', 'b', 'c'])
152
+ >>> idx.execute()
153
+ Index(['a', 'b', 'c'], dtype='object')
154
+ >>> idx.repeat(2).execute()
155
+ Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
156
+ >>> idx.repeat([1, 2, 3]).execute()
157
+ Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
158
+ """
159
+ return _repeat(obj, repeats, axis=axis)
@@ -16,11 +16,13 @@ import numpy as np
16
16
  import pandas as pd
17
17
  import pytest
18
18
 
19
+ from maxframe import options
20
+
19
21
  from .... import opcodes
20
22
  from ....core import OutputType
21
23
  from ....dataframe import DataFrame
22
24
  from ....tensor.core import TENSOR_TYPE
23
- from ....udf import with_running_options
25
+ from ....udf import ODPSFunction, with_running_options
24
26
  from ... import eval as maxframe_eval
25
27
  from ... import get_dummies, to_numeric
26
28
  from ...arithmetic import DataFrameGreater, DataFrameLess
@@ -124,6 +126,7 @@ def test_dataframe_apply():
124
126
  dtypes=pd.Series([np.dtype(float)] * 3),
125
127
  )
126
128
  assert df2.ndim == 2
129
+ assert df2.op.expect_resources == options.function.default_running_options
127
130
 
128
131
 
129
132
  def test_series_apply():
@@ -180,6 +183,8 @@ def test_series_apply():
180
183
  pd.Series, output_type="dataframe", dtypes=dtypes, index=pd.RangeIndex(2)
181
184
  )
182
185
  assert r.ndim == 2
186
+ assert r.op.expect_resources == options.function.default_running_options
187
+
183
188
  pd.testing.assert_series_equal(r.dtypes, dtypes)
184
189
  assert r.shape == (2, 3)
185
190
 
@@ -305,6 +310,7 @@ def test_transform():
305
310
  assert r.shape == series.shape
306
311
  assert r.op._op_type_ == opcodes.TRANSFORM
307
312
  assert r.op.output_types[0] == OutputType.series
313
+ assert r.op.expect_resources == options.function.default_running_options
308
314
 
309
315
 
310
316
  def test_series_isin():
@@ -563,12 +569,17 @@ def test_apply():
563
569
  )
564
570
  assert apply_df.shape == (3, 2)
565
571
  assert apply_df.op.expect_engine == "SPE"
566
- assert apply_df.op.expect_resources == {"cpu": 1, "memory": "40GB", "gpu": 0}
572
+ assert apply_df.op.expect_resources == {
573
+ "cpu": 4,
574
+ "memory": "40GB",
575
+ "gpu": 0,
576
+ "gu_quota": None,
577
+ }
567
578
 
568
579
 
569
580
  def test_pivot_table():
570
581
  from ...groupby.aggregation import DataFrameGroupByAgg
571
- from ...misc.pivot_table import DataFramePivotTable
582
+ from ...reshape.pivot_table import DataFramePivotTable
572
583
 
573
584
  raw = pd.DataFrame(
574
585
  {
@@ -602,3 +613,37 @@ def test_pivot_table():
602
613
  t = df.pivot_table(index=["A", "B"], columns="C", aggfunc="sum")
603
614
  assert isinstance(t.op, DataFramePivotTable)
604
615
  assert t.shape == (np.nan, np.nan)
616
+
617
+
618
+ def test_map_with_functions():
619
+ raw = pd.Series([1, 2, 3], name="s_name")
620
+ series = from_pandas_series(raw, chunk_size=2)
621
+
622
+ # inferred type may not be exact
623
+ def fn1(val):
624
+ return val
625
+
626
+ with pytest.raises(ValueError, match="int type"):
627
+ series.map(fn1)
628
+ mapped = series.map(fn1, dtype="float64", skip_infer=True)
629
+ assert mapped.dtype == np.dtype("float64")
630
+
631
+ # test when type infer is valid
632
+ def fn2(val):
633
+ return val * 1.0
634
+
635
+ mapped = series.map(fn2)
636
+ assert mapped.dtype == np.dtype("float64")
637
+
638
+ # test function with type annotations
639
+ def fn3(val) -> int:
640
+ return val
641
+
642
+ mapped = series.map(fn3)
643
+ assert mapped.dtype == np.dtype("int64")
644
+
645
+ # test odps function
646
+ odps_func = ODPSFunction("test_odps_udf", dtype=np.float64)
647
+ mapped = series.map(odps_func)
648
+ assert isinstance(mapped.op.arg, ODPSFunction)
649
+ assert mapped.dtype == np.dtype("float64")
@@ -15,6 +15,7 @@
15
15
  import numpy as np
16
16
  import pandas as pd
17
17
 
18
+ from ... import opcodes
18
19
  from ...core import ENTITY_TYPE, OutputType
19
20
  from ...serialization.serializables import StringField
20
21
  from ...tensor import tensor as astensor
@@ -23,6 +24,8 @@ from ..operators import DataFrameOperator, DataFrameOperatorMixin
23
24
 
24
25
 
25
26
  class DataFrameToNumeric(DataFrameOperator, DataFrameOperatorMixin):
27
+ _op_type_ = opcodes.TO_NUMERIC
28
+
26
29
  errors = StringField("errors")
27
30
  downcast = StringField("downcast")
28
31
 
@@ -38,8 +38,9 @@ from ..utils import (
38
38
  _with_convert_dtype = pd_release_version < (1, 2, 0)
39
39
 
40
40
 
41
- class TransformOperator(DataFrameOperator, DataFrameOperatorMixin):
41
+ class DataFrameTransform(DataFrameOperator, DataFrameOperatorMixin):
42
42
  _op_type_ = opcodes.TRANSFORM
43
+ _legacy_name = "TransformOperator"
43
44
 
44
45
  func = AnyField("func", default=None)
45
46
  axis = AnyField("axis", default=None)
@@ -141,13 +142,17 @@ class TransformOperator(DataFrameOperator, DataFrameOperatorMixin):
141
142
 
142
143
  @classmethod
143
144
  def estimate_size(
144
- cls, ctx: MutableMapping[str, Union[int, float]], op: "TransformOperator"
145
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameTransform"
145
146
  ) -> None:
146
147
  if isinstance(op.func, MarkedFunction):
147
148
  ctx[op.outputs[0].key] = float("inf")
148
149
  super().estimate_size(ctx, op)
149
150
 
150
151
 
152
+ # keep for import compatibility
153
+ TransformOperator = DataFrameTransform
154
+
155
+
151
156
  def get_packed_funcs(df, output_type, func, *args, **kwds) -> Any:
152
157
  stub_df = _build_stub_pandas_obj(df, output_type)
153
158
  n_args = copy_if_possible(args)
@@ -235,7 +240,7 @@ def df_transform(df, func, axis=0, *args, dtypes=None, skip_infer=False, **kwarg
235
240
  """
236
241
  call_agg = kwargs.pop("_call_agg", False)
237
242
  func = get_packed_funcs(df, OutputType.dataframe, func, *args, **kwargs)
238
- op = TransformOperator(
243
+ op = DataFrameTransform(
239
244
  func=func,
240
245
  axis=axis,
241
246
  args=args,
@@ -327,13 +332,15 @@ def series_transform(
327
332
  """
328
333
  call_agg = kwargs.pop("_call_agg", False)
329
334
  func = get_packed_funcs(series, OutputType.series, func, *args, **kwargs)
330
- op = TransformOperator(
335
+ op = DataFrameTransform(
331
336
  func=func,
332
337
  axis=axis,
333
338
  convert_dtype=convert_dtype,
334
339
  args=args,
335
340
  kwds=kwargs,
336
- output_types=[OutputType.series],
341
+ output_types=[OutputType.series]
342
+ if not call_agg and not isinstance(func, list)
343
+ else None,
337
344
  call_agg=call_agg,
338
345
  )
339
346
  return op(series, dtype=dtype, name=series.name, skip_infer=skip_infer)
@@ -12,6 +12,8 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import pandas as pd
16
+
15
17
  from ... import opcodes
16
18
  from ...core import OutputType
17
19
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
@@ -30,10 +32,20 @@ class DataFrameTranspose(DataFrameOperator, DataFrameOperatorMixin):
30
32
  new_shape = arg.shape[::-1]
31
33
  columns_value = arg.index_value
32
34
  index_value = parse_index(arg.dtypes.index)
35
+
36
+ if not arg.index_value.has_value:
37
+ dtypes = None
38
+ else:
39
+ from pandas.core.dtypes.cast import find_common_type
40
+
41
+ dtype = find_common_type(list(arg.dtypes))
42
+ pd_index = arg.index_value.to_pandas()
43
+ dtypes = pd.Series([dtype] * len(pd_index), index=pd_index)
44
+
33
45
  return self.new_dataframe(
34
46
  [arg],
35
47
  shape=new_shape,
36
- dtypes=None,
48
+ dtypes=dtypes,
37
49
  columns_value=columns_value,
38
50
  index_value=index_value,
39
51
  )
@@ -0,0 +1,115 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ...udf import builtin_function
16
+
17
+
18
+ @builtin_function
19
+ def _item_or_none(item):
20
+ if len(item) > 0:
21
+ return item[0]
22
+ return None
23
+
24
+
25
+ def _valid_index(df_or_series, slc: slice):
26
+ from ... import tensor as mt
27
+
28
+ idx = df_or_series.dropna(how="all").index[slc]
29
+ return mt.array(idx).mf.apply_chunk(_item_or_none, dtype=idx.dtype)
30
+
31
+
32
+ _doc = """
33
+ Return index for %(pos)s non-NA value or None, if no non-NA value is found.
34
+
35
+ Returns
36
+ -------
37
+ type of index
38
+
39
+ Examples
40
+ --------
41
+ For Series:
42
+
43
+ >>> import maxframe.dataframe as md
44
+ >>> s = md.Series([None, 3, 4])
45
+ >>> s.first_valid_index().execute()
46
+ 1
47
+ >>> s.last_valid_index().execute()
48
+ 2
49
+
50
+ >>> s = md.Series([None, None])
51
+ >>> print(s.first_valid_index().execute())
52
+ None
53
+ >>> print(s.last_valid_index().execute())
54
+ None
55
+
56
+ If all elements in Series are NA/null, returns None.
57
+
58
+ >>> s = md.Series()
59
+ >>> print(s.first_valid_index().execute())
60
+ None
61
+ >>> print(s.last_valid_index().execute())
62
+ None
63
+
64
+ If Series is empty, returns None.
65
+
66
+ For DataFrame:
67
+
68
+ >>> df = md.DataFrame({'A': [None, None, 2], 'B': [None, 3, 4]})
69
+ >>> df.execute()
70
+ A B
71
+ 0 NaN NaN
72
+ 1 NaN 3.0
73
+ 2 2.0 4.0
74
+ >>> df.first_valid_index().execute()
75
+ 1
76
+ >>> df.last_valid_index().execute()
77
+ 2
78
+
79
+ >>> df = md.DataFrame({'A': [None, None, None], 'B': [None, None, None]})
80
+ >>> df.execute()
81
+ A B
82
+ 0 None None
83
+ 1 None None
84
+ 2 None None
85
+ >>> print(df.first_valid_index().execute())
86
+ None
87
+ >>> print(df.last_valid_index().execute())
88
+ None
89
+
90
+ If all elements in DataFrame are NA/null, returns None.
91
+
92
+ >>> df = md.DataFrame()
93
+ >>> df.execute()
94
+ Empty DataFrame
95
+ Columns: []
96
+ Index: []
97
+ >>> print(df.first_valid_index().execute())
98
+ None
99
+ >>> print(df.last_valid_index().execute())
100
+ None
101
+
102
+ If DataFrame is empty, returns None.
103
+ """
104
+
105
+
106
+ def first_valid_index(df_or_series):
107
+ return _valid_index(df_or_series, slice(None, 1))
108
+
109
+
110
+ def last_valid_index(df_or_series):
111
+ return _valid_index(df_or_series, slice(-1, None))
112
+
113
+
114
+ first_valid_index.__doc__ = _doc % dict(pos="first")
115
+ last_valid_index.__doc__ = _doc % dict(pos="last")