maxframe 2.0.0b2__cp310-cp310-win_amd64.whl → 2.2.0__cp310-cp310-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (391)
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp310-win_amd64.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +6 -6
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +34 -1
  15. maxframe/codegen/spe/dataframe/misc.py +9 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +14 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +30 -17
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  28. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  29. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  30. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  31. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  32. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  33. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  34. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  35. maxframe/codegen/spe/tensor/__init__.py +3 -0
  36. maxframe/codegen/spe/tensor/fft.py +74 -0
  37. maxframe/codegen/spe/tensor/linalg.py +29 -2
  38. maxframe/codegen/spe/tensor/misc.py +79 -25
  39. maxframe/codegen/spe/tensor/spatial.py +45 -0
  40. maxframe/codegen/spe/tensor/statistics.py +44 -0
  41. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  42. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  43. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  44. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  45. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  46. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  47. maxframe/codegen/spe/utils.py +2 -0
  48. maxframe/config/config.py +70 -9
  49. maxframe/config/tests/test_validators.py +13 -1
  50. maxframe/config/validators.py +49 -0
  51. maxframe/conftest.py +44 -17
  52. maxframe/core/accessor.py +2 -2
  53. maxframe/core/entity/core.py +5 -0
  54. maxframe/core/entity/tileables.py +1 -1
  55. maxframe/core/graph/core.cp310-win_amd64.pyd +0 -0
  56. maxframe/core/graph/entity.py +1 -2
  57. maxframe/core/operator/base.py +9 -2
  58. maxframe/core/operator/core.py +10 -2
  59. maxframe/core/operator/utils.py +13 -0
  60. maxframe/dataframe/__init__.py +10 -3
  61. maxframe/dataframe/accessors/__init__.py +1 -1
  62. maxframe/dataframe/accessors/compat.py +45 -0
  63. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  64. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  65. maxframe/dataframe/accessors/dict_/core.py +48 -0
  66. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  67. maxframe/dataframe/accessors/dict_/length.py +7 -16
  68. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  69. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  70. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  71. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  72. maxframe/dataframe/accessors/list_/core.py +48 -0
  73. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  74. maxframe/dataframe/accessors/list_/length.py +7 -16
  75. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  76. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  77. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  78. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  79. maxframe/dataframe/accessors/struct_/core.py +43 -0
  80. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  81. maxframe/dataframe/accessors/struct_/field.py +123 -0
  82. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  83. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  84. maxframe/dataframe/arithmetic/__init__.py +14 -4
  85. maxframe/dataframe/arithmetic/between.py +106 -0
  86. maxframe/dataframe/arithmetic/dot.py +237 -0
  87. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  88. maxframe/dataframe/core.py +63 -118
  89. maxframe/dataframe/datasource/__init__.py +18 -0
  90. maxframe/dataframe/datasource/from_dict.py +124 -0
  91. maxframe/dataframe/datasource/from_index.py +1 -1
  92. maxframe/dataframe/datasource/from_records.py +77 -0
  93. maxframe/dataframe/datasource/from_tensor.py +109 -41
  94. maxframe/dataframe/datasource/read_csv.py +2 -3
  95. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  96. maxframe/dataframe/datastore/__init__.py +5 -1
  97. maxframe/dataframe/datastore/to_csv.py +29 -41
  98. maxframe/dataframe/datastore/to_odps.py +30 -4
  99. maxframe/dataframe/extensions/__init__.py +20 -4
  100. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  101. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  102. maxframe/dataframe/extensions/collect_kv.py +126 -0
  103. maxframe/dataframe/extensions/extract_kv.py +177 -0
  104. maxframe/dataframe/extensions/map_reduce.py +263 -0
  105. maxframe/dataframe/extensions/rebalance.py +62 -0
  106. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  107. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  108. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  109. maxframe/dataframe/groupby/__init__.py +12 -1
  110. maxframe/dataframe/groupby/aggregation.py +78 -45
  111. maxframe/dataframe/groupby/apply.py +1 -1
  112. maxframe/dataframe/groupby/apply_chunk.py +18 -2
  113. maxframe/dataframe/groupby/core.py +96 -12
  114. maxframe/dataframe/groupby/cum.py +4 -25
  115. maxframe/dataframe/groupby/expanding.py +264 -0
  116. maxframe/dataframe/groupby/fill.py +1 -1
  117. maxframe/dataframe/groupby/getitem.py +12 -5
  118. maxframe/dataframe/groupby/head.py +11 -1
  119. maxframe/dataframe/groupby/rank.py +136 -0
  120. maxframe/dataframe/groupby/rolling.py +206 -0
  121. maxframe/dataframe/groupby/shift.py +114 -0
  122. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  123. maxframe/dataframe/indexing/__init__.py +20 -1
  124. maxframe/dataframe/indexing/droplevel.py +195 -0
  125. maxframe/dataframe/indexing/filter.py +169 -0
  126. maxframe/dataframe/indexing/get_level_values.py +76 -0
  127. maxframe/dataframe/indexing/iat.py +45 -0
  128. maxframe/dataframe/indexing/iloc.py +152 -12
  129. maxframe/dataframe/indexing/insert.py +1 -1
  130. maxframe/dataframe/indexing/loc.py +287 -7
  131. maxframe/dataframe/indexing/reindex.py +14 -5
  132. maxframe/dataframe/indexing/rename.py +6 -0
  133. maxframe/dataframe/indexing/rename_axis.py +2 -2
  134. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  135. maxframe/dataframe/indexing/reset_index.py +33 -6
  136. maxframe/dataframe/indexing/sample.py +8 -0
  137. maxframe/dataframe/indexing/setitem.py +3 -3
  138. maxframe/dataframe/indexing/swaplevel.py +185 -0
  139. maxframe/dataframe/indexing/take.py +99 -0
  140. maxframe/dataframe/indexing/truncate.py +140 -0
  141. maxframe/dataframe/indexing/where.py +0 -11
  142. maxframe/dataframe/indexing/xs.py +148 -0
  143. maxframe/dataframe/merge/__init__.py +12 -1
  144. maxframe/dataframe/merge/append.py +97 -98
  145. maxframe/dataframe/merge/combine_first.py +120 -0
  146. maxframe/dataframe/merge/compare.py +387 -0
  147. maxframe/dataframe/merge/concat.py +183 -0
  148. maxframe/dataframe/merge/update.py +271 -0
  149. maxframe/dataframe/misc/__init__.py +16 -10
  150. maxframe/dataframe/misc/_duplicate.py +10 -4
  151. maxframe/dataframe/misc/apply.py +1 -1
  152. maxframe/dataframe/misc/check_unique.py +51 -0
  153. maxframe/dataframe/misc/clip.py +145 -0
  154. maxframe/dataframe/misc/describe.py +175 -9
  155. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  156. maxframe/dataframe/misc/duplicated.py +2 -2
  157. maxframe/dataframe/misc/get_dummies.py +5 -1
  158. maxframe/dataframe/misc/isin.py +2 -2
  159. maxframe/dataframe/misc/map.py +94 -0
  160. maxframe/dataframe/misc/tests/test_misc.py +13 -2
  161. maxframe/dataframe/misc/to_numeric.py +3 -0
  162. maxframe/dataframe/misc/transform.py +12 -5
  163. maxframe/dataframe/misc/transpose.py +13 -1
  164. maxframe/dataframe/misc/valid_index.py +115 -0
  165. maxframe/dataframe/misc/value_counts.py +38 -4
  166. maxframe/dataframe/missing/checkna.py +13 -6
  167. maxframe/dataframe/missing/dropna.py +5 -0
  168. maxframe/dataframe/missing/fillna.py +1 -1
  169. maxframe/dataframe/missing/replace.py +7 -4
  170. maxframe/dataframe/reduction/__init__.py +29 -15
  171. maxframe/dataframe/reduction/aggregation.py +38 -9
  172. maxframe/dataframe/reduction/all.py +2 -2
  173. maxframe/dataframe/reduction/any.py +2 -2
  174. maxframe/dataframe/reduction/argmax.py +100 -0
  175. maxframe/dataframe/reduction/argmin.py +100 -0
  176. maxframe/dataframe/reduction/core.py +65 -18
  177. maxframe/dataframe/reduction/count.py +13 -9
  178. maxframe/dataframe/reduction/cov.py +166 -0
  179. maxframe/dataframe/reduction/cummax.py +2 -2
  180. maxframe/dataframe/reduction/cummin.py +2 -2
  181. maxframe/dataframe/reduction/cumprod.py +2 -2
  182. maxframe/dataframe/reduction/cumsum.py +2 -2
  183. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  184. maxframe/dataframe/reduction/idxmax.py +185 -0
  185. maxframe/dataframe/reduction/idxmin.py +185 -0
  186. maxframe/dataframe/reduction/kurtosis.py +37 -30
  187. maxframe/dataframe/reduction/max.py +2 -2
  188. maxframe/dataframe/reduction/mean.py +9 -7
  189. maxframe/dataframe/reduction/median.py +2 -2
  190. maxframe/dataframe/reduction/min.py +2 -2
  191. maxframe/dataframe/reduction/nunique.py +9 -8
  192. maxframe/dataframe/reduction/prod.py +18 -13
  193. maxframe/dataframe/reduction/reduction_size.py +2 -2
  194. maxframe/dataframe/reduction/sem.py +13 -9
  195. maxframe/dataframe/reduction/skew.py +31 -27
  196. maxframe/dataframe/reduction/str_concat.py +10 -7
  197. maxframe/dataframe/reduction/sum.py +18 -14
  198. maxframe/dataframe/reduction/unique.py +20 -3
  199. maxframe/dataframe/reduction/var.py +16 -12
  200. maxframe/dataframe/reshape/__init__.py +38 -0
  201. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  202. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  203. maxframe/dataframe/reshape/unstack.py +114 -0
  204. maxframe/dataframe/sort/__init__.py +8 -0
  205. maxframe/dataframe/sort/argsort.py +62 -0
  206. maxframe/dataframe/sort/core.py +1 -0
  207. maxframe/dataframe/sort/nlargest.py +238 -0
  208. maxframe/dataframe/sort/nsmallest.py +228 -0
  209. maxframe/dataframe/statistics/__init__.py +3 -3
  210. maxframe/dataframe/statistics/corr.py +1 -0
  211. maxframe/dataframe/statistics/quantile.py +2 -2
  212. maxframe/dataframe/tests/test_typing.py +104 -0
  213. maxframe/dataframe/tests/test_utils.py +66 -2
  214. maxframe/dataframe/typing_.py +185 -0
  215. maxframe/dataframe/utils.py +95 -26
  216. maxframe/dataframe/window/aggregation.py +8 -4
  217. maxframe/dataframe/window/core.py +14 -1
  218. maxframe/dataframe/window/ewm.py +1 -3
  219. maxframe/dataframe/window/expanding.py +37 -35
  220. maxframe/dataframe/window/rolling.py +49 -39
  221. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  222. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  223. maxframe/env.py +7 -4
  224. maxframe/errors.py +2 -2
  225. maxframe/io/odpsio/schema.py +9 -3
  226. maxframe/io/odpsio/tableio.py +7 -2
  227. maxframe/io/odpsio/tests/test_schema.py +198 -83
  228. maxframe/learn/__init__.py +10 -2
  229. maxframe/learn/cluster/__init__.py +15 -0
  230. maxframe/learn/cluster/_kmeans.py +782 -0
  231. maxframe/learn/contrib/llm/core.py +2 -0
  232. maxframe/learn/contrib/xgboost/core.py +86 -1
  233. maxframe/learn/contrib/xgboost/train.py +5 -2
  234. maxframe/learn/core.py +66 -0
  235. maxframe/learn/linear_model/_base.py +58 -1
  236. maxframe/learn/linear_model/_lin_reg.py +1 -1
  237. maxframe/learn/metrics/__init__.py +6 -0
  238. maxframe/learn/metrics/_classification.py +145 -0
  239. maxframe/learn/metrics/_ranking.py +477 -0
  240. maxframe/learn/metrics/_scorer.py +60 -0
  241. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  242. maxframe/learn/metrics/pairwise/core.py +77 -0
  243. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  244. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  245. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  246. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  247. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  248. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  249. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  250. maxframe/learn/metrics/tests/__init__.py +13 -0
  251. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  252. maxframe/learn/utils/__init__.py +1 -1
  253. maxframe/learn/utils/checks.py +1 -2
  254. maxframe/learn/utils/core.py +59 -0
  255. maxframe/learn/utils/extmath.py +37 -0
  256. maxframe/learn/utils/odpsio.py +193 -0
  257. maxframe/learn/utils/validation.py +2 -2
  258. maxframe/lib/compat.py +40 -0
  259. maxframe/lib/dtypes_extension/__init__.py +16 -1
  260. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  261. maxframe/lib/dtypes_extension/blob.py +304 -0
  262. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  263. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  264. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  265. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  266. maxframe/lib/filesystem/_oss_lib/common.py +122 -50
  267. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  268. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  269. maxframe/lib/filesystem/base.py +1 -1
  270. maxframe/lib/filesystem/core.py +1 -1
  271. maxframe/lib/filesystem/oss.py +115 -46
  272. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  273. maxframe/lib/mmh3.cp310-win_amd64.pyd +0 -0
  274. maxframe/lib/wrapped_pickle.py +10 -0
  275. maxframe/opcodes.py +33 -15
  276. maxframe/protocol.py +12 -0
  277. maxframe/serialization/__init__.py +11 -2
  278. maxframe/serialization/arrow.py +38 -13
  279. maxframe/serialization/blob.py +32 -0
  280. maxframe/serialization/core.cp310-win_amd64.pyd +0 -0
  281. maxframe/serialization/core.pyx +39 -1
  282. maxframe/serialization/exception.py +2 -4
  283. maxframe/serialization/numpy.py +11 -0
  284. maxframe/serialization/pandas.py +46 -9
  285. maxframe/serialization/serializables/core.py +2 -2
  286. maxframe/serialization/tests/test_serial.py +29 -2
  287. maxframe/tensor/__init__.py +38 -8
  288. maxframe/tensor/arithmetic/__init__.py +19 -10
  289. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  290. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
  291. maxframe/tensor/core.py +3 -2
  292. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  293. maxframe/tensor/extensions/__init__.py +2 -0
  294. maxframe/tensor/extensions/apply_chunk.py +3 -3
  295. maxframe/tensor/extensions/rebalance.py +65 -0
  296. maxframe/tensor/fft/__init__.py +32 -0
  297. maxframe/tensor/fft/core.py +168 -0
  298. maxframe/tensor/fft/fft.py +112 -0
  299. maxframe/tensor/fft/fft2.py +118 -0
  300. maxframe/tensor/fft/fftfreq.py +80 -0
  301. maxframe/tensor/fft/fftn.py +123 -0
  302. maxframe/tensor/fft/fftshift.py +79 -0
  303. maxframe/tensor/fft/hfft.py +112 -0
  304. maxframe/tensor/fft/ifft.py +114 -0
  305. maxframe/tensor/fft/ifft2.py +115 -0
  306. maxframe/tensor/fft/ifftn.py +123 -0
  307. maxframe/tensor/fft/ifftshift.py +73 -0
  308. maxframe/tensor/fft/ihfft.py +93 -0
  309. maxframe/tensor/fft/irfft.py +118 -0
  310. maxframe/tensor/fft/irfft2.py +62 -0
  311. maxframe/tensor/fft/irfftn.py +114 -0
  312. maxframe/tensor/fft/rfft.py +116 -0
  313. maxframe/tensor/fft/rfft2.py +63 -0
  314. maxframe/tensor/fft/rfftfreq.py +87 -0
  315. maxframe/tensor/fft/rfftn.py +113 -0
  316. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  317. maxframe/tensor/linalg/__init__.py +7 -0
  318. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  319. maxframe/tensor/linalg/cholesky.py +117 -0
  320. maxframe/tensor/linalg/einsum.py +339 -0
  321. maxframe/tensor/linalg/lstsq.py +100 -0
  322. maxframe/tensor/linalg/matrix_norm.py +75 -0
  323. maxframe/tensor/linalg/norm.py +249 -0
  324. maxframe/tensor/linalg/solve.py +72 -0
  325. maxframe/tensor/linalg/solve_triangular.py +2 -2
  326. maxframe/tensor/linalg/vector_norm.py +113 -0
  327. maxframe/tensor/misc/__init__.py +24 -1
  328. maxframe/tensor/misc/argwhere.py +72 -0
  329. maxframe/tensor/misc/array_split.py +46 -0
  330. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  331. maxframe/tensor/misc/copyto.py +130 -0
  332. maxframe/tensor/misc/delete.py +104 -0
  333. maxframe/tensor/misc/dsplit.py +68 -0
  334. maxframe/tensor/misc/ediff1d.py +74 -0
  335. maxframe/tensor/misc/expand_dims.py +85 -0
  336. maxframe/tensor/misc/flip.py +90 -0
  337. maxframe/tensor/misc/fliplr.py +64 -0
  338. maxframe/tensor/misc/flipud.py +68 -0
  339. maxframe/tensor/misc/hsplit.py +85 -0
  340. maxframe/tensor/misc/insert.py +139 -0
  341. maxframe/tensor/misc/moveaxis.py +83 -0
  342. maxframe/tensor/misc/result_type.py +88 -0
  343. maxframe/tensor/misc/roll.py +124 -0
  344. maxframe/tensor/misc/rollaxis.py +77 -0
  345. maxframe/tensor/misc/shape.py +89 -0
  346. maxframe/tensor/misc/split.py +190 -0
  347. maxframe/tensor/misc/tile.py +109 -0
  348. maxframe/tensor/misc/vsplit.py +74 -0
  349. maxframe/tensor/reduction/array_equal.py +2 -1
  350. maxframe/tensor/sort/__init__.py +2 -0
  351. maxframe/tensor/sort/argpartition.py +98 -0
  352. maxframe/tensor/sort/partition.py +228 -0
  353. maxframe/tensor/spatial/__init__.py +15 -0
  354. maxframe/tensor/spatial/distance/__init__.py +17 -0
  355. maxframe/tensor/spatial/distance/cdist.py +421 -0
  356. maxframe/tensor/spatial/distance/pdist.py +398 -0
  357. maxframe/tensor/spatial/distance/squareform.py +153 -0
  358. maxframe/tensor/special/__init__.py +159 -21
  359. maxframe/tensor/special/airy.py +55 -0
  360. maxframe/tensor/special/bessel.py +199 -0
  361. maxframe/tensor/special/core.py +65 -4
  362. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  363. maxframe/tensor/special/ellip_harm.py +55 -0
  364. maxframe/tensor/special/err_fresnel.py +223 -0
  365. maxframe/tensor/special/gamma_funcs.py +303 -0
  366. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  367. maxframe/tensor/special/info_theory.py +189 -0
  368. maxframe/tensor/special/misc.py +21 -0
  369. maxframe/tensor/statistics/__init__.py +6 -0
  370. maxframe/tensor/statistics/corrcoef.py +77 -0
  371. maxframe/tensor/statistics/cov.py +222 -0
  372. maxframe/tensor/statistics/digitize.py +126 -0
  373. maxframe/tensor/statistics/histogram.py +520 -0
  374. maxframe/tensor/statistics/median.py +85 -0
  375. maxframe/tensor/statistics/ptp.py +89 -0
  376. maxframe/tensor/utils.py +3 -3
  377. maxframe/tests/test_utils.py +43 -1
  378. maxframe/tests/utils.py +0 -2
  379. maxframe/typing_.py +2 -0
  380. maxframe/udf.py +27 -2
  381. maxframe/utils.py +193 -19
  382. {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
  383. {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
  384. maxframe_client/fetcher.py +35 -4
  385. maxframe_client/session/odps.py +7 -2
  386. maxframe_client/tests/test_fetcher.py +76 -3
  387. maxframe_client/tests/test_session.py +4 -1
  388. /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  389. /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  390. {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
  391. {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
@@ -22,7 +22,7 @@ from ...core import EntityData
22
22
  from ...serialization.serializables import AnyField, FieldTypes, KeyField, ListField
23
23
  from ..core import SERIES_TYPE
24
24
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
25
- from ..utils import build_empty_df, parse_index
25
+ from ..utils import build_df, parse_index
26
26
 
27
27
 
28
28
  class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
@@ -43,8 +43,6 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
43
43
 
44
44
  def __call__(self, df_or_series):
45
45
  if isinstance(df_or_series, SERIES_TYPE):
46
- if not np.issubdtype(df_or_series.dtype, np.number):
47
- raise NotImplementedError("non-numeric type is not supported for now")
48
46
  test_series = pd.Series([], dtype=df_or_series.dtype).describe(
49
47
  percentiles=self.percentiles,
50
48
  include=self.include,
@@ -57,7 +55,7 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
57
55
  index_value=parse_index(test_series.index, store_data=True),
58
56
  )
59
57
  else:
60
- test_inp_df = build_empty_df(df_or_series.dtypes)
58
+ test_inp_df = build_df(df_or_series)
61
59
  test_df = test_inp_df.describe(
62
60
  percentiles=self.percentiles,
63
61
  include=self.include,
@@ -69,11 +67,6 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
69
67
  # MaxFrame DataFrame allows user to specify percentiles=False
70
68
  # to skip computation about percentiles
71
69
  test_df.drop(["50%"], axis=0, inplace=True)
72
- for dtype in test_df.dtypes:
73
- if not np.issubdtype(dtype, np.number):
74
- raise NotImplementedError(
75
- "non-numeric type is not supported for now"
76
- )
77
70
  return self.new_dataframe(
78
71
  [df_or_series],
79
72
  shape=test_df.shape,
@@ -84,6 +77,179 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
84
77
 
85
78
 
86
79
  def describe(df_or_series, percentiles=None, include=None, exclude=None):
80
+ """
81
+ Generate descriptive statistics.
82
+
83
+ Descriptive statistics include those that summarize the central
84
+ tendency, dispersion and shape of a
85
+ dataset's distribution, excluding ``NaN`` values.
86
+
87
+ Analyzes both numeric and object series, as well
88
+ as ``DataFrame`` column sets of mixed data types. The output
89
+ will vary depending on what is provided. Refer to the notes
90
+ below for more detail.
91
+
92
+ Parameters
93
+ ----------
94
+ percentiles : list-like of numbers, optional
95
+ The percentiles to include in the output. All should
96
+ fall between 0 and 1. The default is
97
+ ``[.25, .5, .75]``, which returns the 25th, 50th, and
98
+ 75th percentiles.
99
+ include : 'all', list-like of dtypes or None (default), optional
100
+ A white list of data types to include in the result. Ignored
101
+ for ``Series``. Here are the options:
102
+
103
+ - 'all' : All columns of the input will be included in the output.
104
+ - A list-like of dtypes : Limits the results to the
105
+ provided data types.
106
+ To limit the result to numeric types submit
107
+ ``numpy.number``. To limit it instead to object columns submit
108
+ the ``numpy.object`` data type. Strings
109
+ can also be used in the style of
110
+ ``select_dtypes`` (e.g. ``df.describe(include=['O'])``).
111
+ - None (default) : The result will include all numeric columns.
112
+ exclude : list-like of dtypes or None (default), optional,
113
+ A black list of data types to omit from the result. Ignored
114
+ for ``Series``. Here are the options:
115
+
116
+ - A list-like of dtypes : Excludes the provided data types
117
+ from the result. To exclude numeric types submit
118
+ ``numpy.number``. To exclude object columns submit the data
119
+ type ``numpy.object``. Strings can also be used in the style of
120
+ ``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``).
121
+ - None (default) : The result will exclude nothing.
122
+
123
+ Returns
124
+ -------
125
+ Series or DataFrame
126
+ Summary statistics of the Series or Dataframe provided.
127
+
128
+ See Also
129
+ --------
130
+ DataFrame.count: Count number of non-NA/null observations.
131
+ DataFrame.max: Maximum of the values in the object.
132
+ DataFrame.min: Minimum of the values in the object.
133
+ DataFrame.mean: Mean of the values.
134
+ DataFrame.std: Standard deviation of the observations.
135
+ DataFrame.select_dtypes: Subset of a DataFrame including/excluding
136
+ columns based on their dtype.
137
+
138
+ Notes
139
+ -----
140
+ For numeric data, the result's index will include ``count``,
141
+ ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
142
+ upper percentiles. By default the lower percentile is ``25`` and the
143
+ upper percentile is ``75``. The ``50`` percentile is the
144
+ same as the median.
145
+
146
+ For object data (e.g. strings or timestamps), the result's index
147
+ will include ``count``, ``unique``, ``top``, and ``freq``. The ``top``
148
+ is the most common value. The ``freq`` is the most common value's
149
+ frequency. Timestamps also include the ``first`` and ``last`` items.
150
+
151
+ If multiple object values have the highest count, then the
152
+ ``count`` and ``top`` results will be arbitrarily chosen from
153
+ among those with the highest count.
154
+
155
+ For mixed data types provided via a ``DataFrame``, the default is to
156
+ return only an analysis of numeric columns. If the dataframe consists
157
+ only of object data without any numeric columns, the default is to
158
+ return an analysis of object columns. If ``include='all'`` is provided
159
+ as an option, the result will include a union of attributes of each type.
160
+
161
+ The `include` and `exclude` parameters can be used to limit
162
+ which columns in a ``DataFrame`` are analyzed for the output.
163
+ The parameters are ignored when analyzing a ``Series``.
164
+
165
+ Examples
166
+ --------
167
+ Describing a numeric ``Series``.
168
+
169
+ >>> import maxframe.tensor as mt
170
+ >>> import maxframe.dataframe as md
171
+ >>> s = md.Series([1, 2, 3])
172
+ >>> s.describe().execute()
173
+ count 3.0
174
+ mean 2.0
175
+ std 1.0
176
+ min 1.0
177
+ 25% 1.5
178
+ 50% 2.0
179
+ 75% 2.5
180
+ max 3.0
181
+ dtype: float64
182
+
183
+ Describing a ``DataFrame``. By default only numeric fields
184
+ are returned.
185
+
186
+ >>> df = md.DataFrame({'numeric': [1, 2, 3],
187
+ ... 'object': ['a', 'b', 'c']
188
+ ... })
189
+ >>> df.describe().execute()
190
+ numeric
191
+ count 3.0
192
+ mean 2.0
193
+ std 1.0
194
+ min 1.0
195
+ 25% 1.5
196
+ 50% 2.0
197
+ 75% 2.5
198
+ max 3.0
199
+
200
+ Describing all columns of a ``DataFrame`` regardless of data type.
201
+
202
+ >>> df.describe(include='all').execute() # doctest: +SKIP.execute()
203
+ numeric object
204
+ count 3.0 3
205
+ unique NaN 3
206
+ top NaN a
207
+ freq NaN 1
208
+ mean 2.0 NaN
209
+ std 1.0 NaN
210
+ min 1.0 NaN
211
+ 25% 1.5 NaN
212
+ 50% 2.0 NaN
213
+ 75% 2.5 NaN
214
+ max 3.0 NaN
215
+
216
+ Describing a column from a ``DataFrame`` by accessing it as
217
+ an attribute.
218
+
219
+ >>> df.numeric.describe().execute()
220
+ count 3.0
221
+ mean 2.0
222
+ std 1.0
223
+ min 1.0
224
+ 25% 1.5
225
+ 50% 2.0
226
+ 75% 2.5
227
+ max 3.0
228
+ Name: numeric, dtype: float64
229
+
230
+ Including only numeric columns in a ``DataFrame`` description.
231
+
232
+ >>> df.describe(include=[mt.number]).execute()
233
+ numeric
234
+ count 3.0
235
+ mean 2.0
236
+ std 1.0
237
+ min 1.0
238
+ 25% 1.5
239
+ 50% 2.0
240
+ 75% 2.5
241
+ max 3.0
242
+
243
+ Including only string columns in a ``DataFrame`` description.
244
+
245
+ >>> df.describe(include=[object]).execute() # doctest: +SKIP.execute()
246
+ object
247
+ count 3
248
+ unique 3
249
+ top a
250
+ freq 1
251
+ """
252
+ # fixme add support for categorical columns once implemented
87
253
  if percentiles is False:
88
254
  percentiles = []
89
255
  elif percentiles is None:
@@ -19,10 +19,10 @@ from ... import opcodes
19
19
  from ...serialization.serializables import BoolField
20
20
  from ..operators import OutputType
21
21
  from ..utils import gen_unknown_index_value, parse_index
22
- from ._duplicate import DuplicateOperand, validate_subset
22
+ from ._duplicate import BaseDuplicateOp, validate_subset
23
23
 
24
24
 
25
- class DataFrameDropDuplicates(DuplicateOperand):
25
+ class DataFrameDropDuplicates(BaseDuplicateOp):
26
26
  _op_type_ = opcodes.DROP_DUPLICATES
27
27
 
28
28
  ignore_index = BoolField("ignore_index", default=True)
@@ -16,10 +16,10 @@ import numpy as np
16
16
 
17
17
  from ... import opcodes
18
18
  from ...core import OutputType
19
- from ._duplicate import DuplicateOperand, validate_subset
19
+ from ._duplicate import BaseDuplicateOp, validate_subset
20
20
 
21
21
 
22
- class DataFrameDuplicated(DuplicateOperand):
22
+ class DataFrameDuplicated(BaseDuplicateOp):
23
23
  _op_type_ = opcodes.DUPLICATED
24
24
 
25
25
  def __init__(self, output_types=None, **kw):
@@ -25,12 +25,14 @@ from ...serialization.serializables import (
25
25
  ListField,
26
26
  StringField,
27
27
  )
28
+ from ...utils import make_dtype, pd_release_version
28
29
  from ..datasource.dataframe import from_pandas as from_pandas_df
29
30
  from ..datasource.series import from_pandas as from_pandas_series
30
31
  from ..initializer import Series as asseries
31
32
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
32
33
 
33
34
  _encoding_dtype_kind = ["O", "S", "U"]
35
+ _ret_uint8 = pd_release_version < (2, 0, 0)
34
36
 
35
37
 
36
38
  class DataFrameGetDummies(DataFrameOperator, DataFrameOperatorMixin):
@@ -181,7 +183,9 @@ def get_dummies(
181
183
  elif isinstance(data, pd.DataFrame):
182
184
  data = from_pandas_df(data)
183
185
 
184
- dtype = dtype if dtype is not None else np.dtype(bool)
186
+ dtype = make_dtype(
187
+ dtype if dtype is not None else np.dtype(np.uint8 if _ret_uint8 else bool)
188
+ )
185
189
 
186
190
  if prefix is not None:
187
191
  if isinstance(prefix, list):
@@ -133,7 +133,7 @@ def series_isin(elements, values):
133
133
  5 False
134
134
  Name: animal, dtype: bool
135
135
  """
136
- if is_list_like(values):
136
+ if is_list_like(values) and not isinstance(values, ENTITY_TYPE):
137
137
  values = list(values)
138
138
  elif not isinstance(values, (SERIES_TYPE, TENSOR_TYPE, INDEX_TYPE)):
139
139
  raise TypeError(
@@ -207,7 +207,7 @@ def df_isin(df, values):
207
207
  falcon True True
208
208
  dog False False
209
209
  """
210
- if is_list_like(values) and not isinstance(values, dict):
210
+ if is_list_like(values) and not isinstance(values, (dict, ENTITY_TYPE)):
211
211
  values = list(values)
212
212
  elif not isinstance(
213
213
  values, (SERIES_TYPE, DATAFRAME_TYPE, TENSOR_TYPE, INDEX_TYPE, dict)
@@ -251,3 +251,97 @@ def index_map(
251
251
  """
252
252
  op = DataFrameMap(arg=mapper, na_action=na_action, memory_scale=memory_scale)
253
253
  return op(idx, dtype=dtype, skip_infer=skip_infer)
254
+
255
+
256
+ def df_map(
257
+ df, func, na_action=None, dtypes=None, dtype=None, skip_infer=False, **kwargs
258
+ ):
259
+ """
260
+ Apply a function to a DataFrame elementwise.
261
+
262
+ This method applies a function that accepts and returns a scalar
263
+ to every element of a DataFrame.
264
+
265
+ Parameters
266
+ ----------
267
+ func : callable
268
+ Python function, returns a single value from a single value.
269
+ na_action : {None, 'ignore'}, default None
270
+ If 'ignore', propagate NaN values, without passing them to func.
271
+ dtypes : Series, default None
272
+ Specify dtypes of returned DataFrames.
273
+ dtype : np.dtype, default None
274
+ Specify dtypes of all columns of returned DataFrames, only
275
+ effective when dtypes is not specified.
276
+ skip_infer : bool, default False
277
+ Whether to skip dtype inference when dtypes or dtype is not specified.
278
+ **kwargs
279
+ Additional keyword arguments to pass as keyword arguments to
280
+ `func`.
281
+
282
+ Returns
283
+ -------
284
+ DataFrame
285
+ Transformed DataFrame.
286
+
287
+ See Also
288
+ --------
289
+ DataFrame.apply : Apply a function along input axis of DataFrame.
290
+ DataFrame.replace : Replace values given in `to_replace` with `value`.
291
+ Series.map : Apply a function elementwise on a Series.
292
+
293
+ Examples
294
+ --------
295
+ >>> import maxframe.dataframe as md
296
+ >>> df = md.DataFrame([[1, 2.12], [3.356, 4.567]])
297
+ >>> df.execute()
298
+ 0 1
299
+ 0 1.000 2.120
300
+ 1 3.356 4.567
301
+
302
+ >>> df.map(lambda x: len(str(x))).execute()
303
+ 0 1
304
+ 0 3 4
305
+ 1 5 5
306
+
307
+ Like Series.map, NA values can be ignored:
308
+
309
+ >>> df_copy = df.copy()
310
+ >>> df_copy.iloc[0, 0] = md.NA
311
+ >>> df_copy.map(lambda x: len(str(x)), na_action='ignore').execute()
312
+ 0 1
313
+ 0 NaN 4
314
+ 1 5.0 5
315
+
316
+ It is also possible to use `map` with functions that are not
317
+ `lambda` functions:
318
+
319
+ >>> df.map(round, ndigits=1).execute()
320
+ 0 1
321
+ 0 1.0 2.1
322
+ 1 3.4 4.6
323
+
324
+ Note that a vectorized version of `func` often exists, which will
325
+ be much faster. You could square each number elementwise.
326
+
327
+ >>> df.map(lambda x: x**2).execute()
328
+ 0 1
329
+ 0 1.000000 4.494400
330
+ 1 11.262736 20.857489
331
+
332
+ But it's better to avoid map in that case.
333
+
334
+ >>> (df ** 2).execute()
335
+ 0 1
336
+ 0 1.000000 4.494400
337
+ 1 11.262736 20.857489
338
+ """
339
+ if dtypes is None and dtype is not None:
340
+ dtypes = pd.Series([dtype] * df.shape[1], index=df.dtypes.index)
341
+
342
+ def _wrapper(row):
343
+ return row.map(func, na_action=na_action, **kwargs)
344
+
345
+ return df.apply(
346
+ _wrapper, axis=1, dtypes=dtypes, skip_infer=skip_infer, elementwise=True
347
+ )
@@ -16,6 +16,8 @@ import numpy as np
16
16
  import pandas as pd
17
17
  import pytest
18
18
 
19
+ from maxframe import options
20
+
19
21
  from .... import opcodes
20
22
  from ....core import OutputType
21
23
  from ....dataframe import DataFrame
@@ -124,6 +126,7 @@ def test_dataframe_apply():
124
126
  dtypes=pd.Series([np.dtype(float)] * 3),
125
127
  )
126
128
  assert df2.ndim == 2
129
+ assert df2.op.expect_resources == options.function.default_running_options
127
130
 
128
131
 
129
132
  def test_series_apply():
@@ -180,6 +183,8 @@ def test_series_apply():
180
183
  pd.Series, output_type="dataframe", dtypes=dtypes, index=pd.RangeIndex(2)
181
184
  )
182
185
  assert r.ndim == 2
186
+ assert r.op.expect_resources == options.function.default_running_options
187
+
183
188
  pd.testing.assert_series_equal(r.dtypes, dtypes)
184
189
  assert r.shape == (2, 3)
185
190
 
@@ -305,6 +310,7 @@ def test_transform():
305
310
  assert r.shape == series.shape
306
311
  assert r.op._op_type_ == opcodes.TRANSFORM
307
312
  assert r.op.output_types[0] == OutputType.series
313
+ assert r.op.expect_resources == options.function.default_running_options
308
314
 
309
315
 
310
316
  def test_series_isin():
@@ -563,12 +569,17 @@ def test_apply():
563
569
  )
564
570
  assert apply_df.shape == (3, 2)
565
571
  assert apply_df.op.expect_engine == "SPE"
566
- assert apply_df.op.expect_resources == {"cpu": 1, "memory": "40GB", "gpu": 0}
572
+ assert apply_df.op.expect_resources == {
573
+ "cpu": 4,
574
+ "memory": "40GB",
575
+ "gpu": 0,
576
+ "gu_quota": None,
577
+ }
567
578
 
568
579
 
569
580
  def test_pivot_table():
570
581
  from ...groupby.aggregation import DataFrameGroupByAgg
571
- from ...misc.pivot_table import DataFramePivotTable
582
+ from ...reshape.pivot_table import DataFramePivotTable
572
583
 
573
584
  raw = pd.DataFrame(
574
585
  {
@@ -15,6 +15,7 @@
15
15
  import numpy as np
16
16
  import pandas as pd
17
17
 
18
+ from ... import opcodes
18
19
  from ...core import ENTITY_TYPE, OutputType
19
20
  from ...serialization.serializables import StringField
20
21
  from ...tensor import tensor as astensor
@@ -23,6 +24,8 @@ from ..operators import DataFrameOperator, DataFrameOperatorMixin
23
24
 
24
25
 
25
26
  class DataFrameToNumeric(DataFrameOperator, DataFrameOperatorMixin):
27
+ _op_type_ = opcodes.TO_NUMERIC
28
+
26
29
  errors = StringField("errors")
27
30
  downcast = StringField("downcast")
28
31
 
@@ -38,8 +38,9 @@ from ..utils import (
38
38
  _with_convert_dtype = pd_release_version < (1, 2, 0)
39
39
 
40
40
 
41
- class TransformOperator(DataFrameOperator, DataFrameOperatorMixin):
41
+ class DataFrameTransform(DataFrameOperator, DataFrameOperatorMixin):
42
42
  _op_type_ = opcodes.TRANSFORM
43
+ _legacy_name = "TransformOperator"
43
44
 
44
45
  func = AnyField("func", default=None)
45
46
  axis = AnyField("axis", default=None)
@@ -141,13 +142,17 @@ class TransformOperator(DataFrameOperator, DataFrameOperatorMixin):
141
142
 
142
143
  @classmethod
143
144
  def estimate_size(
144
- cls, ctx: MutableMapping[str, Union[int, float]], op: "TransformOperator"
145
+ cls, ctx: MutableMapping[str, Union[int, float]], op: "DataFrameTransform"
145
146
  ) -> None:
146
147
  if isinstance(op.func, MarkedFunction):
147
148
  ctx[op.outputs[0].key] = float("inf")
148
149
  super().estimate_size(ctx, op)
149
150
 
150
151
 
152
+ # keep for import compatibility
153
+ TransformOperator = DataFrameTransform
154
+
155
+
151
156
  def get_packed_funcs(df, output_type, func, *args, **kwds) -> Any:
152
157
  stub_df = _build_stub_pandas_obj(df, output_type)
153
158
  n_args = copy_if_possible(args)
@@ -235,7 +240,7 @@ def df_transform(df, func, axis=0, *args, dtypes=None, skip_infer=False, **kwarg
235
240
  """
236
241
  call_agg = kwargs.pop("_call_agg", False)
237
242
  func = get_packed_funcs(df, OutputType.dataframe, func, *args, **kwargs)
238
- op = TransformOperator(
243
+ op = DataFrameTransform(
239
244
  func=func,
240
245
  axis=axis,
241
246
  args=args,
@@ -327,13 +332,15 @@ def series_transform(
327
332
  """
328
333
  call_agg = kwargs.pop("_call_agg", False)
329
334
  func = get_packed_funcs(series, OutputType.series, func, *args, **kwargs)
330
- op = TransformOperator(
335
+ op = DataFrameTransform(
331
336
  func=func,
332
337
  axis=axis,
333
338
  convert_dtype=convert_dtype,
334
339
  args=args,
335
340
  kwds=kwargs,
336
- output_types=[OutputType.series],
341
+ output_types=[OutputType.series]
342
+ if not call_agg and not isinstance(func, list)
343
+ else None,
337
344
  call_agg=call_agg,
338
345
  )
339
346
  return op(series, dtype=dtype, name=series.name, skip_infer=skip_infer)
@@ -12,6 +12,8 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
+ import pandas as pd
16
+
15
17
  from ... import opcodes
16
18
  from ...core import OutputType
17
19
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
@@ -30,10 +32,20 @@ class DataFrameTranspose(DataFrameOperator, DataFrameOperatorMixin):
30
32
  new_shape = arg.shape[::-1]
31
33
  columns_value = arg.index_value
32
34
  index_value = parse_index(arg.dtypes.index)
35
+
36
+ if not arg.index_value.has_value:
37
+ dtypes = None
38
+ else:
39
+ from pandas.core.dtypes.cast import find_common_type
40
+
41
+ dtype = find_common_type(list(arg.dtypes))
42
+ pd_index = arg.index_value.to_pandas()
43
+ dtypes = pd.Series([dtype] * len(pd_index), index=pd_index)
44
+
33
45
  return self.new_dataframe(
34
46
  [arg],
35
47
  shape=new_shape,
36
- dtypes=None,
48
+ dtypes=dtypes,
37
49
  columns_value=columns_value,
38
50
  index_value=index_value,
39
51
  )
@@ -0,0 +1,115 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ...udf import builtin_function
16
+
17
+
18
+ @builtin_function
19
+ def _item_or_none(item):
20
+ if len(item) > 0:
21
+ return item[0]
22
+ return None
23
+
24
+
25
+ def _valid_index(df_or_series, slc: slice):
26
+ from ... import tensor as mt
27
+
28
+ idx = df_or_series.dropna(how="all").index[slc]
29
+ return mt.array(idx).mf.apply_chunk(_item_or_none, dtype=idx.dtype)
30
+
31
+
32
+ _doc = """
33
+ Return index for %(pos)s non-NA value or None, if no non-NA value is found.
34
+
35
+ Returns
36
+ -------
37
+ type of index
38
+
39
+ Examples
40
+ --------
41
+ For Series:
42
+
43
+ >>> import maxframe.dataframe as md
44
+ >>> s = md.Series([None, 3, 4])
45
+ >>> s.first_valid_index().execute()
46
+ 1
47
+ >>> s.last_valid_index().execute()
48
+ 2
49
+
50
+ >>> s = md.Series([None, None])
51
+ >>> print(s.first_valid_index().execute())
52
+ None
53
+ >>> print(s.last_valid_index().execute())
54
+ None
55
+
56
+ If all elements in Series are NA/null, returns None.
57
+
58
+ >>> s = md.Series()
59
+ >>> print(s.first_valid_index().execute())
60
+ None
61
+ >>> print(s.last_valid_index().execute())
62
+ None
63
+
64
+ If Series is empty, returns None.
65
+
66
+ For DataFrame:
67
+
68
+ >>> df = md.DataFrame({'A': [None, None, 2], 'B': [None, 3, 4]})
69
+ >>> df.execute()
70
+ A B
71
+ 0 NaN NaN
72
+ 1 NaN 3.0
73
+ 2 2.0 4.0
74
+ >>> df.first_valid_index().execute()
75
+ 1
76
+ >>> df.last_valid_index().execute()
77
+ 2
78
+
79
+ >>> df = md.DataFrame({'A': [None, None, None], 'B': [None, None, None]})
80
+ >>> df.execute()
81
+ A B
82
+ 0 None None
83
+ 1 None None
84
+ 2 None None
85
+ >>> print(df.first_valid_index().execute())
86
+ None
87
+ >>> print(df.last_valid_index().execute())
88
+ None
89
+
90
+ If all elements in DataFrame are NA/null, returns None.
91
+
92
+ >>> df = md.DataFrame()
93
+ >>> df.execute()
94
+ Empty DataFrame
95
+ Columns: []
96
+ Index: []
97
+ >>> print(df.first_valid_index().execute())
98
+ None
99
+ >>> print(df.last_valid_index().execute())
100
+ None
101
+
102
+ If DataFrame is empty, returns None.
103
+ """
104
+
105
+
106
+ def first_valid_index(df_or_series):
107
+ return _valid_index(df_or_series, slice(None, 1))
108
+
109
+
110
+ def last_valid_index(df_or_series):
111
+ return _valid_index(df_or_series, slice(-1, None))
112
+
113
+
114
+ first_valid_index.__doc__ = _doc % dict(pos="first")
115
+ last_valid_index.__doc__ = _doc % dict(pos="last")