maxframe 2.0.0b1__cp37-cp37m-win_amd64.whl → 2.2.0__cp37-cp37m-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (395)
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win_amd64.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +6 -6
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +34 -1
  15. maxframe/codegen/spe/dataframe/misc.py +9 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +14 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +30 -17
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  28. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  29. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  30. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  31. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  32. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  33. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  34. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  35. maxframe/codegen/spe/tensor/__init__.py +3 -0
  36. maxframe/codegen/spe/tensor/fft.py +74 -0
  37. maxframe/codegen/spe/tensor/linalg.py +29 -2
  38. maxframe/codegen/spe/tensor/misc.py +79 -25
  39. maxframe/codegen/spe/tensor/spatial.py +45 -0
  40. maxframe/codegen/spe/tensor/statistics.py +44 -0
  41. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  42. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  43. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  44. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  45. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  46. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  47. maxframe/codegen/spe/utils.py +2 -0
  48. maxframe/config/config.py +70 -9
  49. maxframe/config/tests/test_validators.py +13 -1
  50. maxframe/config/validators.py +49 -0
  51. maxframe/conftest.py +44 -17
  52. maxframe/core/accessor.py +2 -2
  53. maxframe/core/entity/core.py +5 -0
  54. maxframe/core/entity/tileables.py +1 -1
  55. maxframe/core/graph/core.cp37-win_amd64.pyd +0 -0
  56. maxframe/core/graph/entity.py +1 -2
  57. maxframe/core/operator/base.py +9 -2
  58. maxframe/core/operator/core.py +10 -2
  59. maxframe/core/operator/utils.py +13 -0
  60. maxframe/dataframe/__init__.py +10 -3
  61. maxframe/dataframe/accessors/__init__.py +1 -1
  62. maxframe/dataframe/accessors/compat.py +45 -0
  63. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  64. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  65. maxframe/dataframe/accessors/dict_/core.py +48 -0
  66. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  67. maxframe/dataframe/accessors/dict_/length.py +7 -16
  68. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  69. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  70. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  71. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  72. maxframe/dataframe/accessors/list_/core.py +48 -0
  73. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  74. maxframe/dataframe/accessors/list_/length.py +7 -16
  75. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  76. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  77. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  78. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  79. maxframe/dataframe/accessors/struct_/core.py +43 -0
  80. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  81. maxframe/dataframe/accessors/struct_/field.py +123 -0
  82. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  83. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  84. maxframe/dataframe/arithmetic/__init__.py +14 -4
  85. maxframe/dataframe/arithmetic/between.py +106 -0
  86. maxframe/dataframe/arithmetic/dot.py +237 -0
  87. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  88. maxframe/dataframe/core.py +63 -118
  89. maxframe/dataframe/datasource/__init__.py +18 -0
  90. maxframe/dataframe/datasource/from_dict.py +124 -0
  91. maxframe/dataframe/datasource/from_index.py +1 -1
  92. maxframe/dataframe/datasource/from_records.py +77 -0
  93. maxframe/dataframe/datasource/from_tensor.py +109 -41
  94. maxframe/dataframe/datasource/read_csv.py +2 -3
  95. maxframe/dataframe/datasource/read_odps_query.py +76 -16
  96. maxframe/dataframe/datasource/tests/test_datasource.py +84 -1
  97. maxframe/dataframe/datastore/__init__.py +5 -1
  98. maxframe/dataframe/datastore/to_csv.py +29 -41
  99. maxframe/dataframe/datastore/to_odps.py +30 -4
  100. maxframe/dataframe/extensions/__init__.py +20 -4
  101. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  102. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  103. maxframe/dataframe/extensions/collect_kv.py +126 -0
  104. maxframe/dataframe/extensions/extract_kv.py +177 -0
  105. maxframe/dataframe/extensions/map_reduce.py +263 -0
  106. maxframe/dataframe/extensions/rebalance.py +62 -0
  107. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  108. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  109. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  110. maxframe/dataframe/groupby/__init__.py +12 -1
  111. maxframe/dataframe/groupby/aggregation.py +78 -45
  112. maxframe/dataframe/groupby/apply.py +1 -1
  113. maxframe/dataframe/groupby/apply_chunk.py +18 -2
  114. maxframe/dataframe/groupby/core.py +96 -12
  115. maxframe/dataframe/groupby/cum.py +4 -25
  116. maxframe/dataframe/groupby/expanding.py +264 -0
  117. maxframe/dataframe/groupby/fill.py +1 -1
  118. maxframe/dataframe/groupby/getitem.py +12 -5
  119. maxframe/dataframe/groupby/head.py +11 -1
  120. maxframe/dataframe/groupby/rank.py +136 -0
  121. maxframe/dataframe/groupby/rolling.py +206 -0
  122. maxframe/dataframe/groupby/shift.py +114 -0
  123. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  124. maxframe/dataframe/indexing/__init__.py +20 -1
  125. maxframe/dataframe/indexing/droplevel.py +195 -0
  126. maxframe/dataframe/indexing/filter.py +169 -0
  127. maxframe/dataframe/indexing/get_level_values.py +76 -0
  128. maxframe/dataframe/indexing/iat.py +45 -0
  129. maxframe/dataframe/indexing/iloc.py +152 -12
  130. maxframe/dataframe/indexing/insert.py +1 -1
  131. maxframe/dataframe/indexing/loc.py +287 -7
  132. maxframe/dataframe/indexing/reindex.py +14 -5
  133. maxframe/dataframe/indexing/rename.py +6 -0
  134. maxframe/dataframe/indexing/rename_axis.py +2 -2
  135. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  136. maxframe/dataframe/indexing/reset_index.py +33 -6
  137. maxframe/dataframe/indexing/sample.py +8 -0
  138. maxframe/dataframe/indexing/setitem.py +3 -3
  139. maxframe/dataframe/indexing/swaplevel.py +185 -0
  140. maxframe/dataframe/indexing/take.py +99 -0
  141. maxframe/dataframe/indexing/truncate.py +140 -0
  142. maxframe/dataframe/indexing/where.py +0 -11
  143. maxframe/dataframe/indexing/xs.py +148 -0
  144. maxframe/dataframe/merge/__init__.py +12 -1
  145. maxframe/dataframe/merge/append.py +97 -98
  146. maxframe/dataframe/merge/combine_first.py +120 -0
  147. maxframe/dataframe/merge/compare.py +387 -0
  148. maxframe/dataframe/merge/concat.py +183 -0
  149. maxframe/dataframe/merge/update.py +271 -0
  150. maxframe/dataframe/misc/__init__.py +16 -10
  151. maxframe/dataframe/misc/_duplicate.py +10 -4
  152. maxframe/dataframe/misc/apply.py +1 -1
  153. maxframe/dataframe/misc/check_unique.py +51 -0
  154. maxframe/dataframe/misc/clip.py +145 -0
  155. maxframe/dataframe/misc/describe.py +175 -9
  156. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  157. maxframe/dataframe/misc/duplicated.py +2 -2
  158. maxframe/dataframe/misc/get_dummies.py +5 -1
  159. maxframe/dataframe/misc/isin.py +2 -2
  160. maxframe/dataframe/misc/map.py +94 -0
  161. maxframe/dataframe/misc/tests/test_misc.py +13 -2
  162. maxframe/dataframe/misc/to_numeric.py +3 -0
  163. maxframe/dataframe/misc/transform.py +12 -5
  164. maxframe/dataframe/misc/transpose.py +13 -1
  165. maxframe/dataframe/misc/valid_index.py +115 -0
  166. maxframe/dataframe/misc/value_counts.py +38 -4
  167. maxframe/dataframe/missing/checkna.py +13 -6
  168. maxframe/dataframe/missing/dropna.py +5 -0
  169. maxframe/dataframe/missing/fillna.py +1 -1
  170. maxframe/dataframe/missing/replace.py +7 -4
  171. maxframe/dataframe/reduction/__init__.py +29 -15
  172. maxframe/dataframe/reduction/aggregation.py +38 -9
  173. maxframe/dataframe/reduction/all.py +2 -2
  174. maxframe/dataframe/reduction/any.py +2 -2
  175. maxframe/dataframe/reduction/argmax.py +100 -0
  176. maxframe/dataframe/reduction/argmin.py +100 -0
  177. maxframe/dataframe/reduction/core.py +65 -18
  178. maxframe/dataframe/reduction/count.py +13 -9
  179. maxframe/dataframe/reduction/cov.py +166 -0
  180. maxframe/dataframe/reduction/cummax.py +2 -2
  181. maxframe/dataframe/reduction/cummin.py +2 -2
  182. maxframe/dataframe/reduction/cumprod.py +2 -2
  183. maxframe/dataframe/reduction/cumsum.py +2 -2
  184. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  185. maxframe/dataframe/reduction/idxmax.py +185 -0
  186. maxframe/dataframe/reduction/idxmin.py +185 -0
  187. maxframe/dataframe/reduction/kurtosis.py +37 -30
  188. maxframe/dataframe/reduction/max.py +2 -2
  189. maxframe/dataframe/reduction/mean.py +9 -7
  190. maxframe/dataframe/reduction/median.py +2 -2
  191. maxframe/dataframe/reduction/min.py +2 -2
  192. maxframe/dataframe/reduction/nunique.py +9 -8
  193. maxframe/dataframe/reduction/prod.py +18 -13
  194. maxframe/dataframe/reduction/reduction_size.py +2 -2
  195. maxframe/dataframe/reduction/sem.py +13 -9
  196. maxframe/dataframe/reduction/skew.py +31 -27
  197. maxframe/dataframe/reduction/str_concat.py +10 -7
  198. maxframe/dataframe/reduction/sum.py +18 -14
  199. maxframe/dataframe/reduction/unique.py +20 -3
  200. maxframe/dataframe/reduction/var.py +16 -12
  201. maxframe/dataframe/reshape/__init__.py +38 -0
  202. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  203. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  204. maxframe/dataframe/reshape/unstack.py +114 -0
  205. maxframe/dataframe/sort/__init__.py +8 -0
  206. maxframe/dataframe/sort/argsort.py +62 -0
  207. maxframe/dataframe/sort/core.py +1 -0
  208. maxframe/dataframe/sort/nlargest.py +238 -0
  209. maxframe/dataframe/sort/nsmallest.py +228 -0
  210. maxframe/dataframe/statistics/__init__.py +3 -3
  211. maxframe/dataframe/statistics/corr.py +1 -0
  212. maxframe/dataframe/statistics/quantile.py +2 -2
  213. maxframe/dataframe/tests/test_typing.py +104 -0
  214. maxframe/dataframe/tests/test_utils.py +66 -2
  215. maxframe/dataframe/typing_.py +185 -0
  216. maxframe/dataframe/utils.py +95 -26
  217. maxframe/dataframe/window/aggregation.py +8 -4
  218. maxframe/dataframe/window/core.py +14 -1
  219. maxframe/dataframe/window/ewm.py +1 -3
  220. maxframe/dataframe/window/expanding.py +37 -35
  221. maxframe/dataframe/window/rolling.py +49 -39
  222. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  223. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  224. maxframe/env.py +7 -4
  225. maxframe/errors.py +2 -2
  226. maxframe/io/objects/tests/test_object_io.py +4 -2
  227. maxframe/io/odpsio/schema.py +9 -3
  228. maxframe/io/odpsio/tableio.py +7 -2
  229. maxframe/io/odpsio/tests/test_schema.py +198 -83
  230. maxframe/io/odpsio/tests/test_volumeio.py +4 -15
  231. maxframe/io/odpsio/volumeio.py +23 -8
  232. maxframe/learn/__init__.py +10 -2
  233. maxframe/learn/cluster/__init__.py +15 -0
  234. maxframe/learn/cluster/_kmeans.py +782 -0
  235. maxframe/learn/contrib/llm/core.py +2 -0
  236. maxframe/learn/contrib/xgboost/core.py +87 -1
  237. maxframe/learn/contrib/xgboost/train.py +5 -2
  238. maxframe/learn/core.py +66 -0
  239. maxframe/learn/linear_model/_base.py +58 -1
  240. maxframe/learn/linear_model/_lin_reg.py +1 -1
  241. maxframe/learn/metrics/__init__.py +6 -0
  242. maxframe/learn/metrics/_classification.py +145 -0
  243. maxframe/learn/metrics/_ranking.py +477 -0
  244. maxframe/learn/metrics/_scorer.py +60 -0
  245. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  246. maxframe/learn/metrics/pairwise/core.py +77 -0
  247. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  248. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  249. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  250. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  251. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  252. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  253. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  254. maxframe/learn/metrics/tests/__init__.py +13 -0
  255. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  256. maxframe/learn/utils/__init__.py +1 -1
  257. maxframe/learn/utils/checks.py +1 -2
  258. maxframe/learn/utils/core.py +59 -0
  259. maxframe/learn/utils/extmath.py +37 -0
  260. maxframe/learn/utils/odpsio.py +193 -0
  261. maxframe/learn/utils/validation.py +2 -2
  262. maxframe/lib/compat.py +40 -0
  263. maxframe/lib/dtypes_extension/__init__.py +16 -1
  264. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  265. maxframe/lib/dtypes_extension/blob.py +304 -0
  266. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  267. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  268. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  269. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  270. maxframe/lib/filesystem/_oss_lib/common.py +122 -50
  271. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  272. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  273. maxframe/lib/filesystem/base.py +1 -1
  274. maxframe/lib/filesystem/core.py +1 -1
  275. maxframe/lib/filesystem/oss.py +115 -46
  276. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  277. maxframe/lib/mmh3.cp37-win_amd64.pyd +0 -0
  278. maxframe/lib/wrapped_pickle.py +10 -0
  279. maxframe/opcodes.py +33 -15
  280. maxframe/protocol.py +12 -0
  281. maxframe/serialization/__init__.py +11 -2
  282. maxframe/serialization/arrow.py +38 -13
  283. maxframe/serialization/blob.py +32 -0
  284. maxframe/serialization/core.cp37-win_amd64.pyd +0 -0
  285. maxframe/serialization/core.pyx +39 -1
  286. maxframe/serialization/exception.py +2 -4
  287. maxframe/serialization/numpy.py +11 -0
  288. maxframe/serialization/pandas.py +46 -9
  289. maxframe/serialization/serializables/core.py +2 -2
  290. maxframe/serialization/tests/test_serial.py +29 -2
  291. maxframe/tensor/__init__.py +38 -8
  292. maxframe/tensor/arithmetic/__init__.py +19 -10
  293. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  294. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
  295. maxframe/tensor/core.py +3 -2
  296. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  297. maxframe/tensor/extensions/__init__.py +2 -0
  298. maxframe/tensor/extensions/apply_chunk.py +3 -3
  299. maxframe/tensor/extensions/rebalance.py +65 -0
  300. maxframe/tensor/fft/__init__.py +32 -0
  301. maxframe/tensor/fft/core.py +168 -0
  302. maxframe/tensor/fft/fft.py +112 -0
  303. maxframe/tensor/fft/fft2.py +118 -0
  304. maxframe/tensor/fft/fftfreq.py +80 -0
  305. maxframe/tensor/fft/fftn.py +123 -0
  306. maxframe/tensor/fft/fftshift.py +79 -0
  307. maxframe/tensor/fft/hfft.py +112 -0
  308. maxframe/tensor/fft/ifft.py +114 -0
  309. maxframe/tensor/fft/ifft2.py +115 -0
  310. maxframe/tensor/fft/ifftn.py +123 -0
  311. maxframe/tensor/fft/ifftshift.py +73 -0
  312. maxframe/tensor/fft/ihfft.py +93 -0
  313. maxframe/tensor/fft/irfft.py +118 -0
  314. maxframe/tensor/fft/irfft2.py +62 -0
  315. maxframe/tensor/fft/irfftn.py +114 -0
  316. maxframe/tensor/fft/rfft.py +116 -0
  317. maxframe/tensor/fft/rfft2.py +63 -0
  318. maxframe/tensor/fft/rfftfreq.py +87 -0
  319. maxframe/tensor/fft/rfftn.py +113 -0
  320. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  321. maxframe/tensor/linalg/__init__.py +7 -0
  322. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  323. maxframe/tensor/linalg/cholesky.py +117 -0
  324. maxframe/tensor/linalg/einsum.py +339 -0
  325. maxframe/tensor/linalg/lstsq.py +100 -0
  326. maxframe/tensor/linalg/matrix_norm.py +75 -0
  327. maxframe/tensor/linalg/norm.py +249 -0
  328. maxframe/tensor/linalg/solve.py +72 -0
  329. maxframe/tensor/linalg/solve_triangular.py +2 -2
  330. maxframe/tensor/linalg/vector_norm.py +113 -0
  331. maxframe/tensor/misc/__init__.py +24 -1
  332. maxframe/tensor/misc/argwhere.py +72 -0
  333. maxframe/tensor/misc/array_split.py +46 -0
  334. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  335. maxframe/tensor/misc/copyto.py +130 -0
  336. maxframe/tensor/misc/delete.py +104 -0
  337. maxframe/tensor/misc/dsplit.py +68 -0
  338. maxframe/tensor/misc/ediff1d.py +74 -0
  339. maxframe/tensor/misc/expand_dims.py +85 -0
  340. maxframe/tensor/misc/flip.py +90 -0
  341. maxframe/tensor/misc/fliplr.py +64 -0
  342. maxframe/tensor/misc/flipud.py +68 -0
  343. maxframe/tensor/misc/hsplit.py +85 -0
  344. maxframe/tensor/misc/insert.py +139 -0
  345. maxframe/tensor/misc/moveaxis.py +83 -0
  346. maxframe/tensor/misc/result_type.py +88 -0
  347. maxframe/tensor/misc/roll.py +124 -0
  348. maxframe/tensor/misc/rollaxis.py +77 -0
  349. maxframe/tensor/misc/shape.py +89 -0
  350. maxframe/tensor/misc/split.py +190 -0
  351. maxframe/tensor/misc/tile.py +109 -0
  352. maxframe/tensor/misc/vsplit.py +74 -0
  353. maxframe/tensor/reduction/array_equal.py +2 -1
  354. maxframe/tensor/sort/__init__.py +2 -0
  355. maxframe/tensor/sort/argpartition.py +98 -0
  356. maxframe/tensor/sort/partition.py +228 -0
  357. maxframe/tensor/spatial/__init__.py +15 -0
  358. maxframe/tensor/spatial/distance/__init__.py +17 -0
  359. maxframe/tensor/spatial/distance/cdist.py +421 -0
  360. maxframe/tensor/spatial/distance/pdist.py +398 -0
  361. maxframe/tensor/spatial/distance/squareform.py +153 -0
  362. maxframe/tensor/special/__init__.py +159 -21
  363. maxframe/tensor/special/airy.py +55 -0
  364. maxframe/tensor/special/bessel.py +199 -0
  365. maxframe/tensor/special/core.py +65 -4
  366. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  367. maxframe/tensor/special/ellip_harm.py +55 -0
  368. maxframe/tensor/special/err_fresnel.py +223 -0
  369. maxframe/tensor/special/gamma_funcs.py +303 -0
  370. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  371. maxframe/tensor/special/info_theory.py +189 -0
  372. maxframe/tensor/special/misc.py +21 -0
  373. maxframe/tensor/statistics/__init__.py +6 -0
  374. maxframe/tensor/statistics/corrcoef.py +77 -0
  375. maxframe/tensor/statistics/cov.py +222 -0
  376. maxframe/tensor/statistics/digitize.py +126 -0
  377. maxframe/tensor/statistics/histogram.py +520 -0
  378. maxframe/tensor/statistics/median.py +85 -0
  379. maxframe/tensor/statistics/ptp.py +89 -0
  380. maxframe/tensor/utils.py +3 -3
  381. maxframe/tests/test_utils.py +43 -1
  382. maxframe/tests/utils.py +3 -13
  383. maxframe/typing_.py +2 -0
  384. maxframe/udf.py +27 -2
  385. maxframe/utils.py +193 -19
  386. {maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
  387. {maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/RECORD +395 -240
  388. maxframe_client/fetcher.py +35 -4
  389. maxframe_client/session/odps.py +7 -2
  390. maxframe_client/tests/test_fetcher.py +76 -3
  391. maxframe_client/tests/test_session.py +4 -1
  392. /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  393. /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  394. {maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
  395. {maxframe-2.0.0b1.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,228 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ...core import OutputType
16
+ from .sort_values import DataFrameSortValues
17
+
18
+
19
def _nsmallest(df, n, columns=None, keep="first"):
    """
    Build and apply the sort operator shared by the DataFrame and Series
    ``nsmallest`` entry points.

    Parameters
    ----------
    df : DataFrame or Series
        Object the operator is applied to.
    n : int
        Forwarded to the operator as ``nrows``.
    columns : list or str, optional
        Forwarded to the operator as ``by``.
    keep : str, default 'first'
        Forwarded to the operator as ``keep_kind``.

    Returns
    -------
    The result of calling the configured ``DataFrameSortValues`` on `df`.
    """
    # NOTE(review): output_types is always [OutputType.dataframe], including
    # when this is reached from series_nsmallest -- confirm the operator
    # derives the real output kind from its input.
    sort_kwargs = dict(
        output_types=[OutputType.dataframe],
        axis=0,
        by=columns,
        ignore_index=False,
        ascending=True,
        nrows=n,
        keep_kind=keep,
    )
    return DataFrameSortValues(**sort_kwargs)(df)
30
+
31
+
32
def df_nsmallest(df, n, columns, keep="first"):
    """
    Return the first `n` rows ordered by `columns` in ascending order.

    The `n` rows holding the smallest values in `columns` are returned in
    ascending order. Columns that are not listed in `columns` are part of
    the result as well, but they play no role in the ordering.

    The result is equivalent to
    ``df.sort_values(columns, ascending=True).head(n)``, but this method is
    more performant.

    Parameters
    ----------
    n : int
        Number of items to retrieve.
    columns : list or str
        Column name or names to order by.
    keep : {'first', 'last', 'all'}, default 'first'
        How duplicate values are handled:

        - ``first`` : take the first occurrence.
        - ``last`` : take the last occurrence.
        - ``all`` : do not drop any duplicates, even if it means
          selecting more than `n` items.

    Returns
    -------
    DataFrame

    See Also
    --------
    DataFrame.nlargest : Return the first `n` rows ordered by `columns` in
        descending order.
    DataFrame.sort_values : Sort DataFrame by the values.
    DataFrame.head : Return the first `n` rows without re-ordering.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> df = md.DataFrame({'population': [59000000, 65000000, 434000,
    ...                                   434000, 434000, 337000, 337000,
    ...                                   11300, 11300],
    ...                    'GDP': [1937894, 2583560, 12011, 4520, 12128,
    ...                            17036, 182, 38, 311],
    ...                    'alpha-2': ["IT", "FR", "MT", "MV", "BN",
    ...                                "IS", "NR", "TV", "AI"]},
    ...                   index=["Italy", "France", "Malta",
    ...                          "Maldives", "Brunei", "Iceland",
    ...                          "Nauru", "Tuvalu", "Anguilla"])
    >>> df.execute()
              population      GDP alpha-2
    Italy       59000000  1937894      IT
    France      65000000  2583560      FR
    Malta         434000    12011      MT
    Maldives      434000     4520      MV
    Brunei        434000    12128      BN
    Iceland       337000    17036      IS
    Nauru         337000      182      NR
    Tuvalu         11300       38      TV
    Anguilla       11300      311      AI

    Select the three rows with the smallest values in column
    "population" using ``nsmallest``.

    >>> df.nsmallest(3, 'population').execute()
              population    GDP alpha-2
    Tuvalu         11300     38      TV
    Anguilla       11300    311      AI
    Iceland       337000  17036      IS

    With ``keep='last'``, ties are resolved in reverse order:

    >>> df.nsmallest(3, 'population', keep='last').execute()
              population  GDP alpha-2
    Anguilla       11300  311      AI
    Tuvalu         11300   38      TV
    Nauru         337000  182      NR

    With ``keep='all'``, all duplicate items are maintained:

    >>> df.nsmallest(3, 'population', keep='all').execute()
              population    GDP alpha-2
    Tuvalu         11300     38      TV
    Anguilla       11300    311      AI
    Iceland       337000  17036      IS
    Nauru         337000    182      NR

    To order by the smallest values in column "population" and then by
    "GDP", pass several columns as in the next example.

    >>> df.nsmallest(3, ['population', 'GDP']).execute()
              population  GDP alpha-2
    Tuvalu         11300   38      TV
    Anguilla       11300  311      AI
    Nauru         337000  182      NR
    """
    return _nsmallest(df, n, columns=columns, keep=keep)
130
+
131
+
132
def series_nsmallest(df, n=5, keep="first"):
    """
    Return the smallest `n` elements.

    Parameters
    ----------
    n : int, default 5
        Return this many ascending sorted values.
    keep : {'first', 'last', 'all'}, default 'first'
        When there are duplicate values that cannot all fit in a
        Series of `n` elements:

        - ``first`` : return the first `n` occurrences in order
          of appearance.
        - ``last`` : return the last `n` occurrences in reverse
          order of appearance.
        - ``all`` : keep all occurrences. This can result in a Series of
          size larger than `n`.

    Returns
    -------
    Series
        The `n` smallest values in the Series, sorted in increasing order.

    See Also
    --------
    Series.nlargest: Get the `n` largest elements.
    Series.sort_values: Sort Series by values.
    Series.head: Return the first `n` rows.

    Notes
    -----
    Faster than ``.sort_values().head(n)`` for small `n` relative to
    the size of the ``Series`` object.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> countries_population = {"Italy": 59000000, "France": 65000000,
    ...                         "Brunei": 434000, "Malta": 434000,
    ...                         "Maldives": 434000, "Iceland": 337000,
    ...                         "Nauru": 11300, "Tuvalu": 11300,
    ...                         "Anguilla": 11300, "Montserrat": 5200}
    >>> s = md.Series(countries_population)
    >>> s.execute()
    Italy       59000000
    France      65000000
    Brunei        434000
    Malta         434000
    Maldives      434000
    Iceland       337000
    Nauru          11300
    Tuvalu         11300
    Anguilla       11300
    Montserrat      5200
    dtype: int64

    The `n` smallest elements where ``n=5`` by default.

    >>> s.nsmallest().execute()
    Montserrat    5200
    Nauru        11300
    Tuvalu       11300
    Anguilla     11300
    Iceland     337000
    dtype: int64

    The `n` smallest elements where ``n=3``. Default `keep` value is
    'first' so Nauru and Tuvalu will be kept.

    >>> s.nsmallest(3).execute()
    Montserrat   5200
    Nauru       11300
    Tuvalu      11300
    dtype: int64

    The `n` smallest elements where ``n=3`` and keeping the last
    duplicates. Anguilla and Tuvalu will be kept since they are the last
    with value 11300 based on the index order.

    >>> s.nsmallest(3, keep='last').execute()
    Montserrat   5200
    Anguilla    11300
    Tuvalu      11300
    dtype: int64

    The `n` smallest elements where ``n=3`` with all duplicates kept. Note
    that the returned Series has four elements due to the three duplicates.

    >>> s.nsmallest(3, keep='all').execute()
    Montserrat   5200
    Nauru       11300
    Tuvalu      11300
    Anguilla    11300
    dtype: int64
    """
    # `n` defaults to 5 so that the documented no-argument call
    # ``s.nsmallest()`` works; `columns` is left at the helper's default.
    return _nsmallest(df, n, keep=keep)
@@ -16,15 +16,15 @@
16
16
  def _install():
17
17
  from ..core import DATAFRAME_TYPE, SERIES_TYPE
18
18
  from .corr import df_corr, df_corrwith, series_autocorr, series_corr
19
- from .quantile import quantile_dataframe, quantile_series
19
+ from .quantile import dataframe_quantile, series_quantile
20
20
 
21
21
  for t in SERIES_TYPE:
22
- t.quantile = quantile_series
22
+ t.quantile = series_quantile
23
23
  t.corr = series_corr
24
24
  t.autocorr = series_autocorr
25
25
 
26
26
  for t in DATAFRAME_TYPE:
27
- t.quantile = quantile_dataframe
27
+ t.quantile = dataframe_quantile
28
28
  t.corr = df_corr
29
29
  t.corrwith = df_corrwith
30
30
 
@@ -34,6 +34,7 @@ class DataFrameCorr(DataFrameOperator, DataFrameOperatorMixin):
34
34
  min_periods = Int32Field("min_periods", default=None)
35
35
  axis = Int32Field("axis", default=None)
36
36
  drop = BoolField("drop", default=None)
37
+ ddof = Int32Field("ddof", default=0)
37
38
 
38
39
  @classmethod
39
40
  def _set_inputs(cls, op: "DataFrameCorr", inputs: List[EntityData]):
@@ -213,7 +213,7 @@ class DataFrameQuantile(DataFrameOperator, DataFrameOperatorMixin):
213
213
  return self._call_series(a, inputs)
214
214
 
215
215
 
216
- def quantile_series(series, q=0.5, interpolation="linear"):
216
+ def series_quantile(series, q=0.5, interpolation="linear"):
217
217
  """
218
218
  Return value at the given quantile.
219
219
 
@@ -268,7 +268,7 @@ def quantile_series(series, q=0.5, interpolation="linear"):
268
268
  return op(series, q_input=q_input)
269
269
 
270
270
 
271
- def quantile_dataframe(df, q=0.5, axis=0, numeric_only=True, interpolation="linear"):
271
+ def dataframe_quantile(df, q=0.5, axis=0, numeric_only=True, interpolation="linear"):
272
272
  # FIXME: Timedelta is not supported. Data invalid: ODPS-0010000:InvalidArgument:duration[ns] is not equal to string
273
273
  """
274
274
  Return values at the given quantile over requested axis.
@@ -0,0 +1,104 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+
18
+ from ...core import OutputType
19
+ from ..typing_ import get_function_output_meta
20
+
21
+
22
+ def test_dataframe_type_annotation():
23
+ def func() -> pd.DataFrame[int]:
24
+ pass
25
+
26
+ meta = get_function_output_meta(func)
27
+ assert meta is not None
28
+ assert meta.output_type.name == "dataframe"
29
+ assert len(meta.dtypes) == 1
30
+ assert meta.dtypes[0] == np.dtype(int)
31
+
32
+ def func() -> pd.DataFrame[{"col1": int, "col2": float}]: # noqa: F821
33
+ pass
34
+
35
+ meta = get_function_output_meta(func)
36
+ assert meta is not None
37
+ assert meta.output_type.name == "dataframe"
38
+ assert len(meta.dtypes) == 2
39
+ assert meta.dtypes[0] == np.dtype(int)
40
+ assert meta.dtypes[1] == np.dtype(float)
41
+
42
+ def func() -> pd.DataFrame[str, {"col1": int, "col2": float}]: # noqa: F821
43
+ pass
44
+
45
+ meta = get_function_output_meta(func)
46
+ assert meta is not None
47
+ assert meta.output_type.name == "dataframe"
48
+ assert len(meta.dtypes) == 2
49
+ assert meta.index_value.value.dtype == np.dtype("O")
50
+ assert list(meta.dtypes.index) == ["col1", "col2"]
51
+ assert list(meta.dtypes) == [np.dtype(int), np.dtype(float)]
52
+
53
+
54
+ def test_series_type_annotation():
55
+ def func() -> pd.Series[np.str_]:
56
+ pass
57
+
58
+ meta = get_function_output_meta(func)
59
+ assert meta is not None
60
+ assert meta.output_type == OutputType.series
61
+ assert meta.dtype == np.dtype(np.str_)
62
+
63
+ def func() -> pd.Series[("idx_name", str), ("series_name", np.int64)]: # noqa: F821
64
+ pass
65
+
66
+ meta = get_function_output_meta(func)
67
+ assert meta is not None
68
+ assert meta.output_type == OutputType.series
69
+ assert meta.name == "series_name"
70
+ assert meta.dtype == np.dtype(np.int64)
71
+ assert meta.index_value.value._name == "idx_name"
72
+ assert meta.index_value.value.dtype == np.dtype("O")
73
+
74
+
75
+ def test_index_type_annotation():
76
+ def func() -> pd.Index[np.int64]:
77
+ pass
78
+
79
+ meta = get_function_output_meta(func)
80
+ assert meta is not None
81
+ assert meta.output_type == OutputType.index
82
+
83
+ def func() -> pd.Index[[("ix1", str), ("ix2", np.int64)]]: # noqa: F821
84
+ pass
85
+
86
+ meta = get_function_output_meta(func)
87
+ assert meta is not None
88
+ assert meta.output_type == OutputType.index
89
+ assert meta.index_value.value.names == ["ix1", "ix2"]
90
+ assert list(meta.index_value.value.dtypes) == [np.dtype("O"), np.dtype("int64")]
91
+
92
+
93
+ def test_function_output_meta_corner_cases():
94
+ def func():
95
+ pass
96
+
97
+ assert get_function_output_meta(func) is None
98
+ assert get_function_output_meta("non-func-obj") is None
99
+
100
+ def func() -> int:
101
+ pass
102
+
103
+ meta = get_function_output_meta(func)
104
+ assert meta.dtype == np.dtype("int64")
@@ -17,9 +17,16 @@ import pandas as pd
17
17
  import pyarrow as pa
18
18
  import pytest
19
19
 
20
- from ...udf import MarkedFunction, with_python_requirements, with_resources
20
+ from ...config import option_context
21
+ from ...core.operator import Operator
22
+ from ...udf import (
23
+ MarkedFunction,
24
+ with_python_requirements,
25
+ with_resources,
26
+ with_running_options,
27
+ )
21
28
  from ...utils import ARROW_DTYPE_NOT_SUPPORTED
22
- from ..utils import _generate_value, pack_func_args
29
+ from ..utils import _generate_value, copy_func_scheduling_hints, pack_func_args
23
30
 
24
31
  try:
25
32
  from pandas import ArrowDtype
@@ -84,6 +91,20 @@ def test_pack_function(df1):
84
91
  [(np.int32(1), "1")],
85
92
  ),
86
93
  (pa.map_(pa.int32(), pa.string()), 1, [(np.int32(1), "1")]),
94
+ (
95
+ ArrowDtype(
96
+ pa.struct([pa.field("a", pa.int32()), pa.field("b", pa.string())])
97
+ )
98
+ if ArrowDtype
99
+ else None,
100
+ 1,
101
+ {"a": np.int32(1), "b": "1"},
102
+ ),
103
+ (
104
+ pa.struct([pa.field("a", pa.int32()), pa.field("b", pa.string())]),
105
+ 1,
106
+ {"a": np.int32(1), "b": "1"},
107
+ ),
87
108
  (pa.int32(), 1, np.int32(1)),
88
109
  (np.datetime64, "2023-01-01", pd.Timestamp("2023-01-01")),
89
110
  (np.timedelta64, "1D", pd.Timedelta("1D")),
@@ -99,3 +120,46 @@ def test_pack_function(df1):
99
120
  def test_generate_value(dtype, fill_value, expected):
100
121
  result = _generate_value(dtype, fill_value)
101
122
  assert result == expected
123
+
124
+
125
def test_copy_func_scheduling_hints():
    """
    Check which attributes ``copy_func_scheduling_hints`` leaves on an operator
    for a plain function, a function decorated with engine/cpu/memory running
    options, and a function decorated with a ``gu`` running option.
    """
    # NOTE(review): the extent of the ``with`` body below is reconstructed from
    # the expected values asserted further down - confirm against the original.
    # Test with a regular function (no scheduling hints)
    with option_context() as options:
        options.function.default_running_options = {}  # No default options

        def regular_func(x):
            return x + 1

        op1 = Operator()
        copy_func_scheduling_hints(regular_func, op1)
        # Should not set any attributes since regular function has no hints
        assert not hasattr(op1, "expect_engine") or op1.expect_engine is None
        assert not hasattr(op1, "expect_resources") or op1.expect_resources is None
        assert not hasattr(op1, "gpu") or op1.gpu is None

        # Test with MarkedFunction with scheduling hints

    @with_running_options(engine="DPE", cpu=4, memory="8GiB")
    def marked_func(x):
        return x + 1

    op2 = Operator()
    copy_func_scheduling_hints(marked_func, op2)
    assert op2.expect_engine == "DPE"
    # The expect_resources will include default values for gpu and gu_quota
    expected_resources = {"cpu": 4, "memory": "8GiB", "gpu": 0, "gu_quota": None}
    assert op2.expect_resources == expected_resources

    # Test with MarkedFunction with GPU
    @with_running_options(gu=2)
    def gpu_func(x):
        return x + 1

    op3 = Operator()
    copy_func_scheduling_hints(gpu_func, op3)
    assert op3.gpu is True
    # The expect_resources will include the gu value and default values
    # System has default options: {'cpu': 1, 'memory': '4GiB', 'gpu': 0}
    # The with_running_options decorator will override the gpu value with the gu value
    expected_resources = {"gpu": 2, "gu_quota": None, "cpu": 1, "memory": "4GiB"}
    assert op3.expect_resources == expected_resources
@@ -0,0 +1,185 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import dataclasses
16
+ import functools
17
+ import inspect
18
+ from typing import Any, Callable, Generic, List, Optional, TypeVar
19
+
20
+ import pandas as pd
21
+
22
+ from ..core import OutputType
23
+ from ..typing_ import PandasDType
24
+ from ..utils import make_dtype
25
+ from .utils import InferredDataFrameMeta, parse_index
26
+
27
+ # TypeVars
28
+ T = TypeVar("T")
29
+
30
+
31
@dataclasses.dataclass
class _FieldDef:
    """Name and dtype of one field parsed from a pandas-style type annotation."""

    # Label of the field; ``None`` when the annotation item carried no name.
    name: Any
    # Dtype of the field, as returned by ``make_dtype``.
    dtype: PandasDType
35
+
36
+
37
def _item_to_field_def(item_):
    """
    Convert one annotation item into a ``_FieldDef``.

    A tuple is read as ``(name, type)``; anything else is treated as a bare
    type and yields a field whose name is ``None``. The type part is always
    normalized with ``make_dtype``.
    """
    if not isinstance(item_, tuple):
        return _FieldDef(name=None, dtype=make_dtype(item_))

    # resolve the dtype first, then pair it with the supplied name
    field_dtype = make_dtype(item_[1])
    return _FieldDef(name=item_[0], dtype=field_dtype)
44
+
45
+
46
class IndexType:
    """Parsed form of a ``pd.Index[...]`` annotation: a list of index fields."""

    def __init__(self, index_fields: List[_FieldDef]):
        self.index_fields = index_fields

    def __repr__(self):
        field_dtypes = [field.dtype for field in self.index_fields]
        return f"IndexType({field_dtypes})"

    @classmethod
    def from_getitem_args(cls, item) -> "IndexType":
        """
        Build an ``IndexType`` from the argument given inside ``[...]``.

        Dicts and pandas Series are expanded into ``(key, value)`` pairs.
        A list produces one field per element; any other value produces a
        single field.
        """
        if isinstance(item, (dict, pd.Series)):
            item = list(item.items())

        # a non-list argument is handled as a one-element list of fields
        entries = item if isinstance(item, list) else [item]
        return IndexType([_item_to_field_def(entry) for entry in entries])
62
+
63
+
64
class SeriesType(Generic[T]):
    """Parsed form of a ``pd.Series[...]`` annotation."""

    def __init__(
        self, index_fields: Optional[List[_FieldDef]], name_and_dtype: _FieldDef
    ):
        # ``index_fields`` is None when the annotation did not describe an index
        self.index_fields = index_fields
        self.name_and_dtype = name_and_dtype

    def __repr__(self) -> str:
        return f"SeriesType[{self.name_and_dtype.dtype}]"

    @classmethod
    def from_getitem_args(cls, item) -> "SeriesType":
        """
        Build a ``SeriesType`` from the argument given inside ``[...]``.

        A single argument describes the series values only. Otherwise the
        first argument describes the index and the second the values.
        """
        args = item if isinstance(item, tuple) else (item,)
        if len(args) == 1:
            return SeriesType(None, _item_to_field_def(args[0]))

        value_field = _item_to_field_def(args[1])
        index_fields = IndexType.from_getitem_args(args[0]).index_fields
        return SeriesType(index_fields, value_field)
85
+
86
+
87
class DataFrameType:
    """Parsed form of a ``pd.DataFrame[...]`` annotation."""

    def __init__(
        self,
        index_fields: Optional[List[_FieldDef]],
        data_fields: List[_FieldDef],
    ):
        # ``index_fields`` is None when the annotation did not describe an index
        self.index_fields = index_fields
        self.data_fields = data_fields

    def __repr__(self) -> str:
        column_dtypes = [column.dtype for column in self.data_fields]
        return f"DataFrameType[{column_dtypes}]"

    @classmethod
    def from_getitem_args(cls, item) -> "DataFrameType":
        """
        Build a ``DataFrameType`` from the argument given inside ``[...]``.

        The last argument always describes the data columns. When more than
        one argument is given, the first one describes the index.
        """
        args = item if isinstance(item, tuple) else (item,)
        column_fields = IndexType.from_getitem_args(args[-1]).index_fields

        index_fields = None
        if len(args) != 1:
            index_fields = IndexType.from_getitem_args(args[0]).index_fields
        return DataFrameType(index_fields, column_fields)
110
+
111
+
112
def get_function_output_meta(
    func: Callable, df_obj=None
) -> Optional[InferredDataFrameMeta]:
    """
    Infer output metadata from the return annotation of ``func``.

    Parameters
    ----------
    func : Callable
        Object whose ``return`` annotation is inspected.
    df_obj : optional
        Passed through unchanged as the second argument of ``parse_index``
        when the annotation describes an index.

    Returns
    -------
    Optional[InferredDataFrameMeta]
        Metadata built from the annotation. ``None`` is returned when ``func``
        cannot be introspected, when it has no return annotation, or when the
        annotation is neither a ``DataFrameType`` / ``SeriesType`` /
        ``IndexType`` instance nor something ``make_dtype`` accepts.
    """
    try:
        func_argspec = inspect.getfullargspec(func)
    except Exception:
        # Best effort: objects that cannot be introspected carry no metadata.
        # ``Exception`` rather than a bare ``except`` so that KeyboardInterrupt
        # and SystemExit still propagate.
        return None

    ret_type = (func_argspec.annotations or {}).get("return")
    if ret_type is None:
        return None

    dtypes = dtype = name = None
    index_fields = None
    if isinstance(ret_type, DataFrameType):
        output_type = OutputType.dataframe
        dtypes = pd.Series(
            [fd.dtype for fd in ret_type.data_fields],
            index=[fd.name for fd in ret_type.data_fields],
        )
        index_fields = ret_type.index_fields
    elif isinstance(ret_type, SeriesType):
        output_type = OutputType.series
        dtype = ret_type.name_and_dtype.dtype
        name = ret_type.name_and_dtype.name
        index_fields = ret_type.index_fields
    elif isinstance(ret_type, IndexType):
        output_type = OutputType.index
        index_fields = ret_type.index_fields
    else:
        # Any other annotation is treated as a scalar type.
        output_type = OutputType.scalar
        try:
            dtype = make_dtype(ret_type)
        except Exception:
            # The annotation is not convertible to a dtype: nothing to infer.
            return None

    if index_fields is not None:
        if len(index_fields) == 1:
            # A single field becomes an empty flat index with that dtype and name.
            mock_idx = pd.Index(
                [], dtype=index_fields[0].dtype, name=index_fields[0].name
            )
        else:
            # Several fields become an empty MultiIndex, built from an empty
            # frame whose columns are cast to the requested dtypes.
            col_names = [index_field.name for index_field in index_fields]
            col_dtypes = pd.Series(
                [index_field.dtype for index_field in index_fields], index=col_names
            )
            mock_df = pd.DataFrame([], columns=col_names).astype(col_dtypes)
            mock_idx = pd.MultiIndex.from_frame(mock_df)
        index_value = parse_index(mock_idx, df_obj, store_data=False)
    else:
        index_value = None

    return InferredDataFrameMeta(
        output_type=output_type,
        index_value=index_value,
        dtypes=dtypes,
        dtype=dtype,
        name=name,
    )
170
+
171
+
172
def register_pandas_typing_funcs():
    """
    Install ``__class_getitem__`` on ``pd.DataFrame``, ``pd.Series`` and
    ``pd.Index`` so that subscripting them yields the matching
    ``DataFrameType`` / ``SeriesType`` / ``IndexType`` object.

    A class that already defines ``__class_getitem__`` is left untouched.
    """

    def _cls_getitem_func(cls, item, type_cls):
        return type_cls.from_getitem_args(item)

    type_pairs = (
        (pd.DataFrame, DataFrameType),
        (pd.Series, SeriesType),
        (pd.Index, IndexType),
    )
    for pd_cls, type_cls in type_pairs:
        if hasattr(pd_cls, "__class_getitem__"):  # pragma: no cover
            continue
        # bind the target annotation class now; pandas supplies ``cls`` and ``item``
        getitem_impl = functools.partial(_cls_getitem_func, type_cls=type_cls)
        pd_cls.__class_getitem__ = classmethod(getitem_impl)