maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (443)
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win32.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +9 -8
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +38 -1
  15. maxframe/codegen/spe/dataframe/misc.py +11 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +32 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +39 -18
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  29. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  30. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  31. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  32. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  33. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  34. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  35. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  36. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  37. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  38. maxframe/codegen/spe/tensor/__init__.py +3 -0
  39. maxframe/codegen/spe/tensor/datasource.py +1 -0
  40. maxframe/codegen/spe/tensor/fft.py +74 -0
  41. maxframe/codegen/spe/tensor/linalg.py +29 -2
  42. maxframe/codegen/spe/tensor/misc.py +79 -25
  43. maxframe/codegen/spe/tensor/spatial.py +45 -0
  44. maxframe/codegen/spe/tensor/statistics.py +44 -0
  45. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  46. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  47. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  48. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  49. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  50. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  51. maxframe/codegen/spe/utils.py +2 -0
  52. maxframe/config/config.py +73 -9
  53. maxframe/config/tests/test_validators.py +13 -1
  54. maxframe/config/validators.py +49 -0
  55. maxframe/conftest.py +54 -17
  56. maxframe/core/accessor.py +2 -2
  57. maxframe/core/base.py +2 -1
  58. maxframe/core/entity/core.py +5 -0
  59. maxframe/core/entity/tileables.py +3 -1
  60. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  61. maxframe/core/graph/entity.py +8 -3
  62. maxframe/core/mode.py +6 -1
  63. maxframe/core/operator/base.py +9 -2
  64. maxframe/core/operator/core.py +10 -2
  65. maxframe/core/operator/utils.py +13 -0
  66. maxframe/dataframe/__init__.py +12 -5
  67. maxframe/dataframe/accessors/__init__.py +1 -1
  68. maxframe/dataframe/accessors/compat.py +45 -0
  69. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  70. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  71. maxframe/dataframe/accessors/dict_/core.py +48 -0
  72. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  73. maxframe/dataframe/accessors/dict_/length.py +7 -16
  74. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  75. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  76. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  77. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  78. maxframe/dataframe/accessors/list_/core.py +48 -0
  79. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  80. maxframe/dataframe/accessors/list_/length.py +7 -16
  81. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  82. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  83. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  84. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  85. maxframe/dataframe/accessors/struct_/core.py +43 -0
  86. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  87. maxframe/dataframe/accessors/struct_/field.py +123 -0
  88. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  89. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  90. maxframe/dataframe/arithmetic/__init__.py +18 -4
  91. maxframe/dataframe/arithmetic/between.py +106 -0
  92. maxframe/dataframe/arithmetic/dot.py +237 -0
  93. maxframe/dataframe/arithmetic/maximum.py +33 -0
  94. maxframe/dataframe/arithmetic/minimum.py +33 -0
  95. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  96. maxframe/dataframe/core.py +161 -224
  97. maxframe/dataframe/datasource/__init__.py +18 -0
  98. maxframe/dataframe/datasource/core.py +6 -0
  99. maxframe/dataframe/datasource/direct.py +57 -0
  100. maxframe/dataframe/datasource/from_dict.py +124 -0
  101. maxframe/dataframe/datasource/from_index.py +1 -1
  102. maxframe/dataframe/datasource/from_records.py +77 -0
  103. maxframe/dataframe/datasource/from_tensor.py +109 -41
  104. maxframe/dataframe/datasource/read_csv.py +21 -14
  105. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  106. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  107. maxframe/dataframe/datasource/read_parquet.py +38 -39
  108. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  109. maxframe/dataframe/datastore/__init__.py +11 -1
  110. maxframe/dataframe/datastore/direct.py +268 -0
  111. maxframe/dataframe/datastore/to_csv.py +29 -41
  112. maxframe/dataframe/datastore/to_odps.py +36 -4
  113. maxframe/dataframe/extensions/__init__.py +20 -4
  114. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  115. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  116. maxframe/dataframe/extensions/collect_kv.py +126 -0
  117. maxframe/dataframe/extensions/extract_kv.py +177 -0
  118. maxframe/dataframe/extensions/flatjson.py +2 -1
  119. maxframe/dataframe/extensions/map_reduce.py +263 -0
  120. maxframe/dataframe/extensions/rebalance.py +62 -0
  121. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  122. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  123. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  124. maxframe/dataframe/groupby/__init__.py +17 -2
  125. maxframe/dataframe/groupby/aggregation.py +86 -49
  126. maxframe/dataframe/groupby/apply.py +1 -1
  127. maxframe/dataframe/groupby/apply_chunk.py +19 -5
  128. maxframe/dataframe/groupby/core.py +116 -16
  129. maxframe/dataframe/groupby/cum.py +4 -25
  130. maxframe/dataframe/groupby/expanding.py +264 -0
  131. maxframe/dataframe/groupby/fill.py +1 -1
  132. maxframe/dataframe/groupby/getitem.py +12 -5
  133. maxframe/dataframe/groupby/head.py +11 -1
  134. maxframe/dataframe/groupby/rank.py +136 -0
  135. maxframe/dataframe/groupby/rolling.py +206 -0
  136. maxframe/dataframe/groupby/shift.py +114 -0
  137. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  138. maxframe/dataframe/indexing/__init__.py +22 -2
  139. maxframe/dataframe/indexing/droplevel.py +195 -0
  140. maxframe/dataframe/indexing/filter.py +169 -0
  141. maxframe/dataframe/indexing/get_level_values.py +76 -0
  142. maxframe/dataframe/indexing/iat.py +45 -0
  143. maxframe/dataframe/indexing/iloc.py +152 -12
  144. maxframe/dataframe/indexing/insert.py +46 -18
  145. maxframe/dataframe/indexing/loc.py +287 -7
  146. maxframe/dataframe/indexing/reindex.py +14 -5
  147. maxframe/dataframe/indexing/rename.py +6 -0
  148. maxframe/dataframe/indexing/rename_axis.py +2 -2
  149. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  150. maxframe/dataframe/indexing/reset_index.py +33 -6
  151. maxframe/dataframe/indexing/sample.py +8 -0
  152. maxframe/dataframe/indexing/setitem.py +3 -3
  153. maxframe/dataframe/indexing/swaplevel.py +185 -0
  154. maxframe/dataframe/indexing/take.py +99 -0
  155. maxframe/dataframe/indexing/truncate.py +140 -0
  156. maxframe/dataframe/indexing/where.py +0 -11
  157. maxframe/dataframe/indexing/xs.py +148 -0
  158. maxframe/dataframe/merge/__init__.py +15 -1
  159. maxframe/dataframe/merge/append.py +97 -98
  160. maxframe/dataframe/merge/combine.py +244 -0
  161. maxframe/dataframe/merge/combine_first.py +120 -0
  162. maxframe/dataframe/merge/compare.py +387 -0
  163. maxframe/dataframe/merge/concat.py +183 -0
  164. maxframe/dataframe/merge/update.py +271 -0
  165. maxframe/dataframe/misc/__init__.py +28 -11
  166. maxframe/dataframe/misc/_duplicate.py +10 -4
  167. maxframe/dataframe/misc/apply.py +1 -1
  168. maxframe/dataframe/misc/check_unique.py +82 -0
  169. maxframe/dataframe/misc/clip.py +145 -0
  170. maxframe/dataframe/misc/describe.py +175 -9
  171. maxframe/dataframe/misc/drop.py +31 -0
  172. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  173. maxframe/dataframe/misc/duplicated.py +2 -2
  174. maxframe/dataframe/misc/get_dummies.py +5 -1
  175. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  176. maxframe/dataframe/misc/isin.py +2 -2
  177. maxframe/dataframe/misc/map.py +125 -18
  178. maxframe/dataframe/misc/repeat.py +159 -0
  179. maxframe/dataframe/misc/tests/test_misc.py +48 -3
  180. maxframe/dataframe/misc/to_numeric.py +3 -0
  181. maxframe/dataframe/misc/transform.py +12 -5
  182. maxframe/dataframe/misc/transpose.py +13 -1
  183. maxframe/dataframe/misc/valid_index.py +115 -0
  184. maxframe/dataframe/misc/value_counts.py +38 -4
  185. maxframe/dataframe/missing/checkna.py +14 -6
  186. maxframe/dataframe/missing/dropna.py +5 -0
  187. maxframe/dataframe/missing/fillna.py +1 -1
  188. maxframe/dataframe/missing/replace.py +7 -4
  189. maxframe/dataframe/reduction/__init__.py +35 -16
  190. maxframe/dataframe/reduction/aggregation.py +43 -14
  191. maxframe/dataframe/reduction/all.py +2 -2
  192. maxframe/dataframe/reduction/any.py +2 -2
  193. maxframe/dataframe/reduction/argmax.py +103 -0
  194. maxframe/dataframe/reduction/argmin.py +103 -0
  195. maxframe/dataframe/reduction/core.py +80 -24
  196. maxframe/dataframe/reduction/count.py +13 -9
  197. maxframe/dataframe/reduction/cov.py +166 -0
  198. maxframe/dataframe/reduction/cummax.py +2 -2
  199. maxframe/dataframe/reduction/cummin.py +2 -2
  200. maxframe/dataframe/reduction/cumprod.py +2 -2
  201. maxframe/dataframe/reduction/cumsum.py +2 -2
  202. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  203. maxframe/dataframe/reduction/idxmax.py +185 -0
  204. maxframe/dataframe/reduction/idxmin.py +185 -0
  205. maxframe/dataframe/reduction/kurtosis.py +37 -30
  206. maxframe/dataframe/reduction/max.py +2 -2
  207. maxframe/dataframe/reduction/mean.py +9 -7
  208. maxframe/dataframe/reduction/median.py +2 -2
  209. maxframe/dataframe/reduction/min.py +2 -2
  210. maxframe/dataframe/reduction/mode.py +144 -0
  211. maxframe/dataframe/reduction/nunique.py +19 -11
  212. maxframe/dataframe/reduction/prod.py +18 -13
  213. maxframe/dataframe/reduction/reduction_size.py +2 -2
  214. maxframe/dataframe/reduction/sem.py +13 -9
  215. maxframe/dataframe/reduction/skew.py +31 -27
  216. maxframe/dataframe/reduction/str_concat.py +10 -7
  217. maxframe/dataframe/reduction/sum.py +18 -14
  218. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  219. maxframe/dataframe/reduction/unique.py +20 -3
  220. maxframe/dataframe/reduction/var.py +16 -12
  221. maxframe/dataframe/reshape/__init__.py +38 -0
  222. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  223. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  224. maxframe/dataframe/reshape/unstack.py +114 -0
  225. maxframe/dataframe/sort/__init__.py +16 -1
  226. maxframe/dataframe/sort/argsort.py +68 -0
  227. maxframe/dataframe/sort/core.py +2 -1
  228. maxframe/dataframe/sort/nlargest.py +238 -0
  229. maxframe/dataframe/sort/nsmallest.py +228 -0
  230. maxframe/dataframe/sort/rank.py +147 -0
  231. maxframe/dataframe/statistics/__init__.py +3 -3
  232. maxframe/dataframe/statistics/corr.py +1 -0
  233. maxframe/dataframe/statistics/quantile.py +2 -2
  234. maxframe/dataframe/tests/test_typing.py +104 -0
  235. maxframe/dataframe/tests/test_utils.py +66 -2
  236. maxframe/dataframe/tseries/__init__.py +19 -0
  237. maxframe/dataframe/tseries/at_time.py +61 -0
  238. maxframe/dataframe/tseries/between_time.py +122 -0
  239. maxframe/dataframe/typing_.py +185 -0
  240. maxframe/dataframe/utils.py +125 -52
  241. maxframe/dataframe/window/aggregation.py +8 -4
  242. maxframe/dataframe/window/core.py +14 -1
  243. maxframe/dataframe/window/ewm.py +1 -3
  244. maxframe/dataframe/window/expanding.py +37 -35
  245. maxframe/dataframe/window/rolling.py +49 -39
  246. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  247. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  248. maxframe/env.py +7 -4
  249. maxframe/errors.py +2 -2
  250. maxframe/io/odpsio/schema.py +9 -3
  251. maxframe/io/odpsio/tableio.py +7 -2
  252. maxframe/io/odpsio/tests/test_schema.py +198 -83
  253. maxframe/learn/__init__.py +10 -2
  254. maxframe/learn/cluster/__init__.py +15 -0
  255. maxframe/learn/cluster/_kmeans.py +782 -0
  256. maxframe/learn/contrib/llm/core.py +18 -7
  257. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  258. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  259. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  260. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  261. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  262. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  263. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  264. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  265. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  266. maxframe/learn/contrib/llm/models/managed.py +76 -11
  267. maxframe/learn/contrib/llm/models/openai.py +72 -0
  268. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  269. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  270. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  271. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  272. maxframe/learn/contrib/llm/text.py +348 -42
  273. maxframe/learn/contrib/models.py +4 -1
  274. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  275. maxframe/learn/contrib/xgboost/core.py +113 -4
  276. maxframe/learn/contrib/xgboost/predict.py +4 -2
  277. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  278. maxframe/learn/contrib/xgboost/train.py +7 -2
  279. maxframe/learn/core.py +66 -0
  280. maxframe/learn/linear_model/_base.py +58 -1
  281. maxframe/learn/linear_model/_lin_reg.py +1 -1
  282. maxframe/learn/metrics/__init__.py +6 -0
  283. maxframe/learn/metrics/_classification.py +145 -0
  284. maxframe/learn/metrics/_ranking.py +477 -0
  285. maxframe/learn/metrics/_scorer.py +60 -0
  286. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  287. maxframe/learn/metrics/pairwise/core.py +77 -0
  288. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  289. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  290. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  291. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  292. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  293. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  294. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  295. maxframe/learn/metrics/tests/__init__.py +13 -0
  296. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  297. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  298. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  299. maxframe/learn/utils/__init__.py +2 -1
  300. maxframe/learn/utils/checks.py +1 -2
  301. maxframe/learn/utils/core.py +59 -0
  302. maxframe/learn/utils/extmath.py +79 -9
  303. maxframe/learn/utils/odpsio.py +262 -0
  304. maxframe/learn/utils/validation.py +2 -2
  305. maxframe/lib/compat.py +40 -0
  306. maxframe/lib/dtypes_extension/__init__.py +16 -1
  307. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  308. maxframe/lib/dtypes_extension/blob.py +304 -0
  309. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  310. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  311. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  312. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  313. maxframe/lib/filesystem/_oss_lib/common.py +124 -50
  314. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  315. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  316. maxframe/lib/filesystem/base.py +1 -1
  317. maxframe/lib/filesystem/core.py +1 -1
  318. maxframe/lib/filesystem/oss.py +115 -46
  319. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  320. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  321. maxframe/lib/wrapped_pickle.py +10 -0
  322. maxframe/opcodes.py +41 -15
  323. maxframe/protocol.py +12 -0
  324. maxframe/remote/core.py +4 -0
  325. maxframe/serialization/__init__.py +11 -2
  326. maxframe/serialization/arrow.py +38 -13
  327. maxframe/serialization/blob.py +32 -0
  328. maxframe/serialization/core.cp37-win32.pyd +0 -0
  329. maxframe/serialization/core.pyx +39 -1
  330. maxframe/serialization/exception.py +2 -4
  331. maxframe/serialization/numpy.py +11 -0
  332. maxframe/serialization/pandas.py +46 -9
  333. maxframe/serialization/serializables/core.py +2 -2
  334. maxframe/serialization/tests/test_serial.py +31 -4
  335. maxframe/tensor/__init__.py +38 -8
  336. maxframe/tensor/arithmetic/__init__.py +19 -10
  337. maxframe/tensor/arithmetic/core.py +2 -2
  338. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  339. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
  340. maxframe/tensor/core.py +6 -2
  341. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  342. maxframe/tensor/extensions/__init__.py +2 -0
  343. maxframe/tensor/extensions/apply_chunk.py +3 -3
  344. maxframe/tensor/extensions/rebalance.py +65 -0
  345. maxframe/tensor/fft/__init__.py +32 -0
  346. maxframe/tensor/fft/core.py +168 -0
  347. maxframe/tensor/fft/fft.py +112 -0
  348. maxframe/tensor/fft/fft2.py +118 -0
  349. maxframe/tensor/fft/fftfreq.py +80 -0
  350. maxframe/tensor/fft/fftn.py +123 -0
  351. maxframe/tensor/fft/fftshift.py +79 -0
  352. maxframe/tensor/fft/hfft.py +112 -0
  353. maxframe/tensor/fft/ifft.py +114 -0
  354. maxframe/tensor/fft/ifft2.py +115 -0
  355. maxframe/tensor/fft/ifftn.py +123 -0
  356. maxframe/tensor/fft/ifftshift.py +73 -0
  357. maxframe/tensor/fft/ihfft.py +93 -0
  358. maxframe/tensor/fft/irfft.py +118 -0
  359. maxframe/tensor/fft/irfft2.py +62 -0
  360. maxframe/tensor/fft/irfftn.py +114 -0
  361. maxframe/tensor/fft/rfft.py +116 -0
  362. maxframe/tensor/fft/rfft2.py +63 -0
  363. maxframe/tensor/fft/rfftfreq.py +87 -0
  364. maxframe/tensor/fft/rfftn.py +113 -0
  365. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  366. maxframe/tensor/linalg/__init__.py +7 -0
  367. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  368. maxframe/tensor/linalg/cholesky.py +117 -0
  369. maxframe/tensor/linalg/einsum.py +339 -0
  370. maxframe/tensor/linalg/lstsq.py +100 -0
  371. maxframe/tensor/linalg/matrix_norm.py +75 -0
  372. maxframe/tensor/linalg/norm.py +249 -0
  373. maxframe/tensor/linalg/solve.py +72 -0
  374. maxframe/tensor/linalg/solve_triangular.py +2 -2
  375. maxframe/tensor/linalg/vector_norm.py +113 -0
  376. maxframe/tensor/misc/__init__.py +24 -1
  377. maxframe/tensor/misc/argwhere.py +72 -0
  378. maxframe/tensor/misc/array_split.py +46 -0
  379. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  380. maxframe/tensor/misc/copyto.py +130 -0
  381. maxframe/tensor/misc/delete.py +104 -0
  382. maxframe/tensor/misc/dsplit.py +68 -0
  383. maxframe/tensor/misc/ediff1d.py +74 -0
  384. maxframe/tensor/misc/expand_dims.py +85 -0
  385. maxframe/tensor/misc/flip.py +90 -0
  386. maxframe/tensor/misc/fliplr.py +64 -0
  387. maxframe/tensor/misc/flipud.py +68 -0
  388. maxframe/tensor/misc/hsplit.py +85 -0
  389. maxframe/tensor/misc/insert.py +139 -0
  390. maxframe/tensor/misc/moveaxis.py +83 -0
  391. maxframe/tensor/misc/result_type.py +88 -0
  392. maxframe/tensor/misc/roll.py +124 -0
  393. maxframe/tensor/misc/rollaxis.py +77 -0
  394. maxframe/tensor/misc/shape.py +89 -0
  395. maxframe/tensor/misc/split.py +190 -0
  396. maxframe/tensor/misc/tile.py +109 -0
  397. maxframe/tensor/misc/vsplit.py +74 -0
  398. maxframe/tensor/reduction/array_equal.py +2 -1
  399. maxframe/tensor/sort/__init__.py +2 -0
  400. maxframe/tensor/sort/argpartition.py +98 -0
  401. maxframe/tensor/sort/partition.py +228 -0
  402. maxframe/tensor/spatial/__init__.py +15 -0
  403. maxframe/tensor/spatial/distance/__init__.py +17 -0
  404. maxframe/tensor/spatial/distance/cdist.py +421 -0
  405. maxframe/tensor/spatial/distance/pdist.py +398 -0
  406. maxframe/tensor/spatial/distance/squareform.py +153 -0
  407. maxframe/tensor/special/__init__.py +159 -21
  408. maxframe/tensor/special/airy.py +55 -0
  409. maxframe/tensor/special/bessel.py +199 -0
  410. maxframe/tensor/special/core.py +65 -4
  411. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  412. maxframe/tensor/special/ellip_harm.py +55 -0
  413. maxframe/tensor/special/err_fresnel.py +223 -0
  414. maxframe/tensor/special/gamma_funcs.py +303 -0
  415. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  416. maxframe/tensor/special/info_theory.py +189 -0
  417. maxframe/tensor/special/misc.py +21 -0
  418. maxframe/tensor/statistics/__init__.py +6 -0
  419. maxframe/tensor/statistics/corrcoef.py +77 -0
  420. maxframe/tensor/statistics/cov.py +222 -0
  421. maxframe/tensor/statistics/digitize.py +126 -0
  422. maxframe/tensor/statistics/histogram.py +520 -0
  423. maxframe/tensor/statistics/median.py +85 -0
  424. maxframe/tensor/statistics/ptp.py +89 -0
  425. maxframe/tensor/utils.py +3 -3
  426. maxframe/tests/test_udf.py +61 -0
  427. maxframe/tests/test_utils.py +51 -6
  428. maxframe/tests/utils.py +0 -2
  429. maxframe/typing_.py +2 -0
  430. maxframe/udf.py +130 -9
  431. maxframe/utils.py +254 -27
  432. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
  433. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
  434. maxframe_client/fetcher.py +35 -4
  435. maxframe_client/session/odps.py +7 -2
  436. maxframe_client/session/task.py +8 -1
  437. maxframe_client/tests/test_fetcher.py +76 -3
  438. maxframe_client/tests/test_session.py +28 -1
  439. maxframe/dataframe/arrays.py +0 -864
  440. maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  441. maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  442. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  443. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -16,15 +16,15 @@
16
16
  def _install():
17
17
  from ..core import DATAFRAME_TYPE, SERIES_TYPE
18
18
  from .corr import df_corr, df_corrwith, series_autocorr, series_corr
19
- from .quantile import quantile_dataframe, quantile_series
19
+ from .quantile import dataframe_quantile, series_quantile
20
20
 
21
21
  for t in SERIES_TYPE:
22
- t.quantile = quantile_series
22
+ t.quantile = series_quantile
23
23
  t.corr = series_corr
24
24
  t.autocorr = series_autocorr
25
25
 
26
26
  for t in DATAFRAME_TYPE:
27
- t.quantile = quantile_dataframe
27
+ t.quantile = dataframe_quantile
28
28
  t.corr = df_corr
29
29
  t.corrwith = df_corrwith
30
30
 
@@ -34,6 +34,7 @@ class DataFrameCorr(DataFrameOperator, DataFrameOperatorMixin):
34
34
  min_periods = Int32Field("min_periods", default=None)
35
35
  axis = Int32Field("axis", default=None)
36
36
  drop = BoolField("drop", default=None)
37
+ ddof = Int32Field("ddof", default=0)
37
38
 
38
39
  @classmethod
39
40
  def _set_inputs(cls, op: "DataFrameCorr", inputs: List[EntityData]):
@@ -213,7 +213,7 @@ class DataFrameQuantile(DataFrameOperator, DataFrameOperatorMixin):
213
213
  return self._call_series(a, inputs)
214
214
 
215
215
 
216
- def quantile_series(series, q=0.5, interpolation="linear"):
216
+ def series_quantile(series, q=0.5, interpolation="linear"):
217
217
  """
218
218
  Return value at the given quantile.
219
219
 
@@ -268,7 +268,7 @@ def quantile_series(series, q=0.5, interpolation="linear"):
268
268
  return op(series, q_input=q_input)
269
269
 
270
270
 
271
- def quantile_dataframe(df, q=0.5, axis=0, numeric_only=True, interpolation="linear"):
271
+ def dataframe_quantile(df, q=0.5, axis=0, numeric_only=True, interpolation="linear"):
272
272
  # FIXME: Timedelta not support. Data invalid: ODPS-0010000:InvalidArgument:duration[ns] is not equal to string
273
273
  """
274
274
  Return values at the given quantile over requested axis.
@@ -0,0 +1,104 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+
18
+ from ...core import OutputType
19
+ from ..typing_ import get_function_output_meta
20
+
21
+
22
+ def test_dataframe_type_annotation():
23
+ def func() -> pd.DataFrame[int]:
24
+ pass
25
+
26
+ meta = get_function_output_meta(func)
27
+ assert meta is not None
28
+ assert meta.output_type.name == "dataframe"
29
+ assert len(meta.dtypes) == 1
30
+ assert meta.dtypes[0] == np.dtype(int)
31
+
32
+ def func() -> pd.DataFrame[{"col1": int, "col2": float}]: # noqa: F821
33
+ pass
34
+
35
+ meta = get_function_output_meta(func)
36
+ assert meta is not None
37
+ assert meta.output_type.name == "dataframe"
38
+ assert len(meta.dtypes) == 2
39
+ assert meta.dtypes[0] == np.dtype(int)
40
+ assert meta.dtypes[1] == np.dtype(float)
41
+
42
+ def func() -> pd.DataFrame[str, {"col1": int, "col2": float}]: # noqa: F821
43
+ pass
44
+
45
+ meta = get_function_output_meta(func)
46
+ assert meta is not None
47
+ assert meta.output_type.name == "dataframe"
48
+ assert len(meta.dtypes) == 2
49
+ assert meta.index_value.value.dtype == np.dtype("O")
50
+ assert list(meta.dtypes.index) == ["col1", "col2"]
51
+ assert list(meta.dtypes) == [np.dtype(int), np.dtype(float)]
52
+
53
+
54
+ def test_series_type_annotation():
55
+ def func() -> pd.Series[np.str_]:
56
+ pass
57
+
58
+ meta = get_function_output_meta(func)
59
+ assert meta is not None
60
+ assert meta.output_type == OutputType.series
61
+ assert meta.dtype == np.dtype(np.str_)
62
+
63
+ def func() -> pd.Series[("idx_name", str), ("series_name", np.int64)]: # noqa: F821
64
+ pass
65
+
66
+ meta = get_function_output_meta(func)
67
+ assert meta is not None
68
+ assert meta.output_type == OutputType.series
69
+ assert meta.name == "series_name"
70
+ assert meta.dtype == np.dtype(np.int64)
71
+ assert meta.index_value.value._name == "idx_name"
72
+ assert meta.index_value.value.dtype == np.dtype("O")
73
+
74
+
75
+ def test_index_type_annotation():
76
+ def func() -> pd.Index[np.int64]:
77
+ pass
78
+
79
+ meta = get_function_output_meta(func)
80
+ assert meta is not None
81
+ assert meta.output_type == OutputType.index
82
+
83
+ def func() -> pd.Index[[("ix1", str), ("ix2", np.int64)]]: # noqa: F821
84
+ pass
85
+
86
+ meta = get_function_output_meta(func)
87
+ assert meta is not None
88
+ assert meta.output_type == OutputType.index
89
+ assert meta.index_value.value.names == ["ix1", "ix2"]
90
+ assert list(meta.index_value.value.dtypes) == [np.dtype("O"), np.dtype("int64")]
91
+
92
+
93
+ def test_function_output_meta_corner_cases():
94
+ def func():
95
+ pass
96
+
97
+ assert get_function_output_meta(func) is None
98
+ assert get_function_output_meta("non-func-obj") is None
99
+
100
+ def func() -> int:
101
+ pass
102
+
103
+ meta = get_function_output_meta(func)
104
+ assert meta.dtype == np.dtype("int64")
@@ -17,9 +17,16 @@ import pandas as pd
17
17
  import pyarrow as pa
18
18
  import pytest
19
19
 
20
- from ...udf import MarkedFunction, with_python_requirements, with_resources
20
+ from ...config import option_context
21
+ from ...core.operator import Operator
22
+ from ...udf import (
23
+ MarkedFunction,
24
+ with_python_requirements,
25
+ with_resources,
26
+ with_running_options,
27
+ )
21
28
  from ...utils import ARROW_DTYPE_NOT_SUPPORTED
22
- from ..utils import _generate_value, pack_func_args
29
+ from ..utils import _generate_value, copy_func_scheduling_hints, pack_func_args
23
30
 
24
31
  try:
25
32
  from pandas import ArrowDtype
@@ -84,6 +91,20 @@ def test_pack_function(df1):
84
91
  [(np.int32(1), "1")],
85
92
  ),
86
93
  (pa.map_(pa.int32(), pa.string()), 1, [(np.int32(1), "1")]),
94
+ (
95
+ ArrowDtype(
96
+ pa.struct([pa.field("a", pa.int32()), pa.field("b", pa.string())])
97
+ )
98
+ if ArrowDtype
99
+ else None,
100
+ 1,
101
+ {"a": np.int32(1), "b": "1"},
102
+ ),
103
+ (
104
+ pa.struct([pa.field("a", pa.int32()), pa.field("b", pa.string())]),
105
+ 1,
106
+ {"a": np.int32(1), "b": "1"},
107
+ ),
87
108
  (pa.int32(), 1, np.int32(1)),
88
109
  (np.datetime64, "2023-01-01", pd.Timestamp("2023-01-01")),
89
110
  (np.timedelta64, "1D", pd.Timedelta("1D")),
@@ -99,3 +120,46 @@ def test_pack_function(df1):
99
120
  def test_generate_value(dtype, fill_value, expected):
100
121
  result = _generate_value(dtype, fill_value)
101
122
  assert result == expected
123
+
124
+
125
+ def test_copy_func_scheduling_hints():
126
+ # Test with a regular function (no scheduling hints)
127
+ with option_context() as options:
128
+ options.function.default_running_options = {} # No default options
129
+
130
+ def regular_func(x):
131
+ return x + 1
132
+
133
+ op1 = Operator()
134
+ copy_func_scheduling_hints(regular_func, op1)
135
+ # Should not set any attributes since regular function has no hints
136
+ assert not hasattr(op1, "expect_engine") or op1.expect_engine is None
137
+ assert not hasattr(op1, "expect_resources") or op1.expect_resources is None
138
+ assert not hasattr(op1, "gpu") or op1.gpu is None
139
+
140
+ # Test with MarkedFunction with scheduling hints
141
+
142
+ @with_running_options(engine="DPE", cpu=4, memory="8GiB")
143
+ def marked_func(x):
144
+ return x + 1
145
+
146
+ op2 = Operator()
147
+ copy_func_scheduling_hints(marked_func, op2)
148
+ assert op2.expect_engine == "DPE"
149
+ # The expect_resources will include default values for gpu and gu_quota
150
+ expected_resources = {"cpu": 4, "memory": "8GiB", "gpu": 0, "gu_quota": None}
151
+ assert op2.expect_resources == expected_resources
152
+
153
+ # Test with MarkedFunction with GPU
154
+ @with_running_options(gu=2)
155
+ def gpu_func(x):
156
+ return x + 1
157
+
158
+ op3 = Operator()
159
+ copy_func_scheduling_hints(gpu_func, op3)
160
+ assert op3.gpu is True
161
+ # The expect_resources will include the gu value and default values
162
+ # System has default options: {'cpu': 1, 'memory': '4GiB', 'gpu': 0}
163
+ # The with_running_options decorator will override the gpu value with the gu value
164
+ expected_resources = {"gpu": 2, "gu_quota": None, "cpu": 1, "memory": "4GiB"}
165
+ assert op3.expect_resources == expected_resources
@@ -11,3 +11,22 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+
15
+
16
def _install():
    """Attach ``at_time`` and ``between_time`` to series and dataframe types."""
    from ..core import DATAFRAME_TYPE, SERIES_TYPE
    from .at_time import at_time
    from .between_time import between_time
    from .to_datetime import to_datetime  # noqa

    patched_methods = (("at_time", at_time), ("between_time", between_time))
    # Series types are patched first, then DataFrame types.
    for type_group in (SERIES_TYPE, DATAFRAME_TYPE):
        for t in type_group:
            for attr_name, method in patched_methods:
                setattr(t, attr_name, method)


# Run the registration once at import time, then drop the helper name.
_install()
del _install
@@ -0,0 +1,61 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
def at_time(df_or_series, time, axis=0):
    """
    Select values at particular time of day (e.g., 9:30AM).

    This is a shorthand for ``between_time`` with both bounds set to
    ``time`` and both bounds inclusive.

    Parameters
    ----------
    time : datetime.time or str
        The values to select.
    axis : {0 or 'index', 1 or 'columns'}, default 0
        For `Series` this parameter is unused and defaults to 0.

    Returns
    -------
    Series or DataFrame

    Raises
    ------
    TypeError
        If the index is not a :class:`DatetimeIndex`

    See Also
    --------
    between_time : Select values between particular times of the day.
    first : Select initial periods of time series based on a date offset.
    last : Select final periods of time series based on a date offset.
    DatetimeIndex.indexer_at_time : Get just the index locations for
        values at particular time of the day.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> i = md.date_range('2018-04-09', periods=4, freq='12h')
    >>> ts = md.DataFrame({'A': [1, 2, 3, 4]}, index=i)
    >>> ts.execute()
                         A
    2018-04-09 00:00:00  1
    2018-04-09 12:00:00  2
    2018-04-10 00:00:00  3
    2018-04-10 12:00:00  4

    >>> ts.at_time('12:00').execute()
                         A
    2018-04-09 12:00:00  2
    2018-04-10 12:00:00  4
    """
    # A single instant is the closed interval [time, time].
    select_between = df_or_series.between_time
    return select_between(time, time, axis=axis, inclusive="both")
@@ -0,0 +1,122 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+
17
+ from ... import opcodes
18
+ from ...core import get_output_types
19
+ from ...serialization.serializables import AnyField, Int32Field, StringField
20
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
21
+ from ..utils import parse_index, validate_axis
22
+
23
+
24
class DataFrameBetweenTime(DataFrameOperator, DataFrameOperatorMixin):
    """Operator built by ``between_time`` to describe a time-of-day filter."""

    _op_type_ = opcodes.BETWEEN_TIME

    # Serialized operator arguments
    start_time = AnyField("start_time")
    end_time = AnyField("end_time")
    inclusive = StringField("inclusive")
    axis = Int32Field("axis")

    def __call__(self, df_or_series):
        """
        Build the output tileable for the given input.

        The output reuses the input's params, except that the length along
        ``self.axis`` is set to NaN and the index (or columns) value on that
        axis is re-derived from an empty slice of the input's labels.
        """
        self._output_types = get_output_types(df_or_series)
        params = df_or_series.params

        # Length along the filtered axis is not known ahead of execution.
        shape = list(df_or_series.shape)
        shape[self.axis] = np.nan
        params["shape"] = tuple(shape)

        key_params = (df_or_series, self.start_time, self.end_time, self.inclusive)
        if self.axis == 0:
            param_name, labels = "index_value", df_or_series.index_value
        else:
            param_name, labels = "columns_value", df_or_series.columns_value
        params[param_name] = parse_index(labels.to_pandas()[:0], key_params)

        return self.new_tileable([df_or_series], **params)
51
+
52
+
53
def between_time(df_or_series, start_time, end_time, inclusive="both", axis=0):
    """
    Select values between particular times of the day (e.g., 9:00-9:30 AM).

    Passing a ``start_time`` that is later than ``end_time`` selects the
    times that are *not* between the two times.

    Parameters
    ----------
    start_time : datetime.time or str
        Initial time as a time filter limit.
    end_time : datetime.time or str
        End time as a time filter limit.
    inclusive : {"both", "neither", "left", "right"}, default "both"
        Include boundaries; whether to set each bound as closed or open.
    axis : {0 or 'index', 1 or 'columns'}, default 0
        Determine range time on index or columns value.
        For `Series` this parameter is unused and defaults to 0.

    Returns
    -------
    Series or DataFrame
        Data from the original object filtered to the specified dates range.

    Raises
    ------
    TypeError
        If the index is not a :class:`DatetimeIndex`

    See Also
    --------
    at_time : Select values at a particular time of the day.
    first : Select initial periods of time series based on a date offset.
    last : Select final periods of time series based on a date offset.
    DatetimeIndex.indexer_between_time : Get just the index locations for
        values between particular times of the day.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> i = md.date_range('2018-04-09', periods=4, freq='1D20min')
    >>> ts = md.DataFrame({'A': [1, 2, 3, 4]}, index=i)
    >>> ts.execute()
                         A
    2018-04-09 00:00:00  1
    2018-04-10 00:20:00  2
    2018-04-11 00:40:00  3
    2018-04-12 01:00:00  4

    >>> ts.between_time('0:15', '0:45').execute()
                         A
    2018-04-10 00:20:00  2
    2018-04-11 00:40:00  3

    You get the times that are *not* between two times by setting
    ``start_time`` later than ``end_time``:

    >>> ts.between_time('0:45', '0:15').execute()
                         A
    2018-04-09 00:00:00  1
    2018-04-12 01:00:00  4
    """
    # Normalize the axis first so the operator stores the validated value.
    op_kwargs = {
        "axis": validate_axis(axis, df_or_series),
        "start_time": start_time,
        "end_time": end_time,
        "inclusive": inclusive,
    }
    return DataFrameBetweenTime(**op_kwargs)(df_or_series)
@@ -0,0 +1,185 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import dataclasses
16
+ import functools
17
+ import inspect
18
+ from typing import Any, Callable, Generic, List, Optional, TypeVar
19
+
20
+ import pandas as pd
21
+
22
+ from ..core import OutputType
23
+ from ..typing_ import PandasDType
24
+ from ..utils import make_dtype
25
+ from .utils import InferredDataFrameMeta, parse_index
26
+
27
+ # TypeVars
28
+ T = TypeVar("T")
29
+
30
+
31
# Name/dtype pair describing one field of a typed annotation; instances are
# used below both for index fields and for data fields.
@dataclasses.dataclass
class _FieldDef:
    # Field label; None when the annotation item carried only a dtype
    name: Any
    # Dtype of the field
    dtype: PandasDType
35
+
36
+
37
def _item_to_field_def(item_):
    """
    Convert one annotation item into a ``_FieldDef``.

    A tuple is read as ``(name, dtype_spec)``; anything else is treated as
    a bare dtype spec and produces an unnamed field. The dtype spec is
    normalized with ``make_dtype`` in both cases.
    """
    if not isinstance(item_, tuple):
        return _FieldDef(name=None, dtype=make_dtype(item_))

    resolved_dtype = make_dtype(item_[1])
    return _FieldDef(name=item_[0], dtype=resolved_dtype)
44
+
45
+
46
class IndexType:
    """Annotation object holding the field definitions of an index."""

    def __init__(self, index_fields: List[_FieldDef]):
        self.index_fields = index_fields

    def __repr__(self):
        field_dtypes = [field.dtype for field in self.index_fields]
        return f"IndexType({field_dtypes})"

    @classmethod
    def from_getitem_args(cls, item) -> "IndexType":
        """
        Build an ``IndexType`` from the argument of a subscript expression.

        Dicts and pandas Series are expanded into their ``(key, value)``
        pairs; a list yields one field per element; any other value is
        treated as a single field.
        """
        if isinstance(item, (dict, pd.Series)):
            item = list(item.items())

        # Wrap a lone item so both shapes go through the same conversion.
        entries = item if isinstance(item, list) else [item]
        return IndexType([_item_to_field_def(entry) for entry in entries])
62
+
63
+
64
class SeriesType(Generic[T]):
    """Annotation object holding a series' value field and index fields."""

    def __init__(
        self, index_fields: Optional[List[_FieldDef]], name_and_dtype: _FieldDef
    ):
        self.index_fields = index_fields
        self.name_and_dtype = name_and_dtype

    def __repr__(self) -> str:
        return f"SeriesType[{self.name_and_dtype.dtype}]"

    @classmethod
    def from_getitem_args(cls, item) -> "SeriesType":
        """
        Build a ``SeriesType`` from the argument of a subscript expression.

        A non-tuple (or a one-element tuple) describes only the value field
        and leaves ``index_fields`` as None. Otherwise the first element
        describes the index and the second one the value field.
        """
        args = item if isinstance(item, tuple) else (item,)
        if len(args) != 1:
            # Value field is converted before the index description.
            value_field = _item_to_field_def(args[1])
            index_fields = IndexType.from_getitem_args(args[0]).index_fields
            return SeriesType(index_fields, value_field)

        return SeriesType(None, _item_to_field_def(args[0]))
85
+
86
+
87
class DataFrameType:
    """Annotation object holding a dataframe's data fields and index fields."""

    def __init__(
        self,
        index_fields: Optional[List[_FieldDef]],
        data_fields: List[_FieldDef],
    ):
        self.index_fields = index_fields
        self.data_fields = data_fields

    def __repr__(self) -> str:
        column_dtypes = [data_field.dtype for data_field in self.data_fields]
        return f"DataFrameType[{column_dtypes}]"

    @classmethod
    def from_getitem_args(cls, item) -> "DataFrameType":
        """
        Build a ``DataFrameType`` from the argument of a subscript expression.

        The last element of the argument describes the data fields. When
        more than one element is given, the first one describes the index;
        otherwise ``index_fields`` is None.
        """
        args = item if isinstance(item, tuple) else (item,)
        # IndexType's parser is reused to turn the column spec into fields.
        data_fields = IndexType.from_getitem_args(args[-1]).index_fields

        index_fields = None
        if len(args) != 1:
            index_fields = IndexType.from_getitem_args(args[0]).index_fields
        return DataFrameType(index_fields, data_fields)
110
+
111
+
112
def get_function_output_meta(
    func: Callable, df_obj=None
) -> Optional[InferredDataFrameMeta]:
    """
    Infer output metadata from the return annotation of ``func``.

    Parameters
    ----------
    func : Callable
        Function whose ``return`` annotation is inspected.
    df_obj : optional
        Passed to ``parse_index`` as a key parameter when an index value
        is built.

    Returns
    -------
    InferredDataFrameMeta or None
        None when ``func`` cannot be inspected, has no return annotation,
        or when the annotation is not one of ``DataFrameType``,
        ``SeriesType`` or ``IndexType`` and cannot be converted by
        ``make_dtype``. Otherwise the inferred output type, index value,
        dtypes, dtype and name.
    """
    try:
        func_argspec = inspect.getfullargspec(func)
    except Exception:
        # Best effort: a callable that cannot be inspected yields no meta.
        # ``Exception`` (not a bare ``except``) keeps KeyboardInterrupt and
        # SystemExit propagating.
        return None

    ret_type = (func_argspec.annotations or {}).get("return")
    if ret_type is None:
        return None

    dtypes = dtype = name = None
    index_fields = None
    if isinstance(ret_type, DataFrameType):
        output_type = OutputType.dataframe
        dtypes = pd.Series(
            [fd.dtype for fd in ret_type.data_fields],
            index=[fd.name for fd in ret_type.data_fields],
        )
        index_fields = ret_type.index_fields
    elif isinstance(ret_type, SeriesType):
        output_type = OutputType.series
        dtype = ret_type.name_and_dtype.dtype
        name = ret_type.name_and_dtype.name
        index_fields = ret_type.index_fields
    elif isinstance(ret_type, IndexType):
        output_type = OutputType.index
        index_fields = ret_type.index_fields
    else:
        # Any other annotation is treated as a scalar type.
        output_type = OutputType.scalar
        try:
            dtype = make_dtype(ret_type)
        except Exception:
            # Annotation is not convertible to a dtype: nothing to infer.
            return None

    if index_fields is not None:
        if len(index_fields) == 1:
            mock_idx = pd.Index(
                [], dtype=index_fields[0].dtype, name=index_fields[0].name
            )
        else:
            # Build an empty frame with the declared dtypes and turn its
            # columns into the levels of an empty MultiIndex.
            col_names = [index_field.name for index_field in index_fields]
            col_dtypes = pd.Series(
                [index_field.dtype for index_field in index_fields], index=col_names
            )
            mock_df = pd.DataFrame([], columns=col_names).astype(col_dtypes)
            mock_idx = pd.MultiIndex.from_frame(mock_df)
        index_value = parse_index(mock_idx, df_obj, store_data=False)
    else:
        index_value = None

    return InferredDataFrameMeta(
        output_type=output_type,
        index_value=index_value,
        dtypes=dtypes,
        dtype=dtype,
        name=name,
    )
170
+
171
+
172
def register_pandas_typing_funcs():
    """
    Install ``__class_getitem__`` on pandas DataFrame, Series and Index.

    Subscripting one of these classes then returns the matching
    ``DataFrameType`` / ``SeriesType`` / ``IndexType`` built from the
    subscript argument. Classes that already expose ``__class_getitem__``
    are left untouched.
    """

    def _dispatch_getitem(cls, item, type_cls):
        return type_cls.from_getitem_args(item)

    annotation_types = {
        pd.DataFrame: DataFrameType,
        pd.Series: SeriesType,
        pd.Index: IndexType,
    }
    for pandas_cls, annotation_cls in annotation_types.items():
        if hasattr(pandas_cls, "__class_getitem__"):  # pragma: no cover
            continue
        # Bind the target type eagerly so each class gets its own parser.
        hook = functools.partial(_dispatch_getitem, type_cls=annotation_cls)
        pandas_cls.__class_getitem__ = classmethod(hook)