maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (443) hide show
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win32.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +9 -8
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +38 -1
  15. maxframe/codegen/spe/dataframe/misc.py +11 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +32 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +39 -18
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  29. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  30. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  31. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  32. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  33. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  34. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  35. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  36. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  37. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  38. maxframe/codegen/spe/tensor/__init__.py +3 -0
  39. maxframe/codegen/spe/tensor/datasource.py +1 -0
  40. maxframe/codegen/spe/tensor/fft.py +74 -0
  41. maxframe/codegen/spe/tensor/linalg.py +29 -2
  42. maxframe/codegen/spe/tensor/misc.py +79 -25
  43. maxframe/codegen/spe/tensor/spatial.py +45 -0
  44. maxframe/codegen/spe/tensor/statistics.py +44 -0
  45. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  46. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  47. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  48. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  49. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  50. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  51. maxframe/codegen/spe/utils.py +2 -0
  52. maxframe/config/config.py +73 -9
  53. maxframe/config/tests/test_validators.py +13 -1
  54. maxframe/config/validators.py +49 -0
  55. maxframe/conftest.py +54 -17
  56. maxframe/core/accessor.py +2 -2
  57. maxframe/core/base.py +2 -1
  58. maxframe/core/entity/core.py +5 -0
  59. maxframe/core/entity/tileables.py +3 -1
  60. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  61. maxframe/core/graph/entity.py +8 -3
  62. maxframe/core/mode.py +6 -1
  63. maxframe/core/operator/base.py +9 -2
  64. maxframe/core/operator/core.py +10 -2
  65. maxframe/core/operator/utils.py +13 -0
  66. maxframe/dataframe/__init__.py +12 -5
  67. maxframe/dataframe/accessors/__init__.py +1 -1
  68. maxframe/dataframe/accessors/compat.py +45 -0
  69. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  70. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  71. maxframe/dataframe/accessors/dict_/core.py +48 -0
  72. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  73. maxframe/dataframe/accessors/dict_/length.py +7 -16
  74. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  75. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  76. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  77. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  78. maxframe/dataframe/accessors/list_/core.py +48 -0
  79. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  80. maxframe/dataframe/accessors/list_/length.py +7 -16
  81. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  82. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  83. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  84. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  85. maxframe/dataframe/accessors/struct_/core.py +43 -0
  86. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  87. maxframe/dataframe/accessors/struct_/field.py +123 -0
  88. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  89. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  90. maxframe/dataframe/arithmetic/__init__.py +18 -4
  91. maxframe/dataframe/arithmetic/between.py +106 -0
  92. maxframe/dataframe/arithmetic/dot.py +237 -0
  93. maxframe/dataframe/arithmetic/maximum.py +33 -0
  94. maxframe/dataframe/arithmetic/minimum.py +33 -0
  95. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  96. maxframe/dataframe/core.py +161 -224
  97. maxframe/dataframe/datasource/__init__.py +18 -0
  98. maxframe/dataframe/datasource/core.py +6 -0
  99. maxframe/dataframe/datasource/direct.py +57 -0
  100. maxframe/dataframe/datasource/from_dict.py +124 -0
  101. maxframe/dataframe/datasource/from_index.py +1 -1
  102. maxframe/dataframe/datasource/from_records.py +77 -0
  103. maxframe/dataframe/datasource/from_tensor.py +109 -41
  104. maxframe/dataframe/datasource/read_csv.py +21 -14
  105. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  106. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  107. maxframe/dataframe/datasource/read_parquet.py +38 -39
  108. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  109. maxframe/dataframe/datastore/__init__.py +11 -1
  110. maxframe/dataframe/datastore/direct.py +268 -0
  111. maxframe/dataframe/datastore/to_csv.py +29 -41
  112. maxframe/dataframe/datastore/to_odps.py +36 -4
  113. maxframe/dataframe/extensions/__init__.py +20 -4
  114. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  115. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  116. maxframe/dataframe/extensions/collect_kv.py +126 -0
  117. maxframe/dataframe/extensions/extract_kv.py +177 -0
  118. maxframe/dataframe/extensions/flatjson.py +2 -1
  119. maxframe/dataframe/extensions/map_reduce.py +263 -0
  120. maxframe/dataframe/extensions/rebalance.py +62 -0
  121. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  122. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  123. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  124. maxframe/dataframe/groupby/__init__.py +17 -2
  125. maxframe/dataframe/groupby/aggregation.py +86 -49
  126. maxframe/dataframe/groupby/apply.py +1 -1
  127. maxframe/dataframe/groupby/apply_chunk.py +19 -5
  128. maxframe/dataframe/groupby/core.py +116 -16
  129. maxframe/dataframe/groupby/cum.py +4 -25
  130. maxframe/dataframe/groupby/expanding.py +264 -0
  131. maxframe/dataframe/groupby/fill.py +1 -1
  132. maxframe/dataframe/groupby/getitem.py +12 -5
  133. maxframe/dataframe/groupby/head.py +11 -1
  134. maxframe/dataframe/groupby/rank.py +136 -0
  135. maxframe/dataframe/groupby/rolling.py +206 -0
  136. maxframe/dataframe/groupby/shift.py +114 -0
  137. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  138. maxframe/dataframe/indexing/__init__.py +22 -2
  139. maxframe/dataframe/indexing/droplevel.py +195 -0
  140. maxframe/dataframe/indexing/filter.py +169 -0
  141. maxframe/dataframe/indexing/get_level_values.py +76 -0
  142. maxframe/dataframe/indexing/iat.py +45 -0
  143. maxframe/dataframe/indexing/iloc.py +152 -12
  144. maxframe/dataframe/indexing/insert.py +46 -18
  145. maxframe/dataframe/indexing/loc.py +287 -7
  146. maxframe/dataframe/indexing/reindex.py +14 -5
  147. maxframe/dataframe/indexing/rename.py +6 -0
  148. maxframe/dataframe/indexing/rename_axis.py +2 -2
  149. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  150. maxframe/dataframe/indexing/reset_index.py +33 -6
  151. maxframe/dataframe/indexing/sample.py +8 -0
  152. maxframe/dataframe/indexing/setitem.py +3 -3
  153. maxframe/dataframe/indexing/swaplevel.py +185 -0
  154. maxframe/dataframe/indexing/take.py +99 -0
  155. maxframe/dataframe/indexing/truncate.py +140 -0
  156. maxframe/dataframe/indexing/where.py +0 -11
  157. maxframe/dataframe/indexing/xs.py +148 -0
  158. maxframe/dataframe/merge/__init__.py +15 -1
  159. maxframe/dataframe/merge/append.py +97 -98
  160. maxframe/dataframe/merge/combine.py +244 -0
  161. maxframe/dataframe/merge/combine_first.py +120 -0
  162. maxframe/dataframe/merge/compare.py +387 -0
  163. maxframe/dataframe/merge/concat.py +183 -0
  164. maxframe/dataframe/merge/update.py +271 -0
  165. maxframe/dataframe/misc/__init__.py +28 -11
  166. maxframe/dataframe/misc/_duplicate.py +10 -4
  167. maxframe/dataframe/misc/apply.py +1 -1
  168. maxframe/dataframe/misc/check_unique.py +82 -0
  169. maxframe/dataframe/misc/clip.py +145 -0
  170. maxframe/dataframe/misc/describe.py +175 -9
  171. maxframe/dataframe/misc/drop.py +31 -0
  172. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  173. maxframe/dataframe/misc/duplicated.py +2 -2
  174. maxframe/dataframe/misc/get_dummies.py +5 -1
  175. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  176. maxframe/dataframe/misc/isin.py +2 -2
  177. maxframe/dataframe/misc/map.py +125 -18
  178. maxframe/dataframe/misc/repeat.py +159 -0
  179. maxframe/dataframe/misc/tests/test_misc.py +48 -3
  180. maxframe/dataframe/misc/to_numeric.py +3 -0
  181. maxframe/dataframe/misc/transform.py +12 -5
  182. maxframe/dataframe/misc/transpose.py +13 -1
  183. maxframe/dataframe/misc/valid_index.py +115 -0
  184. maxframe/dataframe/misc/value_counts.py +38 -4
  185. maxframe/dataframe/missing/checkna.py +14 -6
  186. maxframe/dataframe/missing/dropna.py +5 -0
  187. maxframe/dataframe/missing/fillna.py +1 -1
  188. maxframe/dataframe/missing/replace.py +7 -4
  189. maxframe/dataframe/reduction/__init__.py +35 -16
  190. maxframe/dataframe/reduction/aggregation.py +43 -14
  191. maxframe/dataframe/reduction/all.py +2 -2
  192. maxframe/dataframe/reduction/any.py +2 -2
  193. maxframe/dataframe/reduction/argmax.py +103 -0
  194. maxframe/dataframe/reduction/argmin.py +103 -0
  195. maxframe/dataframe/reduction/core.py +80 -24
  196. maxframe/dataframe/reduction/count.py +13 -9
  197. maxframe/dataframe/reduction/cov.py +166 -0
  198. maxframe/dataframe/reduction/cummax.py +2 -2
  199. maxframe/dataframe/reduction/cummin.py +2 -2
  200. maxframe/dataframe/reduction/cumprod.py +2 -2
  201. maxframe/dataframe/reduction/cumsum.py +2 -2
  202. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  203. maxframe/dataframe/reduction/idxmax.py +185 -0
  204. maxframe/dataframe/reduction/idxmin.py +185 -0
  205. maxframe/dataframe/reduction/kurtosis.py +37 -30
  206. maxframe/dataframe/reduction/max.py +2 -2
  207. maxframe/dataframe/reduction/mean.py +9 -7
  208. maxframe/dataframe/reduction/median.py +2 -2
  209. maxframe/dataframe/reduction/min.py +2 -2
  210. maxframe/dataframe/reduction/mode.py +144 -0
  211. maxframe/dataframe/reduction/nunique.py +19 -11
  212. maxframe/dataframe/reduction/prod.py +18 -13
  213. maxframe/dataframe/reduction/reduction_size.py +2 -2
  214. maxframe/dataframe/reduction/sem.py +13 -9
  215. maxframe/dataframe/reduction/skew.py +31 -27
  216. maxframe/dataframe/reduction/str_concat.py +10 -7
  217. maxframe/dataframe/reduction/sum.py +18 -14
  218. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  219. maxframe/dataframe/reduction/unique.py +20 -3
  220. maxframe/dataframe/reduction/var.py +16 -12
  221. maxframe/dataframe/reshape/__init__.py +38 -0
  222. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  223. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  224. maxframe/dataframe/reshape/unstack.py +114 -0
  225. maxframe/dataframe/sort/__init__.py +16 -1
  226. maxframe/dataframe/sort/argsort.py +68 -0
  227. maxframe/dataframe/sort/core.py +2 -1
  228. maxframe/dataframe/sort/nlargest.py +238 -0
  229. maxframe/dataframe/sort/nsmallest.py +228 -0
  230. maxframe/dataframe/sort/rank.py +147 -0
  231. maxframe/dataframe/statistics/__init__.py +3 -3
  232. maxframe/dataframe/statistics/corr.py +1 -0
  233. maxframe/dataframe/statistics/quantile.py +2 -2
  234. maxframe/dataframe/tests/test_typing.py +104 -0
  235. maxframe/dataframe/tests/test_utils.py +66 -2
  236. maxframe/dataframe/tseries/__init__.py +19 -0
  237. maxframe/dataframe/tseries/at_time.py +61 -0
  238. maxframe/dataframe/tseries/between_time.py +122 -0
  239. maxframe/dataframe/typing_.py +185 -0
  240. maxframe/dataframe/utils.py +125 -52
  241. maxframe/dataframe/window/aggregation.py +8 -4
  242. maxframe/dataframe/window/core.py +14 -1
  243. maxframe/dataframe/window/ewm.py +1 -3
  244. maxframe/dataframe/window/expanding.py +37 -35
  245. maxframe/dataframe/window/rolling.py +49 -39
  246. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  247. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  248. maxframe/env.py +7 -4
  249. maxframe/errors.py +2 -2
  250. maxframe/io/odpsio/schema.py +9 -3
  251. maxframe/io/odpsio/tableio.py +7 -2
  252. maxframe/io/odpsio/tests/test_schema.py +198 -83
  253. maxframe/learn/__init__.py +10 -2
  254. maxframe/learn/cluster/__init__.py +15 -0
  255. maxframe/learn/cluster/_kmeans.py +782 -0
  256. maxframe/learn/contrib/llm/core.py +18 -7
  257. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  258. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  259. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  260. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  261. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  262. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  263. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  264. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  265. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  266. maxframe/learn/contrib/llm/models/managed.py +76 -11
  267. maxframe/learn/contrib/llm/models/openai.py +72 -0
  268. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  269. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  270. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  271. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  272. maxframe/learn/contrib/llm/text.py +348 -42
  273. maxframe/learn/contrib/models.py +4 -1
  274. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  275. maxframe/learn/contrib/xgboost/core.py +113 -4
  276. maxframe/learn/contrib/xgboost/predict.py +4 -2
  277. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  278. maxframe/learn/contrib/xgboost/train.py +7 -2
  279. maxframe/learn/core.py +66 -0
  280. maxframe/learn/linear_model/_base.py +58 -1
  281. maxframe/learn/linear_model/_lin_reg.py +1 -1
  282. maxframe/learn/metrics/__init__.py +6 -0
  283. maxframe/learn/metrics/_classification.py +145 -0
  284. maxframe/learn/metrics/_ranking.py +477 -0
  285. maxframe/learn/metrics/_scorer.py +60 -0
  286. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  287. maxframe/learn/metrics/pairwise/core.py +77 -0
  288. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  289. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  290. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  291. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  292. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  293. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  294. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  295. maxframe/learn/metrics/tests/__init__.py +13 -0
  296. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  297. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  298. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  299. maxframe/learn/utils/__init__.py +2 -1
  300. maxframe/learn/utils/checks.py +1 -2
  301. maxframe/learn/utils/core.py +59 -0
  302. maxframe/learn/utils/extmath.py +79 -9
  303. maxframe/learn/utils/odpsio.py +262 -0
  304. maxframe/learn/utils/validation.py +2 -2
  305. maxframe/lib/compat.py +40 -0
  306. maxframe/lib/dtypes_extension/__init__.py +16 -1
  307. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  308. maxframe/lib/dtypes_extension/blob.py +304 -0
  309. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  310. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  311. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  312. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  313. maxframe/lib/filesystem/_oss_lib/common.py +124 -50
  314. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  315. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  316. maxframe/lib/filesystem/base.py +1 -1
  317. maxframe/lib/filesystem/core.py +1 -1
  318. maxframe/lib/filesystem/oss.py +115 -46
  319. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  320. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  321. maxframe/lib/wrapped_pickle.py +10 -0
  322. maxframe/opcodes.py +41 -15
  323. maxframe/protocol.py +12 -0
  324. maxframe/remote/core.py +4 -0
  325. maxframe/serialization/__init__.py +11 -2
  326. maxframe/serialization/arrow.py +38 -13
  327. maxframe/serialization/blob.py +32 -0
  328. maxframe/serialization/core.cp37-win32.pyd +0 -0
  329. maxframe/serialization/core.pyx +39 -1
  330. maxframe/serialization/exception.py +2 -4
  331. maxframe/serialization/numpy.py +11 -0
  332. maxframe/serialization/pandas.py +46 -9
  333. maxframe/serialization/serializables/core.py +2 -2
  334. maxframe/serialization/tests/test_serial.py +31 -4
  335. maxframe/tensor/__init__.py +38 -8
  336. maxframe/tensor/arithmetic/__init__.py +19 -10
  337. maxframe/tensor/arithmetic/core.py +2 -2
  338. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  339. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
  340. maxframe/tensor/core.py +6 -2
  341. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  342. maxframe/tensor/extensions/__init__.py +2 -0
  343. maxframe/tensor/extensions/apply_chunk.py +3 -3
  344. maxframe/tensor/extensions/rebalance.py +65 -0
  345. maxframe/tensor/fft/__init__.py +32 -0
  346. maxframe/tensor/fft/core.py +168 -0
  347. maxframe/tensor/fft/fft.py +112 -0
  348. maxframe/tensor/fft/fft2.py +118 -0
  349. maxframe/tensor/fft/fftfreq.py +80 -0
  350. maxframe/tensor/fft/fftn.py +123 -0
  351. maxframe/tensor/fft/fftshift.py +79 -0
  352. maxframe/tensor/fft/hfft.py +112 -0
  353. maxframe/tensor/fft/ifft.py +114 -0
  354. maxframe/tensor/fft/ifft2.py +115 -0
  355. maxframe/tensor/fft/ifftn.py +123 -0
  356. maxframe/tensor/fft/ifftshift.py +73 -0
  357. maxframe/tensor/fft/ihfft.py +93 -0
  358. maxframe/tensor/fft/irfft.py +118 -0
  359. maxframe/tensor/fft/irfft2.py +62 -0
  360. maxframe/tensor/fft/irfftn.py +114 -0
  361. maxframe/tensor/fft/rfft.py +116 -0
  362. maxframe/tensor/fft/rfft2.py +63 -0
  363. maxframe/tensor/fft/rfftfreq.py +87 -0
  364. maxframe/tensor/fft/rfftn.py +113 -0
  365. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  366. maxframe/tensor/linalg/__init__.py +7 -0
  367. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  368. maxframe/tensor/linalg/cholesky.py +117 -0
  369. maxframe/tensor/linalg/einsum.py +339 -0
  370. maxframe/tensor/linalg/lstsq.py +100 -0
  371. maxframe/tensor/linalg/matrix_norm.py +75 -0
  372. maxframe/tensor/linalg/norm.py +249 -0
  373. maxframe/tensor/linalg/solve.py +72 -0
  374. maxframe/tensor/linalg/solve_triangular.py +2 -2
  375. maxframe/tensor/linalg/vector_norm.py +113 -0
  376. maxframe/tensor/misc/__init__.py +24 -1
  377. maxframe/tensor/misc/argwhere.py +72 -0
  378. maxframe/tensor/misc/array_split.py +46 -0
  379. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  380. maxframe/tensor/misc/copyto.py +130 -0
  381. maxframe/tensor/misc/delete.py +104 -0
  382. maxframe/tensor/misc/dsplit.py +68 -0
  383. maxframe/tensor/misc/ediff1d.py +74 -0
  384. maxframe/tensor/misc/expand_dims.py +85 -0
  385. maxframe/tensor/misc/flip.py +90 -0
  386. maxframe/tensor/misc/fliplr.py +64 -0
  387. maxframe/tensor/misc/flipud.py +68 -0
  388. maxframe/tensor/misc/hsplit.py +85 -0
  389. maxframe/tensor/misc/insert.py +139 -0
  390. maxframe/tensor/misc/moveaxis.py +83 -0
  391. maxframe/tensor/misc/result_type.py +88 -0
  392. maxframe/tensor/misc/roll.py +124 -0
  393. maxframe/tensor/misc/rollaxis.py +77 -0
  394. maxframe/tensor/misc/shape.py +89 -0
  395. maxframe/tensor/misc/split.py +190 -0
  396. maxframe/tensor/misc/tile.py +109 -0
  397. maxframe/tensor/misc/vsplit.py +74 -0
  398. maxframe/tensor/reduction/array_equal.py +2 -1
  399. maxframe/tensor/sort/__init__.py +2 -0
  400. maxframe/tensor/sort/argpartition.py +98 -0
  401. maxframe/tensor/sort/partition.py +228 -0
  402. maxframe/tensor/spatial/__init__.py +15 -0
  403. maxframe/tensor/spatial/distance/__init__.py +17 -0
  404. maxframe/tensor/spatial/distance/cdist.py +421 -0
  405. maxframe/tensor/spatial/distance/pdist.py +398 -0
  406. maxframe/tensor/spatial/distance/squareform.py +153 -0
  407. maxframe/tensor/special/__init__.py +159 -21
  408. maxframe/tensor/special/airy.py +55 -0
  409. maxframe/tensor/special/bessel.py +199 -0
  410. maxframe/tensor/special/core.py +65 -4
  411. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  412. maxframe/tensor/special/ellip_harm.py +55 -0
  413. maxframe/tensor/special/err_fresnel.py +223 -0
  414. maxframe/tensor/special/gamma_funcs.py +303 -0
  415. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  416. maxframe/tensor/special/info_theory.py +189 -0
  417. maxframe/tensor/special/misc.py +21 -0
  418. maxframe/tensor/statistics/__init__.py +6 -0
  419. maxframe/tensor/statistics/corrcoef.py +77 -0
  420. maxframe/tensor/statistics/cov.py +222 -0
  421. maxframe/tensor/statistics/digitize.py +126 -0
  422. maxframe/tensor/statistics/histogram.py +520 -0
  423. maxframe/tensor/statistics/median.py +85 -0
  424. maxframe/tensor/statistics/ptp.py +89 -0
  425. maxframe/tensor/utils.py +3 -3
  426. maxframe/tests/test_udf.py +61 -0
  427. maxframe/tests/test_utils.py +51 -6
  428. maxframe/tests/utils.py +0 -2
  429. maxframe/typing_.py +2 -0
  430. maxframe/udf.py +130 -9
  431. maxframe/utils.py +254 -27
  432. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
  433. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
  434. maxframe_client/fetcher.py +35 -4
  435. maxframe_client/session/odps.py +7 -2
  436. maxframe_client/session/task.py +8 -1
  437. maxframe_client/tests/test_fetcher.py +76 -3
  438. maxframe_client/tests/test_session.py +28 -1
  439. maxframe/dataframe/arrays.py +0 -864
  440. /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  441. /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  442. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  443. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,604 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import re
16
+ from datetime import date, datetime, time, timedelta
17
+ from decimal import Decimal
18
+ from typing import Union
19
+
20
+ import numpy as np
21
+ import pandas as pd
22
+ import pyarrow as pa
23
+ from pandas import DatetimeTZDtype, Timedelta, Timestamp
24
+ from pandas.api.extensions import (
25
+ ExtensionArray,
26
+ ExtensionDtype,
27
+ register_extension_dtype,
28
+ )
29
+ from pandas.api.types import is_integer, is_scalar
30
+
31
+ try:
32
+ from pandas._libs.arrays import NDArrayBacked
33
+ except ImportError:
34
+ NDArrayBacked = type("NDArrayBacked", (object,), {"is_fake": True})
35
+
36
+ from ...lib.version import parse as parse_version
37
+ from ..compat import cached_property
38
+
39
+ """
40
+ This module is copied from pandas to use in framedriver as it can only run on python3.7,
41
+ which has the highest supported pandas version 1.3.5 without ArrowDtype defined.
42
+ Once the framedriver can run on python3.11, this class can be removed.
43
+
44
+ This module will be removed in released SDK.
45
+ """
46
+
47
+ _dtype_search_re = re.compile(r"[\[\(].*[\]\)]")
48
+ # when pyarrow<4.0, it fixes type of generated pandas block as ExtensionBlock
49
+ # which needs special handling
50
+ _pyarrow_fix_extension_block = parse_version(pa.__version__).major < 4
51
+ _pd_string_with_storage_option = parse_version(pd.__version__).release[:2] >= (1, 3)
52
+
53
+
54
class FakeCategoricalDtypeType(type):
    """
    Marker metaclass used as the type of FakeCategoricalDtype.

    It adds nothing on top of ``type``; per the original note it exists so
    that subclass checks against the dtype's type can be controlled.
    """
60
+
61
+
62
+ def _unpack_tuple_and_ellipses(item: tuple):
63
+ """
64
+ Possibly unpack arr[..., n] to arr[n]
65
+ """
66
+ if len(item) > 1:
67
+ # Note: we are assuming this indexing is being done on a 1D arraylike
68
+ if item[0] is Ellipsis:
69
+ item = item[1:]
70
+ elif item[-1] is Ellipsis:
71
+ item = item[:-1]
72
+
73
+ if len(item) > 1:
74
+ raise IndexError("too many indices for array.")
75
+
76
+ item = item[0]
77
+ return item
78
+
79
+
80
+ def _validate_indices(indices: np.ndarray, n: int) -> None:
81
+ if len(indices):
82
+ min_idx = indices.min()
83
+ if min_idx < -1:
84
+ msg = f"'indices' contains values less than allowed ({min_idx} < -1)"
85
+ raise ValueError(msg)
86
+
87
+ max_idx = indices.max()
88
+ if max_idx >= n:
89
+ raise IndexError("indices are out-of-bounds")
90
+
91
+
92
def to_pyarrow_type(dtype):
    """
    Translate ``dtype`` into a pyarrow ``DataType`` instance.

    Returns
    -------
    The matching pyarrow type, or None when ``dtype`` is falsy or pyarrow
    reports that it cannot convert it.
    """
    if isinstance(dtype, FakeArrowDtype):
        return dtype.pyarrow_dtype
    if isinstance(dtype, pa.DataType):
        return dtype
    if isinstance(dtype, DatetimeTZDtype):
        return pa.timestamp(dtype.unit, dtype.tz)
    if isinstance(dtype, pd.StringDtype):
        return pa.string()
    if not dtype:
        return None

    try:
        # Python types are accepted as well; not every numpy type is handled.
        return pa.from_numpy_dtype(dtype)
    except pa.ArrowNotImplementedError:
        return None
112
+
113
+
114
+ class FakeArrowExtensionArray(ExtensionArray, NDArrayBacked):
115
+ """
116
+ In framedriver, we use arrow array as a bridge between pandas and odps datatypes,
117
+ so we only generate empty array here to simplify the code.
118
+ """
119
+
120
def __init__(self, values: Union[pa.Array, pa.ChunkedArray]) -> None:
    """
    Wrap a pyarrow array.

    A ``pa.Array`` is wrapped into a one-chunk ``pa.ChunkedArray``; a
    ``pa.ChunkedArray`` is stored as given.

    Raises
    ------
    ValueError
        If ``values`` is neither a ``pa.Array`` nor a ``pa.ChunkedArray``.
    """
    if isinstance(values, pa.ChunkedArray):
        chunked = values
    elif isinstance(values, pa.Array):
        chunked = pa.chunked_array([values])
    else:
        raise ValueError(
            f"Unsupported type '{type(values)}' for ArrowExtensionArray"
        )
    self._pa_array = chunked

    fake_dtype = FakeArrowDtype(chunked.type)
    if getattr(NDArrayBacked, "is_fake", False):
        # NDArrayBacked is the local placeholder class: just keep the dtype.
        self._dtype = fake_dtype
    else:
        # Real NDArrayBacked: initialise it with an empty ndarray and the dtype.
        NDArrayBacked.__init__(self, np.array([]), fake_dtype)
135
+
136
@classmethod
def _from_sequence(
    cls, scalars, dtype=None, copy: bool = False
) -> "FakeArrowExtensionArray":
    """
    Build a new array from a sequence of scalars or from a pyarrow array.

    ``dtype`` is converted with ``to_pyarrow_type`` and used as the pyarrow
    type when ``scalars`` is not already a pyarrow array. ``copy`` is
    accepted but not used.
    """
    arrow_type = to_pyarrow_type(dtype)
    if isinstance(scalars, (pa.Array, pa.ChunkedArray)):
        return cls(scalars)

    # pyarrow raises on empty numpy arrays, so hand it an empty list instead
    values = [] if len(scalars) == 0 else scalars
    return cls(pa.array(values, type=arrow_type))
153
+
154
+ @classmethod
155
+ def _from_sequence_of_strings(
156
+ cls, strings, dtype=None, copy: bool = False
157
+ ) -> "FakeArrowExtensionArray":
158
+ return cls._from_sequence(strings, dtype, copy)
159
+
160
@property
def dtype(self) -> ExtensionDtype:
    """
    The extension dtype held in ``self._dtype`` (ExtensionArray.dtype).
    """
    stored = self._dtype
    return stored
166
+
167
@property
def shape(self):
    """One-element tuple holding the length of the backing pyarrow array."""
    n_rows = self._pa_array.length()
    return (n_rows,)
170
+
171
@property
def nbytes(self) -> int:
    """
    Byte size of the data, as reported by the backing pyarrow array.
    """
    backing = self._pa_array
    return backing.nbytes
177
+
178
+ def __len__(self) -> int:
179
+ """
180
+ Return an empty array length.
181
+ """
182
+ return len(self._pa_array)
183
+
184
+ def _from_pyarrow_array(self, pa_array):
185
+ return type(self)(pa_array)
186
+
187
+ def __getitem__(self, item):
188
+ # code from ArrowExtensionArray in pandas>=1.5
189
+ if isinstance(item, np.ndarray):
190
+ if not len(item):
191
+ pa_dtype = self._dtype.pyarrow_dtype
192
+ result = pa.chunked_array([], type=pa_dtype)
193
+ return self._from_pyarrow_array(result)
194
+ elif item.dtype.kind in "iu":
195
+ return self.take(item)
196
+ elif item.dtype.kind == "b":
197
+ return self._from_pyarrow_array(self._pa_array.filter(item))
198
+ else:
199
+ raise IndexError(
200
+ "Only integers, slices and integer or "
201
+ "boolean arrays are valid indices."
202
+ )
203
+ elif isinstance(item, tuple):
204
+ item = _unpack_tuple_and_ellipses(item)
205
+
206
+ if item is Ellipsis:
207
+ # TODO: should be handled by pyarrow?
208
+ item = slice(None)
209
+
210
+ if is_scalar(item) and not is_integer(item):
211
+ # e.g. "foo" or 2.5
212
+ # exception message copied from numpy
213
+ raise IndexError(
214
+ r"only integers, slices (`:`), ellipsis (`...`), numpy.newaxis "
215
+ r"(`None`) and integer or boolean arrays are valid indices"
216
+ )
217
+ # We are not an array indexer, so maybe e.g. a slice or integer
218
+ # indexer. We dispatch to pyarrow.
219
+ if isinstance(item, slice):
220
+ # Arrow bug https://github.com/apache/arrow/issues/38768
221
+ if item.start == item.stop:
222
+ pass
223
+ elif (
224
+ item.stop is not None
225
+ and item.stop < -len(self)
226
+ and item.step is not None
227
+ and item.step < 0
228
+ ):
229
+ item = slice(item.start, None, item.step)
230
+ value = self._pa_array[item]
231
+ if isinstance(value, pa.ChunkedArray):
232
+ return self._from_pyarrow_array(value)
233
+ else:
234
+ pa_type = self._pa_array.type
235
+ scalar = value.as_py()
236
+ if scalar is None:
237
+ return self._dtype.na_value
238
+ elif pa.types.is_timestamp(pa_type) and pa_type.unit != "ns":
239
+ # GH 53326
240
+ return Timestamp(scalar).as_unit(pa_type.unit)
241
+ elif pa.types.is_duration(pa_type) and pa_type.unit != "ns":
242
+ # GH 53326
243
+ return Timedelta(scalar).as_unit(pa_type.unit)
244
+ else:
245
+ return scalar
246
+
247
def __arrow_array__(self, type=None):
    """Return the wrapped pyarrow array for the Arrow array protocol.

    The ``type`` argument is accepted but not consulted: the stored
    ``_pa_array`` object is handed back unchanged.
    """
    wrapped = self._pa_array
    return wrapped
250
+
251
def copy(self) -> "FakeArrowExtensionArray":
    """Build a new array object around the same pyarrow data.

    The current ``_pa_array`` is passed straight to
    ``_from_pyarrow_array``; this method does not duplicate the
    underlying data itself.
    """
    backing = self._pa_array
    return self._from_pyarrow_array(backing)
253
+
254
def isna(self) -> np.ndarray:
    """Return a boolean numpy mask marking the null entries.

    When the backing array reports no nulls, or only nulls, the mask is
    built directly with numpy instead of asking pyarrow for it.
    """
    # code from ArrowExtensionArray in pandas>=1.5
    backing = self._pa_array
    n_missing = backing.null_count
    size = len(self)

    if n_missing == 0:
        # Nothing is null (this also covers the empty array).
        return np.zeros(size, dtype=np.bool_)
    if n_missing == size:
        # Everything is null.
        return np.ones(size, dtype=np.bool_)

    return backing.is_null().to_numpy()
263
+
264
def take(self, indices, allow_fill=False, fill_value=None):
    """Select elements by position and return them as a new array.

    Parameters
    ----------
    indices : sequence of int
        Positions to take.
    allow_fill : bool, default False
        When False, negative positions are shifted by the array length
        (i.e. counted from the end). When True, negative positions are
        masked out before the pyarrow ``take`` and then overwritten with
        ``fill_value`` unless ``pd.isna(fill_value)`` is true.
    fill_value : object, optional
        Value written into the masked positions when ``allow_fill`` is True.

    Raises
    ------
    IndexError
        If the array is empty and any position is non-negative, or if
        the largest position is not smaller than the array length.
    """
    # code from ArrowExtensionArray in pandas>=1.5
    positions = np.asanyarray(indices)
    length = len(self._pa_array)

    if length == 0 and (positions >= 0).any():
        raise IndexError("cannot do a non-empty take")
    if positions.size > 0 and positions.max() >= length:
        raise IndexError("out of bounds value in 'indices'.")

    negative = positions < 0

    if not allow_fill:
        # TODO(ARROW-9432): Treat negative indices as indices from the right.
        if negative.any():
            # Work on a copy so the caller's array is left untouched.
            positions = np.copy(positions)
            positions[negative] += length
        return self._from_pyarrow_array(self._pa_array.take(positions))

    if not negative.any():
        # Nothing to fill
        return self._from_pyarrow_array(self._pa_array.take(indices))

    _validate_indices(positions, length)
    # TODO(ARROW-9433): Treat negative indices as NULL
    masked_positions = pa.array(positions, mask=negative)
    taken = self._from_pyarrow_array(self._pa_array.take(masked_positions))
    if not pd.isna(fill_value):
        # TODO: ArrowNotImplementedError: Function fill_null has no
        # kernel matching input types (array[string], scalar[string]),
        # so the fill value is assigned through item assignment instead.
        taken[negative] = fill_value
    return taken
298
+
299
@classmethod
def _concat_same_type(cls, to_concat):
    """Concatenate several arrays of this type into one.

    All chunks of every input's ``_pa_array`` are gathered into a single
    pyarrow chunked array, typed after the first input, and wrapped via
    the first input's ``_from_pyarrow_array``.
    """
    # code from ArrowExtensionArray in pandas>=1.5
    chunks = []
    for ea in to_concat:
        chunks.extend(ea._pa_array.iterchunks())

    first = to_concat[0]
    first_dtype = first.dtype
    if first_dtype == "string":
        # StringDtype has no attribute pyarrow_dtype
        pa_dtype = pa.large_string()
    else:
        pa_dtype = first_dtype.pyarrow_dtype

    combined = pa.chunked_array(chunks, type=pa_dtype)
    return first._from_pyarrow_array(combined)
310
+
311
+
312
@register_extension_dtype
class FakeArrowDtype(ExtensionDtype):
    """Extension dtype that wraps a pyarrow data type.

    The wrapped type is stored in ``pyarrow_dtype`` and the dtype name is
    ``f"{pyarrow_dtype}[pyarrow]"``. Much of the logic appears to be
    modelled on pandas' ``ArrowDtype`` (see the error message in
    ``construct_from_string``).
    """

    def __new__(cls, pyarrow_dtype):
        # TODO: here we avoid returning FakeDatetimeTZArrowDtype to make
        # the behavior of timestamp consistent with other types when
        # ExtensionDtype is fixed on pyarrow. Remove `_pyarrow_fix_extension_block`
        # condition once we can enforce pyarrow>=4.0
        if (
            not _pyarrow_fix_extension_block
            and cls is FakeArrowDtype
            and pa.types.is_timestamp(pyarrow_dtype)
            and pyarrow_dtype.unit == "ns"
        ):
            # Need special logic for DatetimeTZDtype
            return FakeDatetimeTZArrowDtype(pyarrow_dtype)
        elif _pd_string_with_storage_option and pyarrow_dtype == pa.string():
            # Use builtin StringDtype with arrow support to
            # avoid compatibility issues
            return pd.StringDtype(storage="pyarrow")
        return object.__new__(cls)

    def __init__(self, pyarrow_dtype):
        super().__init__()
        # The wrapped pyarrow data type; every property below derives from it.
        self.pyarrow_dtype = pyarrow_dtype

    @classmethod
    def construct_array_type(cls):
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return FakeArrowExtensionArray

    @classmethod
    def _parse_temporal_dtype_string(cls, string: str) -> "FakeArrowDtype":
        """
        Construct a temporal dtype from a parameterised type string.

        Parameters
        ----------
        string : str
            Type string with the trailing "[pyarrow]" already removed and
            containing a "[", e.g. "timestamp[s, tz=UTC]".

        Raises
        ------
        ValueError
            If the string is not closed by "]" or a timestamp string lacks
            the "unit, tz" pair.
        NotImplementedError
            If the type before "[" is not "timestamp".
        """
        head, tail = string.split("[", 1)

        if not tail.endswith("]"):
            raise ValueError(f"'{string}' is not terminated by ']'")
        tail = tail[:-1]

        if head == "timestamp":
            if "," not in tail:
                # A bare unit would already have been resolved through
                # pa.type_for_alias by the caller.
                raise ValueError(f"'{string}' has no timezone parameter")
            unit, tz = tail.split(",", 1)
            unit = unit.strip()
            tz = tz.strip()
            if tz.startswith("tz="):
                tz = tz[3:]

            return cls(pa.timestamp(unit, tz=tz))

        raise NotImplementedError(string)

    @classmethod
    def construct_from_string(cls, string: str):
        """
        Construct this type from a string.

        Parameters
        ----------
        string : str
            string should follow the format f"{pyarrow_type}[pyarrow]"
            e.g. int64[pyarrow]

        Raises
        ------
        TypeError
            If ``string`` is not a str, does not end with "[pyarrow]", is
            exactly "string[pyarrow]", or names no valid pyarrow type.
        NotImplementedError
            If the string carries type parameters that cannot be parsed.
        """
        if not isinstance(string, str):
            raise TypeError(
                f"'construct_from_string' expects a string, got {type(string)}"
            )
        if not string.endswith("[pyarrow]"):
            raise TypeError(f"'{string}' must end with '[pyarrow]'")
        if string == "string[pyarrow]":
            # Ensure Registry.find skips ArrowDtype to use StringDtype instead
            raise TypeError("string[pyarrow] should be constructed by StringDtype")

        base_type = string[:-9]  # get rid of "[pyarrow]"
        try:
            pa_dtype = pa.type_for_alias(base_type)
        except ValueError as err:
            has_parameters = _dtype_search_re.search(base_type)
            if has_parameters:
                # Fallback to try common temporal types
                try:
                    return cls._parse_temporal_dtype_string(base_type)
                except (NotImplementedError, ValueError):
                    # Fall through to raise with nice exception message below
                    pass

                raise NotImplementedError(
                    "Passing pyarrow type specific parameters "
                    f"({has_parameters.group()}) in the string is not supported. "
                    "Please construct an ArrowDtype object with a pyarrow_dtype "
                    "instance with specific parameters."
                ) from err
            raise TypeError(f"'{base_type}' is not a valid pyarrow data type.") from err
        return cls(pa_dtype)

    @property
    def _is_numeric(self) -> bool:
        """
        Whether columns with this dtype should be considered numeric.
        """
        # TODO: pa.types.is_boolean?
        return (
            pa.types.is_integer(self.pyarrow_dtype)
            or pa.types.is_floating(self.pyarrow_dtype)
            or pa.types.is_decimal(self.pyarrow_dtype)
        )

    @property
    def _is_boolean(self) -> bool:
        """
        Whether this dtype should be considered boolean.
        """
        return pa.types.is_boolean(self.pyarrow_dtype)

    def _get_common_dtype(self, dtypes):
        """
        Find a dtype of this class that can hold all of ``dtypes``.

        Returns None when the common type is not a numpy dtype or pyarrow
        cannot convert it.
        """
        # We unwrap any masked dtypes, find the common dtype we would use
        # for that, then re-mask the result.
        # Mirrors BaseMaskedDtype
        from pandas.core.dtypes.cast import find_common_type

        null_dtype = type(self)(pa.null())

        new_dtype = find_common_type(
            [
                dtype.numpy_dtype if isinstance(dtype, FakeArrowDtype) else dtype
                for dtype in dtypes
                if dtype != null_dtype
            ]
        )
        if not isinstance(new_dtype, np.dtype):
            return None
        try:
            pa_dtype = pa.from_numpy_dtype(new_dtype)
            return type(self)(pa_dtype)
        except NotImplementedError:
            return None

    @property
    def type(self):
        """
        Returns associated scalar type.
        """
        pa_type = self.pyarrow_dtype
        if pa.types.is_integer(pa_type):
            return int
        elif pa.types.is_floating(pa_type):
            return float
        elif pa.types.is_string(pa_type) or pa.types.is_large_string(pa_type):
            return str
        elif (
            pa.types.is_binary(pa_type)
            or pa.types.is_fixed_size_binary(pa_type)
            or pa.types.is_large_binary(pa_type)
        ):
            return bytes
        elif pa.types.is_boolean(pa_type):
            return bool
        elif pa.types.is_duration(pa_type):
            if pa_type.unit == "ns":
                return Timedelta
            else:
                return timedelta
        elif pa.types.is_timestamp(pa_type):
            # TODO: here we avoid returning Timestamp when ExtensionDtype
            # is fixed on pyarrow. Remove `_pyarrow_fix_extension_block`
            # condition once we can enforce pyarrow>=4.0
            if not _pyarrow_fix_extension_block and pa_type.unit == "ns":
                return Timestamp
            else:
                return datetime
        elif pa.types.is_date(pa_type):
            return date
        elif pa.types.is_time(pa_type):
            return time
        elif pa.types.is_decimal(pa_type):
            return Decimal
        elif pa.types.is_dictionary(pa_type):
            # TODO: Potentially change this & CategoricalDtype.type to
            # something more representative of the scalar
            return FakeCategoricalDtypeType
        elif pa.types.is_list(pa_type) or pa.types.is_large_list(pa_type):
            return list
        elif pa.types.is_fixed_size_list(pa_type):
            return list
        elif pa.types.is_map(pa_type):
            return list
        elif pa.types.is_struct(pa_type):
            return dict
        elif pa.types.is_null(pa_type):
            # TODO: None? pd.NA? pa.null?
            return type(pa_type)
        elif isinstance(pa_type, pa.ExtensionType):
            # Resolve through the storage type of the pyarrow extension type.
            return type(self)(pa_type.storage_type).type
        raise NotImplementedError(pa_type)

    @property
    def name(self) -> str:
        """
        A string identifying the data type.
        """
        return f"{str(self.pyarrow_dtype)}[pyarrow]"

    @cached_property
    def numpy_dtype(self) -> np.dtype:
        """Return an instance of the related numpy dtype"""
        if pa.types.is_timestamp(self.pyarrow_dtype):
            # pa.timestamp(unit).to_pandas_dtype() returns ns units
            # regardless of the pyarrow timestamp units.
            # This can be removed if/when pyarrow addresses it:
            # https://github.com/apache/arrow/issues/34462
            return np.dtype(f"datetime64[{self.pyarrow_dtype.unit}]")
        if pa.types.is_duration(self.pyarrow_dtype):
            # pa.duration(unit).to_pandas_dtype() returns ns units
            # regardless of the pyarrow duration units
            # This can be removed if/when pyarrow addresses it:
            # https://github.com/apache/arrow/issues/34462
            return np.dtype(f"timedelta64[{self.pyarrow_dtype.unit}]")
        if pa.types.is_string(self.pyarrow_dtype) or pa.types.is_large_string(
            self.pyarrow_dtype
        ):
            # pa.string().to_pandas_dtype() = object which we don't want
            return np.dtype(str)
        try:
            return np.dtype(self.pyarrow_dtype.to_pandas_dtype())
        except (NotImplementedError, TypeError):
            return np.dtype(object)

    @cached_property
    def kind(self) -> str:
        """Return the numpy kind code, forced to "M" for timestamps."""
        if pa.types.is_timestamp(self.pyarrow_dtype):
            # To mirror DatetimeTZDtype
            return "M"
        return self.numpy_dtype.kind

    @cached_property
    def itemsize(self) -> int:
        """Return the number of bytes in this dtype"""
        return self.numpy_dtype.itemsize

    def __eq__(self, other: object) -> bool:
        # Instances of this class compare by wrapped pyarrow type; anything
        # else is delegated to the base class comparison.
        if not isinstance(other, type(self)):
            return super().__eq__(other)
        return self.pyarrow_dtype == other.pyarrow_dtype

    def __hash__(self) -> int:
        # make myself hashable
        return hash(str(self))

    def __from_arrow__(self, array):
        """Cast ``array`` to the wrapped pyarrow type and wrap the result."""
        array_class = self.construct_array_type()
        arr = array.cast(self.pyarrow_dtype, safe=True)
        return array_class(arr)
548
+
549
+
550
class FakeDatetimeTZExtensionArray(FakeArrowExtensionArray):
    """
    Workaround array class for DatetimeTZDtype in pandas when
    arrow type is timestamp[ns]

    The array tracks an ``_ndim`` flag. It is 1 after construction and is
    set to 2 on the object returned by ``reshape(1, -1)``, in which case
    the array reports shape ``(1, n)``.
    """

    def __init__(self, *args, **kw):
        super().__init__(*args, **kw)
        # Freshly built arrays are always one-dimensional.
        self._ndim = 1

    @property
    def shape(self):
        """Return ``(n,)`` for the 1-D view and ``(1, n)`` for the 2-D view."""
        return (
            (self._pa_array.length(),)
            if self._ndim == 1
            else (1, self._pa_array.length())
        )

    @property
    def ndim(self) -> int:
        """Return the number of dimensions currently reported (1 or 2)."""
        return self._ndim

    def __len__(self) -> int:
        # In the 2-D view the length is the size of the first axis.
        return len(self._pa_array) if self._ndim == 1 else self.shape[0]

    def __getitem__(self, item):
        # In the 2-D view, index 0 selects the single row, returned as a
        # new array built from the same pyarrow data.
        # NOTE(review): `item == 0` is evaluated for every indexer type in
        # the 2-D view; a multi-element ndarray indexer would fail when
        # truth-tested here - confirm callers only pass scalars.
        if self._ndim == 2 and item == 0:
            return self._from_pyarrow_array(self._pa_array)
        return super().__getitem__(item)

    def reshape(self, *args, **kwargs):
        """
        Return a 2-D ``(1, n)`` view over the same pyarrow data.

        The target shape may be given as ``reshape(1, -1)`` or as a single
        tuple/list ``reshape((1, -1))``. Keyword arguments are accepted
        but ignored.

        Raises
        ------
        ValueError
            If the requested shape is anything other than ``(1, -1)``.
        """
        if len(args) == 1 and isinstance(args[0], (tuple, list)):
            # Accept the numpy-style single-sequence form of the shape.
            args = tuple(args[0])
        if args != (1, -1):
            raise ValueError("Only support reshape to (1, -1)")
        new_arr = self._from_pyarrow_array(self._pa_array)
        new_arr._ndim = 2
        return new_arr
586
+
587
+
588
class FakeDatetimeTZArrowDtype(DatetimeTZDtype, FakeArrowDtype):
    """
    Workaround dtype class for DatetimeTZDtype in pandas when
    arrow type is timestamp[ns]
    """

    def __init__(self, pyarrow_type):
        # Imported here rather than at module level, as in the original
        # (presumably to avoid an import cycle - confirm).
        from ... import options

        # Both base initialisers are invoked explicitly: first the arrow
        # side, then the pandas datetime-with-timezone side.
        FakeArrowDtype.__init__(self, pyarrow_type)

        unit = pyarrow_type.unit
        # Use the configured local timezone when the arrow type has none.
        timezone = pyarrow_type.tz or options.local_timezone
        DatetimeTZDtype.__init__(self, unit=unit, tz=timezone)

    @classmethod
    def construct_array_type(cls):
        """Return the array class paired with this dtype."""
        return FakeDatetimeTZExtensionArray