maxframe 2.0.0b2__cp37-cp37m-win32.whl → 2.3.0rc1__cp37-cp37m-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (443) hide show
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp37-win32.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +9 -8
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +38 -1
  15. maxframe/codegen/spe/dataframe/misc.py +11 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +32 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +39 -18
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reduction.py +13 -0
  28. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  29. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  30. maxframe/codegen/spe/dataframe/tseries.py +9 -0
  31. maxframe/codegen/spe/learn/contrib/lightgbm.py +4 -3
  32. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  33. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  34. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  35. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  36. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  37. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  38. maxframe/codegen/spe/tensor/__init__.py +3 -0
  39. maxframe/codegen/spe/tensor/datasource.py +1 -0
  40. maxframe/codegen/spe/tensor/fft.py +74 -0
  41. maxframe/codegen/spe/tensor/linalg.py +29 -2
  42. maxframe/codegen/spe/tensor/misc.py +79 -25
  43. maxframe/codegen/spe/tensor/spatial.py +45 -0
  44. maxframe/codegen/spe/tensor/statistics.py +44 -0
  45. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  46. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  47. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  48. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  49. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  50. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  51. maxframe/codegen/spe/utils.py +2 -0
  52. maxframe/config/config.py +73 -9
  53. maxframe/config/tests/test_validators.py +13 -1
  54. maxframe/config/validators.py +49 -0
  55. maxframe/conftest.py +54 -17
  56. maxframe/core/accessor.py +2 -2
  57. maxframe/core/base.py +2 -1
  58. maxframe/core/entity/core.py +5 -0
  59. maxframe/core/entity/tileables.py +3 -1
  60. maxframe/core/graph/core.cp37-win32.pyd +0 -0
  61. maxframe/core/graph/entity.py +8 -3
  62. maxframe/core/mode.py +6 -1
  63. maxframe/core/operator/base.py +9 -2
  64. maxframe/core/operator/core.py +10 -2
  65. maxframe/core/operator/utils.py +13 -0
  66. maxframe/dataframe/__init__.py +12 -5
  67. maxframe/dataframe/accessors/__init__.py +1 -1
  68. maxframe/dataframe/accessors/compat.py +45 -0
  69. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  70. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  71. maxframe/dataframe/accessors/dict_/core.py +48 -0
  72. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  73. maxframe/dataframe/accessors/dict_/length.py +7 -16
  74. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  75. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  76. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  77. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  78. maxframe/dataframe/accessors/list_/core.py +48 -0
  79. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  80. maxframe/dataframe/accessors/list_/length.py +7 -16
  81. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  82. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  83. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  84. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  85. maxframe/dataframe/accessors/struct_/core.py +43 -0
  86. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  87. maxframe/dataframe/accessors/struct_/field.py +123 -0
  88. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  89. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  90. maxframe/dataframe/arithmetic/__init__.py +18 -4
  91. maxframe/dataframe/arithmetic/between.py +106 -0
  92. maxframe/dataframe/arithmetic/dot.py +237 -0
  93. maxframe/dataframe/arithmetic/maximum.py +33 -0
  94. maxframe/dataframe/arithmetic/minimum.py +33 -0
  95. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  96. maxframe/dataframe/core.py +161 -224
  97. maxframe/dataframe/datasource/__init__.py +18 -0
  98. maxframe/dataframe/datasource/core.py +6 -0
  99. maxframe/dataframe/datasource/direct.py +57 -0
  100. maxframe/dataframe/datasource/from_dict.py +124 -0
  101. maxframe/dataframe/datasource/from_index.py +1 -1
  102. maxframe/dataframe/datasource/from_records.py +77 -0
  103. maxframe/dataframe/datasource/from_tensor.py +109 -41
  104. maxframe/dataframe/datasource/read_csv.py +21 -14
  105. maxframe/dataframe/datasource/read_odps_query.py +29 -6
  106. maxframe/dataframe/datasource/read_odps_table.py +32 -10
  107. maxframe/dataframe/datasource/read_parquet.py +38 -39
  108. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  109. maxframe/dataframe/datastore/__init__.py +11 -1
  110. maxframe/dataframe/datastore/direct.py +268 -0
  111. maxframe/dataframe/datastore/to_csv.py +29 -41
  112. maxframe/dataframe/datastore/to_odps.py +36 -4
  113. maxframe/dataframe/extensions/__init__.py +20 -4
  114. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  115. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  116. maxframe/dataframe/extensions/collect_kv.py +126 -0
  117. maxframe/dataframe/extensions/extract_kv.py +177 -0
  118. maxframe/dataframe/extensions/flatjson.py +2 -1
  119. maxframe/dataframe/extensions/map_reduce.py +263 -0
  120. maxframe/dataframe/extensions/rebalance.py +62 -0
  121. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  122. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  123. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  124. maxframe/dataframe/groupby/__init__.py +17 -2
  125. maxframe/dataframe/groupby/aggregation.py +86 -49
  126. maxframe/dataframe/groupby/apply.py +1 -1
  127. maxframe/dataframe/groupby/apply_chunk.py +19 -5
  128. maxframe/dataframe/groupby/core.py +116 -16
  129. maxframe/dataframe/groupby/cum.py +4 -25
  130. maxframe/dataframe/groupby/expanding.py +264 -0
  131. maxframe/dataframe/groupby/fill.py +1 -1
  132. maxframe/dataframe/groupby/getitem.py +12 -5
  133. maxframe/dataframe/groupby/head.py +11 -1
  134. maxframe/dataframe/groupby/rank.py +136 -0
  135. maxframe/dataframe/groupby/rolling.py +206 -0
  136. maxframe/dataframe/groupby/shift.py +114 -0
  137. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  138. maxframe/dataframe/indexing/__init__.py +22 -2
  139. maxframe/dataframe/indexing/droplevel.py +195 -0
  140. maxframe/dataframe/indexing/filter.py +169 -0
  141. maxframe/dataframe/indexing/get_level_values.py +76 -0
  142. maxframe/dataframe/indexing/iat.py +45 -0
  143. maxframe/dataframe/indexing/iloc.py +152 -12
  144. maxframe/dataframe/indexing/insert.py +46 -18
  145. maxframe/dataframe/indexing/loc.py +287 -7
  146. maxframe/dataframe/indexing/reindex.py +14 -5
  147. maxframe/dataframe/indexing/rename.py +6 -0
  148. maxframe/dataframe/indexing/rename_axis.py +2 -2
  149. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  150. maxframe/dataframe/indexing/reset_index.py +33 -6
  151. maxframe/dataframe/indexing/sample.py +8 -0
  152. maxframe/dataframe/indexing/setitem.py +3 -3
  153. maxframe/dataframe/indexing/swaplevel.py +185 -0
  154. maxframe/dataframe/indexing/take.py +99 -0
  155. maxframe/dataframe/indexing/truncate.py +140 -0
  156. maxframe/dataframe/indexing/where.py +0 -11
  157. maxframe/dataframe/indexing/xs.py +148 -0
  158. maxframe/dataframe/merge/__init__.py +15 -1
  159. maxframe/dataframe/merge/append.py +97 -98
  160. maxframe/dataframe/merge/combine.py +244 -0
  161. maxframe/dataframe/merge/combine_first.py +120 -0
  162. maxframe/dataframe/merge/compare.py +387 -0
  163. maxframe/dataframe/merge/concat.py +183 -0
  164. maxframe/dataframe/merge/update.py +271 -0
  165. maxframe/dataframe/misc/__init__.py +28 -11
  166. maxframe/dataframe/misc/_duplicate.py +10 -4
  167. maxframe/dataframe/misc/apply.py +1 -1
  168. maxframe/dataframe/misc/check_unique.py +82 -0
  169. maxframe/dataframe/misc/clip.py +145 -0
  170. maxframe/dataframe/misc/describe.py +175 -9
  171. maxframe/dataframe/misc/drop.py +31 -0
  172. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  173. maxframe/dataframe/misc/duplicated.py +2 -2
  174. maxframe/dataframe/misc/get_dummies.py +5 -1
  175. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  176. maxframe/dataframe/misc/isin.py +2 -2
  177. maxframe/dataframe/misc/map.py +125 -18
  178. maxframe/dataframe/misc/repeat.py +159 -0
  179. maxframe/dataframe/misc/tests/test_misc.py +48 -3
  180. maxframe/dataframe/misc/to_numeric.py +3 -0
  181. maxframe/dataframe/misc/transform.py +12 -5
  182. maxframe/dataframe/misc/transpose.py +13 -1
  183. maxframe/dataframe/misc/valid_index.py +115 -0
  184. maxframe/dataframe/misc/value_counts.py +38 -4
  185. maxframe/dataframe/missing/checkna.py +14 -6
  186. maxframe/dataframe/missing/dropna.py +5 -0
  187. maxframe/dataframe/missing/fillna.py +1 -1
  188. maxframe/dataframe/missing/replace.py +7 -4
  189. maxframe/dataframe/reduction/__init__.py +35 -16
  190. maxframe/dataframe/reduction/aggregation.py +43 -14
  191. maxframe/dataframe/reduction/all.py +2 -2
  192. maxframe/dataframe/reduction/any.py +2 -2
  193. maxframe/dataframe/reduction/argmax.py +103 -0
  194. maxframe/dataframe/reduction/argmin.py +103 -0
  195. maxframe/dataframe/reduction/core.py +80 -24
  196. maxframe/dataframe/reduction/count.py +13 -9
  197. maxframe/dataframe/reduction/cov.py +166 -0
  198. maxframe/dataframe/reduction/cummax.py +2 -2
  199. maxframe/dataframe/reduction/cummin.py +2 -2
  200. maxframe/dataframe/reduction/cumprod.py +2 -2
  201. maxframe/dataframe/reduction/cumsum.py +2 -2
  202. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  203. maxframe/dataframe/reduction/idxmax.py +185 -0
  204. maxframe/dataframe/reduction/idxmin.py +185 -0
  205. maxframe/dataframe/reduction/kurtosis.py +37 -30
  206. maxframe/dataframe/reduction/max.py +2 -2
  207. maxframe/dataframe/reduction/mean.py +9 -7
  208. maxframe/dataframe/reduction/median.py +2 -2
  209. maxframe/dataframe/reduction/min.py +2 -2
  210. maxframe/dataframe/reduction/mode.py +144 -0
  211. maxframe/dataframe/reduction/nunique.py +19 -11
  212. maxframe/dataframe/reduction/prod.py +18 -13
  213. maxframe/dataframe/reduction/reduction_size.py +2 -2
  214. maxframe/dataframe/reduction/sem.py +13 -9
  215. maxframe/dataframe/reduction/skew.py +31 -27
  216. maxframe/dataframe/reduction/str_concat.py +10 -7
  217. maxframe/dataframe/reduction/sum.py +18 -14
  218. maxframe/dataframe/reduction/tests/test_reduction.py +12 -0
  219. maxframe/dataframe/reduction/unique.py +20 -3
  220. maxframe/dataframe/reduction/var.py +16 -12
  221. maxframe/dataframe/reshape/__init__.py +38 -0
  222. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  223. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  224. maxframe/dataframe/reshape/unstack.py +114 -0
  225. maxframe/dataframe/sort/__init__.py +16 -1
  226. maxframe/dataframe/sort/argsort.py +68 -0
  227. maxframe/dataframe/sort/core.py +2 -1
  228. maxframe/dataframe/sort/nlargest.py +238 -0
  229. maxframe/dataframe/sort/nsmallest.py +228 -0
  230. maxframe/dataframe/sort/rank.py +147 -0
  231. maxframe/dataframe/statistics/__init__.py +3 -3
  232. maxframe/dataframe/statistics/corr.py +1 -0
  233. maxframe/dataframe/statistics/quantile.py +2 -2
  234. maxframe/dataframe/tests/test_typing.py +104 -0
  235. maxframe/dataframe/tests/test_utils.py +66 -2
  236. maxframe/dataframe/tseries/__init__.py +19 -0
  237. maxframe/dataframe/tseries/at_time.py +61 -0
  238. maxframe/dataframe/tseries/between_time.py +122 -0
  239. maxframe/dataframe/typing_.py +185 -0
  240. maxframe/dataframe/utils.py +125 -52
  241. maxframe/dataframe/window/aggregation.py +8 -4
  242. maxframe/dataframe/window/core.py +14 -1
  243. maxframe/dataframe/window/ewm.py +1 -3
  244. maxframe/dataframe/window/expanding.py +37 -35
  245. maxframe/dataframe/window/rolling.py +49 -39
  246. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  247. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  248. maxframe/env.py +7 -4
  249. maxframe/errors.py +2 -2
  250. maxframe/io/odpsio/schema.py +9 -3
  251. maxframe/io/odpsio/tableio.py +7 -2
  252. maxframe/io/odpsio/tests/test_schema.py +198 -83
  253. maxframe/learn/__init__.py +10 -2
  254. maxframe/learn/cluster/__init__.py +15 -0
  255. maxframe/learn/cluster/_kmeans.py +782 -0
  256. maxframe/learn/contrib/llm/core.py +18 -7
  257. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  258. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  259. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  260. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  261. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  262. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  263. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  264. maxframe/learn/contrib/llm/models/__init__.py +1 -0
  265. maxframe/learn/contrib/llm/models/dashscope.py +12 -6
  266. maxframe/learn/contrib/llm/models/managed.py +76 -11
  267. maxframe/learn/contrib/llm/models/openai.py +72 -0
  268. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  269. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  270. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  271. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  272. maxframe/learn/contrib/llm/text.py +348 -42
  273. maxframe/learn/contrib/models.py +4 -1
  274. maxframe/learn/contrib/xgboost/classifier.py +2 -0
  275. maxframe/learn/contrib/xgboost/core.py +113 -4
  276. maxframe/learn/contrib/xgboost/predict.py +4 -2
  277. maxframe/learn/contrib/xgboost/regressor.py +5 -0
  278. maxframe/learn/contrib/xgboost/train.py +7 -2
  279. maxframe/learn/core.py +66 -0
  280. maxframe/learn/linear_model/_base.py +58 -1
  281. maxframe/learn/linear_model/_lin_reg.py +1 -1
  282. maxframe/learn/metrics/__init__.py +6 -0
  283. maxframe/learn/metrics/_classification.py +145 -0
  284. maxframe/learn/metrics/_ranking.py +477 -0
  285. maxframe/learn/metrics/_scorer.py +60 -0
  286. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  287. maxframe/learn/metrics/pairwise/core.py +77 -0
  288. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  289. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  290. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  291. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  292. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  293. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  294. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  295. maxframe/learn/metrics/tests/__init__.py +13 -0
  296. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  297. maxframe/learn/preprocessing/_data/min_max_scaler.py +34 -23
  298. maxframe/learn/preprocessing/_data/standard_scaler.py +34 -25
  299. maxframe/learn/utils/__init__.py +2 -1
  300. maxframe/learn/utils/checks.py +1 -2
  301. maxframe/learn/utils/core.py +59 -0
  302. maxframe/learn/utils/extmath.py +79 -9
  303. maxframe/learn/utils/odpsio.py +262 -0
  304. maxframe/learn/utils/validation.py +2 -2
  305. maxframe/lib/compat.py +40 -0
  306. maxframe/lib/dtypes_extension/__init__.py +16 -1
  307. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  308. maxframe/lib/dtypes_extension/blob.py +304 -0
  309. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  310. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  311. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  312. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  313. maxframe/lib/filesystem/_oss_lib/common.py +124 -50
  314. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  315. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  316. maxframe/lib/filesystem/base.py +1 -1
  317. maxframe/lib/filesystem/core.py +1 -1
  318. maxframe/lib/filesystem/oss.py +115 -46
  319. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  320. maxframe/lib/mmh3.cp37-win32.pyd +0 -0
  321. maxframe/lib/wrapped_pickle.py +10 -0
  322. maxframe/opcodes.py +41 -15
  323. maxframe/protocol.py +12 -0
  324. maxframe/remote/core.py +4 -0
  325. maxframe/serialization/__init__.py +11 -2
  326. maxframe/serialization/arrow.py +38 -13
  327. maxframe/serialization/blob.py +32 -0
  328. maxframe/serialization/core.cp37-win32.pyd +0 -0
  329. maxframe/serialization/core.pyx +39 -1
  330. maxframe/serialization/exception.py +2 -4
  331. maxframe/serialization/numpy.py +11 -0
  332. maxframe/serialization/pandas.py +46 -9
  333. maxframe/serialization/serializables/core.py +2 -2
  334. maxframe/serialization/tests/test_serial.py +31 -4
  335. maxframe/tensor/__init__.py +38 -8
  336. maxframe/tensor/arithmetic/__init__.py +19 -10
  337. maxframe/tensor/arithmetic/core.py +2 -2
  338. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  339. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -9
  340. maxframe/tensor/core.py +6 -2
  341. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  342. maxframe/tensor/extensions/__init__.py +2 -0
  343. maxframe/tensor/extensions/apply_chunk.py +3 -3
  344. maxframe/tensor/extensions/rebalance.py +65 -0
  345. maxframe/tensor/fft/__init__.py +32 -0
  346. maxframe/tensor/fft/core.py +168 -0
  347. maxframe/tensor/fft/fft.py +112 -0
  348. maxframe/tensor/fft/fft2.py +118 -0
  349. maxframe/tensor/fft/fftfreq.py +80 -0
  350. maxframe/tensor/fft/fftn.py +123 -0
  351. maxframe/tensor/fft/fftshift.py +79 -0
  352. maxframe/tensor/fft/hfft.py +112 -0
  353. maxframe/tensor/fft/ifft.py +114 -0
  354. maxframe/tensor/fft/ifft2.py +115 -0
  355. maxframe/tensor/fft/ifftn.py +123 -0
  356. maxframe/tensor/fft/ifftshift.py +73 -0
  357. maxframe/tensor/fft/ihfft.py +93 -0
  358. maxframe/tensor/fft/irfft.py +118 -0
  359. maxframe/tensor/fft/irfft2.py +62 -0
  360. maxframe/tensor/fft/irfftn.py +114 -0
  361. maxframe/tensor/fft/rfft.py +116 -0
  362. maxframe/tensor/fft/rfft2.py +63 -0
  363. maxframe/tensor/fft/rfftfreq.py +87 -0
  364. maxframe/tensor/fft/rfftn.py +113 -0
  365. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  366. maxframe/tensor/linalg/__init__.py +7 -0
  367. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  368. maxframe/tensor/linalg/cholesky.py +117 -0
  369. maxframe/tensor/linalg/einsum.py +339 -0
  370. maxframe/tensor/linalg/lstsq.py +100 -0
  371. maxframe/tensor/linalg/matrix_norm.py +75 -0
  372. maxframe/tensor/linalg/norm.py +249 -0
  373. maxframe/tensor/linalg/solve.py +72 -0
  374. maxframe/tensor/linalg/solve_triangular.py +2 -2
  375. maxframe/tensor/linalg/vector_norm.py +113 -0
  376. maxframe/tensor/misc/__init__.py +24 -1
  377. maxframe/tensor/misc/argwhere.py +72 -0
  378. maxframe/tensor/misc/array_split.py +46 -0
  379. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  380. maxframe/tensor/misc/copyto.py +130 -0
  381. maxframe/tensor/misc/delete.py +104 -0
  382. maxframe/tensor/misc/dsplit.py +68 -0
  383. maxframe/tensor/misc/ediff1d.py +74 -0
  384. maxframe/tensor/misc/expand_dims.py +85 -0
  385. maxframe/tensor/misc/flip.py +90 -0
  386. maxframe/tensor/misc/fliplr.py +64 -0
  387. maxframe/tensor/misc/flipud.py +68 -0
  388. maxframe/tensor/misc/hsplit.py +85 -0
  389. maxframe/tensor/misc/insert.py +139 -0
  390. maxframe/tensor/misc/moveaxis.py +83 -0
  391. maxframe/tensor/misc/result_type.py +88 -0
  392. maxframe/tensor/misc/roll.py +124 -0
  393. maxframe/tensor/misc/rollaxis.py +77 -0
  394. maxframe/tensor/misc/shape.py +89 -0
  395. maxframe/tensor/misc/split.py +190 -0
  396. maxframe/tensor/misc/tile.py +109 -0
  397. maxframe/tensor/misc/vsplit.py +74 -0
  398. maxframe/tensor/reduction/array_equal.py +2 -1
  399. maxframe/tensor/sort/__init__.py +2 -0
  400. maxframe/tensor/sort/argpartition.py +98 -0
  401. maxframe/tensor/sort/partition.py +228 -0
  402. maxframe/tensor/spatial/__init__.py +15 -0
  403. maxframe/tensor/spatial/distance/__init__.py +17 -0
  404. maxframe/tensor/spatial/distance/cdist.py +421 -0
  405. maxframe/tensor/spatial/distance/pdist.py +398 -0
  406. maxframe/tensor/spatial/distance/squareform.py +153 -0
  407. maxframe/tensor/special/__init__.py +159 -21
  408. maxframe/tensor/special/airy.py +55 -0
  409. maxframe/tensor/special/bessel.py +199 -0
  410. maxframe/tensor/special/core.py +65 -4
  411. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  412. maxframe/tensor/special/ellip_harm.py +55 -0
  413. maxframe/tensor/special/err_fresnel.py +223 -0
  414. maxframe/tensor/special/gamma_funcs.py +303 -0
  415. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  416. maxframe/tensor/special/info_theory.py +189 -0
  417. maxframe/tensor/special/misc.py +21 -0
  418. maxframe/tensor/statistics/__init__.py +6 -0
  419. maxframe/tensor/statistics/corrcoef.py +77 -0
  420. maxframe/tensor/statistics/cov.py +222 -0
  421. maxframe/tensor/statistics/digitize.py +126 -0
  422. maxframe/tensor/statistics/histogram.py +520 -0
  423. maxframe/tensor/statistics/median.py +85 -0
  424. maxframe/tensor/statistics/ptp.py +89 -0
  425. maxframe/tensor/utils.py +3 -3
  426. maxframe/tests/test_udf.py +61 -0
  427. maxframe/tests/test_utils.py +51 -6
  428. maxframe/tests/utils.py +0 -2
  429. maxframe/typing_.py +2 -0
  430. maxframe/udf.py +130 -9
  431. maxframe/utils.py +254 -27
  432. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/METADATA +3 -3
  433. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/RECORD +442 -264
  434. maxframe_client/fetcher.py +35 -4
  435. maxframe_client/session/odps.py +7 -2
  436. maxframe_client/session/task.py +8 -1
  437. maxframe_client/tests/test_fetcher.py +76 -3
  438. maxframe_client/tests/test_session.py +28 -1
  439. maxframe/dataframe/arrays.py +0 -864
  440. /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  441. /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  442. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/WHEEL +0 -0
  443. {maxframe-2.0.0b2.dist-info → maxframe-2.3.0rc1.dist-info}/top_level.txt +0 -0
@@ -22,7 +22,7 @@ from ...core import EntityData
22
22
  from ...serialization.serializables import AnyField, FieldTypes, KeyField, ListField
23
23
  from ..core import SERIES_TYPE
24
24
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
25
- from ..utils import build_empty_df, parse_index
25
+ from ..utils import build_df, parse_index
26
26
 
27
27
 
28
28
  class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
@@ -43,8 +43,6 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
43
43
 
44
44
  def __call__(self, df_or_series):
45
45
  if isinstance(df_or_series, SERIES_TYPE):
46
- if not np.issubdtype(df_or_series.dtype, np.number):
47
- raise NotImplementedError("non-numeric type is not supported for now")
48
46
  test_series = pd.Series([], dtype=df_or_series.dtype).describe(
49
47
  percentiles=self.percentiles,
50
48
  include=self.include,
@@ -57,7 +55,7 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
57
55
  index_value=parse_index(test_series.index, store_data=True),
58
56
  )
59
57
  else:
60
- test_inp_df = build_empty_df(df_or_series.dtypes)
58
+ test_inp_df = build_df(df_or_series)
61
59
  test_df = test_inp_df.describe(
62
60
  percentiles=self.percentiles,
63
61
  include=self.include,
@@ -69,11 +67,6 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
69
67
  # MaxFrame DataFrame allows user to specify percentiles=False
70
68
  # to skip computation about percentiles
71
69
  test_df.drop(["50%"], axis=0, inplace=True)
72
- for dtype in test_df.dtypes:
73
- if not np.issubdtype(dtype, np.number):
74
- raise NotImplementedError(
75
- "non-numeric type is not supported for now"
76
- )
77
70
  return self.new_dataframe(
78
71
  [df_or_series],
79
72
  shape=test_df.shape,
@@ -84,6 +77,179 @@ class DataFrameDescribe(DataFrameOperator, DataFrameOperatorMixin):
84
77
 
85
78
 
86
79
  def describe(df_or_series, percentiles=None, include=None, exclude=None):
80
+ """
81
+ Generate descriptive statistics.
82
+
83
+ Descriptive statistics include those that summarize the central
84
+ tendency, dispersion and shape of a
85
+ dataset's distribution, excluding ``NaN`` values.
86
+
87
+ Analyzes both numeric and object series, as well
88
+ as ``DataFrame`` column sets of mixed data types. The output
89
+ will vary depending on what is provided. Refer to the notes
90
+ below for more detail.
91
+
92
+ Parameters
93
+ ----------
94
+ percentiles : list-like of numbers, optional
95
+ The percentiles to include in the output. All should
96
+ fall between 0 and 1. The default is
97
+ ``[.25, .5, .75]``, which returns the 25th, 50th, and
98
+ 75th percentiles.
99
+ include : 'all', list-like of dtypes or None (default), optional
100
+ A white list of data types to include in the result. Ignored
101
+ for ``Series``. Here are the options:
102
+
103
+ - 'all' : All columns of the input will be included in the output.
104
+ - A list-like of dtypes : Limits the results to the
105
+ provided data types.
106
+ To limit the result to numeric types submit
107
+ ``numpy.number``. To limit it instead to object columns submit
108
+ the ``numpy.object`` data type. Strings
109
+ can also be used in the style of
110
+ ``select_dtypes`` (e.g. ``df.describe(include=['O'])``).
111
+ - None (default) : The result will include all numeric columns.
112
+ exclude : list-like of dtypes or None (default), optional,
113
+ A black list of data types to omit from the result. Ignored
114
+ for ``Series``. Here are the options:
115
+
116
+ - A list-like of dtypes : Excludes the provided data types
117
+ from the result. To exclude numeric types submit
118
+ ``numpy.number``. To exclude object columns submit the data
119
+ type ``numpy.object``. Strings can also be used in the style of
120
+ ``select_dtypes`` (e.g. ``df.describe(exclude=['O'])``).
121
+ - None (default) : The result will exclude nothing.
122
+
123
+ Returns
124
+ -------
125
+ Series or DataFrame
126
+ Summary statistics of the Series or DataFrame provided.
127
+
128
+ See Also
129
+ --------
130
+ DataFrame.count: Count number of non-NA/null observations.
131
+ DataFrame.max: Maximum of the values in the object.
132
+ DataFrame.min: Minimum of the values in the object.
133
+ DataFrame.mean: Mean of the values.
134
+ DataFrame.std: Standard deviation of the observations.
135
+ DataFrame.select_dtypes: Subset of a DataFrame including/excluding
136
+ columns based on their dtype.
137
+
138
+ Notes
139
+ -----
140
+ For numeric data, the result's index will include ``count``,
141
+ ``mean``, ``std``, ``min``, ``max`` as well as lower, ``50`` and
142
+ upper percentiles. By default the lower percentile is ``25`` and the
143
+ upper percentile is ``75``. The ``50`` percentile is the
144
+ same as the median.
145
+
146
+ For object data (e.g. strings or timestamps), the result's index
147
+ will include ``count``, ``unique``, ``top``, and ``freq``. The ``top``
148
+ is the most common value. The ``freq`` is the most common value's
149
+ frequency. Timestamps also include the ``first`` and ``last`` items.
150
+
151
+ If multiple object values have the highest count, then the
152
+ ``count`` and ``top`` results will be arbitrarily chosen from
153
+ among those with the highest count.
154
+
155
+ For mixed data types provided via a ``DataFrame``, the default is to
156
+ return only an analysis of numeric columns. If the dataframe consists
157
+ only of object data without any numeric columns, the default is to
158
+ return an analysis of object columns. If ``include='all'`` is provided
159
+ as an option, the result will include a union of attributes of each type.
160
+
161
+ The `include` and `exclude` parameters can be used to limit
162
+ which columns in a ``DataFrame`` are analyzed for the output.
163
+ The parameters are ignored when analyzing a ``Series``.
164
+
165
+ Examples
166
+ --------
167
+ Describing a numeric ``Series``.
168
+
169
+ >>> import maxframe.tensor as mt
170
+ >>> import maxframe.dataframe as md
171
+ >>> s = md.Series([1, 2, 3])
172
+ >>> s.describe().execute()
173
+ count 3.0
174
+ mean 2.0
175
+ std 1.0
176
+ min 1.0
177
+ 25% 1.5
178
+ 50% 2.0
179
+ 75% 2.5
180
+ max 3.0
181
+ dtype: float64
182
+
183
+ Describing a ``DataFrame``. By default only numeric fields
184
+ are returned.
185
+
186
+ >>> df = md.DataFrame({'numeric': [1, 2, 3],
187
+ ... 'object': ['a', 'b', 'c']
188
+ ... })
189
+ >>> df.describe().execute()
190
+ numeric
191
+ count 3.0
192
+ mean 2.0
193
+ std 1.0
194
+ min 1.0
195
+ 25% 1.5
196
+ 50% 2.0
197
+ 75% 2.5
198
+ max 3.0
199
+
200
+ Describing all columns of a ``DataFrame`` regardless of data type.
201
+
202
+ >>> df.describe(include='all').execute() # doctest: +SKIP
203
+ numeric object
204
+ count 3.0 3
205
+ unique NaN 3
206
+ top NaN a
207
+ freq NaN 1
208
+ mean 2.0 NaN
209
+ std 1.0 NaN
210
+ min 1.0 NaN
211
+ 25% 1.5 NaN
212
+ 50% 2.0 NaN
213
+ 75% 2.5 NaN
214
+ max 3.0 NaN
215
+
216
+ Describing a column from a ``DataFrame`` by accessing it as
217
+ an attribute.
218
+
219
+ >>> df.numeric.describe().execute()
220
+ count 3.0
221
+ mean 2.0
222
+ std 1.0
223
+ min 1.0
224
+ 25% 1.5
225
+ 50% 2.0
226
+ 75% 2.5
227
+ max 3.0
228
+ Name: numeric, dtype: float64
229
+
230
+ Including only numeric columns in a ``DataFrame`` description.
231
+
232
+ >>> df.describe(include=[mt.number]).execute()
233
+ numeric
234
+ count 3.0
235
+ mean 2.0
236
+ std 1.0
237
+ min 1.0
238
+ 25% 1.5
239
+ 50% 2.0
240
+ 75% 2.5
241
+ max 3.0
242
+
243
+ Including only string columns in a ``DataFrame`` description.
244
+
245
+ >>> df.describe(include=[object]).execute() # doctest: +SKIP
246
+ object
247
+ count 3
248
+ unique 3
249
+ top a
250
+ freq 1
251
+ """
252
+ # fixme add support for categorical columns once implemented
87
253
  if percentiles is False:
88
254
  percentiles = []
89
255
  elif percentiles is None:
@@ -419,6 +419,37 @@ def series_drop(
419
419
  )
420
420
 
421
421
 
422
+ def series_pop(series, item):
423
+ """
424
+ Return item and drop it from series. Raise KeyError if not found.
425
+
426
+ Parameters
427
+ ----------
428
+ item : label
429
+ Index of the element that needs to be removed.
430
+
431
+ Returns
432
+ -------
433
+ Value that is popped from series.
434
+
435
+ Examples
436
+ --------
437
+ >>> import maxframe.dataframe as md
438
+ >>> ser = md.Series([1,2,3])
439
+
440
+ >>> ser.pop(0).execute()
441
+ 1
442
+
443
+ >>> ser.execute()
444
+ 1 2
445
+ 2 3
446
+ dtype: int64
447
+ """
448
+ scalar = series.data[item]
449
+ series_drop(series, item, inplace=True)
450
+ return scalar
451
+
452
+
422
453
  def index_drop(index, labels, errors="raise"):
423
454
  """
424
455
  Make new Index with passed list of labels deleted.
@@ -19,10 +19,10 @@ from ... import opcodes
19
19
  from ...serialization.serializables import BoolField
20
20
  from ..operators import OutputType
21
21
  from ..utils import gen_unknown_index_value, parse_index
22
- from ._duplicate import DuplicateOperand, validate_subset
22
+ from ._duplicate import BaseDuplicateOp, validate_subset
23
23
 
24
24
 
25
- class DataFrameDropDuplicates(DuplicateOperand):
25
+ class DataFrameDropDuplicates(BaseDuplicateOp):
26
26
  _op_type_ = opcodes.DROP_DUPLICATES
27
27
 
28
28
  ignore_index = BoolField("ignore_index", default=True)
@@ -16,10 +16,10 @@ import numpy as np
16
16
 
17
17
  from ... import opcodes
18
18
  from ...core import OutputType
19
- from ._duplicate import DuplicateOperand, validate_subset
19
+ from ._duplicate import BaseDuplicateOp, validate_subset
20
20
 
21
21
 
22
- class DataFrameDuplicated(DuplicateOperand):
22
+ class DataFrameDuplicated(BaseDuplicateOp):
23
23
  _op_type_ = opcodes.DUPLICATED
24
24
 
25
25
  def __init__(self, output_types=None, **kw):
@@ -25,12 +25,14 @@ from ...serialization.serializables import (
25
25
  ListField,
26
26
  StringField,
27
27
  )
28
+ from ...utils import make_dtype, pd_release_version
28
29
  from ..datasource.dataframe import from_pandas as from_pandas_df
29
30
  from ..datasource.series import from_pandas as from_pandas_series
30
31
  from ..initializer import Series as asseries
31
32
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
32
33
 
33
34
  _encoding_dtype_kind = ["O", "S", "U"]
35
+ _ret_uint8 = pd_release_version < (2, 0, 0)
34
36
 
35
37
 
36
38
  class DataFrameGetDummies(DataFrameOperator, DataFrameOperatorMixin):
@@ -181,7 +183,9 @@ def get_dummies(
181
183
  elif isinstance(data, pd.DataFrame):
182
184
  data = from_pandas_df(data)
183
185
 
184
- dtype = dtype if dtype is not None else np.dtype(bool)
186
+ dtype = make_dtype(
187
+ dtype if dtype is not None else np.dtype(np.uint8 if _ret_uint8 else bool)
188
+ )
185
189
 
186
190
  if prefix is not None:
187
191
  if isinstance(prefix, list):
@@ -0,0 +1,251 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from ... import opcodes
16
+ from ...serialization.serializables import AnyField, StringField
17
+ from ..core import DATAFRAME_TYPE, SERIES_TYPE
18
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
19
+
20
+
21
+ class DataFrameInferDtypes(DataFrameOperator, DataFrameOperatorMixin):
22
+ _op_type_ = opcodes.DATAFRAME_INFER_DTYPES
23
+
24
+ infer_method = StringField("infer_method")
25
+ infer_kwargs = AnyField("infer_kwargs")
26
+
27
+ infer_stage = StringField("infer_stage", default=None)
28
+
29
+ def __init__(self, output_types=None, **kw):
30
+ super().__init__(_output_types=output_types, **kw)
31
+
32
+ def __call__(self, df):
33
+ if isinstance(df, DATAFRAME_TYPE):
34
+ return self.new_dataframe(
35
+ [df],
36
+ shape=df.shape,
37
+ dtypes=None,
38
+ index_value=df.index_value,
39
+ columns_value=df.columns_value,
40
+ )
41
+ else:
42
+ assert isinstance(df, SERIES_TYPE)
43
+ return self.new_series(
44
+ [df],
45
+ shape=df.shape,
46
+ dtype=None,
47
+ name=df.name,
48
+ index_value=df.index_value,
49
+ )
50
+
51
+
52
+ def convert_dtypes(
53
+ df_or_series,
54
+ infer_objects=True,
55
+ convert_string=True,
56
+ convert_integer=True,
57
+ convert_boolean=True,
58
+ convert_floating=True,
59
+ dtype_backend="numpy",
60
+ ):
61
+ """
62
+ Convert columns to best possible dtypes using dtypes supporting ``pd.NA``.
63
+
64
+ Parameters
65
+ ----------
66
+ infer_objects : bool, default True
67
+ Whether object dtypes should be converted to the best possible types.
68
+ convert_string : bool, default True
69
+ Whether object dtypes should be converted to ``StringDtype()``.
70
+ convert_integer : bool, default True
71
+ Whether, if possible, conversion can be done to integer extension types.
72
+ convert_boolean : bool, default True
73
+ Whether object dtypes should be converted to ``BooleanDtype()``.
74
+ convert_floating : bool, default True
75
+ Whether, if possible, conversion can be done to floating extension types.
76
+ If `convert_integer` is also True, preference will be given to integer
77
+ dtypes if the floats can be faithfully cast to integers.
78
+
79
+ Returns
80
+ -------
81
+ Series or DataFrame
82
+ Copy of input object with new dtype.
83
+
84
+ See Also
85
+ --------
86
+ infer_objects : Infer dtypes of objects.
87
+ to_datetime : Convert argument to datetime.
88
+ to_timedelta : Convert argument to timedelta.
89
+ to_numeric : Convert argument to a numeric type.
90
+
91
+ Notes
92
+ -----
93
+ By default, ``convert_dtypes`` will attempt to convert a Series (or each
94
+ Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options
95
+ ``convert_string``, ``convert_integer``, ``convert_boolean`` and
96
+ ``convert_floating``, it is possible to turn off individual conversions
97
+ to ``StringDtype``, the integer extension types, ``BooleanDtype``
98
+ or floating extension types, respectively.
99
+
100
+ For object-dtyped columns, if ``infer_objects`` is ``True``, use the inference
101
+ rules as during normal Series/DataFrame construction. Then, if possible,
102
+ convert to ``StringDtype``, ``BooleanDtype`` or an appropriate integer
103
+ or floating extension type, otherwise leave as ``object``.
104
+
105
+ If the dtype is integer, convert to an appropriate integer extension type.
106
+
107
+ If the dtype is numeric, and consists of all integers, convert to an
108
+ appropriate integer extension type. Otherwise, convert to an
109
+ appropriate floating extension type.
110
+
111
+ .. versionchanged:: 1.2
112
+ Starting with pandas 1.2, this method also converts float columns
113
+ to the nullable floating extension type.
114
+
115
+ In the future, as new dtypes are added that support ``pd.NA``, the results
116
+ of this method will change to support those new dtypes.
117
+
118
+ Examples
119
+ --------
120
+ >>> import maxframe.tensor as mt
121
+ >>> import maxframe.dataframe as md
122
+ >>> df = md.DataFrame(
123
+ ... {
124
+ ... "a": md.Series([1, 2, 3], dtype=mt.dtype("int32")),
125
+ ... "b": md.Series(["x", "y", "z"], dtype=mt.dtype("O")),
126
+ ... "c": md.Series([True, False, mt.nan], dtype=mt.dtype("O")),
127
+ ... "d": md.Series(["h", "i", mt.nan], dtype=mt.dtype("O")),
128
+ ... "e": md.Series([10, mt.nan, 20], dtype=mt.dtype("float")),
129
+ ... "f": md.Series([mt.nan, 100.5, 200], dtype=mt.dtype("float")),
130
+ ... }
131
+ ... )
132
+
133
+ Start with a DataFrame with default dtypes.
134
+
135
+ >>> df.execute()
136
+ a b c d e f
137
+ 0 1 x True h 10.0 NaN
138
+ 1 2 y False i NaN 100.5
139
+ 2 3 z NaN NaN 20.0 200.0
140
+
141
+ >>> df.dtypes.execute()
142
+ a int32
143
+ b object
144
+ c object
145
+ d object
146
+ e float64
147
+ f float64
148
+ dtype: object
149
+
150
+ Convert the DataFrame to use best possible dtypes.
151
+
152
+ >>> dfn = df.convert_dtypes()
153
+ >>> dfn.execute()
154
+ a b c d e f
155
+ 0 1 x True h 10 <NA>
156
+ 1 2 y False i <NA> 100.5
157
+ 2 3 z <NA> <NA> 20 200.0
158
+
159
+ >>> dfn.dtypes.execute()
160
+ a Int32
161
+ b string
162
+ c boolean
163
+ d string
164
+ e Int64
165
+ f Float64
166
+ dtype: object
167
+
168
+ Start with a Series of strings and missing data represented by ``np.nan``.
169
+
170
+ >>> s = md.Series(["a", "b", mt.nan])
171
+ >>> s.execute()
172
+ 0 a
173
+ 1 b
174
+ 2 NaN
175
+ dtype: object
176
+
177
+ Obtain a Series with dtype ``StringDtype``.
178
+
179
+ >>> s.convert_dtypes().execute()
180
+ 0 a
181
+ 1 b
182
+ 2 <NA>
183
+ dtype: string
184
+ """
185
+ dtype_backend = "numpy" if dtype_backend == "numpy_nullable" else dtype_backend
186
+ op = DataFrameInferDtypes(
187
+ infer_method="convert_dtypes",
188
+ infer_kwargs=dict(
189
+ infer_objects=infer_objects,
190
+ convert_string=convert_string,
191
+ convert_integer=convert_integer,
192
+ convert_boolean=convert_boolean,
193
+ convert_floating=convert_floating,
194
+ dtype_backend=dtype_backend,
195
+ ),
196
+ )
197
+ return op(df_or_series)
198
+
199
+
200
+ def infer_objects(df_or_series, copy=True):
201
+ """
202
+ Attempt to infer better dtypes for object columns.
203
+
204
+ Attempts soft conversion of object-dtyped
205
+ columns, leaving non-object and unconvertible
206
+ columns unchanged. The inference rules are the
207
+ same as during normal Series/DataFrame construction.
208
+
209
+ Returns
210
+ -------
211
+ converted : same type as input object
212
+
213
+ See Also
214
+ --------
215
+ to_datetime : Convert argument to datetime.
216
+ to_timedelta : Convert argument to timedelta.
217
+ to_numeric : Convert argument to numeric type.
218
+ convert_dtypes : Convert argument to best possible dtype.
219
+
220
+ Examples
221
+ --------
222
+ >>> import maxframe.dataframe as md
223
+ >>> df = md.DataFrame({"A": ["a", 1, 2, 3]})
224
+ >>> df = df.iloc[1:]
225
+ >>> df.execute()
226
+ A
227
+ 1 1
228
+ 2 2
229
+ 3 3
230
+
231
+ >>> df.dtypes.execute()
232
+ A object
233
+ dtype: object
234
+
235
+ >>> df.infer_objects().dtypes.execute()
236
+ A int64
237
+ dtype: object
238
+ """
239
+ if (isinstance(df_or_series, SERIES_TYPE) and df_or_series.dtype != "O") or (
240
+ isinstance(df_or_series, DATAFRAME_TYPE)
241
+ and all(dt != "O" for dt in df_or_series.dtypes)
242
+ ):
243
+ # no objects to cast
244
+ return df_or_series
245
+
246
+ _ = copy # in MaxFrame data are immutable, thus ignore the parameter
247
+ op = DataFrameInferDtypes(
248
+ infer_method="infer_objects",
249
+ infer_kwargs={},
250
+ )
251
+ return op(df_or_series)
@@ -133,7 +133,7 @@ def series_isin(elements, values):
133
133
  5 False
134
134
  Name: animal, dtype: bool
135
135
  """
136
- if is_list_like(values):
136
+ if is_list_like(values) and not isinstance(values, ENTITY_TYPE):
137
137
  values = list(values)
138
138
  elif not isinstance(values, (SERIES_TYPE, TENSOR_TYPE, INDEX_TYPE)):
139
139
  raise TypeError(
@@ -207,7 +207,7 @@ def df_isin(df, values):
207
207
  falcon True True
208
208
  dog False False
209
209
  """
210
- if is_list_like(values) and not isinstance(values, dict):
210
+ if is_list_like(values) and not isinstance(values, (dict, ENTITY_TYPE)):
211
211
  values = list(values)
212
212
  elif not isinstance(
213
213
  values, (SERIES_TYPE, DATAFRAME_TYPE, TENSOR_TYPE, INDEX_TYPE, dict)