maxframe 2.0.0b2__cp311-cp311-win32.whl → 2.2.0__cp311-cp311-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (391)
  1. maxframe/__init__.py +1 -0
  2. maxframe/_utils.cp311-win32.pyd +0 -0
  3. maxframe/_utils.pyx +14 -1
  4. maxframe/codegen/core.py +6 -6
  5. maxframe/codegen/spe/core.py +1 -1
  6. maxframe/codegen/spe/dataframe/__init__.py +1 -0
  7. maxframe/codegen/spe/dataframe/accessors/base.py +18 -0
  8. maxframe/codegen/spe/dataframe/accessors/dict_.py +25 -130
  9. maxframe/codegen/spe/dataframe/accessors/list_.py +12 -48
  10. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  11. maxframe/codegen/spe/dataframe/arithmetic.py +7 -2
  12. maxframe/codegen/spe/dataframe/groupby.py +88 -0
  13. maxframe/codegen/spe/dataframe/indexing.py +99 -4
  14. maxframe/codegen/spe/dataframe/merge.py +34 -1
  15. maxframe/codegen/spe/dataframe/misc.py +9 -33
  16. maxframe/codegen/spe/dataframe/reduction.py +14 -9
  17. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  18. maxframe/codegen/spe/dataframe/sort.py +30 -17
  19. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +9 -15
  20. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +4 -7
  21. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  22. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +20 -1
  23. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  24. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +0 -32
  25. maxframe/codegen/spe/dataframe/tests/test_groupby.py +81 -18
  26. maxframe/codegen/spe/dataframe/tests/test_merge.py +27 -1
  27. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  28. maxframe/codegen/spe/dataframe/tests/test_sort.py +20 -0
  29. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +2 -1
  30. maxframe/codegen/spe/learn/metrics/__init__.py +1 -1
  31. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  32. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  33. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  34. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  35. maxframe/codegen/spe/tensor/__init__.py +3 -0
  36. maxframe/codegen/spe/tensor/fft.py +74 -0
  37. maxframe/codegen/spe/tensor/linalg.py +29 -2
  38. maxframe/codegen/spe/tensor/misc.py +79 -25
  39. maxframe/codegen/spe/tensor/spatial.py +45 -0
  40. maxframe/codegen/spe/tensor/statistics.py +44 -0
  41. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  42. maxframe/codegen/spe/tensor/tests/test_linalg.py +15 -1
  43. maxframe/codegen/spe/tensor/tests/test_misc.py +52 -2
  44. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  45. maxframe/codegen/spe/tensor/tests/test_statistics.py +15 -1
  46. maxframe/codegen/spe/tests/test_spe_codegen.py +6 -12
  47. maxframe/codegen/spe/utils.py +2 -0
  48. maxframe/config/config.py +70 -9
  49. maxframe/config/tests/test_validators.py +13 -1
  50. maxframe/config/validators.py +49 -0
  51. maxframe/conftest.py +44 -17
  52. maxframe/core/accessor.py +2 -2
  53. maxframe/core/entity/core.py +5 -0
  54. maxframe/core/entity/tileables.py +1 -1
  55. maxframe/core/graph/core.cp311-win32.pyd +0 -0
  56. maxframe/core/graph/entity.py +1 -2
  57. maxframe/core/operator/base.py +9 -2
  58. maxframe/core/operator/core.py +10 -2
  59. maxframe/core/operator/utils.py +13 -0
  60. maxframe/dataframe/__init__.py +10 -3
  61. maxframe/dataframe/accessors/__init__.py +1 -1
  62. maxframe/dataframe/accessors/compat.py +45 -0
  63. maxframe/dataframe/accessors/datetime_/__init__.py +4 -1
  64. maxframe/dataframe/accessors/dict_/contains.py +7 -16
  65. maxframe/dataframe/accessors/dict_/core.py +48 -0
  66. maxframe/dataframe/accessors/dict_/getitem.py +17 -21
  67. maxframe/dataframe/accessors/dict_/length.py +7 -16
  68. maxframe/dataframe/accessors/dict_/remove.py +6 -18
  69. maxframe/dataframe/accessors/dict_/setitem.py +8 -18
  70. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +62 -22
  71. maxframe/dataframe/accessors/list_/__init__.py +2 -2
  72. maxframe/dataframe/accessors/list_/core.py +48 -0
  73. maxframe/dataframe/accessors/list_/getitem.py +12 -19
  74. maxframe/dataframe/accessors/list_/length.py +7 -16
  75. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +11 -9
  76. maxframe/dataframe/accessors/string_/__init__.py +4 -1
  77. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  78. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  79. maxframe/dataframe/accessors/struct_/core.py +43 -0
  80. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  81. maxframe/dataframe/accessors/struct_/field.py +123 -0
  82. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  83. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  84. maxframe/dataframe/arithmetic/__init__.py +14 -4
  85. maxframe/dataframe/arithmetic/between.py +106 -0
  86. maxframe/dataframe/arithmetic/dot.py +237 -0
  87. maxframe/dataframe/arithmetic/{around.py → round.py} +11 -7
  88. maxframe/dataframe/core.py +63 -118
  89. maxframe/dataframe/datasource/__init__.py +18 -0
  90. maxframe/dataframe/datasource/from_dict.py +124 -0
  91. maxframe/dataframe/datasource/from_index.py +1 -1
  92. maxframe/dataframe/datasource/from_records.py +77 -0
  93. maxframe/dataframe/datasource/from_tensor.py +109 -41
  94. maxframe/dataframe/datasource/read_csv.py +2 -3
  95. maxframe/dataframe/datasource/tests/test_datasource.py +37 -0
  96. maxframe/dataframe/datastore/__init__.py +5 -1
  97. maxframe/dataframe/datastore/to_csv.py +29 -41
  98. maxframe/dataframe/datastore/to_odps.py +30 -4
  99. maxframe/dataframe/extensions/__init__.py +20 -4
  100. maxframe/dataframe/extensions/apply_chunk.py +32 -6
  101. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  102. maxframe/dataframe/extensions/collect_kv.py +126 -0
  103. maxframe/dataframe/extensions/extract_kv.py +177 -0
  104. maxframe/dataframe/extensions/map_reduce.py +263 -0
  105. maxframe/dataframe/extensions/rebalance.py +62 -0
  106. maxframe/dataframe/extensions/tests/test_apply_chunk.py +9 -2
  107. maxframe/dataframe/extensions/tests/test_extensions.py +54 -0
  108. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  109. maxframe/dataframe/groupby/__init__.py +12 -1
  110. maxframe/dataframe/groupby/aggregation.py +78 -45
  111. maxframe/dataframe/groupby/apply.py +1 -1
  112. maxframe/dataframe/groupby/apply_chunk.py +18 -2
  113. maxframe/dataframe/groupby/core.py +96 -12
  114. maxframe/dataframe/groupby/cum.py +4 -25
  115. maxframe/dataframe/groupby/expanding.py +264 -0
  116. maxframe/dataframe/groupby/fill.py +1 -1
  117. maxframe/dataframe/groupby/getitem.py +12 -5
  118. maxframe/dataframe/groupby/head.py +11 -1
  119. maxframe/dataframe/groupby/rank.py +136 -0
  120. maxframe/dataframe/groupby/rolling.py +206 -0
  121. maxframe/dataframe/groupby/shift.py +114 -0
  122. maxframe/dataframe/groupby/tests/test_groupby.py +0 -5
  123. maxframe/dataframe/indexing/__init__.py +20 -1
  124. maxframe/dataframe/indexing/droplevel.py +195 -0
  125. maxframe/dataframe/indexing/filter.py +169 -0
  126. maxframe/dataframe/indexing/get_level_values.py +76 -0
  127. maxframe/dataframe/indexing/iat.py +45 -0
  128. maxframe/dataframe/indexing/iloc.py +152 -12
  129. maxframe/dataframe/indexing/insert.py +1 -1
  130. maxframe/dataframe/indexing/loc.py +287 -7
  131. maxframe/dataframe/indexing/reindex.py +14 -5
  132. maxframe/dataframe/indexing/rename.py +6 -0
  133. maxframe/dataframe/indexing/rename_axis.py +2 -2
  134. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  135. maxframe/dataframe/indexing/reset_index.py +33 -6
  136. maxframe/dataframe/indexing/sample.py +8 -0
  137. maxframe/dataframe/indexing/setitem.py +3 -3
  138. maxframe/dataframe/indexing/swaplevel.py +185 -0
  139. maxframe/dataframe/indexing/take.py +99 -0
  140. maxframe/dataframe/indexing/truncate.py +140 -0
  141. maxframe/dataframe/indexing/where.py +0 -11
  142. maxframe/dataframe/indexing/xs.py +148 -0
  143. maxframe/dataframe/merge/__init__.py +12 -1
  144. maxframe/dataframe/merge/append.py +97 -98
  145. maxframe/dataframe/merge/combine_first.py +120 -0
  146. maxframe/dataframe/merge/compare.py +387 -0
  147. maxframe/dataframe/merge/concat.py +183 -0
  148. maxframe/dataframe/merge/update.py +271 -0
  149. maxframe/dataframe/misc/__init__.py +16 -10
  150. maxframe/dataframe/misc/_duplicate.py +10 -4
  151. maxframe/dataframe/misc/apply.py +1 -1
  152. maxframe/dataframe/misc/check_unique.py +51 -0
  153. maxframe/dataframe/misc/clip.py +145 -0
  154. maxframe/dataframe/misc/describe.py +175 -9
  155. maxframe/dataframe/misc/drop_duplicates.py +2 -2
  156. maxframe/dataframe/misc/duplicated.py +2 -2
  157. maxframe/dataframe/misc/get_dummies.py +5 -1
  158. maxframe/dataframe/misc/isin.py +2 -2
  159. maxframe/dataframe/misc/map.py +94 -0
  160. maxframe/dataframe/misc/tests/test_misc.py +13 -2
  161. maxframe/dataframe/misc/to_numeric.py +3 -0
  162. maxframe/dataframe/misc/transform.py +12 -5
  163. maxframe/dataframe/misc/transpose.py +13 -1
  164. maxframe/dataframe/misc/valid_index.py +115 -0
  165. maxframe/dataframe/misc/value_counts.py +38 -4
  166. maxframe/dataframe/missing/checkna.py +13 -6
  167. maxframe/dataframe/missing/dropna.py +5 -0
  168. maxframe/dataframe/missing/fillna.py +1 -1
  169. maxframe/dataframe/missing/replace.py +7 -4
  170. maxframe/dataframe/reduction/__init__.py +29 -15
  171. maxframe/dataframe/reduction/aggregation.py +38 -9
  172. maxframe/dataframe/reduction/all.py +2 -2
  173. maxframe/dataframe/reduction/any.py +2 -2
  174. maxframe/dataframe/reduction/argmax.py +100 -0
  175. maxframe/dataframe/reduction/argmin.py +100 -0
  176. maxframe/dataframe/reduction/core.py +65 -18
  177. maxframe/dataframe/reduction/count.py +13 -9
  178. maxframe/dataframe/reduction/cov.py +166 -0
  179. maxframe/dataframe/reduction/cummax.py +2 -2
  180. maxframe/dataframe/reduction/cummin.py +2 -2
  181. maxframe/dataframe/reduction/cumprod.py +2 -2
  182. maxframe/dataframe/reduction/cumsum.py +2 -2
  183. maxframe/dataframe/reduction/custom_reduction.py +2 -2
  184. maxframe/dataframe/reduction/idxmax.py +185 -0
  185. maxframe/dataframe/reduction/idxmin.py +185 -0
  186. maxframe/dataframe/reduction/kurtosis.py +37 -30
  187. maxframe/dataframe/reduction/max.py +2 -2
  188. maxframe/dataframe/reduction/mean.py +9 -7
  189. maxframe/dataframe/reduction/median.py +2 -2
  190. maxframe/dataframe/reduction/min.py +2 -2
  191. maxframe/dataframe/reduction/nunique.py +9 -8
  192. maxframe/dataframe/reduction/prod.py +18 -13
  193. maxframe/dataframe/reduction/reduction_size.py +2 -2
  194. maxframe/dataframe/reduction/sem.py +13 -9
  195. maxframe/dataframe/reduction/skew.py +31 -27
  196. maxframe/dataframe/reduction/str_concat.py +10 -7
  197. maxframe/dataframe/reduction/sum.py +18 -14
  198. maxframe/dataframe/reduction/unique.py +20 -3
  199. maxframe/dataframe/reduction/var.py +16 -12
  200. maxframe/dataframe/reshape/__init__.py +38 -0
  201. maxframe/dataframe/{misc → reshape}/pivot.py +1 -0
  202. maxframe/dataframe/{misc → reshape}/pivot_table.py +1 -0
  203. maxframe/dataframe/reshape/unstack.py +114 -0
  204. maxframe/dataframe/sort/__init__.py +8 -0
  205. maxframe/dataframe/sort/argsort.py +62 -0
  206. maxframe/dataframe/sort/core.py +1 -0
  207. maxframe/dataframe/sort/nlargest.py +238 -0
  208. maxframe/dataframe/sort/nsmallest.py +228 -0
  209. maxframe/dataframe/statistics/__init__.py +3 -3
  210. maxframe/dataframe/statistics/corr.py +1 -0
  211. maxframe/dataframe/statistics/quantile.py +2 -2
  212. maxframe/dataframe/tests/test_typing.py +104 -0
  213. maxframe/dataframe/tests/test_utils.py +66 -2
  214. maxframe/dataframe/typing_.py +185 -0
  215. maxframe/dataframe/utils.py +95 -26
  216. maxframe/dataframe/window/aggregation.py +8 -4
  217. maxframe/dataframe/window/core.py +14 -1
  218. maxframe/dataframe/window/ewm.py +1 -3
  219. maxframe/dataframe/window/expanding.py +37 -35
  220. maxframe/dataframe/window/rolling.py +49 -39
  221. maxframe/dataframe/window/tests/test_expanding.py +1 -7
  222. maxframe/dataframe/window/tests/test_rolling.py +1 -1
  223. maxframe/env.py +7 -4
  224. maxframe/errors.py +2 -2
  225. maxframe/io/odpsio/schema.py +9 -3
  226. maxframe/io/odpsio/tableio.py +7 -2
  227. maxframe/io/odpsio/tests/test_schema.py +198 -83
  228. maxframe/learn/__init__.py +10 -2
  229. maxframe/learn/cluster/__init__.py +15 -0
  230. maxframe/learn/cluster/_kmeans.py +782 -0
  231. maxframe/learn/contrib/llm/core.py +2 -0
  232. maxframe/learn/contrib/xgboost/core.py +86 -1
  233. maxframe/learn/contrib/xgboost/train.py +5 -2
  234. maxframe/learn/core.py +66 -0
  235. maxframe/learn/linear_model/_base.py +58 -1
  236. maxframe/learn/linear_model/_lin_reg.py +1 -1
  237. maxframe/learn/metrics/__init__.py +6 -0
  238. maxframe/learn/metrics/_classification.py +145 -0
  239. maxframe/learn/metrics/_ranking.py +477 -0
  240. maxframe/learn/metrics/_scorer.py +60 -0
  241. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  242. maxframe/learn/metrics/pairwise/core.py +77 -0
  243. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  244. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  245. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  246. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  247. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  248. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  249. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  250. maxframe/learn/metrics/tests/__init__.py +13 -0
  251. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  252. maxframe/learn/utils/__init__.py +1 -1
  253. maxframe/learn/utils/checks.py +1 -2
  254. maxframe/learn/utils/core.py +59 -0
  255. maxframe/learn/utils/extmath.py +37 -0
  256. maxframe/learn/utils/odpsio.py +193 -0
  257. maxframe/learn/utils/validation.py +2 -2
  258. maxframe/lib/compat.py +40 -0
  259. maxframe/lib/dtypes_extension/__init__.py +16 -1
  260. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  261. maxframe/lib/dtypes_extension/blob.py +304 -0
  262. maxframe/lib/dtypes_extension/dtypes.py +40 -0
  263. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  264. maxframe/lib/dtypes_extension/tests/test_dtypes.py +16 -1
  265. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  266. maxframe/lib/filesystem/_oss_lib/common.py +122 -50
  267. maxframe/lib/filesystem/_oss_lib/glob.py +1 -1
  268. maxframe/lib/filesystem/_oss_lib/handle.py +21 -25
  269. maxframe/lib/filesystem/base.py +1 -1
  270. maxframe/lib/filesystem/core.py +1 -1
  271. maxframe/lib/filesystem/oss.py +115 -46
  272. maxframe/lib/filesystem/tests/test_oss.py +74 -36
  273. maxframe/lib/mmh3.cp311-win32.pyd +0 -0
  274. maxframe/lib/wrapped_pickle.py +10 -0
  275. maxframe/opcodes.py +33 -15
  276. maxframe/protocol.py +12 -0
  277. maxframe/serialization/__init__.py +11 -2
  278. maxframe/serialization/arrow.py +38 -13
  279. maxframe/serialization/blob.py +32 -0
  280. maxframe/serialization/core.cp311-win32.pyd +0 -0
  281. maxframe/serialization/core.pyx +39 -1
  282. maxframe/serialization/exception.py +2 -4
  283. maxframe/serialization/numpy.py +11 -0
  284. maxframe/serialization/pandas.py +46 -9
  285. maxframe/serialization/serializables/core.py +2 -2
  286. maxframe/serialization/tests/test_serial.py +29 -2
  287. maxframe/tensor/__init__.py +38 -8
  288. maxframe/tensor/arithmetic/__init__.py +19 -10
  289. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  290. maxframe/tensor/arithmetic/tests/test_arithmetic.py +6 -0
  291. maxframe/tensor/core.py +3 -2
  292. maxframe/tensor/datasource/tests/test_datasource.py +2 -1
  293. maxframe/tensor/extensions/__init__.py +2 -0
  294. maxframe/tensor/extensions/apply_chunk.py +3 -3
  295. maxframe/tensor/extensions/rebalance.py +65 -0
  296. maxframe/tensor/fft/__init__.py +32 -0
  297. maxframe/tensor/fft/core.py +168 -0
  298. maxframe/tensor/fft/fft.py +112 -0
  299. maxframe/tensor/fft/fft2.py +118 -0
  300. maxframe/tensor/fft/fftfreq.py +80 -0
  301. maxframe/tensor/fft/fftn.py +123 -0
  302. maxframe/tensor/fft/fftshift.py +79 -0
  303. maxframe/tensor/fft/hfft.py +112 -0
  304. maxframe/tensor/fft/ifft.py +114 -0
  305. maxframe/tensor/fft/ifft2.py +115 -0
  306. maxframe/tensor/fft/ifftn.py +123 -0
  307. maxframe/tensor/fft/ifftshift.py +73 -0
  308. maxframe/tensor/fft/ihfft.py +93 -0
  309. maxframe/tensor/fft/irfft.py +118 -0
  310. maxframe/tensor/fft/irfft2.py +62 -0
  311. maxframe/tensor/fft/irfftn.py +114 -0
  312. maxframe/tensor/fft/rfft.py +116 -0
  313. maxframe/tensor/fft/rfft2.py +63 -0
  314. maxframe/tensor/fft/rfftfreq.py +87 -0
  315. maxframe/tensor/fft/rfftn.py +113 -0
  316. maxframe/tensor/indexing/fill_diagonal.py +1 -7
  317. maxframe/tensor/linalg/__init__.py +7 -0
  318. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  319. maxframe/tensor/linalg/cholesky.py +117 -0
  320. maxframe/tensor/linalg/einsum.py +339 -0
  321. maxframe/tensor/linalg/lstsq.py +100 -0
  322. maxframe/tensor/linalg/matrix_norm.py +75 -0
  323. maxframe/tensor/linalg/norm.py +249 -0
  324. maxframe/tensor/linalg/solve.py +72 -0
  325. maxframe/tensor/linalg/solve_triangular.py +2 -2
  326. maxframe/tensor/linalg/vector_norm.py +113 -0
  327. maxframe/tensor/misc/__init__.py +24 -1
  328. maxframe/tensor/misc/argwhere.py +72 -0
  329. maxframe/tensor/misc/array_split.py +46 -0
  330. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  331. maxframe/tensor/misc/copyto.py +130 -0
  332. maxframe/tensor/misc/delete.py +104 -0
  333. maxframe/tensor/misc/dsplit.py +68 -0
  334. maxframe/tensor/misc/ediff1d.py +74 -0
  335. maxframe/tensor/misc/expand_dims.py +85 -0
  336. maxframe/tensor/misc/flip.py +90 -0
  337. maxframe/tensor/misc/fliplr.py +64 -0
  338. maxframe/tensor/misc/flipud.py +68 -0
  339. maxframe/tensor/misc/hsplit.py +85 -0
  340. maxframe/tensor/misc/insert.py +139 -0
  341. maxframe/tensor/misc/moveaxis.py +83 -0
  342. maxframe/tensor/misc/result_type.py +88 -0
  343. maxframe/tensor/misc/roll.py +124 -0
  344. maxframe/tensor/misc/rollaxis.py +77 -0
  345. maxframe/tensor/misc/shape.py +89 -0
  346. maxframe/tensor/misc/split.py +190 -0
  347. maxframe/tensor/misc/tile.py +109 -0
  348. maxframe/tensor/misc/vsplit.py +74 -0
  349. maxframe/tensor/reduction/array_equal.py +2 -1
  350. maxframe/tensor/sort/__init__.py +2 -0
  351. maxframe/tensor/sort/argpartition.py +98 -0
  352. maxframe/tensor/sort/partition.py +228 -0
  353. maxframe/tensor/spatial/__init__.py +15 -0
  354. maxframe/tensor/spatial/distance/__init__.py +17 -0
  355. maxframe/tensor/spatial/distance/cdist.py +421 -0
  356. maxframe/tensor/spatial/distance/pdist.py +398 -0
  357. maxframe/tensor/spatial/distance/squareform.py +153 -0
  358. maxframe/tensor/special/__init__.py +159 -21
  359. maxframe/tensor/special/airy.py +55 -0
  360. maxframe/tensor/special/bessel.py +199 -0
  361. maxframe/tensor/special/core.py +65 -4
  362. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  363. maxframe/tensor/special/ellip_harm.py +55 -0
  364. maxframe/tensor/special/err_fresnel.py +223 -0
  365. maxframe/tensor/special/gamma_funcs.py +303 -0
  366. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  367. maxframe/tensor/special/info_theory.py +189 -0
  368. maxframe/tensor/special/misc.py +21 -0
  369. maxframe/tensor/statistics/__init__.py +6 -0
  370. maxframe/tensor/statistics/corrcoef.py +77 -0
  371. maxframe/tensor/statistics/cov.py +222 -0
  372. maxframe/tensor/statistics/digitize.py +126 -0
  373. maxframe/tensor/statistics/histogram.py +520 -0
  374. maxframe/tensor/statistics/median.py +85 -0
  375. maxframe/tensor/statistics/ptp.py +89 -0
  376. maxframe/tensor/utils.py +3 -3
  377. maxframe/tests/test_utils.py +43 -1
  378. maxframe/tests/utils.py +0 -2
  379. maxframe/typing_.py +2 -0
  380. maxframe/udf.py +27 -2
  381. maxframe/utils.py +193 -19
  382. {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/METADATA +3 -2
  383. {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/RECORD +391 -236
  384. maxframe_client/fetcher.py +35 -4
  385. maxframe_client/session/odps.py +7 -2
  386. maxframe_client/tests/test_fetcher.py +76 -3
  387. maxframe_client/tests/test_session.py +4 -1
  388. /maxframe/dataframe/{misc → reshape}/melt.py +0 -0
  389. /maxframe/dataframe/{misc → reshape}/stack.py +0 -0
  390. {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/WHEEL +0 -0
  391. {maxframe-2.0.0b2.dist-info → maxframe-2.2.0.dist-info}/top_level.txt +0 -0
@@ -26,9 +26,10 @@ from ...serialization.serializables import (
26
26
  Int32Field,
27
27
  TupleField,
28
28
  )
29
+ from ...typing_ import TileableType
29
30
  from ...udf import BuiltinFunction, MarkedFunction
30
31
  from ...utils import copy_if_possible, make_dtype, make_dtypes
31
- from ..core import DATAFRAME_TYPE, DataFrame, IndexValue, Series
32
+ from ..core import DATAFRAME_TYPE, INDEX_TYPE, DataFrame, IndexValue, Series
32
33
  from ..operators import DataFrameOperator, DataFrameOperatorMixin
33
34
  from ..utils import (
34
35
  InferredDataFrameMeta,
@@ -43,7 +44,7 @@ from ..utils import (
43
44
 
44
45
  class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
45
46
  _op_type_ = opcodes.APPLY_CHUNK
46
- _legacy_name = "DataFrameApplyChunkOperator"
47
+ _legacy_name = "DataFrameApplyChunkOperator" # since v2.0.0
47
48
 
48
49
  func = FunctionField("func")
49
50
  batch_rows = Int32Field("batch_rows", default=None)
@@ -60,16 +61,26 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
60
61
  def has_custom_code(self) -> bool:
61
62
  return not isinstance(self.func, BuiltinFunction)
62
63
 
64
+ def check_inputs(self, inputs: List[TileableType]):
65
+ # for apply_chunk we allow called on non-deterministic tileables
66
+ pass
67
+
63
68
  def _call_dataframe(self, df, dtypes, dtype, name, index_value, element_wise):
64
69
  # return dataframe
65
70
  if self.output_types[0] == OutputType.dataframe:
66
71
  dtypes = make_dtypes(dtypes)
72
+ if dtypes is not None:
73
+ shape = df.shape if element_wise else (np.nan, len(dtypes))
74
+ cols_value = parse_index(dtypes.index, store_data=True)
75
+ else:
76
+ shape = (np.nan, np.nan)
77
+ cols_value = None
67
78
  # apply_chunk will use generate new range index for results
68
79
  return self.new_dataframe(
69
80
  [df],
70
- shape=df.shape if element_wise else (np.nan, len(dtypes)),
81
+ shape=shape,
71
82
  index_value=index_value,
72
- columns_value=parse_index(dtypes.index, store_data=True),
83
+ columns_value=cols_value,
73
84
  dtypes=dtypes,
74
85
  )
75
86
 
@@ -106,11 +117,17 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
106
117
  name: Any = None,
107
118
  output_type=None,
108
119
  index=None,
120
+ skip_infer=False,
109
121
  ):
110
122
  args = self.args or ()
111
123
  kwargs = self.kwargs or {}
112
124
  # if not dtypes and not skip_infer:
113
- packed_func = get_packed_func(df_or_series, self.func, *args, **kwargs)
125
+ try:
126
+ packed_func = get_packed_func(df_or_series, self.func, *args, **kwargs)
127
+ except:
128
+ if not skip_infer:
129
+ raise
130
+ packed_func = self.func
114
131
 
115
132
  # if skip_infer, directly build a frame
116
133
  if self.output_types and self.output_types[0] == OutputType.df_or_series:
@@ -125,13 +142,15 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
125
142
  dtype=dtype,
126
143
  name=name,
127
144
  index=index,
145
+ skip_infer=skip_infer,
128
146
  )
129
147
 
130
148
  if inferred_meta.index_value is None:
131
149
  inferred_meta.index_value = parse_index(
132
150
  None, (df_or_series.key, df_or_series.index_value.key, self.func)
133
151
  )
134
- inferred_meta.check_absence("output_type", "dtypes", "dtype")
152
+ if not skip_infer:
153
+ inferred_meta.check_absence("output_type", "dtypes", "dtype")
135
154
 
136
155
  if isinstance(df_or_series, DATAFRAME_TYPE):
137
156
  return self._call_dataframe(
@@ -163,6 +182,7 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
163
182
  name: Any = None,
164
183
  index: Union[pd.Index, IndexValue] = None,
165
184
  elementwise: bool = None,
185
+ skip_infer: bool = False,
166
186
  **kwargs,
167
187
  ) -> InferredDataFrameMeta:
168
188
  inferred_meta = infer_dataframe_return_value(
@@ -174,7 +194,10 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
174
194
  name=name,
175
195
  index=index,
176
196
  elementwise=elementwise,
197
+ skip_infer=skip_infer,
177
198
  )
199
+ if skip_infer:
200
+ return inferred_meta
178
201
 
179
202
  # merge specified and inferred index, dtypes, output_type
180
203
  # elementwise used to decide shape
@@ -186,6 +209,8 @@ class DataFrameApplyChunk(DataFrameOperator, DataFrameOperatorMixin):
186
209
  if self.output_types:
187
210
  inferred_meta.output_type = self.output_types[0]
188
211
  inferred_meta.dtypes = dtypes if dtypes is not None else inferred_meta.dtypes
212
+ if isinstance(index, INDEX_TYPE):
213
+ index = index.index_value
189
214
  if index is not None:
190
215
  inferred_meta.index_value = (
191
216
  parse_index(index)
@@ -458,6 +483,7 @@ def df_apply_chunk(
458
483
  name=name,
459
484
  index=index,
460
485
  output_type=output_type,
486
+ skip_infer=skip_infer,
461
487
  )
462
488
 
463
489
 
@@ -0,0 +1,153 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import List
16
+
17
+ import numpy as np
18
+ import pandas as pd
19
+
20
+ from ... import opcodes
21
+ from ...core import EntityData, OutputType
22
+ from ...serialization.serializables import (
23
+ DictField,
24
+ FunctionField,
25
+ KeyField,
26
+ TupleField,
27
+ )
28
+ from ...udf import BuiltinFunction
29
+ from ...utils import quiet_stdio
30
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
31
+ from ..utils import (
32
+ build_df,
33
+ build_empty_df,
34
+ build_series,
35
+ parse_index,
36
+ validate_output_types,
37
+ )
38
+
39
+
40
+ class DataFrameCartesianChunk(DataFrameOperator, DataFrameOperatorMixin):
41
+ _op_type_ = opcodes.CARTESIAN_CHUNK
42
+
43
+ left = KeyField("left")
44
+ right = KeyField("right")
45
+ func = FunctionField("func")
46
+ args = TupleField("args")
47
+ kwargs = DictField("kwargs")
48
+
49
+ def __init__(self, output_types=None, **kw):
50
+ super().__init__(_output_types=output_types, **kw)
51
+ if self.memory_scale is None:
52
+ self.memory_scale = 2.0
53
+
54
+ @classmethod
55
+ def _set_inputs(cls, op: "DataFrameCartesianChunk", inputs: List[EntityData]):
56
+ super()._set_inputs(op, inputs)
57
+ op.left, op.right = op.inputs[:2]
58
+
59
+ @staticmethod
60
+ def _build_test_obj(obj):
61
+ return (
62
+ build_df(obj, size=2)
63
+ if obj.ndim == 2
64
+ else build_series(obj, size=2, name=obj.name)
65
+ )
66
+
67
+ def has_custom_code(self) -> bool:
68
+ return not isinstance(self.func, BuiltinFunction)
69
+
70
+ def __call__(self, left, right, index=None, dtypes=None):
71
+ test_left = self._build_test_obj(left)
72
+ test_right = self._build_test_obj(right)
73
+ output_type = self._output_types[0] if self._output_types else None
74
+
75
+ if output_type == OutputType.df_or_series:
76
+ return self.new_df_or_series([left, right])
77
+
78
+ # try run to infer meta
79
+ try:
80
+ with np.errstate(all="ignore"), quiet_stdio():
81
+ obj = self.func(test_left, test_right, *self.args, **self.kwargs)
82
+ except: # noqa: E722 # nosec # pylint: disable=bare-except
83
+ if output_type == OutputType.series:
84
+ obj = pd.Series([], dtype=np.dtype(object))
85
+ elif output_type == OutputType.dataframe and dtypes is not None:
86
+ obj = build_empty_df(dtypes)
87
+ else:
88
+ raise TypeError(
89
+ "Cannot determine `output_type`, "
90
+ "you have to specify it as `dataframe` or `series`, "
91
+ "for dataframe, `dtypes` is required as well "
92
+ "if output_type='dataframe'"
93
+ )
94
+
95
+ if getattr(obj, "ndim", 0) == 1 or output_type == OutputType.series:
96
+ shape = self.kwargs.pop("shape", (np.nan,))
97
+ if index is None:
98
+ index = obj.index
99
+ index_value = parse_index(
100
+ index, left, right, self.func, self.args, self.kwargs
101
+ )
102
+ return self.new_series(
103
+ [left, right],
104
+ dtype=obj.dtype,
105
+ shape=shape,
106
+ index_value=index_value,
107
+ name=obj.name,
108
+ )
109
+ else:
110
+ dtypes = dtypes if dtypes is not None else obj.dtypes
111
+ # dataframe
112
+ shape = (np.nan, len(dtypes))
113
+ columns_value = parse_index(dtypes.index, store_data=True)
114
+ if index is None:
115
+ index = obj.index
116
+ index_value = parse_index(
117
+ index, left, right, self.func, self.args, self.kwargs
118
+ )
119
+ return self.new_dataframe(
120
+ [left, right],
121
+ shape=shape,
122
+ dtypes=dtypes,
123
+ index_value=index_value,
124
+ columns_value=columns_value,
125
+ )
126
+
127
+
128
+ def cartesian_chunk(left, right, func, skip_infer=False, args=(), **kwargs):
129
+ output_type = kwargs.pop("output_type", None)
130
+ output_types = kwargs.pop("output_types", None)
131
+ object_type = kwargs.pop("object_type", None)
132
+ output_types = validate_output_types(
133
+ output_type=output_type, output_types=output_types, object_type=object_type
134
+ )
135
+ output_type = output_types[0] if output_types else None
136
+ if output_type:
137
+ output_types = [output_type]
138
+ elif skip_infer:
139
+ output_types = [OutputType.df_or_series]
140
+ index = kwargs.pop("index", None)
141
+ dtypes = kwargs.pop("dtypes", None)
142
+ memory_scale = kwargs.pop("memory_scale", None)
143
+
144
+ op = DataFrameCartesianChunk(
145
+ left=left,
146
+ right=right,
147
+ func=func,
148
+ args=args,
149
+ kwargs=kwargs,
150
+ output_types=output_types,
151
+ memory_scale=memory_scale,
152
+ )
153
+ return op(left, right, index=index, dtypes=dtypes)
@@ -0,0 +1,126 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+
18
+ from ... import opcodes
19
+ from ...serialization.serializables import AnyField, StringField
20
+ from ...utils import no_default
21
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
22
+ from ..utils import make_column_list
23
+
24
+
25
class DataFrameCollectKv(DataFrameOperator, DataFrameOperatorMixin):
    """Operator that replaces a set of columns with one key-value column."""

    _op_type_ = opcodes.COLLECT_KV

    # Columns to merge; None means every column of the input.
    columns = AnyField("columns", default=None)
    # Delimiter between a key and its value.
    kv_delim = StringField("kv_delim", default=None)
    # Delimiter between key-value pairs.
    item_delim = StringField("item_delim", default=None)
    # Name of the generated key-value column.
    kv_col = StringField("kv_col", default=None)

    def __call__(self, df):
        """
        Create the output dataframe metadata for ``df``.

        The merged columns are removed from the dtypes and one object-typed
        column named ``kv_col`` is appended; the row count and index of the
        input are kept.
        """
        selected = self.columns
        if selected is None:
            selected = list(df.dtypes.index)
        elif not isinstance(selected, list):
            # A non-list value is treated as one single column label.
            selected = [selected]

        remaining = df.dtypes.drop(selected, errors="ignore")
        kv_dtype = pd.Series([np.dtype("object")], index=[self.kv_col])
        out_dtypes = pd.concat([remaining, kv_dtype])

        # NOTE(review): columns_value receives a raw pandas Index here --
        # confirm new_dataframe accepts it without wrapping.
        return self.new_dataframe(
            [df],
            shape=(df.shape[0], len(out_dtypes)),
            dtypes=out_dtypes,
            index_value=df.index_value,
            columns_value=out_dtypes.index,
        )
50
+
51
+
52
def collect_kv(
    data,
    columns=None,
    kv_delim="=",
    item_delim=",",
    kv_col="kv_col",
):
    """
    Merge values in specified columns into a key-value represented column.

    Parameters
    ----------
    columns : list, default None
        The columns to be merged. If None, all columns are merged.
    kv_delim : str, default '='
        Delimiter between key and value.
    item_delim : str, default ','
        Delimiter between key-value pairs.
    kv_col : str, default 'kv_col'
        Name of the new key-value column

    Returns
    -------
    DataFrame
        converted data frame

    Raises
    ------
    ValueError
        If any of the specified columns does not exist in the data.

    See Also
    --------
    DataFrame.mf.extract_kv

    Examples
    --------
    >>> import numpy as np
    >>> import maxframe.dataframe as md

    >>> df = md.DataFrame({"name": ["name1", "name2", "name3", "name4", "name5"],
    ...                    "k1": [1.0, np.nan, 7.1, np.nan, np.nan],
    ...                    "k2": [3.0, 3.0, np.nan, 1.2, 1.0],
    ...                    "k3": [np.nan, 5.1, np.nan, 1.5, np.nan],
    ...                    "k5": [10.0, np.nan, np.nan, np.nan, np.nan],
    ...                    "k7": [np.nan, np.nan, 8.2, np.nan, np.nan],
    ...                    "k9": [np.nan, np.nan, np.nan, np.nan, 1.1]})
    >>> df.execute()
        name   k1   k2   k3    k5   k7   k9
    0  name1  1.0  3.0  NaN  10.0  NaN  NaN
    1  name2  NaN  3.0  5.1   NaN  NaN  NaN
    2  name3  7.1  NaN  NaN   NaN  8.2  NaN
    3  name4  NaN  1.2  1.5   NaN  NaN  NaN
    4  name5  NaN  1.0  NaN   NaN  NaN  1.1

    The field names to be merged are specified by columns
    kv_delim is to delimit the key and value and '=' is default
    item_delim is to delimit the Key-Value pairs, ',' is default
    The new column name is specified by kv_col, 'kv_col' is default

    >>> df.mf.collect_kv(columns=['k1', 'k2', 'k3', 'k5', 'k7', 'k9']).execute()
        name                 kv_col
    0  name1  k1=1.0,k2=3.0,k5=10.0
    1  name2          k2=3.0,k3=5.1
    2  name3          k1=7.1,k7=8.2
    3  name4          k2=1.2,k3=1.5
    4  name5          k2=1.0,k9=1.1
    """
    columns_list = make_column_list(columns, data.dtypes) or []
    # ``no_default`` is the "nothing missing" sentinel so that a column that
    # is literally named None can still be reported.
    non_exist_key = next(
        (c for c in columns_list if c not in data.dtypes.index), no_default
    )
    if non_exist_key is not no_default:
        raise ValueError(f"Column {non_exist_key} specified is not a valid column.")
    op = DataFrameCollectKv(
        # Hand the operator the normalized list that was just validated: the
        # operator wraps any non-list value as one single label, which would
        # mis-handle e.g. a tuple of column names. None keeps meaning "all
        # columns".
        columns=None if columns is None else columns_list,
        kv_delim=kv_delim,
        item_delim=item_delim,
        kv_col=kv_col,
    )
    return op(data)
@@ -0,0 +1,177 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import List
16
+
17
+ import numpy as np
18
+ import pandas as pd
19
+
20
+ from ... import opcodes
21
+ from ...core import EntityData, OutputType
22
+ from ...serialization.serializables import AnyField, KeyField, StringField
23
+ from ...utils import make_dtype, no_default
24
+ from ..operators import DataFrameOperator, DataFrameOperatorMixin
25
+ from ..utils import make_column_list
26
+
27
+
28
class DataFrameExtractKv(DataFrameOperator, DataFrameOperatorMixin):
    """Operator that expands key-value string columns into separate columns."""

    _op_type_ = opcodes.EXTRACT_KV

    # Key-value columns to expand.
    columns = AnyField("columns", default=None)
    # Delimiter between a key and its value.
    kv_delim = StringField("kv_delim", default="=")
    # Delimiter between key-value pairs.
    item_delim = StringField("item_delim", default=",")
    # Type of the generated value columns.
    dtype = AnyField("dtype", default=None)
    # Value used for keys that are missing in a row.
    fill_value = AnyField("fill_value", default=None)
    # "raise" makes malformed pairs raise ValueError while keys are collected.
    errors = StringField("errors", default="raise")
    # intermediate agg data
    agg_results = KeyField("agg_results", default=None)

    def __init__(self, kv_delim="=", item_delim=",", **kw):
        super().__init__(kv_delim=kv_delim, item_delim=item_delim, **kw)
        self.output_types = [OutputType.dataframe]

    @classmethod
    def _set_inputs(cls, op: "DataFrameExtractKv", inputs: List[EntityData]):
        super()._set_inputs(op, inputs)
        if op.agg_results is not None:
            # The aggregated keys are always appended as the last input.
            op.agg_results = inputs[-1]

    def __call__(self, df):
        """
        Create the output dataframe for ``df``.

        The set of distinct ``<column>_<key>`` names is computed as an
        intermediate result and added as a second input of the operator.
        The output column count, dtypes and columns are left undetermined.
        """
        shape = (df.shape[0], np.nan)
        errors_arg = self.errors

        def get_keys(row, cols, kv_delim, item_delim):
            for col in cols:
                value = row[col]
                # Missing cells may be None, NaN or pd.NA depending on the
                # column dtype; none of them can be split, so skip them all.
                if pd.isna(value):
                    continue
                for pair in value.split(item_delim):
                    result = pair.split(kv_delim, 1)
                    if len(result) == 2:
                        yield f"{col}_{result[0]}"
                    elif errors_arg == "raise":
                        raise ValueError(f"Malformed data {pair} in column '{col}'.")

        all_keys = df.mf.flatmap(
            get_keys,
            dtypes=pd.Series([str], index=["keys_cols"]),
            cols=self.columns,
            kv_delim=self.kv_delim,
            item_delim=self.item_delim,
        )
        self.agg_results = all_keys.drop_duplicates().sort_values(by="keys_cols")
        return self.new_dataframe(
            [df, self.agg_results],
            shape=shape,
            dtypes=None,
            index_value=df.index_value,
            columns_value=None,
        )
84
+
85
+
86
def extract_kv(
    data,
    columns=None,
    kv_delim="=",
    item_delim=",",
    dtype="float",
    fill_value=None,
    errors="raise",
):
    """
    Extract values in key-value represented columns into standalone columns.
    New column names will be the name of the key-value column followed by
    an underscore and the key.

    Parameters
    ----------
    columns : list, default None
        The key-value columns to be extracted. If None, all columns are used.
    kv_delim : str, default '='
        Delimiter between key and value.
    item_delim : str, default ','
        Delimiter between key-value pairs.
    dtype : str, default 'float'
        Type of value columns to generate.
    fill_value : object, default None
        Default value for missing key-value pairs.
    errors : {'ignore', 'raise'}, default 'raise'
        * If 'raise', then invalid parsing will raise an exception.
        * If 'ignore', then invalid parsing will return the input.

    Returns
    -------
    DataFrame
        extracted data frame

    Raises
    ------
    ValueError
        If a specified column does not exist, or is not of object or
        string dtype.

    See Also
    --------
    DataFrame.mf.collect_kv

    Examples
    --------
    >>> import numpy as np
    >>> import maxframe.dataframe as md

    >>> df = md.DataFrame({"name": ["name1", "name2", "name3", "name4", "name5"],
    ...                    "kv": ["k1=1.0,k2=3.0,k5=10.0",
    ...                           "k2=3.0,k3=5.1",
    ...                           "k1=7.1,k7=8.2",
    ...                           "k2=1.2,k3=1.5",
    ...                           "k2=1.0,k9=1.1"]})
    >>> df.execute()
        name                     kv
    0  name1  k1=1.0,k2=3.0,k5=10.0
    1  name2          k2=3.0,k3=5.1
    2  name3          k1=7.1,k7=8.2
    3  name4          k2=1.2,k3=1.5
    4  name5          k2=1.0,k9=1.1

    The field names to be expanded are specified by columns
    kv_delim is to delimit the key and value and '=' is default
    item_delim is to delimit the Key-Value pairs, ',' is default
    The output field name is the original field name connect with the key by "_"
    fill_value is used to fill missing values, None is default

    >>> df.mf.extract_kv(columns=['kv'], kv_delim='=', item_delim=',').execute()
        name  kv_k1  kv_k2  kv_k3  kv_k5  kv_k7  kv_k9
    0  name1    1.0    3.0    NaN   10.0    NaN    NaN
    1  name2    NaN    3.0    5.1    NaN    NaN    NaN
    2  name3    7.1    NaN    NaN    NaN    8.2    NaN
    3  name4    NaN    1.2    1.5    NaN    NaN    NaN
    4  name5    NaN    1.0    NaN    NaN    NaN    1.1
    """
    if columns is None:
        columns = data.dtypes.index.tolist()
    columns_list = make_column_list(columns, data.dtypes)
    # ``no_default`` is the "nothing missing" sentinel so that a column that
    # is literally named None can still be reported.
    non_exist_key = next(
        (c for c in columns_list if c not in data.dtypes.index), no_default
    )
    if non_exist_key is not no_default:
        raise ValueError(f"Column {non_exist_key} specified is not a valid column.")
    for col in columns_list:
        if str(data.dtypes[col]) not in ("object", "string"):
            raise ValueError(f"Column '{col}' must be of string type.")
    op = DataFrameExtractKv(
        # Pass the normalized list that was just validated. The operator
        # iterates over ``columns``, so a raw scalar such as 'kv' would be
        # walked character by character.
        columns=columns_list,
        kv_delim=kv_delim,
        item_delim=item_delim,
        dtype=make_dtype(dtype),
        fill_value=fill_value,
        errors=errors,
    )
    return op(data)