maxframe 2.3.0__cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1117):
  1. maxframe/__init__.py +33 -0
  2. maxframe/_utils.cpython-312-x86_64-linux-gnu.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyi +21 -0
  5. maxframe/_utils.pyx +561 -0
  6. maxframe/codegen/__init__.py +27 -0
  7. maxframe/codegen/core.py +597 -0
  8. maxframe/codegen/spe/__init__.py +16 -0
  9. maxframe/codegen/spe/core.py +307 -0
  10. maxframe/codegen/spe/dataframe/__init__.py +38 -0
  11. maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
  12. maxframe/codegen/spe/dataframe/accessors/base.py +71 -0
  13. maxframe/codegen/spe/dataframe/accessors/dict_.py +89 -0
  14. maxframe/codegen/spe/dataframe/accessors/list_.py +44 -0
  15. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  16. maxframe/codegen/spe/dataframe/arithmetic.py +89 -0
  17. maxframe/codegen/spe/dataframe/datasource.py +181 -0
  18. maxframe/codegen/spe/dataframe/datastore.py +204 -0
  19. maxframe/codegen/spe/dataframe/extensions.py +63 -0
  20. maxframe/codegen/spe/dataframe/fetch.py +26 -0
  21. maxframe/codegen/spe/dataframe/groupby.py +312 -0
  22. maxframe/codegen/spe/dataframe/indexing.py +333 -0
  23. maxframe/codegen/spe/dataframe/merge.py +110 -0
  24. maxframe/codegen/spe/dataframe/misc.py +264 -0
  25. maxframe/codegen/spe/dataframe/missing.py +64 -0
  26. maxframe/codegen/spe/dataframe/reduction.py +183 -0
  27. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  28. maxframe/codegen/spe/dataframe/sort.py +104 -0
  29. maxframe/codegen/spe/dataframe/statistics.py +46 -0
  30. maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
  31. maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
  32. maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
  33. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +304 -0
  34. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +134 -0
  35. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  36. maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
  37. maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
  38. maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
  39. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +95 -0
  40. maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
  41. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  42. maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
  43. maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
  44. maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
  45. maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
  46. maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
  47. maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
  48. maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
  49. maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
  50. maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
  51. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +202 -0
  52. maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
  53. maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
  54. maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
  55. maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
  56. maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
  57. maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
  58. maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
  59. maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
  60. maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
  61. maxframe/codegen/spe/dataframe/tests/test_groupby.py +288 -0
  62. maxframe/codegen/spe/dataframe/tests/test_merge.py +426 -0
  63. maxframe/codegen/spe/dataframe/tests/test_reduction.py +117 -0
  64. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  65. maxframe/codegen/spe/dataframe/tests/test_sort.py +179 -0
  66. maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
  67. maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
  68. maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
  69. maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
  70. maxframe/codegen/spe/dataframe/tseries.py +55 -0
  71. maxframe/codegen/spe/dataframe/udf.py +62 -0
  72. maxframe/codegen/spe/dataframe/value_counts.py +31 -0
  73. maxframe/codegen/spe/dataframe/window.py +65 -0
  74. maxframe/codegen/spe/learn/__init__.py +15 -0
  75. maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
  76. maxframe/codegen/spe/learn/contrib/lightgbm.py +161 -0
  77. maxframe/codegen/spe/learn/contrib/models.py +41 -0
  78. maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
  79. maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
  80. maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
  81. maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
  82. maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
  83. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +99 -0
  84. maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
  85. maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
  86. maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
  87. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  88. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  89. maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
  90. maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
  91. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  92. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  93. maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
  94. maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
  95. maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
  96. maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
  97. maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
  98. maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
  99. maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
  100. maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
  101. maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
  102. maxframe/codegen/spe/learn/utils/__init__.py +15 -0
  103. maxframe/codegen/spe/learn/utils/checks.py +55 -0
  104. maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
  105. maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
  106. maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
  107. maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
  108. maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
  109. maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
  110. maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
  111. maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
  112. maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
  113. maxframe/codegen/spe/learn/utils/validation.py +35 -0
  114. maxframe/codegen/spe/objects.py +26 -0
  115. maxframe/codegen/spe/remote.py +29 -0
  116. maxframe/codegen/spe/tensor/__init__.py +31 -0
  117. maxframe/codegen/spe/tensor/arithmetic.py +95 -0
  118. maxframe/codegen/spe/tensor/core.py +41 -0
  119. maxframe/codegen/spe/tensor/datasource.py +166 -0
  120. maxframe/codegen/spe/tensor/extensions.py +35 -0
  121. maxframe/codegen/spe/tensor/fetch.py +26 -0
  122. maxframe/codegen/spe/tensor/fft.py +74 -0
  123. maxframe/codegen/spe/tensor/indexing.py +63 -0
  124. maxframe/codegen/spe/tensor/linalg.py +90 -0
  125. maxframe/codegen/spe/tensor/merge.py +31 -0
  126. maxframe/codegen/spe/tensor/misc.py +175 -0
  127. maxframe/codegen/spe/tensor/random.py +29 -0
  128. maxframe/codegen/spe/tensor/reduction.py +39 -0
  129. maxframe/codegen/spe/tensor/reshape.py +26 -0
  130. maxframe/codegen/spe/tensor/sort.py +42 -0
  131. maxframe/codegen/spe/tensor/spatial.py +45 -0
  132. maxframe/codegen/spe/tensor/special.py +35 -0
  133. maxframe/codegen/spe/tensor/statistics.py +68 -0
  134. maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
  135. maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
  136. maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
  137. maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
  138. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  139. maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
  140. maxframe/codegen/spe/tensor/tests/test_linalg.py +52 -0
  141. maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
  142. maxframe/codegen/spe/tensor/tests/test_misc.py +144 -0
  143. maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
  144. maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
  145. maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
  146. maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
  147. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  148. maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
  149. maxframe/codegen/spe/tensor/tests/test_statistics.py +43 -0
  150. maxframe/codegen/spe/tests/__init__.py +13 -0
  151. maxframe/codegen/spe/tests/test_remote.py +29 -0
  152. maxframe/codegen/spe/tests/test_spe_codegen.py +135 -0
  153. maxframe/codegen/spe/utils.py +56 -0
  154. maxframe/codegen/tests/__init__.py +13 -0
  155. maxframe/codegen/tests/test_codegen.py +67 -0
  156. maxframe/config/__init__.py +15 -0
  157. maxframe/config/config.py +630 -0
  158. maxframe/config/tests/__init__.py +13 -0
  159. maxframe/config/tests/test_config.py +114 -0
  160. maxframe/config/tests/test_validators.py +46 -0
  161. maxframe/config/validators.py +142 -0
  162. maxframe/conftest.py +261 -0
  163. maxframe/core/__init__.py +53 -0
  164. maxframe/core/accessor.py +45 -0
  165. maxframe/core/base.py +157 -0
  166. maxframe/core/context.py +110 -0
  167. maxframe/core/entity/__init__.py +34 -0
  168. maxframe/core/entity/core.py +150 -0
  169. maxframe/core/entity/executable.py +337 -0
  170. maxframe/core/entity/objects.py +115 -0
  171. maxframe/core/entity/output_types.py +98 -0
  172. maxframe/core/entity/tests/__init__.py +13 -0
  173. maxframe/core/entity/tests/test_objects.py +42 -0
  174. maxframe/core/entity/tileables.py +369 -0
  175. maxframe/core/entity/utils.py +39 -0
  176. maxframe/core/graph/__init__.py +22 -0
  177. maxframe/core/graph/builder/__init__.py +15 -0
  178. maxframe/core/graph/builder/base.py +91 -0
  179. maxframe/core/graph/builder/tileable.py +34 -0
  180. maxframe/core/graph/builder/utils.py +37 -0
  181. maxframe/core/graph/core.cpython-312-x86_64-linux-gnu.so +0 -0
  182. maxframe/core/graph/core.pyx +478 -0
  183. maxframe/core/graph/entity.py +164 -0
  184. maxframe/core/graph/tests/__init__.py +13 -0
  185. maxframe/core/graph/tests/test_graph.py +205 -0
  186. maxframe/core/mode.py +101 -0
  187. maxframe/core/operator/__init__.py +32 -0
  188. maxframe/core/operator/base.py +480 -0
  189. maxframe/core/operator/core.py +307 -0
  190. maxframe/core/operator/fetch.py +40 -0
  191. maxframe/core/operator/objects.py +43 -0
  192. maxframe/core/operator/shuffle.py +45 -0
  193. maxframe/core/operator/tests/__init__.py +13 -0
  194. maxframe/core/operator/tests/test_core.py +64 -0
  195. maxframe/core/operator/utils.py +68 -0
  196. maxframe/core/tests/__init__.py +13 -0
  197. maxframe/core/tests/test_mode.py +75 -0
  198. maxframe/dataframe/__init__.py +89 -0
  199. maxframe/dataframe/accessors/__init__.py +15 -0
  200. maxframe/dataframe/accessors/compat.py +45 -0
  201. maxframe/dataframe/accessors/datetime_/__init__.py +35 -0
  202. maxframe/dataframe/accessors/datetime_/accessor.py +67 -0
  203. maxframe/dataframe/accessors/datetime_/core.py +82 -0
  204. maxframe/dataframe/accessors/datetime_/tests/__init__.py +13 -0
  205. maxframe/dataframe/accessors/datetime_/tests/test_datetime_accessor.py +41 -0
  206. maxframe/dataframe/accessors/dict_/__init__.py +43 -0
  207. maxframe/dataframe/accessors/dict_/accessor.py +39 -0
  208. maxframe/dataframe/accessors/dict_/contains.py +72 -0
  209. maxframe/dataframe/accessors/dict_/core.py +48 -0
  210. maxframe/dataframe/accessors/dict_/getitem.py +140 -0
  211. maxframe/dataframe/accessors/dict_/length.py +64 -0
  212. maxframe/dataframe/accessors/dict_/remove.py +75 -0
  213. maxframe/dataframe/accessors/dict_/setitem.py +79 -0
  214. maxframe/dataframe/accessors/dict_/tests/__init__.py +13 -0
  215. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +168 -0
  216. maxframe/dataframe/accessors/list_/__init__.py +37 -0
  217. maxframe/dataframe/accessors/list_/accessor.py +39 -0
  218. maxframe/dataframe/accessors/list_/core.py +48 -0
  219. maxframe/dataframe/accessors/list_/getitem.py +128 -0
  220. maxframe/dataframe/accessors/list_/length.py +64 -0
  221. maxframe/dataframe/accessors/list_/tests/__init__.py +13 -0
  222. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +81 -0
  223. maxframe/dataframe/accessors/plotting/__init__.py +40 -0
  224. maxframe/dataframe/accessors/plotting/core.py +78 -0
  225. maxframe/dataframe/accessors/plotting/tests/__init__.py +13 -0
  226. maxframe/dataframe/accessors/plotting/tests/test_plotting_accessor.py +136 -0
  227. maxframe/dataframe/accessors/string_/__init__.py +36 -0
  228. maxframe/dataframe/accessors/string_/accessor.py +215 -0
  229. maxframe/dataframe/accessors/string_/core.py +224 -0
  230. maxframe/dataframe/accessors/string_/tests/__init__.py +13 -0
  231. maxframe/dataframe/accessors/string_/tests/test_string_accessor.py +73 -0
  232. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  233. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  234. maxframe/dataframe/accessors/struct_/core.py +43 -0
  235. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  236. maxframe/dataframe/accessors/struct_/field.py +123 -0
  237. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  238. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  239. maxframe/dataframe/arithmetic/__init__.py +373 -0
  240. maxframe/dataframe/arithmetic/abs.py +33 -0
  241. maxframe/dataframe/arithmetic/add.py +60 -0
  242. maxframe/dataframe/arithmetic/arccos.py +28 -0
  243. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  244. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  245. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  246. maxframe/dataframe/arithmetic/arctan.py +28 -0
  247. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  248. maxframe/dataframe/arithmetic/between.py +106 -0
  249. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  250. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  251. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  252. maxframe/dataframe/arithmetic/ceil.py +28 -0
  253. maxframe/dataframe/arithmetic/core.py +361 -0
  254. maxframe/dataframe/arithmetic/cos.py +28 -0
  255. maxframe/dataframe/arithmetic/cosh.py +28 -0
  256. maxframe/dataframe/arithmetic/degrees.py +28 -0
  257. maxframe/dataframe/arithmetic/docstring.py +416 -0
  258. maxframe/dataframe/arithmetic/dot.py +237 -0
  259. maxframe/dataframe/arithmetic/equal.py +58 -0
  260. maxframe/dataframe/arithmetic/exp.py +28 -0
  261. maxframe/dataframe/arithmetic/exp2.py +28 -0
  262. maxframe/dataframe/arithmetic/expm1.py +28 -0
  263. maxframe/dataframe/arithmetic/floor.py +28 -0
  264. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  265. maxframe/dataframe/arithmetic/greater.py +59 -0
  266. maxframe/dataframe/arithmetic/greater_equal.py +59 -0
  267. maxframe/dataframe/arithmetic/invert.py +33 -0
  268. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  269. maxframe/dataframe/arithmetic/less.py +57 -0
  270. maxframe/dataframe/arithmetic/less_equal.py +59 -0
  271. maxframe/dataframe/arithmetic/log.py +28 -0
  272. maxframe/dataframe/arithmetic/log10.py +28 -0
  273. maxframe/dataframe/arithmetic/log2.py +28 -0
  274. maxframe/dataframe/arithmetic/maximum.py +33 -0
  275. maxframe/dataframe/arithmetic/minimum.py +33 -0
  276. maxframe/dataframe/arithmetic/mod.py +60 -0
  277. maxframe/dataframe/arithmetic/multiply.py +60 -0
  278. maxframe/dataframe/arithmetic/negative.py +33 -0
  279. maxframe/dataframe/arithmetic/not_equal.py +58 -0
  280. maxframe/dataframe/arithmetic/power.py +68 -0
  281. maxframe/dataframe/arithmetic/radians.py +28 -0
  282. maxframe/dataframe/arithmetic/round.py +144 -0
  283. maxframe/dataframe/arithmetic/sin.py +28 -0
  284. maxframe/dataframe/arithmetic/sinh.py +28 -0
  285. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  286. maxframe/dataframe/arithmetic/subtract.py +64 -0
  287. maxframe/dataframe/arithmetic/tan.py +28 -0
  288. maxframe/dataframe/arithmetic/tanh.py +28 -0
  289. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  290. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +724 -0
  291. maxframe/dataframe/arithmetic/truediv.py +64 -0
  292. maxframe/dataframe/arithmetic/trunc.py +28 -0
  293. maxframe/dataframe/core.py +2385 -0
  294. maxframe/dataframe/datasource/__init__.py +33 -0
  295. maxframe/dataframe/datasource/core.py +94 -0
  296. maxframe/dataframe/datasource/dataframe.py +59 -0
  297. maxframe/dataframe/datasource/date_range.py +512 -0
  298. maxframe/dataframe/datasource/direct.py +57 -0
  299. maxframe/dataframe/datasource/from_dict.py +124 -0
  300. maxframe/dataframe/datasource/from_index.py +58 -0
  301. maxframe/dataframe/datasource/from_records.py +191 -0
  302. maxframe/dataframe/datasource/from_tensor.py +498 -0
  303. maxframe/dataframe/datasource/index.py +117 -0
  304. maxframe/dataframe/datasource/read_csv.py +541 -0
  305. maxframe/dataframe/datasource/read_odps_query.py +536 -0
  306. maxframe/dataframe/datasource/read_odps_table.py +295 -0
  307. maxframe/dataframe/datasource/read_parquet.py +425 -0
  308. maxframe/dataframe/datasource/series.py +55 -0
  309. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  310. maxframe/dataframe/datasource/tests/test_datasource.py +663 -0
  311. maxframe/dataframe/datastore/__init__.py +36 -0
  312. maxframe/dataframe/datastore/core.py +19 -0
  313. maxframe/dataframe/datastore/direct.py +268 -0
  314. maxframe/dataframe/datastore/tests/__init__.py +13 -0
  315. maxframe/dataframe/datastore/tests/test_to_odps.py +99 -0
  316. maxframe/dataframe/datastore/to_csv.py +219 -0
  317. maxframe/dataframe/datastore/to_odps.py +264 -0
  318. maxframe/dataframe/extensions/__init__.py +70 -0
  319. maxframe/dataframe/extensions/accessor.py +35 -0
  320. maxframe/dataframe/extensions/apply_chunk.py +733 -0
  321. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  322. maxframe/dataframe/extensions/collect_kv.py +126 -0
  323. maxframe/dataframe/extensions/extract_kv.py +177 -0
  324. maxframe/dataframe/extensions/flatjson.py +133 -0
  325. maxframe/dataframe/extensions/flatmap.py +329 -0
  326. maxframe/dataframe/extensions/map_reduce.py +263 -0
  327. maxframe/dataframe/extensions/rebalance.py +62 -0
  328. maxframe/dataframe/extensions/reshuffle.py +83 -0
  329. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  330. maxframe/dataframe/extensions/tests/test_apply_chunk.py +194 -0
  331. maxframe/dataframe/extensions/tests/test_extensions.py +198 -0
  332. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  333. maxframe/dataframe/fetch/__init__.py +15 -0
  334. maxframe/dataframe/fetch/core.py +97 -0
  335. maxframe/dataframe/groupby/__init__.py +105 -0
  336. maxframe/dataframe/groupby/aggregation.py +441 -0
  337. maxframe/dataframe/groupby/apply.py +235 -0
  338. maxframe/dataframe/groupby/apply_chunk.py +407 -0
  339. maxframe/dataframe/groupby/core.py +342 -0
  340. maxframe/dataframe/groupby/cum.py +102 -0
  341. maxframe/dataframe/groupby/expanding.py +264 -0
  342. maxframe/dataframe/groupby/extensions.py +26 -0
  343. maxframe/dataframe/groupby/fill.py +149 -0
  344. maxframe/dataframe/groupby/getitem.py +105 -0
  345. maxframe/dataframe/groupby/head.py +115 -0
  346. maxframe/dataframe/groupby/rank.py +136 -0
  347. maxframe/dataframe/groupby/rolling.py +206 -0
  348. maxframe/dataframe/groupby/sample.py +214 -0
  349. maxframe/dataframe/groupby/shift.py +114 -0
  350. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  351. maxframe/dataframe/groupby/tests/test_groupby.py +373 -0
  352. maxframe/dataframe/groupby/transform.py +264 -0
  353. maxframe/dataframe/indexing/__init__.py +104 -0
  354. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  355. maxframe/dataframe/indexing/align.py +350 -0
  356. maxframe/dataframe/indexing/at.py +83 -0
  357. maxframe/dataframe/indexing/droplevel.py +195 -0
  358. maxframe/dataframe/indexing/filter.py +169 -0
  359. maxframe/dataframe/indexing/get_level_values.py +76 -0
  360. maxframe/dataframe/indexing/getitem.py +205 -0
  361. maxframe/dataframe/indexing/iat.py +82 -0
  362. maxframe/dataframe/indexing/iloc.py +711 -0
  363. maxframe/dataframe/indexing/insert.py +118 -0
  364. maxframe/dataframe/indexing/loc.py +694 -0
  365. maxframe/dataframe/indexing/reindex.py +541 -0
  366. maxframe/dataframe/indexing/rename.py +445 -0
  367. maxframe/dataframe/indexing/rename_axis.py +217 -0
  368. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  369. maxframe/dataframe/indexing/reset_index.py +427 -0
  370. maxframe/dataframe/indexing/sample.py +232 -0
  371. maxframe/dataframe/indexing/set_axis.py +197 -0
  372. maxframe/dataframe/indexing/set_index.py +128 -0
  373. maxframe/dataframe/indexing/setitem.py +133 -0
  374. maxframe/dataframe/indexing/swaplevel.py +185 -0
  375. maxframe/dataframe/indexing/take.py +99 -0
  376. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  377. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  378. maxframe/dataframe/indexing/truncate.py +140 -0
  379. maxframe/dataframe/indexing/where.py +300 -0
  380. maxframe/dataframe/indexing/xs.py +148 -0
  381. maxframe/dataframe/initializer.py +298 -0
  382. maxframe/dataframe/merge/__init__.py +53 -0
  383. maxframe/dataframe/merge/append.py +120 -0
  384. maxframe/dataframe/merge/combine.py +244 -0
  385. maxframe/dataframe/merge/combine_first.py +120 -0
  386. maxframe/dataframe/merge/compare.py +387 -0
  387. maxframe/dataframe/merge/concat.py +500 -0
  388. maxframe/dataframe/merge/merge.py +806 -0
  389. maxframe/dataframe/merge/tests/__init__.py +13 -0
  390. maxframe/dataframe/merge/tests/test_merge.py +390 -0
  391. maxframe/dataframe/merge/update.py +271 -0
  392. maxframe/dataframe/misc/__init__.py +142 -0
  393. maxframe/dataframe/misc/_duplicate.py +56 -0
  394. maxframe/dataframe/misc/apply.py +730 -0
  395. maxframe/dataframe/misc/astype.py +237 -0
  396. maxframe/dataframe/misc/case_when.py +145 -0
  397. maxframe/dataframe/misc/check_monotonic.py +84 -0
  398. maxframe/dataframe/misc/check_unique.py +82 -0
  399. maxframe/dataframe/misc/clip.py +145 -0
  400. maxframe/dataframe/misc/cut.py +386 -0
  401. maxframe/dataframe/misc/describe.py +278 -0
  402. maxframe/dataframe/misc/diff.py +210 -0
  403. maxframe/dataframe/misc/drop.py +473 -0
  404. maxframe/dataframe/misc/drop_duplicates.py +251 -0
  405. maxframe/dataframe/misc/duplicated.py +292 -0
  406. maxframe/dataframe/misc/eval.py +730 -0
  407. maxframe/dataframe/misc/explode.py +171 -0
  408. maxframe/dataframe/misc/get_dummies.py +241 -0
  409. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  410. maxframe/dataframe/misc/isin.py +220 -0
  411. maxframe/dataframe/misc/map.py +360 -0
  412. maxframe/dataframe/misc/memory_usage.py +248 -0
  413. maxframe/dataframe/misc/pct_change.py +68 -0
  414. maxframe/dataframe/misc/qcut.py +104 -0
  415. maxframe/dataframe/misc/rechunk.py +59 -0
  416. maxframe/dataframe/misc/repeat.py +159 -0
  417. maxframe/dataframe/misc/select_dtypes.py +104 -0
  418. maxframe/dataframe/misc/shift.py +259 -0
  419. maxframe/dataframe/misc/tests/__init__.py +13 -0
  420. maxframe/dataframe/misc/tests/test_misc.py +649 -0
  421. maxframe/dataframe/misc/to_numeric.py +181 -0
  422. maxframe/dataframe/misc/transform.py +346 -0
  423. maxframe/dataframe/misc/transpose.py +148 -0
  424. maxframe/dataframe/misc/valid_index.py +115 -0
  425. maxframe/dataframe/misc/value_counts.py +206 -0
  426. maxframe/dataframe/missing/__init__.py +53 -0
  427. maxframe/dataframe/missing/checkna.py +231 -0
  428. maxframe/dataframe/missing/dropna.py +294 -0
  429. maxframe/dataframe/missing/fillna.py +283 -0
  430. maxframe/dataframe/missing/replace.py +446 -0
  431. maxframe/dataframe/missing/tests/__init__.py +13 -0
  432. maxframe/dataframe/missing/tests/test_missing.py +90 -0
  433. maxframe/dataframe/operators.py +231 -0
  434. maxframe/dataframe/reduction/__init__.py +129 -0
  435. maxframe/dataframe/reduction/aggregation.py +499 -0
  436. maxframe/dataframe/reduction/all.py +78 -0
  437. maxframe/dataframe/reduction/any.py +78 -0
  438. maxframe/dataframe/reduction/argmax.py +103 -0
  439. maxframe/dataframe/reduction/argmin.py +103 -0
  440. maxframe/dataframe/reduction/core.py +907 -0
  441. maxframe/dataframe/reduction/count.py +63 -0
  442. maxframe/dataframe/reduction/cov.py +166 -0
  443. maxframe/dataframe/reduction/cummax.py +30 -0
  444. maxframe/dataframe/reduction/cummin.py +30 -0
  445. maxframe/dataframe/reduction/cumprod.py +30 -0
  446. maxframe/dataframe/reduction/cumsum.py +30 -0
  447. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  448. maxframe/dataframe/reduction/idxmax.py +185 -0
  449. maxframe/dataframe/reduction/idxmin.py +185 -0
  450. maxframe/dataframe/reduction/kurtosis.py +111 -0
  451. maxframe/dataframe/reduction/max.py +65 -0
  452. maxframe/dataframe/reduction/mean.py +63 -0
  453. maxframe/dataframe/reduction/median.py +56 -0
  454. maxframe/dataframe/reduction/min.py +65 -0
  455. maxframe/dataframe/reduction/mode.py +144 -0
  456. maxframe/dataframe/reduction/nunique.py +149 -0
  457. maxframe/dataframe/reduction/prod.py +81 -0
  458. maxframe/dataframe/reduction/reduction_size.py +36 -0
  459. maxframe/dataframe/reduction/sem.py +73 -0
  460. maxframe/dataframe/reduction/skew.py +93 -0
  461. maxframe/dataframe/reduction/std.py +53 -0
  462. maxframe/dataframe/reduction/str_concat.py +51 -0
  463. maxframe/dataframe/reduction/sum.py +81 -0
  464. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  465. maxframe/dataframe/reduction/tests/test_reduction.py +541 -0
  466. maxframe/dataframe/reduction/unique.py +153 -0
  467. maxframe/dataframe/reduction/var.py +76 -0
  468. maxframe/dataframe/reshape/__init__.py +38 -0
  469. maxframe/dataframe/reshape/melt.py +169 -0
  470. maxframe/dataframe/reshape/pivot.py +233 -0
  471. maxframe/dataframe/reshape/pivot_table.py +275 -0
  472. maxframe/dataframe/reshape/stack.py +240 -0
  473. maxframe/dataframe/reshape/unstack.py +114 -0
  474. maxframe/dataframe/sort/__init__.py +49 -0
  475. maxframe/dataframe/sort/argsort.py +68 -0
  476. maxframe/dataframe/sort/core.py +37 -0
  477. maxframe/dataframe/sort/nlargest.py +238 -0
  478. maxframe/dataframe/sort/nsmallest.py +228 -0
  479. maxframe/dataframe/sort/rank.py +147 -0
  480. maxframe/dataframe/sort/sort_index.py +153 -0
  481. maxframe/dataframe/sort/sort_values.py +301 -0
  482. maxframe/dataframe/sort/tests/__init__.py +13 -0
  483. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  484. maxframe/dataframe/statistics/__init__.py +33 -0
  485. maxframe/dataframe/statistics/corr.py +284 -0
  486. maxframe/dataframe/statistics/quantile.py +338 -0
  487. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  488. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  489. maxframe/dataframe/tests/__init__.py +13 -0
  490. maxframe/dataframe/tests/test_initializer.py +60 -0
  491. maxframe/dataframe/tests/test_typing.py +119 -0
  492. maxframe/dataframe/tests/test_utils.py +165 -0
  493. maxframe/dataframe/tseries/__init__.py +32 -0
  494. maxframe/dataframe/tseries/at_time.py +61 -0
  495. maxframe/dataframe/tseries/between_time.py +122 -0
  496. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  497. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  498. maxframe/dataframe/tseries/to_datetime.py +299 -0
  499. maxframe/dataframe/typing_.py +196 -0
  500. maxframe/dataframe/ufunc/__init__.py +27 -0
  501. maxframe/dataframe/ufunc/tensor.py +54 -0
  502. maxframe/dataframe/ufunc/ufunc.py +53 -0
  503. maxframe/dataframe/utils.py +1651 -0
  504. maxframe/dataframe/window/__init__.py +29 -0
  505. maxframe/dataframe/window/aggregation.py +100 -0
  506. maxframe/dataframe/window/core.py +82 -0
  507. maxframe/dataframe/window/ewm.py +247 -0
  508. maxframe/dataframe/window/expanding.py +151 -0
  509. maxframe/dataframe/window/rolling.py +389 -0
  510. maxframe/dataframe/window/tests/__init__.py +13 -0
  511. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  512. maxframe/dataframe/window/tests/test_expanding.py +60 -0
  513. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  514. maxframe/env.py +37 -0
  515. maxframe/errors.py +47 -0
  516. maxframe/extension.py +107 -0
  517. maxframe/io/__init__.py +13 -0
  518. maxframe/io/objects/__init__.py +24 -0
  519. maxframe/io/objects/core.py +156 -0
  520. maxframe/io/objects/tensor.py +132 -0
  521. maxframe/io/objects/tests/__init__.py +13 -0
  522. maxframe/io/objects/tests/test_object_io.py +79 -0
  523. maxframe/io/odpsio/__init__.py +23 -0
  524. maxframe/io/odpsio/arrow.py +161 -0
  525. maxframe/io/odpsio/schema.py +496 -0
  526. maxframe/io/odpsio/tableio.py +727 -0
  527. maxframe/io/odpsio/tests/__init__.py +13 -0
  528. maxframe/io/odpsio/tests/test_arrow.py +132 -0
  529. maxframe/io/odpsio/tests/test_schema.py +580 -0
  530. maxframe/io/odpsio/tests/test_tableio.py +205 -0
  531. maxframe/io/odpsio/tests/test_volumeio.py +75 -0
  532. maxframe/io/odpsio/volumeio.py +102 -0
  533. maxframe/learn/__init__.py +25 -0
  534. maxframe/learn/cluster/__init__.py +15 -0
  535. maxframe/learn/cluster/_kmeans.py +782 -0
  536. maxframe/learn/contrib/__init__.py +17 -0
  537. maxframe/learn/contrib/graph/__init__.py +15 -0
  538. maxframe/learn/contrib/graph/connected_components.py +216 -0
  539. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  540. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  541. maxframe/learn/contrib/lightgbm/__init__.py +33 -0
  542. maxframe/learn/contrib/lightgbm/_predict.py +138 -0
  543. maxframe/learn/contrib/lightgbm/_train.py +163 -0
  544. maxframe/learn/contrib/lightgbm/callback.py +114 -0
  545. maxframe/learn/contrib/lightgbm/classifier.py +199 -0
  546. maxframe/learn/contrib/lightgbm/core.py +372 -0
  547. maxframe/learn/contrib/lightgbm/dataset.py +153 -0
  548. maxframe/learn/contrib/lightgbm/regressor.py +29 -0
  549. maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
  550. maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
  551. maxframe/learn/contrib/llm/__init__.py +17 -0
  552. maxframe/learn/contrib/llm/core.py +86 -0
  553. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  554. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  555. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  556. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  557. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  558. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  559. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  560. maxframe/learn/contrib/llm/models/__init__.py +16 -0
  561. maxframe/learn/contrib/llm/models/dashscope.py +114 -0
  562. maxframe/learn/contrib/llm/models/managed.py +119 -0
  563. maxframe/learn/contrib/llm/models/openai.py +72 -0
  564. maxframe/learn/contrib/llm/multi_modal.py +135 -0
  565. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  566. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  567. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  568. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  569. maxframe/learn/contrib/llm/text.py +608 -0
  570. maxframe/learn/contrib/models.py +109 -0
  571. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  572. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  573. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  574. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  575. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  576. maxframe/learn/contrib/utils.py +108 -0
  577. maxframe/learn/contrib/xgboost/__init__.py +33 -0
  578. maxframe/learn/contrib/xgboost/callback.py +86 -0
  579. maxframe/learn/contrib/xgboost/classifier.py +119 -0
  580. maxframe/learn/contrib/xgboost/core.py +469 -0
  581. maxframe/learn/contrib/xgboost/dmatrix.py +157 -0
  582. maxframe/learn/contrib/xgboost/predict.py +133 -0
  583. maxframe/learn/contrib/xgboost/regressor.py +91 -0
  584. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  585. maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
  586. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  587. maxframe/learn/contrib/xgboost/train.py +181 -0
  588. maxframe/learn/core.py +344 -0
  589. maxframe/learn/datasets/__init__.py +20 -0
  590. maxframe/learn/datasets/samples_generator.py +628 -0
  591. maxframe/learn/linear_model/__init__.py +15 -0
  592. maxframe/learn/linear_model/_base.py +220 -0
  593. maxframe/learn/linear_model/_lin_reg.py +175 -0
  594. maxframe/learn/metrics/__init__.py +31 -0
  595. maxframe/learn/metrics/_check_targets.py +95 -0
  596. maxframe/learn/metrics/_classification.py +1266 -0
  597. maxframe/learn/metrics/_ranking.py +477 -0
  598. maxframe/learn/metrics/_regression.py +256 -0
  599. maxframe/learn/metrics/_scorer.py +60 -0
  600. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  601. maxframe/learn/metrics/pairwise/core.py +77 -0
  602. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  603. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  604. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  605. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  606. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  607. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  608. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  609. maxframe/learn/metrics/tests/__init__.py +13 -0
  610. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  611. maxframe/learn/model_selection/__init__.py +15 -0
  612. maxframe/learn/model_selection/_split.py +451 -0
  613. maxframe/learn/model_selection/tests/__init__.py +13 -0
  614. maxframe/learn/model_selection/tests/test_split.py +156 -0
  615. maxframe/learn/preprocessing/__init__.py +16 -0
  616. maxframe/learn/preprocessing/_data/__init__.py +17 -0
  617. maxframe/learn/preprocessing/_data/min_max_scaler.py +401 -0
  618. maxframe/learn/preprocessing/_data/normalize.py +127 -0
  619. maxframe/learn/preprocessing/_data/standard_scaler.py +512 -0
  620. maxframe/learn/preprocessing/_data/utils.py +79 -0
  621. maxframe/learn/preprocessing/_label/__init__.py +16 -0
  622. maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
  623. maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
  624. maxframe/learn/utils/__init__.py +20 -0
  625. maxframe/learn/utils/_encode.py +314 -0
  626. maxframe/learn/utils/checks.py +160 -0
  627. maxframe/learn/utils/core.py +121 -0
  628. maxframe/learn/utils/extmath.py +246 -0
  629. maxframe/learn/utils/multiclass.py +292 -0
  630. maxframe/learn/utils/odpsio.py +262 -0
  631. maxframe/learn/utils/shuffle.py +114 -0
  632. maxframe/learn/utils/sparsefuncs.py +87 -0
  633. maxframe/learn/utils/validation.py +775 -0
  634. maxframe/lib/__init__.py +13 -0
  635. maxframe/lib/aio/__init__.py +27 -0
  636. maxframe/lib/aio/_runners.py +162 -0
  637. maxframe/lib/aio/_threads.py +35 -0
  638. maxframe/lib/aio/base.py +82 -0
  639. maxframe/lib/aio/file.py +85 -0
  640. maxframe/lib/aio/isolation.py +100 -0
  641. maxframe/lib/aio/lru.py +242 -0
  642. maxframe/lib/aio/parallelism.py +37 -0
  643. maxframe/lib/aio/tests/__init__.py +13 -0
  644. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  645. maxframe/lib/compat.py +185 -0
  646. maxframe/lib/compression.py +55 -0
  647. maxframe/lib/cython/__init__.py +13 -0
  648. maxframe/lib/cython/libcpp.pxd +30 -0
  649. maxframe/lib/dtypes_extension/__init__.py +30 -0
  650. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  651. maxframe/lib/dtypes_extension/blob.py +304 -0
  652. maxframe/lib/dtypes_extension/dtypes.py +106 -0
  653. maxframe/lib/dtypes_extension/tests/__init__.py +13 -0
  654. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  655. maxframe/lib/dtypes_extension/tests/test_dtypes.py +63 -0
  656. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  657. maxframe/lib/filesystem/__init__.py +21 -0
  658. maxframe/lib/filesystem/_glob.py +173 -0
  659. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  660. maxframe/lib/filesystem/_oss_lib/common.py +272 -0
  661. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  662. maxframe/lib/filesystem/_oss_lib/handle.py +152 -0
  663. maxframe/lib/filesystem/arrow.py +236 -0
  664. maxframe/lib/filesystem/base.py +263 -0
  665. maxframe/lib/filesystem/core.py +95 -0
  666. maxframe/lib/filesystem/fsmap.py +164 -0
  667. maxframe/lib/filesystem/hdfs.py +31 -0
  668. maxframe/lib/filesystem/local.py +112 -0
  669. maxframe/lib/filesystem/oss.py +226 -0
  670. maxframe/lib/filesystem/tests/__init__.py +13 -0
  671. maxframe/lib/filesystem/tests/test_filesystem.py +225 -0
  672. maxframe/lib/filesystem/tests/test_oss.py +220 -0
  673. maxframe/lib/functools_compat.py +81 -0
  674. maxframe/lib/mmh3.cpython-312-x86_64-linux-gnu.so +0 -0
  675. maxframe/lib/mmh3.pyi +43 -0
  676. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  677. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  678. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  679. maxframe/lib/sparse/__init__.py +856 -0
  680. maxframe/lib/sparse/array.py +1616 -0
  681. maxframe/lib/sparse/core.py +90 -0
  682. maxframe/lib/sparse/linalg.py +31 -0
  683. maxframe/lib/sparse/matrix.py +244 -0
  684. maxframe/lib/sparse/tests/__init__.py +13 -0
  685. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  686. maxframe/lib/sparse/vector.py +148 -0
  687. maxframe/lib/tblib/LICENSE +20 -0
  688. maxframe/lib/tblib/__init__.py +327 -0
  689. maxframe/lib/tblib/cpython.py +83 -0
  690. maxframe/lib/tblib/decorators.py +44 -0
  691. maxframe/lib/tblib/pickling_support.py +90 -0
  692. maxframe/lib/tests/__init__.py +13 -0
  693. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  694. maxframe/lib/version.py +620 -0
  695. maxframe/lib/wrapped_pickle.py +150 -0
  696. maxframe/mixin.py +157 -0
  697. maxframe/opcodes.py +657 -0
  698. maxframe/protocol.py +607 -0
  699. maxframe/remote/__init__.py +18 -0
  700. maxframe/remote/core.py +212 -0
  701. maxframe/remote/run_script.py +124 -0
  702. maxframe/serialization/__init__.py +39 -0
  703. maxframe/serialization/arrow.py +120 -0
  704. maxframe/serialization/blob.py +32 -0
  705. maxframe/serialization/core.cpython-312-x86_64-linux-gnu.so +0 -0
  706. maxframe/serialization/core.pxd +50 -0
  707. maxframe/serialization/core.pyi +66 -0
  708. maxframe/serialization/core.pyx +1265 -0
  709. maxframe/serialization/exception.py +84 -0
  710. maxframe/serialization/maxframe_objects.py +39 -0
  711. maxframe/serialization/numpy.py +110 -0
  712. maxframe/serialization/pandas.py +278 -0
  713. maxframe/serialization/scipy.py +71 -0
  714. maxframe/serialization/serializables/__init__.py +55 -0
  715. maxframe/serialization/serializables/core.py +469 -0
  716. maxframe/serialization/serializables/field.py +624 -0
  717. maxframe/serialization/serializables/field_type.py +592 -0
  718. maxframe/serialization/serializables/tests/__init__.py +13 -0
  719. maxframe/serialization/serializables/tests/test_field_type.py +119 -0
  720. maxframe/serialization/serializables/tests/test_serializable.py +313 -0
  721. maxframe/serialization/tests/__init__.py +13 -0
  722. maxframe/serialization/tests/test_serial.py +487 -0
  723. maxframe/session.py +1250 -0
  724. maxframe/sperunner.py +165 -0
  725. maxframe/tensor/__init__.py +325 -0
  726. maxframe/tensor/arithmetic/__init__.py +322 -0
  727. maxframe/tensor/arithmetic/abs.py +66 -0
  728. maxframe/tensor/arithmetic/absolute.py +66 -0
  729. maxframe/tensor/arithmetic/add.py +112 -0
  730. maxframe/tensor/arithmetic/angle.py +70 -0
  731. maxframe/tensor/arithmetic/arccos.py +101 -0
  732. maxframe/tensor/arithmetic/arccosh.py +89 -0
  733. maxframe/tensor/arithmetic/arcsin.py +92 -0
  734. maxframe/tensor/arithmetic/arcsinh.py +84 -0
  735. maxframe/tensor/arithmetic/arctan.py +104 -0
  736. maxframe/tensor/arithmetic/arctan2.py +126 -0
  737. maxframe/tensor/arithmetic/arctanh.py +84 -0
  738. maxframe/tensor/arithmetic/around.py +112 -0
  739. maxframe/tensor/arithmetic/bitand.py +93 -0
  740. maxframe/tensor/arithmetic/bitor.py +100 -0
  741. maxframe/tensor/arithmetic/bitxor.py +93 -0
  742. maxframe/tensor/arithmetic/cbrt.py +64 -0
  743. maxframe/tensor/arithmetic/ceil.py +69 -0
  744. maxframe/tensor/arithmetic/clip.py +165 -0
  745. maxframe/tensor/arithmetic/conj.py +72 -0
  746. maxframe/tensor/arithmetic/copysign.py +76 -0
  747. maxframe/tensor/arithmetic/core.py +552 -0
  748. maxframe/tensor/arithmetic/cos.py +83 -0
  749. maxframe/tensor/arithmetic/cosh.py +70 -0
  750. maxframe/tensor/arithmetic/deg2rad.py +70 -0
  751. maxframe/tensor/arithmetic/degrees.py +75 -0
  752. maxframe/tensor/arithmetic/divide.py +112 -0
  753. maxframe/tensor/arithmetic/equal.py +74 -0
  754. maxframe/tensor/arithmetic/exp.py +104 -0
  755. maxframe/tensor/arithmetic/exp2.py +65 -0
  756. maxframe/tensor/arithmetic/expm1.py +77 -0
  757. maxframe/tensor/arithmetic/fabs.py +72 -0
  758. maxframe/tensor/arithmetic/fix.py +67 -0
  759. maxframe/tensor/arithmetic/float_power.py +101 -0
  760. maxframe/tensor/arithmetic/floor.py +75 -0
  761. maxframe/tensor/arithmetic/floordiv.py +92 -0
  762. maxframe/tensor/arithmetic/fmax.py +103 -0
  763. maxframe/tensor/arithmetic/fmin.py +104 -0
  764. maxframe/tensor/arithmetic/fmod.py +97 -0
  765. maxframe/tensor/arithmetic/frexp.py +96 -0
  766. maxframe/tensor/arithmetic/greater.py +75 -0
  767. maxframe/tensor/arithmetic/greater_equal.py +67 -0
  768. maxframe/tensor/arithmetic/hypot.py +75 -0
  769. maxframe/tensor/arithmetic/i0.py +87 -0
  770. maxframe/tensor/arithmetic/imag.py +65 -0
  771. maxframe/tensor/arithmetic/invert.py +108 -0
  772. maxframe/tensor/arithmetic/isclose.py +114 -0
  773. maxframe/tensor/arithmetic/iscomplex.py +62 -0
  774. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  775. maxframe/tensor/arithmetic/isfinite.py +104 -0
  776. maxframe/tensor/arithmetic/isinf.py +101 -0
  777. maxframe/tensor/arithmetic/isnan.py +80 -0
  778. maxframe/tensor/arithmetic/isreal.py +61 -0
  779. maxframe/tensor/arithmetic/ldexp.py +97 -0
  780. maxframe/tensor/arithmetic/less.py +67 -0
  781. maxframe/tensor/arithmetic/less_equal.py +67 -0
  782. maxframe/tensor/arithmetic/log.py +90 -0
  783. maxframe/tensor/arithmetic/log10.py +83 -0
  784. maxframe/tensor/arithmetic/log1p.py +93 -0
  785. maxframe/tensor/arithmetic/log2.py +83 -0
  786. maxframe/tensor/arithmetic/logaddexp.py +78 -0
  787. maxframe/tensor/arithmetic/logaddexp2.py +76 -0
  788. maxframe/tensor/arithmetic/logical_and.py +79 -0
  789. maxframe/tensor/arithmetic/logical_not.py +72 -0
  790. maxframe/tensor/arithmetic/logical_or.py +80 -0
  791. maxframe/tensor/arithmetic/logical_xor.py +86 -0
  792. maxframe/tensor/arithmetic/lshift.py +80 -0
  793. maxframe/tensor/arithmetic/maximum.py +106 -0
  794. maxframe/tensor/arithmetic/minimum.py +106 -0
  795. maxframe/tensor/arithmetic/mod.py +102 -0
  796. maxframe/tensor/arithmetic/modf.py +87 -0
  797. maxframe/tensor/arithmetic/multiply.py +114 -0
  798. maxframe/tensor/arithmetic/nan_to_num.py +97 -0
  799. maxframe/tensor/arithmetic/negative.py +63 -0
  800. maxframe/tensor/arithmetic/nextafter.py +66 -0
  801. maxframe/tensor/arithmetic/not_equal.py +70 -0
  802. maxframe/tensor/arithmetic/positive.py +45 -0
  803. maxframe/tensor/arithmetic/power.py +104 -0
  804. maxframe/tensor/arithmetic/rad2deg.py +69 -0
  805. maxframe/tensor/arithmetic/radians.py +75 -0
  806. maxframe/tensor/arithmetic/real.py +68 -0
  807. maxframe/tensor/arithmetic/reciprocal.py +78 -0
  808. maxframe/tensor/arithmetic/rint.py +66 -0
  809. maxframe/tensor/arithmetic/rshift.py +79 -0
  810. maxframe/tensor/arithmetic/setimag.py +27 -0
  811. maxframe/tensor/arithmetic/setreal.py +27 -0
  812. maxframe/tensor/arithmetic/sign.py +79 -0
  813. maxframe/tensor/arithmetic/signbit.py +63 -0
  814. maxframe/tensor/arithmetic/sin.py +96 -0
  815. maxframe/tensor/arithmetic/sinc.py +100 -0
  816. maxframe/tensor/arithmetic/sinh.py +91 -0
  817. maxframe/tensor/arithmetic/spacing.py +70 -0
  818. maxframe/tensor/arithmetic/sqrt.py +79 -0
  819. maxframe/tensor/arithmetic/square.py +67 -0
  820. maxframe/tensor/arithmetic/subtract.py +83 -0
  821. maxframe/tensor/arithmetic/tan.py +86 -0
  822. maxframe/tensor/arithmetic/tanh.py +90 -0
  823. maxframe/tensor/arithmetic/tests/__init__.py +13 -0
  824. maxframe/tensor/arithmetic/tests/test_arithmetic.py +449 -0
  825. maxframe/tensor/arithmetic/truediv.py +102 -0
  826. maxframe/tensor/arithmetic/trunc.py +70 -0
  827. maxframe/tensor/arithmetic/utils.py +91 -0
  828. maxframe/tensor/array_utils.py +164 -0
  829. maxframe/tensor/core.py +597 -0
  830. maxframe/tensor/datasource/__init__.py +40 -0
  831. maxframe/tensor/datasource/arange.py +154 -0
  832. maxframe/tensor/datasource/array.py +399 -0
  833. maxframe/tensor/datasource/core.py +114 -0
  834. maxframe/tensor/datasource/diag.py +140 -0
  835. maxframe/tensor/datasource/diagflat.py +69 -0
  836. maxframe/tensor/datasource/empty.py +167 -0
  837. maxframe/tensor/datasource/eye.py +95 -0
  838. maxframe/tensor/datasource/from_dataframe.py +68 -0
  839. maxframe/tensor/datasource/from_dense.py +37 -0
  840. maxframe/tensor/datasource/from_sparse.py +45 -0
  841. maxframe/tensor/datasource/full.py +184 -0
  842. maxframe/tensor/datasource/identity.py +54 -0
  843. maxframe/tensor/datasource/indices.py +115 -0
  844. maxframe/tensor/datasource/linspace.py +140 -0
  845. maxframe/tensor/datasource/meshgrid.py +135 -0
  846. maxframe/tensor/datasource/ones.py +178 -0
  847. maxframe/tensor/datasource/scalar.py +40 -0
  848. maxframe/tensor/datasource/tests/__init__.py +13 -0
  849. maxframe/tensor/datasource/tests/test_datasource.py +310 -0
  850. maxframe/tensor/datasource/tri_array.py +107 -0
  851. maxframe/tensor/datasource/zeros.py +192 -0
  852. maxframe/tensor/extensions/__init__.py +33 -0
  853. maxframe/tensor/extensions/accessor.py +25 -0
  854. maxframe/tensor/extensions/apply_chunk.py +137 -0
  855. maxframe/tensor/extensions/rebalance.py +65 -0
  856. maxframe/tensor/fetch/__init__.py +15 -0
  857. maxframe/tensor/fetch/core.py +54 -0
  858. maxframe/tensor/fft/__init__.py +32 -0
  859. maxframe/tensor/fft/core.py +168 -0
  860. maxframe/tensor/fft/fft.py +112 -0
  861. maxframe/tensor/fft/fft2.py +118 -0
  862. maxframe/tensor/fft/fftfreq.py +80 -0
  863. maxframe/tensor/fft/fftn.py +123 -0
  864. maxframe/tensor/fft/fftshift.py +79 -0
  865. maxframe/tensor/fft/hfft.py +112 -0
  866. maxframe/tensor/fft/ifft.py +114 -0
  867. maxframe/tensor/fft/ifft2.py +115 -0
  868. maxframe/tensor/fft/ifftn.py +123 -0
  869. maxframe/tensor/fft/ifftshift.py +73 -0
  870. maxframe/tensor/fft/ihfft.py +93 -0
  871. maxframe/tensor/fft/irfft.py +118 -0
  872. maxframe/tensor/fft/irfft2.py +62 -0
  873. maxframe/tensor/fft/irfftn.py +114 -0
  874. maxframe/tensor/fft/rfft.py +116 -0
  875. maxframe/tensor/fft/rfft2.py +63 -0
  876. maxframe/tensor/fft/rfftfreq.py +87 -0
  877. maxframe/tensor/fft/rfftn.py +113 -0
  878. maxframe/tensor/indexing/__init__.py +47 -0
  879. maxframe/tensor/indexing/choose.py +198 -0
  880. maxframe/tensor/indexing/compress.py +122 -0
  881. maxframe/tensor/indexing/core.py +190 -0
  882. maxframe/tensor/indexing/extract.py +69 -0
  883. maxframe/tensor/indexing/fill_diagonal.py +180 -0
  884. maxframe/tensor/indexing/flatnonzero.py +58 -0
  885. maxframe/tensor/indexing/getitem.py +144 -0
  886. maxframe/tensor/indexing/nonzero.py +118 -0
  887. maxframe/tensor/indexing/setitem.py +142 -0
  888. maxframe/tensor/indexing/slice.py +32 -0
  889. maxframe/tensor/indexing/take.py +128 -0
  890. maxframe/tensor/indexing/tests/__init__.py +13 -0
  891. maxframe/tensor/indexing/tests/test_indexing.py +232 -0
  892. maxframe/tensor/indexing/unravel_index.py +103 -0
  893. maxframe/tensor/lib/__init__.py +16 -0
  894. maxframe/tensor/lib/index_tricks.py +404 -0
  895. maxframe/tensor/linalg/__init__.py +43 -0
  896. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  897. maxframe/tensor/linalg/cholesky.py +117 -0
  898. maxframe/tensor/linalg/dot.py +145 -0
  899. maxframe/tensor/linalg/einsum.py +339 -0
  900. maxframe/tensor/linalg/inner.py +36 -0
  901. maxframe/tensor/linalg/inv.py +83 -0
  902. maxframe/tensor/linalg/lstsq.py +100 -0
  903. maxframe/tensor/linalg/lu.py +115 -0
  904. maxframe/tensor/linalg/matmul.py +225 -0
  905. maxframe/tensor/linalg/matrix_norm.py +75 -0
  906. maxframe/tensor/linalg/norm.py +249 -0
  907. maxframe/tensor/linalg/qr.py +124 -0
  908. maxframe/tensor/linalg/solve.py +72 -0
  909. maxframe/tensor/linalg/solve_triangular.py +103 -0
  910. maxframe/tensor/linalg/svd.py +167 -0
  911. maxframe/tensor/linalg/tensordot.py +213 -0
  912. maxframe/tensor/linalg/vdot.py +73 -0
  913. maxframe/tensor/linalg/vector_norm.py +113 -0
  914. maxframe/tensor/merge/__init__.py +21 -0
  915. maxframe/tensor/merge/append.py +74 -0
  916. maxframe/tensor/merge/column_stack.py +63 -0
  917. maxframe/tensor/merge/concatenate.py +103 -0
  918. maxframe/tensor/merge/dstack.py +71 -0
  919. maxframe/tensor/merge/hstack.py +70 -0
  920. maxframe/tensor/merge/stack.py +130 -0
  921. maxframe/tensor/merge/tests/__init__.py +13 -0
  922. maxframe/tensor/merge/tests/test_merge.py +79 -0
  923. maxframe/tensor/merge/vstack.py +74 -0
  924. maxframe/tensor/misc/__init__.py +72 -0
  925. maxframe/tensor/misc/argwhere.py +72 -0
  926. maxframe/tensor/misc/array_split.py +46 -0
  927. maxframe/tensor/misc/astype.py +121 -0
  928. maxframe/tensor/misc/atleast_1d.py +72 -0
  929. maxframe/tensor/misc/atleast_2d.py +70 -0
  930. maxframe/tensor/misc/atleast_3d.py +85 -0
  931. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  932. maxframe/tensor/misc/broadcast_to.py +89 -0
  933. maxframe/tensor/misc/copy.py +64 -0
  934. maxframe/tensor/misc/copyto.py +130 -0
  935. maxframe/tensor/misc/delete.py +104 -0
  936. maxframe/tensor/misc/diff.py +115 -0
  937. maxframe/tensor/misc/dsplit.py +68 -0
  938. maxframe/tensor/misc/ediff1d.py +74 -0
  939. maxframe/tensor/misc/expand_dims.py +85 -0
  940. maxframe/tensor/misc/flatten.py +63 -0
  941. maxframe/tensor/misc/flip.py +90 -0
  942. maxframe/tensor/misc/fliplr.py +64 -0
  943. maxframe/tensor/misc/flipud.py +68 -0
  944. maxframe/tensor/misc/hsplit.py +85 -0
  945. maxframe/tensor/misc/in1d.py +94 -0
  946. maxframe/tensor/misc/insert.py +139 -0
  947. maxframe/tensor/misc/isin.py +130 -0
  948. maxframe/tensor/misc/moveaxis.py +83 -0
  949. maxframe/tensor/misc/ndim.py +53 -0
  950. maxframe/tensor/misc/ravel.py +90 -0
  951. maxframe/tensor/misc/repeat.py +129 -0
  952. maxframe/tensor/misc/result_type.py +88 -0
  953. maxframe/tensor/misc/roll.py +124 -0
  954. maxframe/tensor/misc/rollaxis.py +77 -0
  955. maxframe/tensor/misc/searchsorted.py +147 -0
  956. maxframe/tensor/misc/setdiff1d.py +58 -0
  957. maxframe/tensor/misc/shape.py +89 -0
  958. maxframe/tensor/misc/split.py +190 -0
  959. maxframe/tensor/misc/squeeze.py +117 -0
  960. maxframe/tensor/misc/swapaxes.py +113 -0
  961. maxframe/tensor/misc/tests/__init__.py +13 -0
  962. maxframe/tensor/misc/tests/test_misc.py +112 -0
  963. maxframe/tensor/misc/tile.py +109 -0
  964. maxframe/tensor/misc/transpose.py +133 -0
  965. maxframe/tensor/misc/trapezoid.py +123 -0
  966. maxframe/tensor/misc/unique.py +205 -0
  967. maxframe/tensor/misc/vsplit.py +74 -0
  968. maxframe/tensor/misc/where.py +129 -0
  969. maxframe/tensor/operators.py +83 -0
  970. maxframe/tensor/random/__init__.py +166 -0
  971. maxframe/tensor/random/beta.py +87 -0
  972. maxframe/tensor/random/binomial.py +135 -0
  973. maxframe/tensor/random/bytes.py +37 -0
  974. maxframe/tensor/random/chisquare.py +108 -0
  975. maxframe/tensor/random/choice.py +187 -0
  976. maxframe/tensor/random/core.py +249 -0
  977. maxframe/tensor/random/dirichlet.py +121 -0
  978. maxframe/tensor/random/exponential.py +92 -0
  979. maxframe/tensor/random/f.py +133 -0
  980. maxframe/tensor/random/gamma.py +126 -0
  981. maxframe/tensor/random/geometric.py +91 -0
  982. maxframe/tensor/random/gumbel.py +165 -0
  983. maxframe/tensor/random/hypergeometric.py +146 -0
  984. maxframe/tensor/random/laplace.py +131 -0
  985. maxframe/tensor/random/logistic.py +127 -0
  986. maxframe/tensor/random/lognormal.py +157 -0
  987. maxframe/tensor/random/logseries.py +120 -0
  988. maxframe/tensor/random/multinomial.py +131 -0
  989. maxframe/tensor/random/multivariate_normal.py +190 -0
  990. maxframe/tensor/random/negative_binomial.py +123 -0
  991. maxframe/tensor/random/noncentral_chisquare.py +130 -0
  992. maxframe/tensor/random/noncentral_f.py +124 -0
  993. maxframe/tensor/random/normal.py +141 -0
  994. maxframe/tensor/random/pareto.py +138 -0
  995. maxframe/tensor/random/permutation.py +107 -0
  996. maxframe/tensor/random/poisson.py +109 -0
  997. maxframe/tensor/random/power.py +140 -0
  998. maxframe/tensor/random/rand.py +80 -0
  999. maxframe/tensor/random/randint.py +119 -0
  1000. maxframe/tensor/random/randn.py +94 -0
  1001. maxframe/tensor/random/random_integers.py +121 -0
  1002. maxframe/tensor/random/random_sample.py +84 -0
  1003. maxframe/tensor/random/rayleigh.py +108 -0
  1004. maxframe/tensor/random/shuffle.py +61 -0
  1005. maxframe/tensor/random/standard_cauchy.py +103 -0
  1006. maxframe/tensor/random/standard_exponential.py +70 -0
  1007. maxframe/tensor/random/standard_gamma.py +118 -0
  1008. maxframe/tensor/random/standard_normal.py +72 -0
  1009. maxframe/tensor/random/standard_t.py +133 -0
  1010. maxframe/tensor/random/tests/__init__.py +13 -0
  1011. maxframe/tensor/random/tests/test_random.py +165 -0
  1012. maxframe/tensor/random/triangular.py +117 -0
  1013. maxframe/tensor/random/uniform.py +129 -0
  1014. maxframe/tensor/random/vonmises.py +129 -0
  1015. maxframe/tensor/random/wald.py +112 -0
  1016. maxframe/tensor/random/weibull.py +138 -0
  1017. maxframe/tensor/random/zipf.py +120 -0
  1018. maxframe/tensor/rechunk/__init__.py +26 -0
  1019. maxframe/tensor/rechunk/rechunk.py +43 -0
  1020. maxframe/tensor/reduction/__init__.py +64 -0
  1021. maxframe/tensor/reduction/all.py +101 -0
  1022. maxframe/tensor/reduction/allclose.py +86 -0
  1023. maxframe/tensor/reduction/any.py +103 -0
  1024. maxframe/tensor/reduction/argmax.py +101 -0
  1025. maxframe/tensor/reduction/argmin.py +101 -0
  1026. maxframe/tensor/reduction/array_equal.py +63 -0
  1027. maxframe/tensor/reduction/core.py +166 -0
  1028. maxframe/tensor/reduction/count_nonzero.py +80 -0
  1029. maxframe/tensor/reduction/cumprod.py +95 -0
  1030. maxframe/tensor/reduction/cumsum.py +99 -0
  1031. maxframe/tensor/reduction/max.py +118 -0
  1032. maxframe/tensor/reduction/mean.py +122 -0
  1033. maxframe/tensor/reduction/min.py +118 -0
  1034. maxframe/tensor/reduction/nanargmax.py +80 -0
  1035. maxframe/tensor/reduction/nanargmin.py +74 -0
  1036. maxframe/tensor/reduction/nancumprod.py +89 -0
  1037. maxframe/tensor/reduction/nancumsum.py +92 -0
  1038. maxframe/tensor/reduction/nanmax.py +109 -0
  1039. maxframe/tensor/reduction/nanmean.py +105 -0
  1040. maxframe/tensor/reduction/nanmin.py +109 -0
  1041. maxframe/tensor/reduction/nanprod.py +92 -0
  1042. maxframe/tensor/reduction/nanstd.py +124 -0
  1043. maxframe/tensor/reduction/nansum.py +113 -0
  1044. maxframe/tensor/reduction/nanvar.py +149 -0
  1045. maxframe/tensor/reduction/prod.py +128 -0
  1046. maxframe/tensor/reduction/std.py +132 -0
  1047. maxframe/tensor/reduction/sum.py +123 -0
  1048. maxframe/tensor/reduction/tests/__init__.py +13 -0
  1049. maxframe/tensor/reduction/tests/test_reduction.py +189 -0
  1050. maxframe/tensor/reduction/var.py +176 -0
  1051. maxframe/tensor/reshape/__init__.py +15 -0
  1052. maxframe/tensor/reshape/reshape.py +192 -0
  1053. maxframe/tensor/reshape/tests/__init__.py +13 -0
  1054. maxframe/tensor/reshape/tests/test_reshape.py +35 -0
  1055. maxframe/tensor/sort/__init__.py +18 -0
  1056. maxframe/tensor/sort/argpartition.py +98 -0
  1057. maxframe/tensor/sort/argsort.py +150 -0
  1058. maxframe/tensor/sort/partition.py +228 -0
  1059. maxframe/tensor/sort/sort.py +295 -0
  1060. maxframe/tensor/spatial/__init__.py +15 -0
  1061. maxframe/tensor/spatial/distance/__init__.py +17 -0
  1062. maxframe/tensor/spatial/distance/cdist.py +421 -0
  1063. maxframe/tensor/spatial/distance/pdist.py +398 -0
  1064. maxframe/tensor/spatial/distance/squareform.py +153 -0
  1065. maxframe/tensor/special/__init__.py +175 -0
  1066. maxframe/tensor/special/airy.py +55 -0
  1067. maxframe/tensor/special/bessel.py +199 -0
  1068. maxframe/tensor/special/core.py +99 -0
  1069. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  1070. maxframe/tensor/special/ellip_harm.py +55 -0
  1071. maxframe/tensor/special/err_fresnel.py +223 -0
  1072. maxframe/tensor/special/gamma_funcs.py +303 -0
  1073. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  1074. maxframe/tensor/special/info_theory.py +189 -0
  1075. maxframe/tensor/special/misc.py +163 -0
  1076. maxframe/tensor/special/statistical.py +56 -0
  1077. maxframe/tensor/statistics/__init__.py +24 -0
  1078. maxframe/tensor/statistics/average.py +143 -0
  1079. maxframe/tensor/statistics/bincount.py +133 -0
  1080. maxframe/tensor/statistics/corrcoef.py +77 -0
  1081. maxframe/tensor/statistics/cov.py +222 -0
  1082. maxframe/tensor/statistics/digitize.py +126 -0
  1083. maxframe/tensor/statistics/histogram.py +520 -0
  1084. maxframe/tensor/statistics/median.py +85 -0
  1085. maxframe/tensor/statistics/percentile.py +175 -0
  1086. maxframe/tensor/statistics/ptp.py +89 -0
  1087. maxframe/tensor/statistics/quantile.py +290 -0
  1088. maxframe/tensor/ufunc/__init__.py +24 -0
  1089. maxframe/tensor/ufunc/ufunc.py +198 -0
  1090. maxframe/tensor/utils.py +716 -0
  1091. maxframe/tests/__init__.py +13 -0
  1092. maxframe/tests/test_protocol.py +178 -0
  1093. maxframe/tests/test_udf.py +61 -0
  1094. maxframe/tests/test_utils.py +618 -0
  1095. maxframe/tests/utils.py +245 -0
  1096. maxframe/typing_.py +42 -0
  1097. maxframe/udf.py +356 -0
  1098. maxframe/utils.py +1774 -0
  1099. maxframe-2.3.0.dist-info/METADATA +109 -0
  1100. maxframe-2.3.0.dist-info/RECORD +1117 -0
  1101. maxframe-2.3.0.dist-info/WHEEL +6 -0
  1102. maxframe-2.3.0.dist-info/top_level.txt +3 -0
  1103. maxframe_client/__init__.py +16 -0
  1104. maxframe_client/clients/__init__.py +13 -0
  1105. maxframe_client/clients/framedriver.py +137 -0
  1106. maxframe_client/conftest.py +15 -0
  1107. maxframe_client/fetcher.py +411 -0
  1108. maxframe_client/session/__init__.py +22 -0
  1109. maxframe_client/session/consts.py +39 -0
  1110. maxframe_client/session/graph.py +125 -0
  1111. maxframe_client/session/odps.py +802 -0
  1112. maxframe_client/session/task.py +329 -0
  1113. maxframe_client/session/tests/__init__.py +13 -0
  1114. maxframe_client/session/tests/test_task.py +115 -0
  1115. maxframe_client/tests/__init__.py +13 -0
  1116. maxframe_client/tests/test_fetcher.py +180 -0
  1117. maxframe_client/tests/test_session.py +409 -0
@@ -0,0 +1,1651 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import dataclasses
16
+ import functools
17
+ import inspect
18
+ import itertools
19
+ import logging
20
+ import operator
21
+ import sys
22
+ from contextlib import contextmanager
23
+ from numbers import Integral
24
+ from typing import TYPE_CHECKING, Any, Callable, List, Optional
25
+
26
+ import numpy as np
27
+ import pandas as pd
28
+ from pandas.core.dtypes.inference import is_dict_like, is_list_like
29
+
30
+ from ..config.validators import dtype_backend_validator
31
+ from ..core import ENTITY_TYPE, Entity, ExecutableTuple, OutputType, get_output_types
32
+ from ..lib.dtypes_extension import ExternalBlobDtype, SolidBlob
33
+ from ..lib.mmh3 import hash as mmh_hash
34
+ from ..udf import MarkedFunction
35
+ from ..utils import (
36
+ ModulePlaceholder,
37
+ is_full_slice,
38
+ lazy_import,
39
+ make_dtypes,
40
+ quiet_stdio,
41
+ sbytes,
42
+ tokenize,
43
+ validate_and_adjust_resource_ratio,
44
+ )
45
+
46
+ if TYPE_CHECKING:
47
+ from .core import IndexValue
48
+
49
+ try:
50
+ import pyarrow as pa
51
+ except ImportError: # pragma: no cover
52
+ pa = ModulePlaceholder("pyarrow")
53
+
54
+ if TYPE_CHECKING:
55
+ from .operators import DataFrameOperator
56
+
57
+ cudf = lazy_import("cudf", rename="cudf")
58
+ logger = logging.getLogger(__name__)
59
+
60
+ try:
61
+ from ..lib.dtypes_extension import ArrowDtype
62
+ except ImportError:
63
+ ArrowDtype = None
64
+
65
+
66
def hash_index(index, size):
    """
    Distribute the labels of ``index`` into ``size`` hash buckets.

    Each label is converted with ``sbytes``, hashed with ``mmh_hash`` and
    reduced modulo ``size`` to pick its bucket.

    Parameters
    ----------
    index
        Index whose labels are distributed.
    size : int
        Number of buckets.

    Returns
    -------
    list
        ``size`` entries; entry ``i`` is the group of labels that fell into
        bucket ``i`` or an empty list when no label did.
    """

    def _bucket_of(label):
        return mmh_hash(sbytes(label)) % size

    buckets = index.groupby(index.map(_bucket_of))
    return [buckets.get(bucket_no, []) for bucket_no in range(size)]
73
+
74
+
75
def hash_dataframe_on(df, on, size, level=None):
    """
    Split the row positions of ``df`` into ``size`` buckets by hashing.

    Parameters
    ----------
    df
        Data whose rows are distributed.
    on
        What to hash: ``None`` hashes the index (optionally only ``level``),
        a callable hashes the index mapped through it, a list hashes the
        listed columns (``pd.Series`` items are used as they are), anything
        else is used as a key into ``df``.
    size : int
        Number of buckets.
    level
        Index level(s) to hash; only used when ``on`` is None.

    Returns
    -------
    list
        ``size`` indexes of row positions; an empty ``pd.Index`` is used
        for buckets that received no row.
    """
    hash_object = pd.util.hash_pandas_object

    if on is None:
        labels = df.index
        if level is not None:
            labels = labels.to_frame(False)[level]
        if cudf and isinstance(labels, cudf.Index):  # pragma: no cover
            labels = labels.to_pandas()
        row_hashes = hash_object(labels, categorize=False)
    elif callable(on):
        # todo optimization can be added, if ``on`` is a numpy ufunc or sth can be vectorized
        row_hashes = hash_object(df.index.map(on), categorize=False)
    else:
        if isinstance(on, list):
            pieces = [
                item if isinstance(item, pd.Series) else df[item] for item in on
            ]
            keyed = pd.concat(pieces, axis=1)
        else:
            keyed = df[on]
        row_hashes = hash_object(keyed, index=False, categorize=False)

    positions = pd.RangeIndex(0, len(row_hashes))
    grouped = positions.groupby(row_hashes % size)
    return [grouped.get(bucket_no, pd.Index([])) for bucket_no in range(size)]
100
+
101
+
102
def hash_dtypes(dtypes, size):
    """
    Split ``dtypes`` into ``size`` parts according to ``hash_index``.

    The labels of ``dtypes.index`` are bucketed with ``hash_index`` and
    ``dtypes`` is indexed once with the labels of every bucket.
    """
    return [dtypes[labels] for labels in hash_index(dtypes.index, size)]
105
+
106
+
107
def sort_dataframe_inplace(df, *axis, **kw):
    """
    Sort ``df`` in place along every given axis, in the given order.

    Parameters
    ----------
    df
        Object to sort; it is modified and returned.
    axis
        Axes passed one after another to ``df.sort_index``.
    kw
        Extra keyword arguments forwarded to every ``sort_index`` call.

    Returns
    -------
    The same ``df`` object.
    """
    for one_axis in axis:
        df.sort_index(axis=one_axis, inplace=True, **kw)
    return df
111
+
112
+
113
@functools.lru_cache(1)
def _get_range_index_type():
    """
    Return the type(s) to use in ``isinstance`` checks for range indexes.

    ``pd.RangeIndex`` alone when ``cudf`` is None, otherwise a tuple that
    also holds ``cudf.RangeIndex``. The result is cached.
    """
    if cudf is None:
        return pd.RangeIndex
    return pd.RangeIndex, cudf.RangeIndex
119
+
120
+
121
@functools.lru_cache(1)
def _get_multi_index_type():
    """
    Return the type(s) to use in ``isinstance`` checks for multi indexes.

    ``pd.MultiIndex`` alone when ``cudf`` is None, otherwise a tuple that
    also holds ``cudf.MultiIndex``. The result is cached.
    """
    if cudf is None:
        return pd.MultiIndex
    return pd.MultiIndex, cudf.MultiIndex
127
+
128
+
129
+ def _get_range_index_start(pd_range_index):
130
+ try:
131
+ return pd_range_index.start
132
+ except AttributeError: # pragma: no cover
133
+ return pd_range_index._start
134
+
135
+
136
+ def _get_range_index_stop(pd_range_index):
137
+ try:
138
+ return pd_range_index.stop
139
+ except AttributeError: # pragma: no cover
140
+ return pd_range_index._stop
141
+
142
+
143
+ def _get_range_index_step(pd_range_index):
144
+ try:
145
+ return pd_range_index.step
146
+ except AttributeError: # pragma: no cover
147
+ pass
148
+ try: # pragma: no cover
149
+ return pd_range_index._step
150
+ except AttributeError: # pragma: no cover
151
+ return 1 # cudf does not support step arg
152
+
153
+
154
def is_pd_range_empty(pd_range_index):
    """
    Tell whether a range index holds no element.

    With a non-negative step the range is empty when ``start >= stop``;
    with a negative step it is empty when ``start <= stop``.
    """
    start = _get_range_index_start(pd_range_index)
    stop = _get_range_index_stop(pd_range_index)
    step = _get_range_index_step(pd_range_index)
    if step < 0:
        return start <= stop
    return start >= stop
161
+
162
+
163
def parse_index(index_value, *args, store_data=False, key=None):
    """
    Build an ``IndexValue`` that describes ``index_value``.

    Parameters
    ----------
    index_value
        Index to describe. ``None`` produces a generic ``IndexValue.Index``
        without min / max values. An object exposing ``to_pandas`` is
        converted through that method before being inspected.
    args
        Extra objects mixed into the generated key. They are always used
        when ``index_value`` is None; otherwise they are only used when the
        index is empty.
    store_data : bool
        When True, ``index.values`` is kept in the ``_data`` field. Range
        indexes never store their data.
    key
        Key to use instead of a generated token (only honored when truthy).

    Returns
    -------
    IndexValue
        Wraps an ``IndexValue.RangeIndex``, an ``IndexValue.MultiIndex`` or
        the ``IndexValue`` member named after the type of the index.
    """
    from .core import IndexValue

    def _extract_property(index, tp, ret_data):
        # properties that are computed here rather than read from the index
        kw = {
            "_min_val": _get_index_min(index),
            "_max_val": _get_index_max(index),
            "_min_val_close": True,
            "_max_val_close": True,
            "_key": key or _tokenize_index(index, *args),
        }
        if ret_data:
            kw["_data"] = index.values
        # every other field of ``tp`` is copied from the attribute of the
        # index with the same name minus leading underscores, skipping
        # attributes that are missing or None
        for field in tp._FIELDS:
            if field in kw or field == "_data":
                continue
            val = getattr(index, field.lstrip("_"), None)
            if val is not None:
                kw[field] = val
        return kw

    def _tokenize_index(index, *token_objects):
        # the extra objects only take part in the token of an empty index
        if not index.empty:
            return tokenize(index)
        else:
            return tokenize(index, *token_objects)

    def _get_index_min(index):
        try:
            return index.min()
        except (ValueError, AttributeError):
            # only tolerated for interval indexes, whose min is then unknown
            if isinstance(index, pd.IntervalIndex):
                return None
            raise
        except TypeError:
            # TypeError from ``min()`` is treated as "no min value"
            return None

    def _get_index_max(index):
        try:
            return index.max()
        except (ValueError, AttributeError):
            # only tolerated for interval indexes, whose max is then unknown
            if isinstance(index, pd.IntervalIndex):
                return None
            raise
        except TypeError:
            # TypeError from ``max()`` is treated as "no max value"
            return None

    def _serialize_index(index):
        # pick the IndexValue member named after the concrete index class
        tp = getattr(IndexValue, type(index).__name__)
        properties = _extract_property(index, tp, store_data)
        properties["_name"] = index.name
        return tp(**properties)

    def _serialize_range_index(index):
        if is_pd_range_empty(index):
            # fixed properties are used for an empty range instead of
            # reading them from the index
            properties = {
                "_is_monotonic_increasing": True,
                "_is_monotonic_decreasing": False,
                "_is_unique": True,
                "_min_val": _get_index_min(index),
                "_max_val": _get_index_max(index),
                "_min_val_close": True,
                "_max_val_close": False,
                "_key": key or _tokenize_index(index, *args),
                "_name": index.name,
                "_dtype": index.dtype,
            }
        else:
            properties = _extract_property(index, IndexValue.RangeIndex, False)
        return IndexValue.RangeIndex(
            _slice=slice(
                _get_range_index_start(index),
                _get_range_index_stop(index),
                _get_range_index_step(index),
            ),
            **properties,
        )

    def _serialize_multi_index(index):
        kw = _extract_property(index, IndexValue.MultiIndex, store_data)
        kw["_sortorder"] = index.sortorder
        kw["_dtypes"] = [lev.dtype for lev in index.levels]
        return IndexValue.MultiIndex(**kw)

    if index_value is None:
        # nothing is known about the index: no ordering, no uniqueness,
        # no bounds; the key only depends on ``args``
        return IndexValue(
            _index_value=IndexValue.Index(
                _is_monotonic_increasing=False,
                _is_monotonic_decreasing=False,
                _is_unique=False,
                _min_val=None,
                _max_val=None,
                _min_val_close=True,
                _max_val_close=True,
                _key=key or tokenize(*args),
            )
        )
    if hasattr(index_value, "to_pandas"):  # pragma: no cover
        # convert cudf.Index to pandas
        index_value = index_value.to_pandas()

    if isinstance(index_value, _get_range_index_type()):
        return IndexValue(_index_value=_serialize_range_index(index_value))
    elif isinstance(index_value, _get_multi_index_type()):
        return IndexValue(_index_value=_serialize_multi_index(index_value))
    else:
        return IndexValue(_index_value=_serialize_index(index_value))
270
+
271
+
272
def gen_unknown_index_value(index_value, *args, normalize_range_index=False):
    """
    Generate a new index value of the same kind as ``index_value`` that
    carries no values.

    Parameters
    ----------
    index_value
        Index value used as the template.
    args
        Extra arguments forwarded to ``parse_index``.
    normalize_range_index
        If True, a range index is described as a plain empty index instead
        of as a range index.

    Returns
    -------
    Index value created by ``parse_index`` from a placeholder index that
    keeps the name(s) of the template and, for non-range indexes, its
    dtype(s).
    """
    template = index_value.to_pandas()
    if isinstance(template, pd.MultiIndex):
        empty_levels = [level[:0] for level in template.levels]
        placeholder = pd.MultiIndex.from_arrays(empty_levels, names=template.names)
    elif isinstance(template, pd.RangeIndex) and not normalize_range_index:
        placeholder = pd.RangeIndex(-1, name=template.name)
    else:
        placeholder = pd.Index([], dtype=template.dtype, name=template.name)
    return parse_index(placeholder, *args)
301
+
302
+
303
def split_monotonic_index_min_max(
    left_min_max, left_increase, right_min_max, right_increase
):
    """
    Split two lists of chunk ranges so that both sides share the same
    split points.

    Each ``min_max`` argument is a list with one item per chunk; every item
    is a 4-tuple holding the chunk's min value, whether the min value is
    closed, the max value, and whether the max value is closed.
    The return value is a pair of nested lists, one per side; each inner
    list tells which ranges the corresponding chunk is split into.

    :param left_min_max: the left min_max
    :param left_increase: if False, the resulting list for the left side
        is reversed before being returned
    :param right_min_max: the right min_max
    :param right_increase: if False, the resulting list for the right side
        is reversed before being returned
    :return: tuple of two nested lists in which each item indicates how
        the chunk's min_max is split

    >>> left_min_max = [(0, True, 3, True), (4, True, 8, True), (12, True, 18, True),
    ...                 (20, True, 22, True)]
    >>> right_min_max = [(2, True, 6, True), (7, True, 9, True), (10, True, 14, True),
    ...                  (18, True, 19, True)]
    >>> l, r = split_monotonic_index_min_max(left_min_max, True, right_min_max, True)
    >>> l
    [[(0, True, 2, False), (2, True, 3, True)], [(3, False, 4, False), (4, True, 6, True), (6, False, 7, False),
    (7, True, 8, True)], [(8, False, 9, True), (10, True, 12, False), (12, True, 14, True), (14, False, 18, False),
    (18, True, 18, True)], [(18, False, 19, True), (20, True, 22, True)]]
    >>> r
    [[(0, True, 2, False), (2, True, 3, True), (3, False, 4, False), (4, True, 6, True)],
    [(6, False, 7, False), (7, True, 8, True), (8, False, 9, True)], [(10, True, 12, False), (12, True, 14, True)],
    [(14, False, 18, False), (18, True, 18, True), (18, False, 19, True), (20, True, 22, True)]]
    """
    # one output bucket per input chunk on each side
    left_idx_to_min_max = [[] for _ in left_min_max]
    right_idx_to_min_max = [[] for _ in right_min_max]
    # mutable copies of the ranges being consumed; their lower bound is
    # advanced as pieces are emitted
    left_curr_min_max = list(left_min_max[0])
    right_curr_min_max = list(right_min_max[0])
    left_curr_idx = right_curr_idx = 0
    left_terminate = right_terminate = False

    while not left_terminate or not right_terminate:
        if left_terminate:
            # left side exhausted: the rest of the right ranges is emitted
            # as is, and also attached to the last left chunk
            left_idx_to_min_max[left_curr_idx].append(tuple(right_curr_min_max))
            right_idx_to_min_max[right_curr_idx].append(tuple(right_curr_min_max))
            if right_curr_idx + 1 >= len(right_min_max):
                right_terminate = True
            else:
                right_curr_idx += 1
                right_curr_min_max = list(right_min_max[right_curr_idx])
        elif right_terminate:
            # right side exhausted: the rest of the left ranges is emitted
            # as is, and also attached to the last right chunk
            right_idx_to_min_max[right_curr_idx].append(tuple(left_curr_min_max))
            left_idx_to_min_max[left_curr_idx].append(tuple(left_curr_min_max))
            if left_curr_idx + 1 >= len(left_min_max):
                left_terminate = True
            else:
                left_curr_idx += 1
                left_curr_min_max = list(left_min_max[left_curr_idx])
        elif left_curr_min_max[0] < right_curr_min_max[0]:
            # left min < right min
            # the piece ends at the left max or just before the right min,
            # whichever compares smaller as a [value, closed] pair
            right_min = [right_curr_min_max[0], not right_curr_min_max[1]]
            max_val = min(left_curr_min_max[2:], right_min)
            assert len(max_val) == 2
            min_max = (
                left_curr_min_max[0],
                left_curr_min_max[1],
                max_val[0],
                max_val[1],
            )
            left_idx_to_min_max[left_curr_idx].append(min_max)
            right_idx_to_min_max[right_curr_idx].append(min_max)
            if left_curr_min_max[2:] == max_val:
                # left max < right min: the current left range is fully
                # consumed, move on to the next left chunk
                if left_curr_idx + 1 >= len(left_min_max):
                    left_terminate = True
                else:
                    left_curr_idx += 1
                    left_curr_min_max = list(left_min_max[left_curr_idx])
            else:
                # the piece covered left min up to (excluding) right min;
                # the remaining left range now starts at the right min
                left_curr_min_max[:2] = right_curr_min_max[:2]
        elif left_curr_min_max[0] > right_curr_min_max[0]:
            # left min > right min
            # mirror of the branch above with the two sides swapped
            left_min = [left_curr_min_max[0], not left_curr_min_max[1]]
            max_val = min(right_curr_min_max[2:], left_min)
            min_max = (
                right_curr_min_max[0],
                right_curr_min_max[1],
                max_val[0],
                max_val[1],
            )
            left_idx_to_min_max[left_curr_idx].append(min_max)
            right_idx_to_min_max[right_curr_idx].append(min_max)
            if right_curr_min_max[2:] == max_val:
                # right max < left min: the current right range is fully
                # consumed, move on to the next right chunk
                if right_curr_idx + 1 >= len(right_min_max):
                    right_terminate = True
                else:
                    right_curr_idx += 1
                    right_curr_min_max = list(right_min_max[right_curr_idx])
            else:
                # the piece covered right min up to (excluding) left min;
                # the remaining right range now starts at the left min
                right_curr_min_max[:2] = left_curr_min_max[:2]
        else:
            # left min == right min
            # the piece ends at the smaller of the two max bounds
            max_val = min(left_curr_min_max[2:], right_curr_min_max[2:])
            assert len(max_val) == 2
            min_max = (
                left_curr_min_max[0],
                left_curr_min_max[1],
                max_val[0],
                max_val[1],
            )
            left_idx_to_min_max[left_curr_idx].append(min_max)
            right_idx_to_min_max[right_curr_idx].append(min_max)
            if max_val == left_curr_min_max[2:]:
                # left range fully consumed
                if left_curr_idx + 1 >= len(left_min_max):
                    left_terminate = True
                else:
                    left_curr_idx += 1
                    left_curr_min_max = list(left_min_max[left_curr_idx])
            else:
                # left range continues right after the emitted piece
                left_curr_min_max[:2] = max_val[0], not max_val[1]
            if max_val == right_curr_min_max[2:]:
                # right range fully consumed
                if right_curr_idx + 1 >= len(right_min_max):
                    right_terminate = True
                else:
                    right_curr_idx += 1
                    right_curr_min_max = list(right_min_max[right_curr_idx])
            else:
                # right range continues right after the emitted piece
                right_curr_min_max[:2] = max_val[0], not max_val[1]

    if left_increase is False:
        left_idx_to_min_max = list(reversed(left_idx_to_min_max))
    if right_increase is False:
        right_idx_to_min_max = list(reversed(right_idx_to_min_max))

    return left_idx_to_min_max, right_idx_to_min_max
437
+
438
+
439
def build_split_idx_to_origin_idx(splits, increase=True):
    """
    Map every output chunk index to ``(original chunk index, inner position)``.

    ``splits`` has one entry per original chunk on an axis; each entry
    lists the pieces that chunk is split into, e.g.
    ``[[(0, True, 2, True), (2, False, 3, True)]]`` describes one input
    chunk split into two output chunks and yields ``{0: (0, 0), 1: (0, 1)}``.

    Parameters
    ----------
    splits : list
        Pieces per original chunk.
    increase : bool
        When exactly ``False``, ``splits`` is walked in reverse order and
        the original chunk index counts down from ``len(splits) - 1``.

    Returns
    -------
    dict
        Output chunk index -> ``(original chunk index, inner position)``.
    """
    descending = increase is False
    ordered = list(reversed(splits)) if descending else splits
    last = len(ordered) - 1

    mapping = {}
    for seq_no, pieces in enumerate(ordered):
        origin = last - seq_no if descending else seq_no
        for pos in range(len(pieces)):
            # output indexes are consecutive, so the next one is the
            # number of entries recorded so far
            mapping[len(mapping)] = (origin, pos)
    return mapping
457
+
458
+
459
def _generate_value(dtype, fill_value):
    """
    Create a sample value of type ``dtype`` out of ``fill_value``.

    Arrow-backed dtypes are unwrapped first. pyarrow list, map and struct
    types produce a one-item list, a one-pair list and a dict respectively,
    built recursively; other pyarrow types go through their pandas dtype.
    ``ExternalBlobDtype`` produces a ``SolidBlob`` of the encoded string
    form of ``fill_value``. Anything else is converted by calling the
    dtype's ``type`` (or the dtype itself) on ``fill_value``, with special
    converters for datetime64, timedelta64, categorical and object.
    """
    if ArrowDtype and isinstance(dtype, ArrowDtype):
        return _generate_value(dtype.pyarrow_dtype, fill_value)

    if isinstance(dtype, pa.ListType):
        return [_generate_value(dtype.value_type, fill_value)]

    if isinstance(dtype, pa.MapType):
        sample_key = _generate_value(dtype.key_type, fill_value)
        sample_item = _generate_value(dtype.item_type, fill_value)
        return [(sample_key, sample_item)]

    if isinstance(dtype, pa.StructType):
        struct_fields = [dtype[pos] for pos in range(dtype.num_fields)]
        return {
            field.name: _generate_value(field.type, fill_value)
            for field in struct_fields
        }

    if isinstance(dtype, pa.DataType):
        return _generate_value(dtype.to_pandas_dtype(), fill_value)

    if isinstance(dtype, ExternalBlobDtype):
        return SolidBlob(str(fill_value).encode())

    # converters for the types that cannot simply be called on the value
    special_converters = {
        np.datetime64: pd.Timestamp,
        np.timedelta64: pd.Timedelta,
        pd.CategoricalDtype.type: lambda x: pd.CategoricalDtype([x]),
        # for object, we do not know the actual dtype,
        # just convert to str for common usage
        np.object_: str,
    }
    # otherwise, just use dtype.type itself to convert
    scalar_type = getattr(dtype, "type", dtype)
    converter = special_converters.get(scalar_type, scalar_type)
    return converter(fill_value)
500
+
501
+
502
def build_empty_df(dtypes, index=None):
    """
    Build a DataFrame with the given dtypes, filled with sample values.

    Parameters
    ----------
    dtypes : pd.Series
        Column dtypes; its index gives the column labels (duplicates are
        allowed).
    index
        Index of the result. ``None`` or an empty index gives a frame
        without rows.

    Returns
    -------
    pd.DataFrame
        Frame with ``len(index)`` rows (0 when ``index`` is None) whose
        column dtypes equal ``dtypes``.
    """
    columns = dtypes.index
    length = len(index) if index is not None else 0
    # at least one row is generated so that dtypes can be inferred and cast
    record = [[_generate_value(dtype, 1) for dtype in dtypes]] * max(1, length)

    # duplicate column may exist,
    # so use RangeIndex first
    # an empty index cannot label the single generated row, hence it is
    # only attached after the frame has been cut down to zero rows
    df = pd.DataFrame(
        record, columns=range(len(dtypes)), index=index if length else None
    )
    for i, dtype in enumerate(dtypes):
        s = df.iloc[:, i]
        if not pd.api.types.is_dtype_equal(s.dtype, dtype):
            # columns are 0..n-1 here, so label ``i`` is position ``i``.
            # Column assignment replaces the column; ``df.iloc[:, i] = ...``
            # writes into the existing array on pandas >= 2.0 and can leave
            # the inferred dtype unchanged.
            df[i] = s.astype(dtype)

    df.columns = columns
    if len(df) > length:
        df = df[:length]
        if index is not None:
            df.index = index
    return df
517
+
518
+
519
def build_df(df_obj, fill_value=1, size=1, ensure_string=False):
    """
    Build a DataFrame of sample values shaped like ``df_obj``.

    Parameters
    ----------
    df_obj
        Object to mimic. An index or series contributes one column built
        from its ``dtype`` and ``name``; anything else contributes its
        ``dtypes``. The index is derived from ``df_obj.index_value``.
    fill_value
        Value, or list / tuple of values, the samples are generated from.
    size
        Row count, or list / tuple of row counts, paired with the fill
        values; one sub-frame is built per pair and all are concatenated.
    ensure_string : bool
        If True, the string ``"O"`` is prepended to the values of object
        columns.

    Returns
    -------
    pd.DataFrame
    """
    from .core import INDEX_TYPE, SERIES_TYPE

    size_seq = size if isinstance(size, (list, tuple)) else [size]
    fill_seq = fill_value if isinstance(fill_value, (list, tuple)) else [fill_value]

    if isinstance(df_obj, (INDEX_TYPE, SERIES_TYPE)):
        dtypes = pd.Series([df_obj.dtype], index=[df_obj.name])
    else:
        dtypes = df_obj.dtypes

    pieces = []
    for n_rows, filler in zip(size_seq, fill_seq):
        row = [_generate_value(dtype, filler) for dtype in dtypes]
        rows = [row] * n_rows
        piece = pd.DataFrame(rows)
        piece.columns = dtypes.index

        if rows:  # columns is empty in some cases
            target_index = df_obj.index_value.to_pandas()
            if isinstance(target_index, pd.MultiIndex):
                label = tuple(
                    _generate_value(level.dtype, filler)
                    for level in target_index.levels
                )
                piece.index = pd.MultiIndex.from_tuples(
                    [label] * n_rows, names=target_index.names
                )
            else:
                label = _generate_value(target_index.dtype, filler)
                piece.index = pd.Index([label] * n_rows, name=target_index.name)

        # make sure dtypes correct
        for pos, dtype in enumerate(dtypes):
            column = piece.iloc[:, pos]
            if not pd.api.types.is_dtype_equal(column.dtype, dtype):
                piece[piece.columns[pos]] = column.astype(dtype)
        pieces.append(piece)

    ret_df = pieces[0] if len(pieces) == 1 else pd.concat(pieces)

    if ensure_string:
        obj_dtypes = dtypes[dtypes == np.dtype("O")]
        ret_df[obj_dtypes.index] = ret_df[obj_dtypes.index].radd("O")
    return ret_df
572
+
573
+
574
def build_empty_series(dtype, index=None, name=None):
    """
    Build a Series of ``dtype`` holding one sample value per index label.

    Parameters
    ----------
    dtype
        Dtype of the series.
    index
        Index of the series; ``None`` gives a series without values.
    name
        Name of the series.
    """
    n_values = 0 if index is None else len(index)
    samples = [_generate_value(dtype, 1) for _ in range(n_values)]
    return pd.Series(samples, dtype=dtype, index=index, name=name)
582
+
583
+
584
def build_series(
    series_obj=None,
    fill_value=1,
    size=1,
    name=None,
    ensure_string=False,
    dtype=None,
    index=None,
):
    """
    Build a Series of sample values.

    Parameters
    ----------
    series_obj
        Object to mimic; when given, its dtype is used and its index
        (``index_value.to_pandas()`` or, failing that, ``index``) provides
        the index type.
    fill_value
        Value, or list / tuple of values, the samples are generated from.
    size
        Length, or list / tuple of lengths, paired with the fill values;
        one sub-series is built per pair and all are concatenated.
    name
        Name of the result; falls back to ``series_obj.name``.
    ensure_string : bool
        If True and the dtype is object, the string ``"O"`` is prepended to
        the values.
    dtype
        Dtype to use when ``series_obj`` is None.
    index
        Index whose type is used when ``series_obj`` is None.

    Returns
    -------
    pd.Series
    """
    size_seq = size if isinstance(size, (list, tuple)) else [size]
    fill_seq = fill_value if isinstance(fill_value, (list, tuple)) else [fill_value]

    if series_obj is None:
        index_template = index[:0] if index is not None else None
    else:
        dtype = series_obj.dtype
        try:
            index_template = series_obj.index_value.to_pandas()[:0]
        except AttributeError:
            index_template = series_obj.index[:0]

    name = name or getattr(series_obj, "name", None)

    pieces = []
    for repeat, filler in zip(size_seq, fill_seq):
        piece = build_empty_series(dtype, name=name, index=index_template)
        sample = _generate_value(dtype, filler)
        if isinstance(piece.index, pd.MultiIndex):
            label = tuple(
                _generate_value(level.dtype, filler) for level in piece.index.levels
            )
            piece = piece.reindex(
                index=pd.MultiIndex.from_tuples([label], names=piece.index.names)
            )
            piece.iloc[0] = sample
        else:
            if isinstance(piece.index.dtype, pd.CategoricalDtype):
                label = None
            else:
                label = _generate_value(piece.index.dtype, filler)
            piece.loc[label] = sample

        piece = pd.concat([piece] * repeat)
        # make sure dtype correct for MultiIndex
        piece = piece.astype(dtype, copy=False)
        pieces.append(piece)

    ret_series = pieces[0] if len(pieces) == 1 else pd.concat(pieces)

    if ensure_string and dtype == np.dtype("O"):
        ret_series = ret_series.radd("O")
    return ret_series
646
+
647
+
648
def infer_index_value(left_index_value, right_index_value, level=None):
    """
    Infer the index value resulting from combining two index values.

    * Two range indexes with equal slices give the left index value; with
      different slices they give an empty int64 index.
    * Two unique indexes with the same key give the left index value.
    * Otherwise the pandas indexes are joined (``level`` is forwarded to
      ``join``) and the emptied result is parsed.
    """
    from .core import IndexValue

    sides = (left_index_value, right_index_value)
    if all(isinstance(side.value, IndexValue.RangeIndex) for side in sides):
        if left_index_value.value.slice == right_index_value.value.slice:
            return left_index_value
        unknown_ints = pd.Index([], dtype=np.int64)
        return parse_index(unknown_ints, left_index_value, right_index_value)

    # when left index and right index is identical, and both of them are elements unique,
    # we can infer that the out index should be identical also
    both_unique = left_index_value.is_unique and right_index_value.is_unique
    if both_unique and left_index_value.key == right_index_value.key:
        return left_index_value

    left_pd = left_index_value.to_pandas()
    right_pd = right_index_value.to_pandas()
    joined = left_pd.join(right_pd, level=level)
    return parse_index(joined[:0], left_index_value, right_index_value)
673
+
674
+
675
def indexing_index_value(index_value, indexes, store_data=False, rechunk=False):
    """
    Compute the index value obtained by indexing ``index_value``.

    Parameters
    ----------
    index_value
        Index value being indexed.
    indexes
        Indexer: a slice, an integer, an ``Entity``, a tuple or anything
        the pandas index accepts in ``[]``.
    store_data : bool
        Forwarded to ``parse_index`` (not used for ``Entity`` indexers).
    rechunk : bool
        When True a full slice does not return ``index_value`` itself.

    Returns
    -------
    The resulting index value.
    """
    pd_index = index_value.to_pandas()

    # when rechunk is True, the output index shall be treated
    # different from the input one
    if not rechunk and isinstance(indexes, slice) and is_full_slice(indexes):
        return index_value

    if not index_value.has_value():
        # no values to index: re-key the index and keep the known bounds
        result = parse_index(pd_index, indexes, store_data=store_data)
        inner = result._index_value
        inner._min_val = index_value.min_val
        inner._min_val_close = index_value.min_val_close
        inner._max_val = index_value.max_val
        inner._max_val_close = index_value.max_val_close
        return result

    if isinstance(indexes, Integral):
        return parse_index(pd_index[[indexes]], store_data=store_data)

    if isinstance(indexes, Entity):
        # the outcome is unknown: use a placeholder index of the same kind
        if isinstance(pd_index, pd.RangeIndex):
            placeholder = pd.RangeIndex(-1)
        else:
            placeholder = type(pd_index)([])
        return parse_index(placeholder, indexes, index_value, store_data=False)

    selector = list(indexes) if isinstance(indexes, tuple) else indexes
    return parse_index(pd_index[selector], store_data=store_data)
704
+
705
+
706
def merge_index_value(to_merge_index_values: dict, store_data: bool = False):
    """
    Merge index values according to their chunk index.

    The index values are visited in the sorted order of their dict keys.
    Their pandas indexes are appended when ``store_data`` is True or when
    they are range indexes that continue one another; otherwise the merged
    index becomes an empty index. The smallest min value and largest max
    value seen are tracked and written to the result when it carries no
    values.

    Parameters
    ----------
    to_merge_index_values : dict
        index to index_value
    store_data : bool
        store data in index_value

    Returns
    -------
    merged_index_value
    """

    pd_index = None
    min_val, min_val_close, max_val, max_val_close = None, None, None, None
    for _, chunk_index_value in sorted(to_merge_index_values.items()):
        if pd_index is None:
            # first chunk: take its index and bounds as the starting point
            pd_index = chunk_index_value.to_pandas()
            min_val, min_val_close, max_val, max_val_close = (
                chunk_index_value.min_val,
                chunk_index_value.min_val_close,
                chunk_index_value.max_val,
                chunk_index_value.max_val_close,
            )
        else:
            cur_pd_index = chunk_index_value.to_pandas()
            if store_data or (
                isinstance(pd_index, pd.RangeIndex)
                and isinstance(cur_pd_index, pd.RangeIndex)
                and cur_pd_index.step == pd_index.step
                and cur_pd_index.start == pd_index.stop
            ):
                # range index that is continuous
                pd_index = pd_index.append(cur_pd_index)
            else:
                # values are not kept: fall back to an empty index of the
                # same dtype
                pd_index = pd.Index([], dtype=pd_index.dtype)
            if chunk_index_value.min_val is not None:
                try:
                    if min_val is None or min_val > chunk_index_value.min_val:
                        min_val = chunk_index_value.min_val
                        min_val_close = chunk_index_value.min_val_close
                except TypeError:
                    # min_value has different types that cannot compare
                    # just stop compare
                    # NOTE: this also skips the max update for this chunk
                    continue
            if chunk_index_value.max_val is not None:
                if max_val is None or max_val < chunk_index_value.max_val:
                    max_val = chunk_index_value.max_val
                    max_val_close = chunk_index_value.max_val_close

    index_value = parse_index(pd_index, store_data=store_data)
    if not index_value.has_value():
        # the merged index carries no values, so record the tracked bounds
        index_value._index_value._min_val = min_val
        index_value._index_value._min_val_close = min_val_close
        index_value._index_value._max_val = max_val
        index_value._index_value._max_val_close = max_val_close
    return index_value
766
+
767
+
768
def infer_dtypes(left_dtypes, right_dtypes, operator):
    """
    Infer the dtypes produced by applying ``operator`` to two DataFrames.

    Sample frames are built from both dtypes with ``build_empty_df`` and
    the dtypes of ``operator(left, right)`` are returned.
    """
    left_sample = build_empty_df(left_dtypes)
    right_sample = build_empty_df(right_dtypes)
    outcome = operator(left_sample, right_sample)
    return outcome.dtypes
772
+
773
+
774
@functools.lru_cache(100)
def infer_dtype(left_dtype, right_dtype, operator):
    """
    Infer the dtype produced by applying ``operator`` to two Series.

    Sample series are built from both dtypes with ``build_empty_series``
    and the dtype of ``operator(left, right)`` is returned. Results are
    cached per argument combination.
    """
    left_sample = build_empty_series(left_dtype)
    right_sample = build_empty_series(right_dtype)
    outcome = operator(left_sample, right_sample)
    return outcome.dtype
779
+
780
+
781
def filter_dtypes(dtypes, column_min_max):
    """
    Keep the entries of ``dtypes`` whose label lies within a range.

    Parameters
    ----------
    dtypes : pd.Series
        Dtypes indexed by column label.
    column_min_max
        ``(min, min_closed, max, max_closed)``; a truthy ``*_closed`` flag
        makes the corresponding bound inclusive.

    Returns
    -------
    pd.Series
        The selected part of ``dtypes``.
    """
    labels = dtypes.index

    if column_min_max[1]:
        above_min = labels >= column_min_max[0]
    else:
        above_min = labels > column_min_max[0]

    if column_min_max[3]:
        below_max = labels <= column_min_max[2]
    else:
        below_max = labels < column_min_max[2]

    return dtypes[above_min & below_max]
787
+
788
+
789
def in_range_index(i, pd_range_index):
    """
    Check whether the input `i` is one of the values of `pd_range_index`,
    which is a pd.RangeIndex.

    `i` must lie between start (inclusive) and stop (exclusive) in the
    direction of the step and be a whole number of steps away from start.
    """
    start = _get_range_index_start(pd_range_index)
    stop = _get_range_index_stop(pd_range_index)
    step = _get_range_index_step(pd_range_index)

    if step > 0:
        return bool(start <= i < stop and (i - start) % step == 0)
    if step < 0:
        return bool(start >= i > stop and (start - i) % step == 0)
    return False
803
+
804
+
805
def wrap_notimplemented_exception(func):
    """
    Decorate ``func`` so that a raised ``NotImplementedError`` is turned
    into a returned ``NotImplemented``.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            outcome = func(*args, **kwargs)
        except NotImplementedError:
            outcome = NotImplemented
        return outcome

    return wrapper
814
+
815
+
816
def validate_axis(axis, tileable=None):
    """
    Normalize ``axis`` to a non-negative integer.

    Parameters
    ----------
    axis
        ``"index"`` (0), ``"columns"`` (1) or an integer-like object.
    tileable
        Optional object with ``ndim``; when given, ``axis`` must be smaller
        than ``tileable.ndim``.

    Returns
    -------
    int

    Raises
    ------
    ValueError
        If ``axis`` is not integer-like, is negative or is out of range for
        ``tileable``.
    """
    if axis == "index":
        axis = 0
    elif axis == "columns":
        axis = 1

    try:
        axis = operator.index(axis)
        legal = axis >= 0 and (tileable is None or axis < tileable.ndim)
    except TypeError:
        legal = False

    if not legal:
        raise ValueError(f"No axis named {axis} for object type {type(tileable)}")
    return axis
833
+
834
+
835
def validate_axis_style_args(
    data, args, kwargs, arg_name, method_name
):  # pragma: no cover
    """Translate mixed ``(arg, axis=.)`` / ``(index=., columns=.)`` calls.

    All arguments are converted to the ``{index=., columns=.}`` style.

    Parameters
    ----------
    data : DataFrame
    args : tuple
        All positional arguments from the user
    kwargs : dict
        All keyword arguments from the user
    arg_name, method_name : str
        Used for better error messages

    Returns
    -------
    kwargs : dict
        A dictionary of keyword arguments. ``kwargs`` itself is not
        modified, so update it with the return value.

    Raises
    ------
    TypeError
        When ``axis`` is combined with ``index`` / ``columns``, when
        ``arg_name`` is given both positionally and by keyword, or when two
        or more positional arguments are passed.
    """
    axes_names = ["index"] if data.ndim == 1 else ["index", "columns"]

    def _name_of_requested_axis():
        # axis name selected by the ``axis`` keyword (default: first axis)
        return axes_names[validate_axis(kwargs.get("axis", 0), data)]

    # Start by validating for consistency
    if "axis" in kwargs and any(x in kwargs for x in axes_names):
        raise TypeError("Cannot specify both 'axis' and any of 'index' or 'columns'.")

    out = {}

    # First fill with explicit values provided by the user...
    if arg_name in kwargs:
        if args:
            raise TypeError(
                f"{method_name} got multiple values for argument '{arg_name}'"
            )
        out[_name_of_requested_axis()] = kwargs[arg_name]

    # More user-provided arguments, now from kwargs: only keywords that
    # name a valid axis of ``data`` are picked up
    for keyword, value in kwargs.items():
        try:
            axis_name = axes_names[validate_axis(keyword, data)]
        except ValueError:
            continue
        out[axis_name] = value

    # All user-provided kwargs have been handled now.
    # Now we supplement with positional arguments.
    n_positional = len(args)
    if n_positional == 1:
        out[_name_of_requested_axis()] = args[0]
    elif n_positional == 2:
        if "axis" in kwargs:
            # Unambiguously wrong
            raise TypeError(
                "Cannot specify both 'axis' and any of 'index' or 'columns'"
            )

        msg = (
            "Interpreting call\n\t'.{method_name}(a, b)' as "
            "\n\t'.{method_name}(index=a, columns=b)'.\nUse named "
            "arguments to remove any ambiguity."
        )
        raise TypeError(msg.format(method_name=method_name))
    elif n_positional != 0:
        raise TypeError(f"Cannot specify all of '{arg_name}', 'index', 'columns'.")
    # with no positional argument it's up to the function to decide if
    # this is valid
    return out
913
+
914
+
915
def validate_output_types(**kwargs):
    """
    Extract the requested output types from keyword arguments.

    ``output_types`` is used when truthy; otherwise a single
    ``object_type`` / ``output_type`` is wrapped into a list. String items
    are resolved to the ``OutputType`` attribute with the lower-cased name.

    Returns
    -------
    list or None
        The resolved output types, or None when none was requested.
    """
    from ..core import OutputType

    single = kwargs.pop("object_type", None) or kwargs.pop("output_type", None)
    requested = kwargs.pop("output_types", None)
    if not requested:
        requested = None if single is None else [single]
    if not requested:
        return None
    return [
        getattr(OutputType, item.lower()) if isinstance(item, str) else item
        for item in requested
    ]
930
+
931
+
932
def fetch_corner_data(df_or_series, session=None) -> pd.DataFrame:
    """
    Fetch corner DataFrame or Series for repr usage.

    When the object has more rows than pandas would display, only the head
    and tail rows needed for the truncated repr are fetched; otherwise the
    whole object is fetched.

    :param df_or_series: DataFrame or Series
    :param session: session passed through to the fetch calls
    :return: corner DataFrame
    """
    max_rows = pd.get_option("display.max_rows")
    if max_rows is None:
        # ``None`` means "no limit" for pandas: nothing gets truncated, so the
        # whole object is needed (and comparing against None would raise).
        return df_or_series._fetch(session=session)

    try:
        min_rows = pd.get_option("display.min_rows")
    except KeyError:  # pragma: no cover
        # display.min_rows is introduced in pandas 0.25
        min_rows = None
    # a ``None`` display.min_rows follows the value of display.max_rows
    min_rows = max_rows if min_rows is None else min(min_rows, max_rows)

    index_size = None
    if (
        df_or_series.shape[0] > max_rows
        and df_or_series.shape[0] > min_rows // 2 * 2 + 2
    ):
        # for pandas, greater than max_rows
        # will display min_rows
        # thus we fetch min_rows + 2 lines
        index_size = min_rows // 2 + 1

    if index_size is None:
        return df_or_series._fetch(session=session)

    # imported lazily, only the truncated path needs it
    from .indexing.iloc import iloc

    head = iloc(df_or_series)[:index_size]
    tail = iloc(df_or_series)[-index_size:]
    head_data, tail_data = ExecutableTuple([head, tail]).fetch(session=session)
    xdf = cudf if head.op.is_gpu() else pd
    return xdf.concat([head_data, tail_data], axis="index")
967
+
968
+
969
class ReprSeries(pd.Series):
    """Series holding only corner data while reporting the real length."""

    def __init__(self, corner_data, real_shape):
        """Wrap ``corner_data`` and remember ``real_shape`` of the full object."""
        super().__init__(corner_data)
        self._real_shape = real_shape

    def __len__(self):
        # Only the corner rows are held here, so the natural length would be
        # misleading in the repr; report the row count of the real shape.
        full_shape = self._real_shape
        return full_shape[0]
979
+
980
+
981
def filter_dtypes_by_index(dtypes, index):
    """
    Select the entries of ``dtypes`` whose labels appear in ``index``.

    A direct ``.loc`` lookup is tried first. When it raises KeyError, the
    labels present on both sides are computed by merging the two indexes as
    frames, and the lookup is repeated with that intersection; the original
    index names of ``dtypes`` are then restored on the result.
    """
    try:
        return dtypes.loc[index].dropna()
    except KeyError:
        pass

    level_positions = list(range(dtypes.index.nlevels))
    own_labels = dtypes.index.to_frame()
    requested_labels = index.to_frame()
    shared = own_labels.merge(requested_labels).set_index(level_positions).index

    selected = dtypes.loc[shared]
    selected.index.names = dtypes.index.names
    return selected
994
+
995
+
996
@contextmanager
def create_sa_connection(con, **kwargs):
    """
    Context manager yielding a SQLAlchemy connection for ``con``.

    ``con`` may be an existing ``Connection`` (yielded as is and left open),
    an ``Engine`` (a connection is opened and closed on exit), or anything
    else, which is handed to ``sqlalchemy.create_engine`` together with
    ``kwargs`` (the connection is closed and the engine disposed on exit).
    """
    import sqlalchemy as sa
    from sqlalchemy.engine import Connection, Engine

    # engine created here, if any; only such an engine gets disposed
    owned_engine = None
    if isinstance(con, Connection):
        # connection created by user, the caller keeps ownership
        owns_connection = False
    elif isinstance(con, Engine):
        con = con.connect()
        owns_connection = True
    else:
        owned_engine = sa.create_engine(con, **kwargs)
        con = owned_engine.connect()
        owns_connection = True

    try:
        yield con
    finally:
        if owns_connection:
            con.close()
        if owned_engine is not None:
            owned_engine.dispose()
1024
+
1025
+
1026
def to_arrow_dtypes(dtypes):
    """
    Build a copy of ``dtypes`` with arrow-backed dtypes.

    Entries that already are pandas extension dtypes are kept, entries whose
    arrow type is string become ``pd.StringDtype("pyarrow")``, and every
    other entry becomes an ``ArrowDtype`` of its arrow type.
    """
    from ..io.odpsio.schema import pandas_dtypes_to_arrow_schema

    arrow_schema = pandas_dtypes_to_arrow_schema(dtypes)
    converted = dtypes.copy()
    for pos in range(len(dtypes)):
        arrow_type = arrow_schema.types[pos]
        source_dtype = dtypes.iloc[pos]
        if isinstance(source_dtype, pd.api.extensions.ExtensionDtype):
            # make existing extension dtype consistent
            target_dtype = source_dtype
        elif arrow_type == pa.string():
            target_dtype = pd.StringDtype("pyarrow")
        else:
            target_dtype = ArrowDtype(arrow_type)
        converted.iloc[pos] = target_dtype
    return converted
1042
+
1043
+
1044
def is_dataframe(x):
    """Return True when ``x`` is a pandas (or, if available, cudf) DataFrame."""
    frame_types = (pd.DataFrame,)
    if cudf is not None:  # pragma: no cover
        frame_types += (cudf.DataFrame,)
    return isinstance(x, frame_types)
1049
+
1050
+
1051
def is_series(x):
    """Return True when ``x`` is a pandas (or, if available, cudf) Series."""
    series_types = (pd.Series,)
    if cudf is not None:  # pragma: no cover
        series_types += (cudf.Series,)
    return isinstance(x, series_types)
1056
+
1057
+
1058
def is_index(x):
    """Return True when ``x`` is a pandas (or, if available, cudf) Index."""
    index_types = (pd.Index,)
    if cudf is not None:  # pragma: no cover
        index_types += (cudf.Index,)
    return isinstance(x, index_types)
1063
+
1064
+
1065
def get_xdf(x):
    """Return the ``cudf`` module when ``x`` is a cudf object, else ``pd``."""
    if cudf is None:
        return pd
    gpu_types = (cudf.DataFrame, cudf.Series, cudf.Index)  # pragma: no cover
    return cudf if isinstance(x, gpu_types) else pd  # pragma: no cover
1070
+
1071
+
1072
def is_cudf(x):
    """Return True when cudf is available and ``x`` is a cudf DataFrame, Series or Index."""
    if cudf is None:
        return False
    gpu_types = (cudf.DataFrame, cudf.Series, cudf.Index)  # pragma: no cover
    return isinstance(x, gpu_types)  # pragma: no cover
1077
+
1078
+
1079
def whether_to_clean_up(op, threshold):
    """
    Decide whether ``op.func`` carries enough captured data to need cleanup.

    The sizes of the objects reachable from the function are summed up:
    closure cells for functions with a closure, otherwise the ``__dict__``
    and ``__slots__`` of a callable object. Classes are not inspected.
    Counting stops as soon as the running total reaches ``threshold``, in
    which case ``op.need_clean_up_func`` is set to True.

    Returns
    -------
    The value of ``op.need_clean_up_func`` after the check.
    """
    from collections import deque
    from numbers import Number

    func = op.func
    max_recursion_depth = 2
    counted_bytes = 0

    # objects of these types are measured shallowly, their content is not walked
    BYPASS_CLASSES = (str, bytes, Number, range, bytearray, pd.DataFrame, pd.Series)

    class GetSizeEarlyStopException(Exception):
        pass

    def check_exceed_threshold():
        if counted_bytes >= threshold:
            raise GetSizeEarlyStopException()

    def getsize(obj_outer):
        # ids already counted within this single getsize() call
        visited_ids = set()

        def inner_count(obj, depth):
            if id(obj) in visited_ids or depth > max_recursion_depth:
                return 0
            visited_ids.add(id(obj))
            child_depth = depth + 1
            total = sys.getsizeof(obj)

            if isinstance(obj, BYPASS_CLASSES):
                return total
            if isinstance(obj, (tuple, list, set, deque)):
                for member in obj:
                    total += inner_count(member, child_depth)
            elif hasattr(obj, "items"):
                for key, value in obj.items():
                    total += inner_count(key, child_depth) + inner_count(
                        value, child_depth
                    )

            if hasattr(obj, "__dict__"):
                total += inner_count(vars(obj), child_depth)
            if hasattr(obj, "__slots__"):
                for slot_name in obj.__slots__:
                    if hasattr(obj, slot_name):
                        total += inner_count(getattr(obj, slot_name), child_depth)
            return total

        return inner_count(obj_outer, 0)

    try:
        # Note: In most cases, func is just a function with closure, while chances are that
        # func is a callable that doesn't have __closure__ attribute.
        if inspect.isclass(func):
            pass
        elif getattr(func, "__closure__", None) is not None:
            for cell in func.__closure__:
                counted_bytes += getsize(cell.cell_contents)
                check_exceed_threshold()
        elif callable(func):
            if hasattr(func, "__dict__"):
                for attr_name, attr_value in func.__dict__.items():
                    counted_bytes += getsize(attr_name) + getsize(attr_value)
                    check_exceed_threshold()
            if hasattr(func, "__slots__"):
                for slot in func.__slots__:
                    if hasattr(func, slot):
                        counted_bytes += getsize(getattr(func, slot))
                    check_exceed_threshold()
    except GetSizeEarlyStopException:
        logger.debug("Func needs cleanup.")
        op.need_clean_up_func = True
    else:
        assert op.need_clean_up_func is False
        logger.debug("Func doesn't need cleanup.")

    return op.need_clean_up_func
1156
+
1157
+
1158
def concat_on_columns(objs: List) -> Any:
    """
    Concatenate ``objs`` along columns with the library matching the first object.

    For cudf the index of the result is overwritten with the index of the
    first object afterwards.
    """
    first = objs[0]
    xdf = get_xdf(first)
    combined = xdf.concat(objs, axis=1)
    # In cudf, concat with axis=1 and ignore_index=False by default behaves opposite to pandas.
    # Cudf would reset the index when axis=1 and ignore_index=False, which does not match with its document.
    # Therefore, we deal with this case specially.
    if xdf is cudf:
        combined.index = first.index
    return combined
1167
+
1168
+
1169
def apply_if_callable(maybe_callable, obj, **kwargs):
    """
    Call ``maybe_callable(obj, **kwargs)`` when it is callable.

    Anything that is not callable is returned unchanged.
    """
    if not callable(maybe_callable):
        return maybe_callable
    return maybe_callable(obj, **kwargs)
1174
+
1175
+
1176
def patch_sa_engine_execute():
    """
    Add an ``execute`` method to SQLAlchemy's ``Engine`` when it is missing.

    pandas did not resolve compatibility issue of sqlalchemy 2.0, the issue
    is https://github.com/pandas-dev/pandas/issues/40686. We need to patch
    Engine class in SQLAlchemy, and then our code can work well.

    Nothing happens when SQLAlchemy cannot be imported or when ``Engine``
    already has an ``execute`` attribute.
    """
    try:
        from sqlalchemy.engine import Engine
    except ImportError:  # pragma: no cover
        return

    if hasattr(Engine, "execute"):  # pragma: no cover
        return

    def execute(self, statement, *multiparams, **params):
        # run the statement on a connection opened from this engine
        return self.connect().execute(statement, *multiparams, **params)

    Engine.execute = execute
1194
+
1195
+
1196
def bind_func_args_from_pos(func, args_bind_position, *bound_args, **bound_kwargs):
    """
    Create a new function with arguments bound from specified position.

    Parameters
    ----------
    func : callable
        Target function to be wrapped.
    args_bind_position : int
        Position to start binding arguments (0-based): the bound positional
        arguments are inserted after the first ``args_bind_position``
        positional arguments supplied at call time.
        e.g., 0 binds from first arg, 1 binds from second arg.
    *bound_args : tuple
        Arguments to be bound from position ``args_bind_position``.
    **bound_kwargs : dict
        Keyword arguments to be bound. Keyword arguments given at call time
        take precedence over bound ones with the same name.

    Returns
    -------
    callable
        Wrapped function with bound arguments.

    Examples
    --------
    >>> def func(x, y, z=0):
    ...     return x * y + z
    >>> f = bind_func_args_from_pos(func, 1, 10)  # bind from second position
    >>> f(5)  # equals func(5, 10)
    50

    Raises
    ------
    Exception
        No validation is performed when binding. Errors raised while calling
        the wrapped function are re-raised as the same exception type with a
        message that mentions the bound call; when such an exception cannot
        be built from a single message, the original exception propagates
        unchanged.
    """

    @functools.wraps(func)
    def wrapper(*runtime_args, **runtime_kwargs):
        try:
            # Combine arguments
            all_args = (
                runtime_args[:args_bind_position]
                + bound_args
                + runtime_args[args_bind_position:]
            )
            all_kwargs = {**bound_kwargs, **runtime_kwargs}

            return func(*all_args, **all_kwargs)
        except Exception as e:
            # Enhance error message with context. Callables such as
            # functools.partial or callable instances have no __name__.
            func_name = getattr(func, "__name__", None) or repr(func)
            message = f"Error calling {func_name} with bound arguments: {str(e)}"
            try:
                enhanced = type(e)(message)
            except Exception:
                # Some exception classes (e.g. UnicodeDecodeError) cannot be
                # built from a single message; keep the original error then.
                enhanced = None
            if enhanced is None:
                raise
            raise enhanced from e

    return wrapper
1252
+
1253
+
1254
def pack_func_args(df, funcs, *args, args_bind_position=1, **kwargs) -> Any:
    """
    Pack the funcs with args and kwargs to avoid the ambiguity between other
    positional and keyword arguments. It will process the funcs by the following rule:

    1. If there's no such args and kwargs, return funcs itself.

    2. If the funcs is a dict-like object, it will iterate each key-value pair, pack the
    value recursively, and return a new dict with the same keys and packed values.

    3. If the funcs is a list-like object, it will iterate each element, pack it
    recursively, and return a new list with the packed elements.

    4. If the funcs is a str object, it will try to get the attribute df.funcs firstly,
    if it exists and is a callable, return a partial one with args and kwargs packed in.
    If it exists but isn't a callable, a ValueError is raised. If it doesn't exist, then
    try to get the attribute of np.funcs, if it exists and df is acceptable by funcs,
    return a partial one with args and kwargs packed in, otherwise an AttributeError is
    raised. This rule is almost the same with pandas.

    5. Other cases are treated as funcs being a callable, returns the partial one with
    args and kwargs packed in.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        The DataFrame or Series object to test the function.
    funcs : function, str, list-like or dict-like
        Function to pack. It should have the same type with Dataframe.transform().
    *args :
        The positional arguments to func. If funcs contains many functions, each one
        should be able to accept *args.
    args_bind_position : int
        Position to start binding arguments (0-based).
        e.g., n=0 binds from first arg, n=1 binds from second arg.
        The same position is used for every function nested inside a
        dict-like or list-like ``funcs``.
    **kwargs :
        The keyword arguments to func. If funcs contains many functions, each one
        should be able to accept **kwargs.

    Returns
    -------
    The packed functions having the same structure with funcs.

    Raises
    ------
    ValueError :
        If there's a string but the corresponding attribute is not a callable.
    AttributeError :
        If there's a string but no corresponding function is found.
    """
    from ..udf import MarkedFunction

    if not args and not kwargs:
        return funcs

    if is_dict_like(funcs):
        # forward the bind position, otherwise nested functions would
        # silently fall back to the default position
        return {
            k: pack_func_args(
                df, v, *args, args_bind_position=args_bind_position, **kwargs
            )
            for k, v in funcs.items()
        }

    if is_list_like(funcs) and not isinstance(funcs, ENTITY_TYPE):
        return [
            pack_func_args(
                df, v, *args, args_bind_position=args_bind_position, **kwargs
            )
            for v in funcs
        ]

    f = get_callable_by_name(df, funcs) if isinstance(funcs, str) else funcs

    if isinstance(f, MarkedFunction):
        # for marked function, pack the inner function, and reset as mark function
        packed_func = f.copy()
        packed_func.func = bind_func_args_from_pos(
            f.func, args_bind_position, *args, **kwargs
        )
    else:
        packed_func = bind_func_args_from_pos(f, args_bind_position, *args, **kwargs)

    # Callable
    return packed_func
1329
+
1330
+
1331
def get_callable_by_name(df: Any, func_name: str) -> Callable:
    """
    Resolve a function name into a callable.

    The attribute ``func_name`` of ``df`` is looked up first: a callable
    attribute is returned, a non-callable one raises ValueError. When ``df``
    has no such attribute, the numpy function of that name is returned,
    provided numpy has it and ``df`` exposes ``__array__``. Otherwise an
    AttributeError is raised. This rule is almost the same with pandas.

    Parameters
    ----------
    df: pandas.Series or pandas.Dataframe
        The receiver of the func name.
    func_name : str
        The func name.

    Returns
    -------
    The callable instance.

    Raises
    ------
    ValueError :
        If the attribute exists on ``df`` but is not a callable.
    AttributeError :
        If no corresponding function is found.
    """
    not_found = object()
    own_attr = getattr(df, func_name, not_found)
    if own_attr is not not_found:
        if not callable(own_attr):
            raise ValueError(f"{func_name} is not a callable")
        return own_attr

    numpy_has_it = hasattr(np, func_name)
    if numpy_has_it and hasattr(df, "__array__"):
        return getattr(np, func_name)

    owner_name = type(df).__name__
    raise AttributeError(
        f"'{func_name}' is not a valid function for '{owner_name}' object"
    )
1370
+
1371
+
1372
@dataclasses.dataclass
class InferredDataFrameMeta:
    """Result metadata gathered for a function applied to a DataFrame-like object."""

    # kind of the result object
    output_type: OutputType
    # column dtypes, not required to be present for non-dataframe results
    dtypes: Optional[pd.Series] = None
    # dtype and name, not required to be present for dataframe results
    dtype: Optional[Any] = None
    name: Optional[str] = None
    index_value: Optional["IndexValue"] = None
    # flags filled in by the inference code
    maybe_agg: bool = False
    elementwise: bool = False

    def check_absence(self, *args: str) -> None:
        """
        Raise TypeError when any of the named fields is still None.

        Fields that do not apply to the current output type are ignored:
        ``dtype`` and ``name`` for dataframes, ``dtypes`` for everything else.
        """
        if self.output_type == OutputType.dataframe:
            not_applicable = {"dtype", "name"}
        else:
            not_applicable = {"dtypes"}

        required = sorted(set(args) - not_applicable)
        absent_args = [
            field_name for field_name in required if getattr(self, field_name) is None
        ]
        if not absent_args:
            return
        raise TypeError(
            f"Cannot determine {', '.join(absent_args)} by calculating "
            "with mock data, please specify it as arguments"
        )
1394
+
1395
+
1396
def _get_groupby_input_df(groupby):
    """
    Walk up the first inputs of ``groupby`` until reaching an object whose
    operator's first output type is dataframe or series, and return it.
    """
    frame_like_types = (OutputType.dataframe, OutputType.series)
    current = groupby
    while True:
        if current.op.output_types[0] in frame_like_types:
            return current
        current = current.inputs[0]
1401
+
1402
+
1403
def infer_dataframe_return_value(
    df_obj,
    func,
    output_type=None,
    dtypes=None,
    dtype=None,
    name=None,
    index=None,
    inherit_index=False,
    build_kw=None,
    elementwise=None,
    skip_infer=False,
) -> InferredDataFrameMeta:
    """
    Determine the metadata of the result of applying ``func`` to ``df_obj``.

    Information is taken, in this order, from the explicit arguments, from
    the metadata ``get_function_output_meta`` derives for the unwrapped
    function, and finally from calling ``func`` on mock data built from
    ``df_obj``.

    Parameters
    ----------
    df_obj
        Object ``func`` is applied to; a groupby object or an object with
        ``ndim``, ``key`` and ``index_value``.
    func
        Function whose result is described. May be a ``MarkedFunction``, a
        ``functools.partial`` or a wrapper exposing ``__wrapped__``.
    output_type, dtypes, dtype, name
        Values specified by the caller; they take precedence over values
        obtained otherwise.
    index
        Index of the result specified by the caller, either an index object
        of this package or something accepted by ``parse_index``.
    inherit_index : bool
        Use the index value of ``df_obj`` for the result.
    build_kw : dict, optional
        Keyword arguments passed to the mock data builders.
    elementwise : bool, optional
        Whether ``func`` is elementwise. When None, it is True only if the
        unwrapped function is a ``np.ufunc``.
    skip_infer : bool
        Return what is known without calling ``func`` on mock data.

    Returns
    -------
    InferredDataFrameMeta
        Fields that could not be determined are left as None.

    Notes
    -----
    Any exception raised while running ``func`` on mock data or while
    processing its result (including the TypeError raised on an output type
    mismatch) is logged at info level and not propagated; the metadata known
    before the inference is returned instead.
    """
    from .core import GROUPBY_TYPE, INDEX_TYPE
    from .typing_ import get_function_output_meta

    # strip MarkedFunction / partial / decorator layers to reach the
    # underlying function
    unwrapped_func = func
    if isinstance(unwrapped_func, MarkedFunction):
        unwrapped_func = unwrapped_func.func
    while True:
        if isinstance(unwrapped_func, functools.partial):
            unwrapped_func = unwrapped_func.func
        elif hasattr(unwrapped_func, "__wrapped__"):
            unwrapped_func = unwrapped_func.__wrapped__
        else:
            break

    # metadata derived from the function itself only fills in what the
    # caller did not specify
    func_annotation_meta = get_function_output_meta(unwrapped_func, df_obj)
    func_index_value = None
    if func_annotation_meta:
        output_type = output_type or func_annotation_meta.output_type
        dtypes = dtypes if dtypes is not None else func_annotation_meta.dtypes
        dtype = dtype if dtype is not None else func_annotation_meta.dtype
        name = name if name is not None else func_annotation_meta.name
        func_index_value = func_annotation_meta.index_value

    if skip_infer:
        # no mock execution requested: return the values known so far
        if isinstance(index, INDEX_TYPE):
            ret_index_value = index.index_value
        elif index is not None:
            ret_index_value = parse_index(index, df_obj.key)
        else:
            ret_index_value = func_index_value

        return InferredDataFrameMeta(
            output_type=output_type,
            dtypes=dtypes,
            dtype=dtype,
            name=name,
            index_value=ret_index_value,
        )

    if isinstance(index, INDEX_TYPE):
        index = index.index_value

    if elementwise is None:
        elementwise = isinstance(unwrapped_func, np.ufunc)

    ret_index_value = func_index_value
    if output_type is not None and (dtypes is not None or dtype is not None):
        if inherit_index:
            ret_index_value = df_obj.index_value
        elif index is not None:
            ret_index_value = parse_index(index)

        # everything needed is known already, no mock execution required
        if ret_index_value is not None:
            return InferredDataFrameMeta(
                output_type,
                dtypes,
                dtype,
                name,
                ret_index_value,
                elementwise=elementwise or False,
            )

    ret_output_type = None
    ret_dtypes = dtypes
    maybe_agg = False
    build_kw = build_kw or {}
    obj_key = df_obj.key

    if elementwise:
        # an elementwise function keeps both the index and the kind of its input
        inherit_index = True
        (ret_output_type,) = get_output_types(df_obj)
    if index is not None:
        ret_index_value = parse_index(index)

    # build mock input to run the function on
    if isinstance(df_obj, GROUPBY_TYPE):
        is_groupby = True
        empty_df_obj = df_obj.op.build_mock_groupby(**build_kw)
    else:
        is_groupby = False
        empty_df_obj = (
            build_df(df_obj, **build_kw)
            if df_obj.ndim == 2
            else build_series(df_obj, **build_kw)
        )
    try:
        with np.errstate(all="ignore"), quiet_stdio():
            infer_df_obj = func(empty_df_obj)

        if ret_index_value is None:
            if (
                infer_df_obj is None
                or not hasattr(infer_df_obj, "index")
                or infer_df_obj.index is None
            ):
                # the result carries no index at all
                ret_index_value = parse_index(pd.RangeIndex(-1))
            elif (
                infer_df_obj.index is getattr(empty_df_obj, "index", None)
                or inherit_index
            ):
                # the function passed the index of its input through
                ret_index_value = df_obj.index_value
            else:
                ret_index_value = parse_index(infer_df_obj.index, obj_key, func)

        if isinstance(infer_df_obj, pd.DataFrame):
            if output_type is not None and output_type != OutputType.dataframe:
                raise TypeError(
                    f'Cannot infer output_type as "series", '
                    f'please specify `output_type` as "dataframe"'
                )
            ret_output_type = ret_output_type or OutputType.dataframe
            if ret_dtypes is None:
                ret_dtypes = infer_df_obj.dtypes
        else:
            if output_type is not None and output_type == OutputType.dataframe:
                raise TypeError(
                    f'Cannot infer output_type as "dataframe", '
                    f'please specify `output_type` as "series"'
                )
            ret_output_type = ret_output_type or OutputType.series
            name = name or getattr(infer_df_obj, "name", None)
            dtype = dtype or infer_df_obj.dtype

        if is_groupby and len(infer_df_obj) <= 2:
            # we create mock df with 4 rows, 2 groups
            # if return df has 2 rows, we assume that
            # it's an aggregation operation
            maybe_agg = True

        return InferredDataFrameMeta(
            ret_output_type,
            make_dtypes(ret_dtypes),
            make_dtypes(dtype),
            name,
            ret_index_value,
            maybe_agg,
            elementwise=elementwise,
        )
    except:  # noqa: E722  # nosec
        # best effort: fall back to the metadata known before the inference
        logger.info(
            "Exception raised while inferring meta of function result", exc_info=True
        )
        return InferredDataFrameMeta(
            output_type,
            make_dtypes(dtypes),
            make_dtypes(dtype),
            name,
            ret_index_value,
            maybe_agg,
            elementwise=elementwise,
        )
1566
+
1567
+
1568
def copy_func_scheduling_hints(func, op: "DataFrameOperator") -> None:
    """
    Copy scheduling hints of ``func`` onto the operator ``op``.

    For a ``MarkedFunction`` the expected engine, resources and gpu flag are
    read from the function, and resources it leaves unset (missing or None)
    are filled in from ``options.function.default_running_options``. For any
    other function only the default running options are used. The resources
    are then passed through ``validate_and_adjust_resource_ratio`` before
    being assigned. Only truthy values are written to ``op``.

    Neither the resources stored on ``func`` nor the default options are
    modified.
    """
    from ..config import options

    expect_engine = None
    expect_gpu = None
    default_options = options.function.default_running_options or {}

    if isinstance(func, MarkedFunction):
        # copy from marked function
        expect_engine = func.expect_engine
        expect_gpu = func.gpu
        # merge into a copy, so that the defaults of this call are not
        # written back into the function's own resource dict
        expect_resources = dict(func.expect_resources or {})

        # merge default options if not set
        for key, value in default_options.items():
            if expect_resources.get(key) is None:
                expect_resources[key] = value
    else:
        # copy from default options
        expect_resources = dict(default_options)

    # Validate and adjust resource ratio constraints on client side
    expect_resources, _ = validate_and_adjust_resource_ratio(
        expect_resources,
        max_memory_cpu_ratio=options.function.allowed_max_memory_cpu_ratio,
        adjust=True,
    )

    if expect_engine:
        op.expect_engine = expect_engine
    if expect_resources:
        op.expect_resources = expect_resources
    if expect_gpu:
        op.gpu = expect_gpu
1602
+
1603
+
1604
+ def make_column_list(col, dtypes_or_columns, level=None):
1605
+ """Returns [col] if col is a column in dtypes"""
1606
+ try:
1607
+ if isinstance(dtypes_or_columns, pd.Series):
1608
+ idx = dtypes_or_columns.index
1609
+ else:
1610
+ idx = dtypes_or_columns
1611
+
1612
+ if level is None:
1613
+ if col in idx:
1614
+ return [col]
1615
+ elif isinstance(col, int):
1616
+ col = [col]
1617
+ if all(c in idx for c in col):
1618
+ return col
1619
+ if all(isinstance(c, int) for c in col):
1620
+ return [idx[c] for c in col]
1621
+ return col
1622
+ else:
1623
+ level_idx = idx.get_level_values(level)
1624
+ if isinstance(col, list):
1625
+ cols = col
1626
+ else:
1627
+ cols = [col]
1628
+ mask = level_idx.isin(cols)
1629
+ if not mask.any():
1630
+ mask = col
1631
+ return idx[mask]
1632
+ except (IndexError, TypeError, ValueError):
1633
+ return col
1634
+
1635
+
1636
def call_groupby_with_params(df_or_series, groupby_params: dict):
    """
    Call ``groupby`` on ``df_or_series`` with the given parameters.

    The ``selection`` entry is not a groupby argument: when it is truthy it
    is applied to the groupby result with ``[...]``. ``groupby_params``
    itself is left untouched.
    """
    selection = groupby_params.get("selection")
    call_kwargs = {
        key: value for key, value in groupby_params.items() if key != "selection"
    }
    grouped = df_or_series.groupby(**call_kwargs)
    return grouped[selection] if selection else grouped
1643
+
1644
+
1645
def validate_dtype_backend(value):
    """
    Validate a dtype backend setting and return it in normalized form.

    Booleans are accepted for compatibility with the legacy use_arrow_dtype
    property: True maps to "pyarrow" and False to "numpy". Raises ValueError
    when ``dtype_backend_validator`` rejects the value.
    """
    if isinstance(value, bool):
        backend = "pyarrow" if value else "numpy"
    else:
        backend = value

    if dtype_backend_validator(backend):
        return backend
    raise ValueError(f"Invalid dtype_backend: {backend}")