maxframe 2.4.0rc1__cp312-cp312-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1122) hide show
  1. maxframe/__init__.py +33 -0
  2. maxframe/_utils.cp312-win32.pyd +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyi +21 -0
  5. maxframe/_utils.pyx +561 -0
  6. maxframe/codegen/__init__.py +27 -0
  7. maxframe/codegen/core.py +597 -0
  8. maxframe/codegen/spe/__init__.py +16 -0
  9. maxframe/codegen/spe/core.py +307 -0
  10. maxframe/codegen/spe/dataframe/__init__.py +38 -0
  11. maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
  12. maxframe/codegen/spe/dataframe/accessors/base.py +71 -0
  13. maxframe/codegen/spe/dataframe/accessors/dict_.py +89 -0
  14. maxframe/codegen/spe/dataframe/accessors/list_.py +44 -0
  15. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  16. maxframe/codegen/spe/dataframe/arithmetic.py +89 -0
  17. maxframe/codegen/spe/dataframe/datasource.py +181 -0
  18. maxframe/codegen/spe/dataframe/datastore.py +204 -0
  19. maxframe/codegen/spe/dataframe/extensions.py +63 -0
  20. maxframe/codegen/spe/dataframe/fetch.py +26 -0
  21. maxframe/codegen/spe/dataframe/groupby.py +312 -0
  22. maxframe/codegen/spe/dataframe/indexing.py +333 -0
  23. maxframe/codegen/spe/dataframe/merge.py +110 -0
  24. maxframe/codegen/spe/dataframe/misc.py +264 -0
  25. maxframe/codegen/spe/dataframe/missing.py +64 -0
  26. maxframe/codegen/spe/dataframe/reduction.py +183 -0
  27. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  28. maxframe/codegen/spe/dataframe/sort.py +104 -0
  29. maxframe/codegen/spe/dataframe/statistics.py +46 -0
  30. maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
  31. maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
  32. maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
  33. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +304 -0
  34. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +134 -0
  35. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  36. maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
  37. maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
  38. maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
  39. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +95 -0
  40. maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
  41. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  42. maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
  43. maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
  44. maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
  45. maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
  46. maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
  47. maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
  48. maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
  49. maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
  50. maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
  51. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +202 -0
  52. maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
  53. maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
  54. maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
  55. maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
  56. maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
  57. maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
  58. maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
  59. maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
  60. maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
  61. maxframe/codegen/spe/dataframe/tests/test_groupby.py +288 -0
  62. maxframe/codegen/spe/dataframe/tests/test_merge.py +426 -0
  63. maxframe/codegen/spe/dataframe/tests/test_reduction.py +117 -0
  64. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  65. maxframe/codegen/spe/dataframe/tests/test_sort.py +179 -0
  66. maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
  67. maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
  68. maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
  69. maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
  70. maxframe/codegen/spe/dataframe/tseries.py +55 -0
  71. maxframe/codegen/spe/dataframe/udf.py +62 -0
  72. maxframe/codegen/spe/dataframe/value_counts.py +31 -0
  73. maxframe/codegen/spe/dataframe/window.py +65 -0
  74. maxframe/codegen/spe/learn/__init__.py +15 -0
  75. maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
  76. maxframe/codegen/spe/learn/contrib/lightgbm.py +161 -0
  77. maxframe/codegen/spe/learn/contrib/models.py +41 -0
  78. maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
  79. maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
  80. maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
  81. maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
  82. maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
  83. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +99 -0
  84. maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
  85. maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
  86. maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
  87. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  88. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  89. maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
  90. maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
  91. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  92. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  93. maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
  94. maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
  95. maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
  96. maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
  97. maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
  98. maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
  99. maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
  100. maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
  101. maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
  102. maxframe/codegen/spe/learn/utils/__init__.py +15 -0
  103. maxframe/codegen/spe/learn/utils/checks.py +55 -0
  104. maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
  105. maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
  106. maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
  107. maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
  108. maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
  109. maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
  110. maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
  111. maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
  112. maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
  113. maxframe/codegen/spe/learn/utils/validation.py +35 -0
  114. maxframe/codegen/spe/objects.py +26 -0
  115. maxframe/codegen/spe/remote.py +29 -0
  116. maxframe/codegen/spe/tensor/__init__.py +31 -0
  117. maxframe/codegen/spe/tensor/arithmetic.py +95 -0
  118. maxframe/codegen/spe/tensor/core.py +41 -0
  119. maxframe/codegen/spe/tensor/datasource.py +166 -0
  120. maxframe/codegen/spe/tensor/extensions.py +35 -0
  121. maxframe/codegen/spe/tensor/fetch.py +26 -0
  122. maxframe/codegen/spe/tensor/fft.py +74 -0
  123. maxframe/codegen/spe/tensor/indexing.py +63 -0
  124. maxframe/codegen/spe/tensor/linalg.py +90 -0
  125. maxframe/codegen/spe/tensor/merge.py +31 -0
  126. maxframe/codegen/spe/tensor/misc.py +175 -0
  127. maxframe/codegen/spe/tensor/random.py +29 -0
  128. maxframe/codegen/spe/tensor/reduction.py +39 -0
  129. maxframe/codegen/spe/tensor/reshape.py +26 -0
  130. maxframe/codegen/spe/tensor/sort.py +42 -0
  131. maxframe/codegen/spe/tensor/spatial.py +45 -0
  132. maxframe/codegen/spe/tensor/special.py +35 -0
  133. maxframe/codegen/spe/tensor/statistics.py +68 -0
  134. maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
  135. maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
  136. maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
  137. maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
  138. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  139. maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
  140. maxframe/codegen/spe/tensor/tests/test_linalg.py +52 -0
  141. maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
  142. maxframe/codegen/spe/tensor/tests/test_misc.py +144 -0
  143. maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
  144. maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
  145. maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
  146. maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
  147. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  148. maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
  149. maxframe/codegen/spe/tensor/tests/test_statistics.py +43 -0
  150. maxframe/codegen/spe/tests/__init__.py +13 -0
  151. maxframe/codegen/spe/tests/test_remote.py +29 -0
  152. maxframe/codegen/spe/tests/test_spe_codegen.py +135 -0
  153. maxframe/codegen/spe/utils.py +56 -0
  154. maxframe/codegen/tests/__init__.py +13 -0
  155. maxframe/codegen/tests/test_codegen.py +67 -0
  156. maxframe/config/__init__.py +15 -0
  157. maxframe/config/config.py +630 -0
  158. maxframe/config/tests/__init__.py +13 -0
  159. maxframe/config/tests/test_config.py +114 -0
  160. maxframe/config/tests/test_validators.py +46 -0
  161. maxframe/config/validators.py +142 -0
  162. maxframe/conftest.py +261 -0
  163. maxframe/core/__init__.py +53 -0
  164. maxframe/core/accessor.py +45 -0
  165. maxframe/core/base.py +157 -0
  166. maxframe/core/context.py +110 -0
  167. maxframe/core/entity/__init__.py +34 -0
  168. maxframe/core/entity/core.py +150 -0
  169. maxframe/core/entity/executable.py +337 -0
  170. maxframe/core/entity/objects.py +115 -0
  171. maxframe/core/entity/output_types.py +101 -0
  172. maxframe/core/entity/tests/__init__.py +13 -0
  173. maxframe/core/entity/tests/test_objects.py +42 -0
  174. maxframe/core/entity/tileables.py +376 -0
  175. maxframe/core/entity/utils.py +39 -0
  176. maxframe/core/graph/__init__.py +22 -0
  177. maxframe/core/graph/builder/__init__.py +15 -0
  178. maxframe/core/graph/builder/base.py +90 -0
  179. maxframe/core/graph/builder/tileable.py +34 -0
  180. maxframe/core/graph/builder/utils.py +37 -0
  181. maxframe/core/graph/core.cp312-win32.pyd +0 -0
  182. maxframe/core/graph/core.pyx +478 -0
  183. maxframe/core/graph/entity.py +187 -0
  184. maxframe/core/graph/tests/__init__.py +13 -0
  185. maxframe/core/graph/tests/test_graph.py +205 -0
  186. maxframe/core/mode.py +101 -0
  187. maxframe/core/operator/__init__.py +32 -0
  188. maxframe/core/operator/base.py +481 -0
  189. maxframe/core/operator/core.py +307 -0
  190. maxframe/core/operator/fetch.py +40 -0
  191. maxframe/core/operator/objects.py +43 -0
  192. maxframe/core/operator/shuffle.py +45 -0
  193. maxframe/core/operator/tests/__init__.py +13 -0
  194. maxframe/core/operator/tests/test_core.py +64 -0
  195. maxframe/core/operator/utils.py +68 -0
  196. maxframe/core/tests/__init__.py +13 -0
  197. maxframe/core/tests/test_mode.py +75 -0
  198. maxframe/dataframe/__init__.py +90 -0
  199. maxframe/dataframe/accessors/__init__.py +20 -0
  200. maxframe/dataframe/accessors/compat.py +45 -0
  201. maxframe/dataframe/accessors/datetime_/__init__.py +35 -0
  202. maxframe/dataframe/accessors/datetime_/accessor.py +67 -0
  203. maxframe/dataframe/accessors/datetime_/core.py +106 -0
  204. maxframe/dataframe/accessors/datetime_/tests/__init__.py +13 -0
  205. maxframe/dataframe/accessors/datetime_/tests/test_datetime_accessor.py +41 -0
  206. maxframe/dataframe/accessors/dict_/__init__.py +45 -0
  207. maxframe/dataframe/accessors/dict_/accessor.py +39 -0
  208. maxframe/dataframe/accessors/dict_/contains.py +72 -0
  209. maxframe/dataframe/accessors/dict_/core.py +48 -0
  210. maxframe/dataframe/accessors/dict_/getitem.py +140 -0
  211. maxframe/dataframe/accessors/dict_/length.py +64 -0
  212. maxframe/dataframe/accessors/dict_/remove.py +75 -0
  213. maxframe/dataframe/accessors/dict_/setitem.py +79 -0
  214. maxframe/dataframe/accessors/dict_/tests/__init__.py +13 -0
  215. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +168 -0
  216. maxframe/dataframe/accessors/list_/__init__.py +39 -0
  217. maxframe/dataframe/accessors/list_/accessor.py +39 -0
  218. maxframe/dataframe/accessors/list_/core.py +48 -0
  219. maxframe/dataframe/accessors/list_/getitem.py +128 -0
  220. maxframe/dataframe/accessors/list_/length.py +64 -0
  221. maxframe/dataframe/accessors/list_/tests/__init__.py +13 -0
  222. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +81 -0
  223. maxframe/dataframe/accessors/plotting/__init__.py +40 -0
  224. maxframe/dataframe/accessors/plotting/core.py +78 -0
  225. maxframe/dataframe/accessors/plotting/tests/__init__.py +13 -0
  226. maxframe/dataframe/accessors/plotting/tests/test_plotting_accessor.py +136 -0
  227. maxframe/dataframe/accessors/string_/__init__.py +36 -0
  228. maxframe/dataframe/accessors/string_/accessor.py +215 -0
  229. maxframe/dataframe/accessors/string_/core.py +226 -0
  230. maxframe/dataframe/accessors/string_/tests/__init__.py +13 -0
  231. maxframe/dataframe/accessors/string_/tests/test_string_accessor.py +73 -0
  232. maxframe/dataframe/accessors/struct_/__init__.py +39 -0
  233. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  234. maxframe/dataframe/accessors/struct_/core.py +43 -0
  235. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  236. maxframe/dataframe/accessors/struct_/field.py +123 -0
  237. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  238. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  239. maxframe/dataframe/arithmetic/__init__.py +373 -0
  240. maxframe/dataframe/arithmetic/abs.py +33 -0
  241. maxframe/dataframe/arithmetic/add.py +60 -0
  242. maxframe/dataframe/arithmetic/arccos.py +28 -0
  243. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  244. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  245. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  246. maxframe/dataframe/arithmetic/arctan.py +28 -0
  247. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  248. maxframe/dataframe/arithmetic/between.py +106 -0
  249. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  250. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  251. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  252. maxframe/dataframe/arithmetic/ceil.py +28 -0
  253. maxframe/dataframe/arithmetic/core.py +361 -0
  254. maxframe/dataframe/arithmetic/cos.py +28 -0
  255. maxframe/dataframe/arithmetic/cosh.py +28 -0
  256. maxframe/dataframe/arithmetic/degrees.py +28 -0
  257. maxframe/dataframe/arithmetic/docstring.py +416 -0
  258. maxframe/dataframe/arithmetic/dot.py +237 -0
  259. maxframe/dataframe/arithmetic/equal.py +58 -0
  260. maxframe/dataframe/arithmetic/exp.py +28 -0
  261. maxframe/dataframe/arithmetic/exp2.py +28 -0
  262. maxframe/dataframe/arithmetic/expm1.py +28 -0
  263. maxframe/dataframe/arithmetic/floor.py +28 -0
  264. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  265. maxframe/dataframe/arithmetic/greater.py +59 -0
  266. maxframe/dataframe/arithmetic/greater_equal.py +59 -0
  267. maxframe/dataframe/arithmetic/invert.py +33 -0
  268. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  269. maxframe/dataframe/arithmetic/less.py +57 -0
  270. maxframe/dataframe/arithmetic/less_equal.py +59 -0
  271. maxframe/dataframe/arithmetic/log.py +28 -0
  272. maxframe/dataframe/arithmetic/log10.py +28 -0
  273. maxframe/dataframe/arithmetic/log2.py +28 -0
  274. maxframe/dataframe/arithmetic/maximum.py +33 -0
  275. maxframe/dataframe/arithmetic/minimum.py +33 -0
  276. maxframe/dataframe/arithmetic/mod.py +60 -0
  277. maxframe/dataframe/arithmetic/multiply.py +60 -0
  278. maxframe/dataframe/arithmetic/negative.py +33 -0
  279. maxframe/dataframe/arithmetic/not_equal.py +58 -0
  280. maxframe/dataframe/arithmetic/power.py +68 -0
  281. maxframe/dataframe/arithmetic/radians.py +28 -0
  282. maxframe/dataframe/arithmetic/round.py +144 -0
  283. maxframe/dataframe/arithmetic/sin.py +28 -0
  284. maxframe/dataframe/arithmetic/sinh.py +28 -0
  285. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  286. maxframe/dataframe/arithmetic/subtract.py +64 -0
  287. maxframe/dataframe/arithmetic/tan.py +28 -0
  288. maxframe/dataframe/arithmetic/tanh.py +28 -0
  289. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  290. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +747 -0
  291. maxframe/dataframe/arithmetic/truediv.py +64 -0
  292. maxframe/dataframe/arithmetic/trunc.py +28 -0
  293. maxframe/dataframe/core.py +2386 -0
  294. maxframe/dataframe/datasource/__init__.py +33 -0
  295. maxframe/dataframe/datasource/core.py +112 -0
  296. maxframe/dataframe/datasource/dataframe.py +59 -0
  297. maxframe/dataframe/datasource/date_range.py +512 -0
  298. maxframe/dataframe/datasource/direct.py +57 -0
  299. maxframe/dataframe/datasource/from_dict.py +124 -0
  300. maxframe/dataframe/datasource/from_index.py +58 -0
  301. maxframe/dataframe/datasource/from_records.py +191 -0
  302. maxframe/dataframe/datasource/from_tensor.py +503 -0
  303. maxframe/dataframe/datasource/index.py +117 -0
  304. maxframe/dataframe/datasource/read_csv.py +534 -0
  305. maxframe/dataframe/datasource/read_odps_query.py +536 -0
  306. maxframe/dataframe/datasource/read_odps_table.py +295 -0
  307. maxframe/dataframe/datasource/read_parquet.py +278 -0
  308. maxframe/dataframe/datasource/series.py +55 -0
  309. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  310. maxframe/dataframe/datasource/tests/test_datasource.py +663 -0
  311. maxframe/dataframe/datastore/__init__.py +41 -0
  312. maxframe/dataframe/datastore/core.py +28 -0
  313. maxframe/dataframe/datastore/direct.py +268 -0
  314. maxframe/dataframe/datastore/tests/__init__.py +13 -0
  315. maxframe/dataframe/datastore/tests/test_to_odps.py +99 -0
  316. maxframe/dataframe/datastore/to_csv.py +219 -0
  317. maxframe/dataframe/datastore/to_json.py +215 -0
  318. maxframe/dataframe/datastore/to_odps.py +285 -0
  319. maxframe/dataframe/datastore/to_parquet.py +121 -0
  320. maxframe/dataframe/extensions/__init__.py +70 -0
  321. maxframe/dataframe/extensions/accessor.py +35 -0
  322. maxframe/dataframe/extensions/apply_chunk.py +733 -0
  323. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  324. maxframe/dataframe/extensions/collect_kv.py +126 -0
  325. maxframe/dataframe/extensions/extract_kv.py +177 -0
  326. maxframe/dataframe/extensions/flatjson.py +133 -0
  327. maxframe/dataframe/extensions/flatmap.py +329 -0
  328. maxframe/dataframe/extensions/map_reduce.py +263 -0
  329. maxframe/dataframe/extensions/rebalance.py +62 -0
  330. maxframe/dataframe/extensions/reshuffle.py +83 -0
  331. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  332. maxframe/dataframe/extensions/tests/test_apply_chunk.py +194 -0
  333. maxframe/dataframe/extensions/tests/test_extensions.py +198 -0
  334. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  335. maxframe/dataframe/fetch/__init__.py +15 -0
  336. maxframe/dataframe/fetch/core.py +97 -0
  337. maxframe/dataframe/groupby/__init__.py +105 -0
  338. maxframe/dataframe/groupby/aggregation.py +485 -0
  339. maxframe/dataframe/groupby/apply.py +235 -0
  340. maxframe/dataframe/groupby/apply_chunk.py +407 -0
  341. maxframe/dataframe/groupby/core.py +342 -0
  342. maxframe/dataframe/groupby/cum.py +102 -0
  343. maxframe/dataframe/groupby/expanding.py +264 -0
  344. maxframe/dataframe/groupby/extensions.py +26 -0
  345. maxframe/dataframe/groupby/fill.py +149 -0
  346. maxframe/dataframe/groupby/getitem.py +105 -0
  347. maxframe/dataframe/groupby/head.py +115 -0
  348. maxframe/dataframe/groupby/rank.py +136 -0
  349. maxframe/dataframe/groupby/rolling.py +206 -0
  350. maxframe/dataframe/groupby/sample.py +214 -0
  351. maxframe/dataframe/groupby/shift.py +114 -0
  352. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  353. maxframe/dataframe/groupby/tests/test_groupby.py +373 -0
  354. maxframe/dataframe/groupby/transform.py +264 -0
  355. maxframe/dataframe/indexing/__init__.py +104 -0
  356. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  357. maxframe/dataframe/indexing/align.py +350 -0
  358. maxframe/dataframe/indexing/at.py +83 -0
  359. maxframe/dataframe/indexing/droplevel.py +195 -0
  360. maxframe/dataframe/indexing/filter.py +169 -0
  361. maxframe/dataframe/indexing/get_level_values.py +76 -0
  362. maxframe/dataframe/indexing/getitem.py +205 -0
  363. maxframe/dataframe/indexing/iat.py +82 -0
  364. maxframe/dataframe/indexing/iloc.py +711 -0
  365. maxframe/dataframe/indexing/insert.py +118 -0
  366. maxframe/dataframe/indexing/loc.py +694 -0
  367. maxframe/dataframe/indexing/reindex.py +541 -0
  368. maxframe/dataframe/indexing/rename.py +445 -0
  369. maxframe/dataframe/indexing/rename_axis.py +217 -0
  370. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  371. maxframe/dataframe/indexing/reset_index.py +427 -0
  372. maxframe/dataframe/indexing/sample.py +232 -0
  373. maxframe/dataframe/indexing/set_axis.py +197 -0
  374. maxframe/dataframe/indexing/set_index.py +128 -0
  375. maxframe/dataframe/indexing/setitem.py +133 -0
  376. maxframe/dataframe/indexing/swaplevel.py +185 -0
  377. maxframe/dataframe/indexing/take.py +99 -0
  378. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  379. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  380. maxframe/dataframe/indexing/truncate.py +140 -0
  381. maxframe/dataframe/indexing/where.py +300 -0
  382. maxframe/dataframe/indexing/xs.py +148 -0
  383. maxframe/dataframe/initializer.py +298 -0
  384. maxframe/dataframe/merge/__init__.py +53 -0
  385. maxframe/dataframe/merge/append.py +120 -0
  386. maxframe/dataframe/merge/combine.py +244 -0
  387. maxframe/dataframe/merge/combine_first.py +120 -0
  388. maxframe/dataframe/merge/compare.py +387 -0
  389. maxframe/dataframe/merge/concat.py +500 -0
  390. maxframe/dataframe/merge/merge.py +806 -0
  391. maxframe/dataframe/merge/tests/__init__.py +13 -0
  392. maxframe/dataframe/merge/tests/test_merge.py +390 -0
  393. maxframe/dataframe/merge/update.py +271 -0
  394. maxframe/dataframe/misc/__init__.py +145 -0
  395. maxframe/dataframe/misc/_duplicate.py +56 -0
  396. maxframe/dataframe/misc/apply.py +730 -0
  397. maxframe/dataframe/misc/astype.py +237 -0
  398. maxframe/dataframe/misc/case_when.py +145 -0
  399. maxframe/dataframe/misc/check_monotonic.py +84 -0
  400. maxframe/dataframe/misc/check_unique.py +82 -0
  401. maxframe/dataframe/misc/clip.py +145 -0
  402. maxframe/dataframe/misc/cut.py +386 -0
  403. maxframe/dataframe/misc/describe.py +278 -0
  404. maxframe/dataframe/misc/diff.py +210 -0
  405. maxframe/dataframe/misc/drop.py +473 -0
  406. maxframe/dataframe/misc/drop_duplicates.py +251 -0
  407. maxframe/dataframe/misc/duplicated.py +292 -0
  408. maxframe/dataframe/misc/eval.py +730 -0
  409. maxframe/dataframe/misc/explode.py +171 -0
  410. maxframe/dataframe/misc/factorize.py +160 -0
  411. maxframe/dataframe/misc/get_dummies.py +241 -0
  412. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  413. maxframe/dataframe/misc/isin.py +220 -0
  414. maxframe/dataframe/misc/map.py +360 -0
  415. maxframe/dataframe/misc/memory_usage.py +248 -0
  416. maxframe/dataframe/misc/pct_change.py +68 -0
  417. maxframe/dataframe/misc/qcut.py +104 -0
  418. maxframe/dataframe/misc/rechunk.py +59 -0
  419. maxframe/dataframe/misc/repeat.py +159 -0
  420. maxframe/dataframe/misc/select_dtypes.py +104 -0
  421. maxframe/dataframe/misc/shift.py +259 -0
  422. maxframe/dataframe/misc/tests/__init__.py +13 -0
  423. maxframe/dataframe/misc/tests/test_misc.py +649 -0
  424. maxframe/dataframe/misc/to_numeric.py +181 -0
  425. maxframe/dataframe/misc/transform.py +346 -0
  426. maxframe/dataframe/misc/transpose.py +148 -0
  427. maxframe/dataframe/misc/valid_index.py +115 -0
  428. maxframe/dataframe/misc/value_counts.py +206 -0
  429. maxframe/dataframe/missing/__init__.py +53 -0
  430. maxframe/dataframe/missing/checkna.py +231 -0
  431. maxframe/dataframe/missing/dropna.py +294 -0
  432. maxframe/dataframe/missing/fillna.py +283 -0
  433. maxframe/dataframe/missing/replace.py +446 -0
  434. maxframe/dataframe/missing/tests/__init__.py +13 -0
  435. maxframe/dataframe/missing/tests/test_missing.py +90 -0
  436. maxframe/dataframe/operators.py +231 -0
  437. maxframe/dataframe/reduction/__init__.py +129 -0
  438. maxframe/dataframe/reduction/aggregation.py +502 -0
  439. maxframe/dataframe/reduction/all.py +78 -0
  440. maxframe/dataframe/reduction/any.py +78 -0
  441. maxframe/dataframe/reduction/argmax.py +103 -0
  442. maxframe/dataframe/reduction/argmin.py +103 -0
  443. maxframe/dataframe/reduction/core.py +923 -0
  444. maxframe/dataframe/reduction/count.py +63 -0
  445. maxframe/dataframe/reduction/cov.py +166 -0
  446. maxframe/dataframe/reduction/cummax.py +30 -0
  447. maxframe/dataframe/reduction/cummin.py +30 -0
  448. maxframe/dataframe/reduction/cumprod.py +30 -0
  449. maxframe/dataframe/reduction/cumsum.py +30 -0
  450. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  451. maxframe/dataframe/reduction/idxmax.py +185 -0
  452. maxframe/dataframe/reduction/idxmin.py +185 -0
  453. maxframe/dataframe/reduction/kurtosis.py +111 -0
  454. maxframe/dataframe/reduction/max.py +65 -0
  455. maxframe/dataframe/reduction/mean.py +63 -0
  456. maxframe/dataframe/reduction/median.py +56 -0
  457. maxframe/dataframe/reduction/min.py +65 -0
  458. maxframe/dataframe/reduction/mode.py +190 -0
  459. maxframe/dataframe/reduction/nunique.py +149 -0
  460. maxframe/dataframe/reduction/prod.py +81 -0
  461. maxframe/dataframe/reduction/reduction_size.py +36 -0
  462. maxframe/dataframe/reduction/sem.py +73 -0
  463. maxframe/dataframe/reduction/skew.py +93 -0
  464. maxframe/dataframe/reduction/std.py +53 -0
  465. maxframe/dataframe/reduction/str_concat.py +51 -0
  466. maxframe/dataframe/reduction/sum.py +81 -0
  467. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  468. maxframe/dataframe/reduction/tests/test_reduction.py +598 -0
  469. maxframe/dataframe/reduction/unique.py +153 -0
  470. maxframe/dataframe/reduction/var.py +76 -0
  471. maxframe/dataframe/reshape/__init__.py +38 -0
  472. maxframe/dataframe/reshape/melt.py +169 -0
  473. maxframe/dataframe/reshape/pivot.py +233 -0
  474. maxframe/dataframe/reshape/pivot_table.py +275 -0
  475. maxframe/dataframe/reshape/stack.py +240 -0
  476. maxframe/dataframe/reshape/unstack.py +114 -0
  477. maxframe/dataframe/sort/__init__.py +49 -0
  478. maxframe/dataframe/sort/argsort.py +68 -0
  479. maxframe/dataframe/sort/core.py +37 -0
  480. maxframe/dataframe/sort/nlargest.py +238 -0
  481. maxframe/dataframe/sort/nsmallest.py +228 -0
  482. maxframe/dataframe/sort/rank.py +147 -0
  483. maxframe/dataframe/sort/sort_index.py +153 -0
  484. maxframe/dataframe/sort/sort_values.py +308 -0
  485. maxframe/dataframe/sort/tests/__init__.py +13 -0
  486. maxframe/dataframe/sort/tests/test_sort.py +85 -0
  487. maxframe/dataframe/statistics/__init__.py +33 -0
  488. maxframe/dataframe/statistics/corr.py +284 -0
  489. maxframe/dataframe/statistics/quantile.py +338 -0
  490. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  491. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  492. maxframe/dataframe/tests/__init__.py +13 -0
  493. maxframe/dataframe/tests/test_initializer.py +60 -0
  494. maxframe/dataframe/tests/test_typing.py +119 -0
  495. maxframe/dataframe/tests/test_utils.py +169 -0
  496. maxframe/dataframe/tseries/__init__.py +32 -0
  497. maxframe/dataframe/tseries/at_time.py +61 -0
  498. maxframe/dataframe/tseries/between_time.py +122 -0
  499. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  500. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  501. maxframe/dataframe/tseries/to_datetime.py +299 -0
  502. maxframe/dataframe/typing_.py +196 -0
  503. maxframe/dataframe/ufunc/__init__.py +27 -0
  504. maxframe/dataframe/ufunc/tensor.py +54 -0
  505. maxframe/dataframe/ufunc/ufunc.py +53 -0
  506. maxframe/dataframe/utils.py +1728 -0
  507. maxframe/dataframe/window/__init__.py +29 -0
  508. maxframe/dataframe/window/aggregation.py +100 -0
  509. maxframe/dataframe/window/core.py +82 -0
  510. maxframe/dataframe/window/ewm.py +247 -0
  511. maxframe/dataframe/window/expanding.py +151 -0
  512. maxframe/dataframe/window/rolling.py +389 -0
  513. maxframe/dataframe/window/tests/__init__.py +13 -0
  514. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  515. maxframe/dataframe/window/tests/test_expanding.py +60 -0
  516. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  517. maxframe/env.py +37 -0
  518. maxframe/errors.py +52 -0
  519. maxframe/extension.py +131 -0
  520. maxframe/io/__init__.py +13 -0
  521. maxframe/io/objects/__init__.py +24 -0
  522. maxframe/io/objects/core.py +156 -0
  523. maxframe/io/objects/tensor.py +133 -0
  524. maxframe/io/objects/tests/__init__.py +13 -0
  525. maxframe/io/objects/tests/test_object_io.py +85 -0
  526. maxframe/io/odpsio/__init__.py +24 -0
  527. maxframe/io/odpsio/arrow.py +161 -0
  528. maxframe/io/odpsio/schema.py +533 -0
  529. maxframe/io/odpsio/tableio.py +736 -0
  530. maxframe/io/odpsio/tests/__init__.py +13 -0
  531. maxframe/io/odpsio/tests/test_arrow.py +132 -0
  532. maxframe/io/odpsio/tests/test_schema.py +582 -0
  533. maxframe/io/odpsio/tests/test_tableio.py +205 -0
  534. maxframe/io/odpsio/tests/test_volumeio.py +75 -0
  535. maxframe/io/odpsio/volumeio.py +102 -0
  536. maxframe/learn/__init__.py +25 -0
  537. maxframe/learn/cluster/__init__.py +15 -0
  538. maxframe/learn/cluster/_kmeans.py +782 -0
  539. maxframe/learn/contrib/__init__.py +17 -0
  540. maxframe/learn/contrib/graph/__init__.py +15 -0
  541. maxframe/learn/contrib/graph/connected_components.py +216 -0
  542. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  543. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  544. maxframe/learn/contrib/lightgbm/__init__.py +33 -0
  545. maxframe/learn/contrib/lightgbm/_predict.py +138 -0
  546. maxframe/learn/contrib/lightgbm/_train.py +163 -0
  547. maxframe/learn/contrib/lightgbm/callback.py +114 -0
  548. maxframe/learn/contrib/lightgbm/classifier.py +199 -0
  549. maxframe/learn/contrib/lightgbm/core.py +372 -0
  550. maxframe/learn/contrib/lightgbm/dataset.py +153 -0
  551. maxframe/learn/contrib/lightgbm/regressor.py +29 -0
  552. maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
  553. maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
  554. maxframe/learn/contrib/llm/__init__.py +17 -0
  555. maxframe/learn/contrib/llm/core.py +105 -0
  556. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  557. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  558. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  559. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  560. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  561. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  562. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  563. maxframe/learn/contrib/llm/models/__init__.py +16 -0
  564. maxframe/learn/contrib/llm/models/dashscope.py +114 -0
  565. maxframe/learn/contrib/llm/models/managed.py +119 -0
  566. maxframe/learn/contrib/llm/models/openai.py +72 -0
  567. maxframe/learn/contrib/llm/multi_modal.py +135 -0
  568. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  569. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  570. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  571. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  572. maxframe/learn/contrib/llm/text.py +608 -0
  573. maxframe/learn/contrib/models.py +109 -0
  574. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  575. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  576. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  577. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  578. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  579. maxframe/learn/contrib/utils.py +108 -0
  580. maxframe/learn/contrib/xgboost/__init__.py +33 -0
  581. maxframe/learn/contrib/xgboost/callback.py +86 -0
  582. maxframe/learn/contrib/xgboost/classifier.py +119 -0
  583. maxframe/learn/contrib/xgboost/core.py +469 -0
  584. maxframe/learn/contrib/xgboost/dmatrix.py +157 -0
  585. maxframe/learn/contrib/xgboost/predict.py +133 -0
  586. maxframe/learn/contrib/xgboost/regressor.py +91 -0
  587. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  588. maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
  589. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  590. maxframe/learn/contrib/xgboost/train.py +181 -0
  591. maxframe/learn/core.py +344 -0
  592. maxframe/learn/datasets/__init__.py +20 -0
  593. maxframe/learn/datasets/samples_generator.py +628 -0
  594. maxframe/learn/linear_model/__init__.py +15 -0
  595. maxframe/learn/linear_model/_base.py +220 -0
  596. maxframe/learn/linear_model/_lin_reg.py +175 -0
  597. maxframe/learn/metrics/__init__.py +31 -0
  598. maxframe/learn/metrics/_check_targets.py +95 -0
  599. maxframe/learn/metrics/_classification.py +1266 -0
  600. maxframe/learn/metrics/_ranking.py +477 -0
  601. maxframe/learn/metrics/_regression.py +256 -0
  602. maxframe/learn/metrics/_scorer.py +60 -0
  603. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  604. maxframe/learn/metrics/pairwise/core.py +77 -0
  605. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  606. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  607. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  608. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  609. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  610. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  611. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  612. maxframe/learn/metrics/tests/__init__.py +13 -0
  613. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  614. maxframe/learn/model_selection/__init__.py +15 -0
  615. maxframe/learn/model_selection/_split.py +451 -0
  616. maxframe/learn/model_selection/tests/__init__.py +13 -0
  617. maxframe/learn/model_selection/tests/test_split.py +156 -0
  618. maxframe/learn/preprocessing/__init__.py +16 -0
  619. maxframe/learn/preprocessing/_data/__init__.py +17 -0
  620. maxframe/learn/preprocessing/_data/min_max_scaler.py +401 -0
  621. maxframe/learn/preprocessing/_data/normalize.py +127 -0
  622. maxframe/learn/preprocessing/_data/standard_scaler.py +512 -0
  623. maxframe/learn/preprocessing/_data/utils.py +79 -0
  624. maxframe/learn/preprocessing/_label/__init__.py +16 -0
  625. maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
  626. maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
  627. maxframe/learn/utils/__init__.py +20 -0
  628. maxframe/learn/utils/_encode.py +312 -0
  629. maxframe/learn/utils/checks.py +160 -0
  630. maxframe/learn/utils/core.py +121 -0
  631. maxframe/learn/utils/extmath.py +246 -0
  632. maxframe/learn/utils/multiclass.py +292 -0
  633. maxframe/learn/utils/odpsio.py +262 -0
  634. maxframe/learn/utils/shuffle.py +114 -0
  635. maxframe/learn/utils/sparsefuncs.py +87 -0
  636. maxframe/learn/utils/validation.py +775 -0
  637. maxframe/lib/__init__.py +13 -0
  638. maxframe/lib/aio/__init__.py +27 -0
  639. maxframe/lib/aio/_runners.py +162 -0
  640. maxframe/lib/aio/_threads.py +35 -0
  641. maxframe/lib/aio/base.py +82 -0
  642. maxframe/lib/aio/file.py +85 -0
  643. maxframe/lib/aio/isolation.py +100 -0
  644. maxframe/lib/aio/lru.py +242 -0
  645. maxframe/lib/aio/parallelism.py +37 -0
  646. maxframe/lib/aio/tests/__init__.py +13 -0
  647. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  648. maxframe/lib/compat.py +185 -0
  649. maxframe/lib/compression.py +55 -0
  650. maxframe/lib/cython/__init__.py +13 -0
  651. maxframe/lib/cython/libcpp.pxd +30 -0
  652. maxframe/lib/dtypes_extension/__init__.py +30 -0
  653. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +609 -0
  654. maxframe/lib/dtypes_extension/blob.py +304 -0
  655. maxframe/lib/dtypes_extension/dtypes.py +106 -0
  656. maxframe/lib/dtypes_extension/tests/__init__.py +13 -0
  657. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  658. maxframe/lib/dtypes_extension/tests/test_dtypes.py +63 -0
  659. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  660. maxframe/lib/filesystem/__init__.py +22 -0
  661. maxframe/lib/filesystem/_glob.py +173 -0
  662. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  663. maxframe/lib/filesystem/_oss_lib/common.py +274 -0
  664. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  665. maxframe/lib/filesystem/_oss_lib/handle.py +180 -0
  666. maxframe/lib/filesystem/arrow.py +240 -0
  667. maxframe/lib/filesystem/base.py +327 -0
  668. maxframe/lib/filesystem/core.py +95 -0
  669. maxframe/lib/filesystem/fshandler.py +136 -0
  670. maxframe/lib/filesystem/fsmap.py +164 -0
  671. maxframe/lib/filesystem/hdfs.py +31 -0
  672. maxframe/lib/filesystem/local.py +120 -0
  673. maxframe/lib/filesystem/oss.py +283 -0
  674. maxframe/lib/filesystem/tests/__init__.py +13 -0
  675. maxframe/lib/filesystem/tests/test_filesystem.py +205 -0
  676. maxframe/lib/filesystem/tests/test_fshandler.py +281 -0
  677. maxframe/lib/filesystem/tests/test_oss.py +220 -0
  678. maxframe/lib/functools_compat.py +81 -0
  679. maxframe/lib/mmh3.cp312-win32.pyd +0 -0
  680. maxframe/lib/mmh3.pyi +43 -0
  681. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  682. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  683. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  684. maxframe/lib/sparse/__init__.py +856 -0
  685. maxframe/lib/sparse/array.py +1616 -0
  686. maxframe/lib/sparse/core.py +90 -0
  687. maxframe/lib/sparse/linalg.py +31 -0
  688. maxframe/lib/sparse/matrix.py +244 -0
  689. maxframe/lib/sparse/tests/__init__.py +13 -0
  690. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  691. maxframe/lib/sparse/vector.py +148 -0
  692. maxframe/lib/tblib/LICENSE +20 -0
  693. maxframe/lib/tblib/__init__.py +327 -0
  694. maxframe/lib/tblib/cpython.py +83 -0
  695. maxframe/lib/tblib/decorators.py +44 -0
  696. maxframe/lib/tblib/pickling_support.py +90 -0
  697. maxframe/lib/tests/__init__.py +13 -0
  698. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  699. maxframe/lib/version.py +620 -0
  700. maxframe/lib/wrapped_pickle.py +177 -0
  701. maxframe/mixin.py +157 -0
  702. maxframe/opcodes.py +654 -0
  703. maxframe/protocol.py +611 -0
  704. maxframe/remote/__init__.py +18 -0
  705. maxframe/remote/core.py +212 -0
  706. maxframe/remote/run_script.py +124 -0
  707. maxframe/serialization/__init__.py +39 -0
  708. maxframe/serialization/arrow.py +107 -0
  709. maxframe/serialization/blob.py +32 -0
  710. maxframe/serialization/core.cp312-win32.pyd +0 -0
  711. maxframe/serialization/core.pxd +50 -0
  712. maxframe/serialization/core.pyi +66 -0
  713. maxframe/serialization/core.pyx +1282 -0
  714. maxframe/serialization/exception.py +90 -0
  715. maxframe/serialization/maxframe_objects.py +39 -0
  716. maxframe/serialization/numpy.py +110 -0
  717. maxframe/serialization/pandas.py +278 -0
  718. maxframe/serialization/scipy.py +71 -0
  719. maxframe/serialization/serializables/__init__.py +55 -0
  720. maxframe/serialization/serializables/core.py +469 -0
  721. maxframe/serialization/serializables/field.py +624 -0
  722. maxframe/serialization/serializables/field_type.py +592 -0
  723. maxframe/serialization/serializables/tests/__init__.py +13 -0
  724. maxframe/serialization/serializables/tests/test_field_type.py +119 -0
  725. maxframe/serialization/serializables/tests/test_serializable.py +313 -0
  726. maxframe/serialization/tests/__init__.py +13 -0
  727. maxframe/serialization/tests/test_serial.py +516 -0
  728. maxframe/session.py +1250 -0
  729. maxframe/sperunner.py +165 -0
  730. maxframe/tensor/__init__.py +325 -0
  731. maxframe/tensor/arithmetic/__init__.py +322 -0
  732. maxframe/tensor/arithmetic/abs.py +66 -0
  733. maxframe/tensor/arithmetic/absolute.py +66 -0
  734. maxframe/tensor/arithmetic/add.py +112 -0
  735. maxframe/tensor/arithmetic/angle.py +70 -0
  736. maxframe/tensor/arithmetic/arccos.py +101 -0
  737. maxframe/tensor/arithmetic/arccosh.py +89 -0
  738. maxframe/tensor/arithmetic/arcsin.py +92 -0
  739. maxframe/tensor/arithmetic/arcsinh.py +84 -0
  740. maxframe/tensor/arithmetic/arctan.py +104 -0
  741. maxframe/tensor/arithmetic/arctan2.py +126 -0
  742. maxframe/tensor/arithmetic/arctanh.py +84 -0
  743. maxframe/tensor/arithmetic/around.py +112 -0
  744. maxframe/tensor/arithmetic/bitand.py +93 -0
  745. maxframe/tensor/arithmetic/bitor.py +100 -0
  746. maxframe/tensor/arithmetic/bitxor.py +93 -0
  747. maxframe/tensor/arithmetic/cbrt.py +64 -0
  748. maxframe/tensor/arithmetic/ceil.py +69 -0
  749. maxframe/tensor/arithmetic/clip.py +165 -0
  750. maxframe/tensor/arithmetic/conj.py +72 -0
  751. maxframe/tensor/arithmetic/copysign.py +76 -0
  752. maxframe/tensor/arithmetic/core.py +546 -0
  753. maxframe/tensor/arithmetic/cos.py +83 -0
  754. maxframe/tensor/arithmetic/cosh.py +70 -0
  755. maxframe/tensor/arithmetic/deg2rad.py +70 -0
  756. maxframe/tensor/arithmetic/degrees.py +75 -0
  757. maxframe/tensor/arithmetic/divide.py +112 -0
  758. maxframe/tensor/arithmetic/equal.py +74 -0
  759. maxframe/tensor/arithmetic/exp.py +104 -0
  760. maxframe/tensor/arithmetic/exp2.py +65 -0
  761. maxframe/tensor/arithmetic/expm1.py +77 -0
  762. maxframe/tensor/arithmetic/fabs.py +72 -0
  763. maxframe/tensor/arithmetic/fix.py +67 -0
  764. maxframe/tensor/arithmetic/float_power.py +101 -0
  765. maxframe/tensor/arithmetic/floor.py +75 -0
  766. maxframe/tensor/arithmetic/floordiv.py +92 -0
  767. maxframe/tensor/arithmetic/fmax.py +103 -0
  768. maxframe/tensor/arithmetic/fmin.py +104 -0
  769. maxframe/tensor/arithmetic/fmod.py +97 -0
  770. maxframe/tensor/arithmetic/frexp.py +96 -0
  771. maxframe/tensor/arithmetic/greater.py +75 -0
  772. maxframe/tensor/arithmetic/greater_equal.py +67 -0
  773. maxframe/tensor/arithmetic/hypot.py +75 -0
  774. maxframe/tensor/arithmetic/i0.py +87 -0
  775. maxframe/tensor/arithmetic/imag.py +65 -0
  776. maxframe/tensor/arithmetic/invert.py +108 -0
  777. maxframe/tensor/arithmetic/isclose.py +114 -0
  778. maxframe/tensor/arithmetic/iscomplex.py +62 -0
  779. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  780. maxframe/tensor/arithmetic/isfinite.py +104 -0
  781. maxframe/tensor/arithmetic/isinf.py +101 -0
  782. maxframe/tensor/arithmetic/isnan.py +80 -0
  783. maxframe/tensor/arithmetic/isreal.py +61 -0
  784. maxframe/tensor/arithmetic/ldexp.py +97 -0
  785. maxframe/tensor/arithmetic/less.py +67 -0
  786. maxframe/tensor/arithmetic/less_equal.py +67 -0
  787. maxframe/tensor/arithmetic/log.py +90 -0
  788. maxframe/tensor/arithmetic/log10.py +83 -0
  789. maxframe/tensor/arithmetic/log1p.py +93 -0
  790. maxframe/tensor/arithmetic/log2.py +83 -0
  791. maxframe/tensor/arithmetic/logaddexp.py +78 -0
  792. maxframe/tensor/arithmetic/logaddexp2.py +76 -0
  793. maxframe/tensor/arithmetic/logical_and.py +79 -0
  794. maxframe/tensor/arithmetic/logical_not.py +72 -0
  795. maxframe/tensor/arithmetic/logical_or.py +80 -0
  796. maxframe/tensor/arithmetic/logical_xor.py +86 -0
  797. maxframe/tensor/arithmetic/lshift.py +80 -0
  798. maxframe/tensor/arithmetic/maximum.py +106 -0
  799. maxframe/tensor/arithmetic/minimum.py +106 -0
  800. maxframe/tensor/arithmetic/mod.py +102 -0
  801. maxframe/tensor/arithmetic/modf.py +87 -0
  802. maxframe/tensor/arithmetic/multiply.py +114 -0
  803. maxframe/tensor/arithmetic/nan_to_num.py +97 -0
  804. maxframe/tensor/arithmetic/negative.py +63 -0
  805. maxframe/tensor/arithmetic/nextafter.py +66 -0
  806. maxframe/tensor/arithmetic/not_equal.py +70 -0
  807. maxframe/tensor/arithmetic/positive.py +45 -0
  808. maxframe/tensor/arithmetic/power.py +104 -0
  809. maxframe/tensor/arithmetic/rad2deg.py +69 -0
  810. maxframe/tensor/arithmetic/radians.py +75 -0
  811. maxframe/tensor/arithmetic/real.py +68 -0
  812. maxframe/tensor/arithmetic/reciprocal.py +78 -0
  813. maxframe/tensor/arithmetic/rint.py +66 -0
  814. maxframe/tensor/arithmetic/rshift.py +79 -0
  815. maxframe/tensor/arithmetic/setimag.py +27 -0
  816. maxframe/tensor/arithmetic/setreal.py +27 -0
  817. maxframe/tensor/arithmetic/sign.py +79 -0
  818. maxframe/tensor/arithmetic/signbit.py +63 -0
  819. maxframe/tensor/arithmetic/sin.py +96 -0
  820. maxframe/tensor/arithmetic/sinc.py +100 -0
  821. maxframe/tensor/arithmetic/sinh.py +91 -0
  822. maxframe/tensor/arithmetic/spacing.py +70 -0
  823. maxframe/tensor/arithmetic/sqrt.py +79 -0
  824. maxframe/tensor/arithmetic/square.py +67 -0
  825. maxframe/tensor/arithmetic/subtract.py +83 -0
  826. maxframe/tensor/arithmetic/tan.py +86 -0
  827. maxframe/tensor/arithmetic/tanh.py +90 -0
  828. maxframe/tensor/arithmetic/tests/__init__.py +13 -0
  829. maxframe/tensor/arithmetic/tests/test_arithmetic.py +449 -0
  830. maxframe/tensor/arithmetic/truediv.py +102 -0
  831. maxframe/tensor/arithmetic/trunc.py +70 -0
  832. maxframe/tensor/arithmetic/utils.py +91 -0
  833. maxframe/tensor/array_utils.py +164 -0
  834. maxframe/tensor/core.py +597 -0
  835. maxframe/tensor/datasource/__init__.py +40 -0
  836. maxframe/tensor/datasource/arange.py +154 -0
  837. maxframe/tensor/datasource/array.py +399 -0
  838. maxframe/tensor/datasource/core.py +114 -0
  839. maxframe/tensor/datasource/diag.py +140 -0
  840. maxframe/tensor/datasource/diagflat.py +69 -0
  841. maxframe/tensor/datasource/empty.py +167 -0
  842. maxframe/tensor/datasource/eye.py +95 -0
  843. maxframe/tensor/datasource/from_dataframe.py +68 -0
  844. maxframe/tensor/datasource/from_dense.py +37 -0
  845. maxframe/tensor/datasource/from_sparse.py +45 -0
  846. maxframe/tensor/datasource/full.py +184 -0
  847. maxframe/tensor/datasource/identity.py +54 -0
  848. maxframe/tensor/datasource/indices.py +115 -0
  849. maxframe/tensor/datasource/linspace.py +140 -0
  850. maxframe/tensor/datasource/meshgrid.py +135 -0
  851. maxframe/tensor/datasource/ones.py +178 -0
  852. maxframe/tensor/datasource/scalar.py +40 -0
  853. maxframe/tensor/datasource/tests/__init__.py +13 -0
  854. maxframe/tensor/datasource/tests/test_datasource.py +310 -0
  855. maxframe/tensor/datasource/tri_array.py +107 -0
  856. maxframe/tensor/datasource/zeros.py +192 -0
  857. maxframe/tensor/extensions/__init__.py +33 -0
  858. maxframe/tensor/extensions/accessor.py +25 -0
  859. maxframe/tensor/extensions/apply_chunk.py +137 -0
  860. maxframe/tensor/extensions/rebalance.py +65 -0
  861. maxframe/tensor/fetch/__init__.py +15 -0
  862. maxframe/tensor/fetch/core.py +54 -0
  863. maxframe/tensor/fft/__init__.py +32 -0
  864. maxframe/tensor/fft/core.py +168 -0
  865. maxframe/tensor/fft/fft.py +112 -0
  866. maxframe/tensor/fft/fft2.py +118 -0
  867. maxframe/tensor/fft/fftfreq.py +80 -0
  868. maxframe/tensor/fft/fftn.py +123 -0
  869. maxframe/tensor/fft/fftshift.py +79 -0
  870. maxframe/tensor/fft/hfft.py +112 -0
  871. maxframe/tensor/fft/ifft.py +114 -0
  872. maxframe/tensor/fft/ifft2.py +115 -0
  873. maxframe/tensor/fft/ifftn.py +123 -0
  874. maxframe/tensor/fft/ifftshift.py +73 -0
  875. maxframe/tensor/fft/ihfft.py +93 -0
  876. maxframe/tensor/fft/irfft.py +118 -0
  877. maxframe/tensor/fft/irfft2.py +62 -0
  878. maxframe/tensor/fft/irfftn.py +114 -0
  879. maxframe/tensor/fft/rfft.py +116 -0
  880. maxframe/tensor/fft/rfft2.py +63 -0
  881. maxframe/tensor/fft/rfftfreq.py +87 -0
  882. maxframe/tensor/fft/rfftn.py +113 -0
  883. maxframe/tensor/indexing/__init__.py +47 -0
  884. maxframe/tensor/indexing/choose.py +198 -0
  885. maxframe/tensor/indexing/compress.py +122 -0
  886. maxframe/tensor/indexing/core.py +190 -0
  887. maxframe/tensor/indexing/extract.py +69 -0
  888. maxframe/tensor/indexing/fill_diagonal.py +180 -0
  889. maxframe/tensor/indexing/flatnonzero.py +58 -0
  890. maxframe/tensor/indexing/getitem.py +144 -0
  891. maxframe/tensor/indexing/nonzero.py +118 -0
  892. maxframe/tensor/indexing/setitem.py +142 -0
  893. maxframe/tensor/indexing/slice.py +32 -0
  894. maxframe/tensor/indexing/take.py +128 -0
  895. maxframe/tensor/indexing/tests/__init__.py +13 -0
  896. maxframe/tensor/indexing/tests/test_indexing.py +232 -0
  897. maxframe/tensor/indexing/unravel_index.py +103 -0
  898. maxframe/tensor/lib/__init__.py +16 -0
  899. maxframe/tensor/lib/index_tricks.py +404 -0
  900. maxframe/tensor/linalg/__init__.py +43 -0
  901. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  902. maxframe/tensor/linalg/cholesky.py +117 -0
  903. maxframe/tensor/linalg/dot.py +145 -0
  904. maxframe/tensor/linalg/einsum.py +339 -0
  905. maxframe/tensor/linalg/inner.py +36 -0
  906. maxframe/tensor/linalg/inv.py +83 -0
  907. maxframe/tensor/linalg/lstsq.py +100 -0
  908. maxframe/tensor/linalg/lu.py +115 -0
  909. maxframe/tensor/linalg/matmul.py +225 -0
  910. maxframe/tensor/linalg/matrix_norm.py +75 -0
  911. maxframe/tensor/linalg/norm.py +249 -0
  912. maxframe/tensor/linalg/qr.py +124 -0
  913. maxframe/tensor/linalg/solve.py +72 -0
  914. maxframe/tensor/linalg/solve_triangular.py +103 -0
  915. maxframe/tensor/linalg/svd.py +167 -0
  916. maxframe/tensor/linalg/tensordot.py +213 -0
  917. maxframe/tensor/linalg/vdot.py +73 -0
  918. maxframe/tensor/linalg/vector_norm.py +113 -0
  919. maxframe/tensor/merge/__init__.py +21 -0
  920. maxframe/tensor/merge/append.py +74 -0
  921. maxframe/tensor/merge/column_stack.py +63 -0
  922. maxframe/tensor/merge/concatenate.py +103 -0
  923. maxframe/tensor/merge/dstack.py +71 -0
  924. maxframe/tensor/merge/hstack.py +70 -0
  925. maxframe/tensor/merge/stack.py +130 -0
  926. maxframe/tensor/merge/tests/__init__.py +13 -0
  927. maxframe/tensor/merge/tests/test_merge.py +79 -0
  928. maxframe/tensor/merge/vstack.py +74 -0
  929. maxframe/tensor/misc/__init__.py +72 -0
  930. maxframe/tensor/misc/argwhere.py +72 -0
  931. maxframe/tensor/misc/array_split.py +46 -0
  932. maxframe/tensor/misc/astype.py +121 -0
  933. maxframe/tensor/misc/atleast_1d.py +72 -0
  934. maxframe/tensor/misc/atleast_2d.py +70 -0
  935. maxframe/tensor/misc/atleast_3d.py +85 -0
  936. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  937. maxframe/tensor/misc/broadcast_to.py +89 -0
  938. maxframe/tensor/misc/copy.py +64 -0
  939. maxframe/tensor/misc/copyto.py +130 -0
  940. maxframe/tensor/misc/delete.py +104 -0
  941. maxframe/tensor/misc/diff.py +115 -0
  942. maxframe/tensor/misc/dsplit.py +68 -0
  943. maxframe/tensor/misc/ediff1d.py +74 -0
  944. maxframe/tensor/misc/expand_dims.py +85 -0
  945. maxframe/tensor/misc/flatten.py +63 -0
  946. maxframe/tensor/misc/flip.py +90 -0
  947. maxframe/tensor/misc/fliplr.py +64 -0
  948. maxframe/tensor/misc/flipud.py +68 -0
  949. maxframe/tensor/misc/hsplit.py +85 -0
  950. maxframe/tensor/misc/in1d.py +94 -0
  951. maxframe/tensor/misc/insert.py +139 -0
  952. maxframe/tensor/misc/isin.py +130 -0
  953. maxframe/tensor/misc/moveaxis.py +83 -0
  954. maxframe/tensor/misc/ndim.py +53 -0
  955. maxframe/tensor/misc/ravel.py +90 -0
  956. maxframe/tensor/misc/repeat.py +129 -0
  957. maxframe/tensor/misc/result_type.py +88 -0
  958. maxframe/tensor/misc/roll.py +124 -0
  959. maxframe/tensor/misc/rollaxis.py +77 -0
  960. maxframe/tensor/misc/searchsorted.py +147 -0
  961. maxframe/tensor/misc/setdiff1d.py +58 -0
  962. maxframe/tensor/misc/shape.py +89 -0
  963. maxframe/tensor/misc/split.py +190 -0
  964. maxframe/tensor/misc/squeeze.py +117 -0
  965. maxframe/tensor/misc/swapaxes.py +113 -0
  966. maxframe/tensor/misc/tests/__init__.py +13 -0
  967. maxframe/tensor/misc/tests/test_misc.py +112 -0
  968. maxframe/tensor/misc/tile.py +109 -0
  969. maxframe/tensor/misc/transpose.py +133 -0
  970. maxframe/tensor/misc/trapezoid.py +123 -0
  971. maxframe/tensor/misc/unique.py +227 -0
  972. maxframe/tensor/misc/vsplit.py +74 -0
  973. maxframe/tensor/misc/where.py +129 -0
  974. maxframe/tensor/operators.py +83 -0
  975. maxframe/tensor/random/__init__.py +166 -0
  976. maxframe/tensor/random/beta.py +87 -0
  977. maxframe/tensor/random/binomial.py +135 -0
  978. maxframe/tensor/random/bytes.py +37 -0
  979. maxframe/tensor/random/chisquare.py +108 -0
  980. maxframe/tensor/random/choice.py +187 -0
  981. maxframe/tensor/random/core.py +249 -0
  982. maxframe/tensor/random/dirichlet.py +121 -0
  983. maxframe/tensor/random/exponential.py +92 -0
  984. maxframe/tensor/random/f.py +133 -0
  985. maxframe/tensor/random/gamma.py +126 -0
  986. maxframe/tensor/random/geometric.py +91 -0
  987. maxframe/tensor/random/gumbel.py +165 -0
  988. maxframe/tensor/random/hypergeometric.py +146 -0
  989. maxframe/tensor/random/laplace.py +131 -0
  990. maxframe/tensor/random/logistic.py +127 -0
  991. maxframe/tensor/random/lognormal.py +157 -0
  992. maxframe/tensor/random/logseries.py +120 -0
  993. maxframe/tensor/random/multinomial.py +131 -0
  994. maxframe/tensor/random/multivariate_normal.py +190 -0
  995. maxframe/tensor/random/negative_binomial.py +123 -0
  996. maxframe/tensor/random/noncentral_chisquare.py +130 -0
  997. maxframe/tensor/random/noncentral_f.py +124 -0
  998. maxframe/tensor/random/normal.py +141 -0
  999. maxframe/tensor/random/pareto.py +138 -0
  1000. maxframe/tensor/random/permutation.py +107 -0
  1001. maxframe/tensor/random/poisson.py +109 -0
  1002. maxframe/tensor/random/power.py +140 -0
  1003. maxframe/tensor/random/rand.py +80 -0
  1004. maxframe/tensor/random/randint.py +119 -0
  1005. maxframe/tensor/random/randn.py +94 -0
  1006. maxframe/tensor/random/random_integers.py +121 -0
  1007. maxframe/tensor/random/random_sample.py +84 -0
  1008. maxframe/tensor/random/rayleigh.py +108 -0
  1009. maxframe/tensor/random/shuffle.py +61 -0
  1010. maxframe/tensor/random/standard_cauchy.py +103 -0
  1011. maxframe/tensor/random/standard_exponential.py +70 -0
  1012. maxframe/tensor/random/standard_gamma.py +118 -0
  1013. maxframe/tensor/random/standard_normal.py +72 -0
  1014. maxframe/tensor/random/standard_t.py +133 -0
  1015. maxframe/tensor/random/tests/__init__.py +13 -0
  1016. maxframe/tensor/random/tests/test_random.py +165 -0
  1017. maxframe/tensor/random/triangular.py +117 -0
  1018. maxframe/tensor/random/uniform.py +129 -0
  1019. maxframe/tensor/random/vonmises.py +129 -0
  1020. maxframe/tensor/random/wald.py +112 -0
  1021. maxframe/tensor/random/weibull.py +138 -0
  1022. maxframe/tensor/random/zipf.py +120 -0
  1023. maxframe/tensor/rechunk/__init__.py +26 -0
  1024. maxframe/tensor/rechunk/rechunk.py +43 -0
  1025. maxframe/tensor/reduction/__init__.py +64 -0
  1026. maxframe/tensor/reduction/all.py +101 -0
  1027. maxframe/tensor/reduction/allclose.py +86 -0
  1028. maxframe/tensor/reduction/any.py +103 -0
  1029. maxframe/tensor/reduction/argmax.py +101 -0
  1030. maxframe/tensor/reduction/argmin.py +101 -0
  1031. maxframe/tensor/reduction/array_equal.py +63 -0
  1032. maxframe/tensor/reduction/core.py +166 -0
  1033. maxframe/tensor/reduction/count_nonzero.py +80 -0
  1034. maxframe/tensor/reduction/cumprod.py +95 -0
  1035. maxframe/tensor/reduction/cumsum.py +99 -0
  1036. maxframe/tensor/reduction/max.py +118 -0
  1037. maxframe/tensor/reduction/mean.py +122 -0
  1038. maxframe/tensor/reduction/min.py +118 -0
  1039. maxframe/tensor/reduction/nanargmax.py +80 -0
  1040. maxframe/tensor/reduction/nanargmin.py +74 -0
  1041. maxframe/tensor/reduction/nancumprod.py +89 -0
  1042. maxframe/tensor/reduction/nancumsum.py +92 -0
  1043. maxframe/tensor/reduction/nanmax.py +109 -0
  1044. maxframe/tensor/reduction/nanmean.py +105 -0
  1045. maxframe/tensor/reduction/nanmin.py +109 -0
  1046. maxframe/tensor/reduction/nanprod.py +92 -0
  1047. maxframe/tensor/reduction/nanstd.py +124 -0
  1048. maxframe/tensor/reduction/nansum.py +113 -0
  1049. maxframe/tensor/reduction/nanvar.py +149 -0
  1050. maxframe/tensor/reduction/prod.py +128 -0
  1051. maxframe/tensor/reduction/std.py +132 -0
  1052. maxframe/tensor/reduction/sum.py +123 -0
  1053. maxframe/tensor/reduction/tests/__init__.py +13 -0
  1054. maxframe/tensor/reduction/tests/test_reduction.py +189 -0
  1055. maxframe/tensor/reduction/var.py +176 -0
  1056. maxframe/tensor/reshape/__init__.py +15 -0
  1057. maxframe/tensor/reshape/reshape.py +192 -0
  1058. maxframe/tensor/reshape/tests/__init__.py +13 -0
  1059. maxframe/tensor/reshape/tests/test_reshape.py +35 -0
  1060. maxframe/tensor/sort/__init__.py +18 -0
  1061. maxframe/tensor/sort/argpartition.py +98 -0
  1062. maxframe/tensor/sort/argsort.py +150 -0
  1063. maxframe/tensor/sort/partition.py +228 -0
  1064. maxframe/tensor/sort/sort.py +295 -0
  1065. maxframe/tensor/spatial/__init__.py +15 -0
  1066. maxframe/tensor/spatial/distance/__init__.py +17 -0
  1067. maxframe/tensor/spatial/distance/cdist.py +421 -0
  1068. maxframe/tensor/spatial/distance/pdist.py +398 -0
  1069. maxframe/tensor/spatial/distance/squareform.py +153 -0
  1070. maxframe/tensor/special/__init__.py +175 -0
  1071. maxframe/tensor/special/airy.py +55 -0
  1072. maxframe/tensor/special/bessel.py +199 -0
  1073. maxframe/tensor/special/core.py +99 -0
  1074. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  1075. maxframe/tensor/special/ellip_harm.py +55 -0
  1076. maxframe/tensor/special/err_fresnel.py +223 -0
  1077. maxframe/tensor/special/gamma_funcs.py +303 -0
  1078. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  1079. maxframe/tensor/special/info_theory.py +189 -0
  1080. maxframe/tensor/special/misc.py +163 -0
  1081. maxframe/tensor/special/statistical.py +56 -0
  1082. maxframe/tensor/statistics/__init__.py +24 -0
  1083. maxframe/tensor/statistics/average.py +143 -0
  1084. maxframe/tensor/statistics/bincount.py +133 -0
  1085. maxframe/tensor/statistics/corrcoef.py +77 -0
  1086. maxframe/tensor/statistics/cov.py +222 -0
  1087. maxframe/tensor/statistics/digitize.py +126 -0
  1088. maxframe/tensor/statistics/histogram.py +520 -0
  1089. maxframe/tensor/statistics/median.py +85 -0
  1090. maxframe/tensor/statistics/percentile.py +175 -0
  1091. maxframe/tensor/statistics/ptp.py +89 -0
  1092. maxframe/tensor/statistics/quantile.py +290 -0
  1093. maxframe/tensor/ufunc/__init__.py +24 -0
  1094. maxframe/tensor/ufunc/ufunc.py +198 -0
  1095. maxframe/tensor/utils.py +719 -0
  1096. maxframe/tests/__init__.py +13 -0
  1097. maxframe/tests/test_protocol.py +178 -0
  1098. maxframe/tests/test_udf.py +61 -0
  1099. maxframe/tests/test_utils.py +627 -0
  1100. maxframe/tests/utils.py +245 -0
  1101. maxframe/typing_.py +42 -0
  1102. maxframe/udf.py +435 -0
  1103. maxframe/utils.py +1774 -0
  1104. maxframe-2.4.0rc1.dist-info/METADATA +109 -0
  1105. maxframe-2.4.0rc1.dist-info/RECORD +1122 -0
  1106. maxframe-2.4.0rc1.dist-info/WHEEL +5 -0
  1107. maxframe-2.4.0rc1.dist-info/top_level.txt +3 -0
  1108. maxframe_client/__init__.py +16 -0
  1109. maxframe_client/clients/__init__.py +13 -0
  1110. maxframe_client/clients/framedriver.py +137 -0
  1111. maxframe_client/conftest.py +15 -0
  1112. maxframe_client/fetcher.py +411 -0
  1113. maxframe_client/session/__init__.py +22 -0
  1114. maxframe_client/session/consts.py +39 -0
  1115. maxframe_client/session/graph.py +125 -0
  1116. maxframe_client/session/odps.py +813 -0
  1117. maxframe_client/session/task.py +329 -0
  1118. maxframe_client/session/tests/__init__.py +13 -0
  1119. maxframe_client/session/tests/test_task.py +115 -0
  1120. maxframe_client/tests/__init__.py +13 -0
  1121. maxframe_client/tests/test_fetcher.py +215 -0
  1122. maxframe_client/tests/test_session.py +409 -0
@@ -0,0 +1,1728 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import dataclasses
16
+ import functools
17
+ import inspect
18
+ import itertools
19
+ import logging
20
+ import operator
21
+ import sys
22
+ from contextlib import contextmanager
23
+ from numbers import Integral
24
+ from typing import TYPE_CHECKING, Any, Callable, List, Optional
25
+
26
+ import numpy as np
27
+ import pandas as pd
28
+ from pandas.core.dtypes.inference import is_dict_like, is_list_like
29
+
30
+ from ..config.validators import dtype_backend_validator
31
+ from ..core import ENTITY_TYPE, Entity, ExecutableTuple, OutputType, get_output_types
32
+ from ..lib.dtypes_extension import ExternalBlobDtype, SolidBlob
33
+ from ..lib.mmh3 import hash as mmh_hash
34
+ from ..udf import MarkedFunction
35
+ from ..utils import (
36
+ ModulePlaceholder,
37
+ is_full_slice,
38
+ lazy_import,
39
+ make_dtypes,
40
+ quiet_stdio,
41
+ sbytes,
42
+ tokenize,
43
+ validate_and_adjust_resource_ratio,
44
+ )
45
+
46
+ if TYPE_CHECKING:
47
+ from .core import IndexValue
48
+
49
+ try:
50
+ import pyarrow as pa
51
+ except ImportError: # pragma: no cover
52
+ pa = ModulePlaceholder("pyarrow")
53
+
54
+ if TYPE_CHECKING:
55
+ from .operators import DataFrameOperator
56
+
57
+ cudf = lazy_import("cudf", rename="cudf")
58
+ logger = logging.getLogger(__name__)
59
+
60
+ try:
61
+ from ..lib.dtypes_extension import ArrowDtype
62
+ except ImportError:
63
+ ArrowDtype = None
64
+
65
+
66
def hash_index(index, size):
    """
    Partition the labels of ``index`` into ``size`` hash buckets.

    Every label is converted with ``sbytes``, hashed with ``mmh_hash`` and
    reduced modulo ``size``.

    Returns a list of length ``size`` whose ``i``-th item holds the labels
    that fell into bucket ``i`` (an empty list when the bucket is unused).
    """

    def _bucket_of(label):
        return mmh_hash(sbytes(label)) % size

    grouped = index.groupby(index.map(_bucket_of))
    return [grouped.get(bucket, []) for bucket in range(size)]
73
+
74
+
75
def hash_dataframe_on(df, on, size, level=None):
    """
    Split the row positions of ``df`` into ``size`` hash buckets.

    Parameters
    ----------
    df
        Data whose rows are partitioned.
    on
        ``None`` hashes the index (restricted to ``level`` when given),
        a callable hashes the result of mapping it over the index, a list
        hashes the listed columns / Series side by side, anything else is
        used as a column selector on ``df``.
    size
        Number of buckets.
    level
        Index level(s) to hash; only consulted when ``on`` is ``None``.

    Returns
    -------
    list
        ``size`` items; item ``i`` holds the positional row numbers hashed
        into bucket ``i`` (an empty ``pd.Index`` when the bucket is unused).
    """
    hash_obj = pd.util.hash_pandas_object
    if on is None:
        labels = df.index
        if level is not None:
            labels = labels.to_frame(False)[level]
        if cudf and isinstance(labels, cudf.Index):  # pragma: no cover
            labels = labels.to_pandas()
        hashes = hash_obj(labels, categorize=False)
    elif callable(on):
        # TODO: could be optimized when ``on`` is a numpy ufunc or
        # anything else that can be vectorized
        hashes = hash_obj(df.index.map(on), categorize=False)
    elif isinstance(on, list):
        # Series items are taken as they are, other items select columns
        pieces = [key if isinstance(key, pd.Series) else df[key] for key in on]
        hashes = hash_obj(pd.concat(pieces, axis=1), index=False, categorize=False)
    else:
        hashes = hash_obj(df[on], index=False, categorize=False)

    positions = pd.RangeIndex(0, len(hashes))
    buckets = positions.groupby(hashes % size)
    return [buckets.get(bucket, pd.Index([])) for bucket in range(size)]
100
+
101
+
102
def hash_dtypes(dtypes, size):
    """
    Split ``dtypes`` into ``size`` parts by hashing its index labels.

    The labels are bucketed with ``hash_index``; the result is a list with
    one selection of ``dtypes`` per bucket.
    """
    return [dtypes[labels] for labels in hash_index(dtypes.index, size)]
105
+
106
+
107
def sort_dataframe_inplace(df, *axis, **kw):
    """
    Sort ``df`` in place along every axis listed in ``axis``, in order.

    Extra keyword arguments are forwarded to ``sort_index``. The same
    ``df`` object is returned for convenience.
    """
    for axis_no in axis:
        df.sort_index(axis=axis_no, inplace=True, **kw)
    return df
111
+
112
+
113
@functools.lru_cache(1)
def _get_range_index_type():
    """
    Return the range-index class(es) usable as an ``isinstance`` target.

    Only ``pd.RangeIndex`` when ``cudf`` is unavailable, otherwise a tuple
    that also contains ``cudf.RangeIndex``. The result is cached.
    """
    if cudf is None:
        return pd.RangeIndex
    return pd.RangeIndex, cudf.RangeIndex
119
+
120
+
121
@functools.lru_cache(1)
def _get_multi_index_type():
    """
    Return the multi-index class(es) usable as an ``isinstance`` target.

    Only ``pd.MultiIndex`` when ``cudf`` is unavailable, otherwise a tuple
    that also contains ``cudf.MultiIndex``. The result is cached.
    """
    if cudf is None:
        return pd.MultiIndex
    return pd.MultiIndex, cudf.MultiIndex
127
+
128
+
129
def _get_range_index_start(pd_range_index):
    """
    Return the start of a range index.

    The public ``start`` attribute is preferred; the private ``_start``
    is the fallback for objects that do not expose it.
    """
    try:
        start = pd_range_index.start
    except AttributeError:  # pragma: no cover
        start = pd_range_index._start
    return start
134
+
135
+
136
def _get_range_index_stop(pd_range_index):
    """
    Return the stop of a range index.

    The public ``stop`` attribute is preferred; the private ``_stop``
    is the fallback for objects that do not expose it.
    """
    try:
        stop = pd_range_index.stop
    except AttributeError:  # pragma: no cover
        stop = pd_range_index._stop
    return stop
141
+
142
+
143
def _get_range_index_step(pd_range_index):
    """
    Return the step of a range index.

    ``step`` is tried first, then the private ``_step``. When neither
    attribute exists the step is taken to be 1.
    """
    for attr in ("step", "_step"):
        try:
            return getattr(pd_range_index, attr)
        except AttributeError:  # pragma: no cover
            continue
    return 1  # cudf does not support step arg
152
+
153
+
154
def is_pd_range_empty(pd_range_index):
    """
    Tell whether a range index contains no element.

    A range with a non-negative step is empty when ``start >= stop``;
    a range with a negative step is empty when ``start <= stop``.
    """
    start = _get_range_index_start(pd_range_index)
    stop = _get_range_index_stop(pd_range_index)
    step = _get_range_index_step(pd_range_index)

    empty_forward = start >= stop and step >= 0
    empty_backward = start <= stop and step < 0
    return empty_forward or empty_backward
161
+
162
+
163
def parse_index(index_value, *args, store_data=False, key=None):
    """
    Build an ``IndexValue`` describing ``index_value``.

    Parameters
    ----------
    index_value
        Index to describe. ``None`` produces a generic ``IndexValue.Index``
        without bounds. Objects exposing ``to_pandas`` are converted with
        it before being inspected.
    args
        Extra objects used for the generated key: always when
        ``index_value`` is ``None``, otherwise only when the index is empty.
    store_data
        Whether the index values are kept in the result. Not applied to
        range indexes.
    key
        Explicit key of the result. When falsy, a token is computed.

    Returns
    -------
    IndexValue
    """
    from .core import IndexValue

    def _make_key(index):
        if key:
            return key
        # a non-empty index is identified by its own content only
        if index.empty:
            return tokenize(index, *args)
        return tokenize(index)

    def _bound(index, which):
        # ``which`` is the name of the reducer to call: "min" or "max"
        try:
            return getattr(index, which)()
        except (ValueError, AttributeError):
            if isinstance(index, pd.IntervalIndex):
                return None
            raise
        except TypeError:
            return None

    def _common_properties(index, tp, with_data):
        props = {
            "_min_val": _bound(index, "min"),
            "_max_val": _bound(index, "max"),
            "_min_val_close": True,
            "_max_val_close": True,
            "_key": _make_key(index),
        }
        if with_data:
            props["_data"] = index.values
        # copy every remaining declared field that the index exposes
        for field in tp._FIELDS:
            if field == "_data" or field in props:
                continue
            attr_val = getattr(index, field.lstrip("_"), None)
            if attr_val is not None:
                props[field] = attr_val
        return props

    def _describe_plain(index):
        tp = getattr(IndexValue, type(index).__name__)
        props = _common_properties(index, tp, store_data)
        props["_name"] = index.name
        return tp(**props)

    def _describe_range(index):
        if is_pd_range_empty(index):
            props = {
                "_is_monotonic_increasing": True,
                "_is_monotonic_decreasing": False,
                "_is_unique": True,
                "_min_val": _bound(index, "min"),
                "_max_val": _bound(index, "max"),
                "_min_val_close": True,
                "_max_val_close": False,
                "_key": _make_key(index),
                "_name": index.name,
                "_dtype": index.dtype,
            }
        else:
            props = _common_properties(index, IndexValue.RangeIndex, False)
        range_slice = slice(
            _get_range_index_start(index),
            _get_range_index_stop(index),
            _get_range_index_step(index),
        )
        return IndexValue.RangeIndex(_slice=range_slice, **props)

    def _describe_multi(index):
        props = _common_properties(index, IndexValue.MultiIndex, store_data)
        props["_sortorder"] = index.sortorder
        props["_dtypes"] = [lev.dtype for lev in index.levels]
        return IndexValue.MultiIndex(**props)

    if index_value is None:
        unknown = IndexValue.Index(
            _is_monotonic_increasing=False,
            _is_monotonic_decreasing=False,
            _is_unique=False,
            _min_val=None,
            _max_val=None,
            _min_val_close=True,
            _max_val_close=True,
            _key=key or tokenize(*args),
        )
        return IndexValue(_index_value=unknown)

    if hasattr(index_value, "to_pandas"):  # pragma: no cover
        # convert cudf.Index to pandas
        index_value = index_value.to_pandas()

    if isinstance(index_value, _get_range_index_type()):
        described = _describe_range(index_value)
    elif isinstance(index_value, _get_multi_index_type()):
        described = _describe_multi(index_value)
    else:
        described = _describe_plain(index_value)
    return IndexValue(_index_value=described)
270
+
271
+
272
def gen_unknown_index_value(index_value, *args, normalize_range_index=False):
    """
    Generate a new index value of the same kind as ``index_value`` but
    without any data.

    Parameters
    ----------
    index_value
        Given index value.
    args
        Arguments passed on to ``parse_index``.
    normalize_range_index
        If True, a range index is replaced by an empty ordinary index
        instead of an empty range index.

    Returns
    -------
    Newly created index value built from an empty placeholder index.
    """
    pd_index = index_value.to_pandas()
    if isinstance(pd_index, pd.RangeIndex) and not normalize_range_index:
        placeholder = pd.RangeIndex(-1, name=pd_index.name)
    elif isinstance(pd_index, pd.MultiIndex):
        # keep level dtypes and names, drop all values
        placeholder = pd.MultiIndex.from_arrays(
            [level[:0] for level in pd_index.levels], names=pd_index.names
        )
    else:
        placeholder = pd.Index([], dtype=pd_index.dtype, name=pd_index.name)
    return parse_index(placeholder, *args)
301
+
302
+
303
def split_monotonic_index_min_max(
    left_min_max, left_increase, right_min_max, right_increase
):
    """
    Split the two original min_max lists into aligned min_max pieces.

    Each min_max should be a list in which every item is a 4-tuple holding
    a chunk's min value, whether the min value is closed, the max value,
    and whether the max value is closed.
    The return value is a pair of nested lists (one for left, one for
    right); each inner list tells how the corresponding chunk should be
    split. Every generated piece is appended to the current left chunk and
    to the current right chunk, so both results cover the same pieces.

    NOTE(review): the walk below appears to assume both inputs are non-empty
    and ordered by ascending min value -- confirm against callers.

    :param left_min_max: the left min_max
    :param left_increase: if the original data of left is increasing;
        when it is exactly ``False`` the left result is reversed
    :param right_min_max: the right min_max
    :param right_increase: if the original data of right is increasing;
        when it is exactly ``False`` the right result is reversed
    :return: nested lists in which each item indicates how min_max is split

    >>> left_min_max = [(0, True, 3, True), (4, True, 8, True), (12, True, 18, True),
    ...                 (20, True, 22, True)]
    >>> right_min_max = [(2, True, 6, True), (7, True, 9, True), (10, True, 14, True),
    ...                  (18, True, 19, True)]
    >>> l, r = split_monotonic_index_min_max(left_min_max, True, right_min_max, True)
    >>> l
    [[(0, True, 2, False), (2, True, 3, True)], [(3, False, 4, False), (4, True, 6, True), (6, False, 7, False),
    (7, True, 8, True)], [(8, False, 9, True), (10, True, 12, False), (12, True, 14, True), (14, False, 18, False),
    (18, True, 18, True)], [(18, False, 19, True), (20, True, 22, True)]]
    >>> r
    [[(0, True, 2, False), (2, True, 3, True), (3, False, 4, False), (4, True, 6, True)],
    [(6, False, 7, False), (7, True, 8, True), (8, False, 9, True)], [(10, True, 12, False), (12, True, 14, True)],
    [(14, False, 18, False), (18, True, 18, True), (18, False, 19, True), (20, True, 22, True)]]
    """
    # one output bucket per input chunk on each side
    left_idx_to_min_max = [[] for _ in left_min_max]
    right_idx_to_min_max = [[] for _ in right_min_max]
    # mutable copies of the chunk currently being consumed on each side;
    # their lower bound is advanced as pieces are cut off
    left_curr_min_max = list(left_min_max[0])
    right_curr_min_max = list(right_min_max[0])
    left_curr_idx = right_curr_idx = 0
    left_terminate = right_terminate = False

    while not left_terminate or not right_terminate:
        if left_terminate:
            # left is exhausted: the rest of right is emitted unchanged and
            # attached to the last left chunk as well
            left_idx_to_min_max[left_curr_idx].append(tuple(right_curr_min_max))
            right_idx_to_min_max[right_curr_idx].append(tuple(right_curr_min_max))
            if right_curr_idx + 1 >= len(right_min_max):
                right_terminate = True
            else:
                right_curr_idx += 1
                right_curr_min_max = list(right_min_max[right_curr_idx])
        elif right_terminate:
            # right is exhausted: the rest of left is emitted unchanged and
            # attached to the last right chunk as well
            right_idx_to_min_max[right_curr_idx].append(tuple(left_curr_min_max))
            left_idx_to_min_max[left_curr_idx].append(tuple(left_curr_min_max))
            if left_curr_idx + 1 >= len(left_min_max):
                left_terminate = True
            else:
                left_curr_idx += 1
                left_curr_min_max = list(left_min_max[left_curr_idx])
        elif left_curr_min_max[0] < right_curr_min_max[0]:
            # left min < right min
            # candidate upper bound: right's min with the closeness flipped,
            # so the piece stops just before right's range begins
            right_min = [right_curr_min_max[0], not right_curr_min_max[1]]
            # [value, close] lists compare lexicographically, so on equal
            # values an open bound (False) sorts before a closed one (True)
            max_val = min(left_curr_min_max[2:], right_min)
            assert len(max_val) == 2
            min_max = (
                left_curr_min_max[0],
                left_curr_min_max[1],
                max_val[0],
                max_val[1],
            )
            left_idx_to_min_max[left_curr_idx].append(min_max)
            right_idx_to_min_max[right_curr_idx].append(min_max)
            if left_curr_min_max[2:] == max_val:
                # the current left chunk ends before right's min:
                # it is fully consumed, move on to the next left chunk
                if left_curr_idx + 1 >= len(left_min_max):
                    left_terminate = True
                else:
                    left_curr_idx += 1
                    left_curr_min_max = list(left_min_max[left_curr_idx])
            else:
                # the piece from left min up to (excluding) right min was
                # emitted; the rest of the left chunk now starts at right min
                left_curr_min_max[:2] = right_curr_min_max[:2]
        elif left_curr_min_max[0] > right_curr_min_max[0]:
            # left min > right min (mirror of the branch above)
            left_min = [left_curr_min_max[0], not left_curr_min_max[1]]
            max_val = min(right_curr_min_max[2:], left_min)
            min_max = (
                right_curr_min_max[0],
                right_curr_min_max[1],
                max_val[0],
                max_val[1],
            )
            left_idx_to_min_max[left_curr_idx].append(min_max)
            right_idx_to_min_max[right_curr_idx].append(min_max)
            if right_curr_min_max[2:] == max_val:
                # the current right chunk ends before left's min:
                # it is fully consumed, move on to the next right chunk
                if right_curr_idx + 1 >= len(right_min_max):
                    right_terminate = True
                else:
                    right_curr_idx += 1
                    right_curr_min_max = list(right_min_max[right_curr_idx])
            else:
                # the piece from right min up to (excluding) left min was
                # emitted; the rest of the right chunk now starts at left min
                right_curr_min_max[:2] = left_curr_min_max[:2]
        else:
            # left min == right min
            # the shared piece ends at the smaller of the two upper bounds
            max_val = min(left_curr_min_max[2:], right_curr_min_max[2:])
            assert len(max_val) == 2
            min_max = (
                left_curr_min_max[0],
                left_curr_min_max[1],
                max_val[0],
                max_val[1],
            )
            left_idx_to_min_max[left_curr_idx].append(min_max)
            right_idx_to_min_max[right_curr_idx].append(min_max)
            if max_val == left_curr_min_max[2:]:
                # left chunk fully consumed
                if left_curr_idx + 1 >= len(left_min_max):
                    left_terminate = True
                else:
                    left_curr_idx += 1
                    left_curr_min_max = list(left_min_max[left_curr_idx])
            else:
                # the remainder of left starts right after the emitted piece
                left_curr_min_max[:2] = max_val[0], not max_val[1]
            if max_val == right_curr_min_max[2:]:
                # right chunk fully consumed
                if right_curr_idx + 1 >= len(right_min_max):
                    right_terminate = True
                else:
                    right_curr_idx += 1
                    right_curr_min_max = list(right_min_max[right_curr_idx])
            else:
                # the remainder of right starts right after the emitted piece
                right_curr_min_max[:2] = max_val[0], not max_val[1]

    # only an explicit ``False`` triggers the reversal of the outer list
    if left_increase is False:
        left_idx_to_min_max = list(reversed(left_idx_to_min_max))
    if right_increase is False:
        right_idx_to_min_max = list(reversed(right_idx_to_min_max))

    return left_idx_to_min_max, right_idx_to_min_max
437
+
438
+
439
def build_split_idx_to_origin_idx(splits, increase=True):
    """
    Map every output chunk index to its origin chunk and inner position.

    ``splits`` has one item per original chunk on an axis; each item lists
    the pieces that chunk is split into, e.g.
    ``[[(0, True, 2, True), (2, False, 3, True)]]`` means one input chunk
    that becomes two output chunks, giving ``{0: (0, 0), 1: (0, 1)}``.

    When ``increase`` is exactly ``False`` the chunks are walked in
    reversed order and the reported origin index is mirrored accordingly.
    """
    descending = increase is False
    if descending:
        splits = list(reversed(splits))

    n_chunks = len(splits)
    mapping = {}
    for origin_idx, chunk_splits in enumerate(splits):
        source_idx = n_chunks - origin_idx - 1 if descending else origin_idx
        for pos in range(len(chunk_splits)):
            # output indices are handed out consecutively starting at 0
            mapping[len(mapping)] = source_idx, pos
    return mapping
457
+
458
+
459
def _generate_value(dtype, fill_value):
    """
    Produce one sample value of ``dtype`` derived from ``fill_value``.

    Arrow-backed dtypes are unwrapped to their pyarrow type; list, map and
    struct types are built recursively; other pyarrow types are created by
    casting a one-element array; blob dtypes get a ``SolidBlob``. Anything
    else is converted through a small dispatch table, falling back to
    calling the dtype's ``type`` (or the dtype itself) on ``fill_value``.
    """
    if ArrowDtype and isinstance(dtype, ArrowDtype):
        return _generate_value(dtype.pyarrow_dtype, fill_value)

    if isinstance(dtype, pa.ListType):
        return [_generate_value(dtype.value_type, fill_value)]
    if isinstance(dtype, pa.MapType):
        key_sample = _generate_value(dtype.key_type, fill_value)
        item_sample = _generate_value(dtype.item_type, fill_value)
        return [(key_sample, item_sample)]
    if isinstance(dtype, pa.StructType):
        members = (dtype[pos] for pos in range(dtype.num_fields))
        return {
            member.name: _generate_value(member.type, fill_value)
            for member in members
        }
    if isinstance(dtype, pa.DataType):
        seed = _generate_value(dtype.to_pandas_dtype(), fill_value)
        return pa.array([seed]).cast(dtype)[0]
    if isinstance(dtype, ExternalBlobDtype):
        return SolidBlob(str(fill_value).encode())

    converters = {
        # special handle for datetime64 and timedelta64
        np.datetime64: pd.Timestamp,
        np.timedelta64: pd.Timedelta,
        pd.CategoricalDtype.type: lambda x: pd.CategoricalDtype([x]),
        # for object, we do not know the actual dtype,
        # just convert to str for common usage
        np.object_: lambda _: str(fill_value),
    }
    # otherwise, just use dtype.type itself to convert
    target = getattr(dtype, "type", dtype)
    return converters.get(target, target)(fill_value)
498
+
499
+
500
def build_empty_df(dtypes, index=None):
    """
    Build a DataFrame with the given ``dtypes`` and ``index``.

    ``dtypes`` is a Series mapping column labels to dtypes. A sample row is
    generated (at least once) so that ``astype`` can be applied, and the
    frame is then trimmed back to ``len(index)`` rows, or to zero rows
    when no index is given.
    """
    n_rows = 0 if index is None else len(index)
    sample_row = [_generate_value(dt, 1) for dt in dtypes]
    rows = [sample_row] * max(1, n_rows)

    # duplicate column may exist,
    # so use positional column labels first
    df = pd.DataFrame(rows, columns=range(len(dtypes)), index=index)
    df = df.astype(dict(enumerate(dtypes)))
    df.columns = dtypes.index
    if len(df) > n_rows:
        return df[:n_rows]
    return df
511
+
512
+
513
def build_df(df_obj, fill_value=1, size=1, ensure_string=False):
    """
    Build a mock DataFrame that mirrors the dtypes and index of ``df_obj``.

    Parameters
    ----------
    df_obj
        Object to mimic. Index / series typed objects contribute a single
        column named after the object; anything else supplies ``dtypes``.
    fill_value
        Value (or list / tuple of values) used to generate cell and index
        values.
    size
        Row count (or list / tuple of row counts) generated per fill value;
        sizes and fill values are paired with ``zip``.
    ensure_string
        If True, object-dtype columns get ``"O"`` prepended via ``radd``.

    Returns
    -------
    pandas.DataFrame
    """
    from .core import INDEX_TYPE, SERIES_TYPE

    sizes = size if isinstance(size, (list, tuple)) else [size]
    fill_values = fill_value if isinstance(fill_value, (list, tuple)) else [fill_value]

    if isinstance(df_obj, (INDEX_TYPE, SERIES_TYPE)):
        dtypes = pd.Series([df_obj.dtype], index=[df_obj.name])
    else:
        dtypes = df_obj.dtypes

    frames = []
    for n_rows, fill in zip(sizes, fill_values):
        rows = [[_generate_value(dt, fill) for dt in dtypes]] * n_rows
        frame = pd.DataFrame(rows)
        frame = frame.astype(dtypes.reset_index(drop=True))
        frame = frame.set_axis(dtypes.index, axis=1)
        if rows:  # `columns` is empty in some cases
            if hasattr(df_obj, "index_value"):
                target_index = df_obj.index_value.to_pandas()
            else:
                target_index = df_obj.index
            if isinstance(target_index, pd.MultiIndex):
                label = tuple(
                    _generate_value(level.dtype, fill)
                    for level in target_index.levels
                )
                frame.index = pd.MultiIndex.from_tuples(
                    [label] * n_rows, names=target_index.names
                )
            else:
                label = _generate_value(target_index.dtype, fill)
                generated = pd.Index([label] * n_rows, name=target_index.name)
                frame.index = generated.astype(target_index.dtype)

        # make sure dtypes correct
        for pos, dt in enumerate(dtypes):
            column = frame.iloc[:, pos]
            if not pd.api.types.is_dtype_equal(column.dtype, dt):
                frame[frame.columns[pos]] = column.astype(dt)
        frames.append(frame)

    ret_df = frames[0] if len(frames) == 1 else pd.concat(frames)

    if ensure_string:
        obj_columns = dtypes[dtypes == np.dtype("O")].index
        ret_df[obj_columns] = ret_df[obj_columns].radd("O")
    return ret_df
574
+
575
+
576
def build_empty_series(dtype, index=None, name=None):
    """
    Build a Series of ``dtype`` carrying the given ``index`` and ``name``.

    One generated sample value is created per index entry; without an
    index the series has no values.
    """
    n_items = 0 if index is None else len(index)
    samples = [_generate_value(dtype, 1) for _ in range(n_items)]
    return pd.Series(samples, dtype=dtype, index=index, name=name)
584
+
585
+
586
def build_series(
    series_obj=None,
    fill_value=1,
    size=1,
    name=None,
    ensure_string=False,
    dtype=None,
    index=None,
):
    """
    Build a mock Series mirroring ``series_obj`` (or ``dtype`` / ``index``).

    Parameters
    ----------
    series_obj
        Object to mimic; when given its dtype overrides ``dtype`` and its
        index (``index_value`` if available, else ``index``) is used.
    fill_value
        Value (or list / tuple of values) used to generate data and labels.
    size
        Repeat count (or list / tuple of counts) per fill value; sizes and
        fill values are paired with ``zip``.
    name
        Series name; when falsy the name of ``series_obj`` is used.
    ensure_string
        If True and the dtype is object, ``"O"`` is prepended via ``radd``.
    dtype
        Dtype used when ``series_obj`` is not given.
    index
        Index template used when ``series_obj`` is not given.

    Returns
    -------
    pandas.Series
    """
    sizes = size if isinstance(size, (list, tuple)) else [size]
    fill_values = fill_value if isinstance(fill_value, (list, tuple)) else [fill_value]

    if series_obj is None:
        series_index = None if index is None else index[:0]
    else:
        dtype = series_obj.dtype
        try:
            series_index = series_obj.index_value.to_pandas()[:0]
        except AttributeError:
            series_index = series_obj.index[:0]

    name = name or getattr(series_obj, "name", None)

    pieces = []
    for repeat, fill in zip(sizes, fill_values):
        piece = build_empty_series(dtype, name=name, index=series_index)
        record = _generate_value(dtype, fill)
        if isinstance(piece.index, pd.MultiIndex):
            label = tuple(
                _generate_value(level.dtype, fill) for level in piece.index.levels
            )
            single_row = pd.MultiIndex.from_tuples([label], names=piece.index.names)
            piece = piece.reindex(index=single_row)
            piece.iloc[0] = record
        else:
            if isinstance(piece.index.dtype, pd.CategoricalDtype):
                label = None
            else:
                label = _generate_value(piece.index.dtype, fill)
            piece.loc[label] = record

        piece = pd.concat([piece] * repeat)
        # make sure dtype correct for MultiIndex
        piece = piece.astype(dtype, copy=False)
        pieces.append(piece)

    ret_series = pieces[0] if len(pieces) == 1 else pd.concat(pieces)

    if ensure_string and dtype == np.dtype("O"):
        ret_series = ret_series.radd("O")
    return ret_series
648
+
649
+
650
def infer_index_value(left_index_value, right_index_value, level=None):
    """
    Infer the index value produced by combining two index values.

    * Two range indexes with equal slices keep the left value; with
      different slices an empty int64 index is parsed.
    * Two unique indexes with the same key keep the left value.
    * Otherwise the pandas indexes are joined (on ``level`` if given) and
      the emptied result is parsed.
    """
    from .core import IndexValue

    both_ranges = isinstance(
        left_index_value.value, IndexValue.RangeIndex
    ) and isinstance(right_index_value.value, IndexValue.RangeIndex)
    if both_ranges:
        if left_index_value.value.slice == right_index_value.value.slice:
            return left_index_value
        empty_ints = pd.Index([], dtype=np.int64)
        return parse_index(empty_ints, left_index_value, right_index_value)

    # when left index and right index are identical and both hold unique
    # elements, the output index is identical too
    identical_unique = (
        left_index_value.is_unique
        and right_index_value.is_unique
        and left_index_value.key == right_index_value.key
    )
    if identical_unique:
        return left_index_value

    left_index = left_index_value.to_pandas()
    right_index = right_index_value.to_pandas()
    joined = left_index.join(right_index, level=level)
    return parse_index(joined[:0], left_index_value, right_index_value)
675
+
676
+
677
def indexing_index_value(index_value, indexes, store_data=False, rechunk=False):
    """
    Compute the index value obtained by indexing ``index_value``.

    Parameters
    ----------
    index_value
        Index value being indexed.
    indexes
        Selector: a slice, an integer, an ``Entity``, a tuple or anything
        accepted by pandas index ``__getitem__``.
    store_data
        Whether the resulting index value keeps its data.
    rechunk
        When True a full slice no longer short-circuits, because the
        output index shall be treated differently from the input one.

    Returns
    -------
    The resulting index value.
    """
    pd_index = index_value.to_pandas()
    if not rechunk and isinstance(indexes, slice) and is_full_slice(indexes):
        return index_value

    if not index_value.has_value():
        # no data to index: keep the known bounds of the input
        new_index_value = parse_index(pd_index, indexes, store_data=store_data)
        for attr in ("min_val", "min_val_close", "max_val", "max_val_close"):
            setattr(new_index_value._index_value, "_" + attr, getattr(index_value, attr))
        return new_index_value

    if isinstance(indexes, Integral):
        return parse_index(pd_index[[indexes]], store_data=store_data)
    if isinstance(indexes, Entity):
        # the selection is not known yet, produce a value-less placeholder
        if isinstance(pd_index, pd.RangeIndex):
            placeholder = pd.RangeIndex(-1)
        else:
            placeholder = type(pd_index)([])
        return parse_index(placeholder, indexes, index_value, store_data=False)

    selector = list(indexes) if isinstance(indexes, tuple) else indexes
    return parse_index(pd_index[selector], store_data=store_data)
706
+
707
+
708
def merge_index_value(to_merge_index_values: dict, store_data: bool = False):
    """
    Merge index values according to their chunk index.

    Parameters
    ----------
    to_merge_index_values : dict
        Chunk index to index_value; chunks are visited in sorted key order.
    store_data : bool
        Store data in the merged index_value. When False only contiguous
        range indexes are concatenated, anything else collapses to an
        empty index.

    Returns
    -------
    merged_index_value
        When the merged value holds no data, the smallest min and largest
        max seen across the chunks are recorded on it.
    """
    merged = None
    min_val = min_val_close = max_val = max_val_close = None
    for _, chunk_index_value in sorted(to_merge_index_values.items()):
        if merged is None:
            # first chunk seeds both the index and the bounds
            merged = chunk_index_value.to_pandas()
            min_val, min_val_close, max_val, max_val_close = (
                chunk_index_value.min_val,
                chunk_index_value.min_val_close,
                chunk_index_value.max_val,
                chunk_index_value.max_val_close,
            )
            continue

        current = chunk_index_value.to_pandas()
        if store_data or (
            isinstance(merged, pd.RangeIndex)
            and isinstance(current, pd.RangeIndex)
            and current.step == merged.step
            and current.start == merged.stop
        ):
            # range index that is continuous
            merged = merged.append(current)
        else:
            merged = pd.Index([], dtype=merged.dtype)

        if chunk_index_value.min_val is not None:
            try:
                if min_val is None or min_val > chunk_index_value.min_val:
                    min_val = chunk_index_value.min_val
                    min_val_close = chunk_index_value.min_val_close
            except TypeError:
                # min values have different types that cannot be compared,
                # skip the comparison for this chunk
                continue
        if chunk_index_value.max_val is not None:
            if max_val is None or max_val < chunk_index_value.max_val:
                max_val = chunk_index_value.max_val
                max_val_close = chunk_index_value.max_val_close

    index_value = parse_index(merged, store_data=store_data)
    if not index_value.has_value():
        bounds = (
            ("_min_val", min_val),
            ("_min_val_close", min_val_close),
            ("_max_val", max_val),
            ("_max_val_close", max_val_close),
        )
        for attr, bound in bounds:
            setattr(index_value._index_value, attr, bound)
    return index_value
768
+
769
+
770
def is_decimal128_dtype(dtype):
    """
    Tell whether ``dtype`` is an Arrow-backed 128-bit decimal dtype.

    Returns False for every input when ``ArrowDtype`` could not be imported.
    """
    # ``ArrowDtype`` is set to None when its import fails; passing None as
    # the class argument of isinstance() raises TypeError, so guard first.
    if ArrowDtype is None:
        return False
    return isinstance(dtype, ArrowDtype) and isinstance(
        dtype.pyarrow_dtype, pa.Decimal128Type
    )
774
+
775
+
776
def is_decimal256_dtype(dtype):
    """
    Tell whether ``dtype`` is an Arrow-backed 256-bit decimal dtype.

    Returns False for every input when ``ArrowDtype`` could not be imported.
    """
    # ``ArrowDtype`` is set to None when its import fails; passing None as
    # the class argument of isinstance() raises TypeError, so guard first.
    if ArrowDtype is None:
        return False
    return isinstance(dtype, ArrowDtype) and isinstance(
        dtype.pyarrow_dtype, pa.Decimal256Type
    )
780
+
781
+
782
def decimal_128_to_256_dtype(dtype):
    """
    Widen a decimal128 Arrow dtype to decimal256 with the same precision
    and scale. Any other dtype is returned unchanged.
    """
    if is_decimal128_dtype(dtype):
        arrow_type = dtype.pyarrow_dtype
        return ArrowDtype(pa.decimal256(arrow_type.precision, arrow_type.scale))
    return dtype
788
+
789
+
790
def safe_decimal_256_to_128_dtype(dtype):
    """
    Narrow a decimal256 Arrow dtype to decimal128 when its precision is
    at most 38. Any other dtype is returned unchanged.
    """
    if not is_decimal256_dtype(dtype):
        return dtype
    arrow_type = dtype.pyarrow_dtype
    if arrow_type.precision > 38:
        # precision above 38 is left as decimal256
        return dtype
    return ArrowDtype(pa.decimal128(arrow_type.precision, arrow_type.scale))
796
+
797
+
798
def _infer_dtypes(left_dtypes, right_dtypes, operator):
    """
    Apply ``operator`` to mock frames built from both dtypes and return
    the dtypes of the result.
    """
    left_mock = build_empty_df(left_dtypes)
    right_mock = build_empty_df(right_dtypes)
    outcome = operator(left_mock, right_mock)
    return outcome.dtypes
802
+
803
+
804
def infer_dtypes(left_dtypes, right_dtypes, operator):
    """
    Infer the dtypes resulting from ``operator(left, right)`` on frames.

    When pyarrow rejects the operation with a "Decimal precision" error,
    decimal128 dtypes are widened to decimal256 for a second attempt and
    the result is narrowed back to decimal128 where that is possible.
    Other ``ArrowInvalid`` errors are re-raised.
    """
    try:
        return _infer_dtypes(left_dtypes, right_dtypes, operator)
    except pa.ArrowInvalid as exc:
        if "Decimal precision" not in str(exc):
            raise
        # automatic upgrade to decimal256 type and downgrade
        # to decimal128 type where possible
        widened_left = left_dtypes.map(decimal_128_to_256_dtype)
        widened_right = right_dtypes.map(decimal_128_to_256_dtype)
        widened_result = _infer_dtypes(widened_left, widened_right, operator)
        return widened_result.map(safe_decimal_256_to_128_dtype)
817
+
818
+
819
def _infer_dtype(left_dtype, right_dtype, operator):
    """
    Apply ``operator`` to mock series built from both dtypes and return
    the dtype of the result.
    """
    left_mock = build_empty_series(left_dtype)
    right_mock = build_empty_series(right_dtype)
    outcome = operator(left_mock, right_mock)
    return outcome.dtype
823
+
824
+
825
@functools.lru_cache(100)
def infer_dtype(left_dtype, right_dtype, operator):
    """
    Infer the dtype resulting from ``operator(left, right)`` on series.

    When pyarrow rejects the operation with a "Decimal precision" error,
    decimal128 dtypes are widened to decimal256 and the inference is
    retried. Other ``ArrowInvalid`` errors are re-raised. Results are
    cached, so all arguments must be hashable.
    """
    try:
        return _infer_dtype(left_dtype, right_dtype, operator)
    except pa.ArrowInvalid as exc:
        if "Decimal precision" not in str(exc):
            raise
        # automatic upgrade to decimal256 type
        widened_left = decimal_128_to_256_dtype(left_dtype)
        widened_right = decimal_128_to_256_dtype(right_dtype)
        return _infer_dtype(widened_left, widened_right, operator)
838
+
839
+
840
def filter_dtypes(dtypes, column_min_max):
    """
    Keep the entries of ``dtypes`` whose label lies within a bound.

    ``column_min_max`` is indexed as ``(min, min_close, max, max_close)``;
    a truthy close flag makes the matching bound inclusive.
    """
    labels = dtypes.index

    lower_cmp = operator.ge if column_min_max[1] else operator.gt
    above_lower = lower_cmp(labels, column_min_max[0])

    upper_cmp = operator.le if column_min_max[3] else operator.lt
    below_upper = upper_cmp(labels, column_min_max[2])

    return dtypes[above_lower & below_upper]
846
+
847
+
848
def in_range_index(i, pd_range_index):
    """
    Check whether the input `i` is within `pd_range_index` which is a pd.RangeIndex.

    `i` must lie between start (inclusive) and stop (exclusive) in the
    direction of the step, and be a whole number of steps away from start.
    """
    start = _get_range_index_start(pd_range_index)
    stop = _get_range_index_stop(pd_range_index)
    step = _get_range_index_step(pd_range_index)

    if step > 0:
        return bool(start <= i < stop and (i - start) % step == 0)
    if step < 0:
        return bool(start >= i > stop and (start - i) % step == 0)
    return False
862
+
863
+
864
def wrap_notimplemented_exception(func):
    """
    Decorate ``func`` so that a raised ``NotImplementedError`` is turned
    into the ``NotImplemented`` return value. Other exceptions propagate.
    """

    @functools.wraps(func)
    def _guarded(*args, **kwargs):
        try:
            outcome = func(*args, **kwargs)
        except NotImplementedError:
            outcome = NotImplemented
        return outcome

    return _guarded
873
+
874
+
875
def validate_axis(axis, tileable=None):
    """
    Normalize ``axis`` to a non-negative integer.

    ``"index"`` maps to 0 and ``"columns"`` to 1; other values must be
    usable as an integer index. When ``tileable`` is given the axis must
    be smaller than ``tileable.ndim``.

    Raises
    ------
    ValueError
        When the axis is negative, not an integer, or out of bounds.
    """
    if axis == "index":
        axis = 0
    elif axis == "columns":
        axis = 1

    try:
        axis = operator.index(axis)
        out_of_bounds = axis < 0 or (tileable is not None and axis >= tileable.ndim)
    except TypeError:
        out_of_bounds = True

    if out_of_bounds:
        raise ValueError(f"No axis named {axis} for object type {type(tileable)}")
    return axis
892
+
893
+
894
def validate_axis_style_args(
    data, args, kwargs, arg_name, method_name
):  # pragma: no cover
    """Argument handler for mixed index, columns / axis functions

    Both `.method(index, columns)` and `.method(arg, axis=.)` call styles
    have to be supported, which requires some unusual argument parsing.
    Everything is translated to `{index=., columns=.}` style.

    Parameters
    ----------
    data : DataFrame
    args : tuple
        All positional arguments from the user
    kwargs : dict
        All keyword arguments from the user
    arg_name, method_name : str
        Used for better error messages

    Returns
    -------
    kwargs : dict
        A dictionary of keyword arguments. ``kwargs`` is not modified
        inplace, so update it with the return value here.

    Raises
    ------
    TypeError
        When the arguments conflict, or when two or more positional
        arguments are given.
    """
    # Goal: fill 'out' with index/columns-style arguments
    # like out = {'index': foo, 'columns': bar}
    out = {}
    axes_names = ["index"] if data.ndim == 1 else ["index", "columns"]

    def _selected_axis_name():
        # name of the axis picked by the ``axis`` keyword (default 0)
        return axes_names[validate_axis(kwargs.get("axis", 0), data)]

    # Start by validating for consistency
    if "axis" in kwargs and any(x in kwargs for x in axes_names):
        raise TypeError("Cannot specify both 'axis' and any of 'index' or 'columns'.")

    # First fill with explicit values provided by the user...
    if arg_name in kwargs:
        if args:
            raise TypeError(
                f"{method_name} got multiple values for argument '{arg_name}'"
            )
        out[_selected_axis_name()] = kwargs[arg_name]

    # More user-provided arguments, now from kwargs
    for kw_name, kw_value in kwargs.items():
        try:
            axis_name = axes_names[validate_axis(kw_name, data)]
        except ValueError:
            # not an axis-style keyword, ignore it here
            continue
        out[axis_name] = kw_value

    # All user-provided kwargs have been handled now.
    # Now supplement with positional arguments, raising when
    # there is ambiguity or conflict
    n_args = len(args)
    if n_args == 1:
        out[_selected_axis_name()] = args[0]
    elif n_args == 2:
        if "axis" in kwargs:
            # Unambiguously wrong
            raise TypeError("Cannot specify both 'axis' and any of 'index' or 'columns'")

        template = (
            "Interpreting call\n\t'.{method_name}(a, b)' as "
            "\n\t'.{method_name}(index=a, columns=b)'.\nUse named "
            "arguments to remove any ambiguity."
        )
        raise TypeError(template.format(method_name=method_name))
    elif n_args > 2:
        raise TypeError(f"Cannot specify all of '{arg_name}', 'index', 'columns'.")
    # with no positional argument it's up to the caller to decide
    # whether the result is valid
    return out
972
+
973
+
974
def validate_output_types(**kwargs):
    """Collect output types from ``object_type`` / ``output_type`` / ``output_types``.

    String entries are looked up (lower-cased) as attributes of ``OutputType``;
    other entries are kept as-is. Returns ``None`` when nothing was specified.
    """
    from ..core import OutputType

    single = kwargs.pop("object_type", None) or kwargs.pop("output_type", None)
    collected = kwargs.pop("output_types", None)
    if not collected:
        # fall back to the single-valued keyword, if any
        collected = [single] if single is not None else None

    if not collected:
        return None
    return [
        getattr(OutputType, item.lower()) if isinstance(item, str) else item
        for item in collected
    ]
989
+
990
+
991
def fetch_corner_data(df_or_series, session=None) -> pd.DataFrame:
    """
    Fetch corner DataFrame or Series for repr usage.

    When the object has more rows than pandas would display, only the head
    and tail rows needed for a truncated repr are fetched and concatenated.
    Otherwise the whole object is fetched.

    :param df_or_series: DataFrame or Series
    :param session: session passed through to the fetch calls
    :return: corner DataFrame
    """
    from .indexing.iloc import iloc

    max_rows = pd.get_option("display.max_rows")
    if max_rows is None:
        # pandas treats display.max_rows=None as "unlimited": nothing is
        # truncated, so the complete data is needed for repr
        return df_or_series._fetch(session=session)

    try:
        min_rows = pd.get_option("display.min_rows")
    except KeyError:  # pragma: no cover
        # display.min_rows is introduced in pandas 0.25
        min_rows = max_rows
    else:
        # display.min_rows=None means "follow max_rows"
        min_rows = max_rows if min_rows is None else min(min_rows, max_rows)

    index_size = None
    if (
        df_or_series.shape[0] > max_rows
        and df_or_series.shape[0] > min_rows // 2 * 2 + 2
    ):
        # for pandas, greater than max_rows
        # will display min_rows
        # thus we fetch min_rows + 2 lines
        index_size = min_rows // 2 + 1

    if index_size is None:
        return df_or_series._fetch(session=session)
    else:
        head = iloc(df_or_series)[:index_size]
        tail = iloc(df_or_series)[-index_size:]
        head_data, tail_data = ExecutableTuple([head, tail]).fetch(session=session)
        xdf = cudf if head.op.is_gpu() else pd
        return xdf.concat([head_data, tail_data], axis="index")
1026
+
1027
+
1028
class ReprSeries(pd.Series):
    """Series built from corner data that reports the full object's length."""

    def __init__(self, corner_data, real_shape):
        super().__init__(corner_data)
        # shape of the complete object the corner data was taken from
        self._real_shape = real_shape

    def __len__(self):
        # Only the corner rows are held here, so the natural length would be
        # that of the fetched rows; report the first dimension of the real
        # shape instead.
        real_length = self._real_shape[0]
        return real_length
1038
+
1039
+
1040
def filter_dtypes_by_index(dtypes, index):
    """Select the entries of ``dtypes`` whose labels appear in ``index``.

    A direct ``.loc`` lookup is tried first. If it raises ``KeyError``, the
    labels common to both indexes are found by merging their frame forms.
    The result index takes the level names of ``dtypes.index``.
    """
    try:
        selected = dtypes.loc[index].dropna()
    except KeyError:
        own_labels = dtypes.index.to_frame()
        shared = own_labels.merge(index.to_frame())
        level_columns = list(range(dtypes.index.nlevels))
        shared_index = shared.set_index(level_columns).index
        selected = dtypes.loc[shared_index]
    selected.index.names = dtypes.index.names
    return selected
1053
+
1054
+
1055
@contextmanager
def create_sa_connection(con, **kwargs):
    """Context manager yielding a SQLAlchemy connection for ``con``.

    ``con`` may be an existing ``Connection`` (yielded as-is and left open),
    an ``Engine`` (a connection is opened and closed on exit), or anything
    accepted by ``sqlalchemy.create_engine`` (an engine is created with
    ``**kwargs``; its connection is closed and the engine disposed on exit).
    """
    import sqlalchemy as sa
    from sqlalchemy.engine import Connection, Engine

    # ``engine`` is only set when it was created here and therefore must be
    # disposed here
    engine = None
    close = False
    if isinstance(con, Connection):
        # connection created by user: neither closed nor disposed here
        pass
    elif isinstance(con, Engine):
        con = con.connect()
        close = True
    else:
        engine = sa.create_engine(con, **kwargs)
        try:
            con = engine.connect()
        except BaseException:
            # do not leak the freshly created engine when connecting fails
            engine.dispose()
            raise
        close = True

    try:
        yield con
    finally:
        try:
            if close:
                con.close()
        finally:
            # dispose even if closing the connection raised
            if engine is not None:
                engine.dispose()
1083
+
1084
+
1085
def wrap_arrow_type(arrow_type):
    """Return the pandas dtype used to represent ``arrow_type``.

    Arrow ``string`` maps to ``pd.StringDtype("pyarrow")``; every other type
    is wrapped in ``ArrowDtype``.
    """
    is_plain_string = arrow_type == pa.string()
    return pd.StringDtype("pyarrow") if is_plain_string else ArrowDtype(arrow_type)
1089
+
1090
+
1091
def to_arrow_dtypes(dtypes):
    """Convert ``dtypes`` so non-extension entries become arrow-backed dtypes.

    ``dtypes`` is either a ``pa.Schema`` (pandas dtypes are then derived from
    an empty table of that schema) or pandas dtypes (the arrow schema is then
    derived via ``pandas_dtypes_to_arrow_schema``). Entries that already are
    pandas extension dtypes are kept; the others are replaced by
    ``wrap_arrow_type`` of the matching arrow type. A new object is returned.
    """
    from ..io.odpsio.schema import pandas_dtypes_to_arrow_schema

    if not isinstance(dtypes, pa.Schema):
        arrow_schema = pandas_dtypes_to_arrow_schema(dtypes)
    else:
        arrow_schema = dtypes
        dtypes = arrow_schema.empty_table().to_pandas().dtypes

    converted = dtypes.copy()
    for pos in range(len(dtypes)):
        arrow_type = arrow_schema.types[pos]
        current = dtypes.iloc[pos]
        keep_current = isinstance(current, pd.api.extensions.ExtensionDtype)
        # existing extension dtypes are written back unchanged
        converted.iloc[pos] = current if keep_current else wrap_arrow_type(arrow_type)
    return converted
1109
+
1110
+
1111
def is_dataframe(x):
    """Return True if ``x`` is a pandas DataFrame, or a cudf one when cudf is available."""
    if cudf is not None and isinstance(x, cudf.DataFrame):  # pragma: no cover
        return True
    return isinstance(x, pd.DataFrame)
1116
+
1117
+
1118
def is_series(x):
    """Return True if ``x`` is a pandas Series, or a cudf one when cudf is available."""
    if cudf is not None and isinstance(x, cudf.Series):  # pragma: no cover
        return True
    return isinstance(x, pd.Series)
1123
+
1124
+
1125
def is_index(x):
    """Return True if ``x`` is a pandas Index, or a cudf one when cudf is available."""
    if cudf is not None and isinstance(x, cudf.Index):  # pragma: no cover
        return True
    return isinstance(x, pd.Index)
1130
+
1131
+
1132
def get_xdf(x):
    """Return the ``cudf`` module if ``x`` is a cudf DataFrame/Series/Index, else ``pd``."""
    if cudf is not None and isinstance(
        x, (cudf.DataFrame, cudf.Series, cudf.Index)
    ):  # pragma: no cover
        return cudf
    return pd
1137
+
1138
+
1139
def is_cudf(x):
    """Return True only when cudf is available and ``x`` is a cudf DataFrame/Series/Index."""
    if cudf is None:
        return False
    cudf_types = (cudf.DataFrame, cudf.Series, cudf.Index)  # pragma: no cover
    if isinstance(x, cudf_types):  # pragma: no cover
        return True
    return False
1144
+
1145
+
1146
def whether_to_clean_up(op, threshold):
    """Decide whether ``op.func`` carries enough captured data to need cleanup.

    The sizes of objects reachable from ``op.func`` (closure cells, or the
    callable's ``__dict__`` / ``__slots__``) are summed with
    ``sys.getsizeof``. As soon as the running total reaches ``threshold``,
    ``op.need_clean_up_func`` is set to True. Otherwise the flag is asserted
    to already be False.

    Parameters
    ----------
    op :
        Object exposing ``func`` and ``need_clean_up_func`` attributes.
    threshold :
        Byte count at (or above) which cleanup is considered necessary.

    Returns
    -------
    The value of ``op.need_clean_up_func`` after the check.
    """
    func = op.func
    counted_bytes = 0
    # nesting levels followed below the object passed to getsize()
    max_recursion_depth = 2

    from collections import deque
    from numbers import Number

    # instances of these classes are measured with sys.getsizeof() only,
    # without descending into their contents
    BYPASS_CLASSES = (str, bytes, Number, range, bytearray, pd.DataFrame, pd.Series)

    class GetSizeEarlyStopException(Exception):
        # raised to abort counting once the threshold is reached
        pass

    def check_exceed_threshold():
        nonlocal threshold, counted_bytes
        if counted_bytes >= threshold:
            raise GetSizeEarlyStopException()

    def getsize(obj_outer):
        # ids of objects already counted during this getsize() call
        _seen_obj_ids = set()

        def inner_count(obj, recursion_depth):
            obj_id = id(obj)
            # objects already seen, or nested too deeply, contribute nothing
            if obj_id in _seen_obj_ids or recursion_depth > max_recursion_depth:
                return 0
            _seen_obj_ids.add(obj_id)
            recursion_depth += 1
            size = sys.getsizeof(obj)
            if isinstance(obj, BYPASS_CLASSES):
                return size
            elif isinstance(obj, (tuple, list, set, deque)):
                size += sum(inner_count(i, recursion_depth) for i in obj)
            elif hasattr(obj, "items"):
                # anything exposing items() is walked as key/value pairs
                size += sum(
                    inner_count(k, recursion_depth) + inner_count(v, recursion_depth)
                    for k, v in getattr(obj, "items")()
                )
            if hasattr(obj, "__dict__"):
                size += inner_count(vars(obj), recursion_depth)
            if hasattr(obj, "__slots__"):
                size += sum(
                    inner_count(getattr(obj, s), recursion_depth)
                    for s in obj.__slots__
                    if hasattr(obj, s)
                )
            return size

        return inner_count(obj_outer, 0)

    try:
        # Note: In most cases, func is just a function with closure, while chances are that
        # func is a callable that doesn't have __closure__ attribute.
        if inspect.isclass(func):
            # classes are not measured at all
            pass
        elif hasattr(func, "__closure__") and func.__closure__ is not None:
            for cell in func.__closure__:
                counted_bytes += getsize(cell.cell_contents)
                check_exceed_threshold()
        elif callable(func):
            if hasattr(func, "__dict__"):
                for k, v in func.__dict__.items():
                    counted_bytes += sum([getsize(k), getsize(v)])
                    check_exceed_threshold()
            if hasattr(func, "__slots__"):
                for slot in func.__slots__:
                    counted_bytes += (
                        getsize(getattr(func, slot)) if hasattr(func, slot) else 0
                    )
                    check_exceed_threshold()
    except GetSizeEarlyStopException:
        logger.debug("Func needs cleanup.")
        op.need_clean_up_func = True
    else:
        # NOTE(review): relies on the flag being False before this call;
        # the assert is skipped when Python runs with -O
        assert op.need_clean_up_func is False
        logger.debug("Func doesn't need cleanup.")

    return op.need_clean_up_func
1223
+
1224
+
1225
def concat_on_columns(objs: List) -> Any:
    """Concatenate ``objs`` along ``axis=1`` with the library of the first object.

    For cudf the result index is reset to the first object's index.
    """
    first = objs[0]
    xdf = get_xdf(first)
    combined = xdf.concat(objs, axis=1)
    # In cudf, concat with axis=1 and ignore_index=False resets the index,
    # which is the opposite of pandas and does not match cudf's own document,
    # so the index of the first object is restored explicitly.
    if xdf is cudf:
        combined.index = first.index
    return combined
1234
+
1235
+
1236
def apply_if_callable(maybe_callable, obj, **kwargs):
    """Return ``maybe_callable(obj, **kwargs)`` if it is callable, else ``maybe_callable`` itself."""
    if not callable(maybe_callable):
        return maybe_callable
    return maybe_callable(obj, **kwargs)
1241
+
1242
+
1243
def patch_sa_engine_execute():
    """
    Add an ``execute`` method to SQLAlchemy's ``Engine`` when it has none.

    pandas did not resolve the compatibility issue with sqlalchemy 2.0
    (https://github.com/pandas-dev/pandas/issues/40686), so ``Engine`` is
    patched here. Nothing happens if SQLAlchemy cannot be imported or if
    ``Engine`` already defines ``execute``.
    """
    try:
        from sqlalchemy.engine import Engine
    except ImportError:  # pragma: no cover
        return

    if hasattr(Engine, "execute"):  # pragma: no cover
        return

    def execute(self, statement, *multiparams, **params):
        # open a connection from the engine and run the statement on it
        return self.connect().execute(statement, *multiparams, **params)

    Engine.execute = execute
1261
+
1262
+
1263
def bind_func_args_from_pos(func, args_bind_position, *bound_args, **bound_kwargs):
    """
    Create a new function with arguments bound from specified position.

    Parameters
    ----------
    func : callable
        Target function to be wrapped.
    args_bind_position : int
        Position at which ``bound_args`` are inserted into the positional
        arguments supplied at call time (0-based).
        e.g., 0 inserts before the first runtime arg, 1 inserts after it.
    *bound_args : tuple
        Arguments to be inserted at ``args_bind_position``.
    **bound_kwargs : dict
        Keyword arguments to be bound. Keyword arguments supplied at call
        time take precedence over these.

    Returns
    -------
    callable
        Wrapped function with bound arguments.

    Examples
    --------
    >>> def func(x, y, z=0):
    ...     return x * y + z
    >>> f = bind_func_args_from_pos(func, 1, 10)  # bind from second position
    >>> f(5)  # equals func(5, 10)
    50

    Raises
    ------
    Exception
        Any exception raised by ``func`` is re-raised with the same type. When
        that type can be constructed from a single message, the message is
        prefixed with calling context; otherwise the original exception is
        re-raised untouched.
    """

    @functools.wraps(func)
    def wrapper(*runtime_args, **runtime_kwargs):
        try:
            # Combine arguments
            all_args = (
                runtime_args[:args_bind_position]
                + bound_args
                + runtime_args[args_bind_position:]
            )
            all_kwargs = {**bound_kwargs, **runtime_kwargs}

            return func(*all_args, **all_kwargs)
        except Exception as e:
            # Enhance error message with context. Callables such as partials
            # or callable instances may not define __name__.
            func_name = getattr(func, "__name__", repr(func))
            message = f"Error calling {func_name} with bound arguments: {str(e)}"
            try:
                enhanced = type(e)(message)
            except Exception:
                # The exception type cannot be built from a single message
                # (e.g. it requires several constructor arguments).
                enhanced = None
            if enhanced is None:
                # keep the original error instead of masking it
                raise
            raise enhanced from e

    return wrapper
1319
+
1320
+
1321
def pack_func_args(df, funcs, *args, args_bind_position=1, **kwargs) -> Any:
    """
    Pack the funcs with args and kwargs to avoid the ambiguity between other
    positional and keyword arguments. It will process the funcs by the following rule:

    1. If there's no such args and kwargs, return funcs itself.

    2. If the funcs is a dict-like object, it will iterate each key-value pair, pack the
    value recursively, and return a new dict with the same keys and packed values.

    3. If the funcs is a list-like object, it will iterate each element, pack it
    recursively, and return a new list with the packed elements.

    4. If the funcs is a str object, the callable is resolved by
    ``get_callable_by_name(df, funcs)`` and then packed with args and kwargs.

    5. Other cases are treated as funcs being a callable, returns the wrapped one with
    args and kwargs packed in. For a ``MarkedFunction``, a copy is returned whose
    inner ``func`` is the wrapped one.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        The DataFrame or Series object used to resolve function names.
    funcs : function, str, list-like or dict-like
        Function to pack. It should have the same type with Dataframe.transform().
    args_bind_position: int
        Position to start binding arguments (0-based).
        e.g., 0 binds from first arg, 1 binds from second arg. The same
        position is used for every function nested in a dict or list.
    *args :
        The positional arguments to func. If funcs contains many functions, each one
        should be able to accept *args.
    **kwargs :
        The keyword arguments to func. If funcs contains many functions, each one
        should be able to accept **kwargs.

    Returns
    -------
    The packed functions having the same structure with funcs.

    Raises
    ------
    ValueError :
        If there's a string naming an attribute of df that is not callable.
    AttributeError :
        If there's a string but no corresponding function is found.
    """
    from ..udf import MarkedFunction

    if not args and not kwargs:
        return funcs

    if is_dict_like(funcs):
        # forward args_bind_position so nested functions bind at the same
        # position as requested by the caller
        return {
            k: pack_func_args(
                df, v, *args, args_bind_position=args_bind_position, **kwargs
            )
            for k, v in funcs.items()
        }

    if is_list_like(funcs) and not isinstance(funcs, ENTITY_TYPE):
        return [
            pack_func_args(
                df, v, *args, args_bind_position=args_bind_position, **kwargs
            )
            for v in funcs
        ]

    f = get_callable_by_name(df, funcs) if isinstance(funcs, str) else funcs

    if isinstance(f, MarkedFunction):
        # for marked function, pack the inner function, and reset as mark function
        packed_func = f.copy()
        packed_func.func = bind_func_args_from_pos(
            f.func, args_bind_position, *args, **kwargs
        )
    else:
        packed_func = bind_func_args_from_pos(f, args_bind_position, *args, **kwargs)

    # Callable
    return packed_func
1396
+
1397
+
1398
def get_callable_by_name(df: Any, func_name: str) -> Callable:
    """
    Resolve ``func_name`` to a callable for ``df``.

    The attribute ``df.<func_name>`` is preferred. When ``df`` has no such
    attribute, ``np.<func_name>`` is returned provided it exists and ``df``
    defines ``__array__``.

    Parameters
    ----------
    df: pandas.Series or pandas.Dataframe
        The receiver of the func name.
    func_name : str
        The func name.

    Returns
    -------
    The callable instance.

    Raises
    ------
    ValueError :
        If ``df`` has the attribute but it is not callable.
    AttributeError :
        If no corresponding function is found.
    """
    if not hasattr(df, func_name):
        # fall back to numpy for array-like receivers
        if hasattr(np, func_name) and hasattr(df, "__array__"):
            return getattr(np, func_name)
        raise AttributeError(
            f"'{func_name}' is not a valid function for '{type(df).__name__}' object"
        )

    candidate = getattr(df, func_name)
    if not callable(candidate):
        raise ValueError(f"{func_name} is not a callable")
    return candidate
1437
+
1438
+
1439
@dataclasses.dataclass
class InferredDataFrameMeta:
    """Metadata describing the inferred result of applying a function."""

    output_type: OutputType
    dtypes: Optional[pd.Series] = None
    dtype: Optional[Any] = None
    name: Optional[str] = None
    index_value: Optional["IndexValue"] = None
    maybe_agg: bool = False
    elementwise: bool = False

    def check_absence(self, *args: str) -> None:
        """Raise ``TypeError`` if any of the named fields is still ``None``.

        Fields that do not apply to the output type are ignored: ``dtype``
        and ``name`` for dataframe outputs, ``dtypes`` for everything else.
        """
        if self.output_type == OutputType.dataframe:
            not_applicable = ["dtype", "name"]
        else:
            not_applicable = ["dtypes"]
        required = set(args).difference(not_applicable)

        absent_args = [
            field for field in sorted(required) if getattr(self, field) is None
        ]
        if not absent_args:
            return
        raise TypeError(
            f"Cannot determine {', '.join(absent_args)} by calculating "
            "with mock data, please specify it as arguments"
        )
1461
+
1462
+
1463
def _get_groupby_input_df(groupby):
    """Follow first inputs from ``groupby`` until a dataframe or series output is reached."""
    node = groupby
    while True:
        first_output_type = node.op.output_types[0]
        if first_output_type in (OutputType.dataframe, OutputType.series):
            return node
        node = node.inputs[0]
1468
+
1469
+
1470
def infer_dataframe_return_value(
    df_obj,
    func,
    output_type=None,
    dtypes=None,
    dtype=None,
    name=None,
    index=None,
    inherit_index=False,
    build_kw=None,
    elementwise=None,
    skip_infer=False,
) -> InferredDataFrameMeta:
    """Infer output metadata of ``func`` applied on ``df_obj``.

    Metadata is taken, in order of preference, from the explicit arguments,
    from the output meta derived from the (unwrapped) function via
    ``get_function_output_meta``, and finally from calling ``func`` on a mock
    object built from ``df_obj``.

    Parameters
    ----------
    df_obj :
        Input DataFrame, Series or groupby object.
    func : callable
        Function whose result is described. ``MarkedFunction``,
        ``functools.partial`` and ``__wrapped__`` layers are peeled off only
        for annotation lookup and ufunc detection; the mock call uses ``func``
        itself.
    output_type, dtypes, dtype, name :
        Explicit metadata. Values that are not None take precedence over
        anything inferred.
    index :
        Explicit index of the result, either an index object or a value
        accepted by ``parse_index``.
    inherit_index : bool
        Whether the result shares the index of ``df_obj``.
    build_kw : dict
        Keyword arguments passed when building the mock object.
    elementwise : bool
        Whether ``func`` is elementwise. When None, it is True only if the
        unwrapped function is a ``np.ufunc``.
    skip_infer : bool
        If True, return only the explicit/annotated metadata without calling
        ``func``.

    Returns
    -------
    InferredDataFrameMeta
        Inferred metadata. If the mock call fails, the metadata known so far
        is returned and the exception is logged.
    """
    from .core import GROUPBY_TYPE, INDEX_TYPE
    from .typing_ import get_function_output_meta

    unwrapped_func = func
    if isinstance(unwrapped_func, MarkedFunction):
        unwrapped_func = unwrapped_func.func
    while True:
        if isinstance(unwrapped_func, functools.partial):
            unwrapped_func = unwrapped_func.func
        elif hasattr(unwrapped_func, "__wrapped__"):
            unwrapped_func = unwrapped_func.__wrapped__
        else:
            break

    func_annotation_meta = get_function_output_meta(unwrapped_func, df_obj)
    func_index_value = None
    if func_annotation_meta:
        output_type = output_type or func_annotation_meta.output_type
        dtypes = dtypes if dtypes is not None else func_annotation_meta.dtypes
        dtype = dtype if dtype is not None else func_annotation_meta.dtype
        name = name if name is not None else func_annotation_meta.name
        func_index_value = func_annotation_meta.index_value

    if skip_infer:
        if isinstance(index, INDEX_TYPE):
            ret_index_value = index.index_value
        elif index is not None:
            ret_index_value = parse_index(index, df_obj.key)
        else:
            ret_index_value = func_index_value

        return InferredDataFrameMeta(
            output_type=output_type,
            dtypes=dtypes,
            dtype=dtype,
            name=name,
            index_value=ret_index_value,
        )

    if isinstance(index, INDEX_TYPE):
        index = index.index_value

    if elementwise is None:
        elementwise = isinstance(unwrapped_func, np.ufunc)

    ret_index_value = func_index_value
    if output_type is not None and (dtypes is not None or dtype is not None):
        if inherit_index:
            ret_index_value = df_obj.index_value
        elif index is not None:
            ret_index_value = parse_index(index)

        if ret_index_value is not None:
            # everything needed is already known, no mock call required
            return InferredDataFrameMeta(
                output_type,
                dtypes,
                dtype,
                name,
                ret_index_value,
                elementwise=elementwise or False,
            )

    ret_output_type = None
    ret_dtypes = dtypes
    maybe_agg = False
    build_kw = build_kw or {}
    obj_key = df_obj.key

    if elementwise:
        inherit_index = True
        (ret_output_type,) = get_output_types(df_obj)
    if index is not None:
        ret_index_value = parse_index(index)

    if isinstance(df_obj, GROUPBY_TYPE):
        is_groupby = True
        empty_df_obj = df_obj.op.build_mock_groupby(**build_kw)
    else:
        is_groupby = False
        empty_df_obj = (
            build_df(df_obj, **build_kw)
            if df_obj.ndim == 2
            else build_series(df_obj, **build_kw)
        )
    try:
        with np.errstate(all="ignore"), quiet_stdio():
            infer_df_obj = func(empty_df_obj)

        if ret_index_value is None:
            if (
                infer_df_obj is None
                or not hasattr(infer_df_obj, "index")
                or infer_df_obj.index is None
            ):
                ret_index_value = parse_index(pd.RangeIndex(-1))
            elif (
                infer_df_obj.index is getattr(empty_df_obj, "index", None)
                or inherit_index
            ):
                ret_index_value = df_obj.index_value
            else:
                ret_index_value = parse_index(infer_df_obj.index, obj_key, func)

        if isinstance(infer_df_obj, pd.DataFrame):
            if output_type is not None and output_type != OutputType.dataframe:
                raise TypeError(
                    f'Cannot infer output_type as "series", '
                    f'please specify `output_type` as "dataframe"'
                )
            ret_output_type = ret_output_type or OutputType.dataframe
            if ret_dtypes is None:
                ret_dtypes = infer_df_obj.dtypes
        else:
            if output_type is not None and output_type == OutputType.dataframe:
                raise TypeError(
                    f'Cannot infer output_type as "dataframe", '
                    f'please specify `output_type` as "series"'
                )
            ret_output_type = ret_output_type or OutputType.series
            # Use explicit None checks: a plain np.dtype is falsy (its length
            # is its number of fields), and names such as 0 or "" are falsy
            # too, so ``x or inferred`` would discard user-specified values.
            if name is None:
                name = getattr(infer_df_obj, "name", None)
            if dtype is None:
                dtype = infer_df_obj.dtype

        if is_groupby and len(infer_df_obj) <= 2:
            # we create mock df with 4 rows, 2 groups
            # if return df has 2 rows, we assume that
            # it's an aggregation operation
            maybe_agg = True

        return InferredDataFrameMeta(
            ret_output_type,
            make_dtypes(ret_dtypes),
            make_dtypes(dtype),
            name,
            ret_index_value,
            maybe_agg,
            elementwise=elementwise,
        )
    except:  # noqa: E722  # nosec
        # NOTE(review): this also swallows the TypeErrors raised above for
        # mismatching output types; confirm that is intended.
        logger.info(
            "Exception raised while inferring meta of function result", exc_info=True
        )
        return InferredDataFrameMeta(
            output_type,
            make_dtypes(dtypes),
            make_dtypes(dtype),
            name,
            ret_index_value,
            maybe_agg,
            elementwise=elementwise,
        )
1633
+
1634
+
1635
def copy_func_scheduling_hints(func, op: "DataFrameOperator") -> None:
    """Copy scheduling hints of ``func`` (engine, resources, gpu, fs mount) onto ``op``.

    For a ``MarkedFunction`` the hints come from the function, with
    ``options.function.default_running_options`` filling resource keys that
    are missing or None. For any other function only the default running
    options are used. Neither ``func.expect_resources`` nor the global
    default options are modified. Only truthy hints are assigned to ``op``.
    """
    from ..config import options

    expect_engine = None
    expect_gpu = None
    fs_mount = None
    default_options = options.function.default_running_options or {}

    if isinstance(func, MarkedFunction):
        # copy from marked function
        expect_engine = func.expect_engine
        # work on a private copy so defaults merged below do not leak into
        # the function's own resource dict
        expect_resources = dict(func.expect_resources or {})
        expect_gpu = func.gpu
        fs_mount = func.fs_mount

        # merge default options if not set
        for key, value in default_options.items():
            if expect_resources.get(key) is None:
                expect_resources[key] = value
    else:
        # copy from default options; a real copy keeps later writes
        # (e.g. gu_quota) out of the global configuration
        expect_resources = dict(default_options)

    # Validate and adjust resource ratio constraints on client side
    expect_resources, _ = validate_and_adjust_resource_ratio(
        expect_resources,
        max_memory_cpu_ratio=options.function.allowed_max_memory_cpu_ratio,
        adjust=True,
    )

    # If GPU is required but gu_quota not set, inherit from global setting
    if expect_resources.get("gpu"):
        expect_resources["gu_quota"] = expect_resources.get(
            "gu_quota", [options.session.gu_quota_name]
        )

    if expect_engine:
        op.expect_engine = expect_engine
    if expect_resources:
        op.expect_resources = expect_resources
    if expect_gpu:
        op.gpu = expect_gpu
    if fs_mount:
        op.fs_mount = fs_mount
1679
+
1680
+
1681
def make_column_list(col, dtypes_or_columns, level=None):
    """Resolve ``col`` against the labels of ``dtypes_or_columns``.

    Without ``level``: returns ``[col]`` if ``col`` is itself a label;
    otherwise ``col`` (an int is first wrapped into a list) is returned as-is
    when all its items are labels, translated by position when all its items
    are ints, and returned unchanged in any other case.

    With ``level``: returns the labels whose values at that level are among
    ``col``; if none match, ``col`` itself is used to index the labels.

    ``IndexError``, ``TypeError`` and ``ValueError`` raised on the way make
    the function return ``col`` in its current form.
    """
    try:
        if isinstance(dtypes_or_columns, pd.Series):
            labels = dtypes_or_columns.index
        else:
            labels = dtypes_or_columns

        if level is not None:
            level_values = labels.get_level_values(level)
            wanted = col if isinstance(col, list) else [col]
            selector = level_values.isin(wanted)
            if not selector.any():
                selector = col
            return labels[selector]

        if col in labels:
            return [col]
        if isinstance(col, int):
            col = [col]
        if all(c in labels for c in col):
            return col
        if all(isinstance(c, int) for c in col):
            # positions rather than labels
            return [labels[c] for c in col]
        return col
    except (IndexError, TypeError, ValueError):
        return col
1711
+
1712
+
1713
def call_groupby_with_params(df_or_series, groupby_params: dict):
    """Call ``df_or_series.groupby`` with ``groupby_params``.

    The ``"selection"`` entry is not passed to ``groupby``; if truthy it is
    applied to the groupby result by item access. ``groupby_params`` is left
    unmodified.
    """
    selection = groupby_params.get("selection", None)
    groupby_kwargs = {
        key: value for key, value in groupby_params.items() if key != "selection"
    }
    grouped = df_or_series.groupby(**groupby_kwargs)
    return grouped[selection] if selection else grouped
1720
+
1721
+
1722
def validate_dtype_backend(value):
    """Validate a dtype backend setting and return it.

    Booleans are accepted for compatibility with the legacy
    ``use_arrow_dtype`` property: True becomes ``"pyarrow"`` and False
    becomes ``"numpy"``.

    Raises
    ------
    ValueError
        If ``dtype_backend_validator`` rejects the value.
    """
    if isinstance(value, bool):
        value = "pyarrow" if value else "numpy"
    if dtype_backend_validator(value):
        return value
    raise ValueError(f"Invalid dtype_backend: {value}")