maxframe 2.2.0__cp312-cp312-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (1094) hide show
  1. maxframe/__init__.py +33 -0
  2. maxframe/_utils.cpython-312-darwin.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyi +21 -0
  5. maxframe/_utils.pyx +561 -0
  6. maxframe/codegen/__init__.py +27 -0
  7. maxframe/codegen/core.py +596 -0
  8. maxframe/codegen/spe/__init__.py +16 -0
  9. maxframe/codegen/spe/core.py +307 -0
  10. maxframe/codegen/spe/dataframe/__init__.py +38 -0
  11. maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
  12. maxframe/codegen/spe/dataframe/accessors/base.py +71 -0
  13. maxframe/codegen/spe/dataframe/accessors/dict_.py +89 -0
  14. maxframe/codegen/spe/dataframe/accessors/list_.py +44 -0
  15. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  16. maxframe/codegen/spe/dataframe/arithmetic.py +89 -0
  17. maxframe/codegen/spe/dataframe/datasource.py +181 -0
  18. maxframe/codegen/spe/dataframe/datastore.py +204 -0
  19. maxframe/codegen/spe/dataframe/extensions.py +63 -0
  20. maxframe/codegen/spe/dataframe/fetch.py +26 -0
  21. maxframe/codegen/spe/dataframe/groupby.py +312 -0
  22. maxframe/codegen/spe/dataframe/indexing.py +333 -0
  23. maxframe/codegen/spe/dataframe/merge.py +106 -0
  24. maxframe/codegen/spe/dataframe/misc.py +262 -0
  25. maxframe/codegen/spe/dataframe/missing.py +64 -0
  26. maxframe/codegen/spe/dataframe/reduction.py +165 -0
  27. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  28. maxframe/codegen/spe/dataframe/sort.py +96 -0
  29. maxframe/codegen/spe/dataframe/statistics.py +46 -0
  30. maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
  31. maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
  32. maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
  33. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +304 -0
  34. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +134 -0
  35. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  36. maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
  37. maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
  38. maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
  39. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +95 -0
  40. maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
  41. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  42. maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
  43. maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
  44. maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
  45. maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
  46. maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
  47. maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
  48. maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
  49. maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
  50. maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
  51. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +202 -0
  52. maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
  53. maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
  54. maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
  55. maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
  56. maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
  57. maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
  58. maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
  59. maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
  60. maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
  61. maxframe/codegen/spe/dataframe/tests/test_groupby.py +288 -0
  62. maxframe/codegen/spe/dataframe/tests/test_merge.py +426 -0
  63. maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
  64. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  65. maxframe/codegen/spe/dataframe/tests/test_sort.py +179 -0
  66. maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
  67. maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
  68. maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
  69. maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
  70. maxframe/codegen/spe/dataframe/tseries.py +46 -0
  71. maxframe/codegen/spe/dataframe/udf.py +62 -0
  72. maxframe/codegen/spe/dataframe/value_counts.py +31 -0
  73. maxframe/codegen/spe/dataframe/window.py +65 -0
  74. maxframe/codegen/spe/learn/__init__.py +15 -0
  75. maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
  76. maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
  77. maxframe/codegen/spe/learn/contrib/models.py +41 -0
  78. maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
  79. maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
  80. maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
  81. maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
  82. maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
  83. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +99 -0
  84. maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
  85. maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
  86. maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
  87. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  88. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  89. maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
  90. maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
  91. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  92. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  93. maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
  94. maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
  95. maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
  96. maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
  97. maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
  98. maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
  99. maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
  100. maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
  101. maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
  102. maxframe/codegen/spe/learn/utils/__init__.py +15 -0
  103. maxframe/codegen/spe/learn/utils/checks.py +55 -0
  104. maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
  105. maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
  106. maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
  107. maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
  108. maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
  109. maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
  110. maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
  111. maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
  112. maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
  113. maxframe/codegen/spe/learn/utils/validation.py +35 -0
  114. maxframe/codegen/spe/objects.py +26 -0
  115. maxframe/codegen/spe/remote.py +29 -0
  116. maxframe/codegen/spe/tensor/__init__.py +31 -0
  117. maxframe/codegen/spe/tensor/arithmetic.py +95 -0
  118. maxframe/codegen/spe/tensor/core.py +41 -0
  119. maxframe/codegen/spe/tensor/datasource.py +165 -0
  120. maxframe/codegen/spe/tensor/extensions.py +35 -0
  121. maxframe/codegen/spe/tensor/fetch.py +26 -0
  122. maxframe/codegen/spe/tensor/fft.py +74 -0
  123. maxframe/codegen/spe/tensor/indexing.py +63 -0
  124. maxframe/codegen/spe/tensor/linalg.py +90 -0
  125. maxframe/codegen/spe/tensor/merge.py +31 -0
  126. maxframe/codegen/spe/tensor/misc.py +175 -0
  127. maxframe/codegen/spe/tensor/random.py +29 -0
  128. maxframe/codegen/spe/tensor/reduction.py +39 -0
  129. maxframe/codegen/spe/tensor/reshape.py +26 -0
  130. maxframe/codegen/spe/tensor/sort.py +42 -0
  131. maxframe/codegen/spe/tensor/spatial.py +45 -0
  132. maxframe/codegen/spe/tensor/special.py +35 -0
  133. maxframe/codegen/spe/tensor/statistics.py +68 -0
  134. maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
  135. maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
  136. maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
  137. maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
  138. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  139. maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
  140. maxframe/codegen/spe/tensor/tests/test_linalg.py +52 -0
  141. maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
  142. maxframe/codegen/spe/tensor/tests/test_misc.py +144 -0
  143. maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
  144. maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
  145. maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
  146. maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
  147. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  148. maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
  149. maxframe/codegen/spe/tensor/tests/test_statistics.py +43 -0
  150. maxframe/codegen/spe/tests/__init__.py +13 -0
  151. maxframe/codegen/spe/tests/test_remote.py +29 -0
  152. maxframe/codegen/spe/tests/test_spe_codegen.py +135 -0
  153. maxframe/codegen/spe/utils.py +56 -0
  154. maxframe/codegen/tests/__init__.py +13 -0
  155. maxframe/codegen/tests/test_codegen.py +67 -0
  156. maxframe/config/__init__.py +15 -0
  157. maxframe/config/config.py +627 -0
  158. maxframe/config/tests/__init__.py +13 -0
  159. maxframe/config/tests/test_config.py +114 -0
  160. maxframe/config/tests/test_validators.py +46 -0
  161. maxframe/config/validators.py +142 -0
  162. maxframe/conftest.py +251 -0
  163. maxframe/core/__init__.py +53 -0
  164. maxframe/core/accessor.py +45 -0
  165. maxframe/core/base.py +156 -0
  166. maxframe/core/context.py +110 -0
  167. maxframe/core/entity/__init__.py +34 -0
  168. maxframe/core/entity/core.py +150 -0
  169. maxframe/core/entity/executable.py +337 -0
  170. maxframe/core/entity/objects.py +115 -0
  171. maxframe/core/entity/output_types.py +98 -0
  172. maxframe/core/entity/tests/__init__.py +13 -0
  173. maxframe/core/entity/tests/test_objects.py +42 -0
  174. maxframe/core/entity/tileables.py +367 -0
  175. maxframe/core/entity/utils.py +39 -0
  176. maxframe/core/graph/__init__.py +22 -0
  177. maxframe/core/graph/builder/__init__.py +15 -0
  178. maxframe/core/graph/builder/base.py +91 -0
  179. maxframe/core/graph/builder/tileable.py +34 -0
  180. maxframe/core/graph/builder/utils.py +37 -0
  181. maxframe/core/graph/core.cpython-312-darwin.so +0 -0
  182. maxframe/core/graph/core.pyx +478 -0
  183. maxframe/core/graph/entity.py +158 -0
  184. maxframe/core/graph/tests/__init__.py +13 -0
  185. maxframe/core/graph/tests/test_graph.py +205 -0
  186. maxframe/core/mode.py +96 -0
  187. maxframe/core/operator/__init__.py +32 -0
  188. maxframe/core/operator/base.py +480 -0
  189. maxframe/core/operator/core.py +307 -0
  190. maxframe/core/operator/fetch.py +40 -0
  191. maxframe/core/operator/objects.py +43 -0
  192. maxframe/core/operator/shuffle.py +45 -0
  193. maxframe/core/operator/tests/__init__.py +13 -0
  194. maxframe/core/operator/tests/test_core.py +64 -0
  195. maxframe/core/operator/utils.py +68 -0
  196. maxframe/core/tests/__init__.py +13 -0
  197. maxframe/core/tests/test_mode.py +75 -0
  198. maxframe/dataframe/__init__.py +89 -0
  199. maxframe/dataframe/accessors/__init__.py +15 -0
  200. maxframe/dataframe/accessors/compat.py +45 -0
  201. maxframe/dataframe/accessors/datetime_/__init__.py +35 -0
  202. maxframe/dataframe/accessors/datetime_/accessor.py +67 -0
  203. maxframe/dataframe/accessors/datetime_/core.py +82 -0
  204. maxframe/dataframe/accessors/datetime_/tests/__init__.py +13 -0
  205. maxframe/dataframe/accessors/datetime_/tests/test_datetime_accessor.py +41 -0
  206. maxframe/dataframe/accessors/dict_/__init__.py +43 -0
  207. maxframe/dataframe/accessors/dict_/accessor.py +39 -0
  208. maxframe/dataframe/accessors/dict_/contains.py +72 -0
  209. maxframe/dataframe/accessors/dict_/core.py +48 -0
  210. maxframe/dataframe/accessors/dict_/getitem.py +140 -0
  211. maxframe/dataframe/accessors/dict_/length.py +64 -0
  212. maxframe/dataframe/accessors/dict_/remove.py +75 -0
  213. maxframe/dataframe/accessors/dict_/setitem.py +79 -0
  214. maxframe/dataframe/accessors/dict_/tests/__init__.py +13 -0
  215. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +168 -0
  216. maxframe/dataframe/accessors/list_/__init__.py +37 -0
  217. maxframe/dataframe/accessors/list_/accessor.py +39 -0
  218. maxframe/dataframe/accessors/list_/core.py +48 -0
  219. maxframe/dataframe/accessors/list_/getitem.py +128 -0
  220. maxframe/dataframe/accessors/list_/length.py +64 -0
  221. maxframe/dataframe/accessors/list_/tests/__init__.py +13 -0
  222. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +81 -0
  223. maxframe/dataframe/accessors/plotting/__init__.py +40 -0
  224. maxframe/dataframe/accessors/plotting/core.py +78 -0
  225. maxframe/dataframe/accessors/plotting/tests/__init__.py +13 -0
  226. maxframe/dataframe/accessors/plotting/tests/test_plotting_accessor.py +136 -0
  227. maxframe/dataframe/accessors/string_/__init__.py +36 -0
  228. maxframe/dataframe/accessors/string_/accessor.py +215 -0
  229. maxframe/dataframe/accessors/string_/core.py +224 -0
  230. maxframe/dataframe/accessors/string_/tests/__init__.py +13 -0
  231. maxframe/dataframe/accessors/string_/tests/test_string_accessor.py +73 -0
  232. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  233. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  234. maxframe/dataframe/accessors/struct_/core.py +43 -0
  235. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  236. maxframe/dataframe/accessors/struct_/field.py +123 -0
  237. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  238. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  239. maxframe/dataframe/arithmetic/__init__.py +369 -0
  240. maxframe/dataframe/arithmetic/abs.py +33 -0
  241. maxframe/dataframe/arithmetic/add.py +60 -0
  242. maxframe/dataframe/arithmetic/arccos.py +28 -0
  243. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  244. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  245. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  246. maxframe/dataframe/arithmetic/arctan.py +28 -0
  247. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  248. maxframe/dataframe/arithmetic/between.py +106 -0
  249. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  250. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  251. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  252. maxframe/dataframe/arithmetic/ceil.py +28 -0
  253. maxframe/dataframe/arithmetic/core.py +361 -0
  254. maxframe/dataframe/arithmetic/cos.py +28 -0
  255. maxframe/dataframe/arithmetic/cosh.py +28 -0
  256. maxframe/dataframe/arithmetic/degrees.py +28 -0
  257. maxframe/dataframe/arithmetic/docstring.py +416 -0
  258. maxframe/dataframe/arithmetic/dot.py +237 -0
  259. maxframe/dataframe/arithmetic/equal.py +58 -0
  260. maxframe/dataframe/arithmetic/exp.py +28 -0
  261. maxframe/dataframe/arithmetic/exp2.py +28 -0
  262. maxframe/dataframe/arithmetic/expm1.py +28 -0
  263. maxframe/dataframe/arithmetic/floor.py +28 -0
  264. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  265. maxframe/dataframe/arithmetic/greater.py +59 -0
  266. maxframe/dataframe/arithmetic/greater_equal.py +59 -0
  267. maxframe/dataframe/arithmetic/invert.py +33 -0
  268. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  269. maxframe/dataframe/arithmetic/less.py +57 -0
  270. maxframe/dataframe/arithmetic/less_equal.py +59 -0
  271. maxframe/dataframe/arithmetic/log.py +28 -0
  272. maxframe/dataframe/arithmetic/log10.py +28 -0
  273. maxframe/dataframe/arithmetic/log2.py +28 -0
  274. maxframe/dataframe/arithmetic/mod.py +60 -0
  275. maxframe/dataframe/arithmetic/multiply.py +60 -0
  276. maxframe/dataframe/arithmetic/negative.py +33 -0
  277. maxframe/dataframe/arithmetic/not_equal.py +58 -0
  278. maxframe/dataframe/arithmetic/power.py +68 -0
  279. maxframe/dataframe/arithmetic/radians.py +28 -0
  280. maxframe/dataframe/arithmetic/round.py +144 -0
  281. maxframe/dataframe/arithmetic/sin.py +28 -0
  282. maxframe/dataframe/arithmetic/sinh.py +28 -0
  283. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  284. maxframe/dataframe/arithmetic/subtract.py +64 -0
  285. maxframe/dataframe/arithmetic/tan.py +28 -0
  286. maxframe/dataframe/arithmetic/tanh.py +28 -0
  287. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  288. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +724 -0
  289. maxframe/dataframe/arithmetic/truediv.py +64 -0
  290. maxframe/dataframe/arithmetic/trunc.py +28 -0
  291. maxframe/dataframe/arrays.py +864 -0
  292. maxframe/dataframe/core.py +2393 -0
  293. maxframe/dataframe/datasource/__init__.py +33 -0
  294. maxframe/dataframe/datasource/core.py +88 -0
  295. maxframe/dataframe/datasource/dataframe.py +59 -0
  296. maxframe/dataframe/datasource/date_range.py +512 -0
  297. maxframe/dataframe/datasource/from_dict.py +124 -0
  298. maxframe/dataframe/datasource/from_index.py +58 -0
  299. maxframe/dataframe/datasource/from_records.py +191 -0
  300. maxframe/dataframe/datasource/from_tensor.py +498 -0
  301. maxframe/dataframe/datasource/index.py +117 -0
  302. maxframe/dataframe/datasource/read_csv.py +533 -0
  303. maxframe/dataframe/datasource/read_odps_query.py +513 -0
  304. maxframe/dataframe/datasource/read_odps_table.py +273 -0
  305. maxframe/dataframe/datasource/read_parquet.py +426 -0
  306. maxframe/dataframe/datasource/series.py +55 -0
  307. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  308. maxframe/dataframe/datasource/tests/test_datasource.py +663 -0
  309. maxframe/dataframe/datastore/__init__.py +30 -0
  310. maxframe/dataframe/datastore/core.py +19 -0
  311. maxframe/dataframe/datastore/tests/__init__.py +13 -0
  312. maxframe/dataframe/datastore/tests/test_to_odps.py +99 -0
  313. maxframe/dataframe/datastore/to_csv.py +219 -0
  314. maxframe/dataframe/datastore/to_odps.py +258 -0
  315. maxframe/dataframe/extensions/__init__.py +70 -0
  316. maxframe/dataframe/extensions/accessor.py +35 -0
  317. maxframe/dataframe/extensions/apply_chunk.py +733 -0
  318. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  319. maxframe/dataframe/extensions/collect_kv.py +126 -0
  320. maxframe/dataframe/extensions/extract_kv.py +177 -0
  321. maxframe/dataframe/extensions/flatjson.py +132 -0
  322. maxframe/dataframe/extensions/flatmap.py +329 -0
  323. maxframe/dataframe/extensions/map_reduce.py +263 -0
  324. maxframe/dataframe/extensions/rebalance.py +62 -0
  325. maxframe/dataframe/extensions/reshuffle.py +83 -0
  326. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  327. maxframe/dataframe/extensions/tests/test_apply_chunk.py +194 -0
  328. maxframe/dataframe/extensions/tests/test_extensions.py +198 -0
  329. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  330. maxframe/dataframe/fetch/__init__.py +15 -0
  331. maxframe/dataframe/fetch/core.py +97 -0
  332. maxframe/dataframe/groupby/__init__.py +101 -0
  333. maxframe/dataframe/groupby/aggregation.py +437 -0
  334. maxframe/dataframe/groupby/apply.py +235 -0
  335. maxframe/dataframe/groupby/apply_chunk.py +409 -0
  336. maxframe/dataframe/groupby/core.py +326 -0
  337. maxframe/dataframe/groupby/cum.py +102 -0
  338. maxframe/dataframe/groupby/expanding.py +264 -0
  339. maxframe/dataframe/groupby/extensions.py +26 -0
  340. maxframe/dataframe/groupby/fill.py +149 -0
  341. maxframe/dataframe/groupby/getitem.py +105 -0
  342. maxframe/dataframe/groupby/head.py +115 -0
  343. maxframe/dataframe/groupby/rank.py +136 -0
  344. maxframe/dataframe/groupby/rolling.py +206 -0
  345. maxframe/dataframe/groupby/sample.py +214 -0
  346. maxframe/dataframe/groupby/shift.py +114 -0
  347. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  348. maxframe/dataframe/groupby/tests/test_groupby.py +373 -0
  349. maxframe/dataframe/groupby/transform.py +264 -0
  350. maxframe/dataframe/indexing/__init__.py +103 -0
  351. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  352. maxframe/dataframe/indexing/align.py +350 -0
  353. maxframe/dataframe/indexing/at.py +83 -0
  354. maxframe/dataframe/indexing/droplevel.py +195 -0
  355. maxframe/dataframe/indexing/filter.py +169 -0
  356. maxframe/dataframe/indexing/get_level_values.py +76 -0
  357. maxframe/dataframe/indexing/getitem.py +205 -0
  358. maxframe/dataframe/indexing/iat.py +82 -0
  359. maxframe/dataframe/indexing/iloc.py +711 -0
  360. maxframe/dataframe/indexing/insert.py +90 -0
  361. maxframe/dataframe/indexing/loc.py +694 -0
  362. maxframe/dataframe/indexing/reindex.py +541 -0
  363. maxframe/dataframe/indexing/rename.py +445 -0
  364. maxframe/dataframe/indexing/rename_axis.py +217 -0
  365. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  366. maxframe/dataframe/indexing/reset_index.py +427 -0
  367. maxframe/dataframe/indexing/sample.py +232 -0
  368. maxframe/dataframe/indexing/set_axis.py +197 -0
  369. maxframe/dataframe/indexing/set_index.py +128 -0
  370. maxframe/dataframe/indexing/setitem.py +133 -0
  371. maxframe/dataframe/indexing/swaplevel.py +185 -0
  372. maxframe/dataframe/indexing/take.py +99 -0
  373. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  374. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  375. maxframe/dataframe/indexing/truncate.py +140 -0
  376. maxframe/dataframe/indexing/where.py +300 -0
  377. maxframe/dataframe/indexing/xs.py +148 -0
  378. maxframe/dataframe/initializer.py +298 -0
  379. maxframe/dataframe/merge/__init__.py +50 -0
  380. maxframe/dataframe/merge/append.py +120 -0
  381. maxframe/dataframe/merge/combine_first.py +120 -0
  382. maxframe/dataframe/merge/compare.py +387 -0
  383. maxframe/dataframe/merge/concat.py +500 -0
  384. maxframe/dataframe/merge/merge.py +806 -0
  385. maxframe/dataframe/merge/tests/__init__.py +13 -0
  386. maxframe/dataframe/merge/tests/test_merge.py +390 -0
  387. maxframe/dataframe/merge/update.py +271 -0
  388. maxframe/dataframe/misc/__init__.py +131 -0
  389. maxframe/dataframe/misc/_duplicate.py +56 -0
  390. maxframe/dataframe/misc/apply.py +730 -0
  391. maxframe/dataframe/misc/astype.py +237 -0
  392. maxframe/dataframe/misc/case_when.py +145 -0
  393. maxframe/dataframe/misc/check_monotonic.py +84 -0
  394. maxframe/dataframe/misc/check_unique.py +51 -0
  395. maxframe/dataframe/misc/clip.py +145 -0
  396. maxframe/dataframe/misc/cut.py +386 -0
  397. maxframe/dataframe/misc/describe.py +278 -0
  398. maxframe/dataframe/misc/diff.py +210 -0
  399. maxframe/dataframe/misc/drop.py +442 -0
  400. maxframe/dataframe/misc/drop_duplicates.py +251 -0
  401. maxframe/dataframe/misc/duplicated.py +292 -0
  402. maxframe/dataframe/misc/eval.py +730 -0
  403. maxframe/dataframe/misc/explode.py +171 -0
  404. maxframe/dataframe/misc/get_dummies.py +241 -0
  405. maxframe/dataframe/misc/isin.py +220 -0
  406. maxframe/dataframe/misc/map.py +347 -0
  407. maxframe/dataframe/misc/memory_usage.py +248 -0
  408. maxframe/dataframe/misc/pct_change.py +68 -0
  409. maxframe/dataframe/misc/qcut.py +104 -0
  410. maxframe/dataframe/misc/rechunk.py +59 -0
  411. maxframe/dataframe/misc/select_dtypes.py +104 -0
  412. maxframe/dataframe/misc/shift.py +259 -0
  413. maxframe/dataframe/misc/tests/__init__.py +13 -0
  414. maxframe/dataframe/misc/tests/test_misc.py +615 -0
  415. maxframe/dataframe/misc/to_numeric.py +181 -0
  416. maxframe/dataframe/misc/transform.py +346 -0
  417. maxframe/dataframe/misc/transpose.py +148 -0
  418. maxframe/dataframe/misc/valid_index.py +115 -0
  419. maxframe/dataframe/misc/value_counts.py +206 -0
  420. maxframe/dataframe/missing/__init__.py +53 -0
  421. maxframe/dataframe/missing/checkna.py +230 -0
  422. maxframe/dataframe/missing/dropna.py +294 -0
  423. maxframe/dataframe/missing/fillna.py +283 -0
  424. maxframe/dataframe/missing/replace.py +446 -0
  425. maxframe/dataframe/missing/tests/__init__.py +13 -0
  426. maxframe/dataframe/missing/tests/test_missing.py +90 -0
  427. maxframe/dataframe/operators.py +231 -0
  428. maxframe/dataframe/reduction/__init__.py +124 -0
  429. maxframe/dataframe/reduction/aggregation.py +499 -0
  430. maxframe/dataframe/reduction/all.py +78 -0
  431. maxframe/dataframe/reduction/any.py +78 -0
  432. maxframe/dataframe/reduction/argmax.py +100 -0
  433. maxframe/dataframe/reduction/argmin.py +100 -0
  434. maxframe/dataframe/reduction/core.py +898 -0
  435. maxframe/dataframe/reduction/count.py +63 -0
  436. maxframe/dataframe/reduction/cov.py +166 -0
  437. maxframe/dataframe/reduction/cummax.py +30 -0
  438. maxframe/dataframe/reduction/cummin.py +30 -0
  439. maxframe/dataframe/reduction/cumprod.py +30 -0
  440. maxframe/dataframe/reduction/cumsum.py +30 -0
  441. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  442. maxframe/dataframe/reduction/idxmax.py +185 -0
  443. maxframe/dataframe/reduction/idxmin.py +185 -0
  444. maxframe/dataframe/reduction/kurtosis.py +111 -0
  445. maxframe/dataframe/reduction/max.py +65 -0
  446. maxframe/dataframe/reduction/mean.py +63 -0
  447. maxframe/dataframe/reduction/median.py +56 -0
  448. maxframe/dataframe/reduction/min.py +65 -0
  449. maxframe/dataframe/reduction/nunique.py +142 -0
  450. maxframe/dataframe/reduction/prod.py +81 -0
  451. maxframe/dataframe/reduction/reduction_size.py +36 -0
  452. maxframe/dataframe/reduction/sem.py +73 -0
  453. maxframe/dataframe/reduction/skew.py +93 -0
  454. maxframe/dataframe/reduction/std.py +53 -0
  455. maxframe/dataframe/reduction/str_concat.py +51 -0
  456. maxframe/dataframe/reduction/sum.py +81 -0
  457. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  458. maxframe/dataframe/reduction/tests/test_reduction.py +529 -0
  459. maxframe/dataframe/reduction/unique.py +153 -0
  460. maxframe/dataframe/reduction/var.py +76 -0
  461. maxframe/dataframe/reshape/__init__.py +38 -0
  462. maxframe/dataframe/reshape/melt.py +169 -0
  463. maxframe/dataframe/reshape/pivot.py +233 -0
  464. maxframe/dataframe/reshape/pivot_table.py +275 -0
  465. maxframe/dataframe/reshape/stack.py +240 -0
  466. maxframe/dataframe/reshape/unstack.py +114 -0
  467. maxframe/dataframe/sort/__init__.py +42 -0
  468. maxframe/dataframe/sort/argsort.py +62 -0
  469. maxframe/dataframe/sort/core.py +37 -0
  470. maxframe/dataframe/sort/nlargest.py +238 -0
  471. maxframe/dataframe/sort/nsmallest.py +228 -0
  472. maxframe/dataframe/sort/sort_index.py +153 -0
  473. maxframe/dataframe/sort/sort_values.py +301 -0
  474. maxframe/dataframe/sort/tests/__init__.py +13 -0
  475. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  476. maxframe/dataframe/statistics/__init__.py +33 -0
  477. maxframe/dataframe/statistics/corr.py +284 -0
  478. maxframe/dataframe/statistics/quantile.py +338 -0
  479. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  480. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  481. maxframe/dataframe/tests/__init__.py +13 -0
  482. maxframe/dataframe/tests/test_initializer.py +60 -0
  483. maxframe/dataframe/tests/test_typing.py +104 -0
  484. maxframe/dataframe/tests/test_utils.py +165 -0
  485. maxframe/dataframe/tseries/__init__.py +13 -0
  486. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  487. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  488. maxframe/dataframe/tseries/to_datetime.py +299 -0
  489. maxframe/dataframe/typing_.py +185 -0
  490. maxframe/dataframe/ufunc/__init__.py +27 -0
  491. maxframe/dataframe/ufunc/tensor.py +54 -0
  492. maxframe/dataframe/ufunc/ufunc.py +53 -0
  493. maxframe/dataframe/utils.py +1647 -0
  494. maxframe/dataframe/window/__init__.py +29 -0
  495. maxframe/dataframe/window/aggregation.py +100 -0
  496. maxframe/dataframe/window/core.py +82 -0
  497. maxframe/dataframe/window/ewm.py +247 -0
  498. maxframe/dataframe/window/expanding.py +151 -0
  499. maxframe/dataframe/window/rolling.py +389 -0
  500. maxframe/dataframe/window/tests/__init__.py +13 -0
  501. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  502. maxframe/dataframe/window/tests/test_expanding.py +60 -0
  503. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  504. maxframe/env.py +37 -0
  505. maxframe/errors.py +47 -0
  506. maxframe/extension.py +107 -0
  507. maxframe/io/__init__.py +13 -0
  508. maxframe/io/objects/__init__.py +24 -0
  509. maxframe/io/objects/core.py +156 -0
  510. maxframe/io/objects/tensor.py +132 -0
  511. maxframe/io/objects/tests/__init__.py +13 -0
  512. maxframe/io/objects/tests/test_object_io.py +79 -0
  513. maxframe/io/odpsio/__init__.py +23 -0
  514. maxframe/io/odpsio/arrow.py +161 -0
  515. maxframe/io/odpsio/schema.py +496 -0
  516. maxframe/io/odpsio/tableio.py +727 -0
  517. maxframe/io/odpsio/tests/__init__.py +13 -0
  518. maxframe/io/odpsio/tests/test_arrow.py +132 -0
  519. maxframe/io/odpsio/tests/test_schema.py +580 -0
  520. maxframe/io/odpsio/tests/test_tableio.py +205 -0
  521. maxframe/io/odpsio/tests/test_volumeio.py +75 -0
  522. maxframe/io/odpsio/volumeio.py +102 -0
  523. maxframe/learn/__init__.py +25 -0
  524. maxframe/learn/cluster/__init__.py +15 -0
  525. maxframe/learn/cluster/_kmeans.py +782 -0
  526. maxframe/learn/contrib/__init__.py +17 -0
  527. maxframe/learn/contrib/graph/__init__.py +15 -0
  528. maxframe/learn/contrib/graph/connected_components.py +216 -0
  529. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  530. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  531. maxframe/learn/contrib/lightgbm/__init__.py +33 -0
  532. maxframe/learn/contrib/lightgbm/_predict.py +138 -0
  533. maxframe/learn/contrib/lightgbm/_train.py +163 -0
  534. maxframe/learn/contrib/lightgbm/callback.py +114 -0
  535. maxframe/learn/contrib/lightgbm/classifier.py +199 -0
  536. maxframe/learn/contrib/lightgbm/core.py +372 -0
  537. maxframe/learn/contrib/lightgbm/dataset.py +153 -0
  538. maxframe/learn/contrib/lightgbm/regressor.py +29 -0
  539. maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
  540. maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
  541. maxframe/learn/contrib/llm/__init__.py +17 -0
  542. maxframe/learn/contrib/llm/core.py +77 -0
  543. maxframe/learn/contrib/llm/models/__init__.py +15 -0
  544. maxframe/learn/contrib/llm/models/dashscope.py +108 -0
  545. maxframe/learn/contrib/llm/models/managed.py +54 -0
  546. maxframe/learn/contrib/llm/multi_modal.py +135 -0
  547. maxframe/learn/contrib/llm/text.py +302 -0
  548. maxframe/learn/contrib/models.py +106 -0
  549. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  550. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  551. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  552. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  553. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  554. maxframe/learn/contrib/utils.py +108 -0
  555. maxframe/learn/contrib/xgboost/__init__.py +33 -0
  556. maxframe/learn/contrib/xgboost/callback.py +86 -0
  557. maxframe/learn/contrib/xgboost/classifier.py +117 -0
  558. maxframe/learn/contrib/xgboost/core.py +445 -0
  559. maxframe/learn/contrib/xgboost/dmatrix.py +157 -0
  560. maxframe/learn/contrib/xgboost/predict.py +131 -0
  561. maxframe/learn/contrib/xgboost/regressor.py +86 -0
  562. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  563. maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
  564. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  565. maxframe/learn/contrib/xgboost/train.py +179 -0
  566. maxframe/learn/core.py +344 -0
  567. maxframe/learn/datasets/__init__.py +20 -0
  568. maxframe/learn/datasets/samples_generator.py +628 -0
  569. maxframe/learn/linear_model/__init__.py +15 -0
  570. maxframe/learn/linear_model/_base.py +220 -0
  571. maxframe/learn/linear_model/_lin_reg.py +175 -0
  572. maxframe/learn/metrics/__init__.py +31 -0
  573. maxframe/learn/metrics/_check_targets.py +95 -0
  574. maxframe/learn/metrics/_classification.py +1266 -0
  575. maxframe/learn/metrics/_ranking.py +477 -0
  576. maxframe/learn/metrics/_regression.py +256 -0
  577. maxframe/learn/metrics/_scorer.py +60 -0
  578. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  579. maxframe/learn/metrics/pairwise/core.py +77 -0
  580. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  581. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  582. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  583. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  584. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  585. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  586. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  587. maxframe/learn/metrics/tests/__init__.py +13 -0
  588. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  589. maxframe/learn/model_selection/__init__.py +15 -0
  590. maxframe/learn/model_selection/_split.py +451 -0
  591. maxframe/learn/model_selection/tests/__init__.py +13 -0
  592. maxframe/learn/model_selection/tests/test_split.py +156 -0
  593. maxframe/learn/preprocessing/__init__.py +16 -0
  594. maxframe/learn/preprocessing/_data/__init__.py +17 -0
  595. maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
  596. maxframe/learn/preprocessing/_data/normalize.py +127 -0
  597. maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
  598. maxframe/learn/preprocessing/_data/utils.py +79 -0
  599. maxframe/learn/preprocessing/_label/__init__.py +16 -0
  600. maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
  601. maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
  602. maxframe/learn/utils/__init__.py +19 -0
  603. maxframe/learn/utils/_encode.py +314 -0
  604. maxframe/learn/utils/checks.py +160 -0
  605. maxframe/learn/utils/core.py +121 -0
  606. maxframe/learn/utils/extmath.py +213 -0
  607. maxframe/learn/utils/multiclass.py +292 -0
  608. maxframe/learn/utils/odpsio.py +193 -0
  609. maxframe/learn/utils/shuffle.py +114 -0
  610. maxframe/learn/utils/sparsefuncs.py +87 -0
  611. maxframe/learn/utils/validation.py +775 -0
  612. maxframe/lib/__init__.py +13 -0
  613. maxframe/lib/aio/__init__.py +27 -0
  614. maxframe/lib/aio/_runners.py +162 -0
  615. maxframe/lib/aio/_threads.py +35 -0
  616. maxframe/lib/aio/base.py +82 -0
  617. maxframe/lib/aio/file.py +85 -0
  618. maxframe/lib/aio/isolation.py +100 -0
  619. maxframe/lib/aio/lru.py +242 -0
  620. maxframe/lib/aio/parallelism.py +37 -0
  621. maxframe/lib/aio/tests/__init__.py +13 -0
  622. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  623. maxframe/lib/compat.py +185 -0
  624. maxframe/lib/compression.py +55 -0
  625. maxframe/lib/cython/__init__.py +13 -0
  626. maxframe/lib/cython/libcpp.pxd +30 -0
  627. maxframe/lib/dtypes_extension/__init__.py +30 -0
  628. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  629. maxframe/lib/dtypes_extension/blob.py +304 -0
  630. maxframe/lib/dtypes_extension/dtypes.py +106 -0
  631. maxframe/lib/dtypes_extension/tests/__init__.py +13 -0
  632. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  633. maxframe/lib/dtypes_extension/tests/test_dtypes.py +63 -0
  634. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  635. maxframe/lib/filesystem/__init__.py +21 -0
  636. maxframe/lib/filesystem/_glob.py +173 -0
  637. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  638. maxframe/lib/filesystem/_oss_lib/common.py +270 -0
  639. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  640. maxframe/lib/filesystem/_oss_lib/handle.py +152 -0
  641. maxframe/lib/filesystem/arrow.py +236 -0
  642. maxframe/lib/filesystem/base.py +263 -0
  643. maxframe/lib/filesystem/core.py +95 -0
  644. maxframe/lib/filesystem/fsmap.py +164 -0
  645. maxframe/lib/filesystem/hdfs.py +31 -0
  646. maxframe/lib/filesystem/local.py +112 -0
  647. maxframe/lib/filesystem/oss.py +226 -0
  648. maxframe/lib/filesystem/tests/__init__.py +13 -0
  649. maxframe/lib/filesystem/tests/test_filesystem.py +225 -0
  650. maxframe/lib/filesystem/tests/test_oss.py +220 -0
  651. maxframe/lib/functools_compat.py +81 -0
  652. maxframe/lib/mmh3.cpython-312-darwin.so +0 -0
  653. maxframe/lib/mmh3.pyi +43 -0
  654. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  655. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  656. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  657. maxframe/lib/sparse/__init__.py +856 -0
  658. maxframe/lib/sparse/array.py +1616 -0
  659. maxframe/lib/sparse/core.py +90 -0
  660. maxframe/lib/sparse/linalg.py +31 -0
  661. maxframe/lib/sparse/matrix.py +244 -0
  662. maxframe/lib/sparse/tests/__init__.py +13 -0
  663. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  664. maxframe/lib/sparse/vector.py +148 -0
  665. maxframe/lib/tblib/LICENSE +20 -0
  666. maxframe/lib/tblib/__init__.py +327 -0
  667. maxframe/lib/tblib/cpython.py +83 -0
  668. maxframe/lib/tblib/decorators.py +44 -0
  669. maxframe/lib/tblib/pickling_support.py +90 -0
  670. maxframe/lib/tests/__init__.py +13 -0
  671. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  672. maxframe/lib/version.py +620 -0
  673. maxframe/lib/wrapped_pickle.py +150 -0
  674. maxframe/mixin.py +157 -0
  675. maxframe/opcodes.py +649 -0
  676. maxframe/protocol.py +607 -0
  677. maxframe/remote/__init__.py +18 -0
  678. maxframe/remote/core.py +208 -0
  679. maxframe/remote/run_script.py +124 -0
  680. maxframe/serialization/__init__.py +39 -0
  681. maxframe/serialization/arrow.py +120 -0
  682. maxframe/serialization/blob.py +32 -0
  683. maxframe/serialization/core.cpython-312-darwin.so +0 -0
  684. maxframe/serialization/core.pxd +50 -0
  685. maxframe/serialization/core.pyi +66 -0
  686. maxframe/serialization/core.pyx +1265 -0
  687. maxframe/serialization/exception.py +84 -0
  688. maxframe/serialization/maxframe_objects.py +39 -0
  689. maxframe/serialization/numpy.py +110 -0
  690. maxframe/serialization/pandas.py +278 -0
  691. maxframe/serialization/scipy.py +71 -0
  692. maxframe/serialization/serializables/__init__.py +55 -0
  693. maxframe/serialization/serializables/core.py +469 -0
  694. maxframe/serialization/serializables/field.py +624 -0
  695. maxframe/serialization/serializables/field_type.py +592 -0
  696. maxframe/serialization/serializables/tests/__init__.py +13 -0
  697. maxframe/serialization/serializables/tests/test_field_type.py +119 -0
  698. maxframe/serialization/serializables/tests/test_serializable.py +313 -0
  699. maxframe/serialization/tests/__init__.py +13 -0
  700. maxframe/serialization/tests/test_serial.py +487 -0
  701. maxframe/session.py +1250 -0
  702. maxframe/sperunner.py +165 -0
  703. maxframe/tensor/__init__.py +325 -0
  704. maxframe/tensor/arithmetic/__init__.py +322 -0
  705. maxframe/tensor/arithmetic/abs.py +66 -0
  706. maxframe/tensor/arithmetic/absolute.py +66 -0
  707. maxframe/tensor/arithmetic/add.py +112 -0
  708. maxframe/tensor/arithmetic/angle.py +70 -0
  709. maxframe/tensor/arithmetic/arccos.py +101 -0
  710. maxframe/tensor/arithmetic/arccosh.py +89 -0
  711. maxframe/tensor/arithmetic/arcsin.py +92 -0
  712. maxframe/tensor/arithmetic/arcsinh.py +84 -0
  713. maxframe/tensor/arithmetic/arctan.py +104 -0
  714. maxframe/tensor/arithmetic/arctan2.py +126 -0
  715. maxframe/tensor/arithmetic/arctanh.py +84 -0
  716. maxframe/tensor/arithmetic/around.py +112 -0
  717. maxframe/tensor/arithmetic/bitand.py +93 -0
  718. maxframe/tensor/arithmetic/bitor.py +100 -0
  719. maxframe/tensor/arithmetic/bitxor.py +93 -0
  720. maxframe/tensor/arithmetic/cbrt.py +64 -0
  721. maxframe/tensor/arithmetic/ceil.py +69 -0
  722. maxframe/tensor/arithmetic/clip.py +165 -0
  723. maxframe/tensor/arithmetic/conj.py +72 -0
  724. maxframe/tensor/arithmetic/copysign.py +76 -0
  725. maxframe/tensor/arithmetic/core.py +552 -0
  726. maxframe/tensor/arithmetic/cos.py +83 -0
  727. maxframe/tensor/arithmetic/cosh.py +70 -0
  728. maxframe/tensor/arithmetic/deg2rad.py +70 -0
  729. maxframe/tensor/arithmetic/degrees.py +75 -0
  730. maxframe/tensor/arithmetic/divide.py +112 -0
  731. maxframe/tensor/arithmetic/equal.py +74 -0
  732. maxframe/tensor/arithmetic/exp.py +104 -0
  733. maxframe/tensor/arithmetic/exp2.py +65 -0
  734. maxframe/tensor/arithmetic/expm1.py +77 -0
  735. maxframe/tensor/arithmetic/fabs.py +72 -0
  736. maxframe/tensor/arithmetic/fix.py +67 -0
  737. maxframe/tensor/arithmetic/float_power.py +101 -0
  738. maxframe/tensor/arithmetic/floor.py +75 -0
  739. maxframe/tensor/arithmetic/floordiv.py +92 -0
  740. maxframe/tensor/arithmetic/fmax.py +103 -0
  741. maxframe/tensor/arithmetic/fmin.py +104 -0
  742. maxframe/tensor/arithmetic/fmod.py +97 -0
  743. maxframe/tensor/arithmetic/frexp.py +96 -0
  744. maxframe/tensor/arithmetic/greater.py +75 -0
  745. maxframe/tensor/arithmetic/greater_equal.py +67 -0
  746. maxframe/tensor/arithmetic/hypot.py +75 -0
  747. maxframe/tensor/arithmetic/i0.py +87 -0
  748. maxframe/tensor/arithmetic/imag.py +65 -0
  749. maxframe/tensor/arithmetic/invert.py +108 -0
  750. maxframe/tensor/arithmetic/isclose.py +114 -0
  751. maxframe/tensor/arithmetic/iscomplex.py +62 -0
  752. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  753. maxframe/tensor/arithmetic/isfinite.py +104 -0
  754. maxframe/tensor/arithmetic/isinf.py +101 -0
  755. maxframe/tensor/arithmetic/isnan.py +80 -0
  756. maxframe/tensor/arithmetic/isreal.py +61 -0
  757. maxframe/tensor/arithmetic/ldexp.py +97 -0
  758. maxframe/tensor/arithmetic/less.py +67 -0
  759. maxframe/tensor/arithmetic/less_equal.py +67 -0
  760. maxframe/tensor/arithmetic/log.py +90 -0
  761. maxframe/tensor/arithmetic/log10.py +83 -0
  762. maxframe/tensor/arithmetic/log1p.py +93 -0
  763. maxframe/tensor/arithmetic/log2.py +83 -0
  764. maxframe/tensor/arithmetic/logaddexp.py +78 -0
  765. maxframe/tensor/arithmetic/logaddexp2.py +76 -0
  766. maxframe/tensor/arithmetic/logical_and.py +79 -0
  767. maxframe/tensor/arithmetic/logical_not.py +72 -0
  768. maxframe/tensor/arithmetic/logical_or.py +80 -0
  769. maxframe/tensor/arithmetic/logical_xor.py +86 -0
  770. maxframe/tensor/arithmetic/lshift.py +80 -0
  771. maxframe/tensor/arithmetic/maximum.py +106 -0
  772. maxframe/tensor/arithmetic/minimum.py +106 -0
  773. maxframe/tensor/arithmetic/mod.py +102 -0
  774. maxframe/tensor/arithmetic/modf.py +87 -0
  775. maxframe/tensor/arithmetic/multiply.py +114 -0
  776. maxframe/tensor/arithmetic/nan_to_num.py +97 -0
  777. maxframe/tensor/arithmetic/negative.py +63 -0
  778. maxframe/tensor/arithmetic/nextafter.py +66 -0
  779. maxframe/tensor/arithmetic/not_equal.py +70 -0
  780. maxframe/tensor/arithmetic/positive.py +45 -0
  781. maxframe/tensor/arithmetic/power.py +104 -0
  782. maxframe/tensor/arithmetic/rad2deg.py +69 -0
  783. maxframe/tensor/arithmetic/radians.py +75 -0
  784. maxframe/tensor/arithmetic/real.py +68 -0
  785. maxframe/tensor/arithmetic/reciprocal.py +78 -0
  786. maxframe/tensor/arithmetic/rint.py +66 -0
  787. maxframe/tensor/arithmetic/rshift.py +79 -0
  788. maxframe/tensor/arithmetic/setimag.py +27 -0
  789. maxframe/tensor/arithmetic/setreal.py +27 -0
  790. maxframe/tensor/arithmetic/sign.py +79 -0
  791. maxframe/tensor/arithmetic/signbit.py +63 -0
  792. maxframe/tensor/arithmetic/sin.py +96 -0
  793. maxframe/tensor/arithmetic/sinc.py +100 -0
  794. maxframe/tensor/arithmetic/sinh.py +91 -0
  795. maxframe/tensor/arithmetic/spacing.py +70 -0
  796. maxframe/tensor/arithmetic/sqrt.py +79 -0
  797. maxframe/tensor/arithmetic/square.py +67 -0
  798. maxframe/tensor/arithmetic/subtract.py +83 -0
  799. maxframe/tensor/arithmetic/tan.py +86 -0
  800. maxframe/tensor/arithmetic/tanh.py +90 -0
  801. maxframe/tensor/arithmetic/tests/__init__.py +13 -0
  802. maxframe/tensor/arithmetic/tests/test_arithmetic.py +458 -0
  803. maxframe/tensor/arithmetic/truediv.py +102 -0
  804. maxframe/tensor/arithmetic/trunc.py +70 -0
  805. maxframe/tensor/arithmetic/utils.py +91 -0
  806. maxframe/tensor/array_utils.py +164 -0
  807. maxframe/tensor/core.py +594 -0
  808. maxframe/tensor/datasource/__init__.py +40 -0
  809. maxframe/tensor/datasource/arange.py +154 -0
  810. maxframe/tensor/datasource/array.py +399 -0
  811. maxframe/tensor/datasource/core.py +114 -0
  812. maxframe/tensor/datasource/diag.py +140 -0
  813. maxframe/tensor/datasource/diagflat.py +69 -0
  814. maxframe/tensor/datasource/empty.py +167 -0
  815. maxframe/tensor/datasource/eye.py +95 -0
  816. maxframe/tensor/datasource/from_dataframe.py +68 -0
  817. maxframe/tensor/datasource/from_dense.py +37 -0
  818. maxframe/tensor/datasource/from_sparse.py +45 -0
  819. maxframe/tensor/datasource/full.py +184 -0
  820. maxframe/tensor/datasource/identity.py +54 -0
  821. maxframe/tensor/datasource/indices.py +115 -0
  822. maxframe/tensor/datasource/linspace.py +140 -0
  823. maxframe/tensor/datasource/meshgrid.py +135 -0
  824. maxframe/tensor/datasource/ones.py +178 -0
  825. maxframe/tensor/datasource/scalar.py +40 -0
  826. maxframe/tensor/datasource/tests/__init__.py +13 -0
  827. maxframe/tensor/datasource/tests/test_datasource.py +310 -0
  828. maxframe/tensor/datasource/tri_array.py +107 -0
  829. maxframe/tensor/datasource/zeros.py +192 -0
  830. maxframe/tensor/extensions/__init__.py +33 -0
  831. maxframe/tensor/extensions/accessor.py +25 -0
  832. maxframe/tensor/extensions/apply_chunk.py +137 -0
  833. maxframe/tensor/extensions/rebalance.py +65 -0
  834. maxframe/tensor/fetch/__init__.py +15 -0
  835. maxframe/tensor/fetch/core.py +54 -0
  836. maxframe/tensor/fft/__init__.py +32 -0
  837. maxframe/tensor/fft/core.py +168 -0
  838. maxframe/tensor/fft/fft.py +112 -0
  839. maxframe/tensor/fft/fft2.py +118 -0
  840. maxframe/tensor/fft/fftfreq.py +80 -0
  841. maxframe/tensor/fft/fftn.py +123 -0
  842. maxframe/tensor/fft/fftshift.py +79 -0
  843. maxframe/tensor/fft/hfft.py +112 -0
  844. maxframe/tensor/fft/ifft.py +114 -0
  845. maxframe/tensor/fft/ifft2.py +115 -0
  846. maxframe/tensor/fft/ifftn.py +123 -0
  847. maxframe/tensor/fft/ifftshift.py +73 -0
  848. maxframe/tensor/fft/ihfft.py +93 -0
  849. maxframe/tensor/fft/irfft.py +118 -0
  850. maxframe/tensor/fft/irfft2.py +62 -0
  851. maxframe/tensor/fft/irfftn.py +114 -0
  852. maxframe/tensor/fft/rfft.py +116 -0
  853. maxframe/tensor/fft/rfft2.py +63 -0
  854. maxframe/tensor/fft/rfftfreq.py +87 -0
  855. maxframe/tensor/fft/rfftn.py +113 -0
  856. maxframe/tensor/indexing/__init__.py +47 -0
  857. maxframe/tensor/indexing/choose.py +198 -0
  858. maxframe/tensor/indexing/compress.py +122 -0
  859. maxframe/tensor/indexing/core.py +190 -0
  860. maxframe/tensor/indexing/extract.py +69 -0
  861. maxframe/tensor/indexing/fill_diagonal.py +180 -0
  862. maxframe/tensor/indexing/flatnonzero.py +58 -0
  863. maxframe/tensor/indexing/getitem.py +144 -0
  864. maxframe/tensor/indexing/nonzero.py +118 -0
  865. maxframe/tensor/indexing/setitem.py +142 -0
  866. maxframe/tensor/indexing/slice.py +32 -0
  867. maxframe/tensor/indexing/take.py +128 -0
  868. maxframe/tensor/indexing/tests/__init__.py +13 -0
  869. maxframe/tensor/indexing/tests/test_indexing.py +232 -0
  870. maxframe/tensor/indexing/unravel_index.py +103 -0
  871. maxframe/tensor/lib/__init__.py +16 -0
  872. maxframe/tensor/lib/index_tricks.py +404 -0
  873. maxframe/tensor/linalg/__init__.py +43 -0
  874. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  875. maxframe/tensor/linalg/cholesky.py +117 -0
  876. maxframe/tensor/linalg/dot.py +145 -0
  877. maxframe/tensor/linalg/einsum.py +339 -0
  878. maxframe/tensor/linalg/inner.py +36 -0
  879. maxframe/tensor/linalg/inv.py +83 -0
  880. maxframe/tensor/linalg/lstsq.py +100 -0
  881. maxframe/tensor/linalg/lu.py +115 -0
  882. maxframe/tensor/linalg/matmul.py +225 -0
  883. maxframe/tensor/linalg/matrix_norm.py +75 -0
  884. maxframe/tensor/linalg/norm.py +249 -0
  885. maxframe/tensor/linalg/qr.py +124 -0
  886. maxframe/tensor/linalg/solve.py +72 -0
  887. maxframe/tensor/linalg/solve_triangular.py +103 -0
  888. maxframe/tensor/linalg/svd.py +167 -0
  889. maxframe/tensor/linalg/tensordot.py +213 -0
  890. maxframe/tensor/linalg/vdot.py +73 -0
  891. maxframe/tensor/linalg/vector_norm.py +113 -0
  892. maxframe/tensor/merge/__init__.py +21 -0
  893. maxframe/tensor/merge/append.py +74 -0
  894. maxframe/tensor/merge/column_stack.py +63 -0
  895. maxframe/tensor/merge/concatenate.py +103 -0
  896. maxframe/tensor/merge/dstack.py +71 -0
  897. maxframe/tensor/merge/hstack.py +70 -0
  898. maxframe/tensor/merge/stack.py +130 -0
  899. maxframe/tensor/merge/tests/__init__.py +13 -0
  900. maxframe/tensor/merge/tests/test_merge.py +79 -0
  901. maxframe/tensor/merge/vstack.py +74 -0
  902. maxframe/tensor/misc/__init__.py +72 -0
  903. maxframe/tensor/misc/argwhere.py +72 -0
  904. maxframe/tensor/misc/array_split.py +46 -0
  905. maxframe/tensor/misc/astype.py +121 -0
  906. maxframe/tensor/misc/atleast_1d.py +72 -0
  907. maxframe/tensor/misc/atleast_2d.py +70 -0
  908. maxframe/tensor/misc/atleast_3d.py +85 -0
  909. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  910. maxframe/tensor/misc/broadcast_to.py +89 -0
  911. maxframe/tensor/misc/copy.py +64 -0
  912. maxframe/tensor/misc/copyto.py +130 -0
  913. maxframe/tensor/misc/delete.py +104 -0
  914. maxframe/tensor/misc/diff.py +115 -0
  915. maxframe/tensor/misc/dsplit.py +68 -0
  916. maxframe/tensor/misc/ediff1d.py +74 -0
  917. maxframe/tensor/misc/expand_dims.py +85 -0
  918. maxframe/tensor/misc/flatten.py +63 -0
  919. maxframe/tensor/misc/flip.py +90 -0
  920. maxframe/tensor/misc/fliplr.py +64 -0
  921. maxframe/tensor/misc/flipud.py +68 -0
  922. maxframe/tensor/misc/hsplit.py +85 -0
  923. maxframe/tensor/misc/in1d.py +94 -0
  924. maxframe/tensor/misc/insert.py +139 -0
  925. maxframe/tensor/misc/isin.py +130 -0
  926. maxframe/tensor/misc/moveaxis.py +83 -0
  927. maxframe/tensor/misc/ndim.py +53 -0
  928. maxframe/tensor/misc/ravel.py +90 -0
  929. maxframe/tensor/misc/repeat.py +129 -0
  930. maxframe/tensor/misc/result_type.py +88 -0
  931. maxframe/tensor/misc/roll.py +124 -0
  932. maxframe/tensor/misc/rollaxis.py +77 -0
  933. maxframe/tensor/misc/searchsorted.py +147 -0
  934. maxframe/tensor/misc/setdiff1d.py +58 -0
  935. maxframe/tensor/misc/shape.py +89 -0
  936. maxframe/tensor/misc/split.py +190 -0
  937. maxframe/tensor/misc/squeeze.py +117 -0
  938. maxframe/tensor/misc/swapaxes.py +113 -0
  939. maxframe/tensor/misc/tests/__init__.py +13 -0
  940. maxframe/tensor/misc/tests/test_misc.py +112 -0
  941. maxframe/tensor/misc/tile.py +109 -0
  942. maxframe/tensor/misc/transpose.py +133 -0
  943. maxframe/tensor/misc/trapezoid.py +123 -0
  944. maxframe/tensor/misc/unique.py +205 -0
  945. maxframe/tensor/misc/vsplit.py +74 -0
  946. maxframe/tensor/misc/where.py +129 -0
  947. maxframe/tensor/operators.py +83 -0
  948. maxframe/tensor/random/__init__.py +166 -0
  949. maxframe/tensor/random/beta.py +87 -0
  950. maxframe/tensor/random/binomial.py +135 -0
  951. maxframe/tensor/random/bytes.py +37 -0
  952. maxframe/tensor/random/chisquare.py +108 -0
  953. maxframe/tensor/random/choice.py +187 -0
  954. maxframe/tensor/random/core.py +249 -0
  955. maxframe/tensor/random/dirichlet.py +121 -0
  956. maxframe/tensor/random/exponential.py +92 -0
  957. maxframe/tensor/random/f.py +133 -0
  958. maxframe/tensor/random/gamma.py +126 -0
  959. maxframe/tensor/random/geometric.py +91 -0
  960. maxframe/tensor/random/gumbel.py +165 -0
  961. maxframe/tensor/random/hypergeometric.py +146 -0
  962. maxframe/tensor/random/laplace.py +131 -0
  963. maxframe/tensor/random/logistic.py +127 -0
  964. maxframe/tensor/random/lognormal.py +157 -0
  965. maxframe/tensor/random/logseries.py +120 -0
  966. maxframe/tensor/random/multinomial.py +131 -0
  967. maxframe/tensor/random/multivariate_normal.py +190 -0
  968. maxframe/tensor/random/negative_binomial.py +123 -0
  969. maxframe/tensor/random/noncentral_chisquare.py +130 -0
  970. maxframe/tensor/random/noncentral_f.py +124 -0
  971. maxframe/tensor/random/normal.py +141 -0
  972. maxframe/tensor/random/pareto.py +138 -0
  973. maxframe/tensor/random/permutation.py +107 -0
  974. maxframe/tensor/random/poisson.py +109 -0
  975. maxframe/tensor/random/power.py +140 -0
  976. maxframe/tensor/random/rand.py +80 -0
  977. maxframe/tensor/random/randint.py +119 -0
  978. maxframe/tensor/random/randn.py +94 -0
  979. maxframe/tensor/random/random_integers.py +121 -0
  980. maxframe/tensor/random/random_sample.py +84 -0
  981. maxframe/tensor/random/rayleigh.py +108 -0
  982. maxframe/tensor/random/shuffle.py +61 -0
  983. maxframe/tensor/random/standard_cauchy.py +103 -0
  984. maxframe/tensor/random/standard_exponential.py +70 -0
  985. maxframe/tensor/random/standard_gamma.py +118 -0
  986. maxframe/tensor/random/standard_normal.py +72 -0
  987. maxframe/tensor/random/standard_t.py +133 -0
  988. maxframe/tensor/random/tests/__init__.py +13 -0
  989. maxframe/tensor/random/tests/test_random.py +165 -0
  990. maxframe/tensor/random/triangular.py +117 -0
  991. maxframe/tensor/random/uniform.py +129 -0
  992. maxframe/tensor/random/vonmises.py +129 -0
  993. maxframe/tensor/random/wald.py +112 -0
  994. maxframe/tensor/random/weibull.py +138 -0
  995. maxframe/tensor/random/zipf.py +120 -0
  996. maxframe/tensor/rechunk/__init__.py +26 -0
  997. maxframe/tensor/rechunk/rechunk.py +43 -0
  998. maxframe/tensor/reduction/__init__.py +64 -0
  999. maxframe/tensor/reduction/all.py +101 -0
  1000. maxframe/tensor/reduction/allclose.py +86 -0
  1001. maxframe/tensor/reduction/any.py +103 -0
  1002. maxframe/tensor/reduction/argmax.py +101 -0
  1003. maxframe/tensor/reduction/argmin.py +101 -0
  1004. maxframe/tensor/reduction/array_equal.py +63 -0
  1005. maxframe/tensor/reduction/core.py +166 -0
  1006. maxframe/tensor/reduction/count_nonzero.py +80 -0
  1007. maxframe/tensor/reduction/cumprod.py +95 -0
  1008. maxframe/tensor/reduction/cumsum.py +99 -0
  1009. maxframe/tensor/reduction/max.py +118 -0
  1010. maxframe/tensor/reduction/mean.py +122 -0
  1011. maxframe/tensor/reduction/min.py +118 -0
  1012. maxframe/tensor/reduction/nanargmax.py +80 -0
  1013. maxframe/tensor/reduction/nanargmin.py +74 -0
  1014. maxframe/tensor/reduction/nancumprod.py +89 -0
  1015. maxframe/tensor/reduction/nancumsum.py +92 -0
  1016. maxframe/tensor/reduction/nanmax.py +109 -0
  1017. maxframe/tensor/reduction/nanmean.py +105 -0
  1018. maxframe/tensor/reduction/nanmin.py +109 -0
  1019. maxframe/tensor/reduction/nanprod.py +92 -0
  1020. maxframe/tensor/reduction/nanstd.py +124 -0
  1021. maxframe/tensor/reduction/nansum.py +113 -0
  1022. maxframe/tensor/reduction/nanvar.py +149 -0
  1023. maxframe/tensor/reduction/prod.py +128 -0
  1024. maxframe/tensor/reduction/std.py +132 -0
  1025. maxframe/tensor/reduction/sum.py +123 -0
  1026. maxframe/tensor/reduction/tests/__init__.py +13 -0
  1027. maxframe/tensor/reduction/tests/test_reduction.py +189 -0
  1028. maxframe/tensor/reduction/var.py +176 -0
  1029. maxframe/tensor/reshape/__init__.py +15 -0
  1030. maxframe/tensor/reshape/reshape.py +192 -0
  1031. maxframe/tensor/reshape/tests/__init__.py +13 -0
  1032. maxframe/tensor/reshape/tests/test_reshape.py +35 -0
  1033. maxframe/tensor/sort/__init__.py +18 -0
  1034. maxframe/tensor/sort/argpartition.py +98 -0
  1035. maxframe/tensor/sort/argsort.py +150 -0
  1036. maxframe/tensor/sort/partition.py +228 -0
  1037. maxframe/tensor/sort/sort.py +295 -0
  1038. maxframe/tensor/spatial/__init__.py +15 -0
  1039. maxframe/tensor/spatial/distance/__init__.py +17 -0
  1040. maxframe/tensor/spatial/distance/cdist.py +421 -0
  1041. maxframe/tensor/spatial/distance/pdist.py +398 -0
  1042. maxframe/tensor/spatial/distance/squareform.py +153 -0
  1043. maxframe/tensor/special/__init__.py +175 -0
  1044. maxframe/tensor/special/airy.py +55 -0
  1045. maxframe/tensor/special/bessel.py +199 -0
  1046. maxframe/tensor/special/core.py +99 -0
  1047. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  1048. maxframe/tensor/special/ellip_harm.py +55 -0
  1049. maxframe/tensor/special/err_fresnel.py +223 -0
  1050. maxframe/tensor/special/gamma_funcs.py +303 -0
  1051. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  1052. maxframe/tensor/special/info_theory.py +189 -0
  1053. maxframe/tensor/special/misc.py +163 -0
  1054. maxframe/tensor/special/statistical.py +56 -0
  1055. maxframe/tensor/statistics/__init__.py +24 -0
  1056. maxframe/tensor/statistics/average.py +143 -0
  1057. maxframe/tensor/statistics/bincount.py +133 -0
  1058. maxframe/tensor/statistics/corrcoef.py +77 -0
  1059. maxframe/tensor/statistics/cov.py +222 -0
  1060. maxframe/tensor/statistics/digitize.py +126 -0
  1061. maxframe/tensor/statistics/histogram.py +520 -0
  1062. maxframe/tensor/statistics/median.py +85 -0
  1063. maxframe/tensor/statistics/percentile.py +175 -0
  1064. maxframe/tensor/statistics/ptp.py +89 -0
  1065. maxframe/tensor/statistics/quantile.py +290 -0
  1066. maxframe/tensor/ufunc/__init__.py +24 -0
  1067. maxframe/tensor/ufunc/ufunc.py +198 -0
  1068. maxframe/tensor/utils.py +716 -0
  1069. maxframe/tests/__init__.py +13 -0
  1070. maxframe/tests/test_protocol.py +178 -0
  1071. maxframe/tests/test_utils.py +615 -0
  1072. maxframe/tests/utils.py +245 -0
  1073. maxframe/typing_.py +42 -0
  1074. maxframe/udf.py +260 -0
  1075. maxframe/utils.py +1721 -0
  1076. maxframe-2.2.0.dist-info/METADATA +110 -0
  1077. maxframe-2.2.0.dist-info/RECORD +1094 -0
  1078. maxframe-2.2.0.dist-info/WHEEL +5 -0
  1079. maxframe-2.2.0.dist-info/top_level.txt +3 -0
  1080. maxframe_client/__init__.py +16 -0
  1081. maxframe_client/clients/__init__.py +13 -0
  1082. maxframe_client/clients/framedriver.py +137 -0
  1083. maxframe_client/conftest.py +15 -0
  1084. maxframe_client/fetcher.py +411 -0
  1085. maxframe_client/session/__init__.py +22 -0
  1086. maxframe_client/session/consts.py +39 -0
  1087. maxframe_client/session/graph.py +125 -0
  1088. maxframe_client/session/odps.py +802 -0
  1089. maxframe_client/session/task.py +322 -0
  1090. maxframe_client/session/tests/__init__.py +13 -0
  1091. maxframe_client/session/tests/test_task.py +115 -0
  1092. maxframe_client/tests/__init__.py +13 -0
  1093. maxframe_client/tests/test_fetcher.py +180 -0
  1094. maxframe_client/tests/test_session.py +385 -0
@@ -0,0 +1,1647 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import dataclasses
16
+ import functools
17
+ import inspect
18
+ import itertools
19
+ import logging
20
+ import operator
21
+ import sys
22
+ from contextlib import contextmanager
23
+ from numbers import Integral
24
+ from typing import TYPE_CHECKING, Any, Callable, List, Optional
25
+
26
+ import numpy as np
27
+ import pandas as pd
28
+ from pandas.api.types import is_string_dtype
29
+ from pandas.core.dtypes.inference import is_dict_like, is_list_like
30
+
31
+ from ..core import ENTITY_TYPE, Entity, ExecutableTuple, OutputType, get_output_types
32
+ from ..lib.dtypes_extension import ExternalBlobDtype, SolidBlob
33
+ from ..lib.mmh3 import hash as mmh_hash
34
+ from ..udf import MarkedFunction
35
+ from ..utils import (
36
+ ModulePlaceholder,
37
+ is_full_slice,
38
+ lazy_import,
39
+ make_dtype,
40
+ make_dtypes,
41
+ quiet_stdio,
42
+ sbytes,
43
+ tokenize,
44
+ validate_and_adjust_resource_ratio,
45
+ )
46
+
47
+ if TYPE_CHECKING:
48
+ from .core import IndexValue
49
+
50
+ try:
51
+ import pyarrow as pa
52
+ except ImportError: # pragma: no cover
53
+ pa = ModulePlaceholder("pyarrow")
54
+
55
+ if TYPE_CHECKING:
56
+ from .operators import DataFrameOperator
57
+
58
+ cudf = lazy_import("cudf", rename="cudf")
59
+ logger = logging.getLogger(__name__)
60
+
61
+ try:
62
+ from ..lib.dtypes_extension import ArrowDtype
63
+ except ImportError:
64
+ ArrowDtype = None
65
+
66
+
67
def hash_index(index, size):
    """Split the labels of ``index`` into ``size`` hash buckets.

    Each label is converted with ``sbytes``, hashed with ``mmh_hash`` and
    reduced modulo ``size``; labels sharing the same remainder end up in
    the same bucket.

    Returns a list of length ``size``.  Position ``i`` holds the group that
    ``index.groupby`` produced for bucket ``i``, or an empty list when no
    label fell into that bucket.
    """

    def _bucket_of(label):
        return mmh_hash(sbytes(label)) % size

    bucket_ids = index.map(_bucket_of)
    grouped = index.groupby(bucket_ids)
    return [grouped.get(bucket, list()) for bucket in range(size)]
74
+
75
+
76
def hash_dataframe_on(df, on, size, level=None):
    """Group the row positions of ``df`` into ``size`` hash buckets.

    The value that gets hashed depends on ``on``:

    * ``None``: the index of ``df`` (restricted to ``level`` when given),
    * a callable: the index of ``df`` mapped through that callable,
    * a list: the listed items concatenated column-wise, where a
      ``pd.Series`` item is used as is and anything else is looked up
      with ``df[item]``,
    * anything else: ``df[on]``.

    Returns a list of length ``size``.  Entry ``i`` is the group of
    positions (taken from a ``RangeIndex`` over the hashed values) whose
    hash modulo ``size`` equals ``i``, or an empty ``pd.Index`` when no
    row landed in that bucket.
    """
    if on is None:
        labels = df.index
        if level is not None:
            labels = labels.to_frame(False)[level]
        if cudf and isinstance(labels, cudf.Index):  # pragma: no cover
            labels = labels.to_pandas()
        hashed_label = pd.util.hash_pandas_object(labels, categorize=False)
    elif callable(on):
        # todo optimization can be added, if ``on`` is a numpy ufunc or sth can be vectorized
        mapped_index = df.index.map(on)
        hashed_label = pd.util.hash_pandas_object(mapped_index, categorize=False)
    else:
        if isinstance(on, list):
            pieces = [
                item if isinstance(item, pd.Series) else df[item] for item in on
            ]
            data = pd.concat(pieces, axis=1)
        else:
            data = df[on]
        hashed_label = pd.util.hash_pandas_object(data, index=False, categorize=False)

    positions = pd.RangeIndex(0, len(hashed_label))
    grouped = positions.groupby(hashed_label % size)
    return [grouped.get(bucket, pd.Index([])) for bucket in range(size)]
101
+
102
+
103
def hash_dtypes(dtypes, size):
    """Split ``dtypes`` into ``size`` pieces by hashing its index labels.

    The labels of ``dtypes.index`` are bucketed with ``hash_index``; each
    bucket is then used to select the matching entries from ``dtypes``.
    Returns the selections as a list with one item per bucket.
    """
    selections = []
    for bucket_labels in hash_index(dtypes.index, size):
        selections.append(dtypes[bucket_labels])
    return selections
106
+
107
+
108
def sort_dataframe_inplace(df, *axis):
    """Sort ``df`` in place along each of the given axes, in order.

    Every positional value in ``axis`` is forwarded to
    ``df.sort_index(axis=..., inplace=True)``.  The same ``df`` object is
    returned for convenience.
    """
    for axis_id in axis:
        df.sort_index(inplace=True, axis=axis_id)
    return df
112
+
113
+
114
@functools.lru_cache(1)
def _get_range_index_type():
    """Return the range-index class(es) known to this module.

    Yields ``pd.RangeIndex`` alone when the module-level ``cudf`` is
    ``None``; otherwise a ``(pd.RangeIndex, cudf.RangeIndex)`` tuple.
    The result is memoized by ``lru_cache``.
    """
    if cudf is None:
        return pd.RangeIndex
    return pd.RangeIndex, cudf.RangeIndex
120
+
121
+
122
@functools.lru_cache(1)
def _get_multi_index_type():
    """Return the multi-index class(es) known to this module.

    Yields ``pd.MultiIndex`` alone when the module-level ``cudf`` is
    ``None``; otherwise a ``(pd.MultiIndex, cudf.MultiIndex)`` tuple.
    The result is memoized by ``lru_cache``.
    """
    if cudf is None:
        return pd.MultiIndex
    return pd.MultiIndex, cudf.MultiIndex
128
+
129
+
130
+ def _get_range_index_start(pd_range_index):
131
+ try:
132
+ return pd_range_index.start
133
+ except AttributeError: # pragma: no cover
134
+ return pd_range_index._start
135
+
136
+
137
+ def _get_range_index_stop(pd_range_index):
138
+ try:
139
+ return pd_range_index.stop
140
+ except AttributeError: # pragma: no cover
141
+ return pd_range_index._stop
142
+
143
+
144
def _get_range_index_step(pd_range_index):
    """
    Return the step of a range index.

    ``step`` is tried first, then the private ``_step``; when neither
    attribute exists the step defaults to 1.
    """
    for attr_name in ("step", "_step"):
        try:
            return getattr(pd_range_index, attr_name)
        except AttributeError:  # pragma: no cover
            continue
    return 1  # cudf does not support step arg
153
+
154
+
155
def is_pd_range_empty(pd_range_index):
    """
    Tell whether a range index contains no element.

    With a non-negative step the range is empty when ``start >= stop``;
    with a negative step it is empty when ``start <= stop``.
    """
    start = _get_range_index_start(pd_range_index)
    stop = _get_range_index_stop(pd_range_index)
    step = _get_range_index_step(pd_range_index)
    if step >= 0:
        return start >= stop
    return start <= stop
162
+
163
+
164
def parse_index(index_value, *args, store_data=False, key=None):
    """
    Wrap an index (or ``None``) into an ``IndexValue`` description.

    Parameters
    ----------
    index_value
        Index to describe. ``None`` produces a generic ``IndexValue.Index``
        without bounds; objects exposing ``to_pandas`` are converted first.
    args
        Extra objects mixed into the generated key. For a non-empty index
        only the index itself is tokenized; for an empty index (or ``None``)
        these objects take part as well.
    store_data
        Whether ``index.values`` is stored in the description. Range indexes
        never store data.
    key
        Explicit key; when falsy a key is generated with ``tokenize``.

    Returns
    -------
    ``IndexValue`` wrapping the serialized description.
    """
    from .core import IndexValue

    def _make_key(index):
        # explicit key wins; empty indexes also tokenize the extra args
        if key:
            return key
        if index.empty:
            return tokenize(index, *args)
        return tokenize(index)

    def _bound(index, method_name):
        # ``min``/``max`` of the index, or None when it cannot be computed
        try:
            return getattr(index, method_name)()
        except (ValueError, AttributeError):
            if isinstance(index, pd.IntervalIndex):
                return None
            raise
        except TypeError:
            return None

    def _collect_properties(index, value_type, with_data):
        props = {
            "_min_val": _bound(index, "min"),
            "_max_val": _bound(index, "max"),
            "_min_val_close": True,
            "_max_val_close": True,
            "_key": _make_key(index),
        }
        if with_data:
            props["_data"] = index.values
        # copy every remaining declared field the index exposes a value for
        for field_name in value_type._FIELDS:
            if field_name == "_data" or field_name in props:
                continue
            attr = getattr(index, field_name.lstrip("_"), None)
            if attr is not None:
                props[field_name] = attr
        return props

    def _from_plain_index(index):
        value_type = getattr(IndexValue, type(index).__name__)
        props = _collect_properties(index, value_type, store_data)
        props["_name"] = index.name
        return value_type(**props)

    def _from_range_index(index):
        if not is_pd_range_empty(index):
            props = _collect_properties(index, IndexValue.RangeIndex, False)
        else:
            props = {
                "_is_monotonic_increasing": True,
                "_is_monotonic_decreasing": False,
                "_is_unique": True,
                "_min_val": _bound(index, "min"),
                "_max_val": _bound(index, "max"),
                "_min_val_close": True,
                "_max_val_close": False,
                "_key": _make_key(index),
                "_name": index.name,
                "_dtype": index.dtype,
            }
        range_slice = slice(
            _get_range_index_start(index),
            _get_range_index_stop(index),
            _get_range_index_step(index),
        )
        return IndexValue.RangeIndex(_slice=range_slice, **props)

    def _from_multi_index(index):
        props = _collect_properties(index, IndexValue.MultiIndex, store_data)
        props["_sortorder"] = index.sortorder
        props["_dtypes"] = [level.dtype for level in index.levels]
        return IndexValue.MultiIndex(**props)

    if index_value is None:
        unknown = IndexValue.Index(
            _is_monotonic_increasing=False,
            _is_monotonic_decreasing=False,
            _is_unique=False,
            _min_val=None,
            _max_val=None,
            _min_val_close=True,
            _max_val_close=True,
            _key=key or tokenize(*args),
        )
        return IndexValue(_index_value=unknown)

    if hasattr(index_value, "to_pandas"):  # pragma: no cover
        # convert cudf.Index to pandas
        index_value = index_value.to_pandas()

    if isinstance(index_value, _get_range_index_type()):
        serialized = _from_range_index(index_value)
    elif isinstance(index_value, _get_multi_index_type()):
        serialized = _from_multi_index(index_value)
    else:
        serialized = _from_plain_index(index_value)
    return IndexValue(_index_value=serialized)
271
+
272
+
273
def gen_unknown_index_value(index_value, *args, normalize_range_index=False):
    """
    Build an index value of the same kind as ``index_value`` but holding no data.

    Parameters
    ----------
    index_value
        Index value used as the template.
    args
        Extra arguments forwarded to ``parse_index``.
    normalize_range_index
        When true, a range index is replaced by an empty plain index instead
        of an empty range index.

    Returns
    -------
    Result of ``parse_index`` applied to an empty placeholder index that
    keeps the name(s) of the template.
    """
    pd_index = index_value.to_pandas()
    if isinstance(pd_index, pd.RangeIndex) and not normalize_range_index:
        placeholder = pd.RangeIndex(-1, name=pd_index.name)
    elif isinstance(pd_index, pd.MultiIndex):
        empty_levels = [level[:0] for level in pd_index.levels]
        placeholder = pd.MultiIndex.from_arrays(empty_levels, names=pd_index.names)
    else:
        placeholder = pd.Index([], dtype=pd_index.dtype, name=pd_index.name)
    return parse_index(placeholder, *args)
302
+
303
+
304
def split_monotonic_index_min_max(
    left_min_max, left_increase, right_min_max, right_increase
):
    """
    Align the chunk ranges of two sides into a common set of sub-ranges.

    Each ``*_min_max`` argument is a list with one 4-tuple per chunk:
    ``(min value, min is closed, max value, max is closed)``. Both lists are
    walked from their first chunk onwards and cut wherever a boundary of the
    other side falls, so that both sides end up described by the same
    sequence of sub-ranges.

    :param left_min_max: ranges of the left chunks
    :param left_increase: if ``False``, the per-chunk result of the left side
        is returned in reversed order
    :param right_min_max: ranges of the right chunks
    :param right_increase: if ``False``, the per-chunk result of the right
        side is returned in reversed order
    :return: ``(left, right)``; each is a list with one entry per original
        chunk, holding the list of sub-ranges assigned to that chunk

    >>> left_min_max = [(0, True, 3, True), (4, True, 8, True), (12, True, 18, True),
    ...                 (20, True, 22, True)]
    >>> right_min_max = [(2, True, 6, True), (7, True, 9, True), (10, True, 14, True),
    ...                  (18, True, 19, True)]
    >>> l, r = split_monotonic_index_min_max(left_min_max, True, right_min_max, True)
    >>> l[0]
    [(0, True, 2, False), (2, True, 3, True)]
    >>> l[3]
    [(18, False, 19, True), (20, True, 22, True)]
    >>> r[2]
    [(10, True, 12, False), (12, True, 14, True)]
    """
    LEFT, RIGHT = 0, 1
    chunks = (left_min_max, right_min_max)
    pieces = ([[] for _ in left_min_max], [[] for _ in right_min_max])
    # mutable copy of the not-yet-consumed part of the current chunk per side
    current = [list(left_min_max[0]), list(right_min_max[0])]
    cursor = [0, 0]
    done = [False, False]

    def emit(interval):
        # the sub-range is recorded for the chunk currently open on each side
        pieces[LEFT][cursor[LEFT]].append(interval)
        pieces[RIGHT][cursor[RIGHT]].append(interval)

    def advance(side):
        # move to the next chunk of ``side`` or mark the side as exhausted
        following = cursor[side] + 1
        if following >= len(chunks[side]):
            done[side] = True
        else:
            cursor[side] = following
            current[side] = list(chunks[side][following])

    while not (done[LEFT] and done[RIGHT]):
        left_cur, right_cur = current[LEFT], current[RIGHT]
        if done[LEFT]:
            emit(tuple(right_cur))
            advance(RIGHT)
        elif done[RIGHT]:
            emit(tuple(left_cur))
            advance(LEFT)
        elif left_cur[0] < right_cur[0]:
            # left starts first: cut at left's max or just before right's min
            upper = min(left_cur[2:], [right_cur[0], not right_cur[1]])
            assert len(upper) == 2
            emit((left_cur[0], left_cur[1], upper[0], upper[1]))
            if left_cur[2:] == upper:
                # left chunk ends before right begins
                advance(LEFT)
            else:
                # the rest of the left chunk now starts at right's min
                left_cur[:2] = right_cur[:2]
        elif left_cur[0] > right_cur[0]:
            # right starts first: cut at right's max or just before left's min
            upper = min(right_cur[2:], [left_cur[0], not left_cur[1]])
            emit((right_cur[0], right_cur[1], upper[0], upper[1]))
            if right_cur[2:] == upper:
                # right chunk ends before left begins
                advance(RIGHT)
            else:
                # the rest of the right chunk now starts at left's min
                right_cur[:2] = left_cur[:2]
        else:
            # both start at the same value: cut at the smaller max
            upper = min(left_cur[2:], right_cur[2:])
            assert len(upper) == 2
            emit((left_cur[0], left_cur[1], upper[0], upper[1]))
            if upper == left_cur[2:]:
                advance(LEFT)
            else:
                left_cur[:2] = upper[0], not upper[1]
            if upper == right_cur[2:]:
                advance(RIGHT)
            else:
                right_cur[:2] = upper[0], not upper[1]

    left_pieces, right_pieces = pieces
    if left_increase is False:
        left_pieces = left_pieces[::-1]
    if right_increase is False:
        right_pieces = right_pieces[::-1]
    return left_pieces, right_pieces
438
+
439
+
440
def build_split_idx_to_origin_idx(splits, increase=True):
    """
    Map every output chunk index to ``(original chunk index, inner position)``.

    ``splits`` holds one entry per original chunk on an axis, each entry
    listing the pieces that chunk is split into, e.g.
    ``[[(0, True, 2, True), (2, False, 3, True)]]`` describes one input chunk
    cut into two output chunks and yields ``{0: (0, 0), 1: (0, 1)}``.

    When ``increase`` is ``False`` the entries are traversed in reversed
    order, and the original chunk index is counted back from the end.
    """
    descending = increase is False
    ordered = list(reversed(splits)) if descending else splits

    mapping = {}
    for seq_no, chunk_pieces in enumerate(ordered):
        origin = len(ordered) - seq_no - 1 if descending else seq_no
        for pos in range(len(chunk_pieces)):
            # keys are handed out consecutively starting from 0
            mapping[len(mapping)] = (origin, pos)
    return mapping
458
+
459
+
460
def _generate_value(dtype, fill_value):
    """
    Produce one sample value for ``dtype`` derived from ``fill_value``.

    Arrow-backed and pyarrow types are unwrapped recursively: lists give a
    one-element list, maps a one-element list of ``(key, item)`` pairs,
    structs a dict keyed by field name, and any other pyarrow type is
    resolved through ``to_pandas_dtype()``. ``ExternalBlobDtype`` gives a
    ``SolidBlob`` of the encoded string form of ``fill_value``. Everything
    else is converted by calling the dtype's ``type`` (or the dtype itself
    when it has none), with dedicated converters for datetime64,
    timedelta64, categorical and object dtypes.
    """
    if ArrowDtype and isinstance(dtype, ArrowDtype):
        return _generate_value(dtype.pyarrow_dtype, fill_value)

    # the container checks must precede the generic pa.DataType check
    if isinstance(dtype, pa.ListType):
        return [_generate_value(dtype.value_type, fill_value)]

    if isinstance(dtype, pa.MapType):
        key_sample = _generate_value(dtype.key_type, fill_value)
        item_sample = _generate_value(dtype.item_type, fill_value)
        return [(key_sample, item_sample)]

    if isinstance(dtype, pa.StructType):
        struct_fields = (dtype[pos] for pos in range(dtype.num_fields))
        return {
            fld.name: _generate_value(fld.type, fill_value) for fld in struct_fields
        }

    if isinstance(dtype, pa.DataType):
        return _generate_value(dtype.to_pandas_dtype(), fill_value)

    if isinstance(dtype, ExternalBlobDtype):
        return SolidBlob(str(fill_value).encode())

    def _to_categorical(value):
        return pd.CategoricalDtype([value])

    def _to_str(_value):
        # for object, we do not know the actual dtype,
        # just convert to str for common usage
        return str(fill_value)

    # special handle for datetime64 and timedelta64
    special_converters = {
        np.datetime64: pd.Timestamp,
        np.timedelta64: pd.Timedelta,
        pd.CategoricalDtype.type: _to_categorical,
        np.object_: _to_str,
    }
    # otherwise, just use dtype.type itself to convert
    scalar_type = getattr(dtype, "type", dtype)
    converter = special_converters.get(scalar_type, scalar_type)
    return converter(fill_value)
501
+
502
+
503
def build_empty_df(dtypes, index=None):
    """
    Build a mock DataFrame whose columns carry the given dtypes.

    Parameters
    ----------
    dtypes
        Series mapping column labels to dtypes; labels may be duplicated.
    index
        Optional index of the result. Without it the result has no rows.

    Returns
    -------
    DataFrame with ``len(index)`` rows (0 when ``index`` is None), every row
    filled with the value ``_generate_value(dtype, 1)`` of its column.
    """
    columns = dtypes.index
    length = len(index) if index is not None else 0
    # at least one row is needed so that every column can be cast below
    record = [[_generate_value(dtype, 1) for dtype in dtypes]] * max(1, length)

    # duplicate column may exist,
    # so use RangeIndex first
    # An empty (but not None) index cannot label the single helper row, so
    # it is attached only after that row has been trimmed away.
    has_rows = length > 0
    df = pd.DataFrame(
        record, columns=range(len(dtypes)), index=index if has_rows else None
    )
    for i, dtype in enumerate(dtypes):
        s = df.iloc[:, i]
        if not pd.api.types.is_dtype_equal(s.dtype, dtype):
            # Replace the whole column. Since pandas 2.0, assigning through
            # ``df.iloc[:, i] = ...`` writes into the existing array first
            # and keeps its dtype whenever the values fit, which would
            # silently drop the cast. Column labels are unique integers at
            # this point, so label-based assignment is unambiguous.
            df[i] = s.astype(dtype)

    df.columns = columns
    if has_rows:
        return df

    empty = df[:0]
    if index is not None:
        empty.index = index
    return empty
518
+
519
+
520
def build_df(df_obj, fill_value=1, size=1, ensure_string=False):
    """
    Build a mock pandas DataFrame resembling ``df_obj``.

    Parameters
    ----------
    df_obj
        Template object. Index and series objects contribute a single column
        (``dtype`` under ``name``); anything else contributes its ``dtypes``.
    fill_value
        Value, or list/tuple of values, the mock cells are derived from.
    size
        Row count, or list/tuple of row counts paired with ``fill_value``.
    ensure_string
        When true, object-dtype columns get the string ``"O"`` prepended
        through ``radd``.

    Returns
    -------
    One DataFrame; when several ``(size, fill_value)`` pairs are given, the
    per-pair frames are concatenated.
    """
    sizes = size if isinstance(size, (list, tuple)) else [size]
    fill_values = fill_value if isinstance(fill_value, (list, tuple)) else [fill_value]

    from .core import INDEX_TYPE, SERIES_TYPE

    if isinstance(df_obj, (INDEX_TYPE, SERIES_TYPE)):
        dtypes = pd.Series([df_obj.dtype], index=[df_obj.name])
    else:
        dtypes = df_obj.dtypes

    frames = []
    for n_rows, fill in zip(sizes, fill_values):
        row = [_generate_value(dtype, fill) for dtype in dtypes]
        record = [row] * n_rows
        df = pd.DataFrame(record)
        df.columns = dtypes.index

        if len(record) != 0:  # columns is empty in some cases
            target_index = df_obj.index_value.to_pandas()
            if isinstance(target_index, pd.MultiIndex):
                label = tuple(
                    _generate_value(level.dtype, fill) for level in target_index.levels
                )
                df.index = pd.MultiIndex.from_tuples(
                    [label] * n_rows, names=target_index.names
                )
            else:
                label = _generate_value(target_index.dtype, fill)
                df.index = pd.Index([label] * n_rows, name=target_index.name)

        # make sure dtypes correct
        for pos, dtype in enumerate(dtypes):
            column = df.iloc[:, pos]
            if not pd.api.types.is_dtype_equal(column.dtype, dtype):
                df[df.columns[pos]] = column.astype(dtype)
        frames.append(df)

    ret_df = frames[0] if len(frames) == 1 else pd.concat(frames)

    if ensure_string:
        obj_dtypes = dtypes[dtypes == np.dtype("O")]
        ret_df[obj_dtypes.index] = ret_df[obj_dtypes.index].radd("O")
    return ret_df
573
+
574
+
575
def build_empty_series(dtype, index=None, name=None):
    """
    Build a mock Series of the given dtype.

    The series has one value per entry of ``index`` (none when ``index`` is
    None), each produced by ``_generate_value(dtype, 1)``.
    """
    n_values = 0 if index is None else len(index)
    values = [_generate_value(dtype, 1) for _ in range(n_values)]
    return pd.Series(values, dtype=dtype, index=index, name=name)
583
+
584
+
585
def build_series(
    series_obj=None,
    fill_value=1,
    size=1,
    name=None,
    ensure_string=False,
    dtype=None,
    index=None,
):
    """
    Build a mock pandas Series.

    Parameters
    ----------
    series_obj
        Optional template; when given, its ``dtype`` overrides ``dtype`` and
        its index (``index_value.to_pandas()`` if available, else ``index``)
        provides the index type.
    fill_value
        Value, or list/tuple of values, the mock entries are derived from.
    size
        Repeat count, or list/tuple of counts paired with ``fill_value``.
    name
        Name of the result; falls back to ``series_obj.name`` when falsy.
    ensure_string
        When true and the dtype is object, ``"O"`` is prepended via ``radd``.
    dtype, index
        Used only when ``series_obj`` is None.

    Returns
    -------
    One Series; the per-pair series are concatenated when several
    ``(size, fill_value)`` pairs are given.
    """
    sizes = size if isinstance(size, (list, tuple)) else [size]
    fill_values = fill_value if isinstance(fill_value, (list, tuple)) else [fill_value]

    if series_obj is None:
        index_template = index[:0] if index is not None else None
    else:
        dtype = series_obj.dtype
        try:
            index_template = series_obj.index_value.to_pandas()[:0]
        except AttributeError:
            index_template = series_obj.index[:0]

    name = name or getattr(series_obj, "name", None)

    built = []
    for repeat, fill in zip(sizes, fill_values):
        mock = build_empty_series(dtype, name=name, index=index_template)
        record = _generate_value(dtype, fill)
        if isinstance(mock.index, pd.MultiIndex):
            label = tuple(
                _generate_value(level.dtype, fill) for level in mock.index.levels
            )
            mock = mock.reindex(
                index=pd.MultiIndex.from_tuples([label], names=mock.index.names)
            )
            mock.iloc[0] = record
        else:
            if isinstance(mock.index.dtype, pd.CategoricalDtype):
                label = None
            else:
                label = _generate_value(mock.index.dtype, fill)
            mock.loc[label] = record

        mock = pd.concat([mock] * repeat)
        # make sure dtype correct for MultiIndex
        mock = mock.astype(dtype, copy=False)
        built.append(mock)

    ret_series = built[0] if len(built) == 1 else pd.concat(built)

    if ensure_string and dtype == np.dtype("O"):
        ret_series = ret_series.radd("O")
    return ret_series
647
+
648
+
649
def infer_index_value(left_index_value, right_index_value, level=None):
    """
    Infer the index value resulting from combining two index values.

    * Two range indexes with equal slices give back the left index value;
      with different slices an empty int64 index is parsed instead.
    * Two unique indexes sharing the same key give back the left index value.
    * Otherwise both sides are converted to pandas, joined (``level`` is
      forwarded to ``join``) and the emptied join result is parsed.
    """
    from .core import IndexValue

    left_value = left_index_value.value
    if isinstance(left_value, IndexValue.RangeIndex) and isinstance(
        right_index_value.value, IndexValue.RangeIndex
    ):
        if left_value.slice == right_index_value.value.slice:
            return left_index_value
        empty_int_index = pd.Index([], dtype=np.int64)
        return parse_index(empty_int_index, left_index_value, right_index_value)

    # when left index and right index is identical, and both of them are elements unique,
    # we can infer that the out index should be identical also
    both_unique = left_index_value.is_unique and right_index_value.is_unique
    if both_unique and left_index_value.key == right_index_value.key:
        return left_index_value

    left_pd = left_index_value.to_pandas()
    right_pd = right_index_value.to_pandas()
    joined = left_pd.join(right_pd, level=level)
    return parse_index(joined[:0], left_index_value, right_index_value)
674
+
675
+
676
def indexing_index_value(index_value, indexes, store_data=False, rechunk=False):
    """
    Compute the index value obtained by indexing ``index_value`` with ``indexes``.

    * A full slice returns ``index_value`` itself unless ``rechunk`` is set
      (when rechunk is True, the output index shall be treated different
      from the input one).
    * When ``index_value`` holds no value, the pandas form is re-parsed with
      ``indexes`` and the original min/max bounds are copied over.
    * Otherwise the pandas index is indexed directly; an ``Entity`` indexer
      yields an empty placeholder of the same index type.
    """
    pd_index = index_value.to_pandas()

    if not rechunk and isinstance(indexes, slice) and is_full_slice(indexes):
        return index_value

    if not index_value.has_value():
        result = parse_index(pd_index, indexes, store_data=store_data)
        inner = result._index_value
        inner._min_val = index_value.min_val
        inner._min_val_close = index_value.min_val_close
        inner._max_val = index_value.max_val
        inner._max_val_close = index_value.max_val_close
        return result

    if isinstance(indexes, Integral):
        return parse_index(pd_index[[indexes]], store_data=store_data)

    if isinstance(indexes, Entity):
        if isinstance(pd_index, pd.RangeIndex):
            placeholder = pd.RangeIndex(-1)
        else:
            placeholder = type(pd_index)([])
        return parse_index(placeholder, indexes, index_value, store_data=False)

    selector = list(indexes) if isinstance(indexes, tuple) else indexes
    return parse_index(pd_index[selector], store_data=store_data)
705
+
706
+
707
def merge_index_value(to_merge_index_values: dict, store_data: bool = False):
    """
    Merge the index values of several chunks into one.

    Parameters
    ----------
    to_merge_index_values : dict
        Chunk index to index_value; entries are processed in sorted order.
    store_data : bool
        Store data in the merged index_value. When set, chunk indexes are
        always appended to each other.

    Returns
    -------
    merged_index_value
        Parsed from the merged pandas index. When it holds no value, the
        smallest min and largest max seen across the chunks are attached.
    """
    merged_pd = None
    low = low_closed = high = high_closed = None

    for _, chunk_value in sorted(to_merge_index_values.items()):
        if merged_pd is None:
            # first chunk seeds both the index and the bounds
            merged_pd = chunk_value.to_pandas()
            low, low_closed = chunk_value.min_val, chunk_value.min_val_close
            high, high_closed = chunk_value.max_val, chunk_value.max_val_close
            continue

        chunk_pd = chunk_value.to_pandas()
        if store_data or (
            isinstance(merged_pd, pd.RangeIndex)
            and isinstance(chunk_pd, pd.RangeIndex)
            and chunk_pd.step == merged_pd.step
            and chunk_pd.start == merged_pd.stop
        ):
            # range index that is continuous
            merged_pd = merged_pd.append(chunk_pd)
        else:
            merged_pd = pd.Index([], dtype=merged_pd.dtype)

        chunk_low = chunk_value.min_val
        if chunk_low is not None:
            try:
                if low is None or low > chunk_low:
                    low = chunk_low
                    low_closed = chunk_value.min_val_close
            except TypeError:
                # min_value has different types that cannot compare
                # just stop compare
                continue

        chunk_high = chunk_value.max_val
        if chunk_high is not None:
            if high is None or high < chunk_high:
                high = chunk_high
                high_closed = chunk_value.max_val_close

    index_value = parse_index(merged_pd, store_data=store_data)
    if not index_value.has_value():
        inner = index_value._index_value
        inner._min_val = low
        inner._min_val_close = low_closed
        inner._max_val = high
        inner._max_val_close = high_closed
    return index_value
767
+
768
+
769
def infer_dtypes(left_dtypes, right_dtypes, operator):
    """
    Infer the dtypes produced by applying ``operator`` to two DataFrames.

    Mock frames are built from both dtypes series with ``build_empty_df``
    and the ``dtypes`` of ``operator(left, right)`` is returned.
    """
    mock_left = build_empty_df(left_dtypes)
    mock_right = build_empty_df(right_dtypes)
    outcome = operator(mock_left, mock_right)
    return outcome.dtypes
773
+
774
+
775
@functools.lru_cache(100)
def infer_dtype(left_dtype, right_dtype, operator):
    """
    Infer the dtype produced by applying ``operator`` to two Series.

    Mock series are built from both dtypes with ``build_empty_series`` and
    the ``dtype`` of ``operator(left, right)`` is returned. Results are
    memoized, so all three arguments have to be hashable.
    """
    mock_left = build_empty_series(left_dtype)
    mock_right = build_empty_series(right_dtype)
    outcome = operator(mock_left, mock_right)
    return outcome.dtype
780
+
781
+
782
def filter_dtypes(dtypes, column_min_max):
    """
    Keep the entries of ``dtypes`` whose label lies within a range.

    ``column_min_max`` is indexed as ``(min, min is closed, max, max is
    closed)``; a closed bound includes the bound itself, an open one
    excludes it.
    """
    labels = dtypes.index
    lower, upper = column_min_max[0], column_min_max[2]
    above_lower = labels >= lower if column_min_max[1] else labels > lower
    below_upper = labels <= upper if column_min_max[3] else labels < upper
    return dtypes[above_lower & below_upper]
788
+
789
+
790
def in_range_index(i, pd_range_index):
    """
    Check whether the input `i` is an element of `pd_range_index`.

    `i` has to lie between start (inclusive) and stop (exclusive) in the
    direction of the step, and be a whole number of steps away from start.
    """
    start = _get_range_index_start(pd_range_index)
    stop = _get_range_index_stop(pd_range_index)
    step = _get_range_index_step(pd_range_index)

    if step > 0:
        hit = start <= i < stop and (i - start) % step == 0
    elif step < 0:
        hit = start >= i > stop and (start - i) % step == 0
    else:
        hit = False
    return True if hit else False
804
+
805
+
806
def wrap_notimplemented_exception(func):
    """
    Decorate ``func`` so that it returns ``NotImplemented`` instead of
    raising ``NotImplementedError``.

    Any other exception propagates unchanged, and so does the normal return
    value.
    """

    @functools.wraps(func)
    def _guarded(*call_args, **call_kwargs):
        try:
            outcome = func(*call_args, **call_kwargs)
        except NotImplementedError:
            outcome = NotImplemented
        return outcome

    return _guarded
815
+
816
+
817
def validate_axis(axis, tileable=None):
    """
    Normalize an axis specification to a non-negative integer.

    Parameters
    ----------
    axis
        ``"index"`` (mapped to 0), ``"columns"`` (mapped to 1) or anything
        accepted by ``operator.index``.
    tileable
        Optional object with an ``ndim`` attribute; when given, the axis must
        be smaller than ``tileable.ndim``.

    Returns
    -------
    The axis as an integer.

    Raises
    ------
    ValueError
        If the axis cannot be turned into an integer, is negative, or is out
        of range for ``tileable``.
    """
    if axis == "index":
        axis = 0
    elif axis == "columns":
        axis = 1

    try:
        axis = operator.index(axis)
        valid = axis >= 0 and (tileable is None or axis < tileable.ndim)
    except TypeError:
        valid = False

    if not valid:
        raise ValueError(f"No axis named {axis} for object type {type(tileable)}")
    return axis
834
+
835
+
836
def validate_axis_style_args(
    data, args, kwargs, arg_name, method_name
):  # pragma: no cover
    """Translate mixed ``index``/``columns``/``axis`` arguments to one style.

    Both ``.method(index, columns)`` and ``.method(arg, axis=.)`` call
    styles are accepted; the result is always expressed in
    ``{index=., columns=.}`` form.

    Parameters
    ----------
    data : DataFrame
        Object whose ``ndim`` decides which axis names are valid.
    args : tuple
        All positional arguments from the user
    kwargs : dict
        All keyword arguments from the user
    arg_name, method_name : str
        Used for better error messages

    Returns
    -------
    kwargs : dict
        A new dictionary of keyword arguments; ``kwargs`` itself is not
        modified, so callers should update it with the return value.

    Raises
    ------
    TypeError
        If ``axis`` is combined with ``index``/``columns``, if ``arg_name``
        is given both positionally and by keyword, or if two or more
        positional arguments are supplied.
    """
    resolved = {}

    # consistency check: 'axis' cannot be mixed with explicit axis names
    axes_names = ["index"] if data.ndim == 1 else ["index", "columns"]
    if "axis" in kwargs and any(axis_name in kwargs for axis_name in axes_names):
        raise TypeError("Cannot specify both 'axis' and any of 'index' or 'columns'.")

    # the argument passed by its own name is attached to the requested axis
    if arg_name in kwargs:
        if args:
            raise TypeError(
                f"{method_name} got multiple values for argument '{arg_name}'"
            )
        target = axes_names[validate_axis(kwargs.get("axis", 0), data)]
        resolved[target] = kwargs[arg_name]

    # keywords that are themselves valid axis names are copied over
    for kw_name, kw_value in kwargs.items():
        try:
            target = axes_names[validate_axis(kw_name, data)]
        except ValueError:
            continue
        resolved[target] = kw_value

    # finally the positional arguments
    n_positional = len(args)
    if n_positional == 1:
        target = axes_names[validate_axis(kwargs.get("axis", 0), data)]
        resolved[target] = args[0]
    elif n_positional == 2:
        if "axis" in kwargs:
            # Unambiguously wrong
            raise TypeError("Cannot specify both 'axis' and any of 'index' or 'columns'")

        template = (
            "Interpreting call\n\t'.{method_name}(a, b)' as "
            "\n\t'.{method_name}(index=a, columns=b)'.\nUse named "
            "arguments to remove any ambiguity."
        )
        raise TypeError(template.format(method_name=method_name))
    elif n_positional > 2:
        raise TypeError(f"Cannot specify all of '{arg_name}', 'index', 'columns'.")
    # with no positional argument it is up to the caller to decide validity
    return resolved
914
+
915
+
916
def validate_output_types(**kwargs):
    """
    Extract output types from keyword arguments.

    ``output_types`` is used when truthy; otherwise a single type taken from
    ``object_type`` or ``output_type`` is wrapped into a list. String entries
    are resolved to the ``OutputType`` member of the lower-cased name.

    Returns
    -------
    List of output types, or None when nothing was specified.
    """
    from ..core import OutputType

    single = kwargs.pop("object_type", None) or kwargs.pop("output_type", None)
    fallback = [single] if single is not None else None
    requested = kwargs.pop("output_types", None) or fallback
    if not requested:
        return None

    resolved = []
    for entry in requested:
        if isinstance(entry, str):
            entry = getattr(OutputType, entry.lower())
        resolved.append(entry)
    return resolved
931
+
932
+
933
def fetch_corner_data(df_or_series, session=None) -> pd.DataFrame:
    """
    Fetch corner DataFrame or Series for repr usage.

    When the object has more rows than pandas would display
    (``display.max_rows``), only the leading and trailing rows needed for a
    truncated repr are fetched and concatenated; otherwise the whole object
    is fetched.

    :param df_or_series: DataFrame or Series
    :param session: session forwarded to the fetch calls
    :return: corner DataFrame
    """
    from .indexing.iloc import iloc

    # Both options may legally be None: ``display.max_rows = None`` means
    # "never truncate" and ``display.min_rows = None`` means "follow
    # max_rows". Comparing against None would raise TypeError.
    max_rows = pd.get_option("display.max_rows")
    try:
        min_rows = pd.get_option("display.min_rows")
    except KeyError:  # pragma: no cover
        # display.min_rows is introduced in pandas 0.25
        min_rows = None
    if min_rows is None:
        min_rows = max_rows
    elif max_rows is not None:
        min_rows = min(min_rows, max_rows)

    index_size = None
    if (
        max_rows is not None
        and df_or_series.shape[0] > max_rows
        and df_or_series.shape[0] > min_rows // 2 * 2 + 2
    ):
        # for pandas, greater than max_rows
        # will display min_rows
        # thus we fetch min_rows + 2 lines
        index_size = min_rows // 2 + 1

    if index_size is None:
        return df_or_series._fetch(session=session)
    else:
        head = iloc(df_or_series)[:index_size]
        tail = iloc(df_or_series)[-index_size:]
        head_data, tail_data = ExecutableTuple([head, tail]).fetch(session=session)
        xdf = cudf if head.op.is_gpu() else pd
        return xdf.concat([head_data, tail_data], axis="index")
968
+
969
+
970
class ReprSeries(pd.Series):
    """
    Series holding only corner data while reporting the full length.

    Used for repr: the stored data is the fetched corner, but ``len``
    answers with the first entry of the real shape.
    """

    def __init__(self, corner_data, real_shape):
        pd.Series.__init__(self, corner_data)
        # shape of the complete data the corner was taken from
        self._real_shape = real_shape

    def __len__(self):
        # As we only fetch corner data to repr,
        # the length would be wrong and we have no way to control,
        # thus we just overwrite the length to show the real one
        n_rows = self._real_shape[0]
        return n_rows
980
+
981
+
982
def filter_dtypes_by_index(dtypes, index):
    """
    Select the entries of ``dtypes`` whose labels appear in ``index``.

    A direct ``.loc`` lookup (with missing entries dropped) is attempted
    first. When it raises ``KeyError``, the labels common to both sides are
    found by merging the two indexes as frames and the lookup is repeated
    with those labels. The index names of ``dtypes`` are restored on the
    result in both cases.
    """
    try:
        selected = dtypes.loc[index].dropna()
    except KeyError:
        own_labels = dtypes.index.to_frame()
        wanted_labels = index.to_frame()
        level_columns = list(range(dtypes.index.nlevels))
        common = own_labels.merge(wanted_labels).set_index(level_columns).index
        selected = dtypes.loc[common]
    selected.index.names = dtypes.index.names
    return selected
995
+
996
+
997
@contextmanager
def create_sa_connection(con, **kwargs):
    """
    Context manager yielding a SQLAlchemy connection for ``con``.

    * A ``Connection`` is yielded as is and left open afterwards.
    * An ``Engine`` is connected; the connection is closed on exit.
    * Anything else is passed to ``sqlalchemy.create_engine`` together with
      ``kwargs``; on exit the connection is closed and the engine disposed.
    """
    import sqlalchemy as sa
    from sqlalchemy.engine import Connection, Engine

    # only set when the engine is created here, hence owned by this function
    owned_engine = None
    if isinstance(con, Connection):
        # connection create by user
        owns_connection = False
    else:
        if not isinstance(con, Engine):
            owned_engine = sa.create_engine(con, **kwargs)
            con = owned_engine.connect()
        else:
            con = con.connect()
        owns_connection = True

    try:
        yield con
    finally:
        if owns_connection:
            con.close()
        if owned_engine is not None:
            owned_engine.dispose()
1025
+
1026
+
1027
def to_arrow_dtypes(dtypes, test_df=None):
    """
    Return a copy of ``dtypes`` with string dtypes replaced by ``ArrowStringDtype``.

    Parameters
    ----------
    dtypes
        Series of dtypes to convert.
    test_df
        Optional sample frame whose columns are matched to ``dtypes`` by
        position. When given, a string column is converted only if its
        first non-NA value is a ``str`` or it has no non-NA value at all.

    Returns
    -------
    New dtypes series; the input is left untouched.
    """
    from .arrays import ArrowStringDtype

    converted = dtypes.copy()
    for pos, dtype in enumerate(dtypes):
        if not is_string_dtype(dtype):
            continue

        if test_df is None:
            # nothing to inspect, set arrow string dtype
            use_arrow = True
        else:
            column = test_df.iloc[:, pos]
            # check value
            present = column[column.notna()]
            use_arrow = len(present) == 0 or isinstance(present.iloc[0], str)

        if use_arrow:
            converted.iloc[pos] = ArrowStringDtype()
    return converted
1049
+
1050
+
1051
def is_dataframe(x):
    """Tell whether ``x`` is a pandas DataFrame, or a cudf one when cudf is available."""
    is_cudf_frame = cudf is not None and isinstance(x, cudf.DataFrame)  # pragma: no cover
    return is_cudf_frame or isinstance(x, pd.DataFrame)
1056
+
1057
+
1058
def is_series(x):
    """Tell whether ``x`` is a pandas Series, or a cudf one when cudf is available."""
    is_cudf_series = cudf is not None and isinstance(x, cudf.Series)  # pragma: no cover
    return is_cudf_series or isinstance(x, pd.Series)
1063
+
1064
+
1065
def is_index(x):
    """Tell whether ``x`` is a pandas Index, or a cudf one when cudf is available."""
    is_cudf_index = cudf is not None and isinstance(x, cudf.Index)  # pragma: no cover
    return is_cudf_index or isinstance(x, pd.Index)
1070
+
1071
+
1072
def get_xdf(x):
    """
    Return the dataframe library matching ``x``.

    ``cudf`` is returned for cudf DataFrame/Series/Index objects (only
    possible when cudf is available); ``pd`` in every other case.
    """
    if cudf is None:
        return pd
    cudf_types = (cudf.DataFrame, cudf.Series, cudf.Index)  # pragma: no cover
    return cudf if isinstance(x, cudf_types) else pd  # pragma: no cover
1077
+
1078
+
1079
def is_cudf(x):
    """Tell whether ``x`` is a cudf DataFrame, Series or Index; always False without cudf."""
    if cudf is None:
        return False
    cudf_types = (cudf.DataFrame, cudf.Series, cudf.Index)  # pragma: no cover
    return isinstance(x, cudf_types)  # pragma: no cover
1084
+
1085
+
1086
def whether_to_clean_up(op, threshold):
    """
    Decide whether the state captured by ``op.func`` reaches ``threshold`` bytes.

    The sizes of the closure cell contents of ``op.func`` are accumulated or,
    for other callables, the sizes of its ``__dict__`` entries and
    ``__slots__`` attributes. Classes are not measured at all. As soon as the
    running total reaches ``threshold``, ``op.need_clean_up_func`` is set to
    True and counting stops.

    Parameters
    ----------
    op : object
        Object exposing ``func`` and ``need_clean_up_func`` attributes.
    threshold : int
        Byte count at which the function is flagged for cleanup.

    Returns
    -------
    The value of ``op.need_clean_up_func`` after the check.
    """
    from collections import deque
    from numbers import Number

    depth_limit = 2
    # objects of these types are measured with sys.getsizeof only
    leaf_types = (str, bytes, Number, range, bytearray, pd.DataFrame, pd.Series)
    sequence_types = (tuple, list, set, deque)

    class GetSizeEarlyStopException(Exception):
        pass

    def estimate_size(root):
        # every top-level measurement starts with a fresh "seen" set
        visited = set()

        def walk(obj, depth):
            if depth > depth_limit or id(obj) in visited:
                return 0
            visited.add(id(obj))
            child_depth = depth + 1
            total = sys.getsizeof(obj)
            if isinstance(obj, leaf_types):
                return total
            if isinstance(obj, sequence_types):
                total += sum(walk(item, child_depth) for item in obj)
            elif hasattr(obj, "items"):
                total += sum(
                    walk(key, child_depth) + walk(value, child_depth)
                    for key, value in obj.items()
                )
            if hasattr(obj, "__dict__"):
                total += walk(vars(obj), child_depth)
            if hasattr(obj, "__slots__"):
                total += sum(
                    walk(getattr(obj, slot), child_depth)
                    for slot in obj.__slots__
                    if hasattr(obj, slot)
                )
            return total

        return walk(root, 0)

    func = op.func
    counted_bytes = 0

    def account(*objs):
        # add the sizes of objs to the running total and stop once it is enough
        nonlocal counted_bytes
        counted_bytes += sum(estimate_size(item) for item in objs)
        if counted_bytes >= threshold:
            raise GetSizeEarlyStopException()

    try:
        # Note: In most cases, func is just a function with closure, while chances are that
        # func is a callable that doesn't have __closure__ attribute.
        if inspect.isclass(func):
            pass
        elif getattr(func, "__closure__", None) is not None:
            for cell in func.__closure__:
                account(cell.cell_contents)
        elif callable(func):
            if hasattr(func, "__dict__"):
                for attr_name, attr_value in func.__dict__.items():
                    account(attr_name, attr_value)
            if hasattr(func, "__slots__"):
                for slot in func.__slots__:
                    if hasattr(func, slot):
                        account(getattr(func, slot))
                    else:
                        account()
    except GetSizeEarlyStopException:
        logger.debug("Func needs cleanup.")
        op.need_clean_up_func = True
    else:
        assert op.need_clean_up_func is False
        logger.debug("Func doesn't need cleanup.")

    return op.need_clean_up_func
1163
+
1164
+
1165
def concat_on_columns(objs: List) -> Any:
    """
    Concatenate ``objs`` along ``axis=1`` with the library matching ``objs[0]``.

    In cudf, concat with axis=1 and ignore_index=False behaves opposite to
    pandas: cudf resets the index, which does not match its documentation.
    For cudf results the index of the first object is therefore restored.
    """
    first = objs[0]
    backend = get_xdf(first)
    combined = backend.concat(objs, axis=1)
    if backend is not cudf:
        return combined
    combined.index = first.index
    return combined
1174
+
1175
+
1176
def apply_if_callable(maybe_callable, obj, **kwargs):
    """
    Return ``maybe_callable(obj, **kwargs)`` when ``maybe_callable`` is
    callable, otherwise return ``maybe_callable`` itself unchanged.
    """
    return maybe_callable(obj, **kwargs) if callable(maybe_callable) else maybe_callable
1181
+
1182
+
1183
def patch_sa_engine_execute():
    """
    Add an ``execute`` method to SQLAlchemy's ``Engine`` when it has none.

    pandas did not resolve the compatibility issue with sqlalchemy 2.0, see
    https://github.com/pandas-dev/pandas/issues/40686. The method installed
    here opens a connection via ``Engine.connect()`` and forwards the
    statement and parameters to that connection's ``execute``.

    Nothing is done when SQLAlchemy cannot be imported or when ``Engine``
    already provides ``execute``.
    """
    try:
        from sqlalchemy.engine import Engine
    except ImportError:  # pragma: no cover
        return

    if hasattr(Engine, "execute"):  # pragma: no cover
        return

    def execute(self, statement, *multiparams, **params):
        return self.connect().execute(statement, *multiparams, **params)

    Engine.execute = execute
1201
+
1202
+
1203
def bind_func_args_from_pos(func, args_bind_position, *bound_args, **bound_kwargs):
    """
    Create a new function with arguments bound from specified position.

    Parameters
    ----------
    func : callable
        Target function to be wrapped.
    args_bind_position : int
        Position at which the bound positional arguments are inserted
        (0-based). With 0 they come before all runtime arguments, with 1
        they follow the first runtime argument, and so on.
    *bound_args : tuple
        Positional arguments inserted at ``args_bind_position``.
    **bound_kwargs : dict
        Keyword arguments to be bound. Keyword arguments supplied at call
        time take precedence over these.

    Returns
    -------
    callable
        Wrapped function with bound arguments.

    Examples
    --------
    >>> def func(x, y, z=0):
    ...     return x * y + z
    >>> f = bind_func_args_from_pos(func, 1, 10)  # bind from second position
    >>> f(5)  # equals func(5, 10)
    50

    Notes
    -----
    No validation of ``func`` or ``args_bind_position`` is performed here.
    Any exception raised while calling ``func`` is re-raised as the same
    exception type with a message carrying extra context. When the exception
    type cannot be rebuilt from a single message, the original exception is
    re-raised unchanged.
    """

    @functools.wraps(func)
    def wrapper(*runtime_args, **runtime_kwargs):
        try:
            # Combine arguments
            all_args = (
                runtime_args[:args_bind_position]
                + bound_args
                + runtime_args[args_bind_position:]
            )
            all_kwargs = {**bound_kwargs, **runtime_kwargs}

            return func(*all_args, **all_kwargs)
        except Exception as e:
            # Callables such as functools.partial or instances defining
            # __call__ have no __name__; never let that hide the real error.
            func_name = getattr(func, "__name__", None) or repr(func)
            message = f"Error calling {func_name} with bound arguments: {str(e)}"
            try:
                # Enhance error message with context
                enhanced = type(e)(message)
            except Exception:
                # the exception type needs other constructor arguments
                enhanced = None
            if enhanced is None:
                raise
            raise enhanced from e

    return wrapper
1259
+
1260
+
1261
def pack_func_args(df, funcs, *args, args_bind_position=1, **kwargs) -> Any:
    """
    Pack the funcs with args and kwargs to avoid the ambiguity between other
    positional and keyword arguments. It will process the funcs by the following rule:

    1. If there's no such args and kwargs, return funcs itself.

    2. If the funcs is a dict-like object, it will iterate each key-value pair, pack the
    value recursively, and return a new dict with the same keys and packed values.

    3. If the funcs is a list-like object, it will iterate each element, pack it
    recursively, and return a new list with the packed elements.

    4. If the funcs is a str object, it is resolved to a callable with
    ``get_callable_by_name``: the attribute ``df.<funcs>`` is tried first and must
    be callable (otherwise a ValueError is raised), then ``np.<funcs>`` when df
    supports ``__array__``; if neither exists an AttributeError is raised. The
    resolved callable is returned with args and kwargs packed in. This rule is
    almost the same with pandas.

    5. Other cases are treated as funcs being a callable, returns the wrapped one with
    args and kwargs packed in.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        The DataFrame or Series object used to resolve function names.
    funcs : function, str, list-like or dict-like
        Function to pack. It should have the same type with Dataframe.transform().
    args_bind_position : int
        Position at which ``*args`` are inserted among the runtime positional
        arguments (0-based); see ``bind_func_args_from_pos``. The same position
        is applied to every function nested inside dict-like or list-like funcs.
    *args :
        The positional arguments to func. If funcs contains many functions, each one
        should be able to accept *args.
    **kwargs :
        The keyword arguments to func. If funcs contains many functions, each one
        should be able to accept **kwargs.

    Returns
    -------
    The packed functions having the same structure with funcs.

    Raises
    ------
    ValueError :
        If there's a string naming an attribute of df that is not callable.
    AttributeError :
        If there's a string but no corresponding function is found.
    """
    from ..udf import MarkedFunction

    if not args and not kwargs:
        return funcs

    if is_dict_like(funcs):
        # propagate the bind position so nested functions are packed the
        # same way as a top-level one
        return {
            k: pack_func_args(
                df, v, *args, args_bind_position=args_bind_position, **kwargs
            )
            for k, v in funcs.items()
        }

    if is_list_like(funcs) and not isinstance(funcs, ENTITY_TYPE):
        return [
            pack_func_args(
                df, v, *args, args_bind_position=args_bind_position, **kwargs
            )
            for v in funcs
        ]

    f = get_callable_by_name(df, funcs) if isinstance(funcs, str) else funcs

    if isinstance(f, MarkedFunction):
        # for marked function, pack the inner function, and reset as mark function
        packed_func = f.copy()
        packed_func.func = bind_func_args_from_pos(
            f.func, args_bind_position, *args, **kwargs
        )
    else:
        packed_func = bind_func_args_from_pos(f, args_bind_position, *args, **kwargs)

    # Callable
    return packed_func
1336
+
1337
+
1338
def get_callable_by_name(df: Any, func_name: str) -> Callable:
    """
    Resolve a function name to a callable.

    The attribute ``df.<func_name>`` is looked up first: it is returned when
    callable, and a ValueError is raised when it exists but is not callable.
    When df has no such attribute, ``np.<func_name>`` is returned provided it
    exists and df exposes ``__array__``. This rule is almost the same with
    pandas.

    Parameters
    ----------
    df: pandas.Series or pandas.Dataframe
        The receiver of the func name.
    func_name : str
        The func name.

    Returns
    -------
    The callable instance.

    Raises
    ------
    ValueError :
        If the attribute found on df is not callable.
    AttributeError :
        If no corresponding function is found.
    """
    if not hasattr(df, func_name):
        # fall back to the numpy function of the same name for array-likes
        if hasattr(np, func_name) and hasattr(df, "__array__"):
            return getattr(np, func_name)
        raise AttributeError(
            f"'{func_name}' is not a valid function for '{type(df).__name__}' object"
        )

    candidate = getattr(df, func_name)
    if not callable(candidate):
        raise ValueError(f"{func_name} is not a callable")
    return candidate
1377
+
1378
+
1379
@dataclasses.dataclass
class InferredDataFrameMeta:
    """Metadata describing the result of applying a function to a DataFrame-like object."""

    # kind of the result
    output_type: OutputType
    # column dtypes; not required by check_absence for non-dataframe results
    dtypes: Optional[pd.Series] = None
    # dtype and name; not required by check_absence for dataframe results
    dtype: Optional[Any] = None
    name: Optional[str] = None
    index_value: Optional["IndexValue"] = None
    maybe_agg: bool = False
    elementwise: bool = False

    def check_absence(self, *args: str) -> None:
        """
        Raise TypeError when any of the named fields relevant to
        ``output_type`` is still None.

        For dataframe outputs ``dtype`` and ``name`` are ignored, for any
        other output type ``dtypes`` is ignored.
        """
        if self.output_type == OutputType.dataframe:
            not_applicable = {"dtype", "name"}
        else:
            not_applicable = {"dtypes"}

        absent_args = [
            field_name
            for field_name in sorted(set(args) - not_applicable)
            if getattr(self, field_name) is None
        ]
        if not absent_args:
            return
        raise TypeError(
            f"Cannot determine {', '.join(absent_args)} by calculating "
            "with mock data, please specify it as arguments"
        )
1401
+
1402
+
1403
def _get_groupby_input_df(groupby):
    """
    Follow ``inputs[0]`` starting at ``groupby`` until reaching an object whose
    operator's first output type is dataframe or series, and return it.
    """
    node = groupby
    while True:
        if node.op.output_types[0] in (OutputType.dataframe, OutputType.series):
            return node
        node = node.inputs[0]
1408
+
1409
+
1410
def infer_dataframe_return_value(
    df_obj,
    func,
    output_type=None,
    dtypes=None,
    dtype=None,
    name=None,
    index=None,
    inherit_index=False,
    build_kw=None,
    elementwise=None,
    skip_infer=False,
) -> InferredDataFrameMeta:
    """
    Determine the metadata of ``func(df_obj)``.

    Metadata is taken, in order, from the explicit arguments, from the
    function's annotations (via ``get_function_output_meta``) and finally by
    calling ``func`` on mock data built from ``df_obj``.

    Parameters
    ----------
    df_obj :
        Input DataFrame, Series or groupby object.
    func : callable
        Function whose result is being described. ``MarkedFunction``,
        ``functools.partial`` and ``__wrapped__`` layers are peeled off before
        inspecting annotations and checking for ``np.ufunc``.
    output_type, dtypes, dtype, name :
        Metadata supplied by the caller; values that are not None take
        precedence over annotated and inferred ones.
    index :
        Index of the result; either an index entity (its ``index_value`` is
        used) or something accepted by ``parse_index``.
    inherit_index : bool
        Use ``df_obj.index_value`` as the index of the result.
    build_kw : dict, optional
        Keyword arguments passed to the mock-data builders.
    elementwise : bool, optional
        Whether func is elementwise. Defaults to whether the unwrapped
        function is a ``np.ufunc``.
    skip_infer : bool
        Never call ``func``; return what is known from arguments and
        annotations.

    Returns
    -------
    InferredDataFrameMeta

    Raises
    ------
    Nothing from the mock call itself: any exception raised while calling
    ``func`` on mock data or inspecting its result is logged, and the
    metadata known so far is returned instead.
    """
    from .core import GROUPBY_TYPE, INDEX_TYPE
    from .typing_ import get_function_output_meta

    # peel off wrappers to reach the user function
    unwrapped_func = func
    if isinstance(unwrapped_func, MarkedFunction):
        unwrapped_func = unwrapped_func.func
    while True:
        if isinstance(unwrapped_func, functools.partial):
            unwrapped_func = unwrapped_func.func
        elif hasattr(unwrapped_func, "__wrapped__"):
            unwrapped_func = unwrapped_func.__wrapped__
        else:
            break

    # annotations only fill in what the caller did not specify
    func_annotation_meta = get_function_output_meta(unwrapped_func, df_obj)
    func_index_value = None
    if func_annotation_meta:
        output_type = output_type or func_annotation_meta.output_type
        dtypes = dtypes if dtypes is not None else func_annotation_meta.dtypes
        dtype = dtype if dtype is not None else func_annotation_meta.dtype
        name = name if name is not None else func_annotation_meta.name
        func_index_value = func_annotation_meta.index_value

    if skip_infer:
        if isinstance(index, INDEX_TYPE):
            ret_index_value = index.index_value
        elif index is not None:
            ret_index_value = parse_index(index, df_obj.key)
        else:
            ret_index_value = func_index_value

        return InferredDataFrameMeta(
            output_type=output_type,
            dtypes=dtypes,
            dtype=dtype,
            name=name,
            index_value=ret_index_value,
        )

    if isinstance(index, INDEX_TYPE):
        index = index.index_value

    if elementwise is None:
        elementwise = isinstance(unwrapped_func, np.ufunc)

    ret_index_value = func_index_value
    if output_type is not None and (dtypes is not None or dtype is not None):
        if inherit_index:
            ret_index_value = df_obj.index_value
        elif index is not None:
            ret_index_value = parse_index(index)

        if ret_index_value is not None:
            # everything is known already, no need to run func on mock data
            return InferredDataFrameMeta(
                output_type,
                dtypes,
                dtype,
                name,
                ret_index_value,
                elementwise=elementwise or False,
            )

    ret_output_type = ret_dtypes = None
    maybe_agg = False
    build_kw = build_kw or {}
    obj_key = df_obj.key

    if elementwise:
        inherit_index = True
        (ret_output_type,) = get_output_types(df_obj)
    if index is not None:
        ret_index_value = parse_index(index)

    if isinstance(df_obj, GROUPBY_TYPE):
        is_groupby = True
        empty_df_obj = df_obj.op.build_mock_groupby(**build_kw)
    else:
        is_groupby = False
        empty_df_obj = (
            build_df(df_obj, **build_kw)
            if df_obj.ndim == 2
            else build_series(df_obj, **build_kw)
        )
    try:
        with np.errstate(all="ignore"), quiet_stdio():
            infer_df_obj = func(empty_df_obj)

        if ret_index_value is None:
            if (
                infer_df_obj is None
                or not hasattr(infer_df_obj, "index")
                or infer_df_obj.index is None
            ):
                ret_index_value = parse_index(pd.RangeIndex(-1))
            elif (
                infer_df_obj.index is getattr(empty_df_obj, "index", None)
                or inherit_index
            ):
                ret_index_value = df_obj.index_value
            else:
                ret_index_value = parse_index(infer_df_obj.index, obj_key, func)

        if isinstance(infer_df_obj, pd.DataFrame):
            if output_type is not None and output_type != OutputType.dataframe:
                raise TypeError(
                    'Cannot infer output_type as "series", '
                    'please specify `output_type` as "dataframe"'
                )
            ret_output_type = ret_output_type or OutputType.dataframe
            ret_dtypes = infer_df_obj.dtypes
        else:
            if output_type is not None and output_type == OutputType.dataframe:
                raise TypeError(
                    'Cannot infer output_type as "dataframe", '
                    'please specify `output_type` as "series"'
                )
            ret_output_type = ret_output_type or OutputType.series
            # Compare against None instead of relying on truthiness: a plain
            # np.dtype has length 0 and is therefore falsy, and names such
            # as 0 are falsy too; both must not be replaced by inferred values.
            if name is None:
                name = getattr(infer_df_obj, "name", None)
            if dtype is None:
                dtype = infer_df_obj.dtype

        if is_groupby and len(infer_df_obj) <= 2:
            # we create mock df with 4 rows, 2 groups
            # if return df has 2 rows, we assume that
            # it's an aggregation operation
            maybe_agg = True

        return InferredDataFrameMeta(
            ret_output_type,
            make_dtypes(ret_dtypes),
            make_dtype(dtype),
            name,
            ret_index_value,
            maybe_agg,
            elementwise=elementwise,
        )
    except:  # noqa: E722  # nosec
        # inference is best effort: fall back to what is known so far
        logger.info(
            "Exception raised while inferring meta of function result", exc_info=True
        )
        return InferredDataFrameMeta(
            output_type,
            make_dtypes(dtypes),
            make_dtype(dtype),
            name,
            ret_index_value,
            maybe_agg,
            elementwise=elementwise,
        )
1571
+
1572
+
1573
def copy_func_scheduling_hints(func, op: "DataFrameOperator") -> None:
    """
    Copy scheduling hints (engine, resources, gpu) onto ``op``.

    For a ``MarkedFunction`` the hints come from the function, with entries
    of ``options.function.default_running_options`` filling in resources that
    are missing or None. For any other function only the default running
    options are used. The resources are then passed through
    ``validate_and_adjust_resource_ratio`` before being stored.

    The function's own ``expect_resources`` and the configured default
    options are copied before merging, so neither is modified by this call.

    Parameters
    ----------
    func :
        Function whose hints are read.
    op : DataFrameOperator
        Operator receiving ``expect_engine``, ``expect_resources`` and ``gpu``;
        each attribute is only assigned when the corresponding value is truthy.
    """
    from ..config import options

    expect_engine = None
    expect_gpu = None
    default_options = options.function.default_running_options or {}

    if isinstance(func, MarkedFunction):
        # copy from marked function
        expect_engine = func.expect_engine
        # work on a copy: merging defaults must not leak into the function
        expect_resources = dict(func.expect_resources or {})
        expect_gpu = func.gpu

        # merge default options if not set
        for key, value in default_options.items():
            if expect_resources.get(key) is None:
                expect_resources[key] = value
    else:
        # copy from default options
        expect_resources = dict(default_options)

    # Validate and adjust resource ratio constraints on client side
    expect_resources, _ = validate_and_adjust_resource_ratio(
        expect_resources,
        max_memory_cpu_ratio=options.function.allowed_max_memory_cpu_ratio,
        adjust=True,
    )

    if expect_engine:
        op.expect_engine = expect_engine
    if expect_resources:
        op.expect_resources = expect_resources
    if expect_gpu:
        op.gpu = expect_gpu
1607
+
1608
+
1609
def make_column_list(col, dtypes_or_columns, level=None):
    """
    Normalize a column specification against the available columns.

    Parameters
    ----------
    col :
        A column label, an integer position, or a collection of either.
    dtypes_or_columns :
        The columns to look up; for a ``pd.Series`` its index is used.
    level : optional
        When given, ``col`` is matched against that level of the columns and
        the matching entries of the columns are returned.

    Returns
    -------
    ``[col]`` if ``col`` is itself a column; the collection unchanged if all
    its members are columns; the labels at the given positions if all
    members are integers; otherwise ``col`` as it is. ``col`` is also
    returned when the lookup raises IndexError, TypeError or ValueError.
    """
    try:
        if isinstance(dtypes_or_columns, pd.Series):
            labels = dtypes_or_columns.index
        else:
            labels = dtypes_or_columns

        if level is not None:
            level_labels = labels.get_level_values(level)
            wanted = col if isinstance(col, list) else [col]
            selector = level_labels.isin(wanted)
            if not selector.any():
                # nothing matched on that level, index with col directly
                selector = col
            return labels[selector]

        if col in labels:
            return [col]
        if isinstance(col, int):
            col = [col]
        if all(item in labels for item in col):
            return col
        if all(isinstance(item, int) for item in col):
            # treat integers that are not labels as positions
            return [labels[item] for item in col]
        return col
    except (IndexError, TypeError, ValueError):
        return col
1639
+
1640
+
1641
def call_groupby_with_params(df_or_series, groupby_params: dict):
    """
    Call ``df_or_series.groupby`` with ``groupby_params`` and apply the
    optional column selection.

    Parameters
    ----------
    df_or_series :
        Object providing a ``groupby`` method.
    groupby_params : dict
        Keyword arguments for ``groupby``. The optional ``"selection"`` entry
        is not passed to ``groupby``; it is applied to the result via
        ``[...]``. The dict itself is left untouched.

    Returns
    -------
    The groupby object, narrowed to ``selection`` when one is given.
    """
    params = groupby_params.copy()
    selection = params.pop("selection", None)
    res = df_or_series.groupby(**params)

    # Only a missing or empty selection means "no selection": falsy labels
    # such as the integer column 0 are valid and must still be applied.
    no_selection = selection is None or (
        isinstance(selection, (str, list, tuple)) and len(selection) == 0
    )
    if no_selection:
        return res
    return res[selection]