maxframe 2.3.0__cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1117) hide show
  1. maxframe/__init__.py +33 -0
  2. maxframe/_utils.cpython-312-x86_64-linux-gnu.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyi +21 -0
  5. maxframe/_utils.pyx +561 -0
  6. maxframe/codegen/__init__.py +27 -0
  7. maxframe/codegen/core.py +597 -0
  8. maxframe/codegen/spe/__init__.py +16 -0
  9. maxframe/codegen/spe/core.py +307 -0
  10. maxframe/codegen/spe/dataframe/__init__.py +38 -0
  11. maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
  12. maxframe/codegen/spe/dataframe/accessors/base.py +71 -0
  13. maxframe/codegen/spe/dataframe/accessors/dict_.py +89 -0
  14. maxframe/codegen/spe/dataframe/accessors/list_.py +44 -0
  15. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  16. maxframe/codegen/spe/dataframe/arithmetic.py +89 -0
  17. maxframe/codegen/spe/dataframe/datasource.py +181 -0
  18. maxframe/codegen/spe/dataframe/datastore.py +204 -0
  19. maxframe/codegen/spe/dataframe/extensions.py +63 -0
  20. maxframe/codegen/spe/dataframe/fetch.py +26 -0
  21. maxframe/codegen/spe/dataframe/groupby.py +312 -0
  22. maxframe/codegen/spe/dataframe/indexing.py +333 -0
  23. maxframe/codegen/spe/dataframe/merge.py +110 -0
  24. maxframe/codegen/spe/dataframe/misc.py +264 -0
  25. maxframe/codegen/spe/dataframe/missing.py +64 -0
  26. maxframe/codegen/spe/dataframe/reduction.py +183 -0
  27. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  28. maxframe/codegen/spe/dataframe/sort.py +104 -0
  29. maxframe/codegen/spe/dataframe/statistics.py +46 -0
  30. maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
  31. maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
  32. maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
  33. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +304 -0
  34. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +134 -0
  35. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  36. maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
  37. maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
  38. maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
  39. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +95 -0
  40. maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
  41. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  42. maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
  43. maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
  44. maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
  45. maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
  46. maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
  47. maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
  48. maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
  49. maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
  50. maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
  51. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +202 -0
  52. maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
  53. maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
  54. maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
  55. maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
  56. maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
  57. maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
  58. maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
  59. maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
  60. maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
  61. maxframe/codegen/spe/dataframe/tests/test_groupby.py +288 -0
  62. maxframe/codegen/spe/dataframe/tests/test_merge.py +426 -0
  63. maxframe/codegen/spe/dataframe/tests/test_reduction.py +117 -0
  64. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  65. maxframe/codegen/spe/dataframe/tests/test_sort.py +179 -0
  66. maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
  67. maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
  68. maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
  69. maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
  70. maxframe/codegen/spe/dataframe/tseries.py +55 -0
  71. maxframe/codegen/spe/dataframe/udf.py +62 -0
  72. maxframe/codegen/spe/dataframe/value_counts.py +31 -0
  73. maxframe/codegen/spe/dataframe/window.py +65 -0
  74. maxframe/codegen/spe/learn/__init__.py +15 -0
  75. maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
  76. maxframe/codegen/spe/learn/contrib/lightgbm.py +161 -0
  77. maxframe/codegen/spe/learn/contrib/models.py +41 -0
  78. maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
  79. maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
  80. maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
  81. maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
  82. maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
  83. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +99 -0
  84. maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
  85. maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
  86. maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
  87. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  88. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  89. maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
  90. maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
  91. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  92. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  93. maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
  94. maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
  95. maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
  96. maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
  97. maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
  98. maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
  99. maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
  100. maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
  101. maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
  102. maxframe/codegen/spe/learn/utils/__init__.py +15 -0
  103. maxframe/codegen/spe/learn/utils/checks.py +55 -0
  104. maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
  105. maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
  106. maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
  107. maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
  108. maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
  109. maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
  110. maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
  111. maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
  112. maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
  113. maxframe/codegen/spe/learn/utils/validation.py +35 -0
  114. maxframe/codegen/spe/objects.py +26 -0
  115. maxframe/codegen/spe/remote.py +29 -0
  116. maxframe/codegen/spe/tensor/__init__.py +31 -0
  117. maxframe/codegen/spe/tensor/arithmetic.py +95 -0
  118. maxframe/codegen/spe/tensor/core.py +41 -0
  119. maxframe/codegen/spe/tensor/datasource.py +166 -0
  120. maxframe/codegen/spe/tensor/extensions.py +35 -0
  121. maxframe/codegen/spe/tensor/fetch.py +26 -0
  122. maxframe/codegen/spe/tensor/fft.py +74 -0
  123. maxframe/codegen/spe/tensor/indexing.py +63 -0
  124. maxframe/codegen/spe/tensor/linalg.py +90 -0
  125. maxframe/codegen/spe/tensor/merge.py +31 -0
  126. maxframe/codegen/spe/tensor/misc.py +175 -0
  127. maxframe/codegen/spe/tensor/random.py +29 -0
  128. maxframe/codegen/spe/tensor/reduction.py +39 -0
  129. maxframe/codegen/spe/tensor/reshape.py +26 -0
  130. maxframe/codegen/spe/tensor/sort.py +42 -0
  131. maxframe/codegen/spe/tensor/spatial.py +45 -0
  132. maxframe/codegen/spe/tensor/special.py +35 -0
  133. maxframe/codegen/spe/tensor/statistics.py +68 -0
  134. maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
  135. maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
  136. maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
  137. maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
  138. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  139. maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
  140. maxframe/codegen/spe/tensor/tests/test_linalg.py +52 -0
  141. maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
  142. maxframe/codegen/spe/tensor/tests/test_misc.py +144 -0
  143. maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
  144. maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
  145. maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
  146. maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
  147. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  148. maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
  149. maxframe/codegen/spe/tensor/tests/test_statistics.py +43 -0
  150. maxframe/codegen/spe/tests/__init__.py +13 -0
  151. maxframe/codegen/spe/tests/test_remote.py +29 -0
  152. maxframe/codegen/spe/tests/test_spe_codegen.py +135 -0
  153. maxframe/codegen/spe/utils.py +56 -0
  154. maxframe/codegen/tests/__init__.py +13 -0
  155. maxframe/codegen/tests/test_codegen.py +67 -0
  156. maxframe/config/__init__.py +15 -0
  157. maxframe/config/config.py +630 -0
  158. maxframe/config/tests/__init__.py +13 -0
  159. maxframe/config/tests/test_config.py +114 -0
  160. maxframe/config/tests/test_validators.py +46 -0
  161. maxframe/config/validators.py +142 -0
  162. maxframe/conftest.py +261 -0
  163. maxframe/core/__init__.py +53 -0
  164. maxframe/core/accessor.py +45 -0
  165. maxframe/core/base.py +157 -0
  166. maxframe/core/context.py +110 -0
  167. maxframe/core/entity/__init__.py +34 -0
  168. maxframe/core/entity/core.py +150 -0
  169. maxframe/core/entity/executable.py +337 -0
  170. maxframe/core/entity/objects.py +115 -0
  171. maxframe/core/entity/output_types.py +98 -0
  172. maxframe/core/entity/tests/__init__.py +13 -0
  173. maxframe/core/entity/tests/test_objects.py +42 -0
  174. maxframe/core/entity/tileables.py +369 -0
  175. maxframe/core/entity/utils.py +39 -0
  176. maxframe/core/graph/__init__.py +22 -0
  177. maxframe/core/graph/builder/__init__.py +15 -0
  178. maxframe/core/graph/builder/base.py +91 -0
  179. maxframe/core/graph/builder/tileable.py +34 -0
  180. maxframe/core/graph/builder/utils.py +37 -0
  181. maxframe/core/graph/core.cpython-312-x86_64-linux-gnu.so +0 -0
  182. maxframe/core/graph/core.pyx +478 -0
  183. maxframe/core/graph/entity.py +164 -0
  184. maxframe/core/graph/tests/__init__.py +13 -0
  185. maxframe/core/graph/tests/test_graph.py +205 -0
  186. maxframe/core/mode.py +101 -0
  187. maxframe/core/operator/__init__.py +32 -0
  188. maxframe/core/operator/base.py +480 -0
  189. maxframe/core/operator/core.py +307 -0
  190. maxframe/core/operator/fetch.py +40 -0
  191. maxframe/core/operator/objects.py +43 -0
  192. maxframe/core/operator/shuffle.py +45 -0
  193. maxframe/core/operator/tests/__init__.py +13 -0
  194. maxframe/core/operator/tests/test_core.py +64 -0
  195. maxframe/core/operator/utils.py +68 -0
  196. maxframe/core/tests/__init__.py +13 -0
  197. maxframe/core/tests/test_mode.py +75 -0
  198. maxframe/dataframe/__init__.py +89 -0
  199. maxframe/dataframe/accessors/__init__.py +15 -0
  200. maxframe/dataframe/accessors/compat.py +45 -0
  201. maxframe/dataframe/accessors/datetime_/__init__.py +35 -0
  202. maxframe/dataframe/accessors/datetime_/accessor.py +67 -0
  203. maxframe/dataframe/accessors/datetime_/core.py +82 -0
  204. maxframe/dataframe/accessors/datetime_/tests/__init__.py +13 -0
  205. maxframe/dataframe/accessors/datetime_/tests/test_datetime_accessor.py +41 -0
  206. maxframe/dataframe/accessors/dict_/__init__.py +43 -0
  207. maxframe/dataframe/accessors/dict_/accessor.py +39 -0
  208. maxframe/dataframe/accessors/dict_/contains.py +72 -0
  209. maxframe/dataframe/accessors/dict_/core.py +48 -0
  210. maxframe/dataframe/accessors/dict_/getitem.py +140 -0
  211. maxframe/dataframe/accessors/dict_/length.py +64 -0
  212. maxframe/dataframe/accessors/dict_/remove.py +75 -0
  213. maxframe/dataframe/accessors/dict_/setitem.py +79 -0
  214. maxframe/dataframe/accessors/dict_/tests/__init__.py +13 -0
  215. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +168 -0
  216. maxframe/dataframe/accessors/list_/__init__.py +37 -0
  217. maxframe/dataframe/accessors/list_/accessor.py +39 -0
  218. maxframe/dataframe/accessors/list_/core.py +48 -0
  219. maxframe/dataframe/accessors/list_/getitem.py +128 -0
  220. maxframe/dataframe/accessors/list_/length.py +64 -0
  221. maxframe/dataframe/accessors/list_/tests/__init__.py +13 -0
  222. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +81 -0
  223. maxframe/dataframe/accessors/plotting/__init__.py +40 -0
  224. maxframe/dataframe/accessors/plotting/core.py +78 -0
  225. maxframe/dataframe/accessors/plotting/tests/__init__.py +13 -0
  226. maxframe/dataframe/accessors/plotting/tests/test_plotting_accessor.py +136 -0
  227. maxframe/dataframe/accessors/string_/__init__.py +36 -0
  228. maxframe/dataframe/accessors/string_/accessor.py +215 -0
  229. maxframe/dataframe/accessors/string_/core.py +224 -0
  230. maxframe/dataframe/accessors/string_/tests/__init__.py +13 -0
  231. maxframe/dataframe/accessors/string_/tests/test_string_accessor.py +73 -0
  232. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  233. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  234. maxframe/dataframe/accessors/struct_/core.py +43 -0
  235. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  236. maxframe/dataframe/accessors/struct_/field.py +123 -0
  237. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  238. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  239. maxframe/dataframe/arithmetic/__init__.py +373 -0
  240. maxframe/dataframe/arithmetic/abs.py +33 -0
  241. maxframe/dataframe/arithmetic/add.py +60 -0
  242. maxframe/dataframe/arithmetic/arccos.py +28 -0
  243. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  244. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  245. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  246. maxframe/dataframe/arithmetic/arctan.py +28 -0
  247. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  248. maxframe/dataframe/arithmetic/between.py +106 -0
  249. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  250. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  251. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  252. maxframe/dataframe/arithmetic/ceil.py +28 -0
  253. maxframe/dataframe/arithmetic/core.py +361 -0
  254. maxframe/dataframe/arithmetic/cos.py +28 -0
  255. maxframe/dataframe/arithmetic/cosh.py +28 -0
  256. maxframe/dataframe/arithmetic/degrees.py +28 -0
  257. maxframe/dataframe/arithmetic/docstring.py +416 -0
  258. maxframe/dataframe/arithmetic/dot.py +237 -0
  259. maxframe/dataframe/arithmetic/equal.py +58 -0
  260. maxframe/dataframe/arithmetic/exp.py +28 -0
  261. maxframe/dataframe/arithmetic/exp2.py +28 -0
  262. maxframe/dataframe/arithmetic/expm1.py +28 -0
  263. maxframe/dataframe/arithmetic/floor.py +28 -0
  264. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  265. maxframe/dataframe/arithmetic/greater.py +59 -0
  266. maxframe/dataframe/arithmetic/greater_equal.py +59 -0
  267. maxframe/dataframe/arithmetic/invert.py +33 -0
  268. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  269. maxframe/dataframe/arithmetic/less.py +57 -0
  270. maxframe/dataframe/arithmetic/less_equal.py +59 -0
  271. maxframe/dataframe/arithmetic/log.py +28 -0
  272. maxframe/dataframe/arithmetic/log10.py +28 -0
  273. maxframe/dataframe/arithmetic/log2.py +28 -0
  274. maxframe/dataframe/arithmetic/maximum.py +33 -0
  275. maxframe/dataframe/arithmetic/minimum.py +33 -0
  276. maxframe/dataframe/arithmetic/mod.py +60 -0
  277. maxframe/dataframe/arithmetic/multiply.py +60 -0
  278. maxframe/dataframe/arithmetic/negative.py +33 -0
  279. maxframe/dataframe/arithmetic/not_equal.py +58 -0
  280. maxframe/dataframe/arithmetic/power.py +68 -0
  281. maxframe/dataframe/arithmetic/radians.py +28 -0
  282. maxframe/dataframe/arithmetic/round.py +144 -0
  283. maxframe/dataframe/arithmetic/sin.py +28 -0
  284. maxframe/dataframe/arithmetic/sinh.py +28 -0
  285. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  286. maxframe/dataframe/arithmetic/subtract.py +64 -0
  287. maxframe/dataframe/arithmetic/tan.py +28 -0
  288. maxframe/dataframe/arithmetic/tanh.py +28 -0
  289. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  290. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +724 -0
  291. maxframe/dataframe/arithmetic/truediv.py +64 -0
  292. maxframe/dataframe/arithmetic/trunc.py +28 -0
  293. maxframe/dataframe/core.py +2385 -0
  294. maxframe/dataframe/datasource/__init__.py +33 -0
  295. maxframe/dataframe/datasource/core.py +94 -0
  296. maxframe/dataframe/datasource/dataframe.py +59 -0
  297. maxframe/dataframe/datasource/date_range.py +512 -0
  298. maxframe/dataframe/datasource/direct.py +57 -0
  299. maxframe/dataframe/datasource/from_dict.py +124 -0
  300. maxframe/dataframe/datasource/from_index.py +58 -0
  301. maxframe/dataframe/datasource/from_records.py +191 -0
  302. maxframe/dataframe/datasource/from_tensor.py +498 -0
  303. maxframe/dataframe/datasource/index.py +117 -0
  304. maxframe/dataframe/datasource/read_csv.py +541 -0
  305. maxframe/dataframe/datasource/read_odps_query.py +536 -0
  306. maxframe/dataframe/datasource/read_odps_table.py +295 -0
  307. maxframe/dataframe/datasource/read_parquet.py +425 -0
  308. maxframe/dataframe/datasource/series.py +55 -0
  309. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  310. maxframe/dataframe/datasource/tests/test_datasource.py +663 -0
  311. maxframe/dataframe/datastore/__init__.py +36 -0
  312. maxframe/dataframe/datastore/core.py +19 -0
  313. maxframe/dataframe/datastore/direct.py +268 -0
  314. maxframe/dataframe/datastore/tests/__init__.py +13 -0
  315. maxframe/dataframe/datastore/tests/test_to_odps.py +99 -0
  316. maxframe/dataframe/datastore/to_csv.py +219 -0
  317. maxframe/dataframe/datastore/to_odps.py +264 -0
  318. maxframe/dataframe/extensions/__init__.py +70 -0
  319. maxframe/dataframe/extensions/accessor.py +35 -0
  320. maxframe/dataframe/extensions/apply_chunk.py +733 -0
  321. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  322. maxframe/dataframe/extensions/collect_kv.py +126 -0
  323. maxframe/dataframe/extensions/extract_kv.py +177 -0
  324. maxframe/dataframe/extensions/flatjson.py +133 -0
  325. maxframe/dataframe/extensions/flatmap.py +329 -0
  326. maxframe/dataframe/extensions/map_reduce.py +263 -0
  327. maxframe/dataframe/extensions/rebalance.py +62 -0
  328. maxframe/dataframe/extensions/reshuffle.py +83 -0
  329. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  330. maxframe/dataframe/extensions/tests/test_apply_chunk.py +194 -0
  331. maxframe/dataframe/extensions/tests/test_extensions.py +198 -0
  332. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  333. maxframe/dataframe/fetch/__init__.py +15 -0
  334. maxframe/dataframe/fetch/core.py +97 -0
  335. maxframe/dataframe/groupby/__init__.py +105 -0
  336. maxframe/dataframe/groupby/aggregation.py +441 -0
  337. maxframe/dataframe/groupby/apply.py +235 -0
  338. maxframe/dataframe/groupby/apply_chunk.py +407 -0
  339. maxframe/dataframe/groupby/core.py +342 -0
  340. maxframe/dataframe/groupby/cum.py +102 -0
  341. maxframe/dataframe/groupby/expanding.py +264 -0
  342. maxframe/dataframe/groupby/extensions.py +26 -0
  343. maxframe/dataframe/groupby/fill.py +149 -0
  344. maxframe/dataframe/groupby/getitem.py +105 -0
  345. maxframe/dataframe/groupby/head.py +115 -0
  346. maxframe/dataframe/groupby/rank.py +136 -0
  347. maxframe/dataframe/groupby/rolling.py +206 -0
  348. maxframe/dataframe/groupby/sample.py +214 -0
  349. maxframe/dataframe/groupby/shift.py +114 -0
  350. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  351. maxframe/dataframe/groupby/tests/test_groupby.py +373 -0
  352. maxframe/dataframe/groupby/transform.py +264 -0
  353. maxframe/dataframe/indexing/__init__.py +104 -0
  354. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  355. maxframe/dataframe/indexing/align.py +350 -0
  356. maxframe/dataframe/indexing/at.py +83 -0
  357. maxframe/dataframe/indexing/droplevel.py +195 -0
  358. maxframe/dataframe/indexing/filter.py +169 -0
  359. maxframe/dataframe/indexing/get_level_values.py +76 -0
  360. maxframe/dataframe/indexing/getitem.py +205 -0
  361. maxframe/dataframe/indexing/iat.py +82 -0
  362. maxframe/dataframe/indexing/iloc.py +711 -0
  363. maxframe/dataframe/indexing/insert.py +118 -0
  364. maxframe/dataframe/indexing/loc.py +694 -0
  365. maxframe/dataframe/indexing/reindex.py +541 -0
  366. maxframe/dataframe/indexing/rename.py +445 -0
  367. maxframe/dataframe/indexing/rename_axis.py +217 -0
  368. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  369. maxframe/dataframe/indexing/reset_index.py +427 -0
  370. maxframe/dataframe/indexing/sample.py +232 -0
  371. maxframe/dataframe/indexing/set_axis.py +197 -0
  372. maxframe/dataframe/indexing/set_index.py +128 -0
  373. maxframe/dataframe/indexing/setitem.py +133 -0
  374. maxframe/dataframe/indexing/swaplevel.py +185 -0
  375. maxframe/dataframe/indexing/take.py +99 -0
  376. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  377. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  378. maxframe/dataframe/indexing/truncate.py +140 -0
  379. maxframe/dataframe/indexing/where.py +300 -0
  380. maxframe/dataframe/indexing/xs.py +148 -0
  381. maxframe/dataframe/initializer.py +298 -0
  382. maxframe/dataframe/merge/__init__.py +53 -0
  383. maxframe/dataframe/merge/append.py +120 -0
  384. maxframe/dataframe/merge/combine.py +244 -0
  385. maxframe/dataframe/merge/combine_first.py +120 -0
  386. maxframe/dataframe/merge/compare.py +387 -0
  387. maxframe/dataframe/merge/concat.py +500 -0
  388. maxframe/dataframe/merge/merge.py +806 -0
  389. maxframe/dataframe/merge/tests/__init__.py +13 -0
  390. maxframe/dataframe/merge/tests/test_merge.py +390 -0
  391. maxframe/dataframe/merge/update.py +271 -0
  392. maxframe/dataframe/misc/__init__.py +142 -0
  393. maxframe/dataframe/misc/_duplicate.py +56 -0
  394. maxframe/dataframe/misc/apply.py +730 -0
  395. maxframe/dataframe/misc/astype.py +237 -0
  396. maxframe/dataframe/misc/case_when.py +145 -0
  397. maxframe/dataframe/misc/check_monotonic.py +84 -0
  398. maxframe/dataframe/misc/check_unique.py +82 -0
  399. maxframe/dataframe/misc/clip.py +145 -0
  400. maxframe/dataframe/misc/cut.py +386 -0
  401. maxframe/dataframe/misc/describe.py +278 -0
  402. maxframe/dataframe/misc/diff.py +210 -0
  403. maxframe/dataframe/misc/drop.py +473 -0
  404. maxframe/dataframe/misc/drop_duplicates.py +251 -0
  405. maxframe/dataframe/misc/duplicated.py +292 -0
  406. maxframe/dataframe/misc/eval.py +730 -0
  407. maxframe/dataframe/misc/explode.py +171 -0
  408. maxframe/dataframe/misc/get_dummies.py +241 -0
  409. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  410. maxframe/dataframe/misc/isin.py +220 -0
  411. maxframe/dataframe/misc/map.py +360 -0
  412. maxframe/dataframe/misc/memory_usage.py +248 -0
  413. maxframe/dataframe/misc/pct_change.py +68 -0
  414. maxframe/dataframe/misc/qcut.py +104 -0
  415. maxframe/dataframe/misc/rechunk.py +59 -0
  416. maxframe/dataframe/misc/repeat.py +159 -0
  417. maxframe/dataframe/misc/select_dtypes.py +104 -0
  418. maxframe/dataframe/misc/shift.py +259 -0
  419. maxframe/dataframe/misc/tests/__init__.py +13 -0
  420. maxframe/dataframe/misc/tests/test_misc.py +649 -0
  421. maxframe/dataframe/misc/to_numeric.py +181 -0
  422. maxframe/dataframe/misc/transform.py +346 -0
  423. maxframe/dataframe/misc/transpose.py +148 -0
  424. maxframe/dataframe/misc/valid_index.py +115 -0
  425. maxframe/dataframe/misc/value_counts.py +206 -0
  426. maxframe/dataframe/missing/__init__.py +53 -0
  427. maxframe/dataframe/missing/checkna.py +231 -0
  428. maxframe/dataframe/missing/dropna.py +294 -0
  429. maxframe/dataframe/missing/fillna.py +283 -0
  430. maxframe/dataframe/missing/replace.py +446 -0
  431. maxframe/dataframe/missing/tests/__init__.py +13 -0
  432. maxframe/dataframe/missing/tests/test_missing.py +90 -0
  433. maxframe/dataframe/operators.py +231 -0
  434. maxframe/dataframe/reduction/__init__.py +129 -0
  435. maxframe/dataframe/reduction/aggregation.py +499 -0
  436. maxframe/dataframe/reduction/all.py +78 -0
  437. maxframe/dataframe/reduction/any.py +78 -0
  438. maxframe/dataframe/reduction/argmax.py +103 -0
  439. maxframe/dataframe/reduction/argmin.py +103 -0
  440. maxframe/dataframe/reduction/core.py +907 -0
  441. maxframe/dataframe/reduction/count.py +63 -0
  442. maxframe/dataframe/reduction/cov.py +166 -0
  443. maxframe/dataframe/reduction/cummax.py +30 -0
  444. maxframe/dataframe/reduction/cummin.py +30 -0
  445. maxframe/dataframe/reduction/cumprod.py +30 -0
  446. maxframe/dataframe/reduction/cumsum.py +30 -0
  447. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  448. maxframe/dataframe/reduction/idxmax.py +185 -0
  449. maxframe/dataframe/reduction/idxmin.py +185 -0
  450. maxframe/dataframe/reduction/kurtosis.py +111 -0
  451. maxframe/dataframe/reduction/max.py +65 -0
  452. maxframe/dataframe/reduction/mean.py +63 -0
  453. maxframe/dataframe/reduction/median.py +56 -0
  454. maxframe/dataframe/reduction/min.py +65 -0
  455. maxframe/dataframe/reduction/mode.py +144 -0
  456. maxframe/dataframe/reduction/nunique.py +149 -0
  457. maxframe/dataframe/reduction/prod.py +81 -0
  458. maxframe/dataframe/reduction/reduction_size.py +36 -0
  459. maxframe/dataframe/reduction/sem.py +73 -0
  460. maxframe/dataframe/reduction/skew.py +93 -0
  461. maxframe/dataframe/reduction/std.py +53 -0
  462. maxframe/dataframe/reduction/str_concat.py +51 -0
  463. maxframe/dataframe/reduction/sum.py +81 -0
  464. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  465. maxframe/dataframe/reduction/tests/test_reduction.py +541 -0
  466. maxframe/dataframe/reduction/unique.py +153 -0
  467. maxframe/dataframe/reduction/var.py +76 -0
  468. maxframe/dataframe/reshape/__init__.py +38 -0
  469. maxframe/dataframe/reshape/melt.py +169 -0
  470. maxframe/dataframe/reshape/pivot.py +233 -0
  471. maxframe/dataframe/reshape/pivot_table.py +275 -0
  472. maxframe/dataframe/reshape/stack.py +240 -0
  473. maxframe/dataframe/reshape/unstack.py +114 -0
  474. maxframe/dataframe/sort/__init__.py +49 -0
  475. maxframe/dataframe/sort/argsort.py +68 -0
  476. maxframe/dataframe/sort/core.py +37 -0
  477. maxframe/dataframe/sort/nlargest.py +238 -0
  478. maxframe/dataframe/sort/nsmallest.py +228 -0
  479. maxframe/dataframe/sort/rank.py +147 -0
  480. maxframe/dataframe/sort/sort_index.py +153 -0
  481. maxframe/dataframe/sort/sort_values.py +301 -0
  482. maxframe/dataframe/sort/tests/__init__.py +13 -0
  483. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  484. maxframe/dataframe/statistics/__init__.py +33 -0
  485. maxframe/dataframe/statistics/corr.py +284 -0
  486. maxframe/dataframe/statistics/quantile.py +338 -0
  487. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  488. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  489. maxframe/dataframe/tests/__init__.py +13 -0
  490. maxframe/dataframe/tests/test_initializer.py +60 -0
  491. maxframe/dataframe/tests/test_typing.py +119 -0
  492. maxframe/dataframe/tests/test_utils.py +165 -0
  493. maxframe/dataframe/tseries/__init__.py +32 -0
  494. maxframe/dataframe/tseries/at_time.py +61 -0
  495. maxframe/dataframe/tseries/between_time.py +122 -0
  496. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  497. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  498. maxframe/dataframe/tseries/to_datetime.py +299 -0
  499. maxframe/dataframe/typing_.py +196 -0
  500. maxframe/dataframe/ufunc/__init__.py +27 -0
  501. maxframe/dataframe/ufunc/tensor.py +54 -0
  502. maxframe/dataframe/ufunc/ufunc.py +53 -0
  503. maxframe/dataframe/utils.py +1651 -0
  504. maxframe/dataframe/window/__init__.py +29 -0
  505. maxframe/dataframe/window/aggregation.py +100 -0
  506. maxframe/dataframe/window/core.py +82 -0
  507. maxframe/dataframe/window/ewm.py +247 -0
  508. maxframe/dataframe/window/expanding.py +151 -0
  509. maxframe/dataframe/window/rolling.py +389 -0
  510. maxframe/dataframe/window/tests/__init__.py +13 -0
  511. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  512. maxframe/dataframe/window/tests/test_expanding.py +60 -0
  513. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  514. maxframe/env.py +37 -0
  515. maxframe/errors.py +47 -0
  516. maxframe/extension.py +107 -0
  517. maxframe/io/__init__.py +13 -0
  518. maxframe/io/objects/__init__.py +24 -0
  519. maxframe/io/objects/core.py +156 -0
  520. maxframe/io/objects/tensor.py +132 -0
  521. maxframe/io/objects/tests/__init__.py +13 -0
  522. maxframe/io/objects/tests/test_object_io.py +79 -0
  523. maxframe/io/odpsio/__init__.py +23 -0
  524. maxframe/io/odpsio/arrow.py +161 -0
  525. maxframe/io/odpsio/schema.py +496 -0
  526. maxframe/io/odpsio/tableio.py +727 -0
  527. maxframe/io/odpsio/tests/__init__.py +13 -0
  528. maxframe/io/odpsio/tests/test_arrow.py +132 -0
  529. maxframe/io/odpsio/tests/test_schema.py +580 -0
  530. maxframe/io/odpsio/tests/test_tableio.py +205 -0
  531. maxframe/io/odpsio/tests/test_volumeio.py +75 -0
  532. maxframe/io/odpsio/volumeio.py +102 -0
  533. maxframe/learn/__init__.py +25 -0
  534. maxframe/learn/cluster/__init__.py +15 -0
  535. maxframe/learn/cluster/_kmeans.py +782 -0
  536. maxframe/learn/contrib/__init__.py +17 -0
  537. maxframe/learn/contrib/graph/__init__.py +15 -0
  538. maxframe/learn/contrib/graph/connected_components.py +216 -0
  539. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  540. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  541. maxframe/learn/contrib/lightgbm/__init__.py +33 -0
  542. maxframe/learn/contrib/lightgbm/_predict.py +138 -0
  543. maxframe/learn/contrib/lightgbm/_train.py +163 -0
  544. maxframe/learn/contrib/lightgbm/callback.py +114 -0
  545. maxframe/learn/contrib/lightgbm/classifier.py +199 -0
  546. maxframe/learn/contrib/lightgbm/core.py +372 -0
  547. maxframe/learn/contrib/lightgbm/dataset.py +153 -0
  548. maxframe/learn/contrib/lightgbm/regressor.py +29 -0
  549. maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
  550. maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
  551. maxframe/learn/contrib/llm/__init__.py +17 -0
  552. maxframe/learn/contrib/llm/core.py +86 -0
  553. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  554. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  555. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  556. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  557. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  558. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  559. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  560. maxframe/learn/contrib/llm/models/__init__.py +16 -0
  561. maxframe/learn/contrib/llm/models/dashscope.py +114 -0
  562. maxframe/learn/contrib/llm/models/managed.py +119 -0
  563. maxframe/learn/contrib/llm/models/openai.py +72 -0
  564. maxframe/learn/contrib/llm/multi_modal.py +135 -0
  565. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  566. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  567. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  568. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  569. maxframe/learn/contrib/llm/text.py +608 -0
  570. maxframe/learn/contrib/models.py +109 -0
  571. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  572. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  573. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  574. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  575. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  576. maxframe/learn/contrib/utils.py +108 -0
  577. maxframe/learn/contrib/xgboost/__init__.py +33 -0
  578. maxframe/learn/contrib/xgboost/callback.py +86 -0
  579. maxframe/learn/contrib/xgboost/classifier.py +119 -0
  580. maxframe/learn/contrib/xgboost/core.py +469 -0
  581. maxframe/learn/contrib/xgboost/dmatrix.py +157 -0
  582. maxframe/learn/contrib/xgboost/predict.py +133 -0
  583. maxframe/learn/contrib/xgboost/regressor.py +91 -0
  584. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  585. maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
  586. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  587. maxframe/learn/contrib/xgboost/train.py +181 -0
  588. maxframe/learn/core.py +344 -0
  589. maxframe/learn/datasets/__init__.py +20 -0
  590. maxframe/learn/datasets/samples_generator.py +628 -0
  591. maxframe/learn/linear_model/__init__.py +15 -0
  592. maxframe/learn/linear_model/_base.py +220 -0
  593. maxframe/learn/linear_model/_lin_reg.py +175 -0
  594. maxframe/learn/metrics/__init__.py +31 -0
  595. maxframe/learn/metrics/_check_targets.py +95 -0
  596. maxframe/learn/metrics/_classification.py +1266 -0
  597. maxframe/learn/metrics/_ranking.py +477 -0
  598. maxframe/learn/metrics/_regression.py +256 -0
  599. maxframe/learn/metrics/_scorer.py +60 -0
  600. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  601. maxframe/learn/metrics/pairwise/core.py +77 -0
  602. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  603. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  604. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  605. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  606. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  607. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  608. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  609. maxframe/learn/metrics/tests/__init__.py +13 -0
  610. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  611. maxframe/learn/model_selection/__init__.py +15 -0
  612. maxframe/learn/model_selection/_split.py +451 -0
  613. maxframe/learn/model_selection/tests/__init__.py +13 -0
  614. maxframe/learn/model_selection/tests/test_split.py +156 -0
  615. maxframe/learn/preprocessing/__init__.py +16 -0
  616. maxframe/learn/preprocessing/_data/__init__.py +17 -0
  617. maxframe/learn/preprocessing/_data/min_max_scaler.py +401 -0
  618. maxframe/learn/preprocessing/_data/normalize.py +127 -0
  619. maxframe/learn/preprocessing/_data/standard_scaler.py +512 -0
  620. maxframe/learn/preprocessing/_data/utils.py +79 -0
  621. maxframe/learn/preprocessing/_label/__init__.py +16 -0
  622. maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
  623. maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
  624. maxframe/learn/utils/__init__.py +20 -0
  625. maxframe/learn/utils/_encode.py +314 -0
  626. maxframe/learn/utils/checks.py +160 -0
  627. maxframe/learn/utils/core.py +121 -0
  628. maxframe/learn/utils/extmath.py +246 -0
  629. maxframe/learn/utils/multiclass.py +292 -0
  630. maxframe/learn/utils/odpsio.py +262 -0
  631. maxframe/learn/utils/shuffle.py +114 -0
  632. maxframe/learn/utils/sparsefuncs.py +87 -0
  633. maxframe/learn/utils/validation.py +775 -0
  634. maxframe/lib/__init__.py +13 -0
  635. maxframe/lib/aio/__init__.py +27 -0
  636. maxframe/lib/aio/_runners.py +162 -0
  637. maxframe/lib/aio/_threads.py +35 -0
  638. maxframe/lib/aio/base.py +82 -0
  639. maxframe/lib/aio/file.py +85 -0
  640. maxframe/lib/aio/isolation.py +100 -0
  641. maxframe/lib/aio/lru.py +242 -0
  642. maxframe/lib/aio/parallelism.py +37 -0
  643. maxframe/lib/aio/tests/__init__.py +13 -0
  644. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  645. maxframe/lib/compat.py +185 -0
  646. maxframe/lib/compression.py +55 -0
  647. maxframe/lib/cython/__init__.py +13 -0
  648. maxframe/lib/cython/libcpp.pxd +30 -0
  649. maxframe/lib/dtypes_extension/__init__.py +30 -0
  650. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  651. maxframe/lib/dtypes_extension/blob.py +304 -0
  652. maxframe/lib/dtypes_extension/dtypes.py +106 -0
  653. maxframe/lib/dtypes_extension/tests/__init__.py +13 -0
  654. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  655. maxframe/lib/dtypes_extension/tests/test_dtypes.py +63 -0
  656. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  657. maxframe/lib/filesystem/__init__.py +21 -0
  658. maxframe/lib/filesystem/_glob.py +173 -0
  659. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  660. maxframe/lib/filesystem/_oss_lib/common.py +272 -0
  661. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  662. maxframe/lib/filesystem/_oss_lib/handle.py +152 -0
  663. maxframe/lib/filesystem/arrow.py +236 -0
  664. maxframe/lib/filesystem/base.py +263 -0
  665. maxframe/lib/filesystem/core.py +95 -0
  666. maxframe/lib/filesystem/fsmap.py +164 -0
  667. maxframe/lib/filesystem/hdfs.py +31 -0
  668. maxframe/lib/filesystem/local.py +112 -0
  669. maxframe/lib/filesystem/oss.py +226 -0
  670. maxframe/lib/filesystem/tests/__init__.py +13 -0
  671. maxframe/lib/filesystem/tests/test_filesystem.py +225 -0
  672. maxframe/lib/filesystem/tests/test_oss.py +220 -0
  673. maxframe/lib/functools_compat.py +81 -0
  674. maxframe/lib/mmh3.cpython-312-x86_64-linux-gnu.so +0 -0
  675. maxframe/lib/mmh3.pyi +43 -0
  676. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  677. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  678. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  679. maxframe/lib/sparse/__init__.py +856 -0
  680. maxframe/lib/sparse/array.py +1616 -0
  681. maxframe/lib/sparse/core.py +90 -0
  682. maxframe/lib/sparse/linalg.py +31 -0
  683. maxframe/lib/sparse/matrix.py +244 -0
  684. maxframe/lib/sparse/tests/__init__.py +13 -0
  685. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  686. maxframe/lib/sparse/vector.py +148 -0
  687. maxframe/lib/tblib/LICENSE +20 -0
  688. maxframe/lib/tblib/__init__.py +327 -0
  689. maxframe/lib/tblib/cpython.py +83 -0
  690. maxframe/lib/tblib/decorators.py +44 -0
  691. maxframe/lib/tblib/pickling_support.py +90 -0
  692. maxframe/lib/tests/__init__.py +13 -0
  693. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  694. maxframe/lib/version.py +620 -0
  695. maxframe/lib/wrapped_pickle.py +150 -0
  696. maxframe/mixin.py +157 -0
  697. maxframe/opcodes.py +657 -0
  698. maxframe/protocol.py +607 -0
  699. maxframe/remote/__init__.py +18 -0
  700. maxframe/remote/core.py +212 -0
  701. maxframe/remote/run_script.py +124 -0
  702. maxframe/serialization/__init__.py +39 -0
  703. maxframe/serialization/arrow.py +120 -0
  704. maxframe/serialization/blob.py +32 -0
  705. maxframe/serialization/core.cpython-312-x86_64-linux-gnu.so +0 -0
  706. maxframe/serialization/core.pxd +50 -0
  707. maxframe/serialization/core.pyi +66 -0
  708. maxframe/serialization/core.pyx +1265 -0
  709. maxframe/serialization/exception.py +84 -0
  710. maxframe/serialization/maxframe_objects.py +39 -0
  711. maxframe/serialization/numpy.py +110 -0
  712. maxframe/serialization/pandas.py +278 -0
  713. maxframe/serialization/scipy.py +71 -0
  714. maxframe/serialization/serializables/__init__.py +55 -0
  715. maxframe/serialization/serializables/core.py +469 -0
  716. maxframe/serialization/serializables/field.py +624 -0
  717. maxframe/serialization/serializables/field_type.py +592 -0
  718. maxframe/serialization/serializables/tests/__init__.py +13 -0
  719. maxframe/serialization/serializables/tests/test_field_type.py +119 -0
  720. maxframe/serialization/serializables/tests/test_serializable.py +313 -0
  721. maxframe/serialization/tests/__init__.py +13 -0
  722. maxframe/serialization/tests/test_serial.py +487 -0
  723. maxframe/session.py +1250 -0
  724. maxframe/sperunner.py +165 -0
  725. maxframe/tensor/__init__.py +325 -0
  726. maxframe/tensor/arithmetic/__init__.py +322 -0
  727. maxframe/tensor/arithmetic/abs.py +66 -0
  728. maxframe/tensor/arithmetic/absolute.py +66 -0
  729. maxframe/tensor/arithmetic/add.py +112 -0
  730. maxframe/tensor/arithmetic/angle.py +70 -0
  731. maxframe/tensor/arithmetic/arccos.py +101 -0
  732. maxframe/tensor/arithmetic/arccosh.py +89 -0
  733. maxframe/tensor/arithmetic/arcsin.py +92 -0
  734. maxframe/tensor/arithmetic/arcsinh.py +84 -0
  735. maxframe/tensor/arithmetic/arctan.py +104 -0
  736. maxframe/tensor/arithmetic/arctan2.py +126 -0
  737. maxframe/tensor/arithmetic/arctanh.py +84 -0
  738. maxframe/tensor/arithmetic/around.py +112 -0
  739. maxframe/tensor/arithmetic/bitand.py +93 -0
  740. maxframe/tensor/arithmetic/bitor.py +100 -0
  741. maxframe/tensor/arithmetic/bitxor.py +93 -0
  742. maxframe/tensor/arithmetic/cbrt.py +64 -0
  743. maxframe/tensor/arithmetic/ceil.py +69 -0
  744. maxframe/tensor/arithmetic/clip.py +165 -0
  745. maxframe/tensor/arithmetic/conj.py +72 -0
  746. maxframe/tensor/arithmetic/copysign.py +76 -0
  747. maxframe/tensor/arithmetic/core.py +552 -0
  748. maxframe/tensor/arithmetic/cos.py +83 -0
  749. maxframe/tensor/arithmetic/cosh.py +70 -0
  750. maxframe/tensor/arithmetic/deg2rad.py +70 -0
  751. maxframe/tensor/arithmetic/degrees.py +75 -0
  752. maxframe/tensor/arithmetic/divide.py +112 -0
  753. maxframe/tensor/arithmetic/equal.py +74 -0
  754. maxframe/tensor/arithmetic/exp.py +104 -0
  755. maxframe/tensor/arithmetic/exp2.py +65 -0
  756. maxframe/tensor/arithmetic/expm1.py +77 -0
  757. maxframe/tensor/arithmetic/fabs.py +72 -0
  758. maxframe/tensor/arithmetic/fix.py +67 -0
  759. maxframe/tensor/arithmetic/float_power.py +101 -0
  760. maxframe/tensor/arithmetic/floor.py +75 -0
  761. maxframe/tensor/arithmetic/floordiv.py +92 -0
  762. maxframe/tensor/arithmetic/fmax.py +103 -0
  763. maxframe/tensor/arithmetic/fmin.py +104 -0
  764. maxframe/tensor/arithmetic/fmod.py +97 -0
  765. maxframe/tensor/arithmetic/frexp.py +96 -0
  766. maxframe/tensor/arithmetic/greater.py +75 -0
  767. maxframe/tensor/arithmetic/greater_equal.py +67 -0
  768. maxframe/tensor/arithmetic/hypot.py +75 -0
  769. maxframe/tensor/arithmetic/i0.py +87 -0
  770. maxframe/tensor/arithmetic/imag.py +65 -0
  771. maxframe/tensor/arithmetic/invert.py +108 -0
  772. maxframe/tensor/arithmetic/isclose.py +114 -0
  773. maxframe/tensor/arithmetic/iscomplex.py +62 -0
  774. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  775. maxframe/tensor/arithmetic/isfinite.py +104 -0
  776. maxframe/tensor/arithmetic/isinf.py +101 -0
  777. maxframe/tensor/arithmetic/isnan.py +80 -0
  778. maxframe/tensor/arithmetic/isreal.py +61 -0
  779. maxframe/tensor/arithmetic/ldexp.py +97 -0
  780. maxframe/tensor/arithmetic/less.py +67 -0
  781. maxframe/tensor/arithmetic/less_equal.py +67 -0
  782. maxframe/tensor/arithmetic/log.py +90 -0
  783. maxframe/tensor/arithmetic/log10.py +83 -0
  784. maxframe/tensor/arithmetic/log1p.py +93 -0
  785. maxframe/tensor/arithmetic/log2.py +83 -0
  786. maxframe/tensor/arithmetic/logaddexp.py +78 -0
  787. maxframe/tensor/arithmetic/logaddexp2.py +76 -0
  788. maxframe/tensor/arithmetic/logical_and.py +79 -0
  789. maxframe/tensor/arithmetic/logical_not.py +72 -0
  790. maxframe/tensor/arithmetic/logical_or.py +80 -0
  791. maxframe/tensor/arithmetic/logical_xor.py +86 -0
  792. maxframe/tensor/arithmetic/lshift.py +80 -0
  793. maxframe/tensor/arithmetic/maximum.py +106 -0
  794. maxframe/tensor/arithmetic/minimum.py +106 -0
  795. maxframe/tensor/arithmetic/mod.py +102 -0
  796. maxframe/tensor/arithmetic/modf.py +87 -0
  797. maxframe/tensor/arithmetic/multiply.py +114 -0
  798. maxframe/tensor/arithmetic/nan_to_num.py +97 -0
  799. maxframe/tensor/arithmetic/negative.py +63 -0
  800. maxframe/tensor/arithmetic/nextafter.py +66 -0
  801. maxframe/tensor/arithmetic/not_equal.py +70 -0
  802. maxframe/tensor/arithmetic/positive.py +45 -0
  803. maxframe/tensor/arithmetic/power.py +104 -0
  804. maxframe/tensor/arithmetic/rad2deg.py +69 -0
  805. maxframe/tensor/arithmetic/radians.py +75 -0
  806. maxframe/tensor/arithmetic/real.py +68 -0
  807. maxframe/tensor/arithmetic/reciprocal.py +78 -0
  808. maxframe/tensor/arithmetic/rint.py +66 -0
  809. maxframe/tensor/arithmetic/rshift.py +79 -0
  810. maxframe/tensor/arithmetic/setimag.py +27 -0
  811. maxframe/tensor/arithmetic/setreal.py +27 -0
  812. maxframe/tensor/arithmetic/sign.py +79 -0
  813. maxframe/tensor/arithmetic/signbit.py +63 -0
  814. maxframe/tensor/arithmetic/sin.py +96 -0
  815. maxframe/tensor/arithmetic/sinc.py +100 -0
  816. maxframe/tensor/arithmetic/sinh.py +91 -0
  817. maxframe/tensor/arithmetic/spacing.py +70 -0
  818. maxframe/tensor/arithmetic/sqrt.py +79 -0
  819. maxframe/tensor/arithmetic/square.py +67 -0
  820. maxframe/tensor/arithmetic/subtract.py +83 -0
  821. maxframe/tensor/arithmetic/tan.py +86 -0
  822. maxframe/tensor/arithmetic/tanh.py +90 -0
  823. maxframe/tensor/arithmetic/tests/__init__.py +13 -0
  824. maxframe/tensor/arithmetic/tests/test_arithmetic.py +449 -0
  825. maxframe/tensor/arithmetic/truediv.py +102 -0
  826. maxframe/tensor/arithmetic/trunc.py +70 -0
  827. maxframe/tensor/arithmetic/utils.py +91 -0
  828. maxframe/tensor/array_utils.py +164 -0
  829. maxframe/tensor/core.py +597 -0
  830. maxframe/tensor/datasource/__init__.py +40 -0
  831. maxframe/tensor/datasource/arange.py +154 -0
  832. maxframe/tensor/datasource/array.py +399 -0
  833. maxframe/tensor/datasource/core.py +114 -0
  834. maxframe/tensor/datasource/diag.py +140 -0
  835. maxframe/tensor/datasource/diagflat.py +69 -0
  836. maxframe/tensor/datasource/empty.py +167 -0
  837. maxframe/tensor/datasource/eye.py +95 -0
  838. maxframe/tensor/datasource/from_dataframe.py +68 -0
  839. maxframe/tensor/datasource/from_dense.py +37 -0
  840. maxframe/tensor/datasource/from_sparse.py +45 -0
  841. maxframe/tensor/datasource/full.py +184 -0
  842. maxframe/tensor/datasource/identity.py +54 -0
  843. maxframe/tensor/datasource/indices.py +115 -0
  844. maxframe/tensor/datasource/linspace.py +140 -0
  845. maxframe/tensor/datasource/meshgrid.py +135 -0
  846. maxframe/tensor/datasource/ones.py +178 -0
  847. maxframe/tensor/datasource/scalar.py +40 -0
  848. maxframe/tensor/datasource/tests/__init__.py +13 -0
  849. maxframe/tensor/datasource/tests/test_datasource.py +310 -0
  850. maxframe/tensor/datasource/tri_array.py +107 -0
  851. maxframe/tensor/datasource/zeros.py +192 -0
  852. maxframe/tensor/extensions/__init__.py +33 -0
  853. maxframe/tensor/extensions/accessor.py +25 -0
  854. maxframe/tensor/extensions/apply_chunk.py +137 -0
  855. maxframe/tensor/extensions/rebalance.py +65 -0
  856. maxframe/tensor/fetch/__init__.py +15 -0
  857. maxframe/tensor/fetch/core.py +54 -0
  858. maxframe/tensor/fft/__init__.py +32 -0
  859. maxframe/tensor/fft/core.py +168 -0
  860. maxframe/tensor/fft/fft.py +112 -0
  861. maxframe/tensor/fft/fft2.py +118 -0
  862. maxframe/tensor/fft/fftfreq.py +80 -0
  863. maxframe/tensor/fft/fftn.py +123 -0
  864. maxframe/tensor/fft/fftshift.py +79 -0
  865. maxframe/tensor/fft/hfft.py +112 -0
  866. maxframe/tensor/fft/ifft.py +114 -0
  867. maxframe/tensor/fft/ifft2.py +115 -0
  868. maxframe/tensor/fft/ifftn.py +123 -0
  869. maxframe/tensor/fft/ifftshift.py +73 -0
  870. maxframe/tensor/fft/ihfft.py +93 -0
  871. maxframe/tensor/fft/irfft.py +118 -0
  872. maxframe/tensor/fft/irfft2.py +62 -0
  873. maxframe/tensor/fft/irfftn.py +114 -0
  874. maxframe/tensor/fft/rfft.py +116 -0
  875. maxframe/tensor/fft/rfft2.py +63 -0
  876. maxframe/tensor/fft/rfftfreq.py +87 -0
  877. maxframe/tensor/fft/rfftn.py +113 -0
  878. maxframe/tensor/indexing/__init__.py +47 -0
  879. maxframe/tensor/indexing/choose.py +198 -0
  880. maxframe/tensor/indexing/compress.py +122 -0
  881. maxframe/tensor/indexing/core.py +190 -0
  882. maxframe/tensor/indexing/extract.py +69 -0
  883. maxframe/tensor/indexing/fill_diagonal.py +180 -0
  884. maxframe/tensor/indexing/flatnonzero.py +58 -0
  885. maxframe/tensor/indexing/getitem.py +144 -0
  886. maxframe/tensor/indexing/nonzero.py +118 -0
  887. maxframe/tensor/indexing/setitem.py +142 -0
  888. maxframe/tensor/indexing/slice.py +32 -0
  889. maxframe/tensor/indexing/take.py +128 -0
  890. maxframe/tensor/indexing/tests/__init__.py +13 -0
  891. maxframe/tensor/indexing/tests/test_indexing.py +232 -0
  892. maxframe/tensor/indexing/unravel_index.py +103 -0
  893. maxframe/tensor/lib/__init__.py +16 -0
  894. maxframe/tensor/lib/index_tricks.py +404 -0
  895. maxframe/tensor/linalg/__init__.py +43 -0
  896. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  897. maxframe/tensor/linalg/cholesky.py +117 -0
  898. maxframe/tensor/linalg/dot.py +145 -0
  899. maxframe/tensor/linalg/einsum.py +339 -0
  900. maxframe/tensor/linalg/inner.py +36 -0
  901. maxframe/tensor/linalg/inv.py +83 -0
  902. maxframe/tensor/linalg/lstsq.py +100 -0
  903. maxframe/tensor/linalg/lu.py +115 -0
  904. maxframe/tensor/linalg/matmul.py +225 -0
  905. maxframe/tensor/linalg/matrix_norm.py +75 -0
  906. maxframe/tensor/linalg/norm.py +249 -0
  907. maxframe/tensor/linalg/qr.py +124 -0
  908. maxframe/tensor/linalg/solve.py +72 -0
  909. maxframe/tensor/linalg/solve_triangular.py +103 -0
  910. maxframe/tensor/linalg/svd.py +167 -0
  911. maxframe/tensor/linalg/tensordot.py +213 -0
  912. maxframe/tensor/linalg/vdot.py +73 -0
  913. maxframe/tensor/linalg/vector_norm.py +113 -0
  914. maxframe/tensor/merge/__init__.py +21 -0
  915. maxframe/tensor/merge/append.py +74 -0
  916. maxframe/tensor/merge/column_stack.py +63 -0
  917. maxframe/tensor/merge/concatenate.py +103 -0
  918. maxframe/tensor/merge/dstack.py +71 -0
  919. maxframe/tensor/merge/hstack.py +70 -0
  920. maxframe/tensor/merge/stack.py +130 -0
  921. maxframe/tensor/merge/tests/__init__.py +13 -0
  922. maxframe/tensor/merge/tests/test_merge.py +79 -0
  923. maxframe/tensor/merge/vstack.py +74 -0
  924. maxframe/tensor/misc/__init__.py +72 -0
  925. maxframe/tensor/misc/argwhere.py +72 -0
  926. maxframe/tensor/misc/array_split.py +46 -0
  927. maxframe/tensor/misc/astype.py +121 -0
  928. maxframe/tensor/misc/atleast_1d.py +72 -0
  929. maxframe/tensor/misc/atleast_2d.py +70 -0
  930. maxframe/tensor/misc/atleast_3d.py +85 -0
  931. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  932. maxframe/tensor/misc/broadcast_to.py +89 -0
  933. maxframe/tensor/misc/copy.py +64 -0
  934. maxframe/tensor/misc/copyto.py +130 -0
  935. maxframe/tensor/misc/delete.py +104 -0
  936. maxframe/tensor/misc/diff.py +115 -0
  937. maxframe/tensor/misc/dsplit.py +68 -0
  938. maxframe/tensor/misc/ediff1d.py +74 -0
  939. maxframe/tensor/misc/expand_dims.py +85 -0
  940. maxframe/tensor/misc/flatten.py +63 -0
  941. maxframe/tensor/misc/flip.py +90 -0
  942. maxframe/tensor/misc/fliplr.py +64 -0
  943. maxframe/tensor/misc/flipud.py +68 -0
  944. maxframe/tensor/misc/hsplit.py +85 -0
  945. maxframe/tensor/misc/in1d.py +94 -0
  946. maxframe/tensor/misc/insert.py +139 -0
  947. maxframe/tensor/misc/isin.py +130 -0
  948. maxframe/tensor/misc/moveaxis.py +83 -0
  949. maxframe/tensor/misc/ndim.py +53 -0
  950. maxframe/tensor/misc/ravel.py +90 -0
  951. maxframe/tensor/misc/repeat.py +129 -0
  952. maxframe/tensor/misc/result_type.py +88 -0
  953. maxframe/tensor/misc/roll.py +124 -0
  954. maxframe/tensor/misc/rollaxis.py +77 -0
  955. maxframe/tensor/misc/searchsorted.py +147 -0
  956. maxframe/tensor/misc/setdiff1d.py +58 -0
  957. maxframe/tensor/misc/shape.py +89 -0
  958. maxframe/tensor/misc/split.py +190 -0
  959. maxframe/tensor/misc/squeeze.py +117 -0
  960. maxframe/tensor/misc/swapaxes.py +113 -0
  961. maxframe/tensor/misc/tests/__init__.py +13 -0
  962. maxframe/tensor/misc/tests/test_misc.py +112 -0
  963. maxframe/tensor/misc/tile.py +109 -0
  964. maxframe/tensor/misc/transpose.py +133 -0
  965. maxframe/tensor/misc/trapezoid.py +123 -0
  966. maxframe/tensor/misc/unique.py +205 -0
  967. maxframe/tensor/misc/vsplit.py +74 -0
  968. maxframe/tensor/misc/where.py +129 -0
  969. maxframe/tensor/operators.py +83 -0
  970. maxframe/tensor/random/__init__.py +166 -0
  971. maxframe/tensor/random/beta.py +87 -0
  972. maxframe/tensor/random/binomial.py +135 -0
  973. maxframe/tensor/random/bytes.py +37 -0
  974. maxframe/tensor/random/chisquare.py +108 -0
  975. maxframe/tensor/random/choice.py +187 -0
  976. maxframe/tensor/random/core.py +249 -0
  977. maxframe/tensor/random/dirichlet.py +121 -0
  978. maxframe/tensor/random/exponential.py +92 -0
  979. maxframe/tensor/random/f.py +133 -0
  980. maxframe/tensor/random/gamma.py +126 -0
  981. maxframe/tensor/random/geometric.py +91 -0
  982. maxframe/tensor/random/gumbel.py +165 -0
  983. maxframe/tensor/random/hypergeometric.py +146 -0
  984. maxframe/tensor/random/laplace.py +131 -0
  985. maxframe/tensor/random/logistic.py +127 -0
  986. maxframe/tensor/random/lognormal.py +157 -0
  987. maxframe/tensor/random/logseries.py +120 -0
  988. maxframe/tensor/random/multinomial.py +131 -0
  989. maxframe/tensor/random/multivariate_normal.py +190 -0
  990. maxframe/tensor/random/negative_binomial.py +123 -0
  991. maxframe/tensor/random/noncentral_chisquare.py +130 -0
  992. maxframe/tensor/random/noncentral_f.py +124 -0
  993. maxframe/tensor/random/normal.py +141 -0
  994. maxframe/tensor/random/pareto.py +138 -0
  995. maxframe/tensor/random/permutation.py +107 -0
  996. maxframe/tensor/random/poisson.py +109 -0
  997. maxframe/tensor/random/power.py +140 -0
  998. maxframe/tensor/random/rand.py +80 -0
  999. maxframe/tensor/random/randint.py +119 -0
  1000. maxframe/tensor/random/randn.py +94 -0
  1001. maxframe/tensor/random/random_integers.py +121 -0
  1002. maxframe/tensor/random/random_sample.py +84 -0
  1003. maxframe/tensor/random/rayleigh.py +108 -0
  1004. maxframe/tensor/random/shuffle.py +61 -0
  1005. maxframe/tensor/random/standard_cauchy.py +103 -0
  1006. maxframe/tensor/random/standard_exponential.py +70 -0
  1007. maxframe/tensor/random/standard_gamma.py +118 -0
  1008. maxframe/tensor/random/standard_normal.py +72 -0
  1009. maxframe/tensor/random/standard_t.py +133 -0
  1010. maxframe/tensor/random/tests/__init__.py +13 -0
  1011. maxframe/tensor/random/tests/test_random.py +165 -0
  1012. maxframe/tensor/random/triangular.py +117 -0
  1013. maxframe/tensor/random/uniform.py +129 -0
  1014. maxframe/tensor/random/vonmises.py +129 -0
  1015. maxframe/tensor/random/wald.py +112 -0
  1016. maxframe/tensor/random/weibull.py +138 -0
  1017. maxframe/tensor/random/zipf.py +120 -0
  1018. maxframe/tensor/rechunk/__init__.py +26 -0
  1019. maxframe/tensor/rechunk/rechunk.py +43 -0
  1020. maxframe/tensor/reduction/__init__.py +64 -0
  1021. maxframe/tensor/reduction/all.py +101 -0
  1022. maxframe/tensor/reduction/allclose.py +86 -0
  1023. maxframe/tensor/reduction/any.py +103 -0
  1024. maxframe/tensor/reduction/argmax.py +101 -0
  1025. maxframe/tensor/reduction/argmin.py +101 -0
  1026. maxframe/tensor/reduction/array_equal.py +63 -0
  1027. maxframe/tensor/reduction/core.py +166 -0
  1028. maxframe/tensor/reduction/count_nonzero.py +80 -0
  1029. maxframe/tensor/reduction/cumprod.py +95 -0
  1030. maxframe/tensor/reduction/cumsum.py +99 -0
  1031. maxframe/tensor/reduction/max.py +118 -0
  1032. maxframe/tensor/reduction/mean.py +122 -0
  1033. maxframe/tensor/reduction/min.py +118 -0
  1034. maxframe/tensor/reduction/nanargmax.py +80 -0
  1035. maxframe/tensor/reduction/nanargmin.py +74 -0
  1036. maxframe/tensor/reduction/nancumprod.py +89 -0
  1037. maxframe/tensor/reduction/nancumsum.py +92 -0
  1038. maxframe/tensor/reduction/nanmax.py +109 -0
  1039. maxframe/tensor/reduction/nanmean.py +105 -0
  1040. maxframe/tensor/reduction/nanmin.py +109 -0
  1041. maxframe/tensor/reduction/nanprod.py +92 -0
  1042. maxframe/tensor/reduction/nanstd.py +124 -0
  1043. maxframe/tensor/reduction/nansum.py +113 -0
  1044. maxframe/tensor/reduction/nanvar.py +149 -0
  1045. maxframe/tensor/reduction/prod.py +128 -0
  1046. maxframe/tensor/reduction/std.py +132 -0
  1047. maxframe/tensor/reduction/sum.py +123 -0
  1048. maxframe/tensor/reduction/tests/__init__.py +13 -0
  1049. maxframe/tensor/reduction/tests/test_reduction.py +189 -0
  1050. maxframe/tensor/reduction/var.py +176 -0
  1051. maxframe/tensor/reshape/__init__.py +15 -0
  1052. maxframe/tensor/reshape/reshape.py +192 -0
  1053. maxframe/tensor/reshape/tests/__init__.py +13 -0
  1054. maxframe/tensor/reshape/tests/test_reshape.py +35 -0
  1055. maxframe/tensor/sort/__init__.py +18 -0
  1056. maxframe/tensor/sort/argpartition.py +98 -0
  1057. maxframe/tensor/sort/argsort.py +150 -0
  1058. maxframe/tensor/sort/partition.py +228 -0
  1059. maxframe/tensor/sort/sort.py +295 -0
  1060. maxframe/tensor/spatial/__init__.py +15 -0
  1061. maxframe/tensor/spatial/distance/__init__.py +17 -0
  1062. maxframe/tensor/spatial/distance/cdist.py +421 -0
  1063. maxframe/tensor/spatial/distance/pdist.py +398 -0
  1064. maxframe/tensor/spatial/distance/squareform.py +153 -0
  1065. maxframe/tensor/special/__init__.py +175 -0
  1066. maxframe/tensor/special/airy.py +55 -0
  1067. maxframe/tensor/special/bessel.py +199 -0
  1068. maxframe/tensor/special/core.py +99 -0
  1069. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  1070. maxframe/tensor/special/ellip_harm.py +55 -0
  1071. maxframe/tensor/special/err_fresnel.py +223 -0
  1072. maxframe/tensor/special/gamma_funcs.py +303 -0
  1073. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  1074. maxframe/tensor/special/info_theory.py +189 -0
  1075. maxframe/tensor/special/misc.py +163 -0
  1076. maxframe/tensor/special/statistical.py +56 -0
  1077. maxframe/tensor/statistics/__init__.py +24 -0
  1078. maxframe/tensor/statistics/average.py +143 -0
  1079. maxframe/tensor/statistics/bincount.py +133 -0
  1080. maxframe/tensor/statistics/corrcoef.py +77 -0
  1081. maxframe/tensor/statistics/cov.py +222 -0
  1082. maxframe/tensor/statistics/digitize.py +126 -0
  1083. maxframe/tensor/statistics/histogram.py +520 -0
  1084. maxframe/tensor/statistics/median.py +85 -0
  1085. maxframe/tensor/statistics/percentile.py +175 -0
  1086. maxframe/tensor/statistics/ptp.py +89 -0
  1087. maxframe/tensor/statistics/quantile.py +290 -0
  1088. maxframe/tensor/ufunc/__init__.py +24 -0
  1089. maxframe/tensor/ufunc/ufunc.py +198 -0
  1090. maxframe/tensor/utils.py +716 -0
  1091. maxframe/tests/__init__.py +13 -0
  1092. maxframe/tests/test_protocol.py +178 -0
  1093. maxframe/tests/test_udf.py +61 -0
  1094. maxframe/tests/test_utils.py +618 -0
  1095. maxframe/tests/utils.py +245 -0
  1096. maxframe/typing_.py +42 -0
  1097. maxframe/udf.py +356 -0
  1098. maxframe/utils.py +1774 -0
  1099. maxframe-2.3.0.dist-info/METADATA +109 -0
  1100. maxframe-2.3.0.dist-info/RECORD +1117 -0
  1101. maxframe-2.3.0.dist-info/WHEEL +6 -0
  1102. maxframe-2.3.0.dist-info/top_level.txt +3 -0
  1103. maxframe_client/__init__.py +16 -0
  1104. maxframe_client/clients/__init__.py +13 -0
  1105. maxframe_client/clients/framedriver.py +137 -0
  1106. maxframe_client/conftest.py +15 -0
  1107. maxframe_client/fetcher.py +411 -0
  1108. maxframe_client/session/__init__.py +22 -0
  1109. maxframe_client/session/consts.py +39 -0
  1110. maxframe_client/session/graph.py +125 -0
  1111. maxframe_client/session/odps.py +802 -0
  1112. maxframe_client/session/task.py +329 -0
  1113. maxframe_client/session/tests/__init__.py +13 -0
  1114. maxframe_client/session/tests/test_task.py +115 -0
  1115. maxframe_client/tests/__init__.py +13 -0
  1116. maxframe_client/tests/test_fetcher.py +180 -0
  1117. maxframe_client/tests/test_session.py +409 -0
@@ -0,0 +1,2385 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import functools
16
+ import operator
17
+ import weakref
18
+ from collections.abc import Iterable
19
+ from io import StringIO
20
+ from typing import Any, Dict, List, Tuple, Union
21
+
22
+ import numpy as np
23
+ import pandas as pd
24
+
25
+ from ..core import (
26
+ ENTITY_TYPE,
27
+ HasShapeTileable,
28
+ HasShapeTileableData,
29
+ OutputType,
30
+ Tileable,
31
+ _ExecuteAndFetchMixin,
32
+ is_build_mode,
33
+ register_output_types,
34
+ )
35
+ from ..core.entity.utils import fill_chunk_slices, refresh_tileable_shape
36
+ from ..protocol import DataFrameTableMeta
37
+ from ..serialization.serializables import (
38
+ AnyField,
39
+ BoolField,
40
+ DataTypeField,
41
+ DictField,
42
+ Int32Field,
43
+ IntervalArrayField,
44
+ ListField,
45
+ NDArrayField,
46
+ OneOfField,
47
+ ReferenceField,
48
+ Serializable,
49
+ SeriesField,
50
+ SliceField,
51
+ StringField,
52
+ )
53
+ from ..session import get_default_session
54
+ from ..utils import (
55
+ calc_nsplits,
56
+ ceildiv,
57
+ estimate_pandas_size,
58
+ on_serialize_numpy_type,
59
+ pd_release_version,
60
+ prevent_called_from_pandas,
61
+ tokenize,
62
+ )
63
+ from .typing_ import DataFrameType, IndexType, SeriesType
64
+ from .utils import (
65
+ ReprSeries,
66
+ apply_if_callable,
67
+ fetch_corner_data,
68
+ merge_index_value,
69
+ parse_index,
70
+ )
71
+
72
# Flag derived from the first two components of ``pd_release_version``: True when
# that version is below 2.0. It is used further down to choose between calling
# ``iteritems`` and ``items`` on fetched batches.
_df_with_iteritems = pd_release_version[:2] < (2, 0)
73
+
74
+
75
class IndexValue(Serializable):
    """
    Meta class for index, held by IndexData, SeriesData and DataFrameData

    The actual metadata is stored in ``_index_value``, which holds an instance
    of one of the nested ``IndexBase`` subclasses declared below. Nearly all
    properties of this class delegate to that inner object.
    """

    __slots__ = ()

    class IndexBase(Serializable):
        # Fields shared by every kind of index metadata.
        _key = StringField("key")  # to identify if the index is the same
        _is_monotonic_increasing = BoolField("is_monotonic_increasing")
        _is_monotonic_decreasing = BoolField("is_monotonic_decreasing")
        _is_unique = BoolField("is_unique")
        _max_val = AnyField("max_val", on_serialize=on_serialize_numpy_type)
        _max_val_close = BoolField("max_val_close")
        _min_val = AnyField("min_val", on_serialize=on_serialize_numpy_type)
        _min_val_close = BoolField("min_val_close")

        @property
        def is_monotonic_increasing(self):
            return self._is_monotonic_increasing

        @property
        def is_monotonic_decreasing(self):
            return self._is_monotonic_decreasing

        @property
        def is_unique(self):
            return self._is_unique

        @property
        def min_val(self):
            return self._min_val

        @property
        def min_val_close(self):
            return self._min_val_close

        @property
        def max_val(self):
            return self._max_val

        @property
        def max_val_close(self):
            return self._max_val_close

        @property
        def key(self):
            return self._key

        @property
        def inferred_type(self):
            # The base class carries no type information; subclasses override.
            return None

        def to_pandas(self):
            """
            Build a pandas index from the fields stored on this object.

            Fields whose attribute name also appears in the parent class's
            ``_FIELDS`` are skipped; the remaining ones are passed to the
            pandas constructor as keyword arguments named after their
            serialization tag. The constructor is ``_pd_initializer`` when the
            class defines one, otherwise the ``pd`` attribute that shares this
            class's name.
            """
            # NOTE(review): ``super(type(self), self)`` resolves relative to
            # the runtime class, so this assumes concrete classes derive
            # directly from IndexBase -- confirm before adding deeper
            # subclasses.
            kw = {
                field.tag: getattr(self, attr, None)
                for attr, field in self._FIELDS.items()
                if attr not in super(type(self), self)._FIELDS
            }
            # Drop unset values so pandas applies its own defaults.
            kw = {k: v for k, v in kw.items() if v is not None}
            if kw.get("data") is None:
                # No data stored: build an empty index.
                kw["data"] = []

            pd_initializer = getattr(self, "_pd_initializer", None)
            if pd_initializer is None:
                pd_initializer = getattr(pd, type(self).__name__)
            return pd_initializer(**kw)

    class Index(IndexBase):
        _name = AnyField("name")
        _data = NDArrayField("data")
        _dtype = DataTypeField("dtype")

        @property
        def dtype(self):
            return getattr(self, "_dtype", None)

        @property
        def inferred_type(self):
            # Every dtype kind other than "f" is reported as "integer".
            # NOTE(review): ``dtype`` may be None (see the property above), in
            # which case ``.kind`` raises AttributeError -- confirm callers
            # always set a dtype.
            return "floating" if self.dtype.kind == "f" else "integer"

    class RangeIndex(IndexBase):
        _name = AnyField("name")
        _slice = SliceField("slice")
        _dtype = DataTypeField("dtype")

        @property
        def slice(self):
            return self._slice

        @property
        def dtype(self):
            # Falls back to the C ``int`` dtype when no dtype was stored.
            return getattr(self, "_dtype", np.dtype(np.intc))

        def to_pandas(self):
            # A range index is rebuilt from its slice instead of from data.
            slc = self._slice
            return pd.RangeIndex(
                slc.start, slc.stop, slc.step, name=getattr(self, "_name", None)
            )

    class CategoricalIndex(IndexBase):
        _name = AnyField("name")
        _data = NDArrayField("data")
        _categories = AnyField("categories")
        _ordered = BoolField("ordered")

        @property
        def inferred_type(self):
            return "categorical"

    class IntervalIndex(IndexBase):
        _name = AnyField("name")
        _data = IntervalArrayField("data")
        _closed = StringField("closed")

        @property
        def inferred_type(self):
            return "interval"

    class DatetimeIndex(IndexBase):
        _name = AnyField("name")
        _data = NDArrayField("data")
        _freq = AnyField("freq")
        _start = AnyField("start")
        _periods = AnyField("periods")
        _end = AnyField("end")
        _closed = AnyField("closed")
        _tz = AnyField("tz")
        _ambiguous = AnyField("ambiguous")
        _dayfirst = BoolField("dayfirst")
        _yearfirst = BoolField("yearfirst")

        @property
        def inferred_type(self):
            return "datetime64"

        @property
        def freq(self):
            return getattr(self, "_freq", None)

    class TimedeltaIndex(IndexBase):
        _name = AnyField("name")
        _data = NDArrayField("data")
        _unit = AnyField("unit")
        _freq = AnyField("freq")
        _start = AnyField("start")
        _periods = AnyField("periods")
        _end = AnyField("end")
        _closed = AnyField("closed")

        @property
        def inferred_type(self):
            return "timedelta64"

    class PeriodIndex(IndexBase):
        _name = AnyField("name")
        _data = NDArrayField("data")
        _freq = AnyField("freq")
        _start = AnyField("start")
        _periods = AnyField("periods")
        _end = AnyField("end")
        _year = AnyField("year")
        _month = AnyField("month")
        _quarter = AnyField("quarter")
        _day = AnyField("day")
        _hour = AnyField("hour")
        _minute = AnyField("minute")
        _second = AnyField("second")
        _tz = AnyField("tz")
        _dtype = DataTypeField("dtype")

        @property
        def inferred_type(self):
            return "period"

    class Int64Index(IndexBase):
        # Converted to pandas through the generic ``pd.Index`` constructor
        # instead of a same-named pandas class.
        _pd_initializer = pd.Index

        _name = AnyField("name")
        _data = NDArrayField("data")
        _dtype = DataTypeField("dtype")

        @property
        def dtype(self):
            return getattr(self, "_dtype", None)

        @property
        def inferred_type(self):
            return "integer"

    class UInt64Index(IndexBase):
        # Converted to pandas through the generic ``pd.Index`` constructor.
        _pd_initializer = pd.Index

        _name = AnyField("name")
        _data = NDArrayField("data")
        _dtype = DataTypeField("dtype")

        @property
        def dtype(self):
            return getattr(self, "_dtype", None)

        @property
        def inferred_type(self):
            return "integer"

    class Float64Index(IndexBase):
        # Converted to pandas through the generic ``pd.Index`` constructor.
        _pd_initializer = pd.Index

        _name = AnyField("name")
        _data = NDArrayField("data")
        _dtype = DataTypeField("dtype")

        @property
        def dtype(self):
            return getattr(self, "_dtype", None)

        @property
        def inferred_type(self):
            return "floating"

    class MultiIndex(IndexBase):
        _names = ListField("names", on_serialize=list)
        _dtypes = ListField("dtypes", on_serialize=list)
        _data = NDArrayField("data")
        _sortorder = Int32Field("sortorder")

        @property
        def inferred_type(self):
            return "mixed"

        @property
        def names(self) -> list:
            return self._names

        @property
        def dtypes(self) -> pd.Series:
            # Per-level dtypes, labelled with the level names.
            return pd.Series(self._dtypes, index=self._names)

        def to_pandas(self):
            """
            Build a ``pd.MultiIndex`` from the stored metadata.

            Without stored data the result is an empty MultiIndex made of one
            empty array per stored level dtype; otherwise it is built from the
            stored rows converted to tuples.
            """
            data = getattr(self, "_data", None)
            sortorder = getattr(self, "_sortorder", None)

            def _build_empty_array(dtype):
                # Try numpy first; fall back to a pandas array for dtypes
                # that numpy rejects with TypeError.
                try:
                    return np.array([], dtype=dtype)
                except TypeError:  # pragma: no cover
                    return pd.array([], dtype=dtype)

            if data is None:
                return pd.MultiIndex.from_arrays(
                    [_build_empty_array(dtype) for dtype in self._dtypes],
                    sortorder=sortorder,
                    names=self._names,
                )
            return pd.MultiIndex.from_tuples(
                [tuple(d) for d in data], sortorder=sortorder, names=self._names
            )

    # Exactly one of the variants below is stored at a time.
    _index_value = OneOfField(
        "index_value",
        index=Index,
        range_index=RangeIndex,
        categorical_index=CategoricalIndex,
        interval_index=IntervalIndex,
        datetime_index=DatetimeIndex,
        timedelta_index=TimedeltaIndex,
        period_index=PeriodIndex,
        int64_index=Int64Index,
        uint64_index=UInt64Index,
        float64_index=Float64Index,
        multi_index=MultiIndex,
    )

    def __maxframe_tokenize__(self):
        # return object for tokenize
        v = self._index_value
        return v._key

    @property
    def value(self):
        # The wrapped IndexBase instance.
        return self._index_value

    @property
    def key(self):
        return self._index_value.key

    @property
    def is_monotonic_increasing(self):
        return self._index_value.is_monotonic_increasing

    @property
    def is_monotonic_decreasing(self):
        return self._index_value.is_monotonic_decreasing

    @property
    def is_monotonic_increasing_or_decreasing(self):
        return self.is_monotonic_increasing or self.is_monotonic_decreasing

    @property
    def is_unique(self):
        return self._index_value.is_unique

    @property
    def min_val(self):
        return self._index_value.min_val

    @property
    def min_val_close(self):
        return self._index_value.min_val_close

    @property
    def max_val(self):
        return self._index_value.max_val

    @property
    def max_val_close(self):
        return self._index_value.max_val_close

    @property
    def min_max(self):
        # Tuple of (min_val, min_val_close, max_val, max_val_close).
        return (
            self._index_value.min_val,
            self._index_value.min_val_close,
            self._index_value.max_val,
            self._index_value.max_val_close,
        )

    @property
    def name(self):
        # None when the wrapped object has no ``_name`` (e.g. MultiIndex).
        return getattr(self._index_value, "_name", None)

    @property
    def names(self):
        # Single-level indexes report a one-element list holding their name.
        return getattr(self._index_value, "_names", [self.name])

    @property
    def inferred_type(self):
        return self._index_value.inferred_type

    def has_value(self):
        """
        Tell whether concrete index values are available.

        A RangeIndex counts as having a value unless its ``max_val`` is NaN;
        any other kind counts only when its ``_data`` attribute is set.
        """
        if isinstance(self._index_value, self.RangeIndex):
            if np.isnan(self._index_value.max_val):
                return False
            else:
                return True
        elif getattr(self._index_value, "_data", None) is not None:
            return True
        return False

    def to_pandas(self):
        return self._index_value.to_pandas()
426
+
427
+
428
class DtypesValue(Serializable):
    """
    Serializable holder for a dtypes series together with an identifying key.

    When no key is supplied, one is derived by tokenizing the stored value.
    """

    __slots__ = ()

    _key = StringField("key")
    _value = SeriesField("value")

    def __init__(self, key=None, value=None, **kw):
        super().__init__(_key=key, _value=value, **kw)
        if self._key is not None:
            return
        # No explicit key: derive it from the content of the dtypes.
        self._key = tokenize(self._value)

    @property
    def key(self):
        """Key identifying the stored dtypes."""
        return self._key

    @property
    def value(self):
        """The stored dtypes value."""
        return self._value
450
+
451
+
452
def refresh_index_value(tileable: ENTITY_TYPE):
    """
    Recompute ``tileable._index_value`` from the index values of its chunks.

    Only 1-d chunks, or chunks whose second index coordinate is 0, contribute.
    The merged index value keeps the key of the tileable's previous one.
    """
    row_index_values = {
        chunk.index: chunk.index_value
        for chunk in tileable.chunks
        if chunk.ndim == 1 or chunk.index[1] == 0
    }
    merged = merge_index_value(row_index_values, store_data=False)
    # keep key as original index_value's
    merged._index_value._key = tileable.index_value.key
    tileable._index_value = merged
461
+
462
+
463
def refresh_dtypes(tileable: ENTITY_TYPE):
    """
    Rebuild the dtypes-related attributes of ``tileable`` from its chunks.

    The dtypes of the chunks whose first index coordinate is 0 are
    concatenated; the result is stored as ``_dtypes`` and used to refresh
    ``_columns_value`` and ``_dtypes_value``.
    """
    first_row_dtypes = []
    for chunk in tileable.chunks:
        if chunk.index[0] == 0:
            first_row_dtypes.append(chunk.dtypes_value.value)
    merged_dtypes = pd.concat(first_row_dtypes)

    tileable._dtypes = merged_dtypes
    tileable._columns_value = parse_index(merged_dtypes.index, store_data=True)
    tileable._dtypes_value = DtypesValue(
        key=tokenize(merged_dtypes), value=merged_dtypes
    )
470
+
471
+
472
# String constants naming tileable-level metadata properties, plus a tuple that
# groups all of them.
# NOTE(review): nothing in this part of the file consumes them; presumably they
# name lazily populated attributes on chunk data -- confirm against the code
# that reads ``_lazy_chunk_meta_properties``.
_tileable_key_property = "_tileable_key"
_tileable_dtypes_property = "_tileable_dtypes"
_tileable_index_value_property = "_tileable_index_value"
_tileable_columns_value_property = "_tileable_columns_value"
_nsplits_property = "_tileable_nsplits"
_lazy_chunk_meta_properties = (
    _tileable_key_property,
    _tileable_dtypes_property,
    _tileable_index_value_property,
    _tileable_columns_value_property,
    _nsplits_property,
)
484
+
485
+
486
+ def _calc_cum_nsplit(nsplit: Tuple[int]) -> List[int]:
487
+ return [0] + np.cumsum(nsplit).tolist()
488
+
489
+
490
def calc_cum_nsplits(
    nsplits: Tuple[Tuple[int, ...], ...]
) -> Tuple[List[int], ...]:
    """
    Compute cumulative chunk offsets for every axis.

    Parameters
    ----------
    nsplits : tuple of tuple of int
        Chunk sizes, one inner tuple per axis.

    Returns
    -------
    tuple of list of int
        For each axis, the cumulative sums of its chunk sizes prefixed with 0.
        The outer container is a tuple (the previous annotation wrongly
        declared a list of lists).
    """
    return tuple(_calc_cum_nsplit(nsplit) for nsplit in nsplits)
492
+
493
+
494
@functools.lru_cache(maxsize=128)
def _get_cum_nsplit(nsplit: Tuple[int]) -> List[int]:
    """
    Memoized variant of ``_calc_cum_nsplit``.

    ``nsplit`` must be hashable because it is used as the cache key. The
    returned list object is the cached one, so callers should treat it as
    read-only.
    """
    cum_nsplit = _calc_cum_nsplit(nsplit)
    return cum_nsplit
497
+
498
+
499
def _calc_axis_slice(nsplit: Tuple[int], index: int) -> slice:
    """
    Return the slice covered by chunk number ``index`` along one axis.

    ``nsplit`` holds the chunk sizes of that axis; it is converted to a tuple
    when necessary so it can serve as a key of the cumulative-offset cache.
    """
    hashable_nsplit = nsplit if isinstance(nsplit, tuple) else tuple(nsplit)
    offsets = _get_cum_nsplit(hashable_nsplit)
    start, stop = offsets[index], offsets[index + 1]
    return slice(start, stop)
504
+
505
+
506
+ def _on_deserialize_index_value(index_value):
507
+ if index_value is None:
508
+ return
509
+ try:
510
+ getattr(index_value, "value")
511
+ return index_value
512
+ except AttributeError:
513
+ return
514
+
515
+
516
class _ToPandasMixin(_ExecuteAndFetchMixin):
    """Mixin adding ``to_pandas`` on top of ``_execute_and_fetch``."""

    __slots__ = ()

    def to_pandas(self, session=None, **kw):
        """Forward to ``_execute_and_fetch`` and return what it returns."""
        fetched = self._execute_and_fetch(session=session, **kw)
        return fetched
521
+
522
+
523
class _BatchedFetcher:
    """
    Mixin that fetches data in row batches built from ``iloc`` slices.

    It relies on the host class for ``shape``, ``op``, ``execute``,
    ``_fetch`` and ``_fetch_infos``.
    """

    __slots__ = ()

    def _iter(self, batch_size=None, session=None, **kw):
        """
        Yield the data batch by batch.

        Parameters
        ----------
        batch_size : int, optional
            Number of rows per batch. When omitted, the first 1000 rows are
            fetched and their estimated size is used to choose a batch size
            targeting roughly 50 MiB per batch.
        session : optional
            Session passed through to ``_fetch``.
        **kw
            Extra keyword arguments passed through to ``_fetch``.
        """
        from .indexing.iloc import iloc

        size = self.shape[0]

        if batch_size is not None:
            n_batch = ceildiv(size, batch_size)
            if n_batch > 1:
                for i in range(n_batch):
                    batch_data = iloc(self)[batch_size * i : batch_size * (i + 1)]
                    yield batch_data._fetch(session=session, **kw)
            else:
                yield self._fetch(session=session, **kw)
            return

        # if batch_size is not specified, use first batch to estimate
        # batch_size.
        default_batch_bytes = 50 * 1024**2
        first_batch = 1000

        if size < first_batch:
            yield self._fetch(session=session, **kw)
            return

        first_batch_data = iloc(self)[:first_batch]._fetch(session=session, **kw)
        yield first_batch_data

        # Clamp both values to at least 1: a zero size estimate would divide
        # by zero, and a zero batch size would break ceildiv and the slicing.
        data_size = max(estimate_pandas_size(first_batch_data), 1)
        batch_size = max(int(default_batch_bytes / data_size * first_batch), 1)
        # Rows left after the first batch (previously a hard-coded 1000).
        n_batch = ceildiv(size - first_batch, batch_size)
        for i in range(n_batch):
            start = first_batch + batch_size * i
            stop = first_batch + batch_size * (i + 1)
            batch_data = iloc(self)[start:stop]
            yield batch_data._fetch(session=session, **kw)

    def iterbatch(self, batch_size=None, session=None, **kw):
        """
        Execute the object, then return an iterator over fetched batches.

        ``**kw`` is passed to ``execute`` only; it is not forwarded to the
        batch iterator.

        Raises
        ------
        ValueError
            If called under build mode.
        """
        # stop triggering execution under build mode
        if is_build_mode():
            raise ValueError("Cannot fetch data under build mode")

        # trigger execution
        self.execute(session=session, **kw)
        return self._iter(batch_size=batch_size, session=session)

    def fetch(self, session=None, **kw):
        """
        Fetch the whole result, batch by batch unless it is already an iloc.

        An optional ``batch_size`` keyword is consumed here; all other keyword
        arguments are passed on to ``_fetch``.
        """
        from .indexing.iloc import DataFrameIlocGetItem, SeriesIlocGetItem

        batch_size = kw.pop("batch_size", None)
        if isinstance(self.op, (DataFrameIlocGetItem, SeriesIlocGetItem)):
            # see GH#1871
            # already iloc, do not trigger batch fetch
            return self._fetch(session=session, **kw)
        else:
            batches = list(self._iter(batch_size=batch_size, session=session, **kw))
            return pd.concat(batches) if len(batches) > 1 else batches[0]

    def fetch_infos(self, fields=None, session=None, **kw):
        """Forward to ``_fetch_infos`` with the same arguments."""
        return self._fetch_infos(fields=fields, session=session, **kw)
586
+
587
+
588
class IndexData(HasShapeTileableData, _ToPandasMixin):
    """Data object behind ``Index``: dtype, name(s) and index metadata."""

    __slots__ = ()
    type_name = "Index"

    # optional field
    _dtype = DataTypeField("dtype")
    _name = AnyField("name")
    _names = AnyField("names")
    _index_value = ReferenceField(
        "index_value", IndexValue, on_deserialize=_on_deserialize_index_value
    )

    def __init__(
        self,
        op=None,
        shape=None,
        nsplits=None,
        dtype=None,
        name=None,
        names=None,
        index_value=None,
        **kw,
    ):
        super().__init__(
            _op=op,
            _shape=shape,
            _nsplits=nsplits,
            _dtype=dtype,
            _name=name,
            _names=names,
            _index_value=index_value,
            **kw,
        )

    @property
    def params(self) -> Dict[str, Any]:
        # params return the properties which useful to rebuild a new tileable object
        return {
            "shape": self.shape,
            "dtype": self.dtype,
            "name": self.name,
            "index_value": self.index_value,
        }

    @params.setter
    def params(self, new_params: Dict[str, Any]):
        # Only keys with a non-None value are applied; anything left over
        # after the known keys are consumed is an error.
        params = new_params.copy()
        new_shape = params.pop("shape", None)
        if new_shape is not None:
            self._shape = new_shape
        dtype = params.pop("dtype", None)
        if dtype is not None:
            self._dtype = dtype
        index_value = params.pop("index_value", None)
        if index_value is not None:
            self._index_value = index_value
        name = params.pop("name", None)
        if name is not None:
            self._name = name
        if params:  # pragma: no cover
            raise TypeError(f"Unknown params: {list(params)}")

    def refresh_params(self):
        # refresh params when chunks updated
        refresh_tileable_shape(self)
        fill_chunk_slices(self)
        # refresh_index_value(self)
        # if self._dtype is None:
        #     self._dtype = self.chunks[0].dtype
        # if self._name is None:
        #     self._name = self.chunks[0].name

    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
        # Intentionally a no-op for indexes.
        pass

    def _to_str(self, representation=False):
        """
        Render the index as text.

        In build mode, or when the object has never been executed, a short
        description of the operator is returned. Otherwise the data is
        fetched from the most recent session and rendered.
        """
        if is_build_mode() or len(self._executed_sessions) == 0:
            # in build mode, or not executed, just return representation
            if representation:
                # The closing ">" was missing here, unlike the Series repr.
                return f"Index <op={type(self._op).__name__}, key={self.key}>"
            else:
                return f"Index(op={type(self._op).__name__})"
        else:
            data = self.fetch(session=self._executed_sessions[-1])
            return repr(data) if repr(data) else str(data)

    def __str__(self):
        return self._to_str(representation=False)

    def __repr__(self):
        return self._to_str(representation=True)

    def _to_maxframe_tensor(self, dtype=None, order="K", extract_multi_index=False):
        # Convert to a tensor, keeping the tensor's own dtype unless one is given.
        tensor = self.to_tensor(extract_multi_index=extract_multi_index)
        dtype = dtype if dtype is not None else tensor.dtype
        return tensor.astype(dtype=dtype, order=order, copy=False)

    def __maxframe_tensor__(self, dtype=None, order="K"):
        return self._to_maxframe_tensor(dtype=dtype, order=order)

    @property
    def dtype(self):
        # NOTE(review): ``or`` depends on the truthiness of the stored dtype;
        # plain NumPy dtypes may evaluate falsy (their len() is 0), in which
        # case the operator's dtype is returned -- confirm this is intended.
        return getattr(self, "_dtype", None) or self.op.dtype

    @property
    def name(self):
        return self._name

    @property
    def names(self):
        # Falls back to a one-element list holding ``name``.
        return getattr(self, "_names", None) or [self.name]

    @property
    def nlevels(self) -> int:
        return len(self.names)

    @property
    def index_value(self) -> IndexValue:
        return self._index_value

    @property
    def inferred_type(self):
        return self._index_value.inferred_type

    def to_tensor(self, dtype=None, extract_multi_index=False):
        from ..tensor.datasource.from_dataframe import from_index

        return from_index(self, dtype=dtype, extract_multi_index=extract_multi_index)

    def to_frame(self, index: bool = True, name=None):
        """
        Create a DataFrame whose column(s) contain this index.

        Parameters
        ----------
        index : bool, default True
            Use this index as the index of the resulting DataFrame.
        name : object, optional
            Column name. For a MultiIndex it must be a non-string iterable
            with one entry per level.

        Raises
        ------
        TypeError
            For a MultiIndex, if ``name`` is a string or is not iterable.
        ValueError
            For a MultiIndex, if ``name`` does not have one entry per level.
        """
        from . import dataframe_from_tensor

        if isinstance(self.index_value.value, IndexValue.MultiIndex):
            old_names = self.index_value.value.names

            if (
                name is not None and not isinstance(name, Iterable)
            ) or isinstance(name, str):
                raise TypeError("'name' must be a list / sequence of column names.")

            name = list(name if name is not None else old_names)
            if len(name) != len(old_names):
                raise ValueError(
                    "'name' should have same length as number of levels on index."
                )

            # An explicitly passed name takes precedence over the level's
            # existing name (previously the existing name always won, so
            # ``name`` was ignored for named levels). The level position is
            # the last resort.
            columns = [
                new or old or idx for idx, (old, new) in enumerate(zip(old_names, name))
            ]
        else:
            columns = [name or self.name or 0]
        index_ = self if index else None
        return dataframe_from_tensor(
            self._to_maxframe_tensor(extract_multi_index=True),
            index=index_,
            columns=columns,
        )

    def to_series(self, index=None, name=None):
        from . import series_from_index

        return series_from_index(self, index=index, name=name)

    @property
    def hasnans(self):
        return self.isna().any()
756
+
757
+
758
class Index(HasShapeTileable, _ToPandasMixin):
    """
    User-facing index object wrapping an ``IndexData``.

    An index obtained from a DataFrame or Series can remember its parent (as
    a weak reference) so that renaming the index also renames the parent's
    axis.
    """

    __slots__ = "_df_or_series", "_parent_key", "_axis"
    _allow_data_type_ = (IndexData,)
    type_name = "Index"

    def __new__(cls, data: Union[pd.Index, IndexData] = None, **_):
        if data is not None and not isinstance(data, pd.Index):
            # create corresponding Index class
            # according to type of index_value
            clz = globals()[type(data.index_value.value).__name__]
        else:
            clz = cls
        return object.__new__(clz)

    def __len__(self):
        return len(self._data)

    def __class_getitem__(cls, item):
        return IndexType.from_getitem_args(item)

    def __maxframe_tensor__(self, dtype=None, order="K"):
        return self._data.__maxframe_tensor__(dtype=dtype, order=order)

    def _get_df_or_series(self):
        # Dereference the weak reference; None when unset or already collected.
        obj = getattr(self, "_df_or_series", None)
        if obj is not None:
            return obj()
        return None

    def _set_df_or_series(self, df_or_series, axis):
        # Remember the parent weakly, plus its key at bind time so that later
        # changes to the parent can be detected.
        self._df_or_series = weakref.ref(df_or_series)
        self._parent_key = df_or_series.key
        self._axis = axis

    @property
    def T(self):
        """Return the transpose, which is by definition self."""
        return self

    @property
    def name(self):
        return self._data.name

    @name.setter
    def name(self, value):
        df_or_series = self._get_df_or_series()
        if df_or_series is not None and df_or_series.key == self._parent_key:
            # Parent is alive and unchanged since binding: rename through it.
            df_or_series.rename_axis(value, axis=self._axis, inplace=True)
            self.data = df_or_series.axes[self._axis].data
        else:
            self.rename(value, inplace=True)

    @property
    def names(self):
        return self._data.names

    @names.setter
    def names(self, value):
        df_or_series = self._get_df_or_series()
        # Same guard as the ``name`` setter: only touch the parent when its
        # key still matches the one recorded at bind time.
        if df_or_series is not None and df_or_series.key == self._parent_key:
            df_or_series.rename_axis(value, axis=self._axis, inplace=True)
            self.data = df_or_series.axes[self._axis].data
        else:
            self.rename(value, inplace=True)

    @property
    def values(self):
        return self.to_tensor()

    def to_frame(self, index: bool = True, name=None):
        """
        Create a DataFrame with a column containing the Index.

        Parameters
        ----------
        index : bool, default True
            Set the index of the returned DataFrame as the original Index.

        name : object, default None
            The passed name should substitute for the index name (if it has
            one).

        Returns
        -------
        DataFrame
            DataFrame containing the original Index data.

        See Also
        --------
        Index.to_series : Convert an Index to a Series.
        Series.to_frame : Convert Series to DataFrame.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> idx = md.Index(['Ant', 'Bear', 'Cow'], name='animal')
        >>> idx.to_frame().execute()
               animal
        animal
        Ant       Ant
        Bear     Bear
        Cow       Cow

        By default, the original Index is reused. To enforce a new Index:

        >>> idx.to_frame(index=False).execute()
            animal
        0   Ant
        1  Bear
        2   Cow

        To override the name of the resulting column, specify `name`:

        >>> idx.to_frame(index=False, name='zoo').execute()
            zoo
        0   Ant
        1  Bear
        2   Cow
        """
        return self._data.to_frame(index=index, name=name)

    def to_series(self, index=None, name=None):
        """
        Create a Series with both index and values equal to the index keys.

        Useful with map for returning an indexer based on an index.

        Parameters
        ----------
        index : Index, optional
            Index of resulting Series. If None, defaults to original index.
        name : str, optional
            Name of resulting Series. If None, defaults to name of original
            index.

        Returns
        -------
        Series
            The dtype will be based on the type of the Index values.
        """
        return self._data.to_series(index=index, name=name)

    @property
    def hasnans(self):
        """
        Return True if there are any NaNs.

        Returns
        -------
        bool

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> idx = md.Index([1, 2, 3, None])
        >>> idx.execute()
        Index([1.0, 2.0, 3.0, nan], dtype='float64')
        >>> idx.hasnans.execute()
        True
        """
        return self._data.hasnans
919
+
920
+
921
# Empty ``Index`` subclasses, one per kind of index metadata declared on
# ``IndexValue``. ``Index.__new__`` looks them up in ``globals()`` by the class
# name of the wrapped index value, so each name must match the corresponding
# nested ``IndexValue`` class.
class RangeIndex(Index):
    __slots__ = ()


class CategoricalIndex(Index):
    __slots__ = ()


class IntervalIndex(Index):
    __slots__ = ()


class DatetimeIndex(Index):
    __slots__ = ()


class TimedeltaIndex(Index):
    __slots__ = ()


class PeriodIndex(Index):
    __slots__ = ()


class Int64Index(Index):
    __slots__ = ()


class UInt64Index(Index):
    __slots__ = ()


class Float64Index(Index):
    __slots__ = ()


class MultiIndex(Index):
    __slots__ = ()
959
+
960
+
961
class BaseSeriesData(HasShapeTileableData, _ToPandasMixin):
    """Common data object for series-like tileables: dtype, name and index metadata."""

    __slots__ = "_cache", "_accessors"

    # optional field
    _dtype = DataTypeField("dtype")
    _name = AnyField("name")
    _index_value = ReferenceField(
        "index_value", IndexValue, on_deserialize=_on_deserialize_index_value
    )

    def __init__(
        self,
        op=None,
        shape=None,
        nsplits=None,
        dtype=None,
        name=None,
        index_value=None,
        **kw,
    ):
        super().__init__(
            _op=op,
            _shape=shape,
            _nsplits=nsplits,
            _dtype=dtype,
            _name=name,
            _index_value=index_value,
            **kw,
        )
        self._accessors = {}

    def _get_params(self) -> Dict[str, Any]:
        # Properties needed to rebuild an equivalent tileable object.
        return dict(
            shape=self.shape,
            dtype=self.dtype,
            name=self.name,
            index_value=self.index_value,
        )

    def _set_params(self, new_params: Dict[str, Any]):
        # Apply each known key whose value is not None; any key left over
        # afterwards is rejected.
        remaining = new_params.copy()
        for key, attr in (
            ("shape", "_shape"),
            ("dtype", "_dtype"),
            ("index_value", "_index_value"),
            ("name", "_name"),
        ):
            value = remaining.pop(key, None)
            if value is not None:
                setattr(self, attr, value)
        if remaining:  # pragma: no cover
            raise TypeError(f"Unknown params: {list(remaining)}")

    params = property(_get_params, _set_params)

    def refresh_params(self):
        # Called once chunks have been updated.
        refresh_tileable_shape(self)
        fill_chunk_slices(self)
        # refresh_index_value(self)
        if self._dtype is None:
            self._dtype = getattr(self.chunks[0], "dtype", None)
        # if self._name is None:
        #     self._name = self.chunks[0].name

    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
        pass

    def _to_str(self, representation=False):
        """
        Render the object as text.

        In build mode, or before any execution, only a description of the
        operator is returned. Otherwise the corner data fetched from the most
        recent session is rendered, honoring pandas' ``display.max_rows``.
        """
        if is_build_mode() or len(self._executed_sessions) == 0:
            op_name = type(self._op).__name__
            if not representation:
                return f"{self.type_name}(op={op_name})"
            return f"{self.type_name} <op={op_name}, key={self.key}>"

        corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])
        max_rows = pd.get_option("display.max_rows")
        fits_display = self.shape[0] <= max_rows

        # Keep max_rows below the corner data size so pandas truncates it.
        if fits_display or corner_data.shape[0] == 0:
            corner_max_rows = max_rows
        else:
            corner_max_rows = corner_data.shape[0] - 1

        with pd.option_context("display.max_rows", corner_max_rows):
            if fits_display:
                corner_series = corner_data
            else:
                corner_series = ReprSeries(corner_data, self.shape)
            return repr(corner_series) if representation else str(corner_series)

    def __str__(self):
        return self._to_str(representation=False)

    def __repr__(self):
        return self._to_str(representation=True)

    @property
    def dtype(self):
        # Stored dtype if truthy, otherwise the operator's dtype (if any).
        return getattr(self, "_dtype", None) or getattr(self.op, "dtype", None)

    @property
    def name(self):
        return self._name

    @property
    def index_value(self):
        return self._index_value

    @property
    def index(self):
        from .datasource.index import from_tileable

        return from_tileable(self)

    @property
    def axes(self):
        return [self.index]

    @property
    def empty(self):
        # The raw shape is needed; any NaN entry means it is not known yet.
        raw_shape = getattr(self, "_shape")
        if np.any(np.isnan(raw_shape)):
            raise ValueError("Tileable object must be executed first")
        return raw_shape == (0,)

    def to_tensor(self, dtype=None):
        from ..tensor.datasource.from_dataframe import from_series

        return from_series(self, dtype=dtype)
1101
+
1102
+
1103
class SeriesData(_BatchedFetcher, BaseSeriesData):
    """Series data object with batched fetching support."""

    type_name = "Series"

    def __maxframe_tensor__(self, dtype=None, order="K"):
        # Convert to a tensor, keeping its own dtype unless one is requested.
        tensor = self.to_tensor()
        target_dtype = tensor.dtype if dtype is None else dtype
        return tensor.astype(dtype=target_dtype, order=order, copy=False)

    def iteritems(self, batch_size=10000, session=None):
        """Lazily yield the items of every fetched batch, in order."""
        # The method to call on each batch depends on the pandas version flag.
        method_name = "items" if not _df_with_iteritems else "iteritems"
        for batch in self.iterbatch(batch_size=batch_size, session=session):
            yield from getattr(batch, method_name)()

    items = iteritems

    def to_frame(self, name=None):
        """Convert to a single-column DataFrame named ``name``, the series name, or 0."""
        from . import dataframe_from_tensor

        column = name or self.name or 0
        return dataframe_from_tensor(self, columns=[column])

    @property
    def hasnans(self):
        """
        Return True if there are any NaNs.

        Returns
        -------
        bool

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> s = md.Series([1, 2, 3, None])
        >>> s.execute()
        0    1.0
        1    2.0
        2    3.0
        3    NaN
        dtype: float64
        >>> s.hasnans.execute()
        True
        """
        return self.isna().any()
1147
+
1148
+
1149
class Series(HasShapeTileable, _ToPandasMixin):
    """Series entity; the members below delegate to the wrapped ``self._data``."""

    __slots__ = ("_cache",)
    _allow_data_type_ = (SeriesData,)
    type_name = "Series"

    def __class_getitem__(cls, item):
        """Handle ``Series[...]`` by delegating to ``SeriesType.from_getitem_args``."""
        return SeriesType.from_getitem_args(item)

    def to_tensor(self, dtype=None):
        """Convert to a tensor; ``dtype`` is forwarded to the data object."""
        data = self._data
        return data.to_tensor(dtype=dtype)

    def from_tensor(self, in_tensor, index=None, name=None):
        """Forward all arguments to ``from_tensor`` of the data object."""
        data = self._data
        return data.from_tensor(in_tensor, index=index, name=name)

    @property
    def T(self):
        """Return the transpose, which is by definition self."""
        return self

    @property
    def ndim(self):
        """
        Return an int representing the number of axes / array dimensions.

        Return 1 if Series. Otherwise return 2 if DataFrame.

        See Also
        --------
        ndarray.ndim : Number of array dimensions.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> s = md.Series({'a': 1, 'b': 2, 'c': 3})
        >>> s.ndim
        1

        >>> df = md.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
        >>> df.ndim
        2
        """
        return super().ndim

    @property
    def index(self):
        """
        The index (axis labels) of the Series.
        """
        index_obj = self._data.index
        # attach this series to the index object as axis 0
        index_obj._set_df_or_series(self, 0)
        return index_obj

    @index.setter
    def index(self, new_index):
        self.set_axis(new_index, axis=0, inplace=True)

    @property
    def name(self):
        """Return the name held by the data object."""
        return self._data.name

    @name.setter
    def name(self, val):
        from .indexing.rename import DataFrameRename

        # renaming is expressed as an operator; this object then adopts the
        # data of the operator's result
        rename_op = DataFrameRename(new_name=val, output_types=[OutputType.series])
        self.data = rename_op(self).data

    @property
    def dtype(self):
        """
        Return the dtype object of the underlying data.
        """
        return self._data.dtype

    def copy(self, deep=True):  # pylint: disable=arguments-differ
        """
        Make a copy of this object's indices and data.

        When ``deep=True`` (default), a new object will be created with a
        copy of the calling object's data and indices. Modifications to
        the data or indices of the copy will not be reflected in the
        original object (see notes below).

        When ``deep=False``, a new object will be created without copying
        the calling object's data or index (only references to the data
        and index are copied). Any changes to the data of the original
        will be reflected in the shallow copy (and vice versa).

        Parameters
        ----------
        deep : bool, default True
            Make a deep copy, including a copy of the data and the indices.
            With ``deep=False`` neither the indices nor the data are copied.

        Returns
        -------
        copy : Series or DataFrame
            Object type matches caller.
        """
        return super().copy() if deep else super()._view()

    def __iter__(self):
        """Iterate over the values, i.e. the second element of each ``items()`` pair."""
        # prevent being called by pandas to make sure `__eq__` works
        prevent_called_from_pandas()
        return (pair[1] for pair in self.items())

    def __len__(self):
        """Return ``len()`` of the data object."""
        return len(self._data)

    def __maxframe_tensor__(self, dtype=None, order="K"):
        """Forward the tensor conversion to the data object."""
        data = self._data
        return data.__maxframe_tensor__(dtype=dtype, order=order)

    def keys(self):
        """
        Return alias for index.

        Returns
        -------
        Index
            Index of the Series.
        """
        return self.index

    @property
    def values(self):
        """Return ``self.to_tensor()``."""
        return self.to_tensor()

    def iteritems(self, batch_size=10000, session=None):
        """
        Lazily iterate over (index, value) tuples.

        This method returns an iterable tuple (index, value). This is
        convenient if you want to create a lazy iterator.

        Parameters
        ----------
        batch_size : int, default 10000
            Forwarded to ``iteritems`` of the data object.
        session : optional
            Forwarded to ``iteritems`` of the data object.

        Returns
        -------
        iterable
            Iterable of tuples containing the (index, value) pairs from a
            Series.

        See Also
        --------
        DataFrame.items : Iterate over (column name, Series) pairs.
        DataFrame.iterrows : Iterate over DataFrame rows as (index, Series) pairs.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> s = md.Series(['A', 'B', 'C'])
        >>> for index, value in s.items():
        ...     print(f"Index : {index}, Value : {value}")
        Index : 0, Value : A
        Index : 1, Value : B
        Index : 2, Value : C
        """
        data = self._data
        return data.iteritems(batch_size=batch_size, session=session)

    items = iteritems

    def to_frame(self, name=None):
        """
        Convert Series to DataFrame.

        Parameters
        ----------
        name : object, default None
            The passed name should substitute for the series name (if it has
            one).

        Returns
        -------
        DataFrame
            DataFrame representation of Series.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> s = md.Series(["a", "b", "c"], name="vals")
        >>> s.to_frame().execute()
          vals
        0    a
        1    b
        2    c
        """
        data = self._data
        return data.to_frame(name=name)
1338
+
1339
+ # def median(
1340
+ # self, axis=None, skipna=True, out=None, overwrite_input=False, keepdims=False
1341
+ # ):
1342
+ # """
1343
+ # Return the median of the values over the requested axis.
1344
+ #
1345
+ # Parameters
1346
+ # ----------
1347
+ # axis : {index (0)}
1348
+ # Axis or axes along which the medians are computed. The default
1349
+ # is to compute the median along a flattened version of the tensor.
1350
+ # A sequence of axes is supported since version 1.9.0.
1351
+ # skipna : bool, optional, default True
1352
+ # Exclude NA/null values when computing the result.
1353
+ # out : Tensor, default None
1354
+ # Output tensor in which to place the result. It must
1355
+ # have the same shape and buffer length as the expected output,
1356
+ # but the type (of the output) will be cast if necessary.
1357
+ # overwrite_input : bool, default False
1358
+ # Just for compatibility with Numpy, would not take effect.
1359
+ # keepdims : bool, default False
1360
+ # If this is set to True, the axes which are reduced are left
1361
+ # in the result as dimensions with size one. With this option,
1362
+ # the result will broadcast correctly against the original `arr`.
1363
+ #
1364
+ # Returns
1365
+ # -------
1366
+ # median : scalar
1367
+ # Return the median of the values over the requested axis.
1368
+ #
1369
+ # See Also
1370
+ # --------
1371
+ # tensor.mean, tensor.percentile
1372
+ #
1373
+ # Notes
1374
+ # -----
1375
+ # Given a vector ``V`` of length ``N``, the median of ``V`` is the
1376
+ # middle value of a sorted copy of ``V``, ``V_sorted`` - i
1377
+ # e., ``V_sorted[(N-1)/2]``, when ``N`` is odd, and the average of the
1378
+ # two middle values of ``V_sorted`` when ``N`` is even.
1379
+ #
1380
+ # Examples
1381
+ # --------
1382
+ # >>> import maxframe.dataframe as md
1383
+ # >>> a = md.Series([10, 7, 4, 3, 2, 1])
1384
+ # >>> a.median().execute()
1385
+ # 2.0
1386
+ # >>> mt.median(a).execute()
1387
+ # 3.5
1388
+ # >>> a = md.Series([10, 7, 4, None, 2, 1])
1389
+ # >>> a.median().execute()
1390
+ # 4.0
1391
+ # >>> a.median(skipna=False).execute()
1392
+ # nan
1393
+ # """
1394
+ # if skipna:
1395
+ # return statistics.median(
1396
+ # self.dropna(),
1397
+ # axis=None,
1398
+ # out=None,
1399
+ # overwrite_input=False,
1400
+ # keepdims=False,
1401
+ # )
1402
+ # else:
1403
+ # return statistics.median(
1404
+ # self, axis=None, out=None, overwrite_input=False, keepdims=False
1405
+ # )
1406
+
1407
+
1408
class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
    """Base data object for DataFrame-like tileables (dtypes, index and columns metadata)."""

    __slots__ = "_accessors", "_dtypes_value", "_dtypes_dict"

    # optional fields
    _dtypes = SeriesField("dtypes")
    _index_value = ReferenceField(
        "index_value", IndexValue, on_deserialize=_on_deserialize_index_value
    )
    _columns_value = ReferenceField("columns_value", IndexValue)

    def __init__(
        self,
        op=None,
        shape=None,
        nsplits=None,
        dtypes=None,
        index_value=None,
        columns_value=None,
        **kw,
    ):
        super().__init__(
            _op=op,
            _shape=shape,
            _nsplits=nsplits,
            _dtypes=dtypes,
            _index_value=index_value,
            _columns_value=columns_value,
            **kw,
        )
        # per-instance caches; reset again in ``__on_deserialize__``
        self._accessors = {}
        self._dtypes_value = None
        self._dtypes_dict = None

    def __on_deserialize__(self):
        """Re-create the slot caches after deserialization."""
        super().__on_deserialize__()
        self._accessors = {}
        self._dtypes_value = None
        self._dtypes_dict = None

    def _get_params(self) -> Dict[str, Any]:
        """Collect the properties useful to rebuild a new tileable object."""
        return dict(
            shape=self.shape,
            dtypes=self.dtypes,
            index_value=self.index_value,
            columns_value=getattr(self, "columns_value", None),
            dtypes_value=getattr(self, "dtypes_value", None),
        )

    def _set_params(self, new_params: Dict[str, Any]):
        """
        Apply the non-None entries of ``new_params`` to this object.

        Raises
        ------
        TypeError
            If ``new_params`` holds keys other than the supported ones.
        """
        remaining = new_params.copy()
        given = {}
        # plain fields: ``key`` is stored on ``_key`` whenever a value is supplied
        for key in ("shape", "index_value", "dtypes", "columns_value"):
            value = given[key] = remaining.pop(key, None)
            if value is not None:
                setattr(self, f"_{key}", value)

        dtypes_value = remaining.pop("dtypes_value", None)
        if dtypes_value is not None:
            # dtypes / columns not given explicitly are derived from dtypes_value
            if given["dtypes"] is None:
                self._dtypes = dtypes_value.value
            if given["columns_value"] is None:
                self._columns_value = parse_index(self._dtypes.index, store_data=True)
            self._dtypes_value = dtypes_value

        if remaining:  # pragma: no cover
            raise TypeError(f"Unknown params: {list(remaining)}")

    params = property(_get_params, _set_params)

    def refresh_params(self):
        """Refresh shape and chunk slices when chunks are updated."""
        refresh_tileable_shape(self)
        fill_chunk_slices(self)
        # refresh_index_value(self)
        # refresh_dtypes(self)

    def refresh_from_dtypes(self, dtypes: pd.Series) -> None:
        """Reset dtypes, columns value, dtypes value and the last shape entry from ``dtypes``."""
        self._dtypes = dtypes
        self._columns_value = parse_index(dtypes.index, store_data=True)
        self._dtypes_value = DtypesValue(key=tokenize(dtypes), value=dtypes)
        # only the last dimension changes: it becomes the number of dtypes
        self._shape = tuple(self._shape)[:-1] + (len(dtypes),)

    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
        """Refresh from the ``pd_column_dtypes`` of ``table_meta``."""
        self.refresh_from_dtypes(table_meta.pd_column_dtypes)

    @property
    def dtypes(self):
        """Return the stored dtypes, or the operator's ``dtypes`` when none are stored."""
        own = getattr(self, "_dtypes", None)
        return own if own is not None else getattr(self.op, "dtypes", None)

    @property
    def dtypes_value(self):
        """Return the cached ``DtypesValue``, building it from ``dtypes`` on first use."""
        if self._dtypes_value is None:
            # TODO(qinxuye): when creating Dataframe,
            #  dtypes_value instead of dtypes later must be passed into
            dtypes = self.dtypes
            if dtypes is not None:
                self._dtypes_value = DtypesValue(key=tokenize(dtypes), value=dtypes)
        return self._dtypes_value

    @property
    def index_value(self):
        """Return the stored ``_index_value``."""
        return self._index_value

    @property
    def columns_value(self):
        """Return the stored ``_columns_value``."""
        return self._columns_value

    @property
    def empty(self):
        """
        Tell whether any dimension of the stored shape is 0.

        Raises
        ------
        ValueError
            If any entry of the stored shape is NaN.
        """
        dims = self._shape
        if np.isnan(dims).any():
            raise ValueError("Tileable object must be executed first")
        return 0 in dims

    def to_tensor(self, dtype=None):
        """Convert to a tensor via ``from_dataframe``; ``dtype`` is forwarded."""
        from ..tensor.datasource.from_dataframe import from_dataframe

        return from_dataframe(self, dtype=dtype)

    @property
    def index(self):
        """Return the result of ``datasource.index.from_tileable`` for this object."""
        from .datasource.index import from_tileable

        return from_tileable(self)

    @property
    def columns(self):
        """Return an index built from ``self.dtypes.index`` with its data stored."""
        from .datasource.index import from_pandas as from_pandas_index

        return from_pandas_index(self.dtypes.index, store_data=True)

    @property
    def axes(self):
        """Return ``[index, columns]``."""
        return [self.index, self.columns]

    def _get_dtypes_dict(self):
        """Return (and cache) a mapping from column label to the list of its dtypes."""
        if self._dtypes_dict is None:
            grouped = self._dtypes_dict = {}
            for label, dt in self.dtypes.items():
                # a label may occur several times, hence a list per label
                grouped.setdefault(label, []).append(dt)
        return self._dtypes_dict

    def _get_dtypes_by_columns(self, columns: list):
        """Return the flat list of dtypes for ``columns``, in the given order."""
        dtypes_dict = self._get_dtypes_dict()
        return [dt for col in columns for dt in dtypes_dict[col]]

    def _get_columns_by_columns(self, columns: list):
        """Return ``columns`` with each label repeated once per dtype recorded for it."""
        dtypes_dict = self._get_dtypes_dict()
        return [col for col in columns for _ in dtypes_dict[col]]
1575
+
1576
+
1577
class DataFrameData(_BatchedFetcher, BaseDataFrameData):
    """DataFrame data object combining batched fetching with the base frame data."""

    type_name = "DataFrame"

    def _to_str(self, representation=False):
        """
        Render this object as text.

        Parameters
        ----------
        representation : bool, default False
            Use ``repr`` style when True, ``str`` style otherwise.

        Returns
        -------
        str
            A short operator description in build mode or before any
            execution; otherwise the rendered corner data.
        """
        if is_build_mode() or len(self._executed_sessions) == 0:
            # in build mode, or not executed, just return representation
            if representation:
                return (
                    f"{self.type_name} <op={type(self._op).__name__}, key={self.key}>"
                )
            else:
                return f"{self.type_name}(op={type(self._op).__name__})"
        else:
            corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])

            buf = StringIO()
            max_rows = pd.get_option("display.max_rows")

            if self.shape[0] <= max_rows or corner_data.shape[0] == 0:
                buf.write(repr(corner_data) if representation else str(corner_data))
            else:
                # remember we cannot directly call repr(df),
                # because the [... rows x ... columns] may show wrong rows
                with pd.option_context(
                    "display.show_dimensions",
                    False,
                    "display.max_rows",
                    corner_data.shape[0] - 1,
                ):
                    if representation:
                        s = repr(corner_data)
                    else:
                        s = str(corner_data)
                    buf.write(s)
                if pd.get_option("display.show_dimensions"):
                    # report the dimensions of the whole frame, not of the corner
                    n_rows, n_cols = self.shape
                    buf.write(f"\n\n[{n_rows} rows x {n_cols} columns]")

            return buf.getvalue()

    def __str__(self):
        return self._to_str(representation=False)

    def __repr__(self):
        return self._to_str(representation=True)

    def __maxframe_tensor__(self, dtype=None, order="K"):
        """
        Convert to a tensor, keeping the tensor's own dtype when none is given.

        Parameters
        ----------
        dtype : optional
            Target dtype; when None the dtype of ``self.to_tensor()`` is used.
        order : str, default "K"
            Forwarded to ``astype``.
        """
        tensor = self.to_tensor()
        # Mirror SeriesData: never hand ``dtype=None`` to ``astype``.
        dtype = dtype if dtype is not None else tensor.dtype
        return tensor.astype(dtype=dtype, order=order, copy=False)

    def _repr_html_(self):
        """
        Render the corner data as HTML.

        Returns
        -------
        str or None
            The HTML text, or None when no corner data could be fetched.

        Raises
        ------
        NotImplementedError
            If the object has not been executed in any session.
        """
        if len(self._executed_sessions) == 0:
            # not executed before, fall back to normal repr
            raise NotImplementedError

        corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])
        if corner_data is None:
            return

        buf = StringIO()
        max_rows = pd.get_option("display.max_rows")
        if self.shape[0] <= max_rows:
            buf.write(corner_data._repr_html_())
        else:
            with pd.option_context(
                "display.show_dimensions",
                False,
                "display.max_rows",
                corner_data.shape[0] - 1,
            ):
                html = corner_data._repr_html_().rstrip()
                # Drop exactly one trailing "</div>". ``str.rstrip("</div>")``
                # strips a *set of characters* and could eat into preceding
                # markup, so remove the suffix explicitly.
                closing_tag = "</div>"
                if html.endswith(closing_tag):
                    html = html[: -len(closing_tag)]
                buf.write(html)
            if pd.get_option("display.show_dimensions"):
                # report the dimensions of the whole frame, not of the corner
                n_rows, n_cols = self.shape
                buf.write(f"<p>{n_rows} rows × {n_cols} columns</p>\n")
            buf.write("</div>")

        return buf.getvalue()

    def items(self):
        """Yield ``(column name, self[column name])`` for every label in ``dtypes.index``."""
        for col_name in self.dtypes.index:
            yield col_name, self[col_name]

    iteritems = items

    def iterrows(self, batch_size=1000, session=None):
        """Yield the ``iterrows()`` output of every batch produced by ``iterbatch``."""
        for batch_data in self.iterbatch(batch_size=batch_size, session=session):
            yield from batch_data.iterrows()

    def itertuples(self, index=True, name="Pandas", batch_size=1000, session=None):
        """Yield the ``itertuples()`` output of every batch produced by ``iterbatch``."""
        for batch_data in self.iterbatch(batch_size=batch_size, session=session):
            yield from batch_data.itertuples(index=index, name=name)

    def _need_execution(self):
        """Return True when no dtypes are stored on this object."""
        return self._dtypes is None
1672
+
1673
+
1674
class DataFrame(HasShapeTileable, _ToPandasMixin):
    """DataFrame entity; the members below delegate to the wrapped ``self._data``."""

    __slots__ = ("_cache",)
    _allow_data_type_ = (DataFrameData,)
    type_name = "DataFrame"

    def __len__(self):
        """Return ``len()`` of the data object."""
        return len(self._data)

    def to_tensor(self, dtype=None):
        """
        Convert to a tensor.

        Parameters
        ----------
        dtype : optional
            Forwarded to ``to_tensor`` of the data object. Added for parity
            with ``Series.to_tensor``; the default keeps the old behaviour.
        """
        return self._data.to_tensor(dtype=dtype)

    def __maxframe_tensor__(self, dtype=None, order="K"):
        """Forward the tensor conversion to the data object."""
        return self._data.__maxframe_tensor__(dtype=dtype, order=order)

    def __getattr__(self, key):
        """
        Resolve ``key`` on the data object, then as a column label.

        Raises
        ------
        AttributeError
            If ``key`` is neither an attribute of the data object nor a
            column label.
        """
        try:
            return getattr(self._data, key)
        except AttributeError:
            dtypes = self.dtypes
            # dtypes may be unknown (None) before execution; in that case
            # re-raise the AttributeError instead of failing with a TypeError
            # on ``key in None``.
            if dtypes is not None and key in dtypes:
                return self[key]
            raise

    def __dir__(self):
        """Return the regular attribute names plus identifier-like string column labels."""
        result = list(super().__dir__())
        dtypes = self.dtypes
        # dtypes may be unknown (None) before execution: then there are no
        # column names to add
        column_names = (
            []
            if dtypes is None
            else [k for k in dtypes.index if isinstance(k, str) and k.isidentifier()]
        )
        return sorted(result + column_names)

    def __iter__(self):
        """Iterate over the column labels (``dtypes.index``)."""
        # prevent being called by pandas to make sure `__eq__` works
        prevent_called_from_pandas()
        return iter(self.dtypes.index)

    def __class_getitem__(cls, item):
        """Handle ``DataFrame[...]`` by delegating to ``DataFrameType.from_getitem_args``."""
        return DataFrameType.from_getitem_args(item)

    @property
    def T(self):
        """Return ``self.transpose()``."""
        return self.transpose()

    @property
    def ndim(self):
        """
        Return an int representing the number of axes / array dimensions.

        Return 1 if Series. Otherwise return 2 if DataFrame.

        See Also
        --------
        ndarray.ndim : Number of array dimensions.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> s = md.Series({'a': 1, 'b': 2, 'c': 3})
        >>> s.ndim
        1

        >>> df = md.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
        >>> df.ndim
        2
        """
        return super().ndim

    @property
    def index(self):
        """Return the index of the data object, attached to this frame as axis 0."""
        idx = self._data.index
        idx._set_df_or_series(self, 0)
        return idx

    @index.setter
    def index(self, new_index):
        self.set_axis(new_index, axis=0, inplace=True)

    @property
    def columns(self):
        """Return the columns of the data object, attached to this frame as axis 1."""
        col = self._data.columns
        col._set_df_or_series(self, 1)
        return col

    @columns.setter
    def columns(self, new_columns):
        self.set_axis(new_columns, axis=1, inplace=True)

    def keys(self):
        """
        Get the 'info axis' (see Indexing for more).

        This is index for Series, columns for DataFrame.

        Returns
        -------
        Index
            Info axis.
        """
        return self.columns

    @property
    def values(self):
        """Return ``self.to_tensor()``."""
        return self.to_tensor()

    @property
    def dtypes(self):
        """
        Return the dtypes in the DataFrame.

        This returns a Series with the data type of each column.
        The result's index is the original DataFrame's columns. Columns
        with mixed types are stored with the ``object`` dtype. See
        :ref:`the User Guide <basics.dtypes>` for more.

        Returns
        -------
        pandas.Series
            The data type of each column.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> df = md.DataFrame({'float': [1.0],
        ...                    'int': [1],
        ...                    'datetime': [md.Timestamp('20180310')],
        ...                    'string': ['foo']})
        >>> df.dtypes
        float              float64
        int                  int64
        datetime    datetime64[ns]
        string              object
        dtype: object
        """
        return self._data.dtypes

    def iterrows(self, batch_size=1000, session=None):
        """
        Iterate over DataFrame rows as (index, Series) pairs.

        Yields
        ------
        index : label or tuple of label
            The index of the row. A tuple for a `MultiIndex`.
        data : Series
            The data of the row as a Series.

        it : generator
            A generator that iterates over the rows of the frame.

        See Also
        --------
        DataFrame.itertuples : Iterate over DataFrame rows as namedtuples of the values.
        DataFrame.items : Iterate over (column name, Series) pairs.

        Notes
        -----

        1. Because ``iterrows`` returns a Series for each row,
           it does **not** preserve dtypes across the rows (dtypes are
           preserved across columns for DataFrames). For example,

           >>> import maxframe.dataframe as md
           >>> df = md.DataFrame([[1, 1.5]], columns=['int', 'float'])
           >>> row = next(df.iterrows())[1]
           >>> row
           int      1.0
           float    1.5
           Name: 0, dtype: float64
           >>> print(row['int'].dtype)
           float64
           >>> print(df['int'].dtype)
           int64

           To preserve dtypes while iterating over the rows, it is better
           to use :meth:`itertuples` which returns namedtuples of the values
           and which is generally faster than ``iterrows``.

        2. You should **never modify** something you are iterating over.
           This is not guaranteed to work in all cases. Depending on the
           data types, the iterator returns a copy and not a view, and writing
           to it will have no effect.
        """
        return self._data.iterrows(batch_size=batch_size, session=session)

    def itertuples(self, index=True, name="Pandas", batch_size=1000, session=None):
        """
        Iterate over DataFrame rows as namedtuples.

        Parameters
        ----------
        index : bool, default True
            If True, return the index as the first element of the tuple.
        name : str or None, default "Pandas"
            The name of the returned namedtuples or None to return regular
            tuples.

        Returns
        -------
        iterator
            An object to iterate over namedtuples for each row in the
            DataFrame with the first field possibly being the index and
            following fields being the column values.

        See Also
        --------
        DataFrame.iterrows : Iterate over DataFrame rows as (index, Series)
            pairs.
        DataFrame.items : Iterate over (column name, Series) pairs.

        Notes
        -----
        The column names will be renamed to positional names if they are
        invalid Python identifiers, repeated, or start with an underscore.
        On python versions < 3.7 regular tuples are returned for DataFrames
        with a large number of columns (>254).

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> df = md.DataFrame({'num_legs': [4, 2], 'num_wings': [0, 2]},
        ...                   index=['dog', 'hawk'])
        >>> df.execute()
              num_legs  num_wings
        dog          4          0
        hawk         2          2
        >>> for row in df.itertuples():
        ...     print(row)
        ...
        Pandas(Index='dog', num_legs=4, num_wings=0)
        Pandas(Index='hawk', num_legs=2, num_wings=2)

        By setting the `index` parameter to False we can remove the index
        as the first element of the tuple:

        >>> for row in df.itertuples(index=False):
        ...     print(row)
        ...
        Pandas(num_legs=4, num_wings=0)
        Pandas(num_legs=2, num_wings=2)

        With the `name` parameter set we set a custom name for the yielded
        namedtuples:

        >>> for row in df.itertuples(name='Animal'):
        ...     print(row)
        ...
        Animal(Index='dog', num_legs=4, num_wings=0)
        Animal(Index='hawk', num_legs=2, num_wings=2)
        """
        return self._data.itertuples(
            batch_size=batch_size, session=session, index=index, name=name
        )

    def assign(self, **kwargs):
        """
        Assign new columns to a DataFrame.
        Returns a new object with all original columns in addition to new ones.
        Existing columns that are re-assigned will be overwritten.

        Parameters
        ----------
        **kwargs : dict of {str: callable or Series}
            The column names are keywords. If the values are
            callable, they are computed on the DataFrame and
            assigned to the new columns. The callable must not
            change input DataFrame (though pandas doesn't check it).
            If the values are not callable, (e.g. a Series, scalar, or array),
            they are simply assigned.

        Returns
        -------
        DataFrame
            A new DataFrame with the new columns in addition to
            all the existing columns.

        Notes
        -----
        Assigning multiple columns within the same ``assign`` is possible.
        Later items in 'kwargs' may refer to newly created or modified
        columns in 'df'; items are computed and assigned into 'df' in order.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> df = md.DataFrame({'temp_c': [17.0, 25.0]},
        ...                   index=['Portland', 'Berkeley'])
        >>> df.execute()
                  temp_c
        Portland    17.0
        Berkeley    25.0

        Where the value is a callable, evaluated on `df`:

        >>> df.assign(temp_f=lambda x: x.temp_c * 9 / 5 + 32).execute()
                  temp_c  temp_f
        Portland    17.0    62.6
        Berkeley    25.0    77.0

        Alternatively, the same behavior can be achieved by directly
        referencing an existing Series or sequence:

        >>> df.assign(temp_f=df['temp_c'] * 9 / 5 + 32).execute()
                  temp_c  temp_f
        Portland    17.0    62.6
        Berkeley    25.0    77.0

        You can create multiple columns within the same assign where one
        of the columns depends on another one defined within the same assign:

        >>> df.assign(temp_f=lambda x: x['temp_c'] * 9 / 5 + 32,
        ...           temp_k=lambda x: (x['temp_f'] + 459.67) * 5 / 9).execute()
                  temp_c  temp_f  temp_k
        Portland    17.0    62.6  290.15
        Berkeley    25.0    77.0  298.15
        """

        data = self.copy()

        # assign in keyword order so later callables can see earlier columns
        for k, v in kwargs.items():
            data[k] = apply_if_callable(v, data)
        return data
1994
+
1995
+
1996
class DataFrameGroupByData(BaseDataFrameData):
    """Data object for a DataFrame groupby; adds ``key_dtypes`` and ``selection``."""

    type_name = "DataFrameGroupBy"

    _key_dtypes = SeriesField("key_dtypes")
    _selection = AnyField("selection")

    @property
    def key_dtypes(self):
        """Return the stored ``_key_dtypes``."""
        return self._key_dtypes

    @property
    def selection(self):
        """Return the stored ``_selection``."""
        return self._selection

    def _get_params(self) -> Dict[str, Any]:
        """Return the parent's params extended with ``key_dtypes`` and ``selection``."""
        merged = super()._get_params()
        merged["key_dtypes"] = self.key_dtypes
        merged["selection"] = self.selection
        return merged

    def _set_params(self, new_params: Dict[str, Any]):
        """Store the groupby-specific entries, then hand the rest to the parent."""
        remaining = new_params.copy()
        for key in ("key_dtypes", "selection"):
            value = remaining.pop(key, None)
            if value is not None:
                setattr(self, f"_{key}", value)
        super()._set_params(remaining)

    params = property(_get_params, _set_params)

    def __init__(self, key_dtypes=None, selection=None, **kw):
        super().__init__(_key_dtypes=key_dtypes, _selection=selection, **kw)

    def _equal(self, o):
        """Compare by identity in build mode, with ``==`` otherwise."""
        # FIXME: a true `==` operator still has to be implemented for DataFrameGroupBy
        return self is o if is_build_mode() else self == o
2036
+
2037
+
2038
class SeriesGroupByData(BaseSeriesData):
    """Data object for a Series groupby; adds ``key_dtypes``."""

    type_name = "SeriesGroupBy"

    _key_dtypes = AnyField("key_dtypes")

    @property
    def key_dtypes(self):
        """Return the stored ``_key_dtypes``."""
        return self._key_dtypes

    def _get_params(self) -> Dict[str, Any]:
        """Return the parent's params extended with ``key_dtypes``."""
        merged = super()._get_params()
        merged["key_dtypes"] = self.key_dtypes
        return merged

    def _set_params(self, new_params: Dict[str, Any]):
        """Store ``key_dtypes`` when given, then hand the rest to the parent."""
        remaining = new_params.copy()
        supplied = remaining.pop("key_dtypes", None)
        if supplied is not None:
            self._key_dtypes = supplied
        super()._set_params(remaining)

    params = property(_get_params, _set_params)

    def __init__(self, key_dtypes=None, **kw):
        super().__init__(_key_dtypes=key_dtypes, **kw)

    def _equal(self, o):
        """Compare by identity in build mode, with ``==`` otherwise."""
        # FIXME: a true `==` operator still has to be implemented for SeriesGroupBy
        return self is o if is_build_mode() else self == o
2070
+
2071
+
2072
class GroupBy(Tileable, _ToPandasMixin):
    """Common base class of ``DataFrameGroupBy`` and ``SeriesGroupBy``."""

    # no per-instance attributes are added at this level
    __slots__ = ()
2074
+
2075
+
2076
class DataFrameGroupBy(GroupBy):
    """Groupby entity over a DataFrame; wraps ``DataFrameGroupByData``."""

    __slots__ = ()
    _allow_data_type_ = (DataFrameGroupByData,)
    type_name = "DataFrameGroupBy"

    def __eq__(self, other):
        return self._equal(other)

    def __hash__(self):
        # NB: we have customized __eq__ explicitly, thus we need define __hash__ explicitly as well.
        return super().__hash__()

    def __getattr__(self, item):
        """
        Resolve ``item`` through the parent lookup, then as a column label.

        Raises
        ------
        AttributeError
            If ``item`` is neither resolvable by the parent nor a column label.
        """
        try:
            return super().__getattr__(item)
        except AttributeError:
            dtypes = self.dtypes
            # dtypes may be None; re-raise the AttributeError in that case
            # instead of failing with a TypeError on ``item in None``.
            if dtypes is not None and item in dtypes:
                return self[item]
            raise

    def __dir__(self):
        """Return the regular attribute names plus identifier-like string column labels."""
        result = list(super().__dir__())
        dtypes = self.dtypes
        # with unknown (None) dtypes there are no column names to add
        column_names = (
            []
            if dtypes is None
            else [k for k in dtypes.index if isinstance(k, str) and k.isidentifier()]
        )
        return sorted(result + column_names)
2103
+
2104
+
2105
class SeriesGroupBy(GroupBy):
    """Groupby entity over a Series; wraps ``SeriesGroupByData``."""

    __slots__ = ()
    _allow_data_type_ = (SeriesGroupByData,)
    type_name = "SeriesGroupBy"

    def __eq__(self, other):
        """Delegate equality to ``_equal``."""
        return self._equal(other)

    def __hash__(self):
        """Reuse the parent's hash."""
        # NB: we have customized __eq__ explicitly, thus we need define __hash__ explicitly as well.
        return super().__hash__()
2116
+
2117
+
2118
+ class CategoricalData(HasShapeTileableData, _ToPandasMixin):
2119
+ __slots__ = ("_cache",)
2120
+ type_name = "Categorical"
2121
+
2122
+ # optional field
2123
+ _dtype = DataTypeField("dtype")
2124
+ _categories_value = ReferenceField(
2125
+ "categories_value", IndexValue, on_deserialize=_on_deserialize_index_value
2126
+ )
2127
+
2128
def __init__(
    self,
    op=None,
    shape=None,
    nsplits=None,
    dtype=None,
    categories_value=None,
    **kw,
):
    """Forward the arguments to the parent constructor under their underscore-prefixed names."""
    field_values = dict(
        _op=op,
        _shape=shape,
        _nsplits=nsplits,
        _dtype=dtype,
        _categories_value=categories_value,
    )
    super().__init__(**field_values, **kw)
2145
+
2146
@property
def params(self) -> Dict[str, Any]:
    """Return the properties useful to rebuild a new tileable object."""
    return dict(
        shape=self.shape,
        dtype=self.dtype,
        categories_value=self.categories_value,
    )

@params.setter
def params(self, new_params: Dict[str, Any]):
    # Each supported key is stored on its underscore-prefixed attribute when
    # a non-None value is supplied; anything left over is rejected.
    leftover = new_params.copy()
    for key in ("shape", "dtype", "categories_value"):
        value = leftover.pop(key, None)
        if value is not None:
            setattr(self, f"_{key}", value)
    if leftover:  # pragma: no cover
        raise TypeError(f"Unknown params: {list(leftover)}")
2169
+
2170
+ def refresh_params(self):
2171
+ # refresh params when chunks updated
2172
+ refresh_tileable_shape(self)
2173
+ fill_chunk_slices(self)
2174
+ if self._dtype is None:
2175
+ self._dtype = self.chunks[0].dtype
2176
+ if self._categories_value is None:
2177
+ categories = []
2178
+ for chunk in self.chunks:
2179
+ categories.extend(chunk.categories_value.to_pandas())
2180
+ self._categories_value = parse_index(
2181
+ pd.Categorical(categories).categories, store_data=True
2182
+ )
2183
+
2184
+ def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
2185
+ pass
2186
+
2187
+ def _to_str(self, representation=False):
2188
+ if is_build_mode() or len(self._executed_sessions) == 0:
2189
+ # in build mode, or not executed, just return representation
2190
+ if representation:
2191
+ return f"{self.type_name} <op={type(self.op).__name__}, key={self.key}>"
2192
+ else:
2193
+ return f"{self.type_name}(op={type(self.op).__name__})"
2194
+ else:
2195
+ data = self.fetch(session=self._executed_sessions[-1])
2196
+ return repr(data) if repr(data) else str(data)
2197
+
2198
+ def __str__(self):
2199
+ return self._to_str(representation=False)
2200
+
2201
+ def __repr__(self):
2202
+ return self._to_str(representation=True)
2203
+
2204
+ def _equal(self, o):
2205
+ # FIXME We need to implemented a true `==` operator for DataFrameGroupby
2206
+ if is_build_mode():
2207
+ return self is o
2208
+ else: # pragma: no cover
2209
+ return self == o
2210
+
2211
+ @property
2212
+ def dtype(self):
2213
+ return getattr(self, "_dtype", None) or self.op.dtype
2214
+
2215
+ @property
2216
+ def categories_value(self):
2217
+ return self._categories_value
2218
+
2219
+ def __eq__(self, other):
2220
+ return self._equal(other)
2221
+
2222
+ def __hash__(self):
2223
+ # NB: we have customized __eq__ explicitly, thus we need define __hash__ explicitly as well.
2224
+ return super().__hash__()
2225
+
2226
+
2227
class Categorical(HasShapeTileable, _ToPandasMixin):
    """Categorical entity that accepts ``CategoricalData`` as its data type."""

    __slots__ = ()
    type_name = "Categorical"
    _allow_data_type_ = (CategoricalData,)

    def __len__(self):
        # The length is taken from the wrapped data object.
        return len(self._data)

    def __hash__(self):
        # __eq__ is overridden in this class, so __hash__ has to be defined
        # explicitly as well to keep instances hashable.
        return super().__hash__()

    def __eq__(self, other):
        # Equality is delegated to the ``_equal`` helper.
        return self._equal(other)
2241
+
2242
+
2243
class DataFrameOrSeriesData(HasShapeTileableData, _ToPandasMixin):
    """Tileable data whose concrete kind is recorded in ``data_type``.

    ``data_type`` is a string (the code in this class uses ``"dataframe"``
    and ``"series"``) and ``data_params`` is a dict holding the parameters
    of the concrete object.  Keys of ``data_params`` are also reachable as
    attributes through ``__getattr__``.
    """

    __slots__ = ()

    _data_type = StringField("data_type")
    _data_params = DictField("data_params")

    def __init__(
        self,
        op=None,
        data_type=None,
        data_params=None,
        **kw,
    ):
        self._data_type = data_type
        self._data_params = data_params or dict()
        super().__init__(
            _op=op,
            **kw,
        )

    def __getattr__(self, item):
        """Expose the entries of ``data_params`` as attributes.

        Raises
        ------
        AttributeError
            If ``item`` is not a key of ``data_params``.
        """
        # Read the field through object.__getattribute__: a plain
        # ``self._data_params`` would re-enter this method, and recurse
        # without bound, whenever the field itself cannot be resolved.
        try:
            data_params = object.__getattribute__(self, "_data_params")
        except AttributeError:
            data_params = None
        if data_params is not None and item in data_params:
            return data_params[item]
        raise AttributeError(f"'{type(self)}' object has no attribute '{item}'")

    @property
    def shape(self):
        return self._data_params.get("shape", None)

    @property
    def nsplits(self):
        return self._data_params.get("nsplits", None)

    @property
    def data_type(self):
        return self._data_type

    @property
    def data_params(self):
        return self._data_params

    @property
    def params(self) -> Dict[str, Any]:
        """The data type together with the parameters of the concrete object."""
        return {"data_type": self._data_type, "data_params": self._data_params}

    @params.setter
    def params(self, new_params: Dict[str, Any]):
        """Restore ``data_type`` and ``data_params`` from ``new_params``."""
        # After execution, a DataFrameFetch is created while the data
        # corresponding to the original key is still of DataFrameOrSeries
        # type, so when restoring that type there is no "data_type" field
        # in params and it has to be inferred from the keys.
        if "data_type" not in new_params:
            self._data_type = "series" if "dtype" in new_params else "dataframe"
            self._data_params = new_params.copy()
        else:
            self._data_type = new_params.get("data_type")
            # Shallow copy, so the stored dict is independent of the input.
            self._data_params = dict(new_params.get("data_params", {}))

    def refresh_params(self):
        """Recompute ``data_type`` and ``data_params`` from the chunks."""
        # Only chunks that are 1-d, or whose second index component is 0,
        # contribute an index value.
        index_to_index_values = {
            chunk.index: chunk.index_value
            for chunk in self.chunks
            if chunk.ndim == 1 or chunk.index[1] == 0
        }
        index_value = merge_index_value(index_to_index_values, store_data=False)
        nsplits = calc_nsplits({c.index: c.shape for c in self.chunks})
        shape = tuple(sum(ns) for ns in nsplits)

        data_params = {
            "nsplits": nsplits,
            "shape": shape,
            "index_value": index_value,
        }

        self._data_type = self._chunks[0]._data_type
        if self.data_type == "dataframe":
            # Concatenate the dtypes of the chunks whose first index
            # component is 0.
            all_dtypes = [c.dtypes_value.value for c in self.chunks if c.index[0] == 0]
            dtypes = pd.concat(all_dtypes)
            data_params["dtypes"] = dtypes
            data_params["columns_value"] = parse_index(dtypes.index, store_data=True)
            data_params["dtypes_value"] = DtypesValue(
                key=tokenize(dtypes), value=dtypes
            )
        else:
            first_chunk = self.chunks[0]
            data_params["dtype"] = first_chunk.dtype
            data_params["name"] = first_chunk.name
        self._data_params.update(data_params)

    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
        """Do nothing: table metadata is not used by this class."""
        pass

    def ensure_data(self):
        """Execute this object and return a fetch tileable of its concrete type.

        The object is executed, detached from the default session, and a
        new tileable created by ``DataFrameFetch`` with the same key is
        attached to that session and returned.
        """
        from .fetch.core import DataFrameFetch

        self.execute()
        default_sess = get_default_session()
        self._detach_session(default_sess._session)

        fetch_tileable = default_sess._session._tileable_to_fetch[self]
        # ``data_params`` may itself carry an "nsplits" entry (it is written
        # by ``refresh_params``); merging into one dict lets the values of
        # the fetch tileable take precedence instead of raising a
        # duplicate-keyword TypeError.
        tileable_kwargs = dict(self.data_params)
        tileable_kwargs["chunks"] = fetch_tileable.chunks
        tileable_kwargs["nsplits"] = fetch_tileable.nsplits
        new = DataFrameFetch(
            output_types=[getattr(OutputType, self.data_type)]
        ).new_tileable(
            [],
            _key=self.key,
            **tileable_kwargs,
        )
        new._attach_session(default_sess._session)
        return new
2359
+
2360
+
2361
class DataFrameOrSeries(HasShapeTileable, _ToPandasMixin):
    """Entity that accepts ``DataFrameOrSeriesData`` as its data type."""

    __slots__ = ()
    type_name = "DataFrameOrSeries"
    _allow_data_type_ = (DataFrameOrSeriesData,)
2365
+
2366
+
2367
# Tuples grouping each entity class with its corresponding data class.
INDEX_TYPE = (Index, IndexData)
SERIES_TYPE = (Series, SeriesData)
DATAFRAME_OR_SERIES_TYPE = (DataFrameOrSeries, DataFrameOrSeriesData)
DATAFRAME_TYPE = (DataFrame, DataFrameData)
DATAFRAME_GROUPBY_TYPE = (DataFrameGroupBy, DataFrameGroupByData)
SERIES_GROUPBY_TYPE = (SeriesGroupBy, SeriesGroupByData)
# Every groupby flavour, including the common ``GroupBy`` base entity.
GROUPBY_TYPE = (GroupBy, *DATAFRAME_GROUPBY_TYPE, *SERIES_GROUPBY_TYPE)
CATEGORICAL_TYPE = (Categorical, CategoricalData)
# Note that DATAFRAME_OR_SERIES_TYPE is not part of this aggregate.
TILEABLE_TYPE = (
    *INDEX_TYPE,
    *SERIES_TYPE,
    *DATAFRAME_TYPE,
    *GROUPBY_TYPE,
    *CATEGORICAL_TYPE,
)

# Associate every output type with its (entity, data) pair.
register_output_types(OutputType.dataframe, DATAFRAME_TYPE)
register_output_types(OutputType.series, SERIES_TYPE)
register_output_types(OutputType.df_or_series, DATAFRAME_OR_SERIES_TYPE)
register_output_types(OutputType.index, INDEX_TYPE)
register_output_types(OutputType.categorical, CATEGORICAL_TYPE)
register_output_types(OutputType.dataframe_groupby, DATAFRAME_GROUPBY_TYPE)
register_output_types(OutputType.series_groupby, SERIES_GROUPBY_TYPE)