maxframe 2.4.0rc1__cp312-cp312-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1122) hide show
  1. maxframe/__init__.py +33 -0
  2. maxframe/_utils.cp312-win32.pyd +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyi +21 -0
  5. maxframe/_utils.pyx +561 -0
  6. maxframe/codegen/__init__.py +27 -0
  7. maxframe/codegen/core.py +597 -0
  8. maxframe/codegen/spe/__init__.py +16 -0
  9. maxframe/codegen/spe/core.py +307 -0
  10. maxframe/codegen/spe/dataframe/__init__.py +38 -0
  11. maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
  12. maxframe/codegen/spe/dataframe/accessors/base.py +71 -0
  13. maxframe/codegen/spe/dataframe/accessors/dict_.py +89 -0
  14. maxframe/codegen/spe/dataframe/accessors/list_.py +44 -0
  15. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  16. maxframe/codegen/spe/dataframe/arithmetic.py +89 -0
  17. maxframe/codegen/spe/dataframe/datasource.py +181 -0
  18. maxframe/codegen/spe/dataframe/datastore.py +204 -0
  19. maxframe/codegen/spe/dataframe/extensions.py +63 -0
  20. maxframe/codegen/spe/dataframe/fetch.py +26 -0
  21. maxframe/codegen/spe/dataframe/groupby.py +312 -0
  22. maxframe/codegen/spe/dataframe/indexing.py +333 -0
  23. maxframe/codegen/spe/dataframe/merge.py +110 -0
  24. maxframe/codegen/spe/dataframe/misc.py +264 -0
  25. maxframe/codegen/spe/dataframe/missing.py +64 -0
  26. maxframe/codegen/spe/dataframe/reduction.py +183 -0
  27. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  28. maxframe/codegen/spe/dataframe/sort.py +104 -0
  29. maxframe/codegen/spe/dataframe/statistics.py +46 -0
  30. maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
  31. maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
  32. maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
  33. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +304 -0
  34. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +134 -0
  35. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  36. maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
  37. maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
  38. maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
  39. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +95 -0
  40. maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
  41. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  42. maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
  43. maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
  44. maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
  45. maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
  46. maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
  47. maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
  48. maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
  49. maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
  50. maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
  51. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +202 -0
  52. maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
  53. maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
  54. maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
  55. maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
  56. maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
  57. maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
  58. maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
  59. maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
  60. maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
  61. maxframe/codegen/spe/dataframe/tests/test_groupby.py +288 -0
  62. maxframe/codegen/spe/dataframe/tests/test_merge.py +426 -0
  63. maxframe/codegen/spe/dataframe/tests/test_reduction.py +117 -0
  64. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  65. maxframe/codegen/spe/dataframe/tests/test_sort.py +179 -0
  66. maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
  67. maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
  68. maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
  69. maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
  70. maxframe/codegen/spe/dataframe/tseries.py +55 -0
  71. maxframe/codegen/spe/dataframe/udf.py +62 -0
  72. maxframe/codegen/spe/dataframe/value_counts.py +31 -0
  73. maxframe/codegen/spe/dataframe/window.py +65 -0
  74. maxframe/codegen/spe/learn/__init__.py +15 -0
  75. maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
  76. maxframe/codegen/spe/learn/contrib/lightgbm.py +161 -0
  77. maxframe/codegen/spe/learn/contrib/models.py +41 -0
  78. maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
  79. maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
  80. maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
  81. maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
  82. maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
  83. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +99 -0
  84. maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
  85. maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
  86. maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
  87. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  88. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  89. maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
  90. maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
  91. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  92. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  93. maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
  94. maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
  95. maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
  96. maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
  97. maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
  98. maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
  99. maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
  100. maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
  101. maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
  102. maxframe/codegen/spe/learn/utils/__init__.py +15 -0
  103. maxframe/codegen/spe/learn/utils/checks.py +55 -0
  104. maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
  105. maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
  106. maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
  107. maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
  108. maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
  109. maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
  110. maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
  111. maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
  112. maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
  113. maxframe/codegen/spe/learn/utils/validation.py +35 -0
  114. maxframe/codegen/spe/objects.py +26 -0
  115. maxframe/codegen/spe/remote.py +29 -0
  116. maxframe/codegen/spe/tensor/__init__.py +31 -0
  117. maxframe/codegen/spe/tensor/arithmetic.py +95 -0
  118. maxframe/codegen/spe/tensor/core.py +41 -0
  119. maxframe/codegen/spe/tensor/datasource.py +166 -0
  120. maxframe/codegen/spe/tensor/extensions.py +35 -0
  121. maxframe/codegen/spe/tensor/fetch.py +26 -0
  122. maxframe/codegen/spe/tensor/fft.py +74 -0
  123. maxframe/codegen/spe/tensor/indexing.py +63 -0
  124. maxframe/codegen/spe/tensor/linalg.py +90 -0
  125. maxframe/codegen/spe/tensor/merge.py +31 -0
  126. maxframe/codegen/spe/tensor/misc.py +175 -0
  127. maxframe/codegen/spe/tensor/random.py +29 -0
  128. maxframe/codegen/spe/tensor/reduction.py +39 -0
  129. maxframe/codegen/spe/tensor/reshape.py +26 -0
  130. maxframe/codegen/spe/tensor/sort.py +42 -0
  131. maxframe/codegen/spe/tensor/spatial.py +45 -0
  132. maxframe/codegen/spe/tensor/special.py +35 -0
  133. maxframe/codegen/spe/tensor/statistics.py +68 -0
  134. maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
  135. maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
  136. maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
  137. maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
  138. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  139. maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
  140. maxframe/codegen/spe/tensor/tests/test_linalg.py +52 -0
  141. maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
  142. maxframe/codegen/spe/tensor/tests/test_misc.py +144 -0
  143. maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
  144. maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
  145. maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
  146. maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
  147. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  148. maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
  149. maxframe/codegen/spe/tensor/tests/test_statistics.py +43 -0
  150. maxframe/codegen/spe/tests/__init__.py +13 -0
  151. maxframe/codegen/spe/tests/test_remote.py +29 -0
  152. maxframe/codegen/spe/tests/test_spe_codegen.py +135 -0
  153. maxframe/codegen/spe/utils.py +56 -0
  154. maxframe/codegen/tests/__init__.py +13 -0
  155. maxframe/codegen/tests/test_codegen.py +67 -0
  156. maxframe/config/__init__.py +15 -0
  157. maxframe/config/config.py +630 -0
  158. maxframe/config/tests/__init__.py +13 -0
  159. maxframe/config/tests/test_config.py +114 -0
  160. maxframe/config/tests/test_validators.py +46 -0
  161. maxframe/config/validators.py +142 -0
  162. maxframe/conftest.py +261 -0
  163. maxframe/core/__init__.py +53 -0
  164. maxframe/core/accessor.py +45 -0
  165. maxframe/core/base.py +157 -0
  166. maxframe/core/context.py +110 -0
  167. maxframe/core/entity/__init__.py +34 -0
  168. maxframe/core/entity/core.py +150 -0
  169. maxframe/core/entity/executable.py +337 -0
  170. maxframe/core/entity/objects.py +115 -0
  171. maxframe/core/entity/output_types.py +101 -0
  172. maxframe/core/entity/tests/__init__.py +13 -0
  173. maxframe/core/entity/tests/test_objects.py +42 -0
  174. maxframe/core/entity/tileables.py +376 -0
  175. maxframe/core/entity/utils.py +39 -0
  176. maxframe/core/graph/__init__.py +22 -0
  177. maxframe/core/graph/builder/__init__.py +15 -0
  178. maxframe/core/graph/builder/base.py +90 -0
  179. maxframe/core/graph/builder/tileable.py +34 -0
  180. maxframe/core/graph/builder/utils.py +37 -0
  181. maxframe/core/graph/core.cp312-win32.pyd +0 -0
  182. maxframe/core/graph/core.pyx +478 -0
  183. maxframe/core/graph/entity.py +187 -0
  184. maxframe/core/graph/tests/__init__.py +13 -0
  185. maxframe/core/graph/tests/test_graph.py +205 -0
  186. maxframe/core/mode.py +101 -0
  187. maxframe/core/operator/__init__.py +32 -0
  188. maxframe/core/operator/base.py +481 -0
  189. maxframe/core/operator/core.py +307 -0
  190. maxframe/core/operator/fetch.py +40 -0
  191. maxframe/core/operator/objects.py +43 -0
  192. maxframe/core/operator/shuffle.py +45 -0
  193. maxframe/core/operator/tests/__init__.py +13 -0
  194. maxframe/core/operator/tests/test_core.py +64 -0
  195. maxframe/core/operator/utils.py +68 -0
  196. maxframe/core/tests/__init__.py +13 -0
  197. maxframe/core/tests/test_mode.py +75 -0
  198. maxframe/dataframe/__init__.py +90 -0
  199. maxframe/dataframe/accessors/__init__.py +20 -0
  200. maxframe/dataframe/accessors/compat.py +45 -0
  201. maxframe/dataframe/accessors/datetime_/__init__.py +35 -0
  202. maxframe/dataframe/accessors/datetime_/accessor.py +67 -0
  203. maxframe/dataframe/accessors/datetime_/core.py +106 -0
  204. maxframe/dataframe/accessors/datetime_/tests/__init__.py +13 -0
  205. maxframe/dataframe/accessors/datetime_/tests/test_datetime_accessor.py +41 -0
  206. maxframe/dataframe/accessors/dict_/__init__.py +45 -0
  207. maxframe/dataframe/accessors/dict_/accessor.py +39 -0
  208. maxframe/dataframe/accessors/dict_/contains.py +72 -0
  209. maxframe/dataframe/accessors/dict_/core.py +48 -0
  210. maxframe/dataframe/accessors/dict_/getitem.py +140 -0
  211. maxframe/dataframe/accessors/dict_/length.py +64 -0
  212. maxframe/dataframe/accessors/dict_/remove.py +75 -0
  213. maxframe/dataframe/accessors/dict_/setitem.py +79 -0
  214. maxframe/dataframe/accessors/dict_/tests/__init__.py +13 -0
  215. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +168 -0
  216. maxframe/dataframe/accessors/list_/__init__.py +39 -0
  217. maxframe/dataframe/accessors/list_/accessor.py +39 -0
  218. maxframe/dataframe/accessors/list_/core.py +48 -0
  219. maxframe/dataframe/accessors/list_/getitem.py +128 -0
  220. maxframe/dataframe/accessors/list_/length.py +64 -0
  221. maxframe/dataframe/accessors/list_/tests/__init__.py +13 -0
  222. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +81 -0
  223. maxframe/dataframe/accessors/plotting/__init__.py +40 -0
  224. maxframe/dataframe/accessors/plotting/core.py +78 -0
  225. maxframe/dataframe/accessors/plotting/tests/__init__.py +13 -0
  226. maxframe/dataframe/accessors/plotting/tests/test_plotting_accessor.py +136 -0
  227. maxframe/dataframe/accessors/string_/__init__.py +36 -0
  228. maxframe/dataframe/accessors/string_/accessor.py +215 -0
  229. maxframe/dataframe/accessors/string_/core.py +226 -0
  230. maxframe/dataframe/accessors/string_/tests/__init__.py +13 -0
  231. maxframe/dataframe/accessors/string_/tests/test_string_accessor.py +73 -0
  232. maxframe/dataframe/accessors/struct_/__init__.py +39 -0
  233. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  234. maxframe/dataframe/accessors/struct_/core.py +43 -0
  235. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  236. maxframe/dataframe/accessors/struct_/field.py +123 -0
  237. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  238. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  239. maxframe/dataframe/arithmetic/__init__.py +373 -0
  240. maxframe/dataframe/arithmetic/abs.py +33 -0
  241. maxframe/dataframe/arithmetic/add.py +60 -0
  242. maxframe/dataframe/arithmetic/arccos.py +28 -0
  243. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  244. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  245. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  246. maxframe/dataframe/arithmetic/arctan.py +28 -0
  247. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  248. maxframe/dataframe/arithmetic/between.py +106 -0
  249. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  250. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  251. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  252. maxframe/dataframe/arithmetic/ceil.py +28 -0
  253. maxframe/dataframe/arithmetic/core.py +361 -0
  254. maxframe/dataframe/arithmetic/cos.py +28 -0
  255. maxframe/dataframe/arithmetic/cosh.py +28 -0
  256. maxframe/dataframe/arithmetic/degrees.py +28 -0
  257. maxframe/dataframe/arithmetic/docstring.py +416 -0
  258. maxframe/dataframe/arithmetic/dot.py +237 -0
  259. maxframe/dataframe/arithmetic/equal.py +58 -0
  260. maxframe/dataframe/arithmetic/exp.py +28 -0
  261. maxframe/dataframe/arithmetic/exp2.py +28 -0
  262. maxframe/dataframe/arithmetic/expm1.py +28 -0
  263. maxframe/dataframe/arithmetic/floor.py +28 -0
  264. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  265. maxframe/dataframe/arithmetic/greater.py +59 -0
  266. maxframe/dataframe/arithmetic/greater_equal.py +59 -0
  267. maxframe/dataframe/arithmetic/invert.py +33 -0
  268. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  269. maxframe/dataframe/arithmetic/less.py +57 -0
  270. maxframe/dataframe/arithmetic/less_equal.py +59 -0
  271. maxframe/dataframe/arithmetic/log.py +28 -0
  272. maxframe/dataframe/arithmetic/log10.py +28 -0
  273. maxframe/dataframe/arithmetic/log2.py +28 -0
  274. maxframe/dataframe/arithmetic/maximum.py +33 -0
  275. maxframe/dataframe/arithmetic/minimum.py +33 -0
  276. maxframe/dataframe/arithmetic/mod.py +60 -0
  277. maxframe/dataframe/arithmetic/multiply.py +60 -0
  278. maxframe/dataframe/arithmetic/negative.py +33 -0
  279. maxframe/dataframe/arithmetic/not_equal.py +58 -0
  280. maxframe/dataframe/arithmetic/power.py +68 -0
  281. maxframe/dataframe/arithmetic/radians.py +28 -0
  282. maxframe/dataframe/arithmetic/round.py +144 -0
  283. maxframe/dataframe/arithmetic/sin.py +28 -0
  284. maxframe/dataframe/arithmetic/sinh.py +28 -0
  285. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  286. maxframe/dataframe/arithmetic/subtract.py +64 -0
  287. maxframe/dataframe/arithmetic/tan.py +28 -0
  288. maxframe/dataframe/arithmetic/tanh.py +28 -0
  289. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  290. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +747 -0
  291. maxframe/dataframe/arithmetic/truediv.py +64 -0
  292. maxframe/dataframe/arithmetic/trunc.py +28 -0
  293. maxframe/dataframe/core.py +2386 -0
  294. maxframe/dataframe/datasource/__init__.py +33 -0
  295. maxframe/dataframe/datasource/core.py +112 -0
  296. maxframe/dataframe/datasource/dataframe.py +59 -0
  297. maxframe/dataframe/datasource/date_range.py +512 -0
  298. maxframe/dataframe/datasource/direct.py +57 -0
  299. maxframe/dataframe/datasource/from_dict.py +124 -0
  300. maxframe/dataframe/datasource/from_index.py +58 -0
  301. maxframe/dataframe/datasource/from_records.py +191 -0
  302. maxframe/dataframe/datasource/from_tensor.py +503 -0
  303. maxframe/dataframe/datasource/index.py +117 -0
  304. maxframe/dataframe/datasource/read_csv.py +534 -0
  305. maxframe/dataframe/datasource/read_odps_query.py +536 -0
  306. maxframe/dataframe/datasource/read_odps_table.py +295 -0
  307. maxframe/dataframe/datasource/read_parquet.py +278 -0
  308. maxframe/dataframe/datasource/series.py +55 -0
  309. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  310. maxframe/dataframe/datasource/tests/test_datasource.py +663 -0
  311. maxframe/dataframe/datastore/__init__.py +41 -0
  312. maxframe/dataframe/datastore/core.py +28 -0
  313. maxframe/dataframe/datastore/direct.py +268 -0
  314. maxframe/dataframe/datastore/tests/__init__.py +13 -0
  315. maxframe/dataframe/datastore/tests/test_to_odps.py +99 -0
  316. maxframe/dataframe/datastore/to_csv.py +219 -0
  317. maxframe/dataframe/datastore/to_json.py +215 -0
  318. maxframe/dataframe/datastore/to_odps.py +285 -0
  319. maxframe/dataframe/datastore/to_parquet.py +121 -0
  320. maxframe/dataframe/extensions/__init__.py +70 -0
  321. maxframe/dataframe/extensions/accessor.py +35 -0
  322. maxframe/dataframe/extensions/apply_chunk.py +733 -0
  323. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  324. maxframe/dataframe/extensions/collect_kv.py +126 -0
  325. maxframe/dataframe/extensions/extract_kv.py +177 -0
  326. maxframe/dataframe/extensions/flatjson.py +133 -0
  327. maxframe/dataframe/extensions/flatmap.py +329 -0
  328. maxframe/dataframe/extensions/map_reduce.py +263 -0
  329. maxframe/dataframe/extensions/rebalance.py +62 -0
  330. maxframe/dataframe/extensions/reshuffle.py +83 -0
  331. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  332. maxframe/dataframe/extensions/tests/test_apply_chunk.py +194 -0
  333. maxframe/dataframe/extensions/tests/test_extensions.py +198 -0
  334. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  335. maxframe/dataframe/fetch/__init__.py +15 -0
  336. maxframe/dataframe/fetch/core.py +97 -0
  337. maxframe/dataframe/groupby/__init__.py +105 -0
  338. maxframe/dataframe/groupby/aggregation.py +485 -0
  339. maxframe/dataframe/groupby/apply.py +235 -0
  340. maxframe/dataframe/groupby/apply_chunk.py +407 -0
  341. maxframe/dataframe/groupby/core.py +342 -0
  342. maxframe/dataframe/groupby/cum.py +102 -0
  343. maxframe/dataframe/groupby/expanding.py +264 -0
  344. maxframe/dataframe/groupby/extensions.py +26 -0
  345. maxframe/dataframe/groupby/fill.py +149 -0
  346. maxframe/dataframe/groupby/getitem.py +105 -0
  347. maxframe/dataframe/groupby/head.py +115 -0
  348. maxframe/dataframe/groupby/rank.py +136 -0
  349. maxframe/dataframe/groupby/rolling.py +206 -0
  350. maxframe/dataframe/groupby/sample.py +214 -0
  351. maxframe/dataframe/groupby/shift.py +114 -0
  352. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  353. maxframe/dataframe/groupby/tests/test_groupby.py +373 -0
  354. maxframe/dataframe/groupby/transform.py +264 -0
  355. maxframe/dataframe/indexing/__init__.py +104 -0
  356. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  357. maxframe/dataframe/indexing/align.py +350 -0
  358. maxframe/dataframe/indexing/at.py +83 -0
  359. maxframe/dataframe/indexing/droplevel.py +195 -0
  360. maxframe/dataframe/indexing/filter.py +169 -0
  361. maxframe/dataframe/indexing/get_level_values.py +76 -0
  362. maxframe/dataframe/indexing/getitem.py +205 -0
  363. maxframe/dataframe/indexing/iat.py +82 -0
  364. maxframe/dataframe/indexing/iloc.py +711 -0
  365. maxframe/dataframe/indexing/insert.py +118 -0
  366. maxframe/dataframe/indexing/loc.py +694 -0
  367. maxframe/dataframe/indexing/reindex.py +541 -0
  368. maxframe/dataframe/indexing/rename.py +445 -0
  369. maxframe/dataframe/indexing/rename_axis.py +217 -0
  370. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  371. maxframe/dataframe/indexing/reset_index.py +427 -0
  372. maxframe/dataframe/indexing/sample.py +232 -0
  373. maxframe/dataframe/indexing/set_axis.py +197 -0
  374. maxframe/dataframe/indexing/set_index.py +128 -0
  375. maxframe/dataframe/indexing/setitem.py +133 -0
  376. maxframe/dataframe/indexing/swaplevel.py +185 -0
  377. maxframe/dataframe/indexing/take.py +99 -0
  378. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  379. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  380. maxframe/dataframe/indexing/truncate.py +140 -0
  381. maxframe/dataframe/indexing/where.py +300 -0
  382. maxframe/dataframe/indexing/xs.py +148 -0
  383. maxframe/dataframe/initializer.py +298 -0
  384. maxframe/dataframe/merge/__init__.py +53 -0
  385. maxframe/dataframe/merge/append.py +120 -0
  386. maxframe/dataframe/merge/combine.py +244 -0
  387. maxframe/dataframe/merge/combine_first.py +120 -0
  388. maxframe/dataframe/merge/compare.py +387 -0
  389. maxframe/dataframe/merge/concat.py +500 -0
  390. maxframe/dataframe/merge/merge.py +806 -0
  391. maxframe/dataframe/merge/tests/__init__.py +13 -0
  392. maxframe/dataframe/merge/tests/test_merge.py +390 -0
  393. maxframe/dataframe/merge/update.py +271 -0
  394. maxframe/dataframe/misc/__init__.py +145 -0
  395. maxframe/dataframe/misc/_duplicate.py +56 -0
  396. maxframe/dataframe/misc/apply.py +730 -0
  397. maxframe/dataframe/misc/astype.py +237 -0
  398. maxframe/dataframe/misc/case_when.py +145 -0
  399. maxframe/dataframe/misc/check_monotonic.py +84 -0
  400. maxframe/dataframe/misc/check_unique.py +82 -0
  401. maxframe/dataframe/misc/clip.py +145 -0
  402. maxframe/dataframe/misc/cut.py +386 -0
  403. maxframe/dataframe/misc/describe.py +278 -0
  404. maxframe/dataframe/misc/diff.py +210 -0
  405. maxframe/dataframe/misc/drop.py +473 -0
  406. maxframe/dataframe/misc/drop_duplicates.py +251 -0
  407. maxframe/dataframe/misc/duplicated.py +292 -0
  408. maxframe/dataframe/misc/eval.py +730 -0
  409. maxframe/dataframe/misc/explode.py +171 -0
  410. maxframe/dataframe/misc/factorize.py +160 -0
  411. maxframe/dataframe/misc/get_dummies.py +241 -0
  412. maxframe/dataframe/misc/infer_dtypes.py +251 -0
  413. maxframe/dataframe/misc/isin.py +220 -0
  414. maxframe/dataframe/misc/map.py +360 -0
  415. maxframe/dataframe/misc/memory_usage.py +248 -0
  416. maxframe/dataframe/misc/pct_change.py +68 -0
  417. maxframe/dataframe/misc/qcut.py +104 -0
  418. maxframe/dataframe/misc/rechunk.py +59 -0
  419. maxframe/dataframe/misc/repeat.py +159 -0
  420. maxframe/dataframe/misc/select_dtypes.py +104 -0
  421. maxframe/dataframe/misc/shift.py +259 -0
  422. maxframe/dataframe/misc/tests/__init__.py +13 -0
  423. maxframe/dataframe/misc/tests/test_misc.py +649 -0
  424. maxframe/dataframe/misc/to_numeric.py +181 -0
  425. maxframe/dataframe/misc/transform.py +346 -0
  426. maxframe/dataframe/misc/transpose.py +148 -0
  427. maxframe/dataframe/misc/valid_index.py +115 -0
  428. maxframe/dataframe/misc/value_counts.py +206 -0
  429. maxframe/dataframe/missing/__init__.py +53 -0
  430. maxframe/dataframe/missing/checkna.py +231 -0
  431. maxframe/dataframe/missing/dropna.py +294 -0
  432. maxframe/dataframe/missing/fillna.py +283 -0
  433. maxframe/dataframe/missing/replace.py +446 -0
  434. maxframe/dataframe/missing/tests/__init__.py +13 -0
  435. maxframe/dataframe/missing/tests/test_missing.py +90 -0
  436. maxframe/dataframe/operators.py +231 -0
  437. maxframe/dataframe/reduction/__init__.py +129 -0
  438. maxframe/dataframe/reduction/aggregation.py +502 -0
  439. maxframe/dataframe/reduction/all.py +78 -0
  440. maxframe/dataframe/reduction/any.py +78 -0
  441. maxframe/dataframe/reduction/argmax.py +103 -0
  442. maxframe/dataframe/reduction/argmin.py +103 -0
  443. maxframe/dataframe/reduction/core.py +923 -0
  444. maxframe/dataframe/reduction/count.py +63 -0
  445. maxframe/dataframe/reduction/cov.py +166 -0
  446. maxframe/dataframe/reduction/cummax.py +30 -0
  447. maxframe/dataframe/reduction/cummin.py +30 -0
  448. maxframe/dataframe/reduction/cumprod.py +30 -0
  449. maxframe/dataframe/reduction/cumsum.py +30 -0
  450. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  451. maxframe/dataframe/reduction/idxmax.py +185 -0
  452. maxframe/dataframe/reduction/idxmin.py +185 -0
  453. maxframe/dataframe/reduction/kurtosis.py +111 -0
  454. maxframe/dataframe/reduction/max.py +65 -0
  455. maxframe/dataframe/reduction/mean.py +63 -0
  456. maxframe/dataframe/reduction/median.py +56 -0
  457. maxframe/dataframe/reduction/min.py +65 -0
  458. maxframe/dataframe/reduction/mode.py +190 -0
  459. maxframe/dataframe/reduction/nunique.py +149 -0
  460. maxframe/dataframe/reduction/prod.py +81 -0
  461. maxframe/dataframe/reduction/reduction_size.py +36 -0
  462. maxframe/dataframe/reduction/sem.py +73 -0
  463. maxframe/dataframe/reduction/skew.py +93 -0
  464. maxframe/dataframe/reduction/std.py +53 -0
  465. maxframe/dataframe/reduction/str_concat.py +51 -0
  466. maxframe/dataframe/reduction/sum.py +81 -0
  467. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  468. maxframe/dataframe/reduction/tests/test_reduction.py +598 -0
  469. maxframe/dataframe/reduction/unique.py +153 -0
  470. maxframe/dataframe/reduction/var.py +76 -0
  471. maxframe/dataframe/reshape/__init__.py +38 -0
  472. maxframe/dataframe/reshape/melt.py +169 -0
  473. maxframe/dataframe/reshape/pivot.py +233 -0
  474. maxframe/dataframe/reshape/pivot_table.py +275 -0
  475. maxframe/dataframe/reshape/stack.py +240 -0
  476. maxframe/dataframe/reshape/unstack.py +114 -0
  477. maxframe/dataframe/sort/__init__.py +49 -0
  478. maxframe/dataframe/sort/argsort.py +68 -0
  479. maxframe/dataframe/sort/core.py +37 -0
  480. maxframe/dataframe/sort/nlargest.py +238 -0
  481. maxframe/dataframe/sort/nsmallest.py +228 -0
  482. maxframe/dataframe/sort/rank.py +147 -0
  483. maxframe/dataframe/sort/sort_index.py +153 -0
  484. maxframe/dataframe/sort/sort_values.py +308 -0
  485. maxframe/dataframe/sort/tests/__init__.py +13 -0
  486. maxframe/dataframe/sort/tests/test_sort.py +85 -0
  487. maxframe/dataframe/statistics/__init__.py +33 -0
  488. maxframe/dataframe/statistics/corr.py +284 -0
  489. maxframe/dataframe/statistics/quantile.py +338 -0
  490. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  491. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  492. maxframe/dataframe/tests/__init__.py +13 -0
  493. maxframe/dataframe/tests/test_initializer.py +60 -0
  494. maxframe/dataframe/tests/test_typing.py +119 -0
  495. maxframe/dataframe/tests/test_utils.py +169 -0
  496. maxframe/dataframe/tseries/__init__.py +32 -0
  497. maxframe/dataframe/tseries/at_time.py +61 -0
  498. maxframe/dataframe/tseries/between_time.py +122 -0
  499. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  500. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  501. maxframe/dataframe/tseries/to_datetime.py +299 -0
  502. maxframe/dataframe/typing_.py +196 -0
  503. maxframe/dataframe/ufunc/__init__.py +27 -0
  504. maxframe/dataframe/ufunc/tensor.py +54 -0
  505. maxframe/dataframe/ufunc/ufunc.py +53 -0
  506. maxframe/dataframe/utils.py +1728 -0
  507. maxframe/dataframe/window/__init__.py +29 -0
  508. maxframe/dataframe/window/aggregation.py +100 -0
  509. maxframe/dataframe/window/core.py +82 -0
  510. maxframe/dataframe/window/ewm.py +247 -0
  511. maxframe/dataframe/window/expanding.py +151 -0
  512. maxframe/dataframe/window/rolling.py +389 -0
  513. maxframe/dataframe/window/tests/__init__.py +13 -0
  514. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  515. maxframe/dataframe/window/tests/test_expanding.py +60 -0
  516. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  517. maxframe/env.py +37 -0
  518. maxframe/errors.py +52 -0
  519. maxframe/extension.py +131 -0
  520. maxframe/io/__init__.py +13 -0
  521. maxframe/io/objects/__init__.py +24 -0
  522. maxframe/io/objects/core.py +156 -0
  523. maxframe/io/objects/tensor.py +133 -0
  524. maxframe/io/objects/tests/__init__.py +13 -0
  525. maxframe/io/objects/tests/test_object_io.py +85 -0
  526. maxframe/io/odpsio/__init__.py +24 -0
  527. maxframe/io/odpsio/arrow.py +161 -0
  528. maxframe/io/odpsio/schema.py +533 -0
  529. maxframe/io/odpsio/tableio.py +736 -0
  530. maxframe/io/odpsio/tests/__init__.py +13 -0
  531. maxframe/io/odpsio/tests/test_arrow.py +132 -0
  532. maxframe/io/odpsio/tests/test_schema.py +582 -0
  533. maxframe/io/odpsio/tests/test_tableio.py +205 -0
  534. maxframe/io/odpsio/tests/test_volumeio.py +75 -0
  535. maxframe/io/odpsio/volumeio.py +102 -0
  536. maxframe/learn/__init__.py +25 -0
  537. maxframe/learn/cluster/__init__.py +15 -0
  538. maxframe/learn/cluster/_kmeans.py +782 -0
  539. maxframe/learn/contrib/__init__.py +17 -0
  540. maxframe/learn/contrib/graph/__init__.py +15 -0
  541. maxframe/learn/contrib/graph/connected_components.py +216 -0
  542. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  543. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  544. maxframe/learn/contrib/lightgbm/__init__.py +33 -0
  545. maxframe/learn/contrib/lightgbm/_predict.py +138 -0
  546. maxframe/learn/contrib/lightgbm/_train.py +163 -0
  547. maxframe/learn/contrib/lightgbm/callback.py +114 -0
  548. maxframe/learn/contrib/lightgbm/classifier.py +199 -0
  549. maxframe/learn/contrib/lightgbm/core.py +372 -0
  550. maxframe/learn/contrib/lightgbm/dataset.py +153 -0
  551. maxframe/learn/contrib/lightgbm/regressor.py +29 -0
  552. maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
  553. maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
  554. maxframe/learn/contrib/llm/__init__.py +17 -0
  555. maxframe/learn/contrib/llm/core.py +105 -0
  556. maxframe/learn/contrib/llm/deploy/__init__.py +13 -0
  557. maxframe/learn/contrib/llm/deploy/config.py +221 -0
  558. maxframe/learn/contrib/llm/deploy/core.py +247 -0
  559. maxframe/learn/contrib/llm/deploy/framework.py +35 -0
  560. maxframe/learn/contrib/llm/deploy/loader.py +360 -0
  561. maxframe/learn/contrib/llm/deploy/tests/__init__.py +13 -0
  562. maxframe/learn/contrib/llm/deploy/tests/test_register_models.py +359 -0
  563. maxframe/learn/contrib/llm/models/__init__.py +16 -0
  564. maxframe/learn/contrib/llm/models/dashscope.py +114 -0
  565. maxframe/learn/contrib/llm/models/managed.py +119 -0
  566. maxframe/learn/contrib/llm/models/openai.py +72 -0
  567. maxframe/learn/contrib/llm/multi_modal.py +135 -0
  568. maxframe/learn/contrib/llm/tests/__init__.py +13 -0
  569. maxframe/learn/contrib/llm/tests/test_core.py +34 -0
  570. maxframe/learn/contrib/llm/tests/test_openai.py +187 -0
  571. maxframe/learn/contrib/llm/tests/test_text_gen.py +155 -0
  572. maxframe/learn/contrib/llm/text.py +608 -0
  573. maxframe/learn/contrib/models.py +109 -0
  574. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  575. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  576. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  577. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  578. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  579. maxframe/learn/contrib/utils.py +108 -0
  580. maxframe/learn/contrib/xgboost/__init__.py +33 -0
  581. maxframe/learn/contrib/xgboost/callback.py +86 -0
  582. maxframe/learn/contrib/xgboost/classifier.py +119 -0
  583. maxframe/learn/contrib/xgboost/core.py +469 -0
  584. maxframe/learn/contrib/xgboost/dmatrix.py +157 -0
  585. maxframe/learn/contrib/xgboost/predict.py +133 -0
  586. maxframe/learn/contrib/xgboost/regressor.py +91 -0
  587. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  588. maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
  589. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  590. maxframe/learn/contrib/xgboost/train.py +181 -0
  591. maxframe/learn/core.py +344 -0
  592. maxframe/learn/datasets/__init__.py +20 -0
  593. maxframe/learn/datasets/samples_generator.py +628 -0
  594. maxframe/learn/linear_model/__init__.py +15 -0
  595. maxframe/learn/linear_model/_base.py +220 -0
  596. maxframe/learn/linear_model/_lin_reg.py +175 -0
  597. maxframe/learn/metrics/__init__.py +31 -0
  598. maxframe/learn/metrics/_check_targets.py +95 -0
  599. maxframe/learn/metrics/_classification.py +1266 -0
  600. maxframe/learn/metrics/_ranking.py +477 -0
  601. maxframe/learn/metrics/_regression.py +256 -0
  602. maxframe/learn/metrics/_scorer.py +60 -0
  603. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  604. maxframe/learn/metrics/pairwise/core.py +77 -0
  605. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  606. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  607. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  608. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  609. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  610. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  611. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  612. maxframe/learn/metrics/tests/__init__.py +13 -0
  613. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  614. maxframe/learn/model_selection/__init__.py +15 -0
  615. maxframe/learn/model_selection/_split.py +451 -0
  616. maxframe/learn/model_selection/tests/__init__.py +13 -0
  617. maxframe/learn/model_selection/tests/test_split.py +156 -0
  618. maxframe/learn/preprocessing/__init__.py +16 -0
  619. maxframe/learn/preprocessing/_data/__init__.py +17 -0
  620. maxframe/learn/preprocessing/_data/min_max_scaler.py +401 -0
  621. maxframe/learn/preprocessing/_data/normalize.py +127 -0
  622. maxframe/learn/preprocessing/_data/standard_scaler.py +512 -0
  623. maxframe/learn/preprocessing/_data/utils.py +79 -0
  624. maxframe/learn/preprocessing/_label/__init__.py +16 -0
  625. maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
  626. maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
  627. maxframe/learn/utils/__init__.py +20 -0
  628. maxframe/learn/utils/_encode.py +312 -0
  629. maxframe/learn/utils/checks.py +160 -0
  630. maxframe/learn/utils/core.py +121 -0
  631. maxframe/learn/utils/extmath.py +246 -0
  632. maxframe/learn/utils/multiclass.py +292 -0
  633. maxframe/learn/utils/odpsio.py +262 -0
  634. maxframe/learn/utils/shuffle.py +114 -0
  635. maxframe/learn/utils/sparsefuncs.py +87 -0
  636. maxframe/learn/utils/validation.py +775 -0
  637. maxframe/lib/__init__.py +13 -0
  638. maxframe/lib/aio/__init__.py +27 -0
  639. maxframe/lib/aio/_runners.py +162 -0
  640. maxframe/lib/aio/_threads.py +35 -0
  641. maxframe/lib/aio/base.py +82 -0
  642. maxframe/lib/aio/file.py +85 -0
  643. maxframe/lib/aio/isolation.py +100 -0
  644. maxframe/lib/aio/lru.py +242 -0
  645. maxframe/lib/aio/parallelism.py +37 -0
  646. maxframe/lib/aio/tests/__init__.py +13 -0
  647. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  648. maxframe/lib/compat.py +185 -0
  649. maxframe/lib/compression.py +55 -0
  650. maxframe/lib/cython/__init__.py +13 -0
  651. maxframe/lib/cython/libcpp.pxd +30 -0
  652. maxframe/lib/dtypes_extension/__init__.py +30 -0
  653. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +609 -0
  654. maxframe/lib/dtypes_extension/blob.py +304 -0
  655. maxframe/lib/dtypes_extension/dtypes.py +106 -0
  656. maxframe/lib/dtypes_extension/tests/__init__.py +13 -0
  657. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  658. maxframe/lib/dtypes_extension/tests/test_dtypes.py +63 -0
  659. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  660. maxframe/lib/filesystem/__init__.py +22 -0
  661. maxframe/lib/filesystem/_glob.py +173 -0
  662. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  663. maxframe/lib/filesystem/_oss_lib/common.py +274 -0
  664. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  665. maxframe/lib/filesystem/_oss_lib/handle.py +180 -0
  666. maxframe/lib/filesystem/arrow.py +240 -0
  667. maxframe/lib/filesystem/base.py +327 -0
  668. maxframe/lib/filesystem/core.py +95 -0
  669. maxframe/lib/filesystem/fshandler.py +136 -0
  670. maxframe/lib/filesystem/fsmap.py +164 -0
  671. maxframe/lib/filesystem/hdfs.py +31 -0
  672. maxframe/lib/filesystem/local.py +120 -0
  673. maxframe/lib/filesystem/oss.py +283 -0
  674. maxframe/lib/filesystem/tests/__init__.py +13 -0
  675. maxframe/lib/filesystem/tests/test_filesystem.py +205 -0
  676. maxframe/lib/filesystem/tests/test_fshandler.py +281 -0
  677. maxframe/lib/filesystem/tests/test_oss.py +220 -0
  678. maxframe/lib/functools_compat.py +81 -0
  679. maxframe/lib/mmh3.cp312-win32.pyd +0 -0
  680. maxframe/lib/mmh3.pyi +43 -0
  681. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  682. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  683. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  684. maxframe/lib/sparse/__init__.py +856 -0
  685. maxframe/lib/sparse/array.py +1616 -0
  686. maxframe/lib/sparse/core.py +90 -0
  687. maxframe/lib/sparse/linalg.py +31 -0
  688. maxframe/lib/sparse/matrix.py +244 -0
  689. maxframe/lib/sparse/tests/__init__.py +13 -0
  690. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  691. maxframe/lib/sparse/vector.py +148 -0
  692. maxframe/lib/tblib/LICENSE +20 -0
  693. maxframe/lib/tblib/__init__.py +327 -0
  694. maxframe/lib/tblib/cpython.py +83 -0
  695. maxframe/lib/tblib/decorators.py +44 -0
  696. maxframe/lib/tblib/pickling_support.py +90 -0
  697. maxframe/lib/tests/__init__.py +13 -0
  698. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  699. maxframe/lib/version.py +620 -0
  700. maxframe/lib/wrapped_pickle.py +177 -0
  701. maxframe/mixin.py +157 -0
  702. maxframe/opcodes.py +654 -0
  703. maxframe/protocol.py +611 -0
  704. maxframe/remote/__init__.py +18 -0
  705. maxframe/remote/core.py +212 -0
  706. maxframe/remote/run_script.py +124 -0
  707. maxframe/serialization/__init__.py +39 -0
  708. maxframe/serialization/arrow.py +107 -0
  709. maxframe/serialization/blob.py +32 -0
  710. maxframe/serialization/core.cp312-win32.pyd +0 -0
  711. maxframe/serialization/core.pxd +50 -0
  712. maxframe/serialization/core.pyi +66 -0
  713. maxframe/serialization/core.pyx +1282 -0
  714. maxframe/serialization/exception.py +90 -0
  715. maxframe/serialization/maxframe_objects.py +39 -0
  716. maxframe/serialization/numpy.py +110 -0
  717. maxframe/serialization/pandas.py +278 -0
  718. maxframe/serialization/scipy.py +71 -0
  719. maxframe/serialization/serializables/__init__.py +55 -0
  720. maxframe/serialization/serializables/core.py +469 -0
  721. maxframe/serialization/serializables/field.py +624 -0
  722. maxframe/serialization/serializables/field_type.py +592 -0
  723. maxframe/serialization/serializables/tests/__init__.py +13 -0
  724. maxframe/serialization/serializables/tests/test_field_type.py +119 -0
  725. maxframe/serialization/serializables/tests/test_serializable.py +313 -0
  726. maxframe/serialization/tests/__init__.py +13 -0
  727. maxframe/serialization/tests/test_serial.py +516 -0
  728. maxframe/session.py +1250 -0
  729. maxframe/sperunner.py +165 -0
  730. maxframe/tensor/__init__.py +325 -0
  731. maxframe/tensor/arithmetic/__init__.py +322 -0
  732. maxframe/tensor/arithmetic/abs.py +66 -0
  733. maxframe/tensor/arithmetic/absolute.py +66 -0
  734. maxframe/tensor/arithmetic/add.py +112 -0
  735. maxframe/tensor/arithmetic/angle.py +70 -0
  736. maxframe/tensor/arithmetic/arccos.py +101 -0
  737. maxframe/tensor/arithmetic/arccosh.py +89 -0
  738. maxframe/tensor/arithmetic/arcsin.py +92 -0
  739. maxframe/tensor/arithmetic/arcsinh.py +84 -0
  740. maxframe/tensor/arithmetic/arctan.py +104 -0
  741. maxframe/tensor/arithmetic/arctan2.py +126 -0
  742. maxframe/tensor/arithmetic/arctanh.py +84 -0
  743. maxframe/tensor/arithmetic/around.py +112 -0
  744. maxframe/tensor/arithmetic/bitand.py +93 -0
  745. maxframe/tensor/arithmetic/bitor.py +100 -0
  746. maxframe/tensor/arithmetic/bitxor.py +93 -0
  747. maxframe/tensor/arithmetic/cbrt.py +64 -0
  748. maxframe/tensor/arithmetic/ceil.py +69 -0
  749. maxframe/tensor/arithmetic/clip.py +165 -0
  750. maxframe/tensor/arithmetic/conj.py +72 -0
  751. maxframe/tensor/arithmetic/copysign.py +76 -0
  752. maxframe/tensor/arithmetic/core.py +546 -0
  753. maxframe/tensor/arithmetic/cos.py +83 -0
  754. maxframe/tensor/arithmetic/cosh.py +70 -0
  755. maxframe/tensor/arithmetic/deg2rad.py +70 -0
  756. maxframe/tensor/arithmetic/degrees.py +75 -0
  757. maxframe/tensor/arithmetic/divide.py +112 -0
  758. maxframe/tensor/arithmetic/equal.py +74 -0
  759. maxframe/tensor/arithmetic/exp.py +104 -0
  760. maxframe/tensor/arithmetic/exp2.py +65 -0
  761. maxframe/tensor/arithmetic/expm1.py +77 -0
  762. maxframe/tensor/arithmetic/fabs.py +72 -0
  763. maxframe/tensor/arithmetic/fix.py +67 -0
  764. maxframe/tensor/arithmetic/float_power.py +101 -0
  765. maxframe/tensor/arithmetic/floor.py +75 -0
  766. maxframe/tensor/arithmetic/floordiv.py +92 -0
  767. maxframe/tensor/arithmetic/fmax.py +103 -0
  768. maxframe/tensor/arithmetic/fmin.py +104 -0
  769. maxframe/tensor/arithmetic/fmod.py +97 -0
  770. maxframe/tensor/arithmetic/frexp.py +96 -0
  771. maxframe/tensor/arithmetic/greater.py +75 -0
  772. maxframe/tensor/arithmetic/greater_equal.py +67 -0
  773. maxframe/tensor/arithmetic/hypot.py +75 -0
  774. maxframe/tensor/arithmetic/i0.py +87 -0
  775. maxframe/tensor/arithmetic/imag.py +65 -0
  776. maxframe/tensor/arithmetic/invert.py +108 -0
  777. maxframe/tensor/arithmetic/isclose.py +114 -0
  778. maxframe/tensor/arithmetic/iscomplex.py +62 -0
  779. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  780. maxframe/tensor/arithmetic/isfinite.py +104 -0
  781. maxframe/tensor/arithmetic/isinf.py +101 -0
  782. maxframe/tensor/arithmetic/isnan.py +80 -0
  783. maxframe/tensor/arithmetic/isreal.py +61 -0
  784. maxframe/tensor/arithmetic/ldexp.py +97 -0
  785. maxframe/tensor/arithmetic/less.py +67 -0
  786. maxframe/tensor/arithmetic/less_equal.py +67 -0
  787. maxframe/tensor/arithmetic/log.py +90 -0
  788. maxframe/tensor/arithmetic/log10.py +83 -0
  789. maxframe/tensor/arithmetic/log1p.py +93 -0
  790. maxframe/tensor/arithmetic/log2.py +83 -0
  791. maxframe/tensor/arithmetic/logaddexp.py +78 -0
  792. maxframe/tensor/arithmetic/logaddexp2.py +76 -0
  793. maxframe/tensor/arithmetic/logical_and.py +79 -0
  794. maxframe/tensor/arithmetic/logical_not.py +72 -0
  795. maxframe/tensor/arithmetic/logical_or.py +80 -0
  796. maxframe/tensor/arithmetic/logical_xor.py +86 -0
  797. maxframe/tensor/arithmetic/lshift.py +80 -0
  798. maxframe/tensor/arithmetic/maximum.py +106 -0
  799. maxframe/tensor/arithmetic/minimum.py +106 -0
  800. maxframe/tensor/arithmetic/mod.py +102 -0
  801. maxframe/tensor/arithmetic/modf.py +87 -0
  802. maxframe/tensor/arithmetic/multiply.py +114 -0
  803. maxframe/tensor/arithmetic/nan_to_num.py +97 -0
  804. maxframe/tensor/arithmetic/negative.py +63 -0
  805. maxframe/tensor/arithmetic/nextafter.py +66 -0
  806. maxframe/tensor/arithmetic/not_equal.py +70 -0
  807. maxframe/tensor/arithmetic/positive.py +45 -0
  808. maxframe/tensor/arithmetic/power.py +104 -0
  809. maxframe/tensor/arithmetic/rad2deg.py +69 -0
  810. maxframe/tensor/arithmetic/radians.py +75 -0
  811. maxframe/tensor/arithmetic/real.py +68 -0
  812. maxframe/tensor/arithmetic/reciprocal.py +78 -0
  813. maxframe/tensor/arithmetic/rint.py +66 -0
  814. maxframe/tensor/arithmetic/rshift.py +79 -0
  815. maxframe/tensor/arithmetic/setimag.py +27 -0
  816. maxframe/tensor/arithmetic/setreal.py +27 -0
  817. maxframe/tensor/arithmetic/sign.py +79 -0
  818. maxframe/tensor/arithmetic/signbit.py +63 -0
  819. maxframe/tensor/arithmetic/sin.py +96 -0
  820. maxframe/tensor/arithmetic/sinc.py +100 -0
  821. maxframe/tensor/arithmetic/sinh.py +91 -0
  822. maxframe/tensor/arithmetic/spacing.py +70 -0
  823. maxframe/tensor/arithmetic/sqrt.py +79 -0
  824. maxframe/tensor/arithmetic/square.py +67 -0
  825. maxframe/tensor/arithmetic/subtract.py +83 -0
  826. maxframe/tensor/arithmetic/tan.py +86 -0
  827. maxframe/tensor/arithmetic/tanh.py +90 -0
  828. maxframe/tensor/arithmetic/tests/__init__.py +13 -0
  829. maxframe/tensor/arithmetic/tests/test_arithmetic.py +449 -0
  830. maxframe/tensor/arithmetic/truediv.py +102 -0
  831. maxframe/tensor/arithmetic/trunc.py +70 -0
  832. maxframe/tensor/arithmetic/utils.py +91 -0
  833. maxframe/tensor/array_utils.py +164 -0
  834. maxframe/tensor/core.py +597 -0
  835. maxframe/tensor/datasource/__init__.py +40 -0
  836. maxframe/tensor/datasource/arange.py +154 -0
  837. maxframe/tensor/datasource/array.py +399 -0
  838. maxframe/tensor/datasource/core.py +114 -0
  839. maxframe/tensor/datasource/diag.py +140 -0
  840. maxframe/tensor/datasource/diagflat.py +69 -0
  841. maxframe/tensor/datasource/empty.py +167 -0
  842. maxframe/tensor/datasource/eye.py +95 -0
  843. maxframe/tensor/datasource/from_dataframe.py +68 -0
  844. maxframe/tensor/datasource/from_dense.py +37 -0
  845. maxframe/tensor/datasource/from_sparse.py +45 -0
  846. maxframe/tensor/datasource/full.py +184 -0
  847. maxframe/tensor/datasource/identity.py +54 -0
  848. maxframe/tensor/datasource/indices.py +115 -0
  849. maxframe/tensor/datasource/linspace.py +140 -0
  850. maxframe/tensor/datasource/meshgrid.py +135 -0
  851. maxframe/tensor/datasource/ones.py +178 -0
  852. maxframe/tensor/datasource/scalar.py +40 -0
  853. maxframe/tensor/datasource/tests/__init__.py +13 -0
  854. maxframe/tensor/datasource/tests/test_datasource.py +310 -0
  855. maxframe/tensor/datasource/tri_array.py +107 -0
  856. maxframe/tensor/datasource/zeros.py +192 -0
  857. maxframe/tensor/extensions/__init__.py +33 -0
  858. maxframe/tensor/extensions/accessor.py +25 -0
  859. maxframe/tensor/extensions/apply_chunk.py +137 -0
  860. maxframe/tensor/extensions/rebalance.py +65 -0
  861. maxframe/tensor/fetch/__init__.py +15 -0
  862. maxframe/tensor/fetch/core.py +54 -0
  863. maxframe/tensor/fft/__init__.py +32 -0
  864. maxframe/tensor/fft/core.py +168 -0
  865. maxframe/tensor/fft/fft.py +112 -0
  866. maxframe/tensor/fft/fft2.py +118 -0
  867. maxframe/tensor/fft/fftfreq.py +80 -0
  868. maxframe/tensor/fft/fftn.py +123 -0
  869. maxframe/tensor/fft/fftshift.py +79 -0
  870. maxframe/tensor/fft/hfft.py +112 -0
  871. maxframe/tensor/fft/ifft.py +114 -0
  872. maxframe/tensor/fft/ifft2.py +115 -0
  873. maxframe/tensor/fft/ifftn.py +123 -0
  874. maxframe/tensor/fft/ifftshift.py +73 -0
  875. maxframe/tensor/fft/ihfft.py +93 -0
  876. maxframe/tensor/fft/irfft.py +118 -0
  877. maxframe/tensor/fft/irfft2.py +62 -0
  878. maxframe/tensor/fft/irfftn.py +114 -0
  879. maxframe/tensor/fft/rfft.py +116 -0
  880. maxframe/tensor/fft/rfft2.py +63 -0
  881. maxframe/tensor/fft/rfftfreq.py +87 -0
  882. maxframe/tensor/fft/rfftn.py +113 -0
  883. maxframe/tensor/indexing/__init__.py +47 -0
  884. maxframe/tensor/indexing/choose.py +198 -0
  885. maxframe/tensor/indexing/compress.py +122 -0
  886. maxframe/tensor/indexing/core.py +190 -0
  887. maxframe/tensor/indexing/extract.py +69 -0
  888. maxframe/tensor/indexing/fill_diagonal.py +180 -0
  889. maxframe/tensor/indexing/flatnonzero.py +58 -0
  890. maxframe/tensor/indexing/getitem.py +144 -0
  891. maxframe/tensor/indexing/nonzero.py +118 -0
  892. maxframe/tensor/indexing/setitem.py +142 -0
  893. maxframe/tensor/indexing/slice.py +32 -0
  894. maxframe/tensor/indexing/take.py +128 -0
  895. maxframe/tensor/indexing/tests/__init__.py +13 -0
  896. maxframe/tensor/indexing/tests/test_indexing.py +232 -0
  897. maxframe/tensor/indexing/unravel_index.py +103 -0
  898. maxframe/tensor/lib/__init__.py +16 -0
  899. maxframe/tensor/lib/index_tricks.py +404 -0
  900. maxframe/tensor/linalg/__init__.py +43 -0
  901. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  902. maxframe/tensor/linalg/cholesky.py +117 -0
  903. maxframe/tensor/linalg/dot.py +145 -0
  904. maxframe/tensor/linalg/einsum.py +339 -0
  905. maxframe/tensor/linalg/inner.py +36 -0
  906. maxframe/tensor/linalg/inv.py +83 -0
  907. maxframe/tensor/linalg/lstsq.py +100 -0
  908. maxframe/tensor/linalg/lu.py +115 -0
  909. maxframe/tensor/linalg/matmul.py +225 -0
  910. maxframe/tensor/linalg/matrix_norm.py +75 -0
  911. maxframe/tensor/linalg/norm.py +249 -0
  912. maxframe/tensor/linalg/qr.py +124 -0
  913. maxframe/tensor/linalg/solve.py +72 -0
  914. maxframe/tensor/linalg/solve_triangular.py +103 -0
  915. maxframe/tensor/linalg/svd.py +167 -0
  916. maxframe/tensor/linalg/tensordot.py +213 -0
  917. maxframe/tensor/linalg/vdot.py +73 -0
  918. maxframe/tensor/linalg/vector_norm.py +113 -0
  919. maxframe/tensor/merge/__init__.py +21 -0
  920. maxframe/tensor/merge/append.py +74 -0
  921. maxframe/tensor/merge/column_stack.py +63 -0
  922. maxframe/tensor/merge/concatenate.py +103 -0
  923. maxframe/tensor/merge/dstack.py +71 -0
  924. maxframe/tensor/merge/hstack.py +70 -0
  925. maxframe/tensor/merge/stack.py +130 -0
  926. maxframe/tensor/merge/tests/__init__.py +13 -0
  927. maxframe/tensor/merge/tests/test_merge.py +79 -0
  928. maxframe/tensor/merge/vstack.py +74 -0
  929. maxframe/tensor/misc/__init__.py +72 -0
  930. maxframe/tensor/misc/argwhere.py +72 -0
  931. maxframe/tensor/misc/array_split.py +46 -0
  932. maxframe/tensor/misc/astype.py +121 -0
  933. maxframe/tensor/misc/atleast_1d.py +72 -0
  934. maxframe/tensor/misc/atleast_2d.py +70 -0
  935. maxframe/tensor/misc/atleast_3d.py +85 -0
  936. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  937. maxframe/tensor/misc/broadcast_to.py +89 -0
  938. maxframe/tensor/misc/copy.py +64 -0
  939. maxframe/tensor/misc/copyto.py +130 -0
  940. maxframe/tensor/misc/delete.py +104 -0
  941. maxframe/tensor/misc/diff.py +115 -0
  942. maxframe/tensor/misc/dsplit.py +68 -0
  943. maxframe/tensor/misc/ediff1d.py +74 -0
  944. maxframe/tensor/misc/expand_dims.py +85 -0
  945. maxframe/tensor/misc/flatten.py +63 -0
  946. maxframe/tensor/misc/flip.py +90 -0
  947. maxframe/tensor/misc/fliplr.py +64 -0
  948. maxframe/tensor/misc/flipud.py +68 -0
  949. maxframe/tensor/misc/hsplit.py +85 -0
  950. maxframe/tensor/misc/in1d.py +94 -0
  951. maxframe/tensor/misc/insert.py +139 -0
  952. maxframe/tensor/misc/isin.py +130 -0
  953. maxframe/tensor/misc/moveaxis.py +83 -0
  954. maxframe/tensor/misc/ndim.py +53 -0
  955. maxframe/tensor/misc/ravel.py +90 -0
  956. maxframe/tensor/misc/repeat.py +129 -0
  957. maxframe/tensor/misc/result_type.py +88 -0
  958. maxframe/tensor/misc/roll.py +124 -0
  959. maxframe/tensor/misc/rollaxis.py +77 -0
  960. maxframe/tensor/misc/searchsorted.py +147 -0
  961. maxframe/tensor/misc/setdiff1d.py +58 -0
  962. maxframe/tensor/misc/shape.py +89 -0
  963. maxframe/tensor/misc/split.py +190 -0
  964. maxframe/tensor/misc/squeeze.py +117 -0
  965. maxframe/tensor/misc/swapaxes.py +113 -0
  966. maxframe/tensor/misc/tests/__init__.py +13 -0
  967. maxframe/tensor/misc/tests/test_misc.py +112 -0
  968. maxframe/tensor/misc/tile.py +109 -0
  969. maxframe/tensor/misc/transpose.py +133 -0
  970. maxframe/tensor/misc/trapezoid.py +123 -0
  971. maxframe/tensor/misc/unique.py +227 -0
  972. maxframe/tensor/misc/vsplit.py +74 -0
  973. maxframe/tensor/misc/where.py +129 -0
  974. maxframe/tensor/operators.py +83 -0
  975. maxframe/tensor/random/__init__.py +166 -0
  976. maxframe/tensor/random/beta.py +87 -0
  977. maxframe/tensor/random/binomial.py +135 -0
  978. maxframe/tensor/random/bytes.py +37 -0
  979. maxframe/tensor/random/chisquare.py +108 -0
  980. maxframe/tensor/random/choice.py +187 -0
  981. maxframe/tensor/random/core.py +249 -0
  982. maxframe/tensor/random/dirichlet.py +121 -0
  983. maxframe/tensor/random/exponential.py +92 -0
  984. maxframe/tensor/random/f.py +133 -0
  985. maxframe/tensor/random/gamma.py +126 -0
  986. maxframe/tensor/random/geometric.py +91 -0
  987. maxframe/tensor/random/gumbel.py +165 -0
  988. maxframe/tensor/random/hypergeometric.py +146 -0
  989. maxframe/tensor/random/laplace.py +131 -0
  990. maxframe/tensor/random/logistic.py +127 -0
  991. maxframe/tensor/random/lognormal.py +157 -0
  992. maxframe/tensor/random/logseries.py +120 -0
  993. maxframe/tensor/random/multinomial.py +131 -0
  994. maxframe/tensor/random/multivariate_normal.py +190 -0
  995. maxframe/tensor/random/negative_binomial.py +123 -0
  996. maxframe/tensor/random/noncentral_chisquare.py +130 -0
  997. maxframe/tensor/random/noncentral_f.py +124 -0
  998. maxframe/tensor/random/normal.py +141 -0
  999. maxframe/tensor/random/pareto.py +138 -0
  1000. maxframe/tensor/random/permutation.py +107 -0
  1001. maxframe/tensor/random/poisson.py +109 -0
  1002. maxframe/tensor/random/power.py +140 -0
  1003. maxframe/tensor/random/rand.py +80 -0
  1004. maxframe/tensor/random/randint.py +119 -0
  1005. maxframe/tensor/random/randn.py +94 -0
  1006. maxframe/tensor/random/random_integers.py +121 -0
  1007. maxframe/tensor/random/random_sample.py +84 -0
  1008. maxframe/tensor/random/rayleigh.py +108 -0
  1009. maxframe/tensor/random/shuffle.py +61 -0
  1010. maxframe/tensor/random/standard_cauchy.py +103 -0
  1011. maxframe/tensor/random/standard_exponential.py +70 -0
  1012. maxframe/tensor/random/standard_gamma.py +118 -0
  1013. maxframe/tensor/random/standard_normal.py +72 -0
  1014. maxframe/tensor/random/standard_t.py +133 -0
  1015. maxframe/tensor/random/tests/__init__.py +13 -0
  1016. maxframe/tensor/random/tests/test_random.py +165 -0
  1017. maxframe/tensor/random/triangular.py +117 -0
  1018. maxframe/tensor/random/uniform.py +129 -0
  1019. maxframe/tensor/random/vonmises.py +129 -0
  1020. maxframe/tensor/random/wald.py +112 -0
  1021. maxframe/tensor/random/weibull.py +138 -0
  1022. maxframe/tensor/random/zipf.py +120 -0
  1023. maxframe/tensor/rechunk/__init__.py +26 -0
  1024. maxframe/tensor/rechunk/rechunk.py +43 -0
  1025. maxframe/tensor/reduction/__init__.py +64 -0
  1026. maxframe/tensor/reduction/all.py +101 -0
  1027. maxframe/tensor/reduction/allclose.py +86 -0
  1028. maxframe/tensor/reduction/any.py +103 -0
  1029. maxframe/tensor/reduction/argmax.py +101 -0
  1030. maxframe/tensor/reduction/argmin.py +101 -0
  1031. maxframe/tensor/reduction/array_equal.py +63 -0
  1032. maxframe/tensor/reduction/core.py +166 -0
  1033. maxframe/tensor/reduction/count_nonzero.py +80 -0
  1034. maxframe/tensor/reduction/cumprod.py +95 -0
  1035. maxframe/tensor/reduction/cumsum.py +99 -0
  1036. maxframe/tensor/reduction/max.py +118 -0
  1037. maxframe/tensor/reduction/mean.py +122 -0
  1038. maxframe/tensor/reduction/min.py +118 -0
  1039. maxframe/tensor/reduction/nanargmax.py +80 -0
  1040. maxframe/tensor/reduction/nanargmin.py +74 -0
  1041. maxframe/tensor/reduction/nancumprod.py +89 -0
  1042. maxframe/tensor/reduction/nancumsum.py +92 -0
  1043. maxframe/tensor/reduction/nanmax.py +109 -0
  1044. maxframe/tensor/reduction/nanmean.py +105 -0
  1045. maxframe/tensor/reduction/nanmin.py +109 -0
  1046. maxframe/tensor/reduction/nanprod.py +92 -0
  1047. maxframe/tensor/reduction/nanstd.py +124 -0
  1048. maxframe/tensor/reduction/nansum.py +113 -0
  1049. maxframe/tensor/reduction/nanvar.py +149 -0
  1050. maxframe/tensor/reduction/prod.py +128 -0
  1051. maxframe/tensor/reduction/std.py +132 -0
  1052. maxframe/tensor/reduction/sum.py +123 -0
  1053. maxframe/tensor/reduction/tests/__init__.py +13 -0
  1054. maxframe/tensor/reduction/tests/test_reduction.py +189 -0
  1055. maxframe/tensor/reduction/var.py +176 -0
  1056. maxframe/tensor/reshape/__init__.py +15 -0
  1057. maxframe/tensor/reshape/reshape.py +192 -0
  1058. maxframe/tensor/reshape/tests/__init__.py +13 -0
  1059. maxframe/tensor/reshape/tests/test_reshape.py +35 -0
  1060. maxframe/tensor/sort/__init__.py +18 -0
  1061. maxframe/tensor/sort/argpartition.py +98 -0
  1062. maxframe/tensor/sort/argsort.py +150 -0
  1063. maxframe/tensor/sort/partition.py +228 -0
  1064. maxframe/tensor/sort/sort.py +295 -0
  1065. maxframe/tensor/spatial/__init__.py +15 -0
  1066. maxframe/tensor/spatial/distance/__init__.py +17 -0
  1067. maxframe/tensor/spatial/distance/cdist.py +421 -0
  1068. maxframe/tensor/spatial/distance/pdist.py +398 -0
  1069. maxframe/tensor/spatial/distance/squareform.py +153 -0
  1070. maxframe/tensor/special/__init__.py +175 -0
  1071. maxframe/tensor/special/airy.py +55 -0
  1072. maxframe/tensor/special/bessel.py +199 -0
  1073. maxframe/tensor/special/core.py +99 -0
  1074. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  1075. maxframe/tensor/special/ellip_harm.py +55 -0
  1076. maxframe/tensor/special/err_fresnel.py +223 -0
  1077. maxframe/tensor/special/gamma_funcs.py +303 -0
  1078. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  1079. maxframe/tensor/special/info_theory.py +189 -0
  1080. maxframe/tensor/special/misc.py +163 -0
  1081. maxframe/tensor/special/statistical.py +56 -0
  1082. maxframe/tensor/statistics/__init__.py +24 -0
  1083. maxframe/tensor/statistics/average.py +143 -0
  1084. maxframe/tensor/statistics/bincount.py +133 -0
  1085. maxframe/tensor/statistics/corrcoef.py +77 -0
  1086. maxframe/tensor/statistics/cov.py +222 -0
  1087. maxframe/tensor/statistics/digitize.py +126 -0
  1088. maxframe/tensor/statistics/histogram.py +520 -0
  1089. maxframe/tensor/statistics/median.py +85 -0
  1090. maxframe/tensor/statistics/percentile.py +175 -0
  1091. maxframe/tensor/statistics/ptp.py +89 -0
  1092. maxframe/tensor/statistics/quantile.py +290 -0
  1093. maxframe/tensor/ufunc/__init__.py +24 -0
  1094. maxframe/tensor/ufunc/ufunc.py +198 -0
  1095. maxframe/tensor/utils.py +719 -0
  1096. maxframe/tests/__init__.py +13 -0
  1097. maxframe/tests/test_protocol.py +178 -0
  1098. maxframe/tests/test_udf.py +61 -0
  1099. maxframe/tests/test_utils.py +627 -0
  1100. maxframe/tests/utils.py +245 -0
  1101. maxframe/typing_.py +42 -0
  1102. maxframe/udf.py +435 -0
  1103. maxframe/utils.py +1774 -0
  1104. maxframe-2.4.0rc1.dist-info/METADATA +109 -0
  1105. maxframe-2.4.0rc1.dist-info/RECORD +1122 -0
  1106. maxframe-2.4.0rc1.dist-info/WHEEL +5 -0
  1107. maxframe-2.4.0rc1.dist-info/top_level.txt +3 -0
  1108. maxframe_client/__init__.py +16 -0
  1109. maxframe_client/clients/__init__.py +13 -0
  1110. maxframe_client/clients/framedriver.py +137 -0
  1111. maxframe_client/conftest.py +15 -0
  1112. maxframe_client/fetcher.py +411 -0
  1113. maxframe_client/session/__init__.py +22 -0
  1114. maxframe_client/session/consts.py +39 -0
  1115. maxframe_client/session/graph.py +125 -0
  1116. maxframe_client/session/odps.py +813 -0
  1117. maxframe_client/session/task.py +329 -0
  1118. maxframe_client/session/tests/__init__.py +13 -0
  1119. maxframe_client/session/tests/test_task.py +115 -0
  1120. maxframe_client/tests/__init__.py +13 -0
  1121. maxframe_client/tests/test_fetcher.py +215 -0
  1122. maxframe_client/tests/test_session.py +409 -0
@@ -0,0 +1,2386 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import functools
16
+ import operator
17
+ import weakref
18
+ from collections.abc import Iterable
19
+ from io import StringIO
20
+ from typing import Any, Dict, List, Tuple, Union
21
+
22
+ import numpy as np
23
+ import pandas as pd
24
+
25
+ from ..core import (
26
+ ENTITY_TYPE,
27
+ HasShapeTileable,
28
+ HasShapeTileableData,
29
+ OutputType,
30
+ Tileable,
31
+ _ExecuteAndFetchMixin,
32
+ is_build_mode,
33
+ register_output_types,
34
+ )
35
+ from ..core.entity.utils import fill_chunk_slices, refresh_tileable_shape
36
+ from ..protocol import DataFrameTableMeta
37
+ from ..serialization.serializables import (
38
+ AnyField,
39
+ BoolField,
40
+ DataTypeField,
41
+ DictField,
42
+ Int32Field,
43
+ IntervalArrayField,
44
+ ListField,
45
+ NDArrayField,
46
+ OneOfField,
47
+ ReferenceField,
48
+ Serializable,
49
+ SeriesField,
50
+ SliceField,
51
+ StringField,
52
+ )
53
+ from ..session import get_default_session
54
+ from ..utils import (
55
+ calc_nsplits,
56
+ ceildiv,
57
+ estimate_pandas_size,
58
+ on_serialize_numpy_type,
59
+ pd_release_version,
60
+ prevent_called_from_pandas,
61
+ tokenize,
62
+ )
63
+ from .typing_ import DataFrameType, IndexType, SeriesType
64
+ from .utils import (
65
+ ReprSeries,
66
+ apply_if_callable,
67
+ fetch_corner_data,
68
+ merge_index_value,
69
+ parse_index,
70
+ )
71
+
72
# True when the installed pandas release is older than 2.0; SeriesData.iteritems
# uses this flag to choose between the pandas "iteritems" and "items" methods.
_df_with_iteritems = pd_release_version[:2] < (2, 0)
73
+
74
+
75
class IndexValue(Serializable):
    """
    Meta class for index, held by IndexData, SeriesData and DataFrameData.

    The concrete description of the index is stored in ``_index_value`` as
    one of the nested ``*Index`` classes below; most properties of this
    class simply delegate to that object.
    """

    __slots__ = ()

    class IndexBase(Serializable):
        """Fields and accessors shared by every concrete index meta class."""

        _key = StringField("key")  # to identify if the index is the same
        _is_monotonic_increasing = BoolField("is_monotonic_increasing")
        _is_monotonic_decreasing = BoolField("is_monotonic_decreasing")
        _is_unique = BoolField("is_unique")
        _max_val = AnyField("max_val", on_serialize=on_serialize_numpy_type)
        _max_val_close = BoolField("max_val_close")
        _min_val = AnyField("min_val", on_serialize=on_serialize_numpy_type)
        _min_val_close = BoolField("min_val_close")

        @property
        def is_monotonic_increasing(self):
            return self._is_monotonic_increasing

        @property
        def is_monotonic_decreasing(self):
            return self._is_monotonic_decreasing

        @property
        def is_unique(self):
            return self._is_unique

        @property
        def min_val(self):
            return self._min_val

        @property
        def min_val_close(self):
            return self._min_val_close

        @property
        def max_val(self):
            return self._max_val

        @property
        def max_val_close(self):
            return self._max_val_close

        @property
        def key(self):
            return self._key

        @property
        def inferred_type(self):
            return None

        def to_pandas(self):
            """
            Build the pandas index described by this meta object.

            Every field declared by the concrete class (i.e. not already
            declared on ``IndexBase`` or its bases) whose value is not None
            is passed to the pandas constructor under the field's tag.
            Missing ``data`` is replaced with an empty list.
            """
            # Name the base class explicitly instead of using
            # ``super(type(self), self)``: the latter resolves relative to
            # the runtime class and would hide the fields of an intermediate
            # class if one of the index classes were ever subclassed again.
            base_fields = IndexValue.IndexBase._FIELDS
            kw = {}
            for attr, field in self._FIELDS.items():
                if attr in base_fields:
                    continue
                value = getattr(self, attr, None)
                if value is not None:
                    kw[field.tag] = value
            if kw.get("data") is None:
                kw["data"] = []

            # classes may override the constructor with ``_pd_initializer``;
            # otherwise the pandas class of the same name is used
            pd_initializer = getattr(self, "_pd_initializer", None)
            if pd_initializer is None:
                pd_initializer = getattr(pd, type(self).__name__)
            return pd_initializer(**kw)

    class Index(IndexBase):
        _name = AnyField("name")
        _data = NDArrayField("data")
        _dtype = DataTypeField("dtype")

        @property
        def dtype(self):
            return getattr(self, "_dtype", None)

        @property
        def inferred_type(self):
            return "floating" if self.dtype.kind == "f" else "integer"

    class RangeIndex(IndexBase):
        _name = AnyField("name")
        _slice = SliceField("slice")
        _dtype = DataTypeField("dtype")

        @property
        def slice(self):
            return self._slice

        @property
        def dtype(self):
            return getattr(self, "_dtype", np.dtype(np.intc))

        def to_pandas(self):
            slc = self._slice
            return pd.RangeIndex(
                slc.start, slc.stop, slc.step, name=getattr(self, "_name", None)
            )

    class CategoricalIndex(IndexBase):
        _name = AnyField("name")
        _data = NDArrayField("data")
        _categories = AnyField("categories")
        _ordered = BoolField("ordered")

        @property
        def inferred_type(self):
            return "categorical"

    class IntervalIndex(IndexBase):
        _name = AnyField("name")
        _data = IntervalArrayField("data")
        _closed = StringField("closed")

        @property
        def inferred_type(self):
            return "interval"

    class DatetimeIndex(IndexBase):
        _name = AnyField("name")
        _data = NDArrayField("data")
        _freq = AnyField("freq")
        _start = AnyField("start")
        _periods = AnyField("periods")
        _end = AnyField("end")
        _closed = AnyField("closed")
        _tz = AnyField("tz")
        _ambiguous = AnyField("ambiguous")
        _dayfirst = BoolField("dayfirst")
        _yearfirst = BoolField("yearfirst")

        @property
        def inferred_type(self):
            return "datetime64"

        @property
        def freq(self):
            return getattr(self, "_freq", None)

    class TimedeltaIndex(IndexBase):
        _name = AnyField("name")
        _data = NDArrayField("data")
        _unit = AnyField("unit")
        _freq = AnyField("freq")
        _start = AnyField("start")
        _periods = AnyField("periods")
        _end = AnyField("end")
        _closed = AnyField("closed")

        @property
        def inferred_type(self):
            return "timedelta64"

    class PeriodIndex(IndexBase):
        _name = AnyField("name")
        _data = NDArrayField("data")
        _freq = AnyField("freq")
        _start = AnyField("start")
        _periods = AnyField("periods")
        _end = AnyField("end")
        _year = AnyField("year")
        _month = AnyField("month")
        _quarter = AnyField("quarter")
        _day = AnyField("day")
        _hour = AnyField("hour")
        _minute = AnyField("minute")
        _second = AnyField("second")
        _tz = AnyField("tz")
        _dtype = DataTypeField("dtype")

        @property
        def inferred_type(self):
            return "period"

    class Int64Index(IndexBase):
        _pd_initializer = pd.Index

        _name = AnyField("name")
        _data = NDArrayField("data")
        _dtype = DataTypeField("dtype")

        @property
        def dtype(self):
            return getattr(self, "_dtype", None)

        @property
        def inferred_type(self):
            return "integer"

    class UInt64Index(IndexBase):
        _pd_initializer = pd.Index

        _name = AnyField("name")
        _data = NDArrayField("data")
        _dtype = DataTypeField("dtype")

        @property
        def dtype(self):
            return getattr(self, "_dtype", None)

        @property
        def inferred_type(self):
            return "integer"

    class Float64Index(IndexBase):
        _pd_initializer = pd.Index

        _name = AnyField("name")
        _data = NDArrayField("data")
        _dtype = DataTypeField("dtype")

        @property
        def dtype(self):
            return getattr(self, "_dtype", None)

        @property
        def inferred_type(self):
            return "floating"

    class MultiIndex(IndexBase):
        _names = ListField("names", on_serialize=list)
        _dtypes = ListField("dtypes", on_serialize=list)
        _data = NDArrayField("data")
        _sortorder = Int32Field("sortorder")

        @property
        def inferred_type(self):
            return "mixed"

        @property
        def names(self) -> list:
            return self._names

        @property
        def dtypes(self) -> pd.Series:
            return pd.Series(self._dtypes, index=self._names)

        def to_pandas(self):
            """
            Build a ``pd.MultiIndex``; when no data is stored, an empty
            index with one empty array per level dtype is returned.
            """
            data = getattr(self, "_data", None)
            sortorder = getattr(self, "_sortorder", None)

            def _build_empty_array(dtype):
                # fall back to pandas arrays for dtypes numpy rejects
                try:
                    return np.array([], dtype=dtype)
                except TypeError:  # pragma: no cover
                    return pd.array([], dtype=dtype)

            if data is not None:
                return pd.MultiIndex.from_tuples(
                    [tuple(d) for d in data], sortorder=sortorder, names=self._names
                )
            return pd.MultiIndex.from_arrays(
                [_build_empty_array(dtype) for dtype in self._dtypes],
                sortorder=sortorder,
                names=self._names,
            )

    _index_value = OneOfField(
        "index_value",
        index=Index,
        range_index=RangeIndex,
        categorical_index=CategoricalIndex,
        interval_index=IntervalIndex,
        datetime_index=DatetimeIndex,
        timedelta_index=TimedeltaIndex,
        period_index=PeriodIndex,
        int64_index=Int64Index,
        uint64_index=UInt64Index,
        float64_index=Float64Index,
        multi_index=MultiIndex,
    )

    def __maxframe_tokenize__(self):
        # return object for tokenize
        return self._index_value._key

    @property
    def value(self):
        return self._index_value

    @property
    def key(self):
        return self._index_value.key

    @property
    def is_monotonic_increasing(self):
        return self._index_value.is_monotonic_increasing

    @property
    def is_monotonic_decreasing(self):
        return self._index_value.is_monotonic_decreasing

    @property
    def is_monotonic_increasing_or_decreasing(self):
        return self.is_monotonic_increasing or self.is_monotonic_decreasing

    @property
    def is_unique(self):
        return self._index_value.is_unique

    @property
    def min_val(self):
        return self._index_value.min_val

    @property
    def min_val_close(self):
        return self._index_value.min_val_close

    @property
    def max_val(self):
        return self._index_value.max_val

    @property
    def max_val_close(self):
        return self._index_value.max_val_close

    @property
    def min_max(self):
        """Tuple ``(min_val, min_val_close, max_val, max_val_close)``."""
        inner = self._index_value
        return (
            inner.min_val,
            inner.min_val_close,
            inner.max_val,
            inner.max_val_close,
        )

    @property
    def name(self):
        return getattr(self._index_value, "_name", None)

    @property
    def names(self):
        # single-level indexes expose their name as a one-element list
        return getattr(self._index_value, "_names", [self.name])

    @property
    def inferred_type(self):
        return self._index_value.inferred_type

    def has_value(self):
        """
        Tell whether the index content is known.

        A RangeIndex counts as known when its ``max_val`` is not NaN; any
        other index counts as known when its ``_data`` field is not None.
        """
        inner = self._index_value
        if isinstance(inner, self.RangeIndex):
            return not np.isnan(inner.max_val)
        return getattr(inner, "_data", None) is not None

    def to_pandas(self):
        return self._index_value.to_pandas()
426
+
427
+
428
class DtypesValue(Serializable):
    """
    Meta class for dtypes.

    Pairs a dtypes series with a key; when no key is supplied, one is
    derived by tokenizing the series.
    """

    __slots__ = ()

    _key = StringField("key")
    _value = SeriesField("value")

    def __init__(self, key=None, value=None, **kw):
        super().__init__(_key=key, _value=value, **kw)
        # fall back to a content-derived key
        if self._key is None:
            self._key = tokenize(self._value)

    @property
    def key(self):
        """Key identifying the dtypes."""
        return self._key

    @property
    def value(self):
        """The stored dtypes series."""
        return self._value
450
+
451
+
452
def refresh_index_value(tileable: ENTITY_TYPE):
    """
    Rebuild ``tileable._index_value`` by merging the index values of its chunks.

    Only 1-d chunks, or chunks in the first column of a 2-d layout, take
    part in the merge. The key of the previous index value is preserved.
    """
    row_index_values = {
        chunk.index: chunk.index_value
        for chunk in tileable.chunks
        if chunk.ndim == 1 or chunk.index[1] == 0
    }
    merged = merge_index_value(row_index_values, store_data=False)
    # keep key as original index_value's
    merged._index_value._key = tileable.index_value.key
    tileable._index_value = merged
461
+
462
+
463
def refresh_dtypes(tileable: ENTITY_TYPE):
    """
    Recompute dtypes, columns value and dtypes value of ``tileable``.

    The dtypes of all chunks in the first row are concatenated, then the
    derived attributes are rebuilt from the result.
    """
    first_row_dtypes = []
    for chunk in tileable.chunks:
        if chunk.index[0] == 0:
            first_row_dtypes.append(chunk.dtypes_value.value)
    merged_dtypes = pd.concat(first_row_dtypes)

    tileable._dtypes = merged_dtypes
    tileable._columns_value = parse_index(merged_dtypes.index, store_data=True)
    tileable._dtypes_value = DtypesValue(
        key=tokenize(merged_dtypes), value=merged_dtypes
    )
470
+
471
+
472
# Attribute names collected in ``_lazy_chunk_meta_properties`` below.
# NOTE(review): presumably these name tileable-level metadata that chunks
# resolve lazily; their consumers are not visible in this part of the file.
_tileable_key_property = "_tileable_key"
_tileable_dtypes_property = "_tileable_dtypes"
_tileable_index_value_property = "_tileable_index_value"
_tileable_columns_value_property = "_tileable_columns_value"
_nsplits_property = "_tileable_nsplits"
# all of the attribute names above, as one tuple
_lazy_chunk_meta_properties = (
    _tileable_key_property,
    _tileable_dtypes_property,
    _tileable_index_value_property,
    _tileable_columns_value_property,
    _nsplits_property,
)
484
+
485
+
486
+ def _calc_cum_nsplit(nsplit: Tuple[int]) -> List[int]:
487
+ return [0] + np.cumsum(nsplit).tolist()
488
+
489
+
490
def calc_cum_nsplits(nsplits: Tuple[Tuple[int]]) -> Tuple[List[int], ...]:
    """
    Compute cumulative split offsets for every axis.

    Returns a tuple with one list per entry of ``nsplits``; each list
    starts with 0 followed by the running sums of that entry. (The return
    annotation previously claimed a list of lists although a tuple is
    returned.)
    """
    return tuple(_calc_cum_nsplit(nsplit) for nsplit in nsplits)
492
+
493
+
494
+ @functools.lru_cache(maxsize=128)
495
+ def _get_cum_nsplit(nsplit: Tuple[int]) -> List[int]:
496
+ return _calc_cum_nsplit(nsplit)
497
+
498
+
499
def _calc_axis_slice(nsplit: Tuple[int], index: int) -> slice:
    """
    Return the slice covered by the ``index``-th split of ``nsplit``.

    ``nsplit`` is converted to a tuple when necessary so that the cached
    cumulative-offset lookup can hash it.
    """
    hashable_nsplit = nsplit if isinstance(nsplit, tuple) else tuple(nsplit)
    offsets = _get_cum_nsplit(hashable_nsplit)
    start = offsets[index]
    stop = offsets[index + 1]
    return slice(start, stop)
504
+
505
+
506
+ def _on_deserialize_index_value(index_value):
507
+ if index_value is None:
508
+ return
509
+ try:
510
+ getattr(index_value, "value")
511
+ return index_value
512
+ except AttributeError:
513
+ return
514
+
515
+
516
class _ToPandasMixin(_ExecuteAndFetchMixin):
    """Adds ``to_pandas`` on top of the execute-and-fetch mixin."""

    __slots__ = ()

    def to_pandas(self, session=None, **kw):
        """Execute and fetch the result, forwarding ``session`` and ``kw``."""
        result = self._execute_and_fetch(session=session, **kw)
        return result
521
+
522
+
523
class _BatchedFetcher:
    """Mixin that fetches data in row batches built from ``iloc`` slices."""

    __slots__ = ()

    def _iter(self, batch_size=None, session=None, **kw):
        """
        Yield the fetched data batch by batch.

        Parameters
        ----------
        batch_size : int, optional
            Number of rows per batch. When omitted, the first 1000 rows are
            fetched and their estimated size is used to choose a batch size
            targeting roughly 50 MiB per batch.
        session : optional
            Session forwarded to every ``_fetch`` call.
        **kw
            Extra keyword arguments forwarded to every ``_fetch`` call.
        """
        from .indexing.iloc import iloc

        size = self.shape[0]

        if batch_size is not None:
            n_batch = ceildiv(size, batch_size)
            if n_batch > 1:
                for i in range(n_batch):
                    batch_data = iloc(self)[batch_size * i : batch_size * (i + 1)]
                    yield batch_data._fetch(session=session, **kw)
            else:
                yield self._fetch(session=session, **kw)
            return

        # if batch_size is not specified, use first batch to estimate
        # batch_size.
        default_batch_bytes = 50 * 1024**2
        first_batch = 1000

        if size >= first_batch:
            first_batch_data = iloc(self)[:first_batch]._fetch(session=session, **kw)
            yield first_batch_data

            # rows left after the probing batch; this used to subtract a
            # hard-coded 1000 instead of ``first_batch``
            remaining = size - first_batch
            if remaining > 0:
                data_size = estimate_pandas_size(first_batch_data)
                if data_size > 0:
                    # never let the estimate drop to 0 rows, which would
                    # make the batch count computation divide by zero
                    batch_size = max(
                        int(default_batch_bytes / data_size * first_batch), 1
                    )
                else:
                    # no usable size estimate: fetch the rest in one batch
                    batch_size = remaining
                n_batch = ceildiv(remaining, batch_size)
                for i in range(n_batch):
                    start = first_batch + batch_size * i
                    batch_data = iloc(self)[start : start + batch_size]
                    yield batch_data._fetch(session=session, **kw)
        else:
            yield self._fetch(session=session, **kw)

    def iterbatch(self, batch_size=None, session=None, **kw):
        """
        Execute the tileable, then return an iterator over its batches.

        ``kw`` is passed to ``execute`` only; it is not forwarded to the
        batch iterator.

        Raises
        ------
        ValueError
            If called under build mode.
        """
        # stop triggering execution under build mode
        if is_build_mode():
            raise ValueError("Cannot fetch data under build mode")

        # trigger execution
        self.execute(session=session, **kw)
        return self._iter(batch_size=batch_size, session=session)

    def fetch(self, session=None, **kw):
        """
        Fetch the whole result, concatenating batches when there are several.

        ``batch_size`` may be passed through ``kw``. Objects that are
        already the result of an iloc getitem are fetched directly.
        """
        from .indexing.iloc import DataFrameIlocGetItem, SeriesIlocGetItem

        batch_size = kw.pop("batch_size", None)
        if isinstance(self.op, (DataFrameIlocGetItem, SeriesIlocGetItem)):
            # see GH#1871
            # already iloc, do not trigger batch fetch
            return self._fetch(session=session, **kw)

        batches = list(self._iter(batch_size=batch_size, session=session, **kw))
        return pd.concat(batches) if len(batches) > 1 else batches[0]

    def fetch_infos(self, fields=None, session=None, **kw):
        """Delegate to ``_fetch_infos`` with the same arguments."""
        return self._fetch_infos(fields=fields, session=session, **kw)
586
+
587
+
588
class IndexData(HasShapeTileableData, _ToPandasMixin):
    """Tileable data describing an index: dtype, name(s) and index value."""

    __slots__ = ()
    type_name = "Index"

    # optional field
    _dtype = DataTypeField("dtype")
    _name = AnyField("name")
    _names = AnyField("names")
    _index_value = ReferenceField(
        "index_value", IndexValue, on_deserialize=_on_deserialize_index_value
    )

    def __init__(
        self,
        op=None,
        shape=None,
        nsplits=None,
        dtype=None,
        name=None,
        names=None,
        index_value=None,
        **kw,
    ):
        super().__init__(
            _op=op,
            _shape=shape,
            _nsplits=nsplits,
            _dtype=dtype,
            _name=name,
            _names=names,
            _index_value=index_value,
            **kw,
        )

    @property
    def params(self) -> Dict[str, Any]:
        # params return the properties which useful to rebuild a new tileable object
        return {
            "shape": self.shape,
            "dtype": self.dtype,
            "name": self.name,
            "index_value": self.index_value,
        }

    @params.setter
    def params(self, new_params: Dict[str, Any]):
        # only non-None entries overwrite the current values; unknown keys
        # are rejected
        params = new_params.copy()
        new_shape = params.pop("shape", None)
        if new_shape is not None:
            self._shape = new_shape
        dtype = params.pop("dtype", None)
        if dtype is not None:
            self._dtype = dtype
        index_value = params.pop("index_value", None)
        if index_value is not None:
            self._index_value = index_value
        name = params.pop("name", None)
        if name is not None:
            self._name = name
        if params:  # pragma: no cover
            raise TypeError(f"Unknown params: {list(params)}")

    def refresh_params(self):
        # refresh params when chunks updated
        refresh_tileable_shape(self)
        fill_chunk_slices(self)
        # refresh_index_value(self)
        # if self._dtype is None:
        #     self._dtype = self.chunks[0].dtype
        # if self._name is None:
        #     self._name = self.chunks[0].name

    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
        pass

    def _to_str(self, representation=False):
        """
        Render the index as text.

        Under build mode, or before any execution, a short description of
        the operator is returned. Otherwise the data is fetched from the
        most recent executing session and rendered with ``repr`` or ``str``
        depending on ``representation``.
        """
        if is_build_mode() or len(self._executed_sessions) == 0:
            # in build mode, or not executed, just return representation
            if representation:
                # closing ">" was missing; now matches the series repr format
                return f"Index <op={type(self._op).__name__}, key={self.key}>"
            return f"Index(op={type(self._op).__name__})"

        data = self.fetch(session=self._executed_sessions[-1])
        # honour the requested flavour (previously ``repr`` was always used
        # because the condition tested ``repr(data)`` instead of the flag)
        return repr(data) if representation else str(data)

    def __str__(self):
        return self._to_str(representation=False)

    def __repr__(self):
        return self._to_str(representation=True)

    def _to_maxframe_tensor(self, dtype=None, order="K", extract_multi_index=False):
        tensor = self.to_tensor(extract_multi_index=extract_multi_index)
        dtype = dtype if dtype is not None else tensor.dtype
        return tensor.astype(dtype=dtype, order=order, copy=False)

    def __maxframe_tensor__(self, dtype=None, order="K"):
        return self._to_maxframe_tensor(dtype=dtype, order=order)

    @property
    def dtype(self):
        # fall back to the operator's dtype when none is stored here
        return getattr(self, "_dtype", None) or self.op.dtype

    @property
    def name(self):
        return self._name

    @property
    def names(self):
        # single-level indexes report their name as a one-element list
        return getattr(self, "_names", None) or [self.name]

    @property
    def nlevels(self) -> int:
        return len(self.names)

    @property
    def index_value(self) -> IndexValue:
        return self._index_value

    @property
    def inferred_type(self):
        return self._index_value.inferred_type

    def to_tensor(self, dtype=None, extract_multi_index=False):
        from ..tensor.datasource.from_dataframe import from_index

        return from_index(self, dtype=dtype, extract_multi_index=extract_multi_index)

    def to_frame(self, index: bool = True, name=None):
        """
        Create a DataFrame whose column(s) contain this index.

        Parameters
        ----------
        index : bool, default True
            Use this index as the index of the resulting DataFrame.
        name : object or list-like, optional
            Column name(s). For a multi-level index this must be a
            non-string iterable with one entry per level.

        Raises
        ------
        TypeError
            If the index is multi-level and ``name`` is a string or not
            iterable.
        ValueError
            If the index is multi-level and ``name`` has the wrong length.
        """
        from . import dataframe_from_tensor

        if isinstance(self.index_value.value, IndexValue.MultiIndex):
            old_names = self.index_value.value.names

            if (
                name is not None
                and not isinstance(name, Iterable)
                or isinstance(name, str)
            ):
                raise TypeError("'name' must be a list / sequence of column names.")

            name = list(name if name is not None else old_names)
            if len(name) != len(old_names):
                raise ValueError(
                    "'name' should have same length as number of levels on index."
                )

            # a passed name takes precedence over the existing level name,
            # mirroring the single-level branch below; the level position is
            # the last resort. (The old ordering ``old or new`` silently
            # ignored the passed names.)
            columns = [
                new or old or idx for idx, (old, new) in enumerate(zip(old_names, name))
            ]
        else:
            columns = [name or self.name or 0]
        index_ = self if index else None
        return dataframe_from_tensor(
            self._to_maxframe_tensor(extract_multi_index=True),
            index=index_,
            columns=columns,
            check_index_size=False,
        )

    def to_series(self, index=None, name=None):
        from . import series_from_index

        return series_from_index(self, index=index, name=name)

    @property
    def hasnans(self):
        return self.isna().any()
757
+
758
+
759
class Index(HasShapeTileable, _ToPandasMixin):
    """User-facing index object wrapping an ``IndexData``."""

    __slots__ = "_df_or_series", "_parent_key", "_axis"
    _allow_data_type_ = (IndexData,)
    type_name = "Index"

    def __new__(cls, data: Union[pd.Index, IndexData] = None, **_):
        # For non-pandas data, instantiate the module-level class whose name
        # equals the class name of the wrapped index value.
        if data is None or isinstance(data, pd.Index):
            target_cls = cls
        else:
            target_cls = globals()[type(data.index_value.value).__name__]
        return object.__new__(target_cls)

    def __len__(self):
        return len(self._data)

    def __class_getitem__(cls, item):
        return IndexType.from_getitem_args(item)

    def __maxframe_tensor__(self, dtype=None, order="K"):
        return self._data.__maxframe_tensor__(dtype=dtype, order=order)

    def _get_df_or_series(self):
        """Return the owning DataFrame / Series, or None if unset or collected."""
        parent_ref = getattr(self, "_df_or_series", None)
        return parent_ref() if parent_ref is not None else None

    def _set_df_or_series(self, df_or_series, axis):
        """Remember the owner (as a weak reference), its key and the axis."""
        self._df_or_series = weakref.ref(df_or_series)
        self._parent_key = df_or_series.key
        self._axis = axis

    @property
    def T(self):
        """Return the transpose, which is by definition self."""
        return self

    @property
    def name(self):
        return self._data.name

    @name.setter
    def name(self, value):
        owner = self._get_df_or_series()
        if owner is None or owner.key != self._parent_key:
            # no live owner with the recorded key: rename this index only
            self.rename(value, inplace=True)
            return
        owner.rename_axis(value, axis=self._axis, inplace=True)
        self.data = owner.axes[self._axis].data

    @property
    def names(self):
        return self._data.names

    @names.setter
    def names(self, value):
        owner = self._get_df_or_series()
        if owner is None:
            self.rename(value, inplace=True)
            return
        owner.rename_axis(value, axis=self._axis, inplace=True)
        self.data = owner.axes[self._axis].data

    @property
    def values(self):
        return self.to_tensor()

    def to_frame(self, index: bool = True, name=None):
        """
        Create a DataFrame with a column containing the Index.

        Parameters
        ----------
        index : bool, default True
            Set the index of the returned DataFrame as the original Index.

        name : object, default None
            The passed name should substitute for the index name (if it has
            one).

        Returns
        -------
        DataFrame
            DataFrame containing the original Index data.

        See Also
        --------
        Index.to_series : Convert an Index to a Series.
        Series.to_frame : Convert Series to DataFrame.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> idx = md.Index(['Ant', 'Bear', 'Cow'], name='animal')
        >>> idx.to_frame().execute()
               animal
        animal
        Ant       Ant
        Bear     Bear
        Cow       Cow

        By default, the original Index is reused. To enforce a new Index:

        >>> idx.to_frame(index=False).execute()
          animal
        0    Ant
        1   Bear
        2    Cow

        To override the name of the resulting column, specify `name`:

        >>> idx.to_frame(index=False, name='zoo').execute()
            zoo
        0   Ant
        1  Bear
        2   Cow
        """
        return self._data.to_frame(index=index, name=name)

    def to_series(self, index=None, name=None):
        """
        Create a Series with both index and values equal to the index keys.

        Useful with map for returning an indexer based on an index.

        Parameters
        ----------
        index : Index, optional
            Index of resulting Series. If None, defaults to original index.
        name : str, optional
            Name of resulting Series. If None, defaults to name of original
            index.

        Returns
        -------
        Series
            The dtype will be based on the type of the Index values.
        """
        return self._data.to_series(index=index, name=name)

    @property
    def hasnans(self):
        """
        Return True if there are any NaNs.

        Returns
        -------
        bool

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> idx = md.Index([1, 2, 3, None])
        >>> idx.execute()
        Index([1.0, 2.0, 3.0, nan], dtype='float64')
        >>> idx.hasnans.execute()
        True
        """
        return self._data.hasnans
920
+
921
+
922
class RangeIndex(Index):
    # Marker subclass without extra state; Index.__new__ looks classes up by
    # the name of the wrapped index value's class, so this name must match.
    __slots__ = ()
924
+
925
+
926
class CategoricalIndex(Index):
    # Marker subclass without extra state; Index.__new__ looks classes up by
    # the name of the wrapped index value's class, so this name must match.
    __slots__ = ()
928
+
929
+
930
class IntervalIndex(Index):
    # Marker subclass without extra state; Index.__new__ looks classes up by
    # the name of the wrapped index value's class, so this name must match.
    __slots__ = ()
932
+
933
+
934
class DatetimeIndex(Index):
    # Marker subclass without extra state; Index.__new__ looks classes up by
    # the name of the wrapped index value's class, so this name must match.
    __slots__ = ()
936
+
937
+
938
class TimedeltaIndex(Index):
    # Marker subclass without extra state; Index.__new__ looks classes up by
    # the name of the wrapped index value's class, so this name must match.
    __slots__ = ()
940
+
941
+
942
class PeriodIndex(Index):
    # Marker subclass without extra state; Index.__new__ looks classes up by
    # the name of the wrapped index value's class, so this name must match.
    __slots__ = ()
944
+
945
+
946
class Int64Index(Index):
    # Marker subclass without extra state; Index.__new__ looks classes up by
    # the name of the wrapped index value's class, so this name must match.
    __slots__ = ()
948
+
949
+
950
class UInt64Index(Index):
    # Marker subclass without extra state; Index.__new__ looks classes up by
    # the name of the wrapped index value's class, so this name must match.
    __slots__ = ()
952
+
953
+
954
class Float64Index(Index):
    # Marker subclass without extra state; Index.__new__ looks classes up by
    # the name of the wrapped index value's class, so this name must match.
    __slots__ = ()
956
+
957
+
958
class MultiIndex(Index):
    # Marker subclass without extra state; Index.__new__ looks classes up by
    # the name of the wrapped index value's class, so this name must match.
    __slots__ = ()
960
+
961
+
962
class BaseSeriesData(HasShapeTileableData, _ToPandasMixin):
    """Tileable data shared by series-like objects: dtype, name, index value."""

    __slots__ = "_cache", "_accessors"

    # optional field
    _dtype = DataTypeField("dtype")
    _name = AnyField("name")
    _index_value = ReferenceField(
        "index_value", IndexValue, on_deserialize=_on_deserialize_index_value
    )

    def __init__(
        self,
        op=None,
        shape=None,
        nsplits=None,
        dtype=None,
        name=None,
        index_value=None,
        **kw,
    ):
        super().__init__(
            _op=op,
            _shape=shape,
            _nsplits=nsplits,
            _dtype=dtype,
            _name=name,
            _index_value=index_value,
            **kw,
        )
        self._accessors = dict()

    def _get_params(self) -> Dict[str, Any]:
        # the properties needed to rebuild an equivalent tileable object
        return dict(
            shape=self.shape,
            dtype=self.dtype,
            name=self.name,
            index_value=self.index_value,
        )

    def _set_params(self, new_params: Dict[str, Any]):
        # apply every non-None known entry, then reject whatever is left
        remaining = new_params.copy()

        shape = remaining.pop("shape", None)
        if shape is not None:
            self._shape = shape

        dtype = remaining.pop("dtype", None)
        if dtype is not None:
            self._dtype = dtype

        index_value = remaining.pop("index_value", None)
        if index_value is not None:
            self._index_value = index_value

        name = remaining.pop("name", None)
        if name is not None:
            self._name = name

        if remaining:  # pragma: no cover
            raise TypeError(f"Unknown params: {list(remaining)}")

    params = property(_get_params, _set_params)

    def refresh_params(self):
        # refresh params when chunks updated
        refresh_tileable_shape(self)
        fill_chunk_slices(self)
        # refresh_index_value(self)
        if self._dtype is None:
            # adopt the first chunk's dtype when it has one
            self._dtype = getattr(self.chunks[0], "dtype", None)
        # if self._name is None:
        #     self._name = self.chunks[0].name

    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
        pass

    def _to_str(self, representation=False):
        """
        Render the object as text.

        Under build mode or before execution only a short operator
        description is produced; otherwise corner data is fetched from the
        latest executing session and printed, truncated when the object has
        more rows than pandas' ``display.max_rows``.
        """
        if is_build_mode() or len(self._executed_sessions) == 0:
            # in build mode, or not executed, just return representation
            op_name = type(self._op).__name__
            if representation:
                return f"{self.type_name} <op={op_name}, key={self.key}>"
            return f"{self.type_name}(op={op_name})"

        corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])

        buf = StringIO()
        max_rows = pd.get_option("display.max_rows")
        if self.shape[0] <= max_rows or corner_data.shape[0] == 0:
            corner_max_rows = max_rows
        else:
            # make sure max_rows < corner_data
            corner_max_rows = corner_data.shape[0] - 1

        with pd.option_context("display.max_rows", corner_max_rows):
            if self.shape[0] <= max_rows:
                corner_series = corner_data
            else:
                corner_series = ReprSeries(corner_data, self.shape)
            text = repr(corner_series) if representation else str(corner_series)
            buf.write(text)

        return buf.getvalue()

    def __str__(self):
        return self._to_str(representation=False)

    def __repr__(self):
        return self._to_str(representation=True)

    @property
    def dtype(self):
        # stored dtype first, then the operator's dtype if it defines one
        return getattr(self, "_dtype", None) or getattr(self.op, "dtype", None)

    @property
    def name(self):
        return self._name

    @property
    def index_value(self):
        return self._index_value

    @property
    def index(self):
        from .datasource.index import from_tileable

        return from_tileable(self)

    @property
    def axes(self):
        return [self.index]

    @property
    def empty(self):
        """Whether the shape is ``(0,)``; raises ValueError if the shape has NaN."""
        shape = self._shape
        if np.any(np.isnan(shape)):
            raise ValueError("Tileable object must be executed first")
        return shape == (0,)

    def to_tensor(self, dtype=None):
        from ..tensor.datasource.from_dataframe import from_series

        return from_series(self, dtype=dtype)
1102
+
1103
+
1104
class SeriesData(_BatchedFetcher, BaseSeriesData):
    """Series tileable data with batched fetching support."""

    type_name = "Series"

    def __maxframe_tensor__(self, dtype=None, order="K"):
        as_tensor = self.to_tensor()
        if dtype is None:
            dtype = as_tensor.dtype
        return as_tensor.astype(dtype=dtype, order=order, copy=False)

    def iteritems(self, batch_size=10000, session=None):
        """
        Lazily iterate over ``(index, value)`` pairs, fetching the data in
        batches of ``batch_size`` rows.
        """
        # older pandas releases expose ``iteritems``; newer ones ``items``
        if _df_with_iteritems:
            method_name = "iteritems"
        else:
            method_name = "items"
        for batch in self.iterbatch(batch_size=batch_size, session=session):
            pair_iter = getattr(batch, method_name)()
            yield from pair_iter

    items = iteritems

    def to_frame(self, name=None):
        """
        Convert the series to a single-column DataFrame.

        The column is named after ``name``, else the series name, else 0.
        """
        from . import dataframe_from_tensor

        column = name or self.name or 0
        return dataframe_from_tensor(self, columns=[column])

    @property
    def hasnans(self):
        """
        Return True if there are any NaNs.

        Returns
        -------
        bool

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> s = md.Series([1, 2, 3, None])
        >>> s.execute()
        0    1.0
        1    2.0
        2    3.0
        3    NaN
        dtype: float64
        >>> s.hasnans.execute()
        True
        """
        return self.isna().any()
1148
+
1149
+
1150
class Series(HasShapeTileable, _ToPandasMixin):
    """User-facing Series object; most operations delegate to the wrapped ``SeriesData``."""

    __slots__ = ("_cache",)
    _allow_data_type_ = (SeriesData,)
    type_name = "Series"

    def __class_getitem__(cls, item):
        """Support ``Series[...]`` subscription by building a ``SeriesType``."""
        return SeriesType.from_getitem_args(item)

    def to_tensor(self, dtype=None):
        """Convert the series to a tensor, forwarding ``dtype`` to the data object."""
        data = self._data
        return data.to_tensor(dtype=dtype)

    def from_tensor(self, in_tensor, index=None, name=None):
        """Delegate to ``from_tensor`` of the underlying data object."""
        data = self._data
        return data.from_tensor(in_tensor, index=index, name=name)

    @property
    def T(self):
        """Return the transpose, which is by definition self."""
        return self

    @property
    def ndim(self):
        """
        Return an int representing the number of axes / array dimensions.

        Return 1 if Series. Otherwise return 2 if DataFrame.

        See Also
        --------
        ndarray.ndim : Number of array dimensions.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> s = md.Series({'a': 1, 'b': 2, 'c': 3})
        >>> s.ndim
        1

        >>> df = md.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
        >>> df.ndim
        2
        """
        return super().ndim

    @property
    def index(self):
        """
        The index (axis labels) of the Series.
        """
        series_index = self._data.index
        # Bind the index object to this series on axis 0.
        series_index._set_df_or_series(self, 0)
        return series_index

    @index.setter
    def index(self, new_index):
        self.set_axis(new_index, axis=0, inplace=True)

    @property
    def name(self):
        """Name of the series, taken from the underlying data object."""
        return self._data.name

    @name.setter
    def name(self, val):
        from .indexing.rename import DataFrameRename

        # Renaming goes through a rename operator; this object then adopts
        # the data of the renamed series.
        rename_op = DataFrameRename(new_name=val, output_types=[OutputType.series])
        self.data = rename_op(self).data

    @property
    def dtype(self):
        """
        Return the dtype object of the underlying data.
        """
        return self._data.dtype

    def copy(self, deep=True):  # pylint: disable=arguments-differ
        """
        Make a copy of this object's indices and data.

        When ``deep=True`` (default), a new object will be created with a
        copy of the calling object's data and indices. Modifications to
        the data or indices of the copy will not be reflected in the
        original object (see notes below).

        When ``deep=False``, a new object will be created without copying
        the calling object's data or index (only references to the data
        and index are copied). Any changes to the data of the original
        will be reflected in the shallow copy (and vice versa).

        Parameters
        ----------
        deep : bool, default True
            Make a deep copy, including a copy of the data and the indices.
            With ``deep=False`` neither the indices nor the data are copied.

        Returns
        -------
        copy : Series or DataFrame
            Object type matches caller.
        """
        return super().copy() if deep else super()._view()

    def __iter__(self):
        # prevent being called by pandas to make sure `__eq__` works
        prevent_called_from_pandas()
        # Return a generator expression (rather than making this method a
        # generator) so that the guard above runs as soon as iter() is called.
        return (pair[1] for pair in self.items())

    def __len__(self):
        data = self._data
        return len(data)

    def __maxframe_tensor__(self, dtype=None, order="K"):
        """Delegate tensor conversion to the underlying data object."""
        data = self._data
        return data.__maxframe_tensor__(dtype=dtype, order=order)

    def keys(self):
        """
        Return alias for index.

        Returns
        -------
        Index
            Index of the Series.
        """
        return self.index

    @property
    def values(self):
        """Values of the series as a tensor (same as ``to_tensor()``)."""
        return self.to_tensor()

    def iteritems(self, batch_size=10000, session=None):
        """
        Lazily iterate over (index, value) tuples.

        This method returns an iterable tuple (index, value). This is
        convenient if you want to create a lazy iterator.

        Returns
        -------
        iterable
            Iterable of tuples containing the (index, value) pairs from a
            Series.

        See Also
        --------
        DataFrame.items : Iterate over (column name, Series) pairs.
        DataFrame.iterrows : Iterate over DataFrame rows as (index, Series) pairs.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> s = md.Series(['A', 'B', 'C'])
        >>> for index, value in s.items():
        ...     print(f"Index : {index}, Value : {value}")
        Index : 0, Value : A
        Index : 1, Value : B
        Index : 2, Value : C
        """
        data = self._data
        return data.iteritems(batch_size=batch_size, session=session)

    items = iteritems

    def to_frame(self, name=None):
        """
        Convert Series to DataFrame.

        Parameters
        ----------
        name : object, default None
            The passed name should substitute for the series name (if it has
            one).

        Returns
        -------
        DataFrame
            DataFrame representation of Series.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> s = md.Series(["a", "b", "c"], name="vals")
        >>> s.to_frame().execute()
          vals
        0    a
        1    b
        2    c
        """
        data = self._data
        return data.to_frame(name=name)
1339
+
1340
+ # def median(
1341
+ # self, axis=None, skipna=True, out=None, overwrite_input=False, keepdims=False
1342
+ # ):
1343
+ # """
1344
+ # Return the median of the values over the requested axis.
1345
+ #
1346
+ # Parameters
1347
+ # ----------
1348
+ # axis : {index (0)}
1349
+ # Axis or axes along which the medians are computed. The default
1350
+ # is to compute the median along a flattened version of the tensor.
1351
+ # A sequence of axes is supported since version 1.9.0.
1352
+ # skipna : bool, optional, default True
1353
+ # Exclude NA/null values when computing the result.
1354
+ # out : Tensor, default None
1355
+ # Output tensor in which to place the result. It must
1356
+ # have the same shape and buffer length as the expected output,
1357
+ # but the type (of the output) will be cast if necessary.
1358
+ # overwrite_input : bool, default False
1359
+ # Just for compatibility with Numpy, would not take effect.
1360
+ # keepdims : bool, default False
1361
+ # If this is set to True, the axes which are reduced are left
1362
+ # in the result as dimensions with size one. With this option,
1363
+ # the result will broadcast correctly against the original `arr`.
1364
+ #
1365
+ # Returns
1366
+ # -------
1367
+ # median : scalar
1368
+ # Return the median of the values over the requested axis.
1369
+ #
1370
+ # See Also
1371
+ # --------
1372
+ # tensor.mean, tensor.percentile
1373
+ #
1374
+ # Notes
1375
+ # -----
1376
+ # Given a vector ``V`` of length ``N``, the median of ``V`` is the
1377
+ # middle value of a sorted copy of ``V``, ``V_sorted`` - i
1378
+ # e., ``V_sorted[(N-1)/2]``, when ``N`` is odd, and the average of the
1379
+ # two middle values of ``V_sorted`` when ``N`` is even.
1380
+ #
1381
+ # Examples
1382
+ # --------
1383
+ # >>> import maxframe.dataframe as md
1384
+ # >>> a = md.Series([10, 7, 4, 3, 2, 1])
1385
+ # >>> a.median().execute()
1386
+ # 2.0
1387
+ # >>> mt.median(a).execute()
1388
+ # 3.5
1389
+ # >>> a = md.Series([10, 7, 4, None, 2, 1])
1390
+ # >>> a.median().execute()
1391
+ # 4.0
1392
+ # >>> a.median(skipna=False).execute()
1393
+ # nan
1394
+ # """
1395
+ # if skipna:
1396
+ # return statistics.median(
1397
+ # self.dropna(),
1398
+ # axis=None,
1399
+ # out=None,
1400
+ # overwrite_input=False,
1401
+ # keepdims=False,
1402
+ # )
1403
+ # else:
1404
+ # return statistics.median(
1405
+ # self, axis=None, out=None, overwrite_input=False, keepdims=False
1406
+ # )
1407
+
1408
+
1409
class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
    """
    Base data node for DataFrame-like tileables.

    Holds the column dtypes together with index / columns metadata, and
    caches derived values (``dtypes_value`` and a column -> dtypes mapping).
    """

    __slots__ = "_accessors", "_dtypes_value", "_dtypes_dict"

    # optional fields
    _dtypes = SeriesField("dtypes")
    _index_value = ReferenceField(
        "index_value", IndexValue, on_deserialize=_on_deserialize_index_value
    )
    _columns_value = ReferenceField("columns_value", IndexValue)

    def __init__(
        self,
        op=None,
        shape=None,
        nsplits=None,
        dtypes=None,
        index_value=None,
        columns_value=None,
        **kw,
    ):
        super().__init__(
            _op=op,
            _shape=shape,
            _nsplits=nsplits,
            _dtypes=dtypes,
            _index_value=index_value,
            _columns_value=columns_value,
            **kw,
        )
        # Non-serialized, per-instance caches.
        self._accessors = dict()
        self._dtypes_value = None
        self._dtypes_dict = None

    def __on_deserialize__(self):
        """Reset the non-serialized caches after deserialization."""
        super().__on_deserialize__()
        self._accessors = dict()
        self._dtypes_value = None
        self._dtypes_dict = None

    def _get_params(self) -> Dict[str, Any]:
        """Return the properties useful to rebuild a new tileable object."""
        return {
            "shape": self.shape,
            "dtypes": self.dtypes,
            "index_value": self.index_value,
            "columns_value": getattr(self, "columns_value", None),
            "dtypes_value": getattr(self, "dtypes_value", None),
        }

    def _set_params(self, new_params: Dict[str, Any]):
        """
        Update stored metadata from ``new_params``.

        Entries that are missing or ``None`` leave the current value
        untouched. When ``dtypes_value`` is given, it also supplies ``dtypes``
        and ``columns_value`` if those were not passed explicitly.

        Raises
        ------
        TypeError
            If ``new_params`` contains keys that are not recognized.
        """
        params = new_params.copy()
        new_shape = params.pop("shape", None)
        if new_shape is not None:
            self._shape = new_shape
        index_value = params.pop("index_value", None)
        if index_value is not None:
            self._index_value = index_value
        dtypes = params.pop("dtypes", None)
        if dtypes is not None:
            self._dtypes = dtypes
        columns_value = params.pop("columns_value", None)
        if columns_value is not None:
            self._columns_value = columns_value
        dtypes_value = params.pop("dtypes_value", None)
        if dtypes_value is not None:
            if dtypes is None:
                self._dtypes = dtypes_value.value
            if columns_value is None:
                self._columns_value = parse_index(self._dtypes.index, store_data=True)
            self._dtypes_value = dtypes_value
        if params:  # pragma: no cover
            raise TypeError(f"Unknown params: {list(params)}")

    params = property(_get_params, _set_params)

    def refresh_params(self):
        """Refresh shape and chunk slices after chunks were updated."""
        refresh_tileable_shape(self)
        fill_chunk_slices(self)
        # NOTE: refreshing index_value and dtypes is currently disabled here:
        # refresh_index_value(self)
        # refresh_dtypes(self)

    def refresh_from_dtypes(self, dtypes: pd.Series) -> None:
        """
        Replace the dtypes and update everything derived from them.

        Updates ``_columns_value``, ``_dtypes_value`` and the last dimension
        of the stored shape so that it equals ``len(dtypes)``.
        """
        self._dtypes = dtypes
        self._columns_value = parse_index(dtypes.index, store_data=True)
        self._dtypes_value = DtypesValue(key=tokenize(dtypes), value=dtypes)
        new_shape = list(self._shape)
        new_shape[-1] = len(dtypes)
        self._shape = tuple(new_shape)

    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
        """Refresh dtypes from the ``pd_column_dtypes`` of ``table_meta``."""
        self.refresh_from_dtypes(table_meta.pd_column_dtypes)

    @property
    def dtypes(self):
        """Stored dtypes, falling back to ``op.dtypes`` (or ``None``) when unset."""
        dt = getattr(self, "_dtypes", None)
        if dt is not None:
            return dt
        return getattr(self.op, "dtypes", None)

    @property
    def dtypes_value(self):
        """
        Cached ``DtypesValue`` wrapping ``dtypes``.

        Built lazily on first access; stays ``None`` while no dtypes exist.
        """
        if self._dtypes_value is not None:
            return self._dtypes_value
        # TODO(qinxuye): when creating Dataframe,
        # dtypes_value instead of dtypes later must be passed into
        dtypes = self.dtypes
        if dtypes is not None:
            self._dtypes_value = DtypesValue(key=tokenize(dtypes), value=dtypes)
        return self._dtypes_value

    @property
    def index_value(self):
        return self._index_value

    @property
    def columns_value(self):
        return self._columns_value

    @property
    def empty(self):
        """
        Tell whether any dimension of the shape is zero.

        Raises
        ------
        ValueError
            If any entry of the stored shape is NaN, i.e. the shape is unknown.
        """
        shape = getattr(self, "_shape")
        if np.any(np.isnan(shape)):
            raise ValueError("Tileable object must be executed first")
        return 0 in shape

    def to_tensor(self, dtype=None):
        """Convert to a tensor through ``from_dataframe``, forwarding ``dtype``."""
        from ..tensor.datasource.from_dataframe import from_dataframe

        return from_dataframe(self, dtype=dtype)

    @property
    def index(self):
        from .datasource.index import from_tileable

        return from_tileable(self)

    @property
    def columns(self):
        from .datasource.index import from_pandas as from_pandas_index

        return from_pandas_index(self.dtypes.index, store_data=True)

    @property
    def axes(self):
        return [self.index, self.columns]

    def _get_dtypes_dict(self):
        """
        Return a cached mapping ``column label -> list of dtypes``.

        A list is kept per label because the same label can occur several
        times in ``dtypes``.
        """
        if self._dtypes_dict is None:
            self._dtypes_dict = grouped = dict()
            for label, dt in self.dtypes.items():
                grouped.setdefault(label, []).append(dt)
        return self._dtypes_dict

    def _get_dtypes_by_columns(self, columns: list):
        """
        Return the dtypes of ``columns`` as one flat list.

        A label occurring several times in the frame contributes all of its
        dtypes. Raises ``KeyError`` for an unknown label.
        """
        dtypes_dict = self._get_dtypes_dict()
        # A flat comprehension avoids the repeated list concatenation of
        # reduce(operator.add, ...), which copies the accumulator every step.
        return [dt for c in columns for dt in dtypes_dict[c]]

    def _get_columns_by_columns(self, columns: list):
        """
        Return ``columns`` with every label repeated once per matching dtype.

        The result lines up element-wise with ``_get_dtypes_by_columns``.
        Raises ``KeyError`` for an unknown label.
        """
        dtypes_dict = self._get_dtypes_dict()
        return [c for c in columns for _ in dtypes_dict[c]]
1576
+
1577
+
1578
class DataFrameData(_BatchedFetcher, BaseDataFrameData):
    """DataFrame data node with text / HTML rendering and batched row iteration."""

    type_name = "DataFrame"

    def _to_str(self, representation=False):
        """
        Render the frame as text.

        In build mode, or when the object was never executed, a short
        placeholder naming the operator is returned. Otherwise the corner
        data fetched from the most recent executing session is rendered,
        with the dimension footer reflecting the full shape.

        Parameters
        ----------
        representation : bool, default False
            Use ``repr`` formatting when True, ``str`` formatting otherwise.
        """
        if is_build_mode() or len(self._executed_sessions) == 0:
            # in build mode, or not executed, just return representation
            if representation:
                return (
                    f"{self.type_name} <op={type(self._op).__name__}, key={self.key}>"
                )
            else:
                return f"{self.type_name}(op={type(self._op).__name__})"
        else:
            corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])

            buf = StringIO()
            max_rows = pd.get_option("display.max_rows")

            if self.shape[0] <= max_rows or corner_data.shape[0] == 0:
                buf.write(repr(corner_data) if representation else str(corner_data))
            else:
                # remember we cannot directly call repr(df),
                # because the [... rows x ... columns] may show wrong rows
                with pd.option_context(
                    "display.show_dimensions",
                    False,
                    "display.max_rows",
                    corner_data.shape[0] - 1,
                ):
                    if representation:
                        s = repr(corner_data)
                    else:
                        s = str(corner_data)
                    buf.write(s)
                # Checked outside the context above, which forces the option off.
                if pd.get_option("display.show_dimensions"):
                    n_rows, n_cols = self.shape
                    buf.write(f"\n\n[{n_rows} rows x {n_cols} columns]")

            return buf.getvalue()

    def __str__(self):
        return self._to_str(representation=False)

    def __repr__(self):
        return self._to_str(representation=True)

    def __maxframe_tensor__(self, dtype=None, order="K"):
        """
        Convert this frame into a tensor.

        When ``dtype`` is ``None`` the dtype of the converted tensor is kept,
        matching ``SeriesData.__maxframe_tensor__``.
        """
        tensor = self.to_tensor()
        target_dtype = tensor.dtype if dtype is None else dtype
        return tensor.astype(dtype=target_dtype, order=order, copy=False)

    def _repr_html_(self):
        """
        Render the frame as HTML for notebook front-ends.

        Returns ``None`` when no corner data is available or when pandas
        produces no HTML for it.

        Raises
        ------
        NotImplementedError
            If the object was never executed, so callers fall back to the
            plain ``repr``.
        """
        if len(self._executed_sessions) == 0:
            # not executed before, fall back to normal repr
            raise NotImplementedError

        corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])
        if corner_data is None:
            return

        max_rows = pd.get_option("display.max_rows")
        if self.shape[0] <= max_rows:
            # May be None when pandas' HTML notebook repr is disabled.
            return corner_data._repr_html_()

        with pd.option_context(
            "display.show_dimensions",
            False,
            "display.max_rows",
            corner_data.shape[0] - 1,
        ):
            html = corner_data._repr_html_()
        if html is None:
            return None

        buf = StringIO()
        closing_tag = "</div>"
        html = html.rstrip()
        # Remove the closing tag as an exact suffix. ``str.rstrip("</div>")``
        # would strip any trailing run of the characters '<', '/', 'd', 'i',
        # 'v' and '>', which may remove more than the tag.
        wrapped = html.endswith(closing_tag)
        if wrapped:
            html = html[: -len(closing_tag)]
        buf.write(html)
        # Checked outside the context above, which forces the option off.
        if pd.get_option("display.show_dimensions"):
            n_rows, n_cols = self.shape
            buf.write(f"<p>{n_rows} rows × {n_cols} columns</p>\n")
        if wrapped:
            buf.write(closing_tag)

        return buf.getvalue()

    def items(self):
        """Yield ``(column name, column)`` pairs in the order of ``dtypes``."""
        for col_name in self.dtypes.index:
            yield col_name, self[col_name]

    iteritems = items

    def iterrows(self, batch_size=1000, session=None):
        """Yield the ``iterrows()`` output of every fetched batch in turn."""
        for batch_data in self.iterbatch(batch_size=batch_size, session=session):
            yield from batch_data.iterrows()

    def itertuples(self, index=True, name="Pandas", batch_size=1000, session=None):
        """Yield the ``itertuples()`` output of every fetched batch in turn."""
        for batch_data in self.iterbatch(batch_size=batch_size, session=session):
            yield from batch_data.itertuples(index=index, name=name)

    def _need_execution(self):
        """Return True when no dtypes are stored on this object."""
        return self._dtypes is None
1673
+
1674
+
1675
class DataFrame(HasShapeTileable, _ToPandasMixin):
    """User-facing DataFrame object; most operations delegate to the wrapped ``DataFrameData``."""

    __slots__ = ("_cache",)
    _allow_data_type_ = (DataFrameData,)
    type_name = "DataFrame"

    def __len__(self):
        return len(self._data)

    def to_tensor(self, dtype=None):
        """
        Convert the frame to a tensor.

        Parameters
        ----------
        dtype : optional
            Forwarded to the underlying data object, in line with
            ``Series.to_tensor``.
        """
        return self._data.to_tensor(dtype=dtype)

    def __maxframe_tensor__(self, dtype=None, order="K"):
        """Delegate tensor conversion to the underlying data object."""
        return self._data.__maxframe_tensor__(dtype=dtype, order=order)

    def __getattr__(self, key):
        """
        Resolve unknown attributes on the data object, then as column names.

        The original ``AttributeError`` is re-raised when ``key`` is not a
        column either.
        """
        try:
            return getattr(self._data, key)
        except AttributeError:
            if key in self.dtypes:
                return self[key]
            else:
                raise

    def __dir__(self):
        """List attributes plus the string column names that are valid identifiers."""
        result = list(super().__dir__())
        return sorted(
            result
            + [k for k in self.dtypes.index if isinstance(k, str) and k.isidentifier()]
        )

    def __iter__(self):
        # prevent being called by pandas to make sure `__eq__` works
        prevent_called_from_pandas()
        return iter(self.dtypes.index)

    def __class_getitem__(cls, item):
        """Support ``DataFrame[...]`` subscription by building a ``DataFrameType``."""
        return DataFrameType.from_getitem_args(item)

    @property
    def T(self):
        """Shorthand for ``transpose()``."""
        return self.transpose()

    @property
    def ndim(self):
        """
        Return an int representing the number of axes / array dimensions.

        Return 1 if Series. Otherwise return 2 if DataFrame.

        See Also
        --------
        ndarray.ndim : Number of array dimensions.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> s = md.Series({'a': 1, 'b': 2, 'c': 3})
        >>> s.ndim
        1

        >>> df = md.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
        >>> df.ndim
        2
        """
        return super().ndim

    @property
    def index(self):
        """The index (row labels) of the DataFrame."""
        idx = self._data.index
        # Bind the index object to this frame on axis 0.
        idx._set_df_or_series(self, 0)
        return idx

    @index.setter
    def index(self, new_index):
        self.set_axis(new_index, axis=0, inplace=True)

    @property
    def columns(self):
        """The column labels of the DataFrame."""
        col = self._data.columns
        # Bind the columns object to this frame on axis 1.
        col._set_df_or_series(self, 1)
        return col

    @columns.setter
    def columns(self, new_columns):
        self.set_axis(new_columns, axis=1, inplace=True)

    def keys(self):
        """
        Get the 'info axis' (see Indexing for more).

        This is index for Series, columns for DataFrame.

        Returns
        -------
        Index
            Info axis.
        """
        return self.columns

    @property
    def values(self):
        """Values of the frame as a tensor (same as ``to_tensor()``)."""
        return self.to_tensor()

    @property
    def dtypes(self):
        """
        Return the dtypes in the DataFrame.

        This returns a Series with the data type of each column.
        The result's index is the original DataFrame's columns. Columns
        with mixed types are stored with the ``object`` dtype. See
        :ref:`the User Guide <basics.dtypes>` for more.

        Returns
        -------
        pandas.Series
            The data type of each column.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> df = md.DataFrame({'float': [1.0],
        ...                    'int': [1],
        ...                    'datetime': [md.Timestamp('20180310')],
        ...                    'string': ['foo']})
        >>> df.dtypes
        float              float64
        int                  int64
        datetime    datetime64[ns]
        string              object
        dtype: object
        """
        return self._data.dtypes

    def iterrows(self, batch_size=1000, session=None):
        """
        Iterate over DataFrame rows as (index, Series) pairs.

        Yields
        ------
        index : label or tuple of label
            The index of the row. A tuple for a `MultiIndex`.
        data : Series
            The data of the row as a Series.

        it : generator
            A generator that iterates over the rows of the frame.

        See Also
        --------
        DataFrame.itertuples : Iterate over DataFrame rows as namedtuples of the values.
        DataFrame.items : Iterate over (column name, Series) pairs.

        Notes
        -----

        1. Because ``iterrows`` returns a Series for each row,
           it does **not** preserve dtypes across the rows (dtypes are
           preserved across columns for DataFrames). For example,

           >>> import maxframe.dataframe as md
           >>> df = md.DataFrame([[1, 1.5]], columns=['int', 'float'])
           >>> row = next(df.iterrows())[1]
           >>> row
           int      1.0
           float    1.5
           Name: 0, dtype: float64
           >>> print(row['int'].dtype)
           float64
           >>> print(df['int'].dtype)
           int64

           To preserve dtypes while iterating over the rows, it is better
           to use :meth:`itertuples` which returns namedtuples of the values
           and which is generally faster than ``iterrows``.

        2. You should **never modify** something you are iterating over.
           This is not guaranteed to work in all cases. Depending on the
           data types, the iterator returns a copy and not a view, and writing
           to it will have no effect.
        """
        return self._data.iterrows(batch_size=batch_size, session=session)

    def itertuples(self, index=True, name="Pandas", batch_size=1000, session=None):
        """
        Iterate over DataFrame rows as namedtuples.

        Parameters
        ----------
        index : bool, default True
            If True, return the index as the first element of the tuple.
        name : str or None, default "Pandas"
            The name of the returned namedtuples or None to return regular
            tuples.

        Returns
        -------
        iterator
            An object to iterate over namedtuples for each row in the
            DataFrame with the first field possibly being the index and
            following fields being the column values.

        See Also
        --------
        DataFrame.iterrows : Iterate over DataFrame rows as (index, Series)
            pairs.
        DataFrame.items : Iterate over (column name, Series) pairs.

        Notes
        -----
        The column names will be renamed to positional names if they are
        invalid Python identifiers, repeated, or start with an underscore.
        On python versions < 3.7 regular tuples are returned for DataFrames
        with a large number of columns (>254).

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> df = md.DataFrame({'num_legs': [4, 2], 'num_wings': [0, 2]},
        ...                   index=['dog', 'hawk'])
        >>> df.execute()
              num_legs  num_wings
        dog          4          0
        hawk         2          2
        >>> for row in df.itertuples():
        ...     print(row)
        ...
        Pandas(Index='dog', num_legs=4, num_wings=0)
        Pandas(Index='hawk', num_legs=2, num_wings=2)

        By setting the `index` parameter to False we can remove the index
        as the first element of the tuple:

        >>> for row in df.itertuples(index=False):
        ...     print(row)
        ...
        Pandas(num_legs=4, num_wings=0)
        Pandas(num_legs=2, num_wings=2)

        With the `name` parameter set we set a custom name for the yielded
        namedtuples:

        >>> for row in df.itertuples(name='Animal'):
        ...     print(row)
        ...
        Animal(Index='dog', num_legs=4, num_wings=0)
        Animal(Index='hawk', num_legs=2, num_wings=2)
        """
        return self._data.itertuples(
            batch_size=batch_size, session=session, index=index, name=name
        )

    def assign(self, **kwargs):
        """
        Assign new columns to a DataFrame.
        Returns a new object with all original columns in addition to new ones.
        Existing columns that are re-assigned will be overwritten.

        Parameters
        ----------
        **kwargs : dict of {str: callable or Series}
            The column names are keywords. If the values are
            callable, they are computed on the DataFrame and
            assigned to the new columns. The callable must not
            change input DataFrame (though pandas doesn't check it).
            If the values are not callable, (e.g. a Series, scalar, or array),
            they are simply assigned.

        Returns
        -------
        DataFrame
            A new DataFrame with the new columns in addition to
            all the existing columns.

        Notes
        -----
        Assigning multiple columns within the same ``assign`` is possible.
        Later items in 'kwargs' may refer to newly created or modified
        columns in 'df'; items are computed and assigned into 'df' in order.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> df = md.DataFrame({'temp_c': [17.0, 25.0]},
        ...                   index=['Portland', 'Berkeley'])
        >>> df.execute()
                  temp_c
        Portland    17.0
        Berkeley    25.0

        Where the value is a callable, evaluated on `df`:

        >>> df.assign(temp_f=lambda x: x.temp_c * 9 / 5 + 32).execute()
                  temp_c  temp_f
        Portland    17.0    62.6
        Berkeley    25.0    77.0

        Alternatively, the same behavior can be achieved by directly
        referencing an existing Series or sequence:

        >>> df.assign(temp_f=df['temp_c'] * 9 / 5 + 32).execute()
                  temp_c  temp_f
        Portland    17.0    62.6
        Berkeley    25.0    77.0

        You can create multiple columns within the same assign where one
        of the columns depends on another one defined within the same assign:

        >>> df.assign(temp_f=lambda x: x['temp_c'] * 9 / 5 + 32,
        ...           temp_k=lambda x: (x['temp_f'] + 459.67) * 5 / 9).execute()
                  temp_c  temp_f  temp_k
        Portland    17.0    62.6  290.15
        Berkeley    25.0    77.0  298.15
        """
        # Work on a copy so that the calling frame is left untouched.
        data = self.copy()

        # Columns are assigned in keyword order, so later callables can see
        # columns created by earlier ones.
        for k, v in kwargs.items():
            data[k] = apply_if_callable(v, data)
        return data
1995
+
1996
+
1997
class DataFrameGroupByData(BaseDataFrameData):
    """Data node of a DataFrame group-by; adds key dtypes and a selection to the frame metadata."""

    type_name = "DataFrameGroupBy"

    _key_dtypes = SeriesField("key_dtypes")
    _selection = AnyField("selection")

    def __init__(self, key_dtypes=None, selection=None, **kw):
        super().__init__(_key_dtypes=key_dtypes, _selection=selection, **kw)

    @property
    def key_dtypes(self):
        return self._key_dtypes

    @property
    def selection(self):
        return self._selection

    def _get_params(self) -> Dict[str, Any]:
        """Return the base params extended with ``key_dtypes`` and ``selection``."""
        collected = super()._get_params()
        collected["key_dtypes"] = self.key_dtypes
        collected["selection"] = self.selection
        return collected

    def _set_params(self, new_params: Dict[str, Any]):
        """Apply group-by specific entries, then hand the remainder to the base class."""
        remaining = new_params.copy()
        for key, attr in (("key_dtypes", "_key_dtypes"), ("selection", "_selection")):
            value = remaining.pop(key, None)
            if value is not None:
                setattr(self, attr, value)
        super()._set_params(remaining)

    params = property(_get_params, _set_params)

    def _equal(self, o):
        # FIXME: a true `==` operator still needs to be implemented for
        # DataFrameGroupBy; in build mode only identity is compared.
        return self is o if is_build_mode() else self == o
2037
+
2038
+
2039
class SeriesGroupByData(BaseSeriesData):
    """Data node of a Series group-by; adds key dtypes to the series metadata."""

    type_name = "SeriesGroupBy"

    _key_dtypes = AnyField("key_dtypes")

    def __init__(self, key_dtypes=None, **kw):
        super().__init__(_key_dtypes=key_dtypes, **kw)

    @property
    def key_dtypes(self):
        return self._key_dtypes

    def _get_params(self) -> Dict[str, Any]:
        """Return the base params extended with ``key_dtypes``."""
        collected = super()._get_params()
        collected["key_dtypes"] = self.key_dtypes
        return collected

    def _set_params(self, new_params: Dict[str, Any]):
        """Apply ``key_dtypes`` when given, then hand the remainder to the base class."""
        remaining = new_params.copy()
        given_key_dtypes = remaining.pop("key_dtypes", None)
        if given_key_dtypes is not None:
            self._key_dtypes = given_key_dtypes
        super()._set_params(remaining)

    params = property(_get_params, _set_params)

    def _equal(self, o):
        # FIXME: a true `==` operator still needs to be implemented for
        # group-by objects; in build mode only identity is compared.
        return self is o if is_build_mode() else self == o
2071
+
2072
+
2073
class GroupBy(Tileable, _ToPandasMixin):
    """Common base of the group-by tileable wrappers defined in this module."""

    __slots__ = ()
2075
+
2076
+
2077
class DataFrameGroupBy(GroupBy):
    """User-facing DataFrame group-by object wrapping ``DataFrameGroupByData``."""

    __slots__ = ()
    _allow_data_type_ = (DataFrameGroupByData,)
    type_name = "DataFrameGroupBy"

    def __eq__(self, other):
        return self._equal(other)

    def __hash__(self):
        # NB: __eq__ is customized explicitly, so __hash__ has to be defined
        # explicitly as well.
        return super().__hash__()

    def __getattr__(self, item):
        """Fall back to column selection when normal attribute lookup fails."""
        try:
            return super().__getattr__(item)
        except AttributeError:
            if item not in self.dtypes:
                raise
            return self[item]

    def __dir__(self):
        """List attributes plus the string column names that are valid identifiers."""
        attribute_names = list(super().__dir__())
        column_names = [
            k for k in self.dtypes.index if isinstance(k, str) and k.isidentifier()
        ]
        return sorted(attribute_names + column_names)
2104
+
2105
+
2106
class SeriesGroupBy(GroupBy):
    """User-facing Series group-by object wrapping ``SeriesGroupByData``."""

    __slots__ = ()
    _allow_data_type_ = (SeriesGroupByData,)
    type_name = "SeriesGroupBy"

    def __eq__(self, other):
        return self._equal(other)

    def __hash__(self):
        # NB: __eq__ is customized explicitly, so __hash__ has to be defined
        # explicitly as well.
        return super().__hash__()
2117
+
2118
+
2119
+ class CategoricalData(HasShapeTileableData, _ToPandasMixin):
2120
+ __slots__ = ("_cache",)
2121
+ type_name = "Categorical"
2122
+
2123
+ # optional field
2124
+ _dtype = DataTypeField("dtype")
2125
+ _categories_value = ReferenceField(
2126
+ "categories_value", IndexValue, on_deserialize=_on_deserialize_index_value
2127
+ )
2128
+
2129
def __init__(
    self,
    op=None,
    shape=None,
    nsplits=None,
    dtype=None,
    categories_value=None,
    **kw,
):
    """Store the given metadata under the underscore-prefixed field names of the base class."""
    field_values = dict(
        _op=op,
        _shape=shape,
        _nsplits=nsplits,
        _dtype=dtype,
        _categories_value=categories_value,
    )
    super().__init__(**field_values, **kw)
2146
+
2147
@property
def params(self) -> Dict[str, Any]:
    """Properties useful to rebuild a new tileable object."""
    collected = {}
    collected["shape"] = self.shape
    collected["dtype"] = self.dtype
    collected["categories_value"] = self.categories_value
    return collected

@params.setter
def params(self, new_params: Dict[str, Any]):
    """
    Update stored metadata from ``new_params``.

    Entries that are missing or ``None`` leave the current value untouched.

    Raises
    ------
    TypeError
        If ``new_params`` contains keys that are not recognized.
    """
    remaining = new_params.copy()
    known_fields = (
        ("shape", "_shape"),
        ("dtype", "_dtype"),
        ("categories_value", "_categories_value"),
    )
    for key, attr in known_fields:
        value = remaining.pop(key, None)
        if value is not None:
            setattr(self, attr, value)
    if remaining:  # pragma: no cover
        raise TypeError(f"Unknown params: {list(remaining)}")
2170
+
2171
+ def refresh_params(self):
2172
+ # refresh params when chunks updated
2173
+ refresh_tileable_shape(self)
2174
+ fill_chunk_slices(self)
2175
+ if self._dtype is None:
2176
+ self._dtype = self.chunks[0].dtype
2177
+ if self._categories_value is None:
2178
+ categories = []
2179
+ for chunk in self.chunks:
2180
+ categories.extend(chunk.categories_value.to_pandas())
2181
+ self._categories_value = parse_index(
2182
+ pd.Categorical(categories).categories, store_data=True
2183
+ )
2184
+
2185
+ def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
2186
+ pass
2187
+
2188
+ def _to_str(self, representation=False):
2189
+ if is_build_mode() or len(self._executed_sessions) == 0:
2190
+ # in build mode, or not executed, just return representation
2191
+ if representation:
2192
+ return f"{self.type_name} <op={type(self.op).__name__}, key={self.key}>"
2193
+ else:
2194
+ return f"{self.type_name}(op={type(self.op).__name__})"
2195
+ else:
2196
+ data = self.fetch(session=self._executed_sessions[-1])
2197
+ return repr(data) if repr(data) else str(data)
2198
+
2199
+ def __str__(self):
2200
+ return self._to_str(representation=False)
2201
+
2202
+ def __repr__(self):
2203
+ return self._to_str(representation=True)
2204
+
2205
+ def _equal(self, o):
2206
+ # FIXME We need to implemented a true `==` operator for DataFrameGroupby
2207
+ if is_build_mode():
2208
+ return self is o
2209
+ else: # pragma: no cover
2210
+ return self == o
2211
+
2212
+ @property
2213
+ def dtype(self):
2214
+ return getattr(self, "_dtype", None) or self.op.dtype
2215
+
2216
+ @property
2217
+ def categories_value(self):
2218
+ return self._categories_value
2219
+
2220
+ def __eq__(self, other):
2221
+ return self._equal(other)
2222
+
2223
+ def __hash__(self):
2224
+ # NB: we have customized __eq__ explicitly, thus we need define __hash__ explicitly as well.
2225
+ return super().__hash__()
2226
+
2227
+
2228
class Categorical(HasShapeTileable, _ToPandasMixin):
    """Entity that declares ``CategoricalData`` as its allowed data type."""

    type_name = "Categorical"
    _allow_data_type_ = (CategoricalData,)
    __slots__ = ()

    def __hash__(self):
        # NB: __eq__ is customized below, so __hash__ must be defined
        # explicitly too; it simply defers to the parent class.
        return super().__hash__()

    def __eq__(self, other):
        # Comparison is delegated to the `_equal` hook.
        return self._equal(other)

    def __len__(self):
        # The length is whatever the wrapped data object reports.
        wrapped = self._data
        return len(wrapped)
2242
+
2243
+
2244
class DataFrameOrSeriesData(HasShapeTileableData, _ToPandasMixin):
    """Tileable data whose concrete kind is recorded in ``data_type``.

    Kind-specific properties live in the ``data_params`` dict and are also
    reachable as attributes through ``__getattr__``.
    """

    __slots__ = ()

    _data_type = StringField("data_type")
    _data_params = DictField("data_params")

    def __init__(
        self,
        op=None,
        data_type=None,
        data_params=None,
        **kw,
    ):
        # Both fields are assigned before delegating to the parent constructor.
        self._data_type = data_type
        self._data_params = data_params or dict()
        super().__init__(
            _op=op,
            **kw,
        )

    def __getattr__(self, item):
        """Serve attributes that normal lookup missed from ``data_params``.

        Raises:
            AttributeError: if ``item`` is not a key of ``data_params``, or if
                ``_data_params`` itself has not been set yet.
        """
        # __getattr__ only runs after normal lookup failed. If the missing name
        # is `_data_params` itself, reading it below would fail the same way and
        # re-enter this method forever, so fail fast instead.
        if item != "_data_params" and item in self._data_params:
            return self._data_params[item]
        raise AttributeError(
            f"'{type(self).__name__}' object has no attribute '{item}'"
        )

    @property
    def shape(self):
        """Return ``data_params["shape"]``, or ``None`` when absent."""
        return self._data_params.get("shape", None)

    @property
    def nsplits(self):
        """Return ``data_params["nsplits"]``, or ``None`` when absent."""
        return self._data_params.get("nsplits", None)

    @property
    def data_type(self):
        """Return the stored data type string."""
        return self._data_type

    @property
    def data_params(self):
        """Return the stored parameter dict itself (not a copy)."""
        return self._data_params

    @property
    def params(self) -> Dict[str, Any]:
        """Return the ``data_type`` / ``data_params`` pair describing this object."""
        return {"data_type": self._data_type, "data_params": self._data_params}

    @params.setter
    def params(self, new_params: Dict[str, Any]):
        """Restore ``data_type`` and ``data_params`` from ``new_params``.

        ``new_params`` is either the nested form produced by the getter or a
        flat parameter dict without a ``"data_type"`` key.
        """
        # After execution, create DataFrameFetch, and the data
        # corresponding to the original key is still DataFrameOrSeries type,
        # so when restoring DataFrameOrSeries type,
        # there is no "data_type" field in params.
        if "data_type" not in new_params:
            # Flat form: a "dtype" key marks a series, anything else a dataframe.
            if "dtype" in new_params:
                self._data_type = "series"
            else:
                self._data_type = "dataframe"
            self._data_params = new_params.copy()
        else:
            self._data_type = new_params.get("data_type")
            # Shallow copy so the stored dict is independent of the caller's.
            self._data_params = dict(new_params.get("data_params", {}))

    def refresh_params(self):
        """Recompute ``data_type`` and ``data_params`` from the current chunks."""
        index_to_index_values = dict()
        for chunk in self.chunks:
            # 1-d chunks always contribute their index value; other chunks only
            # when the second component of their index is 0.
            if chunk.ndim == 1 or chunk.index[1] == 0:
                index_to_index_values[chunk.index] = chunk.index_value
        index_value = merge_index_value(index_to_index_values, store_data=False)
        nsplits = calc_nsplits({c.index: c.shape for c in self.chunks})
        shape = tuple(sum(ns) for ns in nsplits)

        data_params = dict()
        data_params["nsplits"] = nsplits
        data_params["shape"] = shape
        data_params["index_value"] = index_value

        self._data_type = self._chunks[0]._data_type
        if self.data_type == "dataframe":
            # Concatenate the dtypes of the chunks whose first index component is 0.
            all_dtypes = [c.dtypes_value.value for c in self.chunks if c.index[0] == 0]
            dtypes = pd.concat(all_dtypes)
            data_params["dtypes"] = dtypes
            columns_values = parse_index(dtypes.index, store_data=True)
            data_params["columns_value"] = columns_values
            data_params["dtypes_value"] = DtypesValue(
                key=tokenize(dtypes), value=dtypes
            )
        else:
            data_params["dtype"] = self.chunks[0].dtype
            data_params["name"] = self.chunks[0].name
        self._data_params.update(data_params)

    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
        """Do nothing; this class takes no information from ``table_meta``."""
        pass

    def ensure_data(self):
        """Execute this tileable and return a ``DataFrameFetch`` tileable for it.

        The new tileable reuses this object's key and ``data_params``, takes
        chunks and nsplits from the default session's fetch tileable, and is
        attached to the default session.
        """
        from .fetch.core import DataFrameFetch

        self.execute()
        default_sess = get_default_session()
        self._detach_session(default_sess._session)

        fetch_tileable = default_sess._session._tileable_to_fetch[self]
        new = DataFrameFetch(
            # data_type names the OutputType member to use for the result.
            output_types=[getattr(OutputType, self.data_type)]
        ).new_tileable(
            [],
            _key=self.key,
            chunks=fetch_tileable.chunks,
            nsplits=fetch_tileable.nsplits,
            **self.data_params,
        )
        new._attach_session(default_sess._session)
        return new
2360
+
2361
+
2362
class DataFrameOrSeries(HasShapeTileable, _ToPandasMixin):
    """Entity that declares ``DataFrameOrSeriesData`` as its allowed data type."""

    type_name = "DataFrameOrSeries"
    _allow_data_type_ = (DataFrameOrSeriesData,)
    __slots__ = ()
2366
+
2367
+
2368
# Each tuple groups an entity class with its data class.
INDEX_TYPE = (Index, IndexData)
SERIES_TYPE = (Series, SeriesData)
DATAFRAME_OR_SERIES_TYPE = (DataFrameOrSeries, DataFrameOrSeriesData)
DATAFRAME_TYPE = (DataFrame, DataFrameData)
DATAFRAME_GROUPBY_TYPE = (DataFrameGroupBy, DataFrameGroupByData)
SERIES_GROUPBY_TYPE = (SeriesGroupBy, SeriesGroupByData)
# The generic GroupBy entity comes first, followed by both concrete groupby pairs.
GROUPBY_TYPE = (GroupBy, *DATAFRAME_GROUPBY_TYPE, *SERIES_GROUPBY_TYPE)
CATEGORICAL_TYPE = (Categorical, CategoricalData)
# NOTE(review): DATAFRAME_OR_SERIES_TYPE is not part of TILEABLE_TYPE -- confirm
# that this is intentional.
TILEABLE_TYPE = (
    *INDEX_TYPE,
    *SERIES_TYPE,
    *DATAFRAME_TYPE,
    *GROUPBY_TYPE,
    *CATEGORICAL_TYPE,
)

# Associate every OutputType member with its tileable classes.
register_output_types(OutputType.dataframe, DATAFRAME_TYPE)
register_output_types(OutputType.series, SERIES_TYPE)
register_output_types(OutputType.df_or_series, DATAFRAME_OR_SERIES_TYPE)
register_output_types(OutputType.index, INDEX_TYPE)
register_output_types(OutputType.categorical, CATEGORICAL_TYPE)
register_output_types(OutputType.dataframe_groupby, DATAFRAME_GROUPBY_TYPE)
register_output_types(OutputType.series_groupby, SERIES_GROUPBY_TYPE)