maxframe 2.2.0__cp312-cp312-macosx_10_9_universal2.whl

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (1094):
  1. maxframe/__init__.py +33 -0
  2. maxframe/_utils.cpython-312-darwin.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyi +21 -0
  5. maxframe/_utils.pyx +561 -0
  6. maxframe/codegen/__init__.py +27 -0
  7. maxframe/codegen/core.py +596 -0
  8. maxframe/codegen/spe/__init__.py +16 -0
  9. maxframe/codegen/spe/core.py +307 -0
  10. maxframe/codegen/spe/dataframe/__init__.py +38 -0
  11. maxframe/codegen/spe/dataframe/accessors/__init__.py +15 -0
  12. maxframe/codegen/spe/dataframe/accessors/base.py +71 -0
  13. maxframe/codegen/spe/dataframe/accessors/dict_.py +89 -0
  14. maxframe/codegen/spe/dataframe/accessors/list_.py +44 -0
  15. maxframe/codegen/spe/dataframe/accessors/struct_.py +28 -0
  16. maxframe/codegen/spe/dataframe/arithmetic.py +89 -0
  17. maxframe/codegen/spe/dataframe/datasource.py +181 -0
  18. maxframe/codegen/spe/dataframe/datastore.py +204 -0
  19. maxframe/codegen/spe/dataframe/extensions.py +63 -0
  20. maxframe/codegen/spe/dataframe/fetch.py +26 -0
  21. maxframe/codegen/spe/dataframe/groupby.py +312 -0
  22. maxframe/codegen/spe/dataframe/indexing.py +333 -0
  23. maxframe/codegen/spe/dataframe/merge.py +106 -0
  24. maxframe/codegen/spe/dataframe/misc.py +262 -0
  25. maxframe/codegen/spe/dataframe/missing.py +64 -0
  26. maxframe/codegen/spe/dataframe/reduction.py +165 -0
  27. maxframe/codegen/spe/dataframe/reshape.py +46 -0
  28. maxframe/codegen/spe/dataframe/sort.py +96 -0
  29. maxframe/codegen/spe/dataframe/statistics.py +46 -0
  30. maxframe/codegen/spe/dataframe/tests/__init__.py +13 -0
  31. maxframe/codegen/spe/dataframe/tests/accessors/__init__.py +13 -0
  32. maxframe/codegen/spe/dataframe/tests/accessors/test_base.py +33 -0
  33. maxframe/codegen/spe/dataframe/tests/accessors/test_dict.py +304 -0
  34. maxframe/codegen/spe/dataframe/tests/accessors/test_list.py +134 -0
  35. maxframe/codegen/spe/dataframe/tests/accessors/test_struct.py +75 -0
  36. maxframe/codegen/spe/dataframe/tests/indexing/__init__.py +13 -0
  37. maxframe/codegen/spe/dataframe/tests/indexing/conftest.py +58 -0
  38. maxframe/codegen/spe/dataframe/tests/indexing/test_getitem.py +124 -0
  39. maxframe/codegen/spe/dataframe/tests/indexing/test_iloc.py +95 -0
  40. maxframe/codegen/spe/dataframe/tests/indexing/test_indexing.py +39 -0
  41. maxframe/codegen/spe/dataframe/tests/indexing/test_loc.py +35 -0
  42. maxframe/codegen/spe/dataframe/tests/indexing/test_rename.py +51 -0
  43. maxframe/codegen/spe/dataframe/tests/indexing/test_reset_index.py +88 -0
  44. maxframe/codegen/spe/dataframe/tests/indexing/test_sample.py +45 -0
  45. maxframe/codegen/spe/dataframe/tests/indexing/test_set_axis.py +45 -0
  46. maxframe/codegen/spe/dataframe/tests/indexing/test_set_index.py +41 -0
  47. maxframe/codegen/spe/dataframe/tests/indexing/test_setitem.py +46 -0
  48. maxframe/codegen/spe/dataframe/tests/misc/__init__.py +13 -0
  49. maxframe/codegen/spe/dataframe/tests/misc/test_apply.py +133 -0
  50. maxframe/codegen/spe/dataframe/tests/misc/test_drop_duplicates.py +92 -0
  51. maxframe/codegen/spe/dataframe/tests/misc/test_misc.py +202 -0
  52. maxframe/codegen/spe/dataframe/tests/missing/__init__.py +13 -0
  53. maxframe/codegen/spe/dataframe/tests/missing/test_checkna.py +94 -0
  54. maxframe/codegen/spe/dataframe/tests/missing/test_dropna.py +50 -0
  55. maxframe/codegen/spe/dataframe/tests/missing/test_fillna.py +94 -0
  56. maxframe/codegen/spe/dataframe/tests/missing/test_replace.py +45 -0
  57. maxframe/codegen/spe/dataframe/tests/test_arithmetic.py +73 -0
  58. maxframe/codegen/spe/dataframe/tests/test_datasource.py +184 -0
  59. maxframe/codegen/spe/dataframe/tests/test_datastore.py +200 -0
  60. maxframe/codegen/spe/dataframe/tests/test_extensions.py +88 -0
  61. maxframe/codegen/spe/dataframe/tests/test_groupby.py +288 -0
  62. maxframe/codegen/spe/dataframe/tests/test_merge.py +426 -0
  63. maxframe/codegen/spe/dataframe/tests/test_reduction.py +104 -0
  64. maxframe/codegen/spe/dataframe/tests/test_reshape.py +79 -0
  65. maxframe/codegen/spe/dataframe/tests/test_sort.py +179 -0
  66. maxframe/codegen/spe/dataframe/tests/test_statistics.py +70 -0
  67. maxframe/codegen/spe/dataframe/tests/test_tseries.py +29 -0
  68. maxframe/codegen/spe/dataframe/tests/test_value_counts.py +60 -0
  69. maxframe/codegen/spe/dataframe/tests/test_window.py +69 -0
  70. maxframe/codegen/spe/dataframe/tseries.py +46 -0
  71. maxframe/codegen/spe/dataframe/udf.py +62 -0
  72. maxframe/codegen/spe/dataframe/value_counts.py +31 -0
  73. maxframe/codegen/spe/dataframe/window.py +65 -0
  74. maxframe/codegen/spe/learn/__init__.py +15 -0
  75. maxframe/codegen/spe/learn/contrib/__init__.py +15 -0
  76. maxframe/codegen/spe/learn/contrib/lightgbm.py +160 -0
  77. maxframe/codegen/spe/learn/contrib/models.py +41 -0
  78. maxframe/codegen/spe/learn/contrib/pytorch.py +49 -0
  79. maxframe/codegen/spe/learn/contrib/tests/__init__.py +13 -0
  80. maxframe/codegen/spe/learn/contrib/tests/test_lightgbm.py +123 -0
  81. maxframe/codegen/spe/learn/contrib/tests/test_models.py +41 -0
  82. maxframe/codegen/spe/learn/contrib/tests/test_pytorch.py +53 -0
  83. maxframe/codegen/spe/learn/contrib/tests/test_xgboost.py +99 -0
  84. maxframe/codegen/spe/learn/contrib/xgboost.py +152 -0
  85. maxframe/codegen/spe/learn/metrics/__init__.py +15 -0
  86. maxframe/codegen/spe/learn/metrics/_classification.py +120 -0
  87. maxframe/codegen/spe/learn/metrics/_ranking.py +76 -0
  88. maxframe/codegen/spe/learn/metrics/pairwise.py +51 -0
  89. maxframe/codegen/spe/learn/metrics/tests/__init__.py +13 -0
  90. maxframe/codegen/spe/learn/metrics/tests/test_classification.py +93 -0
  91. maxframe/codegen/spe/learn/metrics/tests/test_pairwise.py +36 -0
  92. maxframe/codegen/spe/learn/metrics/tests/test_ranking.py +59 -0
  93. maxframe/codegen/spe/learn/model_selection/__init__.py +13 -0
  94. maxframe/codegen/spe/learn/model_selection/tests/__init__.py +13 -0
  95. maxframe/codegen/spe/learn/model_selection/tests/test_split.py +41 -0
  96. maxframe/codegen/spe/learn/preprocessing/__init__.py +15 -0
  97. maxframe/codegen/spe/learn/preprocessing/_data.py +37 -0
  98. maxframe/codegen/spe/learn/preprocessing/_label.py +47 -0
  99. maxframe/codegen/spe/learn/preprocessing/tests/__init__.py +13 -0
  100. maxframe/codegen/spe/learn/preprocessing/tests/test_data.py +31 -0
  101. maxframe/codegen/spe/learn/preprocessing/tests/test_label.py +43 -0
  102. maxframe/codegen/spe/learn/utils/__init__.py +15 -0
  103. maxframe/codegen/spe/learn/utils/checks.py +55 -0
  104. maxframe/codegen/spe/learn/utils/multiclass.py +60 -0
  105. maxframe/codegen/spe/learn/utils/shuffle.py +85 -0
  106. maxframe/codegen/spe/learn/utils/sparsefuncs.py +35 -0
  107. maxframe/codegen/spe/learn/utils/tests/__init__.py +13 -0
  108. maxframe/codegen/spe/learn/utils/tests/test_checks.py +48 -0
  109. maxframe/codegen/spe/learn/utils/tests/test_multiclass.py +52 -0
  110. maxframe/codegen/spe/learn/utils/tests/test_shuffle.py +50 -0
  111. maxframe/codegen/spe/learn/utils/tests/test_sparsefuncs.py +34 -0
  112. maxframe/codegen/spe/learn/utils/tests/test_validation.py +44 -0
  113. maxframe/codegen/spe/learn/utils/validation.py +35 -0
  114. maxframe/codegen/spe/objects.py +26 -0
  115. maxframe/codegen/spe/remote.py +29 -0
  116. maxframe/codegen/spe/tensor/__init__.py +31 -0
  117. maxframe/codegen/spe/tensor/arithmetic.py +95 -0
  118. maxframe/codegen/spe/tensor/core.py +41 -0
  119. maxframe/codegen/spe/tensor/datasource.py +165 -0
  120. maxframe/codegen/spe/tensor/extensions.py +35 -0
  121. maxframe/codegen/spe/tensor/fetch.py +26 -0
  122. maxframe/codegen/spe/tensor/fft.py +74 -0
  123. maxframe/codegen/spe/tensor/indexing.py +63 -0
  124. maxframe/codegen/spe/tensor/linalg.py +90 -0
  125. maxframe/codegen/spe/tensor/merge.py +31 -0
  126. maxframe/codegen/spe/tensor/misc.py +175 -0
  127. maxframe/codegen/spe/tensor/random.py +29 -0
  128. maxframe/codegen/spe/tensor/reduction.py +39 -0
  129. maxframe/codegen/spe/tensor/reshape.py +26 -0
  130. maxframe/codegen/spe/tensor/sort.py +42 -0
  131. maxframe/codegen/spe/tensor/spatial.py +45 -0
  132. maxframe/codegen/spe/tensor/special.py +35 -0
  133. maxframe/codegen/spe/tensor/statistics.py +68 -0
  134. maxframe/codegen/spe/tensor/tests/__init__.py +13 -0
  135. maxframe/codegen/spe/tensor/tests/test_arithmetic.py +103 -0
  136. maxframe/codegen/spe/tensor/tests/test_datasource.py +99 -0
  137. maxframe/codegen/spe/tensor/tests/test_extensions.py +37 -0
  138. maxframe/codegen/spe/tensor/tests/test_fft.py +64 -0
  139. maxframe/codegen/spe/tensor/tests/test_indexing.py +44 -0
  140. maxframe/codegen/spe/tensor/tests/test_linalg.py +52 -0
  141. maxframe/codegen/spe/tensor/tests/test_merge.py +28 -0
  142. maxframe/codegen/spe/tensor/tests/test_misc.py +144 -0
  143. maxframe/codegen/spe/tensor/tests/test_random.py +55 -0
  144. maxframe/codegen/spe/tensor/tests/test_reduction.py +65 -0
  145. maxframe/codegen/spe/tensor/tests/test_reshape.py +39 -0
  146. maxframe/codegen/spe/tensor/tests/test_sort.py +49 -0
  147. maxframe/codegen/spe/tensor/tests/test_spatial.py +33 -0
  148. maxframe/codegen/spe/tensor/tests/test_special.py +28 -0
  149. maxframe/codegen/spe/tensor/tests/test_statistics.py +43 -0
  150. maxframe/codegen/spe/tests/__init__.py +13 -0
  151. maxframe/codegen/spe/tests/test_remote.py +29 -0
  152. maxframe/codegen/spe/tests/test_spe_codegen.py +135 -0
  153. maxframe/codegen/spe/utils.py +56 -0
  154. maxframe/codegen/tests/__init__.py +13 -0
  155. maxframe/codegen/tests/test_codegen.py +67 -0
  156. maxframe/config/__init__.py +15 -0
  157. maxframe/config/config.py +627 -0
  158. maxframe/config/tests/__init__.py +13 -0
  159. maxframe/config/tests/test_config.py +114 -0
  160. maxframe/config/tests/test_validators.py +46 -0
  161. maxframe/config/validators.py +142 -0
  162. maxframe/conftest.py +251 -0
  163. maxframe/core/__init__.py +53 -0
  164. maxframe/core/accessor.py +45 -0
  165. maxframe/core/base.py +156 -0
  166. maxframe/core/context.py +110 -0
  167. maxframe/core/entity/__init__.py +34 -0
  168. maxframe/core/entity/core.py +150 -0
  169. maxframe/core/entity/executable.py +337 -0
  170. maxframe/core/entity/objects.py +115 -0
  171. maxframe/core/entity/output_types.py +98 -0
  172. maxframe/core/entity/tests/__init__.py +13 -0
  173. maxframe/core/entity/tests/test_objects.py +42 -0
  174. maxframe/core/entity/tileables.py +367 -0
  175. maxframe/core/entity/utils.py +39 -0
  176. maxframe/core/graph/__init__.py +22 -0
  177. maxframe/core/graph/builder/__init__.py +15 -0
  178. maxframe/core/graph/builder/base.py +91 -0
  179. maxframe/core/graph/builder/tileable.py +34 -0
  180. maxframe/core/graph/builder/utils.py +37 -0
  181. maxframe/core/graph/core.cpython-312-darwin.so +0 -0
  182. maxframe/core/graph/core.pyx +478 -0
  183. maxframe/core/graph/entity.py +158 -0
  184. maxframe/core/graph/tests/__init__.py +13 -0
  185. maxframe/core/graph/tests/test_graph.py +205 -0
  186. maxframe/core/mode.py +96 -0
  187. maxframe/core/operator/__init__.py +32 -0
  188. maxframe/core/operator/base.py +480 -0
  189. maxframe/core/operator/core.py +307 -0
  190. maxframe/core/operator/fetch.py +40 -0
  191. maxframe/core/operator/objects.py +43 -0
  192. maxframe/core/operator/shuffle.py +45 -0
  193. maxframe/core/operator/tests/__init__.py +13 -0
  194. maxframe/core/operator/tests/test_core.py +64 -0
  195. maxframe/core/operator/utils.py +68 -0
  196. maxframe/core/tests/__init__.py +13 -0
  197. maxframe/core/tests/test_mode.py +75 -0
  198. maxframe/dataframe/__init__.py +89 -0
  199. maxframe/dataframe/accessors/__init__.py +15 -0
  200. maxframe/dataframe/accessors/compat.py +45 -0
  201. maxframe/dataframe/accessors/datetime_/__init__.py +35 -0
  202. maxframe/dataframe/accessors/datetime_/accessor.py +67 -0
  203. maxframe/dataframe/accessors/datetime_/core.py +82 -0
  204. maxframe/dataframe/accessors/datetime_/tests/__init__.py +13 -0
  205. maxframe/dataframe/accessors/datetime_/tests/test_datetime_accessor.py +41 -0
  206. maxframe/dataframe/accessors/dict_/__init__.py +43 -0
  207. maxframe/dataframe/accessors/dict_/accessor.py +39 -0
  208. maxframe/dataframe/accessors/dict_/contains.py +72 -0
  209. maxframe/dataframe/accessors/dict_/core.py +48 -0
  210. maxframe/dataframe/accessors/dict_/getitem.py +140 -0
  211. maxframe/dataframe/accessors/dict_/length.py +64 -0
  212. maxframe/dataframe/accessors/dict_/remove.py +75 -0
  213. maxframe/dataframe/accessors/dict_/setitem.py +79 -0
  214. maxframe/dataframe/accessors/dict_/tests/__init__.py +13 -0
  215. maxframe/dataframe/accessors/dict_/tests/test_dict_accessor.py +168 -0
  216. maxframe/dataframe/accessors/list_/__init__.py +37 -0
  217. maxframe/dataframe/accessors/list_/accessor.py +39 -0
  218. maxframe/dataframe/accessors/list_/core.py +48 -0
  219. maxframe/dataframe/accessors/list_/getitem.py +128 -0
  220. maxframe/dataframe/accessors/list_/length.py +64 -0
  221. maxframe/dataframe/accessors/list_/tests/__init__.py +13 -0
  222. maxframe/dataframe/accessors/list_/tests/test_list_accessor.py +81 -0
  223. maxframe/dataframe/accessors/plotting/__init__.py +40 -0
  224. maxframe/dataframe/accessors/plotting/core.py +78 -0
  225. maxframe/dataframe/accessors/plotting/tests/__init__.py +13 -0
  226. maxframe/dataframe/accessors/plotting/tests/test_plotting_accessor.py +136 -0
  227. maxframe/dataframe/accessors/string_/__init__.py +36 -0
  228. maxframe/dataframe/accessors/string_/accessor.py +215 -0
  229. maxframe/dataframe/accessors/string_/core.py +224 -0
  230. maxframe/dataframe/accessors/string_/tests/__init__.py +13 -0
  231. maxframe/dataframe/accessors/string_/tests/test_string_accessor.py +73 -0
  232. maxframe/dataframe/accessors/struct_/__init__.py +37 -0
  233. maxframe/dataframe/accessors/struct_/accessor.py +39 -0
  234. maxframe/dataframe/accessors/struct_/core.py +43 -0
  235. maxframe/dataframe/accessors/struct_/dtypes.py +53 -0
  236. maxframe/dataframe/accessors/struct_/field.py +123 -0
  237. maxframe/dataframe/accessors/struct_/tests/__init__.py +13 -0
  238. maxframe/dataframe/accessors/struct_/tests/test_struct_accessor.py +91 -0
  239. maxframe/dataframe/arithmetic/__init__.py +369 -0
  240. maxframe/dataframe/arithmetic/abs.py +33 -0
  241. maxframe/dataframe/arithmetic/add.py +60 -0
  242. maxframe/dataframe/arithmetic/arccos.py +28 -0
  243. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  244. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  245. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  246. maxframe/dataframe/arithmetic/arctan.py +28 -0
  247. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  248. maxframe/dataframe/arithmetic/between.py +106 -0
  249. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  250. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  251. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  252. maxframe/dataframe/arithmetic/ceil.py +28 -0
  253. maxframe/dataframe/arithmetic/core.py +361 -0
  254. maxframe/dataframe/arithmetic/cos.py +28 -0
  255. maxframe/dataframe/arithmetic/cosh.py +28 -0
  256. maxframe/dataframe/arithmetic/degrees.py +28 -0
  257. maxframe/dataframe/arithmetic/docstring.py +416 -0
  258. maxframe/dataframe/arithmetic/dot.py +237 -0
  259. maxframe/dataframe/arithmetic/equal.py +58 -0
  260. maxframe/dataframe/arithmetic/exp.py +28 -0
  261. maxframe/dataframe/arithmetic/exp2.py +28 -0
  262. maxframe/dataframe/arithmetic/expm1.py +28 -0
  263. maxframe/dataframe/arithmetic/floor.py +28 -0
  264. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  265. maxframe/dataframe/arithmetic/greater.py +59 -0
  266. maxframe/dataframe/arithmetic/greater_equal.py +59 -0
  267. maxframe/dataframe/arithmetic/invert.py +33 -0
  268. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  269. maxframe/dataframe/arithmetic/less.py +57 -0
  270. maxframe/dataframe/arithmetic/less_equal.py +59 -0
  271. maxframe/dataframe/arithmetic/log.py +28 -0
  272. maxframe/dataframe/arithmetic/log10.py +28 -0
  273. maxframe/dataframe/arithmetic/log2.py +28 -0
  274. maxframe/dataframe/arithmetic/mod.py +60 -0
  275. maxframe/dataframe/arithmetic/multiply.py +60 -0
  276. maxframe/dataframe/arithmetic/negative.py +33 -0
  277. maxframe/dataframe/arithmetic/not_equal.py +58 -0
  278. maxframe/dataframe/arithmetic/power.py +68 -0
  279. maxframe/dataframe/arithmetic/radians.py +28 -0
  280. maxframe/dataframe/arithmetic/round.py +144 -0
  281. maxframe/dataframe/arithmetic/sin.py +28 -0
  282. maxframe/dataframe/arithmetic/sinh.py +28 -0
  283. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  284. maxframe/dataframe/arithmetic/subtract.py +64 -0
  285. maxframe/dataframe/arithmetic/tan.py +28 -0
  286. maxframe/dataframe/arithmetic/tanh.py +28 -0
  287. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  288. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +724 -0
  289. maxframe/dataframe/arithmetic/truediv.py +64 -0
  290. maxframe/dataframe/arithmetic/trunc.py +28 -0
  291. maxframe/dataframe/arrays.py +864 -0
  292. maxframe/dataframe/core.py +2393 -0
  293. maxframe/dataframe/datasource/__init__.py +33 -0
  294. maxframe/dataframe/datasource/core.py +88 -0
  295. maxframe/dataframe/datasource/dataframe.py +59 -0
  296. maxframe/dataframe/datasource/date_range.py +512 -0
  297. maxframe/dataframe/datasource/from_dict.py +124 -0
  298. maxframe/dataframe/datasource/from_index.py +58 -0
  299. maxframe/dataframe/datasource/from_records.py +191 -0
  300. maxframe/dataframe/datasource/from_tensor.py +498 -0
  301. maxframe/dataframe/datasource/index.py +117 -0
  302. maxframe/dataframe/datasource/read_csv.py +533 -0
  303. maxframe/dataframe/datasource/read_odps_query.py +513 -0
  304. maxframe/dataframe/datasource/read_odps_table.py +273 -0
  305. maxframe/dataframe/datasource/read_parquet.py +426 -0
  306. maxframe/dataframe/datasource/series.py +55 -0
  307. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  308. maxframe/dataframe/datasource/tests/test_datasource.py +663 -0
  309. maxframe/dataframe/datastore/__init__.py +30 -0
  310. maxframe/dataframe/datastore/core.py +19 -0
  311. maxframe/dataframe/datastore/tests/__init__.py +13 -0
  312. maxframe/dataframe/datastore/tests/test_to_odps.py +99 -0
  313. maxframe/dataframe/datastore/to_csv.py +219 -0
  314. maxframe/dataframe/datastore/to_odps.py +258 -0
  315. maxframe/dataframe/extensions/__init__.py +70 -0
  316. maxframe/dataframe/extensions/accessor.py +35 -0
  317. maxframe/dataframe/extensions/apply_chunk.py +733 -0
  318. maxframe/dataframe/extensions/cartesian_chunk.py +153 -0
  319. maxframe/dataframe/extensions/collect_kv.py +126 -0
  320. maxframe/dataframe/extensions/extract_kv.py +177 -0
  321. maxframe/dataframe/extensions/flatjson.py +132 -0
  322. maxframe/dataframe/extensions/flatmap.py +329 -0
  323. maxframe/dataframe/extensions/map_reduce.py +263 -0
  324. maxframe/dataframe/extensions/rebalance.py +62 -0
  325. maxframe/dataframe/extensions/reshuffle.py +83 -0
  326. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  327. maxframe/dataframe/extensions/tests/test_apply_chunk.py +194 -0
  328. maxframe/dataframe/extensions/tests/test_extensions.py +198 -0
  329. maxframe/dataframe/extensions/tests/test_map_reduce.py +135 -0
  330. maxframe/dataframe/fetch/__init__.py +15 -0
  331. maxframe/dataframe/fetch/core.py +97 -0
  332. maxframe/dataframe/groupby/__init__.py +101 -0
  333. maxframe/dataframe/groupby/aggregation.py +437 -0
  334. maxframe/dataframe/groupby/apply.py +235 -0
  335. maxframe/dataframe/groupby/apply_chunk.py +409 -0
  336. maxframe/dataframe/groupby/core.py +326 -0
  337. maxframe/dataframe/groupby/cum.py +102 -0
  338. maxframe/dataframe/groupby/expanding.py +264 -0
  339. maxframe/dataframe/groupby/extensions.py +26 -0
  340. maxframe/dataframe/groupby/fill.py +149 -0
  341. maxframe/dataframe/groupby/getitem.py +105 -0
  342. maxframe/dataframe/groupby/head.py +115 -0
  343. maxframe/dataframe/groupby/rank.py +136 -0
  344. maxframe/dataframe/groupby/rolling.py +206 -0
  345. maxframe/dataframe/groupby/sample.py +214 -0
  346. maxframe/dataframe/groupby/shift.py +114 -0
  347. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  348. maxframe/dataframe/groupby/tests/test_groupby.py +373 -0
  349. maxframe/dataframe/groupby/transform.py +264 -0
  350. maxframe/dataframe/indexing/__init__.py +103 -0
  351. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  352. maxframe/dataframe/indexing/align.py +350 -0
  353. maxframe/dataframe/indexing/at.py +83 -0
  354. maxframe/dataframe/indexing/droplevel.py +195 -0
  355. maxframe/dataframe/indexing/filter.py +169 -0
  356. maxframe/dataframe/indexing/get_level_values.py +76 -0
  357. maxframe/dataframe/indexing/getitem.py +205 -0
  358. maxframe/dataframe/indexing/iat.py +82 -0
  359. maxframe/dataframe/indexing/iloc.py +711 -0
  360. maxframe/dataframe/indexing/insert.py +90 -0
  361. maxframe/dataframe/indexing/loc.py +694 -0
  362. maxframe/dataframe/indexing/reindex.py +541 -0
  363. maxframe/dataframe/indexing/rename.py +445 -0
  364. maxframe/dataframe/indexing/rename_axis.py +217 -0
  365. maxframe/dataframe/indexing/reorder_levels.py +143 -0
  366. maxframe/dataframe/indexing/reset_index.py +427 -0
  367. maxframe/dataframe/indexing/sample.py +232 -0
  368. maxframe/dataframe/indexing/set_axis.py +197 -0
  369. maxframe/dataframe/indexing/set_index.py +128 -0
  370. maxframe/dataframe/indexing/setitem.py +133 -0
  371. maxframe/dataframe/indexing/swaplevel.py +185 -0
  372. maxframe/dataframe/indexing/take.py +99 -0
  373. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  374. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  375. maxframe/dataframe/indexing/truncate.py +140 -0
  376. maxframe/dataframe/indexing/where.py +300 -0
  377. maxframe/dataframe/indexing/xs.py +148 -0
  378. maxframe/dataframe/initializer.py +298 -0
  379. maxframe/dataframe/merge/__init__.py +50 -0
  380. maxframe/dataframe/merge/append.py +120 -0
  381. maxframe/dataframe/merge/combine_first.py +120 -0
  382. maxframe/dataframe/merge/compare.py +387 -0
  383. maxframe/dataframe/merge/concat.py +500 -0
  384. maxframe/dataframe/merge/merge.py +806 -0
  385. maxframe/dataframe/merge/tests/__init__.py +13 -0
  386. maxframe/dataframe/merge/tests/test_merge.py +390 -0
  387. maxframe/dataframe/merge/update.py +271 -0
  388. maxframe/dataframe/misc/__init__.py +131 -0
  389. maxframe/dataframe/misc/_duplicate.py +56 -0
  390. maxframe/dataframe/misc/apply.py +730 -0
  391. maxframe/dataframe/misc/astype.py +237 -0
  392. maxframe/dataframe/misc/case_when.py +145 -0
  393. maxframe/dataframe/misc/check_monotonic.py +84 -0
  394. maxframe/dataframe/misc/check_unique.py +51 -0
  395. maxframe/dataframe/misc/clip.py +145 -0
  396. maxframe/dataframe/misc/cut.py +386 -0
  397. maxframe/dataframe/misc/describe.py +278 -0
  398. maxframe/dataframe/misc/diff.py +210 -0
  399. maxframe/dataframe/misc/drop.py +442 -0
  400. maxframe/dataframe/misc/drop_duplicates.py +251 -0
  401. maxframe/dataframe/misc/duplicated.py +292 -0
  402. maxframe/dataframe/misc/eval.py +730 -0
  403. maxframe/dataframe/misc/explode.py +171 -0
  404. maxframe/dataframe/misc/get_dummies.py +241 -0
  405. maxframe/dataframe/misc/isin.py +220 -0
  406. maxframe/dataframe/misc/map.py +347 -0
  407. maxframe/dataframe/misc/memory_usage.py +248 -0
  408. maxframe/dataframe/misc/pct_change.py +68 -0
  409. maxframe/dataframe/misc/qcut.py +104 -0
  410. maxframe/dataframe/misc/rechunk.py +59 -0
  411. maxframe/dataframe/misc/select_dtypes.py +104 -0
  412. maxframe/dataframe/misc/shift.py +259 -0
  413. maxframe/dataframe/misc/tests/__init__.py +13 -0
  414. maxframe/dataframe/misc/tests/test_misc.py +615 -0
  415. maxframe/dataframe/misc/to_numeric.py +181 -0
  416. maxframe/dataframe/misc/transform.py +346 -0
  417. maxframe/dataframe/misc/transpose.py +148 -0
  418. maxframe/dataframe/misc/valid_index.py +115 -0
  419. maxframe/dataframe/misc/value_counts.py +206 -0
  420. maxframe/dataframe/missing/__init__.py +53 -0
  421. maxframe/dataframe/missing/checkna.py +230 -0
  422. maxframe/dataframe/missing/dropna.py +294 -0
  423. maxframe/dataframe/missing/fillna.py +283 -0
  424. maxframe/dataframe/missing/replace.py +446 -0
  425. maxframe/dataframe/missing/tests/__init__.py +13 -0
  426. maxframe/dataframe/missing/tests/test_missing.py +90 -0
  427. maxframe/dataframe/operators.py +231 -0
  428. maxframe/dataframe/reduction/__init__.py +124 -0
  429. maxframe/dataframe/reduction/aggregation.py +499 -0
  430. maxframe/dataframe/reduction/all.py +78 -0
  431. maxframe/dataframe/reduction/any.py +78 -0
  432. maxframe/dataframe/reduction/argmax.py +100 -0
  433. maxframe/dataframe/reduction/argmin.py +100 -0
  434. maxframe/dataframe/reduction/core.py +898 -0
  435. maxframe/dataframe/reduction/count.py +63 -0
  436. maxframe/dataframe/reduction/cov.py +166 -0
  437. maxframe/dataframe/reduction/cummax.py +30 -0
  438. maxframe/dataframe/reduction/cummin.py +30 -0
  439. maxframe/dataframe/reduction/cumprod.py +30 -0
  440. maxframe/dataframe/reduction/cumsum.py +30 -0
  441. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  442. maxframe/dataframe/reduction/idxmax.py +185 -0
  443. maxframe/dataframe/reduction/idxmin.py +185 -0
  444. maxframe/dataframe/reduction/kurtosis.py +111 -0
  445. maxframe/dataframe/reduction/max.py +65 -0
  446. maxframe/dataframe/reduction/mean.py +63 -0
  447. maxframe/dataframe/reduction/median.py +56 -0
  448. maxframe/dataframe/reduction/min.py +65 -0
  449. maxframe/dataframe/reduction/nunique.py +142 -0
  450. maxframe/dataframe/reduction/prod.py +81 -0
  451. maxframe/dataframe/reduction/reduction_size.py +36 -0
  452. maxframe/dataframe/reduction/sem.py +73 -0
  453. maxframe/dataframe/reduction/skew.py +93 -0
  454. maxframe/dataframe/reduction/std.py +53 -0
  455. maxframe/dataframe/reduction/str_concat.py +51 -0
  456. maxframe/dataframe/reduction/sum.py +81 -0
  457. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  458. maxframe/dataframe/reduction/tests/test_reduction.py +529 -0
  459. maxframe/dataframe/reduction/unique.py +153 -0
  460. maxframe/dataframe/reduction/var.py +76 -0
  461. maxframe/dataframe/reshape/__init__.py +38 -0
  462. maxframe/dataframe/reshape/melt.py +169 -0
  463. maxframe/dataframe/reshape/pivot.py +233 -0
  464. maxframe/dataframe/reshape/pivot_table.py +275 -0
  465. maxframe/dataframe/reshape/stack.py +240 -0
  466. maxframe/dataframe/reshape/unstack.py +114 -0
  467. maxframe/dataframe/sort/__init__.py +42 -0
  468. maxframe/dataframe/sort/argsort.py +62 -0
  469. maxframe/dataframe/sort/core.py +37 -0
  470. maxframe/dataframe/sort/nlargest.py +238 -0
  471. maxframe/dataframe/sort/nsmallest.py +228 -0
  472. maxframe/dataframe/sort/sort_index.py +153 -0
  473. maxframe/dataframe/sort/sort_values.py +301 -0
  474. maxframe/dataframe/sort/tests/__init__.py +13 -0
  475. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  476. maxframe/dataframe/statistics/__init__.py +33 -0
  477. maxframe/dataframe/statistics/corr.py +284 -0
  478. maxframe/dataframe/statistics/quantile.py +338 -0
  479. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  480. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  481. maxframe/dataframe/tests/__init__.py +13 -0
  482. maxframe/dataframe/tests/test_initializer.py +60 -0
  483. maxframe/dataframe/tests/test_typing.py +104 -0
  484. maxframe/dataframe/tests/test_utils.py +165 -0
  485. maxframe/dataframe/tseries/__init__.py +13 -0
  486. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  487. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  488. maxframe/dataframe/tseries/to_datetime.py +299 -0
  489. maxframe/dataframe/typing_.py +185 -0
  490. maxframe/dataframe/ufunc/__init__.py +27 -0
  491. maxframe/dataframe/ufunc/tensor.py +54 -0
  492. maxframe/dataframe/ufunc/ufunc.py +53 -0
  493. maxframe/dataframe/utils.py +1647 -0
  494. maxframe/dataframe/window/__init__.py +29 -0
  495. maxframe/dataframe/window/aggregation.py +100 -0
  496. maxframe/dataframe/window/core.py +82 -0
  497. maxframe/dataframe/window/ewm.py +247 -0
  498. maxframe/dataframe/window/expanding.py +151 -0
  499. maxframe/dataframe/window/rolling.py +389 -0
  500. maxframe/dataframe/window/tests/__init__.py +13 -0
  501. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  502. maxframe/dataframe/window/tests/test_expanding.py +60 -0
  503. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  504. maxframe/env.py +37 -0
  505. maxframe/errors.py +47 -0
  506. maxframe/extension.py +107 -0
  507. maxframe/io/__init__.py +13 -0
  508. maxframe/io/objects/__init__.py +24 -0
  509. maxframe/io/objects/core.py +156 -0
  510. maxframe/io/objects/tensor.py +132 -0
  511. maxframe/io/objects/tests/__init__.py +13 -0
  512. maxframe/io/objects/tests/test_object_io.py +79 -0
  513. maxframe/io/odpsio/__init__.py +23 -0
  514. maxframe/io/odpsio/arrow.py +161 -0
  515. maxframe/io/odpsio/schema.py +496 -0
  516. maxframe/io/odpsio/tableio.py +727 -0
  517. maxframe/io/odpsio/tests/__init__.py +13 -0
  518. maxframe/io/odpsio/tests/test_arrow.py +132 -0
  519. maxframe/io/odpsio/tests/test_schema.py +580 -0
  520. maxframe/io/odpsio/tests/test_tableio.py +205 -0
  521. maxframe/io/odpsio/tests/test_volumeio.py +75 -0
  522. maxframe/io/odpsio/volumeio.py +102 -0
  523. maxframe/learn/__init__.py +25 -0
  524. maxframe/learn/cluster/__init__.py +15 -0
  525. maxframe/learn/cluster/_kmeans.py +782 -0
  526. maxframe/learn/contrib/__init__.py +17 -0
  527. maxframe/learn/contrib/graph/__init__.py +15 -0
  528. maxframe/learn/contrib/graph/connected_components.py +216 -0
  529. maxframe/learn/contrib/graph/tests/__init__.py +13 -0
  530. maxframe/learn/contrib/graph/tests/test_connected_components.py +53 -0
  531. maxframe/learn/contrib/lightgbm/__init__.py +33 -0
  532. maxframe/learn/contrib/lightgbm/_predict.py +138 -0
  533. maxframe/learn/contrib/lightgbm/_train.py +163 -0
  534. maxframe/learn/contrib/lightgbm/callback.py +114 -0
  535. maxframe/learn/contrib/lightgbm/classifier.py +199 -0
  536. maxframe/learn/contrib/lightgbm/core.py +372 -0
  537. maxframe/learn/contrib/lightgbm/dataset.py +153 -0
  538. maxframe/learn/contrib/lightgbm/regressor.py +29 -0
  539. maxframe/learn/contrib/lightgbm/tests/__init__.py +13 -0
  540. maxframe/learn/contrib/lightgbm/tests/test_callback.py +58 -0
  541. maxframe/learn/contrib/llm/__init__.py +17 -0
  542. maxframe/learn/contrib/llm/core.py +77 -0
  543. maxframe/learn/contrib/llm/models/__init__.py +15 -0
  544. maxframe/learn/contrib/llm/models/dashscope.py +108 -0
  545. maxframe/learn/contrib/llm/models/managed.py +54 -0
  546. maxframe/learn/contrib/llm/multi_modal.py +135 -0
  547. maxframe/learn/contrib/llm/text.py +302 -0
  548. maxframe/learn/contrib/models.py +106 -0
  549. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  550. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  551. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  552. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  553. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  554. maxframe/learn/contrib/utils.py +108 -0
  555. maxframe/learn/contrib/xgboost/__init__.py +33 -0
  556. maxframe/learn/contrib/xgboost/callback.py +86 -0
  557. maxframe/learn/contrib/xgboost/classifier.py +117 -0
  558. maxframe/learn/contrib/xgboost/core.py +445 -0
  559. maxframe/learn/contrib/xgboost/dmatrix.py +157 -0
  560. maxframe/learn/contrib/xgboost/predict.py +131 -0
  561. maxframe/learn/contrib/xgboost/regressor.py +86 -0
  562. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  563. maxframe/learn/contrib/xgboost/tests/test_callback.py +41 -0
  564. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  565. maxframe/learn/contrib/xgboost/train.py +179 -0
  566. maxframe/learn/core.py +344 -0
  567. maxframe/learn/datasets/__init__.py +20 -0
  568. maxframe/learn/datasets/samples_generator.py +628 -0
  569. maxframe/learn/linear_model/__init__.py +15 -0
  570. maxframe/learn/linear_model/_base.py +220 -0
  571. maxframe/learn/linear_model/_lin_reg.py +175 -0
  572. maxframe/learn/metrics/__init__.py +31 -0
  573. maxframe/learn/metrics/_check_targets.py +95 -0
  574. maxframe/learn/metrics/_classification.py +1266 -0
  575. maxframe/learn/metrics/_ranking.py +477 -0
  576. maxframe/learn/metrics/_regression.py +256 -0
  577. maxframe/learn/metrics/_scorer.py +60 -0
  578. maxframe/learn/metrics/pairwise/__init__.py +21 -0
  579. maxframe/learn/metrics/pairwise/core.py +77 -0
  580. maxframe/learn/metrics/pairwise/cosine.py +115 -0
  581. maxframe/learn/metrics/pairwise/euclidean.py +176 -0
  582. maxframe/learn/metrics/pairwise/haversine.py +96 -0
  583. maxframe/learn/metrics/pairwise/manhattan.py +80 -0
  584. maxframe/learn/metrics/pairwise/pairwise.py +127 -0
  585. maxframe/learn/metrics/pairwise/pairwise_distances_topk.py +121 -0
  586. maxframe/learn/metrics/pairwise/rbf_kernel.py +51 -0
  587. maxframe/learn/metrics/tests/__init__.py +13 -0
  588. maxframe/learn/metrics/tests/test_scorer.py +26 -0
  589. maxframe/learn/model_selection/__init__.py +15 -0
  590. maxframe/learn/model_selection/_split.py +451 -0
  591. maxframe/learn/model_selection/tests/__init__.py +13 -0
  592. maxframe/learn/model_selection/tests/test_split.py +156 -0
  593. maxframe/learn/preprocessing/__init__.py +16 -0
  594. maxframe/learn/preprocessing/_data/__init__.py +17 -0
  595. maxframe/learn/preprocessing/_data/min_max_scaler.py +390 -0
  596. maxframe/learn/preprocessing/_data/normalize.py +127 -0
  597. maxframe/learn/preprocessing/_data/standard_scaler.py +503 -0
  598. maxframe/learn/preprocessing/_data/utils.py +79 -0
  599. maxframe/learn/preprocessing/_label/__init__.py +16 -0
  600. maxframe/learn/preprocessing/_label/_label_binarizer.py +599 -0
  601. maxframe/learn/preprocessing/_label/_label_encoder.py +174 -0
  602. maxframe/learn/utils/__init__.py +19 -0
  603. maxframe/learn/utils/_encode.py +314 -0
  604. maxframe/learn/utils/checks.py +160 -0
  605. maxframe/learn/utils/core.py +121 -0
  606. maxframe/learn/utils/extmath.py +213 -0
  607. maxframe/learn/utils/multiclass.py +292 -0
  608. maxframe/learn/utils/odpsio.py +193 -0
  609. maxframe/learn/utils/shuffle.py +114 -0
  610. maxframe/learn/utils/sparsefuncs.py +87 -0
  611. maxframe/learn/utils/validation.py +775 -0
  612. maxframe/lib/__init__.py +13 -0
  613. maxframe/lib/aio/__init__.py +27 -0
  614. maxframe/lib/aio/_runners.py +162 -0
  615. maxframe/lib/aio/_threads.py +35 -0
  616. maxframe/lib/aio/base.py +82 -0
  617. maxframe/lib/aio/file.py +85 -0
  618. maxframe/lib/aio/isolation.py +100 -0
  619. maxframe/lib/aio/lru.py +242 -0
  620. maxframe/lib/aio/parallelism.py +37 -0
  621. maxframe/lib/aio/tests/__init__.py +13 -0
  622. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  623. maxframe/lib/compat.py +185 -0
  624. maxframe/lib/compression.py +55 -0
  625. maxframe/lib/cython/__init__.py +13 -0
  626. maxframe/lib/cython/libcpp.pxd +30 -0
  627. maxframe/lib/dtypes_extension/__init__.py +30 -0
  628. maxframe/lib/dtypes_extension/_fake_arrow_dtype.py +604 -0
  629. maxframe/lib/dtypes_extension/blob.py +304 -0
  630. maxframe/lib/dtypes_extension/dtypes.py +106 -0
  631. maxframe/lib/dtypes_extension/tests/__init__.py +13 -0
  632. maxframe/lib/dtypes_extension/tests/test_blob.py +88 -0
  633. maxframe/lib/dtypes_extension/tests/test_dtypes.py +63 -0
  634. maxframe/lib/dtypes_extension/tests/test_fake_arrow_dtype.py +75 -0
  635. maxframe/lib/filesystem/__init__.py +21 -0
  636. maxframe/lib/filesystem/_glob.py +173 -0
  637. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  638. maxframe/lib/filesystem/_oss_lib/common.py +270 -0
  639. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  640. maxframe/lib/filesystem/_oss_lib/handle.py +152 -0
  641. maxframe/lib/filesystem/arrow.py +236 -0
  642. maxframe/lib/filesystem/base.py +263 -0
  643. maxframe/lib/filesystem/core.py +95 -0
  644. maxframe/lib/filesystem/fsmap.py +164 -0
  645. maxframe/lib/filesystem/hdfs.py +31 -0
  646. maxframe/lib/filesystem/local.py +112 -0
  647. maxframe/lib/filesystem/oss.py +226 -0
  648. maxframe/lib/filesystem/tests/__init__.py +13 -0
  649. maxframe/lib/filesystem/tests/test_filesystem.py +225 -0
  650. maxframe/lib/filesystem/tests/test_oss.py +220 -0
  651. maxframe/lib/functools_compat.py +81 -0
  652. maxframe/lib/mmh3.cpython-312-darwin.so +0 -0
  653. maxframe/lib/mmh3.pyi +43 -0
  654. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  655. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  656. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  657. maxframe/lib/sparse/__init__.py +856 -0
  658. maxframe/lib/sparse/array.py +1616 -0
  659. maxframe/lib/sparse/core.py +90 -0
  660. maxframe/lib/sparse/linalg.py +31 -0
  661. maxframe/lib/sparse/matrix.py +244 -0
  662. maxframe/lib/sparse/tests/__init__.py +13 -0
  663. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  664. maxframe/lib/sparse/vector.py +148 -0
  665. maxframe/lib/tblib/LICENSE +20 -0
  666. maxframe/lib/tblib/__init__.py +327 -0
  667. maxframe/lib/tblib/cpython.py +83 -0
  668. maxframe/lib/tblib/decorators.py +44 -0
  669. maxframe/lib/tblib/pickling_support.py +90 -0
  670. maxframe/lib/tests/__init__.py +13 -0
  671. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  672. maxframe/lib/version.py +620 -0
  673. maxframe/lib/wrapped_pickle.py +150 -0
  674. maxframe/mixin.py +157 -0
  675. maxframe/opcodes.py +649 -0
  676. maxframe/protocol.py +607 -0
  677. maxframe/remote/__init__.py +18 -0
  678. maxframe/remote/core.py +208 -0
  679. maxframe/remote/run_script.py +124 -0
  680. maxframe/serialization/__init__.py +39 -0
  681. maxframe/serialization/arrow.py +120 -0
  682. maxframe/serialization/blob.py +32 -0
  683. maxframe/serialization/core.cpython-312-darwin.so +0 -0
  684. maxframe/serialization/core.pxd +50 -0
  685. maxframe/serialization/core.pyi +66 -0
  686. maxframe/serialization/core.pyx +1265 -0
  687. maxframe/serialization/exception.py +84 -0
  688. maxframe/serialization/maxframe_objects.py +39 -0
  689. maxframe/serialization/numpy.py +110 -0
  690. maxframe/serialization/pandas.py +278 -0
  691. maxframe/serialization/scipy.py +71 -0
  692. maxframe/serialization/serializables/__init__.py +55 -0
  693. maxframe/serialization/serializables/core.py +469 -0
  694. maxframe/serialization/serializables/field.py +624 -0
  695. maxframe/serialization/serializables/field_type.py +592 -0
  696. maxframe/serialization/serializables/tests/__init__.py +13 -0
  697. maxframe/serialization/serializables/tests/test_field_type.py +119 -0
  698. maxframe/serialization/serializables/tests/test_serializable.py +313 -0
  699. maxframe/serialization/tests/__init__.py +13 -0
  700. maxframe/serialization/tests/test_serial.py +487 -0
  701. maxframe/session.py +1250 -0
  702. maxframe/sperunner.py +165 -0
  703. maxframe/tensor/__init__.py +325 -0
  704. maxframe/tensor/arithmetic/__init__.py +322 -0
  705. maxframe/tensor/arithmetic/abs.py +66 -0
  706. maxframe/tensor/arithmetic/absolute.py +66 -0
  707. maxframe/tensor/arithmetic/add.py +112 -0
  708. maxframe/tensor/arithmetic/angle.py +70 -0
  709. maxframe/tensor/arithmetic/arccos.py +101 -0
  710. maxframe/tensor/arithmetic/arccosh.py +89 -0
  711. maxframe/tensor/arithmetic/arcsin.py +92 -0
  712. maxframe/tensor/arithmetic/arcsinh.py +84 -0
  713. maxframe/tensor/arithmetic/arctan.py +104 -0
  714. maxframe/tensor/arithmetic/arctan2.py +126 -0
  715. maxframe/tensor/arithmetic/arctanh.py +84 -0
  716. maxframe/tensor/arithmetic/around.py +112 -0
  717. maxframe/tensor/arithmetic/bitand.py +93 -0
  718. maxframe/tensor/arithmetic/bitor.py +100 -0
  719. maxframe/tensor/arithmetic/bitxor.py +93 -0
  720. maxframe/tensor/arithmetic/cbrt.py +64 -0
  721. maxframe/tensor/arithmetic/ceil.py +69 -0
  722. maxframe/tensor/arithmetic/clip.py +165 -0
  723. maxframe/tensor/arithmetic/conj.py +72 -0
  724. maxframe/tensor/arithmetic/copysign.py +76 -0
  725. maxframe/tensor/arithmetic/core.py +552 -0
  726. maxframe/tensor/arithmetic/cos.py +83 -0
  727. maxframe/tensor/arithmetic/cosh.py +70 -0
  728. maxframe/tensor/arithmetic/deg2rad.py +70 -0
  729. maxframe/tensor/arithmetic/degrees.py +75 -0
  730. maxframe/tensor/arithmetic/divide.py +112 -0
  731. maxframe/tensor/arithmetic/equal.py +74 -0
  732. maxframe/tensor/arithmetic/exp.py +104 -0
  733. maxframe/tensor/arithmetic/exp2.py +65 -0
  734. maxframe/tensor/arithmetic/expm1.py +77 -0
  735. maxframe/tensor/arithmetic/fabs.py +72 -0
  736. maxframe/tensor/arithmetic/fix.py +67 -0
  737. maxframe/tensor/arithmetic/float_power.py +101 -0
  738. maxframe/tensor/arithmetic/floor.py +75 -0
  739. maxframe/tensor/arithmetic/floordiv.py +92 -0
  740. maxframe/tensor/arithmetic/fmax.py +103 -0
  741. maxframe/tensor/arithmetic/fmin.py +104 -0
  742. maxframe/tensor/arithmetic/fmod.py +97 -0
  743. maxframe/tensor/arithmetic/frexp.py +96 -0
  744. maxframe/tensor/arithmetic/greater.py +75 -0
  745. maxframe/tensor/arithmetic/greater_equal.py +67 -0
  746. maxframe/tensor/arithmetic/hypot.py +75 -0
  747. maxframe/tensor/arithmetic/i0.py +87 -0
  748. maxframe/tensor/arithmetic/imag.py +65 -0
  749. maxframe/tensor/arithmetic/invert.py +108 -0
  750. maxframe/tensor/arithmetic/isclose.py +114 -0
  751. maxframe/tensor/arithmetic/iscomplex.py +62 -0
  752. maxframe/tensor/arithmetic/iscomplexobj.py +53 -0
  753. maxframe/tensor/arithmetic/isfinite.py +104 -0
  754. maxframe/tensor/arithmetic/isinf.py +101 -0
  755. maxframe/tensor/arithmetic/isnan.py +80 -0
  756. maxframe/tensor/arithmetic/isreal.py +61 -0
  757. maxframe/tensor/arithmetic/ldexp.py +97 -0
  758. maxframe/tensor/arithmetic/less.py +67 -0
  759. maxframe/tensor/arithmetic/less_equal.py +67 -0
  760. maxframe/tensor/arithmetic/log.py +90 -0
  761. maxframe/tensor/arithmetic/log10.py +83 -0
  762. maxframe/tensor/arithmetic/log1p.py +93 -0
  763. maxframe/tensor/arithmetic/log2.py +83 -0
  764. maxframe/tensor/arithmetic/logaddexp.py +78 -0
  765. maxframe/tensor/arithmetic/logaddexp2.py +76 -0
  766. maxframe/tensor/arithmetic/logical_and.py +79 -0
  767. maxframe/tensor/arithmetic/logical_not.py +72 -0
  768. maxframe/tensor/arithmetic/logical_or.py +80 -0
  769. maxframe/tensor/arithmetic/logical_xor.py +86 -0
  770. maxframe/tensor/arithmetic/lshift.py +80 -0
  771. maxframe/tensor/arithmetic/maximum.py +106 -0
  772. maxframe/tensor/arithmetic/minimum.py +106 -0
  773. maxframe/tensor/arithmetic/mod.py +102 -0
  774. maxframe/tensor/arithmetic/modf.py +87 -0
  775. maxframe/tensor/arithmetic/multiply.py +114 -0
  776. maxframe/tensor/arithmetic/nan_to_num.py +97 -0
  777. maxframe/tensor/arithmetic/negative.py +63 -0
  778. maxframe/tensor/arithmetic/nextafter.py +66 -0
  779. maxframe/tensor/arithmetic/not_equal.py +70 -0
  780. maxframe/tensor/arithmetic/positive.py +45 -0
  781. maxframe/tensor/arithmetic/power.py +104 -0
  782. maxframe/tensor/arithmetic/rad2deg.py +69 -0
  783. maxframe/tensor/arithmetic/radians.py +75 -0
  784. maxframe/tensor/arithmetic/real.py +68 -0
  785. maxframe/tensor/arithmetic/reciprocal.py +78 -0
  786. maxframe/tensor/arithmetic/rint.py +66 -0
  787. maxframe/tensor/arithmetic/rshift.py +79 -0
  788. maxframe/tensor/arithmetic/setimag.py +27 -0
  789. maxframe/tensor/arithmetic/setreal.py +27 -0
  790. maxframe/tensor/arithmetic/sign.py +79 -0
  791. maxframe/tensor/arithmetic/signbit.py +63 -0
  792. maxframe/tensor/arithmetic/sin.py +96 -0
  793. maxframe/tensor/arithmetic/sinc.py +100 -0
  794. maxframe/tensor/arithmetic/sinh.py +91 -0
  795. maxframe/tensor/arithmetic/spacing.py +70 -0
  796. maxframe/tensor/arithmetic/sqrt.py +79 -0
  797. maxframe/tensor/arithmetic/square.py +67 -0
  798. maxframe/tensor/arithmetic/subtract.py +83 -0
  799. maxframe/tensor/arithmetic/tan.py +86 -0
  800. maxframe/tensor/arithmetic/tanh.py +90 -0
  801. maxframe/tensor/arithmetic/tests/__init__.py +13 -0
  802. maxframe/tensor/arithmetic/tests/test_arithmetic.py +458 -0
  803. maxframe/tensor/arithmetic/truediv.py +102 -0
  804. maxframe/tensor/arithmetic/trunc.py +70 -0
  805. maxframe/tensor/arithmetic/utils.py +91 -0
  806. maxframe/tensor/array_utils.py +164 -0
  807. maxframe/tensor/core.py +594 -0
  808. maxframe/tensor/datasource/__init__.py +40 -0
  809. maxframe/tensor/datasource/arange.py +154 -0
  810. maxframe/tensor/datasource/array.py +399 -0
  811. maxframe/tensor/datasource/core.py +114 -0
  812. maxframe/tensor/datasource/diag.py +140 -0
  813. maxframe/tensor/datasource/diagflat.py +69 -0
  814. maxframe/tensor/datasource/empty.py +167 -0
  815. maxframe/tensor/datasource/eye.py +95 -0
  816. maxframe/tensor/datasource/from_dataframe.py +68 -0
  817. maxframe/tensor/datasource/from_dense.py +37 -0
  818. maxframe/tensor/datasource/from_sparse.py +45 -0
  819. maxframe/tensor/datasource/full.py +184 -0
  820. maxframe/tensor/datasource/identity.py +54 -0
  821. maxframe/tensor/datasource/indices.py +115 -0
  822. maxframe/tensor/datasource/linspace.py +140 -0
  823. maxframe/tensor/datasource/meshgrid.py +135 -0
  824. maxframe/tensor/datasource/ones.py +178 -0
  825. maxframe/tensor/datasource/scalar.py +40 -0
  826. maxframe/tensor/datasource/tests/__init__.py +13 -0
  827. maxframe/tensor/datasource/tests/test_datasource.py +310 -0
  828. maxframe/tensor/datasource/tri_array.py +107 -0
  829. maxframe/tensor/datasource/zeros.py +192 -0
  830. maxframe/tensor/extensions/__init__.py +33 -0
  831. maxframe/tensor/extensions/accessor.py +25 -0
  832. maxframe/tensor/extensions/apply_chunk.py +137 -0
  833. maxframe/tensor/extensions/rebalance.py +65 -0
  834. maxframe/tensor/fetch/__init__.py +15 -0
  835. maxframe/tensor/fetch/core.py +54 -0
  836. maxframe/tensor/fft/__init__.py +32 -0
  837. maxframe/tensor/fft/core.py +168 -0
  838. maxframe/tensor/fft/fft.py +112 -0
  839. maxframe/tensor/fft/fft2.py +118 -0
  840. maxframe/tensor/fft/fftfreq.py +80 -0
  841. maxframe/tensor/fft/fftn.py +123 -0
  842. maxframe/tensor/fft/fftshift.py +79 -0
  843. maxframe/tensor/fft/hfft.py +112 -0
  844. maxframe/tensor/fft/ifft.py +114 -0
  845. maxframe/tensor/fft/ifft2.py +115 -0
  846. maxframe/tensor/fft/ifftn.py +123 -0
  847. maxframe/tensor/fft/ifftshift.py +73 -0
  848. maxframe/tensor/fft/ihfft.py +93 -0
  849. maxframe/tensor/fft/irfft.py +118 -0
  850. maxframe/tensor/fft/irfft2.py +62 -0
  851. maxframe/tensor/fft/irfftn.py +114 -0
  852. maxframe/tensor/fft/rfft.py +116 -0
  853. maxframe/tensor/fft/rfft2.py +63 -0
  854. maxframe/tensor/fft/rfftfreq.py +87 -0
  855. maxframe/tensor/fft/rfftn.py +113 -0
  856. maxframe/tensor/indexing/__init__.py +47 -0
  857. maxframe/tensor/indexing/choose.py +198 -0
  858. maxframe/tensor/indexing/compress.py +122 -0
  859. maxframe/tensor/indexing/core.py +190 -0
  860. maxframe/tensor/indexing/extract.py +69 -0
  861. maxframe/tensor/indexing/fill_diagonal.py +180 -0
  862. maxframe/tensor/indexing/flatnonzero.py +58 -0
  863. maxframe/tensor/indexing/getitem.py +144 -0
  864. maxframe/tensor/indexing/nonzero.py +118 -0
  865. maxframe/tensor/indexing/setitem.py +142 -0
  866. maxframe/tensor/indexing/slice.py +32 -0
  867. maxframe/tensor/indexing/take.py +128 -0
  868. maxframe/tensor/indexing/tests/__init__.py +13 -0
  869. maxframe/tensor/indexing/tests/test_indexing.py +232 -0
  870. maxframe/tensor/indexing/unravel_index.py +103 -0
  871. maxframe/tensor/lib/__init__.py +16 -0
  872. maxframe/tensor/lib/index_tricks.py +404 -0
  873. maxframe/tensor/linalg/__init__.py +43 -0
  874. maxframe/tensor/linalg/_einsumfunc.py +1025 -0
  875. maxframe/tensor/linalg/cholesky.py +117 -0
  876. maxframe/tensor/linalg/dot.py +145 -0
  877. maxframe/tensor/linalg/einsum.py +339 -0
  878. maxframe/tensor/linalg/inner.py +36 -0
  879. maxframe/tensor/linalg/inv.py +83 -0
  880. maxframe/tensor/linalg/lstsq.py +100 -0
  881. maxframe/tensor/linalg/lu.py +115 -0
  882. maxframe/tensor/linalg/matmul.py +225 -0
  883. maxframe/tensor/linalg/matrix_norm.py +75 -0
  884. maxframe/tensor/linalg/norm.py +249 -0
  885. maxframe/tensor/linalg/qr.py +124 -0
  886. maxframe/tensor/linalg/solve.py +72 -0
  887. maxframe/tensor/linalg/solve_triangular.py +103 -0
  888. maxframe/tensor/linalg/svd.py +167 -0
  889. maxframe/tensor/linalg/tensordot.py +213 -0
  890. maxframe/tensor/linalg/vdot.py +73 -0
  891. maxframe/tensor/linalg/vector_norm.py +113 -0
  892. maxframe/tensor/merge/__init__.py +21 -0
  893. maxframe/tensor/merge/append.py +74 -0
  894. maxframe/tensor/merge/column_stack.py +63 -0
  895. maxframe/tensor/merge/concatenate.py +103 -0
  896. maxframe/tensor/merge/dstack.py +71 -0
  897. maxframe/tensor/merge/hstack.py +70 -0
  898. maxframe/tensor/merge/stack.py +130 -0
  899. maxframe/tensor/merge/tests/__init__.py +13 -0
  900. maxframe/tensor/merge/tests/test_merge.py +79 -0
  901. maxframe/tensor/merge/vstack.py +74 -0
  902. maxframe/tensor/misc/__init__.py +72 -0
  903. maxframe/tensor/misc/argwhere.py +72 -0
  904. maxframe/tensor/misc/array_split.py +46 -0
  905. maxframe/tensor/misc/astype.py +121 -0
  906. maxframe/tensor/misc/atleast_1d.py +72 -0
  907. maxframe/tensor/misc/atleast_2d.py +70 -0
  908. maxframe/tensor/misc/atleast_3d.py +85 -0
  909. maxframe/tensor/misc/broadcast_arrays.py +57 -0
  910. maxframe/tensor/misc/broadcast_to.py +89 -0
  911. maxframe/tensor/misc/copy.py +64 -0
  912. maxframe/tensor/misc/copyto.py +130 -0
  913. maxframe/tensor/misc/delete.py +104 -0
  914. maxframe/tensor/misc/diff.py +115 -0
  915. maxframe/tensor/misc/dsplit.py +68 -0
  916. maxframe/tensor/misc/ediff1d.py +74 -0
  917. maxframe/tensor/misc/expand_dims.py +85 -0
  918. maxframe/tensor/misc/flatten.py +63 -0
  919. maxframe/tensor/misc/flip.py +90 -0
  920. maxframe/tensor/misc/fliplr.py +64 -0
  921. maxframe/tensor/misc/flipud.py +68 -0
  922. maxframe/tensor/misc/hsplit.py +85 -0
  923. maxframe/tensor/misc/in1d.py +94 -0
  924. maxframe/tensor/misc/insert.py +139 -0
  925. maxframe/tensor/misc/isin.py +130 -0
  926. maxframe/tensor/misc/moveaxis.py +83 -0
  927. maxframe/tensor/misc/ndim.py +53 -0
  928. maxframe/tensor/misc/ravel.py +90 -0
  929. maxframe/tensor/misc/repeat.py +129 -0
  930. maxframe/tensor/misc/result_type.py +88 -0
  931. maxframe/tensor/misc/roll.py +124 -0
  932. maxframe/tensor/misc/rollaxis.py +77 -0
  933. maxframe/tensor/misc/searchsorted.py +147 -0
  934. maxframe/tensor/misc/setdiff1d.py +58 -0
  935. maxframe/tensor/misc/shape.py +89 -0
  936. maxframe/tensor/misc/split.py +190 -0
  937. maxframe/tensor/misc/squeeze.py +117 -0
  938. maxframe/tensor/misc/swapaxes.py +113 -0
  939. maxframe/tensor/misc/tests/__init__.py +13 -0
  940. maxframe/tensor/misc/tests/test_misc.py +112 -0
  941. maxframe/tensor/misc/tile.py +109 -0
  942. maxframe/tensor/misc/transpose.py +133 -0
  943. maxframe/tensor/misc/trapezoid.py +123 -0
  944. maxframe/tensor/misc/unique.py +205 -0
  945. maxframe/tensor/misc/vsplit.py +74 -0
  946. maxframe/tensor/misc/where.py +129 -0
  947. maxframe/tensor/operators.py +83 -0
  948. maxframe/tensor/random/__init__.py +166 -0
  949. maxframe/tensor/random/beta.py +87 -0
  950. maxframe/tensor/random/binomial.py +135 -0
  951. maxframe/tensor/random/bytes.py +37 -0
  952. maxframe/tensor/random/chisquare.py +108 -0
  953. maxframe/tensor/random/choice.py +187 -0
  954. maxframe/tensor/random/core.py +249 -0
  955. maxframe/tensor/random/dirichlet.py +121 -0
  956. maxframe/tensor/random/exponential.py +92 -0
  957. maxframe/tensor/random/f.py +133 -0
  958. maxframe/tensor/random/gamma.py +126 -0
  959. maxframe/tensor/random/geometric.py +91 -0
  960. maxframe/tensor/random/gumbel.py +165 -0
  961. maxframe/tensor/random/hypergeometric.py +146 -0
  962. maxframe/tensor/random/laplace.py +131 -0
  963. maxframe/tensor/random/logistic.py +127 -0
  964. maxframe/tensor/random/lognormal.py +157 -0
  965. maxframe/tensor/random/logseries.py +120 -0
  966. maxframe/tensor/random/multinomial.py +131 -0
  967. maxframe/tensor/random/multivariate_normal.py +190 -0
  968. maxframe/tensor/random/negative_binomial.py +123 -0
  969. maxframe/tensor/random/noncentral_chisquare.py +130 -0
  970. maxframe/tensor/random/noncentral_f.py +124 -0
  971. maxframe/tensor/random/normal.py +141 -0
  972. maxframe/tensor/random/pareto.py +138 -0
  973. maxframe/tensor/random/permutation.py +107 -0
  974. maxframe/tensor/random/poisson.py +109 -0
  975. maxframe/tensor/random/power.py +140 -0
  976. maxframe/tensor/random/rand.py +80 -0
  977. maxframe/tensor/random/randint.py +119 -0
  978. maxframe/tensor/random/randn.py +94 -0
  979. maxframe/tensor/random/random_integers.py +121 -0
  980. maxframe/tensor/random/random_sample.py +84 -0
  981. maxframe/tensor/random/rayleigh.py +108 -0
  982. maxframe/tensor/random/shuffle.py +61 -0
  983. maxframe/tensor/random/standard_cauchy.py +103 -0
  984. maxframe/tensor/random/standard_exponential.py +70 -0
  985. maxframe/tensor/random/standard_gamma.py +118 -0
  986. maxframe/tensor/random/standard_normal.py +72 -0
  987. maxframe/tensor/random/standard_t.py +133 -0
  988. maxframe/tensor/random/tests/__init__.py +13 -0
  989. maxframe/tensor/random/tests/test_random.py +165 -0
  990. maxframe/tensor/random/triangular.py +117 -0
  991. maxframe/tensor/random/uniform.py +129 -0
  992. maxframe/tensor/random/vonmises.py +129 -0
  993. maxframe/tensor/random/wald.py +112 -0
  994. maxframe/tensor/random/weibull.py +138 -0
  995. maxframe/tensor/random/zipf.py +120 -0
  996. maxframe/tensor/rechunk/__init__.py +26 -0
  997. maxframe/tensor/rechunk/rechunk.py +43 -0
  998. maxframe/tensor/reduction/__init__.py +64 -0
  999. maxframe/tensor/reduction/all.py +101 -0
  1000. maxframe/tensor/reduction/allclose.py +86 -0
  1001. maxframe/tensor/reduction/any.py +103 -0
  1002. maxframe/tensor/reduction/argmax.py +101 -0
  1003. maxframe/tensor/reduction/argmin.py +101 -0
  1004. maxframe/tensor/reduction/array_equal.py +63 -0
  1005. maxframe/tensor/reduction/core.py +166 -0
  1006. maxframe/tensor/reduction/count_nonzero.py +80 -0
  1007. maxframe/tensor/reduction/cumprod.py +95 -0
  1008. maxframe/tensor/reduction/cumsum.py +99 -0
  1009. maxframe/tensor/reduction/max.py +118 -0
  1010. maxframe/tensor/reduction/mean.py +122 -0
  1011. maxframe/tensor/reduction/min.py +118 -0
  1012. maxframe/tensor/reduction/nanargmax.py +80 -0
  1013. maxframe/tensor/reduction/nanargmin.py +74 -0
  1014. maxframe/tensor/reduction/nancumprod.py +89 -0
  1015. maxframe/tensor/reduction/nancumsum.py +92 -0
  1016. maxframe/tensor/reduction/nanmax.py +109 -0
  1017. maxframe/tensor/reduction/nanmean.py +105 -0
  1018. maxframe/tensor/reduction/nanmin.py +109 -0
  1019. maxframe/tensor/reduction/nanprod.py +92 -0
  1020. maxframe/tensor/reduction/nanstd.py +124 -0
  1021. maxframe/tensor/reduction/nansum.py +113 -0
  1022. maxframe/tensor/reduction/nanvar.py +149 -0
  1023. maxframe/tensor/reduction/prod.py +128 -0
  1024. maxframe/tensor/reduction/std.py +132 -0
  1025. maxframe/tensor/reduction/sum.py +123 -0
  1026. maxframe/tensor/reduction/tests/__init__.py +13 -0
  1027. maxframe/tensor/reduction/tests/test_reduction.py +189 -0
  1028. maxframe/tensor/reduction/var.py +176 -0
  1029. maxframe/tensor/reshape/__init__.py +15 -0
  1030. maxframe/tensor/reshape/reshape.py +192 -0
  1031. maxframe/tensor/reshape/tests/__init__.py +13 -0
  1032. maxframe/tensor/reshape/tests/test_reshape.py +35 -0
  1033. maxframe/tensor/sort/__init__.py +18 -0
  1034. maxframe/tensor/sort/argpartition.py +98 -0
  1035. maxframe/tensor/sort/argsort.py +150 -0
  1036. maxframe/tensor/sort/partition.py +228 -0
  1037. maxframe/tensor/sort/sort.py +295 -0
  1038. maxframe/tensor/spatial/__init__.py +15 -0
  1039. maxframe/tensor/spatial/distance/__init__.py +17 -0
  1040. maxframe/tensor/spatial/distance/cdist.py +421 -0
  1041. maxframe/tensor/spatial/distance/pdist.py +398 -0
  1042. maxframe/tensor/spatial/distance/squareform.py +153 -0
  1043. maxframe/tensor/special/__init__.py +175 -0
  1044. maxframe/tensor/special/airy.py +55 -0
  1045. maxframe/tensor/special/bessel.py +199 -0
  1046. maxframe/tensor/special/core.py +99 -0
  1047. maxframe/tensor/special/ellip_func_integrals.py +155 -0
  1048. maxframe/tensor/special/ellip_harm.py +55 -0
  1049. maxframe/tensor/special/err_fresnel.py +223 -0
  1050. maxframe/tensor/special/gamma_funcs.py +303 -0
  1051. maxframe/tensor/special/hypergeometric_funcs.py +69 -0
  1052. maxframe/tensor/special/info_theory.py +189 -0
  1053. maxframe/tensor/special/misc.py +163 -0
  1054. maxframe/tensor/special/statistical.py +56 -0
  1055. maxframe/tensor/statistics/__init__.py +24 -0
  1056. maxframe/tensor/statistics/average.py +143 -0
  1057. maxframe/tensor/statistics/bincount.py +133 -0
  1058. maxframe/tensor/statistics/corrcoef.py +77 -0
  1059. maxframe/tensor/statistics/cov.py +222 -0
  1060. maxframe/tensor/statistics/digitize.py +126 -0
  1061. maxframe/tensor/statistics/histogram.py +520 -0
  1062. maxframe/tensor/statistics/median.py +85 -0
  1063. maxframe/tensor/statistics/percentile.py +175 -0
  1064. maxframe/tensor/statistics/ptp.py +89 -0
  1065. maxframe/tensor/statistics/quantile.py +290 -0
  1066. maxframe/tensor/ufunc/__init__.py +24 -0
  1067. maxframe/tensor/ufunc/ufunc.py +198 -0
  1068. maxframe/tensor/utils.py +716 -0
  1069. maxframe/tests/__init__.py +13 -0
  1070. maxframe/tests/test_protocol.py +178 -0
  1071. maxframe/tests/test_utils.py +615 -0
  1072. maxframe/tests/utils.py +245 -0
  1073. maxframe/typing_.py +42 -0
  1074. maxframe/udf.py +260 -0
  1075. maxframe/utils.py +1721 -0
  1076. maxframe-2.2.0.dist-info/METADATA +110 -0
  1077. maxframe-2.2.0.dist-info/RECORD +1094 -0
  1078. maxframe-2.2.0.dist-info/WHEEL +5 -0
  1079. maxframe-2.2.0.dist-info/top_level.txt +3 -0
  1080. maxframe_client/__init__.py +16 -0
  1081. maxframe_client/clients/__init__.py +13 -0
  1082. maxframe_client/clients/framedriver.py +137 -0
  1083. maxframe_client/conftest.py +15 -0
  1084. maxframe_client/fetcher.py +411 -0
  1085. maxframe_client/session/__init__.py +22 -0
  1086. maxframe_client/session/consts.py +39 -0
  1087. maxframe_client/session/graph.py +125 -0
  1088. maxframe_client/session/odps.py +802 -0
  1089. maxframe_client/session/task.py +322 -0
  1090. maxframe_client/session/tests/__init__.py +13 -0
  1091. maxframe_client/session/tests/test_task.py +115 -0
  1092. maxframe_client/tests/__init__.py +13 -0
  1093. maxframe_client/tests/test_fetcher.py +180 -0
  1094. maxframe_client/tests/test_session.py +385 -0
@@ -0,0 +1,2393 @@
1
+ # Copyright 1999-2025 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import functools
16
+ import operator
17
+ import weakref
18
+ from collections.abc import Iterable
19
+ from io import StringIO
20
+ from typing import Any, Dict, List, Tuple, Union
21
+
22
+ import numpy as np
23
+ import pandas as pd
24
+
25
+ from ..core import (
26
+ ENTITY_TYPE,
27
+ HasShapeTileable,
28
+ HasShapeTileableData,
29
+ OutputType,
30
+ Tileable,
31
+ _ExecuteAndFetchMixin,
32
+ is_build_mode,
33
+ register_output_types,
34
+ )
35
+ from ..core.entity.utils import fill_chunk_slices, refresh_tileable_shape
36
+ from ..protocol import DataFrameTableMeta
37
+ from ..serialization.serializables import (
38
+ AnyField,
39
+ BoolField,
40
+ DataTypeField,
41
+ DictField,
42
+ Int32Field,
43
+ IntervalArrayField,
44
+ ListField,
45
+ NDArrayField,
46
+ OneOfField,
47
+ ReferenceField,
48
+ Serializable,
49
+ SeriesField,
50
+ SliceField,
51
+ StringField,
52
+ )
53
+ from ..session import get_default_session
54
+ from ..utils import (
55
+ calc_nsplits,
56
+ ceildiv,
57
+ estimate_pandas_size,
58
+ on_serialize_numpy_type,
59
+ pd_release_version,
60
+ prevent_called_from_pandas,
61
+ tokenize,
62
+ )
63
+ from .typing_ import DataFrameType, IndexType, SeriesType
64
+ from .utils import (
65
+ ReprSeries,
66
+ apply_if_callable,
67
+ fetch_corner_data,
68
+ merge_index_value,
69
+ parse_index,
70
+ )
71
+
72
# True when the first two components of ``pd_release_version`` sort below (2, 0).
# NOTE(review): judging by the name, this gates use of the legacy
# ``iteritems`` API that pandas dropped in 2.0 -- confirm at the use sites.
_df_with_iteritems = pd_release_version[:2] < (2, 0)
73
+
74
+
75
+ class IndexValue(Serializable):
76
+ """
77
+ Meta class for index, held by IndexData, SeriesData and DataFrameData
78
+ """
79
+
80
+ __slots__ = ()
81
+
82
class IndexBase(Serializable):
    """
    Serializable base for the concrete index descriptors.

    Stores an identity key, the monotonicity / uniqueness flags and the
    min / max values together with their ``*_close`` flags, exposes them
    through read-only properties and can rebuild a pandas index via
    :meth:`to_pandas`.
    """

    # to identify if the index is the same
    _key = StringField("key")
    _is_monotonic_increasing = BoolField("is_monotonic_increasing")
    _is_monotonic_decreasing = BoolField("is_monotonic_decreasing")
    _is_unique = BoolField("is_unique")
    _max_val = AnyField("max_val", on_serialize=on_serialize_numpy_type)
    _max_val_close = BoolField("max_val_close")
    _min_val = AnyField("min_val", on_serialize=on_serialize_numpy_type)
    _min_val_close = BoolField("min_val_close")

    @property
    def is_monotonic_increasing(self):
        """Stored ``is_monotonic_increasing`` flag."""
        return self._is_monotonic_increasing

    @property
    def is_monotonic_decreasing(self):
        """Stored ``is_monotonic_decreasing`` flag."""
        return self._is_monotonic_decreasing

    @property
    def is_unique(self):
        """Stored ``is_unique`` flag."""
        return self._is_unique

    @property
    def min_val(self):
        """Stored ``min_val`` value."""
        return self._min_val

    @property
    def min_val_close(self):
        """Stored ``min_val_close`` flag."""
        return self._min_val_close

    @property
    def max_val(self):
        """Stored ``max_val`` value."""
        return self._max_val

    @property
    def max_val_close(self):
        """Stored ``max_val_close`` flag."""
        return self._max_val_close

    @property
    def key(self):
        """Stored ``key`` string."""
        return self._key

    @property
    def inferred_type(self):
        """The base descriptor reports no inferred type; subclasses override."""
        return None

    def to_pandas(self):
        """
        Build a pandas index from the fields declared on the concrete class.

        Every field of ``self`` that is not also listed in the parent
        class' ``_FIELDS`` is collected under its tag, ``None`` values are
        dropped, and an empty ``data`` list is supplied when no data is
        present.  The constructor is ``self._pd_initializer`` when set,
        otherwise the attribute of ``pd`` named after the concrete class.
        """
        # NOTE(review): ``super(type(self), self)`` resolves relative to the
        # runtime class, so a second level of subclassing would compare
        # against a different parent -- confirm this is intended.
        collected = {}
        for attr_name, field in self._FIELDS.items():
            if attr_name in super(type(self), self)._FIELDS:
                continue
            collected[field.tag] = getattr(self, attr_name, None)

        ctor_kwargs = {
            tag: value for tag, value in collected.items() if value is not None
        }
        # No ``None`` values remain, so a missing key is the only way
        # ``data`` can be absent here.
        ctor_kwargs.setdefault("data", [])

        initializer = getattr(self, "_pd_initializer", None)
        if initializer is None:
            initializer = getattr(pd, type(self).__name__)
        return initializer(**ctor_kwargs)
142
+
143
class Index(IndexBase):
    """Descriptor carrying ``name``, ``data`` and ``dtype`` fields."""

    _name = AnyField("name")
    _data = NDArrayField("data")
    _dtype = DataTypeField("dtype")

    @property
    def dtype(self):
        """The stored dtype, or ``None`` when the attribute is not set."""
        return getattr(self, "_dtype", None)

    @property
    def inferred_type(self):
        """``"floating"`` when the dtype kind is ``"f"``, else ``"integer"``."""
        if self.dtype.kind == "f":
            return "floating"
        return "integer"
155
+
156
class RangeIndex(IndexBase):
    """Descriptor of a range index, stored as a slice plus name and dtype."""

    _name = AnyField("name")
    _slice = SliceField("slice")
    _dtype = DataTypeField("dtype")

    @property
    def slice(self):
        """The stored slice object."""
        return self._slice

    @property
    def dtype(self):
        """The stored dtype, falling back to ``np.dtype(np.intc)`` when unset."""
        fallback = np.dtype(np.intc)
        return getattr(self, "_dtype", fallback)

    def to_pandas(self):
        """Create a ``pd.RangeIndex`` from the slice bounds and optional name."""
        bounds = self._slice
        start, stop, step = bounds.start, bounds.stop, bounds.step
        index_name = getattr(self, "_name", None)
        return pd.RangeIndex(start, stop, step, name=index_name)
174
+
175
+ class CategoricalIndex(IndexBase):
176
+ _name = AnyField("name")
177
+ _data = NDArrayField("data")
178
+ _categories = AnyField("categories")
179
+ _ordered = BoolField("ordered")
180
+
181
+ @property
182
+ def inferred_type(self):
183
+ return "categorical"
184
+
185
+ class IntervalIndex(IndexBase):
186
+ _name = AnyField("name")
187
+ _data = IntervalArrayField("data")
188
+ _closed = StringField("closed")
189
+
190
+ @property
191
+ def inferred_type(self):
192
+ return "interval"
193
+
194
+ class DatetimeIndex(IndexBase):
195
+ _name = AnyField("name")
196
+ _data = NDArrayField("data")
197
+ _freq = AnyField("freq")
198
+ _start = AnyField("start")
199
+ _periods = AnyField("periods")
200
+ _end = AnyField("end")
201
+ _closed = AnyField("closed")
202
+ _tz = AnyField("tz")
203
+ _ambiguous = AnyField("ambiguous")
204
+ _dayfirst = BoolField("dayfirst")
205
+ _yearfirst = BoolField("yearfirst")
206
+
207
+ @property
208
+ def inferred_type(self):
209
+ return "datetime64"
210
+
211
+ @property
212
+ def freq(self):
213
+ return getattr(self, "_freq", None)
214
+
215
+ class TimedeltaIndex(IndexBase):
216
+ _name = AnyField("name")
217
+ _data = NDArrayField("data")
218
+ _unit = AnyField("unit")
219
+ _freq = AnyField("freq")
220
+ _start = AnyField("start")
221
+ _periods = AnyField("periods")
222
+ _end = AnyField("end")
223
+ _closed = AnyField("closed")
224
+
225
+ @property
226
+ def inferred_type(self):
227
+ return "timedelta64"
228
+
229
+ class PeriodIndex(IndexBase):
230
+ _name = AnyField("name")
231
+ _data = NDArrayField("data")
232
+ _freq = AnyField("freq")
233
+ _start = AnyField("start")
234
+ _periods = AnyField("periods")
235
+ _end = AnyField("end")
236
+ _year = AnyField("year")
237
+ _month = AnyField("month")
238
+ _quarter = AnyField("quarter")
239
+ _day = AnyField("day")
240
+ _hour = AnyField("hour")
241
+ _minute = AnyField("minute")
242
+ _second = AnyField("second")
243
+ _tz = AnyField("tz")
244
+ _dtype = DataTypeField("dtype")
245
+
246
+ @property
247
+ def inferred_type(self):
248
+ return "period"
249
+
250
+ class Int64Index(IndexBase):
251
+ _pd_initializer = pd.Index
252
+
253
+ _name = AnyField("name")
254
+ _data = NDArrayField("data")
255
+ _dtype = DataTypeField("dtype")
256
+
257
+ @property
258
+ def dtype(self):
259
+ return getattr(self, "_dtype", None)
260
+
261
+ @property
262
+ def inferred_type(self):
263
+ return "integer"
264
+
265
+ class UInt64Index(IndexBase):
266
+ _pd_initializer = pd.Index
267
+
268
+ _name = AnyField("name")
269
+ _data = NDArrayField("data")
270
+ _dtype = DataTypeField("dtype")
271
+
272
+ @property
273
+ def dtype(self):
274
+ return getattr(self, "_dtype", None)
275
+
276
+ @property
277
+ def inferred_type(self):
278
+ return "integer"
279
+
280
+ class Float64Index(IndexBase):
281
+ _pd_initializer = pd.Index
282
+
283
+ _name = AnyField("name")
284
+ _data = NDArrayField("data")
285
+ _dtype = DataTypeField("dtype")
286
+
287
+ @property
288
+ def dtype(self):
289
+ return getattr(self, "_dtype", None)
290
+
291
+ @property
292
+ def inferred_type(self):
293
+ return "floating"
294
+
295
+ class MultiIndex(IndexBase):
296
+ _names = ListField("names", on_serialize=list)
297
+ _dtypes = ListField("dtypes", on_serialize=list)
298
+ _data = NDArrayField("data")
299
+ _sortorder = Int32Field("sortorder")
300
+
301
+ @property
302
+ def inferred_type(self):
303
+ return "mixed"
304
+
305
+ @property
306
+ def names(self) -> list:
307
+ return self._names
308
+
309
+ @property
310
+ def dtypes(self) -> pd.Series:
311
+ return pd.Series(self._dtypes, index=self._names)
312
+
313
def to_pandas(self):
    """Materialize this metadata as a ``pandas.MultiIndex``.

    When ``_data`` is set, each of its rows becomes one index tuple.
    Otherwise an empty MultiIndex is built with one empty level per entry
    of ``_dtypes``.  ``_names`` and the optional ``_sortorder`` are
    forwarded in both cases.
    """
    sortorder = getattr(self, "_sortorder", None)
    rows = getattr(self, "_data", None)

    if rows is not None:
        tuples = [tuple(row) for row in rows]
        return pd.MultiIndex.from_tuples(
            tuples, sortorder=sortorder, names=self._names
        )

    def _empty_level(level_dtype):
        # fall back to a pandas array for dtypes numpy rejects
        try:
            return np.array([], dtype=level_dtype)
        except TypeError:  # pragma: no cover
            return pd.array([], dtype=level_dtype)

    empty_levels = [_empty_level(level_dtype) for level_dtype in self._dtypes]
    return pd.MultiIndex.from_arrays(
        empty_levels, sortorder=sortorder, names=self._names
    )
332
+
333
+ _index_value = OneOfField(
334
+ "index_value",
335
+ index=Index,
336
+ range_index=RangeIndex,
337
+ categorical_index=CategoricalIndex,
338
+ interval_index=IntervalIndex,
339
+ datetime_index=DatetimeIndex,
340
+ timedelta_index=TimedeltaIndex,
341
+ period_index=PeriodIndex,
342
+ int64_index=Int64Index,
343
+ uint64_index=UInt64Index,
344
+ float64_index=Float64Index,
345
+ multi_index=MultiIndex,
346
+ )
347
+
348
+ def __maxframe_tokenize__(self):
349
+ # return object for tokenize
350
+ v = self._index_value
351
+ return v._key
352
+
353
+ @property
354
+ def value(self):
355
+ return self._index_value
356
+
357
+ @property
358
+ def key(self):
359
+ return self._index_value.key
360
+
361
+ @property
362
+ def is_monotonic_increasing(self):
363
+ return self._index_value.is_monotonic_increasing
364
+
365
+ @property
366
+ def is_monotonic_decreasing(self):
367
+ return self._index_value.is_monotonic_decreasing
368
+
369
+ @property
370
+ def is_monotonic_increasing_or_decreasing(self):
371
+ return self.is_monotonic_increasing or self.is_monotonic_decreasing
372
+
373
+ @property
374
+ def is_unique(self):
375
+ return self._index_value.is_unique
376
+
377
+ @property
378
+ def min_val(self):
379
+ return self._index_value.min_val
380
+
381
+ @property
382
+ def min_val_close(self):
383
+ return self._index_value.min_val_close
384
+
385
+ @property
386
+ def max_val(self):
387
+ return self._index_value.max_val
388
+
389
+ @property
390
+ def max_val_close(self):
391
+ return self._index_value.max_val_close
392
+
393
+ @property
394
+ def min_max(self):
395
+ return (
396
+ self._index_value.min_val,
397
+ self._index_value.min_val_close,
398
+ self._index_value.max_val,
399
+ self._index_value.max_val_close,
400
+ )
401
+
402
+ @property
403
+ def name(self):
404
+ return getattr(self._index_value, "_name", None)
405
+
406
+ @property
407
+ def names(self):
408
+ return getattr(self._index_value, "_names", [self.name])
409
+
410
+ @property
411
+ def inferred_type(self):
412
+ return self._index_value.inferred_type
413
+
414
def has_value(self):
    """Tell whether the wrapped index value carries concrete content.

    A ``RangeIndex`` counts as having a value unless its ``max_val`` is
    NaN; any other index kind counts when its ``_data`` attribute is set
    and not ``None``.
    """
    value = self._index_value
    if isinstance(value, self.RangeIndex):
        return not np.isnan(value.max_val)
    return getattr(value, "_data", None) is not None
423
+
424
+ def to_pandas(self):
425
+ return self._index_value.to_pandas()
426
+
427
+
428
+ class DtypesValue(Serializable):
429
+ """
430
+ Meta class for dtypes.
431
+ """
432
+
433
+ __slots__ = ()
434
+
435
+ _key = StringField("key")
436
+ _value = SeriesField("value")
437
+
438
+ def __init__(self, key=None, value=None, **kw):
439
+ super().__init__(_key=key, _value=value, **kw)
440
+ if self._key is None:
441
+ self._key = tokenize(self._value)
442
+
443
+ @property
444
+ def key(self):
445
+ return self._key
446
+
447
+ @property
448
+ def value(self):
449
+ return self._value
450
+
451
+
452
def refresh_index_value(tileable: ENTITY_TYPE):
    """Recompute ``tileable``'s index value from its chunks.

    Only chunks that are one-dimensional or whose second index component
    is 0 contribute.  Their index values are merged without storing data,
    and the merged result keeps the key of the tileable's previous index
    value.
    """
    chunk_index_values = {
        chunk.index: chunk.index_value
        for chunk in tileable.chunks
        if chunk.ndim == 1 or chunk.index[1] == 0
    }
    merged = merge_index_value(chunk_index_values, store_data=False)
    # keep key as original index_value's
    merged._index_value._key = tileable.index_value.key
    tileable._index_value = merged
461
+
462
+
463
def refresh_dtypes(tileable: ENTITY_TYPE):
    """Recompute ``tileable``'s dtypes, columns value and dtypes value.

    The dtypes of all chunks whose first index component is 0 are
    concatenated; the column index and the ``DtypesValue`` are derived from
    the concatenated series.
    """
    first_row_dtypes = []
    for chunk in tileable.chunks:
        if chunk.index[0] == 0:
            first_row_dtypes.append(chunk.dtypes_value.value)

    merged_dtypes = pd.concat(first_row_dtypes)
    tileable._dtypes = merged_dtypes
    tileable._columns_value = parse_index(merged_dtypes.index, store_data=True)
    tileable._dtypes_value = DtypesValue(
        key=tokenize(merged_dtypes), value=merged_dtypes
    )
470
+
471
+
472
+ _tileable_key_property = "_tileable_key"
473
+ _tileable_dtypes_property = "_tileable_dtypes"
474
+ _tileable_index_value_property = "_tileable_index_value"
475
+ _tileable_columns_value_property = "_tileable_columns_value"
476
+ _nsplits_property = "_tileable_nsplits"
477
+ _lazy_chunk_meta_properties = (
478
+ _tileable_key_property,
479
+ _tileable_dtypes_property,
480
+ _tileable_index_value_property,
481
+ _tileable_columns_value_property,
482
+ _nsplits_property,
483
+ )
484
+
485
+
486
def _calc_cum_nsplit(nsplit: Tuple[int]) -> List[int]:
    """Return the running totals of ``nsplit`` preceded by a leading ``0``."""
    offsets = [0]
    offsets.extend(np.cumsum(nsplit).tolist())
    return offsets
488
+
489
+
490
def calc_cum_nsplits(
    nsplits: Tuple[Tuple[int, ...], ...]
) -> Tuple[List[int], ...]:
    """Compute cumulative offsets for every axis of ``nsplits``.

    Parameters
    ----------
    nsplits : tuple of tuple of int
        Chunk sizes, one inner tuple per axis.

    Returns
    -------
    tuple of list of int
        One list per axis, as produced by ``_calc_cum_nsplit``.  The outer
        container is a tuple, which the return annotation now reflects.
    """
    return tuple(_calc_cum_nsplit(nsplit) for nsplit in nsplits)
492
+
493
+
494
+ @functools.lru_cache(maxsize=128)
495
+ def _get_cum_nsplit(nsplit: Tuple[int]) -> List[int]:
496
+ return _calc_cum_nsplit(nsplit)
497
+
498
+
499
def _calc_axis_slice(nsplit: Tuple[int], index: int) -> slice:
    """Return the slice covered by chunk ``index`` along one axis.

    ``nsplit`` is converted to a tuple when it is not one already, before
    being handed to ``_get_cum_nsplit``.
    """
    split_key = nsplit if isinstance(nsplit, tuple) else tuple(nsplit)
    offsets = _get_cum_nsplit(split_key)
    start = offsets[index]
    stop = offsets[index + 1]
    return slice(start, stop)
504
+
505
+
506
def _on_deserialize_index_value(index_value):
    """Return ``index_value`` if it exposes a ``value`` attribute, else ``None``.

    ``None`` itself has no ``value`` attribute, so it also maps to ``None``.
    """
    return index_value if hasattr(index_value, "value") else None
514
+
515
+
516
+ class _ToPandasMixin(_ExecuteAndFetchMixin):
517
+ __slots__ = ()
518
+
519
+ def to_pandas(self, session=None, **kw):
520
+ return self._execute_and_fetch(session=session, **kw)
521
+
522
+
523
+ class _BatchedFetcher:
524
+ __slots__ = ()
525
+
526
def _iter(self, batch_size=None, session=None, **kw):
    """Yield the content of this object as successive fetched batches.

    Parameters
    ----------
    batch_size : int, optional
        Number of rows per batch.  When omitted, the first 1000 rows are
        fetched and their estimated size is used to pick a batch size that
        targets roughly 50 MiB per batch.
    session : optional
        Session forwarded to every ``_fetch`` call.
    **kw
        Extra keyword arguments forwarded to every ``_fetch`` call.

    Yields
    ------
    The result of ``_fetch`` for each batch, or for the whole object when
    a single fetch suffices.
    """
    from .indexing.iloc import iloc

    size = self.shape[0]

    if batch_size is not None:
        n_batch = ceildiv(size, batch_size)
        if n_batch > 1:
            for i in range(n_batch):
                batch_data = iloc(self)[batch_size * i : batch_size * (i + 1)]
                yield batch_data._fetch(session=session, **kw)
        else:
            yield self._fetch(session=session, **kw)
        return

    # if batch_size is not specified, use first batch to estimate
    # batch_size.
    default_batch_bytes = 50 * 1024**2
    first_batch = 1000

    if size < first_batch:
        yield self._fetch(session=session, **kw)
        return

    first_batch_data = iloc(self)[:first_batch]._fetch(session=session, **kw)
    yield first_batch_data

    remaining = size - first_batch
    data_size = estimate_pandas_size(first_batch_data)
    if data_size > 0:
        # never let the estimate truncate to 0, which would break ceildiv
        batch_size = max(1, int(default_batch_bytes / data_size * first_batch))
    else:
        # no usable size estimate: fetch the rest in a single batch
        batch_size = max(1, remaining)

    n_batch = ceildiv(remaining, batch_size)
    for i in range(n_batch):
        start = first_batch + batch_size * i
        batch_data = iloc(self)[start : start + batch_size]
        yield batch_data._fetch(session=session, **kw)
562
+
563
+ def iterbatch(self, batch_size=None, session=None, **kw):
564
+ # stop triggering execution under build mode
565
+ if is_build_mode():
566
+ raise ValueError("Cannot fetch data under build mode")
567
+
568
+ # trigger execution
569
+ self.execute(session=session, **kw)
570
+ return self._iter(batch_size=batch_size, session=session)
571
+
572
+ def fetch(self, session=None, **kw):
573
+ from .indexing.iloc import DataFrameIlocGetItem, SeriesIlocGetItem
574
+
575
+ batch_size = kw.pop("batch_size", None)
576
+ if isinstance(self.op, (DataFrameIlocGetItem, SeriesIlocGetItem)):
577
+ # see GH#1871
578
+ # already iloc, do not trigger batch fetch
579
+ return self._fetch(session=session, **kw)
580
+ else:
581
+ batches = list(self._iter(batch_size=batch_size, session=session, **kw))
582
+ return pd.concat(batches) if len(batches) > 1 else batches[0]
583
+
584
+ def fetch_infos(self, fields=None, session=None, **kw):
585
+ return self._fetch_infos(fields=fields, session=session, **kw)
586
+
587
+
588
+ class IndexData(HasShapeTileableData, _ToPandasMixin):
589
+ __slots__ = ()
590
+ type_name = "Index"
591
+
592
+ # optional field
593
+ _dtype = DataTypeField("dtype")
594
+ _name = AnyField("name")
595
+ _names = AnyField("names")
596
+ _index_value = ReferenceField(
597
+ "index_value", IndexValue, on_deserialize=_on_deserialize_index_value
598
+ )
599
+
600
+ def __init__(
601
+ self,
602
+ op=None,
603
+ shape=None,
604
+ nsplits=None,
605
+ dtype=None,
606
+ name=None,
607
+ names=None,
608
+ index_value=None,
609
+ **kw,
610
+ ):
611
+ super().__init__(
612
+ _op=op,
613
+ _shape=shape,
614
+ _nsplits=nsplits,
615
+ _dtype=dtype,
616
+ _name=name,
617
+ _names=names,
618
+ _index_value=index_value,
619
+ **kw,
620
+ )
621
+
622
+ @property
623
+ def params(self) -> Dict[str, Any]:
624
+ # params return the properties which useful to rebuild a new tileable object
625
+ return {
626
+ "shape": self.shape,
627
+ "dtype": self.dtype,
628
+ "name": self.name,
629
+ "index_value": self.index_value,
630
+ }
631
+
632
+ @params.setter
633
+ def params(self, new_params: Dict[str, Any]):
634
+ params = new_params.copy()
635
+ new_shape = params.pop("shape", None)
636
+ if new_shape is not None:
637
+ self._shape = new_shape
638
+ dtype = params.pop("dtype", None)
639
+ if dtype is not None:
640
+ self._dtype = dtype
641
+ index_value = params.pop("index_value", None)
642
+ if index_value is not None:
643
+ self._index_value = index_value
644
+ name = params.pop("name", None)
645
+ if name is not None:
646
+ self._name = name
647
+ if params: # pragma: no cover
648
+ raise TypeError(f"Unknown params: {list(params)}")
649
+
650
+ def refresh_params(self):
651
+ # refresh params when chunks updated
652
+ refresh_tileable_shape(self)
653
+ fill_chunk_slices(self)
654
+ # refresh_index_value(self)
655
+ # if self._dtype is None:
656
+ # self._dtype = self.chunks[0].dtype
657
+ # if self._name is None:
658
+ # self._name = self.chunks[0].name
659
+
660
+ def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
661
+ pass
662
+
663
def _to_str(self, representation=False):
    """Render this index for ``str()`` / ``repr()``.

    In build mode, or when the object has not been executed in any
    session, a short description naming the operator class is returned
    (with the key when ``representation`` is true).  Otherwise the data is
    fetched from the most recent executed session and its pandas text
    form is returned.
    """
    if is_build_mode() or len(self._executed_sessions) == 0:
        # in build mode, or not executed, just return representation
        op_name = type(self._op).__name__
        if representation:
            # closing ">" added so the bracket is balanced, matching the
            # series representation
            return f"Index <op={op_name}, key={self.key}>"
        return f"Index(op={op_name})"

    data = self.fetch(session=self._executed_sessions[-1])
    return repr(data) or str(data)
673
+
674
+ def __str__(self):
675
+ return self._to_str(representation=False)
676
+
677
+ def __repr__(self):
678
+ return self._to_str(representation=True)
679
+
680
+ def _to_maxframe_tensor(self, dtype=None, order="K", extract_multi_index=False):
681
+ tensor = self.to_tensor(extract_multi_index=extract_multi_index)
682
+ dtype = dtype if dtype is not None else tensor.dtype
683
+ return tensor.astype(dtype=dtype, order=order, copy=False)
684
+
685
+ def __maxframe_tensor__(self, dtype=None, order="K"):
686
+ return self._to_maxframe_tensor(dtype=dtype, order=order)
687
+
688
+ @property
689
+ def dtype(self):
690
+ return getattr(self, "_dtype", None) or self.op.dtype
691
+
692
+ @property
693
+ def name(self):
694
+ return self._name
695
+
696
+ @property
697
+ def names(self):
698
+ return getattr(self, "_names", None) or [self.name]
699
+
700
+ @property
701
+ def nlevels(self) -> int:
702
+ return len(self.names)
703
+
704
+ @property
705
+ def index_value(self) -> IndexValue:
706
+ return self._index_value
707
+
708
+ @property
709
+ def inferred_type(self):
710
+ return self._index_value.inferred_type
711
+
712
+ def to_tensor(self, dtype=None, extract_multi_index=False):
713
+ from ..tensor.datasource.from_dataframe import from_index
714
+
715
+ return from_index(self, dtype=dtype, extract_multi_index=extract_multi_index)
716
+
717
def to_frame(self, index: bool = True, name=None):
    """
    Create a DataFrame with a column containing the Index.

    Parameters
    ----------
    index : bool, default True
        Set the index of the returned DataFrame as the original Index.

    name : object, default None
        The passed name should substitute for the index name (if it has
        one). For a MultiIndex this must be a list / sequence holding one
        name per level.

    Returns
    -------
    DataFrame
        DataFrame containing the original Index data.

    Raises
    ------
    TypeError
        If the index is a MultiIndex and `name` is a string or is not
        iterable.
    ValueError
        If the index is a MultiIndex and `name` does not have one entry
        per level.

    See Also
    --------
    Index.to_series : Convert an Index to a Series.
    Series.to_frame : Convert Series to DataFrame.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> idx = md.Index(['Ant', 'Bear', 'Cow'], name='animal')
    >>> idx.to_frame().execute()
           animal
    animal
    Ant       Ant
    Bear     Bear
    Cow       Cow

    By default, the original Index is reused. To enforce a new Index:

    >>> idx.to_frame(index=False).execute()
      animal
    0    Ant
    1   Bear
    2    Cow

    To override the name of the resulting column, specify `name`:

    >>> idx.to_frame(index=False, name='zoo').execute()
        zoo
    0   Ant
    1  Bear
    2   Cow
    """
    from . import dataframe_from_tensor

    if isinstance(self.index_value.value, IndexValue.MultiIndex):
        old_names = self.index_value.value.names

        if isinstance(name, str) or (
            name is not None and not isinstance(name, Iterable)
        ):
            raise TypeError("'name' must be a list / sequence of column names.")

        name = list(name if name is not None else old_names)
        if len(name) != len(old_names):
            raise ValueError(
                "'name' should have same length as number of levels on index."
            )

        # ``name`` already falls back to the existing level names when it
        # was not passed, so a supplied name must win over the old one;
        # unnamed levels are labelled by their position.
        columns = [
            new if new is not None else idx for idx, new in enumerate(name)
        ]
    else:
        columns = [name or self.name or 0]
    index_ = self if index else None
    return dataframe_from_tensor(
        # do not pass ``self`` positionally: the first parameter is ``dtype``
        self._to_maxframe_tensor(extract_multi_index=True),
        index=index_,
        columns=columns,
    )
796
+
797
+ def to_series(self, index=None, name=None):
798
+ """
799
+ Create a Series with both index and values equal to the index keys.
800
+
801
+ Useful with map for returning an indexer based on an index.
802
+
803
+ Parameters
804
+ ----------
805
+ index : Index, optional
806
+ Index of resulting Series. If None, defaults to original index.
807
+ name : str, optional
808
+ Name of resulting Series. If None, defaults to name of original
809
+ index.
810
+
811
+ Returns
812
+ -------
813
+ Series
814
+ The dtype will be based on the type of the Index values.
815
+ """
816
+ from . import series_from_index
817
+
818
+ return series_from_index(self, index=index, name=name)
819
+
820
+
821
+ class Index(HasShapeTileable, _ToPandasMixin):
822
+ __slots__ = "_df_or_series", "_parent_key", "_axis"
823
+ _allow_data_type_ = (IndexData,)
824
+ type_name = "Index"
825
+
826
+ def __new__(cls, data: Union[pd.Index, IndexData] = None, **_):
827
+ if data is not None and not isinstance(data, pd.Index):
828
+ # create corresponding Index class
829
+ # according to type of index_value
830
+ clz = globals()[type(data.index_value.value).__name__]
831
+ else:
832
+ clz = cls
833
+ return object.__new__(clz)
834
+
835
+ def __len__(self):
836
+ return len(self._data)
837
+
838
+ def __class_getitem__(cls, item):
839
+ return IndexType.from_getitem_args(item)
840
+
841
+ def __maxframe_tensor__(self, dtype=None, order="K"):
842
+ return self._data.__maxframe_tensor__(dtype=dtype, order=order)
843
+
844
+ def _get_df_or_series(self):
845
+ obj = getattr(self, "_df_or_series", None)
846
+ if obj is not None:
847
+ return obj()
848
+ return None
849
+
850
+ def _set_df_or_series(self, df_or_series, axis):
851
+ self._df_or_series = weakref.ref(df_or_series)
852
+ self._parent_key = df_or_series.key
853
+ self._axis = axis
854
+
855
+ @property
856
+ def T(self):
857
+ """Return the transpose, which is by definition self."""
858
+ return self
859
+
860
+ @property
861
+ def name(self):
862
+ return self._data.name
863
+
864
+ @name.setter
865
+ def name(self, value):
866
+ df_or_series = self._get_df_or_series()
867
+ if df_or_series is not None and df_or_series.key == self._parent_key:
868
+ df_or_series.rename_axis(value, axis=self._axis, inplace=True)
869
+ self.data = df_or_series.axes[self._axis].data
870
+ else:
871
+ self.rename(value, inplace=True)
872
+
873
+ @property
874
+ def names(self):
875
+ return self._data.names
876
+
877
@names.setter
def names(self, value):
    """Set the level names of this index.

    When this index is still attached to a live DataFrame / Series whose
    key equals the key recorded on attachment, the rename is applied to
    that parent's axis and this index adopts the parent's refreshed axis
    data.  Otherwise only this index is renamed.  The key comparison is
    the same one the ``name`` setter performs.
    """
    df_or_series = self._get_df_or_series()
    if df_or_series is not None and df_or_series.key == self._parent_key:
        df_or_series.rename_axis(value, axis=self._axis, inplace=True)
        self.data = df_or_series.axes[self._axis].data
    else:
        self.rename(value, inplace=True)
885
+
886
+ @property
887
+ def values(self):
888
+ return self.to_tensor()
889
+
890
+
891
+ class RangeIndex(Index):
892
+ __slots__ = ()
893
+
894
+
895
+ class CategoricalIndex(Index):
896
+ __slots__ = ()
897
+
898
+
899
+ class IntervalIndex(Index):
900
+ __slots__ = ()
901
+
902
+
903
+ class DatetimeIndex(Index):
904
+ __slots__ = ()
905
+
906
+
907
+ class TimedeltaIndex(Index):
908
+ __slots__ = ()
909
+
910
+
911
+ class PeriodIndex(Index):
912
+ __slots__ = ()
913
+
914
+
915
+ class Int64Index(Index):
916
+ __slots__ = ()
917
+
918
+
919
+ class UInt64Index(Index):
920
+ __slots__ = ()
921
+
922
+
923
+ class Float64Index(Index):
924
+ __slots__ = ()
925
+
926
+
927
+ class MultiIndex(Index):
928
+ __slots__ = ()
929
+
930
+
931
+ class BaseSeriesData(HasShapeTileableData, _ToPandasMixin):
932
+ __slots__ = "_cache", "_accessors"
933
+
934
+ # optional field
935
+ _dtype = DataTypeField("dtype")
936
+ _name = AnyField("name")
937
+ _index_value = ReferenceField(
938
+ "index_value", IndexValue, on_deserialize=_on_deserialize_index_value
939
+ )
940
+
941
+ def __init__(
942
+ self,
943
+ op=None,
944
+ shape=None,
945
+ nsplits=None,
946
+ dtype=None,
947
+ name=None,
948
+ index_value=None,
949
+ **kw,
950
+ ):
951
+ super().__init__(
952
+ _op=op,
953
+ _shape=shape,
954
+ _nsplits=nsplits,
955
+ _dtype=dtype,
956
+ _name=name,
957
+ _index_value=index_value,
958
+ **kw,
959
+ )
960
+ self._accessors = dict()
961
+
962
+ def _get_params(self) -> Dict[str, Any]:
963
+ # params return the properties which useful to rebuild a new tileable object
964
+ return {
965
+ "shape": self.shape,
966
+ "dtype": self.dtype,
967
+ "name": self.name,
968
+ "index_value": self.index_value,
969
+ }
970
+
971
def _set_params(self, new_params: Dict[str, Any]):
    """Apply ``new_params`` to the backing attributes of this object.

    Recognized keys are ``shape``, ``dtype``, ``index_value`` and ``name``;
    a key whose value is ``None`` (or which is missing) leaves the
    corresponding attribute untouched.  The input mapping is not modified.

    Raises
    ------
    TypeError
        If ``new_params`` contains any other key.
    """
    pending = new_params.copy()
    param_to_attr = (
        ("shape", "_shape"),
        ("dtype", "_dtype"),
        ("index_value", "_index_value"),
        ("name", "_name"),
    )
    for param, attr in param_to_attr:
        value = pending.pop(param, None)
        if value is not None:
            setattr(self, attr, value)
    if pending:  # pragma: no cover
        raise TypeError(f"Unknown params: {list(pending)}")
987
+
988
+ params = property(_get_params, _set_params)
989
+
990
+ def refresh_params(self):
991
+ # refresh params when chunks updated
992
+ refresh_tileable_shape(self)
993
+ fill_chunk_slices(self)
994
+ # refresh_index_value(self)
995
+ if self._dtype is None:
996
+ self._dtype = getattr(self.chunks[0], "dtype", None)
997
+ # if self._name is None:
998
+ # self._name = self.chunks[0].name
999
+
1000
+ def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
1001
+ pass
1002
+
1003
def _to_str(self, representation=False):
    """Render this object for ``str()`` / ``repr()``.

    In build mode, or when the object has not been executed in any
    session, a short description naming the operator class is returned
    (with the key when ``representation`` is true).  Otherwise the corner
    data is fetched from the most recent executed session and rendered
    under a temporary ``display.max_rows`` setting; when the object has
    more rows than the current ``display.max_rows`` the corner data is
    wrapped in ``ReprSeries`` together with the full shape.
    """
    if is_build_mode() or len(self._executed_sessions) == 0:
        # in build mode, or not executed, just return representation
        if not representation:
            return f"{self.type_name}(op={type(self._op).__name__})"
        return f"{self.type_name} <op={type(self._op).__name__}, key={self.key}>"

    corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])

    max_rows = pd.get_option("display.max_rows")
    fits_in_display = self.shape[0] <= max_rows
    if fits_in_display or corner_data.shape[0] == 0:
        corner_max_rows = max_rows
    else:
        # make sure max_rows < corner_data
        corner_max_rows = corner_data.shape[0] - 1

    with pd.option_context("display.max_rows", corner_max_rows):
        if fits_in_display:
            corner_series = corner_data
        else:
            corner_series = ReprSeries(corner_data, self.shape)
        rendered = repr(corner_series) if representation else str(corner_series)

    return rendered
1031
+
1032
+ def __str__(self):
1033
+ return self._to_str(representation=False)
1034
+
1035
+ def __repr__(self):
1036
+ return self._to_str(representation=True)
1037
+
1038
+ @property
1039
+ def dtype(self):
1040
+ return getattr(self, "_dtype", None) or getattr(self.op, "dtype", None)
1041
+
1042
+ @property
1043
+ def name(self):
1044
+ return self._name
1045
+
1046
+ @property
1047
+ def index_value(self):
1048
+ return self._index_value
1049
+
1050
+ @property
1051
+ def index(self):
1052
+ from .datasource.index import from_tileable
1053
+
1054
+ return from_tileable(self)
1055
+
1056
+ @property
1057
+ def axes(self):
1058
+ return [self.index]
1059
+
1060
@property
def empty(self):
    """Whether the shape is exactly ``(0,)``.

    Raises
    ------
    ValueError
        If any dimension of the shape is NaN.
    """
    shape = self._shape
    has_nan_dim = np.any(np.isnan(shape))
    if has_nan_dim:
        raise ValueError("Tileable object must be executed first")
    return shape == (0,)
1066
+
1067
+ def to_tensor(self, dtype=None):
1068
+ from ..tensor.datasource.from_dataframe import from_series
1069
+
1070
+ return from_series(self, dtype=dtype)
1071
+
1072
+
1073
+ class SeriesData(_BatchedFetcher, BaseSeriesData):
1074
+ type_name = "Series"
1075
+
1076
+ def __maxframe_tensor__(self, dtype=None, order="K"):
1077
+ tensor = self.to_tensor()
1078
+ dtype = dtype if dtype is not None else tensor.dtype
1079
+ return tensor.astype(dtype=dtype, order=order, copy=False)
1080
+
1081
+ def iteritems(self, batch_size=10000, session=None):
1082
+ method_name = "iteritems" if _df_with_iteritems else "items"
1083
+ for batch_data in self.iterbatch(batch_size=batch_size, session=session):
1084
+ yield from getattr(batch_data, method_name)()
1085
+
1086
+ items = iteritems
1087
+
1088
+ def to_dict(self, into=dict, batch_size=10000, session=None):
1089
+ fetch_kwargs = dict(batch_size=batch_size)
1090
+ return self.to_pandas(session=session, fetch_kwargs=fetch_kwargs).to_dict(
1091
+ into=into
1092
+ )
1093
+
1094
+ def to_frame(self, name=None):
1095
+ from . import dataframe_from_tensor
1096
+
1097
+ name = name or self.name or 0
1098
+ return dataframe_from_tensor(self, columns=[name])
1099
+
1100
+ @property
1101
+ def hasnans(self):
1102
+ """
1103
+ Return True if there are any NaNs.
1104
+
1105
+ Returns
1106
+ -------
1107
+ bool
1108
+
1109
+ Examples
1110
+ --------
1111
+ >>> import maxframe.dataframe as md
1112
+ >>> s = md.Series([1, 2, 3, None])
1113
+ >>> s.execute()
1114
+ 0 1.0
1115
+ 1 2.0
1116
+ 2 3.0
1117
+ 3 NaN
1118
+ dtype: float64
1119
+ >>> s.hasnans.execute()
1120
+ True
1121
+ """
1122
+ return self.isna().any()
1123
+
1124
+
1125
+ class Series(HasShapeTileable, _ToPandasMixin):
1126
+ __slots__ = ("_cache",)
1127
+ _allow_data_type_ = (SeriesData,)
1128
+ type_name = "Series"
1129
+
1130
+ def __class_getitem__(cls, item):
1131
+ return SeriesType.from_getitem_args(item)
1132
+
1133
+ def to_tensor(self, dtype=None):
1134
+ return self._data.to_tensor(dtype=dtype)
1135
+
1136
+ def from_tensor(self, in_tensor, index=None, name=None):
1137
+ return self._data.from_tensor(in_tensor, index=index, name=name)
1138
+
1139
+ @property
1140
+ def T(self):
1141
+ """Return the transpose, which is by definition self."""
1142
+ return self
1143
+
1144
+ @property
1145
+ def ndim(self):
1146
+ """
1147
+ Return an int representing the number of axes / array dimensions.
1148
+
1149
+ Return 1 if Series. Otherwise return 2 if DataFrame.
1150
+
1151
+ See Also
1152
+ --------
1153
+ ndarray.ndim : Number of array dimensions.
1154
+
1155
+ Examples
1156
+ --------
1157
+ >>> import maxframe.dataframe as md
1158
+ >>> s = md.Series({'a': 1, 'b': 2, 'c': 3})
1159
+ >>> s.ndim
1160
+ 1
1161
+
1162
+ >>> df = md.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
1163
+ >>> df.ndim
1164
+ 2
1165
+ """
1166
+ return super().ndim
1167
+
1168
+ @property
1169
+ def index(self):
1170
+ """
1171
+ The index (axis labels) of the Series.
1172
+ """
1173
+ idx = self._data.index
1174
+ idx._set_df_or_series(self, 0)
1175
+ return idx
1176
+
1177
+ @index.setter
1178
+ def index(self, new_index):
1179
+ self.set_axis(new_index, axis=0, inplace=True)
1180
+
1181
+ @property
1182
+ def name(self):
1183
+ return self._data.name
1184
+
1185
+ @name.setter
1186
+ def name(self, val):
1187
+ from .indexing.rename import DataFrameRename
1188
+
1189
+ op = DataFrameRename(new_name=val, output_types=[OutputType.series])
1190
+ new_series = op(self)
1191
+ self.data = new_series.data
1192
+
1193
+ @property
1194
+ def dtype(self):
1195
+ """
1196
+ Return the dtype object of the underlying data.
1197
+ """
1198
+ return self._data.dtype
1199
+
1200
+ def copy(self, deep=True): # pylint: disable=arguments-differ
1201
+ """
1202
+ Make a copy of this object's indices and data.
1203
+
1204
+ When ``deep=True`` (default), a new object will be created with a
1205
+ copy of the calling object's data and indices. Modifications to
1206
+ the data or indices of the copy will not be reflected in the
1207
+ original object (see notes below).
1208
+
1209
+ When ``deep=False``, a new object will be created without copying
1210
+ the calling object's data or index (only references to the data
1211
+ and index are copied). Any changes to the data of the original
1212
+ will be reflected in the shallow copy (and vice versa).
1213
+
1214
+ Parameters
1215
+ ----------
1216
+ deep : bool, default True
1217
+ Make a deep copy, including a copy of the data and the indices.
1218
+ With ``deep=False`` neither the indices nor the data are copied.
1219
+
1220
+ Returns
1221
+ -------
1222
+ copy : Series or DataFrame
1223
+ Object type matches caller.
1224
+ """
1225
+ if deep:
1226
+ return super().copy()
1227
+ else:
1228
+ return super()._view()
1229
+
1230
+ def __iter__(self):
1231
+ # prevent being called by pandas to make sure `__eq__` works
1232
+ prevent_called_from_pandas()
1233
+ return (tp[1] for tp in self.items())
1234
+
1235
+ def __len__(self):
1236
+ return len(self._data)
1237
+
1238
+ def __maxframe_tensor__(self, dtype=None, order="K"):
1239
+ return self._data.__maxframe_tensor__(dtype=dtype, order=order)
1240
+
1241
+ def keys(self):
1242
+ """
1243
+ Return alias for index.
1244
+
1245
+ Returns
1246
+ -------
1247
+ Index
1248
+ Index of the Series.
1249
+ """
1250
+ return self.index
1251
+
1252
+ @property
1253
+ def values(self):
1254
+ return self.to_tensor()
1255
+
1256
def iteritems(self, batch_size=10000, session=None):
    """
    Lazily walk through the series as (index, value) pairs.

    The call is forwarded unchanged to the wrapped data object, which
    produces the pairs.

    Parameters
    ----------
    batch_size : int, default 10000
        Passed through to the wrapped data object.
    session : optional
        Passed through to the wrapped data object.

    Returns
    -------
    iterable
        Iterable of tuples holding the (index, value) pairs of the Series.

    See Also
    --------
    DataFrame.items : Iterate over (column name, Series) pairs.
    DataFrame.iterrows : Iterate over DataFrame rows as (index, Series) pairs.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s = md.Series(['A', 'B', 'C'])
    >>> for index, value in s.items():
    ...     print(f"Index : {index}, Value : {value}")
    Index : 0, Value : A
    Index : 1, Value : B
    Index : 2, Value : C
    """
    produce = self._data.iteritems
    return produce(batch_size=batch_size, session=session)


items = iteritems
1287
+
1288
def to_dict(self, into=dict, batch_size=10000, session=None):
    """
    Build a {label -> value} mapping from the Series.

    Parameters
    ----------
    into : class, default dict
        The collections.abc.Mapping subclass used for the result. Either
        the class itself or an empty instance of the wanted mapping type
        may be given. A collections.defaultdict has to be passed already
        initialized.
    batch_size : int, default 10000
        Passed through to the wrapped data object.
    session : optional
        Passed through to the wrapped data object.

    Returns
    -------
    collections.abc.Mapping
        Key-value representation of the Series.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s = md.Series([1, 2, 3, 4])
    >>> s.to_dict()
    {0: 1, 1: 2, 2: 3, 3: 4}
    >>> from collections import OrderedDict, defaultdict
    >>> s.to_dict(OrderedDict)
    OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
    >>> dd = defaultdict(list)
    >>> s.to_dict(dd)
    defaultdict(<class 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
    """
    convert = self._data.to_dict
    return convert(into=into, batch_size=batch_size, session=session)
1319
+
1320
def to_frame(self, name=None):
    """
    Turn the Series into a DataFrame.

    Parameters
    ----------
    name : object, default None
        When given, it is used in place of the series name (if the series
        has one).

    Returns
    -------
    DataFrame
        DataFrame representation of the Series.

    Examples
    --------
    >>> import maxframe.dataframe as md
    >>> s = md.Series(["a", "b", "c"], name="vals")
    >>> s.to_frame().execute()
      vals
    0    a
    1    b
    2    c
    """
    wrapped = self._data
    return wrapped.to_frame(name=name)
1346
+
1347
+ # def median(
1348
+ # self, axis=None, skipna=True, out=None, overwrite_input=False, keepdims=False
1349
+ # ):
1350
+ # """
1351
+ # Return the median of the values over the requested axis.
1352
+ #
1353
+ # Parameters
1354
+ # ----------
1355
+ # axis : {index (0)}
1356
+ # Axis or axes along which the medians are computed. The default
1357
+ # is to compute the median along a flattened version of the tensor.
1358
+ # A sequence of axes is supported since version 1.9.0.
1359
+ # skipna : bool, optional, default True
1360
+ # Exclude NA/null values when computing the result.
1361
+ # out : Tensor, default None
1362
+ # Output tensor in which to place the result. It must
1363
+ # have the same shape and buffer length as the expected output,
1364
+ # but the type (of the output) will be cast if necessary.
1365
+ # overwrite_input : bool, default False
1366
+ # Just for compatibility with Numpy, would not take effect.
1367
+ # keepdims : bool, default False
1368
+ # If this is set to True, the axes which are reduced are left
1369
+ # in the result as dimensions with size one. With this option,
1370
+ # the result will broadcast correctly against the original `arr`.
1371
+ #
1372
+ # Returns
1373
+ # -------
1374
+ # median : scalar
1375
+ # Return the median of the values over the requested axis.
1376
+ #
1377
+ # See Also
1378
+ # --------
1379
+ # tensor.mean, tensor.percentile
1380
+ #
1381
+ # Notes
1382
+ # -----
1383
+ # Given a vector ``V`` of length ``N``, the median of ``V`` is the
1384
+ # middle value of a sorted copy of ``V``, ``V_sorted`` - i.e.,
1385
+ # ``V_sorted[(N-1)/2]``, when ``N`` is odd, and the average of the
1386
+ # two middle values of ``V_sorted`` when ``N`` is even.
1387
+ #
1388
+ # Examples
1389
+ # --------
1390
+ # >>> import maxframe.dataframe as md
1391
+ # >>> a = md.Series([10, 7, 4, 3, 2, 1])
1392
+ # >>> a.median().execute()
1393
+ # 3.5
1394
+ # >>> mt.median(a).execute()
1395
+ # 3.5
1396
+ # >>> a = md.Series([10, 7, 4, None, 2, 1])
1397
+ # >>> a.median().execute()
1398
+ # 4.0
1399
+ # >>> a.median(skipna=False).execute()
1400
+ # nan
1401
+ # """
1402
+ # if skipna:
1403
+ # return statistics.median(
1404
+ # self.dropna(),
1405
+ # axis=None,
1406
+ # out=None,
1407
+ # overwrite_input=False,
1408
+ # keepdims=False,
1409
+ # )
1410
+ # else:
1411
+ # return statistics.median(
1412
+ # self, axis=None, out=None, overwrite_input=False, keepdims=False
1413
+ # )
1414
+
1415
+
1416
class BaseDataFrameData(HasShapeTileableData, _ToPandasMixin):
    """
    Tileable data shared by DataFrame-like objects.

    Stores the column dtypes together with index and column metadata, and
    keeps lazily built caches derived from the dtypes.
    """

    __slots__ = "_accessors", "_dtypes_value", "_dtypes_dict"

    # optional fields
    _dtypes = SeriesField("dtypes")
    _index_value = ReferenceField(
        "index_value", IndexValue, on_deserialize=_on_deserialize_index_value
    )
    _columns_value = ReferenceField("columns_value", IndexValue)

    def __init__(
        self,
        op=None,
        shape=None,
        nsplits=None,
        dtypes=None,
        index_value=None,
        columns_value=None,
        **kw,
    ):
        """Store the given metadata in the fields and start with empty caches."""
        super().__init__(
            _op=op,
            _shape=shape,
            _nsplits=nsplits,
            _dtypes=dtypes,
            _index_value=index_value,
            _columns_value=columns_value,
            **kw,
        )
        self._accessors, self._dtypes_value, self._dtypes_dict = dict(), None, None

    def __on_deserialize__(self):
        """Reset the non-field slot attributes after deserialization."""
        super().__on_deserialize__()
        self._accessors, self._dtypes_value, self._dtypes_dict = dict(), None, None

    def _get_params(self) -> Dict[str, Any]:
        """Collect the properties needed to rebuild a new tileable object."""
        collected = dict()
        collected["shape"] = self.shape
        collected["dtypes"] = self.dtypes
        collected["index_value"] = self.index_value
        collected["columns_value"] = getattr(self, "columns_value", None)
        collected["dtypes_value"] = getattr(self, "dtypes_value", None)
        return collected

    def _set_params(self, new_params: Dict[str, Any]):
        """
        Apply the entries of ``new_params`` that are not None.

        When only ``dtypes_value`` is supplied, the dtypes and the columns
        value are derived from it.

        Raises
        ------
        TypeError
            If ``new_params`` contains keys that are not recognized.
        """
        remaining = new_params.copy()
        # Plain entries map one-to-one onto the underscore-prefixed fields.
        plain = {
            name: remaining.pop(name, None)
            for name in ("shape", "index_value", "dtypes", "columns_value")
        }
        for name, value in plain.items():
            if value is not None:
                setattr(self, f"_{name}", value)
        dtypes_value = remaining.pop("dtypes_value", None)
        if dtypes_value is not None:
            if plain["dtypes"] is None:
                self._dtypes = dtypes_value.value
            if plain["columns_value"] is None:
                self._columns_value = parse_index(self._dtypes.index, store_data=True)
            self._dtypes_value = dtypes_value
        if remaining:  # pragma: no cover
            raise TypeError(f"Unknown params: {list(remaining)}")

    params = property(_get_params, _set_params)

    def refresh_params(self):
        """Refresh shape and chunk slices after the chunks were updated."""
        refresh_tileable_shape(self)
        fill_chunk_slices(self)
        # refresh_index_value(self)
        # refresh_dtypes(self)

    def refresh_from_dtypes(self, dtypes: pd.Series) -> None:
        """Replace dtypes, columns value and dtypes value; fix the last shape entry."""
        self._dtypes = dtypes
        self._columns_value = parse_index(dtypes.index, store_data=True)
        self._dtypes_value = DtypesValue(key=tokenize(dtypes), value=dtypes)
        dims = list(self._shape)
        dims[-1] = len(dtypes)
        self._shape = tuple(dims)

    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
        """Refresh from the pandas column dtypes carried by ``table_meta``."""
        self.refresh_from_dtypes(table_meta.pd_column_dtypes)

    @property
    def dtypes(self):
        """Own dtypes when set, otherwise the ``dtypes`` of the op (or None)."""
        own = getattr(self, "_dtypes", None)
        return own if own is not None else getattr(self.op, "dtypes", None)

    @property
    def dtypes_value(self):
        """Cached ``DtypesValue``; built from ``dtypes`` on first access if possible."""
        cached = self._dtypes_value
        if cached is None:
            # TODO(qinxuye): when creating Dataframe,
            #  dtypes_value instead of dtypes later must be passed into
            dtypes = self.dtypes
            if dtypes is not None:
                cached = DtypesValue(key=tokenize(dtypes), value=dtypes)
                self._dtypes_value = cached
        return cached

    @property
    def index_value(self):
        """Stored index value."""
        return self._index_value

    @property
    def columns_value(self):
        """Stored columns value."""
        return self._columns_value

    @property
    def empty(self):
        """
        Whether any dimension of the shape is zero.

        Raises
        ------
        ValueError
            If the shape contains NaN.
        """
        dims = self._shape
        if np.isnan(dims).any():
            raise ValueError("Tileable object must be executed first")
        return 0 in dims

    def to_tensor(self, dtype=None):
        """Build a tensor from this object via ``from_dataframe``."""
        from ..tensor.datasource.from_dataframe import from_dataframe

        return from_dataframe(self, dtype=dtype)

    @property
    def index(self):
        """Index object created from this tileable."""
        from .datasource.index import from_tileable

        return from_tileable(self)

    @property
    def columns(self):
        """Index object created from the labels of ``dtypes``."""
        from .datasource.index import from_pandas as from_pandas_index

        labels = self.dtypes.index
        return from_pandas_index(labels, store_data=True)

    @property
    def axes(self):
        """List holding the index followed by the columns."""
        return [self.index, self.columns]

    def _get_dtypes_dict(self):
        """Return the cached mapping of column label to the list of its dtypes."""
        if self._dtypes_dict is None:
            grouped = dict()
            self._dtypes_dict = grouped
            # A label may occur more than once, hence a list per label.
            for label, dt in self.dtypes.items():
                grouped.setdefault(label, []).append(dt)
        return self._dtypes_dict

    def _get_dtypes_by_columns(self, columns: list):
        """Concatenate the dtype lists of the given column labels."""
        by_label = self._get_dtypes_dict()
        return [dt for label in columns for dt in by_label[label]]

    def _get_columns_by_columns(self, columns: list):
        """Repeat every given label once per dtype recorded for it."""
        by_label = self._get_dtypes_dict()
        return [label for label in columns for _ in by_label[label]]
1583
+
1584
+
1585
class DataFrameData(_BatchedFetcher, BaseDataFrameData):
    """Tileable data of a DataFrame with text/HTML rendering and row iteration."""

    type_name = "DataFrame"

    def _to_str(self, representation=False):
        """
        Render the frame as text.

        Parameters
        ----------
        representation : bool, default False
            Use ``repr`` instead of ``str`` for the rendering.

        Returns
        -------
        str
            A short description of the op when in build mode or not yet
            executed, otherwise the rendered corner data.
        """
        if is_build_mode() or len(self._executed_sessions) == 0:
            # in build mode, or not executed, just return representation
            if representation:
                return (
                    f"{self.type_name} <op={type(self._op).__name__}, key={self.key}>"
                )
            return f"{self.type_name}(op={type(self._op).__name__})"

        corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])
        render = repr if representation else str

        buf = StringIO()
        max_rows = pd.get_option("display.max_rows")

        # pandas allows ``display.max_rows`` to be None (no limit); comparing
        # an int with None would raise TypeError, so test for it first.
        if (
            max_rows is None
            or self.shape[0] <= max_rows
            or corner_data.shape[0] == 0
        ):
            buf.write(render(corner_data))
        else:
            # remember we cannot directly call repr(df),
            # because the [... rows x ... columns] may show wrong rows
            with pd.option_context(
                "display.show_dimensions",
                False,
                "display.max_rows",
                corner_data.shape[0] - 1,
            ):
                buf.write(render(corner_data))
            if pd.get_option("display.show_dimensions"):
                n_rows, n_cols = self.shape
                buf.write(f"\n\n[{n_rows} rows x {n_cols} columns]")

        return buf.getvalue()

    def __str__(self):
        return self._to_str(representation=False)

    def __repr__(self):
        return self._to_str(representation=True)

    def __maxframe_tensor__(self, dtype=None, order="K"):
        """Convert to a tensor and cast it without forcing a copy."""
        return self.to_tensor().astype(dtype=dtype, order=order, copy=False)

    def _repr_html_(self):
        """
        Render the frame as HTML.

        Returns
        -------
        str or None
            The HTML text, or None when no corner data is available or the
            fetched pandas object produces no HTML.

        Raises
        ------
        NotImplementedError
            If the object has not been executed yet.
        """
        if len(self._executed_sessions) == 0:
            # not executed before, fall back to normal repr
            raise NotImplementedError

        corner_data = fetch_corner_data(self, session=self._executed_sessions[-1])
        if corner_data is None:
            return None

        buf = StringIO()
        max_rows = pd.get_option("display.max_rows")
        # ``display.max_rows`` may be None (no limit), see `_to_str`.
        if max_rows is None or self.shape[0] <= max_rows:
            html = corner_data._repr_html_()
            if html is None:
                # pandas returns None when HTML output is switched off
                return None
            buf.write(html)
        else:
            with pd.option_context(
                "display.show_dimensions",
                False,
                "display.max_rows",
                corner_data.shape[0] - 1,
            ):
                html = corner_data._repr_html_()
            if html is None:
                return None
            # Drop exactly one closing tag. ``str.rstrip("</div>")`` would
            # strip any run of the characters '<', '/', 'd', 'i', 'v', '>'.
            closing_tag = "</div>"
            html = html.rstrip()
            if html.endswith(closing_tag):
                html = html[: -len(closing_tag)]
            buf.write(html)
            if pd.get_option("display.show_dimensions"):
                n_rows, n_cols = self.shape
                buf.write(f"<p>{n_rows} rows × {n_cols} columns</p>\n")
            buf.write(closing_tag)

        return buf.getvalue()

    def items(self):
        """Yield ``(column name, self[column name])`` for every label in dtypes."""
        for col_name in self.dtypes.index:
            yield col_name, self[col_name]

    iteritems = items

    def iterrows(self, batch_size=1000, session=None):
        """Yield the rows of every fetched batch through the batch's ``iterrows``."""
        for batch_data in self.iterbatch(batch_size=batch_size, session=session):
            yield from batch_data.iterrows()

    def itertuples(self, index=True, name="Pandas", batch_size=1000, session=None):
        """Yield the rows of every fetched batch through the batch's ``itertuples``."""
        for batch_data in self.iterbatch(batch_size=batch_size, session=session):
            yield from batch_data.itertuples(index=index, name=name)

    def _need_execution(self):
        """Return True when no dtypes are stored on this object."""
        return self._dtypes is None
1680
+
1681
+
1682
class DataFrame(HasShapeTileable, _ToPandasMixin):
    """User-facing DataFrame that wraps a ``DataFrameData`` object."""

    __slots__ = ("_cache",)
    _allow_data_type_ = (DataFrameData,)
    type_name = "DataFrame"

    def __len__(self):
        return len(self._data)

    def to_tensor(self, dtype=None):
        """
        Convert the DataFrame to a tensor.

        Parameters
        ----------
        dtype : optional
            Forwarded to ``to_tensor`` of the wrapped data.
        """
        return self._data.to_tensor(dtype=dtype)

    def __maxframe_tensor__(self, dtype=None, order="K"):
        return self._data.__maxframe_tensor__(dtype=dtype, order=order)

    def __getattr__(self, key):
        """
        Resolve ``key`` on the wrapped data first, then as a column label.

        Raises
        ------
        AttributeError
            If ``key`` is neither an attribute of the data nor a known column.
        """
        try:
            return getattr(self._data, key)
        except AttributeError:
            dtypes = self.dtypes
            # dtypes can be None (unknown); ``key in None`` would raise
            # TypeError and break ``hasattr`` / ``getattr`` with a default.
            if dtypes is not None and key in dtypes:
                return self[key]
            raise

    def __dir__(self):
        """List regular attributes plus column labels that are valid identifiers."""
        result = list(super().__dir__())
        dtypes = self.dtypes
        if dtypes is None:
            # no column information available
            return sorted(result)
        return sorted(
            result
            + [k for k in dtypes.index if isinstance(k, str) and k.isidentifier()]
        )

    def __iter__(self):
        # prevent being called by pandas to make sure `__eq__` works
        prevent_called_from_pandas()
        return iter(self.dtypes.index)

    def __class_getitem__(cls, item):
        return DataFrameType.from_getitem_args(item)

    @property
    def T(self):
        """Result of ``transpose()``."""
        return self.transpose()

    @property
    def ndim(self):
        """
        Return an int representing the number of axes / array dimensions.

        Return 1 if Series. Otherwise return 2 if DataFrame.

        See Also
        --------
        ndarray.ndim : Number of array dimensions.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> s = md.Series({'a': 1, 'b': 2, 'c': 3})
        >>> s.ndim
        1

        >>> df = md.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
        >>> df.ndim
        2
        """
        return super().ndim

    @property
    def index(self):
        """Index of the wrapped data, linked back to this DataFrame on axis 0."""
        idx = self._data.index
        idx._set_df_or_series(self, 0)
        return idx

    @index.setter
    def index(self, new_index):
        self.set_axis(new_index, axis=0, inplace=True)

    @property
    def columns(self):
        """Columns of the wrapped data, linked back to this DataFrame on axis 1."""
        col = self._data.columns
        col._set_df_or_series(self, 1)
        return col

    @columns.setter
    def columns(self, new_columns):
        self.set_axis(new_columns, axis=1, inplace=True)

    def keys(self):
        """
        Get the 'info axis' (see Indexing for more).

        This is index for Series, columns for DataFrame.

        Returns
        -------
        Index
            Info axis.
        """
        return self.columns

    @property
    def values(self):
        """Tensor view of the data, obtained through ``to_tensor()``."""
        return self.to_tensor()

    @property
    def dtypes(self):
        """
        Return the dtypes in the DataFrame.

        This returns a Series with the data type of each column.
        The result's index is the original DataFrame's columns. Columns
        with mixed types are stored with the ``object`` dtype. See
        :ref:`the User Guide <basics.dtypes>` for more.

        Returns
        -------
        pandas.Series
            The data type of each column.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> df = md.DataFrame({'float': [1.0],
        ...                    'int': [1],
        ...                    'datetime': [md.Timestamp('20180310')],
        ...                    'string': ['foo']})
        >>> df.dtypes
        float              float64
        int                  int64
        datetime    datetime64[ns]
        string              object
        dtype: object
        """
        return self._data.dtypes

    def iterrows(self, batch_size=1000, session=None):
        """
        Iterate over DataFrame rows as (index, Series) pairs.

        Parameters
        ----------
        batch_size : int, default 1000
            Forwarded to ``iterrows`` of the wrapped data.
        session : optional
            Forwarded to ``iterrows`` of the wrapped data.

        Yields
        ------
        index : label or tuple of label
            The index of the row. A tuple for a `MultiIndex`.
        data : Series
            The data of the row as a Series.

        it : generator
            A generator that iterates over the rows of the frame.

        See Also
        --------
        DataFrame.itertuples : Iterate over DataFrame rows as namedtuples of the values.
        DataFrame.items : Iterate over (column name, Series) pairs.

        Notes
        -----

        1. Because ``iterrows`` returns a Series for each row,
           it does **not** preserve dtypes across the rows (dtypes are
           preserved across columns for DataFrames). For example,

           >>> import maxframe.dataframe as md
           >>> df = md.DataFrame([[1, 1.5]], columns=['int', 'float'])
           >>> row = next(df.iterrows())[1]
           >>> row
           int      1.0
           float    1.5
           Name: 0, dtype: float64
           >>> print(row['int'].dtype)
           float64
           >>> print(df['int'].dtype)
           int64

           To preserve dtypes while iterating over the rows, it is better
           to use :meth:`itertuples` which returns namedtuples of the values
           and which is generally faster than ``iterrows``.

        2. You should **never modify** something you are iterating over.
           This is not guaranteed to work in all cases. Depending on the
           data types, the iterator returns a copy and not a view, and writing
           to it will have no effect.
        """
        return self._data.iterrows(batch_size=batch_size, session=session)

    def itertuples(self, index=True, name="Pandas", batch_size=1000, session=None):
        """
        Iterate over DataFrame rows as namedtuples.

        Parameters
        ----------
        index : bool, default True
            If True, return the index as the first element of the tuple.
        name : str or None, default "Pandas"
            The name of the returned namedtuples or None to return regular
            tuples.
        batch_size : int, default 1000
            Forwarded to ``itertuples`` of the wrapped data.
        session : optional
            Forwarded to ``itertuples`` of the wrapped data.

        Returns
        -------
        iterator
            An object to iterate over namedtuples for each row in the
            DataFrame with the first field possibly being the index and
            following fields being the column values.

        See Also
        --------
        DataFrame.iterrows : Iterate over DataFrame rows as (index, Series)
            pairs.
        DataFrame.items : Iterate over (column name, Series) pairs.

        Notes
        -----
        The column names will be renamed to positional names if they are
        invalid Python identifiers, repeated, or start with an underscore.
        On python versions < 3.7 regular tuples are returned for DataFrames
        with a large number of columns (>254).

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> df = md.DataFrame({'num_legs': [4, 2], 'num_wings': [0, 2]},
        ...                   index=['dog', 'hawk'])
        >>> df.execute()
              num_legs  num_wings
        dog          4          0
        hawk         2          2
        >>> for row in df.itertuples():
        ...     print(row)
        ...
        Pandas(Index='dog', num_legs=4, num_wings=0)
        Pandas(Index='hawk', num_legs=2, num_wings=2)

        By setting the `index` parameter to False we can remove the index
        as the first element of the tuple:

        >>> for row in df.itertuples(index=False):
        ...     print(row)
        ...
        Pandas(num_legs=4, num_wings=0)
        Pandas(num_legs=2, num_wings=2)

        With the `name` parameter set we set a custom name for the yielded
        namedtuples:

        >>> for row in df.itertuples(name='Animal'):
        ...     print(row)
        ...
        Animal(Index='dog', num_legs=4, num_wings=0)
        Animal(Index='hawk', num_legs=2, num_wings=2)
        """
        return self._data.itertuples(
            batch_size=batch_size, session=session, index=index, name=name
        )

    def assign(self, **kwargs):
        """
        Assign new columns to a DataFrame.
        Returns a new object with all original columns in addition to new ones.
        Existing columns that are re-assigned will be overwritten.

        Parameters
        ----------
        **kwargs : dict of {str: callable or Series}
            The column names are keywords. If the values are
            callable, they are computed on the DataFrame and
            assigned to the new columns. The callable must not
            change input DataFrame (though pandas doesn't check it).
            If the values are not callable, (e.g. a Series, scalar, or array),
            they are simply assigned.

        Returns
        -------
        DataFrame
            A new DataFrame with the new columns in addition to
            all the existing columns.

        Notes
        -----
        Assigning multiple columns within the same ``assign`` is possible.
        Later items in 'kwargs' may refer to newly created or modified
        columns in 'df'; items are computed and assigned into 'df' in order.

        Examples
        --------
        >>> import maxframe.dataframe as md
        >>> df = md.DataFrame({'temp_c': [17.0, 25.0]},
        ...                   index=['Portland', 'Berkeley'])
        >>> df.execute()
                  temp_c
        Portland    17.0
        Berkeley    25.0

        Where the value is a callable, evaluated on `df`:

        >>> df.assign(temp_f=lambda x: x.temp_c * 9 / 5 + 32).execute()
                  temp_c  temp_f
        Portland    17.0    62.6
        Berkeley    25.0    77.0

        Alternatively, the same behavior can be achieved by directly
        referencing an existing Series or sequence:

        >>> df.assign(temp_f=df['temp_c'] * 9 / 5 + 32).execute()
                  temp_c  temp_f
        Portland    17.0    62.6
        Berkeley    25.0    77.0

        You can create multiple columns within the same assign where one
        of the columns depends on another one defined within the same assign:

        >>> df.assign(temp_f=lambda x: x['temp_c'] * 9 / 5 + 32,
        ...           temp_k=lambda x: (x['temp_f'] + 459.67) * 5 / 9).execute()
                  temp_c  temp_f  temp_k
        Portland    17.0    62.6  290.15
        Berkeley    25.0    77.0  298.15
        """
        # Work on a copy so that the calling object stays untouched.
        data = self.copy()

        # Assign in keyword order so later callables can see earlier columns.
        for k, v in kwargs.items():
            data[k] = apply_if_callable(v, data)
        return data
2002
+
2003
+
2004
class DataFrameGroupByData(BaseDataFrameData):
    """DataFrame tileable data extended with group-by key dtypes and a selection."""

    type_name = "DataFrameGroupBy"

    _key_dtypes = SeriesField("key_dtypes")
    _selection = AnyField("selection")

    @property
    def key_dtypes(self):
        """Stored key dtypes."""
        return self._key_dtypes

    @property
    def selection(self):
        """Stored selection."""
        return self._selection

    def _get_params(self) -> Dict[str, Any]:
        """Return the parent params plus ``key_dtypes`` and ``selection``."""
        collected = super()._get_params()
        collected["key_dtypes"] = self.key_dtypes
        collected["selection"] = self.selection
        return collected

    def _set_params(self, new_params: Dict[str, Any]):
        """Apply the group-by specific entries, then hand the rest to the parent."""
        remaining = new_params.copy()
        for name in ("key_dtypes", "selection"):
            value = remaining.pop(name, None)
            if value is not None:
                setattr(self, f"_{name}", value)
        super()._set_params(remaining)

    params = property(_get_params, _set_params)

    def __init__(self, key_dtypes=None, selection=None, **kw):
        super().__init__(_key_dtypes=key_dtypes, _selection=selection, **kw)

    def _equal(self, o):
        """Compare by identity in build mode, otherwise with ``==``."""
        # FIXME: a true `==` operator still needs to be implemented for DataFrameGroupby
        return (self is o) if is_build_mode() else (self == o)
2044
+
2045
+
2046
class SeriesGroupByData(BaseSeriesData):
    """Series tileable data extended with group-by key dtypes."""

    type_name = "SeriesGroupBy"

    _key_dtypes = AnyField("key_dtypes")

    @property
    def key_dtypes(self):
        """Stored key dtypes."""
        return self._key_dtypes

    def _get_params(self) -> Dict[str, Any]:
        """Return the parent params plus ``key_dtypes``."""
        collected = super()._get_params()
        collected.update(key_dtypes=self.key_dtypes)
        return collected

    def _set_params(self, new_params: Dict[str, Any]):
        """Apply ``key_dtypes`` when given, then hand the rest to the parent."""
        remaining = new_params.copy()
        supplied = remaining.pop("key_dtypes", None)
        if supplied is not None:
            self._key_dtypes = supplied
        super()._set_params(remaining)

    params = property(_get_params, _set_params)

    def __init__(self, key_dtypes=None, **kw):
        super().__init__(_key_dtypes=key_dtypes, **kw)

    def _equal(self, o):
        """Compare by identity in build mode, otherwise with ``==``."""
        # FIXME: a true `==` operator still needs to be implemented for DataFrameGroupby
        return (self is o) if is_build_mode() else (self == o)
2078
+
2079
+
2080
class GroupBy(Tileable, _ToPandasMixin):
    """Common base class of the group-by tileables defined in this module."""

    __slots__ = ()
2082
+
2083
+
2084
class DataFrameGroupBy(GroupBy):
    """Group-by tileable that wraps ``DataFrameGroupByData``."""

    __slots__ = ()
    _allow_data_type_ = (DataFrameGroupByData,)
    type_name = "DataFrameGroupBy"

    def __eq__(self, other):
        return self._equal(other)

    def __hash__(self):
        # NB: we have customized __eq__ explicitly, thus we need define __hash__ explicitly as well.
        return super().__hash__()

    def __getattr__(self, item):
        """
        Resolve ``item`` through the parent lookup first, then as a column label.

        Raises
        ------
        AttributeError
            If ``item`` is neither resolvable by the parent nor a known column.
        """
        try:
            return super().__getattr__(item)
        except AttributeError:
            dtypes = self.dtypes
            # dtypes can be None (unknown); ``item in None`` would raise
            # TypeError and break ``hasattr`` / ``getattr`` with a default.
            if dtypes is not None and item in dtypes:
                return self[item]
            raise

    def __dir__(self):
        """List regular attributes plus column labels that are valid identifiers."""
        result = list(super().__dir__())
        dtypes = self.dtypes
        if dtypes is None:
            # no column information available
            return sorted(result)
        return sorted(
            result
            + [k for k in dtypes.index if isinstance(k, str) and k.isidentifier()]
        )
2111
+
2112
+
2113
class SeriesGroupBy(GroupBy):
    """Group-by tileable that wraps ``SeriesGroupByData``."""

    __slots__ = ()
    _allow_data_type_ = (SeriesGroupByData,)
    type_name = "SeriesGroupBy"

    def __eq__(self, other):
        """Delegate the comparison to ``_equal``."""
        outcome = self._equal(other)
        return outcome

    def __hash__(self):
        # NB: `__eq__` is customized above, so `__hash__` has to be defined explicitly too.
        parent_hash = super().__hash__()
        return parent_hash
2124
+
2125
+
2126
class CategoricalData(HasShapeTileableData, _ToPandasMixin):
    """Tileable data of a Categorical: a dtype plus the categories index value."""

    __slots__ = ("_cache",)
    type_name = "Categorical"

    # optional field
    _dtype = DataTypeField("dtype")
    _categories_value = ReferenceField(
        "categories_value", IndexValue, on_deserialize=_on_deserialize_index_value
    )

    def __init__(
        self,
        op=None,
        shape=None,
        nsplits=None,
        dtype=None,
        categories_value=None,
        **kw,
    ):
        super().__init__(
            _op=op,
            _shape=shape,
            _nsplits=nsplits,
            _dtype=dtype,
            _categories_value=categories_value,
            **kw,
        )

    @property
    def params(self) -> Dict[str, Any]:
        # params return the properties which useful to rebuild a new tileable object
        return {
            "shape": self.shape,
            "dtype": self.dtype,
            "categories_value": self.categories_value,
        }

    @params.setter
    def params(self, new_params: Dict[str, Any]):
        """
        Apply the entries of ``new_params`` that are not None.

        Raises
        ------
        TypeError
            If ``new_params`` contains keys that are not recognized.
        """
        params = new_params.copy()
        new_shape = params.pop("shape", None)
        if new_shape is not None:
            self._shape = new_shape
        dtype = params.pop("dtype", None)
        if dtype is not None:
            self._dtype = dtype
        categories_value = params.pop("categories_value", None)
        if categories_value is not None:
            self._categories_value = categories_value
        if params:  # pragma: no cover
            raise TypeError(f"Unknown params: {list(params)}")

    def refresh_params(self):
        """Refresh shape and chunk slices; fill dtype and categories if missing."""
        # refresh params when chunks updated
        refresh_tileable_shape(self)
        fill_chunk_slices(self)
        if self._dtype is None:
            self._dtype = self.chunks[0].dtype
        if self._categories_value is None:
            # gather the categories of all chunks and let pandas build the
            # resulting categories
            categories = []
            for chunk in self.chunks:
                categories.extend(chunk.categories_value.to_pandas())
            self._categories_value = parse_index(
                pd.Categorical(categories).categories, store_data=True
            )

    def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
        pass

    def _to_str(self, representation=False):
        """
        Render the categorical as text.

        Parameters
        ----------
        representation : bool, default False
            Use ``repr`` instead of ``str`` for the rendering.

        Returns
        -------
        str
            A short description of the op when in build mode or not yet
            executed, otherwise the rendering of the fetched data.
        """
        if is_build_mode() or len(self._executed_sessions) == 0:
            # in build mode, or not executed, just return representation
            if representation:
                return f"{self.type_name} <op={type(self.op).__name__}, key={self.key}>"
            else:
                return f"{self.type_name}(op={type(self.op).__name__})"
        else:
            data = self.fetch(session=self._executed_sessions[-1])
            # The choice must depend on `representation`; testing
            # ``repr(data)`` for truthiness always selected ``repr``.
            return repr(data) if representation else str(data)

    def __str__(self):
        return self._to_str(representation=False)

    def __repr__(self):
        return self._to_str(representation=True)

    def _equal(self, o):
        # FIXME: a true `==` operator still needs to be implemented for DataFrameGroupby
        # NOTE(review): `__eq__` of this class delegates to `_equal`, so
        # outside build mode ``self == o`` calls `__eq__` again - confirm
        # the intended comparison.
        if is_build_mode():
            return self is o
        else:  # pragma: no cover
            return self == o

    @property
    def dtype(self):
        """Own dtype when set and truthy, otherwise the ``dtype`` of the op."""
        return getattr(self, "_dtype", None) or self.op.dtype

    @property
    def categories_value(self):
        """Stored categories value."""
        return self._categories_value

    def __eq__(self, other):
        return self._equal(other)

    def __hash__(self):
        # NB: we have customized __eq__ explicitly, thus we need define __hash__ explicitly as well.
        return super().__hash__()
2233
+
2234
+
2235
class Categorical(HasShapeTileable, _ToPandasMixin):
    """User-facing Categorical that wraps a ``CategoricalData`` object."""

    __slots__ = ()
    _allow_data_type_ = (CategoricalData,)
    type_name = "Categorical"

    def __len__(self):
        """Return the length reported by the wrapped data object."""
        wrapped = self._data
        return len(wrapped)

    def __eq__(self, other):
        """Delegate the comparison to ``_equal``."""
        outcome = self._equal(other)
        return outcome

    def __hash__(self):
        # NB: `__eq__` is customized above, so `__hash__` has to be defined explicitly too.
        parent_hash = super().__hash__()
        return parent_hash
2249
+
2250
+
2251
+ class DataFrameOrSeriesData(HasShapeTileableData, _ToPandasMixin):
2252
+ __slots__ = ()
2253
+
2254
+ _data_type = StringField("data_type")
2255
+ _data_params = DictField("data_params")
2256
+
2257
def __init__(self, op=None, data_type=None, data_params=None, **kw):
    """Record the data type and its params, then initialize the parent with the op."""
    # Both attributes are assigned before the parent initializer runs,
    # exactly in this order.
    self._data_type = data_type
    self._data_params = data_params if data_params else dict()
    super().__init__(_op=op, **kw)
2270
+
2271
def __getattr__(self, item):
    """
    Look up ``item`` in the stored data params.

    This hook runs only after regular attribute lookup has failed.

    Raises
    ------
    AttributeError
        If ``item`` is not a key of the data params.
    """
    # If `_data_params` itself cannot be resolved, reading it below would
    # call this method again without end; fail with AttributeError instead.
    if item == "_data_params":
        raise AttributeError(
            f"'{type(self).__name__}' object has no attribute '{item}'"
        )
    data_params = self._data_params
    if item in data_params:
        return data_params[item]
    # Name the class the way Python's own messages do, rather than
    # embedding the ``<class '...'>`` representation.
    raise AttributeError(f"'{type(self).__name__}' object has no attribute '{item}'")
2275
+
2276
@property
def shape(self):
    """The ``"shape"`` entry of the data params, or None when absent."""
    stored = self._data_params
    return stored.get("shape")
2279
+
2280
@property
def nsplits(self):
    """The ``"nsplits"`` entry of the data params, or None when absent."""
    stored = self._data_params
    return stored.get("nsplits")
2283
+
2284
@property
def data_type(self):
    """The stored data type."""
    kind = self._data_type
    return kind
2287
+
2288
@property
def data_params(self):
    """The stored data params dictionary (the same object, not a copy)."""
    stored = self._data_params
    return stored
2291
+
2292
@property
def params(self) -> Dict[str, Any]:
    """Dictionary holding the stored data type and data params."""
    return dict(data_type=self._data_type, data_params=self._data_params)


@params.setter
def params(self, new_params: Dict[str, Any]):
    """Restore data type and data params from ``new_params``."""
    if "data_type" in new_params:
        self._data_type = new_params.get("data_type")
        self._data_params = dict(new_params.get("data_params", {}))
        return
    # After execution, a DataFrameFetch is created while the data behind
    # the original key is still of DataFrameOrSeries type. Restoring that
    # type therefore receives params without a "data_type" field, and the
    # type is told apart by the presence of "dtype".
    self._data_type = "series" if "dtype" in new_params else "dataframe"
    self._data_params = new_params.copy()
2313
+
2314
+ def refresh_params(self):
2315
+ index_to_index_values = dict()
2316
+ for chunk in self.chunks:
2317
+ if chunk.ndim == 1:
2318
+ index_to_index_values[chunk.index] = chunk.index_value
2319
+ elif chunk.index[1] == 0:
2320
+ index_to_index_values[chunk.index] = chunk.index_value
2321
+ index_value = merge_index_value(index_to_index_values, store_data=False)
2322
+ nsplits = calc_nsplits({c.index: c.shape for c in self.chunks})
2323
+ shape = tuple(sum(ns) for ns in nsplits)
2324
+
2325
+ data_params = dict()
2326
+ data_params["nsplits"] = nsplits
2327
+ data_params["shape"] = shape
2328
+ data_params["index_value"] = index_value
2329
+
2330
+ self._data_type = self._chunks[0]._data_type
2331
+ if self.data_type == "dataframe":
2332
+ all_dtypes = [c.dtypes_value.value for c in self.chunks if c.index[0] == 0]
2333
+ dtypes = pd.concat(all_dtypes)
2334
+ data_params["dtypes"] = dtypes
2335
+ columns_values = parse_index(dtypes.index, store_data=True)
2336
+ data_params["columns_value"] = columns_values
2337
+ data_params["dtypes_value"] = DtypesValue(
2338
+ key=tokenize(dtypes), value=dtypes
2339
+ )
2340
+ else:
2341
+ data_params["dtype"] = self.chunks[0].dtype
2342
+ data_params["name"] = self.chunks[0].name
2343
+ self._data_params.update(data_params)
2344
+
2345
+ def refresh_from_table_meta(self, table_meta: DataFrameTableMeta) -> None:
2346
+ pass
2347
+
2348
+ def ensure_data(self):
2349
+ from .fetch.core import DataFrameFetch
2350
+
2351
+ self.execute()
2352
+ default_sess = get_default_session()
2353
+ self._detach_session(default_sess._session)
2354
+
2355
+ fetch_tileable = default_sess._session._tileable_to_fetch[self]
2356
+ new = DataFrameFetch(
2357
+ output_types=[getattr(OutputType, self.data_type)]
2358
+ ).new_tileable(
2359
+ [],
2360
+ _key=self.key,
2361
+ chunks=fetch_tileable.chunks,
2362
+ nsplits=fetch_tileable.nsplits,
2363
+ **self.data_params,
2364
+ )
2365
+ new._attach_session(default_sess._session)
2366
+ return new
2367
+
2368
+
2369
class DataFrameOrSeries(HasShapeTileable, _ToPandasMixin):
    """Tileable wrapper paired with ``DataFrameOrSeriesData``.

    ``_allow_data_type_`` lists ``DataFrameOrSeriesData`` as its data class.
    """

    type_name = "DataFrameOrSeries"
    _allow_data_type_ = (DataFrameOrSeriesData,)
    __slots__ = ()
2373
+
2374
+
2375
# Each ``*_TYPE`` tuple groups a tileable class with its data class so the
# pair can be used together (e.g. as an ``isinstance`` target).
INDEX_TYPE = (Index, IndexData)
SERIES_TYPE = (Series, SeriesData)
DATAFRAME_OR_SERIES_TYPE = (DataFrameOrSeries, DataFrameOrSeriesData)
DATAFRAME_TYPE = (DataFrame, DataFrameData)
DATAFRAME_GROUPBY_TYPE = (DataFrameGroupBy, DataFrameGroupByData)
SERIES_GROUPBY_TYPE = (SeriesGroupBy, SeriesGroupByData)
# Generic GroupBy first, then both concrete group-by pairs.
GROUPBY_TYPE = (GroupBy, *DATAFRAME_GROUPBY_TYPE, *SERIES_GROUPBY_TYPE)
CATEGORICAL_TYPE = (Categorical, CategoricalData)
# NOTE: DATAFRAME_OR_SERIES_TYPE is not part of this aggregate.
TILEABLE_TYPE = (
    *INDEX_TYPE,
    *SERIES_TYPE,
    *DATAFRAME_TYPE,
    *GROUPBY_TYPE,
    *CATEGORICAL_TYPE,
)

# Register every output type with its class pair, in this fixed order.
for _registered_output_type, _registered_classes in (
    (OutputType.dataframe, DATAFRAME_TYPE),
    (OutputType.series, SERIES_TYPE),
    (OutputType.df_or_series, DATAFRAME_OR_SERIES_TYPE),
    (OutputType.index, INDEX_TYPE),
    (OutputType.categorical, CATEGORICAL_TYPE),
    (OutputType.dataframe_groupby, DATAFRAME_GROUPBY_TYPE),
    (OutputType.series_groupby, SERIES_GROUPBY_TYPE),
):
    register_output_types(_registered_output_type, _registered_classes)
# Keep the module namespace free of the loop helpers.
del _registered_output_type, _registered_classes