maxframe 0.1.0b5__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (647):
  1. maxframe/__init__.py +32 -0
  2. maxframe/_utils.cpython-311-darwin.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyx +547 -0
  5. maxframe/codegen.py +528 -0
  6. maxframe/config/__init__.py +15 -0
  7. maxframe/config/config.py +443 -0
  8. maxframe/config/tests/__init__.py +13 -0
  9. maxframe/config/tests/test_config.py +103 -0
  10. maxframe/config/tests/test_validators.py +34 -0
  11. maxframe/config/validators.py +57 -0
  12. maxframe/conftest.py +139 -0
  13. maxframe/core/__init__.py +65 -0
  14. maxframe/core/base.py +156 -0
  15. maxframe/core/entity/__init__.py +44 -0
  16. maxframe/core/entity/chunks.py +68 -0
  17. maxframe/core/entity/core.py +152 -0
  18. maxframe/core/entity/executable.py +337 -0
  19. maxframe/core/entity/fuse.py +73 -0
  20. maxframe/core/entity/objects.py +100 -0
  21. maxframe/core/entity/output_types.py +90 -0
  22. maxframe/core/entity/tileables.py +438 -0
  23. maxframe/core/entity/utils.py +24 -0
  24. maxframe/core/graph/__init__.py +17 -0
  25. maxframe/core/graph/builder/__init__.py +16 -0
  26. maxframe/core/graph/builder/base.py +86 -0
  27. maxframe/core/graph/builder/chunk.py +430 -0
  28. maxframe/core/graph/builder/tileable.py +34 -0
  29. maxframe/core/graph/builder/utils.py +41 -0
  30. maxframe/core/graph/core.cpython-311-darwin.so +0 -0
  31. maxframe/core/graph/core.pyx +467 -0
  32. maxframe/core/graph/entity.py +171 -0
  33. maxframe/core/graph/tests/__init__.py +13 -0
  34. maxframe/core/graph/tests/test_graph.py +205 -0
  35. maxframe/core/mode.py +96 -0
  36. maxframe/core/operator/__init__.py +34 -0
  37. maxframe/core/operator/base.py +450 -0
  38. maxframe/core/operator/core.py +276 -0
  39. maxframe/core/operator/fetch.py +53 -0
  40. maxframe/core/operator/fuse.py +29 -0
  41. maxframe/core/operator/objects.py +72 -0
  42. maxframe/core/operator/shuffle.py +111 -0
  43. maxframe/core/operator/tests/__init__.py +13 -0
  44. maxframe/core/operator/tests/test_core.py +64 -0
  45. maxframe/core/tests/__init__.py +13 -0
  46. maxframe/core/tests/test_mode.py +75 -0
  47. maxframe/dataframe/__init__.py +81 -0
  48. maxframe/dataframe/arithmetic/__init__.py +359 -0
  49. maxframe/dataframe/arithmetic/abs.py +33 -0
  50. maxframe/dataframe/arithmetic/add.py +60 -0
  51. maxframe/dataframe/arithmetic/arccos.py +28 -0
  52. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  53. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  54. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  55. maxframe/dataframe/arithmetic/arctan.py +28 -0
  56. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  57. maxframe/dataframe/arithmetic/around.py +152 -0
  58. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  59. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  60. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  61. maxframe/dataframe/arithmetic/ceil.py +28 -0
  62. maxframe/dataframe/arithmetic/core.py +342 -0
  63. maxframe/dataframe/arithmetic/cos.py +28 -0
  64. maxframe/dataframe/arithmetic/cosh.py +28 -0
  65. maxframe/dataframe/arithmetic/degrees.py +28 -0
  66. maxframe/dataframe/arithmetic/docstring.py +442 -0
  67. maxframe/dataframe/arithmetic/equal.py +56 -0
  68. maxframe/dataframe/arithmetic/exp.py +28 -0
  69. maxframe/dataframe/arithmetic/exp2.py +28 -0
  70. maxframe/dataframe/arithmetic/expm1.py +28 -0
  71. maxframe/dataframe/arithmetic/floor.py +28 -0
  72. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  73. maxframe/dataframe/arithmetic/greater.py +57 -0
  74. maxframe/dataframe/arithmetic/greater_equal.py +57 -0
  75. maxframe/dataframe/arithmetic/invert.py +33 -0
  76. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  77. maxframe/dataframe/arithmetic/less.py +57 -0
  78. maxframe/dataframe/arithmetic/less_equal.py +57 -0
  79. maxframe/dataframe/arithmetic/log.py +28 -0
  80. maxframe/dataframe/arithmetic/log10.py +28 -0
  81. maxframe/dataframe/arithmetic/log2.py +28 -0
  82. maxframe/dataframe/arithmetic/mod.py +60 -0
  83. maxframe/dataframe/arithmetic/multiply.py +60 -0
  84. maxframe/dataframe/arithmetic/negative.py +33 -0
  85. maxframe/dataframe/arithmetic/not_equal.py +56 -0
  86. maxframe/dataframe/arithmetic/power.py +68 -0
  87. maxframe/dataframe/arithmetic/radians.py +28 -0
  88. maxframe/dataframe/arithmetic/sin.py +28 -0
  89. maxframe/dataframe/arithmetic/sinh.py +28 -0
  90. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  91. maxframe/dataframe/arithmetic/subtract.py +64 -0
  92. maxframe/dataframe/arithmetic/tan.py +28 -0
  93. maxframe/dataframe/arithmetic/tanh.py +28 -0
  94. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  95. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
  96. maxframe/dataframe/arithmetic/truediv.py +64 -0
  97. maxframe/dataframe/arithmetic/trunc.py +28 -0
  98. maxframe/dataframe/arrays.py +864 -0
  99. maxframe/dataframe/core.py +2417 -0
  100. maxframe/dataframe/datasource/__init__.py +15 -0
  101. maxframe/dataframe/datasource/core.py +81 -0
  102. maxframe/dataframe/datasource/dataframe.py +59 -0
  103. maxframe/dataframe/datasource/date_range.py +504 -0
  104. maxframe/dataframe/datasource/from_index.py +54 -0
  105. maxframe/dataframe/datasource/from_records.py +107 -0
  106. maxframe/dataframe/datasource/from_tensor.py +419 -0
  107. maxframe/dataframe/datasource/index.py +117 -0
  108. maxframe/dataframe/datasource/read_csv.py +528 -0
  109. maxframe/dataframe/datasource/read_odps_query.py +299 -0
  110. maxframe/dataframe/datasource/read_odps_table.py +253 -0
  111. maxframe/dataframe/datasource/read_parquet.py +421 -0
  112. maxframe/dataframe/datasource/series.py +55 -0
  113. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  114. maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
  115. maxframe/dataframe/datastore/__init__.py +26 -0
  116. maxframe/dataframe/datastore/core.py +19 -0
  117. maxframe/dataframe/datastore/to_csv.py +227 -0
  118. maxframe/dataframe/datastore/to_odps.py +162 -0
  119. maxframe/dataframe/extensions/__init__.py +41 -0
  120. maxframe/dataframe/extensions/accessor.py +50 -0
  121. maxframe/dataframe/extensions/reshuffle.py +83 -0
  122. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  123. maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
  124. maxframe/dataframe/fetch/__init__.py +15 -0
  125. maxframe/dataframe/fetch/core.py +86 -0
  126. maxframe/dataframe/groupby/__init__.py +82 -0
  127. maxframe/dataframe/groupby/aggregation.py +350 -0
  128. maxframe/dataframe/groupby/apply.py +251 -0
  129. maxframe/dataframe/groupby/core.py +179 -0
  130. maxframe/dataframe/groupby/cum.py +124 -0
  131. maxframe/dataframe/groupby/fill.py +141 -0
  132. maxframe/dataframe/groupby/getitem.py +92 -0
  133. maxframe/dataframe/groupby/head.py +105 -0
  134. maxframe/dataframe/groupby/sample.py +214 -0
  135. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  136. maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
  137. maxframe/dataframe/groupby/transform.py +255 -0
  138. maxframe/dataframe/indexing/__init__.py +84 -0
  139. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  140. maxframe/dataframe/indexing/align.py +349 -0
  141. maxframe/dataframe/indexing/at.py +83 -0
  142. maxframe/dataframe/indexing/getitem.py +204 -0
  143. maxframe/dataframe/indexing/iat.py +37 -0
  144. maxframe/dataframe/indexing/iloc.py +566 -0
  145. maxframe/dataframe/indexing/insert.py +86 -0
  146. maxframe/dataframe/indexing/loc.py +411 -0
  147. maxframe/dataframe/indexing/reindex.py +526 -0
  148. maxframe/dataframe/indexing/rename.py +462 -0
  149. maxframe/dataframe/indexing/rename_axis.py +209 -0
  150. maxframe/dataframe/indexing/reset_index.py +402 -0
  151. maxframe/dataframe/indexing/sample.py +221 -0
  152. maxframe/dataframe/indexing/set_axis.py +194 -0
  153. maxframe/dataframe/indexing/set_index.py +61 -0
  154. maxframe/dataframe/indexing/setitem.py +130 -0
  155. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  156. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  157. maxframe/dataframe/indexing/where.py +308 -0
  158. maxframe/dataframe/initializer.py +288 -0
  159. maxframe/dataframe/merge/__init__.py +32 -0
  160. maxframe/dataframe/merge/append.py +121 -0
  161. maxframe/dataframe/merge/concat.py +325 -0
  162. maxframe/dataframe/merge/merge.py +593 -0
  163. maxframe/dataframe/merge/tests/__init__.py +13 -0
  164. maxframe/dataframe/merge/tests/test_merge.py +215 -0
  165. maxframe/dataframe/misc/__init__.py +134 -0
  166. maxframe/dataframe/misc/_duplicate.py +46 -0
  167. maxframe/dataframe/misc/accessor.py +276 -0
  168. maxframe/dataframe/misc/apply.py +692 -0
  169. maxframe/dataframe/misc/astype.py +236 -0
  170. maxframe/dataframe/misc/case_when.py +141 -0
  171. maxframe/dataframe/misc/check_monotonic.py +84 -0
  172. maxframe/dataframe/misc/cut.py +383 -0
  173. maxframe/dataframe/misc/datetimes.py +79 -0
  174. maxframe/dataframe/misc/describe.py +108 -0
  175. maxframe/dataframe/misc/diff.py +210 -0
  176. maxframe/dataframe/misc/drop.py +440 -0
  177. maxframe/dataframe/misc/drop_duplicates.py +248 -0
  178. maxframe/dataframe/misc/duplicated.py +292 -0
  179. maxframe/dataframe/misc/eval.py +728 -0
  180. maxframe/dataframe/misc/explode.py +171 -0
  181. maxframe/dataframe/misc/get_dummies.py +208 -0
  182. maxframe/dataframe/misc/isin.py +217 -0
  183. maxframe/dataframe/misc/map.py +236 -0
  184. maxframe/dataframe/misc/melt.py +162 -0
  185. maxframe/dataframe/misc/memory_usage.py +248 -0
  186. maxframe/dataframe/misc/pct_change.py +150 -0
  187. maxframe/dataframe/misc/pivot_table.py +262 -0
  188. maxframe/dataframe/misc/qcut.py +104 -0
  189. maxframe/dataframe/misc/select_dtypes.py +104 -0
  190. maxframe/dataframe/misc/shift.py +256 -0
  191. maxframe/dataframe/misc/stack.py +238 -0
  192. maxframe/dataframe/misc/string_.py +221 -0
  193. maxframe/dataframe/misc/tests/__init__.py +13 -0
  194. maxframe/dataframe/misc/tests/test_misc.py +468 -0
  195. maxframe/dataframe/misc/to_numeric.py +178 -0
  196. maxframe/dataframe/misc/transform.py +361 -0
  197. maxframe/dataframe/misc/transpose.py +136 -0
  198. maxframe/dataframe/misc/value_counts.py +182 -0
  199. maxframe/dataframe/missing/__init__.py +53 -0
  200. maxframe/dataframe/missing/checkna.py +223 -0
  201. maxframe/dataframe/missing/dropna.py +280 -0
  202. maxframe/dataframe/missing/fillna.py +275 -0
  203. maxframe/dataframe/missing/replace.py +439 -0
  204. maxframe/dataframe/missing/tests/__init__.py +13 -0
  205. maxframe/dataframe/missing/tests/test_missing.py +89 -0
  206. maxframe/dataframe/operators.py +273 -0
  207. maxframe/dataframe/plotting/__init__.py +40 -0
  208. maxframe/dataframe/plotting/core.py +78 -0
  209. maxframe/dataframe/plotting/tests/__init__.py +13 -0
  210. maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
  211. maxframe/dataframe/reduction/__init__.py +107 -0
  212. maxframe/dataframe/reduction/aggregation.py +344 -0
  213. maxframe/dataframe/reduction/all.py +78 -0
  214. maxframe/dataframe/reduction/any.py +78 -0
  215. maxframe/dataframe/reduction/core.py +837 -0
  216. maxframe/dataframe/reduction/count.py +59 -0
  217. maxframe/dataframe/reduction/cummax.py +30 -0
  218. maxframe/dataframe/reduction/cummin.py +30 -0
  219. maxframe/dataframe/reduction/cumprod.py +30 -0
  220. maxframe/dataframe/reduction/cumsum.py +30 -0
  221. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  222. maxframe/dataframe/reduction/kurtosis.py +104 -0
  223. maxframe/dataframe/reduction/max.py +65 -0
  224. maxframe/dataframe/reduction/mean.py +61 -0
  225. maxframe/dataframe/reduction/min.py +65 -0
  226. maxframe/dataframe/reduction/nunique.py +141 -0
  227. maxframe/dataframe/reduction/prod.py +76 -0
  228. maxframe/dataframe/reduction/reduction_size.py +36 -0
  229. maxframe/dataframe/reduction/sem.py +69 -0
  230. maxframe/dataframe/reduction/skew.py +89 -0
  231. maxframe/dataframe/reduction/std.py +53 -0
  232. maxframe/dataframe/reduction/str_concat.py +48 -0
  233. maxframe/dataframe/reduction/sum.py +77 -0
  234. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  235. maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
  236. maxframe/dataframe/reduction/unique.py +90 -0
  237. maxframe/dataframe/reduction/var.py +72 -0
  238. maxframe/dataframe/sort/__init__.py +34 -0
  239. maxframe/dataframe/sort/core.py +36 -0
  240. maxframe/dataframe/sort/sort_index.py +153 -0
  241. maxframe/dataframe/sort/sort_values.py +311 -0
  242. maxframe/dataframe/sort/tests/__init__.py +13 -0
  243. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  244. maxframe/dataframe/statistics/__init__.py +33 -0
  245. maxframe/dataframe/statistics/corr.py +280 -0
  246. maxframe/dataframe/statistics/quantile.py +341 -0
  247. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  248. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  249. maxframe/dataframe/tests/__init__.py +13 -0
  250. maxframe/dataframe/tests/test_initializer.py +29 -0
  251. maxframe/dataframe/tseries/__init__.py +13 -0
  252. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  253. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  254. maxframe/dataframe/tseries/to_datetime.py +297 -0
  255. maxframe/dataframe/ufunc/__init__.py +27 -0
  256. maxframe/dataframe/ufunc/tensor.py +54 -0
  257. maxframe/dataframe/ufunc/ufunc.py +52 -0
  258. maxframe/dataframe/utils.py +1267 -0
  259. maxframe/dataframe/window/__init__.py +29 -0
  260. maxframe/dataframe/window/aggregation.py +96 -0
  261. maxframe/dataframe/window/core.py +69 -0
  262. maxframe/dataframe/window/ewm.py +249 -0
  263. maxframe/dataframe/window/expanding.py +147 -0
  264. maxframe/dataframe/window/rolling.py +376 -0
  265. maxframe/dataframe/window/tests/__init__.py +13 -0
  266. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  267. maxframe/dataframe/window/tests/test_expanding.py +66 -0
  268. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  269. maxframe/env.py +33 -0
  270. maxframe/errors.py +21 -0
  271. maxframe/extension.py +81 -0
  272. maxframe/learn/__init__.py +17 -0
  273. maxframe/learn/contrib/__init__.py +17 -0
  274. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  275. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  276. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  277. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  278. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  279. maxframe/learn/contrib/utils.py +52 -0
  280. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  281. maxframe/learn/contrib/xgboost/classifier.py +86 -0
  282. maxframe/learn/contrib/xgboost/core.py +156 -0
  283. maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
  284. maxframe/learn/contrib/xgboost/predict.py +138 -0
  285. maxframe/learn/contrib/xgboost/regressor.py +78 -0
  286. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  287. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  288. maxframe/learn/contrib/xgboost/train.py +121 -0
  289. maxframe/learn/utils/__init__.py +15 -0
  290. maxframe/learn/utils/core.py +29 -0
  291. maxframe/lib/__init__.py +15 -0
  292. maxframe/lib/aio/__init__.py +27 -0
  293. maxframe/lib/aio/_runners.py +162 -0
  294. maxframe/lib/aio/_threads.py +35 -0
  295. maxframe/lib/aio/base.py +82 -0
  296. maxframe/lib/aio/file.py +85 -0
  297. maxframe/lib/aio/isolation.py +100 -0
  298. maxframe/lib/aio/lru.py +242 -0
  299. maxframe/lib/aio/parallelism.py +37 -0
  300. maxframe/lib/aio/tests/__init__.py +13 -0
  301. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  302. maxframe/lib/compression.py +55 -0
  303. maxframe/lib/cython/__init__.py +13 -0
  304. maxframe/lib/cython/libcpp.pxd +30 -0
  305. maxframe/lib/filesystem/__init__.py +21 -0
  306. maxframe/lib/filesystem/_glob.py +173 -0
  307. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  308. maxframe/lib/filesystem/_oss_lib/common.py +198 -0
  309. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  310. maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
  311. maxframe/lib/filesystem/arrow.py +236 -0
  312. maxframe/lib/filesystem/base.py +263 -0
  313. maxframe/lib/filesystem/core.py +95 -0
  314. maxframe/lib/filesystem/fsmap.py +164 -0
  315. maxframe/lib/filesystem/hdfs.py +31 -0
  316. maxframe/lib/filesystem/local.py +112 -0
  317. maxframe/lib/filesystem/oss.py +157 -0
  318. maxframe/lib/filesystem/tests/__init__.py +13 -0
  319. maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
  320. maxframe/lib/filesystem/tests/test_oss.py +182 -0
  321. maxframe/lib/functools_compat.py +81 -0
  322. maxframe/lib/mmh3.cpython-311-darwin.so +0 -0
  323. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  324. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  325. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  326. maxframe/lib/sparse/__init__.py +861 -0
  327. maxframe/lib/sparse/array.py +1604 -0
  328. maxframe/lib/sparse/core.py +92 -0
  329. maxframe/lib/sparse/matrix.py +241 -0
  330. maxframe/lib/sparse/tests/__init__.py +15 -0
  331. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  332. maxframe/lib/sparse/vector.py +150 -0
  333. maxframe/lib/tblib/LICENSE +20 -0
  334. maxframe/lib/tblib/__init__.py +327 -0
  335. maxframe/lib/tblib/cpython.py +83 -0
  336. maxframe/lib/tblib/decorators.py +44 -0
  337. maxframe/lib/tblib/pickling_support.py +90 -0
  338. maxframe/lib/tests/__init__.py +13 -0
  339. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  340. maxframe/lib/version.py +620 -0
  341. maxframe/lib/wrapped_pickle.py +139 -0
  342. maxframe/mixin.py +100 -0
  343. maxframe/odpsio/__init__.py +21 -0
  344. maxframe/odpsio/arrow.py +91 -0
  345. maxframe/odpsio/schema.py +364 -0
  346. maxframe/odpsio/tableio.py +322 -0
  347. maxframe/odpsio/tests/__init__.py +13 -0
  348. maxframe/odpsio/tests/test_arrow.py +88 -0
  349. maxframe/odpsio/tests/test_schema.py +297 -0
  350. maxframe/odpsio/tests/test_tableio.py +136 -0
  351. maxframe/odpsio/tests/test_volumeio.py +90 -0
  352. maxframe/odpsio/volumeio.py +95 -0
  353. maxframe/opcodes.py +590 -0
  354. maxframe/protocol.py +415 -0
  355. maxframe/remote/__init__.py +18 -0
  356. maxframe/remote/core.py +210 -0
  357. maxframe/remote/run_script.py +121 -0
  358. maxframe/serialization/__init__.py +26 -0
  359. maxframe/serialization/arrow.py +95 -0
  360. maxframe/serialization/core.cpython-311-darwin.so +0 -0
  361. maxframe/serialization/core.pxd +44 -0
  362. maxframe/serialization/core.pyi +61 -0
  363. maxframe/serialization/core.pyx +1094 -0
  364. maxframe/serialization/exception.py +86 -0
  365. maxframe/serialization/maxframe_objects.py +39 -0
  366. maxframe/serialization/numpy.py +91 -0
  367. maxframe/serialization/pandas.py +202 -0
  368. maxframe/serialization/scipy.py +71 -0
  369. maxframe/serialization/serializables/__init__.py +55 -0
  370. maxframe/serialization/serializables/core.py +262 -0
  371. maxframe/serialization/serializables/field.py +624 -0
  372. maxframe/serialization/serializables/field_type.py +589 -0
  373. maxframe/serialization/serializables/tests/__init__.py +13 -0
  374. maxframe/serialization/serializables/tests/test_field_type.py +121 -0
  375. maxframe/serialization/serializables/tests/test_serializable.py +250 -0
  376. maxframe/serialization/tests/__init__.py +13 -0
  377. maxframe/serialization/tests/test_serial.py +412 -0
  378. maxframe/session.py +1310 -0
  379. maxframe/tensor/__init__.py +183 -0
  380. maxframe/tensor/arithmetic/__init__.py +315 -0
  381. maxframe/tensor/arithmetic/abs.py +68 -0
  382. maxframe/tensor/arithmetic/absolute.py +68 -0
  383. maxframe/tensor/arithmetic/add.py +82 -0
  384. maxframe/tensor/arithmetic/angle.py +72 -0
  385. maxframe/tensor/arithmetic/arccos.py +104 -0
  386. maxframe/tensor/arithmetic/arccosh.py +91 -0
  387. maxframe/tensor/arithmetic/arcsin.py +94 -0
  388. maxframe/tensor/arithmetic/arcsinh.py +86 -0
  389. maxframe/tensor/arithmetic/arctan.py +106 -0
  390. maxframe/tensor/arithmetic/arctan2.py +128 -0
  391. maxframe/tensor/arithmetic/arctanh.py +86 -0
  392. maxframe/tensor/arithmetic/around.py +114 -0
  393. maxframe/tensor/arithmetic/bitand.py +95 -0
  394. maxframe/tensor/arithmetic/bitor.py +102 -0
  395. maxframe/tensor/arithmetic/bitxor.py +95 -0
  396. maxframe/tensor/arithmetic/cbrt.py +66 -0
  397. maxframe/tensor/arithmetic/ceil.py +71 -0
  398. maxframe/tensor/arithmetic/clip.py +165 -0
  399. maxframe/tensor/arithmetic/conj.py +74 -0
  400. maxframe/tensor/arithmetic/copysign.py +78 -0
  401. maxframe/tensor/arithmetic/core.py +544 -0
  402. maxframe/tensor/arithmetic/cos.py +85 -0
  403. maxframe/tensor/arithmetic/cosh.py +72 -0
  404. maxframe/tensor/arithmetic/deg2rad.py +72 -0
  405. maxframe/tensor/arithmetic/degrees.py +77 -0
  406. maxframe/tensor/arithmetic/divide.py +114 -0
  407. maxframe/tensor/arithmetic/equal.py +76 -0
  408. maxframe/tensor/arithmetic/exp.py +106 -0
  409. maxframe/tensor/arithmetic/exp2.py +67 -0
  410. maxframe/tensor/arithmetic/expm1.py +79 -0
  411. maxframe/tensor/arithmetic/fabs.py +74 -0
  412. maxframe/tensor/arithmetic/fix.py +69 -0
  413. maxframe/tensor/arithmetic/float_power.py +103 -0
  414. maxframe/tensor/arithmetic/floor.py +77 -0
  415. maxframe/tensor/arithmetic/floordiv.py +94 -0
  416. maxframe/tensor/arithmetic/fmax.py +105 -0
  417. maxframe/tensor/arithmetic/fmin.py +106 -0
  418. maxframe/tensor/arithmetic/fmod.py +99 -0
  419. maxframe/tensor/arithmetic/frexp.py +92 -0
  420. maxframe/tensor/arithmetic/greater.py +77 -0
  421. maxframe/tensor/arithmetic/greater_equal.py +69 -0
  422. maxframe/tensor/arithmetic/hypot.py +77 -0
  423. maxframe/tensor/arithmetic/i0.py +89 -0
  424. maxframe/tensor/arithmetic/imag.py +67 -0
  425. maxframe/tensor/arithmetic/invert.py +110 -0
  426. maxframe/tensor/arithmetic/isclose.py +115 -0
  427. maxframe/tensor/arithmetic/iscomplex.py +64 -0
  428. maxframe/tensor/arithmetic/isfinite.py +106 -0
  429. maxframe/tensor/arithmetic/isinf.py +103 -0
  430. maxframe/tensor/arithmetic/isnan.py +82 -0
  431. maxframe/tensor/arithmetic/isreal.py +63 -0
  432. maxframe/tensor/arithmetic/ldexp.py +99 -0
  433. maxframe/tensor/arithmetic/less.py +69 -0
  434. maxframe/tensor/arithmetic/less_equal.py +69 -0
  435. maxframe/tensor/arithmetic/log.py +92 -0
  436. maxframe/tensor/arithmetic/log10.py +85 -0
  437. maxframe/tensor/arithmetic/log1p.py +95 -0
  438. maxframe/tensor/arithmetic/log2.py +85 -0
  439. maxframe/tensor/arithmetic/logaddexp.py +80 -0
  440. maxframe/tensor/arithmetic/logaddexp2.py +78 -0
  441. maxframe/tensor/arithmetic/logical_and.py +81 -0
  442. maxframe/tensor/arithmetic/logical_not.py +74 -0
  443. maxframe/tensor/arithmetic/logical_or.py +82 -0
  444. maxframe/tensor/arithmetic/logical_xor.py +88 -0
  445. maxframe/tensor/arithmetic/lshift.py +82 -0
  446. maxframe/tensor/arithmetic/maximum.py +108 -0
  447. maxframe/tensor/arithmetic/minimum.py +108 -0
  448. maxframe/tensor/arithmetic/mod.py +104 -0
  449. maxframe/tensor/arithmetic/modf.py +83 -0
  450. maxframe/tensor/arithmetic/multiply.py +81 -0
  451. maxframe/tensor/arithmetic/nan_to_num.py +99 -0
  452. maxframe/tensor/arithmetic/negative.py +65 -0
  453. maxframe/tensor/arithmetic/nextafter.py +68 -0
  454. maxframe/tensor/arithmetic/not_equal.py +72 -0
  455. maxframe/tensor/arithmetic/positive.py +47 -0
  456. maxframe/tensor/arithmetic/power.py +106 -0
  457. maxframe/tensor/arithmetic/rad2deg.py +71 -0
  458. maxframe/tensor/arithmetic/radians.py +77 -0
  459. maxframe/tensor/arithmetic/real.py +70 -0
  460. maxframe/tensor/arithmetic/reciprocal.py +76 -0
  461. maxframe/tensor/arithmetic/rint.py +68 -0
  462. maxframe/tensor/arithmetic/rshift.py +81 -0
  463. maxframe/tensor/arithmetic/setimag.py +29 -0
  464. maxframe/tensor/arithmetic/setreal.py +29 -0
  465. maxframe/tensor/arithmetic/sign.py +81 -0
  466. maxframe/tensor/arithmetic/signbit.py +65 -0
  467. maxframe/tensor/arithmetic/sin.py +98 -0
  468. maxframe/tensor/arithmetic/sinc.py +102 -0
  469. maxframe/tensor/arithmetic/sinh.py +93 -0
  470. maxframe/tensor/arithmetic/spacing.py +72 -0
  471. maxframe/tensor/arithmetic/sqrt.py +81 -0
  472. maxframe/tensor/arithmetic/square.py +69 -0
  473. maxframe/tensor/arithmetic/subtract.py +81 -0
  474. maxframe/tensor/arithmetic/tan.py +88 -0
  475. maxframe/tensor/arithmetic/tanh.py +92 -0
  476. maxframe/tensor/arithmetic/tests/__init__.py +15 -0
  477. maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
  478. maxframe/tensor/arithmetic/truediv.py +104 -0
  479. maxframe/tensor/arithmetic/trunc.py +72 -0
  480. maxframe/tensor/arithmetic/utils.py +65 -0
  481. maxframe/tensor/array_utils.py +186 -0
  482. maxframe/tensor/base/__init__.py +34 -0
  483. maxframe/tensor/base/astype.py +119 -0
  484. maxframe/tensor/base/atleast_1d.py +74 -0
  485. maxframe/tensor/base/broadcast_to.py +89 -0
  486. maxframe/tensor/base/ravel.py +92 -0
  487. maxframe/tensor/base/tests/__init__.py +13 -0
  488. maxframe/tensor/base/tests/test_base.py +114 -0
  489. maxframe/tensor/base/transpose.py +125 -0
  490. maxframe/tensor/base/unique.py +205 -0
  491. maxframe/tensor/base/where.py +127 -0
  492. maxframe/tensor/core.py +724 -0
  493. maxframe/tensor/datasource/__init__.py +32 -0
  494. maxframe/tensor/datasource/arange.py +156 -0
  495. maxframe/tensor/datasource/array.py +415 -0
  496. maxframe/tensor/datasource/core.py +109 -0
  497. maxframe/tensor/datasource/empty.py +169 -0
  498. maxframe/tensor/datasource/from_dataframe.py +70 -0
  499. maxframe/tensor/datasource/from_dense.py +54 -0
  500. maxframe/tensor/datasource/from_sparse.py +47 -0
  501. maxframe/tensor/datasource/full.py +186 -0
  502. maxframe/tensor/datasource/ones.py +173 -0
  503. maxframe/tensor/datasource/scalar.py +40 -0
  504. maxframe/tensor/datasource/tests/__init__.py +13 -0
  505. maxframe/tensor/datasource/tests/test_datasource.py +278 -0
  506. maxframe/tensor/datasource/zeros.py +188 -0
  507. maxframe/tensor/fetch/__init__.py +15 -0
  508. maxframe/tensor/fetch/core.py +54 -0
  509. maxframe/tensor/indexing/__init__.py +47 -0
  510. maxframe/tensor/indexing/choose.py +196 -0
  511. maxframe/tensor/indexing/compress.py +124 -0
  512. maxframe/tensor/indexing/core.py +190 -0
  513. maxframe/tensor/indexing/extract.py +71 -0
  514. maxframe/tensor/indexing/fill_diagonal.py +183 -0
  515. maxframe/tensor/indexing/flatnonzero.py +60 -0
  516. maxframe/tensor/indexing/getitem.py +175 -0
  517. maxframe/tensor/indexing/nonzero.py +120 -0
  518. maxframe/tensor/indexing/setitem.py +132 -0
  519. maxframe/tensor/indexing/slice.py +29 -0
  520. maxframe/tensor/indexing/take.py +130 -0
  521. maxframe/tensor/indexing/tests/__init__.py +15 -0
  522. maxframe/tensor/indexing/tests/test_indexing.py +234 -0
  523. maxframe/tensor/indexing/unravel_index.py +103 -0
  524. maxframe/tensor/merge/__init__.py +15 -0
  525. maxframe/tensor/merge/stack.py +132 -0
  526. maxframe/tensor/merge/tests/__init__.py +13 -0
  527. maxframe/tensor/merge/tests/test_merge.py +52 -0
  528. maxframe/tensor/operators.py +123 -0
  529. maxframe/tensor/random/__init__.py +168 -0
  530. maxframe/tensor/random/beta.py +87 -0
  531. maxframe/tensor/random/binomial.py +137 -0
  532. maxframe/tensor/random/bytes.py +39 -0
  533. maxframe/tensor/random/chisquare.py +110 -0
  534. maxframe/tensor/random/choice.py +186 -0
  535. maxframe/tensor/random/core.py +234 -0
  536. maxframe/tensor/random/dirichlet.py +123 -0
  537. maxframe/tensor/random/exponential.py +94 -0
  538. maxframe/tensor/random/f.py +135 -0
  539. maxframe/tensor/random/gamma.py +128 -0
  540. maxframe/tensor/random/geometric.py +93 -0
  541. maxframe/tensor/random/gumbel.py +167 -0
  542. maxframe/tensor/random/hypergeometric.py +148 -0
  543. maxframe/tensor/random/laplace.py +133 -0
  544. maxframe/tensor/random/logistic.py +129 -0
  545. maxframe/tensor/random/lognormal.py +159 -0
  546. maxframe/tensor/random/logseries.py +122 -0
  547. maxframe/tensor/random/multinomial.py +133 -0
  548. maxframe/tensor/random/multivariate_normal.py +192 -0
  549. maxframe/tensor/random/negative_binomial.py +125 -0
  550. maxframe/tensor/random/noncentral_chisquare.py +132 -0
  551. maxframe/tensor/random/noncentral_f.py +126 -0
  552. maxframe/tensor/random/normal.py +143 -0
  553. maxframe/tensor/random/pareto.py +140 -0
  554. maxframe/tensor/random/permutation.py +104 -0
  555. maxframe/tensor/random/poisson.py +111 -0
  556. maxframe/tensor/random/power.py +142 -0
  557. maxframe/tensor/random/rand.py +82 -0
  558. maxframe/tensor/random/randint.py +121 -0
  559. maxframe/tensor/random/randn.py +96 -0
  560. maxframe/tensor/random/random_integers.py +123 -0
  561. maxframe/tensor/random/random_sample.py +86 -0
  562. maxframe/tensor/random/rayleigh.py +110 -0
  563. maxframe/tensor/random/shuffle.py +61 -0
  564. maxframe/tensor/random/standard_cauchy.py +105 -0
  565. maxframe/tensor/random/standard_exponential.py +72 -0
  566. maxframe/tensor/random/standard_gamma.py +120 -0
  567. maxframe/tensor/random/standard_normal.py +74 -0
  568. maxframe/tensor/random/standard_t.py +135 -0
  569. maxframe/tensor/random/tests/__init__.py +15 -0
  570. maxframe/tensor/random/tests/test_random.py +167 -0
  571. maxframe/tensor/random/triangular.py +119 -0
  572. maxframe/tensor/random/uniform.py +131 -0
  573. maxframe/tensor/random/vonmises.py +131 -0
  574. maxframe/tensor/random/wald.py +114 -0
  575. maxframe/tensor/random/weibull.py +140 -0
  576. maxframe/tensor/random/zipf.py +122 -0
  577. maxframe/tensor/rechunk/__init__.py +26 -0
  578. maxframe/tensor/rechunk/rechunk.py +43 -0
  579. maxframe/tensor/reduction/__init__.py +66 -0
  580. maxframe/tensor/reduction/all.py +103 -0
  581. maxframe/tensor/reduction/allclose.py +88 -0
  582. maxframe/tensor/reduction/any.py +105 -0
  583. maxframe/tensor/reduction/argmax.py +103 -0
  584. maxframe/tensor/reduction/argmin.py +103 -0
  585. maxframe/tensor/reduction/array_equal.py +64 -0
  586. maxframe/tensor/reduction/core.py +168 -0
  587. maxframe/tensor/reduction/count_nonzero.py +81 -0
  588. maxframe/tensor/reduction/cumprod.py +97 -0
  589. maxframe/tensor/reduction/cumsum.py +101 -0
  590. maxframe/tensor/reduction/max.py +120 -0
  591. maxframe/tensor/reduction/mean.py +123 -0
  592. maxframe/tensor/reduction/min.py +120 -0
  593. maxframe/tensor/reduction/nanargmax.py +82 -0
  594. maxframe/tensor/reduction/nanargmin.py +76 -0
  595. maxframe/tensor/reduction/nancumprod.py +91 -0
  596. maxframe/tensor/reduction/nancumsum.py +94 -0
  597. maxframe/tensor/reduction/nanmax.py +111 -0
  598. maxframe/tensor/reduction/nanmean.py +106 -0
  599. maxframe/tensor/reduction/nanmin.py +111 -0
  600. maxframe/tensor/reduction/nanprod.py +94 -0
  601. maxframe/tensor/reduction/nanstd.py +126 -0
  602. maxframe/tensor/reduction/nansum.py +115 -0
  603. maxframe/tensor/reduction/nanvar.py +149 -0
  604. maxframe/tensor/reduction/prod.py +130 -0
  605. maxframe/tensor/reduction/std.py +134 -0
  606. maxframe/tensor/reduction/sum.py +125 -0
  607. maxframe/tensor/reduction/tests/__init__.py +13 -0
  608. maxframe/tensor/reduction/tests/test_reduction.py +181 -0
  609. maxframe/tensor/reduction/var.py +176 -0
  610. maxframe/tensor/reshape/__init__.py +17 -0
  611. maxframe/tensor/reshape/reshape.py +188 -0
  612. maxframe/tensor/reshape/tests/__init__.py +15 -0
  613. maxframe/tensor/reshape/tests/test_reshape.py +37 -0
  614. maxframe/tensor/statistics/__init__.py +13 -0
  615. maxframe/tensor/statistics/percentile.py +175 -0
  616. maxframe/tensor/statistics/quantile.py +288 -0
  617. maxframe/tensor/ufunc/__init__.py +26 -0
  618. maxframe/tensor/ufunc/ufunc.py +200 -0
  619. maxframe/tensor/utils.py +718 -0
  620. maxframe/tests/__init__.py +13 -0
  621. maxframe/tests/test_codegen.py +69 -0
  622. maxframe/tests/test_protocol.py +144 -0
  623. maxframe/tests/test_utils.py +376 -0
  624. maxframe/tests/utils.py +164 -0
  625. maxframe/typing_.py +37 -0
  626. maxframe/udf.py +134 -0
  627. maxframe/utils.py +1114 -0
  628. maxframe-0.1.0b5.dist-info/METADATA +104 -0
  629. maxframe-0.1.0b5.dist-info/RECORD +647 -0
  630. maxframe-0.1.0b5.dist-info/WHEEL +5 -0
  631. maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
  632. maxframe_client/__init__.py +17 -0
  633. maxframe_client/clients/__init__.py +13 -0
  634. maxframe_client/clients/framedriver.py +118 -0
  635. maxframe_client/clients/spe.py +104 -0
  636. maxframe_client/conftest.py +15 -0
  637. maxframe_client/fetcher.py +264 -0
  638. maxframe_client/session/__init__.py +22 -0
  639. maxframe_client/session/consts.py +36 -0
  640. maxframe_client/session/graph.py +119 -0
  641. maxframe_client/session/odps.py +482 -0
  642. maxframe_client/session/task.py +280 -0
  643. maxframe_client/session/tests/__init__.py +13 -0
  644. maxframe_client/session/tests/test_task.py +85 -0
  645. maxframe_client/tests/__init__.py +13 -0
  646. maxframe_client/tests/test_fetcher.py +89 -0
  647. maxframe_client/tests/test_session.py +255 -0
@@ -0,0 +1,1267 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import functools
16
+ import inspect
17
+ import itertools
18
+ import logging
19
+ import operator
20
+ import sys
21
+ from contextlib import contextmanager
22
+ from numbers import Integral
23
+ from typing import Any, Callable, List
24
+
25
+ import numpy as np
26
+ import pandas as pd
27
+ from pandas.api.extensions import ExtensionDtype
28
+ from pandas.api.types import is_string_dtype
29
+ from pandas.core.dtypes.cast import find_common_type
30
+ from pandas.core.dtypes.inference import is_dict_like, is_list_like
31
+
32
+ from ..core import Entity, ExecutableTuple
33
+ from ..lib.mmh3 import hash as mmh_hash
34
+ from ..utils import (
35
+ ModulePlaceholder,
36
+ is_full_slice,
37
+ lazy_import,
38
+ parse_version,
39
+ sbytes,
40
+ tokenize,
41
+ )
42
+
43
+ try:
44
+ import pyarrow as pa
45
+ except ImportError: # pragma: no cover
46
+ pa = ModulePlaceholder("pyarrow")
47
+
48
+ cudf = lazy_import("cudf", rename="cudf")
49
+ vineyard = lazy_import("vineyard")
50
+ try:
51
+ import ray
52
+
53
+ ray_release_version = parse_version(ray.__version__).release
54
+ ray_deprecate_ml_dataset = ray_release_version[:2] >= (2, 0)
55
+ except ImportError:
56
+ ray_release_version = None
57
+ ray_deprecate_ml_dataset = None
58
+ logger = logging.getLogger(__name__)
59
+
60
+
61
def hash_index(index, size):
    """Distribute the labels of ``index`` over ``size`` hash buckets.

    Every label is converted with ``sbytes``, hashed with ``mmh_hash`` and
    reduced modulo ``size`` to pick its bucket.

    :param index: object exposing ``map`` and ``groupby``
        (presumably a pandas Index -- confirm against callers)
    :param size: number of buckets
    :return: list of length ``size``; entry ``i`` holds the labels that fell
        into bucket ``i`` or an empty list when there are none
    """

    def _bucket_of(label):
        return mmh_hash(sbytes(label)) % size

    buckets = index.groupby(index.map(_bucket_of))
    return [buckets.get(bucket, []) for bucket in range(size)]
68
+
69
+
70
def hash_dataframe_on(df, on, size, level=None):
    """Split the row positions of ``df`` into ``size`` hash buckets.

    :param df: frame whose rows are distributed
    :param on: what to hash. ``None`` hashes the index (restricted to
        ``level`` when one is given), a callable is mapped over the index
        before hashing, a list may mix column selectors and ``pd.Series``
        objects, and anything else is used as a column selector on ``df``
    :param size: number of buckets
    :param level: index level(s) to hash; only consulted when ``on`` is None
    :return: list of length ``size`` holding, for each bucket, the positional
        row numbers assigned to it (an empty ``pd.Index`` when none)
    """
    if on is None:
        target = df.index
        if level is not None:
            target = target.to_frame(False)[level]
        if cudf and isinstance(target, cudf.Index):  # pragma: no cover
            target = target.to_pandas()
        hashed = pd.util.hash_pandas_object(target, categorize=False)
    elif callable(on):
        # todo optimization can be added, if ``on`` is a numpy ufunc or sth can be vectorized
        hashed = pd.util.hash_pandas_object(df.index.map(on), categorize=False)
    else:
        if isinstance(on, list):
            pieces = [
                item if isinstance(item, pd.Series) else df[item] for item in on
            ]
            data = pd.concat(pieces, axis=1)
        else:
            data = df[on]
        hashed = pd.util.hash_pandas_object(data, index=False, categorize=False)

    # group positional row numbers by the bucket their hash falls into
    positions = pd.RangeIndex(0, len(hashed))
    grouped = positions.groupby(hashed % size)
    return [grouped.get(bucket, pd.Index([])) for bucket in range(size)]
95
+
96
+
97
def hash_dtypes(dtypes, size):
    """Split ``dtypes`` into ``size`` pieces by hashing its index labels.

    :param dtypes: labelled collection of dtypes, indexable by a list of labels
    :param size: number of pieces
    :return: list of ``size`` selections of ``dtypes``, one per hash bucket
        produced by ``hash_index``
    """
    return [dtypes[labels] for labels in hash_index(dtypes.index, size)]
100
+
101
+
102
def sort_dataframe_inplace(df, *axis):
    """Sort ``df`` in place along each of the given axes, in order.

    :param df: object to sort; it is mutated
    :param axis: axes forwarded one by one to ``df.sort_index``
    :return: the same ``df`` object
    """
    for which in axis:
        df.sort_index(axis=which, inplace=True)
    return df
106
+
107
+
108
@functools.lru_cache(1)
def _get_range_index_type():
    """Return the type(s) to use in ``isinstance`` checks for range indexes.

    The result is cached. It is ``pd.RangeIndex`` alone when ``cudf`` is
    None, otherwise a tuple that also contains ``cudf.RangeIndex``.
    """
    return pd.RangeIndex if cudf is None else (pd.RangeIndex, cudf.RangeIndex)
114
+
115
+
116
@functools.lru_cache(1)
def _get_multi_index_type():
    """Return the type(s) to use in ``isinstance`` checks for multi-indexes.

    The result is cached. It is ``pd.MultiIndex`` alone when ``cudf`` is
    None, otherwise a tuple that also contains ``cudf.MultiIndex``.
    """
    return pd.MultiIndex if cudf is None else (pd.MultiIndex, cudf.MultiIndex)
122
+
123
+
124
+ def _get_range_index_start(pd_range_index):
125
+ try:
126
+ return pd_range_index.start
127
+ except AttributeError: # pragma: no cover
128
+ return pd_range_index._start
129
+
130
+
131
+ def _get_range_index_stop(pd_range_index):
132
+ try:
133
+ return pd_range_index.stop
134
+ except AttributeError: # pragma: no cover
135
+ return pd_range_index._stop
136
+
137
+
138
+ def _get_range_index_step(pd_range_index):
139
+ try:
140
+ return pd_range_index.step
141
+ except AttributeError: # pragma: no cover
142
+ pass
143
+ try: # pragma: no cover
144
+ return pd_range_index._step
145
+ except AttributeError: # pragma: no cover
146
+ return 1 # cudf does not support step arg
147
+
148
+
149
def is_pd_range_empty(pd_range_index):
    """Tell whether a range index contains no element.

    A range with a non-negative step is empty when ``start >= stop``; a
    range with a negative step is empty when ``start <= stop``.
    """
    start = _get_range_index_start(pd_range_index)
    stop = _get_range_index_stop(pd_range_index)
    step = _get_range_index_step(pd_range_index)
    if step >= 0:
        return start >= stop
    return start <= stop
156
+
157
+
158
def parse_index(index_value, *args, store_data=False, key=None):
    """Build an ``IndexValue`` describing ``index_value``.

    :param index_value: index to describe, or None for an unknown index.
        Objects exposing ``to_pandas`` are converted through it first.
    :param args: extra objects mixed into the generated key when the index
        is empty (or when ``index_value`` is None)
    :param store_data: when true, the index values are stored as ``_data``
        (never done for range indexes)
    :param key: explicit key; when falsy a key is generated with ``tokenize``
    :return: ``IndexValue`` wrapping the serialized index description
    """
    from .core import IndexValue

    def _index_extreme(index, method_name):
        # ``method_name`` is "min" or "max". Incomparable values yield None;
        # ValueError / AttributeError are only tolerated for IntervalIndex.
        try:
            return getattr(index, method_name)()
        except (ValueError, AttributeError):
            if isinstance(index, pd.IntervalIndex):
                return None
            raise
        except TypeError:
            return None

    def _make_key(index):
        if key:
            return key
        # an empty index carries no content to tell it apart from others,
        # so the extra token objects are added to the key
        if index.empty:
            return tokenize(index, *args)
        return tokenize(index)

    def _collect_properties(index, tp, with_data):
        props = {
            "_min_val": _index_extreme(index, "min"),
            "_max_val": _index_extreme(index, "max"),
            "_min_val_close": True,
            "_max_val_close": True,
            "_key": _make_key(index),
        }
        if with_data:
            props["_data"] = index.values
        # copy every remaining declared field that the index exposes
        # under the same name without the leading underscore
        for field in tp._FIELDS:
            if field == "_data" or field in props:
                continue
            value = getattr(index, field.lstrip("_"), None)
            if value is not None:
                props[field] = value
        return props

    def _describe_plain_index(index):
        tp = getattr(IndexValue, type(index).__name__)
        props = _collect_properties(index, tp, store_data)
        props["_name"] = index.name
        return tp(**props)

    def _describe_range_index(index):
        if is_pd_range_empty(index):
            props = {
                "_is_monotonic_increasing": True,
                "_is_monotonic_decreasing": False,
                "_is_unique": True,
                "_min_val": _index_extreme(index, "min"),
                "_max_val": _index_extreme(index, "max"),
                "_min_val_close": True,
                "_max_val_close": False,
                "_key": _make_key(index),
                "_name": index.name,
                "_dtype": index.dtype,
            }
        else:
            props = _collect_properties(index, IndexValue.RangeIndex, False)
        bounds = slice(
            _get_range_index_start(index),
            _get_range_index_stop(index),
            _get_range_index_step(index),
        )
        return IndexValue.RangeIndex(_slice=bounds, **props)

    def _describe_multi_index(index):
        props = _collect_properties(index, IndexValue.MultiIndex, store_data)
        props["_sortorder"] = index.sortorder
        props["_dtypes"] = [level.dtype for level in index.levels]
        return IndexValue.MultiIndex(**props)

    if index_value is None:
        placeholder = IndexValue.Index(
            _is_monotonic_increasing=False,
            _is_monotonic_decreasing=False,
            _is_unique=False,
            _min_val=None,
            _max_val=None,
            _min_val_close=True,
            _max_val_close=True,
            _key=key or tokenize(*args),
        )
        return IndexValue(_index_value=placeholder)

    if hasattr(index_value, "to_pandas"):  # pragma: no cover
        # convert cudf.Index to pandas
        index_value = index_value.to_pandas()

    if isinstance(index_value, _get_range_index_type()):
        described = _describe_range_index(index_value)
    elif isinstance(index_value, _get_multi_index_type()):
        described = _describe_multi_index(index_value)
    else:
        described = _describe_plain_index(index_value)
    return IndexValue(_index_value=described)
265
+
266
+
267
def gen_unknown_index_value(index_value, *args):
    """Create an index value of the same kind as ``index_value`` but without data.

    :param index_value: index value whose kind (range, multi or plain index)
        and dtype / level names are reused
    :param args: extra objects forwarded to ``parse_index``
    :return: result of ``parse_index`` on an empty placeholder index
    """
    pd_index = index_value.to_pandas()
    if isinstance(pd_index, pd.RangeIndex):
        placeholder = pd.RangeIndex(-1)
    elif isinstance(pd_index, pd.MultiIndex):
        empty_levels = [level[:0] for level in pd_index.levels]
        placeholder = pd.MultiIndex.from_arrays(empty_levels, names=pd_index.names)
    else:
        placeholder = pd.Index([], dtype=pd_index.dtype)
    return parse_index(placeholder, *args)
278
+
279
+
280
def split_monotonic_index_min_max(
    left_min_max, left_increase, right_min_max, right_increase
):
    """
    Split the original two min_max into new min_max. Each min_max should be a list
    in which each item is a 4-tuple holding this chunk's min value,
    whether the min value is closed, the max value, and whether the max value is closed.
    The return value is a pair of nested lists (one for left, one for right);
    each item is a list of 4-tuples indicating how the corresponding chunk
    should be split.

    Both inputs are walked from their first item onwards and must contain at
    least one item (the first item of each is read unconditionally).
    NOTE(review): the walk appears to assume both lists are ordered by
    ascending min value, with the ``*_increase`` flags only used to reverse
    the per-chunk results at the end -- confirm against callers.

    :param left_min_max: the left min_max
    :param left_increase: if the original data of left is increased
    :param right_min_max: the right min_max
    :param right_increase: if the original data of right is increased
    :return: nested list in which each item indicates how min_max is split

    >>> left_min_max = [(0, True, 3, True), (4, True, 8, True), (12, True, 18, True),
    ...                 (20, True, 22, True)]
    >>> right_min_max = [(2, True, 6, True), (7, True, 9, True), (10, True, 14, True),
    ...                  (18, True, 19, True)]
    >>> l, r = split_monotonic_index_min_max(left_min_max, True, right_min_max, True)
    >>> l
    [[(0, True, 2, False), (2, True, 3, True)], [(3, False, 4, False), (4, True, 6, True), (6, False, 7, False),
    (7, True, 8, True)], [(8, False, 9, True), (10, True, 12, False), (12, True, 14, True), (14, False, 18, False),
    (18, True, 18, True)], [(18, False, 19, True), (20, True, 22, True)]]
    >>> r
    [[(0, True, 2, False), (2, True, 3, True), (3, False, 4, False), (4, True, 6, True)],
    [(6, False, 7, False), (7, True, 8, True), (8, False, 9, True)], [(10, True, 12, False), (12, True, 14, True)],
    [(14, False, 18, False), (18, True, 18, True), (18, False, 19, True), (20, True, 22, True)]]
    """
    # one result list per input chunk on each side
    left_idx_to_min_max = [[] for _ in left_min_max]
    right_idx_to_min_max = [[] for _ in right_min_max]
    # mutable copies of the not-yet-consumed part of the current chunk
    left_curr_min_max = list(left_min_max[0])
    right_curr_min_max = list(right_min_max[0])
    left_curr_idx = right_curr_idx = 0
    left_terminate = right_terminate = False

    while not left_terminate or not right_terminate:
        if left_terminate:
            # left is exhausted: the rest of right is emitted unchanged and
            # also attached to the last left chunk
            left_idx_to_min_max[left_curr_idx].append(tuple(right_curr_min_max))
            right_idx_to_min_max[right_curr_idx].append(tuple(right_curr_min_max))
            if right_curr_idx + 1 >= len(right_min_max):
                right_terminate = True
            else:
                right_curr_idx += 1
                right_curr_min_max = list(right_min_max[right_curr_idx])
        elif right_terminate:
            # right is exhausted: the rest of left is emitted unchanged and
            # also attached to the last right chunk
            right_idx_to_min_max[right_curr_idx].append(tuple(left_curr_min_max))
            left_idx_to_min_max[left_curr_idx].append(tuple(left_curr_min_max))
            if left_curr_idx + 1 >= len(left_min_max):
                left_terminate = True
            else:
                left_curr_idx += 1
                left_curr_min_max = list(left_min_max[left_curr_idx])
        elif left_curr_min_max[0] < right_curr_min_max[0]:
            # left min < right min
            # the piece ends at whichever comes first: left max, or right min
            # with its closedness flipped (so right min itself is excluded
            # when right min is closed)
            right_min = [right_curr_min_max[0], not right_curr_min_max[1]]
            max_val = min(left_curr_min_max[2:], right_min)
            assert len(max_val) == 2
            min_max = (
                left_curr_min_max[0],
                left_curr_min_max[1],
                max_val[0],
                max_val[1],
            )
            left_idx_to_min_max[left_curr_idx].append(min_max)
            right_idx_to_min_max[right_curr_idx].append(min_max)
            if left_curr_min_max[2:] == max_val:
                # left max < right min: current left chunk is fully consumed
                if left_curr_idx + 1 >= len(left_min_max):
                    left_terminate = True
                else:
                    left_curr_idx += 1
                    left_curr_min_max = list(left_min_max[left_curr_idx])
            else:
                # the emitted piece ran from left min up to right min;
                # the remainder of the left chunk now starts at right min
                left_curr_min_max[:2] = right_curr_min_max[:2]
        elif left_curr_min_max[0] > right_curr_min_max[0]:
            # left min > right min
            # mirror of the branch above with the roles of left and right swapped
            left_min = [left_curr_min_max[0], not left_curr_min_max[1]]
            max_val = min(right_curr_min_max[2:], left_min)
            min_max = (
                right_curr_min_max[0],
                right_curr_min_max[1],
                max_val[0],
                max_val[1],
            )
            left_idx_to_min_max[left_curr_idx].append(min_max)
            right_idx_to_min_max[right_curr_idx].append(min_max)
            if right_curr_min_max[2:] == max_val:
                # right max < left min: current right chunk is fully consumed
                if right_curr_idx + 1 >= len(right_min_max):
                    right_terminate = True
                else:
                    right_curr_idx += 1
                    right_curr_min_max = list(right_min_max[right_curr_idx])
            else:
                # the emitted piece ran from right min up to left min;
                # the remainder of the right chunk now starts at left min
                right_curr_min_max[:2] = left_curr_min_max[:2]
        else:
            # left min == right min
            # the shared piece ends at the smaller of the two max values
            max_val = min(left_curr_min_max[2:], right_curr_min_max[2:])
            assert len(max_val) == 2
            min_max = (
                left_curr_min_max[0],
                left_curr_min_max[1],
                max_val[0],
                max_val[1],
            )
            left_idx_to_min_max[left_curr_idx].append(min_max)
            right_idx_to_min_max[right_curr_idx].append(min_max)
            # each side either moves to its next chunk (when fully consumed)
            # or continues right after the emitted piece, closedness flipped
            if max_val == left_curr_min_max[2:]:
                if left_curr_idx + 1 >= len(left_min_max):
                    left_terminate = True
                else:
                    left_curr_idx += 1
                    left_curr_min_max = list(left_min_max[left_curr_idx])
            else:
                left_curr_min_max[:2] = max_val[0], not max_val[1]
            if max_val == right_curr_min_max[2:]:
                if right_curr_idx + 1 >= len(right_min_max):
                    right_terminate = True
                else:
                    right_curr_idx += 1
                    right_curr_min_max = list(right_min_max[right_curr_idx])
            else:
                right_curr_min_max[:2] = max_val[0], not max_val[1]

    # for a side flagged as not increasing, the per-chunk lists are
    # returned in reversed chunk order
    if left_increase is False:
        left_idx_to_min_max = list(reversed(left_idx_to_min_max))
    if right_increase is False:
        right_idx_to_min_max = list(reversed(right_idx_to_min_max))

    return left_idx_to_min_max, right_idx_to_min_max
414
+
415
+
416
def build_split_idx_to_origin_idx(splits, increase=True):
    """Map every output chunk index to its source chunk and inner position.

    ``splits`` has one entry per original chunk on an axis; each entry lists
    the pieces that chunk is split into, e.g.
    ``[[(0, True, 2, True), (2, False, 3, True)]]`` is one input chunk split
    into two output chunks and yields ``{0: (0, 0), 1: (0, 1)}``.

    :param splits: sequence of per-chunk split lists
    :param increase: when exactly ``False``, ``splits`` is walked in reverse
        and the recorded source index is mirrored accordingly
    :return: dict ``{out_chunk_index: (origin_chunk_index, position)}``
    """
    descending = increase is False
    ordered = list(reversed(splits)) if descending else splits

    mapping = {}
    for walk_idx, pieces in enumerate(ordered):
        source_idx = len(ordered) - walk_idx - 1 if descending else walk_idx
        for pos in range(len(pieces)):
            # output indices are simply assigned consecutively
            mapping[len(mapping)] = (source_idx, pos)
    return mapping
434
+
435
+
436
+ def _generate_value(dtype, fill_value):
437
+ # special handle for datetime64 and timedelta64
438
+ dispatch = {
439
+ np.datetime64: pd.Timestamp,
440
+ np.timedelta64: pd.Timedelta,
441
+ pd.CategoricalDtype.type: lambda x: pd.CategoricalDtype([x]),
442
+ # for object, we do not know the actual dtype,
443
+ # just convert to str for common usage
444
+ np.object_: lambda x: str(fill_value),
445
+ }
446
+ # otherwise, just use dtype.type itself to convert
447
+ convert = dispatch.get(dtype.type, dtype.type)
448
+ return convert(fill_value)
449
+
450
+
451
def build_empty_df(dtypes, index=None):
    """Build a DataFrame with the given column dtypes and placeholder values.

    :param dtypes: labelled collection of dtypes; its index supplies the
        column labels (duplicates allowed) and its values the column dtypes
    :param index: optional row index; when omitted the result has no rows
    :return: DataFrame with ``len(index)`` rows (0 without ``index``) whose
        cells are generated by ``_generate_value(dtype, 1)``
    """
    columns = dtypes.index
    length = len(index) if index is not None else 0
    # always build at least one row so that dtypes can be inferred / cast;
    # surplus rows are trimmed before returning
    record = [[_generate_value(dtype, 1) for dtype in dtypes]] * max(1, length)

    # duplicate column may exist,
    # so use RangeIndex first
    df = pd.DataFrame(record, columns=range(len(dtypes)), index=index)
    for i, dtype in enumerate(dtypes):
        s = df.iloc[:, i]
        if not pd.api.types.is_dtype_equal(s.dtype, dtype):
            # Replace the whole column by its (unique, positional) label.
            # Assigning through ``df.iloc[:, i]`` writes into the existing
            # array on pandas >= 2.0 and may therefore keep the inferred
            # dtype instead of the requested one.
            df[i] = s.astype(dtype)

    df.columns = columns
    return df[:length] if len(df) > length else df
466
+
467
+
468
def build_df(df_obj, fill_value=1, size=1, ensure_string=False):
    """Build a small pandas DataFrame mimicking the structure of ``df_obj``.

    :param df_obj: object exposing ``index_value`` and either ``dtypes`` or,
        for ``SERIES_TYPE`` instances, ``dtype`` and ``name``
    :param fill_value: value (or list/tuple of values) used to generate cells
        and index labels through ``_generate_value``
    :param size: row count (or list/tuple of counts) paired with ``fill_value``
    :param ensure_string: when true, ``"O"`` is prepended (``radd``) to every
        object-dtype column of the result
    :return: one frame per ``(size, fill_value)`` pair, concatenated when
        there is more than one
    """
    sizes = size if isinstance(size, (list, tuple)) else [size]
    fill_values = fill_value if isinstance(fill_value, (list, tuple)) else [fill_value]

    from .core import SERIES_TYPE

    if isinstance(df_obj, SERIES_TYPE):
        dtypes = pd.Series([df_obj.dtype], index=[df_obj.name])
    else:
        dtypes = df_obj.dtypes

    frames = []
    for n_rows, value in zip(sizes, fill_values):
        row = [_generate_value(dtype, value) for dtype in dtypes]
        record = [row] * n_rows
        frame = pd.DataFrame(record)
        frame.columns = dtypes.index

        if record:  # columns is empty in some cases
            target_index = df_obj.index_value.to_pandas()
            if isinstance(target_index, pd.MultiIndex):
                label = tuple(
                    _generate_value(level.dtype, value)
                    for level in target_index.levels
                )
                frame.index = pd.MultiIndex.from_tuples(
                    [label] * n_rows, names=target_index.names
                )
            else:
                label = _generate_value(target_index.dtype, value)
                frame.index = pd.Index([label] * n_rows, name=target_index.name)

        # make sure dtypes correct
        for pos, dtype in enumerate(dtypes):
            column = frame.iloc[:, pos]
            if not pd.api.types.is_dtype_equal(column.dtype, dtype):
                frame[frame.columns[pos]] = column.astype(dtype)
        frames.append(frame)

    ret_df = frames[0] if len(frames) == 1 else pd.concat(frames)

    if ensure_string:
        obj_dtypes = dtypes[dtypes == np.dtype("O")]
        ret_df[obj_dtypes.index] = ret_df[obj_dtypes.index].radd("O")
    return ret_df
521
+
522
+
523
def build_empty_series(dtype, index=None, name=None):
    """Build a Series of ``dtype`` filled with placeholder values.

    :param dtype: dtype of the resulting series
    :param index: optional index; the series has one generated value per
        index entry and is empty when ``index`` is None
    :param name: name of the resulting series
    :return: ``pd.Series`` whose values come from ``_generate_value(dtype, 1)``
    """
    length = 0 if index is None else len(index)
    values = [_generate_value(dtype, 1) for _ in range(length)]
    return pd.Series(values, dtype=dtype, index=index, name=name)
531
+
532
+
533
def build_series(
    series_obj=None,
    fill_value=1,
    size=1,
    name=None,
    ensure_string=False,
    dtype=None,
    index=None,
):
    """Build a small pandas Series mimicking ``series_obj`` or ``dtype``/``index``.

    :param series_obj: optional template; when given its ``dtype`` overrides
        ``dtype`` and its index (``index_value.to_pandas()`` or, failing
        that, ``index``) supplies the index kind
    :param fill_value: value (or list/tuple of values) used to generate the
        data and index labels through ``_generate_value``
    :param size: repeat count (or list/tuple of counts) paired with ``fill_value``
    :param name: name of the resulting series
    :param ensure_string: when true and ``dtype`` is object, ``"O"`` is
        prepended (``radd``) to the values
    :param dtype: dtype used when ``series_obj`` is None
    :param index: index template used when ``series_obj`` is None
    :return: one series per ``(size, fill_value)`` pair, concatenated when
        there is more than one
    """
    sizes = size if isinstance(size, (list, tuple)) else [size]
    fill_values = fill_value if isinstance(fill_value, (list, tuple)) else [fill_value]

    if series_obj is None:
        series_index = None if index is None else index[:0]
    else:
        dtype = series_obj.dtype
        try:
            series_index = series_obj.index_value.to_pandas()[:0]
        except AttributeError:
            series_index = series_obj.index[:0]

    pieces = []
    for repeat, value in zip(sizes, fill_values):
        piece = build_empty_series(dtype, name=name, index=series_index)
        record = _generate_value(dtype, value)
        if isinstance(piece.index, pd.MultiIndex):
            label = tuple(
                _generate_value(level.dtype, value) for level in piece.index.levels
            )
            piece = piece.reindex(
                index=pd.MultiIndex.from_tuples([label], names=piece.index.names)
            )
            piece.iloc[0] = record
        else:
            if isinstance(piece.index.dtype, pd.CategoricalDtype):
                label = None
            else:
                label = _generate_value(piece.index.dtype, value)
            piece.loc[label] = record

        piece = pd.concat([piece] * repeat)
        # make sure dtype correct for MultiIndex
        piece = piece.astype(dtype, copy=False)
        pieces.append(piece)

    ret_series = pieces[0] if len(pieces) == 1 else pd.concat(pieces)

    if ensure_string and dtype == np.dtype("O"):
        ret_series = ret_series.radd("O")
    return ret_series
594
+
595
+
596
def infer_index_value(left_index_value, right_index_value):
    """Infer the index value resulting from combining two index values.

    :param left_index_value: index value of the left operand
    :param right_index_value: index value of the right operand
    :return: ``left_index_value`` itself when both sides are range indexes
        with equal slices, or when both are unique and share the same key;
        otherwise an empty index value built with ``parse_index``
    """
    from .core import IndexValue

    if isinstance(left_index_value.value, IndexValue.RangeIndex):
        if isinstance(right_index_value.value, IndexValue.RangeIndex):
            if left_index_value.value.slice == right_index_value.value.slice:
                return left_index_value
            unknown = pd.Index([], dtype=np.int64)
            return parse_index(unknown, left_index_value, right_index_value)

    # when left index and right index is identical, and both of them are elements unique,
    # we can infer that the out index should be identical also
    both_unique = left_index_value.is_unique and right_index_value.is_unique
    if both_unique and left_index_value.key == right_index_value.key:
        return left_index_value

    left_index = left_index_value.to_pandas()
    right_index = right_index_value.to_pandas()
    common_dtype = find_common_type([left_index.dtype, right_index.dtype])
    return parse_index(
        pd.Index([], dtype=common_dtype), left_index_value, right_index_value
    )
623
+
624
+
625
def indexing_index_value(index_value, indexes, store_data=False, rechunk=False):
    """Compute the index value obtained by indexing ``index_value`` with ``indexes``.

    :param index_value: index value being indexed
    :param indexes: a slice, an integer, an ``Entity``, a tuple or any other
        indexer accepted by the underlying pandas index
    :param store_data: forwarded to ``parse_index`` (forced to False when
        ``indexes`` is an ``Entity``)
    :param rechunk: when true, a full slice no longer short-circuits to
        returning ``index_value`` itself
    :return: the resulting index value
    """
    pd_index = index_value.to_pandas()
    # when rechunk is True, the output index shall be treated
    # different from the input one
    if not rechunk and isinstance(indexes, slice) and is_full_slice(indexes):
        return index_value

    if not index_value.has_value():
        # no concrete data: derive a new key but keep the known bounds
        result = parse_index(pd_index, indexes, store_data=store_data)
        for attr in ("min_val", "min_val_close", "max_val", "max_val_close"):
            setattr(result._index_value, "_" + attr, getattr(index_value, attr))
        return result

    if isinstance(indexes, Entity):
        # the selection is only known at execution time
        if isinstance(pd_index, pd.RangeIndex):
            unknown = pd.RangeIndex(-1)
        else:
            unknown = type(pd_index)([])
        return parse_index(unknown, indexes, index_value, store_data=False)

    if isinstance(indexes, Integral):
        selected = pd_index[[indexes]]
    elif isinstance(indexes, tuple):
        selected = pd_index[list(indexes)]
    else:
        selected = pd_index[indexes]
    return parse_index(selected, store_data=store_data)
654
+
655
+
656
def merge_index_value(to_merge_index_values: dict, store_data: bool = False):
    """
    Merge index value according to their chunk index.

    Chunks are visited in sorted order of their keys. The pandas indexes are
    appended when ``store_data`` is true or when consecutive chunks are range
    indexes that continue each other; otherwise the merged index degrades to
    an empty index of the same dtype. Independently, the overall min / max
    bounds are tracked across chunks and written onto the result when it
    carries no concrete value.

    Parameters
    ----------
    to_merge_index_values : dict
        index to index_value
    store_data : bool
        store data in index_value

    Returns
    -------
    merged_index_value
    """

    pd_index = None
    min_val, min_val_close, max_val, max_val_close = None, None, None, None
    for _, chunk_index_value in sorted(to_merge_index_values.items()):
        if pd_index is None:
            # first chunk: take its index and bounds as the starting point
            pd_index = chunk_index_value.to_pandas()
            min_val, min_val_close, max_val, max_val_close = (
                chunk_index_value.min_val,
                chunk_index_value.min_val_close,
                chunk_index_value.max_val,
                chunk_index_value.max_val_close,
            )
        else:
            cur_pd_index = chunk_index_value.to_pandas()
            if store_data or (
                isinstance(pd_index, pd.RangeIndex)
                and isinstance(cur_pd_index, pd.RangeIndex)
                and cur_pd_index.step == pd_index.step
                and cur_pd_index.start == pd_index.stop
            ):
                # range index that is continuous
                pd_index = pd_index.append(cur_pd_index)
            else:
                # content can no longer be represented: keep only the dtype
                pd_index = pd.Index([], dtype=pd_index.dtype)
            if chunk_index_value.min_val is not None:
                try:
                    if min_val is None or min_val > chunk_index_value.min_val:
                        min_val = chunk_index_value.min_val
                        min_val_close = chunk_index_value.min_val_close
                except TypeError:
                    # min_value has different types that cannot compare
                    # just stop compare
                    # (note: this also skips the max update for this chunk)
                    continue
            if chunk_index_value.max_val is not None:
                if max_val is None or max_val < chunk_index_value.max_val:
                    max_val = chunk_index_value.max_val
                    max_val_close = chunk_index_value.max_val_close

    index_value = parse_index(pd_index, store_data=store_data)
    if not index_value.has_value():
        # restore the bounds collected from the chunks
        index_value._index_value._min_val = min_val
        index_value._index_value._min_val_close = min_val_close
        index_value._index_value._max_val = max_val
        index_value._index_value._max_val_close = max_val_close
    return index_value
716
+
717
+
718
def infer_dtypes(left_dtypes, right_dtypes, operator):
    """Infer the column dtypes produced by a binary operation on two frames.

    :param left_dtypes: dtypes of the left frame
    :param right_dtypes: dtypes of the right frame
    :param operator: callable taking the two sample frames
    :return: ``dtypes`` of ``operator`` applied to frames built with
        ``build_empty_df``
    """
    outcome = operator(build_empty_df(left_dtypes), build_empty_df(right_dtypes))
    return outcome.dtypes
722
+
723
+
724
@functools.lru_cache(100)
def infer_dtype(left_dtype, right_dtype, operator):
    """Infer the dtype produced by a binary operation on two series.

    Results are cached per ``(left_dtype, right_dtype, operator)``.

    :param left_dtype: dtype of the left series
    :param right_dtype: dtype of the right series
    :param operator: callable taking the two sample series
    :return: ``dtype`` of ``operator`` applied to series built with
        ``build_empty_series``
    """
    outcome = operator(
        build_empty_series(left_dtype), build_empty_series(right_dtype)
    )
    return outcome.dtype
729
+
730
+
731
def filter_dtypes(dtypes, column_min_max):
    """Keep the entries of ``dtypes`` whose label lies within a bound.

    :param dtypes: labelled collection of dtypes
    :param column_min_max: sequence ``(min, min_closed, max, max_closed)``;
        the ``*_closed`` flags decide whether each bound is inclusive
    :return: the selection of ``dtypes`` whose index labels satisfy both bounds
    """
    labels = dtypes.index
    lower, lower_closed = column_min_max[0], column_min_max[1]
    if lower_closed:
        above = labels >= lower
    else:
        above = labels > lower

    upper, upper_closed = column_min_max[2], column_min_max[3]
    if upper_closed:
        below = labels <= upper
    else:
        below = labels < upper
    return dtypes[above & below]
737
+
738
+
739
def in_range_index(i, pd_range_index):
    """
    Check whether the input `i` is within `pd_range_index` which is a pd.RangeIndex.

    `i` must lie between start (inclusive) and stop (exclusive) in the
    direction of the step and be a whole number of steps away from start.
    """
    start = _get_range_index_start(pd_range_index)
    stop = _get_range_index_stop(pd_range_index)
    step = _get_range_index_step(pd_range_index)

    if step > 0:
        return bool(start <= i < stop and (i - start) % step == 0)
    if step < 0:
        return bool(start >= i > stop and (start - i) % step == 0)
    return False
753
+
754
+
755
def wrap_notimplemented_exception(func):
    """Decorate ``func`` so that ``NotImplementedError`` becomes ``NotImplemented``.

    :param func: callable to wrap
    :return: wrapper that returns what ``func`` returns, or the
        ``NotImplemented`` singleton when ``func`` raises ``NotImplementedError``
    """

    @functools.wraps(func)
    def _guarded(*args, **kwargs):
        try:
            outcome = func(*args, **kwargs)
        except NotImplementedError:
            outcome = NotImplemented
        return outcome

    return _guarded
764
+
765
+
766
def validate_axis(axis, tileable=None):
    """Normalize ``axis`` to a non-negative integer.

    :param axis: ``"index"`` (0), ``"columns"`` (1) or an integer-like value
    :param tileable: optional object with ``ndim``; when given, ``axis`` must
        be smaller than ``tileable.ndim``
    :return: the axis as an integer
    :raises ValueError: if ``axis`` is not integer-like, is negative, or is
        out of range for ``tileable``
    """
    if axis == "index":
        axis = 0
    elif axis == "columns":
        axis = 1

    try:
        axis = operator.index(axis)
        illegal = axis < 0 or (tileable is not None and axis >= tileable.ndim)
    except TypeError:
        illegal = True

    if not illegal:
        return axis
    raise ValueError(f"No axis named {axis} for object type {type(tileable)}")
783
+
784
+
785
def validate_axis_style_args(
    data, args, kwargs, arg_name, method_name
):  # pragma: no cover
    """Translate mixed ``(arg, axis=)`` / ``(index=, columns=)`` arguments.

    Both the ``.method(index, columns)`` and the ``.method(arg, axis=.)``
    calling conventions are accepted and normalized to a dictionary keyed
    by ``"index"`` / ``"columns"``.

    Parameters
    ----------
    data : DataFrame
    args : tuple
        All positional arguments from the user
    kwargs : dict
        All keyword arguments from the user
    arg_name, method_name : str
        Used for better error messages

    Returns
    -------
    kwargs : dict
        A dictionary of keyword arguments. Doesn't modify ``kwargs``
        inplace, so update them with the return value here.

    Raises
    ------
    TypeError
        If ``axis`` is combined with ``index`` / ``columns``, if ``arg_name``
        is given both positionally and by keyword, or if two or more
        positional arguments are passed.
    """
    # Goal: fill 'out' with index/columns-style arguments
    # like out = {'index': foo, 'columns': bar}
    out = {}

    # Start by validating for consistency
    axes_names = ["index"] if data.ndim == 1 else ["index", "columns"]
    if "axis" in kwargs and any(x in kwargs for x in axes_names):
        raise TypeError("Cannot specify both 'axis' and any of 'index' or 'columns'.")

    # First fill with explicit values provided by the user...
    if arg_name in kwargs:
        if args:
            raise TypeError(
                f"{method_name} got multiple values for argument '{arg_name}'"
            )
        target = axes_names[validate_axis(kwargs.get("axis", 0), data)]
        out[target] = kwargs[arg_name]

    # More user-provided arguments, now from kwargs
    for kw_name, kw_value in kwargs.items():
        try:
            target = axes_names[validate_axis(kw_name, data)]
        except ValueError:
            # not an axis-style keyword
            continue
        out[target] = kw_value

    # All user-provided kwargs have been handled now.
    # Now we supplement with positional arguments, raising when
    # there's ambiguity or conflicts
    n_positional = len(args)
    if n_positional == 0:
        return out  # It's up to the function to decide if this is valid

    if n_positional == 1:
        target = axes_names[validate_axis(kwargs.get("axis", 0), data)]
        out[target] = args[0]
        return out

    if n_positional == 2:
        if "axis" in kwargs:
            # Unambiguously wrong
            raise TypeError(
                "Cannot specify both 'axis' and any of 'index' or 'columns'"
            )
        template = (
            "Interpreting call\n\t'.{method_name}(a, b)' as "
            "\n\t'.{method_name}(index=a, columns=b)'.\nUse named "
            "arguments to remove any ambiguity."
        )
        raise TypeError(template.format(method_name=method_name))

    raise TypeError(f"Cannot specify all of '{arg_name}', 'index', 'columns'.")
863
+
864
+
865
def validate_output_types(**kwargs):
    """Resolve the requested output types from keyword arguments.

    ``output_types`` takes precedence; otherwise a single ``object_type`` or
    ``output_type`` is wrapped in a list. String entries are looked up
    (lower-cased) as attributes of ``OutputType``; other entries are kept.

    :return: list of output types, or None when nothing was specified
    """
    from ..core import OutputType

    single = kwargs.get("object_type") or kwargs.get("output_type")
    requested = kwargs.get("output_types")
    if not requested:
        requested = None if single is None else [single]
    if not requested:
        return None
    return [
        getattr(OutputType, item.lower()) if isinstance(item, str) else item
        for item in requested
    ]
880
+
881
+
882
def fetch_corner_data(df_or_series, session=None) -> pd.DataFrame:
    """
    Fetch corner DataFrame or Series for repr usage.

    When the object has more rows than pandas would display, only the head
    and tail rows needed for the truncated repr are fetched; otherwise the
    whole object is fetched.

    :param df_or_series: DataFrame or Series
    :param session: session forwarded to the fetch calls
    :return: corner DataFrame
    """
    from .indexing.iloc import iloc

    max_rows = pd.get_option("display.max_rows")
    try:
        min_rows = pd.get_option("display.min_rows")
    except KeyError:  # pragma: no cover
        # display.min_rows is introduced in pandas 0.25
        min_rows = None
    if min_rows is None:
        # a ``None`` display.min_rows means "follow display.max_rows"
        min_rows = max_rows
    elif max_rows is not None:
        min_rows = min(min_rows, max_rows)

    n_rows = df_or_series.shape[0]
    index_size = None
    # a ``None`` display.max_rows means unlimited rows: never truncate
    if (
        max_rows is not None
        and n_rows > max_rows
        and n_rows > min_rows // 2 * 2 + 2
    ):
        # for pandas, greater than max_rows
        # will display min_rows
        # thus we fetch min_rows + 2 lines
        index_size = min_rows // 2 + 1

    if index_size is None:
        return df_or_series._fetch(session=session)

    head = iloc(df_or_series)[:index_size]
    tail = iloc(df_or_series)[-index_size:]
    head_data, tail_data = ExecutableTuple([head, tail]).fetch(session=session)
    xdf = cudf if head.op.is_gpu() else pd
    return xdf.concat([head_data, tail_data], axis="index")
917
+
918
+
919
class ReprSeries(pd.Series):
    """Series holding corner data that reports the length of the full data."""

    def __init__(self, corner_data, real_shape):
        """
        :param corner_data: data actually held by this series
        :param real_shape: shape of the complete data; its first element
            is what ``len()`` returns
        """
        super().__init__(corner_data)
        self._real_shape = real_shape

    def __len__(self):
        # Only corner data is fetched for repr, so the natural length would
        # be misleading; report the real number of rows instead.
        real_length = self._real_shape[0]
        return real_length
929
+
930
+
931
def filter_dtypes_by_index(dtypes, index):
    """
    Select the entries of ``dtypes`` labelled by ``index``.

    A plain ``.loc`` lookup (with missing values dropped) is tried first.
    If it raises ``KeyError``, the labels are matched by merging the frame
    forms of both indexes, and the original index names are restored on the
    result.

    :param dtypes: Series of dtypes
    :param index: index holding the labels to keep
    :return: the filtered dtypes
    """
    try:
        return dtypes.loc[index].dropna()
    except KeyError:
        level_positions = list(range(dtypes.index.nlevels))
        own_labels = dtypes.index.to_frame()
        wanted_labels = index.to_frame()
        matched = own_labels.merge(wanted_labels).set_index(level_positions).index
        selected = dtypes.loc[matched]
        # set_index above used positional column labels; put the names back
        selected.index.names = dtypes.index.names
        return selected
944
+
945
+
946
@contextmanager
def create_sa_connection(con, **kwargs):
    """
    Context manager yielding a SQLAlchemy connection for ``con``.

    ``con`` may be a ``Connection`` (yielded as is and left open), an
    ``Engine`` (a new connection is opened and closed on exit), or anything
    else, which is passed with ``kwargs`` to ``sqlalchemy.create_engine``;
    in that last case both the connection and the engine are released on
    exit.

    :param con: connection, engine, or argument for ``create_engine``
    :param kwargs: extra arguments for ``create_engine``
    """
    import sqlalchemy as sa
    from sqlalchemy.engine import Connection, Engine

    owned_engine = None
    if isinstance(con, Connection):
        # connection created by the user: its lifetime is not ours to manage
        connection, close_on_exit = con, False
    elif isinstance(con, Engine):
        connection, close_on_exit = con.connect(), True
    else:
        owned_engine = sa.create_engine(con, **kwargs)
        connection, close_on_exit = owned_engine.connect(), True

    try:
        yield connection
    finally:
        if close_on_exit:
            connection.close()
        if owned_engine is not None:
            owned_engine.dispose()
974
+
975
+
976
def to_arrow_dtypes(dtypes, test_df=None):
    """
    Return a copy of ``dtypes`` with string dtypes replaced by
    ``ArrowStringDtype``.

    When ``test_df`` is given, the column at the same position is inspected:
    the dtype is replaced if the column has no non-NA value, or if its first
    non-NA value is a ``str``; otherwise it is left unchanged. Without
    ``test_df`` every string dtype is replaced.

    :param dtypes: Series of dtypes
    :param test_df: optional DataFrame whose columns line up with ``dtypes``
    :return: new Series of dtypes
    """
    from .arrays import ArrowStringDtype

    converted = dtypes.copy()
    for pos, dtype in enumerate(dtypes):
        if not is_string_dtype(dtype):
            continue
        if test_df is not None:
            column = test_df.iloc[:, pos]
            present = column[column.notna()]
            # a first non-NA value that is not a str keeps the original dtype
            if len(present) > 0 and not isinstance(present.iloc[0], str):
                continue
        converted.iloc[pos] = ArrowStringDtype()
    return converted
998
+
999
+
1000
def make_dtype(dtype):
    """
    Convert ``dtype`` into a dtype object.

    ``None`` is returned as None, numpy dtypes and pandas extension dtypes
    are returned unchanged, and anything else goes through ``np.dtype``.
    """
    if dtype is None:
        return None
    if isinstance(dtype, (np.dtype, ExtensionDtype)):
        return dtype
    return np.dtype(dtype)
1004
+
1005
+
1006
def make_dtypes(dtypes):
    """
    Convert a collection of dtype specifications into a Series of dtypes.

    ``None`` is returned as None. Input that is not already a ``pd.Series``
    is wrapped into one, then ``make_dtype`` is applied to every element.
    """
    if dtypes is None:
        return None
    as_series = dtypes if isinstance(dtypes, pd.Series) else pd.Series(dtypes)
    return as_series.apply(make_dtype)
1012
+
1013
+
1014
def is_dataframe(x):
    """
    Tell whether ``x`` is a pandas DataFrame, or a cudf DataFrame when the
    module-level ``cudf`` is not None.
    """
    frame_types = (pd.DataFrame,)
    if cudf is not None:  # pragma: no cover
        frame_types += (cudf.DataFrame,)
    return isinstance(x, frame_types)
1019
+
1020
+
1021
def is_series(x):
    """
    Tell whether ``x`` is a pandas Series, or a cudf Series when the
    module-level ``cudf`` is not None.
    """
    series_types = (pd.Series,)
    if cudf is not None:  # pragma: no cover
        series_types += (cudf.Series,)
    return isinstance(x, series_types)
1026
+
1027
+
1028
def is_index(x):
    """
    Tell whether ``x`` is a pandas Index, or a cudf Index when the
    module-level ``cudf`` is not None.
    """
    index_types = (pd.Index,)
    if cudf is not None:  # pragma: no cover
        index_types += (cudf.Index,)
    return isinstance(x, index_types)
1033
+
1034
+
1035
def get_xdf(x):
    """
    Return the dataframe module matching ``x``: ``cudf`` when it is not None
    and ``x`` is a cudf DataFrame, Series or Index, otherwise ``pd``.
    """
    if cudf is None:
        return pd
    cudf_types = (cudf.DataFrame, cudf.Series, cudf.Index)  # pragma: no cover
    return cudf if isinstance(x, cudf_types) else pd  # pragma: no cover
1040
+
1041
+
1042
def is_cudf(x):
    """
    Tell whether ``x`` is a cudf DataFrame, Series or Index. Always False
    when the module-level ``cudf`` is None.
    """
    return cudf is not None and isinstance(  # pragma: no cover
        x, (cudf.DataFrame, cudf.Series, cudf.Index)
    )
1047
+
1048
+
1049
def whether_to_clean_up(op, threshold):
    """
    Decide whether the function held by ``op`` needs cleanup, based on the
    size of the objects it carries.

    Sizes are measured with ``sys.getsizeof`` and accumulated over the
    closure cells of ``op.func`` or, for other callables, over the entries
    of its ``__dict__`` and ``__slots__``; classes are not inspected. As soon
    as the running total reaches ``threshold``, counting stops and
    ``op.need_clean_up_func`` is set to True.

    :param op: object exposing ``func`` and ``need_clean_up_func``
    :param threshold: size limit compared against the accumulated size
    :return: ``op.need_clean_up_func``
    """
    from collections import deque
    from numbers import Number

    func = op.func
    total_bytes = 0
    depth_limit = 2

    # objects of these types are measured shallowly, never descended into
    leaf_types = (str, bytes, Number, range, bytearray, pd.DataFrame, pd.Series)

    class GetSizeEarlyStopException(Exception):
        pass

    def check_exceed_threshold():
        if total_bytes >= threshold:
            raise GetSizeEarlyStopException()

    def getsize(obj_outer):
        # ids already counted within this single measurement
        visited = set()

        def inner_count(obj, depth):
            marker = id(obj)
            if marker in visited or depth > depth_limit:
                return 0
            visited.add(marker)
            child_depth = depth + 1
            size = sys.getsizeof(obj)
            if isinstance(obj, leaf_types):
                return size
            if isinstance(obj, (tuple, list, set, deque)):
                for item in obj:
                    size += inner_count(item, child_depth)
            elif hasattr(obj, "items"):
                for key_obj, value_obj in obj.items():
                    size += inner_count(key_obj, child_depth)
                    size += inner_count(value_obj, child_depth)
            if hasattr(obj, "__dict__"):
                size += inner_count(vars(obj), child_depth)
            if hasattr(obj, "__slots__"):
                for slot_name in obj.__slots__:
                    if hasattr(obj, slot_name):
                        size += inner_count(getattr(obj, slot_name), child_depth)
            return size

        return inner_count(obj_outer, 0)

    try:
        # Note: In most cases, func is just a function with closure, while chances are that
        # func is a callable that doesn't have __closure__ attribute.
        if not inspect.isclass(func):
            if getattr(func, "__closure__", None) is not None:
                for cell in func.__closure__:
                    total_bytes += getsize(cell.cell_contents)
                    check_exceed_threshold()
            elif callable(func):
                if hasattr(func, "__dict__"):
                    for attr_name, attr_value in func.__dict__.items():
                        total_bytes += getsize(attr_name) + getsize(attr_value)
                        check_exceed_threshold()
                if hasattr(func, "__slots__"):
                    for slot in func.__slots__:
                        if hasattr(func, slot):
                            total_bytes += getsize(getattr(func, slot))
                        check_exceed_threshold()
    except GetSizeEarlyStopException:
        logger.debug("Func needs cleanup.")
        op.need_clean_up_func = True
    else:
        assert op.need_clean_up_func is False
        logger.debug("Func doesn't need cleanup.")

    return op.need_clean_up_func
1126
+
1127
+
1128
def concat_on_columns(objs: List) -> Any:
    """
    Concatenate ``objs`` along the column axis, using the dataframe module
    that ``get_xdf`` selects for the first object.

    :param objs: objects to concatenate; the first one decides the module
    :return: the concatenated result
    """
    first = objs[0]
    xdf = get_xdf(first)
    result = xdf.concat(objs, axis=1)
    if xdf is cudf:
        # In cudf, concat with axis=1 and ignore_index=False by default behaves
        # opposite to pandas: it resets the index, which does not match its
        # document. Restore the index of the first object in that case.
        result.index = first.index
    return result
1137
+
1138
+
1139
def apply_if_callable(maybe_callable, obj, **kwargs):
    """
    Call ``maybe_callable`` with ``obj`` and ``kwargs`` when it is callable
    and return the result; otherwise return ``maybe_callable`` itself.
    """
    if not callable(maybe_callable):
        return maybe_callable
    return maybe_callable(obj, **kwargs)
1144
+
1145
+
1146
def patch_sa_engine_execute():
    """
    pandas did not resolve compatibility issue of sqlalchemy 2.0, the issue
    is https://github.com/pandas-dev/pandas/issues/40686. We need to patch
    Engine class in SQLAlchemy, and then our code can work well.

    Nothing is done when SQLAlchemy cannot be imported or when ``Engine``
    already has an ``execute`` attribute.
    """
    try:
        from sqlalchemy.engine import Engine
    except ImportError:  # pragma: no cover
        return

    if hasattr(Engine, "execute"):  # pragma: no cover
        return

    def execute(self, statement, *multiparams, **params):
        # open a connection from the engine and run the statement on it
        return self.connect().execute(statement, *multiparams, **params)

    Engine.execute = execute
1164
+
1165
+
1166
def pack_func_args(df, funcs, *args, **kwargs) -> Any:
    """
    Bind ``args`` and ``kwargs`` to ``funcs`` so that they cannot be confused
    with other positional and keyword arguments later on.

    The result mirrors the structure of ``funcs``:

    1. Without any ``args`` and ``kwargs``, ``funcs`` is returned untouched.

    2. A dict-like ``funcs`` yields a dict with the same keys whose values are
       packed recursively.

    3. A list-like ``funcs`` yields a list whose elements are packed
       recursively.

    4. A str ``funcs`` is resolved with ``get_callable_by_name(df, funcs)``
       and the resolved callable is wrapped in a ``functools.partial``.

    5. Anything else is treated as a callable and wrapped in a
       ``functools.partial``.

    Parameters
    ----------
    df : pandas.DataFrame or pandas.Series
        The object used to resolve function names.
    funcs : function, str, list-like or dict-like
        Function(s) to pack.
    *args :
        The positional arguments to bind. If funcs contains many functions,
        each one should be able to accept *args.
    **kwargs :
        The keyword arguments to bind. If funcs contains many functions,
        each one should be able to accept **kwargs.

    Returns
    -------
    The packed functions having the same structure with funcs.

    Raises
    ------
    ValueError :
        If a string names an attribute of df that is not callable.
    AttributeError :
        If a string has no corresponding function.
    """
    if not (args or kwargs):
        return funcs

    if is_dict_like(funcs):
        return {
            key: pack_func_args(df, value, *args, **kwargs)
            for key, value in funcs.items()
        }

    if is_list_like(funcs):
        return [pack_func_args(df, item, *args, **kwargs) for item in funcs]

    target = funcs
    if isinstance(funcs, str):
        target = get_callable_by_name(df, funcs)

    # Callable
    return functools.partial(target, *args, **kwargs)
1227
+
1228
+
1229
def get_callable_by_name(df: Any, func_name: str) -> Callable:
    """
    Resolve a function name into a callable.

    The attribute ``func_name`` of ``df`` is looked up first: it is returned
    when it is callable, and a ValueError is raised when it is not. If ``df``
    has no such attribute, the numpy function of that name is returned,
    provided it exists and ``df`` has an ``__array__`` attribute. Otherwise
    an AttributeError is raised.

    Parameters
    ----------
    df : pandas.Series or pandas.DataFrame
        The receiver of the func name.
    func_name : str
        The func name.

    Returns
    -------
    The callable instance.

    Raises
    ------
    ValueError :
        If the attribute found on df is not callable.
    AttributeError :
        If no corresponding function is found.
    """
    missing = object()

    own_attr = getattr(df, func_name, missing)
    if own_attr is not missing:
        if not callable(own_attr):
            raise ValueError(f"{func_name} is not a callable")
        return own_attr

    numpy_func = getattr(np, func_name, missing)
    if numpy_func is not missing and hasattr(df, "__array__"):
        return numpy_func

    raise AttributeError(
        f"'{func_name}' is not a valid function for '{type(df).__name__}' object"
    )