maxframe 0.1.0b5__cp39-cp39-macosx_11_0_arm64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (647)
  1. maxframe/__init__.py +32 -0
  2. maxframe/_utils.cpython-39-darwin.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyx +547 -0
  5. maxframe/codegen.py +528 -0
  6. maxframe/config/__init__.py +15 -0
  7. maxframe/config/config.py +443 -0
  8. maxframe/config/tests/__init__.py +13 -0
  9. maxframe/config/tests/test_config.py +103 -0
  10. maxframe/config/tests/test_validators.py +34 -0
  11. maxframe/config/validators.py +57 -0
  12. maxframe/conftest.py +139 -0
  13. maxframe/core/__init__.py +65 -0
  14. maxframe/core/base.py +156 -0
  15. maxframe/core/entity/__init__.py +44 -0
  16. maxframe/core/entity/chunks.py +68 -0
  17. maxframe/core/entity/core.py +152 -0
  18. maxframe/core/entity/executable.py +337 -0
  19. maxframe/core/entity/fuse.py +73 -0
  20. maxframe/core/entity/objects.py +100 -0
  21. maxframe/core/entity/output_types.py +90 -0
  22. maxframe/core/entity/tileables.py +438 -0
  23. maxframe/core/entity/utils.py +24 -0
  24. maxframe/core/graph/__init__.py +17 -0
  25. maxframe/core/graph/builder/__init__.py +16 -0
  26. maxframe/core/graph/builder/base.py +86 -0
  27. maxframe/core/graph/builder/chunk.py +430 -0
  28. maxframe/core/graph/builder/tileable.py +34 -0
  29. maxframe/core/graph/builder/utils.py +41 -0
  30. maxframe/core/graph/core.cpython-39-darwin.so +0 -0
  31. maxframe/core/graph/core.pyx +467 -0
  32. maxframe/core/graph/entity.py +171 -0
  33. maxframe/core/graph/tests/__init__.py +13 -0
  34. maxframe/core/graph/tests/test_graph.py +205 -0
  35. maxframe/core/mode.py +96 -0
  36. maxframe/core/operator/__init__.py +34 -0
  37. maxframe/core/operator/base.py +450 -0
  38. maxframe/core/operator/core.py +276 -0
  39. maxframe/core/operator/fetch.py +53 -0
  40. maxframe/core/operator/fuse.py +29 -0
  41. maxframe/core/operator/objects.py +72 -0
  42. maxframe/core/operator/shuffle.py +111 -0
  43. maxframe/core/operator/tests/__init__.py +13 -0
  44. maxframe/core/operator/tests/test_core.py +64 -0
  45. maxframe/core/tests/__init__.py +13 -0
  46. maxframe/core/tests/test_mode.py +75 -0
  47. maxframe/dataframe/__init__.py +81 -0
  48. maxframe/dataframe/arithmetic/__init__.py +359 -0
  49. maxframe/dataframe/arithmetic/abs.py +33 -0
  50. maxframe/dataframe/arithmetic/add.py +60 -0
  51. maxframe/dataframe/arithmetic/arccos.py +28 -0
  52. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  53. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  54. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  55. maxframe/dataframe/arithmetic/arctan.py +28 -0
  56. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  57. maxframe/dataframe/arithmetic/around.py +152 -0
  58. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  59. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  60. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  61. maxframe/dataframe/arithmetic/ceil.py +28 -0
  62. maxframe/dataframe/arithmetic/core.py +342 -0
  63. maxframe/dataframe/arithmetic/cos.py +28 -0
  64. maxframe/dataframe/arithmetic/cosh.py +28 -0
  65. maxframe/dataframe/arithmetic/degrees.py +28 -0
  66. maxframe/dataframe/arithmetic/docstring.py +442 -0
  67. maxframe/dataframe/arithmetic/equal.py +56 -0
  68. maxframe/dataframe/arithmetic/exp.py +28 -0
  69. maxframe/dataframe/arithmetic/exp2.py +28 -0
  70. maxframe/dataframe/arithmetic/expm1.py +28 -0
  71. maxframe/dataframe/arithmetic/floor.py +28 -0
  72. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  73. maxframe/dataframe/arithmetic/greater.py +57 -0
  74. maxframe/dataframe/arithmetic/greater_equal.py +57 -0
  75. maxframe/dataframe/arithmetic/invert.py +33 -0
  76. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  77. maxframe/dataframe/arithmetic/less.py +57 -0
  78. maxframe/dataframe/arithmetic/less_equal.py +57 -0
  79. maxframe/dataframe/arithmetic/log.py +28 -0
  80. maxframe/dataframe/arithmetic/log10.py +28 -0
  81. maxframe/dataframe/arithmetic/log2.py +28 -0
  82. maxframe/dataframe/arithmetic/mod.py +60 -0
  83. maxframe/dataframe/arithmetic/multiply.py +60 -0
  84. maxframe/dataframe/arithmetic/negative.py +33 -0
  85. maxframe/dataframe/arithmetic/not_equal.py +56 -0
  86. maxframe/dataframe/arithmetic/power.py +68 -0
  87. maxframe/dataframe/arithmetic/radians.py +28 -0
  88. maxframe/dataframe/arithmetic/sin.py +28 -0
  89. maxframe/dataframe/arithmetic/sinh.py +28 -0
  90. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  91. maxframe/dataframe/arithmetic/subtract.py +64 -0
  92. maxframe/dataframe/arithmetic/tan.py +28 -0
  93. maxframe/dataframe/arithmetic/tanh.py +28 -0
  94. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  95. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
  96. maxframe/dataframe/arithmetic/truediv.py +64 -0
  97. maxframe/dataframe/arithmetic/trunc.py +28 -0
  98. maxframe/dataframe/arrays.py +864 -0
  99. maxframe/dataframe/core.py +2417 -0
  100. maxframe/dataframe/datasource/__init__.py +15 -0
  101. maxframe/dataframe/datasource/core.py +81 -0
  102. maxframe/dataframe/datasource/dataframe.py +59 -0
  103. maxframe/dataframe/datasource/date_range.py +504 -0
  104. maxframe/dataframe/datasource/from_index.py +54 -0
  105. maxframe/dataframe/datasource/from_records.py +107 -0
  106. maxframe/dataframe/datasource/from_tensor.py +419 -0
  107. maxframe/dataframe/datasource/index.py +117 -0
  108. maxframe/dataframe/datasource/read_csv.py +528 -0
  109. maxframe/dataframe/datasource/read_odps_query.py +299 -0
  110. maxframe/dataframe/datasource/read_odps_table.py +253 -0
  111. maxframe/dataframe/datasource/read_parquet.py +421 -0
  112. maxframe/dataframe/datasource/series.py +55 -0
  113. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  114. maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
  115. maxframe/dataframe/datastore/__init__.py +26 -0
  116. maxframe/dataframe/datastore/core.py +19 -0
  117. maxframe/dataframe/datastore/to_csv.py +227 -0
  118. maxframe/dataframe/datastore/to_odps.py +162 -0
  119. maxframe/dataframe/extensions/__init__.py +41 -0
  120. maxframe/dataframe/extensions/accessor.py +50 -0
  121. maxframe/dataframe/extensions/reshuffle.py +83 -0
  122. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  123. maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
  124. maxframe/dataframe/fetch/__init__.py +15 -0
  125. maxframe/dataframe/fetch/core.py +86 -0
  126. maxframe/dataframe/groupby/__init__.py +82 -0
  127. maxframe/dataframe/groupby/aggregation.py +350 -0
  128. maxframe/dataframe/groupby/apply.py +251 -0
  129. maxframe/dataframe/groupby/core.py +179 -0
  130. maxframe/dataframe/groupby/cum.py +124 -0
  131. maxframe/dataframe/groupby/fill.py +141 -0
  132. maxframe/dataframe/groupby/getitem.py +92 -0
  133. maxframe/dataframe/groupby/head.py +105 -0
  134. maxframe/dataframe/groupby/sample.py +214 -0
  135. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  136. maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
  137. maxframe/dataframe/groupby/transform.py +255 -0
  138. maxframe/dataframe/indexing/__init__.py +84 -0
  139. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  140. maxframe/dataframe/indexing/align.py +349 -0
  141. maxframe/dataframe/indexing/at.py +83 -0
  142. maxframe/dataframe/indexing/getitem.py +204 -0
  143. maxframe/dataframe/indexing/iat.py +37 -0
  144. maxframe/dataframe/indexing/iloc.py +566 -0
  145. maxframe/dataframe/indexing/insert.py +86 -0
  146. maxframe/dataframe/indexing/loc.py +411 -0
  147. maxframe/dataframe/indexing/reindex.py +526 -0
  148. maxframe/dataframe/indexing/rename.py +462 -0
  149. maxframe/dataframe/indexing/rename_axis.py +209 -0
  150. maxframe/dataframe/indexing/reset_index.py +402 -0
  151. maxframe/dataframe/indexing/sample.py +221 -0
  152. maxframe/dataframe/indexing/set_axis.py +194 -0
  153. maxframe/dataframe/indexing/set_index.py +61 -0
  154. maxframe/dataframe/indexing/setitem.py +130 -0
  155. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  156. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  157. maxframe/dataframe/indexing/where.py +308 -0
  158. maxframe/dataframe/initializer.py +288 -0
  159. maxframe/dataframe/merge/__init__.py +32 -0
  160. maxframe/dataframe/merge/append.py +121 -0
  161. maxframe/dataframe/merge/concat.py +325 -0
  162. maxframe/dataframe/merge/merge.py +593 -0
  163. maxframe/dataframe/merge/tests/__init__.py +13 -0
  164. maxframe/dataframe/merge/tests/test_merge.py +215 -0
  165. maxframe/dataframe/misc/__init__.py +134 -0
  166. maxframe/dataframe/misc/_duplicate.py +46 -0
  167. maxframe/dataframe/misc/accessor.py +276 -0
  168. maxframe/dataframe/misc/apply.py +692 -0
  169. maxframe/dataframe/misc/astype.py +236 -0
  170. maxframe/dataframe/misc/case_when.py +141 -0
  171. maxframe/dataframe/misc/check_monotonic.py +84 -0
  172. maxframe/dataframe/misc/cut.py +383 -0
  173. maxframe/dataframe/misc/datetimes.py +79 -0
  174. maxframe/dataframe/misc/describe.py +108 -0
  175. maxframe/dataframe/misc/diff.py +210 -0
  176. maxframe/dataframe/misc/drop.py +440 -0
  177. maxframe/dataframe/misc/drop_duplicates.py +248 -0
  178. maxframe/dataframe/misc/duplicated.py +292 -0
  179. maxframe/dataframe/misc/eval.py +728 -0
  180. maxframe/dataframe/misc/explode.py +171 -0
  181. maxframe/dataframe/misc/get_dummies.py +208 -0
  182. maxframe/dataframe/misc/isin.py +217 -0
  183. maxframe/dataframe/misc/map.py +236 -0
  184. maxframe/dataframe/misc/melt.py +162 -0
  185. maxframe/dataframe/misc/memory_usage.py +248 -0
  186. maxframe/dataframe/misc/pct_change.py +150 -0
  187. maxframe/dataframe/misc/pivot_table.py +262 -0
  188. maxframe/dataframe/misc/qcut.py +104 -0
  189. maxframe/dataframe/misc/select_dtypes.py +104 -0
  190. maxframe/dataframe/misc/shift.py +256 -0
  191. maxframe/dataframe/misc/stack.py +238 -0
  192. maxframe/dataframe/misc/string_.py +221 -0
  193. maxframe/dataframe/misc/tests/__init__.py +13 -0
  194. maxframe/dataframe/misc/tests/test_misc.py +468 -0
  195. maxframe/dataframe/misc/to_numeric.py +178 -0
  196. maxframe/dataframe/misc/transform.py +361 -0
  197. maxframe/dataframe/misc/transpose.py +136 -0
  198. maxframe/dataframe/misc/value_counts.py +182 -0
  199. maxframe/dataframe/missing/__init__.py +53 -0
  200. maxframe/dataframe/missing/checkna.py +223 -0
  201. maxframe/dataframe/missing/dropna.py +280 -0
  202. maxframe/dataframe/missing/fillna.py +275 -0
  203. maxframe/dataframe/missing/replace.py +439 -0
  204. maxframe/dataframe/missing/tests/__init__.py +13 -0
  205. maxframe/dataframe/missing/tests/test_missing.py +89 -0
  206. maxframe/dataframe/operators.py +273 -0
  207. maxframe/dataframe/plotting/__init__.py +40 -0
  208. maxframe/dataframe/plotting/core.py +78 -0
  209. maxframe/dataframe/plotting/tests/__init__.py +13 -0
  210. maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
  211. maxframe/dataframe/reduction/__init__.py +107 -0
  212. maxframe/dataframe/reduction/aggregation.py +344 -0
  213. maxframe/dataframe/reduction/all.py +78 -0
  214. maxframe/dataframe/reduction/any.py +78 -0
  215. maxframe/dataframe/reduction/core.py +837 -0
  216. maxframe/dataframe/reduction/count.py +59 -0
  217. maxframe/dataframe/reduction/cummax.py +30 -0
  218. maxframe/dataframe/reduction/cummin.py +30 -0
  219. maxframe/dataframe/reduction/cumprod.py +30 -0
  220. maxframe/dataframe/reduction/cumsum.py +30 -0
  221. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  222. maxframe/dataframe/reduction/kurtosis.py +104 -0
  223. maxframe/dataframe/reduction/max.py +65 -0
  224. maxframe/dataframe/reduction/mean.py +61 -0
  225. maxframe/dataframe/reduction/min.py +65 -0
  226. maxframe/dataframe/reduction/nunique.py +141 -0
  227. maxframe/dataframe/reduction/prod.py +76 -0
  228. maxframe/dataframe/reduction/reduction_size.py +36 -0
  229. maxframe/dataframe/reduction/sem.py +69 -0
  230. maxframe/dataframe/reduction/skew.py +89 -0
  231. maxframe/dataframe/reduction/std.py +53 -0
  232. maxframe/dataframe/reduction/str_concat.py +48 -0
  233. maxframe/dataframe/reduction/sum.py +77 -0
  234. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  235. maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
  236. maxframe/dataframe/reduction/unique.py +90 -0
  237. maxframe/dataframe/reduction/var.py +72 -0
  238. maxframe/dataframe/sort/__init__.py +34 -0
  239. maxframe/dataframe/sort/core.py +36 -0
  240. maxframe/dataframe/sort/sort_index.py +153 -0
  241. maxframe/dataframe/sort/sort_values.py +311 -0
  242. maxframe/dataframe/sort/tests/__init__.py +13 -0
  243. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  244. maxframe/dataframe/statistics/__init__.py +33 -0
  245. maxframe/dataframe/statistics/corr.py +280 -0
  246. maxframe/dataframe/statistics/quantile.py +341 -0
  247. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  248. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  249. maxframe/dataframe/tests/__init__.py +13 -0
  250. maxframe/dataframe/tests/test_initializer.py +29 -0
  251. maxframe/dataframe/tseries/__init__.py +13 -0
  252. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  253. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  254. maxframe/dataframe/tseries/to_datetime.py +297 -0
  255. maxframe/dataframe/ufunc/__init__.py +27 -0
  256. maxframe/dataframe/ufunc/tensor.py +54 -0
  257. maxframe/dataframe/ufunc/ufunc.py +52 -0
  258. maxframe/dataframe/utils.py +1267 -0
  259. maxframe/dataframe/window/__init__.py +29 -0
  260. maxframe/dataframe/window/aggregation.py +96 -0
  261. maxframe/dataframe/window/core.py +69 -0
  262. maxframe/dataframe/window/ewm.py +249 -0
  263. maxframe/dataframe/window/expanding.py +147 -0
  264. maxframe/dataframe/window/rolling.py +376 -0
  265. maxframe/dataframe/window/tests/__init__.py +13 -0
  266. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  267. maxframe/dataframe/window/tests/test_expanding.py +66 -0
  268. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  269. maxframe/env.py +33 -0
  270. maxframe/errors.py +21 -0
  271. maxframe/extension.py +81 -0
  272. maxframe/learn/__init__.py +17 -0
  273. maxframe/learn/contrib/__init__.py +17 -0
  274. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  275. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  276. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  277. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  278. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  279. maxframe/learn/contrib/utils.py +52 -0
  280. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  281. maxframe/learn/contrib/xgboost/classifier.py +86 -0
  282. maxframe/learn/contrib/xgboost/core.py +156 -0
  283. maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
  284. maxframe/learn/contrib/xgboost/predict.py +138 -0
  285. maxframe/learn/contrib/xgboost/regressor.py +78 -0
  286. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  287. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  288. maxframe/learn/contrib/xgboost/train.py +121 -0
  289. maxframe/learn/utils/__init__.py +15 -0
  290. maxframe/learn/utils/core.py +29 -0
  291. maxframe/lib/__init__.py +15 -0
  292. maxframe/lib/aio/__init__.py +27 -0
  293. maxframe/lib/aio/_runners.py +162 -0
  294. maxframe/lib/aio/_threads.py +35 -0
  295. maxframe/lib/aio/base.py +82 -0
  296. maxframe/lib/aio/file.py +85 -0
  297. maxframe/lib/aio/isolation.py +100 -0
  298. maxframe/lib/aio/lru.py +242 -0
  299. maxframe/lib/aio/parallelism.py +37 -0
  300. maxframe/lib/aio/tests/__init__.py +13 -0
  301. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  302. maxframe/lib/compression.py +55 -0
  303. maxframe/lib/cython/__init__.py +13 -0
  304. maxframe/lib/cython/libcpp.pxd +30 -0
  305. maxframe/lib/filesystem/__init__.py +21 -0
  306. maxframe/lib/filesystem/_glob.py +173 -0
  307. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  308. maxframe/lib/filesystem/_oss_lib/common.py +198 -0
  309. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  310. maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
  311. maxframe/lib/filesystem/arrow.py +236 -0
  312. maxframe/lib/filesystem/base.py +263 -0
  313. maxframe/lib/filesystem/core.py +95 -0
  314. maxframe/lib/filesystem/fsmap.py +164 -0
  315. maxframe/lib/filesystem/hdfs.py +31 -0
  316. maxframe/lib/filesystem/local.py +112 -0
  317. maxframe/lib/filesystem/oss.py +157 -0
  318. maxframe/lib/filesystem/tests/__init__.py +13 -0
  319. maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
  320. maxframe/lib/filesystem/tests/test_oss.py +182 -0
  321. maxframe/lib/functools_compat.py +81 -0
  322. maxframe/lib/mmh3.cpython-39-darwin.so +0 -0
  323. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  324. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  325. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  326. maxframe/lib/sparse/__init__.py +861 -0
  327. maxframe/lib/sparse/array.py +1604 -0
  328. maxframe/lib/sparse/core.py +92 -0
  329. maxframe/lib/sparse/matrix.py +241 -0
  330. maxframe/lib/sparse/tests/__init__.py +15 -0
  331. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  332. maxframe/lib/sparse/vector.py +150 -0
  333. maxframe/lib/tblib/LICENSE +20 -0
  334. maxframe/lib/tblib/__init__.py +327 -0
  335. maxframe/lib/tblib/cpython.py +83 -0
  336. maxframe/lib/tblib/decorators.py +44 -0
  337. maxframe/lib/tblib/pickling_support.py +90 -0
  338. maxframe/lib/tests/__init__.py +13 -0
  339. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  340. maxframe/lib/version.py +620 -0
  341. maxframe/lib/wrapped_pickle.py +139 -0
  342. maxframe/mixin.py +100 -0
  343. maxframe/odpsio/__init__.py +21 -0
  344. maxframe/odpsio/arrow.py +91 -0
  345. maxframe/odpsio/schema.py +364 -0
  346. maxframe/odpsio/tableio.py +322 -0
  347. maxframe/odpsio/tests/__init__.py +13 -0
  348. maxframe/odpsio/tests/test_arrow.py +88 -0
  349. maxframe/odpsio/tests/test_schema.py +297 -0
  350. maxframe/odpsio/tests/test_tableio.py +136 -0
  351. maxframe/odpsio/tests/test_volumeio.py +90 -0
  352. maxframe/odpsio/volumeio.py +95 -0
  353. maxframe/opcodes.py +590 -0
  354. maxframe/protocol.py +415 -0
  355. maxframe/remote/__init__.py +18 -0
  356. maxframe/remote/core.py +210 -0
  357. maxframe/remote/run_script.py +121 -0
  358. maxframe/serialization/__init__.py +26 -0
  359. maxframe/serialization/arrow.py +95 -0
  360. maxframe/serialization/core.cpython-39-darwin.so +0 -0
  361. maxframe/serialization/core.pxd +44 -0
  362. maxframe/serialization/core.pyi +61 -0
  363. maxframe/serialization/core.pyx +1094 -0
  364. maxframe/serialization/exception.py +86 -0
  365. maxframe/serialization/maxframe_objects.py +39 -0
  366. maxframe/serialization/numpy.py +91 -0
  367. maxframe/serialization/pandas.py +202 -0
  368. maxframe/serialization/scipy.py +71 -0
  369. maxframe/serialization/serializables/__init__.py +55 -0
  370. maxframe/serialization/serializables/core.py +262 -0
  371. maxframe/serialization/serializables/field.py +624 -0
  372. maxframe/serialization/serializables/field_type.py +589 -0
  373. maxframe/serialization/serializables/tests/__init__.py +13 -0
  374. maxframe/serialization/serializables/tests/test_field_type.py +121 -0
  375. maxframe/serialization/serializables/tests/test_serializable.py +250 -0
  376. maxframe/serialization/tests/__init__.py +13 -0
  377. maxframe/serialization/tests/test_serial.py +412 -0
  378. maxframe/session.py +1310 -0
  379. maxframe/tensor/__init__.py +183 -0
  380. maxframe/tensor/arithmetic/__init__.py +315 -0
  381. maxframe/tensor/arithmetic/abs.py +68 -0
  382. maxframe/tensor/arithmetic/absolute.py +68 -0
  383. maxframe/tensor/arithmetic/add.py +82 -0
  384. maxframe/tensor/arithmetic/angle.py +72 -0
  385. maxframe/tensor/arithmetic/arccos.py +104 -0
  386. maxframe/tensor/arithmetic/arccosh.py +91 -0
  387. maxframe/tensor/arithmetic/arcsin.py +94 -0
  388. maxframe/tensor/arithmetic/arcsinh.py +86 -0
  389. maxframe/tensor/arithmetic/arctan.py +106 -0
  390. maxframe/tensor/arithmetic/arctan2.py +128 -0
  391. maxframe/tensor/arithmetic/arctanh.py +86 -0
  392. maxframe/tensor/arithmetic/around.py +114 -0
  393. maxframe/tensor/arithmetic/bitand.py +95 -0
  394. maxframe/tensor/arithmetic/bitor.py +102 -0
  395. maxframe/tensor/arithmetic/bitxor.py +95 -0
  396. maxframe/tensor/arithmetic/cbrt.py +66 -0
  397. maxframe/tensor/arithmetic/ceil.py +71 -0
  398. maxframe/tensor/arithmetic/clip.py +165 -0
  399. maxframe/tensor/arithmetic/conj.py +74 -0
  400. maxframe/tensor/arithmetic/copysign.py +78 -0
  401. maxframe/tensor/arithmetic/core.py +544 -0
  402. maxframe/tensor/arithmetic/cos.py +85 -0
  403. maxframe/tensor/arithmetic/cosh.py +72 -0
  404. maxframe/tensor/arithmetic/deg2rad.py +72 -0
  405. maxframe/tensor/arithmetic/degrees.py +77 -0
  406. maxframe/tensor/arithmetic/divide.py +114 -0
  407. maxframe/tensor/arithmetic/equal.py +76 -0
  408. maxframe/tensor/arithmetic/exp.py +106 -0
  409. maxframe/tensor/arithmetic/exp2.py +67 -0
  410. maxframe/tensor/arithmetic/expm1.py +79 -0
  411. maxframe/tensor/arithmetic/fabs.py +74 -0
  412. maxframe/tensor/arithmetic/fix.py +69 -0
  413. maxframe/tensor/arithmetic/float_power.py +103 -0
  414. maxframe/tensor/arithmetic/floor.py +77 -0
  415. maxframe/tensor/arithmetic/floordiv.py +94 -0
  416. maxframe/tensor/arithmetic/fmax.py +105 -0
  417. maxframe/tensor/arithmetic/fmin.py +106 -0
  418. maxframe/tensor/arithmetic/fmod.py +99 -0
  419. maxframe/tensor/arithmetic/frexp.py +92 -0
  420. maxframe/tensor/arithmetic/greater.py +77 -0
  421. maxframe/tensor/arithmetic/greater_equal.py +69 -0
  422. maxframe/tensor/arithmetic/hypot.py +77 -0
  423. maxframe/tensor/arithmetic/i0.py +89 -0
  424. maxframe/tensor/arithmetic/imag.py +67 -0
  425. maxframe/tensor/arithmetic/invert.py +110 -0
  426. maxframe/tensor/arithmetic/isclose.py +115 -0
  427. maxframe/tensor/arithmetic/iscomplex.py +64 -0
  428. maxframe/tensor/arithmetic/isfinite.py +106 -0
  429. maxframe/tensor/arithmetic/isinf.py +103 -0
  430. maxframe/tensor/arithmetic/isnan.py +82 -0
  431. maxframe/tensor/arithmetic/isreal.py +63 -0
  432. maxframe/tensor/arithmetic/ldexp.py +99 -0
  433. maxframe/tensor/arithmetic/less.py +69 -0
  434. maxframe/tensor/arithmetic/less_equal.py +69 -0
  435. maxframe/tensor/arithmetic/log.py +92 -0
  436. maxframe/tensor/arithmetic/log10.py +85 -0
  437. maxframe/tensor/arithmetic/log1p.py +95 -0
  438. maxframe/tensor/arithmetic/log2.py +85 -0
  439. maxframe/tensor/arithmetic/logaddexp.py +80 -0
  440. maxframe/tensor/arithmetic/logaddexp2.py +78 -0
  441. maxframe/tensor/arithmetic/logical_and.py +81 -0
  442. maxframe/tensor/arithmetic/logical_not.py +74 -0
  443. maxframe/tensor/arithmetic/logical_or.py +82 -0
  444. maxframe/tensor/arithmetic/logical_xor.py +88 -0
  445. maxframe/tensor/arithmetic/lshift.py +82 -0
  446. maxframe/tensor/arithmetic/maximum.py +108 -0
  447. maxframe/tensor/arithmetic/minimum.py +108 -0
  448. maxframe/tensor/arithmetic/mod.py +104 -0
  449. maxframe/tensor/arithmetic/modf.py +83 -0
  450. maxframe/tensor/arithmetic/multiply.py +81 -0
  451. maxframe/tensor/arithmetic/nan_to_num.py +99 -0
  452. maxframe/tensor/arithmetic/negative.py +65 -0
  453. maxframe/tensor/arithmetic/nextafter.py +68 -0
  454. maxframe/tensor/arithmetic/not_equal.py +72 -0
  455. maxframe/tensor/arithmetic/positive.py +47 -0
  456. maxframe/tensor/arithmetic/power.py +106 -0
  457. maxframe/tensor/arithmetic/rad2deg.py +71 -0
  458. maxframe/tensor/arithmetic/radians.py +77 -0
  459. maxframe/tensor/arithmetic/real.py +70 -0
  460. maxframe/tensor/arithmetic/reciprocal.py +76 -0
  461. maxframe/tensor/arithmetic/rint.py +68 -0
  462. maxframe/tensor/arithmetic/rshift.py +81 -0
  463. maxframe/tensor/arithmetic/setimag.py +29 -0
  464. maxframe/tensor/arithmetic/setreal.py +29 -0
  465. maxframe/tensor/arithmetic/sign.py +81 -0
  466. maxframe/tensor/arithmetic/signbit.py +65 -0
  467. maxframe/tensor/arithmetic/sin.py +98 -0
  468. maxframe/tensor/arithmetic/sinc.py +102 -0
  469. maxframe/tensor/arithmetic/sinh.py +93 -0
  470. maxframe/tensor/arithmetic/spacing.py +72 -0
  471. maxframe/tensor/arithmetic/sqrt.py +81 -0
  472. maxframe/tensor/arithmetic/square.py +69 -0
  473. maxframe/tensor/arithmetic/subtract.py +81 -0
  474. maxframe/tensor/arithmetic/tan.py +88 -0
  475. maxframe/tensor/arithmetic/tanh.py +92 -0
  476. maxframe/tensor/arithmetic/tests/__init__.py +15 -0
  477. maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
  478. maxframe/tensor/arithmetic/truediv.py +104 -0
  479. maxframe/tensor/arithmetic/trunc.py +72 -0
  480. maxframe/tensor/arithmetic/utils.py +65 -0
  481. maxframe/tensor/array_utils.py +186 -0
  482. maxframe/tensor/base/__init__.py +34 -0
  483. maxframe/tensor/base/astype.py +119 -0
  484. maxframe/tensor/base/atleast_1d.py +74 -0
  485. maxframe/tensor/base/broadcast_to.py +89 -0
  486. maxframe/tensor/base/ravel.py +92 -0
  487. maxframe/tensor/base/tests/__init__.py +13 -0
  488. maxframe/tensor/base/tests/test_base.py +114 -0
  489. maxframe/tensor/base/transpose.py +125 -0
  490. maxframe/tensor/base/unique.py +205 -0
  491. maxframe/tensor/base/where.py +127 -0
  492. maxframe/tensor/core.py +724 -0
  493. maxframe/tensor/datasource/__init__.py +32 -0
  494. maxframe/tensor/datasource/arange.py +156 -0
  495. maxframe/tensor/datasource/array.py +415 -0
  496. maxframe/tensor/datasource/core.py +109 -0
  497. maxframe/tensor/datasource/empty.py +169 -0
  498. maxframe/tensor/datasource/from_dataframe.py +70 -0
  499. maxframe/tensor/datasource/from_dense.py +54 -0
  500. maxframe/tensor/datasource/from_sparse.py +47 -0
  501. maxframe/tensor/datasource/full.py +186 -0
  502. maxframe/tensor/datasource/ones.py +173 -0
  503. maxframe/tensor/datasource/scalar.py +40 -0
  504. maxframe/tensor/datasource/tests/__init__.py +13 -0
  505. maxframe/tensor/datasource/tests/test_datasource.py +278 -0
  506. maxframe/tensor/datasource/zeros.py +188 -0
  507. maxframe/tensor/fetch/__init__.py +15 -0
  508. maxframe/tensor/fetch/core.py +54 -0
  509. maxframe/tensor/indexing/__init__.py +47 -0
  510. maxframe/tensor/indexing/choose.py +196 -0
  511. maxframe/tensor/indexing/compress.py +124 -0
  512. maxframe/tensor/indexing/core.py +190 -0
  513. maxframe/tensor/indexing/extract.py +71 -0
  514. maxframe/tensor/indexing/fill_diagonal.py +183 -0
  515. maxframe/tensor/indexing/flatnonzero.py +60 -0
  516. maxframe/tensor/indexing/getitem.py +175 -0
  517. maxframe/tensor/indexing/nonzero.py +120 -0
  518. maxframe/tensor/indexing/setitem.py +132 -0
  519. maxframe/tensor/indexing/slice.py +29 -0
  520. maxframe/tensor/indexing/take.py +130 -0
  521. maxframe/tensor/indexing/tests/__init__.py +15 -0
  522. maxframe/tensor/indexing/tests/test_indexing.py +234 -0
  523. maxframe/tensor/indexing/unravel_index.py +103 -0
  524. maxframe/tensor/merge/__init__.py +15 -0
  525. maxframe/tensor/merge/stack.py +132 -0
  526. maxframe/tensor/merge/tests/__init__.py +13 -0
  527. maxframe/tensor/merge/tests/test_merge.py +52 -0
  528. maxframe/tensor/operators.py +123 -0
  529. maxframe/tensor/random/__init__.py +168 -0
  530. maxframe/tensor/random/beta.py +87 -0
  531. maxframe/tensor/random/binomial.py +137 -0
  532. maxframe/tensor/random/bytes.py +39 -0
  533. maxframe/tensor/random/chisquare.py +110 -0
  534. maxframe/tensor/random/choice.py +186 -0
  535. maxframe/tensor/random/core.py +234 -0
  536. maxframe/tensor/random/dirichlet.py +123 -0
  537. maxframe/tensor/random/exponential.py +94 -0
  538. maxframe/tensor/random/f.py +135 -0
  539. maxframe/tensor/random/gamma.py +128 -0
  540. maxframe/tensor/random/geometric.py +93 -0
  541. maxframe/tensor/random/gumbel.py +167 -0
  542. maxframe/tensor/random/hypergeometric.py +148 -0
  543. maxframe/tensor/random/laplace.py +133 -0
  544. maxframe/tensor/random/logistic.py +129 -0
  545. maxframe/tensor/random/lognormal.py +159 -0
  546. maxframe/tensor/random/logseries.py +122 -0
  547. maxframe/tensor/random/multinomial.py +133 -0
  548. maxframe/tensor/random/multivariate_normal.py +192 -0
  549. maxframe/tensor/random/negative_binomial.py +125 -0
  550. maxframe/tensor/random/noncentral_chisquare.py +132 -0
  551. maxframe/tensor/random/noncentral_f.py +126 -0
  552. maxframe/tensor/random/normal.py +143 -0
  553. maxframe/tensor/random/pareto.py +140 -0
  554. maxframe/tensor/random/permutation.py +104 -0
  555. maxframe/tensor/random/poisson.py +111 -0
  556. maxframe/tensor/random/power.py +142 -0
  557. maxframe/tensor/random/rand.py +82 -0
  558. maxframe/tensor/random/randint.py +121 -0
  559. maxframe/tensor/random/randn.py +96 -0
  560. maxframe/tensor/random/random_integers.py +123 -0
  561. maxframe/tensor/random/random_sample.py +86 -0
  562. maxframe/tensor/random/rayleigh.py +110 -0
  563. maxframe/tensor/random/shuffle.py +61 -0
  564. maxframe/tensor/random/standard_cauchy.py +105 -0
  565. maxframe/tensor/random/standard_exponential.py +72 -0
  566. maxframe/tensor/random/standard_gamma.py +120 -0
  567. maxframe/tensor/random/standard_normal.py +74 -0
  568. maxframe/tensor/random/standard_t.py +135 -0
  569. maxframe/tensor/random/tests/__init__.py +15 -0
  570. maxframe/tensor/random/tests/test_random.py +167 -0
  571. maxframe/tensor/random/triangular.py +119 -0
  572. maxframe/tensor/random/uniform.py +131 -0
  573. maxframe/tensor/random/vonmises.py +131 -0
  574. maxframe/tensor/random/wald.py +114 -0
  575. maxframe/tensor/random/weibull.py +140 -0
  576. maxframe/tensor/random/zipf.py +122 -0
  577. maxframe/tensor/rechunk/__init__.py +26 -0
  578. maxframe/tensor/rechunk/rechunk.py +43 -0
  579. maxframe/tensor/reduction/__init__.py +66 -0
  580. maxframe/tensor/reduction/all.py +103 -0
  581. maxframe/tensor/reduction/allclose.py +88 -0
  582. maxframe/tensor/reduction/any.py +105 -0
  583. maxframe/tensor/reduction/argmax.py +103 -0
  584. maxframe/tensor/reduction/argmin.py +103 -0
  585. maxframe/tensor/reduction/array_equal.py +64 -0
  586. maxframe/tensor/reduction/core.py +168 -0
  587. maxframe/tensor/reduction/count_nonzero.py +81 -0
  588. maxframe/tensor/reduction/cumprod.py +97 -0
  589. maxframe/tensor/reduction/cumsum.py +101 -0
  590. maxframe/tensor/reduction/max.py +120 -0
  591. maxframe/tensor/reduction/mean.py +123 -0
  592. maxframe/tensor/reduction/min.py +120 -0
  593. maxframe/tensor/reduction/nanargmax.py +82 -0
  594. maxframe/tensor/reduction/nanargmin.py +76 -0
  595. maxframe/tensor/reduction/nancumprod.py +91 -0
  596. maxframe/tensor/reduction/nancumsum.py +94 -0
  597. maxframe/tensor/reduction/nanmax.py +111 -0
  598. maxframe/tensor/reduction/nanmean.py +106 -0
  599. maxframe/tensor/reduction/nanmin.py +111 -0
  600. maxframe/tensor/reduction/nanprod.py +94 -0
  601. maxframe/tensor/reduction/nanstd.py +126 -0
  602. maxframe/tensor/reduction/nansum.py +115 -0
  603. maxframe/tensor/reduction/nanvar.py +149 -0
  604. maxframe/tensor/reduction/prod.py +130 -0
  605. maxframe/tensor/reduction/std.py +134 -0
  606. maxframe/tensor/reduction/sum.py +125 -0
  607. maxframe/tensor/reduction/tests/__init__.py +13 -0
  608. maxframe/tensor/reduction/tests/test_reduction.py +181 -0
  609. maxframe/tensor/reduction/var.py +176 -0
  610. maxframe/tensor/reshape/__init__.py +17 -0
  611. maxframe/tensor/reshape/reshape.py +188 -0
  612. maxframe/tensor/reshape/tests/__init__.py +15 -0
  613. maxframe/tensor/reshape/tests/test_reshape.py +37 -0
  614. maxframe/tensor/statistics/__init__.py +13 -0
  615. maxframe/tensor/statistics/percentile.py +175 -0
  616. maxframe/tensor/statistics/quantile.py +288 -0
  617. maxframe/tensor/ufunc/__init__.py +26 -0
  618. maxframe/tensor/ufunc/ufunc.py +200 -0
  619. maxframe/tensor/utils.py +718 -0
  620. maxframe/tests/__init__.py +13 -0
  621. maxframe/tests/test_codegen.py +69 -0
  622. maxframe/tests/test_protocol.py +144 -0
  623. maxframe/tests/test_utils.py +376 -0
  624. maxframe/tests/utils.py +164 -0
  625. maxframe/typing_.py +37 -0
  626. maxframe/udf.py +134 -0
  627. maxframe/utils.py +1114 -0
  628. maxframe-0.1.0b5.dist-info/METADATA +104 -0
  629. maxframe-0.1.0b5.dist-info/RECORD +647 -0
  630. maxframe-0.1.0b5.dist-info/WHEEL +5 -0
  631. maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
  632. maxframe_client/__init__.py +17 -0
  633. maxframe_client/clients/__init__.py +13 -0
  634. maxframe_client/clients/framedriver.py +118 -0
  635. maxframe_client/clients/spe.py +104 -0
  636. maxframe_client/conftest.py +15 -0
  637. maxframe_client/fetcher.py +264 -0
  638. maxframe_client/session/__init__.py +22 -0
  639. maxframe_client/session/consts.py +36 -0
  640. maxframe_client/session/graph.py +119 -0
  641. maxframe_client/session/odps.py +482 -0
  642. maxframe_client/session/task.py +280 -0
  643. maxframe_client/session/tests/__init__.py +13 -0
  644. maxframe_client/session/tests/test_task.py +85 -0
  645. maxframe_client/tests/__init__.py +13 -0
  646. maxframe_client/tests/test_fetcher.py +89 -0
  647. maxframe_client/tests/test_session.py +255 -0
maxframe/utils.py ADDED
@@ -0,0 +1,1114 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import asyncio.events
16
+ import concurrent.futures
17
+ import contextvars
18
+ import dataclasses
19
+ import datetime
20
+ import enum
21
+ import functools
22
+ import hashlib
23
+ import importlib
24
+ import inspect
25
+ import io
26
+ import itertools
27
+ import numbers
28
+ import os
29
+ import pkgutil
30
+ import random
31
+ import struct
32
+ import sys
33
+ import threading
34
+ import time
35
+ import tokenize as pytokenize
36
+ import traceback
37
+ import types
38
+ import weakref
39
+ import zlib
40
+ from collections.abc import Hashable, Mapping
41
+ from contextlib import contextmanager
42
+ from typing import (
43
+ Any,
44
+ Awaitable,
45
+ Callable,
46
+ Dict,
47
+ Iterable,
48
+ List,
49
+ Optional,
50
+ Tuple,
51
+ Type,
52
+ TypeVar,
53
+ Union,
54
+ )
55
+
56
+ import msgpack
57
+ import numpy as np
58
+ import pandas as pd
59
+ import traitlets
60
+ from tornado import httpclient, web
61
+ from tornado.simple_httpclient import HTTPTimeoutError
62
+
63
+ from ._utils import ( # noqa: F401 # pylint: disable=unused-import
64
+ NamedType,
65
+ Timer,
66
+ TypeDispatcher,
67
+ ceildiv,
68
+ get_user_call_point,
69
+ new_random_id,
70
+ register_tokenizer,
71
+ reset_id_random_seed,
72
+ to_binary,
73
+ to_str,
74
+ to_text,
75
+ tokenize,
76
+ tokenize_int,
77
+ )
78
+ from .lib.version import parse as parse_version
79
+ from .typing_ import ChunkType, EntityType, TileableType, TimeoutType
80
+
81
# make flake8 happy by referencing these imports
# (each self-assignment counts as a use of the name imported from ._utils)
NamedType = NamedType
TypeDispatcher = TypeDispatcher
tokenize = tokenize
register_tokenizer = register_tokenizer
ceildiv = ceildiv
reset_id_random_seed = reset_id_random_seed
new_random_id = new_random_id
get_user_call_point = get_user_call_point
# True when the ``CI`` environment variable is "1" or "true" (case-insensitive)
_is_ci = (os.environ.get("CI") or "0").lower() in ("1", "true")
# ``release`` attribute of the parsed version of the installed pandas
pd_release_version: Tuple[int] = parse_version(pd.__version__).release
92
+
93
# Expose ``NoDefault`` / ``no_default`` regardless of whether the installed
# pandas provides them, and register aliases on ``pandas._libs.lib`` so
# pickled references can be resolved.
try:
    from pandas._libs import lib as _pd__libs_lib
    from pandas._libs.lib import NoDefault, no_default

    # keep the original implementation so the patched one can delegate to it
    _raw__reduce__ = type(NoDefault).__reduce__

    def _no_default__reduce__(self):
        # Delegate to the original ``__reduce__`` unless ``self`` is the
        # ``NoDefault`` object itself, which is pickled as an attribute lookup
        # on ``pandas._libs.lib``.
        if self is not NoDefault:
            return _raw__reduce__(self)
        else:  # pragma: no cover
            return getattr, (_pd__libs_lib, "NoDefault")

    if hasattr(_pd__libs_lib, "_NoDefault"):  # pragma: no cover
        # need to patch __reduce__ to make sure it can be properly unpickled
        type(NoDefault).__reduce__ = _no_default__reduce__
    else:
        # introduced in pandas 1.5.0 : register for pickle compatibility
        _pd__libs_lib._NoDefault = NoDefault
except ImportError:  # pragma: no cover
    # pandas does not export NoDefault: define a local stand-in enum

    class NoDefault(enum.Enum):
        no_default = "NO_DEFAULT"

        def __repr__(self) -> str:
            return "<no_default>"

    no_default = NoDefault.no_default

    try:
        # register for pickle compatibility
        from pandas._libs import lib as _pd__libs_lib

        _pd__libs_lib.NoDefault = NoDefault
    except (ImportError, AttributeError):
        # best effort only: the stand-in still works without registration
        pass
128
+
129
# pyarrow is an optional dependency; ``pa`` is None when it cannot be imported
try:
    import pyarrow as pa
except ImportError:
    pa = None
133
+
134
+
135
class classproperty:
    """Descriptor that calls the wrapped function with the owner class."""

    def __init__(self, f):
        # function invoked with the owning class on every attribute access
        self.f = f

    def __get__(self, obj, owner):
        getter = self.f
        return getter(owner)
141
+
142
+
143
def implements(f: Callable):
    """Build a decorator that copies the docstring of ``f`` onto its target.

    The decorated object is returned unchanged apart from its ``__doc__``.
    """
    source_doc_owner = f

    def decorator(g):
        # read the docstring lazily, at decoration time
        setattr(g, "__doc__", source_doc_owner.__doc__)
        return g

    return decorator
149
+
150
+
151
class AttributeDict(dict):
    """Dictionary whose keys can also be read as attributes."""

    def __getattr__(self, item):
        # Only called when normal attribute lookup fails, so regular dict
        # methods and attributes keep precedence over keys.
        try:
            return self[item]
        except KeyError:
            # ``from None`` hides the internal KeyError: callers should only
            # see the AttributeError that the attribute protocol expects.
            raise AttributeError(
                f"'AttributeDict' object has no attribute {item}"
            ) from None
157
+
158
+
159
def on_serialize_shape(shape: Tuple[int]):
    """Replace NaN entries of ``shape`` with -1; falsy shapes pass through."""
    if not shape:
        return shape
    return tuple(-1 if np.isnan(dim) else dim for dim in shape)
163
+
164
+
165
def on_deserialize_shape(shape: Tuple[int]):
    """Replace -1 entries of ``shape`` with NaN; falsy shapes pass through."""
    if not shape:
        return shape
    return tuple(np.nan if dim == -1 else dim for dim in shape)
169
+
170
+
171
def on_serialize_numpy_type(value: np.dtype):
    """Convert numpy scalars to Python scalars and ``pd.NaT`` to None.

    Any other value is returned unchanged.
    """
    if value is pd.NaT:
        value = None
    if isinstance(value, np.generic):
        return value.item()
    return value
175
+
176
+
177
def on_serialize_nsplits(value: Tuple[Tuple[int]]):
    """Replace missing (NA) split sizes with None in every dimension.

    ``None`` input is returned as ``None``.
    """
    if value is None:
        return None
    return tuple(
        tuple(None if pd.isna(size) else size for size in dim_splits)
        for dim_splits in value
    )
184
+
185
+
186
def has_unknown_shape(*tiled_tileables: TileableType) -> bool:
    """Return True if any tileable has a null entry in its shape or nsplits.

    Objects without a ``shape`` attribute (or with ``shape`` set to None)
    are ignored.
    """
    for entity in tiled_tileables:
        if getattr(entity, "shape", None) is None:
            continue
        # nsplits is only inspected when the shape itself is fully known
        if any(pd.isnull(dim) for dim in entity.shape) or any(
            pd.isnull(size) for size in itertools.chain.from_iterable(entity.nsplits)
        ):
            return True
    return False
195
+
196
+
197
def calc_nsplits(chunk_idx_to_shape: Dict[Tuple[int], Tuple[int]]) -> Tuple[Tuple[int]]:
    """
    Calculate a tiled entity's nsplits.

    Parameters
    ----------
    chunk_idx_to_shape : Dict type, {chunk_idx: chunk_shape}

    Returns
    -------
    nsplits
    """

    def _lies_on_axis(index, axis):
        # a chunk contributes to ``axis`` when all its other indices are zero
        return all(pos == 0 for other, pos in enumerate(index) if other != axis)

    sample_index = next(iter(chunk_idx_to_shape))
    return tuple(
        tuple(
            shape[axis]
            for index, shape in chunk_idx_to_shape.items()
            if _lies_on_axis(index, axis)
        )
        for axis in range(len(sample_index))
    )
219
+
220
+
221
def copy_tileables(tileables: List[TileableType], **kwargs):
    """Create copies of tileables that share one operator.

    Parameters
    ----------
    tileables : list
        Tileables to copy; all of them must have the same operator.
    inputs : optional keyword
        Inputs for the copied operator; falls back to the copied operator's
        own inputs when falsy.
    copy_key : bool, optional keyword, default True
        Whether to carry over the keys of the operator and tileables.
    copy_id : bool, optional keyword, default True
        Whether to carry over the ids of the tileables.

    Returns
    -------
    Result of ``new_tileables`` called on the copied operator.

    Raises
    ------
    TypeError
        On unknown keyword arguments or when the operators differ.
    """
    inputs = kwargs.pop("inputs", None)
    copy_key = kwargs.pop("copy_key", True)
    copy_id = kwargs.pop("copy_id", True)
    if kwargs:
        raise TypeError(f"got an unexpected keyword argument '{next(iter(kwargs))}'")
    if len(tileables) > 1:
        # cannot handle tileables with different operators here
        # try to copy separately if so
        if len({t.op for t in tileables}) != 1:
            raise TypeError("All tileables' operators should be same.")

    op = tileables[0].op.copy().reset_key()
    if copy_key:
        op._key = tileables[0].op.key
    kws = []
    for t in tileables:
        params = t.params.copy()
        if copy_key:
            params["_key"] = t.key
        if copy_id:
            params["_id"] = t.id
        # extra params are applied last and may override the values above
        params.update(t.extra_params)
        kws.append(params)
    inputs = inputs or op.inputs
    return op.new_tileables(inputs, kws=kws, output_limit=len(kws))
247
+
248
+
249
def build_fetch_chunk(chunk: ChunkType, **kwargs) -> ChunkType:
    """Build a fetch chunk that replaces ``chunk``, keeping its key and params.

    Extra keyword arguments are forwarded to ``new_chunk``. Chunks produced
    by a ``ShuffleProxy`` operator are rejected by an assertion.
    """
    from .core.operator import ShuffleProxy

    source_op = chunk.op
    chunk_params = chunk.params.copy()
    assert not isinstance(source_op, ShuffleProxy)
    # for non-shuffle nodes, we build Fetch chunks
    # to replace original chunk
    fetch_op_cls = source_op.get_fetch_op_cls(chunk)
    fetch_op = fetch_op_cls(sparse=chunk.op.sparse, gpu=chunk.op.gpu)
    return fetch_op.new_chunk(
        None,
        is_broadcaster=chunk.is_broadcaster,
        kws=[chunk_params],
        _key=chunk.key,
        **kwargs,
    )
265
+
266
+
267
def build_fetch_tileable(tileable: TileableType) -> TileableType:
    """Build a fetch tileable mirroring ``tileable``.

    When the tileable is not coarse, each of its chunks is replaced by a
    fetch chunk with the same index; otherwise no chunks are attached.
    """
    chunks = (
        None
        if tileable.is_coarse()
        else [build_fetch_chunk(c, index=c.index) for c in tileable.chunks]
    )

    source_op = tileable.op
    tileable_params = tileable.params.copy()

    fetch_op_cls = source_op.get_fetch_op_cls(tileable)
    fetch_op = fetch_op_cls(_id=source_op.id)
    fetched = fetch_op.new_tileables(
        None,
        chunks=chunks,
        nsplits=tileable.nsplits,
        _key=tileable.key,
        _id=tileable.id,
        **tileable_params,
    )
    return fetched[0]
288
+
289
+
290
def build_fetch(entity: EntityType) -> EntityType:
    """Dispatch to the chunk or tileable fetch builder by entity type.

    Raises
    ------
    TypeError
        If ``entity`` is neither a chunk nor an entity type.
    """
    from .core import CHUNK_TYPE, ENTITY_TYPE

    # chunks are tested first, as in the type hierarchy check order
    if isinstance(entity, CHUNK_TYPE):
        return build_fetch_chunk(entity)
    if isinstance(entity, ENTITY_TYPE):
        return build_fetch_tileable(entity)
    raise TypeError(f"Type {type(entity)} not supported")
299
+
300
+
301
def get_dtype(dtype: Union[np.dtype, pd.api.extensions.ExtensionDtype]):
    """Normalize a dtype-like object.

    Extension dtypes are returned as-is, timestamp / timedelta classes map
    to nanosecond numpy dtypes, and anything else goes through ``np.dtype``.
    """
    if pd.api.types.is_extension_array_dtype(dtype):
        return dtype
    if dtype is pd.Timestamp or dtype is datetime.datetime:
        return np.dtype("datetime64[ns]")
    if dtype is pd.Timedelta or dtype is datetime.timedelta:
        return np.dtype("timedelta64[ns]")
    return np.dtype(dtype)
310
+
311
+
312
def serialize_serializable(serializable, compress: bool = False):
    """Serialize an object into one bytes payload.

    Layout: 8-byte little-endian header length, msgpack-encoded header
    (with the buffer sizes stored under ``header[0]["buf_sizes"]``), then
    the raw buffers in order. The payload is zlib-compressed when
    ``compress`` is true.
    """
    from .serialization import serialize

    header, buffers = serialize(serializable)
    header[0]["buf_sizes"] = [getattr(buf, "nbytes", len(buf)) for buf in buffers]
    packed_header = msgpack.dumps(header)

    stream = io.BytesIO()
    stream.write(struct.pack("<Q", len(packed_header)))
    stream.write(packed_header)
    for buf in buffers:
        stream.write(buf)
    payload = stream.getvalue()

    return zlib.compress(payload) if compress else payload
329
+
330
+
331
def deserialize_serializable(ser_serializable: bytes):
    """Rebuild an object from the payload written by ``serialize_serializable``.

    Parameters
    ----------
    ser_serializable : bytes
        Uncompressed payload: 8-byte little-endian header length, msgpack
        header, then the buffers whose sizes are listed under
        ``header[0]["buf_sizes"]``.

    Returns
    -------
    The object produced by ``deserialize``.
    """
    from .serialization import deserialize

    bio = io.BytesIO(ser_serializable)
    # The writer packs the length with "<Q"; reading with native "Q" would
    # misinterpret it on big-endian hosts, so use the same explicit format.
    s_header_length = struct.unpack("<Q", bio.read(8))[0]
    header2 = msgpack.loads(bio.read(s_header_length))
    buffers2 = [bio.read(s) for s in header2[0]["buf_sizes"]]
    return deserialize(header2, buffers2)
339
+
340
+
341
def skip_na_call(func: Callable):
    """Wrap a one-argument function so that ``None`` input yields ``None``."""

    @functools.wraps(func)
    def _guarded(x):
        if x is None:
            return None
        return func(x)

    return _guarded
347
+
348
+
349
def url_path_join(*pieces):
    """Join components of url into a relative url

    Use to prevent double slash when joining subpath. This will leave the
    initial and final / in place
    """
    has_leading = pieces[0].startswith("/")
    has_trailing = pieces[-1].endswith("/")
    # drop empty segments so consecutive slashes collapse
    segments = [part for part in (piece.strip("/") for piece in pieces) if part]
    joined = "/".join(segments)
    if has_leading:
        joined = "/" + joined
    if has_trailing:
        joined += "/"
    # all pieces were just slashes
    return "/" if joined == "//" else joined
366
+
367
+
368
def random_ports(port: int, n: int):
    """Generate a list of n random ports near the given port.

    The first 5 ports will be sequential, and the remaining n-5 will be
    randomly selected in the range [port-2*n, port+2*n].
    """
    yield from range(port, port + min(5, n))
    for _ in range(n - 5):
        offset = random.randint(-2 * n, 2 * n)
        # never yield a port number below 1
        yield max(1, port + offset)
378
+
379
+
380
def build_temp_table_name(session_id: str, tileable_key: str) -> str:
    """Compose the temp table name ``tmp_mf_<session_id>_<tileable_key>``."""
    return "_".join(("tmp_mf", f"{session_id}", f"{tileable_key}"))
382
+
383
+
384
def build_temp_intermediate_table_name(session_id: str, tileable_key: str) -> str:
    """Return the temp table name with an ``_intermediate`` suffix appended."""
    base_name = build_temp_table_name(session_id, tileable_key)
    return base_name + "_intermediate"
387
+
388
+
389
def build_session_volume_name(session_id: str) -> str:
    """Compose the volume name ``mf_vol_<session_id>``."""
    return "mf_vol_" + f"{session_id}"
391
+
392
+
393
def build_tileable_dir_name(tileable_key: str) -> str:
    """Return the MD5 hex digest of ``mf_dir_<tileable_key>``."""
    raw_name = f"mf_dir_{tileable_key}".encode()
    return hashlib.md5(raw_name).hexdigest()
397
+
398
+
399
def extract_messages_and_stacks(exc: Exception) -> Tuple[List[str], List[str]]:
    """Collect messages and formatted tracebacks along an exception's cause chain.

    Parameters
    ----------
    exc : Exception
        Outermost exception; its ``__cause__`` links are followed.

    Returns
    -------
    messages, stacks : two lists of str
        Parallel lists ordered from ``exc`` to the innermost cause.
    """
    messages, stacks = [], []
    cur_exc = exc
    while cur_exc is not None:
        messages.append(str(cur_exc))
        stacks.append("".join(traceback.format_tb(cur_exc.__traceback__)))
        # advance along the chain from the *current* exception; reading
        # ``exc.__cause__`` here would never terminate for chained errors
        cur_exc = cur_exc.__cause__
    return messages, stacks
409
+
410
+
411
async def wait_http_response(
    url: str, *, request_timeout: TimeoutType = None, **kwargs
) -> httpclient.HTTPResponse:
    """Fetch ``url``, retrying on client-side timeouts until the deadline.

    Parameters
    ----------
    url : str
        Address to fetch.
    request_timeout : optional
        Overall time budget in seconds; ``None`` retries indefinitely.
    **kwargs
        Forwarded to ``AsyncHTTPClient.fetch``.

    Returns
    -------
    The response of the first fetch that does not time out.

    Raises
    ------
    TimeoutError
        When the overall budget is used up without a response.
    """
    start_time = time.time()
    while True:
        if request_timeout is None:
            timeout_left = None
        else:
            # each attempt gets at most 10 seconds and never more than the
            # time remaining in the overall budget
            remaining = request_timeout - (time.time() - start_time)
            if remaining <= 0:
                break
            timeout_left = min(10.0, remaining)
        try:
            return await httpclient.AsyncHTTPClient().fetch(
                url, request_timeout=timeout_left, **kwargs
            )
        except HTTPTimeoutError:
            # retry until the overall deadline passes
            pass
    raise TimeoutError
424
+
425
+
426
def get_handler_timeout_value(handler: web.RequestHandler) -> TimeoutType:
    """Derive a timeout from the ``wait`` and ``timeout`` request arguments.

    Returns 0 when ``wait`` is off, ``None`` when waiting with a (near) zero
    timeout, and the parsed ``timeout`` otherwise.
    """
    should_wait = bool(int(handler.get_argument("wait", "0")))
    timeout = float(handler.get_argument("timeout", "0"))
    if not should_wait:
        return 0
    if abs(timeout) < 1e-6:
        # waiting without an explicit timeout means no limit
        return None
    return timeout
434
+
435
+
436
def format_timeout_params(timeout: TimeoutType) -> str:
    """Render a timeout as the ``wait`` / ``timeout`` query string.

    ``None`` gives ``?wait=1``, a (near) zero value gives ``?wait=0`` and
    any other value gives ``?wait=1&timeout=<timeout>``.
    """
    if timeout is None:
        return "?wait=1"
    if abs(timeout) < 1e-6:
        return "?wait=0"
    return f"?wait=1&timeout={timeout}"
443
+
444
+
445
async def to_thread_pool(func, *args, pool=None, **kwargs):
    """Run ``func(*args, **kwargs)`` in an executor and await its result.

    The call runs inside a copy of the current ``contextvars`` context.
    ``pool`` is passed to ``run_in_executor`` as the executor.
    """
    running_loop = asyncio.events.get_running_loop()
    context_copy = contextvars.copy_context()
    bound_call = functools.partial(context_copy.run, func, *args, **kwargs)
    return await running_loop.run_in_executor(pool, bound_call)
450
+
451
+
452
class ToThreadCancelledError(asyncio.CancelledError):
    """Cancellation error that also carries a result value."""

    def __init__(self, *args, result=None):
        super().__init__(*args)
        # value exposed through the read-only ``result`` property
        self._result = result

    @property
    def result(self):
        """Value supplied as ``result`` at construction time."""
        return self._result
460
+
461
+
462
# return type of the callable handed to ``ToThreadMixin.to_thread``
_ToThreadRetType = TypeVar("_ToThreadRetType")


class ToThreadMixin:
    """Mixin that runs blocking callables in a lazily created thread pool."""

    # number of worker threads of the per-instance pool
    _thread_pool_size = 1
    # shared counter used to number the thread name prefixes of the pools
    _counter = itertools.count().__next__

    def __del__(self):
        # shut the pool down without blocking, if one was ever created
        if hasattr(self, "_pool"):
            kw = {"wait": False}
            if sys.version_info[:2] >= (3, 9):
                # ``cancel_futures`` is only passed on Python 3.9+
                kw["cancel_futures"] = True
            self._pool.shutdown(**kw)

    async def to_thread(
        self,
        func: Callable[..., _ToThreadRetType],
        *args,
        wait_on_cancel: bool = False,
        timeout: float = None,
        **kwargs,
    ) -> _ToThreadRetType:
        """Run ``func(*args, **kwargs)`` in this object's thread pool.

        Parameters
        ----------
        func : callable
            Function to execute in the pool.
        wait_on_cancel : bool
            If True, on cancellation or timeout the call is still awaited
            and its result is attached to the raised
            ``ToThreadCancelledError``.
        timeout : float, optional
            Passed to ``asyncio.wait_for``.

        Returns
        -------
        The return value of ``func``.
        """
        if not hasattr(self, "_pool"):
            # the pool is created on first use
            self._pool = concurrent.futures.ThreadPoolExecutor(
                self._thread_pool_size,
                thread_name_prefix=f"{type(self).__name__}Pool-{self._counter()}",
            )

        task = asyncio.create_task(
            to_thread_pool(func, *args, **kwargs, pool=self._pool)
        )
        try:
            # the task is shielded so it can still be awaited below after
            # a cancellation or timeout of the wait itself
            return await asyncio.wait_for(asyncio.shield(task), timeout)
        except (asyncio.CancelledError, asyncio.TimeoutError) as ex:
            if not wait_on_cancel:
                raise
            result = await task
            raise ToThreadCancelledError(*ex.args, result=result)

    def ensure_async_call(
        self,
        func: Callable[..., _ToThreadRetType],
        *args,
        wait_on_cancel: bool = False,
        **kwargs,
    ) -> Awaitable[_ToThreadRetType]:
        """Return an awaitable for ``func``.

        Coroutine functions are called directly; other callables are
        dispatched through ``to_thread``.
        """
        if asyncio.iscoroutinefunction(func):
            return func(*args, **kwargs)
        return self.to_thread(func, *args, wait_on_cancel=wait_on_cancel, **kwargs)
511
+
512
+
513
def config_odps_default_options():
    """Replace ``odps.options.sql.settings`` with the fixed defaults below."""
    from odps import options as odps_options

    default_sql_settings = {
        "odps.longtime.instance": "false",
        "odps.sql.session.select.only": "false",
        "metaservice.client.cache.enable": "false",
        "odps.sql.session.result.cache.enable": "false",
        "odps.sql.submit.mode": "script",
    }
    odps_options.sql.settings = default_sql_settings
523
+
524
+
525
def to_hashable(obj: Any) -> Hashable:
    """Recursively convert nested iterables into tuples.

    Mappings are rebuilt with their own type and converted values, strings
    and other hashable objects are returned unchanged.

    Raises
    ------
    TypeError
        If ``obj`` is neither a mapping, an iterable nor hashable.
    """
    if isinstance(obj, Mapping):
        return type(obj)((key, to_hashable(val)) for key, val in obj.items())
    if isinstance(obj, Iterable) and not isinstance(obj, str):
        return tuple(to_hashable(member) for member in obj)
    if isinstance(obj, Hashable):
        return obj
    raise TypeError(type(obj))
535
+
536
+
537
def estimate_pandas_size(
    pd_obj, max_samples: int = 10, min_sample_rows: int = 100
) -> int:
    """Estimate the memory size of a pandas object, sampling rows if needed.

    Parameters
    ----------
    pd_obj
        DataFrame, Series or Index to measure.
    max_samples : int
        Number of rows sampled when the size has to be extrapolated.
    min_sample_rows : int
        Objects with at most this many rows are measured directly.

    Returns
    -------
    int
        ``sys.getsizeof`` of the object, or a size extrapolated from the
        sampled rows.
    """
    if len(pd_obj) <= min_sample_rows or isinstance(pd_obj, pd.RangeIndex):
        return sys.getsizeof(pd_obj)
    if isinstance(pd_obj, pd.MultiIndex):
        # MultiIndex's sample size can't be used to estimate
        return sys.getsizeof(pd_obj)

    from .dataframe.arrays import ArrowDtype

    def _is_fast_dtype(dtype):
        # numeric numpy dtypes and ArrowDtype are measured directly
        if isinstance(dtype, np.dtype):
            return np.issubdtype(dtype, np.number)
        else:
            return isinstance(dtype, ArrowDtype)

    # collect the dtypes of data columns and of the index
    dtypes = []
    is_series = False
    if isinstance(pd_obj, pd.DataFrame):
        dtypes.extend(pd_obj.dtypes)
        index_obj = pd_obj.index
    elif isinstance(pd_obj, pd.Series):
        dtypes.append(pd_obj.dtype)
        index_obj = pd_obj.index
        is_series = True
    else:
        index_obj = pd_obj

    # handling possible MultiIndex
    if hasattr(index_obj, "dtypes"):
        dtypes.extend(index_obj.dtypes)
    else:
        dtypes.append(index_obj.dtype)

    if all(_is_fast_dtype(dtype) for dtype in dtypes):
        return sys.getsizeof(pd_obj)

    # NOTE(review): sampling without replacement needs
    # max_samples <= len(pd_obj); this holds with the default arguments
    # but not when min_sample_rows < max_samples -- confirm callers.
    indices = np.sort(np.random.choice(len(pd_obj), size=max_samples, replace=False))
    iloc = pd_obj if isinstance(pd_obj, pd.Index) else pd_obj.iloc
    if isinstance(index_obj, pd.MultiIndex):
        # MultiIndex's sample size is much greater than expected, thus we calculate
        # the size separately.
        index_size = sys.getsizeof(pd_obj.index)
        if is_series:
            sample_frame_size = iloc[indices].memory_usage(deep=True, index=False)
        else:
            sample_frame_size = iloc[indices].memory_usage(deep=True, index=False).sum()
        return index_size + sample_frame_size * len(pd_obj) // max_samples
    else:
        # scale the size of the sample up to the full row count
        sample_size = sys.getsizeof(iloc[indices])
        return sample_size * len(pd_obj) // max_samples
589
+
590
+
591
class ModulePlaceholder:
    """Stand-in object that raises on any attribute access or call."""

    def __init__(self, mod_name: str):
        self._mod_name = mod_name

    def _raises(self):
        message = f"{self._mod_name} is required but not installed."
        raise AttributeError(message)

    def __getattr__(self, key):
        # reached only for attributes not found by normal lookup
        self._raises()

    def __call__(self, *_args, **_kwargs):
        self._raises()
603
+
604
+
605
def lazy_import(
    name: str,
    package: str = None,
    globals: Dict = None,  # pylint: disable=redefined-builtin
    locals: Dict = None,  # pylint: disable=redefined-builtin
    rename: str = None,
    placeholder: bool = False,
):
    """Return a proxy that imports module ``name`` on first attribute access.

    Parameters
    ----------
    name : str
        Module to import lazily.
    package : str, optional
        Anchor package passed to ``importlib.import_module``.
    globals : dict, optional
        Namespace whose ``rename`` entry is replaced by the real module once
        loaded; defaults to the caller's globals.
    locals : dict, optional
        Used instead when ``rename`` is not present in ``globals``.
    rename : str, optional
        Name under which the module is bound; defaults to ``name``.
    placeholder : bool
        If True, return a ``ModulePlaceholder`` when the top-level package
        cannot be found.

    Returns
    -------
    A lazy module proxy, a ``ModulePlaceholder`` or ``None``.

    Raises
    ------
    ImportError
        If looking up the top-level package itself fails.
    """
    # pkgutil.find_loader is deprecated since Python 3.12; find_spec is the
    # documented replacement. Imported locally to keep this block
    # self-contained.
    from importlib.util import find_spec

    rename = rename or name
    prefix_name = name.split(".", 1)[0]
    globals = globals or inspect.currentframe().f_back.f_globals

    class LazyModule(object):
        def __init__(self):
            self._on_loads = []

        def __getattr__(self, item):
            # avoid triggering the import when pytest introspects the proxy
            if item.startswith("_pytest") or item in ("__bases__", "__test__"):
                raise AttributeError(item)

            real_mod = importlib.import_module(name, package=package)
            # swap the proxy for the real module in the caller's namespace
            if rename in globals:
                globals[rename] = real_mod
            elif locals is not None:
                locals[rename] = real_mod
            ret = getattr(real_mod, item)
            for on_load_func in self._on_loads:
                on_load_func()
            # make sure on_load hooks only executed once
            self._on_loads = []
            return ret

        def add_load_handler(self, func: Callable):
            self._on_loads.append(func)
            return func

    try:
        spec = find_spec(prefix_name)
    except (AttributeError, TypeError, ValueError) as ex:
        # mirror pkgutil.find_loader, which reported lookup failures as
        # ImportError
        raise ImportError(
            f"Error while finding loader for {prefix_name!r} ({type(ex)}: {ex})"
        ) from ex

    if spec is not None and spec.loader is not None:
        return LazyModule()
    elif placeholder:
        return ModulePlaceholder(prefix_name)
    else:
        return None
647
+
648
+
649
def sbytes(x: Any) -> bytes:
    """Convert ``x`` to bytes via its text form where applicable.

    None and numbers are ASCII-encoded from ``str(x)``; lists and tuples are
    rendered with their items' ``str`` forms; strings are UTF-8 encoded;
    everything else goes through ``bytes(x)``.
    """
    # NB: bytes() in Python 3 has different semantic with Python 2, see: help(bytes)
    from numbers import Number

    if x is None or isinstance(x, Number):
        return str(x).encode("ascii")
    for seq_type, (opening, closing) in ((list, "[]"), (tuple, "()")):
        if isinstance(x, seq_type):
            rendered = opening + ", ".join(str(k) for k in x) + closing
            return rendered.encode("utf-8")
    if isinstance(x, str):
        return x.encode("utf-8")
    return bytes(x)
663
+
664
+
665
def is_full_slice(slc: Any) -> bool:
    """Check if the input is a full slice ((:) or (0:))"""
    if not isinstance(slc, slice):
        return False
    starts_at_origin = slc.start == 0 or slc.start is None
    if not starts_at_origin:
        return False
    return slc.stop is None and slc.step is None
673
+
674
+
675
# nesting depth of calls wrapped by ``enter_current_session``
_enter_counter = 0
# default session recorded when the outermost wrapped call was entered
_initial_session = None


def enter_current_session(func: Callable):
    """Decorate ``func(cls, ctx, op)`` to run with the context's session as default.

    The session returned by ``ctx.get_current_session()`` is made the default
    for the duration of the outermost call; afterwards the previously
    recorded default session is restored, or the default is reset when there
    was none. Contexts without ``get_current_session`` call ``func`` directly.
    """

    @functools.wraps(func)
    def wrapped(cls, ctx, op):
        from .session import AbstractSession, get_default_session

        global _enter_counter, _initial_session
        # skip in some test cases
        if not hasattr(ctx, "get_current_session"):
            return func(cls, ctx, op)

        # the counter and recorded session are only touched under the lock
        with AbstractSession._lock:
            if _enter_counter == 0:
                # to handle nested call, only set initial session
                # in first call
                session = ctx.get_current_session()
                _initial_session = get_default_session()
                session.as_default()
            _enter_counter += 1

        try:
            result = func(cls, ctx, op)
        finally:
            with AbstractSession._lock:
                _enter_counter -= 1
                if _enter_counter == 0:
                    # set previous session when counter is 0
                    if _initial_session:
                        _initial_session.as_default()
                    else:
                        AbstractSession.reset_default()
        return result

    return wrapped
712
+
713
+
714
# tokens cached per function object; entries vanish with their functions
_func_token_cache = weakref.WeakKeyDictionary()


def _get_func_token_values(func):
    """Collect the values that identify ``func`` for tokenizing.

    Functions with ``__code__`` contribute their bytecode and, if present,
    closure contents. ``functools.partial`` layers contribute their args and
    keywords before the wrapped callable is handled; builtin functions
    contribute module and qualified name; anything else is used as-is.
    """
    if hasattr(func, "__code__"):
        values = [func.__code__.co_code]
        if func.__closure__ is not None:
            values.append(tuple([cell.cell_contents for cell in func.__closure__]))
        return values

    values = []
    # peel off every partial layer, recording its bound arguments
    while isinstance(func, functools.partial):
        values += [func.args, func.keywords]
        func = func.func
    if hasattr(func, "__code__"):
        values += _get_func_token_values(func)
    elif isinstance(func, types.BuiltinFunctionType):
        values += [func.__module__, func.__qualname__]
    else:
        values.append(func)
    return values


def get_func_token(func):
    """Return the token of ``func``, caching it when ``func`` is weak-referenceable."""
    try:
        cached = _func_token_cache.get(func)
        if cached is None:
            cached = tokenize(*_get_func_token_values(func))
            _func_token_cache[func] = cached
        return cached
    except TypeError:  # cannot create weak reference to func like 'numpy.ufunc'
        return tokenize(*_get_func_token_values(func))
748
+
749
+
750
# per-thread flag telling the wrappers whether to drop writes
_io_quiet_local = threading.local()
# guards swapping of sys.stdout / sys.stderr
_io_quiet_lock = threading.Lock()


class _QuietIOWrapper:
    """Stream proxy that drops writes while the current thread is quieted."""

    def __init__(self, wrapped):
        self.wrapped = wrapped

    def __getattr__(self, item):
        # everything except ``write`` is delegated to the wrapped stream
        return getattr(self.wrapped, item)

    def write(self, d):
        muted = getattr(_io_quiet_local, "is_wrapped", False)
        return 0 if muted else self.wrapped.write(d)


@contextmanager
def quiet_stdio():
    """Quiets standard outputs when inferring types of functions"""
    with _io_quiet_lock:
        _io_quiet_local.is_wrapped = True
        sys.stdout = _QuietIOWrapper(sys.stdout)
        sys.stderr = _QuietIOWrapper(sys.stderr)

    try:
        yield
    finally:
        with _io_quiet_lock:
            # peel off one wrapper layer from each stream
            sys.stdout = sys.stdout.wrapped
            sys.stderr = sys.stderr.wrapped
            still_wrapped = isinstance(sys.stdout, _QuietIOWrapper)
            if not still_wrapped:
                _io_quiet_local.is_wrapped = False
783
+
784
+
785
# from https://github.com/ericvsmith/dataclasses/blob/master/dataclass_tools.py
# released under Apache License 2.0
def dataslots(cls):
    """Rebuild a dataclass as a new class whose fields are ``__slots__``.

    Raises
    ------
    TypeError
        If ``cls`` already defines ``__slots__``.
    """
    # __slots__ cannot be added to an existing class, so a new one is built;
    # refuse classes that already declare their own slots.
    if "__slots__" in cls.__dict__:  # pragma: no cover
        raise TypeError(f"{cls.__name__} already specifies __slots__")

    slot_names = tuple(fld.name for fld in dataclasses.fields(cls))
    # Field attributes (e.g. defaults) and __dict__ itself are left out of
    # the namespace of the new class.
    excluded = set(slot_names) | {"__dict__"}
    namespace = {
        key: member for key, member in cls.__dict__.items() if key not in excluded
    }
    namespace["__slots__"] = slot_names

    original_qualname = getattr(cls, "__qualname__", None)
    rebuilt = type(cls)(cls.__name__, cls.__bases__, namespace)
    if original_qualname is not None:
        rebuilt.__qualname__ = original_qualname
    return rebuilt
811
+
812
+
813
def adapt_docstring(doc: str) -> str:
    """
    Adapt numpy-style docstrings to MaxFrame docstring.

    This util function will add MaxFrame imports, replace object references
    and add execute calls. Note that check is needed after replacement.
    """
    if doc is None:
        return None

    uses_numpy = "np." in doc
    uses_pandas = "pd." in doc
    imports_pending = True
    after_prompt = False
    out_lines = []

    for raw_line in doc.splitlines():
        stripped = raw_line.strip()
        if stripped.startswith((">>>", "...")):
            after_prompt = True
            if imports_pending:
                # inject the imports once, before the first prompt line,
                # using that line's leading spaces / tabs as indentation
                imports_pending = False
                indent = raw_line[: len(raw_line) - len(raw_line.lstrip(" \t"))]
                if uses_numpy:
                    out_lines.append(indent + ">>> import maxframe.tensor as mt")
                if uses_pandas:
                    out_lines.append(indent + ">>> import maxframe.dataframe as md")
            raw_line = raw_line.replace("np.", "mt.").replace("pd.", "md.")
        elif after_prompt:
            after_prompt = False
            if stripped:
                # a prompt followed by output: the statement must be executed
                out_lines[-1] += ".execute()"
        out_lines.append(raw_line)
    return "\n".join(out_lines)
847
+
848
+
849
def stringify_path(path: Union[str, os.PathLike]) -> str:
    """
    Convert *path* to a string or unicode path if possible.
    """
    if not isinstance(path, str):
        # checking whether path implements the filesystem protocol
        try:
            return path.__fspath__()
        except AttributeError:
            raise TypeError("not a path-like object")
    return path
861
+
862
+
863
# power of 1024 selected by the first letter of a size suffix
_memory_size_indices = {"": 0, "k": 1, "m": 2, "g": 3, "t": 4}


def parse_readable_size(value: Union[str, int, float]) -> Tuple[float, bool]:
    """Parse a size such as ``"1.5 MB"``, ``"50%"`` or ``1024``.

    Parameters
    ----------
    value : str, int or float
        Numbers are returned as floats. Strings consist of a number followed
        by an optional suffix; only the first letter of the suffix
        (k / m / g / t, case-insensitive) is significant, and a trailing
        ``%`` marks a percentage.

    Returns
    -------
    (size, is_percent) : Tuple[float, bool]
        Size in bytes with ``is_percent`` False, or the fraction
        (percentage / 100) with ``is_percent`` True.

    Raises
    ------
    ValueError
        If the numeric part or the suffix cannot be interpreted.
    """
    if isinstance(value, numbers.Number):
        return float(value), False

    # keep the caller's input so error messages can show it in full
    raw_value = value
    value = value.strip().lower()
    num_pos = 0
    while num_pos < len(value) and value[num_pos] in "0123456789.-":
        num_pos += 1

    number, suffix = value[:num_pos], value[num_pos:].strip()
    try:
        if suffix.endswith("%"):
            return float(number) / 100, True
        return float(number) * (1024 ** _memory_size_indices[suffix[:1]]), False
    except (ValueError, KeyError):
        raise ValueError(f"Unknown limitation value: {raw_value}") from None
884
+
885
+
886
def remove_suffix(value: str, suffix: str) -> str:
    """Return ``value`` without a trailing ``suffix``, if it ends with it.

    An empty ``suffix`` leaves ``value`` unchanged.
    """
    # an empty suffix must be special-cased: value[:-0] is value[:0] == ""
    if suffix and value.endswith(suffix):
        return value[: -len(suffix)]
    return value
888
+
889
+
890
def find_objects(nested: Union[List, Dict], types: Union[Type, Tuple[Type]]) -> List:
    """Collect all objects of the given types inside nested containers.

    Lists, tuples, sets and dict values are searched in iteration order;
    matching objects are not searched any further.
    """
    matches = []
    pending = [nested]

    while pending:
        current = pending.pop()
        if isinstance(current, types):
            matches.append(current)
        elif isinstance(current, (list, tuple, set)):
            # pushed reversed so the stack pops members in original order
            pending.extend(reversed(list(current)))
        elif isinstance(current, dict):
            pending.extend(reversed(list(current.values())))

    return matches
906
+
907
+
908
def replace_objects(nested: Union[List, Dict], mapping: Mapping) -> Union[List, Dict]:
    """Return a copy of ``nested`` with values substituted through ``mapping``.

    Nested dicts, lists, tuples and sets are processed recursively and
    rebuilt with their own type; dict keys are kept. With an empty mapping
    the input object itself is returned.
    """
    if not mapping:
        return nested

    def _swap(item):
        if isinstance(item, (dict, list, tuple, set)):
            return replace_objects(item, mapping)
        try:
            return mapping.get(item, item)
        except TypeError:
            # unhashable values cannot be looked up and stay as they are
            return item

    is_dict = isinstance(nested, dict)
    source_vals = list(nested.values()) if is_dict else list(nested)
    swapped = [_swap(item) for item in source_vals]

    if is_dict:
        return type(nested)((k, v) for k, v in zip(nested.keys(), swapped))
    return type(nested)(swapped)
932
+
933
+
934
def trait_from_env(
    trait_name: str, env: str, trait: Optional[traitlets.TraitType] = None
):
    """Create a ``traitlets.default`` hook reading the default from ``env``.

    Parameters
    ----------
    trait_name : str
        Name of the trait the default is registered for.
    env : str
        Environment variable consulted when the default is computed.
    trait : traitlets.TraitType, optional
        Trait object; when omitted it is looked up by ``trait_name`` in the
        local variables of the calling frame.

    Returns
    -------
    The result of ``traitlets.default(trait_name)`` applied to the generated
    default-value function.
    """
    if trait is None:
        # stack()[1] is the direct caller, so this lookup must stay in this
        # function body
        prev_locals = inspect.stack()[1].frame.f_locals
        trait = prev_locals[trait_name]

    default_value = trait.default_value
    # NOTE(review): ``_trait`` looks like the element trait of container
    # traits -- confirm against traitlets
    sub_trait: traitlets.TraitType = getattr(trait, "_trait", None)

    def default_value_simple(self):
        # strings / bytes are parsed by the trait, other defaults pass through
        env_val = os.getenv(env, default_value)
        if isinstance(env_val, (str, bytes)):
            return trait.from_string(env_val)
        return env_val

    def default_value_list(self):
        env_val = os.getenv(env, default_value)
        if env_val is None or isinstance(env_val, traitlets.Sentinel):
            return env_val

        # comma-separated values; an empty value gives an empty list
        parts = env_val.split(",") if env_val else []
        if sub_trait:
            return [sub_trait.from_string(s) for s in parts]
        else:
            return parts

    if isinstance(trait, traitlets.List):
        default_value_fun = default_value_list
    else:  # pragma: no cover
        default_value_fun = default_value_simple

    default_value_fun.__name__ = trait_name + "_default"
    return traitlets.default(trait_name)(default_value_fun)
968
+
969
+
970
def relay_future(
    dest: Union[asyncio.Future, concurrent.futures.Future],
    src: Union[asyncio.Future, concurrent.futures.Future],
) -> None:
    """Forward the outcome of ``src`` to ``dest`` once ``src`` is done.

    The result of ``src`` is set on ``dest``; any exception raised while
    doing so (including the one stored in ``src``) is set on ``dest``
    instead.
    """

    def _forward(finished: Union[asyncio.Future, concurrent.futures.Future]):
        try:
            dest.set_result(finished.result())
        except BaseException as error:
            dest.set_exception(error)

    src.add_done_callback(_forward)
981
+
982
+
983
# Maps arrow type names to the callables that build the matching DataType.
# Stays empty when pyarrow is not installed.
_arrow_type_constructors = {}
if pa:
    # names that differ from the pyarrow factory name or need argument
    # adaptation
    _arrow_type_constructors = {
        "bool": pa.bool_,
        "list": lambda x: pa.list_(dict(x)["item"]),
        "map": lambda x: pa.map_(*x),
        "struct": pa.struct,
        "fixed_size_binary": pa.binary,
        "halffloat": pa.float16,
        "float": pa.float32,
        "double": pa.float64,
        "decimal": pa.decimal128,
    }
    # names looked up on the pyarrow module under the same name
    _plain_arrow_types = """
    null
    int8 int16 int32 int64
    uint8 uint16 uint32 uint64
    float16 float32 float64
    date32 date64
    decimal128 decimal256
    string utf8 binary
    time32 time64 duration timestamp
    month_day_nano_interval
    """
    for _type_name in _plain_arrow_types.split():
        try:
            _arrow_type_constructors[_type_name] = getattr(pa, _type_name)
        except AttributeError:  # pragma: no cover
            # skip names the installed pyarrow does not provide
            pass
1012
+
1013
+
1014
def arrow_type_from_str(type_str: str) -> pa.DataType:
    """
    Convert arrow type representations (e.g., list<item: int64>)
    into arrow DataType instances.

    The string is split with the Python tokenizer and evaluated with two
    stacks: ``value_stack`` holds names, numbers, parameter lists and
    already-built types, while ``op_stack`` holds currently open brackets
    and pending ``:`` separators.

    Parameters
    ----------
    type_str: str
        textual representation of an arrow type

    Returns
    -------
    pa.DataType
        the single value left on the value stack after parsing.
        NOTE(review): a name that is missing from
        ``_arrow_type_constructors`` is pushed back unchanged, so the
        result can be a plain string instead of a DataType.

    Raises
    ------
    ValueError
        if more than one value is left on the value stack after parsing
    """
    # enable consecutive brackets to be tokenized
    # (otherwise the tokenizer would read ">>" or "<<" as one operator)
    type_str = type_str.replace("<", "< ").replace(">", " >")
    token_iter = pytokenize.tokenize(io.BytesIO(type_str.encode()).readline)
    value_stack, op_stack = [], []

    def _pop_make_type(with_args: bool = False, combined: bool = True) -> None:
        """
        Pop the top of the value stack, create a DataType instance from it
        and push the result back.

        A popped value that already is a DataType, or a name without a
        registered constructor, is pushed back unchanged.

        Parameters
        ----------
        with_args: bool
            if True, the top item of the value stack (a parameter list)
            is popped first and handed to the constructor
        combined: bool
            if True, the parameter list is passed to the constructor as
            one single argument; if False, its elements are unpacked into
            positional arguments. False is only usable together with
            ``with_args=True``.
        """
        args = () if not with_args else (value_stack.pop(-1),)
        if not combined:
            # unwrap the parameter list so that it is expanded by *args
            args = args[0]
        type_name = value_stack.pop(-1)
        if isinstance(type_name, pa.DataType):
            value_stack.append(type_name)
        elif type_name in _arrow_type_constructors:
            value_stack.append(_arrow_type_constructors[type_name](*args))
        else:  # pragma: no cover
            value_stack.append(type_name)

    for token in token_iter:
        if token.type == pytokenize.OP:
            if token.string == ":":
                # remember that the value pushed next belongs to a
                # "name: type" pair
                op_stack.append(token.string)
            elif token.string == ",":
                # gather previous sub-types
                # (inside "(" or "[" the previous item is left as it is)
                if op_stack[-1] in ("<", ":"):
                    _pop_make_type()

                if op_stack[-1] == ":":
                    # parameterized sub-types need to be represented as tuples
                    op_stack.pop(-1)
                    values = value_stack[-2:]
                    # value_stack is rebound here; _pop_make_type shares
                    # this variable and thus sees the new list as well
                    value_stack = value_stack[:-2]
                    value_stack.append(tuple(values))
                # put generated item into the parameter list
                val = value_stack.pop(-1)
                value_stack[-1].append(val)
            elif token.string in ("<", "[", "("):
                # pushes an empty parameter list for future use
                value_stack.append([])
                op_stack.append(token.string)
            elif token.string in (")", "]"):
                # put generated item into the parameter list
                val = value_stack.pop(-1)
                value_stack[-1].append(val)
                # make DataType (i.e., fixed_size_binary / decimal) given args
                _pop_make_type(with_args=True, combined=False)
                # discard the matching opening bracket
                op_stack.pop(-1)
            elif token.string == ">":
                # build the last sub-type before closing the bracket
                _pop_make_type()

                if op_stack[-1] == ":":
                    # parameterized sub-types need to be represented as tuples
                    op_stack.pop(-1)
                    values = value_stack[-2:]
                    value_stack = value_stack[:-2]
                    value_stack.append(tuple(values))

                # put generated item into the parameter list
                val = value_stack.pop(-1)
                value_stack[-1].append(val)
                # make DataType (i.e., list / map / struct) given args
                _pop_make_type(True)
                # discard the matching "<"
                op_stack.pop(-1)
        elif token.type == pytokenize.NAME:
            # type names and field names are pushed as plain strings
            value_stack.append(token.string)
        elif token.type == pytokenize.NUMBER:
            # numeric parameters are converted with int()
            value_stack.append(int(token.string))
        elif token.type == pytokenize.ENDMARKER:
            # make final type
            _pop_make_type()
    # leftover values mean that the input was not one well-formed type
    if len(value_stack) > 1:
        raise ValueError(f"Cannot parse type {type_str}")
    return value_stack[-1]
1103
+
1104
+
1105
def get_python_tag():
    """
    Build the tag of the running interpreter: ``cp`` followed by
    the major and minor version numbers (e.g., ``cp39``).
    """
    # todo add implementation suffix for non-GIL tags when PEP703 is ready
    major, minor = sys.version_info[:2]
    return "cp" + str(major) + str(minor)
1109
+
1110
+
1111
def get_item_if_scalar(val: Any) -> Any:
    """
    Unwrap numpy arrays with an empty shape into Python scalars via
    ``item()``; every other value is returned untouched.
    """
    is_zero_dim_array = isinstance(val, np.ndarray) and val.shape == ()
    return val.item() if is_zero_dim_array else val