maxframe 0.1.0b5__cp310-cp310-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (647)
  1. maxframe/__init__.py +32 -0
  2. maxframe/_utils.cpython-310-darwin.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyx +547 -0
  5. maxframe/codegen.py +528 -0
  6. maxframe/config/__init__.py +15 -0
  7. maxframe/config/config.py +443 -0
  8. maxframe/config/tests/__init__.py +13 -0
  9. maxframe/config/tests/test_config.py +103 -0
  10. maxframe/config/tests/test_validators.py +34 -0
  11. maxframe/config/validators.py +57 -0
  12. maxframe/conftest.py +139 -0
  13. maxframe/core/__init__.py +65 -0
  14. maxframe/core/base.py +156 -0
  15. maxframe/core/entity/__init__.py +44 -0
  16. maxframe/core/entity/chunks.py +68 -0
  17. maxframe/core/entity/core.py +152 -0
  18. maxframe/core/entity/executable.py +337 -0
  19. maxframe/core/entity/fuse.py +73 -0
  20. maxframe/core/entity/objects.py +100 -0
  21. maxframe/core/entity/output_types.py +90 -0
  22. maxframe/core/entity/tileables.py +438 -0
  23. maxframe/core/entity/utils.py +24 -0
  24. maxframe/core/graph/__init__.py +17 -0
  25. maxframe/core/graph/builder/__init__.py +16 -0
  26. maxframe/core/graph/builder/base.py +86 -0
  27. maxframe/core/graph/builder/chunk.py +430 -0
  28. maxframe/core/graph/builder/tileable.py +34 -0
  29. maxframe/core/graph/builder/utils.py +41 -0
  30. maxframe/core/graph/core.cpython-310-darwin.so +0 -0
  31. maxframe/core/graph/core.pyx +467 -0
  32. maxframe/core/graph/entity.py +171 -0
  33. maxframe/core/graph/tests/__init__.py +13 -0
  34. maxframe/core/graph/tests/test_graph.py +205 -0
  35. maxframe/core/mode.py +96 -0
  36. maxframe/core/operator/__init__.py +34 -0
  37. maxframe/core/operator/base.py +450 -0
  38. maxframe/core/operator/core.py +276 -0
  39. maxframe/core/operator/fetch.py +53 -0
  40. maxframe/core/operator/fuse.py +29 -0
  41. maxframe/core/operator/objects.py +72 -0
  42. maxframe/core/operator/shuffle.py +111 -0
  43. maxframe/core/operator/tests/__init__.py +13 -0
  44. maxframe/core/operator/tests/test_core.py +64 -0
  45. maxframe/core/tests/__init__.py +13 -0
  46. maxframe/core/tests/test_mode.py +75 -0
  47. maxframe/dataframe/__init__.py +81 -0
  48. maxframe/dataframe/arithmetic/__init__.py +359 -0
  49. maxframe/dataframe/arithmetic/abs.py +33 -0
  50. maxframe/dataframe/arithmetic/add.py +60 -0
  51. maxframe/dataframe/arithmetic/arccos.py +28 -0
  52. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  53. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  54. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  55. maxframe/dataframe/arithmetic/arctan.py +28 -0
  56. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  57. maxframe/dataframe/arithmetic/around.py +152 -0
  58. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  59. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  60. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  61. maxframe/dataframe/arithmetic/ceil.py +28 -0
  62. maxframe/dataframe/arithmetic/core.py +342 -0
  63. maxframe/dataframe/arithmetic/cos.py +28 -0
  64. maxframe/dataframe/arithmetic/cosh.py +28 -0
  65. maxframe/dataframe/arithmetic/degrees.py +28 -0
  66. maxframe/dataframe/arithmetic/docstring.py +442 -0
  67. maxframe/dataframe/arithmetic/equal.py +56 -0
  68. maxframe/dataframe/arithmetic/exp.py +28 -0
  69. maxframe/dataframe/arithmetic/exp2.py +28 -0
  70. maxframe/dataframe/arithmetic/expm1.py +28 -0
  71. maxframe/dataframe/arithmetic/floor.py +28 -0
  72. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  73. maxframe/dataframe/arithmetic/greater.py +57 -0
  74. maxframe/dataframe/arithmetic/greater_equal.py +57 -0
  75. maxframe/dataframe/arithmetic/invert.py +33 -0
  76. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  77. maxframe/dataframe/arithmetic/less.py +57 -0
  78. maxframe/dataframe/arithmetic/less_equal.py +57 -0
  79. maxframe/dataframe/arithmetic/log.py +28 -0
  80. maxframe/dataframe/arithmetic/log10.py +28 -0
  81. maxframe/dataframe/arithmetic/log2.py +28 -0
  82. maxframe/dataframe/arithmetic/mod.py +60 -0
  83. maxframe/dataframe/arithmetic/multiply.py +60 -0
  84. maxframe/dataframe/arithmetic/negative.py +33 -0
  85. maxframe/dataframe/arithmetic/not_equal.py +56 -0
  86. maxframe/dataframe/arithmetic/power.py +68 -0
  87. maxframe/dataframe/arithmetic/radians.py +28 -0
  88. maxframe/dataframe/arithmetic/sin.py +28 -0
  89. maxframe/dataframe/arithmetic/sinh.py +28 -0
  90. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  91. maxframe/dataframe/arithmetic/subtract.py +64 -0
  92. maxframe/dataframe/arithmetic/tan.py +28 -0
  93. maxframe/dataframe/arithmetic/tanh.py +28 -0
  94. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  95. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
  96. maxframe/dataframe/arithmetic/truediv.py +64 -0
  97. maxframe/dataframe/arithmetic/trunc.py +28 -0
  98. maxframe/dataframe/arrays.py +864 -0
  99. maxframe/dataframe/core.py +2417 -0
  100. maxframe/dataframe/datasource/__init__.py +15 -0
  101. maxframe/dataframe/datasource/core.py +81 -0
  102. maxframe/dataframe/datasource/dataframe.py +59 -0
  103. maxframe/dataframe/datasource/date_range.py +504 -0
  104. maxframe/dataframe/datasource/from_index.py +54 -0
  105. maxframe/dataframe/datasource/from_records.py +107 -0
  106. maxframe/dataframe/datasource/from_tensor.py +419 -0
  107. maxframe/dataframe/datasource/index.py +117 -0
  108. maxframe/dataframe/datasource/read_csv.py +528 -0
  109. maxframe/dataframe/datasource/read_odps_query.py +299 -0
  110. maxframe/dataframe/datasource/read_odps_table.py +253 -0
  111. maxframe/dataframe/datasource/read_parquet.py +421 -0
  112. maxframe/dataframe/datasource/series.py +55 -0
  113. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  114. maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
  115. maxframe/dataframe/datastore/__init__.py +26 -0
  116. maxframe/dataframe/datastore/core.py +19 -0
  117. maxframe/dataframe/datastore/to_csv.py +227 -0
  118. maxframe/dataframe/datastore/to_odps.py +162 -0
  119. maxframe/dataframe/extensions/__init__.py +41 -0
  120. maxframe/dataframe/extensions/accessor.py +50 -0
  121. maxframe/dataframe/extensions/reshuffle.py +83 -0
  122. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  123. maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
  124. maxframe/dataframe/fetch/__init__.py +15 -0
  125. maxframe/dataframe/fetch/core.py +86 -0
  126. maxframe/dataframe/groupby/__init__.py +82 -0
  127. maxframe/dataframe/groupby/aggregation.py +350 -0
  128. maxframe/dataframe/groupby/apply.py +251 -0
  129. maxframe/dataframe/groupby/core.py +179 -0
  130. maxframe/dataframe/groupby/cum.py +124 -0
  131. maxframe/dataframe/groupby/fill.py +141 -0
  132. maxframe/dataframe/groupby/getitem.py +92 -0
  133. maxframe/dataframe/groupby/head.py +105 -0
  134. maxframe/dataframe/groupby/sample.py +214 -0
  135. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  136. maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
  137. maxframe/dataframe/groupby/transform.py +255 -0
  138. maxframe/dataframe/indexing/__init__.py +84 -0
  139. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  140. maxframe/dataframe/indexing/align.py +349 -0
  141. maxframe/dataframe/indexing/at.py +83 -0
  142. maxframe/dataframe/indexing/getitem.py +204 -0
  143. maxframe/dataframe/indexing/iat.py +37 -0
  144. maxframe/dataframe/indexing/iloc.py +566 -0
  145. maxframe/dataframe/indexing/insert.py +86 -0
  146. maxframe/dataframe/indexing/loc.py +411 -0
  147. maxframe/dataframe/indexing/reindex.py +526 -0
  148. maxframe/dataframe/indexing/rename.py +462 -0
  149. maxframe/dataframe/indexing/rename_axis.py +209 -0
  150. maxframe/dataframe/indexing/reset_index.py +402 -0
  151. maxframe/dataframe/indexing/sample.py +221 -0
  152. maxframe/dataframe/indexing/set_axis.py +194 -0
  153. maxframe/dataframe/indexing/set_index.py +61 -0
  154. maxframe/dataframe/indexing/setitem.py +130 -0
  155. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  156. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  157. maxframe/dataframe/indexing/where.py +308 -0
  158. maxframe/dataframe/initializer.py +288 -0
  159. maxframe/dataframe/merge/__init__.py +32 -0
  160. maxframe/dataframe/merge/append.py +121 -0
  161. maxframe/dataframe/merge/concat.py +325 -0
  162. maxframe/dataframe/merge/merge.py +593 -0
  163. maxframe/dataframe/merge/tests/__init__.py +13 -0
  164. maxframe/dataframe/merge/tests/test_merge.py +215 -0
  165. maxframe/dataframe/misc/__init__.py +134 -0
  166. maxframe/dataframe/misc/_duplicate.py +46 -0
  167. maxframe/dataframe/misc/accessor.py +276 -0
  168. maxframe/dataframe/misc/apply.py +692 -0
  169. maxframe/dataframe/misc/astype.py +236 -0
  170. maxframe/dataframe/misc/case_when.py +141 -0
  171. maxframe/dataframe/misc/check_monotonic.py +84 -0
  172. maxframe/dataframe/misc/cut.py +383 -0
  173. maxframe/dataframe/misc/datetimes.py +79 -0
  174. maxframe/dataframe/misc/describe.py +108 -0
  175. maxframe/dataframe/misc/diff.py +210 -0
  176. maxframe/dataframe/misc/drop.py +440 -0
  177. maxframe/dataframe/misc/drop_duplicates.py +248 -0
  178. maxframe/dataframe/misc/duplicated.py +292 -0
  179. maxframe/dataframe/misc/eval.py +728 -0
  180. maxframe/dataframe/misc/explode.py +171 -0
  181. maxframe/dataframe/misc/get_dummies.py +208 -0
  182. maxframe/dataframe/misc/isin.py +217 -0
  183. maxframe/dataframe/misc/map.py +236 -0
  184. maxframe/dataframe/misc/melt.py +162 -0
  185. maxframe/dataframe/misc/memory_usage.py +248 -0
  186. maxframe/dataframe/misc/pct_change.py +150 -0
  187. maxframe/dataframe/misc/pivot_table.py +262 -0
  188. maxframe/dataframe/misc/qcut.py +104 -0
  189. maxframe/dataframe/misc/select_dtypes.py +104 -0
  190. maxframe/dataframe/misc/shift.py +256 -0
  191. maxframe/dataframe/misc/stack.py +238 -0
  192. maxframe/dataframe/misc/string_.py +221 -0
  193. maxframe/dataframe/misc/tests/__init__.py +13 -0
  194. maxframe/dataframe/misc/tests/test_misc.py +468 -0
  195. maxframe/dataframe/misc/to_numeric.py +178 -0
  196. maxframe/dataframe/misc/transform.py +361 -0
  197. maxframe/dataframe/misc/transpose.py +136 -0
  198. maxframe/dataframe/misc/value_counts.py +182 -0
  199. maxframe/dataframe/missing/__init__.py +53 -0
  200. maxframe/dataframe/missing/checkna.py +223 -0
  201. maxframe/dataframe/missing/dropna.py +280 -0
  202. maxframe/dataframe/missing/fillna.py +275 -0
  203. maxframe/dataframe/missing/replace.py +439 -0
  204. maxframe/dataframe/missing/tests/__init__.py +13 -0
  205. maxframe/dataframe/missing/tests/test_missing.py +89 -0
  206. maxframe/dataframe/operators.py +273 -0
  207. maxframe/dataframe/plotting/__init__.py +40 -0
  208. maxframe/dataframe/plotting/core.py +78 -0
  209. maxframe/dataframe/plotting/tests/__init__.py +13 -0
  210. maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
  211. maxframe/dataframe/reduction/__init__.py +107 -0
  212. maxframe/dataframe/reduction/aggregation.py +344 -0
  213. maxframe/dataframe/reduction/all.py +78 -0
  214. maxframe/dataframe/reduction/any.py +78 -0
  215. maxframe/dataframe/reduction/core.py +837 -0
  216. maxframe/dataframe/reduction/count.py +59 -0
  217. maxframe/dataframe/reduction/cummax.py +30 -0
  218. maxframe/dataframe/reduction/cummin.py +30 -0
  219. maxframe/dataframe/reduction/cumprod.py +30 -0
  220. maxframe/dataframe/reduction/cumsum.py +30 -0
  221. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  222. maxframe/dataframe/reduction/kurtosis.py +104 -0
  223. maxframe/dataframe/reduction/max.py +65 -0
  224. maxframe/dataframe/reduction/mean.py +61 -0
  225. maxframe/dataframe/reduction/min.py +65 -0
  226. maxframe/dataframe/reduction/nunique.py +141 -0
  227. maxframe/dataframe/reduction/prod.py +76 -0
  228. maxframe/dataframe/reduction/reduction_size.py +36 -0
  229. maxframe/dataframe/reduction/sem.py +69 -0
  230. maxframe/dataframe/reduction/skew.py +89 -0
  231. maxframe/dataframe/reduction/std.py +53 -0
  232. maxframe/dataframe/reduction/str_concat.py +48 -0
  233. maxframe/dataframe/reduction/sum.py +77 -0
  234. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  235. maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
  236. maxframe/dataframe/reduction/unique.py +90 -0
  237. maxframe/dataframe/reduction/var.py +72 -0
  238. maxframe/dataframe/sort/__init__.py +34 -0
  239. maxframe/dataframe/sort/core.py +36 -0
  240. maxframe/dataframe/sort/sort_index.py +153 -0
  241. maxframe/dataframe/sort/sort_values.py +311 -0
  242. maxframe/dataframe/sort/tests/__init__.py +13 -0
  243. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  244. maxframe/dataframe/statistics/__init__.py +33 -0
  245. maxframe/dataframe/statistics/corr.py +280 -0
  246. maxframe/dataframe/statistics/quantile.py +341 -0
  247. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  248. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  249. maxframe/dataframe/tests/__init__.py +13 -0
  250. maxframe/dataframe/tests/test_initializer.py +29 -0
  251. maxframe/dataframe/tseries/__init__.py +13 -0
  252. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  253. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  254. maxframe/dataframe/tseries/to_datetime.py +297 -0
  255. maxframe/dataframe/ufunc/__init__.py +27 -0
  256. maxframe/dataframe/ufunc/tensor.py +54 -0
  257. maxframe/dataframe/ufunc/ufunc.py +52 -0
  258. maxframe/dataframe/utils.py +1267 -0
  259. maxframe/dataframe/window/__init__.py +29 -0
  260. maxframe/dataframe/window/aggregation.py +96 -0
  261. maxframe/dataframe/window/core.py +69 -0
  262. maxframe/dataframe/window/ewm.py +249 -0
  263. maxframe/dataframe/window/expanding.py +147 -0
  264. maxframe/dataframe/window/rolling.py +376 -0
  265. maxframe/dataframe/window/tests/__init__.py +13 -0
  266. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  267. maxframe/dataframe/window/tests/test_expanding.py +66 -0
  268. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  269. maxframe/env.py +33 -0
  270. maxframe/errors.py +21 -0
  271. maxframe/extension.py +81 -0
  272. maxframe/learn/__init__.py +17 -0
  273. maxframe/learn/contrib/__init__.py +17 -0
  274. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  275. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  276. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  277. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  278. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  279. maxframe/learn/contrib/utils.py +52 -0
  280. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  281. maxframe/learn/contrib/xgboost/classifier.py +86 -0
  282. maxframe/learn/contrib/xgboost/core.py +156 -0
  283. maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
  284. maxframe/learn/contrib/xgboost/predict.py +138 -0
  285. maxframe/learn/contrib/xgboost/regressor.py +78 -0
  286. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  287. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  288. maxframe/learn/contrib/xgboost/train.py +121 -0
  289. maxframe/learn/utils/__init__.py +15 -0
  290. maxframe/learn/utils/core.py +29 -0
  291. maxframe/lib/__init__.py +15 -0
  292. maxframe/lib/aio/__init__.py +27 -0
  293. maxframe/lib/aio/_runners.py +162 -0
  294. maxframe/lib/aio/_threads.py +35 -0
  295. maxframe/lib/aio/base.py +82 -0
  296. maxframe/lib/aio/file.py +85 -0
  297. maxframe/lib/aio/isolation.py +100 -0
  298. maxframe/lib/aio/lru.py +242 -0
  299. maxframe/lib/aio/parallelism.py +37 -0
  300. maxframe/lib/aio/tests/__init__.py +13 -0
  301. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  302. maxframe/lib/compression.py +55 -0
  303. maxframe/lib/cython/__init__.py +13 -0
  304. maxframe/lib/cython/libcpp.pxd +30 -0
  305. maxframe/lib/filesystem/__init__.py +21 -0
  306. maxframe/lib/filesystem/_glob.py +173 -0
  307. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  308. maxframe/lib/filesystem/_oss_lib/common.py +198 -0
  309. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  310. maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
  311. maxframe/lib/filesystem/arrow.py +236 -0
  312. maxframe/lib/filesystem/base.py +263 -0
  313. maxframe/lib/filesystem/core.py +95 -0
  314. maxframe/lib/filesystem/fsmap.py +164 -0
  315. maxframe/lib/filesystem/hdfs.py +31 -0
  316. maxframe/lib/filesystem/local.py +112 -0
  317. maxframe/lib/filesystem/oss.py +157 -0
  318. maxframe/lib/filesystem/tests/__init__.py +13 -0
  319. maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
  320. maxframe/lib/filesystem/tests/test_oss.py +182 -0
  321. maxframe/lib/functools_compat.py +81 -0
  322. maxframe/lib/mmh3.cpython-310-darwin.so +0 -0
  323. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  324. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  325. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  326. maxframe/lib/sparse/__init__.py +861 -0
  327. maxframe/lib/sparse/array.py +1604 -0
  328. maxframe/lib/sparse/core.py +92 -0
  329. maxframe/lib/sparse/matrix.py +241 -0
  330. maxframe/lib/sparse/tests/__init__.py +15 -0
  331. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  332. maxframe/lib/sparse/vector.py +150 -0
  333. maxframe/lib/tblib/LICENSE +20 -0
  334. maxframe/lib/tblib/__init__.py +327 -0
  335. maxframe/lib/tblib/cpython.py +83 -0
  336. maxframe/lib/tblib/decorators.py +44 -0
  337. maxframe/lib/tblib/pickling_support.py +90 -0
  338. maxframe/lib/tests/__init__.py +13 -0
  339. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  340. maxframe/lib/version.py +620 -0
  341. maxframe/lib/wrapped_pickle.py +139 -0
  342. maxframe/mixin.py +100 -0
  343. maxframe/odpsio/__init__.py +21 -0
  344. maxframe/odpsio/arrow.py +91 -0
  345. maxframe/odpsio/schema.py +364 -0
  346. maxframe/odpsio/tableio.py +322 -0
  347. maxframe/odpsio/tests/__init__.py +13 -0
  348. maxframe/odpsio/tests/test_arrow.py +88 -0
  349. maxframe/odpsio/tests/test_schema.py +297 -0
  350. maxframe/odpsio/tests/test_tableio.py +136 -0
  351. maxframe/odpsio/tests/test_volumeio.py +90 -0
  352. maxframe/odpsio/volumeio.py +95 -0
  353. maxframe/opcodes.py +590 -0
  354. maxframe/protocol.py +415 -0
  355. maxframe/remote/__init__.py +18 -0
  356. maxframe/remote/core.py +210 -0
  357. maxframe/remote/run_script.py +121 -0
  358. maxframe/serialization/__init__.py +26 -0
  359. maxframe/serialization/arrow.py +95 -0
  360. maxframe/serialization/core.cpython-310-darwin.so +0 -0
  361. maxframe/serialization/core.pxd +44 -0
  362. maxframe/serialization/core.pyi +61 -0
  363. maxframe/serialization/core.pyx +1094 -0
  364. maxframe/serialization/exception.py +86 -0
  365. maxframe/serialization/maxframe_objects.py +39 -0
  366. maxframe/serialization/numpy.py +91 -0
  367. maxframe/serialization/pandas.py +202 -0
  368. maxframe/serialization/scipy.py +71 -0
  369. maxframe/serialization/serializables/__init__.py +55 -0
  370. maxframe/serialization/serializables/core.py +262 -0
  371. maxframe/serialization/serializables/field.py +624 -0
  372. maxframe/serialization/serializables/field_type.py +589 -0
  373. maxframe/serialization/serializables/tests/__init__.py +13 -0
  374. maxframe/serialization/serializables/tests/test_field_type.py +121 -0
  375. maxframe/serialization/serializables/tests/test_serializable.py +250 -0
  376. maxframe/serialization/tests/__init__.py +13 -0
  377. maxframe/serialization/tests/test_serial.py +412 -0
  378. maxframe/session.py +1310 -0
  379. maxframe/tensor/__init__.py +183 -0
  380. maxframe/tensor/arithmetic/__init__.py +315 -0
  381. maxframe/tensor/arithmetic/abs.py +68 -0
  382. maxframe/tensor/arithmetic/absolute.py +68 -0
  383. maxframe/tensor/arithmetic/add.py +82 -0
  384. maxframe/tensor/arithmetic/angle.py +72 -0
  385. maxframe/tensor/arithmetic/arccos.py +104 -0
  386. maxframe/tensor/arithmetic/arccosh.py +91 -0
  387. maxframe/tensor/arithmetic/arcsin.py +94 -0
  388. maxframe/tensor/arithmetic/arcsinh.py +86 -0
  389. maxframe/tensor/arithmetic/arctan.py +106 -0
  390. maxframe/tensor/arithmetic/arctan2.py +128 -0
  391. maxframe/tensor/arithmetic/arctanh.py +86 -0
  392. maxframe/tensor/arithmetic/around.py +114 -0
  393. maxframe/tensor/arithmetic/bitand.py +95 -0
  394. maxframe/tensor/arithmetic/bitor.py +102 -0
  395. maxframe/tensor/arithmetic/bitxor.py +95 -0
  396. maxframe/tensor/arithmetic/cbrt.py +66 -0
  397. maxframe/tensor/arithmetic/ceil.py +71 -0
  398. maxframe/tensor/arithmetic/clip.py +165 -0
  399. maxframe/tensor/arithmetic/conj.py +74 -0
  400. maxframe/tensor/arithmetic/copysign.py +78 -0
  401. maxframe/tensor/arithmetic/core.py +544 -0
  402. maxframe/tensor/arithmetic/cos.py +85 -0
  403. maxframe/tensor/arithmetic/cosh.py +72 -0
  404. maxframe/tensor/arithmetic/deg2rad.py +72 -0
  405. maxframe/tensor/arithmetic/degrees.py +77 -0
  406. maxframe/tensor/arithmetic/divide.py +114 -0
  407. maxframe/tensor/arithmetic/equal.py +76 -0
  408. maxframe/tensor/arithmetic/exp.py +106 -0
  409. maxframe/tensor/arithmetic/exp2.py +67 -0
  410. maxframe/tensor/arithmetic/expm1.py +79 -0
  411. maxframe/tensor/arithmetic/fabs.py +74 -0
  412. maxframe/tensor/arithmetic/fix.py +69 -0
  413. maxframe/tensor/arithmetic/float_power.py +103 -0
  414. maxframe/tensor/arithmetic/floor.py +77 -0
  415. maxframe/tensor/arithmetic/floordiv.py +94 -0
  416. maxframe/tensor/arithmetic/fmax.py +105 -0
  417. maxframe/tensor/arithmetic/fmin.py +106 -0
  418. maxframe/tensor/arithmetic/fmod.py +99 -0
  419. maxframe/tensor/arithmetic/frexp.py +92 -0
  420. maxframe/tensor/arithmetic/greater.py +77 -0
  421. maxframe/tensor/arithmetic/greater_equal.py +69 -0
  422. maxframe/tensor/arithmetic/hypot.py +77 -0
  423. maxframe/tensor/arithmetic/i0.py +89 -0
  424. maxframe/tensor/arithmetic/imag.py +67 -0
  425. maxframe/tensor/arithmetic/invert.py +110 -0
  426. maxframe/tensor/arithmetic/isclose.py +115 -0
  427. maxframe/tensor/arithmetic/iscomplex.py +64 -0
  428. maxframe/tensor/arithmetic/isfinite.py +106 -0
  429. maxframe/tensor/arithmetic/isinf.py +103 -0
  430. maxframe/tensor/arithmetic/isnan.py +82 -0
  431. maxframe/tensor/arithmetic/isreal.py +63 -0
  432. maxframe/tensor/arithmetic/ldexp.py +99 -0
  433. maxframe/tensor/arithmetic/less.py +69 -0
  434. maxframe/tensor/arithmetic/less_equal.py +69 -0
  435. maxframe/tensor/arithmetic/log.py +92 -0
  436. maxframe/tensor/arithmetic/log10.py +85 -0
  437. maxframe/tensor/arithmetic/log1p.py +95 -0
  438. maxframe/tensor/arithmetic/log2.py +85 -0
  439. maxframe/tensor/arithmetic/logaddexp.py +80 -0
  440. maxframe/tensor/arithmetic/logaddexp2.py +78 -0
  441. maxframe/tensor/arithmetic/logical_and.py +81 -0
  442. maxframe/tensor/arithmetic/logical_not.py +74 -0
  443. maxframe/tensor/arithmetic/logical_or.py +82 -0
  444. maxframe/tensor/arithmetic/logical_xor.py +88 -0
  445. maxframe/tensor/arithmetic/lshift.py +82 -0
  446. maxframe/tensor/arithmetic/maximum.py +108 -0
  447. maxframe/tensor/arithmetic/minimum.py +108 -0
  448. maxframe/tensor/arithmetic/mod.py +104 -0
  449. maxframe/tensor/arithmetic/modf.py +83 -0
  450. maxframe/tensor/arithmetic/multiply.py +81 -0
  451. maxframe/tensor/arithmetic/nan_to_num.py +99 -0
  452. maxframe/tensor/arithmetic/negative.py +65 -0
  453. maxframe/tensor/arithmetic/nextafter.py +68 -0
  454. maxframe/tensor/arithmetic/not_equal.py +72 -0
  455. maxframe/tensor/arithmetic/positive.py +47 -0
  456. maxframe/tensor/arithmetic/power.py +106 -0
  457. maxframe/tensor/arithmetic/rad2deg.py +71 -0
  458. maxframe/tensor/arithmetic/radians.py +77 -0
  459. maxframe/tensor/arithmetic/real.py +70 -0
  460. maxframe/tensor/arithmetic/reciprocal.py +76 -0
  461. maxframe/tensor/arithmetic/rint.py +68 -0
  462. maxframe/tensor/arithmetic/rshift.py +81 -0
  463. maxframe/tensor/arithmetic/setimag.py +29 -0
  464. maxframe/tensor/arithmetic/setreal.py +29 -0
  465. maxframe/tensor/arithmetic/sign.py +81 -0
  466. maxframe/tensor/arithmetic/signbit.py +65 -0
  467. maxframe/tensor/arithmetic/sin.py +98 -0
  468. maxframe/tensor/arithmetic/sinc.py +102 -0
  469. maxframe/tensor/arithmetic/sinh.py +93 -0
  470. maxframe/tensor/arithmetic/spacing.py +72 -0
  471. maxframe/tensor/arithmetic/sqrt.py +81 -0
  472. maxframe/tensor/arithmetic/square.py +69 -0
  473. maxframe/tensor/arithmetic/subtract.py +81 -0
  474. maxframe/tensor/arithmetic/tan.py +88 -0
  475. maxframe/tensor/arithmetic/tanh.py +92 -0
  476. maxframe/tensor/arithmetic/tests/__init__.py +15 -0
  477. maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
  478. maxframe/tensor/arithmetic/truediv.py +104 -0
  479. maxframe/tensor/arithmetic/trunc.py +72 -0
  480. maxframe/tensor/arithmetic/utils.py +65 -0
  481. maxframe/tensor/array_utils.py +186 -0
  482. maxframe/tensor/base/__init__.py +34 -0
  483. maxframe/tensor/base/astype.py +119 -0
  484. maxframe/tensor/base/atleast_1d.py +74 -0
  485. maxframe/tensor/base/broadcast_to.py +89 -0
  486. maxframe/tensor/base/ravel.py +92 -0
  487. maxframe/tensor/base/tests/__init__.py +13 -0
  488. maxframe/tensor/base/tests/test_base.py +114 -0
  489. maxframe/tensor/base/transpose.py +125 -0
  490. maxframe/tensor/base/unique.py +205 -0
  491. maxframe/tensor/base/where.py +127 -0
  492. maxframe/tensor/core.py +724 -0
  493. maxframe/tensor/datasource/__init__.py +32 -0
  494. maxframe/tensor/datasource/arange.py +156 -0
  495. maxframe/tensor/datasource/array.py +415 -0
  496. maxframe/tensor/datasource/core.py +109 -0
  497. maxframe/tensor/datasource/empty.py +169 -0
  498. maxframe/tensor/datasource/from_dataframe.py +70 -0
  499. maxframe/tensor/datasource/from_dense.py +54 -0
  500. maxframe/tensor/datasource/from_sparse.py +47 -0
  501. maxframe/tensor/datasource/full.py +186 -0
  502. maxframe/tensor/datasource/ones.py +173 -0
  503. maxframe/tensor/datasource/scalar.py +40 -0
  504. maxframe/tensor/datasource/tests/__init__.py +13 -0
  505. maxframe/tensor/datasource/tests/test_datasource.py +278 -0
  506. maxframe/tensor/datasource/zeros.py +188 -0
  507. maxframe/tensor/fetch/__init__.py +15 -0
  508. maxframe/tensor/fetch/core.py +54 -0
  509. maxframe/tensor/indexing/__init__.py +47 -0
  510. maxframe/tensor/indexing/choose.py +196 -0
  511. maxframe/tensor/indexing/compress.py +124 -0
  512. maxframe/tensor/indexing/core.py +190 -0
  513. maxframe/tensor/indexing/extract.py +71 -0
  514. maxframe/tensor/indexing/fill_diagonal.py +183 -0
  515. maxframe/tensor/indexing/flatnonzero.py +60 -0
  516. maxframe/tensor/indexing/getitem.py +175 -0
  517. maxframe/tensor/indexing/nonzero.py +120 -0
  518. maxframe/tensor/indexing/setitem.py +132 -0
  519. maxframe/tensor/indexing/slice.py +29 -0
  520. maxframe/tensor/indexing/take.py +130 -0
  521. maxframe/tensor/indexing/tests/__init__.py +15 -0
  522. maxframe/tensor/indexing/tests/test_indexing.py +234 -0
  523. maxframe/tensor/indexing/unravel_index.py +103 -0
  524. maxframe/tensor/merge/__init__.py +15 -0
  525. maxframe/tensor/merge/stack.py +132 -0
  526. maxframe/tensor/merge/tests/__init__.py +13 -0
  527. maxframe/tensor/merge/tests/test_merge.py +52 -0
  528. maxframe/tensor/operators.py +123 -0
  529. maxframe/tensor/random/__init__.py +168 -0
  530. maxframe/tensor/random/beta.py +87 -0
  531. maxframe/tensor/random/binomial.py +137 -0
  532. maxframe/tensor/random/bytes.py +39 -0
  533. maxframe/tensor/random/chisquare.py +110 -0
  534. maxframe/tensor/random/choice.py +186 -0
  535. maxframe/tensor/random/core.py +234 -0
  536. maxframe/tensor/random/dirichlet.py +123 -0
  537. maxframe/tensor/random/exponential.py +94 -0
  538. maxframe/tensor/random/f.py +135 -0
  539. maxframe/tensor/random/gamma.py +128 -0
  540. maxframe/tensor/random/geometric.py +93 -0
  541. maxframe/tensor/random/gumbel.py +167 -0
  542. maxframe/tensor/random/hypergeometric.py +148 -0
  543. maxframe/tensor/random/laplace.py +133 -0
  544. maxframe/tensor/random/logistic.py +129 -0
  545. maxframe/tensor/random/lognormal.py +159 -0
  546. maxframe/tensor/random/logseries.py +122 -0
  547. maxframe/tensor/random/multinomial.py +133 -0
  548. maxframe/tensor/random/multivariate_normal.py +192 -0
  549. maxframe/tensor/random/negative_binomial.py +125 -0
  550. maxframe/tensor/random/noncentral_chisquare.py +132 -0
  551. maxframe/tensor/random/noncentral_f.py +126 -0
  552. maxframe/tensor/random/normal.py +143 -0
  553. maxframe/tensor/random/pareto.py +140 -0
  554. maxframe/tensor/random/permutation.py +104 -0
  555. maxframe/tensor/random/poisson.py +111 -0
  556. maxframe/tensor/random/power.py +142 -0
  557. maxframe/tensor/random/rand.py +82 -0
  558. maxframe/tensor/random/randint.py +121 -0
  559. maxframe/tensor/random/randn.py +96 -0
  560. maxframe/tensor/random/random_integers.py +123 -0
  561. maxframe/tensor/random/random_sample.py +86 -0
  562. maxframe/tensor/random/rayleigh.py +110 -0
  563. maxframe/tensor/random/shuffle.py +61 -0
  564. maxframe/tensor/random/standard_cauchy.py +105 -0
  565. maxframe/tensor/random/standard_exponential.py +72 -0
  566. maxframe/tensor/random/standard_gamma.py +120 -0
  567. maxframe/tensor/random/standard_normal.py +74 -0
  568. maxframe/tensor/random/standard_t.py +135 -0
  569. maxframe/tensor/random/tests/__init__.py +15 -0
  570. maxframe/tensor/random/tests/test_random.py +167 -0
  571. maxframe/tensor/random/triangular.py +119 -0
  572. maxframe/tensor/random/uniform.py +131 -0
  573. maxframe/tensor/random/vonmises.py +131 -0
  574. maxframe/tensor/random/wald.py +114 -0
  575. maxframe/tensor/random/weibull.py +140 -0
  576. maxframe/tensor/random/zipf.py +122 -0
  577. maxframe/tensor/rechunk/__init__.py +26 -0
  578. maxframe/tensor/rechunk/rechunk.py +43 -0
  579. maxframe/tensor/reduction/__init__.py +66 -0
  580. maxframe/tensor/reduction/all.py +103 -0
  581. maxframe/tensor/reduction/allclose.py +88 -0
  582. maxframe/tensor/reduction/any.py +105 -0
  583. maxframe/tensor/reduction/argmax.py +103 -0
  584. maxframe/tensor/reduction/argmin.py +103 -0
  585. maxframe/tensor/reduction/array_equal.py +64 -0
  586. maxframe/tensor/reduction/core.py +168 -0
  587. maxframe/tensor/reduction/count_nonzero.py +81 -0
  588. maxframe/tensor/reduction/cumprod.py +97 -0
  589. maxframe/tensor/reduction/cumsum.py +101 -0
  590. maxframe/tensor/reduction/max.py +120 -0
  591. maxframe/tensor/reduction/mean.py +123 -0
  592. maxframe/tensor/reduction/min.py +120 -0
  593. maxframe/tensor/reduction/nanargmax.py +82 -0
  594. maxframe/tensor/reduction/nanargmin.py +76 -0
  595. maxframe/tensor/reduction/nancumprod.py +91 -0
  596. maxframe/tensor/reduction/nancumsum.py +94 -0
  597. maxframe/tensor/reduction/nanmax.py +111 -0
  598. maxframe/tensor/reduction/nanmean.py +106 -0
  599. maxframe/tensor/reduction/nanmin.py +111 -0
  600. maxframe/tensor/reduction/nanprod.py +94 -0
  601. maxframe/tensor/reduction/nanstd.py +126 -0
  602. maxframe/tensor/reduction/nansum.py +115 -0
  603. maxframe/tensor/reduction/nanvar.py +149 -0
  604. maxframe/tensor/reduction/prod.py +130 -0
  605. maxframe/tensor/reduction/std.py +134 -0
  606. maxframe/tensor/reduction/sum.py +125 -0
  607. maxframe/tensor/reduction/tests/__init__.py +13 -0
  608. maxframe/tensor/reduction/tests/test_reduction.py +181 -0
  609. maxframe/tensor/reduction/var.py +176 -0
  610. maxframe/tensor/reshape/__init__.py +17 -0
  611. maxframe/tensor/reshape/reshape.py +188 -0
  612. maxframe/tensor/reshape/tests/__init__.py +15 -0
  613. maxframe/tensor/reshape/tests/test_reshape.py +37 -0
  614. maxframe/tensor/statistics/__init__.py +13 -0
  615. maxframe/tensor/statistics/percentile.py +175 -0
  616. maxframe/tensor/statistics/quantile.py +288 -0
  617. maxframe/tensor/ufunc/__init__.py +26 -0
  618. maxframe/tensor/ufunc/ufunc.py +200 -0
  619. maxframe/tensor/utils.py +718 -0
  620. maxframe/tests/__init__.py +13 -0
  621. maxframe/tests/test_codegen.py +69 -0
  622. maxframe/tests/test_protocol.py +144 -0
  623. maxframe/tests/test_utils.py +376 -0
  624. maxframe/tests/utils.py +164 -0
  625. maxframe/typing_.py +37 -0
  626. maxframe/udf.py +134 -0
  627. maxframe/utils.py +1114 -0
  628. maxframe-0.1.0b5.dist-info/METADATA +104 -0
  629. maxframe-0.1.0b5.dist-info/RECORD +647 -0
  630. maxframe-0.1.0b5.dist-info/WHEEL +5 -0
  631. maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
  632. maxframe_client/__init__.py +17 -0
  633. maxframe_client/clients/__init__.py +13 -0
  634. maxframe_client/clients/framedriver.py +118 -0
  635. maxframe_client/clients/spe.py +104 -0
  636. maxframe_client/conftest.py +15 -0
  637. maxframe_client/fetcher.py +264 -0
  638. maxframe_client/session/__init__.py +22 -0
  639. maxframe_client/session/consts.py +36 -0
  640. maxframe_client/session/graph.py +119 -0
  641. maxframe_client/session/odps.py +482 -0
  642. maxframe_client/session/task.py +280 -0
  643. maxframe_client/session/tests/__init__.py +13 -0
  644. maxframe_client/session/tests/test_task.py +85 -0
  645. maxframe_client/tests/__init__.py +13 -0
  646. maxframe_client/tests/test_fetcher.py +89 -0
  647. maxframe_client/tests/test_session.py +255 -0
maxframe/__init__.py ADDED
@@ -0,0 +1,32 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from . import dataframe, learn, remote, tensor
16
+ from .config import options
17
+ from .session import execute, fetch, new_session, stop_server
18
+
19
+
20
+ def _get_version():
21
+ try:
22
+ from importlib.metadata import version
23
+ except ImportError:
24
+ from importlib_metadata import version
25
+
26
+ return version("maxframe")
27
+
28
+
29
+ try:
30
+ __version__ = _get_version()
31
+ except ImportError: # pragma: no cover
32
+ pass
Binary file
maxframe/_utils.pxd ADDED
@@ -0,0 +1,33 @@
1
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
# C-level declaration of TypeDispatcher so that other Cython modules can
# cimport it; the implementation lives in the matching .pyx file.
cdef class TypeDispatcher:
    # handlers keyed by the exact type they were registered for
    cdef dict _handlers
    # handlers keyed by a dotted "module.attr" string, resolved on demand
    cdef dict _lazy_handlers
    # cache of handlers resolved through a type's MRO
    cdef dict _inherit_handlers
    # slot that allows weak references to instances of this extension type
    cdef object __weakref__

    cpdef void register(self, object type_, object handler)
    cpdef void unregister(self, object type_)
    cdef _reload_lazy_handlers(self)
    cpdef get_handler(self, object type_)
26
+
27
+
28
# String conversion helpers; ``encoding=*`` means the default value is
# supplied by the implementation in the .pyx file.
cpdef str to_str(s, encoding=*)
cpdef bytes to_binary(s, encoding=*)
cpdef unicode to_text(s, encoding=*)

# Tokenizer registration hook.
cpdef register_tokenizer(cls, handler)

# Random id generation; ``except *`` lets Python exceptions propagate from
# the void-returning function.
cpdef void reset_id_random_seed() except *
cpdef bytes new_random_id(int byte_len)
maxframe/_utils.pyx ADDED
@@ -0,0 +1,547 @@
1
+ # distutils: language = c++
2
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import collections
17
+ import importlib
18
+ import inspect
19
+ import itertools
20
+ import os
21
+ import pickle
22
+ import pkgutil
23
+ import time
24
+ import types
25
+ import uuid
26
+ import warnings
27
+ from datetime import date, datetime, timedelta, tzinfo
28
+ from enum import Enum
29
+ from functools import lru_cache, partial
30
+ from random import getrandbits
31
+ from weakref import WeakSet
32
+
33
+ import cloudpickle
34
+ import numpy as np
35
+ import pandas as pd
36
+
37
+ cimport cython
38
+ from cpython cimport PyBytes_FromStringAndSize
39
+ from libc.stdint cimport uint8_t, uint32_t, uint_fast64_t
40
+ from libc.stdlib cimport free, malloc
41
+
42
+ from .lib.cython.libcpp cimport mt19937_64
43
+
44
+ try:
45
+ from pandas.tseries.offsets import Tick as PDTick
46
+ except ImportError:
47
+ PDTick = None
48
+
49
+ from .lib.mmh3 import hash as mmh_hash
50
+ from .lib.mmh3 import hash_bytes as mmh_hash_bytes
51
+ from .lib.mmh3 import hash_from_buffer as mmh3_hash_from_buffer
52
+
53
+
54
# Optional-dependency feature flags, computed once at import time.
# ``pkgutil.find_loader`` is deprecated since Python 3.12 and scheduled for
# removal, so availability is probed with ``importlib.util.find_spec``, which
# returns None when a top-level module cannot be found.
import importlib.util

cdef bint _has_cupy = importlib.util.find_spec('cupy') is not None
cdef bint _has_cudf = importlib.util.find_spec('cudf') is not None
cdef bint _has_sqlalchemy = importlib.util.find_spec('sqlalchemy') is not None
# Whether this pandas build exposes ``IntervalArray.inclusive``; the
# interval-array tokenizer falls back to ``closed`` when it does not.
cdef bint _has_interval_array_inclusive = hasattr(
    pd.arrays.IntervalArray, "inclusive"
)
60
+
61
+
62
+ cdef extern from "MurmurHash3.h":
63
+ void MurmurHash3_x64_128(const void * key, Py_ssize_t len, uint32_t seed, void * out)
64
+
65
+
66
# Hash a byte buffer with MurmurHash3 (x64, 128-bit, seed 0) and return the
# 16-byte digest with the two highest bits of the first byte forced to 1.
cdef bytes _get_maxframe_key(const uint8_t[:] bufferview):
    cdef uint8_t digest[16]
    cdef const uint8_t *raw = &bufferview[0]
    cdef Py_ssize_t n_bytes = len(bufferview)

    MurmurHash3_x64_128(raw, n_bytes, 0, digest)
    digest[0] = digest[0] | 0xC0
    return PyBytes_FromStringAndSize(<char*>digest, 16)
72
+
73
+
74
cpdef str to_str(s, encoding='utf-8'):
    """Convert ``s`` to ``str``.

    ``None`` is passed through, ``bytes`` are decoded with ``encoding`` and
    ``str`` subclasses are converted to plain ``str``. Any other input raises
    ``TypeError``.
    """
    if s is None:
        return None
    if type(s) is str:
        # exact str: return as-is without copying
        return <str>s
    if isinstance(s, bytes):
        return (<bytes>s).decode(encoding)
    if isinstance(s, str):
        return str(s)
    raise TypeError(f"Could not convert from {s} to str.")
85
+
86
+
87
cpdef bytes to_binary(s, encoding='utf-8'):
    """Convert ``s`` to ``bytes``.

    ``None`` is passed through, text is encoded with ``encoding`` and
    ``bytes`` subclasses are converted to plain ``bytes``. Any other input
    raises ``TypeError``.
    """
    if s is None:
        return None
    if type(s) is bytes:
        # exact bytes: return as-is without copying
        return <bytes>s
    if isinstance(s, unicode):
        return (<unicode>s).encode(encoding)
    if isinstance(s, bytes):
        return bytes(s)
    raise TypeError(f"Could not convert from {s} to bytes.")
98
+
99
+
100
cpdef unicode to_text(s, encoding='utf-8'):
    """Convert ``s`` to text.

    ``None`` is passed through, ``bytes`` are decoded with ``encoding`` and
    text subclasses are converted to the plain text type. Any other input
    raises ``TypeError``.
    """
    if type(s) is unicode:
        return <unicode>s
    elif isinstance(s, bytes):
        # honour the caller-supplied encoding instead of always using UTF-8
        return (<bytes>s).decode(encoding)
    elif isinstance(s, unicode):
        return unicode(s)
    elif s is None:
        return None
    else:
        raise TypeError(f"Could not convert from {s} to unicode.")
111
+
112
+
113
# Weak registry of every live dispatcher, used by
# ``TypeDispatcher.reload_all_lazy_handlers``.
_type_dispatchers = WeakSet()


# Pairs a name with a type so that handlers can be registered per
# (name, type) combination instead of per type only.
NamedType = collections.namedtuple("NamedType", ["name", "type_"])


cdef class TypeDispatcher:
    """Dispatch a handler by type, with lazy registration and MRO fallback.

    Handlers can be registered for a concrete type, a ``NamedType``, a tuple
    of those, or a dotted ``"module.attr"`` string that is only imported when
    a lookup misses the exact-match and inherited-handler tables.
    """

    def __init__(self):
        self._handlers = dict()
        self._lazy_handlers = dict()
        # store inherited handlers to facilitate unregistering
        self._inherit_handlers = dict()

        _type_dispatchers.add(self)

    cpdef void register(self, object type_, object handler):
        """Register ``handler`` for ``type_``.

        A string is stored as a lazy handler, a plain tuple registers every
        element, and anything else is stored as an exact-type handler.
        """
        if isinstance(type_, str):
            self._lazy_handlers[type_] = handler
        elif type(type_) is not NamedType and isinstance(type_, tuple):
            for t in type_:
                self.register(t, handler)
        else:
            self._handlers[type_] = handler
            # A new exact handler may sit between a subclass and the base
            # whose handler was cached for it, so drop the MRO cache to
            # avoid serving stale results.
            self._inherit_handlers.clear()

    cpdef void unregister(self, object type_):
        """Remove the handler(s) registered for ``type_`` and reset the MRO cache."""
        if type(type_) is not NamedType and isinstance(type_, tuple):
            for t in type_:
                self.unregister(t)
        else:
            self._lazy_handlers.pop(type_, None)
            self._handlers.pop(type_, None)
            self._inherit_handlers.clear()

    def dump_handlers(self):
        """Return shallow copies of the exact, lazy and inherited handler tables."""
        return (
            self._handlers.copy(),
            self._lazy_handlers.copy(),
            self._inherit_handlers.copy(),
        )

    def load_handlers(self, handlers, lazy_handlers, inherit_handlers):
        """Replace the three handler tables with the given dicts (not copied)."""
        self._handlers = handlers
        self._lazy_handlers = lazy_handlers
        self._inherit_handlers = inherit_handlers

    cdef _reload_lazy_handlers(self):
        # Import every lazily registered "module.attr" target, register the
        # resolved object as an exact handler, then empty the lazy table.
        for k, v in self._lazy_handlers.items():
            mod_name, obj_name = k.rsplit('.', 1)
            with warnings.catch_warnings():
                # the lazy imported cudf will warn no device found,
                # when we set visible device to -1 for CPU processes,
                # ignore the warning to not distract users
                warnings.simplefilter("ignore")
                mod = importlib.import_module(mod_name, __name__)
            self.register(getattr(mod, obj_name), v)
        self._lazy_handlers = dict()

    cpdef get_handler(self, object type_):
        """Return the handler for ``type_``.

        Lookup order: exact handlers, cached inherited handlers, then (after
        resolving lazy handlers) the first MRO entry that has an exact
        handler. Raises ``KeyError`` when nothing matches.
        """
        try:
            return self._handlers[type_]
        except KeyError:
            pass

        try:
            return self._inherit_handlers[type_]
        except KeyError:
            self._reload_lazy_handlers()
            if type(type_) is NamedType:
                # interleave (name, base) and plain base for every MRO entry
                named_type = partial(NamedType, type_.name)
                mro = itertools.chain(
                    *zip(map(named_type, type_.type_.__mro__),
                         type_.type_.__mro__)
                )
            else:
                mro = type_.__mro__
            for clz in mro:
                # only lookup self._handlers for mro clz
                handler = self._handlers.get(clz)
                if handler is not None:
                    self._inherit_handlers[type_] = handler
                    return handler
            raise KeyError(f'Cannot dispatch type {type_}')

    def __call__(self, object obj, *args, **kwargs):
        """Invoke the handler selected by ``type(obj)`` with ``obj`` and extra arguments."""
        return self.get_handler(type(obj))(obj, *args, **kwargs)

    @staticmethod
    def reload_all_lazy_handlers():
        """Resolve the lazy handlers of every dispatcher that is still alive."""
        for dispatcher in _type_dispatchers:
            (<TypeDispatcher>dispatcher)._reload_lazy_handlers()
203
+
204
+
205
# Pickle the token of the call arguments; non-empty keyword arguments are
# appended to the positional tuple as one trailing dict element.
cdef inline build_canonical_bytes(tuple args, kwargs):
    cdef tuple payload = args + (kwargs,) if kwargs else args
    token = tokenize_handler(payload)
    return pickle.dumps(token)
209
+
210
+
211
def tokenize(*args, **kwargs):
    """Return a hexadecimal token string derived from the given arguments."""
    canonical = build_canonical_bytes(args, kwargs)
    key = _get_maxframe_key(canonical)
    return key.hex()
213
+
214
+
215
def tokenize_int(*args, **kwargs):
    """Return the ``mmh_hash`` value of the canonical bytes of the arguments."""
    canonical = build_canonical_bytes(args, kwargs)
    return mmh_hash(canonical)
217
+
218
+
219
cdef class Tokenizer(TypeDispatcher):
    """Type dispatcher with fallbacks for objects that have no registered handler."""

    def __call__(self, object obj, *args, **kwargs):
        """Tokenize ``obj``.

        When dispatching raises ``KeyError`` the fallbacks are tried in
        order: the object's ``__maxframe_tokenize__`` hook, function
        tokenizing for callables, and finally ``cloudpickle``. ``TypeError``
        is raised when the last resort fails as well.
        """
        try:
            return self.get_handler(type(obj))(obj, *args, **kwargs)
        except KeyError:
            # the hook is only honoured on instances, not on classes
            has_hook = hasattr(obj, '__maxframe_tokenize__') and not isinstance(obj, type)
            if has_hook:
                hooked = obj.__maxframe_tokenize__()
                if not args and not kwargs:
                    return hooked
                # extra arguments: dispatch again on the hook's result
                return self.get_handler(type(hooked))(hooked, *args, **kwargs)

            if callable(obj) and PDTick is not None and not isinstance(obj, PDTick):
                return tokenize_function(obj)

            try:
                return cloudpickle.dumps(obj)
            except:
                raise TypeError(f'Cannot generate token for {obj}, type: {type(obj)}') from None
238
+
239
+
240
# Flatten ``ob`` into a list of tokens without recursion, using a work list.
# Primitive values are emitted as-is, lists/tuples are expanded in order,
# sets and dict items are expanded in sorted order, and every other object
# is tokenized through ``tokenize_handler``.
cdef inline list iterative_tokenize(object ob):
    cdef list pending = [ob]
    cdef int cursor = 0
    cdef list tokens = []

    while cursor < len(pending):
        item = pending[cursor]
        cursor += 1

        if type(item) in _primitive_types:
            tokens.append(item)
        elif isinstance(item, (list, tuple)):
            pending.extend(item)
        elif isinstance(item, set):
            pending.extend(sorted(item))
        elif isinstance(item, dict):
            pending.extend(sorted(item.items()))
        else:
            tokens.append(tokenize_handler(item))

        # drop the consumed prefix once it makes up more than half the list
        if cursor >= 64 and len(pending) < cursor * 2:  # pragma: no cover
            pending = pending[cursor:]
            cursor = 0
    return tokens
262
+
263
+
264
# Build a token tuple for a numpy array.
#   * 0-d arrays: (string form, dtype)
#   * file-backed arrays (objects with ``mode`` and a non-empty ``filename``):
#     file name, modification time, dtype, shape, strides and the offset of
#     the array inside its base buffer
#   * everything else: (content hash, dtype, shape, strides)
cdef inline tuple tokenize_numpy(ob):
    # The offset is a difference of two memory addresses inside a mapped
    # file; a plain C ``int`` overflows for mappings of 2 GiB or more.
    cdef long long offset

    if not ob.shape:
        return str(ob), ob.dtype
    if hasattr(ob, 'mode') and getattr(ob, 'filename', None):
        if hasattr(ob.base, 'ctypes'):
            offset = (ob.ctypes.get_as_parameter().value -
                      ob.base.ctypes.get_as_parameter().value)
        else:
            offset = 0  # root memmap's have mmap object as misc
        return (ob.filename, os.path.getmtime(ob.filename), ob.dtype,
                ob.shape, ob.strides, offset)
    if ob.dtype.hasobject:
        try:
            # fast path: every element is text
            data = mmh_hash_bytes('-'.join(ob.flat).encode('utf-8', errors='surrogatepass'))
        except UnicodeDecodeError:
            data = mmh_hash_bytes(b'-'.join([to_binary(x) for x in ob.flat]))
        except TypeError:
            # elements are not all strings: hash the pickled array instead
            try:
                data = mmh_hash_bytes(pickle.dumps(ob, pickle.HIGHEST_PROTOCOL))
            except:
                # nothing can do, generate uuid
                data = uuid.uuid4().hex
    else:
        try:
            data = mmh_hash_bytes(ob.ravel().view('i1').data)
        except (BufferError, AttributeError, ValueError):
            # retry on a copy when the raw buffer cannot be exposed directly
            data = mmh_hash_bytes(ob.copy().ravel().view('i1').data)
    return data, ob.dtype, ob.shape, ob.strides
294
+
295
+
296
# Read ``attr`` from a range index, falling back to the underscore-prefixed
# attribute of the same name when the public one is missing.
cdef inline _extract_range_index_attr(object range_index, str attr):
    try:
        value = getattr(range_index, attr)
    except AttributeError:  # pragma: no cover
        value = getattr(range_index, '_' + attr)
    return value
301
+
302
+
303
# Tokenize a pandas index from its name(s) and content. A ``RangeIndex`` is
# described by its start/stop/step slice so its values are never
# materialized.
cdef list tokenize_pandas_index(ob):
    cdef long long start
    cdef long long stop
    # the original declared an unused ``end`` and left ``step`` untyped
    cdef long long step
    if isinstance(ob, pd.RangeIndex):
        start = _extract_range_index_attr(ob, 'start')
        stop = _extract_range_index_attr(ob, 'stop')
        step = _extract_range_index_attr(ob, 'step')
        # for range index, there is no need to get the values
        return iterative_tokenize([ob.name, getattr(ob, 'names', None), slice(start, stop, step)])
    else:
        return iterative_tokenize([ob.name, getattr(ob, 'names', None), ob.values])
315
+
316
+
317
# Tokenize a Series from its name, dtype, values and index.
cdef list tokenize_pandas_series(ob):
    cdef list parts = [ob.name, ob.dtype, ob.values, ob.index]
    return iterative_tokenize(parts)
319
+
320
+
321
# Tokenize a DataFrame from the values of its internal blocks, followed by
# its columns and its index.
cdef list tokenize_pandas_dataframe(ob):
    cdef list parts = [blk.values for blk in ob._data.blocks]
    parts.append(ob.columns)
    parts.append(ob.index)
    return iterative_tokenize(parts)
325
+
326
+
327
# Tokenize a Categorical from its elements followed by its shape.
cdef list tokenize_pandas_categorical(ob):
    parts = ob.to_list()
    parts.append(ob.shape)
    tokens = iterative_tokenize(parts)
    return tokens
331
+
332
+
333
# Tokenize an extension dtype by its name only.
cdef list tokenize_pd_extension_dtype(ob):
    cdef list parts = [ob.name]
    return iterative_tokenize(parts)
335
+
336
+
337
# Tokenize a categorical dtype from its categories and its ordered flag.
cdef list tokenize_categories_dtype(ob):
    cdef list parts = [ob.categories, ob.ordered]
    return iterative_tokenize(parts)
339
+
340
+
341
# Tokenize an interval dtype from its class name and its subtype.
cdef list tokenize_interval_dtype(ob):
    cdef list parts = [type(ob).__name__, ob.subtype]
    return iterative_tokenize(parts)
343
+
344
+
345
# Tokenize a time-like array from its ``asi8`` representation and its dtype.
cdef list tokenize_pandas_time_arrays(ob):
    cdef list parts = [ob.asi8, ob.dtype]
    return iterative_tokenize(parts)
347
+
348
+
349
# Tokenize a tick offset by its frequency string.
cdef list tokenize_pandas_tick(ob):
    cdef list parts = [ob.freqstr]
    return iterative_tokenize(parts)
351
+
352
+
353
# Tokenize an interval array from its bounds and its closedness, read from
# ``inclusive`` when this pandas build has that attribute and from
# ``closed`` otherwise.
cdef list tokenize_pandas_interval_arrays(ob):  # pragma: no cover
    cdef list parts = [
        ob.left,
        ob.right,
        ob.inclusive if _has_interval_array_inclusive else ob.closed,
    ]
    return iterative_tokenize(parts)
358
+
359
+
360
# Tokenize a SQLAlchemy data type by its ``repr``.
cdef list tokenize_sqlalchemy_data_type(ob):
    cdef list parts = [repr(ob)]
    return iterative_tokenize(parts)
362
+
363
+
364
# Tokenize a SQLAlchemy selectable by its string form.
cdef list tokenize_sqlalchemy_selectable(ob):
    cdef list parts = [str(ob)]
    return iterative_tokenize(parts)
366
+
367
+
368
# Tokenize an enum member from the identity and name of its class plus the
# member name. ``id()`` makes the token specific to the running process.
cdef list tokenize_enum(ob):
    enum_cls = type(ob)
    cdef list parts = [id(enum_cls), enum_cls.__name__, ob.name]
    return iterative_tokenize(parts)
371
+
372
+
373
@lru_cache(500)
def tokenize_function(ob):
    """Tokenize a callable; results are memoized with an LRU cache of 500 entries.

    A ``partial`` yields a tuple of (token of the wrapped function, tokenized
    positional arguments, tokenized keyword items or ``None``). Other
    callables are pickled with protocol 0 (plain functions additionally
    carry their ``id``), then ``cloudpickle`` is tried, and ``str(ob)`` is
    the last resort.
    """
    if not isinstance(ob, partial):
        try:
            if isinstance(ob, types.FunctionType):
                return iterative_tokenize([pickle.dumps(ob, protocol=0), id(ob)])
            return pickle.dumps(ob, protocol=0)
        except:
            # plain pickle failed; fall through to cloudpickle
            pass
        try:
            return cloudpickle.dumps(ob, protocol=0)
        except:
            return str(ob)

    bound_args = iterative_tokenize(ob.args)
    if ob.keywords:
        bound_kwargs = iterative_tokenize(ob.keywords.items())
    else:
        bound_kwargs = None
    return tokenize_function(ob.func), bound_args, bound_kwargs
391
+
392
+
393
@lru_cache(500)
def tokenize_pickled_with_cache(ob):
    """Return ``pickle.dumps(ob)``, memoized with an LRU cache of 500 entries."""
    pickled = pickle.dumps(ob)
    return pickled
396
+
397
+
398
def tokenize_cupy(ob):
    """Tokenize a cupy array from its serialization header and data pointer."""
    from .serialization import serialize

    serialized = serialize(ob)
    # only the header is used; the serialized buffers are ignored
    header = serialized[0]
    return iterative_tokenize([header, ob.data.ptr])
402
+
403
+
404
def tokenize_cudf(ob):
    """Tokenize a cudf object from its serialization header and the
    (pointer, size) pair of every serialized buffer."""
    from .serialization import serialize

    header, buffers = serialize(ob)
    parts = [header]
    parts.extend((buf.ptr, buf.size) for buf in buffers)
    return iterative_tokenize(parts)
408
+
409
+
410
# Module-wide tokenizer instance; everything below populates it.
cdef Tokenizer tokenize_handler = Tokenizer()

# Types whose instances are used directly as their own token.
cdef set _primitive_types = {
    int, float, str, unicode, bytes, complex, type(None), type, slice, date, datetime, timedelta
}
for t in _primitive_types:
    tokenize_handler.register(t, lambda ob: ob)

for t in (np.dtype, np.generic):
    tokenize_handler.register(t, lambda ob: ob)

# Containers are flattened element by element.
for t in (list, tuple, dict, set):
    tokenize_handler.register(t, iterative_tokenize)

# numpy and standard-library types
tokenize_handler.register(np.ndarray, tokenize_numpy)
tokenize_handler.register(np.random.RandomState, lambda ob: iterative_tokenize(ob.get_state()))
tokenize_handler.register(memoryview, lambda ob: mmh3_hash_from_buffer(ob))
tokenize_handler.register(Enum, tokenize_enum)
tokenize_handler.register(tzinfo, tokenize_pickled_with_cache)

# pandas containers and dtypes
tokenize_handler.register(pd.Index, tokenize_pandas_index)
tokenize_handler.register(pd.Series, tokenize_pandas_series)
tokenize_handler.register(pd.DataFrame, tokenize_pandas_dataframe)
tokenize_handler.register(pd.Categorical, tokenize_pandas_categorical)
tokenize_handler.register(pd.CategoricalDtype, tokenize_categories_dtype)
tokenize_handler.register(pd.IntervalDtype, tokenize_interval_dtype)
tokenize_handler.register(pd.api.extensions.ExtensionDtype, tokenize_pd_extension_dtype)

# pandas extension arrays; a tuple registers the handler for every member
tokenize_handler.register(
    (pd.arrays.DatetimeArray, pd.arrays.TimedeltaArray, pd.arrays.PeriodArray),
    tokenize_pandas_time_arrays,
)
tokenize_handler.register(pd.arrays.IntervalArray, tokenize_pandas_interval_arrays)

# Optional GPU libraries are registered lazily by dotted name.
if _has_cupy:
    tokenize_handler.register('cupy.ndarray', tokenize_cupy)
if _has_cudf:
    tokenize_handler.register(
        ('cudf.DataFrame', 'cudf.Series', 'cudf.Index'), tokenize_cudf
    )

if PDTick is not None:
    tokenize_handler.register(PDTick, tokenize_pandas_tick)
if _has_sqlalchemy:
    tokenize_handler.register(
        "sqlalchemy.sql.sqltypes.TypeEngine", tokenize_sqlalchemy_data_type
    )
    tokenize_handler.register(
        "sqlalchemy.sql.Selectable", tokenize_sqlalchemy_selectable
    )


cpdef register_tokenizer(cls, handler):
    """Register ``handler`` as the tokenizer used for ``cls``."""
    tokenize_handler.register(cls, handler)
459
+
460
+
461
# Integer division of ``x`` by ``y`` that adds one whenever the remainder is
# non-zero. ``cdivision(True)`` selects C division semantics.
# NOTE(review): presumably only meant for non-negative operands and a
# non-zero divisor - confirm against callers.
@cython.nonecheck(False)
@cython.cdivision(True)
cpdef long long ceildiv(long long x, long long y) nogil:
    cdef long long quotient = x // y
    if x % y != 0:
        quotient += 1
    return quotient
465
+
466
+
467
cdef class Timer:
    """Context manager that measures wall-clock time with ``time.time()``.

    After the ``with`` block exits, ``duration`` holds the elapsed seconds.
    """
    # timestamp taken when the block is entered
    cdef object _start
    # elapsed seconds, set on exit; readable from Python
    cdef readonly object duration

    def __enter__(self):
        self._start = time.time()
        return self

    def __exit__(self, *exc_info):
        finished = time.time()
        self.duration = finished - self._start
477
+
478
+
479
# Module-level 64-bit Mersenne Twister used for random ids, and a flag that
# records whether it has been seeded yet.
cdef mt19937_64 _rnd_gen
cdef bint _rnd_is_seed_set = False


cpdef void reset_id_random_seed() except *:
    """Re-seed the id generator with 64 fresh bits from ``random.getrandbits``."""
    global _rnd_is_seed_set
    cdef bytes seed_bytes = getrandbits(64).to_bytes(8, "little")
    cdef char *raw_seed = seed_bytes

    # reinterpret the 8 seed bytes as one unsigned 64-bit integer
    _rnd_gen.seed((<uint_fast64_t *>raw_seed)[0])
    _rnd_is_seed_set = True
490
+
491
+
492
cpdef bytes new_random_id(int byte_len):
    """Return ``byte_len`` random bytes drawn from the module generator.

    The generator is seeded on first use. Random data is produced in 64-bit
    words; up to four words are kept on the stack and larger requests use a
    heap buffer that is released before returning.

    Raises ``MemoryError`` when the heap buffer cannot be allocated.
    """
    cdef uint_fast64_t *res_ptr
    cdef uint_fast64_t res_data[4]
    cdef int i, qw_num = byte_len >> 3

    if not _rnd_is_seed_set:
        reset_id_random_seed()

    # round the word count up when byte_len is not a multiple of 8
    if (qw_num << 3) < byte_len:
        qw_num += 1

    if qw_num <= 4:
        # use stack memory to accelerate
        res_ptr = res_data
    else:
        res_ptr = <uint_fast64_t *>malloc(qw_num << 3)
        if res_ptr == NULL:
            # never write through a failed allocation
            raise MemoryError()

    try:
        for i in range(qw_num):
            res_ptr[i] = _rnd_gen()
        return <bytes>((<char *>&(res_ptr[0]))[:byte_len])
    finally:
        # free memory if allocated by malloc
        if res_ptr != res_data:
            free(res_ptr)
518
+
519
+
520
# Directory that contains this module; frames whose code file path starts
# with it are treated as package-internal.
cdef str _package_root = os.path.dirname(__file__)


def get_user_call_point():
    """Return the nearest stack frame whose code file is outside the package.

    Walks outward from the current frame and returns ``None`` when every
    frame belongs to the package.
    """
    cdef str code_path
    cdef object frame = inspect.currentframe()
    while frame is not None:
        code_path = frame.f_code.co_filename
        if not code_path.startswith(_package_root):
            return frame
        frame = frame.f_back
    return None
532
+
533
+
534
# Names exported by ``from ... import *``.
__all__ = [
    "ceildiv",
    "get_user_call_point",
    "new_random_id",
    "register_tokenizer",
    "reset_id_random_seed",
    "to_str",
    "to_binary",
    "to_text",
    "tokenize",
    "tokenize_int",
    "Timer",
    "TypeDispatcher",
]