maxframe 0.1.0b5__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of maxframe might be problematic. Click here for more details.

Files changed (647)
  1. maxframe/__init__.py +32 -0
  2. maxframe/_utils.cpython-311-darwin.so +0 -0
  3. maxframe/_utils.pxd +33 -0
  4. maxframe/_utils.pyx +547 -0
  5. maxframe/codegen.py +528 -0
  6. maxframe/config/__init__.py +15 -0
  7. maxframe/config/config.py +443 -0
  8. maxframe/config/tests/__init__.py +13 -0
  9. maxframe/config/tests/test_config.py +103 -0
  10. maxframe/config/tests/test_validators.py +34 -0
  11. maxframe/config/validators.py +57 -0
  12. maxframe/conftest.py +139 -0
  13. maxframe/core/__init__.py +65 -0
  14. maxframe/core/base.py +156 -0
  15. maxframe/core/entity/__init__.py +44 -0
  16. maxframe/core/entity/chunks.py +68 -0
  17. maxframe/core/entity/core.py +152 -0
  18. maxframe/core/entity/executable.py +337 -0
  19. maxframe/core/entity/fuse.py +73 -0
  20. maxframe/core/entity/objects.py +100 -0
  21. maxframe/core/entity/output_types.py +90 -0
  22. maxframe/core/entity/tileables.py +438 -0
  23. maxframe/core/entity/utils.py +24 -0
  24. maxframe/core/graph/__init__.py +17 -0
  25. maxframe/core/graph/builder/__init__.py +16 -0
  26. maxframe/core/graph/builder/base.py +86 -0
  27. maxframe/core/graph/builder/chunk.py +430 -0
  28. maxframe/core/graph/builder/tileable.py +34 -0
  29. maxframe/core/graph/builder/utils.py +41 -0
  30. maxframe/core/graph/core.cpython-311-darwin.so +0 -0
  31. maxframe/core/graph/core.pyx +467 -0
  32. maxframe/core/graph/entity.py +171 -0
  33. maxframe/core/graph/tests/__init__.py +13 -0
  34. maxframe/core/graph/tests/test_graph.py +205 -0
  35. maxframe/core/mode.py +96 -0
  36. maxframe/core/operator/__init__.py +34 -0
  37. maxframe/core/operator/base.py +450 -0
  38. maxframe/core/operator/core.py +276 -0
  39. maxframe/core/operator/fetch.py +53 -0
  40. maxframe/core/operator/fuse.py +29 -0
  41. maxframe/core/operator/objects.py +72 -0
  42. maxframe/core/operator/shuffle.py +111 -0
  43. maxframe/core/operator/tests/__init__.py +13 -0
  44. maxframe/core/operator/tests/test_core.py +64 -0
  45. maxframe/core/tests/__init__.py +13 -0
  46. maxframe/core/tests/test_mode.py +75 -0
  47. maxframe/dataframe/__init__.py +81 -0
  48. maxframe/dataframe/arithmetic/__init__.py +359 -0
  49. maxframe/dataframe/arithmetic/abs.py +33 -0
  50. maxframe/dataframe/arithmetic/add.py +60 -0
  51. maxframe/dataframe/arithmetic/arccos.py +28 -0
  52. maxframe/dataframe/arithmetic/arccosh.py +28 -0
  53. maxframe/dataframe/arithmetic/arcsin.py +28 -0
  54. maxframe/dataframe/arithmetic/arcsinh.py +28 -0
  55. maxframe/dataframe/arithmetic/arctan.py +28 -0
  56. maxframe/dataframe/arithmetic/arctanh.py +28 -0
  57. maxframe/dataframe/arithmetic/around.py +152 -0
  58. maxframe/dataframe/arithmetic/bitwise_and.py +46 -0
  59. maxframe/dataframe/arithmetic/bitwise_or.py +50 -0
  60. maxframe/dataframe/arithmetic/bitwise_xor.py +46 -0
  61. maxframe/dataframe/arithmetic/ceil.py +28 -0
  62. maxframe/dataframe/arithmetic/core.py +342 -0
  63. maxframe/dataframe/arithmetic/cos.py +28 -0
  64. maxframe/dataframe/arithmetic/cosh.py +28 -0
  65. maxframe/dataframe/arithmetic/degrees.py +28 -0
  66. maxframe/dataframe/arithmetic/docstring.py +442 -0
  67. maxframe/dataframe/arithmetic/equal.py +56 -0
  68. maxframe/dataframe/arithmetic/exp.py +28 -0
  69. maxframe/dataframe/arithmetic/exp2.py +28 -0
  70. maxframe/dataframe/arithmetic/expm1.py +28 -0
  71. maxframe/dataframe/arithmetic/floor.py +28 -0
  72. maxframe/dataframe/arithmetic/floordiv.py +64 -0
  73. maxframe/dataframe/arithmetic/greater.py +57 -0
  74. maxframe/dataframe/arithmetic/greater_equal.py +57 -0
  75. maxframe/dataframe/arithmetic/invert.py +33 -0
  76. maxframe/dataframe/arithmetic/is_ufuncs.py +62 -0
  77. maxframe/dataframe/arithmetic/less.py +57 -0
  78. maxframe/dataframe/arithmetic/less_equal.py +57 -0
  79. maxframe/dataframe/arithmetic/log.py +28 -0
  80. maxframe/dataframe/arithmetic/log10.py +28 -0
  81. maxframe/dataframe/arithmetic/log2.py +28 -0
  82. maxframe/dataframe/arithmetic/mod.py +60 -0
  83. maxframe/dataframe/arithmetic/multiply.py +60 -0
  84. maxframe/dataframe/arithmetic/negative.py +33 -0
  85. maxframe/dataframe/arithmetic/not_equal.py +56 -0
  86. maxframe/dataframe/arithmetic/power.py +68 -0
  87. maxframe/dataframe/arithmetic/radians.py +28 -0
  88. maxframe/dataframe/arithmetic/sin.py +28 -0
  89. maxframe/dataframe/arithmetic/sinh.py +28 -0
  90. maxframe/dataframe/arithmetic/sqrt.py +28 -0
  91. maxframe/dataframe/arithmetic/subtract.py +64 -0
  92. maxframe/dataframe/arithmetic/tan.py +28 -0
  93. maxframe/dataframe/arithmetic/tanh.py +28 -0
  94. maxframe/dataframe/arithmetic/tests/__init__.py +13 -0
  95. maxframe/dataframe/arithmetic/tests/test_arithmetic.py +695 -0
  96. maxframe/dataframe/arithmetic/truediv.py +64 -0
  97. maxframe/dataframe/arithmetic/trunc.py +28 -0
  98. maxframe/dataframe/arrays.py +864 -0
  99. maxframe/dataframe/core.py +2417 -0
  100. maxframe/dataframe/datasource/__init__.py +15 -0
  101. maxframe/dataframe/datasource/core.py +81 -0
  102. maxframe/dataframe/datasource/dataframe.py +59 -0
  103. maxframe/dataframe/datasource/date_range.py +504 -0
  104. maxframe/dataframe/datasource/from_index.py +54 -0
  105. maxframe/dataframe/datasource/from_records.py +107 -0
  106. maxframe/dataframe/datasource/from_tensor.py +419 -0
  107. maxframe/dataframe/datasource/index.py +117 -0
  108. maxframe/dataframe/datasource/read_csv.py +528 -0
  109. maxframe/dataframe/datasource/read_odps_query.py +299 -0
  110. maxframe/dataframe/datasource/read_odps_table.py +253 -0
  111. maxframe/dataframe/datasource/read_parquet.py +421 -0
  112. maxframe/dataframe/datasource/series.py +55 -0
  113. maxframe/dataframe/datasource/tests/__init__.py +13 -0
  114. maxframe/dataframe/datasource/tests/test_datasource.py +401 -0
  115. maxframe/dataframe/datastore/__init__.py +26 -0
  116. maxframe/dataframe/datastore/core.py +19 -0
  117. maxframe/dataframe/datastore/to_csv.py +227 -0
  118. maxframe/dataframe/datastore/to_odps.py +162 -0
  119. maxframe/dataframe/extensions/__init__.py +41 -0
  120. maxframe/dataframe/extensions/accessor.py +50 -0
  121. maxframe/dataframe/extensions/reshuffle.py +83 -0
  122. maxframe/dataframe/extensions/tests/__init__.py +13 -0
  123. maxframe/dataframe/extensions/tests/test_extensions.py +38 -0
  124. maxframe/dataframe/fetch/__init__.py +15 -0
  125. maxframe/dataframe/fetch/core.py +86 -0
  126. maxframe/dataframe/groupby/__init__.py +82 -0
  127. maxframe/dataframe/groupby/aggregation.py +350 -0
  128. maxframe/dataframe/groupby/apply.py +251 -0
  129. maxframe/dataframe/groupby/core.py +179 -0
  130. maxframe/dataframe/groupby/cum.py +124 -0
  131. maxframe/dataframe/groupby/fill.py +141 -0
  132. maxframe/dataframe/groupby/getitem.py +92 -0
  133. maxframe/dataframe/groupby/head.py +105 -0
  134. maxframe/dataframe/groupby/sample.py +214 -0
  135. maxframe/dataframe/groupby/tests/__init__.py +13 -0
  136. maxframe/dataframe/groupby/tests/test_groupby.py +374 -0
  137. maxframe/dataframe/groupby/transform.py +255 -0
  138. maxframe/dataframe/indexing/__init__.py +84 -0
  139. maxframe/dataframe/indexing/add_prefix_suffix.py +110 -0
  140. maxframe/dataframe/indexing/align.py +349 -0
  141. maxframe/dataframe/indexing/at.py +83 -0
  142. maxframe/dataframe/indexing/getitem.py +204 -0
  143. maxframe/dataframe/indexing/iat.py +37 -0
  144. maxframe/dataframe/indexing/iloc.py +566 -0
  145. maxframe/dataframe/indexing/insert.py +86 -0
  146. maxframe/dataframe/indexing/loc.py +411 -0
  147. maxframe/dataframe/indexing/reindex.py +526 -0
  148. maxframe/dataframe/indexing/rename.py +462 -0
  149. maxframe/dataframe/indexing/rename_axis.py +209 -0
  150. maxframe/dataframe/indexing/reset_index.py +402 -0
  151. maxframe/dataframe/indexing/sample.py +221 -0
  152. maxframe/dataframe/indexing/set_axis.py +194 -0
  153. maxframe/dataframe/indexing/set_index.py +61 -0
  154. maxframe/dataframe/indexing/setitem.py +130 -0
  155. maxframe/dataframe/indexing/tests/__init__.py +13 -0
  156. maxframe/dataframe/indexing/tests/test_indexing.py +488 -0
  157. maxframe/dataframe/indexing/where.py +308 -0
  158. maxframe/dataframe/initializer.py +288 -0
  159. maxframe/dataframe/merge/__init__.py +32 -0
  160. maxframe/dataframe/merge/append.py +121 -0
  161. maxframe/dataframe/merge/concat.py +325 -0
  162. maxframe/dataframe/merge/merge.py +593 -0
  163. maxframe/dataframe/merge/tests/__init__.py +13 -0
  164. maxframe/dataframe/merge/tests/test_merge.py +215 -0
  165. maxframe/dataframe/misc/__init__.py +134 -0
  166. maxframe/dataframe/misc/_duplicate.py +46 -0
  167. maxframe/dataframe/misc/accessor.py +276 -0
  168. maxframe/dataframe/misc/apply.py +692 -0
  169. maxframe/dataframe/misc/astype.py +236 -0
  170. maxframe/dataframe/misc/case_when.py +141 -0
  171. maxframe/dataframe/misc/check_monotonic.py +84 -0
  172. maxframe/dataframe/misc/cut.py +383 -0
  173. maxframe/dataframe/misc/datetimes.py +79 -0
  174. maxframe/dataframe/misc/describe.py +108 -0
  175. maxframe/dataframe/misc/diff.py +210 -0
  176. maxframe/dataframe/misc/drop.py +440 -0
  177. maxframe/dataframe/misc/drop_duplicates.py +248 -0
  178. maxframe/dataframe/misc/duplicated.py +292 -0
  179. maxframe/dataframe/misc/eval.py +728 -0
  180. maxframe/dataframe/misc/explode.py +171 -0
  181. maxframe/dataframe/misc/get_dummies.py +208 -0
  182. maxframe/dataframe/misc/isin.py +217 -0
  183. maxframe/dataframe/misc/map.py +236 -0
  184. maxframe/dataframe/misc/melt.py +162 -0
  185. maxframe/dataframe/misc/memory_usage.py +248 -0
  186. maxframe/dataframe/misc/pct_change.py +150 -0
  187. maxframe/dataframe/misc/pivot_table.py +262 -0
  188. maxframe/dataframe/misc/qcut.py +104 -0
  189. maxframe/dataframe/misc/select_dtypes.py +104 -0
  190. maxframe/dataframe/misc/shift.py +256 -0
  191. maxframe/dataframe/misc/stack.py +238 -0
  192. maxframe/dataframe/misc/string_.py +221 -0
  193. maxframe/dataframe/misc/tests/__init__.py +13 -0
  194. maxframe/dataframe/misc/tests/test_misc.py +468 -0
  195. maxframe/dataframe/misc/to_numeric.py +178 -0
  196. maxframe/dataframe/misc/transform.py +361 -0
  197. maxframe/dataframe/misc/transpose.py +136 -0
  198. maxframe/dataframe/misc/value_counts.py +182 -0
  199. maxframe/dataframe/missing/__init__.py +53 -0
  200. maxframe/dataframe/missing/checkna.py +223 -0
  201. maxframe/dataframe/missing/dropna.py +280 -0
  202. maxframe/dataframe/missing/fillna.py +275 -0
  203. maxframe/dataframe/missing/replace.py +439 -0
  204. maxframe/dataframe/missing/tests/__init__.py +13 -0
  205. maxframe/dataframe/missing/tests/test_missing.py +89 -0
  206. maxframe/dataframe/operators.py +273 -0
  207. maxframe/dataframe/plotting/__init__.py +40 -0
  208. maxframe/dataframe/plotting/core.py +78 -0
  209. maxframe/dataframe/plotting/tests/__init__.py +13 -0
  210. maxframe/dataframe/plotting/tests/test_plotting.py +136 -0
  211. maxframe/dataframe/reduction/__init__.py +107 -0
  212. maxframe/dataframe/reduction/aggregation.py +344 -0
  213. maxframe/dataframe/reduction/all.py +78 -0
  214. maxframe/dataframe/reduction/any.py +78 -0
  215. maxframe/dataframe/reduction/core.py +837 -0
  216. maxframe/dataframe/reduction/count.py +59 -0
  217. maxframe/dataframe/reduction/cummax.py +30 -0
  218. maxframe/dataframe/reduction/cummin.py +30 -0
  219. maxframe/dataframe/reduction/cumprod.py +30 -0
  220. maxframe/dataframe/reduction/cumsum.py +30 -0
  221. maxframe/dataframe/reduction/custom_reduction.py +42 -0
  222. maxframe/dataframe/reduction/kurtosis.py +104 -0
  223. maxframe/dataframe/reduction/max.py +65 -0
  224. maxframe/dataframe/reduction/mean.py +61 -0
  225. maxframe/dataframe/reduction/min.py +65 -0
  226. maxframe/dataframe/reduction/nunique.py +141 -0
  227. maxframe/dataframe/reduction/prod.py +76 -0
  228. maxframe/dataframe/reduction/reduction_size.py +36 -0
  229. maxframe/dataframe/reduction/sem.py +69 -0
  230. maxframe/dataframe/reduction/skew.py +89 -0
  231. maxframe/dataframe/reduction/std.py +53 -0
  232. maxframe/dataframe/reduction/str_concat.py +48 -0
  233. maxframe/dataframe/reduction/sum.py +77 -0
  234. maxframe/dataframe/reduction/tests/__init__.py +13 -0
  235. maxframe/dataframe/reduction/tests/test_reduction.py +486 -0
  236. maxframe/dataframe/reduction/unique.py +90 -0
  237. maxframe/dataframe/reduction/var.py +72 -0
  238. maxframe/dataframe/sort/__init__.py +34 -0
  239. maxframe/dataframe/sort/core.py +36 -0
  240. maxframe/dataframe/sort/sort_index.py +153 -0
  241. maxframe/dataframe/sort/sort_values.py +311 -0
  242. maxframe/dataframe/sort/tests/__init__.py +13 -0
  243. maxframe/dataframe/sort/tests/test_sort.py +81 -0
  244. maxframe/dataframe/statistics/__init__.py +33 -0
  245. maxframe/dataframe/statistics/corr.py +280 -0
  246. maxframe/dataframe/statistics/quantile.py +341 -0
  247. maxframe/dataframe/statistics/tests/__init__.py +13 -0
  248. maxframe/dataframe/statistics/tests/test_statistics.py +82 -0
  249. maxframe/dataframe/tests/__init__.py +13 -0
  250. maxframe/dataframe/tests/test_initializer.py +29 -0
  251. maxframe/dataframe/tseries/__init__.py +13 -0
  252. maxframe/dataframe/tseries/tests/__init__.py +13 -0
  253. maxframe/dataframe/tseries/tests/test_tseries.py +30 -0
  254. maxframe/dataframe/tseries/to_datetime.py +297 -0
  255. maxframe/dataframe/ufunc/__init__.py +27 -0
  256. maxframe/dataframe/ufunc/tensor.py +54 -0
  257. maxframe/dataframe/ufunc/ufunc.py +52 -0
  258. maxframe/dataframe/utils.py +1267 -0
  259. maxframe/dataframe/window/__init__.py +29 -0
  260. maxframe/dataframe/window/aggregation.py +96 -0
  261. maxframe/dataframe/window/core.py +69 -0
  262. maxframe/dataframe/window/ewm.py +249 -0
  263. maxframe/dataframe/window/expanding.py +147 -0
  264. maxframe/dataframe/window/rolling.py +376 -0
  265. maxframe/dataframe/window/tests/__init__.py +13 -0
  266. maxframe/dataframe/window/tests/test_ewm.py +70 -0
  267. maxframe/dataframe/window/tests/test_expanding.py +66 -0
  268. maxframe/dataframe/window/tests/test_rolling.py +57 -0
  269. maxframe/env.py +33 -0
  270. maxframe/errors.py +21 -0
  271. maxframe/extension.py +81 -0
  272. maxframe/learn/__init__.py +17 -0
  273. maxframe/learn/contrib/__init__.py +17 -0
  274. maxframe/learn/contrib/pytorch/__init__.py +16 -0
  275. maxframe/learn/contrib/pytorch/run_function.py +110 -0
  276. maxframe/learn/contrib/pytorch/run_script.py +102 -0
  277. maxframe/learn/contrib/pytorch/tests/__init__.py +13 -0
  278. maxframe/learn/contrib/pytorch/tests/test_pytorch.py +42 -0
  279. maxframe/learn/contrib/utils.py +52 -0
  280. maxframe/learn/contrib/xgboost/__init__.py +26 -0
  281. maxframe/learn/contrib/xgboost/classifier.py +86 -0
  282. maxframe/learn/contrib/xgboost/core.py +156 -0
  283. maxframe/learn/contrib/xgboost/dmatrix.py +150 -0
  284. maxframe/learn/contrib/xgboost/predict.py +138 -0
  285. maxframe/learn/contrib/xgboost/regressor.py +78 -0
  286. maxframe/learn/contrib/xgboost/tests/__init__.py +13 -0
  287. maxframe/learn/contrib/xgboost/tests/test_core.py +43 -0
  288. maxframe/learn/contrib/xgboost/train.py +121 -0
  289. maxframe/learn/utils/__init__.py +15 -0
  290. maxframe/learn/utils/core.py +29 -0
  291. maxframe/lib/__init__.py +15 -0
  292. maxframe/lib/aio/__init__.py +27 -0
  293. maxframe/lib/aio/_runners.py +162 -0
  294. maxframe/lib/aio/_threads.py +35 -0
  295. maxframe/lib/aio/base.py +82 -0
  296. maxframe/lib/aio/file.py +85 -0
  297. maxframe/lib/aio/isolation.py +100 -0
  298. maxframe/lib/aio/lru.py +242 -0
  299. maxframe/lib/aio/parallelism.py +37 -0
  300. maxframe/lib/aio/tests/__init__.py +13 -0
  301. maxframe/lib/aio/tests/test_aio_file.py +55 -0
  302. maxframe/lib/compression.py +55 -0
  303. maxframe/lib/cython/__init__.py +13 -0
  304. maxframe/lib/cython/libcpp.pxd +30 -0
  305. maxframe/lib/filesystem/__init__.py +21 -0
  306. maxframe/lib/filesystem/_glob.py +173 -0
  307. maxframe/lib/filesystem/_oss_lib/__init__.py +13 -0
  308. maxframe/lib/filesystem/_oss_lib/common.py +198 -0
  309. maxframe/lib/filesystem/_oss_lib/glob.py +147 -0
  310. maxframe/lib/filesystem/_oss_lib/handle.py +156 -0
  311. maxframe/lib/filesystem/arrow.py +236 -0
  312. maxframe/lib/filesystem/base.py +263 -0
  313. maxframe/lib/filesystem/core.py +95 -0
  314. maxframe/lib/filesystem/fsmap.py +164 -0
  315. maxframe/lib/filesystem/hdfs.py +31 -0
  316. maxframe/lib/filesystem/local.py +112 -0
  317. maxframe/lib/filesystem/oss.py +157 -0
  318. maxframe/lib/filesystem/tests/__init__.py +13 -0
  319. maxframe/lib/filesystem/tests/test_filesystem.py +223 -0
  320. maxframe/lib/filesystem/tests/test_oss.py +182 -0
  321. maxframe/lib/functools_compat.py +81 -0
  322. maxframe/lib/mmh3.cpython-311-darwin.so +0 -0
  323. maxframe/lib/mmh3_src/MurmurHash3.cpp +339 -0
  324. maxframe/lib/mmh3_src/MurmurHash3.h +43 -0
  325. maxframe/lib/mmh3_src/mmh3module.cpp +387 -0
  326. maxframe/lib/sparse/__init__.py +861 -0
  327. maxframe/lib/sparse/array.py +1604 -0
  328. maxframe/lib/sparse/core.py +92 -0
  329. maxframe/lib/sparse/matrix.py +241 -0
  330. maxframe/lib/sparse/tests/__init__.py +15 -0
  331. maxframe/lib/sparse/tests/test_sparse.py +476 -0
  332. maxframe/lib/sparse/vector.py +150 -0
  333. maxframe/lib/tblib/LICENSE +20 -0
  334. maxframe/lib/tblib/__init__.py +327 -0
  335. maxframe/lib/tblib/cpython.py +83 -0
  336. maxframe/lib/tblib/decorators.py +44 -0
  337. maxframe/lib/tblib/pickling_support.py +90 -0
  338. maxframe/lib/tests/__init__.py +13 -0
  339. maxframe/lib/tests/test_wrapped_pickle.py +51 -0
  340. maxframe/lib/version.py +620 -0
  341. maxframe/lib/wrapped_pickle.py +139 -0
  342. maxframe/mixin.py +100 -0
  343. maxframe/odpsio/__init__.py +21 -0
  344. maxframe/odpsio/arrow.py +91 -0
  345. maxframe/odpsio/schema.py +364 -0
  346. maxframe/odpsio/tableio.py +322 -0
  347. maxframe/odpsio/tests/__init__.py +13 -0
  348. maxframe/odpsio/tests/test_arrow.py +88 -0
  349. maxframe/odpsio/tests/test_schema.py +297 -0
  350. maxframe/odpsio/tests/test_tableio.py +136 -0
  351. maxframe/odpsio/tests/test_volumeio.py +90 -0
  352. maxframe/odpsio/volumeio.py +95 -0
  353. maxframe/opcodes.py +590 -0
  354. maxframe/protocol.py +415 -0
  355. maxframe/remote/__init__.py +18 -0
  356. maxframe/remote/core.py +210 -0
  357. maxframe/remote/run_script.py +121 -0
  358. maxframe/serialization/__init__.py +26 -0
  359. maxframe/serialization/arrow.py +95 -0
  360. maxframe/serialization/core.cpython-311-darwin.so +0 -0
  361. maxframe/serialization/core.pxd +44 -0
  362. maxframe/serialization/core.pyi +61 -0
  363. maxframe/serialization/core.pyx +1094 -0
  364. maxframe/serialization/exception.py +86 -0
  365. maxframe/serialization/maxframe_objects.py +39 -0
  366. maxframe/serialization/numpy.py +91 -0
  367. maxframe/serialization/pandas.py +202 -0
  368. maxframe/serialization/scipy.py +71 -0
  369. maxframe/serialization/serializables/__init__.py +55 -0
  370. maxframe/serialization/serializables/core.py +262 -0
  371. maxframe/serialization/serializables/field.py +624 -0
  372. maxframe/serialization/serializables/field_type.py +589 -0
  373. maxframe/serialization/serializables/tests/__init__.py +13 -0
  374. maxframe/serialization/serializables/tests/test_field_type.py +121 -0
  375. maxframe/serialization/serializables/tests/test_serializable.py +250 -0
  376. maxframe/serialization/tests/__init__.py +13 -0
  377. maxframe/serialization/tests/test_serial.py +412 -0
  378. maxframe/session.py +1310 -0
  379. maxframe/tensor/__init__.py +183 -0
  380. maxframe/tensor/arithmetic/__init__.py +315 -0
  381. maxframe/tensor/arithmetic/abs.py +68 -0
  382. maxframe/tensor/arithmetic/absolute.py +68 -0
  383. maxframe/tensor/arithmetic/add.py +82 -0
  384. maxframe/tensor/arithmetic/angle.py +72 -0
  385. maxframe/tensor/arithmetic/arccos.py +104 -0
  386. maxframe/tensor/arithmetic/arccosh.py +91 -0
  387. maxframe/tensor/arithmetic/arcsin.py +94 -0
  388. maxframe/tensor/arithmetic/arcsinh.py +86 -0
  389. maxframe/tensor/arithmetic/arctan.py +106 -0
  390. maxframe/tensor/arithmetic/arctan2.py +128 -0
  391. maxframe/tensor/arithmetic/arctanh.py +86 -0
  392. maxframe/tensor/arithmetic/around.py +114 -0
  393. maxframe/tensor/arithmetic/bitand.py +95 -0
  394. maxframe/tensor/arithmetic/bitor.py +102 -0
  395. maxframe/tensor/arithmetic/bitxor.py +95 -0
  396. maxframe/tensor/arithmetic/cbrt.py +66 -0
  397. maxframe/tensor/arithmetic/ceil.py +71 -0
  398. maxframe/tensor/arithmetic/clip.py +165 -0
  399. maxframe/tensor/arithmetic/conj.py +74 -0
  400. maxframe/tensor/arithmetic/copysign.py +78 -0
  401. maxframe/tensor/arithmetic/core.py +544 -0
  402. maxframe/tensor/arithmetic/cos.py +85 -0
  403. maxframe/tensor/arithmetic/cosh.py +72 -0
  404. maxframe/tensor/arithmetic/deg2rad.py +72 -0
  405. maxframe/tensor/arithmetic/degrees.py +77 -0
  406. maxframe/tensor/arithmetic/divide.py +114 -0
  407. maxframe/tensor/arithmetic/equal.py +76 -0
  408. maxframe/tensor/arithmetic/exp.py +106 -0
  409. maxframe/tensor/arithmetic/exp2.py +67 -0
  410. maxframe/tensor/arithmetic/expm1.py +79 -0
  411. maxframe/tensor/arithmetic/fabs.py +74 -0
  412. maxframe/tensor/arithmetic/fix.py +69 -0
  413. maxframe/tensor/arithmetic/float_power.py +103 -0
  414. maxframe/tensor/arithmetic/floor.py +77 -0
  415. maxframe/tensor/arithmetic/floordiv.py +94 -0
  416. maxframe/tensor/arithmetic/fmax.py +105 -0
  417. maxframe/tensor/arithmetic/fmin.py +106 -0
  418. maxframe/tensor/arithmetic/fmod.py +99 -0
  419. maxframe/tensor/arithmetic/frexp.py +92 -0
  420. maxframe/tensor/arithmetic/greater.py +77 -0
  421. maxframe/tensor/arithmetic/greater_equal.py +69 -0
  422. maxframe/tensor/arithmetic/hypot.py +77 -0
  423. maxframe/tensor/arithmetic/i0.py +89 -0
  424. maxframe/tensor/arithmetic/imag.py +67 -0
  425. maxframe/tensor/arithmetic/invert.py +110 -0
  426. maxframe/tensor/arithmetic/isclose.py +115 -0
  427. maxframe/tensor/arithmetic/iscomplex.py +64 -0
  428. maxframe/tensor/arithmetic/isfinite.py +106 -0
  429. maxframe/tensor/arithmetic/isinf.py +103 -0
  430. maxframe/tensor/arithmetic/isnan.py +82 -0
  431. maxframe/tensor/arithmetic/isreal.py +63 -0
  432. maxframe/tensor/arithmetic/ldexp.py +99 -0
  433. maxframe/tensor/arithmetic/less.py +69 -0
  434. maxframe/tensor/arithmetic/less_equal.py +69 -0
  435. maxframe/tensor/arithmetic/log.py +92 -0
  436. maxframe/tensor/arithmetic/log10.py +85 -0
  437. maxframe/tensor/arithmetic/log1p.py +95 -0
  438. maxframe/tensor/arithmetic/log2.py +85 -0
  439. maxframe/tensor/arithmetic/logaddexp.py +80 -0
  440. maxframe/tensor/arithmetic/logaddexp2.py +78 -0
  441. maxframe/tensor/arithmetic/logical_and.py +81 -0
  442. maxframe/tensor/arithmetic/logical_not.py +74 -0
  443. maxframe/tensor/arithmetic/logical_or.py +82 -0
  444. maxframe/tensor/arithmetic/logical_xor.py +88 -0
  445. maxframe/tensor/arithmetic/lshift.py +82 -0
  446. maxframe/tensor/arithmetic/maximum.py +108 -0
  447. maxframe/tensor/arithmetic/minimum.py +108 -0
  448. maxframe/tensor/arithmetic/mod.py +104 -0
  449. maxframe/tensor/arithmetic/modf.py +83 -0
  450. maxframe/tensor/arithmetic/multiply.py +81 -0
  451. maxframe/tensor/arithmetic/nan_to_num.py +99 -0
  452. maxframe/tensor/arithmetic/negative.py +65 -0
  453. maxframe/tensor/arithmetic/nextafter.py +68 -0
  454. maxframe/tensor/arithmetic/not_equal.py +72 -0
  455. maxframe/tensor/arithmetic/positive.py +47 -0
  456. maxframe/tensor/arithmetic/power.py +106 -0
  457. maxframe/tensor/arithmetic/rad2deg.py +71 -0
  458. maxframe/tensor/arithmetic/radians.py +77 -0
  459. maxframe/tensor/arithmetic/real.py +70 -0
  460. maxframe/tensor/arithmetic/reciprocal.py +76 -0
  461. maxframe/tensor/arithmetic/rint.py +68 -0
  462. maxframe/tensor/arithmetic/rshift.py +81 -0
  463. maxframe/tensor/arithmetic/setimag.py +29 -0
  464. maxframe/tensor/arithmetic/setreal.py +29 -0
  465. maxframe/tensor/arithmetic/sign.py +81 -0
  466. maxframe/tensor/arithmetic/signbit.py +65 -0
  467. maxframe/tensor/arithmetic/sin.py +98 -0
  468. maxframe/tensor/arithmetic/sinc.py +102 -0
  469. maxframe/tensor/arithmetic/sinh.py +93 -0
  470. maxframe/tensor/arithmetic/spacing.py +72 -0
  471. maxframe/tensor/arithmetic/sqrt.py +81 -0
  472. maxframe/tensor/arithmetic/square.py +69 -0
  473. maxframe/tensor/arithmetic/subtract.py +81 -0
  474. maxframe/tensor/arithmetic/tan.py +88 -0
  475. maxframe/tensor/arithmetic/tanh.py +92 -0
  476. maxframe/tensor/arithmetic/tests/__init__.py +15 -0
  477. maxframe/tensor/arithmetic/tests/test_arithmetic.py +414 -0
  478. maxframe/tensor/arithmetic/truediv.py +104 -0
  479. maxframe/tensor/arithmetic/trunc.py +72 -0
  480. maxframe/tensor/arithmetic/utils.py +65 -0
  481. maxframe/tensor/array_utils.py +186 -0
  482. maxframe/tensor/base/__init__.py +34 -0
  483. maxframe/tensor/base/astype.py +119 -0
  484. maxframe/tensor/base/atleast_1d.py +74 -0
  485. maxframe/tensor/base/broadcast_to.py +89 -0
  486. maxframe/tensor/base/ravel.py +92 -0
  487. maxframe/tensor/base/tests/__init__.py +13 -0
  488. maxframe/tensor/base/tests/test_base.py +114 -0
  489. maxframe/tensor/base/transpose.py +125 -0
  490. maxframe/tensor/base/unique.py +205 -0
  491. maxframe/tensor/base/where.py +127 -0
  492. maxframe/tensor/core.py +724 -0
  493. maxframe/tensor/datasource/__init__.py +32 -0
  494. maxframe/tensor/datasource/arange.py +156 -0
  495. maxframe/tensor/datasource/array.py +415 -0
  496. maxframe/tensor/datasource/core.py +109 -0
  497. maxframe/tensor/datasource/empty.py +169 -0
  498. maxframe/tensor/datasource/from_dataframe.py +70 -0
  499. maxframe/tensor/datasource/from_dense.py +54 -0
  500. maxframe/tensor/datasource/from_sparse.py +47 -0
  501. maxframe/tensor/datasource/full.py +186 -0
  502. maxframe/tensor/datasource/ones.py +173 -0
  503. maxframe/tensor/datasource/scalar.py +40 -0
  504. maxframe/tensor/datasource/tests/__init__.py +13 -0
  505. maxframe/tensor/datasource/tests/test_datasource.py +278 -0
  506. maxframe/tensor/datasource/zeros.py +188 -0
  507. maxframe/tensor/fetch/__init__.py +15 -0
  508. maxframe/tensor/fetch/core.py +54 -0
  509. maxframe/tensor/indexing/__init__.py +47 -0
  510. maxframe/tensor/indexing/choose.py +196 -0
  511. maxframe/tensor/indexing/compress.py +124 -0
  512. maxframe/tensor/indexing/core.py +190 -0
  513. maxframe/tensor/indexing/extract.py +71 -0
  514. maxframe/tensor/indexing/fill_diagonal.py +183 -0
  515. maxframe/tensor/indexing/flatnonzero.py +60 -0
  516. maxframe/tensor/indexing/getitem.py +175 -0
  517. maxframe/tensor/indexing/nonzero.py +120 -0
  518. maxframe/tensor/indexing/setitem.py +132 -0
  519. maxframe/tensor/indexing/slice.py +29 -0
  520. maxframe/tensor/indexing/take.py +130 -0
  521. maxframe/tensor/indexing/tests/__init__.py +15 -0
  522. maxframe/tensor/indexing/tests/test_indexing.py +234 -0
  523. maxframe/tensor/indexing/unravel_index.py +103 -0
  524. maxframe/tensor/merge/__init__.py +15 -0
  525. maxframe/tensor/merge/stack.py +132 -0
  526. maxframe/tensor/merge/tests/__init__.py +13 -0
  527. maxframe/tensor/merge/tests/test_merge.py +52 -0
  528. maxframe/tensor/operators.py +123 -0
  529. maxframe/tensor/random/__init__.py +168 -0
  530. maxframe/tensor/random/beta.py +87 -0
  531. maxframe/tensor/random/binomial.py +137 -0
  532. maxframe/tensor/random/bytes.py +39 -0
  533. maxframe/tensor/random/chisquare.py +110 -0
  534. maxframe/tensor/random/choice.py +186 -0
  535. maxframe/tensor/random/core.py +234 -0
  536. maxframe/tensor/random/dirichlet.py +123 -0
  537. maxframe/tensor/random/exponential.py +94 -0
  538. maxframe/tensor/random/f.py +135 -0
  539. maxframe/tensor/random/gamma.py +128 -0
  540. maxframe/tensor/random/geometric.py +93 -0
  541. maxframe/tensor/random/gumbel.py +167 -0
  542. maxframe/tensor/random/hypergeometric.py +148 -0
  543. maxframe/tensor/random/laplace.py +133 -0
  544. maxframe/tensor/random/logistic.py +129 -0
  545. maxframe/tensor/random/lognormal.py +159 -0
  546. maxframe/tensor/random/logseries.py +122 -0
  547. maxframe/tensor/random/multinomial.py +133 -0
  548. maxframe/tensor/random/multivariate_normal.py +192 -0
  549. maxframe/tensor/random/negative_binomial.py +125 -0
  550. maxframe/tensor/random/noncentral_chisquare.py +132 -0
  551. maxframe/tensor/random/noncentral_f.py +126 -0
  552. maxframe/tensor/random/normal.py +143 -0
  553. maxframe/tensor/random/pareto.py +140 -0
  554. maxframe/tensor/random/permutation.py +104 -0
  555. maxframe/tensor/random/poisson.py +111 -0
  556. maxframe/tensor/random/power.py +142 -0
  557. maxframe/tensor/random/rand.py +82 -0
  558. maxframe/tensor/random/randint.py +121 -0
  559. maxframe/tensor/random/randn.py +96 -0
  560. maxframe/tensor/random/random_integers.py +123 -0
  561. maxframe/tensor/random/random_sample.py +86 -0
  562. maxframe/tensor/random/rayleigh.py +110 -0
  563. maxframe/tensor/random/shuffle.py +61 -0
  564. maxframe/tensor/random/standard_cauchy.py +105 -0
  565. maxframe/tensor/random/standard_exponential.py +72 -0
  566. maxframe/tensor/random/standard_gamma.py +120 -0
  567. maxframe/tensor/random/standard_normal.py +74 -0
  568. maxframe/tensor/random/standard_t.py +135 -0
  569. maxframe/tensor/random/tests/__init__.py +15 -0
  570. maxframe/tensor/random/tests/test_random.py +167 -0
  571. maxframe/tensor/random/triangular.py +119 -0
  572. maxframe/tensor/random/uniform.py +131 -0
  573. maxframe/tensor/random/vonmises.py +131 -0
  574. maxframe/tensor/random/wald.py +114 -0
  575. maxframe/tensor/random/weibull.py +140 -0
  576. maxframe/tensor/random/zipf.py +122 -0
  577. maxframe/tensor/rechunk/__init__.py +26 -0
  578. maxframe/tensor/rechunk/rechunk.py +43 -0
  579. maxframe/tensor/reduction/__init__.py +66 -0
  580. maxframe/tensor/reduction/all.py +103 -0
  581. maxframe/tensor/reduction/allclose.py +88 -0
  582. maxframe/tensor/reduction/any.py +105 -0
  583. maxframe/tensor/reduction/argmax.py +103 -0
  584. maxframe/tensor/reduction/argmin.py +103 -0
  585. maxframe/tensor/reduction/array_equal.py +64 -0
  586. maxframe/tensor/reduction/core.py +168 -0
  587. maxframe/tensor/reduction/count_nonzero.py +81 -0
  588. maxframe/tensor/reduction/cumprod.py +97 -0
  589. maxframe/tensor/reduction/cumsum.py +101 -0
  590. maxframe/tensor/reduction/max.py +120 -0
  591. maxframe/tensor/reduction/mean.py +123 -0
  592. maxframe/tensor/reduction/min.py +120 -0
  593. maxframe/tensor/reduction/nanargmax.py +82 -0
  594. maxframe/tensor/reduction/nanargmin.py +76 -0
  595. maxframe/tensor/reduction/nancumprod.py +91 -0
  596. maxframe/tensor/reduction/nancumsum.py +94 -0
  597. maxframe/tensor/reduction/nanmax.py +111 -0
  598. maxframe/tensor/reduction/nanmean.py +106 -0
  599. maxframe/tensor/reduction/nanmin.py +111 -0
  600. maxframe/tensor/reduction/nanprod.py +94 -0
  601. maxframe/tensor/reduction/nanstd.py +126 -0
  602. maxframe/tensor/reduction/nansum.py +115 -0
  603. maxframe/tensor/reduction/nanvar.py +149 -0
  604. maxframe/tensor/reduction/prod.py +130 -0
  605. maxframe/tensor/reduction/std.py +134 -0
  606. maxframe/tensor/reduction/sum.py +125 -0
  607. maxframe/tensor/reduction/tests/__init__.py +13 -0
  608. maxframe/tensor/reduction/tests/test_reduction.py +181 -0
  609. maxframe/tensor/reduction/var.py +176 -0
  610. maxframe/tensor/reshape/__init__.py +17 -0
  611. maxframe/tensor/reshape/reshape.py +188 -0
  612. maxframe/tensor/reshape/tests/__init__.py +15 -0
  613. maxframe/tensor/reshape/tests/test_reshape.py +37 -0
  614. maxframe/tensor/statistics/__init__.py +13 -0
  615. maxframe/tensor/statistics/percentile.py +175 -0
  616. maxframe/tensor/statistics/quantile.py +288 -0
  617. maxframe/tensor/ufunc/__init__.py +26 -0
  618. maxframe/tensor/ufunc/ufunc.py +200 -0
  619. maxframe/tensor/utils.py +718 -0
  620. maxframe/tests/__init__.py +13 -0
  621. maxframe/tests/test_codegen.py +69 -0
  622. maxframe/tests/test_protocol.py +144 -0
  623. maxframe/tests/test_utils.py +376 -0
  624. maxframe/tests/utils.py +164 -0
  625. maxframe/typing_.py +37 -0
  626. maxframe/udf.py +134 -0
  627. maxframe/utils.py +1114 -0
  628. maxframe-0.1.0b5.dist-info/METADATA +104 -0
  629. maxframe-0.1.0b5.dist-info/RECORD +647 -0
  630. maxframe-0.1.0b5.dist-info/WHEEL +5 -0
  631. maxframe-0.1.0b5.dist-info/top_level.txt +3 -0
  632. maxframe_client/__init__.py +17 -0
  633. maxframe_client/clients/__init__.py +13 -0
  634. maxframe_client/clients/framedriver.py +118 -0
  635. maxframe_client/clients/spe.py +104 -0
  636. maxframe_client/conftest.py +15 -0
  637. maxframe_client/fetcher.py +264 -0
  638. maxframe_client/session/__init__.py +22 -0
  639. maxframe_client/session/consts.py +36 -0
  640. maxframe_client/session/graph.py +119 -0
  641. maxframe_client/session/odps.py +482 -0
  642. maxframe_client/session/task.py +280 -0
  643. maxframe_client/session/tests/__init__.py +13 -0
  644. maxframe_client/session/tests/test_task.py +85 -0
  645. maxframe_client/tests/__init__.py +13 -0
  646. maxframe_client/tests/test_fetcher.py +89 -0
  647. maxframe_client/tests/test_session.py +255 -0
@@ -0,0 +1,1094 @@
1
+ # distutils: language = c++
2
+ # Copyright 1999-2024 Alibaba Group Holding Ltd.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import asyncio
17
+ import datetime
18
+ import hashlib
19
+ import importlib
20
+ import re
21
+ from collections import OrderedDict
22
+ from functools import partial, wraps
23
+ from typing import Any, Dict, List, Optional, Union
24
+
25
+ import numpy as np
26
+ import pandas as pd
27
+
28
+ from cpython cimport PyObject
29
+ from libc.stdint cimport int32_t, int64_t, uint32_t, uint64_t, uintptr_t
30
+ from libcpp.unordered_map cimport unordered_map
31
+
32
+ from pandas.api.extensions import ExtensionDtype
33
+ from pandas.api.types import pandas_dtype
34
+
35
+ from .._utils import NamedType
36
+
37
+ from .._utils cimport TypeDispatcher
38
+
39
+ from ..lib import wrapped_pickle as pickle
40
+ from ..utils import arrow_type_from_str
41
+
42
+ try:
43
+ from pandas import ArrowDtype
44
+ except ImportError:
45
+ ArrowDtype = type(None)
46
+
47
# resolve pandas pickle compatibility between <1.2 and >=1.3
try:
    from pandas.core.internals import blocks as pd_blocks

    if hasattr(pd_blocks, "make_block"):
        if not hasattr(pd_blocks, "new_block"):
            # alias the older factory under the newer name so that pickles
            # referencing ``new_block`` can still be resolved
            pd_blocks.new_block = pd_blocks.make_block
except (ImportError, AttributeError):
    # best effort only: leave pandas untouched when the patch does not apply
    pass
55
+
56
+ try:
57
+ import pytz
58
+ from pytz import BaseTzInfo as PyTZ_BaseTzInfo
59
+ except ImportError:
60
+ PyTZ_BaseTzInfo = type(None)
61
+ try:
62
+ import zoneinfo
63
+ from zoneinfo import ZoneInfo
64
+ except ImportError:
65
+ ZoneInfo = type(None)
66
+
67
# protocol used when pickling with out-of-band buffers, never lower than 5
BUFFER_PICKLE_PROTOCOL = max(pickle.DEFAULT_PROTOCOL, 5)
cdef bint HAS_PICKLE_BUFFER = pickle.HIGHEST_PROTOCOL >= 5
# whether the installed pandas keeps its block manager in ``_mgr``
cdef bint _PANDAS_HAS_MGR = hasattr(pd.Series([0]), "_mgr")


# object type -> serializer instance
cdef TypeDispatcher _serial_dispatcher = TypeDispatcher()
# serializer id -> serializer instance
cdef dict _deserializers = {}

# str / bytes shorter than this are kept inline in collection headers
cdef uint32_t _MAX_STR_PRIMITIVE_LEN = 1024
# prime modulus for serializer ids
# use the largest prime number smaller than 32767
cdef int32_t _SERIALIZER_ID_PRIME = 32749

# ids for basic serializers
cdef int PICKLE_SERIALIZER = 0
cdef int PRIMITIVE_SERIALIZER = 1
cdef int BYTES_SERIALIZER = 2
cdef int STR_SERIALIZER = 3
cdef int TUPLE_SERIALIZER = 4
cdef int LIST_SERIALIZER = 5
cdef int DICT_SERIALIZER = 6
cdef int PY_DATETIME_SERIALIZER = 7
cdef int PY_DATE_SERIALIZER = 8
cdef int PY_TIMEDELTA_SERIALIZER = 9
cdef int PY_TZINFO_SERIALIZER = 10
cdef int DTYPE_SERIALIZER = 11
cdef int COMPLEX_SERIALIZER = 12
cdef int SLICE_SERIALIZER = 13
cdef int REGEX_SERIALIZER = 14
cdef int PLACEHOLDER_SERIALIZER = 4096


# class name string -> resolved class, filled by load_type()
cdef dict _type_cache = {}
101
+
102
+
103
cpdef object load_type(str class_name, object parent_class):
    """
    Resolve a class from a ``<module>#<qualified name>`` string.

    Resolved classes are memoized in a module-level cache keyed by
    ``class_name``.

    Parameters
    ----------
    class_name: str
        Class name in the form ``<module>#<qualified name>``
    parent_class: type
        Class the resolved object is required to be a subclass of

    Returns
    -------
    cls: type
        The resolved class

    Raises
    ------
    ValueError
        If the resolved class is not a subclass of ``parent_class``
    """
    if class_name not in _type_cache:
        try:
            from .deserializer import safe_load_type

            resolved = safe_load_type(class_name, parent_class)
        except ImportError:
            # no plain import fallback when unpickling is forbidden
            if pickle.is_unpickle_forbidden():
                raise

            mod_name, cls_name = class_name.rsplit("#", 1)

            # walk down from the module through (possibly nested) attributes
            resolved = importlib.import_module(mod_name)
            for attr_name in cls_name.split("."):
                resolved = getattr(resolved, attr_name)
        _type_cache[class_name] = resolved

    cls = _type_cache[class_name]
    if issubclass(cls, parent_class):
        return cls
    raise ValueError(f"Class {class_name} not a {parent_class}")
125
+
126
+
127
cdef Serializer get_deserializer(int32_t deserializer_id):
    # look up the serializer instance registered under the given id;
    # raises KeyError when no serializer owns that id
    cdef Serializer found = _deserializers[deserializer_id]
    return found
129
+
130
+
131
cdef class Serializer:
    """
    Base class of serializers.

    Subclasses implement ``serial()`` and ``deserial()`` and are bound
    to object types with ``register()``.
    """
    serializer_id = None

    def __cinit__(self):
        # copy the class-level id into a C attribute so that
        # it can be read from C code
        self._serializer_id = self.serializer_id

    cpdef serial(self, object obj, dict context):
        """
        Produce the intermediate serialization result of an object.

        The result is either a Placeholder or a tuple with three
        parts: a header, a group of subcomponents and a finalizing
        flag.

        * The header is a pickle-serializable tuple
        * Subcomponents are parts or buffers for iterative
          serialization.
        * The flag is a boolean value. When true, subcomponents
          should be buffers (for instance, bytes, memory views,
          GPU buffers, etc.) that can be read and written
          directly. When false, subcomponents will be serialized
          iteratively.

        Parameters
        ----------
        obj: Any
            Object to serialize
        context: Dict
            Serialization context which helps creating Placeholder
            objects to reduce duplicated serialization

        Returns
        -------
        result: Placeholder | Tuple[Tuple, List, bool]
            Intermediate result of serialization
        """
        raise NotImplementedError

    cpdef deserial(self, list serialized, dict context, list subs):
        """
        Rebuild an object from its serialized header and its
        deserialized subcomponents.

        Parameters
        ----------
        serialized: List
            Serialized object header
        context
            Serialization context for instantiation of Placeholder
            objects
        subs: List
            Deserialized subcomponents

        Returns
        -------
        result: Any
            Deserialized objects
        """
        raise NotImplementedError

    cpdef on_deserial_error(
        self,
        list serialized,
        dict context,
        list subs_serialized,
        int error_index,
        object exc,
    ):
        """
        Give the serializer a chance to rewrite the exception raised
        when deserialization of a subcomponent fails.

        Parameters
        ----------
        serialized: List
            Serialized object header
        context
            Serialization context for instantiation of Placeholder
            objects
        subs_serialized: List
            Serialized subcomponents
        error_index: int
            Index of subcomponent causing error
        exc: BaseException
            Exception raised

        Returns
        -------
        exc: BaseException | None
            Rewritten exception. If None, original exception is kept.
        """
        return None

    @classmethod
    def calc_default_serializer_id(cls):
        """
        Derive a serializer id from the md5 digest of the fully
        qualified class name, reduced modulo the id prime.
        """
        qualified_name = f"{cls.__module__}.{cls.__qualname__}"
        digest = hashlib.md5(qualified_name.encode()).hexdigest()
        return int(digest, 16) % _SERIALIZER_ID_PRIME

    @classmethod
    def register(cls, obj_type, name=None):
        """
        Register an instance of this serializer for ``obj_type``.

        Parameters
        ----------
        obj_type: type
            Type of objects handled by the serializer
        name: str, optional
            If given, the serializer is registered under a NamedType
            combining ``name`` and ``obj_type``
        """
        inherited_id = getattr(super(cls, cls), "serializer_id", None)
        if cls.serializer_id is None or cls.serializer_id == inherited_id:
            # a class should have its own serializer_id
            # inherited serializer_id not acceptable
            cls.serializer_id = cls.calc_default_serializer_id()

        inst = cls()
        if name is not None:
            obj_type = NamedType(name, obj_type)
        _serial_dispatcher.register(obj_type, inst)

        existing = _deserializers.get(cls.serializer_id)
        if existing is None:
            _deserializers[cls.serializer_id] = inst
        else:
            # an id may only be shared by registrations of the same class
            assert type(_deserializers[cls.serializer_id]) is cls

    @classmethod
    def unregister(cls, obj_type, name=None):
        """
        Remove the registration of ``obj_type`` (optionally under
        ``name``) and drop the deserializer kept for this class id.
        """
        if name is not None:
            obj_type = NamedType(name, obj_type)
        _serial_dispatcher.unregister(obj_type)
        _deserializers.pop(cls.serializer_id, None)

    @classmethod
    def dump_handlers(cls):
        """Return the handlers dumped by the serializer dispatcher."""
        return _serial_dispatcher.dump_handlers()

    @classmethod
    def load_handlers(cls, *args):
        """Load handlers into the serializer dispatcher."""
        _serial_dispatcher.load_handlers(*args)
261
+
262
+
263
cdef inline uint64_t _fast_id(PyObject * obj) nogil:
    # the address of the object is used as its identifier
    return <uint64_t>(<uintptr_t>obj)
265
+
266
+
267
def fast_id(obj):
    """C version of id() used for serialization"""
    cdef uint64_t obj_id = _fast_id(<PyObject *>obj)
    return obj_id
270
+
271
+
272
def buffered(func):
    """
    Decorator for serial() methods which avoids serializing the same
    object twice: objects already recorded in the context are replaced
    by a Placeholder carrying their id.
    """
    @wraps(func)
    def wrapped(self, obj: Any, dict context):
        cdef uint64_t obj_id = _fast_id(<PyObject*>obj)
        if obj_id not in context:
            # first occurrence: remember the object and serialize it
            context[obj_id] = obj
            return func(self, obj, context)
        return Placeholder(obj_id)

    return wrapped
286
+
287
+
288
def pickle_buffers(obj):
    """
    Pickle an object into a list of buffers.

    The first element is the pickled payload. When out-of-band pickle
    buffers are supported, the buffers collected while pickling follow
    as memoryviews.
    """
    cdef list out_of_band = []

    if not HAS_PICKLE_BUFFER:  # pragma: no cover
        return [pickle.dumps(obj)]

    def collect(pickle_buf):
        view = pickle_buf.raw()
        if view.ndim > 1:
            # ravel n-d memoryview
            view = view.cast(view.format)
        out_of_band.append(memoryview(view))

    payload = pickle.dumps(
        obj,
        buffer_callback=collect,
        protocol=BUFFER_PICKLE_PROTOCOL,
    )
    return [payload] + out_of_band
308
+
309
+
310
def unpickle_buffers(buffers):
    """
    Restore an object from buffers produced by pickle_buffers():
    the first buffer is the pickled payload, the rest are passed as
    out-of-band buffers.
    """
    result = pickle.loads(buffers[0], buffers=buffers[1:])

    if _PANDAS_HAS_MGR:
        return result

    # pragma: no cover
    # as pandas prior to 1.1.0 use _data instead of _mgr to hold BlockManager,
    # deserializing from high versions may produce mal-functioned pandas objects,
    # thus the patch is needed
    if hasattr(result, "_mgr") and isinstance(result, (pd.DataFrame, pd.Series)):
        result._data = getattr(result, "_mgr")
        delattr(result, "_mgr")
    return result
323
+
324
+
325
cdef class PickleContainer:
    """
    Holder of pickled buffers which are only unpickled when get()
    is called.
    """
    cdef list buffers

    def __init__(self, list buffers):
        self.buffers = buffers

    cpdef get(self):
        """Unpickle the held buffers and return the object."""
        return unpickle_buffers(self.buffers)

    cpdef list get_buffers(self):
        """Return the held buffers as they are."""
        return self.buffers
337
+
338
+
339
cdef class PickleSerializer(Serializer):
    """Serializer which pickles objects into buffers."""
    serializer_id = PICKLE_SERIALIZER

    cpdef serial(self, obj: Any, dict context):
        cdef uint64_t obj_id = _fast_id(<PyObject*>obj)
        cdef list buffers

        if obj_id in context:
            # already serialized in this context
            return Placeholder(obj_id)
        context[obj_id] = obj

        if type(obj) is PickleContainer:
            # buffers are pickled already, reuse them
            buffers = (<PickleContainer>obj).get_buffers()
        else:
            buffers = pickle_buffers(obj)
        return [], buffers, True

    cpdef deserial(self, list serialized, dict context, list subs):
        from .deserializer import deserial_pickle

        return deserial_pickle(serialized, context, subs)
357
+
358
+
359
# types whose values are stored inline in serialized headers
cdef set _primitive_types = {type(None), bool, int, float}
365
+
366
+
367
cdef class PrimitiveSerializer(Serializer):
    """Serializer which stores primitive values inline in the header."""
    serializer_id = PRIMITIVE_SERIALIZER

    cpdef serial(self, object obj, dict context):
        # the value itself is the whole header, no buffers needed
        return [obj], [], True

    cpdef deserial(self, list obj, dict context, list subs):
        return obj[0]
375
+
376
+
377
cdef class BytesSerializer(Serializer):
    """Serializer which passes the object through as a single buffer."""
    serializer_id = BYTES_SERIALIZER

    cpdef serial(self, obj: Any, dict context):
        cdef uint64_t obj_id = _fast_id(<PyObject*>obj)

        if obj_id in context:
            return Placeholder(obj_id)
        context[obj_id] = obj

        # the object is its own buffer
        return [], [obj], True

    cpdef deserial(self, list serialized, dict context, list subs):
        return subs[0]
391
+
392
+
393
cdef class StrSerializer(Serializer):
    """Serializer which stores a string as one encoded buffer."""
    serializer_id = STR_SERIALIZER

    cpdef serial(self, obj: Any, dict context):
        cdef uint64_t obj_id = _fast_id(<PyObject*>obj)

        if obj_id in context:
            return Placeholder(obj_id)
        context[obj_id] = obj

        encoded = (<str>obj).encode()
        return [], [encoded], True

    cpdef deserial(self, list serialized, dict context, list subs):
        raw = subs[0]
        if type(raw) is memoryview:
            # memoryview has no decode(), turn it into bytes first
            raw = raw.tobytes()
        return raw.decode()
410
+
411
+
412
cdef class CollectionSerializer(Serializer):
    """
    Base serializer of collections. Short strings / bytes and
    primitives are kept inline in the header, other elements are
    handed over as subcomponents.
    """
    obj_type = None

    cdef object _obj_type

    def __cinit__(self):
        # copy the class-level type into a C attribute so that
        # it can be read from C code
        self._obj_type = self.obj_type

    cdef tuple _serial_iterable(self, obj: Any):
        # Returns ([inline_values, deferred_indexes], deferred_items, False).
        # Slots of deferred items are set to None in inline_values.
        cdef list deferred_indexes = []
        cdef list deferred_items = []
        cdef list inline_values = <list>obj if type(obj) is list else list(obj)
        cdef int64_t pos
        cdef object element
        cdef object element_type

        for pos in range(len(inline_values)):
            element = inline_values[pos]
            element_type = type(element)

            if element_type is bytes:
                # treat short bytes as primitives
                if len(<bytes>element) < _MAX_STR_PRIMITIVE_LEN:
                    continue
            elif element_type is str:
                # treat short strings as primitives
                if len(<str>element) < _MAX_STR_PRIMITIVE_LEN:
                    continue
            elif element_type in _primitive_types:
                continue

            if inline_values is obj:
                # copy before the first modification to keep the input intact
                inline_values = list(obj)

            inline_values[pos] = None
            deferred_indexes.append(pos)
            deferred_items.append(element)

        return [inline_values, deferred_indexes], deferred_items, False

    cpdef serial(self, obj: Any, dict context):
        cdef uint64_t obj_id = _fast_id(<PyObject*>obj)

        if obj_id in context:
            return Placeholder(obj_id)
        context[obj_id] = obj

        return self._serial_iterable(obj)

    cdef list _deserial_iterable(self, list serialized, list subs):
        # put deserialized subcomponents back into the slots they came from
        cdef list restored, deferred_indexes
        cdef int64_t n

        restored, deferred_indexes = serialized

        for n in range(len(deferred_indexes)):
            restored[deferred_indexes[n]] = subs[n]
        return restored
467
+
468
+
469
cdef class TupleSerializer(CollectionSerializer):
    """
    Serializer of tuples. For tuple types carrying a ``_fields``
    attribute the class name is recorded so that the same type can be
    rebuilt on deserialization.
    """
    serializer_id = TUPLE_SERIALIZER
    obj_type = tuple

    cpdef serial(self, obj: Any, dict context):
        cdef uint64_t obj_id = _fast_id(<PyObject*>obj)
        cdef list header
        cdef object data, is_leaf
        cdef object tuple_cls = type(obj)

        if obj_id in context:
            return Placeholder(obj_id)
        context[obj_id] = obj

        header, data, is_leaf = self._serial_iterable(obj)
        # last header element: type name for tuples with _fields, else None
        header.append(
            tuple_cls.__module__ + "#" + tuple_cls.__qualname__
            if hasattr(tuple_cls, "_fields")
            else None
        )
        return header, data, is_leaf

    cpdef deserial(self, list serialized, dict context, list subs):
        cdef list elements
        cdef str tuple_type_name = serialized[-1]

        elements = self._deserial_iterable(serialized[:-1], subs)
        for element in elements:
            assert type(element) is not Placeholder

        if tuple_type_name is not None:
            tuple_type = load_type(tuple_type_name, tuple)
            return tuple_type(*elements)
        return tuple(elements)
503
+
504
+
505
cdef class ListSerializer(CollectionSerializer):
    """Serializer of lists."""
    serializer_id = LIST_SERIALIZER
    obj_type = list

    cpdef deserial(self, list serialized, dict context, list subs):
        cdef int64_t pos
        cdef list restored = self._deserial_iterable(serialized, subs)

        result = list(restored)

        for pos in range(len(restored)):
            element = restored[pos]
            if type(element) is Placeholder:
                # let the placeholder fill its slot in the result later
                (<Placeholder>element).callbacks.append(
                    partial(result.__setitem__, pos)
                )
        return result
520
+
521
+
522
def _dict_key_replacer(ret, key, real_key):
    """Move the value stored under ``key`` in ``ret`` to ``real_key``."""
    value = ret.pop(key)
    ret[real_key] = value
524
+
525
+
526
def _dict_value_replacer(context, ret, key, real_value):
    """
    Store ``real_value`` in ``ret`` under ``key``. When the key is a
    Placeholder, the object recorded in ``context`` under the
    placeholder id is used as the key instead.
    """
    target_key = key
    if type(key) is Placeholder:
        target_key = context[(<Placeholder>key).id]
    ret[target_key] = real_value
530
+
531
+
532
# marker recorded in the header when the dict is an OrderedDict
cdef object _TYPE_CHAR_ORDERED_DICT = "O"


cdef class DictSerializer(CollectionSerializer):
    """
    Serializer of dicts. Keys and values are serialized as two
    iterables whose subcomponents are concatenated, keys first.
    """
    serializer_id = DICT_SERIALIZER

    cpdef serial(self, obj: Any, dict context):
        cdef uint64_t obj_id
        cdef list key_header, value_header
        cdef list key_subs, value_subs

        # empty plain dicts need neither header nor context record
        if type(obj) is dict and len(<dict>obj) == 0:
            return [], [], True

        obj_id = _fast_id(<PyObject*>obj)
        if obj_id in context:
            return Placeholder(obj_id)
        context[obj_id] = obj

        ser_type = _TYPE_CHAR_ORDERED_DICT if isinstance(obj, OrderedDict) else None

        key_header, key_subs, _ = self._serial_iterable(obj.keys())
        value_header, value_subs, _ = self._serial_iterable(obj.values())
        # number of key subcomponents is recorded to split them on deserial
        header = [key_header, value_header, len(key_subs), ser_type]
        return header, key_subs + value_subs, False

    cpdef deserial(self, list serialized, dict context, list subs):
        cdef int64_t pos, num_key_bufs
        cdef list keys, values

        if not serialized:
            return {}
        if len(serialized) == 1:
            # serialized directly
            return serialized[0]

        key_serialized, value_serialized, num_key_bufs, ser_type = serialized

        keys = self._deserial_iterable(<list>key_serialized, subs[:num_key_bufs])
        values = self._deserial_iterable(
            <list>value_serialized, subs[num_key_bufs:]
        )

        if ser_type == _TYPE_CHAR_ORDERED_DICT:
            ret = OrderedDict(zip(keys, values))
        else:
            ret = dict(zip(keys, values))

        # placeholders among keys / values patch the dict once resolved
        for pos in range(len(keys)):
            key, value = keys[pos], values[pos]
            if type(key) is Placeholder:
                (<Placeholder>key).callbacks.append(
                    partial(_dict_key_replacer, ret, key)
                )
            if type(value) is Placeholder:
                (<Placeholder>value).callbacks.append(
                    partial(_dict_value_replacer, context, ret, key)
                )
        return ret
595
+
596
+
597
cdef class PyDatetimeSerializer(Serializer):
    """
    Serializer of ``datetime.datetime`` objects.

    The header holds the POSIX timestamp of the datetime and the
    serialized form of its tzinfo (None for naive datetimes).
    """
    serializer_id = PY_DATETIME_SERIALIZER

    cpdef serial(self, obj: datetime.datetime, dict context):
        """
        Serialize a datetime into ``[timestamp, serialized_tz]``.

        Parameters
        ----------
        obj: datetime.datetime
            Datetime to serialize
        context: Dict
            Serialization context, not used here

        Returns
        -------
        result: Tuple[List, List, bool]
            Header, empty subcomponents and a true finalizing flag
        """
        cdef list ser_tz = None
        if obj.tzinfo is not None:
            # pass the datetime itself so that name and UTC offset of
            # generic tzinfo objects are evaluated at the serialized
            # moment instead of the current time
            ser_tz = _serial_tz(obj.tzinfo, obj)
        return [obj.timestamp(), ser_tz], [], True

    cpdef deserial(self, list serialized, dict context, list subs):
        """
        Rebuild a datetime from ``[timestamp, serialized_tz]``.

        Parameters
        ----------
        serialized: List
            Header produced by serial()
        context: Dict
            Serialization context, not used here
        subs: List
            Deserialized subcomponents, not used here

        Returns
        -------
        result: datetime.datetime
            Datetime at the recorded timestamp in the recorded timezone,
            or a naive local datetime when no timezone was recorded
        """
        cdef object tz = None
        if serialized[1] is not None:
            tz = _deserialize_tz(serialized[1])
        return datetime.datetime.fromtimestamp(serialized[0], tz)
611
+
612
+
613
cdef class PyDateSerializer(Serializer):
    """Serializer which stores a date as its proleptic Gregorian ordinal."""
    serializer_id = PY_DATE_SERIALIZER

    cpdef serial(self, obj: datetime.date, dict context):
        ordinal = obj.toordinal()
        return [ordinal], [], True

    cpdef deserial(self, list serialized, dict context, list subs):
        ordinal = serialized[0]
        return datetime.date.fromordinal(ordinal)
621
+
622
+
623
cdef class PyTimedeltaSerializer(Serializer):
    """Serializer which stores a timedelta as days, seconds and microseconds."""
    serializer_id = PY_TIMEDELTA_SERIALIZER

    cpdef serial(self, obj: datetime.timedelta, dict context):
        header = [obj.days, obj.seconds, obj.microseconds]
        return header, [], True

    cpdef deserial(self, list serialized, dict context, list subs):
        days = serialized[0]
        seconds = serialized[1]
        microseconds = serialized[2]
        return datetime.timedelta(
            days=days, seconds=seconds, microseconds=microseconds
        )
635
+
636
+
637
# markers telling how a timezone has been serialized
cdef object _TYPE_CHAR_TZ_BASE = "S"
cdef object _TYPE_CHAR_TZ_ZONEINFO = "ZI"
cdef object _TYPE_CHAR_TZ_PYTZ = "PT"


cdef inline list _serial_tz(
    obj: datetime.tzinfo, dt: Optional[datetime.datetime] = None
):
    # pytz and zoneinfo timezones are recorded by their zone keys,
    # any other tzinfo by its name and UTC offset in seconds at ``dt``
    # (current time when ``dt`` is not given)
    if isinstance(obj, PyTZ_BaseTzInfo):
        return [_TYPE_CHAR_TZ_PYTZ, obj.zone]
    if isinstance(obj, ZoneInfo):
        return [_TYPE_CHAR_TZ_ZONEINFO, obj.key]

    if not dt:
        dt = datetime.datetime.now()
    tz_name = obj.tzname(dt)
    offset_seconds = int(obj.utcoffset(dt).total_seconds())
    return [_TYPE_CHAR_TZ_BASE, tz_name, offset_seconds]
658
+
659
+
660
cdef inline object _deserialize_tz(list serialized):
    # rebuild a timezone from the list produced by _serial_tz()
    type_char = serialized[0]

    if type_char == _TYPE_CHAR_TZ_PYTZ:
        return pytz.timezone(serialized[1])
    if type_char == _TYPE_CHAR_TZ_ZONEINFO:
        return zoneinfo.ZoneInfo(serialized[1])

    # generic tzinfo: a zero offset maps onto the UTC singleton
    if serialized[2] == 0:
        return datetime.timezone.utc
    offset = datetime.timedelta(seconds=serialized[2])
    return datetime.timezone(offset, name=serialized[1])
671
+
672
+
673
cdef class TZInfoSerializer(Serializer):
    """Serializer of tzinfo objects, storing them inline in the header."""
    serializer_id = PY_TZINFO_SERIALIZER

    cpdef serial(self, object obj: datetime.tzinfo, dict context):
        cdef list header = _serial_tz(obj)
        return header, [], True

    cpdef deserial(self, list serialized, dict context, list subs):
        return _deserialize_tz(serialized)
681
+
682
+
683
# markers telling which kind of dtype has been serialized
cdef:
    object _TYPE_CHAR_DTYPE_NUMPY = "N"
    object _TYPE_CHAR_DTYPE_PANDAS_ARROW = "PA"
    object _TYPE_CHAR_DTYPE_PANDAS_CATEGORICAL = "PC"
    object _TYPE_CHAR_DTYPE_PANDAS_INTERVAL = "PI"
    object _TYPE_CHAR_DTYPE_PANDAS_EXTENSION = "PE"


cdef class DtypeSerializer(Serializer):
    """
    Serializer of numpy dtypes and pandas extension dtypes.

    The first header element is a marker naming the kind of dtype,
    the following elements depend on that kind.
    """
    serializer_id = DTYPE_SERIALIZER

    @staticmethod
    def _sort_fields(fields):
        """
        Return field names ordered by the second element of their
        field entries.

        Parameters
        ----------
        fields: Mapping
            Mapping from field names to field entries, as exposed by
            ``np.dtype.fields``. The argument is deliberately left
            untyped: ``np.dtype.fields`` is not a ``list``, thus a
            ``list`` typed argument would reject it with a TypeError.

        Returns
        -------
        names: List
            Sorted field names
        """
        return sorted(fields, key=lambda k: fields[k][1])

    cpdef serial(self, obj: Union[np.dtype, ExtensionDtype], dict context):
        """
        Serialize a dtype.

        Parameters
        ----------
        obj: np.dtype | ExtensionDtype
            Dtype to serialize
        context: Dict
            Serialization context, not used here

        Returns
        -------
        result: Tuple[List, List, bool]
            Header, subcomponents and finalizing flag. Categorical and
            interval dtypes hand their categories / subdtype over as
            subcomponents, all other dtypes are described by the header
            alone.

        Raises
        ------
        NotImplementedError
            If ``obj`` is neither a numpy dtype nor an extension dtype
        """
        if isinstance(obj, np.dtype):
            try:
                return [
                    _TYPE_CHAR_DTYPE_NUMPY, np.lib.format.dtype_to_descr(obj), None
                ], [], True
            except ValueError:
                # describe the dtype with reordered fields and record
                # the original field order to restore it on deserial
                fields = obj.fields
                new_fields = self._sort_fields(fields)
                desc = np.lib.format.dtype_to_descr(obj[new_fields])
                dtype_new_order = list(fields)
                return [_TYPE_CHAR_DTYPE_NUMPY, desc, dtype_new_order], [], True
        elif isinstance(obj, ExtensionDtype):
            if isinstance(obj, ArrowDtype):
                return [_TYPE_CHAR_DTYPE_PANDAS_ARROW, str(obj.pyarrow_dtype)], [], True
            elif isinstance(obj, pd.CategoricalDtype):
                return [
                    _TYPE_CHAR_DTYPE_PANDAS_CATEGORICAL, obj.ordered
                ], [obj.categories], False
            elif isinstance(obj, pd.IntervalDtype):
                return [
                    _TYPE_CHAR_DTYPE_PANDAS_INTERVAL, obj.closed
                ], [obj.subdtype], False
            else:
                # other extension dtypes are recorded by their repr
                return [_TYPE_CHAR_DTYPE_PANDAS_EXTENSION, repr(obj)], [], True
        else:
            raise NotImplementedError(f"Does not support serializing dtype {obj!r}")

    cpdef deserial(self, list serialized, dict context, list subs):
        """
        Rebuild a dtype from its header and subcomponents.

        Parameters
        ----------
        serialized: List
            Header produced by serial()
        context: Dict
            Serialization context, not used here
        subs: List
            Deserialized subcomponents

        Returns
        -------
        result: np.dtype | ExtensionDtype
            Deserialized dtype

        Raises
        ------
        NotImplementedError
            If the marker in the header is unknown
        """
        cdef str ser_type = serialized[0]
        if ser_type == _TYPE_CHAR_DTYPE_NUMPY:
            try:
                dt = np.lib.format.descr_to_dtype(serialized[1])
            except AttributeError:
                dt = np.dtype(serialized[1])

            if serialized[2] is not None:
                # fill dtype_new_order field
                dt = dt[serialized[2]]
            return dt
        elif ser_type == _TYPE_CHAR_DTYPE_PANDAS_ARROW:
            return ArrowDtype(arrow_type_from_str(serialized[1]))
        elif ser_type == _TYPE_CHAR_DTYPE_PANDAS_CATEGORICAL:
            return pd.CategoricalDtype(subs[0], serialized[1])
        elif ser_type == _TYPE_CHAR_DTYPE_PANDAS_INTERVAL:
            return pd.IntervalDtype(subs[0], serialized[1])
        elif ser_type == _TYPE_CHAR_DTYPE_PANDAS_EXTENSION:
            return pandas_dtype(serialized[1])
        else:
            raise NotImplementedError(f"Unknown serialization type {ser_type}")
748
+
749
+
750
cdef class ComplexSerializer(Serializer):
    """Serializer which stores a complex number as its real and imaginary parts."""
    serializer_id = COMPLEX_SERIALIZER

    cpdef serial(self, object obj: complex, dict context):
        cdef complex value = <complex>obj
        header = [value.real, value.imag]
        return header, [], True

    cpdef deserial(self, list serialized, dict context, list subs):
        parts = serialized[:2]
        return complex(*parts)
759
+
760
+
761
cdef class SliceSerializer(Serializer):
    """Serializer which stores a slice as its start, stop and step."""
    serializer_id = SLICE_SERIALIZER

    cpdef serial(self, object obj: slice, dict context):
        header = [obj.start, obj.stop, obj.step]
        return header, [], True

    cpdef deserial(self, list serialized, dict context, list subs):
        bounds = serialized[:3]
        return slice(*bounds)
769
+
770
+
771
cdef class RegexSerializer(Serializer):
    """
    Serializer of compiled regular expressions.

    The header holds the flags of the pattern, the pattern string is
    stored as one encoded buffer.
    """
    serializer_id = REGEX_SERIALIZER

    cpdef serial(self, object obj: re.Pattern, dict context):
        """
        Serialize a compiled pattern.

        Parameters
        ----------
        obj: re.Pattern
            Compiled pattern to serialize
        context: Dict
            Serialization context recording objects already serialized

        Returns
        -------
        result: Placeholder | Tuple[List, List, bool]
            Placeholder if the pattern is already recorded in the
            context, otherwise header with flags, the encoded pattern
            as a buffer and a true finalizing flag
        """
        cdef uint64_t obj_id
        obj_id = _fast_id(<PyObject*>obj)
        if obj_id in context:
            return Placeholder(obj_id)
        context[obj_id] = obj

        # NOTE(review): the pattern is cast to str here, patterns
        # compiled from bytes look unsupported - confirm with callers
        return [obj.flags], [(<str>(obj.pattern)).encode()], True

    cpdef deserial(self, list serialized, dict context, list subs):
        """
        Rebuild a compiled pattern from flags and the pattern buffer.

        Parameters
        ----------
        serialized: List
            Header produced by serial()
        context: Dict
            Serialization context, not used here
        subs: List
            Buffers, the first one holding the encoded pattern

        Returns
        -------
        result: re.Pattern
            Compiled pattern
        """
        buffer = subs[0]
        if type(buffer) is memoryview:
            # buffers may show up as memoryviews (StrSerializer handles
            # the same case), convert before decoding instead of
            # casting to bytes without a check
            buffer = buffer.tobytes()
        return re.compile(buffer.decode(), serialized[0])
785
+
786
+
787
cdef class Placeholder:
    """
    Stand-in for an object that is already serialized elsewhere,
    used to reduce duplicated serialization

    It keeps the identifier of the object as well as callbacks
    which replace the placeholder in its parent objects.
    """
    def __init__(self, uint64_t id_):
        self.callbacks = []
        self.id = id_

    def __hash__(self):
        return self.id

    def __eq__(self, other):  # pragma: no cover
        return type(other) is Placeholder and self.id == other.id

    def __repr__(self):
        num_callbacks = len(self.callbacks)
        return f"Placeholder(id={self.id}, callbacks=[list of {num_callbacks}])"
811
+
812
+
813
cdef class PlaceholderSerializer(Serializer):
    """Serializer of Placeholder objects, producing an empty header."""
    serializer_id = PLACEHOLDER_SERIALIZER

    cpdef serial(self, obj: Any, dict context):
        # nothing but the common header is needed for a placeholder
        header = []
        return header, [], True

    cpdef deserial(self, list serialized, dict context, list subs):
        cdef uint64_t empty_id = 0
        return Placeholder(empty_id)
821
+
822
+
823
# register builtin serializers, PickleSerializer being bound to ``object``
PickleSerializer.register(object)
for _primitive in _primitive_types:
    PrimitiveSerializer.register(_primitive)
for _serializer_cls, _handled_type in [
    (BytesSerializer, bytes),
    (BytesSerializer, memoryview),
    (StrSerializer, str),
    (ListSerializer, list),
    (TupleSerializer, tuple),
    (DictSerializer, dict),
    (PyDatetimeSerializer, datetime.datetime),
    (PyDateSerializer, datetime.date),
    (PyTimedeltaSerializer, datetime.timedelta),
    (TZInfoSerializer, datetime.tzinfo),
    (DtypeSerializer, np.dtype),
    (DtypeSerializer, ExtensionDtype),
    (ComplexSerializer, complex),
    (SliceSerializer, slice),
    (RegexSerializer, re.Pattern),
    (PlaceholderSerializer, Placeholder),
]:
    _serializer_cls.register(_handled_type)
842
+
843
+
844
cdef class _SerialStackItem:
    # serialized header of the object
    cdef public list serialized
    # subcomponents of the object waiting for serialization
    cdef public list subs
    # serialized results of subcomponents processed so far
    cdef public list subs_serialized

    def __cinit__(self, list serialized, list subs):
        self.subs_serialized = []
        self.subs = subs
        self.serialized = serialized
853
+
854
+
855
cdef class _IdContextHolder:
    # object id -> ordinal assigned to the object during serialization
    cdef public unordered_map[uint64_t, uint64_t] d
    # number of ordinals assigned so far
    cdef public uint64_t obj_count

    def __cinit__(self):
        # ordinals start from zero
        self.obj_count = 0
861
+
862
+
863
cdef tuple _serial_single(
    obj, dict context, _IdContextHolder id_context_holder
):
    """Serialize single object and return serialized tuples"""
    cdef uint64_t obj_id, ordered_id
    cdef Serializer serializer
    cdef int serializer_id
    cdef list common_header, serialized, subs

    while True:
        name = context.get("serializer")
        if name is None:
            obj_type = type(obj)
        else:
            obj_type = NamedType(name, type(obj))
        serializer = _serial_dispatcher.get_handler(obj_type)
        serializer_id = serializer._serializer_id
        outcome = serializer.serial(obj, context)
        if type(outcome) is tuple:
            break
        # object is converted into another (usually a Placeholder),
        # serialize the converted object instead
        obj = outcome

    # object is serialized, form a common header and return
    serialized, subs, final = <tuple>outcome

    if type(obj) is Placeholder:
        # reuse the ordinal of the object the placeholder refers to
        obj_id = (<Placeholder>obj).id
        ordered_id = id_context_holder.d[obj_id]
    else:
        ordered_id = id_context_holder.obj_count
        id_context_holder.obj_count += 1
        # only need to record object ids for non-primitive types
        if serializer_id != PRIMITIVE_SERIALIZER:
            obj_id = _fast_id(<PyObject*>obj)
            id_context_holder.d[obj_id] = ordered_id

    # REMEMBER to change _COMMON_HEADER_LEN when content of
    # this header changed
    common_header = [serializer_id, ordered_id, len(subs), final]
    common_header.extend(serialized)
    return common_header, subs, final
904
+
905
+
906
class _SerializeObjectOverflow(Exception):
    """
    Raised when the number of serialized objects exceeds the overflow
    threshold. Carries the state needed to resume serialization.
    """
    def __init__(self, list cur_serialized, int num_total_serialized):
        Exception.__init__(self, cur_serialized)
        self.num_total_serialized = num_total_serialized
        self.cur_serialized = cur_serialized
911
+
912
+
913
cpdef object _serialize_with_stack(
    list serial_stack,
    list serialized,
    dict context,
    _IdContextHolder id_context_holder,
    list result_bufs_list,
    int64_t num_overflow = 0,
    int64_t num_total_serialized = 0,
):
    """
    Serialize objects on the stack iteratively until the stack is empty.

    Parameters
    ----------
    serial_stack: List
        Stack of _SerialStackItem objects under serialization
    serialized: List
        Serialized result not yet recorded into the item on stack top,
        None if there is no such result
    context: Dict
        Serialization context
    id_context_holder: _IdContextHolder
        Holder of ordinals assigned to serialized objects
    result_bufs_list: List
        List collecting buffers of leaf subcomponents
    num_overflow: int
        When positive, _SerializeObjectOverflow is raised once the
        number of processed objects exceeds the value
    num_total_serialized: int
        Number of objects processed before the call, a positive value
        marks the call as resumed

    Returns
    -------
    result: Tuple[List, List]
        List of extra meta and serialized header, and the buffer list
    """
    cdef _SerialStackItem top
    cdef list child_subs
    cdef bint child_final
    cdef int64_t done_count
    cdef bint resumed = num_total_serialized > 0

    while serial_stack:
        top = serial_stack[-1]
        if serialized is not None:
            # have previously-serialized results, record first
            top.subs_serialized.append(serialized)

        done_count = len(top.subs_serialized)
        if done_count != len(top.subs):
            # serialize next subcomponent at stack top
            serialized, child_subs, child_final = _serial_single(
                top.subs[done_count], context, id_context_holder
            )
            if not child_final and child_subs:
                # the subcomponent has its own subcomponents, we push itself
                # into stack and process its children
                serial_stack.append(_SerialStackItem(serialized, child_subs))
                # note that the serialized header should not be recorded
                # as we are now processing the subcomponent itself
                serialized = None
            elif child_subs:
                # the subcomponent is a leaf, collect its buffers
                result_bufs_list.extend(child_subs)
        else:
            # all subcomponents serialized, serialization of current is done
            # and we can move to the parent object
            serialized = top.serialized + top.subs_serialized
            serial_stack.pop()

        num_total_serialized += 1
        if 0 < num_overflow < num_total_serialized:
            raise _SerializeObjectOverflow(serialized, num_total_serialized)

    # we keep an empty dict for extra metas required for other modules
    if resumed:
        # returns num of processed objects when resumed
        extra_meta = {"_N": num_total_serialized}
    else:
        # otherwise does not record the number to reduce result size
        extra_meta = {}
    return [extra_meta, serialized], result_bufs_list
970
+
971
+
972
def serialize(obj, dict context = None):
    """
    Turn an object into a header and a list of buffers.
    Buffers are intended for zero-copy data manipulation.

    Parameters
    ----------
    obj: Any
        Object to serialize
    context: Dict
        Serialization context for instantiation of Placeholder
        objects, a new one is created if not given

    Returns
    -------
    result: Tuple[Tuple, List]
        Picklable header and buffers
    """
    cdef list header
    cdef list subs
    cdef bint final
    cdef _IdContextHolder id_context_holder = _IdContextHolder()

    if context is None:
        context = dict()
    header, subs, final = _serial_single(obj, context, id_context_holder)
    if not final and subs:
        # the object has subcomponents to serialize iteratively
        return _serialize_with_stack(
            [_SerialStackItem(header, subs)], None, context, id_context_holder, []
        )
    # marked as a leaf node, return directly
    return [{}, header], subs
1007
+
1008
+
1009
async def serialize_with_spawn(
    obj, dict context = None, int spawn_threshold = 100, object executor = None
):
    """
    Turn an object into a header and a list of buffers, moving
    the rest of the work into an executor once the number of
    serialized objects exceeds a threshold.
    Buffers are intended for zero-copy data manipulation.

    Parameters
    ----------
    obj: Any
        Object to serialize
    context: Dict
        Serialization context for instantiation of Placeholder
        objects, a new one is created if not given
    spawn_threshold: int
        Threshold to spawn into a ThreadPoolExecutor
    executor: ThreadPoolExecutor
        ThreadPoolExecutor to spawn rest serialization into

    Returns
    -------
    result: Tuple[Tuple, List]
        Picklable header and buffers
    """
    cdef list serial_stack = []
    cdef list result_bufs_list = []
    cdef list header
    cdef list subs
    cdef bint final
    cdef _IdContextHolder id_context_holder = _IdContextHolder()

    if context is None:
        context = dict()
    header, subs, final = _serial_single(obj, context, id_context_holder)
    if final or not subs:
        # marked as a leaf node, return directly
        return [{}, header], subs

    serial_stack.append(_SerialStackItem(header, subs))

    try:
        # serialize in place until the threshold is exceeded
        return _serialize_with_stack(
            serial_stack, None, context, id_context_holder, result_bufs_list, spawn_threshold
        )
    except _SerializeObjectOverflow as ex:
        # resume in the executor with the state kept so far, this time
        # without an overflow threshold
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            executor,
            _serialize_with_stack,
            serial_stack,
            ex.cur_serialized,
            context,
            id_context_holder,
            result_bufs_list,
            0,
            ex.num_total_serialized,
        )
1065
+
1066
+
1067
# cache for the deserialize function from the sibling ``deserializer``
# module; it is filled in on the first call to deserialize() below
cdef object deserialize_impl


def deserialize(list serialized, list buffers, dict context = None):
    """
    Rebuild an object from its serialized header and buffers.

    The call is forwarded to ``deserialize`` in the sibling
    ``deserializer`` module, which is imported the first time this
    function runs and cached in ``deserialize_impl``.

    Parameters
    ----------
    serialized: List
        Serialized object header
    buffers: List
        List of buffers extracted from serialize() calls
    context: Dict
        Serialization context for replacing Placeholder
        objects. Passed through as-is, including None.

    Returns
    -------
    result: Any
        Deserialized object
    """
    global deserialize_impl

    if deserialize_impl is None:
        # import inside the function so it only happens on first use
        from .deserializer import deserialize as deserialize_impl

    return deserialize_impl(serialized, buffers, context)