datasketches 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +2 -2
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/NOTICE +1 -1
  15. data/vendor/datasketches-cpp/README.md +1 -3
  16. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  21. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  23. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  24. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  25. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  26. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  27. data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
  28. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
  29. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
  30. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
  31. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
  32. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  33. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  34. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  35. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  36. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
  37. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  38. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  42. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  43. data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  46. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
  47. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
  48. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
  49. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  50. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  51. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  52. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  53. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  54. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  55. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  56. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  57. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
  58. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
  59. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  60. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
  62. data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
  63. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  64. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  65. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  66. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  67. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
  70. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  71. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
  74. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
  75. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
  76. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  77. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  78. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  79. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
  80. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
  84. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  85. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  86. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  87. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  88. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  89. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  90. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
  91. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  92. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
  94. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
  95. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
  96. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
  97. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  98. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  99. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  100. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  101. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  102. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  103. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
  107. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
  108. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  110. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  114. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  115. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  116. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  117. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
  120. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
  121. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  122. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  124. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
  125. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
  126. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
  127. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
  130. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  131. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  132. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  133. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  134. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  135. data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
  136. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  137. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  138. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  139. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  140. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  141. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  142. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  143. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  144. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  145. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  146. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  147. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  148. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  149. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  150. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  151. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  152. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
  153. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  154. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  155. metadata +47 -93
  156. data/vendor/datasketches-cpp/MANIFEST.in +0 -39
  157. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  160. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  161. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  162. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  163. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  164. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  165. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  166. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  167. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  168. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
  169. data/vendor/datasketches-cpp/python/README.md +0 -85
  170. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
  171. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
  172. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
  173. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
  174. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
  175. data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
  176. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
  177. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  178. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
  179. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
  180. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  181. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  182. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  183. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  184. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  185. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  186. data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
  187. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
  188. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
  189. data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
  190. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
  191. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
  192. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
  193. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
  194. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  195. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
  196. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
  197. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
  198. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
  199. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
  200. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  201. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  202. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  203. data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
  204. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  205. data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
  206. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
  207. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
  208. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
  209. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
  210. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
  211. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
  212. data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
  213. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  214. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
  215. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  216. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  217. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  218. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  219. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  220. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  221. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  222. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  223. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  224. data/vendor/datasketches-cpp/setup.py +0 -110
  225. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tox.ini +0 -26
  230. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  231. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  232. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  233. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  234. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
@@ -1,35 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- import numpy as np
19
-
20
- from _datasketches import KernelFunction
21
-
22
- # This file provides an example Python Kernel Function implementation.
23
- #
24
- # Each implementation must extend the KernelFunction class
25
- # and define the __call__ method
26
-
27
- # Implements a basic Gaussian Kernel
28
- class GaussianKernel(KernelFunction):
29
- def __init__(self, bandwidth: float=1.0):
30
- KernelFunction.__init__(self)
31
- self._bw = bandwidth
32
- self._scale = -0.5 * (bandwidth ** -2)
33
-
34
- def __call__(self, a: np.array, b: np.array) -> float:
35
- return np.exp(self._scale * np.linalg.norm(a - b)**2)
data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
@@ -1,110 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- from _datasketches import PyObjectSerDe
19
-
20
- import struct
21
-
22
- # This file provides several Python SerDe implementation examples.
23
- #
24
- # Each implementation must extend the PyObjectSerDe class and define
25
- # three methods:
26
- # * get_size(item) returns an int of the number of bytes needed to
27
- # serialize the given item
28
- # * to_bytes(item) returns a bytes object representing a serialized
29
- # version of the given item
30
- # * from_bytes(data, offset) takes a bytes object (data) and an offset
31
- # indicating where in the data array to start reading. The method
32
- # returns a tuple with the newly reconstructed object and the
33
- # total number of bytes beyond the offset read from the input data.
34
-
35
- # Implements a simple string-encoding scheme where a string is
36
- # written as <num_bytes> <string_contents>, with no null termination.
37
- # This format allows pre-allocating each string, at the cost of
38
- # additional storage. Using this format, the serialized string consumes
39
- # 4 + len(item) bytes.
40
- class PyStringsSerDe(PyObjectSerDe):
41
- def get_size(self, item):
42
- return int(4 + len(item))
43
-
44
- def to_bytes(self, item: str):
45
- b = bytearray()
46
- b.extend(len(item).to_bytes(4, 'little'))
47
- b.extend(map(ord,item))
48
- return bytes(b)
49
-
50
- def from_bytes(self, data: bytes, offset: int):
51
- num_chars = int.from_bytes(data[offset:offset+3], 'little')
52
- if (num_chars < 0 or num_chars > offset + len(data)):
53
- raise IndexError(f'num_chars read must be non-negative and not larger than the buffer. Found {num_chars}')
54
- str = data[offset+4:offset+4+num_chars].decode()
55
- return (str, 4+num_chars)
56
-
57
- # Implements an integer encoding scheme where each integer is written
58
- # as a 32-bit (4 byte) little-endian value.
59
- class PyIntsSerDe(PyObjectSerDe):
60
- def get_size(self, item):
61
- return int(4)
62
-
63
- def to_bytes(self, item):
64
- return struct.pack('<i', item)
65
-
66
- def from_bytes(self, data: bytes, offset: int):
67
- val = struct.unpack_from('<i', data, offset)[0]
68
- return (val, 4)
69
-
70
-
71
- # Implements an integer encoding scheme where each integer is written
72
- # as a 64-bit (8 byte) little-endian value.
73
- class PyLongsSerDe(PyObjectSerDe):
74
- def get_size(self, item):
75
- return int(8)
76
-
77
- def to_bytes(self, item):
78
- return struct.pack('<l', item)
79
-
80
- def from_bytes(self, data: bytes, offset: int):
81
- val = struct.unpack_from('<l', data, offset)[0]
82
- return (val, 8)
83
-
84
-
85
- # Implements a floating point encoding scheme where each value is written
86
- # as a 32-bit floating point value.
87
- class PyFloatsSerDe(PyObjectSerDe):
88
- def get_size(self, item):
89
- return int(4)
90
-
91
- def to_bytes(self, item):
92
- return struct.pack('<f', item)
93
-
94
- def from_bytes(self, data: bytes, offset: int):
95
- val = struct.unpack_from('<f', data, offset)[0]
96
- return (val, 4)
97
-
98
-
99
- # Implements a floating point encoding scheme where each value is written
100
- # as a 64-bit floating point value.
101
- class PyDoublesSerDe(PyObjectSerDe):
102
- def get_size(self, item):
103
- return int(8)
104
-
105
- def to_bytes(self, item):
106
- return struct.pack('<d', item)
107
-
108
- def from_bytes(self, data: bytes, offset: int):
109
- val = struct.unpack_from('<d', data, offset)[0]
110
- return (val, 8)
data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
@@ -1,77 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- import sys
19
-
20
- from _datasketches import TuplePolicy
21
-
22
- # This file provides an example Python Tuple Policy implementation.
23
- #
24
- # Each implementation must extend the PyTuplePolicy class and define
25
- # two methods:
26
- # * create_summary() returns a new Summary object
27
- # * update_summary(summary, update) applies the relevant policy to update the
28
- # provided summary with the data in update.
29
- # * __call__ may be similar to update_summary but allows a different
30
- # implementation for set operations (union and intersection)
31
-
32
- # Implements an accumulator summary policy, where new values are
33
- # added to the existing value.
34
- class AccumulatorPolicy(TuplePolicy):
35
- def __init__(self):
36
- TuplePolicy.__init__(self)
37
-
38
- def create_summary(self) -> int:
39
- return int(0)
40
-
41
- def update_summary(self, summary: int, update: int) -> int:
42
- summary += update
43
- return summary
44
-
45
- def __call__(self, summary: int, update: int) -> int:
46
- summary += update
47
- return summary
48
-
49
-
50
- # Implements a MAX rule, where the largest integer value is always kept
51
- class MaxIntPolicy(TuplePolicy):
52
- def __init__(self):
53
- TuplePolicy.__init__(self)
54
-
55
- def create_summary(self) -> int:
56
- return int(-sys.maxsize-1)
57
-
58
- def update_summary(self, summary: int, update: int) -> int:
59
- return max(summary, update)
60
-
61
- def __call__(self, summary: int, update: int) -> int:
62
- return max(summary, update)
63
-
64
-
65
- # Implements a MIN rule, where the smallest integer value is always kept
66
- class MinIntPolicy(TuplePolicy):
67
- def __init__(self):
68
- TuplePolicy.__init__(self)
69
-
70
- def create_summary(self) -> int:
71
- return int(sys.maxsize)
72
-
73
- def update_summary(self, summary: int, update: int) -> int:
74
- return min(summary, update)
75
-
76
- def __call__(self, summary: int, update: int) -> int:
77
- return min(summary, update)
data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
@@ -1,205 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- from abc import ABC, abstractmethod
19
-
20
- from _datasketches import _tuple_sketch, _compact_tuple_sketch, _update_tuple_sketch
21
- from _datasketches import _tuple_union, _tuple_intersection
22
- from _datasketches import _tuple_a_not_b, _tuple_jaccard_similarity
23
- from _datasketches import PyObjectSerDe, theta_sketch, TuplePolicy
24
-
25
- class tuple_sketch(ABC):
26
- """An abstract base class representing a Tuple Sketch."""
27
- _gadget: _tuple_sketch
28
-
29
- def __str__(self, print_items:bool=False):
30
- return self._gadget.to_string(print_items)
31
-
32
- def is_empty(self):
33
- """Returns True if the sketch is empty, otherwise False."""
34
- return self._gadget.is_empty()
35
-
36
- def get_estimate(self):
37
- """Returns an estimate of the distinct count of the input stream."""
38
- return self._gadget.get_estimate()
39
-
40
- def get_upper_bound(self, num_std_devs:int):
41
- """Returns an approximate upper bound on the estimate at standard deviations in {1, 2, 3}."""
42
- return self._gadget.get_upper_bound(num_std_devs)
43
-
44
- def get_lower_bound(self, num_std_devs:int):
45
- """Returns an approximate lower bound on the estimate at standard deviations in {1, 2, 3}."""
46
- return self._gadget.get_lower_bound(num_std_devs)
47
-
48
- def is_estimation_mode(self):
49
- """Returns True if the sketch is in estimation mode, otherwise False."""
50
- return self._gadget.is_estimation_mode()
51
-
52
- def get_theta(self):
53
- """Returns theta (the effective sampling rate) as a fraction from 0 to 1."""
54
- return self._gadget.get_theta()
55
-
56
- def get_theta64(self):
57
- """Returns theta as a 64-bit integer value."""
58
- return self._gadget.get_theta64()
59
-
60
- def get_num_retained(self):
61
- """Returns the number of items currently in the sketch."""
62
- return self._gadget.get_num_retained()
63
-
64
- def get_seed_hash(self):
65
- """Returns a hash of the seed used in the sketch."""
66
- return self._gadget.get_seed_hash()
67
-
68
- def is_ordered(self):
69
- """Returns True if the sketch entries are sorder, otherwise False."""
70
- return self._gadget.is_ordered()
71
-
72
- def __iter__(self):
73
- return self._gadget.__iter__()
74
-
75
-
76
- class compact_tuple_sketch(tuple_sketch):
77
- """An instance of a Tuple Sketch that has been compacted and can no longer accept updates."""
78
-
79
- def __init__(self, other:tuple_sketch, ordered:bool = True):
80
- if other == None:
81
- self._gadget = None
82
- else:
83
- self._gadget = _compact_tuple_sketch(other, ordered)
84
-
85
- def serialize(self, serde:PyObjectSerDe):
86
- """Serializes the sketch into a bytes object with the provided SerDe."""
87
- return self._gadget.serialize(serde)
88
-
89
- @classmethod
90
- def from_theta_sketch(cls, sketch:theta_sketch, summary, seed:int=_tuple_sketch.DEFAULT_SEED):
91
- """Creates a comapct Tuple Sketch from a Theta Sketch using a fixed summary value."""
92
- self = cls.__new__(cls)
93
- self._gadget = _compact_tuple_sketch(sketch, summary, seed)
94
- return self
95
-
96
- @classmethod
97
- def deserialize(cls, data:bytes, serde:PyObjectSerDe, seed:int=_tuple_sketch.DEFAULT_SEED):
98
- """Reads a bytes object and uses the provded SerDe to return the corresponding compact_tuple_sketch."""
99
- self = cls.__new__(cls)
100
- self._gadget = _compact_tuple_sketch.deserialize(data, serde, seed)
101
- return self
102
-
103
-
104
class update_tuple_sketch(tuple_sketch):
    """An instance of a Tuple Sketch that is available for updates.

    Requires a Policy object to handle Summary values.
    """

    def __init__(self, policy, lg_k:int = 12, p:float = 1.0, seed:int = _tuple_sketch.DEFAULT_SEED):
        # The wrapper stores the policy itself in addition to handing it to
        # the gadget (presumably to keep the Python object alive - confirm).
        self._policy = policy
        self._gadget = _update_tuple_sketch(policy, lg_k, p, seed)

    def update(self, datum, value):
        """Updates the sketch with the provided item and summary value."""
        gadget = self._gadget
        gadget.update(datum, value)

    def compact(self, ordered:bool = True) -> compact_tuple_sketch:
        """Returns a compacted form of the sketch, optionally sorting it."""
        # NOTE(review): this returns the gadget's own result object without
        # wrapping it in compact_tuple_sketch - confirm that is intended.
        compacted = self._gadget.compact(ordered)
        return compacted

    def reset(self):
        """Resets the sketch to the initial empty state."""
        gadget = self._gadget
        gadget.reset()
122
-
123
-
124
class tuple_union:
    """An object that can merge Tuple Sketches.

    Requires a Policy object to handle merging Summaries.
    """
    _policy: TuplePolicy

    def __init__(self, policy:TuplePolicy, lg_k:int = 12, p:float = 1.0, seed:int = _tuple_sketch.DEFAULT_SEED):
        # The policy is stored on the wrapper and also passed to the gadget.
        self._policy = policy
        self._gadget = _tuple_union(policy, lg_k, p, seed)

    def update(self, sketch:tuple_sketch):
        """Updates the union with the given sketch."""
        # The gadget works on the wrapped object, not on the Python wrapper.
        incoming = sketch._gadget
        self._gadget.update(incoming)

    def get_result(self, ordered:bool = True) -> compact_tuple_sketch:
        """Returns the sketch corresponding to the union result, optionally sorted."""
        merged = self._gadget.get_result(ordered)
        return compact_tuple_sketch(merged, ordered)

    def reset(self):
        """Resets the union to the initial empty state."""
        gadget = self._gadget
        gadget.reset()
143
-
144
-
145
class tuple_intersection:
    """An object that can intersect Tuple Sketches.

    Requires a Policy object to handle intersecting Summaries.
    """
    _policy: TuplePolicy

    def __init__(self, policy:TuplePolicy, seed:int = _tuple_sketch.DEFAULT_SEED):
        # The policy is stored on the wrapper and also passed to the gadget.
        self._policy = policy
        self._gadget = _tuple_intersection(policy, seed)

    def update(self, sketch:tuple_sketch):
        """Intersects the provided sketch with the current intersection state."""
        # The gadget works on the wrapped object, not on the Python wrapper.
        incoming = sketch._gadget
        self._gadget.update(incoming)

    def has_result(self) -> bool:
        """Returns True if the intersection has a valid result, otherwise False."""
        gadget = self._gadget
        return gadget.has_result()

    def get_result(self, ordered:bool = True) -> compact_tuple_sketch:
        """Returns the sketch corresponding to the intersection result, optionally sorted."""
        intersected = self._gadget.get_result(ordered)
        return compact_tuple_sketch(intersected, ordered)
164
-
165
-
166
class tuple_a_not_b:
    """An object that can perform the A-not-B operation between two sketches."""

    def __init__(self, seed:int = _tuple_sketch.DEFAULT_SEED):
        self._gadget = _tuple_a_not_b(seed)

    def compute(self, a:tuple_sketch, b:tuple_sketch, ordered:bool=True) -> compact_tuple_sketch:
        """Returns a sketch with the result of applying the A-not-B operation on the given inputs.

        :param a: the sketch to subtract from
        :param b: the sketch whose entries are removed from ``a``
        :param ordered: passed to the compact_tuple_sketch that wraps the result
        """
        # The gadget operates on the wrapped objects, not on the Python wrappers.
        difference = self._gadget.compute(a._gadget, b._gadget)
        # Pass ``ordered`` through explicitly; leaving it out would silently
        # build the result with the constructor's default of True.
        return compact_tuple_sketch(difference, ordered)
174
-
175
-
176
class tuple_jaccard_similarity:
    """Static helpers that forward Jaccard computations on Tuple Sketches to _tuple_jaccard_similarity."""

    @staticmethod
    def jaccard(a:tuple_sketch, b:tuple_sketch, seed:int=_tuple_sketch.DEFAULT_SEED):
        """Returns a list with {lower_bound, estimate, upper_bound} of the Jaccard similarity between sketches."""
        bounds = _tuple_jaccard_similarity.jaccard(a._gadget, b._gadget, seed)
        return bounds

    @staticmethod
    def exactly_equal(a:tuple_sketch, b:tuple_sketch, seed:int=_tuple_sketch.DEFAULT_SEED):
        """Returns True if sketch_a and sketch_b are equivalent, otherwise False."""
        equivalent = _tuple_jaccard_similarity.exactly_equal(a._gadget, b._gadget, seed)
        return equivalent

    @staticmethod
    def similarity_test(actual:tuple_sketch, expected:tuple_sketch, threshold:float, seed:int=_tuple_sketch.DEFAULT_SEED):
        """Tests similarity of an actual sketch against an expected sketch.

        Computes the lower bound of the Jaccard index J_{LB} of the actual and expected sketches.
        If J_{LB} >= threshold, then the sketches are considered to be similar with a confidence of
        97.7% and returns True, otherwise False.
        """
        verdict = _tuple_jaccard_similarity.similarity_test(
            actual._gadget, expected._gadget, threshold, seed
        )
        return verdict

    @staticmethod
    def dissimilarity_test(actual:tuple_sketch, expected:tuple_sketch, threshold:float, seed:int=_tuple_sketch.DEFAULT_SEED):
        """Tests dissimilarity of an actual sketch against an expected sketch.

        Computes the upper bound of the Jaccard index J_{UB} of the actual and expected sketches.
        If J_{UB} <= threshold, then the sketches are considered to be dissimilar with a confidence of
        97.7% and returns True, otherwise False.
        """
        verdict = _tuple_jaccard_similarity.dissimilarity_test(
            actual._gadget, expected._gadget, threshold, seed
        )
        return verdict
@@ -1,38 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- """The Apache DataSketches Library for Python
19
-
20
- Provided under the Apache License, Version 2.0
21
- <http://www.apache.org/licenses/LICENSE-2.0>
22
- """
23
-
24
- name = 'datasketches'
25
-
26
- from _datasketches import *
27
-
28
- from .PySerDe import *
29
- from .TuplePolicy import *
30
- from .KernelFunction import *
31
-
32
- # Wrappers around the pybind11 classes for cases where we
33
- # need to define a python object that is persisted within
34
- # the C++ object. Currently, the native python portion of
35
- # a class derived from a C++ class may be garbage collected
36
- # even though a pointer to the C++ portion remains valid.
37
- from .TupleWrapper import *
38
- from .DensityWrapper import *
@@ -1,98 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- //#include <memory>
21
- #include <pybind11/pybind11.h>
22
- #include <pybind11/numpy.h>
23
-
24
- #ifndef _KERNEL_FUNCTION_HPP_
25
- #define _KERNEL_FUNCTION_HPP_
26
-
27
- namespace py = pybind11;
28
-
29
- namespace datasketches {
30
-
31
- /**
32
- * @brief kernel_function provides the underlying base class from
33
- * which native Python kernels ultimately inherit. The actual
34
- * kernels implement KernelFunction, as shown in KernelFunction.py
35
- */
36
- struct kernel_function {
37
- virtual double operator()(py::array_t<double>& a, const py::array_t<double>& b) const = 0;
38
- virtual ~kernel_function() = default;
39
- };
40
-
41
- /**
42
- * @brief KernelFunction provides the "trampoline" class for pybind11
43
- * that allows for a native Python implementation of kernel
44
- * functions.
45
- */
46
- struct KernelFunction : public kernel_function {
47
- using kernel_function::kernel_function;
48
-
49
- /**
50
- * @brief Evaluates K(a,b), the kernel function for the given points a and b
51
- * @param a the first vector
52
- * @param b the second vector
53
- * @return The function value K(a,b)
54
- */
55
- double operator()(py::array_t<double>& a, const py::array_t<double>& b) const override {
56
- PYBIND11_OVERRIDE_PURE_NAME(
57
- double, // Return type
58
- kernel_function, // Parent class
59
- "__call__", // Name of function in python
60
- operator(), // Name of function in C++
61
- a, b // Arguemnts
62
- );
63
- }
64
- };
65
-
66
- /* The kernel_function_holder provides a concrete class that dispatches calls
67
- * from the sketch to the kernel_function. This class is needed to provide a
68
- * concrete object to produce a compiled library, but library users should
69
- * never need to use this directly.
70
- */
71
- struct kernel_function_holder {
72
- explicit kernel_function_holder(std::shared_ptr<kernel_function> kernel) : _kernel(kernel) {}
73
- kernel_function_holder(const kernel_function_holder& other) : _kernel(other._kernel) {}
74
- kernel_function_holder(kernel_function_holder&& other) : _kernel(std::move(other._kernel)) {}
75
- kernel_function_holder& operator=(const kernel_function_holder& other) { _kernel = other._kernel; return *this; }
76
- kernel_function_holder& operator=(kernel_function_holder&& other) { std::swap(_kernel, other._kernel); return *this; }
77
-
78
- double operator()(const std::vector<double>& a, const py::array_t<double>& b) const {
79
- py::array_t<double> a_arr(a.size(), a.data(), dummy_array_owner);
80
- return _kernel->operator()(a_arr, b);
81
- }
82
-
83
- double operator()(const std::vector<double>& a, const std::vector<double>& b) const {
84
- py::array_t<double> a_arr(a.size(), a.data(), dummy_array_owner);
85
- py::array_t<double> b_arr(b.size(), b.data(), dummy_array_owner);
86
- return _kernel->operator()(a_arr, b_arr);
87
- }
88
-
89
- private:
90
- // a dummy object to "own" arrays when translating from std::vector to avoid a copy:
91
- // https://github.com/pybind/pybind11/issues/323#issuecomment-575717041
92
- py::str dummy_array_owner;
93
- std::shared_ptr<kernel_function> _kernel;
94
- };
95
-
96
- }
97
-
98
- #endif // _KERNEL_FUNCTION_HPP_
@@ -1,113 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #include <pybind11/pybind11.h>
21
- #include <pybind11/functional.h>
22
- #include <sstream>
23
-
24
- #ifndef _PY_SERDE_HPP_
25
- #define _PY_SERDE_HPP_
26
-
27
- namespace py = pybind11;
28
-
29
- namespace datasketches {
30
-
31
- /**
32
- * @brief The py_object_serde is an abstract class that implements the
33
- * datasketches serde interface, and is used to allow custom Python
34
- * serialization of items wrapped as generic py::object types. The actual
35
- * Python implementation classes must extend the PyObjectSerDe class.
36
- */
37
- struct py_object_serde {
38
- /**
39
- * @brief Get the serialized size of an object, in bytes
40
- *
41
- * @param item A provided item
42
- * @return int64_t The serialized size of the item, in bytes
43
- */
44
- virtual int64_t get_size(const py::object& item) const = 0;
45
-
46
- /**
47
- * @brief Serializes an item to a bytes object
48
- *
49
- * @param item A provided item
50
- * @return The serialized image of the item as a Python bytes object
51
- */
52
- virtual py::bytes to_bytes(const py::object& item) const = 0;
53
-
54
- /**
55
- * @brief Constructs an object from a serialized image, reading the
56
- * incoming buffer starting at the specified offset.
57
- *
58
- * @param bytes A buffer containing items from a serialized sketch
59
- * @param offset The starting offset into the bytes buffer
60
- * @return A Python tuple of the reconstructed item and the total number of bytes read
61
- */
62
- virtual py::tuple from_bytes(py::bytes& bytes, size_t offset) const = 0;
63
-
64
- virtual ~py_object_serde() = default;
65
-
66
- // these methods are required by the serde interface; see common/include/serde.hpp for
67
- // default implementations for C++ std::string and numeric types.
68
- size_t size_of_item(const py::object& item) const;
69
- size_t serialize(void* ptr, size_t capacity, const py::object* items, unsigned num) const;
70
- size_t deserialize(const void* ptr, size_t capacity, py::object* items, unsigned num) const;
71
- };
72
-
73
- /**
74
- * @brief The PyObjectSerDe class provides a concrete base class
75
- * that pybind11 uses as a "trampoline" to pass calls through to
76
- * the abstract py_object_serde class. Custom Python serde implementations
77
- * must extend this class.
78
- */
79
- struct PyObjectSerDe : public py_object_serde {
80
- using py_object_serde::py_object_serde;
81
-
82
- // trampoline definitions -- need one for each virtual function
83
- int64_t get_size(const py::object& item) const override {
84
- PYBIND11_OVERRIDE_PURE(
85
- int64_t, // Return type
86
- py_object_serde, // Parent class
87
- get_size, // Name of function in C++ (must match Python name)
88
- item // Argument(s)
89
- );
90
- }
91
-
92
- py::bytes to_bytes(const py::object& item) const override {
93
- PYBIND11_OVERRIDE_PURE(
94
- py::bytes, // Return type
95
- py_object_serde, // Parent class
96
- to_bytes, // Name of function in C++ (must match Python name)
97
- item // Argument(s)
98
- );
99
- }
100
-
101
- py::tuple from_bytes(py::bytes& bytes, size_t offset) const override {
102
- PYBIND11_OVERRIDE_PURE(
103
- py::tuple, // Return type
104
- py_object_serde, // Parent class
105
- from_bytes, // Name of function in C++ (must match Python name)
106
- bytes, offset // Argument(s)
107
- );
108
- }
109
- };
110
-
111
- }
112
-
113
- #endif // _PY_SERDE_HPP_