datasketches 0.3.2 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (237) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +2 -2
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/NOTICE +1 -1
  15. data/vendor/datasketches-cpp/README.md +1 -3
  16. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  21. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  23. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  24. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  25. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  26. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  27. data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
  28. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
  29. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
  30. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
  31. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
  32. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  33. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  34. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  35. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  36. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
  37. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  38. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  42. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  43. data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  46. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
  47. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
  48. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
  49. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  50. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  51. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  52. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  53. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  54. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  55. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  56. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  57. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
  58. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
  59. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  60. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
  62. data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
  63. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  64. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  65. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  66. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  67. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -4
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
  70. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  71. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
  74. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
  75. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
  76. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  77. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  78. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  79. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
  80. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
  84. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  85. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  86. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  87. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  88. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  89. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  90. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +539 -0
  91. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  92. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
  94. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
  95. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
  96. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
  97. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  98. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  99. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  100. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  101. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  102. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  103. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
  107. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
  108. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  110. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  114. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  115. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  116. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  117. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
  120. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
  121. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  122. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  124. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
  125. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
  126. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
  127. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
  130. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  131. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  132. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  133. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  134. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  135. data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
  136. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  137. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  138. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  139. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  140. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  141. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  142. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  143. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  144. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  145. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  146. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  147. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  148. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  149. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  150. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  151. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  152. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
  153. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  154. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  155. metadata +47 -93
  156. data/vendor/datasketches-cpp/MANIFEST.in +0 -39
  157. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  160. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  161. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  162. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  163. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  164. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  165. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  166. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  167. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  168. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
  169. data/vendor/datasketches-cpp/python/README.md +0 -85
  170. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
  171. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
  172. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
  173. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
  174. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
  175. data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
  176. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
  177. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  178. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
  179. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
  180. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  181. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  182. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  183. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  184. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  185. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  186. data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
  187. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
  188. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
  189. data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
  190. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
  191. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
  192. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
  193. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
  194. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  195. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
  196. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
  197. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
  198. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
  199. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
  200. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  201. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  202. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  203. data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
  204. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  205. data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
  206. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
  207. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
  208. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
  209. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
  210. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
  211. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
  212. data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
  213. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  214. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
  215. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  216. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  217. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  218. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  219. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  220. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  221. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  222. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  223. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  224. data/vendor/datasketches-cpp/setup.py +0 -110
  225. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tox.ini +0 -26
  230. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  231. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  232. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  233. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  234. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -1,403 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "metadata": {},
6
- "source": [
7
- "## Theta Sketch Examples"
8
- ]
9
- },
10
- {
11
- "cell_type": "markdown",
12
- "metadata": {},
13
- "source": [
14
- "### Basic Sketch Usage"
15
- ]
16
- },
17
- {
18
- "cell_type": "code",
19
- "execution_count": 1,
20
- "metadata": {},
21
- "outputs": [],
22
- "source": [
23
- "from datasketches import theta_sketch, update_theta_sketch, compact_theta_sketch\n",
24
- "from datasketches import theta_union, theta_intersection, theta_a_not_b"
25
- ]
26
- },
27
- {
28
- "cell_type": "markdown",
29
- "metadata": {},
30
- "source": [
31
- "To start, we'll create a sketch with 1 million points in order to demonstrate basic sketch operations."
32
- ]
33
- },
34
- {
35
- "cell_type": "code",
36
- "execution_count": 2,
37
- "metadata": {},
38
- "outputs": [
39
- {
40
- "name": "stdout",
41
- "output_type": "stream",
42
- "text": [
43
- "### Theta sketch summary:\n",
44
- " num retained entries : 6560\n",
45
- " seed hash : 37836\n",
46
- " empty? : false\n",
47
- " ordered? : false\n",
48
- " estimation mode? : true\n",
49
- " theta (fraction) : 0.00654224\n",
50
- " theta (raw 64-bit) : 60341508738660257\n",
51
- " estimate : 1.00271e+06\n",
52
- " lower bound 95% conf : 978261\n",
53
- " upper bound 95% conf : 1.02778e+06\n",
54
- " lg nominal size : 12\n",
55
- " lg current size : 13\n",
56
- " resize factor : 8\n",
57
- "### End sketch summary\n",
58
- "\n"
59
- ]
60
- }
61
- ],
62
- "source": [
63
- "n = 1000000\n",
64
- "k = 12\n",
65
- "sk1 = update_theta_sketch(k)\n",
66
- "for i in range(0, n):\n",
67
- " sk1.update(i)\n",
68
- "print(sk1)"
69
- ]
70
- },
71
- {
72
- "cell_type": "markdown",
73
- "metadata": {},
74
- "source": [
75
- "The summary contains most data of interest, but we can also query for specific information. And in this case, since we know the exact number of distinct items presented to the sketch, we can look at the estimate, upper, and lower bounds as a percentage of the exact value."
76
- ]
77
- },
78
- {
79
- "cell_type": "code",
80
- "execution_count": 3,
81
- "metadata": {},
82
- "outputs": [
83
- {
84
- "name": "stdout",
85
- "output_type": "stream",
86
- "text": [
87
- "Upper bound (1 std. dev) as % of true value:\t 101.5208\n",
88
- "Sketch estimate as % of true value:\t\t 100.2715\n",
89
- "Lower bound (1 std. dev) as % of true value:\t 99.0374\n"
90
- ]
91
- }
92
- ],
93
- "source": [
94
- "print(\"Upper bound (1 std. dev) as % of true value:\\t\", round(100*sk1.get_upper_bound(1) / n, 4))\n",
95
- "print(\"Sketch estimate as % of true value:\\t\\t\", round(100*sk1.get_estimate() / n, 4))\n",
96
- "print(\"Lower bound (1 std. dev) as % of true value:\\t\", round(100*sk1.get_lower_bound(1) / n, 4))"
97
- ]
98
- },
99
- {
100
- "cell_type": "markdown",
101
- "metadata": {},
102
- "source": [
103
- "We can serialize and reconstruct the sketch. Serialization necessarily produces a compact sketch, meaning the sketch can be deserialized and queried or used for further unions or set operations but cannot be updated directly."
104
- ]
105
- },
106
- {
107
- "cell_type": "code",
108
- "execution_count": 4,
109
- "metadata": {},
110
- "outputs": [
111
- {
112
- "data": {
113
- "text/plain": [
114
- "52504"
115
- ]
116
- },
117
- "execution_count": 4,
118
- "metadata": {},
119
- "output_type": "execute_result"
120
- }
121
- ],
122
- "source": [
123
- "sk1_bytes = sk1.compact().serialize()\n",
124
- "len(sk1_bytes)"
125
- ]
126
- },
127
- {
128
- "cell_type": "code",
129
- "execution_count": 5,
130
- "metadata": {},
131
- "outputs": [
132
- {
133
- "name": "stdout",
134
- "output_type": "stream",
135
- "text": [
136
- "Estimate: \t\t 1002714.745231455\n",
137
- "Estimation mode: \t True\n"
138
- ]
139
- }
140
- ],
141
- "source": [
142
- "new_sk1 = compact_theta_sketch.deserialize(sk1_bytes)\n",
143
- "print(\"Estimate: \\t\\t\", new_sk1.get_estimate())\n",
144
- "print(\"Estimation mode: \\t\", new_sk1.is_estimation_mode())"
145
- ]
146
- },
147
- {
148
- "cell_type": "markdown",
149
- "metadata": {},
150
- "source": [
151
- "### Sketch Unions"
152
- ]
153
- },
154
- {
155
- "cell_type": "markdown",
156
- "metadata": {},
157
- "source": [
158
- "Theta Sketch unions make use of a separate union object. The union will accept input sketches with different values of $k$.\n",
159
- "\n",
160
- "For this example, we will create a sketch with distinct values that partially overlap those in `sk1`."
161
- ]
162
- },
163
- {
164
- "cell_type": "code",
165
- "execution_count": 6,
166
- "metadata": {},
167
- "outputs": [
168
- {
169
- "name": "stdout",
170
- "output_type": "stream",
171
- "text": [
172
- "### Theta sketch summary:\n",
173
- " num retained entries : 12488\n",
174
- " seed hash : 37836\n",
175
- " empty? : false\n",
176
- " ordered? : false\n",
177
- " estimation mode? : true\n",
178
- " theta (fraction) : 0.0123336\n",
179
- " theta (raw 64-bit) : 113757656857900725\n",
180
- " estimate : 1.01252e+06\n",
181
- " lower bound 95% conf : 994626\n",
182
- " upper bound 95% conf : 1.03073e+06\n",
183
- " lg nominal size : 13\n",
184
- " lg current size : 14\n",
185
- " resize factor : 8\n",
186
- "### End sketch summary\n",
187
- "\n"
188
- ]
189
- }
190
- ],
191
- "source": [
192
- "offset = int(3 * n / 4)\n",
193
- "sk2 = update_theta_sketch(k+1)\n",
194
- "for i in range(0, n):\n",
195
- " sk2.update(i + offset)\n",
196
- "print(sk2)"
197
- ]
198
- },
199
- {
200
- "cell_type": "markdown",
201
- "metadata": {},
202
- "source": [
203
- "We can now feed the sketches into the union. As constructed, the exact number of unique values presented to the two sketches is $\\frac{7}{4}n$."
204
- ]
205
- },
206
- {
207
- "cell_type": "code",
208
- "execution_count": null,
209
- "metadata": {},
210
- "outputs": [],
211
- "source": []
212
- },
213
- {
214
- "cell_type": "code",
215
- "execution_count": 7,
216
- "metadata": {},
217
- "outputs": [
218
- {
219
- "name": "stdout",
220
- "output_type": "stream",
221
- "text": [
222
- "Union estimate as % of true value: 99.6787\n"
223
- ]
224
- }
225
- ],
226
- "source": [
227
- "union = theta_union(k)\n",
228
- "union.update(sk1)\n",
229
- "union.update(sk2)\n",
230
- "result = union.get_result()\n",
231
- "print(\"Union estimate as % of true value: \", round(100*result.get_estimate()/(1.75*n), 4))"
232
- ]
233
- },
234
- {
235
- "cell_type": "markdown",
236
- "metadata": {},
237
- "source": [
238
- "### Sketch Intersections"
239
- ]
240
- },
241
- {
242
- "cell_type": "markdown",
243
- "metadata": {},
244
- "source": [
245
- "Beyond unions, theta sketches also support intersections through the use of an intersection object. These set intersections can have vastly better error bounds than the classic inclusion-exclusion rule used with sketches like HLL."
246
- ]
247
- },
248
- {
249
- "cell_type": "code",
250
- "execution_count": 8,
251
- "metadata": {},
252
- "outputs": [
253
- {
254
- "name": "stdout",
255
- "output_type": "stream",
256
- "text": [
257
- "Has result: True\n",
258
- "### Theta sketch summary:\n",
259
- " num retained entries : 1668\n",
260
- " seed hash : 37836\n",
261
- " empty? : false\n",
262
- " ordered? : true\n",
263
- " estimation mode? : true\n",
264
- " theta (fraction) : 0.00654224\n",
265
- " theta (raw 64-bit) : 60341508738660257\n",
266
- " estimate : 254959\n",
267
- " lower bound 95% conf : 242739\n",
268
- " upper bound 95% conf : 267789\n",
269
- "### End sketch summary\n",
270
- "\n"
271
- ]
272
- }
273
- ],
274
- "source": [
275
- "intersection = theta_intersection()\n",
276
- "intersection.update(sk1)\n",
277
- "intersection.update(sk2)\n",
278
- "print(\"Has result: \", intersection.has_result())\n",
279
- "result = intersection.get_result()\n",
280
- "print(result)"
281
- ]
282
- },
283
- {
284
- "cell_type": "markdown",
285
- "metadata": {},
286
- "source": [
287
- "In this case, we expect the sets to have an overlap of $\\frac{1}{4}n$."
288
- ]
289
- },
290
- {
291
- "cell_type": "code",
292
- "execution_count": 9,
293
- "metadata": {},
294
- "outputs": [
295
- {
296
- "name": "stdout",
297
- "output_type": "stream",
298
- "text": [
299
- "Intersection estimate as % of true value: 101.9834\n"
300
- ]
301
- }
302
- ],
303
- "source": [
304
- "print(\"Intersection estimate as % of true value: \", round(100*result.get_estimate()/(0.25*n), 4))"
305
- ]
306
- },
307
- {
308
- "cell_type": "markdown",
309
- "metadata": {},
310
- "source": [
311
- "### Set Subtraction (A-not-B)"
312
- ]
313
- },
314
- {
315
- "cell_type": "markdown",
316
- "metadata": {},
317
- "source": [
318
- "Finally, we have the set subtraction operation. Unlike `theta_union` and `theta_intersection`, `theta_a_not_b` always takes as input 2 sketches at a time, namely $a$ and $b$, and directly returns the result as a sketch."
319
- ]
320
- },
321
- {
322
- "cell_type": "code",
323
- "execution_count": 10,
324
- "metadata": {},
325
- "outputs": [
326
- {
327
- "name": "stdout",
328
- "output_type": "stream",
329
- "text": [
330
- "### Theta sketch summary:\n",
331
- " num retained entries : 4892\n",
332
- " seed hash : 37836\n",
333
- " empty? : false\n",
334
- " ordered? : true\n",
335
- " estimation mode? : true\n",
336
- " theta (fraction) : 0.00654224\n",
337
- " theta (raw 64-bit) : 60341508738660257\n",
338
- " estimate : 747756\n",
339
- " lower bound 95% conf : 726670\n",
340
- " upper bound 95% conf : 769452\n",
341
- "### End sketch summary\n",
342
- "\n"
343
- ]
344
- }
345
- ],
346
- "source": [
347
- "anb = theta_a_not_b()\n",
348
- "result = anb.compute(sk1, sk2)\n",
349
- "print(result)"
350
- ]
351
- },
352
- {
353
- "cell_type": "markdown",
354
- "metadata": {},
355
- "source": [
356
- "By using the same two sketches as before, the expected result here is $\\frac{3}{4}n$."
357
- ]
358
- },
359
- {
360
- "cell_type": "code",
361
- "execution_count": 11,
362
- "metadata": {},
363
- "outputs": [
364
- {
365
- "name": "stdout",
366
- "output_type": "stream",
367
- "text": [
368
- "A-not-B estimate as % of true value: 99.7008\n"
369
- ]
370
- }
371
- ],
372
- "source": [
373
- "print(\"A-not-B estimate as % of true value: \", round(100*result.get_estimate()/(0.75*n), 4))"
374
- ]
375
- }
376
- ],
377
- "metadata": {
378
- "kernelspec": {
379
- "display_name": "Python 3.10.6 64-bit",
380
- "language": "python",
381
- "name": "python3"
382
- },
383
- "language_info": {
384
- "codemirror_mode": {
385
- "name": "ipython",
386
- "version": 3
387
- },
388
- "file_extension": ".py",
389
- "mimetype": "text/x-python",
390
- "name": "python",
391
- "nbconvert_exporter": "python",
392
- "pygments_lexer": "ipython3",
393
- "version": "3.10.6"
394
- },
395
- "vscode": {
396
- "interpreter": {
397
- "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
398
- }
399
- }
400
- },
401
- "nbformat": 4,
402
- "nbformat_minor": 2
403
- }
@@ -1,21 +0,0 @@
1
- :: Licensed to the Apache Software Foundation (ASF) under one
2
- :: or more contributor license agreements. See the NOTICE file
3
- :: distributed with this work for additional information
4
- :: regarding copyright ownership. The ASF licenses this file
5
- :: to you under the Apache License, Version 2.0 (the
6
- :: "License"); you may not use this file except in compliance
7
- :: with the License. You may obtain a copy of the License at
8
- ::
9
- :: http://www.apache.org/licenses/LICENSE-2.0
10
- ::
11
- :: Unless required by applicable law or agreed to in writing,
12
- :: software distributed under the License is distributed on an
13
- :: "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- :: KIND, either express or implied. See the License for the
15
- :: specific language governing permissions and limitations
16
- :: under the License.
17
-
18
-
19
- @echo off
20
- :: Takes path to the Python interpreter and returns the path to pybind11
21
- %1 -c "import pybind11,sys;sys.stdout.write(pybind11.get_cmake_dir())"
@@ -1,18 +0,0 @@
1
- # Licensed to the Apache Software Foundation (ASF) under one
2
- # or more contributor license agreements. See the NOTICE file
3
- # distributed with this work for additional information
4
- # regarding copyright ownership. The ASF licenses this file
5
- # to you under the Apache License, Version 2.0 (the
6
- # "License"); you may not use this file except in compliance
7
- # with the License. You may obtain a copy of the License at
8
- #
9
- # http://www.apache.org/licenses/LICENSE-2.0
10
- #
11
- # Unless required by applicable law or agreed to in writing,
12
- # software distributed under the License is distributed on an
13
- # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
- # KIND, either express or implied. See the License for the
15
- # specific language governing permissions and limitations
16
- # under the License.
17
-
18
- name = "datasketches"
@@ -1,101 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #include <pybind11/pybind11.h>
21
-
22
- #include "count_min.hpp"
23
- #include "common_defs.hpp"
24
-
25
- namespace py = pybind11;
26
-
27
- template<typename W>
28
- void bind_count_min_sketch(py::module &m, const char* name) {
29
- using namespace datasketches;
30
-
31
- py::class_<count_min_sketch<W>>(m, name)
32
- .def(py::init<uint8_t, uint32_t, uint64_t>(), py::arg("num_hashes"), py::arg("num_buckets"), py::arg("seed")=DEFAULT_SEED)
33
- .def(py::init<const count_min_sketch<W>&>())
34
- .def_static("suggest_num_buckets", &count_min_sketch<W>::suggest_num_buckets, py::arg("relative_error"),
35
- "Suggests the number of buckets needed to achieve an accuracy within the provided "
36
- "relative_error. For example, when relative_error = 0.05, the returned frequency estimates "
37
- "satisfy the 'relative_error' guarantee that never overestimates the weights but may "
38
- "underestimate the weights by 5% of the total weight in the sketch. "
39
- "Returns the number of hash buckets at every level of the sketch required in order to obtain "
40
- "the specified relative error.")
41
- .def_static("suggest_num_hashes", &count_min_sketch<W>::suggest_num_hashes, py::arg("confidence"),
42
- "Suggests the number of hashes needed to achieve the provided confidence. For example, "
43
- "with 95% confidence, frequency estimates satisfy the 'relative_error' guarantee. "
44
- "Returns the number of hash functions that are required in order to achieve the specified "
45
- "confidence of the sketch. confidence = 1 - delta, with delta denoting the sketch failure probability.")
46
- .def("__str__", &count_min_sketch<W>::to_string,
47
- "Produces a string summary of the sketch")
48
- .def("to_string", &count_min_sketch<W>::to_string,
49
- "Produces a string summary of the sketch")
50
- .def("is_empty", &count_min_sketch<W>::is_empty,
51
- "Returns True if the sketch has seen no items, otherwise False")
52
- .def("get_num_hashes", &count_min_sketch<W>::get_num_hashes,
53
- "Returns the configured number of hashes for the sketch")
54
- .def("get_num_buckets", &count_min_sketch<W>::get_num_buckets,
55
- "Returns the configured number of buckets for the sketch")
56
- .def("get_seed", &count_min_sketch<W>::get_seed,
57
- "Returns the base hash seed for the sketch")
58
- .def("get_relative_error", &count_min_sketch<W>::get_relative_error,
59
- "Returns the maximum permissible error for any frequency estimate query")
60
- .def("get_total_weight", &count_min_sketch<W>::get_total_weight,
61
- "Returns the total weight currently inserted into the stream")
62
- .def("update", static_cast<void (count_min_sketch<W>::*)(int64_t, W)>(&count_min_sketch<W>::update), py::arg("item"), py::arg("weight")=1.0,
63
- "Updates the sketch with the given 64-bit integer value")
64
- .def("update", static_cast<void (count_min_sketch<W>::*)(const std::string&, W)>(&count_min_sketch<W>::update), py::arg("item"), py::arg("weight")=1.0,
65
- "Updates the sketch with the given string")
66
- .def("get_estimate", static_cast<W (count_min_sketch<W>::*)(int64_t) const>(&count_min_sketch<W>::get_estimate), py::arg("item"),
67
- "Returns an estimate of the frequency of the provided 64-bit integer value")
68
- .def("get_estimate", static_cast<W (count_min_sketch<W>::*)(const std::string&) const>(&count_min_sketch<W>::get_estimate), py::arg("item"),
69
- "Returns an estimate of the frequency of the provided string")
70
- .def("get_upper_bound", static_cast<W (count_min_sketch<W>::*)(int64_t) const>(&count_min_sketch<W>::get_upper_bound), py::arg("item"),
71
- "Returns an upper bound on the estimate for the given 64-bit integer value")
72
- .def("get_upper_bound", static_cast<W (count_min_sketch<W>::*)(const std::string&) const>(&count_min_sketch<W>::get_upper_bound), py::arg("item"),
73
- "Returns an upper bound on the estimate for the provided string")
74
- .def("get_lower_bound", static_cast<W (count_min_sketch<W>::*)(int64_t) const>(&count_min_sketch<W>::get_lower_bound), py::arg("item"),
75
- "Returns an lower bound on the estimate for the given 64-bit integer value")
76
- .def("get_lower_bound", static_cast<W (count_min_sketch<W>::*)(const std::string&) const>(&count_min_sketch<W>::get_lower_bound), py::arg("item"),
77
- "Returns an lower bound on the estimate for the provided string")
78
- .def("merge", &count_min_sketch<W>::merge, py::arg("other"),
79
- "Merges the provided other sketch into this one")
80
- .def("get_serialized_size_bytes", &count_min_sketch<W>::get_serialized_size_bytes,
81
- "Returns the size in bytes of the serialized image of the sketch")
82
- .def(
83
- "serialize",
84
- [](const count_min_sketch<W>& sk) {
85
- auto bytes = sk.serialize();
86
- return py::bytes(reinterpret_cast<const char*>(bytes.data()), bytes.size());
87
- },
88
- "Serializes the sketch into a bytes object"
89
- )
90
- .def_static(
91
- "deserialize",
92
- [](const std::string& bytes) { return count_min_sketch<W>::deserialize(bytes.data(), bytes.size()); },
93
- py::arg("bytes"),
94
- "Reads a bytes object and returns the corresponding count_min_sketch"
95
- );
96
- }
97
-
98
- void init_count_min(py::module &m) {
99
- bind_count_min_sketch<double>(m, "count_min_sketch");
100
- }
101
-
@@ -1,76 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #include <pybind11/pybind11.h>
21
-
22
- #include "cpc_sketch.hpp"
23
- #include "cpc_union.hpp"
24
- #include "cpc_common.hpp"
25
- #include "common_defs.hpp"
26
-
27
- namespace py = pybind11;
28
-
29
- void init_cpc(py::module &m) {
30
- using namespace datasketches;
31
-
32
- py::class_<cpc_sketch>(m, "cpc_sketch")
33
- .def(py::init<uint8_t, uint64_t>(), py::arg("lg_k")=cpc_constants::DEFAULT_LG_K, py::arg("seed")=DEFAULT_SEED)
34
- .def(py::init<const cpc_sketch&>())
35
- .def("__str__", &cpc_sketch::to_string,
36
- "Produces a string summary of the sketch")
37
- .def("to_string", &cpc_sketch::to_string,
38
- "Produces a string summary of the sketch")
39
- .def<void (cpc_sketch::*)(uint64_t)>("update", &cpc_sketch::update, py::arg("datum"),
40
- "Updates the sketch with the given 64-bit integer value")
41
- .def<void (cpc_sketch::*)(double)>("update", &cpc_sketch::update, py::arg("datum"),
42
- "Updates the sketch with the given 64-bit floating point")
43
- .def<void (cpc_sketch::*)(const std::string&)>("update", &cpc_sketch::update, py::arg("datum"),
44
- "Updates the sketch with the given string")
45
- .def("is_empty", &cpc_sketch::is_empty,
46
- "Returns True if the sketch is empty, otherwise False")
47
- .def("get_estimate", &cpc_sketch::get_estimate,
48
- "Estimate of the distinct count of the input stream")
49
- .def("get_lower_bound", &cpc_sketch::get_lower_bound, py::arg("kappa"),
50
- "Returns an approximate lower bound on the estimate for kappa values in {1, 2, 3}, roughly corresponding to standard deviations")
51
- .def("get_upper_bound", &cpc_sketch::get_upper_bound, py::arg("kappa"),
52
- "Returns an approximate upper bound on the estimate for kappa values in {1, 2, 3}, roughly corresponding to standard deviations")
53
- .def(
54
- "serialize",
55
- [](const cpc_sketch& sk) {
56
- auto bytes = sk.serialize();
57
- return py::bytes(reinterpret_cast<const char*>(bytes.data()), bytes.size());
58
- },
59
- "Serializes the sketch into a bytes object"
60
- )
61
- .def_static(
62
- "deserialize",
63
- [](const std::string& bytes) { return cpc_sketch::deserialize(bytes.data(), bytes.size()); },
64
- py::arg("bytes"),
65
- "Reads a bytes object and returns the corresponding cpc_sketch"
66
- );
67
-
68
- py::class_<cpc_union>(m, "cpc_union")
69
- .def(py::init<uint8_t, uint64_t>(), py::arg("lg_k"), py::arg("seed")=DEFAULT_SEED)
70
- .def(py::init<const cpc_union&>())
71
- .def("update", (void (cpc_union::*)(const cpc_sketch&)) &cpc_union::update, py::arg("sketch"),
72
- "Updates the union with the provided CPC sketch")
73
- .def("get_result", &cpc_union::get_result,
74
- "Returns a CPC sketch with the result of the union")
75
- ;
76
- }
@@ -1,58 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #include <pybind11/pybind11.h>
21
-
22
- namespace py = pybind11;
23
-
24
- // sketches
25
- void init_hll(py::module& m);
26
- void init_kll(py::module& m);
27
- void init_fi(py::module& m);
28
- void init_cpc(py::module& m);
29
- void init_theta(py::module& m);
30
- void init_tuple(py::module& m);
31
- void init_vo(py::module& m);
32
- void init_req(py::module& m);
33
- void init_quantiles(py::module& m);
34
- void init_count_min(py::module& m);
35
- void init_density(py::module& m);
36
- void init_vector_of_kll(py::module& m);
37
-
38
- // supporting objects
39
- void init_kolmogorov_smirnov(py::module& m);
40
- void init_serde(py::module& m);
41
-
42
- PYBIND11_MODULE(_datasketches, m) {
43
- init_hll(m);
44
- init_kll(m);
45
- init_fi(m);
46
- init_cpc(m);
47
- init_theta(m);
48
- init_tuple(m);
49
- init_vo(m);
50
- init_req(m);
51
- init_quantiles(m);
52
- init_count_min(m);
53
- init_density(m);
54
- init_vector_of_kll(m);
55
-
56
- init_kolmogorov_smirnov(m);
57
- init_serde(m);
58
- }