datasketches 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (237) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/NOTICE +1 -1
  4. data/README.md +0 -2
  5. data/ext/datasketches/cpc_wrapper.cpp +2 -2
  6. data/ext/datasketches/kll_wrapper.cpp +0 -10
  7. data/lib/datasketches/version.rb +1 -1
  8. data/lib/datasketches.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  10. data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
  11. data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
  12. data/vendor/datasketches-cpp/Doxyfile +2827 -0
  13. data/vendor/datasketches-cpp/LICENSE +0 -76
  14. data/vendor/datasketches-cpp/NOTICE +1 -1
  15. data/vendor/datasketches-cpp/README.md +1 -3
  16. data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
  18. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
  19. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
  20. data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
  21. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
  22. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
  23. data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
  24. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  25. data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
  26. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
  27. data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
  28. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
  29. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
  30. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
  31. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
  32. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
  33. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
  34. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
  35. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
  36. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
  37. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
  38. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
  42. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
  43. data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
  46. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
  47. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
  48. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
  49. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
  50. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
  51. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
  52. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
  53. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
  54. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
  55. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
  56. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
  57. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
  58. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
  59. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
  60. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
  62. data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
  63. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
  64. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
  65. data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
  66. data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
  67. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
  70. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
  71. data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
  74. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
  75. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
  76. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
  77. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
  78. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
  79. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
  80. data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
  84. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
  85. data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
  86. data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
  87. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
  88. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
  89. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
  90. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
  91. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
  92. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
  94. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
  95. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
  96. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
  97. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
  98. data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
  99. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
  100. data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
  101. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
  102. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
  103. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
  107. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
  108. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
  110. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
  114. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
  115. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
  116. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
  117. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
  120. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
  121. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
  122. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  123. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
  124. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
  125. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
  126. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
  127. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
  130. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
  131. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
  132. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
  133. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
  134. data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
  135. data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
  136. data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
  137. data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
  138. data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
  139. data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
  140. data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
  141. data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
  142. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
  143. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
  144. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
  145. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
  146. data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
  147. data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
  148. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
  149. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
  150. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
  151. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
  152. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
  153. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  154. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  155. metadata +47 -93
  156. data/vendor/datasketches-cpp/MANIFEST.in +0 -39
  157. data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
  160. data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
  161. data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
  162. data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
  163. data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
  164. data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
  165. data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
  166. data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
  167. data/vendor/datasketches-cpp/pyproject.toml +0 -23
  168. data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
  169. data/vendor/datasketches-cpp/python/README.md +0 -85
  170. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
  171. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
  172. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
  173. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
  174. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
  175. data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
  176. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
  177. data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
  178. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
  179. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
  180. data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
  181. data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
  182. data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
  183. data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
  184. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
  185. data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
  186. data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
  187. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
  188. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
  189. data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
  190. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
  191. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
  192. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
  193. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
  194. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
  195. data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
  196. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
  197. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
  198. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
  199. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
  200. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
  201. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
  202. data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
  203. data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
  204. data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
  205. data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
  206. data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
  207. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
  208. data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
  209. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
  210. data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
  211. data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
  212. data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
  213. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
  214. data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
  215. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  216. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  217. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  218. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  219. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  220. data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
  221. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
  222. data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
  223. data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
  224. data/vendor/datasketches-cpp/setup.py +0 -110
  225. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
  226. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
  227. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  228. data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
  229. data/vendor/datasketches-cpp/tox.ini +0 -26
  230. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
  231. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
  232. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
  233. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
  234. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
  235. data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
  236. data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
  237. data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -29,6 +29,7 @@
29
29
  namespace datasketches {
30
30
 
31
31
  /**
32
+ * Bounds on ratios in sampled sets.
32
33
  * This class is used to compute the bounds on the estimate of the ratio <i>|B| / |A|</i>, where:
33
34
  * <ul>
34
35
  * <li><i>|A|</i> is the unknown size of a set <i>A</i> of unique identifiers.</li>
@@ -28,6 +28,7 @@
28
28
  namespace datasketches {
29
29
 
30
30
  /**
31
+ * Bounds on ratios in Theta sketched sets.
31
32
  * This is to compute the bounds on the estimate of the ratio <i>B / A</i>, where:
32
33
  * <ul>
33
34
  * <li><i>A</i> is a Theta Sketch of population <i>PopA</i>.</li>
@@ -50,8 +51,8 @@ class bounds_on_ratios_in_theta_sketched_sets {
50
51
  public:
51
52
  /**
52
53
  * Gets the approximate lower bound for B over A based on a 95% confidence interval
53
- * @param sketchA the sketch A
54
- * @param sketchB the sketch B
54
+ * @param sketch_a the sketch A
55
+ * @param sketch_b the sketch B
55
56
  * @return the approximate lower bound for B over A
56
57
  */
57
58
  template<typename SketchA, typename SketchB>
@@ -72,8 +73,8 @@ public:
72
73
 
73
74
  /**
74
75
  * Gets the approximate upper bound for B over A based on a 95% confidence interval
75
- * @param sketchA the sketch A
76
- * @param sketchB the sketch B
76
+ * @param sketch_a the sketch A
77
+ * @param sketch_b the sketch B
77
78
  * @return the approximate upper bound for B over A
78
79
  */
79
80
  template<typename SketchA, typename SketchB>
@@ -94,8 +95,8 @@ public:
94
95
 
95
96
  /**
96
97
  * Gets the estimate for B over A
97
- * @param sketchA the sketch A
98
- * @param sketchB the sketch B
98
+ * @param sketch_a the sketch A
99
+ * @param sketch_b the sketch B
99
100
  * @return the estimate for B over A
100
101
  */
101
102
  template<typename SketchA, typename SketchB>
@@ -25,6 +25,16 @@
25
25
 
26
26
  namespace datasketches {
27
27
 
28
+ // forward declaration
29
+ template<typename A> class theta_a_not_b_alloc;
30
+
31
+ // alias with default allocator for convenience
32
+ using theta_a_not_b = theta_a_not_b_alloc<std::allocator<uint64_t>>;
33
+
34
+ /**
35
+ * Theta A-not-B (set difference).
36
+ * Computes set difference of Theta sketches.
37
+ */
28
38
  template<typename Allocator = std::allocator<uint64_t>>
29
39
  class theta_a_not_b_alloc {
30
40
  public:
@@ -33,11 +43,19 @@ public:
33
43
  using CompactSketch = compact_theta_sketch_alloc<Allocator>;
34
44
  using State = theta_set_difference_base<Entry, ExtractKey, CompactSketch, Allocator>;
35
45
 
46
+ /**
47
+ * Constructor
48
+ * @param seed for the hash function that was used to create the sketch
49
+ * @param allocator to use for allocating and deallocating memory
50
+ */
36
51
  explicit theta_a_not_b_alloc(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
37
52
 
38
53
  /**
39
- * Computes the a-not-b set operation given two sketches.
40
- * @return the result of a-not-b
54
+ * Computes the A-not-B set operation given two sketches.
55
+ * @param a sketch A
56
+ * @param b sketch B
57
+ * @param ordered optional flag to specify if an ordered sketch should be produced
58
+ * @return the result of A-not-B as a compact sketch
41
59
  */
42
60
  template<typename FwdSketch, typename Sketch>
43
61
  CompactSketch compute(FwdSketch&& a, const Sketch& b, bool ordered = true) const;
@@ -46,9 +64,6 @@ private:
46
64
  State state_;
47
65
  };
48
66
 
49
- // alias with default allocator for convenience
50
- using theta_a_not_b = theta_a_not_b_alloc<std::allocator<uint64_t>>;
51
-
52
67
  } /* namespace datasketches */
53
68
 
54
69
  #include "theta_a_not_b_impl.hpp"
@@ -25,15 +25,21 @@
25
25
 
26
26
  namespace datasketches {
27
27
 
28
+ /// Theta constants
28
29
  namespace theta_constants {
30
+ /// hash table resize factor
29
31
  using resize_factor = datasketches::resize_factor;
30
- //enum resize_factor { X1, X2, X4, X8 };
31
- const uint64_t MAX_THETA = LLONG_MAX; // signed max for compatibility with Java
32
+ /// default resize factor
33
+ const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
34
+
35
+ /// max theta - signed max for compatibility with Java
36
+ const uint64_t MAX_THETA = LLONG_MAX;
37
+ /// min log2 of K
32
38
  const uint8_t MIN_LG_K = 5;
39
+ /// max log2 of K
33
40
  const uint8_t MAX_LG_K = 26;
34
-
41
+ /// default log2 of K
35
42
  const uint8_t DEFAULT_LG_K = 12;
36
- const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
37
43
  }
38
44
 
39
45
  } /* namespace datasketches */
@@ -57,7 +57,7 @@ public:
57
57
  // consistent way of initializing theta from p
58
58
  // avoids multiplication if p == 1 since it might not yield MAX_THETA exactly
59
59
  static uint64_t starting_theta_from_p(float p) {
60
- if (p < 1) return static_cast<float>(theta_constants::MAX_THETA) * p;
60
+ if (p < 1) return static_cast<uint64_t>(theta_constants::MAX_THETA * p);
61
61
  return theta_constants::MAX_THETA;
62
62
  }
63
63
 
@@ -25,6 +25,16 @@
25
25
 
26
26
  namespace datasketches {
27
27
 
28
+ // forward declaration
29
+ template<typename A> class theta_intersection_alloc;
30
+
31
+ // alias with default allocator for convenience
32
+ using theta_intersection = theta_intersection_alloc<std::allocator<uint64_t>>;
33
+
34
+ /**
35
+ * Theta intersection.
36
+ * Computes intersection of Theta sketches.
37
+ */
28
38
  template<typename Allocator = std::allocator<uint64_t>>
29
39
  class theta_intersection_alloc {
30
40
  public:
@@ -33,6 +43,7 @@ public:
33
43
  using Sketch = theta_sketch_alloc<Allocator>;
34
44
  using CompactSketch = compact_theta_sketch_alloc<Allocator>;
35
45
 
46
+ // there is no payload in Theta sketch entry
36
47
  struct nop_policy {
37
48
  void operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
38
49
  unused(incoming_entry);
@@ -41,7 +52,7 @@ public:
41
52
  };
42
53
  using State = theta_intersection_base<Entry, ExtractKey, nop_policy, Sketch, CompactSketch, Allocator>;
43
54
 
44
- /*
55
+ /**
45
56
  * Constructor
46
57
  * @param seed for the hash function that was used to create the sketch
47
58
  * @param allocator to use for allocating and deallocating memory
@@ -61,7 +72,7 @@ public:
61
72
  * Produces a copy of the current state of the intersection.
62
73
  * If update() was not called, the state is the infinite "universe",
63
74
  * which is considered an undefined state, and throws an exception.
64
- * @param ordered optional flag to specify if ordered sketch should be produced
75
+ * @param ordered optional flag to specify if an ordered sketch should be produced
65
76
  * @return the result of the intersection
66
77
  */
67
78
  CompactSketch get_result(bool ordered = true) const;
@@ -76,9 +87,6 @@ private:
76
87
  State state_;
77
88
  };
78
89
 
79
- // alias with default allocator for convenience
80
- using theta_intersection = theta_intersection_alloc<std::allocator<uint64_t>>;
81
-
82
90
  } /* namespace datasketches */
83
91
 
84
92
  #include "theta_intersection_impl.hpp"
@@ -49,8 +49,8 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
49
49
  if (!is_valid_) { // first update, copy or move incoming sketch
50
50
  is_valid_ = true;
51
51
  const uint8_t lg_size = lg_size_from_count(sketch.get_num_retained(), theta_update_sketch_base<EN, EK, A>::REBUILD_THRESHOLD);
52
- table_ = hash_table(lg_size, lg_size, resize_factor::X1, 1, table_.theta_, table_.seed_, table_.allocator_, table_.is_empty_);
53
- for (auto& entry: sketch) {
52
+ table_ = hash_table(lg_size, lg_size - 1, resize_factor::X1, 1, table_.theta_, table_.seed_, table_.allocator_, table_.is_empty_);
53
+ for (auto&& entry: sketch) {
54
54
  auto result = table_.find(EK()(entry));
55
55
  if (result.second) {
56
56
  throw std::invalid_argument("duplicate key, possibly corrupted input sketch");
@@ -64,7 +64,7 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
64
64
  matched_entries.reserve(max_matches);
65
65
  uint32_t match_count = 0;
66
66
  uint32_t count = 0;
67
- for (auto& entry: sketch) {
67
+ for (auto&& entry: sketch) {
68
68
  if (EK()(entry) < table_.theta_) {
69
69
  auto result = table_.find(EK()(entry));
70
70
  if (result.second) {
@@ -88,8 +88,8 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
88
88
  if (table_.theta_ == theta_constants::MAX_THETA) table_.is_empty_ = true;
89
89
  } else {
90
90
  const uint8_t lg_size = lg_size_from_count(match_count, theta_update_sketch_base<EN, EK, A>::REBUILD_THRESHOLD);
91
- table_ = hash_table(lg_size, lg_size, resize_factor::X1, 1, table_.theta_, table_.seed_, table_.allocator_, table_.is_empty_);
92
- for (uint32_t i = 0; i < match_count; i++) {
91
+ table_ = hash_table(lg_size, lg_size - 1, resize_factor::X1, 1, table_.theta_, table_.seed_, table_.allocator_, table_.is_empty_);
92
+ for (uint32_t i = 0; i < match_count; ++i) {
93
93
  auto result = table_.find(EK()(matched_entries[i]));
94
94
  table_.insert(result.first, std::move(matched_entries[i]));
95
95
  }
@@ -28,9 +28,9 @@ state_(seed, nop_policy(), allocator)
28
28
  {}
29
29
 
30
30
  template<typename A>
31
- template<typename SS>
32
- void theta_intersection_alloc<A>::update(SS&& sketch) {
33
- state_.update(std::forward<SS>(sketch));
31
+ template<typename FwdSketch>
32
+ void theta_intersection_alloc<A>::update(FwdSketch&& sketch) {
33
+ state_.update(std::forward<FwdSketch>(sketch));
34
34
  }
35
35
 
36
36
  template<typename A>
@@ -26,10 +26,11 @@
26
26
 
27
27
  namespace datasketches {
28
28
 
29
+ /// Theta Jaccard similarity alias
29
30
  template<typename Allocator = std::allocator<uint64_t>>
30
31
  using theta_jaccard_similarity_alloc = jaccard_similarity_base<theta_union_alloc<Allocator>, theta_intersection_alloc<Allocator>, trivial_extract_key>;
31
32
 
32
- // alias with default allocator for convenience
33
+ /// Theta Jaccard similarity alias with default allocator
33
34
  using theta_jaccard_similarity = theta_jaccard_similarity_alloc<std::allocator<uint64_t>>;
34
35
 
35
36
  } /* namespace datasketches */
@@ -30,6 +30,7 @@
30
30
 
31
31
  namespace datasketches {
32
32
 
33
+ /// Base class for Jaccard similarity
33
34
  template<typename Union, typename Intersection, typename ExtractKey>
34
35
  class jaccard_similarity_base {
35
36
  public:
@@ -65,7 +65,7 @@ CS theta_set_difference_base<EN, EK, CS, A>::compute(FwdSketch&& a, const Sketch
65
65
  }
66
66
 
67
67
  // scan A lookup B
68
- for (auto& entry: a) {
68
+ for (auto&& entry: a) {
69
69
  const uint64_t hash = EK()(entry);
70
70
  if (hash < theta) {
71
71
  auto result = table.find(hash);
@@ -25,6 +25,22 @@
25
25
 
26
26
  namespace datasketches {
27
27
 
28
+ // forward declarations
29
+ template<typename A> class theta_sketch_alloc;
30
+ template<typename A> class update_theta_sketch_alloc;
31
+ template<typename A> class compact_theta_sketch_alloc;
32
+ template<typename A> class wrapped_compact_theta_sketch_alloc;
33
+
34
+ /// Theta sketch alias with default allocator
35
+ using theta_sketch = theta_sketch_alloc<std::allocator<uint64_t>>;
36
+ /// Update Theta sketch alias with default allocator
37
+ using update_theta_sketch = update_theta_sketch_alloc<std::allocator<uint64_t>>;
38
+ /// Compact Theta sketch alias with default allocator
39
+ using compact_theta_sketch = compact_theta_sketch_alloc<std::allocator<uint64_t>>;
40
+ /// Wrapped Compact Theta sketch alias with default allocator
41
+ using wrapped_compact_theta_sketch = wrapped_compact_theta_sketch_alloc<std::allocator<uint64_t>>;
42
+
43
+ /// Abstract base class for Theta sketch
28
44
  template<typename Allocator = std::allocator<uint64_t>>
29
45
  class base_theta_sketch_alloc {
30
46
  public:
@@ -106,6 +122,7 @@ protected:
106
122
  virtual void print_items(std::ostringstream& os) const = 0;
107
123
  };
108
124
 
125
+ /// Base class for the Theta Sketch, a generalization of the Kth Minimum Value (KMV) sketch.
109
126
  template<typename Allocator = std::allocator<uint64_t>>
110
127
  class theta_sketch_alloc: public base_theta_sketch_alloc<Allocator> {
111
128
  public:
@@ -149,6 +166,11 @@ protected:
149
166
  // forward declaration
150
167
  template<typename A> class compact_theta_sketch_alloc;
151
168
 
169
+ /**
170
+ * Update Theta sketch.
171
+ * The purpose of this class is to build a Theta sketch from input data via the update() methods.
172
+ * There is no constructor. Use builder instead.
173
+ */
152
174
  template<typename Allocator = std::allocator<uint64_t>>
153
175
  class update_theta_sketch_alloc: public theta_sketch_alloc<Allocator> {
154
176
  public:
@@ -163,11 +185,33 @@ public:
163
185
  // No constructor here. Use builder instead.
164
186
  class builder;
165
187
 
166
- update_theta_sketch_alloc(const update_theta_sketch_alloc&) = default;
167
- update_theta_sketch_alloc(update_theta_sketch_alloc&&) noexcept = default;
188
+ /**
189
+ * Copy constructor
190
+ * @param other sketch to be copied
191
+ */
192
+ update_theta_sketch_alloc(const update_theta_sketch_alloc& other) = default;
193
+
194
+ /**
195
+ * Move constructor
196
+ * @param other sketch to be moved
197
+ */
198
+ update_theta_sketch_alloc(update_theta_sketch_alloc&& other) noexcept = default;
199
+
168
200
  virtual ~update_theta_sketch_alloc() = default;
169
- update_theta_sketch_alloc& operator=(const update_theta_sketch_alloc&) = default;
170
- update_theta_sketch_alloc& operator=(update_theta_sketch_alloc&&) = default;
201
+
202
+ /**
203
+ * Copy assignment
204
+ * @param other sketch to be copied
205
+ * @return reference to this sketch
206
+ */
207
+ update_theta_sketch_alloc& operator=(const update_theta_sketch_alloc& other) = default;
208
+
209
+ /**
210
+ * Move assignment
211
+ * @param other sketch to be moved
212
+ * @return reference to this sketch
213
+ */
214
+ update_theta_sketch_alloc& operator=(update_theta_sketch_alloc&& other) = default;
171
215
 
172
216
  virtual Allocator get_allocator() const;
173
217
  virtual bool is_empty() const;
@@ -287,7 +331,7 @@ public:
287
331
 
288
332
  /**
289
333
  * Converts this sketch to a compact sketch (ordered or unordered).
290
- * @param ordered optional flag to specify if ordered sketch should be produced
334
+ * @param ordered optional flag to specify if an ordered sketch should be produced
291
335
  * @return compact sketch
292
336
  */
293
337
  compact_theta_sketch_alloc<Allocator> compact(bool ordered = true) const;
@@ -307,8 +351,10 @@ private:
307
351
  virtual void print_specifics(std::ostringstream& os) const;
308
352
  };
309
353
 
310
- // compact sketch
311
-
354
+ /**
355
+ * Compact Theta sketch.
356
+ * This is an immutable form of the Theta sketch, the form that can be serialized and deserialized.
357
+ */
312
358
  template<typename Allocator = std::allocator<uint64_t>>
313
359
  class compact_theta_sketch_alloc: public theta_sketch_alloc<Allocator> {
314
360
  public:
@@ -327,13 +373,42 @@ public:
327
373
  // - as a result of a set operation
328
374
  // - by deserializing a previously serialized compact sketch
329
375
 
376
+ /**
377
+ * Copy constructor.
378
+ * Constructs a compact sketch from any other type of Theta sketch
379
+ * @param other sketch to be constructed from
380
+ * @param ordered if true make the resulting sketch ordered
381
+ */
330
382
  template<typename Other>
331
383
  compact_theta_sketch_alloc(const Other& other, bool ordered);
332
- compact_theta_sketch_alloc(const compact_theta_sketch_alloc&) = default;
333
- compact_theta_sketch_alloc(compact_theta_sketch_alloc&&) noexcept = default;
384
+
385
+ /**
386
+ * Copy constructor
387
+ * @param other sketch to be copied
388
+ */
389
+ compact_theta_sketch_alloc(const compact_theta_sketch_alloc& other) = default;
390
+
391
+ /**
392
+ * Move constructor
393
+ * @param other sketch to be moved
394
+ */
395
+ compact_theta_sketch_alloc(compact_theta_sketch_alloc&& other) noexcept = default;
396
+
334
397
  virtual ~compact_theta_sketch_alloc() = default;
335
- compact_theta_sketch_alloc& operator=(const compact_theta_sketch_alloc&) = default;
336
- compact_theta_sketch_alloc& operator=(compact_theta_sketch_alloc&&) = default;
398
+
399
+ /**
400
+ * Copy assignment
401
+ * @param other sketch to be copied
402
+ * @return reference to this sketch
403
+ */
404
+ compact_theta_sketch_alloc& operator=(const compact_theta_sketch_alloc& other) = default;
405
+
406
+ /**
407
+ * Move assignment
408
+ * @param other sketch to be moved
409
+ * @return reference to this sketch
410
+ */
411
+ compact_theta_sketch_alloc& operator=(compact_theta_sketch_alloc&& other) = default;
337
412
 
338
413
  virtual Allocator get_allocator() const;
339
414
  virtual bool is_empty() const;
@@ -385,6 +460,7 @@ public:
385
460
  * This method deserializes a sketch from a given stream.
386
461
  * @param is input stream
387
462
  * @param seed the seed for the hash function that was used to create the sketch
463
+ * @param allocator instance of an Allocator
388
464
  * @return an instance of the sketch
389
465
  */
390
466
  static compact_theta_sketch_alloc deserialize(std::istream& is,
@@ -395,14 +471,12 @@ public:
395
471
  * @param bytes pointer to the array of bytes
396
472
  * @param size the size of the array
397
473
  * @param seed the seed for the hash function that was used to create the sketch
474
+ * @param allocator instance of an Allocator
398
475
  * @return an instance of the sketch
399
476
  */
400
477
  static compact_theta_sketch_alloc deserialize(const void* bytes, size_t size,
401
478
  uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
402
479
 
403
- // for internal use
404
- compact_theta_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<uint64_t, Allocator>&& entries);
405
-
406
480
  private:
407
481
  enum flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED };
408
482
 
@@ -423,20 +497,33 @@ private:
423
497
  static compact_theta_sketch_alloc deserialize_v4(uint8_t preamble_longs, std::istream& is, uint64_t seed, const Allocator& allocator);
424
498
 
425
499
  virtual void print_specifics(std::ostringstream& os) const;
500
+
501
+ template<typename E, typename EK, typename P, typename S, typename CS, typename A> friend class theta_union_base;
502
+ template<typename E, typename EK, typename P, typename S, typename CS, typename A> friend class theta_intersection_base;
503
+ template<typename E, typename EK, typename CS, typename A> friend class theta_set_difference_base;
504
+ compact_theta_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<uint64_t, Allocator>&& entries);
426
505
  };
427
506
 
507
+ /// Update Theta sketch builder
428
508
  template<typename Allocator>
429
509
  class update_theta_sketch_alloc<Allocator>::builder: public theta_base_builder<builder, Allocator> {
430
510
  public:
511
+ /**
512
+ * Constructor
513
+ * @param allocator
514
+ */
431
515
  builder(const Allocator& allocator = Allocator());
516
+ /// @return instance of Update Theta sketch
432
517
  update_theta_sketch_alloc build() const;
433
518
  };
434
519
 
435
- // This is to wrap a buffer containing a serialized compact sketch and use it in a set operation avoiding some cost of deserialization.
436
- // It does not take the ownership of the buffer.
437
-
520
+ /**
521
+ * Wrapped Compact Theta sketch.
522
+ * This is to wrap a buffer containing a serialized compact sketch and use it in a set operation avoiding some cost of deserialization.
523
+ * It does not take the ownership of the buffer.
524
+ */
438
525
  template<typename Allocator = std::allocator<uint64_t>>
439
- class wrapped_compact_theta_sketch_alloc : public base_theta_sketch_alloc<Allocator> {
526
+ class wrapped_compact_theta_sketch_alloc: public base_theta_sketch_alloc<Allocator> {
440
527
  public:
441
528
  class const_iterator;
442
529
 
@@ -447,7 +534,17 @@ public:
447
534
  uint32_t get_num_retained() const;
448
535
  uint16_t get_seed_hash() const;
449
536
 
537
+ /**
538
+ * Const iterator over hash values in this sketch.
539
+ * @return begin iterator
540
+ */
450
541
  const_iterator begin() const;
542
+
543
+ /**
544
+ * Const iterator pointing past the valid range.
545
+ * Not to be incremented or dereferenced.
546
+ * @return end iterator
547
+ */
451
548
  const_iterator end() const;
452
549
 
453
550
  /**
@@ -455,6 +552,7 @@ public:
455
552
  * @param bytes pointer to the array of bytes
456
553
  * @param size the size of the array
457
554
  * @param seed the seed for the hash function that was used to create the sketch
555
+ * @param dump_on_error if true prints hex dump of the input
458
556
  * @return an instance of the sketch
459
557
  */
460
558
  static const wrapped_compact_theta_sketch_alloc wrap(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED, bool dump_on_error = false);
@@ -471,15 +569,22 @@ private:
471
569
  };
472
570
 
473
571
  template<typename Allocator>
474
- class wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator: public std::iterator<std::input_iterator_tag, uint64_t> {
572
+ class wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator {
475
573
  public:
574
+ using iterator_category = std::input_iterator_tag;
575
+ using value_type = const uint64_t;
576
+ using difference_type = void;
577
+ using pointer = value_type*;
578
+ using reference = uint64_t;
579
+
476
580
  const_iterator(const void* ptr, uint8_t entry_bits, uint32_t num_entries, uint32_t index);
477
581
  const_iterator& operator++();
478
582
  const_iterator operator++(int);
479
583
  bool operator==(const const_iterator& other) const;
480
584
  bool operator!=(const const_iterator& other) const;
481
- const uint64_t& operator*() const;
482
- const uint64_t* operator->() const;
585
+ reference operator*() const;
586
+ pointer operator->() const;
587
+
483
588
  private:
484
589
  const void* ptr_;
485
590
  uint8_t entry_bits_;
@@ -492,12 +597,6 @@ private:
492
597
  uint64_t buffer_[8];
493
598
  };
494
599
 
495
- // aliases with default allocator for convenience
496
- using theta_sketch = theta_sketch_alloc<std::allocator<uint64_t>>;
497
- using update_theta_sketch = update_theta_sketch_alloc<std::allocator<uint64_t>>;
498
- using compact_theta_sketch = compact_theta_sketch_alloc<std::allocator<uint64_t>>;
499
- using wrapped_compact_theta_sketch = wrapped_compact_theta_sketch_alloc<std::allocator<uint64_t>>;
500
-
501
600
  } /* namespace datasketches */
502
601
 
503
602
  #include "theta_sketch_impl.hpp"
@@ -357,7 +357,7 @@ void compact_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
357
357
  write(os, flags_byte);
358
358
  write(os, get_seed_hash());
359
359
  if (preamble_longs > 1) {
360
- write<uint32_t>(os, entries_.size());
360
+ write(os, static_cast<uint32_t>(entries_.size()));
361
361
  write<uint32_t>(os, 0); // unused
362
362
  }
363
363
  if (this->is_estimation_mode()) write(os, this->theta_);
@@ -385,7 +385,7 @@ auto compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const
385
385
  *ptr++ = flags_byte;
386
386
  ptr += copy_to_mem(get_seed_hash(), ptr);
387
387
  if (preamble_longs > 1) {
388
- ptr += copy_to_mem<uint32_t>(entries_.size(), ptr);
388
+ ptr += copy_to_mem(static_cast<uint32_t>(entries_.size()), ptr);
389
389
  ptr += sizeof(uint32_t); // unused
390
390
  }
391
391
  if (this->is_estimation_mode()) ptr += copy_to_mem(theta_, ptr);
@@ -432,7 +432,7 @@ void compact_theta_sketch_alloc<A>::serialize_version_4(std::ostream& os) const
432
432
  const uint8_t entry_bits = 64 - compute_min_leading_zeros();
433
433
 
434
434
  // store num_entries as whole bytes since whole-byte blocks will follow (most probably)
435
- const uint8_t num_entries_bytes = whole_bytes_to_hold_bits<uint8_t>(32 - count_leading_zeros_in_u32(entries_.size()));
435
+ const uint8_t num_entries_bytes = whole_bytes_to_hold_bits<uint8_t>(32 - count_leading_zeros_in_u32(static_cast<uint32_t>(entries_.size())));
436
436
 
437
437
  write(os, preamble_longs);
438
438
  write(os, COMPRESSED_SERIAL_VERSION);
@@ -447,7 +447,7 @@ void compact_theta_sketch_alloc<A>::serialize_version_4(std::ostream& os) const
447
447
  write(os, flags_byte);
448
448
  write(os, get_seed_hash());
449
449
  if (this->is_estimation_mode()) write(os, this->theta_);
450
- uint32_t num_entries = entries_.size();
450
+ uint32_t num_entries = static_cast<uint32_t>(entries_.size());
451
451
  for (unsigned i = 0; i < num_entries_bytes; ++i) {
452
452
  write<uint8_t>(os, num_entries & 0xff);
453
453
  num_entries >>= 8;
@@ -488,7 +488,7 @@ auto compact_theta_sketch_alloc<A>::serialize_version_4(unsigned header_size_byt
488
488
  const size_t compressed_bits = entry_bits * entries_.size();
489
489
 
490
490
  // store num_entries as whole bytes since whole-byte blocks will follow (most probably)
491
- const uint8_t num_entries_bytes = whole_bytes_to_hold_bits<uint8_t>(32 - count_leading_zeros_in_u32(entries_.size()));
491
+ const uint8_t num_entries_bytes = whole_bytes_to_hold_bits<uint8_t>(32 - count_leading_zeros_in_u32(static_cast<uint32_t>(entries_.size())));
492
492
 
493
493
  const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + num_entries_bytes
494
494
  + whole_bytes_to_hold_bits(compressed_bits);
@@ -510,7 +510,7 @@ auto compact_theta_sketch_alloc<A>::serialize_version_4(unsigned header_size_byt
510
510
  if (this->is_estimation_mode()) {
511
511
  ptr += copy_to_mem(theta_, ptr);
512
512
  }
513
- uint32_t num_entries = entries_.size();
513
+ uint32_t num_entries = static_cast<uint32_t>(entries_.size());
514
514
  for (unsigned i = 0; i < num_entries_bytes; ++i) {
515
515
  *ptr++ = num_entries & 0xff;
516
516
  num_entries >>= 8;
@@ -869,13 +869,13 @@ bool wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator==(c
869
869
  }
870
870
 
871
871
  template<typename Allocator>
872
- const uint64_t& wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator*() const {
872
+ auto wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator*() const -> reference {
873
873
  if (entry_bits_ == 64) return *reinterpret_cast<const uint64_t*>(ptr_);
874
874
  return buffer_[buf_i_];
875
875
  }
876
876
 
877
877
  template<typename Allocator>
878
- const uint64_t* wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator->() const {
878
+ auto wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator->() const -> pointer {
879
879
  if (entry_bits_ == 64) return reinterpret_cast<const uint64_t*>(ptr_);
880
880
  return buffer_ + buf_i_;
881
881
  }