datasketches 0.1.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (205) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +17 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  6. data/ext/datasketches/ext.cpp +1 -1
  7. data/ext/datasketches/ext.h +4 -0
  8. data/ext/datasketches/extconf.rb +1 -1
  9. data/ext/datasketches/fi_wrapper.cpp +6 -8
  10. data/ext/datasketches/hll_wrapper.cpp +13 -14
  11. data/ext/datasketches/kll_wrapper.cpp +28 -76
  12. data/ext/datasketches/theta_wrapper.cpp +27 -41
  13. data/ext/datasketches/vo_wrapper.cpp +4 -6
  14. data/lib/datasketches/version.rb +1 -1
  15. data/vendor/datasketches-cpp/CMakeLists.txt +10 -0
  16. data/vendor/datasketches-cpp/LICENSE +40 -3
  17. data/vendor/datasketches-cpp/NOTICE +1 -1
  18. data/vendor/datasketches-cpp/README.md +4 -4
  19. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +18 -7
  20. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  21. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  24. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  25. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  26. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  27. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  28. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  29. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +13 -3
  31. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +20 -20
  32. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +116 -105
  33. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +22 -6
  34. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +140 -101
  35. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  36. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +20 -20
  37. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -16
  38. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +6 -6
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +10 -10
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +21 -21
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  42. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  43. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  46. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  47. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +102 -105
  48. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  49. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +141 -125
  50. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  51. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +5 -5
  52. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  53. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +81 -109
  54. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +25 -24
  55. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  56. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +5 -5
  57. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +89 -105
  58. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +13 -13
  59. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +130 -165
  60. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +21 -22
  61. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  62. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  63. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  64. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  65. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +88 -83
  66. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  67. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +34 -45
  68. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +7 -8
  69. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +41 -52
  70. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +7 -8
  71. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +220 -251
  72. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +42 -42
  73. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +36 -38
  74. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  75. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +15 -14
  76. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +47 -44
  77. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +62 -87
  78. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +121 -128
  79. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  80. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  81. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  82. data/vendor/datasketches-cpp/hll/include/hll.hpp +25 -53
  83. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +8 -8
  84. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +36 -36
  85. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +28 -28
  86. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  87. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +37 -37
  88. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +57 -61
  89. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  90. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  91. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  92. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  93. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  94. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  95. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +40 -25
  96. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +50 -6
  97. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +164 -136
  98. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  99. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  100. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  101. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  102. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +178 -88
  103. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  104. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  105. data/vendor/datasketches-cpp/python/CMakeLists.txt +12 -6
  106. data/vendor/datasketches-cpp/python/README.md +52 -49
  107. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  108. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  109. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  110. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -6
  111. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +4 -2
  112. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  113. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +38 -28
  114. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
  115. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  116. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -2
  117. data/vendor/datasketches-cpp/python/tests/kll_test.py +5 -5
  118. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  119. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  120. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  121. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  122. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  123. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +18 -8
  124. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  125. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +488 -0
  126. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  127. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  128. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  129. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  130. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  131. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  132. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  133. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  134. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  135. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  136. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  137. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  138. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +19 -13
  139. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +130 -127
  140. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  141. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +41 -49
  142. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  143. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  144. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  145. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -44
  146. data/vendor/datasketches-cpp/setup.py +11 -6
  147. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  148. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +3 -2
  149. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  150. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  151. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
  152. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  153. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  154. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  155. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +11 -4
  156. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  157. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +26 -28
  158. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  159. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  160. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  161. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  162. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +24 -36
  163. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  164. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  165. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +163 -256
  166. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +250 -651
  167. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  168. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  169. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +6 -1
  170. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  171. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +10 -21
  172. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +44 -30
  173. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  174. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  175. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  176. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +60 -5
  177. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +74 -235
  178. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
  179. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  180. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
  181. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  182. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  183. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +57 -70
  184. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  185. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
  186. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +18 -21
  187. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +13 -16
  188. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +7 -6
  189. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +3 -3
  190. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
  191. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +13 -16
  192. metadata +51 -36
  193. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  194. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  195. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  196. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  197. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  198. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  199. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  200. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  201. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  202. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  203. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  204. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  205. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -24,10 +24,11 @@
24
24
 
25
25
  namespace datasketches {
26
26
 
27
- typedef hll_sketch_alloc<test_allocator<void>> hll_sketch_test_alloc;
27
+ using hll_sketch_test_alloc = hll_sketch_alloc<test_allocator<uint8_t>>;
28
+ using alloc = test_allocator<uint8_t>;
28
29
 
29
- static void runCheckCopy(int lgConfigK, target_hll_type tgtHllType) {
30
- hll_sketch_test_alloc sk(lgConfigK, tgtHllType);
30
+ static void runCheckCopy(uint8_t lgConfigK, target_hll_type tgtHllType) {
31
+ hll_sketch_test_alloc sk(lgConfigK, tgtHllType, false, 0);
31
32
 
32
33
  for (int i = 0; i < 7; ++i) {
33
34
  sk.update(i);
@@ -65,13 +66,13 @@ TEST_CASE("hll sketch: check copies", "[hll_sketch]") {
65
66
  }
66
67
 
67
68
  static void copyAs(target_hll_type srcType, target_hll_type dstType) {
68
- int lgK = 8;
69
+ uint8_t lgK = 8;
69
70
  int n1 = 7;
70
71
  int n2 = 24;
71
72
  int n3 = 1000;
72
73
  int base = 0;
73
74
 
74
- hll_sketch_test_alloc src(lgK, srcType);
75
+ hll_sketch_test_alloc src(lgK, srcType, false, 0);
75
76
  for (int i = 0; i < n1; ++i) {
76
77
  src.update(i + base);
77
78
  }
@@ -108,9 +109,9 @@ TEST_CASE("hll sketch: check copy as", "[hll_sketch]") {
108
109
  TEST_CASE("hll sketch: check misc1", "[hll_sketch]") {
109
110
  test_allocator_total_bytes = 0;
110
111
  {
111
- int lgConfigK = 8;
112
+ uint8_t lgConfigK = 8;
112
113
  target_hll_type srcType = target_hll_type::HLL_8;
113
- hll_sketch_test_alloc sk(lgConfigK, srcType);
114
+ hll_sketch_test_alloc sk(lgConfigK, srcType, false, 0);
114
115
 
115
116
  for (int i = 0; i < 7; ++i) { sk.update(i); } // LIST
116
117
  REQUIRE(sk.get_compact_serialization_bytes() == 36);
@@ -123,7 +124,7 @@ TEST_CASE("hll sketch: check misc1", "[hll_sketch]") {
123
124
  sk.update(24); // HLL
124
125
  REQUIRE(sk.get_updatable_serialization_bytes() == 40 + 256);
125
126
 
126
- const int hllBytes = HllUtil<>::HLL_BYTE_ARR_START + (1 << lgConfigK);
127
+ const auto hllBytes = hll_constants::HLL_BYTE_ARR_START + (1 << lgConfigK);
127
128
  REQUIRE(sk.get_compact_serialization_bytes() == hllBytes);
128
129
  REQUIRE(hll_sketch::get_max_updatable_serialization_bytes(lgConfigK, HLL_8) == hllBytes);
129
130
  }
@@ -134,22 +135,22 @@ TEST_CASE("hll sketch: check num std dev", "[hll_sketch]") {
134
135
  REQUIRE_THROWS_AS(HllUtil<>::checkNumStdDev(0), std::invalid_argument);
135
136
  }
136
137
 
137
- void checkSerializationSizes(const int lgConfigK, target_hll_type tgtHllType) {
138
- hll_sketch_test_alloc sk(lgConfigK, tgtHllType);
138
+ void checkSerializationSizes(uint8_t lgConfigK, target_hll_type tgtHllType) {
139
+ hll_sketch_test_alloc sk(lgConfigK, tgtHllType, false, 0);
139
140
  int i;
140
141
 
141
142
  // LIST
142
143
  for (i = 0; i < 7; ++i) { sk.update(i); }
143
- int expected = HllUtil<>::LIST_INT_ARR_START + (i << 2);
144
+ auto expected = hll_constants::LIST_INT_ARR_START + (i << 2);
144
145
  REQUIRE(sk.get_compact_serialization_bytes() == expected);
145
- expected = HllUtil<>::LIST_INT_ARR_START + (4 << HllUtil<>::LG_INIT_LIST_SIZE);
146
+ expected = hll_constants::LIST_INT_ARR_START + (4 << hll_constants::LG_INIT_LIST_SIZE);
146
147
  REQUIRE(sk.get_updatable_serialization_bytes() == expected);
147
148
 
148
149
  // SET
149
150
  for (i = 7; i < 24; ++i) { sk.update(i); }
150
- expected = HllUtil<>::HASH_SET_INT_ARR_START + (i << 2);
151
+ expected = hll_constants::HASH_SET_INT_ARR_START + (i << 2);
151
152
  REQUIRE(sk.get_compact_serialization_bytes() == expected);
152
- expected = HllUtil<>::HASH_SET_INT_ARR_START + (4 << HllUtil<>::LG_INIT_SET_SIZE);
153
+ expected = hll_constants::HASH_SET_INT_ARR_START + (4 << hll_constants::LG_INIT_SET_SIZE);
153
154
  REQUIRE(sk.get_updatable_serialization_bytes() == expected);
154
155
  }
155
156
 
@@ -162,27 +163,23 @@ TEST_CASE("hll sketch: check ser sizes", "[hll_sketch]") {
162
163
  }
163
164
 
164
165
  TEST_CASE("hll sketch: exercise to string", "[hll_sketch]") {
165
- test_allocator_total_bytes = 0;
166
- {
167
- hll_sketch_test_alloc sk(15, HLL_4);
168
- for (int i = 0; i < 25; ++i) { sk.update(i); }
169
- std::ostringstream oss(std::ios::binary);
170
- oss << sk.to_string(false, true, true, true);
171
- for (int i = 25; i < (1 << 20); ++i) { sk.update(i); }
172
- oss << sk.to_string(false, true, true, true);
173
- oss << sk.to_string(false, true, true, false);
174
-
175
- sk = hll_sketch_test_alloc(8, HLL_8);
176
- for (int i = 0; i < 25; ++i) { sk.update(i); }
177
- oss << sk.to_string(false, true, true, true);
178
- }
179
- REQUIRE(test_allocator_total_bytes == 0);
166
+ hll_sketch sk(15, HLL_4);
167
+ for (int i = 0; i < 25; ++i) { sk.update(i); }
168
+ std::ostringstream oss(std::ios::binary);
169
+ oss << sk.to_string(false, true, true, true);
170
+ for (int i = 25; i < (1 << 20); ++i) { sk.update(i); }
171
+ oss << sk.to_string(false, true, true, true);
172
+ oss << sk.to_string(false, true, true, false);
173
+
174
+ sk = hll_sketch(8, HLL_8);
175
+ for (int i = 0; i < 25; ++i) { sk.update(i); }
176
+ oss << sk.to_string(false, true, true, true);
180
177
  }
181
178
 
182
179
  // Creates and serializes then deserializes sketch.
183
180
  // Returns true if deserialized sketch is compact.
184
- static bool checkCompact(const int lgK, const int n, const target_hll_type type, bool compact) {
185
- hll_sketch_test_alloc sk(lgK, type);
181
+ static bool checkCompact(uint8_t lgK, const int n, const target_hll_type type, bool compact) {
182
+ hll_sketch_test_alloc sk(lgK, type, false, 0);
186
183
  for (int i = 0; i < n; ++i) { sk.update(i); }
187
184
 
188
185
  std::stringstream ss(std::ios::in | std::ios::out | std::ios::binary);
@@ -194,7 +191,7 @@ static bool checkCompact(const int lgK, const int n, const target_hll_type type,
194
191
  REQUIRE(ss.tellp() == sk.get_updatable_serialization_bytes());
195
192
  }
196
193
 
197
- hll_sketch_test_alloc sk2 = hll_sketch_test_alloc::deserialize(ss);
194
+ hll_sketch_test_alloc sk2 = hll_sketch_test_alloc::deserialize(ss, alloc(0));
198
195
  REQUIRE(sk2.get_estimate() == Approx(n).margin(0.01));
199
196
  bool isCompact = sk2.is_compact();
200
197
 
@@ -204,7 +201,7 @@ static bool checkCompact(const int lgK, const int n, const target_hll_type type,
204
201
  TEST_CASE("hll sketch: check compact flag", "[hll_sketch]") {
205
202
  test_allocator_total_bytes = 0;
206
203
  {
207
- int lgK = 8;
204
+ uint8_t lgK = 8;
208
205
  // unless/until we create non-updatable "direct" versions,
209
206
  // deserialized image should never be compact
210
207
  // LIST: follows serialization request
@@ -233,11 +230,10 @@ TEST_CASE("hll sketch: check compact flag", "[hll_sketch]") {
233
230
  TEST_CASE("hll sketch: check k limits", "[hll_sketch]") {
234
231
  test_allocator_total_bytes = 0;
235
232
  {
236
- hll_sketch_test_alloc sketch1(HllUtil<>::MIN_LOG_K, target_hll_type::HLL_8);
237
- hll_sketch_test_alloc sketch2(HllUtil<>::MAX_LOG_K, target_hll_type::HLL_4);
238
- REQUIRE_THROWS_AS(hll_sketch_test_alloc(HllUtil<>::MIN_LOG_K - 1), std::invalid_argument);
239
-
240
- REQUIRE_THROWS_AS(hll_sketch_test_alloc(HllUtil<>::MAX_LOG_K + 1), std::invalid_argument);
233
+ hll_sketch_test_alloc sketch1(hll_constants::MIN_LOG_K, target_hll_type::HLL_8, false, 0);
234
+ hll_sketch_test_alloc sketch2(hll_constants::MAX_LOG_K, target_hll_type::HLL_4, false, 0);
235
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc(hll_constants::MIN_LOG_K - 1, target_hll_type::HLL_4, false, 0), std::invalid_argument);
236
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc(hll_constants::MAX_LOG_K + 1, target_hll_type::HLL_4, false, 0), std::invalid_argument);
241
237
  }
242
238
  REQUIRE(test_allocator_total_bytes == 0);
243
239
  }
@@ -245,7 +241,7 @@ TEST_CASE("hll sketch: check k limits", "[hll_sketch]") {
245
241
  TEST_CASE("hll sketch: check input types", "[hll_sketch]") {
246
242
  test_allocator_total_bytes = 0;
247
243
  {
248
- hll_sketch_test_alloc sk(8, target_hll_type::HLL_8);
244
+ hll_sketch_test_alloc sk(8, target_hll_type::HLL_8, false, 0);
249
245
 
250
246
  // inserting the same value as a variety of input types
251
247
  sk.update((uint8_t) 102);
@@ -271,19 +267,19 @@ TEST_CASE("hll sketch: check input types", "[hll_sketch]") {
271
267
  sk.update(str.c_str(), str.length());
272
268
  REQUIRE(sk.get_estimate() == Approx(4.0).margin(0.01));
273
269
 
274
- sk = hll_sketch_test_alloc(8, target_hll_type::HLL_6);
270
+ sk = hll_sketch_test_alloc(8, target_hll_type::HLL_6, false, 0);
275
271
  sk.update((float) 0.0);
276
272
  sk.update((float) -0.0);
277
273
  sk.update((double) 0.0);
278
274
  sk.update((double) -0.0);
279
275
  REQUIRE(sk.get_estimate() == Approx(1.0).margin(0.01));
280
276
 
281
- sk = hll_sketch_test_alloc(8, target_hll_type::HLL_4);
277
+ sk = hll_sketch_test_alloc(8, target_hll_type::HLL_4, false, 0);
282
278
  sk.update(std::nanf("3"));
283
279
  sk.update(std::nan("9"));
284
280
  REQUIRE(sk.get_estimate() == Approx(1.0).margin(0.01));
285
281
 
286
- sk = hll_sketch_test_alloc(8, target_hll_type::HLL_4);
282
+ sk = hll_sketch_test_alloc(8, target_hll_type::HLL_4, false, 0);
287
283
  sk.update(nullptr, 0);
288
284
  sk.update("");
289
285
  REQUIRE(sk.is_empty());
@@ -294,24 +290,24 @@ TEST_CASE("hll sketch: check input types", "[hll_sketch]") {
294
290
  TEST_CASE("hll sketch: deserialize list mode buffer overrun", "[hll_sketch]") {
295
291
  test_allocator_total_bytes = 0;
296
292
  {
297
- hll_sketch_test_alloc sketch(10);
293
+ hll_sketch_test_alloc sketch(10, target_hll_type::HLL_4, false, 0);
298
294
  sketch.update(1);
299
295
  auto bytes = sketch.serialize_compact();
300
- REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), 7), std::out_of_range);
301
- REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
296
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), 7, 0), std::out_of_range);
297
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1, 0), std::out_of_range);
302
298
 
303
299
  // check for leaks on stream exceptions
304
300
  {
305
301
  std::stringstream ss;
306
302
  ss.exceptions(std::ios::failbit | std::ios::badbit);
307
303
  ss.str(std::string((char*)bytes.data(), 7));
308
- REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss), std::ios_base::failure);
304
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss, alloc(0)), std::ios_base::failure);
309
305
  }
310
306
  {
311
307
  std::stringstream ss;
312
308
  ss.exceptions(std::ios::failbit | std::ios::badbit);
313
309
  ss.str(std::string((char*)bytes.data(), bytes.size() - 1));
314
- REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss), std::ios_base::failure);
310
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss, alloc(0)), std::ios_base::failure);
315
311
  }
316
312
  }
317
313
  REQUIRE(test_allocator_total_bytes == 0);
@@ -320,25 +316,25 @@ TEST_CASE("hll sketch: deserialize list mode buffer overrun", "[hll_sketch]") {
320
316
  TEST_CASE("hll sketch: deserialize set mode buffer overrun", "[hll_sketch]") {
321
317
  test_allocator_total_bytes = 0;
322
318
  {
323
- hll_sketch_test_alloc sketch(10);
319
+ hll_sketch_test_alloc sketch(10, target_hll_type::HLL_4, false, 0);
324
320
  for (int i = 0; i < 10; ++i) sketch.update(i);
325
321
  //std::cout << sketch.to_string();
326
322
  auto bytes = sketch.serialize_updatable();
327
- REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), 7), std::out_of_range);
328
- REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
323
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), 7, 0), std::out_of_range);
324
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1, 0), std::out_of_range);
329
325
 
330
326
  // check for leaks on stream exceptions
331
327
  {
332
328
  std::stringstream ss;
333
329
  ss.exceptions(std::ios::failbit | std::ios::badbit);
334
330
  ss.str(std::string((char*)bytes.data(), 7));
335
- REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss), std::ios_base::failure);
331
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss, alloc(0)), std::ios_base::failure);
336
332
  }
337
333
  {
338
334
  std::stringstream ss;
339
335
  ss.exceptions(std::ios::failbit | std::ios::badbit);
340
336
  ss.str(std::string((char*)bytes.data(), bytes.size() - 1));
341
- REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss), std::ios_base::failure);
337
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss, alloc(0)), std::ios_base::failure);
342
338
  }
343
339
  }
344
340
  REQUIRE(test_allocator_total_bytes == 0);
@@ -348,39 +344,39 @@ TEST_CASE("hll sketch: deserialize HLL mode buffer overrun", "[hll_sketch]") {
348
344
  test_allocator_total_bytes = 0;
349
345
  {
350
346
  // this sketch should have aux table
351
- hll_sketch_test_alloc sketch(15);
347
+ hll_sketch_test_alloc sketch(15, target_hll_type::HLL_4, false, 0);
352
348
  for (int i = 0; i < 14444; ++i) sketch.update(i);
353
349
  //std::cout << sketch.to_string();
354
350
  auto bytes = sketch.serialize_compact();
355
- REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), 7), std::out_of_range);
356
- REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), 15), std::out_of_range);
357
- REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), 16420), std::out_of_range); // before aux table
358
- REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
351
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), 7, 0), std::out_of_range);
352
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), 15, 0), std::out_of_range);
353
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), 16420, 0), std::out_of_range); // before aux table
354
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1, 0), std::out_of_range);
359
355
 
360
356
  // check for leaks on stream exceptions
361
357
  {
362
358
  std::stringstream ss;
363
359
  ss.exceptions(std::ios::failbit | std::ios::badbit);
364
360
  ss.str(std::string((char*)bytes.data(), 7));
365
- REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss), std::ios_base::failure);
361
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss, alloc(0)), std::ios_base::failure);
366
362
  }
367
363
  {
368
364
  std::stringstream ss;
369
365
  ss.exceptions(std::ios::failbit | std::ios::badbit);
370
366
  ss.str(std::string((char*)bytes.data(), 15));
371
- REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss), std::ios_base::failure);
367
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss, alloc(0)), std::ios_base::failure);
372
368
  }
373
369
  {
374
370
  std::stringstream ss;
375
371
  ss.exceptions(std::ios::failbit | std::ios::badbit);
376
372
  ss.str(std::string((char*)bytes.data(), 16420)); // before aux table
377
- REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss), std::ios_base::failure);
373
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss, alloc(0)), std::ios_base::failure);
378
374
  }
379
375
  {
380
376
  std::stringstream ss;
381
377
  ss.exceptions(std::ios::failbit | std::ios::badbit);
382
378
  ss.str(std::string((char*)bytes.data(), bytes.size() - 1));
383
- REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss), std::ios_base::failure);
379
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc::deserialize(ss, alloc(0)), std::ios_base::failure);
384
380
  }
385
381
  }
386
382
  REQUIRE(test_allocator_total_bytes == 0);
@@ -24,23 +24,19 @@
24
24
 
25
25
  namespace datasketches {
26
26
 
27
- static int min(int a, int b) {
28
- return (a < b) ? a : b;
29
- }
30
-
31
27
  static void println(std::string& str) {
32
28
  //std::cout << str << "\n";
33
29
  }
34
30
 
35
31
  static void basicUnion(uint64_t n1, uint64_t n2,
36
- uint64_t lgk1, uint64_t lgk2, uint64_t lgMaxK,
32
+ uint8_t lgk1, uint8_t lgk2, uint8_t lgMaxK,
37
33
  target_hll_type type1, target_hll_type type2, target_hll_type resultType) {
38
34
  uint64_t v = 0;
39
35
  //int tot = n1 + n2;
40
36
 
41
37
  hll_sketch h1(lgk1, type1);
42
38
  hll_sketch h2(lgk2, type2);
43
- int lgControlK = min(min(lgk1, lgk2), lgMaxK);
39
+ uint8_t lgControlK = std::min(std::min(lgk1, lgk2), lgMaxK);
44
40
  hll_sketch control(lgControlK, resultType);
45
41
 
46
42
  for (uint64_t i = 0; i < n1; ++i) {
@@ -89,9 +85,9 @@ TEST_CASE("hll union: check unions", "[hll_union]") {
89
85
  target_hll_type type2 = HLL_8;
90
86
  target_hll_type resultType = HLL_8;
91
87
 
92
- uint64_t lgK1 = 7;
93
- uint64_t lgK2 = 7;
94
- uint64_t lgMaxK = 7;
88
+ uint8_t lgK1 = 7;
89
+ uint8_t lgK2 = 7;
90
+ uint8_t lgMaxK = 7;
95
91
  uint64_t n1 = 7;
96
92
  uint64_t n2 = 7;
97
93
  basicUnion(n1, n2, lgK1, lgK2, lgMaxK, type1, type2, resultType);
@@ -108,7 +104,7 @@ TEST_CASE("hll union: check unions", "[hll_union]") {
108
104
  n2 = 14;
109
105
  basicUnion(n1, n2, lgK1, lgK2, lgMaxK, type1, type2, resultType);
110
106
 
111
- int i = 0;
107
+ uint8_t i = 0;
112
108
  for (i = 7; i <= 13; ++i) {
113
109
  lgK1 = i;
114
110
  lgK2 = i;
@@ -184,9 +180,9 @@ TEST_CASE("hll union: check composite estimate", "[hll_union]") {
184
180
  }
185
181
 
186
182
  TEST_CASE("hll union: check config k limits", "[hll_union]") {
187
- REQUIRE_THROWS_AS(hll_union(HllUtil<>::MIN_LOG_K - 1), std::invalid_argument);
183
+ REQUIRE_THROWS_AS(hll_union(hll_constants::MIN_LOG_K - 1), std::invalid_argument);
188
184
 
189
- REQUIRE_THROWS_AS(hll_union(HllUtil<>::MAX_LOG_K + 1), std::invalid_argument);
185
+ REQUIRE_THROWS_AS(hll_union(hll_constants::MAX_LOG_K + 1), std::invalid_argument);
190
186
  }
191
187
 
192
188
  static double getBound(int lgK, bool ub, bool oooFlag, int numStdDev, double est) {
@@ -195,7 +191,7 @@ static double getBound(int lgK, bool ub, bool oooFlag, int numStdDev, double est
195
191
  }
196
192
 
197
193
  TEST_CASE("hll union: check ub lb", "[hll_union]") {
198
- int lgK = 4;
194
+ uint8_t lgK = 4;
199
195
  int n = 1 << 20;
200
196
  bool oooFlag = false;
201
197
 
@@ -223,7 +219,7 @@ TEST_CASE("hll union: check ub lb", "[hll_union]") {
223
219
  }
224
220
 
225
221
  TEST_CASE("hll union: check conversions", "[hll_union]") {
226
- int lgK = 4;
222
+ uint8_t lgK = 4;
227
223
  hll_sketch sk1(lgK, HLL_8);
228
224
  hll_sketch sk2(lgK, HLL_8);
229
225
  int n = 1 << 20;
@@ -57,7 +57,7 @@ static int get_n(int lg_k, hll_mode mode) {
57
57
 
58
58
  static long v = 0;
59
59
 
60
- static hll_sketch build_sketch(int lg_k, target_hll_type hll_type, hll_mode mode) {
60
+ static hll_sketch build_sketch(uint8_t lg_k, target_hll_type hll_type, hll_mode mode) {
61
61
  hll_sketch sk(lg_k, hll_type);
62
62
  int n = get_n(lg_k, mode);
63
63
  for (int i = 0; i < n; i++) sk.update(static_cast<uint64_t>(i + v));
@@ -67,7 +67,7 @@ static hll_sketch build_sketch(int lg_k, target_hll_type hll_type, hll_mode mode
67
67
 
68
68
  // merges a sketch to an empty union and gets result of the same type, checks binary equivalence
69
69
  static void union_one_update(bool compact) {
70
- for (int lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
70
+ for (uint8_t lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
71
71
  for (int mode = 0; mode <= 2; mode++) { // List, Set, Hll
72
72
  if ((lg_k < 8) && (mode == 1)) continue; // lg_k < 8 list transitions directly to HLL
73
73
  for (int t = 0; t <= 2; t++) { // HLL_4, HLL_6, HLL_8
@@ -102,7 +102,7 @@ TEST_CASE("hll isomorphic: union one update serialize compact", "[hll_isomorphic
102
102
 
103
103
  // converts a sketch to a different type and converts back to the original type to check binary equivalence
104
104
  static void convert_back_and_forth(bool compact) {
105
- for (int lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
105
+ for (uint8_t lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
106
106
  for (int mode = 0; mode <= 2; mode++) { // List, Set, Hll
107
107
  if ((lg_k < 8) && (mode == 1)) continue; // lg_k < 8 list transitions directly to HLL
108
108
  for (int t1 = 0; t1 <= 2; t1++) { // HLL_4, HLL_6, HLL_8
@@ -44,11 +44,11 @@ TEST_CASE("hll to/from byte array: double serialize", "[hll_byte_array]") {
44
44
  auto ser2 = sk.serialize_updatable();
45
45
 
46
46
  REQUIRE(ser1.size() == ser2.size());
47
- int len = ser1.size();
47
+ size_t len = ser1.size();
48
48
  uint8_t* b1 = ser1.data();
49
49
  uint8_t* b2 = ser2.data();
50
50
 
51
- for (int i = 0; i < len; ++i) {
51
+ for (size_t i = 0; i < len; ++i) {
52
52
  REQUIRE(b2[i] == b1[i]);
53
53
  }
54
54
  }
@@ -129,7 +129,7 @@ static void checkSketchEquality(hll_sketch& sk1, hll_sketch& sk2) {
129
129
  REQUIRE(sk1.get_target_type() == sk2.get_target_type());
130
130
  }
131
131
 
132
- static void toFrom(const int lgConfigK, const target_hll_type tgtHllType, const int n) {
132
+ static void toFrom(const uint8_t lgConfigK, const target_hll_type tgtHllType, const int n) {
133
133
  hll_sketch src(lgConfigK, tgtHllType);
134
134
  for (int i = 0; i < n; ++i) {
135
135
  src.update(i);
@@ -157,7 +157,7 @@ static void toFrom(const int lgConfigK, const target_hll_type tgtHllType, const
157
157
  TEST_CASE("hll to/from byte array: to from sketch", "[hll_byte_array]") {
158
158
  for (int i = 0; i < 10; ++i) {
159
159
  int n = nArr[i];
160
- for (int lgK = 4; lgK <= 13; ++lgK) {
160
+ for (uint8_t lgK = 4; lgK <= 13; ++lgK) {
161
161
  toFrom(lgK, HLL_4, n);
162
162
  toFrom(lgK, HLL_6, n);
163
163
  toFrom(lgK, HLL_8, n);
@@ -26,7 +26,8 @@
26
26
 
27
27
  namespace datasketches {
28
28
 
29
- static std::independent_bits_engine<std::mt19937, 1, uint32_t> random_bit(std::chrono::system_clock::now().time_since_epoch().count());
29
+ static std::independent_bits_engine<std::mt19937, 1, uint32_t>
30
+ random_bit(static_cast<uint32_t>(std::chrono::system_clock::now().time_since_epoch().count()));
30
31
 
31
32
  #ifdef KLL_VALIDATION
32
33
  extern uint32_t kll_next_offset;
@@ -46,9 +47,9 @@ class kll_helper {
46
47
  static inline uint8_t floor_of_log2_of_fraction(uint64_t numer, uint64_t denom);
47
48
  static inline uint8_t ub_on_num_levels(uint64_t n);
48
49
  static inline uint32_t compute_total_capacity(uint16_t k, uint8_t m, uint8_t num_levels);
49
- static inline uint32_t level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid);
50
- static inline uint32_t int_cap_aux(uint16_t k, uint8_t depth);
51
- static inline uint32_t int_cap_aux_aux(uint16_t k, uint8_t depth);
50
+ static inline uint16_t level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid);
51
+ static inline uint16_t int_cap_aux(uint16_t k, uint8_t depth);
52
+ static inline uint16_t int_cap_aux_aux(uint16_t k, uint8_t depth);
52
53
  static inline uint64_t sum_the_sample_weights(uint8_t num_levels, const uint32_t* levels);
53
54
 
54
55
  /*
@@ -55,28 +55,28 @@ uint32_t kll_helper::compute_total_capacity(uint16_t k, uint8_t m, uint8_t num_l
55
55
  return total;
56
56
  }
57
57
 
58
- uint32_t kll_helper::level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid) {
58
+ uint16_t kll_helper::level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid) {
59
59
  if (height >= numLevels) throw std::invalid_argument("height >= numLevels");
60
60
  const uint8_t depth = numLevels - height - 1;
61
- return std::max((uint32_t) min_wid, int_cap_aux(k, depth));
61
+ return std::max<uint16_t>(min_wid, int_cap_aux(k, depth));
62
62
  }
63
63
 
64
- uint32_t kll_helper::int_cap_aux(uint16_t k, uint8_t depth) {
64
+ uint16_t kll_helper::int_cap_aux(uint16_t k, uint8_t depth) {
65
65
  if (depth > 60) throw std::invalid_argument("depth > 60");
66
66
  if (depth <= 30) return int_cap_aux_aux(k, depth);
67
67
  const uint8_t half = depth / 2;
68
68
  const uint8_t rest = depth - half;
69
- const uint32_t tmp = int_cap_aux_aux(k, half);
69
+ const uint16_t tmp = int_cap_aux_aux(k, half);
70
70
  return int_cap_aux_aux(tmp, rest);
71
71
  }
72
72
 
73
- uint32_t kll_helper::int_cap_aux_aux(uint16_t k, uint8_t depth) {
73
+ uint16_t kll_helper::int_cap_aux_aux(uint16_t k, uint8_t depth) {
74
74
  if (depth > 30) throw std::invalid_argument("depth > 30");
75
75
  const uint64_t twok = k << 1; // for rounding, we pre-multiply by 2
76
76
  const uint64_t tmp = (uint64_t) (((uint64_t) twok << depth) / powers_of_three[depth]);
77
77
  const uint64_t result = (tmp + 1) >> 1; // then here we add 1 and divide by 2
78
78
  if (result > k) throw std::logic_error("result > k");
79
- return result;
79
+ return static_cast<uint16_t>(result);
80
80
  }
81
81
 
82
82
  uint64_t kll_helper::sum_the_sample_weights(uint8_t num_levels, const uint32_t* levels) {
@@ -24,19 +24,27 @@
24
24
 
25
25
  namespace datasketches {
26
26
 
27
+ // forward declaration
28
+ template<typename T, typename C, typename S, typename A> class kll_sketch;
29
+
27
30
  template <typename T, typename C, typename A>
28
31
  class kll_quantile_calculator {
29
32
  public:
30
- // assumes that all levels are sorted including level 0
31
- kll_quantile_calculator(const T* items, const uint32_t* levels, uint8_t num_levels, uint64_t n);
33
+ using Entry = std::pair<T, uint64_t>;
34
+ using AllocEntry = typename std::allocator_traits<A>::template rebind_alloc<Entry>;
35
+ using Container = std::vector<Entry, AllocEntry>;
36
+ using const_iterator = typename Container::const_iterator;
37
+
38
+ template<typename S>
39
+ kll_quantile_calculator(const kll_sketch<T, C, S, A>& sketch);
40
+
32
41
  T get_quantile(double fraction) const;
42
+ const_iterator begin() const;
43
+ const_iterator end() const;
33
44
 
34
45
  private:
35
46
  using AllocU32 = typename std::allocator_traits<A>::template rebind_alloc<uint32_t>;
36
47
  using vector_u32 = std::vector<uint32_t, AllocU32>;
37
- using Entry = std::pair<T, uint64_t>;
38
- using AllocEntry = typename std::allocator_traits<A>::template rebind_alloc<Entry>;
39
- using Container = std::vector<Entry, AllocEntry>;
40
48
  uint64_t n_;
41
49
  vector_u32 levels_;
42
50
  Container entries_;
@@ -45,7 +53,7 @@ class kll_quantile_calculator {
45
53
  T approximately_answer_positional_query(uint64_t pos) const;
46
54
  void convert_to_preceding_cummulative();
47
55
  uint32_t chunk_containing_pos(uint64_t pos) const;
48
- uint32_t search_for_chunk_containing_pos(uint64_t pos, uint32_t l, uint32_t r) const;
56
+ uint32_t search_for_chunk_containing_pos(uint64_t pos, uint64_t l, uint64_t r) const;
49
57
  static void merge_sorted_blocks(Container& entries, const uint32_t* levels, uint8_t num_levels, uint32_t num_items);
50
58
  static void merge_sorted_blocks_direct(Container& orig, Container& temp, const uint32_t* levels, uint8_t starting_level, uint8_t num_levels);
51
59
  static void merge_sorted_blocks_reversed(Container& orig, Container& temp, const uint32_t* levels, uint8_t starting_level, uint8_t num_levels);