datasketches 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/datasketches/version.rb +1 -1
  4. data/vendor/datasketches-cpp/CMakeLists.txt +7 -0
  5. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
  6. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  7. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  8. data/vendor/datasketches-cpp/common/include/common_defs.hpp +24 -0
  9. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  10. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  11. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  12. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
  13. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
  14. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +14 -1
  15. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +121 -87
  16. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +14 -14
  17. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
  18. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
  19. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
  20. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
  21. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  22. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  23. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  24. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +65 -80
  25. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
  26. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
  27. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
  28. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
  29. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  30. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
  31. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
  32. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
  33. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
  34. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
  35. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  36. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  37. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
  38. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  39. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
  40. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
  41. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
  42. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
  43. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
  44. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
  45. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +28 -28
  46. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  47. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
  48. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
  49. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
  50. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
  51. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  52. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  53. data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
  54. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
  55. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
  56. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
  57. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  58. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
  59. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
  60. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  61. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  62. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  63. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  64. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  65. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  66. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
  67. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +34 -2
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +72 -62
  69. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  70. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  71. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +68 -45
  73. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  74. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
  75. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +6 -6
  76. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  77. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
  78. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  79. data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
  80. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +9 -9
  84. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
  85. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +47 -56
  86. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +34 -42
  87. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  88. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
  89. data/vendor/datasketches-cpp/setup.py +1 -1
  90. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
  91. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  92. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
  93. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
  94. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
  95. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
  96. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +42 -1
  97. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +107 -58
  98. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +4 -4
  99. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  100. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +1 -1
  101. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +2 -0
  102. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -28
  103. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  104. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  105. data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
  106. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -1
  107. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
  108. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
  109. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +51 -64
  110. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
  111. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
  112. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
  113. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
  114. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
  115. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
  116. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +12 -12
  117. metadata +8 -3
@@ -26,7 +26,7 @@
26
26
 
27
27
  namespace datasketches {
28
28
 
29
- static hll_sketch buildSketch(const int n, const int lgK, const target_hll_type tgtHllType) {
29
+ static hll_sketch buildSketch(const int n, const uint8_t lgK, const target_hll_type tgtHllType) {
30
30
  hll_sketch sketch(lgK, tgtHllType);
31
31
  for (int i = 0; i < n; ++i) {
32
32
  sketch.update(i);
@@ -34,7 +34,7 @@ static hll_sketch buildSketch(const int n, const int lgK, const target_hll_type
34
34
  return sketch;
35
35
  }
36
36
 
37
- static void crossCountingCheck(const int lgK, const int n) {
37
+ static void crossCountingCheck(const uint8_t lgK, const int n) {
38
38
  hll_sketch sk4 = buildSketch(n, lgK, HLL_4);
39
39
  const double est = sk4.get_estimate();
40
40
  const double lb = sk4.get_lower_bound(1);
@@ -25,7 +25,7 @@
25
25
 
26
26
  namespace datasketches {
27
27
 
28
- static void testComposite(const int lgK, const target_hll_type tgtHllType, const int n) {
28
+ static void testComposite(uint8_t lgK, const target_hll_type tgtHllType, const int n) {
29
29
  hll_union u(lgK);
30
30
  hll_sketch sk(lgK, tgtHllType);
31
31
  for (int i = 0; i < n; ++i) {
@@ -45,7 +45,7 @@ TEST_CASE("hll array: check composite estimate", "[hll_array]") {
45
45
  testComposite(13, target_hll_type::HLL_8, 10000);
46
46
  }
47
47
 
48
- static void serializeDeserialize(const int lgK, target_hll_type tgtHllType, const int n) {
48
+ static void serializeDeserialize(uint8_t lgK, target_hll_type tgtHllType, const int n) {
49
49
  hll_sketch sk1(lgK, tgtHllType);
50
50
 
51
51
  for (int i = 0; i < n; ++i) {
@@ -72,7 +72,7 @@ static void serializeDeserialize(const int lgK, target_hll_type tgtHllType, cons
72
72
  }
73
73
 
74
74
  TEST_CASE("hll array: check serialize deserialize", "[hll_array]") {
75
- int lgK = 4;
75
+ uint8_t lgK = 4;
76
76
  int n = 8;
77
77
  serializeDeserialize(lgK, HLL_4, n);
78
78
  serializeDeserialize(lgK, HLL_6, n);
@@ -100,7 +100,7 @@ TEST_CASE("hll array: check is compact", "[hll_array]") {
100
100
  }
101
101
 
102
102
  TEST_CASE("hll array: check corrupt bytearray", "[hll_array]") {
103
- int lgK = 8;
103
+ uint8_t lgK = 8;
104
104
  hll_sketch sk1(lgK, HLL_8);
105
105
  for (int i = 0; i < 50; ++i) {
106
106
  sk1.update(i);
@@ -109,36 +109,36 @@ TEST_CASE("hll array: check corrupt bytearray", "[hll_array]") {
109
109
  uint8_t* bytes = sketchBytes.data();
110
110
  const size_t size = sketchBytes.size();
111
111
 
112
- bytes[HllUtil<>::PREAMBLE_INTS_BYTE] = 0;
112
+ bytes[hll_constants::PREAMBLE_INTS_BYTE] = 0;
113
113
  REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
114
114
  REQUIRE_THROWS_AS(HllArray<std::allocator<uint8_t>>::newHll(bytes, size, std::allocator<uint8_t>()), std::invalid_argument);
115
- bytes[HllUtil<>::PREAMBLE_INTS_BYTE] = HllUtil<>::HLL_PREINTS;
115
+ bytes[hll_constants::PREAMBLE_INTS_BYTE] = hll_constants::HLL_PREINTS;
116
116
 
117
- bytes[HllUtil<>::SER_VER_BYTE] = 0;
117
+ bytes[hll_constants::SER_VER_BYTE] = 0;
118
118
  REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
119
- bytes[HllUtil<>::SER_VER_BYTE] = HllUtil<>::SER_VER;
119
+ bytes[hll_constants::SER_VER_BYTE] = hll_constants::SER_VER;
120
120
 
121
- bytes[HllUtil<>::FAMILY_BYTE] = 0;
121
+ bytes[hll_constants::FAMILY_BYTE] = 0;
122
122
  REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
123
- bytes[HllUtil<>::FAMILY_BYTE] = HllUtil<>::FAMILY_ID;
123
+ bytes[hll_constants::FAMILY_BYTE] = hll_constants::FAMILY_ID;
124
124
 
125
- uint8_t tmp = bytes[HllUtil<>::MODE_BYTE];
126
- bytes[HllUtil<>::MODE_BYTE] = 0x10; // HLL_6, LIST
125
+ uint8_t tmp = bytes[hll_constants::MODE_BYTE];
126
+ bytes[hll_constants::MODE_BYTE] = 0x10; // HLL_6, LIST
127
127
  REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
128
- bytes[HllUtil<>::MODE_BYTE] = tmp;
128
+ bytes[hll_constants::MODE_BYTE] = tmp;
129
129
 
130
- tmp = bytes[HllUtil<>::LG_ARR_BYTE];
131
- bytes[HllUtil<>::LG_ARR_BYTE] = 0;
130
+ tmp = bytes[hll_constants::LG_ARR_BYTE];
131
+ bytes[hll_constants::LG_ARR_BYTE] = 0;
132
132
  hll_sketch::deserialize(bytes, size);
133
133
  // should work fine despite the corruption
134
- bytes[HllUtil<>::LG_ARR_BYTE] = tmp;
134
+ bytes[hll_constants::LG_ARR_BYTE] = tmp;
135
135
 
136
136
  REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size - 1), std::out_of_range);
137
137
  REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, 3), std::out_of_range);
138
138
  }
139
139
 
140
140
  TEST_CASE("hll array: check corrupt stream", "[hll_array]") {
141
- int lgK = 6;
141
+ uint8_t lgK = 6;
142
142
  hll_sketch sk1(lgK);
143
143
  for (int i = 0; i < 50; ++i) {
144
144
  sk1.update(i);
@@ -146,46 +146,46 @@ TEST_CASE("hll array: check corrupt stream", "[hll_array]") {
146
146
  std::stringstream ss;
147
147
  sk1.serialize_compact(ss);
148
148
 
149
- ss.seekp(HllUtil<>::PREAMBLE_INTS_BYTE);
149
+ ss.seekp(hll_constants::PREAMBLE_INTS_BYTE);
150
150
  ss.put(0);
151
151
  ss.seekg(0);
152
152
  REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
153
153
  REQUIRE_THROWS_AS(HllArray<std::allocator<uint8_t>>::newHll(ss, std::allocator<uint8_t>()), std::invalid_argument);
154
- ss.seekp(HllUtil<>::PREAMBLE_INTS_BYTE);
155
- ss.put(HllUtil<>::HLL_PREINTS);
154
+ ss.seekp(hll_constants::PREAMBLE_INTS_BYTE);
155
+ ss.put(hll_constants::HLL_PREINTS);
156
156
 
157
- ss.seekp(HllUtil<>::SER_VER_BYTE);
157
+ ss.seekp(hll_constants::SER_VER_BYTE);
158
158
  ss.put(0);
159
159
  ss.seekg(0);
160
160
  REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
161
- ss.seekp(HllUtil<>::SER_VER_BYTE);
162
- ss.put(HllUtil<>::SER_VER);
161
+ ss.seekp(hll_constants::SER_VER_BYTE);
162
+ ss.put(hll_constants::SER_VER);
163
163
 
164
- ss.seekp(HllUtil<>::FAMILY_BYTE);
164
+ ss.seekp(hll_constants::FAMILY_BYTE);
165
165
  ss.put(0);
166
166
  ss.seekg(0);
167
167
  REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
168
- ss.seekp(HllUtil<>::FAMILY_BYTE);
169
- ss.put(HllUtil<>::FAMILY_ID);
168
+ ss.seekp(hll_constants::FAMILY_BYTE);
169
+ ss.put(hll_constants::FAMILY_ID);
170
170
 
171
- ss.seekg(HllUtil<>::MODE_BYTE);
172
- uint8_t tmp = ss.get();
173
- ss.seekp(HllUtil<>::MODE_BYTE);
171
+ ss.seekg(hll_constants::MODE_BYTE);
172
+ auto tmp = ss.get();
173
+ ss.seekp(hll_constants::MODE_BYTE);
174
174
  ss.put(0x11); // HLL_6, SET
175
175
  ss.seekg(0);
176
176
  REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
177
- ss.seekp(HllUtil<>::MODE_BYTE);
178
- ss.put(tmp);
177
+ ss.seekp(hll_constants::MODE_BYTE);
178
+ ss.put((char)tmp);
179
179
 
180
- ss.seekg(HllUtil<>::LG_ARR_BYTE);
180
+ ss.seekg(hll_constants::LG_ARR_BYTE);
181
181
  tmp = ss.get();
182
- ss.seekp(HllUtil<>::LG_ARR_BYTE);
182
+ ss.seekp(hll_constants::LG_ARR_BYTE);
183
183
  ss.put(0);
184
184
  ss.seekg(0);
185
185
  hll_sketch::deserialize(ss);
186
186
  // should work fine despite the corruption
187
- ss.seekp(HllUtil<>::LG_ARR_BYTE);
188
- ss.put(tmp);
187
+ ss.seekp(hll_constants::LG_ARR_BYTE);
188
+ ss.put((char)tmp);
189
189
  }
190
190
 
191
191
  } /* namespace datasketches */
@@ -27,7 +27,7 @@ namespace datasketches {
27
27
  using hll_sketch_test_alloc = hll_sketch_alloc<test_allocator<uint8_t>>;
28
28
  using alloc = test_allocator<uint8_t>;
29
29
 
30
- static void runCheckCopy(int lgConfigK, target_hll_type tgtHllType) {
30
+ static void runCheckCopy(uint8_t lgConfigK, target_hll_type tgtHllType) {
31
31
  hll_sketch_test_alloc sk(lgConfigK, tgtHllType, false, 0);
32
32
 
33
33
  for (int i = 0; i < 7; ++i) {
@@ -66,7 +66,7 @@ TEST_CASE("hll sketch: check copies", "[hll_sketch]") {
66
66
  }
67
67
 
68
68
  static void copyAs(target_hll_type srcType, target_hll_type dstType) {
69
- int lgK = 8;
69
+ uint8_t lgK = 8;
70
70
  int n1 = 7;
71
71
  int n2 = 24;
72
72
  int n3 = 1000;
@@ -109,7 +109,7 @@ TEST_CASE("hll sketch: check copy as", "[hll_sketch]") {
109
109
  TEST_CASE("hll sketch: check misc1", "[hll_sketch]") {
110
110
  test_allocator_total_bytes = 0;
111
111
  {
112
- int lgConfigK = 8;
112
+ uint8_t lgConfigK = 8;
113
113
  target_hll_type srcType = target_hll_type::HLL_8;
114
114
  hll_sketch_test_alloc sk(lgConfigK, srcType, false, 0);
115
115
 
@@ -124,7 +124,7 @@ TEST_CASE("hll sketch: check misc1", "[hll_sketch]") {
124
124
  sk.update(24); // HLL
125
125
  REQUIRE(sk.get_updatable_serialization_bytes() == 40 + 256);
126
126
 
127
- const int hllBytes = HllUtil<>::HLL_BYTE_ARR_START + (1 << lgConfigK);
127
+ const auto hllBytes = hll_constants::HLL_BYTE_ARR_START + (1 << lgConfigK);
128
128
  REQUIRE(sk.get_compact_serialization_bytes() == hllBytes);
129
129
  REQUIRE(hll_sketch::get_max_updatable_serialization_bytes(lgConfigK, HLL_8) == hllBytes);
130
130
  }
@@ -135,22 +135,22 @@ TEST_CASE("hll sketch: check num std dev", "[hll_sketch]") {
135
135
  REQUIRE_THROWS_AS(HllUtil<>::checkNumStdDev(0), std::invalid_argument);
136
136
  }
137
137
 
138
- void checkSerializationSizes(const int lgConfigK, target_hll_type tgtHllType) {
138
+ void checkSerializationSizes(uint8_t lgConfigK, target_hll_type tgtHllType) {
139
139
  hll_sketch_test_alloc sk(lgConfigK, tgtHllType, false, 0);
140
140
  int i;
141
141
 
142
142
  // LIST
143
143
  for (i = 0; i < 7; ++i) { sk.update(i); }
144
- int expected = HllUtil<>::LIST_INT_ARR_START + (i << 2);
144
+ auto expected = hll_constants::LIST_INT_ARR_START + (i << 2);
145
145
  REQUIRE(sk.get_compact_serialization_bytes() == expected);
146
- expected = HllUtil<>::LIST_INT_ARR_START + (4 << HllUtil<>::LG_INIT_LIST_SIZE);
146
+ expected = hll_constants::LIST_INT_ARR_START + (4 << hll_constants::LG_INIT_LIST_SIZE);
147
147
  REQUIRE(sk.get_updatable_serialization_bytes() == expected);
148
148
 
149
149
  // SET
150
150
  for (i = 7; i < 24; ++i) { sk.update(i); }
151
- expected = HllUtil<>::HASH_SET_INT_ARR_START + (i << 2);
151
+ expected = hll_constants::HASH_SET_INT_ARR_START + (i << 2);
152
152
  REQUIRE(sk.get_compact_serialization_bytes() == expected);
153
- expected = HllUtil<>::HASH_SET_INT_ARR_START + (4 << HllUtil<>::LG_INIT_SET_SIZE);
153
+ expected = hll_constants::HASH_SET_INT_ARR_START + (4 << hll_constants::LG_INIT_SET_SIZE);
154
154
  REQUIRE(sk.get_updatable_serialization_bytes() == expected);
155
155
  }
156
156
 
@@ -178,7 +178,7 @@ TEST_CASE("hll sketch: exercise to string", "[hll_sketch]") {
178
178
 
179
179
  // Creates and serializes then deserializes sketch.
180
180
  // Returns true if deserialized sketch is compact.
181
- static bool checkCompact(const int lgK, const int n, const target_hll_type type, bool compact) {
181
+ static bool checkCompact(uint8_t lgK, const int n, const target_hll_type type, bool compact) {
182
182
  hll_sketch_test_alloc sk(lgK, type, false, 0);
183
183
  for (int i = 0; i < n; ++i) { sk.update(i); }
184
184
 
@@ -201,7 +201,7 @@ static bool checkCompact(const int lgK, const int n, const target_hll_type type,
201
201
  TEST_CASE("hll sketch: check compact flag", "[hll_sketch]") {
202
202
  test_allocator_total_bytes = 0;
203
203
  {
204
- int lgK = 8;
204
+ uint8_t lgK = 8;
205
205
  // unless/until we create non-updatable "direct" versions,
206
206
  // deserialized image should never be compact
207
207
  // LIST: follows serialization request
@@ -230,10 +230,10 @@ TEST_CASE("hll sketch: check compact flag", "[hll_sketch]") {
230
230
  TEST_CASE("hll sketch: check k limits", "[hll_sketch]") {
231
231
  test_allocator_total_bytes = 0;
232
232
  {
233
- hll_sketch_test_alloc sketch1(HllUtil<>::MIN_LOG_K, target_hll_type::HLL_8, false, 0);
234
- hll_sketch_test_alloc sketch2(HllUtil<>::MAX_LOG_K, target_hll_type::HLL_4, false, 0);
235
- REQUIRE_THROWS_AS(hll_sketch_test_alloc(HllUtil<>::MIN_LOG_K - 1, target_hll_type::HLL_4, false, 0), std::invalid_argument);
236
- REQUIRE_THROWS_AS(hll_sketch_test_alloc(HllUtil<>::MAX_LOG_K + 1, target_hll_type::HLL_4, false, 0), std::invalid_argument);
233
+ hll_sketch_test_alloc sketch1(hll_constants::MIN_LOG_K, target_hll_type::HLL_8, false, 0);
234
+ hll_sketch_test_alloc sketch2(hll_constants::MAX_LOG_K, target_hll_type::HLL_4, false, 0);
235
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc(hll_constants::MIN_LOG_K - 1, target_hll_type::HLL_4, false, 0), std::invalid_argument);
236
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc(hll_constants::MAX_LOG_K + 1, target_hll_type::HLL_4, false, 0), std::invalid_argument);
237
237
  }
238
238
  REQUIRE(test_allocator_total_bytes == 0);
239
239
  }
@@ -24,23 +24,19 @@
24
24
 
25
25
  namespace datasketches {
26
26
 
27
- static int min(int a, int b) {
28
- return (a < b) ? a : b;
29
- }
30
-
31
27
  static void println(std::string& str) {
32
28
  //std::cout << str << "\n";
33
29
  }
34
30
 
35
31
  static void basicUnion(uint64_t n1, uint64_t n2,
36
- uint64_t lgk1, uint64_t lgk2, uint64_t lgMaxK,
32
+ uint8_t lgk1, uint8_t lgk2, uint8_t lgMaxK,
37
33
  target_hll_type type1, target_hll_type type2, target_hll_type resultType) {
38
34
  uint64_t v = 0;
39
35
  //int tot = n1 + n2;
40
36
 
41
37
  hll_sketch h1(lgk1, type1);
42
38
  hll_sketch h2(lgk2, type2);
43
- int lgControlK = min(min(lgk1, lgk2), lgMaxK);
39
+ uint8_t lgControlK = std::min(std::min(lgk1, lgk2), lgMaxK);
44
40
  hll_sketch control(lgControlK, resultType);
45
41
 
46
42
  for (uint64_t i = 0; i < n1; ++i) {
@@ -89,9 +85,9 @@ TEST_CASE("hll union: check unions", "[hll_union]") {
89
85
  target_hll_type type2 = HLL_8;
90
86
  target_hll_type resultType = HLL_8;
91
87
 
92
- uint64_t lgK1 = 7;
93
- uint64_t lgK2 = 7;
94
- uint64_t lgMaxK = 7;
88
+ uint8_t lgK1 = 7;
89
+ uint8_t lgK2 = 7;
90
+ uint8_t lgMaxK = 7;
95
91
  uint64_t n1 = 7;
96
92
  uint64_t n2 = 7;
97
93
  basicUnion(n1, n2, lgK1, lgK2, lgMaxK, type1, type2, resultType);
@@ -108,7 +104,7 @@ TEST_CASE("hll union: check unions", "[hll_union]") {
108
104
  n2 = 14;
109
105
  basicUnion(n1, n2, lgK1, lgK2, lgMaxK, type1, type2, resultType);
110
106
 
111
- int i = 0;
107
+ uint8_t i = 0;
112
108
  for (i = 7; i <= 13; ++i) {
113
109
  lgK1 = i;
114
110
  lgK2 = i;
@@ -184,9 +180,9 @@ TEST_CASE("hll union: check composite estimate", "[hll_union]") {
184
180
  }
185
181
 
186
182
  TEST_CASE("hll union: check config k limits", "[hll_union]") {
187
- REQUIRE_THROWS_AS(hll_union(HllUtil<>::MIN_LOG_K - 1), std::invalid_argument);
183
+ REQUIRE_THROWS_AS(hll_union(hll_constants::MIN_LOG_K - 1), std::invalid_argument);
188
184
 
189
- REQUIRE_THROWS_AS(hll_union(HllUtil<>::MAX_LOG_K + 1), std::invalid_argument);
185
+ REQUIRE_THROWS_AS(hll_union(hll_constants::MAX_LOG_K + 1), std::invalid_argument);
190
186
  }
191
187
 
192
188
  static double getBound(int lgK, bool ub, bool oooFlag, int numStdDev, double est) {
@@ -195,7 +191,7 @@ static double getBound(int lgK, bool ub, bool oooFlag, int numStdDev, double est
195
191
  }
196
192
 
197
193
  TEST_CASE("hll union: check ub lb", "[hll_union]") {
198
- int lgK = 4;
194
+ uint8_t lgK = 4;
199
195
  int n = 1 << 20;
200
196
  bool oooFlag = false;
201
197
 
@@ -223,7 +219,7 @@ TEST_CASE("hll union: check ub lb", "[hll_union]") {
223
219
  }
224
220
 
225
221
  TEST_CASE("hll union: check conversions", "[hll_union]") {
226
- int lgK = 4;
222
+ uint8_t lgK = 4;
227
223
  hll_sketch sk1(lgK, HLL_8);
228
224
  hll_sketch sk2(lgK, HLL_8);
229
225
  int n = 1 << 20;
@@ -57,7 +57,7 @@ static int get_n(int lg_k, hll_mode mode) {
57
57
 
58
58
  static long v = 0;
59
59
 
60
- static hll_sketch build_sketch(int lg_k, target_hll_type hll_type, hll_mode mode) {
60
+ static hll_sketch build_sketch(uint8_t lg_k, target_hll_type hll_type, hll_mode mode) {
61
61
  hll_sketch sk(lg_k, hll_type);
62
62
  int n = get_n(lg_k, mode);
63
63
  for (int i = 0; i < n; i++) sk.update(static_cast<uint64_t>(i + v));
@@ -67,7 +67,7 @@ static hll_sketch build_sketch(int lg_k, target_hll_type hll_type, hll_mode mode
67
67
 
68
68
  // merges a sketch to an empty union and gets result of the same type, checks binary equivalence
69
69
  static void union_one_update(bool compact) {
70
- for (int lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
70
+ for (uint8_t lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
71
71
  for (int mode = 0; mode <= 2; mode++) { // List, Set, Hll
72
72
  if ((lg_k < 8) && (mode == 1)) continue; // lg_k < 8 list transitions directly to HLL
73
73
  for (int t = 0; t <= 2; t++) { // HLL_4, HLL_6, HLL_8
@@ -102,7 +102,7 @@ TEST_CASE("hll isomorphic: union one update serialize compact", "[hll_isomorphic
102
102
 
103
103
  // converts a sketch to a different type and converts back to the original type to check binary equivalence
104
104
  static void convert_back_and_forth(bool compact) {
105
- for (int lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
105
+ for (uint8_t lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
106
106
  for (int mode = 0; mode <= 2; mode++) { // List, Set, Hll
107
107
  if ((lg_k < 8) && (mode == 1)) continue; // lg_k < 8 list transitions directly to HLL
108
108
  for (int t1 = 0; t1 <= 2; t1++) { // HLL_4, HLL_6, HLL_8
@@ -44,11 +44,11 @@ TEST_CASE("hll to/from byte array: double serialize", "[hll_byte_array]") {
44
44
  auto ser2 = sk.serialize_updatable();
45
45
 
46
46
  REQUIRE(ser1.size() == ser2.size());
47
- int len = ser1.size();
47
+ size_t len = ser1.size();
48
48
  uint8_t* b1 = ser1.data();
49
49
  uint8_t* b2 = ser2.data();
50
50
 
51
- for (int i = 0; i < len; ++i) {
51
+ for (size_t i = 0; i < len; ++i) {
52
52
  REQUIRE(b2[i] == b1[i]);
53
53
  }
54
54
  }
@@ -129,7 +129,7 @@ static void checkSketchEquality(hll_sketch& sk1, hll_sketch& sk2) {
129
129
  REQUIRE(sk1.get_target_type() == sk2.get_target_type());
130
130
  }
131
131
 
132
- static void toFrom(const int lgConfigK, const target_hll_type tgtHllType, const int n) {
132
+ static void toFrom(const uint8_t lgConfigK, const target_hll_type tgtHllType, const int n) {
133
133
  hll_sketch src(lgConfigK, tgtHllType);
134
134
  for (int i = 0; i < n; ++i) {
135
135
  src.update(i);
@@ -157,7 +157,7 @@ static void toFrom(const int lgConfigK, const target_hll_type tgtHllType, const
157
157
  TEST_CASE("hll to/from byte array: to from sketch", "[hll_byte_array]") {
158
158
  for (int i = 0; i < 10; ++i) {
159
159
  int n = nArr[i];
160
- for (int lgK = 4; lgK <= 13; ++lgK) {
160
+ for (uint8_t lgK = 4; lgK <= 13; ++lgK) {
161
161
  toFrom(lgK, HLL_4, n);
162
162
  toFrom(lgK, HLL_6, n);
163
163
  toFrom(lgK, HLL_8, n);
@@ -26,7 +26,8 @@
26
26
 
27
27
  namespace datasketches {
28
28
 
29
- static std::independent_bits_engine<std::mt19937, 1, uint32_t> random_bit(std::chrono::system_clock::now().time_since_epoch().count());
29
+ static std::independent_bits_engine<std::mt19937, 1, uint32_t>
30
+ random_bit(static_cast<uint32_t>(std::chrono::system_clock::now().time_since_epoch().count()));
30
31
 
31
32
  #ifdef KLL_VALIDATION
32
33
  extern uint32_t kll_next_offset;
@@ -46,9 +47,9 @@ class kll_helper {
46
47
  static inline uint8_t floor_of_log2_of_fraction(uint64_t numer, uint64_t denom);
47
48
  static inline uint8_t ub_on_num_levels(uint64_t n);
48
49
  static inline uint32_t compute_total_capacity(uint16_t k, uint8_t m, uint8_t num_levels);
49
- static inline uint32_t level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid);
50
- static inline uint32_t int_cap_aux(uint16_t k, uint8_t depth);
51
- static inline uint32_t int_cap_aux_aux(uint16_t k, uint8_t depth);
50
+ static inline uint16_t level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid);
51
+ static inline uint16_t int_cap_aux(uint16_t k, uint8_t depth);
52
+ static inline uint16_t int_cap_aux_aux(uint16_t k, uint8_t depth);
52
53
  static inline uint64_t sum_the_sample_weights(uint8_t num_levels, const uint32_t* levels);
53
54
 
54
55
  /*
@@ -55,28 +55,28 @@ uint32_t kll_helper::compute_total_capacity(uint16_t k, uint8_t m, uint8_t num_l
55
55
  return total;
56
56
  }
57
57
 
58
- uint32_t kll_helper::level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid) {
58
+ uint16_t kll_helper::level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid) {
59
59
  if (height >= numLevels) throw std::invalid_argument("height >= numLevels");
60
60
  const uint8_t depth = numLevels - height - 1;
61
- return std::max((uint32_t) min_wid, int_cap_aux(k, depth));
61
+ return std::max<uint16_t>(min_wid, int_cap_aux(k, depth));
62
62
  }
63
63
 
64
- uint32_t kll_helper::int_cap_aux(uint16_t k, uint8_t depth) {
64
+ uint16_t kll_helper::int_cap_aux(uint16_t k, uint8_t depth) {
65
65
  if (depth > 60) throw std::invalid_argument("depth > 60");
66
66
  if (depth <= 30) return int_cap_aux_aux(k, depth);
67
67
  const uint8_t half = depth / 2;
68
68
  const uint8_t rest = depth - half;
69
- const uint32_t tmp = int_cap_aux_aux(k, half);
69
+ const uint16_t tmp = int_cap_aux_aux(k, half);
70
70
  return int_cap_aux_aux(tmp, rest);
71
71
  }
72
72
 
73
- uint32_t kll_helper::int_cap_aux_aux(uint16_t k, uint8_t depth) {
73
+ uint16_t kll_helper::int_cap_aux_aux(uint16_t k, uint8_t depth) {
74
74
  if (depth > 30) throw std::invalid_argument("depth > 30");
75
75
  const uint64_t twok = k << 1; // for rounding, we pre-multiply by 2
76
76
  const uint64_t tmp = (uint64_t) (((uint64_t) twok << depth) / powers_of_three[depth]);
77
77
  const uint64_t result = (tmp + 1) >> 1; // then here we add 1 and divide by 2
78
78
  if (result > k) throw std::logic_error("result > k");
79
- return result;
79
+ return static_cast<uint16_t>(result);
80
80
  }
81
81
 
82
82
  uint64_t kll_helper::sum_the_sample_weights(uint8_t num_levels, const uint32_t* levels) {