datasketches 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (117)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/datasketches/version.rb +1 -1
  4. data/vendor/datasketches-cpp/CMakeLists.txt +7 -0
  5. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
  6. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  7. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  8. data/vendor/datasketches-cpp/common/include/common_defs.hpp +24 -0
  9. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  10. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  11. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  12. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
  13. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
  14. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +14 -1
  15. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +121 -87
  16. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +14 -14
  17. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
  18. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
  19. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
  20. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
  21. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  22. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  23. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  24. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +65 -80
  25. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
  26. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
  27. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
  28. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
  29. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  30. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
  31. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
  32. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
  33. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
  34. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
  35. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  36. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  37. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
  38. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  39. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
  40. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
  41. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
  42. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
  43. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
  44. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
  45. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +28 -28
  46. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  47. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
  48. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
  49. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
  50. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
  51. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  52. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  53. data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
  54. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
  55. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
  56. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
  57. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  58. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
  59. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
  60. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  61. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  62. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  63. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  64. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  65. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  66. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
  67. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +34 -2
  68. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +72 -62
  69. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  70. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  71. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  72. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +68 -45
  73. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  74. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
  75. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +6 -6
  76. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  77. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
  78. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  79. data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
  80. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +9 -9
  84. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
  85. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +47 -56
  86. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +34 -42
  87. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  88. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
  89. data/vendor/datasketches-cpp/setup.py +1 -1
  90. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
  91. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  92. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
  93. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
  94. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
  95. data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
  96. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +42 -1
  97. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +107 -58
  98. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +4 -4
  99. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
  100. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +1 -1
  101. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +2 -0
  102. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -28
  103. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  104. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  105. data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
  106. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -1
  107. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
  108. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
  109. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +51 -64
  110. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
  111. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
  112. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
  113. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
  114. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
  115. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
  116. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +12 -12
  117. metadata +8 -3
@@ -26,7 +26,7 @@
26
26
 
27
27
  namespace datasketches {
28
28
 
29
- static hll_sketch buildSketch(const int n, const int lgK, const target_hll_type tgtHllType) {
29
+ static hll_sketch buildSketch(const int n, const uint8_t lgK, const target_hll_type tgtHllType) {
30
30
  hll_sketch sketch(lgK, tgtHllType);
31
31
  for (int i = 0; i < n; ++i) {
32
32
  sketch.update(i);
@@ -34,7 +34,7 @@ static hll_sketch buildSketch(const int n, const int lgK, const target_hll_type
34
34
  return sketch;
35
35
  }
36
36
 
37
- static void crossCountingCheck(const int lgK, const int n) {
37
+ static void crossCountingCheck(const uint8_t lgK, const int n) {
38
38
  hll_sketch sk4 = buildSketch(n, lgK, HLL_4);
39
39
  const double est = sk4.get_estimate();
40
40
  const double lb = sk4.get_lower_bound(1);
@@ -25,7 +25,7 @@
25
25
 
26
26
  namespace datasketches {
27
27
 
28
- static void testComposite(const int lgK, const target_hll_type tgtHllType, const int n) {
28
+ static void testComposite(uint8_t lgK, const target_hll_type tgtHllType, const int n) {
29
29
  hll_union u(lgK);
30
30
  hll_sketch sk(lgK, tgtHllType);
31
31
  for (int i = 0; i < n; ++i) {
@@ -45,7 +45,7 @@ TEST_CASE("hll array: check composite estimate", "[hll_array]") {
45
45
  testComposite(13, target_hll_type::HLL_8, 10000);
46
46
  }
47
47
 
48
- static void serializeDeserialize(const int lgK, target_hll_type tgtHllType, const int n) {
48
+ static void serializeDeserialize(uint8_t lgK, target_hll_type tgtHllType, const int n) {
49
49
  hll_sketch sk1(lgK, tgtHllType);
50
50
 
51
51
  for (int i = 0; i < n; ++i) {
@@ -72,7 +72,7 @@ static void serializeDeserialize(const int lgK, target_hll_type tgtHllType, cons
72
72
  }
73
73
 
74
74
  TEST_CASE("hll array: check serialize deserialize", "[hll_array]") {
75
- int lgK = 4;
75
+ uint8_t lgK = 4;
76
76
  int n = 8;
77
77
  serializeDeserialize(lgK, HLL_4, n);
78
78
  serializeDeserialize(lgK, HLL_6, n);
@@ -100,7 +100,7 @@ TEST_CASE("hll array: check is compact", "[hll_array]") {
100
100
  }
101
101
 
102
102
  TEST_CASE("hll array: check corrupt bytearray", "[hll_array]") {
103
- int lgK = 8;
103
+ uint8_t lgK = 8;
104
104
  hll_sketch sk1(lgK, HLL_8);
105
105
  for (int i = 0; i < 50; ++i) {
106
106
  sk1.update(i);
@@ -109,36 +109,36 @@ TEST_CASE("hll array: check corrupt bytearray", "[hll_array]") {
109
109
  uint8_t* bytes = sketchBytes.data();
110
110
  const size_t size = sketchBytes.size();
111
111
 
112
- bytes[HllUtil<>::PREAMBLE_INTS_BYTE] = 0;
112
+ bytes[hll_constants::PREAMBLE_INTS_BYTE] = 0;
113
113
  REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
114
114
  REQUIRE_THROWS_AS(HllArray<std::allocator<uint8_t>>::newHll(bytes, size, std::allocator<uint8_t>()), std::invalid_argument);
115
- bytes[HllUtil<>::PREAMBLE_INTS_BYTE] = HllUtil<>::HLL_PREINTS;
115
+ bytes[hll_constants::PREAMBLE_INTS_BYTE] = hll_constants::HLL_PREINTS;
116
116
 
117
- bytes[HllUtil<>::SER_VER_BYTE] = 0;
117
+ bytes[hll_constants::SER_VER_BYTE] = 0;
118
118
  REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
119
- bytes[HllUtil<>::SER_VER_BYTE] = HllUtil<>::SER_VER;
119
+ bytes[hll_constants::SER_VER_BYTE] = hll_constants::SER_VER;
120
120
 
121
- bytes[HllUtil<>::FAMILY_BYTE] = 0;
121
+ bytes[hll_constants::FAMILY_BYTE] = 0;
122
122
  REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
123
- bytes[HllUtil<>::FAMILY_BYTE] = HllUtil<>::FAMILY_ID;
123
+ bytes[hll_constants::FAMILY_BYTE] = hll_constants::FAMILY_ID;
124
124
 
125
- uint8_t tmp = bytes[HllUtil<>::MODE_BYTE];
126
- bytes[HllUtil<>::MODE_BYTE] = 0x10; // HLL_6, LIST
125
+ uint8_t tmp = bytes[hll_constants::MODE_BYTE];
126
+ bytes[hll_constants::MODE_BYTE] = 0x10; // HLL_6, LIST
127
127
  REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
128
- bytes[HllUtil<>::MODE_BYTE] = tmp;
128
+ bytes[hll_constants::MODE_BYTE] = tmp;
129
129
 
130
- tmp = bytes[HllUtil<>::LG_ARR_BYTE];
131
- bytes[HllUtil<>::LG_ARR_BYTE] = 0;
130
+ tmp = bytes[hll_constants::LG_ARR_BYTE];
131
+ bytes[hll_constants::LG_ARR_BYTE] = 0;
132
132
  hll_sketch::deserialize(bytes, size);
133
133
  // should work fine despite the corruption
134
- bytes[HllUtil<>::LG_ARR_BYTE] = tmp;
134
+ bytes[hll_constants::LG_ARR_BYTE] = tmp;
135
135
 
136
136
  REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size - 1), std::out_of_range);
137
137
  REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, 3), std::out_of_range);
138
138
  }
139
139
 
140
140
  TEST_CASE("hll array: check corrupt stream", "[hll_array]") {
141
- int lgK = 6;
141
+ uint8_t lgK = 6;
142
142
  hll_sketch sk1(lgK);
143
143
  for (int i = 0; i < 50; ++i) {
144
144
  sk1.update(i);
@@ -146,46 +146,46 @@ TEST_CASE("hll array: check corrupt stream", "[hll_array]") {
146
146
  std::stringstream ss;
147
147
  sk1.serialize_compact(ss);
148
148
 
149
- ss.seekp(HllUtil<>::PREAMBLE_INTS_BYTE);
149
+ ss.seekp(hll_constants::PREAMBLE_INTS_BYTE);
150
150
  ss.put(0);
151
151
  ss.seekg(0);
152
152
  REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
153
153
  REQUIRE_THROWS_AS(HllArray<std::allocator<uint8_t>>::newHll(ss, std::allocator<uint8_t>()), std::invalid_argument);
154
- ss.seekp(HllUtil<>::PREAMBLE_INTS_BYTE);
155
- ss.put(HllUtil<>::HLL_PREINTS);
154
+ ss.seekp(hll_constants::PREAMBLE_INTS_BYTE);
155
+ ss.put(hll_constants::HLL_PREINTS);
156
156
 
157
- ss.seekp(HllUtil<>::SER_VER_BYTE);
157
+ ss.seekp(hll_constants::SER_VER_BYTE);
158
158
  ss.put(0);
159
159
  ss.seekg(0);
160
160
  REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
161
- ss.seekp(HllUtil<>::SER_VER_BYTE);
162
- ss.put(HllUtil<>::SER_VER);
161
+ ss.seekp(hll_constants::SER_VER_BYTE);
162
+ ss.put(hll_constants::SER_VER);
163
163
 
164
- ss.seekp(HllUtil<>::FAMILY_BYTE);
164
+ ss.seekp(hll_constants::FAMILY_BYTE);
165
165
  ss.put(0);
166
166
  ss.seekg(0);
167
167
  REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
168
- ss.seekp(HllUtil<>::FAMILY_BYTE);
169
- ss.put(HllUtil<>::FAMILY_ID);
168
+ ss.seekp(hll_constants::FAMILY_BYTE);
169
+ ss.put(hll_constants::FAMILY_ID);
170
170
 
171
- ss.seekg(HllUtil<>::MODE_BYTE);
172
- uint8_t tmp = ss.get();
173
- ss.seekp(HllUtil<>::MODE_BYTE);
171
+ ss.seekg(hll_constants::MODE_BYTE);
172
+ auto tmp = ss.get();
173
+ ss.seekp(hll_constants::MODE_BYTE);
174
174
  ss.put(0x11); // HLL_6, SET
175
175
  ss.seekg(0);
176
176
  REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
177
- ss.seekp(HllUtil<>::MODE_BYTE);
178
- ss.put(tmp);
177
+ ss.seekp(hll_constants::MODE_BYTE);
178
+ ss.put((char)tmp);
179
179
 
180
- ss.seekg(HllUtil<>::LG_ARR_BYTE);
180
+ ss.seekg(hll_constants::LG_ARR_BYTE);
181
181
  tmp = ss.get();
182
- ss.seekp(HllUtil<>::LG_ARR_BYTE);
182
+ ss.seekp(hll_constants::LG_ARR_BYTE);
183
183
  ss.put(0);
184
184
  ss.seekg(0);
185
185
  hll_sketch::deserialize(ss);
186
186
  // should work fine despite the corruption
187
- ss.seekp(HllUtil<>::LG_ARR_BYTE);
188
- ss.put(tmp);
187
+ ss.seekp(hll_constants::LG_ARR_BYTE);
188
+ ss.put((char)tmp);
189
189
  }
190
190
 
191
191
  } /* namespace datasketches */
@@ -27,7 +27,7 @@ namespace datasketches {
27
27
  using hll_sketch_test_alloc = hll_sketch_alloc<test_allocator<uint8_t>>;
28
28
  using alloc = test_allocator<uint8_t>;
29
29
 
30
- static void runCheckCopy(int lgConfigK, target_hll_type tgtHllType) {
30
+ static void runCheckCopy(uint8_t lgConfigK, target_hll_type tgtHllType) {
31
31
  hll_sketch_test_alloc sk(lgConfigK, tgtHllType, false, 0);
32
32
 
33
33
  for (int i = 0; i < 7; ++i) {
@@ -66,7 +66,7 @@ TEST_CASE("hll sketch: check copies", "[hll_sketch]") {
66
66
  }
67
67
 
68
68
  static void copyAs(target_hll_type srcType, target_hll_type dstType) {
69
- int lgK = 8;
69
+ uint8_t lgK = 8;
70
70
  int n1 = 7;
71
71
  int n2 = 24;
72
72
  int n3 = 1000;
@@ -109,7 +109,7 @@ TEST_CASE("hll sketch: check copy as", "[hll_sketch]") {
109
109
  TEST_CASE("hll sketch: check misc1", "[hll_sketch]") {
110
110
  test_allocator_total_bytes = 0;
111
111
  {
112
- int lgConfigK = 8;
112
+ uint8_t lgConfigK = 8;
113
113
  target_hll_type srcType = target_hll_type::HLL_8;
114
114
  hll_sketch_test_alloc sk(lgConfigK, srcType, false, 0);
115
115
 
@@ -124,7 +124,7 @@ TEST_CASE("hll sketch: check misc1", "[hll_sketch]") {
124
124
  sk.update(24); // HLL
125
125
  REQUIRE(sk.get_updatable_serialization_bytes() == 40 + 256);
126
126
 
127
- const int hllBytes = HllUtil<>::HLL_BYTE_ARR_START + (1 << lgConfigK);
127
+ const auto hllBytes = hll_constants::HLL_BYTE_ARR_START + (1 << lgConfigK);
128
128
  REQUIRE(sk.get_compact_serialization_bytes() == hllBytes);
129
129
  REQUIRE(hll_sketch::get_max_updatable_serialization_bytes(lgConfigK, HLL_8) == hllBytes);
130
130
  }
@@ -135,22 +135,22 @@ TEST_CASE("hll sketch: check num std dev", "[hll_sketch]") {
135
135
  REQUIRE_THROWS_AS(HllUtil<>::checkNumStdDev(0), std::invalid_argument);
136
136
  }
137
137
 
138
- void checkSerializationSizes(const int lgConfigK, target_hll_type tgtHllType) {
138
+ void checkSerializationSizes(uint8_t lgConfigK, target_hll_type tgtHllType) {
139
139
  hll_sketch_test_alloc sk(lgConfigK, tgtHllType, false, 0);
140
140
  int i;
141
141
 
142
142
  // LIST
143
143
  for (i = 0; i < 7; ++i) { sk.update(i); }
144
- int expected = HllUtil<>::LIST_INT_ARR_START + (i << 2);
144
+ auto expected = hll_constants::LIST_INT_ARR_START + (i << 2);
145
145
  REQUIRE(sk.get_compact_serialization_bytes() == expected);
146
- expected = HllUtil<>::LIST_INT_ARR_START + (4 << HllUtil<>::LG_INIT_LIST_SIZE);
146
+ expected = hll_constants::LIST_INT_ARR_START + (4 << hll_constants::LG_INIT_LIST_SIZE);
147
147
  REQUIRE(sk.get_updatable_serialization_bytes() == expected);
148
148
 
149
149
  // SET
150
150
  for (i = 7; i < 24; ++i) { sk.update(i); }
151
- expected = HllUtil<>::HASH_SET_INT_ARR_START + (i << 2);
151
+ expected = hll_constants::HASH_SET_INT_ARR_START + (i << 2);
152
152
  REQUIRE(sk.get_compact_serialization_bytes() == expected);
153
- expected = HllUtil<>::HASH_SET_INT_ARR_START + (4 << HllUtil<>::LG_INIT_SET_SIZE);
153
+ expected = hll_constants::HASH_SET_INT_ARR_START + (4 << hll_constants::LG_INIT_SET_SIZE);
154
154
  REQUIRE(sk.get_updatable_serialization_bytes() == expected);
155
155
  }
156
156
 
@@ -178,7 +178,7 @@ TEST_CASE("hll sketch: exercise to string", "[hll_sketch]") {
178
178
 
179
179
  // Creates and serializes then deserializes sketch.
180
180
  // Returns true if deserialized sketch is compact.
181
- static bool checkCompact(const int lgK, const int n, const target_hll_type type, bool compact) {
181
+ static bool checkCompact(uint8_t lgK, const int n, const target_hll_type type, bool compact) {
182
182
  hll_sketch_test_alloc sk(lgK, type, false, 0);
183
183
  for (int i = 0; i < n; ++i) { sk.update(i); }
184
184
 
@@ -201,7 +201,7 @@ static bool checkCompact(const int lgK, const int n, const target_hll_type type,
201
201
  TEST_CASE("hll sketch: check compact flag", "[hll_sketch]") {
202
202
  test_allocator_total_bytes = 0;
203
203
  {
204
- int lgK = 8;
204
+ uint8_t lgK = 8;
205
205
  // unless/until we create non-updatable "direct" versions,
206
206
  // deserialized image should never be compact
207
207
  // LIST: follows serialization request
@@ -230,10 +230,10 @@ TEST_CASE("hll sketch: check compact flag", "[hll_sketch]") {
230
230
  TEST_CASE("hll sketch: check k limits", "[hll_sketch]") {
231
231
  test_allocator_total_bytes = 0;
232
232
  {
233
- hll_sketch_test_alloc sketch1(HllUtil<>::MIN_LOG_K, target_hll_type::HLL_8, false, 0);
234
- hll_sketch_test_alloc sketch2(HllUtil<>::MAX_LOG_K, target_hll_type::HLL_4, false, 0);
235
- REQUIRE_THROWS_AS(hll_sketch_test_alloc(HllUtil<>::MIN_LOG_K - 1, target_hll_type::HLL_4, false, 0), std::invalid_argument);
236
- REQUIRE_THROWS_AS(hll_sketch_test_alloc(HllUtil<>::MAX_LOG_K + 1, target_hll_type::HLL_4, false, 0), std::invalid_argument);
233
+ hll_sketch_test_alloc sketch1(hll_constants::MIN_LOG_K, target_hll_type::HLL_8, false, 0);
234
+ hll_sketch_test_alloc sketch2(hll_constants::MAX_LOG_K, target_hll_type::HLL_4, false, 0);
235
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc(hll_constants::MIN_LOG_K - 1, target_hll_type::HLL_4, false, 0), std::invalid_argument);
236
+ REQUIRE_THROWS_AS(hll_sketch_test_alloc(hll_constants::MAX_LOG_K + 1, target_hll_type::HLL_4, false, 0), std::invalid_argument);
237
237
  }
238
238
  REQUIRE(test_allocator_total_bytes == 0);
239
239
  }
@@ -24,23 +24,19 @@
24
24
 
25
25
  namespace datasketches {
26
26
 
27
- static int min(int a, int b) {
28
- return (a < b) ? a : b;
29
- }
30
-
31
27
  static void println(std::string& str) {
32
28
  //std::cout << str << "\n";
33
29
  }
34
30
 
35
31
  static void basicUnion(uint64_t n1, uint64_t n2,
36
- uint64_t lgk1, uint64_t lgk2, uint64_t lgMaxK,
32
+ uint8_t lgk1, uint8_t lgk2, uint8_t lgMaxK,
37
33
  target_hll_type type1, target_hll_type type2, target_hll_type resultType) {
38
34
  uint64_t v = 0;
39
35
  //int tot = n1 + n2;
40
36
 
41
37
  hll_sketch h1(lgk1, type1);
42
38
  hll_sketch h2(lgk2, type2);
43
- int lgControlK = min(min(lgk1, lgk2), lgMaxK);
39
+ uint8_t lgControlK = std::min(std::min(lgk1, lgk2), lgMaxK);
44
40
  hll_sketch control(lgControlK, resultType);
45
41
 
46
42
  for (uint64_t i = 0; i < n1; ++i) {
@@ -89,9 +85,9 @@ TEST_CASE("hll union: check unions", "[hll_union]") {
89
85
  target_hll_type type2 = HLL_8;
90
86
  target_hll_type resultType = HLL_8;
91
87
 
92
- uint64_t lgK1 = 7;
93
- uint64_t lgK2 = 7;
94
- uint64_t lgMaxK = 7;
88
+ uint8_t lgK1 = 7;
89
+ uint8_t lgK2 = 7;
90
+ uint8_t lgMaxK = 7;
95
91
  uint64_t n1 = 7;
96
92
  uint64_t n2 = 7;
97
93
  basicUnion(n1, n2, lgK1, lgK2, lgMaxK, type1, type2, resultType);
@@ -108,7 +104,7 @@ TEST_CASE("hll union: check unions", "[hll_union]") {
108
104
  n2 = 14;
109
105
  basicUnion(n1, n2, lgK1, lgK2, lgMaxK, type1, type2, resultType);
110
106
 
111
- int i = 0;
107
+ uint8_t i = 0;
112
108
  for (i = 7; i <= 13; ++i) {
113
109
  lgK1 = i;
114
110
  lgK2 = i;
@@ -184,9 +180,9 @@ TEST_CASE("hll union: check composite estimate", "[hll_union]") {
184
180
  }
185
181
 
186
182
  TEST_CASE("hll union: check config k limits", "[hll_union]") {
187
- REQUIRE_THROWS_AS(hll_union(HllUtil<>::MIN_LOG_K - 1), std::invalid_argument);
183
+ REQUIRE_THROWS_AS(hll_union(hll_constants::MIN_LOG_K - 1), std::invalid_argument);
188
184
 
189
- REQUIRE_THROWS_AS(hll_union(HllUtil<>::MAX_LOG_K + 1), std::invalid_argument);
185
+ REQUIRE_THROWS_AS(hll_union(hll_constants::MAX_LOG_K + 1), std::invalid_argument);
190
186
  }
191
187
 
192
188
  static double getBound(int lgK, bool ub, bool oooFlag, int numStdDev, double est) {
@@ -195,7 +191,7 @@ static double getBound(int lgK, bool ub, bool oooFlag, int numStdDev, double est
195
191
  }
196
192
 
197
193
  TEST_CASE("hll union: check ub lb", "[hll_union]") {
198
- int lgK = 4;
194
+ uint8_t lgK = 4;
199
195
  int n = 1 << 20;
200
196
  bool oooFlag = false;
201
197
 
@@ -223,7 +219,7 @@ TEST_CASE("hll union: check ub lb", "[hll_union]") {
223
219
  }
224
220
 
225
221
  TEST_CASE("hll union: check conversions", "[hll_union]") {
226
- int lgK = 4;
222
+ uint8_t lgK = 4;
227
223
  hll_sketch sk1(lgK, HLL_8);
228
224
  hll_sketch sk2(lgK, HLL_8);
229
225
  int n = 1 << 20;
@@ -57,7 +57,7 @@ static int get_n(int lg_k, hll_mode mode) {
57
57
 
58
58
  static long v = 0;
59
59
 
60
- static hll_sketch build_sketch(int lg_k, target_hll_type hll_type, hll_mode mode) {
60
+ static hll_sketch build_sketch(uint8_t lg_k, target_hll_type hll_type, hll_mode mode) {
61
61
  hll_sketch sk(lg_k, hll_type);
62
62
  int n = get_n(lg_k, mode);
63
63
  for (int i = 0; i < n; i++) sk.update(static_cast<uint64_t>(i + v));
@@ -67,7 +67,7 @@ static hll_sketch build_sketch(int lg_k, target_hll_type hll_type, hll_mode mode
67
67
 
68
68
  // merges a sketch to an empty union and gets result of the same type, checks binary equivalence
69
69
  static void union_one_update(bool compact) {
70
- for (int lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
70
+ for (uint8_t lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
71
71
  for (int mode = 0; mode <= 2; mode++) { // List, Set, Hll
72
72
  if ((lg_k < 8) && (mode == 1)) continue; // lg_k < 8 list transitions directly to HLL
73
73
  for (int t = 0; t <= 2; t++) { // HLL_4, HLL_6, HLL_8
@@ -102,7 +102,7 @@ TEST_CASE("hll isomorphic: union one update serialize compact", "[hll_isomorphic
102
102
 
103
103
  // converts a sketch to a different type and converts back to the original type to check binary equivalence
104
104
  static void convert_back_and_forth(bool compact) {
105
- for (int lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
105
+ for (uint8_t lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
106
106
  for (int mode = 0; mode <= 2; mode++) { // List, Set, Hll
107
107
  if ((lg_k < 8) && (mode == 1)) continue; // lg_k < 8 list transitions directly to HLL
108
108
  for (int t1 = 0; t1 <= 2; t1++) { // HLL_4, HLL_6, HLL_8
@@ -44,11 +44,11 @@ TEST_CASE("hll to/from byte array: double serialize", "[hll_byte_array]") {
44
44
  auto ser2 = sk.serialize_updatable();
45
45
 
46
46
  REQUIRE(ser1.size() == ser2.size());
47
- int len = ser1.size();
47
+ size_t len = ser1.size();
48
48
  uint8_t* b1 = ser1.data();
49
49
  uint8_t* b2 = ser2.data();
50
50
 
51
- for (int i = 0; i < len; ++i) {
51
+ for (size_t i = 0; i < len; ++i) {
52
52
  REQUIRE(b2[i] == b1[i]);
53
53
  }
54
54
  }
@@ -129,7 +129,7 @@ static void checkSketchEquality(hll_sketch& sk1, hll_sketch& sk2) {
129
129
  REQUIRE(sk1.get_target_type() == sk2.get_target_type());
130
130
  }
131
131
 
132
- static void toFrom(const int lgConfigK, const target_hll_type tgtHllType, const int n) {
132
+ static void toFrom(const uint8_t lgConfigK, const target_hll_type tgtHllType, const int n) {
133
133
  hll_sketch src(lgConfigK, tgtHllType);
134
134
  for (int i = 0; i < n; ++i) {
135
135
  src.update(i);
@@ -157,7 +157,7 @@ static void toFrom(const int lgConfigK, const target_hll_type tgtHllType, const
157
157
  TEST_CASE("hll to/from byte array: to from sketch", "[hll_byte_array]") {
158
158
  for (int i = 0; i < 10; ++i) {
159
159
  int n = nArr[i];
160
- for (int lgK = 4; lgK <= 13; ++lgK) {
160
+ for (uint8_t lgK = 4; lgK <= 13; ++lgK) {
161
161
  toFrom(lgK, HLL_4, n);
162
162
  toFrom(lgK, HLL_6, n);
163
163
  toFrom(lgK, HLL_8, n);
@@ -26,7 +26,8 @@
26
26
 
27
27
  namespace datasketches {
28
28
 
29
- static std::independent_bits_engine<std::mt19937, 1, uint32_t> random_bit(std::chrono::system_clock::now().time_since_epoch().count());
29
+ static std::independent_bits_engine<std::mt19937, 1, uint32_t>
30
+ random_bit(static_cast<uint32_t>(std::chrono::system_clock::now().time_since_epoch().count()));
30
31
 
31
32
  #ifdef KLL_VALIDATION
32
33
  extern uint32_t kll_next_offset;
@@ -46,9 +47,9 @@ class kll_helper {
46
47
  static inline uint8_t floor_of_log2_of_fraction(uint64_t numer, uint64_t denom);
47
48
  static inline uint8_t ub_on_num_levels(uint64_t n);
48
49
  static inline uint32_t compute_total_capacity(uint16_t k, uint8_t m, uint8_t num_levels);
49
- static inline uint32_t level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid);
50
- static inline uint32_t int_cap_aux(uint16_t k, uint8_t depth);
51
- static inline uint32_t int_cap_aux_aux(uint16_t k, uint8_t depth);
50
+ static inline uint16_t level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid);
51
+ static inline uint16_t int_cap_aux(uint16_t k, uint8_t depth);
52
+ static inline uint16_t int_cap_aux_aux(uint16_t k, uint8_t depth);
52
53
  static inline uint64_t sum_the_sample_weights(uint8_t num_levels, const uint32_t* levels);
53
54
 
54
55
  /*
@@ -55,28 +55,28 @@ uint32_t kll_helper::compute_total_capacity(uint16_t k, uint8_t m, uint8_t num_l
55
55
  return total;
56
56
  }
57
57
 
58
- uint32_t kll_helper::level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid) {
58
+ uint16_t kll_helper::level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid) {
59
59
  if (height >= numLevels) throw std::invalid_argument("height >= numLevels");
60
60
  const uint8_t depth = numLevels - height - 1;
61
- return std::max((uint32_t) min_wid, int_cap_aux(k, depth));
61
+ return std::max<uint16_t>(min_wid, int_cap_aux(k, depth));
62
62
  }
63
63
 
64
- uint32_t kll_helper::int_cap_aux(uint16_t k, uint8_t depth) {
64
+ uint16_t kll_helper::int_cap_aux(uint16_t k, uint8_t depth) {
65
65
  if (depth > 60) throw std::invalid_argument("depth > 60");
66
66
  if (depth <= 30) return int_cap_aux_aux(k, depth);
67
67
  const uint8_t half = depth / 2;
68
68
  const uint8_t rest = depth - half;
69
- const uint32_t tmp = int_cap_aux_aux(k, half);
69
+ const uint16_t tmp = int_cap_aux_aux(k, half);
70
70
  return int_cap_aux_aux(tmp, rest);
71
71
  }
72
72
 
73
- uint32_t kll_helper::int_cap_aux_aux(uint16_t k, uint8_t depth) {
73
+ uint16_t kll_helper::int_cap_aux_aux(uint16_t k, uint8_t depth) {
74
74
  if (depth > 30) throw std::invalid_argument("depth > 30");
75
75
  const uint64_t twok = k << 1; // for rounding, we pre-multiply by 2
76
76
  const uint64_t tmp = (uint64_t) (((uint64_t) twok << depth) / powers_of_three[depth]);
77
77
  const uint64_t result = (tmp + 1) >> 1; // then here we add 1 and divide by 2
78
78
  if (result > k) throw std::logic_error("result > k");
79
- return result;
79
+ return static_cast<uint16_t>(result);
80
80
  }
81
81
 
82
82
  uint64_t kll_helper::sum_the_sample_weights(uint8_t num_levels, const uint32_t* levels) {