datasketches 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -39,9 +39,9 @@ static std::string testBinaryInputPath = "test/";
39
39
  #endif
40
40
 
41
41
  // typical usage would be just kll_sketch<float> or kll_sketch<std::string>, but here we use test_allocator
42
- using kll_float_sketch = kll_sketch<float, std::less<float>, serde<float>, test_allocator<float>>;
42
+ using kll_float_sketch = kll_sketch<float, std::less<float>, test_allocator<float>>;
43
43
  // let std::string use the default allocator for simplicity, otherwise we need to define "less" and "serde"
44
- using kll_string_sketch = kll_sketch<std::string, std::less<std::string>, serde<std::string>, test_allocator<std::string>>;
44
+ using kll_string_sketch = kll_sketch<std::string, std::less<std::string>, test_allocator<std::string>>;
45
45
 
46
46
  TEST_CASE("kll sketch", "[kll_sketch]") {
47
47
 
@@ -49,71 +49,78 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
49
49
  test_allocator_total_bytes = 0;
50
50
 
51
51
  SECTION("k limits") {
52
- kll_float_sketch sketch1(kll_float_sketch::MIN_K, 0); // this should work
53
- kll_float_sketch sketch2(kll_float_sketch::MAX_K, 0); // this should work
54
- REQUIRE_THROWS_AS(new kll_float_sketch(kll_float_sketch::MIN_K - 1, 0), std::invalid_argument);
52
+ kll_float_sketch sketch1(kll_float_sketch::MIN_K, std::less<float>(), 0); // this should work
53
+ kll_float_sketch sketch2(kll_float_sketch::MAX_K, std::less<float>(), 0); // this should work
54
+ REQUIRE_THROWS_AS(new kll_float_sketch(kll_float_sketch::MIN_K - 1, std::less<float>(), 0), std::invalid_argument);
55
55
  // MAX_K + 1 makes no sense because k is uint16_t
56
+ //std::cout << "sizeof(kll_sketch<float>)=" << sizeof(kll_sketch<float>) << "\n";
57
+ //std::cout << "sizeof(kll_sketch<double>)=" << sizeof(kll_sketch<double>) << "\n";
56
58
  }
57
59
 
58
60
  SECTION("empty") {
59
- kll_float_sketch sketch(200, 0);
61
+ kll_float_sketch sketch(200, std::less<float>(), 0);
60
62
  REQUIRE(sketch.is_empty());
61
63
  REQUIRE_FALSE(sketch.is_estimation_mode());
62
64
  REQUIRE(sketch.get_n() == 0);
63
65
  REQUIRE(sketch.get_num_retained() == 0);
64
- REQUIRE(std::isnan(sketch.get_rank(0)));
65
- REQUIRE(std::isnan(sketch.get_min_value()));
66
- REQUIRE(std::isnan(sketch.get_max_value()));
67
- REQUIRE(std::isnan(sketch.get_quantile(0.5)));
68
- const double fractions[3] {0, 0.5, 1};
69
- REQUIRE(sketch.get_quantiles(fractions, 3).size() == 0);
66
+ REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
67
+ REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
68
+ REQUIRE_THROWS_AS(sketch.get_rank(0), std::runtime_error);
69
+ REQUIRE_THROWS_AS(sketch.get_quantile(0.5), std::runtime_error);
70
+ const double ranks[3] {0, 0.5, 1};
71
+ REQUIRE_THROWS_AS(sketch.get_quantiles(ranks, 3), std::runtime_error);
70
72
  const float split_points[1] {0};
71
- REQUIRE(sketch.get_PMF(split_points, 1).size() == 0);
72
- REQUIRE(sketch.get_CDF(split_points, 1).size() == 0);
73
+ REQUIRE_THROWS_AS(sketch.get_PMF(split_points, 1), std::runtime_error);
74
+ REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::runtime_error);
73
75
 
74
- for (auto it: sketch) {
75
- (void) it; // to suppress "unused" warning
76
+ for (auto pair: sketch) {
77
+ unused(pair); // to suppress "unused" warning
76
78
  FAIL("should be no iterations over an empty sketch");
77
79
  }
78
80
  }
79
81
 
80
82
  SECTION("get bad quantile") {
81
- kll_float_sketch sketch(200, 0);
83
+ kll_float_sketch sketch(200, std::less<float>(), 0);
82
84
  sketch.update(0); // has to be non-empty to reach the check
83
85
  REQUIRE_THROWS_AS(sketch.get_quantile(-1), std::invalid_argument);
84
86
  }
85
87
 
86
88
  SECTION("one item") {
87
- kll_float_sketch sketch(200, 0);
89
+ kll_float_sketch sketch(200, std::less<float>(), 0);
88
90
  sketch.update(1.0f);
89
91
  REQUIRE_FALSE(sketch.is_empty());
90
92
  REQUIRE_FALSE(sketch.is_estimation_mode());
91
93
  REQUIRE(sketch.get_n() == 1);
92
94
  REQUIRE(sketch.get_num_retained() == 1);
93
- REQUIRE(sketch.get_rank(1.0f) == 0.0);
94
- REQUIRE(sketch.get_rank<true>(1.0f) == 1.0);
95
- REQUIRE(sketch.get_rank(2.0f) == 1.0);
95
+ REQUIRE(sketch.get_rank(1.0f, false) == 0.0);
96
+ REQUIRE(sketch.get_rank(1.0f) == 1.0);
97
+ REQUIRE(sketch.get_rank(2.0f, false) == 1.0);
96
98
  REQUIRE(sketch.get_rank(std::numeric_limits<float>::infinity()) == 1.0);
97
- REQUIRE(sketch.get_min_value() == 1.0);
98
- REQUIRE(sketch.get_max_value() == 1.0);
99
+ REQUIRE(sketch.get_min_item() == 1.0);
100
+ REQUIRE(sketch.get_max_item() == 1.0);
99
101
  REQUIRE(sketch.get_quantile(0.5) == 1.0);
100
- const double fractions[3] {0, 0.5, 1};
101
- auto quantiles = sketch.get_quantiles(fractions, 3);
102
+ const double ranks[3] {0, 0.5, 1};
103
+ auto quantiles = sketch.get_quantiles(ranks, 3);
102
104
  REQUIRE(quantiles.size() == 3);
103
105
  REQUIRE(quantiles[0] == 1.0);
104
106
  REQUIRE(quantiles[1] == 1.0);
105
107
  REQUIRE(quantiles[2] == 1.0);
106
108
 
107
109
  int count = 0;
108
- for (auto it: sketch) {
109
- REQUIRE(it.second == 1);
110
+ for (auto pair: sketch) {
111
+ REQUIRE(pair.second == 1);
110
112
  ++count;
111
113
  }
112
114
  REQUIRE(count == 1);
115
+
116
+ // iterator dereferencing
117
+ auto it = sketch.begin();
118
+ REQUIRE(it->first == 1.0f);
119
+ REQUIRE((*it).first == 1.0f);
113
120
  }
114
121
 
115
122
  SECTION("NaN") {
116
- kll_float_sketch sketch(200, 0);
123
+ kll_float_sketch sketch(200, std::less<float>(), 0);
117
124
  sketch.update(std::numeric_limits<float>::quiet_NaN());
118
125
  REQUIRE(sketch.is_empty());
119
126
 
@@ -123,44 +130,44 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
123
130
  }
124
131
 
125
132
  SECTION("many items, exact mode") {
126
- kll_float_sketch sketch(200, 0);
133
+ kll_float_sketch sketch(200, std::less<float>(), 0);
127
134
  const uint32_t n = 200;
128
- for (uint32_t i = 0; i < n; i++) {
135
+ for (uint32_t i = 1; i <= n; i++) {
129
136
  sketch.update(static_cast<float>(i));
130
- REQUIRE(sketch.get_n() == i + 1);
137
+ REQUIRE(sketch.get_n() == i);
131
138
  }
132
139
  REQUIRE_FALSE(sketch.is_empty());
133
140
  REQUIRE_FALSE(sketch.is_estimation_mode());
134
141
  REQUIRE(sketch.get_num_retained() == n);
135
- REQUIRE(sketch.get_min_value() == 0.0);
136
- REQUIRE(sketch.get_quantile(0) == 0.0);
137
- REQUIRE(sketch.get_max_value() == n - 1);
138
- REQUIRE(sketch.get_quantile(1) == n - 1);
142
+ REQUIRE(sketch.get_min_item() == 1);
143
+ REQUIRE(sketch.get_quantile(0) == 1);
144
+ REQUIRE(sketch.get_max_item() == n);
145
+ REQUIRE(sketch.get_quantile(1) == n);
139
146
 
140
- const double fractions[3] {0, 0.5, 1};
141
- auto quantiles = sketch.get_quantiles(fractions, 3);
147
+ const double ranks[3] {0, 0.5, 1};
148
+ auto quantiles = sketch.get_quantiles(ranks, 3);
142
149
  REQUIRE(quantiles.size() == 3);
143
- REQUIRE(quantiles[0] == 0.0);
150
+ REQUIRE(quantiles[0] == 1);
144
151
  REQUIRE(quantiles[1] == n / 2);
145
- REQUIRE(quantiles[2] == n - 1 );
146
-
147
- for (uint32_t i = 0; i < n; i++) {
148
- const double true_rank = (double) i / n;
149
- REQUIRE(sketch.get_rank(static_cast<float>(i)) == true_rank);
150
- const double true_rank_inclusive = (double) (i + 1) / n;
151
- REQUIRE(sketch.get_rank<true>(static_cast<float>(i)) == true_rank_inclusive);
152
- }
152
+ REQUIRE(quantiles[2] == n);
153
153
 
154
- // the alternative method must produce the same result
154
+ // alternative method must produce the same result
155
155
  auto quantiles2 = sketch.get_quantiles(3);
156
156
  REQUIRE(quantiles2.size() == 3);
157
157
  REQUIRE(quantiles[0] == quantiles2[0]);
158
158
  REQUIRE(quantiles[1] == quantiles2[1]);
159
159
  REQUIRE(quantiles[2] == quantiles2[2]);
160
+
161
+ for (uint32_t i = 1; i <= n; i++) {
162
+ const double true_rank_inclusive = static_cast<double>(i) / n;
163
+ REQUIRE(sketch.get_rank(static_cast<float>(i)) == true_rank_inclusive);
164
+ const double true_rank_exclusive = static_cast<double>(i - 1) / n;
165
+ REQUIRE(sketch.get_rank(static_cast<float>(i), false) == true_rank_exclusive);
166
+ }
160
167
  }
161
168
 
162
169
  SECTION("10 items") {
163
- kll_float_sketch sketch(200, 0);
170
+ kll_float_sketch sketch(200, std::less<float>(), 0);
164
171
  sketch.update(1.0f);
165
172
  sketch.update(2.0f);
166
173
  sketch.update(3.0f);
@@ -172,23 +179,23 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
172
179
  sketch.update(9.0f);
173
180
  sketch.update(10.0f);
174
181
  REQUIRE(sketch.get_quantile(0) == 1.0);
175
- REQUIRE(sketch.get_quantile(0.5) == 6.0);
182
+ REQUIRE(sketch.get_quantile(0.5) == 5.0);
176
183
  REQUIRE(sketch.get_quantile(0.99) == 10.0);
177
184
  REQUIRE(sketch.get_quantile(1) == 10.0);
178
185
  }
179
186
 
180
187
  SECTION("100 items") {
181
- kll_float_sketch sketch(200, 0);
188
+ kll_float_sketch sketch(200, std::less<float>(), 0);
182
189
  for (int i = 0; i < 100; ++i) sketch.update(static_cast<float>(i));
183
190
  REQUIRE(sketch.get_quantile(0) == 0);
184
- REQUIRE(sketch.get_quantile(0.01) == 1);
185
- REQUIRE(sketch.get_quantile(0.5) == 50);
186
- REQUIRE(sketch.get_quantile(0.99) == 99.0);
191
+ REQUIRE(sketch.get_quantile(0.01) == 0);
192
+ REQUIRE(sketch.get_quantile(0.5) == 49);
193
+ REQUIRE(sketch.get_quantile(0.99) == 98.0);
187
194
  REQUIRE(sketch.get_quantile(1) == 99.0);
188
195
  }
189
196
 
190
197
  SECTION("many items, estimation mode") {
191
- kll_float_sketch sketch(200, 0);
198
+ kll_float_sketch sketch(200, std::less<float>(), 0);
192
199
  const int n = 1000000;
193
200
  for (int i = 0; i < n; i++) {
194
201
  sketch.update(static_cast<float>(i));
@@ -196,87 +203,62 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
196
203
  }
197
204
  REQUIRE_FALSE(sketch.is_empty());
198
205
  REQUIRE(sketch.is_estimation_mode());
199
- REQUIRE(sketch.get_min_value() == 0.0); // min value is exact
200
- REQUIRE(sketch.get_quantile(0) == 0.0); // min value is exact
201
- REQUIRE(sketch.get_max_value() == n - 1); // max value is exact
202
- REQUIRE(sketch.get_quantile(1) == n - 1); // max value is exact
206
+ REQUIRE(sketch.get_min_item() == 0.0); // min value is exact
207
+ REQUIRE(sketch.get_max_item() == n - 1); // max value is exact
203
208
 
204
209
  // test rank
205
210
  for (int i = 0; i < n; i++) {
206
211
  const double trueRank = (double) i / n;
207
- REQUIRE(sketch.get_rank(static_cast<float>(i)) == Approx(trueRank).margin(RANK_EPS_FOR_K_200));
208
- }
209
-
210
- // test quantiles at every 0.1 percentage point
211
- double fractions[1001];
212
- double reverse_fractions[1001]; // check that ordering does not matter
213
- for (int i = 0; i < 1001; i++) {
214
- fractions[i] = (double) i / 1000;
215
- reverse_fractions[1000 - i] = fractions[i];
216
- }
217
- auto quantiles = sketch.get_quantiles(fractions, 1001);
218
- auto reverse_quantiles = sketch.get_quantiles(reverse_fractions, 1001);
219
- float previous_quantile(0);
220
- for (int i = 0; i < 1001; i++) {
221
- // expensive in a loop, just to check the equivalence here, not advised for real code
222
- const float quantile = sketch.get_quantile(fractions[i]);
223
- REQUIRE(quantiles[i] == quantile);
224
- REQUIRE(reverse_quantiles[1000 - i] == quantile);
225
- REQUIRE(previous_quantile <= quantile);
226
- previous_quantile = quantile;
212
+ REQUIRE(sketch.get_rank(static_cast<float>(i), false) == Approx(trueRank).margin(RANK_EPS_FOR_K_200));
227
213
  }
228
214
 
229
215
  //std::cout << sketch.to_string();
230
216
 
231
217
  uint32_t count = 0;
232
218
  uint64_t total_weight = 0;
233
- for (auto it: sketch) {
219
+ for (auto pair: sketch) {
234
220
  ++count;
235
- total_weight += it.second;
221
+ total_weight += pair.second;
236
222
  }
237
223
  REQUIRE(count == sketch.get_num_retained());
238
224
  REQUIRE(total_weight == sketch.get_n());
239
225
  }
240
226
 
241
- SECTION("consistency between get_rank adn get_PMF/CDF") {
242
- kll_float_sketch sketch(200, 0);
243
- const int n = 1000;
227
+ SECTION("consistency between get_rank and get_PMF/CDF") {
228
+ kll_float_sketch sketch(200, std::less<float>(), 0);
229
+ const int n = 200;
244
230
  float values[n];
245
231
  for (int i = 0; i < n; i++) {
246
232
  sketch.update(static_cast<float>(i));
247
233
  values[i] = static_cast<float>(i);
248
234
  }
249
- { // inclusive=false (default)
250
- const auto ranks(sketch.get_CDF(values, n));
251
- const auto pmf(sketch.get_PMF(values, n));
235
+ { // inclusive=false
236
+ const auto ranks(sketch.get_CDF(values, n, false));
237
+ const auto pmf(sketch.get_PMF(values, n, false));
252
238
 
253
239
  double subtotal_pmf = 0;
254
240
  for (int i = 0; i < n; i++) {
255
- if (sketch.get_rank(values[i]) != ranks[i]) {
256
- std::cerr << "checking rank vs CDF for value " << i << std::endl;
257
- REQUIRE(sketch.get_rank(values[i]) == ranks[i]);
241
+ if (sketch.get_rank(values[i], false) != ranks[i]) {
242
+ FAIL("checking rank vs CDF for value " + std::to_string(i));
258
243
  }
259
244
  subtotal_pmf += pmf[i];
260
245
  if (abs(ranks[i] - subtotal_pmf) > NUMERIC_NOISE_TOLERANCE) {
261
- std::cerr << "CDF vs PMF for value " << i << std::endl;
262
- REQUIRE(ranks[i] == Approx(subtotal_pmf).margin(NUMERIC_NOISE_TOLERANCE));
246
+ FAIL("CDF vs PMF for value " + std::to_string(i));
263
247
  }
264
248
  }
265
249
  }
266
- { // inclusive=true
267
- const auto ranks(sketch.get_CDF<true>(values, n));
268
- const auto pmf(sketch.get_PMF<true>(values, n));
250
+ { // inclusive=true (default)
251
+ const auto ranks(sketch.get_CDF(values, n));
252
+ const auto pmf(sketch.get_PMF(values, n));
269
253
 
270
254
  double subtotal_pmf = 0;
271
255
  for (int i = 0; i < n; i++) {
272
- if (sketch.get_rank<true>(values[i]) != ranks[i]) {
273
- std::cerr << "checking rank vs CDF for value " << i << std::endl;
274
- REQUIRE(sketch.get_rank(values[i]) == ranks[i]);
256
+ if (sketch.get_rank(values[i]) != ranks[i]) {
257
+ FAIL("checking rank vs CDF for value " + std::to_string(i));
275
258
  }
276
259
  subtotal_pmf += pmf[i];
277
260
  if (abs(ranks[i] - subtotal_pmf) > NUMERIC_NOISE_TOLERANCE) {
278
- std::cerr << "CDF vs PMF for value " << i << std::endl;
279
- REQUIRE(ranks[i] == Approx(subtotal_pmf).margin(NUMERIC_NOISE_TOLERANCE));
261
+ FAIL("CDF vs PMF for value " + std::to_string(i));
280
262
  }
281
263
  }
282
264
  }
@@ -286,151 +268,151 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
286
268
  std::ifstream is;
287
269
  is.exceptions(std::ios::failbit | std::ios::badbit);
288
270
  is.open(testBinaryInputPath + "kll_sketch_from_java.sk", std::ios::binary);
289
- auto sketch = kll_float_sketch::deserialize(is, test_allocator<float>(0));
271
+ auto sketch = kll_float_sketch::deserialize(is, serde<float>(), std::less<float>(), 0);
290
272
  REQUIRE_FALSE(sketch.is_empty());
291
273
  REQUIRE(sketch.is_estimation_mode());
292
274
  REQUIRE(sketch.get_n() == 1000000);
293
275
  REQUIRE(sketch.get_num_retained() == 614);
294
- REQUIRE(sketch.get_min_value() == 0.0);
295
- REQUIRE(sketch.get_max_value() == 999999.0);
276
+ REQUIRE(sketch.get_min_item() == 0.0);
277
+ REQUIRE(sketch.get_max_item() == 999999.0);
296
278
  }
297
279
 
298
280
  SECTION("stream serialize deserialize empty") {
299
- kll_float_sketch sketch(200, 0);
281
+ kll_float_sketch sketch(200, std::less<float>(), 0);
300
282
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
301
283
  sketch.serialize(s);
302
284
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
303
- auto sketch2 = kll_float_sketch::deserialize(s, test_allocator<float>(0));
285
+ auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), std::less<float>(), 0);
304
286
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
305
287
  REQUIRE(s.tellg() == s.tellp());
306
288
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
307
289
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
308
290
  REQUIRE(sketch2.get_n() == sketch.get_n());
309
291
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
310
- REQUIRE(std::isnan(sketch2.get_min_value()));
311
- REQUIRE(std::isnan(sketch2.get_max_value()));
292
+ REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
293
+ REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
312
294
  REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
313
295
  REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
314
296
  }
315
297
 
316
298
  SECTION("bytes serialize deserialize empty") {
317
- kll_float_sketch sketch(200, 0);
299
+ kll_float_sketch sketch(200, std::less<float>(), 0);
318
300
  auto bytes = sketch.serialize();
319
- auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), 0);
301
+ auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), std::less<float>(), 0);
320
302
  REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
321
303
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
322
304
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
323
305
  REQUIRE(sketch2.get_n() == sketch.get_n());
324
306
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
325
- REQUIRE(std::isnan(sketch2.get_min_value()));
326
- REQUIRE(std::isnan(sketch2.get_max_value()));
307
+ REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
308
+ REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
327
309
  REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
328
310
  REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
329
311
  }
330
312
 
331
313
  SECTION("stream serialize deserialize one item") {
332
- kll_float_sketch sketch(200, 0);
314
+ kll_float_sketch sketch(200, std::less<float>(), 0);
333
315
  sketch.update(1.0f);
334
316
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
335
317
  sketch.serialize(s);
336
318
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
337
- auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), 0);
319
+ auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), std::less<float>(), 0);
338
320
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
339
321
  REQUIRE(s.tellg() == s.tellp());
340
322
  REQUIRE_FALSE(sketch2.is_empty());
341
323
  REQUIRE_FALSE(sketch2.is_estimation_mode());
342
324
  REQUIRE(sketch2.get_n() == 1);
343
325
  REQUIRE(sketch2.get_num_retained() == 1);
344
- REQUIRE(sketch2.get_min_value() == 1.0);
345
- REQUIRE(sketch2.get_max_value() == 1.0);
326
+ REQUIRE(sketch2.get_min_item() == 1.0);
327
+ REQUIRE(sketch2.get_max_item() == 1.0);
346
328
  REQUIRE(sketch2.get_quantile(0.5) == 1.0);
347
- REQUIRE(sketch2.get_rank(1) == 0.0);
348
- REQUIRE(sketch2.get_rank(2) == 1.0);
329
+ REQUIRE(sketch2.get_rank(1, false) == 0.0);
330
+ REQUIRE(sketch2.get_rank(2, false) == 1.0);
349
331
  }
350
332
 
351
333
  SECTION("bytes serialize deserialize one item") {
352
- kll_float_sketch sketch(200, 0);
334
+ kll_float_sketch sketch(200, std::less<float>(), 0);
353
335
  sketch.update(1.0f);
354
336
  auto bytes = sketch.serialize();
355
337
  REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
356
- auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), 0);
338
+ auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), std::less<float>(), 0);
357
339
  REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
358
340
  REQUIRE_FALSE(sketch2.is_empty());
359
341
  REQUIRE_FALSE(sketch2.is_estimation_mode());
360
342
  REQUIRE(sketch2.get_n() == 1);
361
343
  REQUIRE(sketch2.get_num_retained() == 1);
362
- REQUIRE(sketch2.get_min_value() == 1.0);
363
- REQUIRE(sketch2.get_max_value() == 1.0);
344
+ REQUIRE(sketch2.get_min_item() == 1.0);
345
+ REQUIRE(sketch2.get_max_item() == 1.0);
364
346
  REQUIRE(sketch2.get_quantile(0.5) == 1.0);
365
- REQUIRE(sketch2.get_rank(1) == 0.0);
366
- REQUIRE(sketch2.get_rank(2) == 1.0);
347
+ REQUIRE(sketch2.get_rank(1, false) == 0.0);
348
+ REQUIRE(sketch2.get_rank(2, false) == 1.0);
367
349
  }
368
350
 
369
351
  SECTION("deserialize one item v1") {
370
352
  std::ifstream is;
371
353
  is.exceptions(std::ios::failbit | std::ios::badbit);
372
354
  is.open(testBinaryInputPath + "kll_sketch_float_one_item_v1.sk", std::ios::binary);
373
- auto sketch = kll_float_sketch::deserialize(is, serde<float>(), 0);
355
+ auto sketch = kll_float_sketch::deserialize(is, serde<float>(), std::less<float>(), 0);
374
356
  REQUIRE_FALSE(sketch.is_empty());
375
357
  REQUIRE_FALSE(sketch.is_estimation_mode());
376
358
  REQUIRE(sketch.get_n() == 1);
377
359
  REQUIRE(sketch.get_num_retained() == 1);
378
- REQUIRE(sketch.get_min_value() == 1.0);
379
- REQUIRE(sketch.get_max_value() == 1.0);
360
+ REQUIRE(sketch.get_min_item() == 1.0);
361
+ REQUIRE(sketch.get_max_item() == 1.0);
380
362
  }
381
363
 
382
364
  SECTION("stream serialize deserialize three items") {
383
- kll_float_sketch sketch(200, 0);
365
+ kll_float_sketch sketch(200, std::less<float>(), 0);
384
366
  sketch.update(1.0f);
385
367
  sketch.update(2.0f);
386
368
  sketch.update(3.0f);
387
369
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
388
370
  sketch.serialize(s);
389
371
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
390
- auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), 0);
372
+ auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), std::less<float>(), 0);
391
373
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
392
374
  REQUIRE(s.tellg() == s.tellp());
393
375
  REQUIRE_FALSE(sketch2.is_empty());
394
376
  REQUIRE_FALSE(sketch2.is_estimation_mode());
395
377
  REQUIRE(sketch2.get_n() == 3);
396
378
  REQUIRE(sketch2.get_num_retained() == 3);
397
- REQUIRE(sketch2.get_min_value() == 1.0);
398
- REQUIRE(sketch2.get_max_value() == 3.0);
379
+ REQUIRE(sketch2.get_min_item() == 1.0);
380
+ REQUIRE(sketch2.get_max_item() == 3.0);
399
381
  }
400
382
 
401
383
  SECTION("bytes serialize deserialize three items") {
402
- kll_float_sketch sketch(200, 0);
384
+ kll_float_sketch sketch(200, std::less<float>(), 0);
403
385
  sketch.update(1.0f);
404
386
  sketch.update(2.0f);
405
387
  sketch.update(3.0f);
406
388
  auto bytes = sketch.serialize();
407
389
  REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
408
- auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), 0);
390
+ auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), std::less<float>(), 0);
409
391
  REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
410
392
  REQUIRE_FALSE(sketch2.is_empty());
411
393
  REQUIRE_FALSE(sketch2.is_estimation_mode());
412
394
  REQUIRE(sketch2.get_n() == 3);
413
395
  REQUIRE(sketch2.get_num_retained() == 3);
414
- REQUIRE(sketch2.get_min_value() == 1.0);
415
- REQUIRE(sketch2.get_max_value() == 3.0);
396
+ REQUIRE(sketch2.get_min_item() == 1.0);
397
+ REQUIRE(sketch2.get_max_item() == 3.0);
416
398
  }
417
399
 
418
400
  SECTION("stream serialize deserialize many floats") {
419
- kll_float_sketch sketch(200, 0);
401
+ kll_float_sketch sketch(200, std::less<float>(), 0);
420
402
  const int n = 1000;
421
403
  for (int i = 0; i < n; i++) sketch.update(static_cast<float>(i));
422
404
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
423
405
  sketch.serialize(s);
424
406
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
425
- auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), 0);
407
+ auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), std::less<float>(), 0);
426
408
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
427
409
  REQUIRE(s.tellg() == s.tellp());
428
410
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
429
411
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
430
412
  REQUIRE(sketch2.get_n() == sketch.get_n());
431
413
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
432
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
433
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
414
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
415
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
434
416
  REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
435
417
  REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
436
418
  REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
@@ -439,27 +421,27 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
439
421
  }
440
422
 
441
423
  SECTION("bytes serialize deserialize many floats") {
442
- kll_float_sketch sketch(200, 0);
424
+ kll_float_sketch sketch(200, std::less<float>(), 0);
443
425
  const int n = 1000;
444
426
  for (int i = 0; i < n; i++) sketch.update(static_cast<float>(i));
445
427
  auto bytes = sketch.serialize();
446
428
  REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
447
- auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), 0);
429
+ auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), std::less<float>(), 0);
448
430
  REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
449
431
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
450
432
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
451
433
  REQUIRE(sketch2.get_n() == sketch.get_n());
452
434
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
453
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
454
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
435
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
436
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
455
437
  REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
456
438
  REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
457
439
  REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
458
440
  REQUIRE(sketch2.get_rank(0) == sketch.get_rank(0));
459
441
  REQUIRE(sketch2.get_rank(static_cast<float>(n)) == sketch.get_rank(static_cast<float>(n)));
460
- REQUIRE_THROWS_AS(kll_sketch<int>::deserialize(bytes.data(), 7), std::out_of_range);
461
- REQUIRE_THROWS_AS(kll_sketch<int>::deserialize(bytes.data(), 15), std::out_of_range);
462
- REQUIRE_THROWS_AS(kll_sketch<int>::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
442
+ REQUIRE_THROWS_AS(kll_float_sketch::deserialize(bytes.data(), 7, serde<float>(), std::less<float>(), 0), std::out_of_range);
443
+ REQUIRE_THROWS_AS(kll_float_sketch::deserialize(bytes.data(), 15, serde<float>(), std::less<float>(), 0), std::out_of_range);
444
+ REQUIRE_THROWS_AS(kll_float_sketch::deserialize(bytes.data(), bytes.size() - 1, serde<float>(), std::less<float>(), 0), std::out_of_range);
463
445
  }
464
446
 
465
447
  SECTION("bytes serialize deserialize many ints") {
@@ -474,8 +456,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
474
456
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
475
457
  REQUIRE(sketch2.get_n() == sketch.get_n());
476
458
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
477
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
478
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
459
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
460
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
479
461
  REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
480
462
  REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
481
463
  REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
@@ -499,7 +481,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
499
481
  }
500
482
 
501
483
  SECTION("out of order split points, float") {
502
- kll_float_sketch sketch(200, 0);
484
+ kll_float_sketch sketch(200, std::less<float>(), 0);
503
485
  sketch.update(0); // has to be non-empty to reach the check
504
486
  float split_points[2] = {1, 0};
505
487
  REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 2), std::invalid_argument);
@@ -513,48 +495,48 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
513
495
  }
514
496
 
515
497
  SECTION("NaN split point") {
516
- kll_float_sketch sketch(200, 0);
498
+ kll_float_sketch sketch(200, std::less<float>(), 0);
517
499
  sketch.update(0); // has to be non-empty to reach the check
518
500
  float split_points[1] = {std::numeric_limits<float>::quiet_NaN()};
519
501
  REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::invalid_argument);
520
502
  }
521
503
 
522
504
  SECTION("merge") {
523
- kll_float_sketch sketch1(200, 0);
524
- kll_float_sketch sketch2(200, 0);
505
+ kll_float_sketch sketch1(200, std::less<float>(), 0);
506
+ kll_float_sketch sketch2(200, std::less<float>(), 0);
525
507
  const int n = 10000;
526
508
  for (int i = 0; i < n; i++) {
527
509
  sketch1.update(static_cast<float>(i));
528
510
  sketch2.update(static_cast<float>((2 * n) - i - 1));
529
511
  }
530
512
 
531
- REQUIRE(sketch1.get_min_value() == 0.0f);
532
- REQUIRE(sketch1.get_max_value() == n - 1);
533
- REQUIRE(sketch2.get_min_value() == n);
534
- REQUIRE(sketch2.get_max_value() == 2.0f * n - 1);
513
+ REQUIRE(sketch1.get_min_item() == 0.0f);
514
+ REQUIRE(sketch1.get_max_item() == n - 1);
515
+ REQUIRE(sketch2.get_min_item() == n);
516
+ REQUIRE(sketch2.get_max_item() == 2.0f * n - 1);
535
517
 
536
518
  sketch1.merge(sketch2);
537
519
 
538
520
  REQUIRE_FALSE(sketch1.is_empty());
539
521
  REQUIRE(sketch1.get_n() == 2 * n);
540
- REQUIRE(sketch1.get_min_value() == 0.0f);
541
- REQUIRE(sketch1.get_max_value() == 2.0f * n - 1);
522
+ REQUIRE(sketch1.get_min_item() == 0.0f);
523
+ REQUIRE(sketch1.get_max_item() == 2.0f * n - 1);
542
524
  REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_200));
543
525
  }
544
526
 
545
527
  SECTION("merge lower k") {
546
- kll_float_sketch sketch1(256, 0);
547
- kll_float_sketch sketch2(128, 0);
528
+ kll_float_sketch sketch1(256, std::less<float>(), 0);
529
+ kll_float_sketch sketch2(128, std::less<float>(), 0);
548
530
  const int n = 10000;
549
531
  for (int i = 0; i < n; i++) {
550
532
  sketch1.update(static_cast<float>(i));
551
533
  sketch2.update(static_cast<float>((2 * n) - i - 1));
552
534
  }
553
535
 
554
- REQUIRE(sketch1.get_min_value() == 0.0f);
555
- REQUIRE(sketch1.get_max_value() == n - 1);
556
- REQUIRE(sketch2.get_min_value() == n);
557
- REQUIRE(sketch2.get_max_value() == 2.0f * n - 1);
536
+ REQUIRE(sketch1.get_min_item() == 0.0f);
537
+ REQUIRE(sketch1.get_max_item() == n - 1);
538
+ REQUIRE(sketch2.get_min_item() == n);
539
+ REQUIRE(sketch2.get_max_item() == 2.0f * n - 1);
558
540
 
559
541
  REQUIRE(sketch1.get_k() == 256);
560
542
  REQUIRE(sketch2.get_k() == 128);
@@ -570,14 +552,14 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
570
552
 
571
553
  REQUIRE_FALSE(sketch1.is_empty());
572
554
  REQUIRE(sketch1.get_n() == 2 * n);
573
- REQUIRE(sketch1.get_min_value() == 0.0f);
574
- REQUIRE(sketch1.get_max_value() == 2.0f * n - 1);
555
+ REQUIRE(sketch1.get_min_item() == 0.0f);
556
+ REQUIRE(sketch1.get_max_item() == 2.0f * n - 1);
575
557
  REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_200));
576
558
  }
577
559
 
578
560
  SECTION("merge exact mode, lower k") {
579
- kll_float_sketch sketch1(256, 0);
580
- kll_float_sketch sketch2(128, 0);
561
+ kll_float_sketch sketch1(256, std::less<float>(), 0);
562
+ kll_float_sketch sketch2(128, std::less<float>(), 0);
581
563
  const int n = 10000;
582
564
  for (int i = 0; i < n; i++) {
583
565
  sketch1.update(static_cast<float>(i));
@@ -590,8 +572,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
590
572
 
591
573
  REQUIRE_FALSE(sketch1.is_empty());
592
574
  REQUIRE(sketch1.get_n() == n);
593
- REQUIRE(sketch1.get_min_value() == 0.0f);
594
- REQUIRE(sketch1.get_max_value() == n - 1);
575
+ REQUIRE(sketch1.get_min_item() == 0.0f);
576
+ REQUIRE(sketch1.get_max_item() == n - 1);
595
577
  REQUIRE(sketch1.get_quantile(0.5) == Approx(n / 2).margin(n / 2 * RANK_EPS_FOR_K_200));
596
578
 
597
579
  sketch2.update(0);
@@ -601,29 +583,29 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
601
583
  }
602
584
 
603
585
  SECTION("merge min value from other") {
604
- kll_float_sketch sketch1(200, 0);
605
- kll_float_sketch sketch2(200, 0);
586
+ kll_float_sketch sketch1(200, std::less<float>(), 0);
587
+ kll_float_sketch sketch2(200, std::less<float>(), 0);
606
588
  sketch1.update(1.0f);
607
589
  sketch2.update(2.0f);
608
590
  sketch2.merge(sketch1);
609
- REQUIRE(sketch2.get_min_value() == 1.0f);
610
- REQUIRE(sketch2.get_max_value() == 2.0f);
591
+ REQUIRE(sketch2.get_min_item() == 1.0f);
592
+ REQUIRE(sketch2.get_max_item() == 2.0f);
611
593
  }
612
594
 
613
595
  SECTION("merge min and max values from other") {
614
- kll_float_sketch sketch1(200, 0);
596
+ kll_float_sketch sketch1(200, std::less<float>(), 0);
615
597
  for (int i = 0; i < 1000000; i++) sketch1.update(static_cast<float>(i));
616
- kll_float_sketch sketch2(200, 0);
598
+ kll_float_sketch sketch2(200, std::less<float>(), 0);
617
599
  sketch2.merge(sketch1);
618
- REQUIRE(sketch2.get_min_value() == 0.0f);
619
- REQUIRE(sketch2.get_max_value() == 999999.0f);
600
+ REQUIRE(sketch2.get_min_item() == 0.0f);
601
+ REQUIRE(sketch2.get_max_item() == 999999.0f);
620
602
  }
621
603
 
622
604
  SECTION("sketch of ints") {
623
605
  kll_sketch<int> sketch;
624
606
  REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
625
- REQUIRE_THROWS_AS(sketch.get_min_value(), std::runtime_error);
626
- REQUIRE_THROWS_AS(sketch.get_max_value(), std::runtime_error);
607
+ REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
608
+ REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
627
609
 
628
610
  const int n = 1000;
629
611
  for (int i = 0; i < n; i++) sketch.update(i);
@@ -638,8 +620,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
638
620
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
639
621
  REQUIRE(sketch2.get_n() == sketch.get_n());
640
622
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
641
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
642
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
623
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
624
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
643
625
  REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
644
626
  REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
645
627
  REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
@@ -648,30 +630,30 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
648
630
  }
649
631
 
650
632
  SECTION("sketch of strings stream") {
651
- kll_string_sketch sketch1(200, 0);
633
+ kll_string_sketch sketch1(200, std::less<std::string>(), 0);
652
634
  REQUIRE_THROWS_AS(sketch1.get_quantile(0), std::runtime_error);
653
- REQUIRE_THROWS_AS(sketch1.get_min_value(), std::runtime_error);
654
- REQUIRE_THROWS_AS(sketch1.get_max_value(), std::runtime_error);
635
+ REQUIRE_THROWS_AS(sketch1.get_min_item(), std::runtime_error);
636
+ REQUIRE_THROWS_AS(sketch1.get_max_item(), std::runtime_error);
655
637
  REQUIRE(sketch1.get_serialized_size_bytes() == 8);
656
638
 
657
639
  const int n = 1000;
658
640
  for (int i = 0; i < n; i++) sketch1.update(std::to_string(i));
659
641
 
660
- REQUIRE(sketch1.get_min_value() == std::string("0"));
661
- REQUIRE(sketch1.get_max_value() == std::string("999"));
642
+ REQUIRE(sketch1.get_min_item() == std::string("0"));
643
+ REQUIRE(sketch1.get_max_item() == std::string("999"));
662
644
 
663
645
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
664
646
  sketch1.serialize(s);
665
647
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch1.get_serialized_size_bytes());
666
- auto sketch2 = kll_string_sketch::deserialize(s, test_allocator<std::string>(0));
648
+ auto sketch2 = kll_string_sketch::deserialize(s, serde<std::string>(), std::less<std::string>(), 0);
667
649
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
668
650
  REQUIRE(s.tellg() == s.tellp());
669
651
  REQUIRE(sketch2.is_empty() == sketch1.is_empty());
670
652
  REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
671
653
  REQUIRE(sketch2.get_n() == sketch1.get_n());
672
654
  REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
673
- REQUIRE(sketch2.get_min_value() == sketch1.get_min_value());
674
- REQUIRE(sketch2.get_max_value() == sketch1.get_max_value());
655
+ REQUIRE(sketch2.get_min_item() == sketch1.get_min_item());
656
+ REQUIRE(sketch2.get_max_item() == sketch1.get_max_item());
675
657
  REQUIRE(sketch2.get_normalized_rank_error(false) == sketch1.get_normalized_rank_error(false));
676
658
  REQUIRE(sketch2.get_normalized_rank_error(true) == sketch1.get_normalized_rank_error(true));
677
659
  REQUIRE(sketch2.get_quantile(0.5) == sketch1.get_quantile(0.5));
@@ -687,28 +669,29 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
687
669
  }
688
670
 
689
671
  SECTION("sketch of strings bytes") {
690
- kll_string_sketch sketch1(200, 0);
672
+ kll_string_sketch sketch1(200, std::less<std::string>(), 0);
691
673
  REQUIRE_THROWS_AS(sketch1.get_quantile(0), std::runtime_error);
692
- REQUIRE_THROWS_AS(sketch1.get_min_value(), std::runtime_error);
693
- REQUIRE_THROWS_AS(sketch1.get_max_value(), std::runtime_error);
674
+ REQUIRE_THROWS_AS(sketch1.get_min_item(), std::runtime_error);
675
+ REQUIRE_THROWS_AS(sketch1.get_max_item(), std::runtime_error);
694
676
  REQUIRE(sketch1.get_serialized_size_bytes() == 8);
695
677
 
696
678
  const int n = 1000;
697
679
  for (int i = 0; i < n; i++) sketch1.update(std::to_string(i));
698
680
 
699
- REQUIRE(sketch1.get_min_value() == std::string("0"));
700
- REQUIRE(sketch1.get_max_value() == std::string("999"));
681
+ REQUIRE(sketch1.get_min_item() == std::string("0"));
682
+ REQUIRE(sketch1.get_max_item() == std::string("999"));
701
683
 
702
684
  auto bytes = sketch1.serialize();
703
685
  REQUIRE(bytes.size() == sketch1.get_serialized_size_bytes());
704
- auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size(), serde<std::string>(), 0);
686
+ auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size(), serde<std::string>(),
687
+ std::less<std::string>(), 0);
705
688
  REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
706
689
  REQUIRE(sketch2.is_empty() == sketch1.is_empty());
707
690
  REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
708
691
  REQUIRE(sketch2.get_n() == sketch1.get_n());
709
692
  REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
710
- REQUIRE(sketch2.get_min_value() == sketch1.get_min_value());
711
- REQUIRE(sketch2.get_max_value() == sketch1.get_max_value());
693
+ REQUIRE(sketch2.get_min_item() == sketch1.get_min_item());
694
+ REQUIRE(sketch2.get_max_item() == sketch1.get_max_item());
712
695
  REQUIRE(sketch2.get_normalized_rank_error(false) == sketch1.get_normalized_rank_error(false));
713
696
  REQUIRE(sketch2.get_normalized_rank_error(true) == sketch1.get_normalized_rank_error(true));
714
697
  REQUIRE(sketch2.get_quantile(0.5) == sketch1.get_quantile(0.5));
@@ -718,11 +701,12 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
718
701
 
719
702
 
720
703
  SECTION("sketch of strings, single item, bytes") {
721
- kll_string_sketch sketch1(200, 0);
704
+ kll_string_sketch sketch1(200, std::less<std::string>(), 0);
722
705
  sketch1.update("a");
723
706
  auto bytes = sketch1.serialize();
724
707
  REQUIRE(bytes.size() == sketch1.get_serialized_size_bytes());
725
- auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size(), serde<std::string>(), 0);
708
+ auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size(), serde<std::string>(),
709
+ std::less<std::string>(), 0);
726
710
  REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
727
711
  }
728
712
 
@@ -753,14 +737,14 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
753
737
  // move constructor
754
738
  kll_sketch<int> sketch2(std::move(sketch1));
755
739
  for (int i = 0; i < n; i++) {
756
- REQUIRE(sketch2.get_rank(i) == (double) i / n);
740
+ REQUIRE(sketch2.get_rank(i, false) == (double) i / n);
757
741
  }
758
742
 
759
743
  // move assignment
760
744
  kll_sketch<int> sketch3;
761
745
  sketch3 = std::move(sketch2);
762
746
  for (int i = 0; i < n; i++) {
763
- REQUIRE(sketch3.get_rank(i) == (double) i / n);
747
+ REQUIRE(sketch3.get_rank(i, false) == (double) i / n);
764
748
  }
765
749
  }
766
750
 
@@ -795,44 +779,24 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
795
779
  kll.update(3);
796
780
  kll.update(1);
797
781
 
798
- { // non-cumulative, using operator->
799
- auto view = kll.get_sorted_view(false);
782
+ {
783
+ auto view = kll.get_sorted_view();
800
784
  REQUIRE(view.size() == 3);
801
785
  auto it = view.begin();
802
- REQUIRE(it->first == 1);
786
+ REQUIRE(it->first == 1); // operator->
787
+ REQUIRE((*it).first == 1); // operator*
803
788
  REQUIRE(it->second == 1);
789
+ REQUIRE(it.get_weight() == 1);
804
790
  ++it;
805
791
  REQUIRE(it->first == 2);
806
- REQUIRE(it->second == 1);
807
- ++it;
808
- REQUIRE(it->first == 3);
809
- REQUIRE(it->second == 1);
810
- }
811
- { // cumulative, non-inclusive, using operator->
812
- auto view = kll.get_sorted_view(true);
813
- REQUIRE(view.size() == 3);
814
- auto it = view.begin();
815
- REQUIRE(it->first == 1);
816
- REQUIRE(it->second == 0);
817
- ++it;
818
- REQUIRE(it->first == 2);
819
- REQUIRE(it->second == 1);
820
- ++it;
821
- REQUIRE(it->first == 3);
822
792
  REQUIRE(it->second == 2);
823
- }
824
- { // cumulative, inclusive, using operator*
825
- auto view = kll.get_sorted_view<true>(true);
826
- REQUIRE(view.size() == 3);
827
- auto it = view.begin();
828
- REQUIRE((*it).first == 1);
829
- REQUIRE((*it).second == 1);
793
+ REQUIRE(it.get_weight() == 1);
830
794
  ++it;
831
- REQUIRE((*it).first == 2);
832
- REQUIRE((*it).second == 2);
795
+ REQUIRE(it->first == 3);
796
+ REQUIRE(it->second == 3);
797
+ REQUIRE(it.get_weight() == 1);
833
798
  ++it;
834
- REQUIRE((*it).first == 3);
835
- REQUIRE((*it).second == 3);
799
+ REQUIRE(it == view.end());
836
800
  }
837
801
  }
838
802
 
@@ -854,8 +818,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
854
818
  REQUIRE(kll_float.get_n() == kll_double.get_n());
855
819
  REQUIRE(kll_float.get_num_retained() == kll_double.get_num_retained());
856
820
 
857
- auto sv_float = kll_float.get_sorted_view(false);
858
- auto sv_double = kll_double.get_sorted_view(false);
821
+ auto sv_float = kll_float.get_sorted_view();
822
+ auto sv_double = kll_double.get_sorted_view();
859
823
  auto sv_float_it = sv_float.begin();
860
824
  auto sv_double_it = sv_double.begin();
861
825
  while (sv_float_it != sv_float.end()) {