datasketches 0.2.6 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (121) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +9 -0
  3. data/LICENSE +4 -6
  4. data/NOTICE +6 -5
  5. data/ext/datasketches/kll_wrapper.cpp +20 -20
  6. data/ext/datasketches/theta_wrapper.cpp +2 -2
  7. data/lib/datasketches/version.rb +1 -1
  8. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  9. data/vendor/datasketches-cpp/LICENSE +4 -6
  10. data/vendor/datasketches-cpp/MANIFEST.in +21 -4
  11. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  12. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  13. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  14. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  15. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  16. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  17. data/vendor/datasketches-cpp/common/{test/test_runner.cpp → include/version.hpp.in} +15 -8
  18. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +37 -7
  19. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +22 -1
  20. data/vendor/datasketches-cpp/common/test/integration_test.cpp +1 -1
  21. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  22. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +1 -1
  24. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +1 -1
  25. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -1
  26. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  27. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  28. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  29. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  30. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +17 -10
  31. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  32. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +55 -42
  33. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -1
  34. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +4 -4
  35. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  36. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
  37. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  38. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -1
  39. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +1 -1
  40. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +1 -1
  41. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -1
  42. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +1 -1
  43. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +1 -1
  44. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +1 -1
  45. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -1
  46. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +1 -1
  47. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  48. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  49. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  50. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  51. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +27 -27
  52. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +197 -233
  53. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +42 -32
  54. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
  55. data/vendor/datasketches-cpp/pyproject.toml +17 -13
  56. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  57. data/vendor/datasketches-cpp/python/README.md +1 -1
  58. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  59. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  60. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  61. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  62. data/vendor/datasketches-cpp/python/pybind11Path.cmd +19 -1
  63. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  64. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  65. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  66. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  67. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  68. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  69. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  70. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  71. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  72. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  73. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  74. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  75. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  76. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +1 -1
  78. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +20 -19
  79. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +241 -233
  80. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  81. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  82. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  83. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  84. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  85. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +27 -27
  86. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +117 -104
  87. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  88. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  89. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  90. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  91. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  92. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +7 -7
  93. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +3 -3
  94. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +5 -5
  95. data/vendor/datasketches-cpp/setup.py +14 -3
  96. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  97. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  98. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  99. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  100. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +1 -1
  101. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +1 -1
  102. data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +1 -1
  103. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +1 -1
  104. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +3 -2
  105. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +1 -1
  106. data/vendor/datasketches-cpp/tox.ini +26 -0
  107. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  108. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +41 -35
  109. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  110. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
  111. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  112. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -1
  113. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -1
  114. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +1 -1
  115. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
  116. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +27 -1
  117. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -1
  118. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  119. metadata +14 -7
  120. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
  121. data/vendor/datasketches-cpp/common/test/catch.hpp +0 -17618
@@ -17,7 +17,7 @@
17
17
  * under the License.
18
18
  */
19
19
 
20
- #include <catch.hpp>
20
+ #include <catch2/catch.hpp>
21
21
  #include <cmath>
22
22
  #include <cstring>
23
23
  #include <sstream>
@@ -39,9 +39,9 @@ static std::string testBinaryInputPath = "test/";
39
39
  #endif
40
40
 
41
41
  // typical usage would be just kll_sketch<float> or kll_sketch<std::string>, but here we use test_allocator
42
- using kll_float_sketch = kll_sketch<float, std::less<float>, serde<float>, test_allocator<float>>;
42
+ using kll_float_sketch = kll_sketch<float, std::less<float>, test_allocator<float>>;
43
43
  // let std::string use the default allocator for simplicity, otherwise we need to define "less" and "serde"
44
- using kll_string_sketch = kll_sketch<std::string, std::less<std::string>, serde<std::string>, test_allocator<std::string>>;
44
+ using kll_string_sketch = kll_sketch<std::string, std::less<std::string>, test_allocator<std::string>>;
45
45
 
46
46
  TEST_CASE("kll sketch", "[kll_sketch]") {
47
47
 
@@ -49,71 +49,78 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
49
49
  test_allocator_total_bytes = 0;
50
50
 
51
51
  SECTION("k limits") {
52
- kll_float_sketch sketch1(kll_float_sketch::MIN_K, 0); // this should work
53
- kll_float_sketch sketch2(kll_float_sketch::MAX_K, 0); // this should work
54
- REQUIRE_THROWS_AS(new kll_float_sketch(kll_float_sketch::MIN_K - 1, 0), std::invalid_argument);
52
+ kll_float_sketch sketch1(kll_float_sketch::MIN_K, std::less<float>(), 0); // this should work
53
+ kll_float_sketch sketch2(kll_float_sketch::MAX_K, std::less<float>(), 0); // this should work
54
+ REQUIRE_THROWS_AS(new kll_float_sketch(kll_float_sketch::MIN_K - 1, std::less<float>(), 0), std::invalid_argument);
55
55
  // MAX_K + 1 makes no sense because k is uint16_t
56
+ //std::cout << "sizeof(kll_sketch<float>)=" << sizeof(kll_sketch<float>) << "\n";
57
+ //std::cout << "sizeof(kll_sketch<double>)=" << sizeof(kll_sketch<double>) << "\n";
56
58
  }
57
59
 
58
60
  SECTION("empty") {
59
- kll_float_sketch sketch(200, 0);
61
+ kll_float_sketch sketch(200, std::less<float>(), 0);
60
62
  REQUIRE(sketch.is_empty());
61
63
  REQUIRE_FALSE(sketch.is_estimation_mode());
62
64
  REQUIRE(sketch.get_n() == 0);
63
65
  REQUIRE(sketch.get_num_retained() == 0);
64
- REQUIRE(std::isnan(sketch.get_rank(0)));
65
- REQUIRE(std::isnan(sketch.get_min_value()));
66
- REQUIRE(std::isnan(sketch.get_max_value()));
67
- REQUIRE(std::isnan(sketch.get_quantile(0.5)));
68
- const double fractions[3] {0, 0.5, 1};
69
- REQUIRE(sketch.get_quantiles(fractions, 3).size() == 0);
66
+ REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
67
+ REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
68
+ REQUIRE_THROWS_AS(sketch.get_rank(0), std::runtime_error);
69
+ REQUIRE_THROWS_AS(sketch.get_quantile(0.5), std::runtime_error);
70
+ const double ranks[3] {0, 0.5, 1};
71
+ REQUIRE_THROWS_AS(sketch.get_quantiles(ranks, 3), std::runtime_error);
70
72
  const float split_points[1] {0};
71
- REQUIRE(sketch.get_PMF(split_points, 1).size() == 0);
72
- REQUIRE(sketch.get_CDF(split_points, 1).size() == 0);
73
+ REQUIRE_THROWS_AS(sketch.get_PMF(split_points, 1), std::runtime_error);
74
+ REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::runtime_error);
73
75
 
74
- for (auto it: sketch) {
75
- (void) it; // to suppress "unused" warning
76
+ for (auto pair: sketch) {
77
+ unused(pair); // to suppress "unused" warning
76
78
  FAIL("should be no iterations over an empty sketch");
77
79
  }
78
80
  }
79
81
 
80
82
  SECTION("get bad quantile") {
81
- kll_float_sketch sketch(200, 0);
83
+ kll_float_sketch sketch(200, std::less<float>(), 0);
82
84
  sketch.update(0); // has to be non-empty to reach the check
83
85
  REQUIRE_THROWS_AS(sketch.get_quantile(-1), std::invalid_argument);
84
86
  }
85
87
 
86
88
  SECTION("one item") {
87
- kll_float_sketch sketch(200, 0);
89
+ kll_float_sketch sketch(200, std::less<float>(), 0);
88
90
  sketch.update(1.0f);
89
91
  REQUIRE_FALSE(sketch.is_empty());
90
92
  REQUIRE_FALSE(sketch.is_estimation_mode());
91
93
  REQUIRE(sketch.get_n() == 1);
92
94
  REQUIRE(sketch.get_num_retained() == 1);
93
- REQUIRE(sketch.get_rank(1.0f) == 0.0);
94
- REQUIRE(sketch.get_rank<true>(1.0f) == 1.0);
95
- REQUIRE(sketch.get_rank(2.0f) == 1.0);
95
+ REQUIRE(sketch.get_rank(1.0f, false) == 0.0);
96
+ REQUIRE(sketch.get_rank(1.0f) == 1.0);
97
+ REQUIRE(sketch.get_rank(2.0f, false) == 1.0);
96
98
  REQUIRE(sketch.get_rank(std::numeric_limits<float>::infinity()) == 1.0);
97
- REQUIRE(sketch.get_min_value() == 1.0);
98
- REQUIRE(sketch.get_max_value() == 1.0);
99
+ REQUIRE(sketch.get_min_item() == 1.0);
100
+ REQUIRE(sketch.get_max_item() == 1.0);
99
101
  REQUIRE(sketch.get_quantile(0.5) == 1.0);
100
- const double fractions[3] {0, 0.5, 1};
101
- auto quantiles = sketch.get_quantiles(fractions, 3);
102
+ const double ranks[3] {0, 0.5, 1};
103
+ auto quantiles = sketch.get_quantiles(ranks, 3);
102
104
  REQUIRE(quantiles.size() == 3);
103
105
  REQUIRE(quantiles[0] == 1.0);
104
106
  REQUIRE(quantiles[1] == 1.0);
105
107
  REQUIRE(quantiles[2] == 1.0);
106
108
 
107
109
  int count = 0;
108
- for (auto it: sketch) {
109
- REQUIRE(it.second == 1);
110
+ for (auto pair: sketch) {
111
+ REQUIRE(pair.second == 1);
110
112
  ++count;
111
113
  }
112
114
  REQUIRE(count == 1);
115
+
116
+ // iterator dereferencing
117
+ auto it = sketch.begin();
118
+ REQUIRE(it->first == 1.0f);
119
+ REQUIRE((*it).first == 1.0f);
113
120
  }
114
121
 
115
122
  SECTION("NaN") {
116
- kll_float_sketch sketch(200, 0);
123
+ kll_float_sketch sketch(200, std::less<float>(), 0);
117
124
  sketch.update(std::numeric_limits<float>::quiet_NaN());
118
125
  REQUIRE(sketch.is_empty());
119
126
 
@@ -123,44 +130,44 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
123
130
  }
124
131
 
125
132
  SECTION("many items, exact mode") {
126
- kll_float_sketch sketch(200, 0);
133
+ kll_float_sketch sketch(200, std::less<float>(), 0);
127
134
  const uint32_t n = 200;
128
- for (uint32_t i = 0; i < n; i++) {
135
+ for (uint32_t i = 1; i <= n; i++) {
129
136
  sketch.update(static_cast<float>(i));
130
- REQUIRE(sketch.get_n() == i + 1);
137
+ REQUIRE(sketch.get_n() == i);
131
138
  }
132
139
  REQUIRE_FALSE(sketch.is_empty());
133
140
  REQUIRE_FALSE(sketch.is_estimation_mode());
134
141
  REQUIRE(sketch.get_num_retained() == n);
135
- REQUIRE(sketch.get_min_value() == 0.0);
136
- REQUIRE(sketch.get_quantile(0) == 0.0);
137
- REQUIRE(sketch.get_max_value() == n - 1);
138
- REQUIRE(sketch.get_quantile(1) == n - 1);
142
+ REQUIRE(sketch.get_min_item() == 1);
143
+ REQUIRE(sketch.get_quantile(0) == 1);
144
+ REQUIRE(sketch.get_max_item() == n);
145
+ REQUIRE(sketch.get_quantile(1) == n);
139
146
 
140
- const double fractions[3] {0, 0.5, 1};
141
- auto quantiles = sketch.get_quantiles(fractions, 3);
147
+ const double ranks[3] {0, 0.5, 1};
148
+ auto quantiles = sketch.get_quantiles(ranks, 3);
142
149
  REQUIRE(quantiles.size() == 3);
143
- REQUIRE(quantiles[0] == 0.0);
150
+ REQUIRE(quantiles[0] == 1);
144
151
  REQUIRE(quantiles[1] == n / 2);
145
- REQUIRE(quantiles[2] == n - 1 );
146
-
147
- for (uint32_t i = 0; i < n; i++) {
148
- const double true_rank = (double) i / n;
149
- REQUIRE(sketch.get_rank(static_cast<float>(i)) == true_rank);
150
- const double true_rank_inclusive = (double) (i + 1) / n;
151
- REQUIRE(sketch.get_rank<true>(static_cast<float>(i)) == true_rank_inclusive);
152
- }
152
+ REQUIRE(quantiles[2] == n);
153
153
 
154
- // the alternative method must produce the same result
154
+ // alternative method must produce the same result
155
155
  auto quantiles2 = sketch.get_quantiles(3);
156
156
  REQUIRE(quantiles2.size() == 3);
157
157
  REQUIRE(quantiles[0] == quantiles2[0]);
158
158
  REQUIRE(quantiles[1] == quantiles2[1]);
159
159
  REQUIRE(quantiles[2] == quantiles2[2]);
160
+
161
+ for (uint32_t i = 1; i <= n; i++) {
162
+ const double true_rank_inclusive = static_cast<double>(i) / n;
163
+ REQUIRE(sketch.get_rank(static_cast<float>(i)) == true_rank_inclusive);
164
+ const double true_rank_exclusive = static_cast<double>(i - 1) / n;
165
+ REQUIRE(sketch.get_rank(static_cast<float>(i), false) == true_rank_exclusive);
166
+ }
160
167
  }
161
168
 
162
169
  SECTION("10 items") {
163
- kll_float_sketch sketch(200, 0);
170
+ kll_float_sketch sketch(200, std::less<float>(), 0);
164
171
  sketch.update(1.0f);
165
172
  sketch.update(2.0f);
166
173
  sketch.update(3.0f);
@@ -172,23 +179,23 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
172
179
  sketch.update(9.0f);
173
180
  sketch.update(10.0f);
174
181
  REQUIRE(sketch.get_quantile(0) == 1.0);
175
- REQUIRE(sketch.get_quantile(0.5) == 6.0);
182
+ REQUIRE(sketch.get_quantile(0.5) == 5.0);
176
183
  REQUIRE(sketch.get_quantile(0.99) == 10.0);
177
184
  REQUIRE(sketch.get_quantile(1) == 10.0);
178
185
  }
179
186
 
180
187
  SECTION("100 items") {
181
- kll_float_sketch sketch(200, 0);
188
+ kll_float_sketch sketch(200, std::less<float>(), 0);
182
189
  for (int i = 0; i < 100; ++i) sketch.update(static_cast<float>(i));
183
190
  REQUIRE(sketch.get_quantile(0) == 0);
184
- REQUIRE(sketch.get_quantile(0.01) == 1);
185
- REQUIRE(sketch.get_quantile(0.5) == 50);
186
- REQUIRE(sketch.get_quantile(0.99) == 99.0);
191
+ REQUIRE(sketch.get_quantile(0.01) == 0);
192
+ REQUIRE(sketch.get_quantile(0.5) == 49);
193
+ REQUIRE(sketch.get_quantile(0.99) == 98.0);
187
194
  REQUIRE(sketch.get_quantile(1) == 99.0);
188
195
  }
189
196
 
190
197
  SECTION("many items, estimation mode") {
191
- kll_float_sketch sketch(200, 0);
198
+ kll_float_sketch sketch(200, std::less<float>(), 0);
192
199
  const int n = 1000000;
193
200
  for (int i = 0; i < n; i++) {
194
201
  sketch.update(static_cast<float>(i));
@@ -196,87 +203,62 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
196
203
  }
197
204
  REQUIRE_FALSE(sketch.is_empty());
198
205
  REQUIRE(sketch.is_estimation_mode());
199
- REQUIRE(sketch.get_min_value() == 0.0); // min value is exact
200
- REQUIRE(sketch.get_quantile(0) == 0.0); // min value is exact
201
- REQUIRE(sketch.get_max_value() == n - 1); // max value is exact
202
- REQUIRE(sketch.get_quantile(1) == n - 1); // max value is exact
206
+ REQUIRE(sketch.get_min_item() == 0.0); // min value is exact
207
+ REQUIRE(sketch.get_max_item() == n - 1); // max value is exact
203
208
 
204
209
  // test rank
205
210
  for (int i = 0; i < n; i++) {
206
211
  const double trueRank = (double) i / n;
207
- REQUIRE(sketch.get_rank(static_cast<float>(i)) == Approx(trueRank).margin(RANK_EPS_FOR_K_200));
208
- }
209
-
210
- // test quantiles at every 0.1 percentage point
211
- double fractions[1001];
212
- double reverse_fractions[1001]; // check that ordering does not matter
213
- for (int i = 0; i < 1001; i++) {
214
- fractions[i] = (double) i / 1000;
215
- reverse_fractions[1000 - i] = fractions[i];
216
- }
217
- auto quantiles = sketch.get_quantiles(fractions, 1001);
218
- auto reverse_quantiles = sketch.get_quantiles(reverse_fractions, 1001);
219
- float previous_quantile(0);
220
- for (int i = 0; i < 1001; i++) {
221
- // expensive in a loop, just to check the equivalence here, not advised for real code
222
- const float quantile = sketch.get_quantile(fractions[i]);
223
- REQUIRE(quantiles[i] == quantile);
224
- REQUIRE(reverse_quantiles[1000 - i] == quantile);
225
- REQUIRE(previous_quantile <= quantile);
226
- previous_quantile = quantile;
212
+ REQUIRE(sketch.get_rank(static_cast<float>(i), false) == Approx(trueRank).margin(RANK_EPS_FOR_K_200));
227
213
  }
228
214
 
229
215
  //std::cout << sketch.to_string();
230
216
 
231
217
  uint32_t count = 0;
232
218
  uint64_t total_weight = 0;
233
- for (auto it: sketch) {
219
+ for (auto pair: sketch) {
234
220
  ++count;
235
- total_weight += it.second;
221
+ total_weight += pair.second;
236
222
  }
237
223
  REQUIRE(count == sketch.get_num_retained());
238
224
  REQUIRE(total_weight == sketch.get_n());
239
225
  }
240
226
 
241
- SECTION("consistency between get_rank adn get_PMF/CDF") {
242
- kll_float_sketch sketch(200, 0);
243
- const int n = 1000;
227
+ SECTION("consistency between get_rank and get_PMF/CDF") {
228
+ kll_float_sketch sketch(200, std::less<float>(), 0);
229
+ const int n = 200;
244
230
  float values[n];
245
231
  for (int i = 0; i < n; i++) {
246
232
  sketch.update(static_cast<float>(i));
247
233
  values[i] = static_cast<float>(i);
248
234
  }
249
- { // inclusive=false (default)
250
- const auto ranks(sketch.get_CDF(values, n));
251
- const auto pmf(sketch.get_PMF(values, n));
235
+ { // inclusive=false
236
+ const auto ranks(sketch.get_CDF(values, n, false));
237
+ const auto pmf(sketch.get_PMF(values, n, false));
252
238
 
253
239
  double subtotal_pmf = 0;
254
240
  for (int i = 0; i < n; i++) {
255
- if (sketch.get_rank(values[i]) != ranks[i]) {
256
- std::cerr << "checking rank vs CDF for value " << i << std::endl;
257
- REQUIRE(sketch.get_rank(values[i]) == ranks[i]);
241
+ if (sketch.get_rank(values[i], false) != ranks[i]) {
242
+ FAIL("checking rank vs CDF for value " + std::to_string(i));
258
243
  }
259
244
  subtotal_pmf += pmf[i];
260
245
  if (abs(ranks[i] - subtotal_pmf) > NUMERIC_NOISE_TOLERANCE) {
261
- std::cerr << "CDF vs PMF for value " << i << std::endl;
262
- REQUIRE(ranks[i] == Approx(subtotal_pmf).margin(NUMERIC_NOISE_TOLERANCE));
246
+ FAIL("CDF vs PMF for value " + std::to_string(i));
263
247
  }
264
248
  }
265
249
  }
266
- { // inclusive=true
267
- const auto ranks(sketch.get_CDF<true>(values, n));
268
- const auto pmf(sketch.get_PMF<true>(values, n));
250
+ { // inclusive=true (default)
251
+ const auto ranks(sketch.get_CDF(values, n));
252
+ const auto pmf(sketch.get_PMF(values, n));
269
253
 
270
254
  double subtotal_pmf = 0;
271
255
  for (int i = 0; i < n; i++) {
272
- if (sketch.get_rank<true>(values[i]) != ranks[i]) {
273
- std::cerr << "checking rank vs CDF for value " << i << std::endl;
274
- REQUIRE(sketch.get_rank(values[i]) == ranks[i]);
256
+ if (sketch.get_rank(values[i]) != ranks[i]) {
257
+ FAIL("checking rank vs CDF for value " + std::to_string(i));
275
258
  }
276
259
  subtotal_pmf += pmf[i];
277
260
  if (abs(ranks[i] - subtotal_pmf) > NUMERIC_NOISE_TOLERANCE) {
278
- std::cerr << "CDF vs PMF for value " << i << std::endl;
279
- REQUIRE(ranks[i] == Approx(subtotal_pmf).margin(NUMERIC_NOISE_TOLERANCE));
261
+ FAIL("CDF vs PMF for value " + std::to_string(i));
280
262
  }
281
263
  }
282
264
  }
@@ -286,151 +268,151 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
286
268
  std::ifstream is;
287
269
  is.exceptions(std::ios::failbit | std::ios::badbit);
288
270
  is.open(testBinaryInputPath + "kll_sketch_from_java.sk", std::ios::binary);
289
- auto sketch = kll_float_sketch::deserialize(is, test_allocator<float>(0));
271
+ auto sketch = kll_float_sketch::deserialize(is, serde<float>(), std::less<float>(), 0);
290
272
  REQUIRE_FALSE(sketch.is_empty());
291
273
  REQUIRE(sketch.is_estimation_mode());
292
274
  REQUIRE(sketch.get_n() == 1000000);
293
275
  REQUIRE(sketch.get_num_retained() == 614);
294
- REQUIRE(sketch.get_min_value() == 0.0);
295
- REQUIRE(sketch.get_max_value() == 999999.0);
276
+ REQUIRE(sketch.get_min_item() == 0.0);
277
+ REQUIRE(sketch.get_max_item() == 999999.0);
296
278
  }
297
279
 
298
280
  SECTION("stream serialize deserialize empty") {
299
- kll_float_sketch sketch(200, 0);
281
+ kll_float_sketch sketch(200, std::less<float>(), 0);
300
282
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
301
283
  sketch.serialize(s);
302
284
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
303
- auto sketch2 = kll_float_sketch::deserialize(s, test_allocator<float>(0));
285
+ auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), std::less<float>(), 0);
304
286
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
305
287
  REQUIRE(s.tellg() == s.tellp());
306
288
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
307
289
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
308
290
  REQUIRE(sketch2.get_n() == sketch.get_n());
309
291
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
310
- REQUIRE(std::isnan(sketch2.get_min_value()));
311
- REQUIRE(std::isnan(sketch2.get_max_value()));
292
+ REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
293
+ REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
312
294
  REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
313
295
  REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
314
296
  }
315
297
 
316
298
  SECTION("bytes serialize deserialize empty") {
317
- kll_float_sketch sketch(200, 0);
299
+ kll_float_sketch sketch(200, std::less<float>(), 0);
318
300
  auto bytes = sketch.serialize();
319
- auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), 0);
301
+ auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), std::less<float>(), 0);
320
302
  REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
321
303
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
322
304
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
323
305
  REQUIRE(sketch2.get_n() == sketch.get_n());
324
306
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
325
- REQUIRE(std::isnan(sketch2.get_min_value()));
326
- REQUIRE(std::isnan(sketch2.get_max_value()));
307
+ REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
308
+ REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
327
309
  REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
328
310
  REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
329
311
  }
330
312
 
331
313
  SECTION("stream serialize deserialize one item") {
332
- kll_float_sketch sketch(200, 0);
314
+ kll_float_sketch sketch(200, std::less<float>(), 0);
333
315
  sketch.update(1.0f);
334
316
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
335
317
  sketch.serialize(s);
336
318
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
337
- auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), 0);
319
+ auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), std::less<float>(), 0);
338
320
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
339
321
  REQUIRE(s.tellg() == s.tellp());
340
322
  REQUIRE_FALSE(sketch2.is_empty());
341
323
  REQUIRE_FALSE(sketch2.is_estimation_mode());
342
324
  REQUIRE(sketch2.get_n() == 1);
343
325
  REQUIRE(sketch2.get_num_retained() == 1);
344
- REQUIRE(sketch2.get_min_value() == 1.0);
345
- REQUIRE(sketch2.get_max_value() == 1.0);
326
+ REQUIRE(sketch2.get_min_item() == 1.0);
327
+ REQUIRE(sketch2.get_max_item() == 1.0);
346
328
  REQUIRE(sketch2.get_quantile(0.5) == 1.0);
347
- REQUIRE(sketch2.get_rank(1) == 0.0);
348
- REQUIRE(sketch2.get_rank(2) == 1.0);
329
+ REQUIRE(sketch2.get_rank(1, false) == 0.0);
330
+ REQUIRE(sketch2.get_rank(2, false) == 1.0);
349
331
  }
350
332
 
351
333
  SECTION("bytes serialize deserialize one item") {
352
- kll_float_sketch sketch(200, 0);
334
+ kll_float_sketch sketch(200, std::less<float>(), 0);
353
335
  sketch.update(1.0f);
354
336
  auto bytes = sketch.serialize();
355
337
  REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
356
- auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), 0);
338
+ auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), std::less<float>(), 0);
357
339
  REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
358
340
  REQUIRE_FALSE(sketch2.is_empty());
359
341
  REQUIRE_FALSE(sketch2.is_estimation_mode());
360
342
  REQUIRE(sketch2.get_n() == 1);
361
343
  REQUIRE(sketch2.get_num_retained() == 1);
362
- REQUIRE(sketch2.get_min_value() == 1.0);
363
- REQUIRE(sketch2.get_max_value() == 1.0);
344
+ REQUIRE(sketch2.get_min_item() == 1.0);
345
+ REQUIRE(sketch2.get_max_item() == 1.0);
364
346
  REQUIRE(sketch2.get_quantile(0.5) == 1.0);
365
- REQUIRE(sketch2.get_rank(1) == 0.0);
366
- REQUIRE(sketch2.get_rank(2) == 1.0);
347
+ REQUIRE(sketch2.get_rank(1, false) == 0.0);
348
+ REQUIRE(sketch2.get_rank(2, false) == 1.0);
367
349
  }
368
350
 
369
351
  SECTION("deserialize one item v1") {
370
352
  std::ifstream is;
371
353
  is.exceptions(std::ios::failbit | std::ios::badbit);
372
354
  is.open(testBinaryInputPath + "kll_sketch_float_one_item_v1.sk", std::ios::binary);
373
- auto sketch = kll_float_sketch::deserialize(is, serde<float>(), 0);
355
+ auto sketch = kll_float_sketch::deserialize(is, serde<float>(), std::less<float>(), 0);
374
356
  REQUIRE_FALSE(sketch.is_empty());
375
357
  REQUIRE_FALSE(sketch.is_estimation_mode());
376
358
  REQUIRE(sketch.get_n() == 1);
377
359
  REQUIRE(sketch.get_num_retained() == 1);
378
- REQUIRE(sketch.get_min_value() == 1.0);
379
- REQUIRE(sketch.get_max_value() == 1.0);
360
+ REQUIRE(sketch.get_min_item() == 1.0);
361
+ REQUIRE(sketch.get_max_item() == 1.0);
380
362
  }
381
363
 
382
364
  SECTION("stream serialize deserialize three items") {
383
- kll_float_sketch sketch(200, 0);
365
+ kll_float_sketch sketch(200, std::less<float>(), 0);
384
366
  sketch.update(1.0f);
385
367
  sketch.update(2.0f);
386
368
  sketch.update(3.0f);
387
369
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
388
370
  sketch.serialize(s);
389
371
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
390
- auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), 0);
372
+ auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), std::less<float>(), 0);
391
373
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
392
374
  REQUIRE(s.tellg() == s.tellp());
393
375
  REQUIRE_FALSE(sketch2.is_empty());
394
376
  REQUIRE_FALSE(sketch2.is_estimation_mode());
395
377
  REQUIRE(sketch2.get_n() == 3);
396
378
  REQUIRE(sketch2.get_num_retained() == 3);
397
- REQUIRE(sketch2.get_min_value() == 1.0);
398
- REQUIRE(sketch2.get_max_value() == 3.0);
379
+ REQUIRE(sketch2.get_min_item() == 1.0);
380
+ REQUIRE(sketch2.get_max_item() == 3.0);
399
381
  }
400
382
 
401
383
  SECTION("bytes serialize deserialize three items") {
402
- kll_float_sketch sketch(200, 0);
384
+ kll_float_sketch sketch(200, std::less<float>(), 0);
403
385
  sketch.update(1.0f);
404
386
  sketch.update(2.0f);
405
387
  sketch.update(3.0f);
406
388
  auto bytes = sketch.serialize();
407
389
  REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
408
- auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), 0);
390
+ auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), std::less<float>(), 0);
409
391
  REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
410
392
  REQUIRE_FALSE(sketch2.is_empty());
411
393
  REQUIRE_FALSE(sketch2.is_estimation_mode());
412
394
  REQUIRE(sketch2.get_n() == 3);
413
395
  REQUIRE(sketch2.get_num_retained() == 3);
414
- REQUIRE(sketch2.get_min_value() == 1.0);
415
- REQUIRE(sketch2.get_max_value() == 3.0);
396
+ REQUIRE(sketch2.get_min_item() == 1.0);
397
+ REQUIRE(sketch2.get_max_item() == 3.0);
416
398
  }
417
399
 
418
400
  SECTION("stream serialize deserialize many floats") {
419
- kll_float_sketch sketch(200, 0);
401
+ kll_float_sketch sketch(200, std::less<float>(), 0);
420
402
  const int n = 1000;
421
403
  for (int i = 0; i < n; i++) sketch.update(static_cast<float>(i));
422
404
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
423
405
  sketch.serialize(s);
424
406
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
425
- auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), 0);
407
+ auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), std::less<float>(), 0);
426
408
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
427
409
  REQUIRE(s.tellg() == s.tellp());
428
410
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
429
411
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
430
412
  REQUIRE(sketch2.get_n() == sketch.get_n());
431
413
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
432
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
433
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
414
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
415
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
434
416
  REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
435
417
  REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
436
418
  REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
@@ -439,27 +421,27 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
439
421
  }
440
422
 
441
423
  SECTION("bytes serialize deserialize many floats") {
442
- kll_float_sketch sketch(200, 0);
424
+ kll_float_sketch sketch(200, std::less<float>(), 0);
443
425
  const int n = 1000;
444
426
  for (int i = 0; i < n; i++) sketch.update(static_cast<float>(i));
445
427
  auto bytes = sketch.serialize();
446
428
  REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
447
- auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), 0);
429
+ auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), std::less<float>(), 0);
448
430
  REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
449
431
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
450
432
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
451
433
  REQUIRE(sketch2.get_n() == sketch.get_n());
452
434
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
453
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
454
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
435
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
436
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
455
437
  REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
456
438
  REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
457
439
  REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
458
440
  REQUIRE(sketch2.get_rank(0) == sketch.get_rank(0));
459
441
  REQUIRE(sketch2.get_rank(static_cast<float>(n)) == sketch.get_rank(static_cast<float>(n)));
460
- REQUIRE_THROWS_AS(kll_sketch<int>::deserialize(bytes.data(), 7), std::out_of_range);
461
- REQUIRE_THROWS_AS(kll_sketch<int>::deserialize(bytes.data(), 15), std::out_of_range);
462
- REQUIRE_THROWS_AS(kll_sketch<int>::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
442
+ REQUIRE_THROWS_AS(kll_float_sketch::deserialize(bytes.data(), 7, serde<float>(), std::less<float>(), 0), std::out_of_range);
443
+ REQUIRE_THROWS_AS(kll_float_sketch::deserialize(bytes.data(), 15, serde<float>(), std::less<float>(), 0), std::out_of_range);
444
+ REQUIRE_THROWS_AS(kll_float_sketch::deserialize(bytes.data(), bytes.size() - 1, serde<float>(), std::less<float>(), 0), std::out_of_range);
463
445
  }
464
446
 
465
447
  SECTION("bytes serialize deserialize many ints") {
@@ -474,8 +456,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
474
456
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
475
457
  REQUIRE(sketch2.get_n() == sketch.get_n());
476
458
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
477
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
478
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
459
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
460
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
479
461
  REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
480
462
  REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
481
463
  REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
@@ -499,7 +481,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
499
481
  }
500
482
 
501
483
  SECTION("out of order split points, float") {
502
- kll_float_sketch sketch(200, 0);
484
+ kll_float_sketch sketch(200, std::less<float>(), 0);
503
485
  sketch.update(0); // has to be non-empty to reach the check
504
486
  float split_points[2] = {1, 0};
505
487
  REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 2), std::invalid_argument);
@@ -513,48 +495,48 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
513
495
  }
514
496
 
515
497
  SECTION("NaN split point") {
516
- kll_float_sketch sketch(200, 0);
498
+ kll_float_sketch sketch(200, std::less<float>(), 0);
517
499
  sketch.update(0); // has to be non-empty to reach the check
518
500
  float split_points[1] = {std::numeric_limits<float>::quiet_NaN()};
519
501
  REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::invalid_argument);
520
502
  }
521
503
 
522
504
  SECTION("merge") {
523
- kll_float_sketch sketch1(200, 0);
524
- kll_float_sketch sketch2(200, 0);
505
+ kll_float_sketch sketch1(200, std::less<float>(), 0);
506
+ kll_float_sketch sketch2(200, std::less<float>(), 0);
525
507
  const int n = 10000;
526
508
  for (int i = 0; i < n; i++) {
527
509
  sketch1.update(static_cast<float>(i));
528
510
  sketch2.update(static_cast<float>((2 * n) - i - 1));
529
511
  }
530
512
 
531
- REQUIRE(sketch1.get_min_value() == 0.0f);
532
- REQUIRE(sketch1.get_max_value() == n - 1);
533
- REQUIRE(sketch2.get_min_value() == n);
534
- REQUIRE(sketch2.get_max_value() == 2.0f * n - 1);
513
+ REQUIRE(sketch1.get_min_item() == 0.0f);
514
+ REQUIRE(sketch1.get_max_item() == n - 1);
515
+ REQUIRE(sketch2.get_min_item() == n);
516
+ REQUIRE(sketch2.get_max_item() == 2.0f * n - 1);
535
517
 
536
518
  sketch1.merge(sketch2);
537
519
 
538
520
  REQUIRE_FALSE(sketch1.is_empty());
539
521
  REQUIRE(sketch1.get_n() == 2 * n);
540
- REQUIRE(sketch1.get_min_value() == 0.0f);
541
- REQUIRE(sketch1.get_max_value() == 2.0f * n - 1);
522
+ REQUIRE(sketch1.get_min_item() == 0.0f);
523
+ REQUIRE(sketch1.get_max_item() == 2.0f * n - 1);
542
524
  REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_200));
543
525
  }
544
526
 
545
527
  SECTION("merge lower k") {
546
- kll_float_sketch sketch1(256, 0);
547
- kll_float_sketch sketch2(128, 0);
528
+ kll_float_sketch sketch1(256, std::less<float>(), 0);
529
+ kll_float_sketch sketch2(128, std::less<float>(), 0);
548
530
  const int n = 10000;
549
531
  for (int i = 0; i < n; i++) {
550
532
  sketch1.update(static_cast<float>(i));
551
533
  sketch2.update(static_cast<float>((2 * n) - i - 1));
552
534
  }
553
535
 
554
- REQUIRE(sketch1.get_min_value() == 0.0f);
555
- REQUIRE(sketch1.get_max_value() == n - 1);
556
- REQUIRE(sketch2.get_min_value() == n);
557
- REQUIRE(sketch2.get_max_value() == 2.0f * n - 1);
536
+ REQUIRE(sketch1.get_min_item() == 0.0f);
537
+ REQUIRE(sketch1.get_max_item() == n - 1);
538
+ REQUIRE(sketch2.get_min_item() == n);
539
+ REQUIRE(sketch2.get_max_item() == 2.0f * n - 1);
558
540
 
559
541
  REQUIRE(sketch1.get_k() == 256);
560
542
  REQUIRE(sketch2.get_k() == 128);
@@ -570,14 +552,14 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
570
552
 
571
553
  REQUIRE_FALSE(sketch1.is_empty());
572
554
  REQUIRE(sketch1.get_n() == 2 * n);
573
- REQUIRE(sketch1.get_min_value() == 0.0f);
574
- REQUIRE(sketch1.get_max_value() == 2.0f * n - 1);
555
+ REQUIRE(sketch1.get_min_item() == 0.0f);
556
+ REQUIRE(sketch1.get_max_item() == 2.0f * n - 1);
575
557
  REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_200));
576
558
  }
577
559
 
578
560
  SECTION("merge exact mode, lower k") {
579
- kll_float_sketch sketch1(256, 0);
580
- kll_float_sketch sketch2(128, 0);
561
+ kll_float_sketch sketch1(256, std::less<float>(), 0);
562
+ kll_float_sketch sketch2(128, std::less<float>(), 0);
581
563
  const int n = 10000;
582
564
  for (int i = 0; i < n; i++) {
583
565
  sketch1.update(static_cast<float>(i));
@@ -590,8 +572,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
590
572
 
591
573
  REQUIRE_FALSE(sketch1.is_empty());
592
574
  REQUIRE(sketch1.get_n() == n);
593
- REQUIRE(sketch1.get_min_value() == 0.0f);
594
- REQUIRE(sketch1.get_max_value() == n - 1);
575
+ REQUIRE(sketch1.get_min_item() == 0.0f);
576
+ REQUIRE(sketch1.get_max_item() == n - 1);
595
577
  REQUIRE(sketch1.get_quantile(0.5) == Approx(n / 2).margin(n / 2 * RANK_EPS_FOR_K_200));
596
578
 
597
579
  sketch2.update(0);
@@ -601,29 +583,29 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
601
583
  }
602
584
 
603
585
  SECTION("merge min value from other") {
604
- kll_float_sketch sketch1(200, 0);
605
- kll_float_sketch sketch2(200, 0);
586
+ kll_float_sketch sketch1(200, std::less<float>(), 0);
587
+ kll_float_sketch sketch2(200, std::less<float>(), 0);
606
588
  sketch1.update(1.0f);
607
589
  sketch2.update(2.0f);
608
590
  sketch2.merge(sketch1);
609
- REQUIRE(sketch2.get_min_value() == 1.0f);
610
- REQUIRE(sketch2.get_max_value() == 2.0f);
591
+ REQUIRE(sketch2.get_min_item() == 1.0f);
592
+ REQUIRE(sketch2.get_max_item() == 2.0f);
611
593
  }
612
594
 
613
595
  SECTION("merge min and max values from other") {
614
- kll_float_sketch sketch1(200, 0);
596
+ kll_float_sketch sketch1(200, std::less<float>(), 0);
615
597
  for (int i = 0; i < 1000000; i++) sketch1.update(static_cast<float>(i));
616
- kll_float_sketch sketch2(200, 0);
598
+ kll_float_sketch sketch2(200, std::less<float>(), 0);
617
599
  sketch2.merge(sketch1);
618
- REQUIRE(sketch2.get_min_value() == 0.0f);
619
- REQUIRE(sketch2.get_max_value() == 999999.0f);
600
+ REQUIRE(sketch2.get_min_item() == 0.0f);
601
+ REQUIRE(sketch2.get_max_item() == 999999.0f);
620
602
  }
621
603
 
622
604
  SECTION("sketch of ints") {
623
605
  kll_sketch<int> sketch;
624
606
  REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
625
- REQUIRE_THROWS_AS(sketch.get_min_value(), std::runtime_error);
626
- REQUIRE_THROWS_AS(sketch.get_max_value(), std::runtime_error);
607
+ REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
608
+ REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
627
609
 
628
610
  const int n = 1000;
629
611
  for (int i = 0; i < n; i++) sketch.update(i);
@@ -638,8 +620,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
638
620
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
639
621
  REQUIRE(sketch2.get_n() == sketch.get_n());
640
622
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
641
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
642
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
623
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
624
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
643
625
  REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
644
626
  REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
645
627
  REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
@@ -648,30 +630,30 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
648
630
  }
649
631
 
650
632
  SECTION("sketch of strings stream") {
651
- kll_string_sketch sketch1(200, 0);
633
+ kll_string_sketch sketch1(200, std::less<std::string>(), 0);
652
634
  REQUIRE_THROWS_AS(sketch1.get_quantile(0), std::runtime_error);
653
- REQUIRE_THROWS_AS(sketch1.get_min_value(), std::runtime_error);
654
- REQUIRE_THROWS_AS(sketch1.get_max_value(), std::runtime_error);
635
+ REQUIRE_THROWS_AS(sketch1.get_min_item(), std::runtime_error);
636
+ REQUIRE_THROWS_AS(sketch1.get_max_item(), std::runtime_error);
655
637
  REQUIRE(sketch1.get_serialized_size_bytes() == 8);
656
638
 
657
639
  const int n = 1000;
658
640
  for (int i = 0; i < n; i++) sketch1.update(std::to_string(i));
659
641
 
660
- REQUIRE(sketch1.get_min_value() == std::string("0"));
661
- REQUIRE(sketch1.get_max_value() == std::string("999"));
642
+ REQUIRE(sketch1.get_min_item() == std::string("0"));
643
+ REQUIRE(sketch1.get_max_item() == std::string("999"));
662
644
 
663
645
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
664
646
  sketch1.serialize(s);
665
647
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch1.get_serialized_size_bytes());
666
- auto sketch2 = kll_string_sketch::deserialize(s, test_allocator<std::string>(0));
648
+ auto sketch2 = kll_string_sketch::deserialize(s, serde<std::string>(), std::less<std::string>(), 0);
667
649
  REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
668
650
  REQUIRE(s.tellg() == s.tellp());
669
651
  REQUIRE(sketch2.is_empty() == sketch1.is_empty());
670
652
  REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
671
653
  REQUIRE(sketch2.get_n() == sketch1.get_n());
672
654
  REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
673
- REQUIRE(sketch2.get_min_value() == sketch1.get_min_value());
674
- REQUIRE(sketch2.get_max_value() == sketch1.get_max_value());
655
+ REQUIRE(sketch2.get_min_item() == sketch1.get_min_item());
656
+ REQUIRE(sketch2.get_max_item() == sketch1.get_max_item());
675
657
  REQUIRE(sketch2.get_normalized_rank_error(false) == sketch1.get_normalized_rank_error(false));
676
658
  REQUIRE(sketch2.get_normalized_rank_error(true) == sketch1.get_normalized_rank_error(true));
677
659
  REQUIRE(sketch2.get_quantile(0.5) == sketch1.get_quantile(0.5));
@@ -687,28 +669,29 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
687
669
  }
688
670
 
689
671
  SECTION("sketch of strings bytes") {
690
- kll_string_sketch sketch1(200, 0);
672
+ kll_string_sketch sketch1(200, std::less<std::string>(), 0);
691
673
  REQUIRE_THROWS_AS(sketch1.get_quantile(0), std::runtime_error);
692
- REQUIRE_THROWS_AS(sketch1.get_min_value(), std::runtime_error);
693
- REQUIRE_THROWS_AS(sketch1.get_max_value(), std::runtime_error);
674
+ REQUIRE_THROWS_AS(sketch1.get_min_item(), std::runtime_error);
675
+ REQUIRE_THROWS_AS(sketch1.get_max_item(), std::runtime_error);
694
676
  REQUIRE(sketch1.get_serialized_size_bytes() == 8);
695
677
 
696
678
  const int n = 1000;
697
679
  for (int i = 0; i < n; i++) sketch1.update(std::to_string(i));
698
680
 
699
- REQUIRE(sketch1.get_min_value() == std::string("0"));
700
- REQUIRE(sketch1.get_max_value() == std::string("999"));
681
+ REQUIRE(sketch1.get_min_item() == std::string("0"));
682
+ REQUIRE(sketch1.get_max_item() == std::string("999"));
701
683
 
702
684
  auto bytes = sketch1.serialize();
703
685
  REQUIRE(bytes.size() == sketch1.get_serialized_size_bytes());
704
- auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size(), serde<std::string>(), 0);
686
+ auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size(), serde<std::string>(),
687
+ std::less<std::string>(), 0);
705
688
  REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
706
689
  REQUIRE(sketch2.is_empty() == sketch1.is_empty());
707
690
  REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
708
691
  REQUIRE(sketch2.get_n() == sketch1.get_n());
709
692
  REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
710
- REQUIRE(sketch2.get_min_value() == sketch1.get_min_value());
711
- REQUIRE(sketch2.get_max_value() == sketch1.get_max_value());
693
+ REQUIRE(sketch2.get_min_item() == sketch1.get_min_item());
694
+ REQUIRE(sketch2.get_max_item() == sketch1.get_max_item());
712
695
  REQUIRE(sketch2.get_normalized_rank_error(false) == sketch1.get_normalized_rank_error(false));
713
696
  REQUIRE(sketch2.get_normalized_rank_error(true) == sketch1.get_normalized_rank_error(true));
714
697
  REQUIRE(sketch2.get_quantile(0.5) == sketch1.get_quantile(0.5));
@@ -718,11 +701,12 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
718
701
 
719
702
 
720
703
  SECTION("sketch of strings, single item, bytes") {
721
- kll_string_sketch sketch1(200, 0);
704
+ kll_string_sketch sketch1(200, std::less<std::string>(), 0);
722
705
  sketch1.update("a");
723
706
  auto bytes = sketch1.serialize();
724
707
  REQUIRE(bytes.size() == sketch1.get_serialized_size_bytes());
725
- auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size(), serde<std::string>(), 0);
708
+ auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size(), serde<std::string>(),
709
+ std::less<std::string>(), 0);
726
710
  REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
727
711
  }
728
712
 
@@ -753,14 +737,14 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
753
737
  // move constructor
754
738
  kll_sketch<int> sketch2(std::move(sketch1));
755
739
  for (int i = 0; i < n; i++) {
756
- REQUIRE(sketch2.get_rank(i) == (double) i / n);
740
+ REQUIRE(sketch2.get_rank(i, false) == (double) i / n);
757
741
  }
758
742
 
759
743
  // move assignment
760
744
  kll_sketch<int> sketch3;
761
745
  sketch3 = std::move(sketch2);
762
746
  for (int i = 0; i < n; i++) {
763
- REQUIRE(sketch3.get_rank(i) == (double) i / n);
747
+ REQUIRE(sketch3.get_rank(i, false) == (double) i / n);
764
748
  }
765
749
  }
766
750
 
@@ -795,44 +779,24 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
795
779
  kll.update(3);
796
780
  kll.update(1);
797
781
 
798
- { // non-cumulative, using operator->
799
- auto view = kll.get_sorted_view(false);
782
+ {
783
+ auto view = kll.get_sorted_view();
800
784
  REQUIRE(view.size() == 3);
801
785
  auto it = view.begin();
802
- REQUIRE(it->first == 1);
786
+ REQUIRE(it->first == 1); // operator->
787
+ REQUIRE((*it).first == 1); // operator*
803
788
  REQUIRE(it->second == 1);
789
+ REQUIRE(it.get_weight() == 1);
804
790
  ++it;
805
791
  REQUIRE(it->first == 2);
806
- REQUIRE(it->second == 1);
807
- ++it;
808
- REQUIRE(it->first == 3);
809
- REQUIRE(it->second == 1);
810
- }
811
- { // cumulative, non-inclusive, using operator->
812
- auto view = kll.get_sorted_view(true);
813
- REQUIRE(view.size() == 3);
814
- auto it = view.begin();
815
- REQUIRE(it->first == 1);
816
- REQUIRE(it->second == 0);
817
- ++it;
818
- REQUIRE(it->first == 2);
819
- REQUIRE(it->second == 1);
820
- ++it;
821
- REQUIRE(it->first == 3);
822
792
  REQUIRE(it->second == 2);
823
- }
824
- { // cumulative, inclusive, using operator*
825
- auto view = kll.get_sorted_view<true>(true);
826
- REQUIRE(view.size() == 3);
827
- auto it = view.begin();
828
- REQUIRE((*it).first == 1);
829
- REQUIRE((*it).second == 1);
793
+ REQUIRE(it.get_weight() == 1);
830
794
  ++it;
831
- REQUIRE((*it).first == 2);
832
- REQUIRE((*it).second == 2);
795
+ REQUIRE(it->first == 3);
796
+ REQUIRE(it->second == 3);
797
+ REQUIRE(it.get_weight() == 1);
833
798
  ++it;
834
- REQUIRE((*it).first == 3);
835
- REQUIRE((*it).second == 3);
799
+ REQUIRE(it == view.end());
836
800
  }
837
801
  }
838
802
 
@@ -854,8 +818,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
854
818
  REQUIRE(kll_float.get_n() == kll_double.get_n());
855
819
  REQUIRE(kll_float.get_num_retained() == kll_double.get_num_retained());
856
820
 
857
- auto sv_float = kll_float.get_sorted_view(false);
858
- auto sv_double = kll_double.get_sorted_view(false);
821
+ auto sv_float = kll_float.get_sorted_view();
822
+ auto sv_double = kll_double.get_sorted_view();
859
823
  auto sv_float_it = sv_float.begin();
860
824
  auto sv_double_it = sv_double.begin();
861
825
  while (sv_float_it != sv_float.end()) {