datasketches 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/kll_wrapper.cpp +20 -20
- data/ext/datasketches/theta_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
- data/vendor/datasketches-cpp/MANIFEST.in +21 -2
- data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
- data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
- data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
- data/vendor/datasketches-cpp/pyproject.toml +17 -12
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
- data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/setup.py +14 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
- data/vendor/datasketches-cpp/tox.ini +26 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
- data/vendor/datasketches-cpp/version.cfg.in +1 -0
- metadata +14 -5
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
|
@@ -47,53 +47,57 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
47
47
|
test_allocator_total_bytes = 0;
|
|
48
48
|
|
|
49
49
|
SECTION("k limits") {
|
|
50
|
-
|
|
51
|
-
quantiles_float_sketch
|
|
52
|
-
|
|
53
|
-
REQUIRE_THROWS_AS(new quantiles_float_sketch(
|
|
50
|
+
//std::cout << "sizeof(quantiles_sketch<float>)=" << sizeof(quantiles_sketch<float>) << '\n';
|
|
51
|
+
quantiles_float_sketch sketch1(quantiles_constants::MIN_K, std::less<float>(), 0); // this should work
|
|
52
|
+
quantiles_float_sketch sketch2(quantiles_constants::MAX_K, std::less<float>(), 0); // this should work
|
|
53
|
+
REQUIRE_THROWS_AS(new quantiles_float_sketch(quantiles_constants::MIN_K - 1, std::less<float>(), 0), std::invalid_argument);
|
|
54
|
+
REQUIRE_THROWS_AS(new quantiles_float_sketch(40, std::less<float>(), 0), std::invalid_argument); // not power of 2
|
|
54
55
|
// MAX_K + 1 makes no sense because k is uint16_t
|
|
55
56
|
}
|
|
56
57
|
|
|
57
58
|
SECTION("empty") {
|
|
58
|
-
quantiles_float_sketch sketch(128, 0);
|
|
59
|
+
quantiles_float_sketch sketch(128, std::less<float>(), 0);
|
|
59
60
|
REQUIRE(sketch.is_empty());
|
|
60
61
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
61
62
|
REQUIRE(sketch.get_n() == 0);
|
|
62
63
|
REQUIRE(sketch.get_num_retained() == 0);
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
64
|
+
REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
|
|
65
|
+
REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
|
|
66
|
+
REQUIRE_THROWS_AS(sketch.get_rank(0), std::runtime_error);
|
|
67
|
+
REQUIRE_THROWS_AS(sketch.get_quantile(0.5), std::runtime_error);
|
|
67
68
|
const double fractions[3] {0, 0.5, 1};
|
|
68
|
-
|
|
69
|
+
REQUIRE_THROWS_AS(sketch.get_quantiles(fractions, 3).empty(), std::runtime_error);
|
|
69
70
|
const float split_points[1] {0};
|
|
70
|
-
|
|
71
|
-
|
|
71
|
+
REQUIRE_THROWS_AS(sketch.get_PMF(split_points, 1), std::runtime_error);
|
|
72
|
+
REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::runtime_error);
|
|
72
73
|
|
|
73
|
-
for (auto
|
|
74
|
-
unused(
|
|
74
|
+
for (auto pair: sketch) {
|
|
75
|
+
unused(pair);
|
|
75
76
|
FAIL("should be no iterations over an empty sketch");
|
|
76
77
|
}
|
|
77
|
-
|
|
78
|
+
}
|
|
78
79
|
|
|
79
80
|
SECTION("get bad quantile") {
|
|
80
|
-
quantiles_float_sketch sketch(64, 0);
|
|
81
|
+
quantiles_float_sketch sketch(64, std::less<float>(), 0);
|
|
81
82
|
sketch.update(0.0f); // has to be non-empty to reach the check
|
|
82
83
|
REQUIRE_THROWS_AS(sketch.get_quantile(-1), std::invalid_argument);
|
|
83
84
|
}
|
|
84
85
|
|
|
85
86
|
SECTION("one item") {
|
|
86
|
-
quantiles_float_sketch sketch(128, 0);
|
|
87
|
+
quantiles_float_sketch sketch(128, std::less<float>(), 0);
|
|
87
88
|
sketch.update(1.0f);
|
|
88
89
|
REQUIRE_FALSE(sketch.is_empty());
|
|
89
90
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
90
91
|
REQUIRE(sketch.get_n() == 1);
|
|
91
92
|
REQUIRE(sketch.get_num_retained() == 1);
|
|
92
|
-
REQUIRE(sketch.get_rank(
|
|
93
|
-
REQUIRE(sketch.get_rank(
|
|
94
|
-
REQUIRE(sketch.
|
|
95
|
-
REQUIRE(sketch.
|
|
93
|
+
REQUIRE(sketch.get_rank(0) == 0);
|
|
94
|
+
REQUIRE(sketch.get_rank(1.0f) == 1);
|
|
95
|
+
REQUIRE(sketch.get_rank(1.0f, false) == 0);
|
|
96
|
+
REQUIRE(sketch.get_rank(2.0f, false) == 1);
|
|
97
|
+
REQUIRE(sketch.get_min_item() == 1.0);
|
|
98
|
+
REQUIRE(sketch.get_max_item() == 1.0);
|
|
96
99
|
REQUIRE(sketch.get_quantile(0.5) == 1.0);
|
|
100
|
+
|
|
97
101
|
const double fractions[3] {0, 0.5, 1};
|
|
98
102
|
auto quantiles = sketch.get_quantiles(fractions, 3);
|
|
99
103
|
REQUIRE(quantiles.size() == 3);
|
|
@@ -102,15 +106,20 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
102
106
|
REQUIRE(quantiles[2] == 1.0);
|
|
103
107
|
|
|
104
108
|
int count = 0;
|
|
105
|
-
for (auto
|
|
106
|
-
REQUIRE(
|
|
109
|
+
for (auto pair: sketch) {
|
|
110
|
+
REQUIRE(pair.second == 1);
|
|
107
111
|
++count;
|
|
108
112
|
}
|
|
109
113
|
REQUIRE(count == 1);
|
|
110
|
-
|
|
114
|
+
|
|
115
|
+
// iterator dereferencing
|
|
116
|
+
auto it = sketch.begin();
|
|
117
|
+
REQUIRE(it->first == 1.0f);
|
|
118
|
+
REQUIRE((*it).first == 1.0f);
|
|
119
|
+
}
|
|
111
120
|
|
|
112
121
|
SECTION("NaN") {
|
|
113
|
-
quantiles_float_sketch sketch(256, 0);
|
|
122
|
+
quantiles_float_sketch sketch(256, std::less<float>(), 0);
|
|
114
123
|
sketch.update(std::numeric_limits<float>::quiet_NaN());
|
|
115
124
|
REQUIRE(sketch.is_empty());
|
|
116
125
|
|
|
@@ -123,45 +132,33 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
123
132
|
SECTION("sampling mode") {
|
|
124
133
|
const uint16_t k = 8;
|
|
125
134
|
const uint32_t n = 16 * (2 * k) + 1;
|
|
126
|
-
quantiles_float_sketch sk(k, 0);
|
|
135
|
+
quantiles_float_sketch sk(k, std::less<float>(), 0);
|
|
127
136
|
for (uint32_t i = 0; i < n; ++i) {
|
|
128
137
|
sk.update(static_cast<float>(i));
|
|
129
138
|
}
|
|
130
139
|
}
|
|
131
140
|
|
|
132
141
|
SECTION("many items, exact mode") {
|
|
133
|
-
const uint32_t n =
|
|
134
|
-
quantiles_float_sketch sketch(
|
|
135
|
-
for (uint32_t i =
|
|
142
|
+
const uint32_t n = 100;
|
|
143
|
+
quantiles_float_sketch sketch(128, std::less<float>(), 0);
|
|
144
|
+
for (uint32_t i = 1; i <= n; i++) {
|
|
136
145
|
sketch.update(static_cast<float>(i));
|
|
137
|
-
REQUIRE(sketch.get_n() == i
|
|
146
|
+
REQUIRE(sketch.get_n() == i);
|
|
138
147
|
}
|
|
139
148
|
REQUIRE_FALSE(sketch.is_empty());
|
|
140
149
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
141
150
|
REQUIRE(sketch.get_num_retained() == n);
|
|
142
|
-
REQUIRE(sketch.
|
|
143
|
-
REQUIRE(sketch.get_quantile(0) ==
|
|
144
|
-
REQUIRE(sketch.
|
|
145
|
-
REQUIRE(sketch.get_quantile(1) == n
|
|
146
|
-
|
|
147
|
-
int count = 0;
|
|
148
|
-
for (auto it: sketch) {
|
|
149
|
-
REQUIRE(it.second == 1);
|
|
150
|
-
++count;
|
|
151
|
-
}
|
|
152
|
-
REQUIRE(count == n);
|
|
151
|
+
REQUIRE(sketch.get_min_item() == 1);
|
|
152
|
+
REQUIRE(sketch.get_quantile(0) == 1);
|
|
153
|
+
REQUIRE(sketch.get_max_item() == n);
|
|
154
|
+
REQUIRE(sketch.get_quantile(1) == n);
|
|
153
155
|
|
|
154
|
-
const double
|
|
155
|
-
auto quantiles = sketch.get_quantiles(
|
|
156
|
+
const double ranks[3] {0, 0.5, 1};
|
|
157
|
+
auto quantiles = sketch.get_quantiles(ranks, 3);
|
|
156
158
|
REQUIRE(quantiles.size() == 3);
|
|
157
|
-
REQUIRE(quantiles[0] ==
|
|
159
|
+
REQUIRE(quantiles[0] == 1);
|
|
158
160
|
REQUIRE(quantiles[1] == static_cast<float>(n / 2));
|
|
159
|
-
REQUIRE(quantiles[2] == n
|
|
160
|
-
|
|
161
|
-
for (uint32_t i = 0; i < n; i++) {
|
|
162
|
-
const double trueRank = (double) i / n;
|
|
163
|
-
REQUIRE(sketch.get_rank(static_cast<float>(i)) == trueRank);
|
|
164
|
-
}
|
|
161
|
+
REQUIRE(quantiles[2] == n);
|
|
165
162
|
|
|
166
163
|
// the alternative method must produce the same result
|
|
167
164
|
auto quantiles2 = sketch.get_quantiles(3);
|
|
@@ -169,10 +166,24 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
169
166
|
REQUIRE(quantiles[0] == quantiles2[0]);
|
|
170
167
|
REQUIRE(quantiles[1] == quantiles2[1]);
|
|
171
168
|
REQUIRE(quantiles[2] == quantiles2[2]);
|
|
169
|
+
|
|
170
|
+
int count = 0;
|
|
171
|
+
for (auto pair: sketch) {
|
|
172
|
+
REQUIRE(pair.second == 1);
|
|
173
|
+
++count;
|
|
174
|
+
}
|
|
175
|
+
REQUIRE(count == n);
|
|
176
|
+
|
|
177
|
+
for (uint32_t i = 1; i <= n; i++) {
|
|
178
|
+
const double true_rank_inclusive = static_cast<double>(i) / n;
|
|
179
|
+
REQUIRE(sketch.get_rank(static_cast<float>(i)) == true_rank_inclusive);
|
|
180
|
+
const double true_rank_exclusive = static_cast<double>(i - 1) / n;
|
|
181
|
+
REQUIRE(sketch.get_rank(static_cast<float>(i), false) == true_rank_exclusive);
|
|
182
|
+
}
|
|
172
183
|
}
|
|
173
184
|
|
|
174
185
|
SECTION("10 items") {
|
|
175
|
-
quantiles_float_sketch sketch(128, 0);
|
|
186
|
+
quantiles_float_sketch sketch(128, std::less<float>(), 0);
|
|
176
187
|
sketch.update(1.0f);
|
|
177
188
|
sketch.update(2.0f);
|
|
178
189
|
sketch.update(3.0f);
|
|
@@ -183,24 +194,24 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
183
194
|
sketch.update(8.0f);
|
|
184
195
|
sketch.update(9.0f);
|
|
185
196
|
sketch.update(10.0f);
|
|
186
|
-
REQUIRE(sketch.get_quantile(0) == 1
|
|
187
|
-
REQUIRE(sketch.get_quantile(0.5) ==
|
|
188
|
-
REQUIRE(sketch.get_quantile(0.99) == 10
|
|
189
|
-
REQUIRE(sketch.get_quantile(1) == 10
|
|
197
|
+
REQUIRE(sketch.get_quantile(0) == 1);
|
|
198
|
+
REQUIRE(sketch.get_quantile(0.5) == 5);
|
|
199
|
+
REQUIRE(sketch.get_quantile(0.99) == 10);
|
|
200
|
+
REQUIRE(sketch.get_quantile(1) == 10);
|
|
190
201
|
}
|
|
191
202
|
|
|
192
|
-
SECTION("100 items") {
|
|
193
|
-
quantiles_float_sketch sketch(128, 0);
|
|
194
|
-
for (int i =
|
|
195
|
-
REQUIRE(sketch.get_quantile(0) ==
|
|
203
|
+
SECTION("100 items, exact mode") {
|
|
204
|
+
quantiles_float_sketch sketch(128, std::less<float>(), 0);
|
|
205
|
+
for (int i = 1; i <= 100; ++i) sketch.update(static_cast<float>(i));
|
|
206
|
+
REQUIRE(sketch.get_quantile(0) == 1);
|
|
196
207
|
REQUIRE(sketch.get_quantile(0.01) == 1);
|
|
197
208
|
REQUIRE(sketch.get_quantile(0.5) == 50);
|
|
198
|
-
REQUIRE(sketch.get_quantile(0.99) == 99
|
|
199
|
-
REQUIRE(sketch.get_quantile(1) ==
|
|
209
|
+
REQUIRE(sketch.get_quantile(0.99) == 99);
|
|
210
|
+
REQUIRE(sketch.get_quantile(1) == 100);
|
|
200
211
|
}
|
|
201
212
|
|
|
202
213
|
SECTION("many items, estimation mode") {
|
|
203
|
-
quantiles_float_sketch sketch(128, 0);
|
|
214
|
+
quantiles_float_sketch sketch(128, std::less<float>(), 0);
|
|
204
215
|
const int n = 1000000;
|
|
205
216
|
for (int i = 0; i < n; i++) {
|
|
206
217
|
sketch.update(static_cast<float>(i));
|
|
@@ -208,51 +219,30 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
208
219
|
}
|
|
209
220
|
REQUIRE_FALSE(sketch.is_empty());
|
|
210
221
|
REQUIRE(sketch.is_estimation_mode());
|
|
211
|
-
REQUIRE(sketch.
|
|
212
|
-
REQUIRE(sketch.
|
|
213
|
-
REQUIRE(sketch.get_max_value() == n - 1); // max value is exact
|
|
214
|
-
REQUIRE(sketch.get_quantile(1) == n - 1); // max value is exact
|
|
222
|
+
REQUIRE(sketch.get_min_item() == 0.0); // min value is exact
|
|
223
|
+
REQUIRE(sketch.get_max_item() == n - 1); // max value is exact
|
|
215
224
|
|
|
216
225
|
// test rank
|
|
217
226
|
for (int i = 0; i < n; i++) {
|
|
218
|
-
const double trueRank = static_cast<float>(i) / n;
|
|
227
|
+
const double trueRank = static_cast<float>(i + 1) / n;
|
|
219
228
|
const double sketchRank = sketch.get_rank(static_cast<float>(i));
|
|
220
229
|
REQUIRE(sketchRank == Approx(trueRank).margin(RANK_EPS_FOR_K_128));
|
|
221
230
|
}
|
|
222
231
|
|
|
223
|
-
// test quantiles at every 0.1 percentage point
|
|
224
|
-
double fractions[1001];
|
|
225
|
-
double reverse_fractions[1001]; // check that ordering does not matter
|
|
226
|
-
for (int i = 0; i < 1001; i++) {
|
|
227
|
-
fractions[i] = (double) i / 1000;
|
|
228
|
-
reverse_fractions[1000 - i] = fractions[i];
|
|
229
|
-
}
|
|
230
|
-
auto quantiles = sketch.get_quantiles(fractions, 1001);
|
|
231
|
-
auto reverse_quantiles = sketch.get_quantiles(reverse_fractions, 1001);
|
|
232
|
-
float previous_quantile(0);
|
|
233
|
-
for (int i = 0; i < 1001; i++) {
|
|
234
|
-
// expensive in a loop, just to check the equivalence here, not advised for real code
|
|
235
|
-
const float quantile = sketch.get_quantile(fractions[i]);
|
|
236
|
-
REQUIRE(quantiles[i] == quantile);
|
|
237
|
-
REQUIRE(reverse_quantiles[1000 - i] == quantile);
|
|
238
|
-
REQUIRE(previous_quantile <= quantile);
|
|
239
|
-
previous_quantile = quantile;
|
|
240
|
-
}
|
|
241
|
-
|
|
242
232
|
//std::cout << sketch.to_string();
|
|
243
233
|
|
|
244
234
|
uint32_t count = 0;
|
|
245
235
|
uint64_t total_weight = 0;
|
|
246
|
-
for (auto
|
|
236
|
+
for (auto pair: sketch) {
|
|
247
237
|
++count;
|
|
248
|
-
total_weight +=
|
|
238
|
+
total_weight += pair.second;
|
|
249
239
|
}
|
|
250
240
|
REQUIRE(count == sketch.get_num_retained());
|
|
251
241
|
REQUIRE(total_weight == sketch.get_n());
|
|
252
242
|
}
|
|
253
243
|
|
|
254
244
|
SECTION("consistency between get_rank and get_PMF/CDF") {
|
|
255
|
-
quantiles_float_sketch sketch(64, 0);
|
|
245
|
+
quantiles_float_sketch sketch(64, std::less<float>(), 0);
|
|
256
246
|
const int n = 1000;
|
|
257
247
|
float values[n];
|
|
258
248
|
for (int i = 0; i < n; i++) {
|
|
@@ -287,149 +277,154 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
287
277
|
// get_rank()
|
|
288
278
|
// using knowledge of internal structure
|
|
289
279
|
// value still in the base buffer to avoid randomness
|
|
290
|
-
REQUIRE(sketch.get_rank
|
|
291
|
-
REQUIRE(sketch.get_rank
|
|
280
|
+
REQUIRE(sketch.get_rank(80, false) == 0.79);
|
|
281
|
+
REQUIRE(sketch.get_rank(80, true) == 0.80);
|
|
292
282
|
|
|
293
283
|
// value pushed into higher level
|
|
294
|
-
REQUIRE(sketch.get_rank
|
|
295
|
-
REQUIRE(sketch.get_rank
|
|
284
|
+
REQUIRE(sketch.get_rank(50, false) == Approx(0.49).margin(0.01));
|
|
285
|
+
REQUIRE(sketch.get_rank(50, true) == 0.50);
|
|
296
286
|
|
|
297
287
|
// get_quantile()
|
|
298
288
|
// value still in base buffer
|
|
299
|
-
REQUIRE(sketch.get_quantile
|
|
300
|
-
REQUIRE(sketch.get_quantile
|
|
289
|
+
REQUIRE(sketch.get_quantile(0.70, false) == 71);
|
|
290
|
+
REQUIRE(sketch.get_quantile(0.70, true) == 70);
|
|
301
291
|
|
|
302
292
|
// value pushed into higher level
|
|
303
|
-
int quantile = sketch.get_quantile
|
|
293
|
+
int quantile = sketch.get_quantile(0.30, false);
|
|
304
294
|
if (quantile != 31 && quantile != 32) { FAIL(); }
|
|
305
295
|
|
|
306
|
-
quantile = sketch.get_quantile
|
|
296
|
+
quantile = sketch.get_quantile(0.30, true);
|
|
307
297
|
if (quantile != 29 && quantile != 30) { FAIL(); }
|
|
308
298
|
}
|
|
309
299
|
|
|
310
300
|
SECTION("stream serialize deserialize empty") {
|
|
311
|
-
quantiles_float_sketch sketch(128, 0);
|
|
301
|
+
quantiles_float_sketch sketch(128, std::less<float>(), 0);
|
|
312
302
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
313
303
|
sketch.serialize(s);
|
|
314
304
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
|
|
315
|
-
auto sketch2 = quantiles_float_sketch::deserialize(s, serde<float>(),
|
|
305
|
+
auto sketch2 = quantiles_float_sketch::deserialize(s, serde<float>(), std::less<float>(), 0);
|
|
316
306
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
|
|
317
307
|
REQUIRE(s.tellg() == s.tellp());
|
|
318
308
|
REQUIRE(sketch2.is_empty() == sketch.is_empty());
|
|
319
309
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
320
310
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
321
311
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
322
|
-
|
|
323
|
-
|
|
312
|
+
REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
|
|
313
|
+
REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
|
|
324
314
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
|
|
325
315
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
|
|
326
316
|
}
|
|
327
317
|
|
|
328
318
|
SECTION("bytes serialize deserialize empty") {
|
|
329
|
-
quantiles_float_sketch sketch(256, 0);
|
|
319
|
+
quantiles_float_sketch sketch(256, std::less<float>(), 0);
|
|
330
320
|
auto bytes = sketch.serialize();
|
|
331
|
-
auto sketch2 = quantiles_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(),
|
|
321
|
+
auto sketch2 = quantiles_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(),
|
|
322
|
+
std::less<float>(), 0);
|
|
332
323
|
REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
|
|
333
324
|
REQUIRE(sketch2.is_empty() == sketch.is_empty());
|
|
334
325
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
335
326
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
336
327
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
337
|
-
|
|
338
|
-
|
|
328
|
+
REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
|
|
329
|
+
REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
|
|
339
330
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
|
|
340
331
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
|
|
341
332
|
}
|
|
342
333
|
|
|
343
334
|
SECTION("stream serialize deserialize one item") {
|
|
344
|
-
quantiles_float_sketch sketch(32, 0);
|
|
335
|
+
quantiles_float_sketch sketch(32, std::less<float>(), 0);
|
|
345
336
|
sketch.update(1.0f);
|
|
346
337
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
347
338
|
sketch.serialize(s);
|
|
348
339
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
|
|
349
|
-
auto sketch2 = quantiles_float_sketch::deserialize(s, serde<float>(),
|
|
340
|
+
auto sketch2 = quantiles_float_sketch::deserialize(s, serde<float>(), std::less<float>(), 0);
|
|
350
341
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
|
|
351
342
|
REQUIRE(s.tellg() == s.tellp());
|
|
352
343
|
REQUIRE_FALSE(sketch2.is_empty());
|
|
353
344
|
REQUIRE_FALSE(sketch2.is_estimation_mode());
|
|
354
345
|
REQUIRE(sketch2.get_n() == 1);
|
|
355
346
|
REQUIRE(sketch2.get_num_retained() == 1);
|
|
356
|
-
REQUIRE(sketch2.
|
|
357
|
-
REQUIRE(sketch2.
|
|
358
|
-
REQUIRE(sketch2.get_quantile(0.5) == 1
|
|
359
|
-
REQUIRE(sketch2.get_rank(
|
|
360
|
-
REQUIRE(sketch2.get_rank(
|
|
347
|
+
REQUIRE(sketch2.get_min_item() == 1);
|
|
348
|
+
REQUIRE(sketch2.get_max_item() == 1);
|
|
349
|
+
REQUIRE(sketch2.get_quantile(0.5) == 1);
|
|
350
|
+
REQUIRE(sketch2.get_rank(0) == 0);
|
|
351
|
+
REQUIRE(sketch2.get_rank(1) == 1);
|
|
352
|
+
REQUIRE(sketch2.get_rank(2) == 1);
|
|
361
353
|
}
|
|
362
354
|
|
|
363
355
|
SECTION("bytes serialize deserialize one item") {
|
|
364
|
-
quantiles_float_sketch sketch(64, 0);
|
|
356
|
+
quantiles_float_sketch sketch(64, std::less<float>(), 0);
|
|
365
357
|
sketch.update(1.0f);
|
|
366
358
|
auto bytes = sketch.serialize();
|
|
367
359
|
REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
|
|
368
|
-
auto sketch2 = quantiles_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(),
|
|
360
|
+
auto sketch2 = quantiles_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(),
|
|
361
|
+
std::less<float>(), 0);
|
|
369
362
|
REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
|
|
370
363
|
REQUIRE_FALSE(sketch2.is_empty());
|
|
371
364
|
REQUIRE_FALSE(sketch2.is_estimation_mode());
|
|
372
365
|
REQUIRE(sketch2.get_n() == 1);
|
|
373
366
|
REQUIRE(sketch2.get_num_retained() == 1);
|
|
374
|
-
REQUIRE(sketch2.
|
|
375
|
-
REQUIRE(sketch2.
|
|
376
|
-
REQUIRE(sketch2.get_quantile(0.5) == 1
|
|
377
|
-
REQUIRE(sketch2.get_rank(
|
|
378
|
-
REQUIRE(sketch2.get_rank(
|
|
367
|
+
REQUIRE(sketch2.get_min_item() == 1);
|
|
368
|
+
REQUIRE(sketch2.get_max_item() == 1);
|
|
369
|
+
REQUIRE(sketch2.get_quantile(0.5) == 1);
|
|
370
|
+
REQUIRE(sketch2.get_rank(0) == 0);
|
|
371
|
+
REQUIRE(sketch2.get_rank(1) == 1);
|
|
372
|
+
REQUIRE(sketch2.get_rank(2) == 1);
|
|
379
373
|
}
|
|
380
374
|
|
|
381
375
|
SECTION("stream serialize deserialize three items") {
|
|
382
|
-
quantiles_float_sketch sketch(128, 0);
|
|
376
|
+
quantiles_float_sketch sketch(128, std::less<float>(), 0);
|
|
383
377
|
sketch.update(1.0f);
|
|
384
378
|
sketch.update(2.0f);
|
|
385
379
|
sketch.update(3.0f);
|
|
386
380
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
387
381
|
sketch.serialize(s);
|
|
388
382
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
|
|
389
|
-
auto sketch2 = quantiles_float_sketch::deserialize(s, serde<float>(),
|
|
383
|
+
auto sketch2 = quantiles_float_sketch::deserialize(s, serde<float>(), std::less<float>(), 0);
|
|
390
384
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
|
|
391
385
|
REQUIRE(s.tellg() == s.tellp());
|
|
392
386
|
REQUIRE_FALSE(sketch2.is_empty());
|
|
393
387
|
REQUIRE_FALSE(sketch2.is_estimation_mode());
|
|
394
388
|
REQUIRE(sketch2.get_n() == 3);
|
|
395
389
|
REQUIRE(sketch2.get_num_retained() == 3);
|
|
396
|
-
REQUIRE(sketch2.
|
|
397
|
-
REQUIRE(sketch2.
|
|
390
|
+
REQUIRE(sketch2.get_min_item() == 1.0);
|
|
391
|
+
REQUIRE(sketch2.get_max_item() == 3.0);
|
|
398
392
|
}
|
|
399
393
|
|
|
400
394
|
SECTION("bytes serialize deserialize three items") {
|
|
401
|
-
quantiles_float_sketch sketch(128, 0);
|
|
395
|
+
quantiles_float_sketch sketch(128, std::less<float>(), 0);
|
|
402
396
|
sketch.update(1.0f);
|
|
403
397
|
sketch.update(2.0f);
|
|
404
398
|
sketch.update(3.0f);
|
|
405
399
|
auto bytes = sketch.serialize();
|
|
406
400
|
REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
|
|
407
|
-
auto sketch2 = quantiles_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(),
|
|
401
|
+
auto sketch2 = quantiles_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(),
|
|
402
|
+
std::less<float>(), 0);
|
|
408
403
|
REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
|
|
409
404
|
REQUIRE_FALSE(sketch2.is_empty());
|
|
410
405
|
REQUIRE_FALSE(sketch2.is_estimation_mode());
|
|
411
406
|
REQUIRE(sketch2.get_n() == 3);
|
|
412
407
|
REQUIRE(sketch2.get_num_retained() == 3);
|
|
413
|
-
REQUIRE(sketch2.
|
|
414
|
-
REQUIRE(sketch2.
|
|
408
|
+
REQUIRE(sketch2.get_min_item() == 1.0);
|
|
409
|
+
REQUIRE(sketch2.get_max_item() == 3.0);
|
|
415
410
|
}
|
|
416
411
|
|
|
417
412
|
SECTION("stream serialize deserialize many floats") {
|
|
418
|
-
quantiles_float_sketch sketch(128, 0);
|
|
413
|
+
quantiles_float_sketch sketch(128, std::less<float>(), 0);
|
|
419
414
|
const int n = 1000;
|
|
420
415
|
for (int i = 0; i < n; i++) sketch.update(static_cast<float>(i));
|
|
421
416
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
422
417
|
sketch.serialize(s);
|
|
423
418
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
|
|
424
|
-
auto sketch2 = quantiles_float_sketch::deserialize(s, serde<float>(),
|
|
419
|
+
auto sketch2 = quantiles_float_sketch::deserialize(s, serde<float>(), std::less<float>(), 0);
|
|
425
420
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
|
|
426
421
|
REQUIRE(s.tellg() == s.tellp());
|
|
427
422
|
REQUIRE(sketch2.is_empty() == sketch.is_empty());
|
|
428
423
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
429
424
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
430
425
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
431
|
-
REQUIRE(sketch2.
|
|
432
|
-
REQUIRE(sketch2.
|
|
426
|
+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
|
|
427
|
+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
|
|
433
428
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
|
|
434
429
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
|
|
435
430
|
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
|
|
@@ -437,27 +432,31 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
437
432
|
REQUIRE(sketch2.get_rank(static_cast<float>(n)) == sketch.get_rank(static_cast<float>(n)));
|
|
438
433
|
}
|
|
439
434
|
SECTION("bytes serialize deserialize many floats") {
|
|
440
|
-
quantiles_float_sketch sketch(128, 0);
|
|
435
|
+
quantiles_float_sketch sketch(128, std::less<float>(), 0);
|
|
441
436
|
const int n = 1000;
|
|
442
437
|
for (int i = 0; i < n; i++) sketch.update(static_cast<float>(i));
|
|
443
438
|
auto bytes = sketch.serialize();
|
|
444
439
|
REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
|
|
445
|
-
auto sketch2 = quantiles_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(),
|
|
440
|
+
auto sketch2 = quantiles_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(),
|
|
441
|
+
std::less<float>(), 0);
|
|
446
442
|
REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
|
|
447
443
|
REQUIRE(sketch2.is_empty() == sketch.is_empty());
|
|
448
444
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
449
445
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
450
446
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
451
|
-
REQUIRE(sketch2.
|
|
452
|
-
REQUIRE(sketch2.
|
|
447
|
+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
|
|
448
|
+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
|
|
453
449
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
|
|
454
450
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
|
|
455
451
|
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
|
|
456
452
|
REQUIRE(sketch2.get_rank(0) == sketch.get_rank(0));
|
|
457
453
|
REQUIRE(sketch2.get_rank(static_cast<float>(n)) == sketch.get_rank(static_cast<float>(n)));
|
|
458
|
-
REQUIRE_THROWS_AS(
|
|
459
|
-
|
|
460
|
-
REQUIRE_THROWS_AS(
|
|
454
|
+
REQUIRE_THROWS_AS(quantiles_float_sketch::deserialize(bytes.data(), 7, serde<float>(), std::less<float>(), 0),
|
|
455
|
+
std::out_of_range);
|
|
456
|
+
REQUIRE_THROWS_AS(quantiles_float_sketch::deserialize(bytes.data(), 15, serde<float>(), std::less<float>(), 0),
|
|
457
|
+
std::out_of_range);
|
|
458
|
+
REQUIRE_THROWS_AS(quantiles_float_sketch::deserialize(bytes.data(), bytes.size() - 1, serde<float>(),
|
|
459
|
+
std::less<float>(), 0), std::out_of_range);
|
|
461
460
|
}
|
|
462
461
|
|
|
463
462
|
SECTION("bytes serialize deserialize many ints") {
|
|
@@ -472,8 +471,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
472
471
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
473
472
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
474
473
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
475
|
-
REQUIRE(sketch2.
|
|
476
|
-
REQUIRE(sketch2.
|
|
474
|
+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
|
|
475
|
+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
|
|
477
476
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
|
|
478
477
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
|
|
479
478
|
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
|
|
@@ -485,7 +484,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
485
484
|
}
|
|
486
485
|
|
|
487
486
|
SECTION("out of order split points, float") {
|
|
488
|
-
quantiles_float_sketch sketch(256, 0);
|
|
487
|
+
quantiles_float_sketch sketch(256, std::less<float>(), 0);
|
|
489
488
|
sketch.update(0.0f); // has too be non-empty to reach the check
|
|
490
489
|
float split_points[2] = {1, 0};
|
|
491
490
|
REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 2), std::invalid_argument);
|
|
@@ -499,72 +498,72 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
499
498
|
}
|
|
500
499
|
|
|
501
500
|
SECTION("NaN split point") {
|
|
502
|
-
quantiles_float_sketch sketch(512, 0);
|
|
501
|
+
quantiles_float_sketch sketch(512, std::less<float>(), 0);
|
|
503
502
|
sketch.update(0.0f); // has too be non-empty to reach the check
|
|
504
503
|
float split_points[1] = {std::numeric_limits<float>::quiet_NaN()};
|
|
505
504
|
REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::invalid_argument);
|
|
506
505
|
}
|
|
507
506
|
|
|
508
507
|
SECTION("merge") {
|
|
509
|
-
quantiles_float_sketch sketch1(128, 0);
|
|
510
|
-
quantiles_float_sketch sketch2(128, 0);
|
|
508
|
+
quantiles_float_sketch sketch1(128, std::less<float>(), 0);
|
|
509
|
+
quantiles_float_sketch sketch2(128, std::less<float>(), 0);
|
|
511
510
|
const int n = 10000;
|
|
512
511
|
for (int i = 0; i < n; i++) {
|
|
513
512
|
sketch1.update(static_cast<float>(i));
|
|
514
513
|
sketch2.update(static_cast<float>((2 * n) - i - 1));
|
|
515
514
|
}
|
|
516
515
|
|
|
517
|
-
REQUIRE(sketch1.
|
|
518
|
-
REQUIRE(sketch1.
|
|
519
|
-
REQUIRE(sketch2.
|
|
520
|
-
REQUIRE(sketch2.
|
|
516
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
|
517
|
+
REQUIRE(sketch1.get_max_item() == n - 1);
|
|
518
|
+
REQUIRE(sketch2.get_min_item() == n);
|
|
519
|
+
REQUIRE(sketch2.get_max_item() == 2.0f * n - 1);
|
|
521
520
|
|
|
522
521
|
sketch1.merge(sketch2);
|
|
523
522
|
|
|
524
523
|
REQUIRE_FALSE(sketch1.is_empty());
|
|
525
524
|
REQUIRE(sketch1.get_n() == 2 * n);
|
|
526
|
-
REQUIRE(sketch1.
|
|
527
|
-
REQUIRE(sketch1.
|
|
525
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
|
526
|
+
REQUIRE(sketch1.get_max_item() == 2.0f * n - 1);
|
|
528
527
|
REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_128));
|
|
529
528
|
}
|
|
530
529
|
|
|
531
530
|
SECTION("merge from const") {
|
|
532
|
-
quantiles_float_sketch sketch1(128, 0);
|
|
533
|
-
quantiles_float_sketch sketch2(128, 0);
|
|
531
|
+
quantiles_float_sketch sketch1(128, std::less<float>(), 0);
|
|
532
|
+
quantiles_float_sketch sketch2(128, std::less<float>(), 0);
|
|
534
533
|
const int n = 10000;
|
|
535
534
|
for (int i = 0; i < n; i++) {
|
|
536
535
|
sketch1.update(static_cast<float>(i));
|
|
537
536
|
sketch2.update(static_cast<float>((2 * n) - i - 1));
|
|
538
537
|
}
|
|
539
538
|
|
|
540
|
-
REQUIRE(sketch1.
|
|
541
|
-
REQUIRE(sketch1.
|
|
542
|
-
REQUIRE(sketch2.
|
|
543
|
-
REQUIRE(sketch2.
|
|
539
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
|
540
|
+
REQUIRE(sketch1.get_max_item() == n - 1);
|
|
541
|
+
REQUIRE(sketch2.get_min_item() == n);
|
|
542
|
+
REQUIRE(sketch2.get_max_item() == 2.0f * n - 1);
|
|
544
543
|
|
|
545
544
|
sketch1.merge(const_cast<const quantiles_float_sketch&>(sketch2));
|
|
546
545
|
|
|
547
546
|
REQUIRE_FALSE(sketch1.is_empty());
|
|
548
547
|
REQUIRE(sketch1.get_n() == 2 * n);
|
|
549
|
-
REQUIRE(sketch1.
|
|
550
|
-
REQUIRE(sketch1.
|
|
548
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
|
549
|
+
REQUIRE(sketch1.get_max_item() == 2.0f * n - 1);
|
|
551
550
|
REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_128));
|
|
552
551
|
}
|
|
553
552
|
|
|
554
553
|
|
|
555
554
|
SECTION("merge lower k") {
|
|
556
|
-
quantiles_float_sketch sketch1(256, 0);
|
|
557
|
-
quantiles_float_sketch sketch2(128, 0);
|
|
555
|
+
quantiles_float_sketch sketch1(256, std::less<float>(), 0);
|
|
556
|
+
quantiles_float_sketch sketch2(128, std::less<float>(), 0);
|
|
558
557
|
const int n = 10000;
|
|
559
558
|
for (int i = 0; i < n; i++) {
|
|
560
559
|
sketch1.update(static_cast<float>(i));
|
|
561
560
|
sketch2.update(static_cast<float>((2 * n) - i - 1));
|
|
562
561
|
}
|
|
563
562
|
|
|
564
|
-
REQUIRE(sketch1.
|
|
565
|
-
REQUIRE(sketch1.
|
|
566
|
-
REQUIRE(sketch2.
|
|
567
|
-
REQUIRE(sketch2.
|
|
563
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
|
564
|
+
REQUIRE(sketch1.get_max_item() == n - 1);
|
|
565
|
+
REQUIRE(sketch2.get_min_item() == n);
|
|
566
|
+
REQUIRE(sketch2.get_max_item() == 2.0f * n - 1);
|
|
568
567
|
|
|
569
568
|
REQUIRE(sketch1.get_k() == 256);
|
|
570
569
|
REQUIRE(sketch2.get_k() == 128);
|
|
@@ -580,14 +579,14 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
580
579
|
|
|
581
580
|
REQUIRE_FALSE(sketch1.is_empty());
|
|
582
581
|
REQUIRE(sketch1.get_n() == 2 * n);
|
|
583
|
-
REQUIRE(sketch1.
|
|
584
|
-
REQUIRE(sketch1.
|
|
582
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
|
583
|
+
REQUIRE(sketch1.get_max_item() == 2.0f * n - 1);
|
|
585
584
|
REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_128));
|
|
586
585
|
}
|
|
587
586
|
|
|
588
587
|
SECTION("merge exact mode, lower k") {
|
|
589
|
-
quantiles_float_sketch sketch1(256, 0);
|
|
590
|
-
quantiles_float_sketch sketch2(128, 0);
|
|
588
|
+
quantiles_float_sketch sketch1(256, std::less<float>(), 0);
|
|
589
|
+
quantiles_float_sketch sketch2(128, std::less<float>(), 0);
|
|
591
590
|
const int n = 10000;
|
|
592
591
|
for (int i = 0; i < n; i++) {
|
|
593
592
|
sketch1.update(static_cast<float>(i));
|
|
@@ -600,8 +599,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
600
599
|
|
|
601
600
|
REQUIRE_FALSE(sketch1.is_empty());
|
|
602
601
|
REQUIRE(sketch1.get_n() == n);
|
|
603
|
-
REQUIRE(sketch1.
|
|
604
|
-
REQUIRE(sketch1.
|
|
602
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
|
603
|
+
REQUIRE(sketch1.get_max_item() == n - 1);
|
|
605
604
|
REQUIRE(sketch1.get_quantile(0.5) == Approx(n / 2).margin(n / 2 * RANK_EPS_FOR_K_128));
|
|
606
605
|
|
|
607
606
|
sketch2.update(static_cast<float>(0));
|
|
@@ -611,27 +610,27 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
611
610
|
}
|
|
612
611
|
|
|
613
612
|
SECTION("merge min value from other") {
|
|
614
|
-
quantiles_float_sketch sketch1(128, 0);
|
|
615
|
-
quantiles_float_sketch sketch2(128, 0);
|
|
613
|
+
quantiles_float_sketch sketch1(128, std::less<float>(), 0);
|
|
614
|
+
quantiles_float_sketch sketch2(128, std::less<float>(), 0);
|
|
616
615
|
sketch1.update(1.0f);
|
|
617
616
|
sketch2.update(2.0f);
|
|
618
617
|
sketch2.merge(sketch1);
|
|
619
|
-
REQUIRE(sketch2.
|
|
620
|
-
REQUIRE(sketch2.
|
|
618
|
+
REQUIRE(sketch2.get_min_item() == 1.0f);
|
|
619
|
+
REQUIRE(sketch2.get_max_item() == 2.0f);
|
|
621
620
|
}
|
|
622
621
|
|
|
623
622
|
SECTION("merge min and max values from other") {
|
|
624
|
-
quantiles_float_sketch sketch1(128, 0);
|
|
623
|
+
quantiles_float_sketch sketch1(128, std::less<float>(), 0);
|
|
625
624
|
for (int i = 0; i < 1000000; i++) sketch1.update(static_cast<float>(i));
|
|
626
|
-
quantiles_float_sketch sketch2(128, 0);
|
|
625
|
+
quantiles_float_sketch sketch2(128, std::less<float>(), 0);
|
|
627
626
|
sketch2.merge(sketch1);
|
|
628
|
-
REQUIRE(sketch2.
|
|
629
|
-
REQUIRE(sketch2.
|
|
627
|
+
REQUIRE(sketch2.get_min_item() == 0.0f);
|
|
628
|
+
REQUIRE(sketch2.get_max_item() == 999999.0f);
|
|
630
629
|
}
|
|
631
630
|
|
|
632
631
|
SECTION("merge: two empty") {
|
|
633
|
-
quantiles_float_sketch sk1(128, 0);
|
|
634
|
-
quantiles_float_sketch sk2(64, 0);
|
|
632
|
+
quantiles_float_sketch sk1(128, std::less<float>(), 0);
|
|
633
|
+
quantiles_float_sketch sk2(64, std::less<float>(), 0);
|
|
635
634
|
sk1.merge(sk2);
|
|
636
635
|
REQUIRE(sk1.get_n() == 0);
|
|
637
636
|
REQUIRE(sk1.get_k() == 128);
|
|
@@ -643,8 +642,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
643
642
|
|
|
644
643
|
SECTION("merge: exact as input") {
|
|
645
644
|
const uint16_t k = 128;
|
|
646
|
-
quantiles_float_sketch sketch1(2 * k, 0);
|
|
647
|
-
quantiles_float_sketch sketch2(k, 0);
|
|
645
|
+
quantiles_float_sketch sketch1(2 * k, std::less<float>(), 0);
|
|
646
|
+
quantiles_float_sketch sketch2(k, std::less<float>(), 0);
|
|
648
647
|
|
|
649
648
|
for (int i = 0; i < k / 2; i++) {
|
|
650
649
|
sketch1.update(static_cast<float>(i));
|
|
@@ -658,14 +657,14 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
658
657
|
sketch1.merge(sketch2);
|
|
659
658
|
REQUIRE(sketch1.get_n() == 101 * k);
|
|
660
659
|
REQUIRE(sketch1.get_k() == 2 * k); // no reason to have shrunk
|
|
661
|
-
REQUIRE(sketch1.
|
|
662
|
-
REQUIRE(sketch1.
|
|
660
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
|
661
|
+
REQUIRE(sketch1.get_max_item() == static_cast<float>(100 * k - 1));
|
|
663
662
|
}
|
|
664
663
|
|
|
665
664
|
SECTION("merge: src estimation, tgt exact, tgt.k > src.k") {
|
|
666
665
|
const uint16_t k = 128;
|
|
667
|
-
quantiles_float_sketch sketch1(2 * k, 0);
|
|
668
|
-
quantiles_float_sketch sketch2(k, 0);
|
|
666
|
+
quantiles_float_sketch sketch1(2 * k, std::less<float>(), 0);
|
|
667
|
+
quantiles_float_sketch sketch2(k, std::less<float>(), 0);
|
|
669
668
|
|
|
670
669
|
for (int i = 0; i < k / 2; i++) {
|
|
671
670
|
sketch1.update(static_cast<float>(i));
|
|
@@ -679,14 +678,14 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
679
678
|
sketch1.merge(sketch2);
|
|
680
679
|
REQUIRE(sketch1.get_n() == 101 * k);
|
|
681
680
|
REQUIRE(sketch1.get_k() == k); // no reason to have shrunk
|
|
682
|
-
REQUIRE(sketch1.
|
|
683
|
-
REQUIRE(sketch1.
|
|
681
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
|
682
|
+
REQUIRE(sketch1.get_max_item() == static_cast<float>(100 * k - 1));
|
|
684
683
|
}
|
|
685
684
|
|
|
686
685
|
SECTION("merge: both estimation, tgt.k < src.k") {
|
|
687
686
|
const uint16_t k = 128;
|
|
688
|
-
quantiles_float_sketch sketch1(k, 0);
|
|
689
|
-
quantiles_float_sketch sketch2(2 * k, 0);
|
|
687
|
+
quantiles_float_sketch sketch1(k, std::less<float>(), 0);
|
|
688
|
+
quantiles_float_sketch sketch2(2 * k, std::less<float>(), 0);
|
|
690
689
|
|
|
691
690
|
for (int i = 0; i < 100 * k; i++) {
|
|
692
691
|
sketch1.update(static_cast<float>(i));
|
|
@@ -696,15 +695,15 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
696
695
|
sketch1.merge(sketch2);
|
|
697
696
|
REQUIRE(sketch1.get_n() == 200 * k);
|
|
698
697
|
REQUIRE(sketch1.get_k() == k); // no reason to have shrunk
|
|
699
|
-
REQUIRE(sketch1.
|
|
700
|
-
REQUIRE(sketch1.
|
|
698
|
+
REQUIRE(sketch1.get_min_item() == static_cast<float>(-100 * k + 1));
|
|
699
|
+
REQUIRE(sketch1.get_max_item() == static_cast<float>(100 * k - 1));
|
|
701
700
|
REQUIRE(sketch1.get_quantile(0.5) == Approx(0.0).margin(100 * k * RANK_EPS_FOR_K_128));
|
|
702
701
|
}
|
|
703
702
|
|
|
704
703
|
SECTION("merge: src estimation, tgt exact, equal k") {
|
|
705
704
|
const uint16_t k = 128;
|
|
706
|
-
quantiles_float_sketch sketch1(k, 0);
|
|
707
|
-
quantiles_float_sketch sketch2(k, 0);
|
|
705
|
+
quantiles_float_sketch sketch1(k, std::less<float>(), 0);
|
|
706
|
+
quantiles_float_sketch sketch2(k, std::less<float>(), 0);
|
|
708
707
|
|
|
709
708
|
for (int i = 0; i < k / 2; i++) {
|
|
710
709
|
sketch1.update(static_cast<float>(i));
|
|
@@ -718,16 +717,16 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
718
717
|
sketch1.merge(sketch2);
|
|
719
718
|
REQUIRE(sketch1.get_n() == 100 * k);
|
|
720
719
|
REQUIRE(sketch1.get_k() == k);
|
|
721
|
-
REQUIRE(sketch1.
|
|
722
|
-
REQUIRE(sketch1.
|
|
720
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
|
721
|
+
REQUIRE(sketch1.get_max_item() == static_cast<float>(100 * k - 1));
|
|
723
722
|
float n = 100 * k - 1;
|
|
724
723
|
REQUIRE(sketch1.get_quantile(0.5) == Approx(n / 2).margin(n / 2 * RANK_EPS_FOR_K_128));
|
|
725
724
|
}
|
|
726
725
|
|
|
727
726
|
SECTION("merge: both estimation, no base buffer, same k") {
|
|
728
727
|
const uint16_t k = 128;
|
|
729
|
-
quantiles_float_sketch sketch1(k, 0);
|
|
730
|
-
quantiles_float_sketch sketch2(k, 0);
|
|
728
|
+
quantiles_float_sketch sketch1(k, std::less<float>(), 0);
|
|
729
|
+
quantiles_float_sketch sketch2(k, std::less<float>(), 0);
|
|
731
730
|
|
|
732
731
|
uint64_t n = 2 * k;
|
|
733
732
|
for (uint64_t i = 0; i < n; i++) {
|
|
@@ -738,15 +737,15 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
738
737
|
sketch1.merge(sketch2);
|
|
739
738
|
REQUIRE(sketch1.get_n() == 2 * n);
|
|
740
739
|
REQUIRE(sketch1.get_k() == k);
|
|
741
|
-
REQUIRE(sketch1.
|
|
742
|
-
REQUIRE(sketch1.
|
|
740
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
|
741
|
+
REQUIRE(sketch1.get_max_item() == static_cast<float>(2 * n - 1));
|
|
743
742
|
REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_128));
|
|
744
743
|
}
|
|
745
744
|
|
|
746
745
|
SECTION("merge: both estimation, no base buffer, tgt.k < src.k") {
|
|
747
746
|
const uint16_t k = 128;
|
|
748
|
-
quantiles_float_sketch sketch1(k, 0);
|
|
749
|
-
quantiles_float_sketch sketch2(2 * k, 0);
|
|
747
|
+
quantiles_float_sketch sketch1(k, std::less<float>(), 0);
|
|
748
|
+
quantiles_float_sketch sketch2(2 * k, std::less<float>(), 0);
|
|
750
749
|
|
|
751
750
|
uint64_t n = 4 * k;
|
|
752
751
|
for (uint64_t i = 0; i < n; i++) {
|
|
@@ -757,16 +756,16 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
757
756
|
sketch1.merge(sketch2);
|
|
758
757
|
REQUIRE(sketch1.get_n() == 2 * n);
|
|
759
758
|
REQUIRE(sketch1.get_k() == k);
|
|
760
|
-
REQUIRE(sketch1.
|
|
761
|
-
REQUIRE(sketch1.
|
|
759
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
|
760
|
+
REQUIRE(sketch1.get_max_item() == static_cast<float>(2 * n - 1));
|
|
762
761
|
REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_128));
|
|
763
762
|
}
|
|
764
763
|
|
|
765
764
|
SECTION("sketch of ints") {
|
|
766
765
|
quantiles_sketch<int> sketch;
|
|
767
766
|
REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
|
|
768
|
-
REQUIRE_THROWS_AS(sketch.
|
|
769
|
-
REQUIRE_THROWS_AS(sketch.
|
|
767
|
+
REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
|
|
768
|
+
REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
|
|
770
769
|
|
|
771
770
|
const int n = 10000;
|
|
772
771
|
for (int i = 0; i < n; i++) sketch.update(i);
|
|
@@ -781,8 +780,8 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
781
780
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
782
781
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
783
782
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
784
|
-
REQUIRE(sketch2.
|
|
785
|
-
REQUIRE(sketch2.
|
|
783
|
+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
|
|
784
|
+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
|
|
786
785
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
|
|
787
786
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
|
|
788
787
|
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
|
|
@@ -791,30 +790,31 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
791
790
|
}
|
|
792
791
|
|
|
793
792
|
SECTION("sketch of strings stream") {
|
|
794
|
-
quantiles_string_sketch sketch1(128, 0);
|
|
793
|
+
quantiles_string_sketch sketch1(128, std::less<std::string>(), 0);
|
|
795
794
|
REQUIRE_THROWS_AS(sketch1.get_quantile(0), std::runtime_error);
|
|
796
|
-
REQUIRE_THROWS_AS(sketch1.
|
|
797
|
-
REQUIRE_THROWS_AS(sketch1.
|
|
795
|
+
REQUIRE_THROWS_AS(sketch1.get_min_item(), std::runtime_error);
|
|
796
|
+
REQUIRE_THROWS_AS(sketch1.get_max_item(), std::runtime_error);
|
|
798
797
|
REQUIRE(sketch1.get_serialized_size_bytes() == 8);
|
|
799
798
|
|
|
800
799
|
const int n = 1000;
|
|
801
800
|
for (int i = 0; i < n; i++) sketch1.update(std::to_string(i));
|
|
802
801
|
|
|
803
|
-
REQUIRE(sketch1.
|
|
804
|
-
REQUIRE(sketch1.
|
|
802
|
+
REQUIRE(sketch1.get_min_item() == std::string("0"));
|
|
803
|
+
REQUIRE(sketch1.get_max_item() == std::string("999"));
|
|
805
804
|
|
|
806
805
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
807
806
|
sketch1.serialize(s);
|
|
808
807
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch1.get_serialized_size_bytes());
|
|
809
|
-
auto sketch2 = quantiles_string_sketch::deserialize(s, serde<std::string>(),
|
|
808
|
+
auto sketch2 = quantiles_string_sketch::deserialize(s, serde<std::string>(),
|
|
809
|
+
std::less<std::string>(), 0);
|
|
810
810
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
|
|
811
811
|
REQUIRE(s.tellg() == s.tellp());
|
|
812
812
|
REQUIRE(sketch2.is_empty() == sketch1.is_empty());
|
|
813
813
|
REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
|
|
814
814
|
REQUIRE(sketch2.get_n() == sketch1.get_n());
|
|
815
815
|
REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
|
|
816
|
-
REQUIRE(sketch2.
|
|
817
|
-
REQUIRE(sketch2.
|
|
816
|
+
REQUIRE(sketch2.get_min_item() == sketch1.get_min_item());
|
|
817
|
+
REQUIRE(sketch2.get_max_item() == sketch1.get_max_item());
|
|
818
818
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch1.get_normalized_rank_error(false));
|
|
819
819
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch1.get_normalized_rank_error(true));
|
|
820
820
|
REQUIRE(sketch2.get_quantile(0.5) == sketch1.get_quantile(0.5));
|
|
@@ -827,28 +827,29 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
827
827
|
}
|
|
828
828
|
|
|
829
829
|
SECTION("sketch of strings bytes") {
|
|
830
|
-
quantiles_string_sketch sketch1(128, 0);
|
|
830
|
+
quantiles_string_sketch sketch1(128, std::less<std::string>(), 0);
|
|
831
831
|
REQUIRE_THROWS_AS(sketch1.get_quantile(0), std::runtime_error);
|
|
832
|
-
REQUIRE_THROWS_AS(sketch1.
|
|
833
|
-
REQUIRE_THROWS_AS(sketch1.
|
|
832
|
+
REQUIRE_THROWS_AS(sketch1.get_min_item(), std::runtime_error);
|
|
833
|
+
REQUIRE_THROWS_AS(sketch1.get_max_item(), std::runtime_error);
|
|
834
834
|
REQUIRE(sketch1.get_serialized_size_bytes() == 8);
|
|
835
835
|
|
|
836
836
|
const int n = 10000;
|
|
837
837
|
for (int i = 0; i < n; i++) sketch1.update(std::to_string(i));
|
|
838
838
|
|
|
839
|
-
REQUIRE(sketch1.
|
|
840
|
-
REQUIRE(sketch1.
|
|
839
|
+
REQUIRE(sketch1.get_min_item() == std::string("0"));
|
|
840
|
+
REQUIRE(sketch1.get_max_item() == std::string("9999"));
|
|
841
841
|
|
|
842
842
|
auto bytes = sketch1.serialize();
|
|
843
843
|
REQUIRE(bytes.size() == sketch1.get_serialized_size_bytes());
|
|
844
|
-
auto sketch2 = quantiles_string_sketch::deserialize(bytes.data(), bytes.size(), serde<std::string>(),
|
|
844
|
+
auto sketch2 = quantiles_string_sketch::deserialize(bytes.data(), bytes.size(), serde<std::string>(),
|
|
845
|
+
std::less<std::string>(), 0);
|
|
845
846
|
REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
|
|
846
847
|
REQUIRE(sketch2.is_empty() == sketch1.is_empty());
|
|
847
848
|
REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
|
|
848
849
|
REQUIRE(sketch2.get_n() == sketch1.get_n());
|
|
849
850
|
REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
|
|
850
|
-
REQUIRE(sketch2.
|
|
851
|
-
REQUIRE(sketch2.
|
|
851
|
+
REQUIRE(sketch2.get_min_item() == sketch1.get_min_item());
|
|
852
|
+
REQUIRE(sketch2.get_max_item() == sketch1.get_max_item());
|
|
852
853
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch1.get_normalized_rank_error(false));
|
|
853
854
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch1.get_normalized_rank_error(true));
|
|
854
855
|
REQUIRE(sketch2.get_quantile(0.5) == sketch1.get_quantile(0.5));
|
|
@@ -857,11 +858,12 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
857
858
|
}
|
|
858
859
|
|
|
859
860
|
SECTION("sketch of strings, single item, bytes") {
|
|
860
|
-
quantiles_string_sketch sketch1(64, 0);
|
|
861
|
+
quantiles_string_sketch sketch1(64, std::less<std::string>(), 0);
|
|
861
862
|
sketch1.update("a");
|
|
862
863
|
auto bytes = sketch1.serialize();
|
|
863
864
|
REQUIRE(bytes.size() == sketch1.get_serialized_size_bytes());
|
|
864
|
-
auto sketch2 = quantiles_string_sketch::deserialize(bytes.data(), bytes.size(),
|
|
865
|
+
auto sketch2 = quantiles_string_sketch::deserialize(bytes.data(), bytes.size(),
|
|
866
|
+
serde<std::string>(), std::less<std::string>(), 0);
|
|
865
867
|
REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
|
|
866
868
|
}
|
|
867
869
|
|
|
@@ -886,20 +888,20 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
886
888
|
|
|
887
889
|
SECTION("move") {
|
|
888
890
|
quantiles_sketch<int> sketch1;
|
|
889
|
-
const int n
|
|
891
|
+
const int n = 100;
|
|
890
892
|
for (int i = 0; i < n; i++) sketch1.update(i);
|
|
891
893
|
|
|
892
894
|
// move constructor
|
|
893
895
|
quantiles_sketch<int> sketch2(std::move(sketch1));
|
|
894
896
|
for (int i = 0; i < n; i++) {
|
|
895
|
-
REQUIRE(sketch2.get_rank(i) == (
|
|
897
|
+
REQUIRE(sketch2.get_rank(i) == static_cast<double>(i + 1) / n);
|
|
896
898
|
}
|
|
897
899
|
|
|
898
900
|
// move assignment
|
|
899
901
|
quantiles_sketch<int> sketch3;
|
|
900
902
|
sketch3 = std::move(sketch2);
|
|
901
903
|
for (int i = 0; i < n; i++) {
|
|
902
|
-
REQUIRE(sketch3.get_rank(i) == (
|
|
904
|
+
REQUIRE(sketch3.get_rank(i) == static_cast<double>(i + 1) / n);
|
|
903
905
|
}
|
|
904
906
|
}
|
|
905
907
|
|
|
@@ -908,7 +910,7 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
908
910
|
const int n = 403;
|
|
909
911
|
quantiles_sketch<double> sk_double(k);
|
|
910
912
|
|
|
911
|
-
quantiles_sketch<float> sk_float(k
|
|
913
|
+
quantiles_sketch<float> sk_float(k);
|
|
912
914
|
REQUIRE(sk_float.is_empty());
|
|
913
915
|
|
|
914
916
|
for (int i = 0; i < n; ++i) sk_double.update(i + .01);
|
|
@@ -918,10 +920,10 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
918
920
|
REQUIRE(sk_double.get_k() == sk_int.get_k());
|
|
919
921
|
REQUIRE(sk_double.get_num_retained() == sk_int.get_num_retained());
|
|
920
922
|
|
|
921
|
-
auto sv_double = sk_double.get_sorted_view(
|
|
923
|
+
auto sv_double = sk_double.get_sorted_view();
|
|
922
924
|
std::vector<std::pair<double, uint64_t>> vec_double(sv_double.begin(), sv_double.end());
|
|
923
925
|
|
|
924
|
-
auto sv_int = sk_int.get_sorted_view(
|
|
926
|
+
auto sv_int = sk_int.get_sorted_view();
|
|
925
927
|
std::vector<std::pair<int, uint64_t>> vec_int(sv_int.begin(), sv_int.end());
|
|
926
928
|
|
|
927
929
|
REQUIRE(vec_double.size() == vec_int.size());
|
|
@@ -966,6 +968,12 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
|
966
968
|
REQUIRE(sb.get_n() == 3);
|
|
967
969
|
}
|
|
968
970
|
|
|
971
|
+
SECTION("comparator and allocator") {
|
|
972
|
+
quantiles_sketch<int> sketch;
|
|
973
|
+
REQUIRE(sketch.get_comparator()(1, 2));
|
|
974
|
+
REQUIRE(sketch.get_allocator() == std::allocator<int>());
|
|
975
|
+
}
|
|
976
|
+
|
|
969
977
|
// cleanup
|
|
970
978
|
if (test_allocator_total_bytes != 0) {
|
|
971
979
|
REQUIRE(test_allocator_total_bytes == 0);
|