datasketches 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/kll_wrapper.cpp +20 -20
- data/ext/datasketches/theta_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
- data/vendor/datasketches-cpp/MANIFEST.in +21 -2
- data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
- data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
- data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
- data/vendor/datasketches-cpp/pyproject.toml +17 -12
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
- data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/setup.py +14 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
- data/vendor/datasketches-cpp/tox.ini +26 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
- data/vendor/datasketches-cpp/version.cfg.in +1 -0
- metadata +14 -5
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
|
@@ -39,9 +39,9 @@ static std::string testBinaryInputPath = "test/";
|
|
|
39
39
|
#endif
|
|
40
40
|
|
|
41
41
|
// typical usage would be just kll_sketch<float> or kll_sketch<std::string>, but here we use test_allocator
|
|
42
|
-
using kll_float_sketch = kll_sketch<float, std::less<float>,
|
|
42
|
+
using kll_float_sketch = kll_sketch<float, std::less<float>, test_allocator<float>>;
|
|
43
43
|
// let std::string use the default allocator for simplicity, otherwise we need to define "less" and "serde"
|
|
44
|
-
using kll_string_sketch = kll_sketch<std::string, std::less<std::string>,
|
|
44
|
+
using kll_string_sketch = kll_sketch<std::string, std::less<std::string>, test_allocator<std::string>>;
|
|
45
45
|
|
|
46
46
|
TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
47
47
|
|
|
@@ -49,71 +49,78 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
|
49
49
|
test_allocator_total_bytes = 0;
|
|
50
50
|
|
|
51
51
|
SECTION("k limits") {
|
|
52
|
-
kll_float_sketch sketch1(kll_float_sketch::MIN_K, 0); // this should work
|
|
53
|
-
kll_float_sketch sketch2(kll_float_sketch::MAX_K, 0); // this should work
|
|
54
|
-
REQUIRE_THROWS_AS(new kll_float_sketch(kll_float_sketch::MIN_K - 1, 0), std::invalid_argument);
|
|
52
|
+
kll_float_sketch sketch1(kll_float_sketch::MIN_K, std::less<float>(), 0); // this should work
|
|
53
|
+
kll_float_sketch sketch2(kll_float_sketch::MAX_K, std::less<float>(), 0); // this should work
|
|
54
|
+
REQUIRE_THROWS_AS(new kll_float_sketch(kll_float_sketch::MIN_K - 1, std::less<float>(), 0), std::invalid_argument);
|
|
55
55
|
// MAX_K + 1 makes no sense because k is uint16_t
|
|
56
|
+
//std::cout << "sizeof(kll_sketch<float>)=" << sizeof(kll_sketch<float>) << "\n";
|
|
57
|
+
//std::cout << "sizeof(kll_sketch<double>)=" << sizeof(kll_sketch<double>) << "\n";
|
|
56
58
|
}
|
|
57
59
|
|
|
58
60
|
SECTION("empty") {
|
|
59
|
-
kll_float_sketch sketch(200, 0);
|
|
61
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
|
60
62
|
REQUIRE(sketch.is_empty());
|
|
61
63
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
62
64
|
REQUIRE(sketch.get_n() == 0);
|
|
63
65
|
REQUIRE(sketch.get_num_retained() == 0);
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
const double
|
|
69
|
-
|
|
66
|
+
REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
|
|
67
|
+
REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
|
|
68
|
+
REQUIRE_THROWS_AS(sketch.get_rank(0), std::runtime_error);
|
|
69
|
+
REQUIRE_THROWS_AS(sketch.get_quantile(0.5), std::runtime_error);
|
|
70
|
+
const double ranks[3] {0, 0.5, 1};
|
|
71
|
+
REQUIRE_THROWS_AS(sketch.get_quantiles(ranks, 3), std::runtime_error);
|
|
70
72
|
const float split_points[1] {0};
|
|
71
|
-
|
|
72
|
-
|
|
73
|
+
REQUIRE_THROWS_AS(sketch.get_PMF(split_points, 1), std::runtime_error);
|
|
74
|
+
REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::runtime_error);
|
|
73
75
|
|
|
74
|
-
for (auto
|
|
75
|
-
(
|
|
76
|
+
for (auto pair: sketch) {
|
|
77
|
+
unused(pair); // to suppress "unused" warning
|
|
76
78
|
FAIL("should be no iterations over an empty sketch");
|
|
77
79
|
}
|
|
78
80
|
}
|
|
79
81
|
|
|
80
82
|
SECTION("get bad quantile") {
|
|
81
|
-
kll_float_sketch sketch(200, 0);
|
|
83
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
|
82
84
|
sketch.update(0); // has to be non-empty to reach the check
|
|
83
85
|
REQUIRE_THROWS_AS(sketch.get_quantile(-1), std::invalid_argument);
|
|
84
86
|
}
|
|
85
87
|
|
|
86
88
|
SECTION("one item") {
|
|
87
|
-
kll_float_sketch sketch(200, 0);
|
|
89
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
|
88
90
|
sketch.update(1.0f);
|
|
89
91
|
REQUIRE_FALSE(sketch.is_empty());
|
|
90
92
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
91
93
|
REQUIRE(sketch.get_n() == 1);
|
|
92
94
|
REQUIRE(sketch.get_num_retained() == 1);
|
|
93
|
-
REQUIRE(sketch.get_rank(1.0f) == 0.0);
|
|
94
|
-
REQUIRE(sketch.get_rank
|
|
95
|
-
REQUIRE(sketch.get_rank(2.0f) == 1.0);
|
|
95
|
+
REQUIRE(sketch.get_rank(1.0f, false) == 0.0);
|
|
96
|
+
REQUIRE(sketch.get_rank(1.0f) == 1.0);
|
|
97
|
+
REQUIRE(sketch.get_rank(2.0f, false) == 1.0);
|
|
96
98
|
REQUIRE(sketch.get_rank(std::numeric_limits<float>::infinity()) == 1.0);
|
|
97
|
-
REQUIRE(sketch.
|
|
98
|
-
REQUIRE(sketch.
|
|
99
|
+
REQUIRE(sketch.get_min_item() == 1.0);
|
|
100
|
+
REQUIRE(sketch.get_max_item() == 1.0);
|
|
99
101
|
REQUIRE(sketch.get_quantile(0.5) == 1.0);
|
|
100
|
-
const double
|
|
101
|
-
auto quantiles = sketch.get_quantiles(
|
|
102
|
+
const double ranks[3] {0, 0.5, 1};
|
|
103
|
+
auto quantiles = sketch.get_quantiles(ranks, 3);
|
|
102
104
|
REQUIRE(quantiles.size() == 3);
|
|
103
105
|
REQUIRE(quantiles[0] == 1.0);
|
|
104
106
|
REQUIRE(quantiles[1] == 1.0);
|
|
105
107
|
REQUIRE(quantiles[2] == 1.0);
|
|
106
108
|
|
|
107
109
|
int count = 0;
|
|
108
|
-
for (auto
|
|
109
|
-
REQUIRE(
|
|
110
|
+
for (auto pair: sketch) {
|
|
111
|
+
REQUIRE(pair.second == 1);
|
|
110
112
|
++count;
|
|
111
113
|
}
|
|
112
114
|
REQUIRE(count == 1);
|
|
115
|
+
|
|
116
|
+
// iterator dereferencing
|
|
117
|
+
auto it = sketch.begin();
|
|
118
|
+
REQUIRE(it->first == 1.0f);
|
|
119
|
+
REQUIRE((*it).first == 1.0f);
|
|
113
120
|
}
|
|
114
121
|
|
|
115
122
|
SECTION("NaN") {
|
|
116
|
-
kll_float_sketch sketch(200, 0);
|
|
123
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
|
117
124
|
sketch.update(std::numeric_limits<float>::quiet_NaN());
|
|
118
125
|
REQUIRE(sketch.is_empty());
|
|
119
126
|
|
|
@@ -123,44 +130,44 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
|
123
130
|
}
|
|
124
131
|
|
|
125
132
|
SECTION("many items, exact mode") {
|
|
126
|
-
kll_float_sketch sketch(200, 0);
|
|
133
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
|
127
134
|
const uint32_t n = 200;
|
|
128
|
-
for (uint32_t i =
|
|
135
|
+
for (uint32_t i = 1; i <= n; i++) {
|
|
129
136
|
sketch.update(static_cast<float>(i));
|
|
130
|
-
REQUIRE(sketch.get_n() == i
|
|
137
|
+
REQUIRE(sketch.get_n() == i);
|
|
131
138
|
}
|
|
132
139
|
REQUIRE_FALSE(sketch.is_empty());
|
|
133
140
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
134
141
|
REQUIRE(sketch.get_num_retained() == n);
|
|
135
|
-
REQUIRE(sketch.
|
|
136
|
-
REQUIRE(sketch.get_quantile(0) ==
|
|
137
|
-
REQUIRE(sketch.
|
|
138
|
-
REQUIRE(sketch.get_quantile(1) == n
|
|
142
|
+
REQUIRE(sketch.get_min_item() == 1);
|
|
143
|
+
REQUIRE(sketch.get_quantile(0) == 1);
|
|
144
|
+
REQUIRE(sketch.get_max_item() == n);
|
|
145
|
+
REQUIRE(sketch.get_quantile(1) == n);
|
|
139
146
|
|
|
140
|
-
const double
|
|
141
|
-
auto quantiles = sketch.get_quantiles(
|
|
147
|
+
const double ranks[3] {0, 0.5, 1};
|
|
148
|
+
auto quantiles = sketch.get_quantiles(ranks, 3);
|
|
142
149
|
REQUIRE(quantiles.size() == 3);
|
|
143
|
-
REQUIRE(quantiles[0] ==
|
|
150
|
+
REQUIRE(quantiles[0] == 1);
|
|
144
151
|
REQUIRE(quantiles[1] == n / 2);
|
|
145
|
-
REQUIRE(quantiles[2] == n
|
|
146
|
-
|
|
147
|
-
for (uint32_t i = 0; i < n; i++) {
|
|
148
|
-
const double true_rank = (double) i / n;
|
|
149
|
-
REQUIRE(sketch.get_rank(static_cast<float>(i)) == true_rank);
|
|
150
|
-
const double true_rank_inclusive = (double) (i + 1) / n;
|
|
151
|
-
REQUIRE(sketch.get_rank<true>(static_cast<float>(i)) == true_rank_inclusive);
|
|
152
|
-
}
|
|
152
|
+
REQUIRE(quantiles[2] == n);
|
|
153
153
|
|
|
154
|
-
//
|
|
154
|
+
// alternative method must produce the same result
|
|
155
155
|
auto quantiles2 = sketch.get_quantiles(3);
|
|
156
156
|
REQUIRE(quantiles2.size() == 3);
|
|
157
157
|
REQUIRE(quantiles[0] == quantiles2[0]);
|
|
158
158
|
REQUIRE(quantiles[1] == quantiles2[1]);
|
|
159
159
|
REQUIRE(quantiles[2] == quantiles2[2]);
|
|
160
|
+
|
|
161
|
+
for (uint32_t i = 1; i <= n; i++) {
|
|
162
|
+
const double true_rank_inclusive = static_cast<double>(i) / n;
|
|
163
|
+
REQUIRE(sketch.get_rank(static_cast<float>(i)) == true_rank_inclusive);
|
|
164
|
+
const double true_rank_exclusive = static_cast<double>(i - 1) / n;
|
|
165
|
+
REQUIRE(sketch.get_rank(static_cast<float>(i), false) == true_rank_exclusive);
|
|
166
|
+
}
|
|
160
167
|
}
|
|
161
168
|
|
|
162
169
|
SECTION("10 items") {
|
|
163
|
-
kll_float_sketch sketch(200, 0);
|
|
170
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
|
164
171
|
sketch.update(1.0f);
|
|
165
172
|
sketch.update(2.0f);
|
|
166
173
|
sketch.update(3.0f);
|
|
@@ -172,23 +179,23 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
|
172
179
|
sketch.update(9.0f);
|
|
173
180
|
sketch.update(10.0f);
|
|
174
181
|
REQUIRE(sketch.get_quantile(0) == 1.0);
|
|
175
|
-
REQUIRE(sketch.get_quantile(0.5) ==
|
|
182
|
+
REQUIRE(sketch.get_quantile(0.5) == 5.0);
|
|
176
183
|
REQUIRE(sketch.get_quantile(0.99) == 10.0);
|
|
177
184
|
REQUIRE(sketch.get_quantile(1) == 10.0);
|
|
178
185
|
}
|
|
179
186
|
|
|
180
187
|
SECTION("100 items") {
|
|
181
|
-
kll_float_sketch sketch(200, 0);
|
|
188
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
|
182
189
|
for (int i = 0; i < 100; ++i) sketch.update(static_cast<float>(i));
|
|
183
190
|
REQUIRE(sketch.get_quantile(0) == 0);
|
|
184
|
-
REQUIRE(sketch.get_quantile(0.01) ==
|
|
185
|
-
REQUIRE(sketch.get_quantile(0.5) ==
|
|
186
|
-
REQUIRE(sketch.get_quantile(0.99) ==
|
|
191
|
+
REQUIRE(sketch.get_quantile(0.01) == 0);
|
|
192
|
+
REQUIRE(sketch.get_quantile(0.5) == 49);
|
|
193
|
+
REQUIRE(sketch.get_quantile(0.99) == 98.0);
|
|
187
194
|
REQUIRE(sketch.get_quantile(1) == 99.0);
|
|
188
195
|
}
|
|
189
196
|
|
|
190
197
|
SECTION("many items, estimation mode") {
|
|
191
|
-
kll_float_sketch sketch(200, 0);
|
|
198
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
|
192
199
|
const int n = 1000000;
|
|
193
200
|
for (int i = 0; i < n; i++) {
|
|
194
201
|
sketch.update(static_cast<float>(i));
|
|
@@ -196,87 +203,62 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
|
196
203
|
}
|
|
197
204
|
REQUIRE_FALSE(sketch.is_empty());
|
|
198
205
|
REQUIRE(sketch.is_estimation_mode());
|
|
199
|
-
REQUIRE(sketch.
|
|
200
|
-
REQUIRE(sketch.
|
|
201
|
-
REQUIRE(sketch.get_max_value() == n - 1); // max value is exact
|
|
202
|
-
REQUIRE(sketch.get_quantile(1) == n - 1); // max value is exact
|
|
206
|
+
REQUIRE(sketch.get_min_item() == 0.0); // min value is exact
|
|
207
|
+
REQUIRE(sketch.get_max_item() == n - 1); // max value is exact
|
|
203
208
|
|
|
204
209
|
// test rank
|
|
205
210
|
for (int i = 0; i < n; i++) {
|
|
206
211
|
const double trueRank = (double) i / n;
|
|
207
|
-
REQUIRE(sketch.get_rank(static_cast<float>(i)) == Approx(trueRank).margin(RANK_EPS_FOR_K_200));
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
// test quantiles at every 0.1 percentage point
|
|
211
|
-
double fractions[1001];
|
|
212
|
-
double reverse_fractions[1001]; // check that ordering does not matter
|
|
213
|
-
for (int i = 0; i < 1001; i++) {
|
|
214
|
-
fractions[i] = (double) i / 1000;
|
|
215
|
-
reverse_fractions[1000 - i] = fractions[i];
|
|
216
|
-
}
|
|
217
|
-
auto quantiles = sketch.get_quantiles(fractions, 1001);
|
|
218
|
-
auto reverse_quantiles = sketch.get_quantiles(reverse_fractions, 1001);
|
|
219
|
-
float previous_quantile(0);
|
|
220
|
-
for (int i = 0; i < 1001; i++) {
|
|
221
|
-
// expensive in a loop, just to check the equivalence here, not advised for real code
|
|
222
|
-
const float quantile = sketch.get_quantile(fractions[i]);
|
|
223
|
-
REQUIRE(quantiles[i] == quantile);
|
|
224
|
-
REQUIRE(reverse_quantiles[1000 - i] == quantile);
|
|
225
|
-
REQUIRE(previous_quantile <= quantile);
|
|
226
|
-
previous_quantile = quantile;
|
|
212
|
+
REQUIRE(sketch.get_rank(static_cast<float>(i), false) == Approx(trueRank).margin(RANK_EPS_FOR_K_200));
|
|
227
213
|
}
|
|
228
214
|
|
|
229
215
|
//std::cout << sketch.to_string();
|
|
230
216
|
|
|
231
217
|
uint32_t count = 0;
|
|
232
218
|
uint64_t total_weight = 0;
|
|
233
|
-
for (auto
|
|
219
|
+
for (auto pair: sketch) {
|
|
234
220
|
++count;
|
|
235
|
-
total_weight +=
|
|
221
|
+
total_weight += pair.second;
|
|
236
222
|
}
|
|
237
223
|
REQUIRE(count == sketch.get_num_retained());
|
|
238
224
|
REQUIRE(total_weight == sketch.get_n());
|
|
239
225
|
}
|
|
240
226
|
|
|
241
|
-
SECTION("consistency between get_rank
|
|
242
|
-
kll_float_sketch sketch(200, 0);
|
|
243
|
-
const int n =
|
|
227
|
+
SECTION("consistency between get_rank and get_PMF/CDF") {
|
|
228
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
|
229
|
+
const int n = 200;
|
|
244
230
|
float values[n];
|
|
245
231
|
for (int i = 0; i < n; i++) {
|
|
246
232
|
sketch.update(static_cast<float>(i));
|
|
247
233
|
values[i] = static_cast<float>(i);
|
|
248
234
|
}
|
|
249
|
-
{ // inclusive=false
|
|
250
|
-
const auto ranks(sketch.get_CDF(values, n));
|
|
251
|
-
const auto pmf(sketch.get_PMF(values, n));
|
|
235
|
+
{ // inclusive=false
|
|
236
|
+
const auto ranks(sketch.get_CDF(values, n, false));
|
|
237
|
+
const auto pmf(sketch.get_PMF(values, n, false));
|
|
252
238
|
|
|
253
239
|
double subtotal_pmf = 0;
|
|
254
240
|
for (int i = 0; i < n; i++) {
|
|
255
|
-
if (sketch.get_rank(values[i]) != ranks[i]) {
|
|
256
|
-
|
|
257
|
-
REQUIRE(sketch.get_rank(values[i]) == ranks[i]);
|
|
241
|
+
if (sketch.get_rank(values[i], false) != ranks[i]) {
|
|
242
|
+
FAIL("checking rank vs CDF for value " + std::to_string(i));
|
|
258
243
|
}
|
|
259
244
|
subtotal_pmf += pmf[i];
|
|
260
245
|
if (abs(ranks[i] - subtotal_pmf) > NUMERIC_NOISE_TOLERANCE) {
|
|
261
|
-
|
|
262
|
-
REQUIRE(ranks[i] == Approx(subtotal_pmf).margin(NUMERIC_NOISE_TOLERANCE));
|
|
246
|
+
FAIL("CDF vs PMF for value " + std::to_string(i));
|
|
263
247
|
}
|
|
264
248
|
}
|
|
265
249
|
}
|
|
266
|
-
{ // inclusive=true
|
|
267
|
-
const auto ranks(sketch.get_CDF
|
|
268
|
-
const auto pmf(sketch.get_PMF
|
|
250
|
+
{ // inclusive=true (default)
|
|
251
|
+
const auto ranks(sketch.get_CDF(values, n));
|
|
252
|
+
const auto pmf(sketch.get_PMF(values, n));
|
|
269
253
|
|
|
270
254
|
double subtotal_pmf = 0;
|
|
271
255
|
for (int i = 0; i < n; i++) {
|
|
272
|
-
if (sketch.get_rank
|
|
273
|
-
|
|
274
|
-
REQUIRE(sketch.get_rank(values[i]) == ranks[i]);
|
|
256
|
+
if (sketch.get_rank(values[i]) != ranks[i]) {
|
|
257
|
+
FAIL("checking rank vs CDF for value " + std::to_string(i));
|
|
275
258
|
}
|
|
276
259
|
subtotal_pmf += pmf[i];
|
|
277
260
|
if (abs(ranks[i] - subtotal_pmf) > NUMERIC_NOISE_TOLERANCE) {
|
|
278
|
-
|
|
279
|
-
REQUIRE(ranks[i] == Approx(subtotal_pmf).margin(NUMERIC_NOISE_TOLERANCE));
|
|
261
|
+
FAIL("CDF vs PMF for value " + std::to_string(i));
|
|
280
262
|
}
|
|
281
263
|
}
|
|
282
264
|
}
|
|
@@ -286,151 +268,151 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
|
286
268
|
std::ifstream is;
|
|
287
269
|
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
288
270
|
is.open(testBinaryInputPath + "kll_sketch_from_java.sk", std::ios::binary);
|
|
289
|
-
auto sketch = kll_float_sketch::deserialize(is,
|
|
271
|
+
auto sketch = kll_float_sketch::deserialize(is, serde<float>(), std::less<float>(), 0);
|
|
290
272
|
REQUIRE_FALSE(sketch.is_empty());
|
|
291
273
|
REQUIRE(sketch.is_estimation_mode());
|
|
292
274
|
REQUIRE(sketch.get_n() == 1000000);
|
|
293
275
|
REQUIRE(sketch.get_num_retained() == 614);
|
|
294
|
-
REQUIRE(sketch.
|
|
295
|
-
REQUIRE(sketch.
|
|
276
|
+
REQUIRE(sketch.get_min_item() == 0.0);
|
|
277
|
+
REQUIRE(sketch.get_max_item() == 999999.0);
|
|
296
278
|
}
|
|
297
279
|
|
|
298
280
|
SECTION("stream serialize deserialize empty") {
|
|
299
|
-
kll_float_sketch sketch(200, 0);
|
|
281
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
|
300
282
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
301
283
|
sketch.serialize(s);
|
|
302
284
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
|
|
303
|
-
auto sketch2 = kll_float_sketch::deserialize(s,
|
|
285
|
+
auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), std::less<float>(), 0);
|
|
304
286
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
|
|
305
287
|
REQUIRE(s.tellg() == s.tellp());
|
|
306
288
|
REQUIRE(sketch2.is_empty() == sketch.is_empty());
|
|
307
289
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
308
290
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
309
291
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
310
|
-
|
|
311
|
-
|
|
292
|
+
REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
|
|
293
|
+
REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
|
|
312
294
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
|
|
313
295
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
|
|
314
296
|
}
|
|
315
297
|
|
|
316
298
|
SECTION("bytes serialize deserialize empty") {
|
|
317
|
-
kll_float_sketch sketch(200, 0);
|
|
299
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
|
318
300
|
auto bytes = sketch.serialize();
|
|
319
|
-
auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), 0);
|
|
301
|
+
auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), std::less<float>(), 0);
|
|
320
302
|
REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
|
|
321
303
|
REQUIRE(sketch2.is_empty() == sketch.is_empty());
|
|
322
304
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
323
305
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
324
306
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
325
|
-
|
|
326
|
-
|
|
307
|
+
REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
|
|
308
|
+
REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
|
|
327
309
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
|
|
328
310
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
|
|
329
311
|
}
|
|
330
312
|
|
|
331
313
|
SECTION("stream serialize deserialize one item") {
|
|
332
|
-
kll_float_sketch sketch(200, 0);
|
|
314
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
|
333
315
|
sketch.update(1.0f);
|
|
334
316
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
335
317
|
sketch.serialize(s);
|
|
336
318
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
|
|
337
|
-
auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), 0);
|
|
319
|
+
auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), std::less<float>(), 0);
|
|
338
320
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
|
|
339
321
|
REQUIRE(s.tellg() == s.tellp());
|
|
340
322
|
REQUIRE_FALSE(sketch2.is_empty());
|
|
341
323
|
REQUIRE_FALSE(sketch2.is_estimation_mode());
|
|
342
324
|
REQUIRE(sketch2.get_n() == 1);
|
|
343
325
|
REQUIRE(sketch2.get_num_retained() == 1);
|
|
344
|
-
REQUIRE(sketch2.
|
|
345
|
-
REQUIRE(sketch2.
|
|
326
|
+
REQUIRE(sketch2.get_min_item() == 1.0);
|
|
327
|
+
REQUIRE(sketch2.get_max_item() == 1.0);
|
|
346
328
|
REQUIRE(sketch2.get_quantile(0.5) == 1.0);
|
|
347
|
-
REQUIRE(sketch2.get_rank(1) == 0.0);
|
|
348
|
-
REQUIRE(sketch2.get_rank(2) == 1.0);
|
|
329
|
+
REQUIRE(sketch2.get_rank(1, false) == 0.0);
|
|
330
|
+
REQUIRE(sketch2.get_rank(2, false) == 1.0);
|
|
349
331
|
}
|
|
350
332
|
|
|
351
333
|
SECTION("bytes serialize deserialize one item") {
|
|
352
|
-
kll_float_sketch sketch(200, 0);
|
|
334
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
|
353
335
|
sketch.update(1.0f);
|
|
354
336
|
auto bytes = sketch.serialize();
|
|
355
337
|
REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
|
|
356
|
-
auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), 0);
|
|
338
|
+
auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), std::less<float>(), 0);
|
|
357
339
|
REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
|
|
358
340
|
REQUIRE_FALSE(sketch2.is_empty());
|
|
359
341
|
REQUIRE_FALSE(sketch2.is_estimation_mode());
|
|
360
342
|
REQUIRE(sketch2.get_n() == 1);
|
|
361
343
|
REQUIRE(sketch2.get_num_retained() == 1);
|
|
362
|
-
REQUIRE(sketch2.
|
|
363
|
-
REQUIRE(sketch2.
|
|
344
|
+
REQUIRE(sketch2.get_min_item() == 1.0);
|
|
345
|
+
REQUIRE(sketch2.get_max_item() == 1.0);
|
|
364
346
|
REQUIRE(sketch2.get_quantile(0.5) == 1.0);
|
|
365
|
-
REQUIRE(sketch2.get_rank(1) == 0.0);
|
|
366
|
-
REQUIRE(sketch2.get_rank(2) == 1.0);
|
|
347
|
+
REQUIRE(sketch2.get_rank(1, false) == 0.0);
|
|
348
|
+
REQUIRE(sketch2.get_rank(2, false) == 1.0);
|
|
367
349
|
}
|
|
368
350
|
|
|
369
351
|
SECTION("deserialize one item v1") {
|
|
370
352
|
std::ifstream is;
|
|
371
353
|
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
372
354
|
is.open(testBinaryInputPath + "kll_sketch_float_one_item_v1.sk", std::ios::binary);
|
|
373
|
-
auto sketch = kll_float_sketch::deserialize(is, serde<float>(), 0);
|
|
355
|
+
auto sketch = kll_float_sketch::deserialize(is, serde<float>(), std::less<float>(), 0);
|
|
374
356
|
REQUIRE_FALSE(sketch.is_empty());
|
|
375
357
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
376
358
|
REQUIRE(sketch.get_n() == 1);
|
|
377
359
|
REQUIRE(sketch.get_num_retained() == 1);
|
|
378
|
-
REQUIRE(sketch.
|
|
379
|
-
REQUIRE(sketch.
|
|
360
|
+
REQUIRE(sketch.get_min_item() == 1.0);
|
|
361
|
+
REQUIRE(sketch.get_max_item() == 1.0);
|
|
380
362
|
}
|
|
381
363
|
|
|
382
364
|
SECTION("stream serialize deserialize three items") {
|
|
383
|
-
kll_float_sketch sketch(200, 0);
|
|
365
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
|
384
366
|
sketch.update(1.0f);
|
|
385
367
|
sketch.update(2.0f);
|
|
386
368
|
sketch.update(3.0f);
|
|
387
369
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
388
370
|
sketch.serialize(s);
|
|
389
371
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
|
|
390
|
-
auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), 0);
|
|
372
|
+
auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), std::less<float>(), 0);
|
|
391
373
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
|
|
392
374
|
REQUIRE(s.tellg() == s.tellp());
|
|
393
375
|
REQUIRE_FALSE(sketch2.is_empty());
|
|
394
376
|
REQUIRE_FALSE(sketch2.is_estimation_mode());
|
|
395
377
|
REQUIRE(sketch2.get_n() == 3);
|
|
396
378
|
REQUIRE(sketch2.get_num_retained() == 3);
|
|
397
|
-
REQUIRE(sketch2.
|
|
398
|
-
REQUIRE(sketch2.
|
|
379
|
+
REQUIRE(sketch2.get_min_item() == 1.0);
|
|
380
|
+
REQUIRE(sketch2.get_max_item() == 3.0);
|
|
399
381
|
}
|
|
400
382
|
|
|
401
383
|
SECTION("bytes serialize deserialize three items") {
|
|
402
|
-
kll_float_sketch sketch(200, 0);
|
|
384
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
|
403
385
|
sketch.update(1.0f);
|
|
404
386
|
sketch.update(2.0f);
|
|
405
387
|
sketch.update(3.0f);
|
|
406
388
|
auto bytes = sketch.serialize();
|
|
407
389
|
REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
|
|
408
|
-
auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), 0);
|
|
390
|
+
auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), std::less<float>(), 0);
|
|
409
391
|
REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
|
|
410
392
|
REQUIRE_FALSE(sketch2.is_empty());
|
|
411
393
|
REQUIRE_FALSE(sketch2.is_estimation_mode());
|
|
412
394
|
REQUIRE(sketch2.get_n() == 3);
|
|
413
395
|
REQUIRE(sketch2.get_num_retained() == 3);
|
|
414
|
-
REQUIRE(sketch2.
|
|
415
|
-
REQUIRE(sketch2.
|
|
396
|
+
REQUIRE(sketch2.get_min_item() == 1.0);
|
|
397
|
+
REQUIRE(sketch2.get_max_item() == 3.0);
|
|
416
398
|
}
|
|
417
399
|
|
|
418
400
|
SECTION("stream serialize deserialize many floats") {
|
|
419
|
-
kll_float_sketch sketch(200, 0);
|
|
401
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
|
420
402
|
const int n = 1000;
|
|
421
403
|
for (int i = 0; i < n; i++) sketch.update(static_cast<float>(i));
|
|
422
404
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
423
405
|
sketch.serialize(s);
|
|
424
406
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
|
|
425
|
-
auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), 0);
|
|
407
|
+
auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), std::less<float>(), 0);
|
|
426
408
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
|
|
427
409
|
REQUIRE(s.tellg() == s.tellp());
|
|
428
410
|
REQUIRE(sketch2.is_empty() == sketch.is_empty());
|
|
429
411
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
430
412
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
431
413
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
432
|
-
REQUIRE(sketch2.
|
|
433
|
-
REQUIRE(sketch2.
|
|
414
|
+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
|
|
415
|
+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
|
|
434
416
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
|
|
435
417
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
|
|
436
418
|
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
|
|
@@ -439,27 +421,27 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
|
439
421
|
}
|
|
440
422
|
|
|
441
423
|
SECTION("bytes serialize deserialize many floats") {
|
|
442
|
-
kll_float_sketch sketch(200, 0);
|
|
424
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
|
443
425
|
const int n = 1000;
|
|
444
426
|
for (int i = 0; i < n; i++) sketch.update(static_cast<float>(i));
|
|
445
427
|
auto bytes = sketch.serialize();
|
|
446
428
|
REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
|
|
447
|
-
auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), 0);
|
|
429
|
+
auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), std::less<float>(), 0);
|
|
448
430
|
REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
|
|
449
431
|
REQUIRE(sketch2.is_empty() == sketch.is_empty());
|
|
450
432
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
451
433
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
452
434
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
453
|
-
REQUIRE(sketch2.
|
|
454
|
-
REQUIRE(sketch2.
|
|
435
|
+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
|
|
436
|
+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
|
|
455
437
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
|
|
456
438
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
|
|
457
439
|
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
|
|
458
440
|
REQUIRE(sketch2.get_rank(0) == sketch.get_rank(0));
|
|
459
441
|
REQUIRE(sketch2.get_rank(static_cast<float>(n)) == sketch.get_rank(static_cast<float>(n)));
|
|
460
|
-
REQUIRE_THROWS_AS(
|
|
461
|
-
REQUIRE_THROWS_AS(
|
|
462
|
-
REQUIRE_THROWS_AS(
|
|
442
|
+
REQUIRE_THROWS_AS(kll_float_sketch::deserialize(bytes.data(), 7, serde<float>(), std::less<float>(), 0), std::out_of_range);
|
|
443
|
+
REQUIRE_THROWS_AS(kll_float_sketch::deserialize(bytes.data(), 15, serde<float>(), std::less<float>(), 0), std::out_of_range);
|
|
444
|
+
REQUIRE_THROWS_AS(kll_float_sketch::deserialize(bytes.data(), bytes.size() - 1, serde<float>(), std::less<float>(), 0), std::out_of_range);
|
|
463
445
|
}
|
|
464
446
|
|
|
465
447
|
SECTION("bytes serialize deserialize many ints") {
|
|
@@ -474,8 +456,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
|
474
456
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
475
457
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
476
458
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
477
|
-
REQUIRE(sketch2.
|
|
478
|
-
REQUIRE(sketch2.
|
|
459
|
+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
|
|
460
|
+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
|
|
479
461
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
|
|
480
462
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
|
|
481
463
|
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
|
|
@@ -499,7 +481,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
|
499
481
|
}
|
|
500
482
|
|
|
501
483
|
SECTION("out of order split points, float") {
|
|
502
|
-
kll_float_sketch sketch(200, 0);
|
|
484
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
|
503
485
|
sketch.update(0); // has to be non-empty to reach the check
|
|
504
486
|
float split_points[2] = {1, 0};
|
|
505
487
|
REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 2), std::invalid_argument);
|
|
@@ -513,48 +495,48 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
|
513
495
|
}
|
|
514
496
|
|
|
515
497
|
SECTION("NaN split point") {
|
|
516
|
-
kll_float_sketch sketch(200, 0);
|
|
498
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
|
517
499
|
sketch.update(0); // has to be non-empty to reach the check
|
|
518
500
|
float split_points[1] = {std::numeric_limits<float>::quiet_NaN()};
|
|
519
501
|
REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::invalid_argument);
|
|
520
502
|
}
|
|
521
503
|
|
|
522
504
|
SECTION("merge") {
|
|
523
|
-
kll_float_sketch sketch1(200, 0);
|
|
524
|
-
kll_float_sketch sketch2(200, 0);
|
|
505
|
+
kll_float_sketch sketch1(200, std::less<float>(), 0);
|
|
506
|
+
kll_float_sketch sketch2(200, std::less<float>(), 0);
|
|
525
507
|
const int n = 10000;
|
|
526
508
|
for (int i = 0; i < n; i++) {
|
|
527
509
|
sketch1.update(static_cast<float>(i));
|
|
528
510
|
sketch2.update(static_cast<float>((2 * n) - i - 1));
|
|
529
511
|
}
|
|
530
512
|
|
|
531
|
-
REQUIRE(sketch1.
|
|
532
|
-
REQUIRE(sketch1.
|
|
533
|
-
REQUIRE(sketch2.
|
|
534
|
-
REQUIRE(sketch2.
|
|
513
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
|
514
|
+
REQUIRE(sketch1.get_max_item() == n - 1);
|
|
515
|
+
REQUIRE(sketch2.get_min_item() == n);
|
|
516
|
+
REQUIRE(sketch2.get_max_item() == 2.0f * n - 1);
|
|
535
517
|
|
|
536
518
|
sketch1.merge(sketch2);
|
|
537
519
|
|
|
538
520
|
REQUIRE_FALSE(sketch1.is_empty());
|
|
539
521
|
REQUIRE(sketch1.get_n() == 2 * n);
|
|
540
|
-
REQUIRE(sketch1.
|
|
541
|
-
REQUIRE(sketch1.
|
|
522
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
|
523
|
+
REQUIRE(sketch1.get_max_item() == 2.0f * n - 1);
|
|
542
524
|
REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_200));
|
|
543
525
|
}
|
|
544
526
|
|
|
545
527
|
SECTION("merge lower k") {
|
|
546
|
-
kll_float_sketch sketch1(256, 0);
|
|
547
|
-
kll_float_sketch sketch2(128, 0);
|
|
528
|
+
kll_float_sketch sketch1(256, std::less<float>(), 0);
|
|
529
|
+
kll_float_sketch sketch2(128, std::less<float>(), 0);
|
|
548
530
|
const int n = 10000;
|
|
549
531
|
for (int i = 0; i < n; i++) {
|
|
550
532
|
sketch1.update(static_cast<float>(i));
|
|
551
533
|
sketch2.update(static_cast<float>((2 * n) - i - 1));
|
|
552
534
|
}
|
|
553
535
|
|
|
554
|
-
REQUIRE(sketch1.
|
|
555
|
-
REQUIRE(sketch1.
|
|
556
|
-
REQUIRE(sketch2.
|
|
557
|
-
REQUIRE(sketch2.
|
|
536
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
|
537
|
+
REQUIRE(sketch1.get_max_item() == n - 1);
|
|
538
|
+
REQUIRE(sketch2.get_min_item() == n);
|
|
539
|
+
REQUIRE(sketch2.get_max_item() == 2.0f * n - 1);
|
|
558
540
|
|
|
559
541
|
REQUIRE(sketch1.get_k() == 256);
|
|
560
542
|
REQUIRE(sketch2.get_k() == 128);
|
|
@@ -570,14 +552,14 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
|
570
552
|
|
|
571
553
|
REQUIRE_FALSE(sketch1.is_empty());
|
|
572
554
|
REQUIRE(sketch1.get_n() == 2 * n);
|
|
573
|
-
REQUIRE(sketch1.
|
|
574
|
-
REQUIRE(sketch1.
|
|
555
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
|
556
|
+
REQUIRE(sketch1.get_max_item() == 2.0f * n - 1);
|
|
575
557
|
REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_200));
|
|
576
558
|
}
|
|
577
559
|
|
|
578
560
|
SECTION("merge exact mode, lower k") {
|
|
579
|
-
kll_float_sketch sketch1(256, 0);
|
|
580
|
-
kll_float_sketch sketch2(128, 0);
|
|
561
|
+
kll_float_sketch sketch1(256, std::less<float>(), 0);
|
|
562
|
+
kll_float_sketch sketch2(128, std::less<float>(), 0);
|
|
581
563
|
const int n = 10000;
|
|
582
564
|
for (int i = 0; i < n; i++) {
|
|
583
565
|
sketch1.update(static_cast<float>(i));
|
|
@@ -590,8 +572,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
|
590
572
|
|
|
591
573
|
REQUIRE_FALSE(sketch1.is_empty());
|
|
592
574
|
REQUIRE(sketch1.get_n() == n);
|
|
593
|
-
REQUIRE(sketch1.
|
|
594
|
-
REQUIRE(sketch1.
|
|
575
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
|
576
|
+
REQUIRE(sketch1.get_max_item() == n - 1);
|
|
595
577
|
REQUIRE(sketch1.get_quantile(0.5) == Approx(n / 2).margin(n / 2 * RANK_EPS_FOR_K_200));
|
|
596
578
|
|
|
597
579
|
sketch2.update(0);
|
|
@@ -601,29 +583,29 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
|
601
583
|
}
|
|
602
584
|
|
|
603
585
|
SECTION("merge min value from other") {
|
|
604
|
-
kll_float_sketch sketch1(200, 0);
|
|
605
|
-
kll_float_sketch sketch2(200, 0);
|
|
586
|
+
kll_float_sketch sketch1(200, std::less<float>(), 0);
|
|
587
|
+
kll_float_sketch sketch2(200, std::less<float>(), 0);
|
|
606
588
|
sketch1.update(1.0f);
|
|
607
589
|
sketch2.update(2.0f);
|
|
608
590
|
sketch2.merge(sketch1);
|
|
609
|
-
REQUIRE(sketch2.
|
|
610
|
-
REQUIRE(sketch2.
|
|
591
|
+
REQUIRE(sketch2.get_min_item() == 1.0f);
|
|
592
|
+
REQUIRE(sketch2.get_max_item() == 2.0f);
|
|
611
593
|
}
|
|
612
594
|
|
|
613
595
|
SECTION("merge min and max values from other") {
|
|
614
|
-
kll_float_sketch sketch1(200, 0);
|
|
596
|
+
kll_float_sketch sketch1(200, std::less<float>(), 0);
|
|
615
597
|
for (int i = 0; i < 1000000; i++) sketch1.update(static_cast<float>(i));
|
|
616
|
-
kll_float_sketch sketch2(200, 0);
|
|
598
|
+
kll_float_sketch sketch2(200, std::less<float>(), 0);
|
|
617
599
|
sketch2.merge(sketch1);
|
|
618
|
-
REQUIRE(sketch2.
|
|
619
|
-
REQUIRE(sketch2.
|
|
600
|
+
REQUIRE(sketch2.get_min_item() == 0.0f);
|
|
601
|
+
REQUIRE(sketch2.get_max_item() == 999999.0f);
|
|
620
602
|
}
|
|
621
603
|
|
|
622
604
|
SECTION("sketch of ints") {
|
|
623
605
|
kll_sketch<int> sketch;
|
|
624
606
|
REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
|
|
625
|
-
REQUIRE_THROWS_AS(sketch.
|
|
626
|
-
REQUIRE_THROWS_AS(sketch.
|
|
607
|
+
REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
|
|
608
|
+
REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
|
|
627
609
|
|
|
628
610
|
const int n = 1000;
|
|
629
611
|
for (int i = 0; i < n; i++) sketch.update(i);
|
|
@@ -638,8 +620,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
|
638
620
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
639
621
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
640
622
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
641
|
-
REQUIRE(sketch2.
|
|
642
|
-
REQUIRE(sketch2.
|
|
623
|
+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
|
|
624
|
+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
|
|
643
625
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
|
|
644
626
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
|
|
645
627
|
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
|
|
@@ -648,30 +630,30 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
|
648
630
|
}
|
|
649
631
|
|
|
650
632
|
SECTION("sketch of strings stream") {
|
|
651
|
-
kll_string_sketch sketch1(200, 0);
|
|
633
|
+
kll_string_sketch sketch1(200, std::less<std::string>(), 0);
|
|
652
634
|
REQUIRE_THROWS_AS(sketch1.get_quantile(0), std::runtime_error);
|
|
653
|
-
REQUIRE_THROWS_AS(sketch1.
|
|
654
|
-
REQUIRE_THROWS_AS(sketch1.
|
|
635
|
+
REQUIRE_THROWS_AS(sketch1.get_min_item(), std::runtime_error);
|
|
636
|
+
REQUIRE_THROWS_AS(sketch1.get_max_item(), std::runtime_error);
|
|
655
637
|
REQUIRE(sketch1.get_serialized_size_bytes() == 8);
|
|
656
638
|
|
|
657
639
|
const int n = 1000;
|
|
658
640
|
for (int i = 0; i < n; i++) sketch1.update(std::to_string(i));
|
|
659
641
|
|
|
660
|
-
REQUIRE(sketch1.
|
|
661
|
-
REQUIRE(sketch1.
|
|
642
|
+
REQUIRE(sketch1.get_min_item() == std::string("0"));
|
|
643
|
+
REQUIRE(sketch1.get_max_item() == std::string("999"));
|
|
662
644
|
|
|
663
645
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
664
646
|
sketch1.serialize(s);
|
|
665
647
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch1.get_serialized_size_bytes());
|
|
666
|
-
auto sketch2 = kll_string_sketch::deserialize(s,
|
|
648
|
+
auto sketch2 = kll_string_sketch::deserialize(s, serde<std::string>(), std::less<std::string>(), 0);
|
|
667
649
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
|
|
668
650
|
REQUIRE(s.tellg() == s.tellp());
|
|
669
651
|
REQUIRE(sketch2.is_empty() == sketch1.is_empty());
|
|
670
652
|
REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
|
|
671
653
|
REQUIRE(sketch2.get_n() == sketch1.get_n());
|
|
672
654
|
REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
|
|
673
|
-
REQUIRE(sketch2.
|
|
674
|
-
REQUIRE(sketch2.
|
|
655
|
+
REQUIRE(sketch2.get_min_item() == sketch1.get_min_item());
|
|
656
|
+
REQUIRE(sketch2.get_max_item() == sketch1.get_max_item());
|
|
675
657
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch1.get_normalized_rank_error(false));
|
|
676
658
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch1.get_normalized_rank_error(true));
|
|
677
659
|
REQUIRE(sketch2.get_quantile(0.5) == sketch1.get_quantile(0.5));
|
|
@@ -687,28 +669,29 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
|
687
669
|
}
|
|
688
670
|
|
|
689
671
|
SECTION("sketch of strings bytes") {
|
|
690
|
-
kll_string_sketch sketch1(200, 0);
|
|
672
|
+
kll_string_sketch sketch1(200, std::less<std::string>(), 0);
|
|
691
673
|
REQUIRE_THROWS_AS(sketch1.get_quantile(0), std::runtime_error);
|
|
692
|
-
REQUIRE_THROWS_AS(sketch1.
|
|
693
|
-
REQUIRE_THROWS_AS(sketch1.
|
|
674
|
+
REQUIRE_THROWS_AS(sketch1.get_min_item(), std::runtime_error);
|
|
675
|
+
REQUIRE_THROWS_AS(sketch1.get_max_item(), std::runtime_error);
|
|
694
676
|
REQUIRE(sketch1.get_serialized_size_bytes() == 8);
|
|
695
677
|
|
|
696
678
|
const int n = 1000;
|
|
697
679
|
for (int i = 0; i < n; i++) sketch1.update(std::to_string(i));
|
|
698
680
|
|
|
699
|
-
REQUIRE(sketch1.
|
|
700
|
-
REQUIRE(sketch1.
|
|
681
|
+
REQUIRE(sketch1.get_min_item() == std::string("0"));
|
|
682
|
+
REQUIRE(sketch1.get_max_item() == std::string("999"));
|
|
701
683
|
|
|
702
684
|
auto bytes = sketch1.serialize();
|
|
703
685
|
REQUIRE(bytes.size() == sketch1.get_serialized_size_bytes());
|
|
704
|
-
auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size(), serde<std::string>(),
|
|
686
|
+
auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size(), serde<std::string>(),
|
|
687
|
+
std::less<std::string>(), 0);
|
|
705
688
|
REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
|
|
706
689
|
REQUIRE(sketch2.is_empty() == sketch1.is_empty());
|
|
707
690
|
REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
|
|
708
691
|
REQUIRE(sketch2.get_n() == sketch1.get_n());
|
|
709
692
|
REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
|
|
710
|
-
REQUIRE(sketch2.
|
|
711
|
-
REQUIRE(sketch2.
|
|
693
|
+
REQUIRE(sketch2.get_min_item() == sketch1.get_min_item());
|
|
694
|
+
REQUIRE(sketch2.get_max_item() == sketch1.get_max_item());
|
|
712
695
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch1.get_normalized_rank_error(false));
|
|
713
696
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch1.get_normalized_rank_error(true));
|
|
714
697
|
REQUIRE(sketch2.get_quantile(0.5) == sketch1.get_quantile(0.5));
|
|
@@ -718,11 +701,12 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
|
718
701
|
|
|
719
702
|
|
|
720
703
|
SECTION("sketch of strings, single item, bytes") {
|
|
721
|
-
kll_string_sketch sketch1(200, 0);
|
|
704
|
+
kll_string_sketch sketch1(200, std::less<std::string>(), 0);
|
|
722
705
|
sketch1.update("a");
|
|
723
706
|
auto bytes = sketch1.serialize();
|
|
724
707
|
REQUIRE(bytes.size() == sketch1.get_serialized_size_bytes());
|
|
725
|
-
auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size(), serde<std::string>(),
|
|
708
|
+
auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size(), serde<std::string>(),
|
|
709
|
+
std::less<std::string>(), 0);
|
|
726
710
|
REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
|
|
727
711
|
}
|
|
728
712
|
|
|
@@ -753,14 +737,14 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
|
753
737
|
// move constructor
|
|
754
738
|
kll_sketch<int> sketch2(std::move(sketch1));
|
|
755
739
|
for (int i = 0; i < n; i++) {
|
|
756
|
-
REQUIRE(sketch2.get_rank(i) == (double) i / n);
|
|
740
|
+
REQUIRE(sketch2.get_rank(i, false) == (double) i / n);
|
|
757
741
|
}
|
|
758
742
|
|
|
759
743
|
// move assignment
|
|
760
744
|
kll_sketch<int> sketch3;
|
|
761
745
|
sketch3 = std::move(sketch2);
|
|
762
746
|
for (int i = 0; i < n; i++) {
|
|
763
|
-
REQUIRE(sketch3.get_rank(i) == (double) i / n);
|
|
747
|
+
REQUIRE(sketch3.get_rank(i, false) == (double) i / n);
|
|
764
748
|
}
|
|
765
749
|
}
|
|
766
750
|
|
|
@@ -795,44 +779,24 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
|
795
779
|
kll.update(3);
|
|
796
780
|
kll.update(1);
|
|
797
781
|
|
|
798
|
-
{
|
|
799
|
-
auto view = kll.get_sorted_view(
|
|
782
|
+
{
|
|
783
|
+
auto view = kll.get_sorted_view();
|
|
800
784
|
REQUIRE(view.size() == 3);
|
|
801
785
|
auto it = view.begin();
|
|
802
|
-
REQUIRE(it->first == 1);
|
|
786
|
+
REQUIRE(it->first == 1); // operator->
|
|
787
|
+
REQUIRE((*it).first == 1); // operator*
|
|
803
788
|
REQUIRE(it->second == 1);
|
|
789
|
+
REQUIRE(it.get_weight() == 1);
|
|
804
790
|
++it;
|
|
805
791
|
REQUIRE(it->first == 2);
|
|
806
|
-
REQUIRE(it->second == 1);
|
|
807
|
-
++it;
|
|
808
|
-
REQUIRE(it->first == 3);
|
|
809
|
-
REQUIRE(it->second == 1);
|
|
810
|
-
}
|
|
811
|
-
{ // cumulative, non-inclusive, using operator->
|
|
812
|
-
auto view = kll.get_sorted_view(true);
|
|
813
|
-
REQUIRE(view.size() == 3);
|
|
814
|
-
auto it = view.begin();
|
|
815
|
-
REQUIRE(it->first == 1);
|
|
816
|
-
REQUIRE(it->second == 0);
|
|
817
|
-
++it;
|
|
818
|
-
REQUIRE(it->first == 2);
|
|
819
|
-
REQUIRE(it->second == 1);
|
|
820
|
-
++it;
|
|
821
|
-
REQUIRE(it->first == 3);
|
|
822
792
|
REQUIRE(it->second == 2);
|
|
823
|
-
|
|
824
|
-
{ // cumulative, inclusive, using operator*
|
|
825
|
-
auto view = kll.get_sorted_view<true>(true);
|
|
826
|
-
REQUIRE(view.size() == 3);
|
|
827
|
-
auto it = view.begin();
|
|
828
|
-
REQUIRE((*it).first == 1);
|
|
829
|
-
REQUIRE((*it).second == 1);
|
|
793
|
+
REQUIRE(it.get_weight() == 1);
|
|
830
794
|
++it;
|
|
831
|
-
REQUIRE(
|
|
832
|
-
REQUIRE(
|
|
795
|
+
REQUIRE(it->first == 3);
|
|
796
|
+
REQUIRE(it->second == 3);
|
|
797
|
+
REQUIRE(it.get_weight() == 1);
|
|
833
798
|
++it;
|
|
834
|
-
REQUIRE(
|
|
835
|
-
REQUIRE((*it).second == 3);
|
|
799
|
+
REQUIRE(it == view.end());
|
|
836
800
|
}
|
|
837
801
|
}
|
|
838
802
|
|
|
@@ -854,8 +818,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
|
854
818
|
REQUIRE(kll_float.get_n() == kll_double.get_n());
|
|
855
819
|
REQUIRE(kll_float.get_num_retained() == kll_double.get_num_retained());
|
|
856
820
|
|
|
857
|
-
auto sv_float = kll_float.get_sorted_view(
|
|
858
|
-
auto sv_double = kll_double.get_sorted_view(
|
|
821
|
+
auto sv_float = kll_float.get_sorted_view();
|
|
822
|
+
auto sv_double = kll_double.get_sorted_view();
|
|
859
823
|
auto sv_float_it = sv_float.begin();
|
|
860
824
|
auto sv_double_it = sv_double.begin();
|
|
861
825
|
while (sv_float_it != sv_float.end()) {
|