datasketches 0.2.6 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/LICENSE +4 -6
- data/NOTICE +6 -5
- data/ext/datasketches/kll_wrapper.cpp +20 -20
- data/ext/datasketches/theta_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
- data/vendor/datasketches-cpp/LICENSE +4 -6
- data/vendor/datasketches-cpp/MANIFEST.in +21 -4
- data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
- data/vendor/datasketches-cpp/common/{test/test_runner.cpp → include/version.hpp.in} +15 -8
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +37 -7
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +22 -1
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +1 -1
- data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +1 -1
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +17 -10
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +55 -42
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +4 -4
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +27 -27
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +197 -233
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +42 -32
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
- data/vendor/datasketches-cpp/pyproject.toml +17 -13
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/README.md +1 -1
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +19 -1
- data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
- data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +1 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +20 -19
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +241 -233
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +27 -27
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +117 -104
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +7 -7
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +3 -3
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +5 -5
- data/vendor/datasketches-cpp/setup.py +14 -3
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +3 -2
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/tox.ini +26 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +41 -35
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +27 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/version.cfg.in +1 -0
- metadata +14 -7
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
- data/vendor/datasketches-cpp/common/test/catch.hpp +0 -17618
@@ -17,7 +17,7 @@
|
|
17
17
|
* under the License.
|
18
18
|
*/
|
19
19
|
|
20
|
-
#include <catch.hpp>
|
20
|
+
#include <catch2/catch.hpp>
|
21
21
|
#include <cmath>
|
22
22
|
#include <cstring>
|
23
23
|
#include <sstream>
|
@@ -39,9 +39,9 @@ static std::string testBinaryInputPath = "test/";
|
|
39
39
|
#endif
|
40
40
|
|
41
41
|
// typical usage would be just kll_sketch<float> or kll_sketch<std::string>, but here we use test_allocator
|
42
|
-
using kll_float_sketch = kll_sketch<float, std::less<float>,
|
42
|
+
using kll_float_sketch = kll_sketch<float, std::less<float>, test_allocator<float>>;
|
43
43
|
// let std::string use the default allocator for simplicity, otherwise we need to define "less" and "serde"
|
44
|
-
using kll_string_sketch = kll_sketch<std::string, std::less<std::string>,
|
44
|
+
using kll_string_sketch = kll_sketch<std::string, std::less<std::string>, test_allocator<std::string>>;
|
45
45
|
|
46
46
|
TEST_CASE("kll sketch", "[kll_sketch]") {
|
47
47
|
|
@@ -49,71 +49,78 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
49
49
|
test_allocator_total_bytes = 0;
|
50
50
|
|
51
51
|
SECTION("k limits") {
|
52
|
-
kll_float_sketch sketch1(kll_float_sketch::MIN_K, 0); // this should work
|
53
|
-
kll_float_sketch sketch2(kll_float_sketch::MAX_K, 0); // this should work
|
54
|
-
REQUIRE_THROWS_AS(new kll_float_sketch(kll_float_sketch::MIN_K - 1, 0), std::invalid_argument);
|
52
|
+
kll_float_sketch sketch1(kll_float_sketch::MIN_K, std::less<float>(), 0); // this should work
|
53
|
+
kll_float_sketch sketch2(kll_float_sketch::MAX_K, std::less<float>(), 0); // this should work
|
54
|
+
REQUIRE_THROWS_AS(new kll_float_sketch(kll_float_sketch::MIN_K - 1, std::less<float>(), 0), std::invalid_argument);
|
55
55
|
// MAX_K + 1 makes no sense because k is uint16_t
|
56
|
+
//std::cout << "sizeof(kll_sketch<float>)=" << sizeof(kll_sketch<float>) << "\n";
|
57
|
+
//std::cout << "sizeof(kll_sketch<double>)=" << sizeof(kll_sketch<double>) << "\n";
|
56
58
|
}
|
57
59
|
|
58
60
|
SECTION("empty") {
|
59
|
-
kll_float_sketch sketch(200, 0);
|
61
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
60
62
|
REQUIRE(sketch.is_empty());
|
61
63
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
62
64
|
REQUIRE(sketch.get_n() == 0);
|
63
65
|
REQUIRE(sketch.get_num_retained() == 0);
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
const double
|
69
|
-
|
66
|
+
REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
|
67
|
+
REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
|
68
|
+
REQUIRE_THROWS_AS(sketch.get_rank(0), std::runtime_error);
|
69
|
+
REQUIRE_THROWS_AS(sketch.get_quantile(0.5), std::runtime_error);
|
70
|
+
const double ranks[3] {0, 0.5, 1};
|
71
|
+
REQUIRE_THROWS_AS(sketch.get_quantiles(ranks, 3), std::runtime_error);
|
70
72
|
const float split_points[1] {0};
|
71
|
-
|
72
|
-
|
73
|
+
REQUIRE_THROWS_AS(sketch.get_PMF(split_points, 1), std::runtime_error);
|
74
|
+
REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::runtime_error);
|
73
75
|
|
74
|
-
for (auto
|
75
|
-
(
|
76
|
+
for (auto pair: sketch) {
|
77
|
+
unused(pair); // to suppress "unused" warning
|
76
78
|
FAIL("should be no iterations over an empty sketch");
|
77
79
|
}
|
78
80
|
}
|
79
81
|
|
80
82
|
SECTION("get bad quantile") {
|
81
|
-
kll_float_sketch sketch(200, 0);
|
83
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
82
84
|
sketch.update(0); // has to be non-empty to reach the check
|
83
85
|
REQUIRE_THROWS_AS(sketch.get_quantile(-1), std::invalid_argument);
|
84
86
|
}
|
85
87
|
|
86
88
|
SECTION("one item") {
|
87
|
-
kll_float_sketch sketch(200, 0);
|
89
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
88
90
|
sketch.update(1.0f);
|
89
91
|
REQUIRE_FALSE(sketch.is_empty());
|
90
92
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
91
93
|
REQUIRE(sketch.get_n() == 1);
|
92
94
|
REQUIRE(sketch.get_num_retained() == 1);
|
93
|
-
REQUIRE(sketch.get_rank(1.0f) == 0.0);
|
94
|
-
REQUIRE(sketch.get_rank
|
95
|
-
REQUIRE(sketch.get_rank(2.0f) == 1.0);
|
95
|
+
REQUIRE(sketch.get_rank(1.0f, false) == 0.0);
|
96
|
+
REQUIRE(sketch.get_rank(1.0f) == 1.0);
|
97
|
+
REQUIRE(sketch.get_rank(2.0f, false) == 1.0);
|
96
98
|
REQUIRE(sketch.get_rank(std::numeric_limits<float>::infinity()) == 1.0);
|
97
|
-
REQUIRE(sketch.
|
98
|
-
REQUIRE(sketch.
|
99
|
+
REQUIRE(sketch.get_min_item() == 1.0);
|
100
|
+
REQUIRE(sketch.get_max_item() == 1.0);
|
99
101
|
REQUIRE(sketch.get_quantile(0.5) == 1.0);
|
100
|
-
const double
|
101
|
-
auto quantiles = sketch.get_quantiles(
|
102
|
+
const double ranks[3] {0, 0.5, 1};
|
103
|
+
auto quantiles = sketch.get_quantiles(ranks, 3);
|
102
104
|
REQUIRE(quantiles.size() == 3);
|
103
105
|
REQUIRE(quantiles[0] == 1.0);
|
104
106
|
REQUIRE(quantiles[1] == 1.0);
|
105
107
|
REQUIRE(quantiles[2] == 1.0);
|
106
108
|
|
107
109
|
int count = 0;
|
108
|
-
for (auto
|
109
|
-
REQUIRE(
|
110
|
+
for (auto pair: sketch) {
|
111
|
+
REQUIRE(pair.second == 1);
|
110
112
|
++count;
|
111
113
|
}
|
112
114
|
REQUIRE(count == 1);
|
115
|
+
|
116
|
+
// iterator dereferencing
|
117
|
+
auto it = sketch.begin();
|
118
|
+
REQUIRE(it->first == 1.0f);
|
119
|
+
REQUIRE((*it).first == 1.0f);
|
113
120
|
}
|
114
121
|
|
115
122
|
SECTION("NaN") {
|
116
|
-
kll_float_sketch sketch(200, 0);
|
123
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
117
124
|
sketch.update(std::numeric_limits<float>::quiet_NaN());
|
118
125
|
REQUIRE(sketch.is_empty());
|
119
126
|
|
@@ -123,44 +130,44 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
123
130
|
}
|
124
131
|
|
125
132
|
SECTION("many items, exact mode") {
|
126
|
-
kll_float_sketch sketch(200, 0);
|
133
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
127
134
|
const uint32_t n = 200;
|
128
|
-
for (uint32_t i =
|
135
|
+
for (uint32_t i = 1; i <= n; i++) {
|
129
136
|
sketch.update(static_cast<float>(i));
|
130
|
-
REQUIRE(sketch.get_n() == i
|
137
|
+
REQUIRE(sketch.get_n() == i);
|
131
138
|
}
|
132
139
|
REQUIRE_FALSE(sketch.is_empty());
|
133
140
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
134
141
|
REQUIRE(sketch.get_num_retained() == n);
|
135
|
-
REQUIRE(sketch.
|
136
|
-
REQUIRE(sketch.get_quantile(0) ==
|
137
|
-
REQUIRE(sketch.
|
138
|
-
REQUIRE(sketch.get_quantile(1) == n
|
142
|
+
REQUIRE(sketch.get_min_item() == 1);
|
143
|
+
REQUIRE(sketch.get_quantile(0) == 1);
|
144
|
+
REQUIRE(sketch.get_max_item() == n);
|
145
|
+
REQUIRE(sketch.get_quantile(1) == n);
|
139
146
|
|
140
|
-
const double
|
141
|
-
auto quantiles = sketch.get_quantiles(
|
147
|
+
const double ranks[3] {0, 0.5, 1};
|
148
|
+
auto quantiles = sketch.get_quantiles(ranks, 3);
|
142
149
|
REQUIRE(quantiles.size() == 3);
|
143
|
-
REQUIRE(quantiles[0] ==
|
150
|
+
REQUIRE(quantiles[0] == 1);
|
144
151
|
REQUIRE(quantiles[1] == n / 2);
|
145
|
-
REQUIRE(quantiles[2] == n
|
146
|
-
|
147
|
-
for (uint32_t i = 0; i < n; i++) {
|
148
|
-
const double true_rank = (double) i / n;
|
149
|
-
REQUIRE(sketch.get_rank(static_cast<float>(i)) == true_rank);
|
150
|
-
const double true_rank_inclusive = (double) (i + 1) / n;
|
151
|
-
REQUIRE(sketch.get_rank<true>(static_cast<float>(i)) == true_rank_inclusive);
|
152
|
-
}
|
152
|
+
REQUIRE(quantiles[2] == n);
|
153
153
|
|
154
|
-
//
|
154
|
+
// alternative method must produce the same result
|
155
155
|
auto quantiles2 = sketch.get_quantiles(3);
|
156
156
|
REQUIRE(quantiles2.size() == 3);
|
157
157
|
REQUIRE(quantiles[0] == quantiles2[0]);
|
158
158
|
REQUIRE(quantiles[1] == quantiles2[1]);
|
159
159
|
REQUIRE(quantiles[2] == quantiles2[2]);
|
160
|
+
|
161
|
+
for (uint32_t i = 1; i <= n; i++) {
|
162
|
+
const double true_rank_inclusive = static_cast<double>(i) / n;
|
163
|
+
REQUIRE(sketch.get_rank(static_cast<float>(i)) == true_rank_inclusive);
|
164
|
+
const double true_rank_exclusive = static_cast<double>(i - 1) / n;
|
165
|
+
REQUIRE(sketch.get_rank(static_cast<float>(i), false) == true_rank_exclusive);
|
166
|
+
}
|
160
167
|
}
|
161
168
|
|
162
169
|
SECTION("10 items") {
|
163
|
-
kll_float_sketch sketch(200, 0);
|
170
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
164
171
|
sketch.update(1.0f);
|
165
172
|
sketch.update(2.0f);
|
166
173
|
sketch.update(3.0f);
|
@@ -172,23 +179,23 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
172
179
|
sketch.update(9.0f);
|
173
180
|
sketch.update(10.0f);
|
174
181
|
REQUIRE(sketch.get_quantile(0) == 1.0);
|
175
|
-
REQUIRE(sketch.get_quantile(0.5) ==
|
182
|
+
REQUIRE(sketch.get_quantile(0.5) == 5.0);
|
176
183
|
REQUIRE(sketch.get_quantile(0.99) == 10.0);
|
177
184
|
REQUIRE(sketch.get_quantile(1) == 10.0);
|
178
185
|
}
|
179
186
|
|
180
187
|
SECTION("100 items") {
|
181
|
-
kll_float_sketch sketch(200, 0);
|
188
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
182
189
|
for (int i = 0; i < 100; ++i) sketch.update(static_cast<float>(i));
|
183
190
|
REQUIRE(sketch.get_quantile(0) == 0);
|
184
|
-
REQUIRE(sketch.get_quantile(0.01) ==
|
185
|
-
REQUIRE(sketch.get_quantile(0.5) ==
|
186
|
-
REQUIRE(sketch.get_quantile(0.99) ==
|
191
|
+
REQUIRE(sketch.get_quantile(0.01) == 0);
|
192
|
+
REQUIRE(sketch.get_quantile(0.5) == 49);
|
193
|
+
REQUIRE(sketch.get_quantile(0.99) == 98.0);
|
187
194
|
REQUIRE(sketch.get_quantile(1) == 99.0);
|
188
195
|
}
|
189
196
|
|
190
197
|
SECTION("many items, estimation mode") {
|
191
|
-
kll_float_sketch sketch(200, 0);
|
198
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
192
199
|
const int n = 1000000;
|
193
200
|
for (int i = 0; i < n; i++) {
|
194
201
|
sketch.update(static_cast<float>(i));
|
@@ -196,87 +203,62 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
196
203
|
}
|
197
204
|
REQUIRE_FALSE(sketch.is_empty());
|
198
205
|
REQUIRE(sketch.is_estimation_mode());
|
199
|
-
REQUIRE(sketch.
|
200
|
-
REQUIRE(sketch.
|
201
|
-
REQUIRE(sketch.get_max_value() == n - 1); // max value is exact
|
202
|
-
REQUIRE(sketch.get_quantile(1) == n - 1); // max value is exact
|
206
|
+
REQUIRE(sketch.get_min_item() == 0.0); // min value is exact
|
207
|
+
REQUIRE(sketch.get_max_item() == n - 1); // max value is exact
|
203
208
|
|
204
209
|
// test rank
|
205
210
|
for (int i = 0; i < n; i++) {
|
206
211
|
const double trueRank = (double) i / n;
|
207
|
-
REQUIRE(sketch.get_rank(static_cast<float>(i)) == Approx(trueRank).margin(RANK_EPS_FOR_K_200));
|
208
|
-
}
|
209
|
-
|
210
|
-
// test quantiles at every 0.1 percentage point
|
211
|
-
double fractions[1001];
|
212
|
-
double reverse_fractions[1001]; // check that ordering does not matter
|
213
|
-
for (int i = 0; i < 1001; i++) {
|
214
|
-
fractions[i] = (double) i / 1000;
|
215
|
-
reverse_fractions[1000 - i] = fractions[i];
|
216
|
-
}
|
217
|
-
auto quantiles = sketch.get_quantiles(fractions, 1001);
|
218
|
-
auto reverse_quantiles = sketch.get_quantiles(reverse_fractions, 1001);
|
219
|
-
float previous_quantile(0);
|
220
|
-
for (int i = 0; i < 1001; i++) {
|
221
|
-
// expensive in a loop, just to check the equivalence here, not advised for real code
|
222
|
-
const float quantile = sketch.get_quantile(fractions[i]);
|
223
|
-
REQUIRE(quantiles[i] == quantile);
|
224
|
-
REQUIRE(reverse_quantiles[1000 - i] == quantile);
|
225
|
-
REQUIRE(previous_quantile <= quantile);
|
226
|
-
previous_quantile = quantile;
|
212
|
+
REQUIRE(sketch.get_rank(static_cast<float>(i), false) == Approx(trueRank).margin(RANK_EPS_FOR_K_200));
|
227
213
|
}
|
228
214
|
|
229
215
|
//std::cout << sketch.to_string();
|
230
216
|
|
231
217
|
uint32_t count = 0;
|
232
218
|
uint64_t total_weight = 0;
|
233
|
-
for (auto
|
219
|
+
for (auto pair: sketch) {
|
234
220
|
++count;
|
235
|
-
total_weight +=
|
221
|
+
total_weight += pair.second;
|
236
222
|
}
|
237
223
|
REQUIRE(count == sketch.get_num_retained());
|
238
224
|
REQUIRE(total_weight == sketch.get_n());
|
239
225
|
}
|
240
226
|
|
241
|
-
SECTION("consistency between get_rank
|
242
|
-
kll_float_sketch sketch(200, 0);
|
243
|
-
const int n =
|
227
|
+
SECTION("consistency between get_rank and get_PMF/CDF") {
|
228
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
229
|
+
const int n = 200;
|
244
230
|
float values[n];
|
245
231
|
for (int i = 0; i < n; i++) {
|
246
232
|
sketch.update(static_cast<float>(i));
|
247
233
|
values[i] = static_cast<float>(i);
|
248
234
|
}
|
249
|
-
{ // inclusive=false
|
250
|
-
const auto ranks(sketch.get_CDF(values, n));
|
251
|
-
const auto pmf(sketch.get_PMF(values, n));
|
235
|
+
{ // inclusive=false
|
236
|
+
const auto ranks(sketch.get_CDF(values, n, false));
|
237
|
+
const auto pmf(sketch.get_PMF(values, n, false));
|
252
238
|
|
253
239
|
double subtotal_pmf = 0;
|
254
240
|
for (int i = 0; i < n; i++) {
|
255
|
-
if (sketch.get_rank(values[i]) != ranks[i]) {
|
256
|
-
|
257
|
-
REQUIRE(sketch.get_rank(values[i]) == ranks[i]);
|
241
|
+
if (sketch.get_rank(values[i], false) != ranks[i]) {
|
242
|
+
FAIL("checking rank vs CDF for value " + std::to_string(i));
|
258
243
|
}
|
259
244
|
subtotal_pmf += pmf[i];
|
260
245
|
if (abs(ranks[i] - subtotal_pmf) > NUMERIC_NOISE_TOLERANCE) {
|
261
|
-
|
262
|
-
REQUIRE(ranks[i] == Approx(subtotal_pmf).margin(NUMERIC_NOISE_TOLERANCE));
|
246
|
+
FAIL("CDF vs PMF for value " + std::to_string(i));
|
263
247
|
}
|
264
248
|
}
|
265
249
|
}
|
266
|
-
{ // inclusive=true
|
267
|
-
const auto ranks(sketch.get_CDF
|
268
|
-
const auto pmf(sketch.get_PMF
|
250
|
+
{ // inclusive=true (default)
|
251
|
+
const auto ranks(sketch.get_CDF(values, n));
|
252
|
+
const auto pmf(sketch.get_PMF(values, n));
|
269
253
|
|
270
254
|
double subtotal_pmf = 0;
|
271
255
|
for (int i = 0; i < n; i++) {
|
272
|
-
if (sketch.get_rank
|
273
|
-
|
274
|
-
REQUIRE(sketch.get_rank(values[i]) == ranks[i]);
|
256
|
+
if (sketch.get_rank(values[i]) != ranks[i]) {
|
257
|
+
FAIL("checking rank vs CDF for value " + std::to_string(i));
|
275
258
|
}
|
276
259
|
subtotal_pmf += pmf[i];
|
277
260
|
if (abs(ranks[i] - subtotal_pmf) > NUMERIC_NOISE_TOLERANCE) {
|
278
|
-
|
279
|
-
REQUIRE(ranks[i] == Approx(subtotal_pmf).margin(NUMERIC_NOISE_TOLERANCE));
|
261
|
+
FAIL("CDF vs PMF for value " + std::to_string(i));
|
280
262
|
}
|
281
263
|
}
|
282
264
|
}
|
@@ -286,151 +268,151 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
286
268
|
std::ifstream is;
|
287
269
|
is.exceptions(std::ios::failbit | std::ios::badbit);
|
288
270
|
is.open(testBinaryInputPath + "kll_sketch_from_java.sk", std::ios::binary);
|
289
|
-
auto sketch = kll_float_sketch::deserialize(is,
|
271
|
+
auto sketch = kll_float_sketch::deserialize(is, serde<float>(), std::less<float>(), 0);
|
290
272
|
REQUIRE_FALSE(sketch.is_empty());
|
291
273
|
REQUIRE(sketch.is_estimation_mode());
|
292
274
|
REQUIRE(sketch.get_n() == 1000000);
|
293
275
|
REQUIRE(sketch.get_num_retained() == 614);
|
294
|
-
REQUIRE(sketch.
|
295
|
-
REQUIRE(sketch.
|
276
|
+
REQUIRE(sketch.get_min_item() == 0.0);
|
277
|
+
REQUIRE(sketch.get_max_item() == 999999.0);
|
296
278
|
}
|
297
279
|
|
298
280
|
SECTION("stream serialize deserialize empty") {
|
299
|
-
kll_float_sketch sketch(200, 0);
|
281
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
300
282
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
301
283
|
sketch.serialize(s);
|
302
284
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
|
303
|
-
auto sketch2 = kll_float_sketch::deserialize(s,
|
285
|
+
auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), std::less<float>(), 0);
|
304
286
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
|
305
287
|
REQUIRE(s.tellg() == s.tellp());
|
306
288
|
REQUIRE(sketch2.is_empty() == sketch.is_empty());
|
307
289
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
308
290
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
309
291
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
310
|
-
|
311
|
-
|
292
|
+
REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
|
293
|
+
REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
|
312
294
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
|
313
295
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
|
314
296
|
}
|
315
297
|
|
316
298
|
SECTION("bytes serialize deserialize empty") {
|
317
|
-
kll_float_sketch sketch(200, 0);
|
299
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
318
300
|
auto bytes = sketch.serialize();
|
319
|
-
auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), 0);
|
301
|
+
auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), std::less<float>(), 0);
|
320
302
|
REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
|
321
303
|
REQUIRE(sketch2.is_empty() == sketch.is_empty());
|
322
304
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
323
305
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
324
306
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
325
|
-
|
326
|
-
|
307
|
+
REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
|
308
|
+
REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
|
327
309
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
|
328
310
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
|
329
311
|
}
|
330
312
|
|
331
313
|
SECTION("stream serialize deserialize one item") {
|
332
|
-
kll_float_sketch sketch(200, 0);
|
314
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
333
315
|
sketch.update(1.0f);
|
334
316
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
335
317
|
sketch.serialize(s);
|
336
318
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
|
337
|
-
auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), 0);
|
319
|
+
auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), std::less<float>(), 0);
|
338
320
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
|
339
321
|
REQUIRE(s.tellg() == s.tellp());
|
340
322
|
REQUIRE_FALSE(sketch2.is_empty());
|
341
323
|
REQUIRE_FALSE(sketch2.is_estimation_mode());
|
342
324
|
REQUIRE(sketch2.get_n() == 1);
|
343
325
|
REQUIRE(sketch2.get_num_retained() == 1);
|
344
|
-
REQUIRE(sketch2.
|
345
|
-
REQUIRE(sketch2.
|
326
|
+
REQUIRE(sketch2.get_min_item() == 1.0);
|
327
|
+
REQUIRE(sketch2.get_max_item() == 1.0);
|
346
328
|
REQUIRE(sketch2.get_quantile(0.5) == 1.0);
|
347
|
-
REQUIRE(sketch2.get_rank(1) == 0.0);
|
348
|
-
REQUIRE(sketch2.get_rank(2) == 1.0);
|
329
|
+
REQUIRE(sketch2.get_rank(1, false) == 0.0);
|
330
|
+
REQUIRE(sketch2.get_rank(2, false) == 1.0);
|
349
331
|
}
|
350
332
|
|
351
333
|
SECTION("bytes serialize deserialize one item") {
|
352
|
-
kll_float_sketch sketch(200, 0);
|
334
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
353
335
|
sketch.update(1.0f);
|
354
336
|
auto bytes = sketch.serialize();
|
355
337
|
REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
|
356
|
-
auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), 0);
|
338
|
+
auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), std::less<float>(), 0);
|
357
339
|
REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
|
358
340
|
REQUIRE_FALSE(sketch2.is_empty());
|
359
341
|
REQUIRE_FALSE(sketch2.is_estimation_mode());
|
360
342
|
REQUIRE(sketch2.get_n() == 1);
|
361
343
|
REQUIRE(sketch2.get_num_retained() == 1);
|
362
|
-
REQUIRE(sketch2.
|
363
|
-
REQUIRE(sketch2.
|
344
|
+
REQUIRE(sketch2.get_min_item() == 1.0);
|
345
|
+
REQUIRE(sketch2.get_max_item() == 1.0);
|
364
346
|
REQUIRE(sketch2.get_quantile(0.5) == 1.0);
|
365
|
-
REQUIRE(sketch2.get_rank(1) == 0.0);
|
366
|
-
REQUIRE(sketch2.get_rank(2) == 1.0);
|
347
|
+
REQUIRE(sketch2.get_rank(1, false) == 0.0);
|
348
|
+
REQUIRE(sketch2.get_rank(2, false) == 1.0);
|
367
349
|
}
|
368
350
|
|
369
351
|
SECTION("deserialize one item v1") {
|
370
352
|
std::ifstream is;
|
371
353
|
is.exceptions(std::ios::failbit | std::ios::badbit);
|
372
354
|
is.open(testBinaryInputPath + "kll_sketch_float_one_item_v1.sk", std::ios::binary);
|
373
|
-
auto sketch = kll_float_sketch::deserialize(is, serde<float>(), 0);
|
355
|
+
auto sketch = kll_float_sketch::deserialize(is, serde<float>(), std::less<float>(), 0);
|
374
356
|
REQUIRE_FALSE(sketch.is_empty());
|
375
357
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
376
358
|
REQUIRE(sketch.get_n() == 1);
|
377
359
|
REQUIRE(sketch.get_num_retained() == 1);
|
378
|
-
REQUIRE(sketch.
|
379
|
-
REQUIRE(sketch.
|
360
|
+
REQUIRE(sketch.get_min_item() == 1.0);
|
361
|
+
REQUIRE(sketch.get_max_item() == 1.0);
|
380
362
|
}
|
381
363
|
|
382
364
|
SECTION("stream serialize deserialize three items") {
|
383
|
-
kll_float_sketch sketch(200, 0);
|
365
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
384
366
|
sketch.update(1.0f);
|
385
367
|
sketch.update(2.0f);
|
386
368
|
sketch.update(3.0f);
|
387
369
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
388
370
|
sketch.serialize(s);
|
389
371
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
|
390
|
-
auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), 0);
|
372
|
+
auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), std::less<float>(), 0);
|
391
373
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
|
392
374
|
REQUIRE(s.tellg() == s.tellp());
|
393
375
|
REQUIRE_FALSE(sketch2.is_empty());
|
394
376
|
REQUIRE_FALSE(sketch2.is_estimation_mode());
|
395
377
|
REQUIRE(sketch2.get_n() == 3);
|
396
378
|
REQUIRE(sketch2.get_num_retained() == 3);
|
397
|
-
REQUIRE(sketch2.
|
398
|
-
REQUIRE(sketch2.
|
379
|
+
REQUIRE(sketch2.get_min_item() == 1.0);
|
380
|
+
REQUIRE(sketch2.get_max_item() == 3.0);
|
399
381
|
}
|
400
382
|
|
401
383
|
SECTION("bytes serialize deserialize three items") {
|
402
|
-
kll_float_sketch sketch(200, 0);
|
384
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
403
385
|
sketch.update(1.0f);
|
404
386
|
sketch.update(2.0f);
|
405
387
|
sketch.update(3.0f);
|
406
388
|
auto bytes = sketch.serialize();
|
407
389
|
REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
|
408
|
-
auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), 0);
|
390
|
+
auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), std::less<float>(), 0);
|
409
391
|
REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
|
410
392
|
REQUIRE_FALSE(sketch2.is_empty());
|
411
393
|
REQUIRE_FALSE(sketch2.is_estimation_mode());
|
412
394
|
REQUIRE(sketch2.get_n() == 3);
|
413
395
|
REQUIRE(sketch2.get_num_retained() == 3);
|
414
|
-
REQUIRE(sketch2.
|
415
|
-
REQUIRE(sketch2.
|
396
|
+
REQUIRE(sketch2.get_min_item() == 1.0);
|
397
|
+
REQUIRE(sketch2.get_max_item() == 3.0);
|
416
398
|
}
|
417
399
|
|
418
400
|
SECTION("stream serialize deserialize many floats") {
|
419
|
-
kll_float_sketch sketch(200, 0);
|
401
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
420
402
|
const int n = 1000;
|
421
403
|
for (int i = 0; i < n; i++) sketch.update(static_cast<float>(i));
|
422
404
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
423
405
|
sketch.serialize(s);
|
424
406
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch.get_serialized_size_bytes());
|
425
|
-
auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), 0);
|
407
|
+
auto sketch2 = kll_float_sketch::deserialize(s, serde<float>(), std::less<float>(), 0);
|
426
408
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
|
427
409
|
REQUIRE(s.tellg() == s.tellp());
|
428
410
|
REQUIRE(sketch2.is_empty() == sketch.is_empty());
|
429
411
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
430
412
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
431
413
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
432
|
-
REQUIRE(sketch2.
|
433
|
-
REQUIRE(sketch2.
|
414
|
+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
|
415
|
+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
|
434
416
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
|
435
417
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
|
436
418
|
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
|
@@ -439,27 +421,27 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
439
421
|
}
|
440
422
|
|
441
423
|
SECTION("bytes serialize deserialize many floats") {
|
442
|
-
kll_float_sketch sketch(200, 0);
|
424
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
443
425
|
const int n = 1000;
|
444
426
|
for (int i = 0; i < n; i++) sketch.update(static_cast<float>(i));
|
445
427
|
auto bytes = sketch.serialize();
|
446
428
|
REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
|
447
|
-
auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), 0);
|
429
|
+
auto sketch2 = kll_float_sketch::deserialize(bytes.data(), bytes.size(), serde<float>(), std::less<float>(), 0);
|
448
430
|
REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
|
449
431
|
REQUIRE(sketch2.is_empty() == sketch.is_empty());
|
450
432
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
451
433
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
452
434
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
453
|
-
REQUIRE(sketch2.
|
454
|
-
REQUIRE(sketch2.
|
435
|
+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
|
436
|
+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
|
455
437
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
|
456
438
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
|
457
439
|
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
|
458
440
|
REQUIRE(sketch2.get_rank(0) == sketch.get_rank(0));
|
459
441
|
REQUIRE(sketch2.get_rank(static_cast<float>(n)) == sketch.get_rank(static_cast<float>(n)));
|
460
|
-
REQUIRE_THROWS_AS(
|
461
|
-
REQUIRE_THROWS_AS(
|
462
|
-
REQUIRE_THROWS_AS(
|
442
|
+
REQUIRE_THROWS_AS(kll_float_sketch::deserialize(bytes.data(), 7, serde<float>(), std::less<float>(), 0), std::out_of_range);
|
443
|
+
REQUIRE_THROWS_AS(kll_float_sketch::deserialize(bytes.data(), 15, serde<float>(), std::less<float>(), 0), std::out_of_range);
|
444
|
+
REQUIRE_THROWS_AS(kll_float_sketch::deserialize(bytes.data(), bytes.size() - 1, serde<float>(), std::less<float>(), 0), std::out_of_range);
|
463
445
|
}
|
464
446
|
|
465
447
|
SECTION("bytes serialize deserialize many ints") {
|
@@ -474,8 +456,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
474
456
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
475
457
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
476
458
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
477
|
-
REQUIRE(sketch2.
|
478
|
-
REQUIRE(sketch2.
|
459
|
+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
|
460
|
+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
|
479
461
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
|
480
462
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
|
481
463
|
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
|
@@ -499,7 +481,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
499
481
|
}
|
500
482
|
|
501
483
|
SECTION("out of order split points, float") {
|
502
|
-
kll_float_sketch sketch(200, 0);
|
484
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
503
485
|
sketch.update(0); // has to be non-empty to reach the check
|
504
486
|
float split_points[2] = {1, 0};
|
505
487
|
REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 2), std::invalid_argument);
|
@@ -513,48 +495,48 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
513
495
|
}
|
514
496
|
|
515
497
|
SECTION("NaN split point") {
|
516
|
-
kll_float_sketch sketch(200, 0);
|
498
|
+
kll_float_sketch sketch(200, std::less<float>(), 0);
|
517
499
|
sketch.update(0); // has to be non-empty to reach the check
|
518
500
|
float split_points[1] = {std::numeric_limits<float>::quiet_NaN()};
|
519
501
|
REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::invalid_argument);
|
520
502
|
}
|
521
503
|
|
522
504
|
SECTION("merge") {
|
523
|
-
kll_float_sketch sketch1(200, 0);
|
524
|
-
kll_float_sketch sketch2(200, 0);
|
505
|
+
kll_float_sketch sketch1(200, std::less<float>(), 0);
|
506
|
+
kll_float_sketch sketch2(200, std::less<float>(), 0);
|
525
507
|
const int n = 10000;
|
526
508
|
for (int i = 0; i < n; i++) {
|
527
509
|
sketch1.update(static_cast<float>(i));
|
528
510
|
sketch2.update(static_cast<float>((2 * n) - i - 1));
|
529
511
|
}
|
530
512
|
|
531
|
-
REQUIRE(sketch1.
|
532
|
-
REQUIRE(sketch1.
|
533
|
-
REQUIRE(sketch2.
|
534
|
-
REQUIRE(sketch2.
|
513
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
514
|
+
REQUIRE(sketch1.get_max_item() == n - 1);
|
515
|
+
REQUIRE(sketch2.get_min_item() == n);
|
516
|
+
REQUIRE(sketch2.get_max_item() == 2.0f * n - 1);
|
535
517
|
|
536
518
|
sketch1.merge(sketch2);
|
537
519
|
|
538
520
|
REQUIRE_FALSE(sketch1.is_empty());
|
539
521
|
REQUIRE(sketch1.get_n() == 2 * n);
|
540
|
-
REQUIRE(sketch1.
|
541
|
-
REQUIRE(sketch1.
|
522
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
523
|
+
REQUIRE(sketch1.get_max_item() == 2.0f * n - 1);
|
542
524
|
REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_200));
|
543
525
|
}
|
544
526
|
|
545
527
|
SECTION("merge lower k") {
|
546
|
-
kll_float_sketch sketch1(256, 0);
|
547
|
-
kll_float_sketch sketch2(128, 0);
|
528
|
+
kll_float_sketch sketch1(256, std::less<float>(), 0);
|
529
|
+
kll_float_sketch sketch2(128, std::less<float>(), 0);
|
548
530
|
const int n = 10000;
|
549
531
|
for (int i = 0; i < n; i++) {
|
550
532
|
sketch1.update(static_cast<float>(i));
|
551
533
|
sketch2.update(static_cast<float>((2 * n) - i - 1));
|
552
534
|
}
|
553
535
|
|
554
|
-
REQUIRE(sketch1.
|
555
|
-
REQUIRE(sketch1.
|
556
|
-
REQUIRE(sketch2.
|
557
|
-
REQUIRE(sketch2.
|
536
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
537
|
+
REQUIRE(sketch1.get_max_item() == n - 1);
|
538
|
+
REQUIRE(sketch2.get_min_item() == n);
|
539
|
+
REQUIRE(sketch2.get_max_item() == 2.0f * n - 1);
|
558
540
|
|
559
541
|
REQUIRE(sketch1.get_k() == 256);
|
560
542
|
REQUIRE(sketch2.get_k() == 128);
|
@@ -570,14 +552,14 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
570
552
|
|
571
553
|
REQUIRE_FALSE(sketch1.is_empty());
|
572
554
|
REQUIRE(sketch1.get_n() == 2 * n);
|
573
|
-
REQUIRE(sketch1.
|
574
|
-
REQUIRE(sketch1.
|
555
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
556
|
+
REQUIRE(sketch1.get_max_item() == 2.0f * n - 1);
|
575
557
|
REQUIRE(sketch1.get_quantile(0.5) == Approx(n).margin(n * RANK_EPS_FOR_K_200));
|
576
558
|
}
|
577
559
|
|
578
560
|
SECTION("merge exact mode, lower k") {
|
579
|
-
kll_float_sketch sketch1(256, 0);
|
580
|
-
kll_float_sketch sketch2(128, 0);
|
561
|
+
kll_float_sketch sketch1(256, std::less<float>(), 0);
|
562
|
+
kll_float_sketch sketch2(128, std::less<float>(), 0);
|
581
563
|
const int n = 10000;
|
582
564
|
for (int i = 0; i < n; i++) {
|
583
565
|
sketch1.update(static_cast<float>(i));
|
@@ -590,8 +572,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
590
572
|
|
591
573
|
REQUIRE_FALSE(sketch1.is_empty());
|
592
574
|
REQUIRE(sketch1.get_n() == n);
|
593
|
-
REQUIRE(sketch1.
|
594
|
-
REQUIRE(sketch1.
|
575
|
+
REQUIRE(sketch1.get_min_item() == 0.0f);
|
576
|
+
REQUIRE(sketch1.get_max_item() == n - 1);
|
595
577
|
REQUIRE(sketch1.get_quantile(0.5) == Approx(n / 2).margin(n / 2 * RANK_EPS_FOR_K_200));
|
596
578
|
|
597
579
|
sketch2.update(0);
|
@@ -601,29 +583,29 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
601
583
|
}
|
602
584
|
|
603
585
|
SECTION("merge min value from other") {
|
604
|
-
kll_float_sketch sketch1(200, 0);
|
605
|
-
kll_float_sketch sketch2(200, 0);
|
586
|
+
kll_float_sketch sketch1(200, std::less<float>(), 0);
|
587
|
+
kll_float_sketch sketch2(200, std::less<float>(), 0);
|
606
588
|
sketch1.update(1.0f);
|
607
589
|
sketch2.update(2.0f);
|
608
590
|
sketch2.merge(sketch1);
|
609
|
-
REQUIRE(sketch2.
|
610
|
-
REQUIRE(sketch2.
|
591
|
+
REQUIRE(sketch2.get_min_item() == 1.0f);
|
592
|
+
REQUIRE(sketch2.get_max_item() == 2.0f);
|
611
593
|
}
|
612
594
|
|
613
595
|
SECTION("merge min and max values from other") {
|
614
|
-
kll_float_sketch sketch1(200, 0);
|
596
|
+
kll_float_sketch sketch1(200, std::less<float>(), 0);
|
615
597
|
for (int i = 0; i < 1000000; i++) sketch1.update(static_cast<float>(i));
|
616
|
-
kll_float_sketch sketch2(200, 0);
|
598
|
+
kll_float_sketch sketch2(200, std::less<float>(), 0);
|
617
599
|
sketch2.merge(sketch1);
|
618
|
-
REQUIRE(sketch2.
|
619
|
-
REQUIRE(sketch2.
|
600
|
+
REQUIRE(sketch2.get_min_item() == 0.0f);
|
601
|
+
REQUIRE(sketch2.get_max_item() == 999999.0f);
|
620
602
|
}
|
621
603
|
|
622
604
|
SECTION("sketch of ints") {
|
623
605
|
kll_sketch<int> sketch;
|
624
606
|
REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
|
625
|
-
REQUIRE_THROWS_AS(sketch.
|
626
|
-
REQUIRE_THROWS_AS(sketch.
|
607
|
+
REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
|
608
|
+
REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
|
627
609
|
|
628
610
|
const int n = 1000;
|
629
611
|
for (int i = 0; i < n; i++) sketch.update(i);
|
@@ -638,8 +620,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
638
620
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
639
621
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
640
622
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
641
|
-
REQUIRE(sketch2.
|
642
|
-
REQUIRE(sketch2.
|
623
|
+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
|
624
|
+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
|
643
625
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch.get_normalized_rank_error(false));
|
644
626
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch.get_normalized_rank_error(true));
|
645
627
|
REQUIRE(sketch2.get_quantile(0.5) == sketch.get_quantile(0.5));
|
@@ -648,30 +630,30 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
648
630
|
}
|
649
631
|
|
650
632
|
SECTION("sketch of strings stream") {
|
651
|
-
kll_string_sketch sketch1(200, 0);
|
633
|
+
kll_string_sketch sketch1(200, std::less<std::string>(), 0);
|
652
634
|
REQUIRE_THROWS_AS(sketch1.get_quantile(0), std::runtime_error);
|
653
|
-
REQUIRE_THROWS_AS(sketch1.
|
654
|
-
REQUIRE_THROWS_AS(sketch1.
|
635
|
+
REQUIRE_THROWS_AS(sketch1.get_min_item(), std::runtime_error);
|
636
|
+
REQUIRE_THROWS_AS(sketch1.get_max_item(), std::runtime_error);
|
655
637
|
REQUIRE(sketch1.get_serialized_size_bytes() == 8);
|
656
638
|
|
657
639
|
const int n = 1000;
|
658
640
|
for (int i = 0; i < n; i++) sketch1.update(std::to_string(i));
|
659
641
|
|
660
|
-
REQUIRE(sketch1.
|
661
|
-
REQUIRE(sketch1.
|
642
|
+
REQUIRE(sketch1.get_min_item() == std::string("0"));
|
643
|
+
REQUIRE(sketch1.get_max_item() == std::string("999"));
|
662
644
|
|
663
645
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
664
646
|
sketch1.serialize(s);
|
665
647
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch1.get_serialized_size_bytes());
|
666
|
-
auto sketch2 = kll_string_sketch::deserialize(s,
|
648
|
+
auto sketch2 = kll_string_sketch::deserialize(s, serde<std::string>(), std::less<std::string>(), 0);
|
667
649
|
REQUIRE(static_cast<size_t>(s.tellp()) == sketch2.get_serialized_size_bytes());
|
668
650
|
REQUIRE(s.tellg() == s.tellp());
|
669
651
|
REQUIRE(sketch2.is_empty() == sketch1.is_empty());
|
670
652
|
REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
|
671
653
|
REQUIRE(sketch2.get_n() == sketch1.get_n());
|
672
654
|
REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
|
673
|
-
REQUIRE(sketch2.
|
674
|
-
REQUIRE(sketch2.
|
655
|
+
REQUIRE(sketch2.get_min_item() == sketch1.get_min_item());
|
656
|
+
REQUIRE(sketch2.get_max_item() == sketch1.get_max_item());
|
675
657
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch1.get_normalized_rank_error(false));
|
676
658
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch1.get_normalized_rank_error(true));
|
677
659
|
REQUIRE(sketch2.get_quantile(0.5) == sketch1.get_quantile(0.5));
|
@@ -687,28 +669,29 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
687
669
|
}
|
688
670
|
|
689
671
|
SECTION("sketch of strings bytes") {
|
690
|
-
kll_string_sketch sketch1(200, 0);
|
672
|
+
kll_string_sketch sketch1(200, std::less<std::string>(), 0);
|
691
673
|
REQUIRE_THROWS_AS(sketch1.get_quantile(0), std::runtime_error);
|
692
|
-
REQUIRE_THROWS_AS(sketch1.
|
693
|
-
REQUIRE_THROWS_AS(sketch1.
|
674
|
+
REQUIRE_THROWS_AS(sketch1.get_min_item(), std::runtime_error);
|
675
|
+
REQUIRE_THROWS_AS(sketch1.get_max_item(), std::runtime_error);
|
694
676
|
REQUIRE(sketch1.get_serialized_size_bytes() == 8);
|
695
677
|
|
696
678
|
const int n = 1000;
|
697
679
|
for (int i = 0; i < n; i++) sketch1.update(std::to_string(i));
|
698
680
|
|
699
|
-
REQUIRE(sketch1.
|
700
|
-
REQUIRE(sketch1.
|
681
|
+
REQUIRE(sketch1.get_min_item() == std::string("0"));
|
682
|
+
REQUIRE(sketch1.get_max_item() == std::string("999"));
|
701
683
|
|
702
684
|
auto bytes = sketch1.serialize();
|
703
685
|
REQUIRE(bytes.size() == sketch1.get_serialized_size_bytes());
|
704
|
-
auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size(), serde<std::string>(),
|
686
|
+
auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size(), serde<std::string>(),
|
687
|
+
std::less<std::string>(), 0);
|
705
688
|
REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
|
706
689
|
REQUIRE(sketch2.is_empty() == sketch1.is_empty());
|
707
690
|
REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
|
708
691
|
REQUIRE(sketch2.get_n() == sketch1.get_n());
|
709
692
|
REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
|
710
|
-
REQUIRE(sketch2.
|
711
|
-
REQUIRE(sketch2.
|
693
|
+
REQUIRE(sketch2.get_min_item() == sketch1.get_min_item());
|
694
|
+
REQUIRE(sketch2.get_max_item() == sketch1.get_max_item());
|
712
695
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch1.get_normalized_rank_error(false));
|
713
696
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch1.get_normalized_rank_error(true));
|
714
697
|
REQUIRE(sketch2.get_quantile(0.5) == sketch1.get_quantile(0.5));
|
@@ -718,11 +701,12 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
718
701
|
|
719
702
|
|
720
703
|
SECTION("sketch of strings, single item, bytes") {
|
721
|
-
kll_string_sketch sketch1(200, 0);
|
704
|
+
kll_string_sketch sketch1(200, std::less<std::string>(), 0);
|
722
705
|
sketch1.update("a");
|
723
706
|
auto bytes = sketch1.serialize();
|
724
707
|
REQUIRE(bytes.size() == sketch1.get_serialized_size_bytes());
|
725
|
-
auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size(), serde<std::string>(),
|
708
|
+
auto sketch2 = kll_string_sketch::deserialize(bytes.data(), bytes.size(), serde<std::string>(),
|
709
|
+
std::less<std::string>(), 0);
|
726
710
|
REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
|
727
711
|
}
|
728
712
|
|
@@ -753,14 +737,14 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
753
737
|
// move constructor
|
754
738
|
kll_sketch<int> sketch2(std::move(sketch1));
|
755
739
|
for (int i = 0; i < n; i++) {
|
756
|
-
REQUIRE(sketch2.get_rank(i) == (double) i / n);
|
740
|
+
REQUIRE(sketch2.get_rank(i, false) == (double) i / n);
|
757
741
|
}
|
758
742
|
|
759
743
|
// move assignment
|
760
744
|
kll_sketch<int> sketch3;
|
761
745
|
sketch3 = std::move(sketch2);
|
762
746
|
for (int i = 0; i < n; i++) {
|
763
|
-
REQUIRE(sketch3.get_rank(i) == (double) i / n);
|
747
|
+
REQUIRE(sketch3.get_rank(i, false) == (double) i / n);
|
764
748
|
}
|
765
749
|
}
|
766
750
|
|
@@ -795,44 +779,24 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
795
779
|
kll.update(3);
|
796
780
|
kll.update(1);
|
797
781
|
|
798
|
-
{
|
799
|
-
auto view = kll.get_sorted_view(
|
782
|
+
{
|
783
|
+
auto view = kll.get_sorted_view();
|
800
784
|
REQUIRE(view.size() == 3);
|
801
785
|
auto it = view.begin();
|
802
|
-
REQUIRE(it->first == 1);
|
786
|
+
REQUIRE(it->first == 1); // operator->
|
787
|
+
REQUIRE((*it).first == 1); // operator*
|
803
788
|
REQUIRE(it->second == 1);
|
789
|
+
REQUIRE(it.get_weight() == 1);
|
804
790
|
++it;
|
805
791
|
REQUIRE(it->first == 2);
|
806
|
-
REQUIRE(it->second == 1);
|
807
|
-
++it;
|
808
|
-
REQUIRE(it->first == 3);
|
809
|
-
REQUIRE(it->second == 1);
|
810
|
-
}
|
811
|
-
{ // cumulative, non-inclusive, using operator->
|
812
|
-
auto view = kll.get_sorted_view(true);
|
813
|
-
REQUIRE(view.size() == 3);
|
814
|
-
auto it = view.begin();
|
815
|
-
REQUIRE(it->first == 1);
|
816
|
-
REQUIRE(it->second == 0);
|
817
|
-
++it;
|
818
|
-
REQUIRE(it->first == 2);
|
819
|
-
REQUIRE(it->second == 1);
|
820
|
-
++it;
|
821
|
-
REQUIRE(it->first == 3);
|
822
792
|
REQUIRE(it->second == 2);
|
823
|
-
|
824
|
-
{ // cumulative, inclusive, using operator*
|
825
|
-
auto view = kll.get_sorted_view<true>(true);
|
826
|
-
REQUIRE(view.size() == 3);
|
827
|
-
auto it = view.begin();
|
828
|
-
REQUIRE((*it).first == 1);
|
829
|
-
REQUIRE((*it).second == 1);
|
793
|
+
REQUIRE(it.get_weight() == 1);
|
830
794
|
++it;
|
831
|
-
REQUIRE(
|
832
|
-
REQUIRE(
|
795
|
+
REQUIRE(it->first == 3);
|
796
|
+
REQUIRE(it->second == 3);
|
797
|
+
REQUIRE(it.get_weight() == 1);
|
833
798
|
++it;
|
834
|
-
REQUIRE(
|
835
|
-
REQUIRE((*it).second == 3);
|
799
|
+
REQUIRE(it == view.end());
|
836
800
|
}
|
837
801
|
}
|
838
802
|
|
@@ -854,8 +818,8 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
854
818
|
REQUIRE(kll_float.get_n() == kll_double.get_n());
|
855
819
|
REQUIRE(kll_float.get_num_retained() == kll_double.get_num_retained());
|
856
820
|
|
857
|
-
auto sv_float = kll_float.get_sorted_view(
|
858
|
-
auto sv_double = kll_double.get_sorted_view(
|
821
|
+
auto sv_float = kll_float.get_sorted_view();
|
822
|
+
auto sv_double = kll_double.get_sorted_view();
|
859
823
|
auto sv_float_it = sv_float.begin();
|
860
824
|
auto sv_double_it = sv_double.begin();
|
861
825
|
while (sv_float_it != sv_float.end()) {
|