datasketches 0.2.7 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/ext/datasketches/kll_wrapper.cpp +20 -20
- data/ext/datasketches/theta_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
- data/vendor/datasketches-cpp/MANIFEST.in +21 -2
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
- data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
- data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
- data/vendor/datasketches-cpp/pyproject.toml +17 -12
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +63 -68
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
- data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/setup.py +14 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
- data/vendor/datasketches-cpp/tox.ini +26 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
- data/vendor/datasketches-cpp/version.cfg.in +1 -0
- metadata +15 -6
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -0,0 +1,459 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#include <catch2/catch.hpp>
|
21
|
+
|
22
|
+
#include <vector>
|
23
|
+
#include <utility>
|
24
|
+
|
25
|
+
#include <iostream>
|
26
|
+
|
27
|
+
#include "quantiles_sorted_view.hpp"
|
28
|
+
|
29
|
+
namespace datasketches {
|
30
|
+
|
31
|
+
TEST_CASE("empty", "sorted view") {
|
32
|
+
auto view = quantiles_sorted_view<float, std::less<float>, std::allocator<float>>(1, std::less<float>(), std::allocator<float>());
|
33
|
+
REQUIRE_THROWS_AS(view.get_rank(0), std::runtime_error);
|
34
|
+
REQUIRE_THROWS_AS(view.get_quantile(0), std::runtime_error);
|
35
|
+
const float split_points[1] {0};
|
36
|
+
REQUIRE_THROWS_AS(view.get_CDF(split_points, 1), std::runtime_error);
|
37
|
+
REQUIRE_THROWS_AS(view.get_PMF(split_points, 1), std::runtime_error);
|
38
|
+
}
|
39
|
+
|
40
|
+
TEST_CASE("set 0", "sorted view") {
|
41
|
+
auto view = quantiles_sorted_view<float, std::less<float>, std::allocator<float>>(1, std::less<float>(), std::allocator<float>());
|
42
|
+
std::vector<float> l0 {10};
|
43
|
+
view.add(l0.begin(), l0.end(), 1);
|
44
|
+
view.convert_to_cummulative();
|
45
|
+
REQUIRE(view.size() == 1);
|
46
|
+
|
47
|
+
auto it = view.begin();
|
48
|
+
// using operator->
|
49
|
+
REQUIRE(it->first == 10);
|
50
|
+
REQUIRE(it->second == 1);
|
51
|
+
// using operator*
|
52
|
+
REQUIRE((*it).first == 10);
|
53
|
+
REQUIRE((*it).second == 1);
|
54
|
+
REQUIRE(it.get_weight() == 1);
|
55
|
+
REQUIRE(it.get_cumulative_weight() == 1);
|
56
|
+
REQUIRE(it.get_cumulative_weight(false) == 0);
|
57
|
+
++it;
|
58
|
+
REQUIRE(it == view.end());
|
59
|
+
|
60
|
+
REQUIRE(view.get_rank(5, true) == 0);
|
61
|
+
REQUIRE(view.get_rank(10, true) == 1);
|
62
|
+
REQUIRE(view.get_rank(15, true) == 1);
|
63
|
+
|
64
|
+
REQUIRE(view.get_rank(5, false) == 0);
|
65
|
+
REQUIRE(view.get_rank(10, false) == 0);
|
66
|
+
REQUIRE(view.get_rank(15, false) == 1);
|
67
|
+
|
68
|
+
REQUIRE(view.get_quantile(0, true) == 10);
|
69
|
+
REQUIRE(view.get_quantile(0.5, true) == 10);
|
70
|
+
REQUIRE(view.get_quantile(1, true) == 10);
|
71
|
+
|
72
|
+
REQUIRE(view.get_quantile(0, false) == 10);
|
73
|
+
REQUIRE(view.get_quantile(0.5, false) == 10);
|
74
|
+
REQUIRE(view.get_quantile(1, false) == 10);
|
75
|
+
}
|
76
|
+
|
77
|
+
TEST_CASE("set 1", "sorted view") {
|
78
|
+
auto view = quantiles_sorted_view<float, std::less<float>, std::allocator<float>>(1, std::less<float>(), std::allocator<float>());
|
79
|
+
std::vector<float> l0 {10, 10};
|
80
|
+
view.add(l0.begin(), l0.end(), 1);
|
81
|
+
view.convert_to_cummulative();
|
82
|
+
REQUIRE(view.size() == 2);
|
83
|
+
|
84
|
+
auto it = view.begin();
|
85
|
+
REQUIRE(it->first == 10);
|
86
|
+
REQUIRE(it->second == 1);
|
87
|
+
REQUIRE(it.get_weight() == 1);
|
88
|
+
REQUIRE(it.get_cumulative_weight() == 1);
|
89
|
+
REQUIRE(it.get_cumulative_weight(false) == 0);
|
90
|
+
++it;
|
91
|
+
REQUIRE(it->first == 10);
|
92
|
+
REQUIRE(it->second == 2);
|
93
|
+
REQUIRE(it.get_weight() == 1);
|
94
|
+
REQUIRE(it.get_cumulative_weight() == 2);
|
95
|
+
REQUIRE(it.get_cumulative_weight(false) == 1);
|
96
|
+
++it;
|
97
|
+
REQUIRE(it == view.end());
|
98
|
+
|
99
|
+
REQUIRE(view.get_rank(5, true) == 0);
|
100
|
+
REQUIRE(view.get_rank(10, true) == 1);
|
101
|
+
REQUIRE(view.get_rank(15, true) == 1);
|
102
|
+
|
103
|
+
REQUIRE(view.get_rank(5, false) == 0);
|
104
|
+
REQUIRE(view.get_rank(10, false) == 0);
|
105
|
+
REQUIRE(view.get_rank(15, false) == 1);
|
106
|
+
|
107
|
+
REQUIRE(view.get_quantile(0, true) == 10);
|
108
|
+
REQUIRE(view.get_quantile(0.25, true) == 10);
|
109
|
+
REQUIRE(view.get_quantile(0.5, true) == 10);
|
110
|
+
REQUIRE(view.get_quantile(0.75, true) == 10);
|
111
|
+
REQUIRE(view.get_quantile(1, true) == 10);
|
112
|
+
|
113
|
+
REQUIRE(view.get_quantile(0, false) == 10);
|
114
|
+
REQUIRE(view.get_quantile(0.25, false) == 10);
|
115
|
+
REQUIRE(view.get_quantile(0.5, false) == 10);
|
116
|
+
REQUIRE(view.get_quantile(0.75, false) == 10);
|
117
|
+
REQUIRE(view.get_quantile(1, false) == 10);
|
118
|
+
}
|
119
|
+
|
120
|
+
TEST_CASE("set 2", "sorted view") {
|
121
|
+
auto view = quantiles_sorted_view<float, std::less<float>, std::allocator<float>>(1, std::less<float>(), std::allocator<float>());
|
122
|
+
std::vector<float> l1 {10, 20, 30, 40};
|
123
|
+
view.add(l1.begin(), l1.end(), 2);
|
124
|
+
view.convert_to_cummulative();
|
125
|
+
REQUIRE(view.size() == 4);
|
126
|
+
|
127
|
+
auto it = view.begin();
|
128
|
+
REQUIRE(it->first == 10);
|
129
|
+
REQUIRE(it->second == 2);
|
130
|
+
REQUIRE(it.get_weight() == 2);
|
131
|
+
REQUIRE(it.get_cumulative_weight() == 2);
|
132
|
+
REQUIRE(it.get_cumulative_weight(false) == 0);
|
133
|
+
++it;
|
134
|
+
REQUIRE(it->first == 20);
|
135
|
+
REQUIRE(it->second == 4);
|
136
|
+
REQUIRE(it.get_weight() == 2);
|
137
|
+
REQUIRE(it.get_cumulative_weight() == 4);
|
138
|
+
REQUIRE(it.get_cumulative_weight(false) == 2);
|
139
|
+
++it;
|
140
|
+
REQUIRE(it->first == 30);
|
141
|
+
REQUIRE(it->second == 6);
|
142
|
+
REQUIRE(it.get_weight() == 2);
|
143
|
+
REQUIRE(it.get_cumulative_weight() == 6);
|
144
|
+
REQUIRE(it.get_cumulative_weight(false) == 4);
|
145
|
+
++it;
|
146
|
+
REQUIRE(it->first == 40);
|
147
|
+
REQUIRE(it->second == 8);
|
148
|
+
REQUIRE(it.get_weight() == 2);
|
149
|
+
REQUIRE(it.get_cumulative_weight() == 8);
|
150
|
+
REQUIRE(it.get_cumulative_weight(false) == 6);
|
151
|
+
++it;
|
152
|
+
REQUIRE(it == view.end());
|
153
|
+
|
154
|
+
REQUIRE(view.get_rank(5, true) == 0);
|
155
|
+
REQUIRE(view.get_rank(10, true) == 0.25);
|
156
|
+
REQUIRE(view.get_rank(15, true) == 0.25);
|
157
|
+
REQUIRE(view.get_rank(20, true) == 0.5);
|
158
|
+
REQUIRE(view.get_rank(25, true) == 0.5);
|
159
|
+
REQUIRE(view.get_rank(30, true) == 0.75);
|
160
|
+
REQUIRE(view.get_rank(35, true) == 0.75);
|
161
|
+
REQUIRE(view.get_rank(40, true) == 1);
|
162
|
+
REQUIRE(view.get_rank(45, true) == 1);
|
163
|
+
|
164
|
+
REQUIRE(view.get_rank(5, false) == 0);
|
165
|
+
REQUIRE(view.get_rank(10, false) == 0);
|
166
|
+
REQUIRE(view.get_rank(15, false) == 0.25);
|
167
|
+
REQUIRE(view.get_rank(20, false) == 0.25);
|
168
|
+
REQUIRE(view.get_rank(25, false) == 0.5);
|
169
|
+
REQUIRE(view.get_rank(30, false) == 0.5);
|
170
|
+
REQUIRE(view.get_rank(35, false) == 0.75);
|
171
|
+
REQUIRE(view.get_rank(40, false) == 0.75);
|
172
|
+
REQUIRE(view.get_rank(45, false) == 1);
|
173
|
+
|
174
|
+
REQUIRE(view.get_quantile(0, true) == 10);
|
175
|
+
REQUIRE(view.get_quantile(0.0625, true) == 10);
|
176
|
+
REQUIRE(view.get_quantile(0.125, true) == 10);
|
177
|
+
REQUIRE(view.get_quantile(0.1875, true) == 10);
|
178
|
+
REQUIRE(view.get_quantile(0.25, true) == 10);
|
179
|
+
REQUIRE(view.get_quantile(0.3125, true) == 20);
|
180
|
+
REQUIRE(view.get_quantile(0.375, true) == 20);
|
181
|
+
REQUIRE(view.get_quantile(0.4375, true) == 20);
|
182
|
+
REQUIRE(view.get_quantile(0.5, true) == 20);
|
183
|
+
REQUIRE(view.get_quantile(0.5625, true) == 30);
|
184
|
+
REQUIRE(view.get_quantile(0.625, true) == 30);
|
185
|
+
REQUIRE(view.get_quantile(0.6875, true) == 30);
|
186
|
+
REQUIRE(view.get_quantile(0.75, true) == 30);
|
187
|
+
REQUIRE(view.get_quantile(0.8125, true) == 40);
|
188
|
+
REQUIRE(view.get_quantile(0.875, true) == 40);
|
189
|
+
REQUIRE(view.get_quantile(0.9375, true) == 40);
|
190
|
+
REQUIRE(view.get_quantile(1, true) == 40);
|
191
|
+
|
192
|
+
REQUIRE(view.get_quantile(0, false) == 10);
|
193
|
+
REQUIRE(view.get_quantile(0.0625, false) == 10);
|
194
|
+
REQUIRE(view.get_quantile(0.125, false) == 10);
|
195
|
+
REQUIRE(view.get_quantile(0.1875, false) == 10);
|
196
|
+
REQUIRE(view.get_quantile(0.25, false) == 20);
|
197
|
+
REQUIRE(view.get_quantile(0.3125, false) == 20);
|
198
|
+
REQUIRE(view.get_quantile(0.375, false) == 20);
|
199
|
+
REQUIRE(view.get_quantile(0.4375, false) == 20);
|
200
|
+
REQUIRE(view.get_quantile(0.5, false) == 30);
|
201
|
+
REQUIRE(view.get_quantile(0.5625, false) == 30);
|
202
|
+
REQUIRE(view.get_quantile(0.625, false) == 30);
|
203
|
+
REQUIRE(view.get_quantile(0.6875, false) == 30);
|
204
|
+
REQUIRE(view.get_quantile(0.75, false) == 40);
|
205
|
+
REQUIRE(view.get_quantile(0.8125, false) == 40);
|
206
|
+
REQUIRE(view.get_quantile(0.875, false) == 40);
|
207
|
+
REQUIRE(view.get_quantile(0.9375, false) == 40);
|
208
|
+
REQUIRE(view.get_quantile(1, false) == 40);
|
209
|
+
}
|
210
|
+
|
211
|
+
TEST_CASE("set 3", "sorted view") {
|
212
|
+
auto view = quantiles_sorted_view<float, std::less<float>, std::allocator<float>>(8, std::less<float>(), std::allocator<float>());
|
213
|
+
std::vector<float> l1 {10, 20, 20, 30, 30, 30, 40, 50};
|
214
|
+
view.add(l1.begin(), l1.end(), 2);
|
215
|
+
view.convert_to_cummulative();
|
216
|
+
REQUIRE(view.size() == 8);
|
217
|
+
|
218
|
+
auto it = view.begin();
|
219
|
+
REQUIRE(it->first == 10);
|
220
|
+
REQUIRE(it->second == 2);
|
221
|
+
REQUIRE(it.get_weight() == 2);
|
222
|
+
++it;
|
223
|
+
REQUIRE(it->first == 20);
|
224
|
+
REQUIRE(it->second == 4);
|
225
|
+
REQUIRE(it.get_weight() == 2);
|
226
|
+
++it;
|
227
|
+
REQUIRE(it->first == 20);
|
228
|
+
REQUIRE(it->second == 6);
|
229
|
+
REQUIRE(it.get_weight() == 2);
|
230
|
+
++it;
|
231
|
+
REQUIRE(it->first == 30);
|
232
|
+
REQUIRE(it->second == 8);
|
233
|
+
REQUIRE(it.get_weight() == 2);
|
234
|
+
++it;
|
235
|
+
REQUIRE(it->first == 30);
|
236
|
+
REQUIRE(it->second == 10);
|
237
|
+
REQUIRE(it.get_weight() == 2);
|
238
|
+
++it;
|
239
|
+
REQUIRE(it->first == 30);
|
240
|
+
REQUIRE(it->second == 12);
|
241
|
+
REQUIRE(it.get_weight() == 2);
|
242
|
+
++it;
|
243
|
+
REQUIRE(it->first == 40);
|
244
|
+
REQUIRE(it->second == 14);
|
245
|
+
REQUIRE(it.get_weight() == 2);
|
246
|
+
++it;
|
247
|
+
REQUIRE(it->first == 50);
|
248
|
+
REQUIRE(it->second == 16);
|
249
|
+
REQUIRE(it.get_weight() == 2);
|
250
|
+
|
251
|
+
REQUIRE(view.get_rank(5, true) == 0);
|
252
|
+
REQUIRE(view.get_rank(10, true) == 0.125);
|
253
|
+
REQUIRE(view.get_rank(15, true) == 0.125);
|
254
|
+
REQUIRE(view.get_rank(20, true) == 0.375);
|
255
|
+
REQUIRE(view.get_rank(25, true) == 0.375);
|
256
|
+
REQUIRE(view.get_rank(30, true) == 0.75);
|
257
|
+
REQUIRE(view.get_rank(35, true) == 0.75);
|
258
|
+
REQUIRE(view.get_rank(40, true) == 0.875);
|
259
|
+
REQUIRE(view.get_rank(45, true) == 0.875);
|
260
|
+
REQUIRE(view.get_rank(50, true) == 1);
|
261
|
+
REQUIRE(view.get_rank(55, true) == 1);
|
262
|
+
|
263
|
+
REQUIRE(view.get_rank(5, false) == 0);
|
264
|
+
REQUIRE(view.get_rank(10, false) == 0);
|
265
|
+
REQUIRE(view.get_rank(15, false) == 0.125);
|
266
|
+
REQUIRE(view.get_rank(20, false) == 0.125);
|
267
|
+
REQUIRE(view.get_rank(25, false) == 0.375);
|
268
|
+
REQUIRE(view.get_rank(30, false) == 0.375);
|
269
|
+
REQUIRE(view.get_rank(35, false) == 0.75);
|
270
|
+
REQUIRE(view.get_rank(40, false) == 0.75);
|
271
|
+
REQUIRE(view.get_rank(45, false) == 0.875);
|
272
|
+
REQUIRE(view.get_rank(50, false) == 0.875);
|
273
|
+
REQUIRE(view.get_rank(55, false) == 1);
|
274
|
+
|
275
|
+
REQUIRE(view.get_quantile(0, true) == 10);
|
276
|
+
REQUIRE(view.get_quantile(0.03125, true) == 10);
|
277
|
+
REQUIRE(view.get_quantile(0.0625, true) == 10);
|
278
|
+
REQUIRE(view.get_quantile(0.09375, true) == 10);
|
279
|
+
REQUIRE(view.get_quantile(0.125, true) == 10);
|
280
|
+
REQUIRE(view.get_quantile(0.15625, true) == 20);
|
281
|
+
REQUIRE(view.get_quantile(0.1875, true) == 20);
|
282
|
+
REQUIRE(view.get_quantile(0.21875, true) == 20);
|
283
|
+
REQUIRE(view.get_quantile(0.25, true) == 20);
|
284
|
+
REQUIRE(view.get_quantile(0.28125, true) == 20);
|
285
|
+
REQUIRE(view.get_quantile(0.3125, true) == 20);
|
286
|
+
REQUIRE(view.get_quantile(0.34375, true) == 20);
|
287
|
+
REQUIRE(view.get_quantile(0.375, true) == 20);
|
288
|
+
REQUIRE(view.get_quantile(0.40625, true) == 30);
|
289
|
+
REQUIRE(view.get_quantile(0.4375, true) == 30);
|
290
|
+
REQUIRE(view.get_quantile(0.46875, true) == 30);
|
291
|
+
REQUIRE(view.get_quantile(0.5, true) == 30);
|
292
|
+
REQUIRE(view.get_quantile(0.53125, true) == 30);
|
293
|
+
REQUIRE(view.get_quantile(0.5625, true) == 30);
|
294
|
+
REQUIRE(view.get_quantile(0.59375, true) == 30);
|
295
|
+
REQUIRE(view.get_quantile(0.625, true) == 30);
|
296
|
+
REQUIRE(view.get_quantile(0.65625, true) == 30);
|
297
|
+
REQUIRE(view.get_quantile(0.6875, true) == 30);
|
298
|
+
REQUIRE(view.get_quantile(0.71875, true) == 30);
|
299
|
+
REQUIRE(view.get_quantile(0.75, true) == 30);
|
300
|
+
REQUIRE(view.get_quantile(0.78125, true) == 40);
|
301
|
+
REQUIRE(view.get_quantile(0.8125, true) == 40);
|
302
|
+
REQUIRE(view.get_quantile(0.84375, true) == 40);
|
303
|
+
REQUIRE(view.get_quantile(0.875, true) == 40);
|
304
|
+
REQUIRE(view.get_quantile(0.90625, true) == 50);
|
305
|
+
REQUIRE(view.get_quantile(0.9375, true) == 50);
|
306
|
+
REQUIRE(view.get_quantile(0.96875, true) == 50);
|
307
|
+
REQUIRE(view.get_quantile(1, true) == 50);
|
308
|
+
|
309
|
+
REQUIRE(view.get_quantile(0, false) == 10);
|
310
|
+
REQUIRE(view.get_quantile(0.03125, false) == 10);
|
311
|
+
REQUIRE(view.get_quantile(0.0625, false) == 10);
|
312
|
+
REQUIRE(view.get_quantile(0.09375, false) == 10);
|
313
|
+
REQUIRE(view.get_quantile(0.125, false) == 20);
|
314
|
+
REQUIRE(view.get_quantile(0.15625, false) == 20);
|
315
|
+
REQUIRE(view.get_quantile(0.1875, false) == 20);
|
316
|
+
REQUIRE(view.get_quantile(0.21875, false) == 20);
|
317
|
+
REQUIRE(view.get_quantile(0.25, false) == 20);
|
318
|
+
REQUIRE(view.get_quantile(0.28125, false) == 20);
|
319
|
+
REQUIRE(view.get_quantile(0.3125, false) == 20);
|
320
|
+
REQUIRE(view.get_quantile(0.34375, false) == 20);
|
321
|
+
REQUIRE(view.get_quantile(0.375, false) == 30);
|
322
|
+
REQUIRE(view.get_quantile(0.40625, false) == 30);
|
323
|
+
REQUIRE(view.get_quantile(0.4375, false) == 30);
|
324
|
+
REQUIRE(view.get_quantile(0.46875, false) == 30);
|
325
|
+
REQUIRE(view.get_quantile(0.5, false) == 30);
|
326
|
+
REQUIRE(view.get_quantile(0.53125, false) == 30);
|
327
|
+
REQUIRE(view.get_quantile(0.5625, false) == 30);
|
328
|
+
REQUIRE(view.get_quantile(0.59375, false) == 30);
|
329
|
+
REQUIRE(view.get_quantile(0.625, false) == 30);
|
330
|
+
REQUIRE(view.get_quantile(0.65625, false) == 30);
|
331
|
+
REQUIRE(view.get_quantile(0.6875, false) == 30);
|
332
|
+
REQUIRE(view.get_quantile(0.71875, false) == 30);
|
333
|
+
REQUIRE(view.get_quantile(0.75, false) == 40);
|
334
|
+
REQUIRE(view.get_quantile(0.78125, false) == 40);
|
335
|
+
REQUIRE(view.get_quantile(0.8125, false) == 40);
|
336
|
+
REQUIRE(view.get_quantile(0.84375, false) == 40);
|
337
|
+
REQUIRE(view.get_quantile(0.875, false) == 50);
|
338
|
+
REQUIRE(view.get_quantile(0.90625, false) == 50);
|
339
|
+
REQUIRE(view.get_quantile(0.9375, false) == 50);
|
340
|
+
REQUIRE(view.get_quantile(0.96875, false) == 50);
|
341
|
+
REQUIRE(view.get_quantile(1, false) == 50);
|
342
|
+
}
|
343
|
+
|
344
|
+
TEST_CASE("set 4", "sorted view") {
|
345
|
+
auto view = quantiles_sorted_view<float, std::less<float>, std::allocator<float>>(8, std::less<float>(), std::allocator<float>());
|
346
|
+
std::vector<float> l1 {10, 20, 30, 40};
|
347
|
+
view.add(l1.begin(), l1.end(), 2);
|
348
|
+
std::vector<float> l0 {10, 20, 30, 40};
|
349
|
+
view.add(l0.begin(), l0.end(), 1);
|
350
|
+
view.convert_to_cummulative();
|
351
|
+
REQUIRE(view.size() == 8);
|
352
|
+
|
353
|
+
auto it = view.begin();
|
354
|
+
REQUIRE(it->first == 10);
|
355
|
+
REQUIRE(it->second == 2);
|
356
|
+
REQUIRE(it.get_weight() == 2);
|
357
|
+
++it;
|
358
|
+
REQUIRE(it->first == 10);
|
359
|
+
REQUIRE(it->second == 3);
|
360
|
+
REQUIRE(it.get_weight() == 1);
|
361
|
+
++it;
|
362
|
+
REQUIRE(it->first == 20);
|
363
|
+
REQUIRE(it->second == 5);
|
364
|
+
REQUIRE(it.get_weight() == 2);
|
365
|
+
++it;
|
366
|
+
REQUIRE(it->first == 20);
|
367
|
+
REQUIRE(it->second == 6);
|
368
|
+
REQUIRE(it.get_weight() == 1);
|
369
|
+
++it;
|
370
|
+
REQUIRE(it->first == 30);
|
371
|
+
REQUIRE(it->second == 8);
|
372
|
+
REQUIRE(it.get_weight() == 2);
|
373
|
+
++it;
|
374
|
+
REQUIRE(it->first == 30);
|
375
|
+
REQUIRE(it->second == 9);
|
376
|
+
REQUIRE(it.get_weight() == 1);
|
377
|
+
++it;
|
378
|
+
REQUIRE(it->first == 40);
|
379
|
+
REQUIRE(it->second == 11);
|
380
|
+
REQUIRE(it.get_weight() == 2);
|
381
|
+
++it;
|
382
|
+
REQUIRE(it->first == 40);
|
383
|
+
REQUIRE(it->second == 12);
|
384
|
+
REQUIRE(it.get_weight() == 1);
|
385
|
+
|
386
|
+
REQUIRE(view.get_rank(5, true) == 0);
|
387
|
+
REQUIRE(view.get_rank(10, true) == 0.25);
|
388
|
+
REQUIRE(view.get_rank(15, true) == 0.25);
|
389
|
+
REQUIRE(view.get_rank(20, true) == 0.5);
|
390
|
+
REQUIRE(view.get_rank(25, true) == 0.5);
|
391
|
+
REQUIRE(view.get_rank(30, true) == 0.75);
|
392
|
+
REQUIRE(view.get_rank(35, true) == 0.75);
|
393
|
+
REQUIRE(view.get_rank(40, true) == 1);
|
394
|
+
REQUIRE(view.get_rank(45, true) == 1);
|
395
|
+
|
396
|
+
REQUIRE(view.get_rank(5, false) == 0);
|
397
|
+
REQUIRE(view.get_rank(10, false) == 0);
|
398
|
+
REQUIRE(view.get_rank(15, false) == 0.25);
|
399
|
+
REQUIRE(view.get_rank(20, false) == 0.25);
|
400
|
+
REQUIRE(view.get_rank(25, false) == 0.5);
|
401
|
+
REQUIRE(view.get_rank(30, false) == 0.5);
|
402
|
+
REQUIRE(view.get_rank(35, false) == 0.75);
|
403
|
+
REQUIRE(view.get_rank(40, false) == 0.75);
|
404
|
+
REQUIRE(view.get_rank(45, false) == 1);
|
405
|
+
|
406
|
+
REQUIRE(view.get_quantile(0, true) == 10);
|
407
|
+
REQUIRE(view.get_quantile(0.0417, true) == 10);
|
408
|
+
REQUIRE(view.get_quantile(0.0833, true) == 10);
|
409
|
+
REQUIRE(view.get_quantile(0.125, true) == 10);
|
410
|
+
REQUIRE(view.get_quantile(0.1667, true) == 10);
|
411
|
+
REQUIRE(view.get_quantile(0.2083, true) == 10);
|
412
|
+
REQUIRE(view.get_quantile(0.25, true) == 10);
|
413
|
+
REQUIRE(view.get_quantile(0.2917, true) == 20);
|
414
|
+
REQUIRE(view.get_quantile(0.3333, true) == 20);
|
415
|
+
REQUIRE(view.get_quantile(0.375, true) == 20);
|
416
|
+
REQUIRE(view.get_quantile(0.4167, true) == 20);
|
417
|
+
REQUIRE(view.get_quantile(0.4583, true) == 20);
|
418
|
+
REQUIRE(view.get_quantile(0.5, true) == 20);
|
419
|
+
REQUIRE(view.get_quantile(0.5417, true) == 30);
|
420
|
+
REQUIRE(view.get_quantile(0.5833, true) == 30);
|
421
|
+
REQUIRE(view.get_quantile(0.625, true) == 30);
|
422
|
+
REQUIRE(view.get_quantile(0.6667, true) == 30);
|
423
|
+
REQUIRE(view.get_quantile(0.7083, true) == 30);
|
424
|
+
REQUIRE(view.get_quantile(0.75, true) == 30);
|
425
|
+
REQUIRE(view.get_quantile(0.7917, true) == 40);
|
426
|
+
REQUIRE(view.get_quantile(0.8333, true) == 40);
|
427
|
+
REQUIRE(view.get_quantile(0.875, true) == 40);
|
428
|
+
REQUIRE(view.get_quantile(0.9167, true) == 40);
|
429
|
+
REQUIRE(view.get_quantile(0.9583, true) == 40);
|
430
|
+
REQUIRE(view.get_quantile(1, true) == 40);
|
431
|
+
|
432
|
+
REQUIRE(view.get_quantile(0, false) == 10);
|
433
|
+
REQUIRE(view.get_quantile(0.0417, false) == 10);
|
434
|
+
REQUIRE(view.get_quantile(0.0833, false) == 10);
|
435
|
+
REQUIRE(view.get_quantile(0.125, false) == 10);
|
436
|
+
REQUIRE(view.get_quantile(0.1667, false) == 10);
|
437
|
+
REQUIRE(view.get_quantile(0.2083, false) == 10);
|
438
|
+
REQUIRE(view.get_quantile(0.25, false) == 20);
|
439
|
+
REQUIRE(view.get_quantile(0.2917, false) == 20);
|
440
|
+
REQUIRE(view.get_quantile(0.3333, false) == 20);
|
441
|
+
REQUIRE(view.get_quantile(0.375, false) == 20);
|
442
|
+
REQUIRE(view.get_quantile(0.4167, false) == 20);
|
443
|
+
REQUIRE(view.get_quantile(0.4583, false) == 20);
|
444
|
+
REQUIRE(view.get_quantile(0.5, false) == 30);
|
445
|
+
REQUIRE(view.get_quantile(0.5417, false) == 30);
|
446
|
+
REQUIRE(view.get_quantile(0.5833, false) == 30);
|
447
|
+
REQUIRE(view.get_quantile(0.625, false) == 30);
|
448
|
+
REQUIRE(view.get_quantile(0.6667, false) == 30);
|
449
|
+
REQUIRE(view.get_quantile(0.7083, false) == 30);
|
450
|
+
REQUIRE(view.get_quantile(0.75, false) == 40);
|
451
|
+
REQUIRE(view.get_quantile(0.7917, false) == 40);
|
452
|
+
REQUIRE(view.get_quantile(0.8333, false) == 40);
|
453
|
+
REQUIRE(view.get_quantile(0.875, false) == 40);
|
454
|
+
REQUIRE(view.get_quantile(0.9167, false) == 40);
|
455
|
+
REQUIRE(view.get_quantile(0.9583, false) == 40);
|
456
|
+
REQUIRE(view.get_quantile(1, false) == 40);
|
457
|
+
}
|
458
|
+
|
459
|
+
} /* namespace datasketches */
|
@@ -449,7 +449,7 @@ uint8_t cpc_compressor<A>::determine_pseudo_phase(uint8_t lg_k, uint32_t c) {
|
|
449
449
|
if (lg_k < 4) throw std::logic_error("lgK < 4");
|
450
450
|
const size_t tmp = c >> (lg_k - 4);
|
451
451
|
const uint8_t phase = tmp & 15;
|
452
|
-
if (phase
|
452
|
+
if (phase >= 16) throw std::out_of_range("wrong phase");
|
453
453
|
return phase;
|
454
454
|
}
|
455
455
|
}
|
@@ -34,7 +34,7 @@ namespace datasketches {
|
|
34
34
|
|
35
35
|
/*
|
36
36
|
* Based on Java implementation here:
|
37
|
-
* https://github.com/
|
37
|
+
* https://github.com/apache/datasketches-java/blob/master/src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java
|
38
38
|
* author Alexander Saydakov
|
39
39
|
*/
|
40
40
|
|
@@ -46,7 +46,6 @@ template<
|
|
46
46
|
typename W = uint64_t,
|
47
47
|
typename H = std::hash<T>,
|
48
48
|
typename E = std::equal_to<T>,
|
49
|
-
typename S = serde<T>, // deprecated, to be removed in the next major version
|
50
49
|
typename A = std::allocator<T>
|
51
50
|
>
|
52
51
|
class frequent_items_sketch {
|
@@ -60,11 +59,13 @@ public:
|
|
60
59
|
* @param lg_max_map_size Log2 of the physical size of the internal hash map managed by this
|
61
60
|
* sketch. The maximum capacity of this internal hash map is 0.75 times 2^lg_max_map_size.
|
62
61
|
* Both the ultimate accuracy and size of this sketch are functions of lg_max_map_size.
|
63
|
-
*
|
64
62
|
* @param lg_start_map_size Log2 of the starting physical size of the internal hash
|
65
63
|
* map managed by this sketch.
|
64
|
+
* @param equal instance of Equality operator
|
65
|
+
* @param allocator instance of an Allocator
|
66
66
|
*/
|
67
|
-
explicit frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size = LG_MIN_MAP_SIZE, const A& allocator = A());
|
67
|
+
explicit frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size = LG_MIN_MAP_SIZE,
|
68
|
+
const E& equal = E(), const A& allocator = A());
|
68
69
|
|
69
70
|
/**
|
70
71
|
* Update this sketch with an item and a positive weight (frequency count).
|
@@ -158,7 +159,7 @@ public:
|
|
158
159
|
/**
|
159
160
|
* Returns epsilon used to compute <i>a priori</i> error.
|
160
161
|
* This is just the value <i>3.5 / maxMapSize</i>.
|
161
|
-
* @param
|
162
|
+
* @param lg_max_map_size the planned map size to be used when constructing this sketch.
|
162
163
|
* @return epsilon used to compute <i>a priori</i> error.
|
163
164
|
*/
|
164
165
|
static double get_epsilon(uint8_t lg_max_map_size);
|
@@ -167,13 +168,13 @@ public:
|
|
167
168
|
* Returns the estimated <i>a priori</i> error given the max_map_size for the sketch and the
|
168
169
|
* estimated_total_stream_weight.
|
169
170
|
* @param lg_max_map_size the planned map size to be used when constructing this sketch.
|
170
|
-
* @param
|
171
|
+
* @param estimated_total_weight the estimated total stream weight.
|
171
172
|
* @return the estimated <i>a priori</i> error.
|
172
173
|
*/
|
173
174
|
static double get_apriori_error(uint8_t lg_max_map_size, W estimated_total_weight);
|
174
175
|
|
175
176
|
class row;
|
176
|
-
|
177
|
+
using vector_row = typename std::vector<row, typename std::allocator_traits<A>::template rebind_alloc<row>>;
|
177
178
|
|
178
179
|
/**
|
179
180
|
* Returns an array of rows that include frequent items, estimates, upper and lower bounds
|
@@ -225,18 +226,18 @@ public:
|
|
225
226
|
/**
|
226
227
|
* Computes size needed to serialize the current state of the sketch.
|
227
228
|
* This can be expensive since every item needs to be looked at.
|
228
|
-
* @param instance of a SerDe
|
229
|
+
* @param sd instance of a SerDe
|
229
230
|
* @return size in bytes needed to serialize this sketch
|
230
231
|
*/
|
231
|
-
template<typename SerDe = S>
|
232
|
+
template<typename SerDe = serde<T>>
|
232
233
|
size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
|
233
234
|
|
234
235
|
/**
|
235
236
|
* This method serializes the sketch into a given stream in a binary form
|
236
237
|
* @param os output stream
|
237
|
-
* @param instance of a SerDe
|
238
|
+
* @param sd instance of a SerDe
|
238
239
|
*/
|
239
|
-
template<typename SerDe = S>
|
240
|
+
template<typename SerDe = serde<T>>
|
240
241
|
void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
|
241
242
|
|
242
243
|
// This is a convenience alias for users
|
@@ -249,53 +250,36 @@ public:
|
|
249
250
|
* It is a blank space of a given size.
|
250
251
|
* This header is used in Datasketches PostgreSQL extension.
|
251
252
|
* @param header_size_bytes space to reserve in front of the sketch
|
252
|
-
* @param instance of a SerDe
|
253
|
+
* @param sd instance of a SerDe
|
253
254
|
* @return serialized sketch as a vector of bytes
|
254
255
|
*/
|
255
|
-
template<typename SerDe = S>
|
256
|
+
template<typename SerDe = serde<T>>
|
256
257
|
vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
|
257
258
|
|
258
259
|
/**
|
259
260
|
* This method deserializes a sketch from a given stream.
|
260
261
|
* @param is input stream
|
261
|
-
* @param instance of an Allocator
|
262
|
-
* @return an instance of the sketch
|
263
|
-
*
|
264
|
-
* Deprecated, to be removed in the next major version
|
265
|
-
*/
|
266
|
-
static frequent_items_sketch deserialize(std::istream& is, const A& allocator = A());
|
267
|
-
|
268
|
-
/**
|
269
|
-
* This method deserializes a sketch from a given stream.
|
270
|
-
* @param is input stream
|
271
|
-
* @param instance of a SerDe
|
272
|
-
* @param instance of an Allocator
|
262
|
+
* @param sd instance of a SerDe
|
263
|
+
* @param equal instance of Equality operator
|
264
|
+
* @param allocator instance of an Allocator
|
273
265
|
* @return an instance of the sketch
|
274
266
|
*/
|
275
|
-
template<typename SerDe = S>
|
276
|
-
static frequent_items_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
|
277
|
-
|
278
|
-
/**
|
279
|
-
* This method deserializes a sketch from a given array of bytes.
|
280
|
-
* @param bytes pointer to the array of bytes
|
281
|
-
* @param size the size of the array
|
282
|
-
* @param instance of an Allocator
|
283
|
-
* @return an instance of the sketch
|
284
|
-
*
|
285
|
-
* Deprecated, to be removed in the next major version
|
286
|
-
*/
|
287
|
-
static frequent_items_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
|
267
|
+
template<typename SerDe = serde<T>>
|
268
|
+
static frequent_items_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(),
|
269
|
+
const E& equal = E(), const A& allocator = A());
|
288
270
|
|
289
271
|
/**
|
290
272
|
* This method deserializes a sketch from a given array of bytes.
|
291
273
|
* @param bytes pointer to the array of bytes
|
292
274
|
* @param size the size of the array
|
293
|
-
* @param instance of a SerDe
|
294
|
-
* @param instance of an Allocator
|
275
|
+
* @param sd instance of a SerDe
|
276
|
+
* @param equal instance of Equality operator
|
277
|
+
* @param allocator instance of an Allocator
|
295
278
|
* @return an instance of the sketch
|
296
279
|
*/
|
297
|
-
template<typename SerDe = S>
|
298
|
-
static frequent_items_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
|
280
|
+
template<typename SerDe = serde<T>>
|
281
|
+
static frequent_items_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(),
|
282
|
+
const E& equal = E(), const A& allocator = A());
|
299
283
|
|
300
284
|
/**
|
301
285
|
* Returns a human readable summary of this sketch
|
@@ -334,8 +318,8 @@ private:
|
|
334
318
|
class items_deleter;
|
335
319
|
};
|
336
320
|
|
337
|
-
template<typename T, typename W, typename H, typename E, typename S, typename A>
|
338
|
-
class frequent_items_sketch<T, W, H, E, S, A>::row {
|
321
|
+
template<typename T, typename W, typename H, typename E, typename A>
|
322
|
+
class frequent_items_sketch<T, W, H, E, A>::row {
|
339
323
|
public:
|
340
324
|
row(const T* item, W weight, W offset):
|
341
325
|
item(item), weight(weight), offset(offset) {}
|