datasketches 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/datasketches/cpc_wrapper.cpp +1 -1
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +22 -20
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +25 -27
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +8 -6
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +11 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +5 -4
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +6 -0
- data/vendor/datasketches-cpp/count/CMakeLists.txt +42 -0
- data/vendor/datasketches-cpp/count/include/count_min.hpp +351 -0
- data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +517 -0
- data/vendor/datasketches-cpp/count/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +155 -0
- data/vendor/datasketches-cpp/count/test/count_min_test.cpp +306 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +16 -8
- data/vendor/datasketches-cpp/density/CMakeLists.txt +42 -0
- data/vendor/datasketches-cpp/density/include/density_sketch.hpp +236 -0
- data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +543 -0
- data/vendor/datasketches-cpp/density/test/CMakeLists.txt +35 -0
- data/vendor/datasketches-cpp/density/test/density_sketch_test.cpp +244 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +9 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +19 -11
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +2 -5
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +19 -7
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +98 -42
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +92 -59
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +16 -6
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +3 -21
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +8 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +14 -6
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +8 -2
- data/vendor/datasketches-cpp/hll/include/hll.hpp +9 -8
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +7 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -1
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +8 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +2 -2
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +6 -0
- data/vendor/datasketches-cpp/python/README.md +5 -5
- data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +87 -0
- data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +35 -0
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +15 -9
- data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +77 -0
- data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +205 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/include/kernel_function.hpp +98 -0
- data/vendor/datasketches-cpp/python/include/py_object_lt.hpp +37 -0
- data/vendor/datasketches-cpp/python/include/py_object_ostream.hpp +48 -0
- data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +104 -0
- data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +136 -0
- data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +101 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +16 -30
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +6 -0
- data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +95 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +127 -73
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +28 -36
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +108 -160
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +5 -4
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +99 -148
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +117 -178
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +67 -73
- data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +215 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/tests/count_min_test.py +86 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +10 -10
- data/vendor/datasketches-cpp/python/tests/density_test.py +93 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +41 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +19 -20
- data/vendor/datasketches-cpp/python/tests/kll_test.py +40 -6
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +39 -5
- data/vendor/datasketches-cpp/python/tests/req_test.py +38 -5
- data/vendor/datasketches-cpp/python/tests/theta_test.py +16 -14
- data/vendor/datasketches-cpp/python/tests/tuple_test.py +206 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +7 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +8 -3
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +4 -4
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +0 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +8 -3
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +2 -2
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +20 -6
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +30 -16
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -1
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +19 -15
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +33 -14
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -2
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +6279 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +14 -8
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +60 -46
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +4 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +58 -10
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +430 -130
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +16 -4
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +2 -2
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/bit_packing_test.cpp +80 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +42 -3
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +2 -1
- data/vendor/datasketches-cpp/version.cfg.in +1 -1
- metadata +31 -3
@@ -0,0 +1,244 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#include <cmath>
|
21
|
+
#include <catch2/catch.hpp>
|
22
|
+
|
23
|
+
#include <density_sketch.hpp>
|
24
|
+
|
25
|
+
namespace datasketches {
|
26
|
+
|
27
|
+
TEST_CASE("density sketch: empty", "[density_sketch]") {
|
28
|
+
density_sketch<float> sketch(10, 3);
|
29
|
+
REQUIRE(sketch.is_empty());
|
30
|
+
REQUIRE_THROWS_AS(sketch.get_estimate({0, 0, 0}), std::runtime_error);
|
31
|
+
}
|
32
|
+
|
33
|
+
TEST_CASE("density sketch: one item", "[density_sketch]") {
|
34
|
+
density_sketch<float> sketch(10, 3);
|
35
|
+
|
36
|
+
// dimension mismatch
|
37
|
+
REQUIRE_THROWS_AS(sketch.update(std::vector<float>({0, 0})), std::invalid_argument);
|
38
|
+
|
39
|
+
sketch.update(std::vector<float>({0, 0, 0}));
|
40
|
+
REQUIRE_FALSE(sketch.is_empty());
|
41
|
+
REQUIRE_FALSE(sketch.is_estimation_mode());
|
42
|
+
REQUIRE(sketch.get_estimate({0, 0, 0}) == 1);
|
43
|
+
REQUIRE(sketch.get_estimate({0.01, 0.01, 0.01}) > 0.95);
|
44
|
+
REQUIRE(sketch.get_estimate({1, 1, 1}) < 0.05);
|
45
|
+
}
|
46
|
+
|
47
|
+
TEST_CASE("density sketch: merge", "[density_sketch]") {
|
48
|
+
density_sketch<float> sketch1(10, 4);
|
49
|
+
sketch1.update(std::vector<float>({0, 0, 0, 0}));
|
50
|
+
sketch1.update(std::vector<float>({1, 2, 3, 4}));
|
51
|
+
|
52
|
+
density_sketch<float> sketch2(10, 4);
|
53
|
+
sketch2.update(std::vector<float>({5, 6, 7, 8}));
|
54
|
+
|
55
|
+
sketch1.merge(sketch2);
|
56
|
+
|
57
|
+
REQUIRE(sketch1.get_n() == 3);
|
58
|
+
REQUIRE(sketch1.get_num_retained() == 3);
|
59
|
+
}
|
60
|
+
|
61
|
+
TEST_CASE("density sketch: iterator", "[density_sketch]") {
|
62
|
+
density_sketch<float> sketch(10, 3);
|
63
|
+
unsigned n = 1000;
|
64
|
+
for (unsigned i = 1; i <= n; ++i) sketch.update(std::vector<float>(3, i));
|
65
|
+
REQUIRE(sketch.get_n() == n);
|
66
|
+
REQUIRE(sketch.is_estimation_mode());
|
67
|
+
//std::cout << sketch.to_string(true, true);
|
68
|
+
unsigned count = 0;
|
69
|
+
for (auto pair: sketch) {
|
70
|
+
++count;
|
71
|
+
// just to assert something about the output
|
72
|
+
REQUIRE(pair.first.size() == sketch.get_dim());
|
73
|
+
}
|
74
|
+
REQUIRE(count == sketch.get_num_retained());
|
75
|
+
}
|
76
|
+
|
77
|
+
// spherical kernel for testing, returns 1 for vectors within radius and 0 otherwise
|
78
|
+
template<typename T>
|
79
|
+
struct spherical_kernel {
|
80
|
+
spherical_kernel(T radius = 1.0) : _radius_squared(radius * radius) {}
|
81
|
+
T operator()(const std::vector<T>& v1, const std::vector<T>& v2) const {
|
82
|
+
return std::inner_product(v1.begin(), v1.end(), v2.begin(), 0.0, std::plus<T>(), [](T a, T b){return (a-b)*(a-b);}) <= _radius_squared ? 1.0 : 0.0;
|
83
|
+
}
|
84
|
+
private:
|
85
|
+
T _radius_squared;
|
86
|
+
};
|
87
|
+
|
88
|
+
TEST_CASE("custom kernel", "[density_sketch]") {
|
89
|
+
density_sketch<float, spherical_kernel<float>> sketch(10, 3, spherical_kernel<float>(0.5));
|
90
|
+
|
91
|
+
// update with (1,1,1) and test points inside and outside the kernel
|
92
|
+
sketch.update(std::vector<float>(3, 1.0));
|
93
|
+
REQUIRE(sketch.get_estimate(std::vector<float>(3, 1.001)) == 1.0);
|
94
|
+
REQUIRE(sketch.get_estimate(std::vector<float>(3, 2.0)) == 0.0);
|
95
|
+
|
96
|
+
// rest of test follows iterator test above
|
97
|
+
unsigned n = 1000;
|
98
|
+
for (unsigned i = 2; i <= n; ++i) sketch.update(std::vector<float>(3, i));
|
99
|
+
REQUIRE(sketch.get_n() == n);
|
100
|
+
REQUIRE(sketch.is_estimation_mode());
|
101
|
+
unsigned count = 0;
|
102
|
+
for (auto pair: sketch) {
|
103
|
+
++count;
|
104
|
+
// just to assert something about the output
|
105
|
+
REQUIRE(pair.first.size() == sketch.get_dim());
|
106
|
+
}
|
107
|
+
REQUIRE(count == sketch.get_num_retained());
|
108
|
+
}
|
109
|
+
|
110
|
+
TEST_CASE("serialize empty", "[density_sketch]") {
|
111
|
+
density_sketch<double> sk(10, 2);
|
112
|
+
auto bytes = sk.serialize();
|
113
|
+
auto sk2 = density_sketch<double>::deserialize(bytes.data(), bytes.size());
|
114
|
+
REQUIRE(sk2.is_empty());
|
115
|
+
REQUIRE(!sk2.is_estimation_mode());
|
116
|
+
REQUIRE(sk.get_k() == sk2.get_k());
|
117
|
+
REQUIRE(sk.get_dim() == sk2.get_dim());
|
118
|
+
REQUIRE(sk.get_n() == sk2.get_n());
|
119
|
+
REQUIRE(sk.get_num_retained() == sk2.get_num_retained());
|
120
|
+
|
121
|
+
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
122
|
+
sk.serialize(s);
|
123
|
+
auto sk3 = density_sketch<double>::deserialize(s);
|
124
|
+
REQUIRE(sk3.is_empty());
|
125
|
+
REQUIRE(!sk3.is_estimation_mode());
|
126
|
+
REQUIRE(sk.get_k() == sk3.get_k());
|
127
|
+
REQUIRE(sk.get_dim() == sk3.get_dim());
|
128
|
+
REQUIRE(sk.get_n() == sk3.get_n());
|
129
|
+
REQUIRE(sk.get_num_retained() == sk3.get_num_retained());
|
130
|
+
}
|
131
|
+
|
132
|
+
TEST_CASE("serialize bytes", "[density_sketch]") {
|
133
|
+
uint16_t k = 10;
|
134
|
+
uint32_t dim = 3;
|
135
|
+
density_sketch<double> sk(k, dim);
|
136
|
+
|
137
|
+
for (uint16_t i = 0; i < k; ++i) {
|
138
|
+
double val = static_cast<double>(i);
|
139
|
+
sk.update(std::vector<double>({val, std::sqrt(val), -val}));
|
140
|
+
}
|
141
|
+
REQUIRE(!sk.is_estimation_mode());
|
142
|
+
|
143
|
+
// exact mode
|
144
|
+
auto bytes = sk.serialize();
|
145
|
+
auto sk2 = density_sketch<double>::deserialize(bytes.data(), bytes.size());
|
146
|
+
REQUIRE(!sk2.is_empty());
|
147
|
+
REQUIRE(!sk2.is_estimation_mode());
|
148
|
+
REQUIRE(sk.get_k() == sk2.get_k());
|
149
|
+
REQUIRE(sk.get_dim() == sk2.get_dim());
|
150
|
+
REQUIRE(sk.get_n() == sk2.get_n());
|
151
|
+
REQUIRE(sk.get_num_retained() == sk2.get_num_retained());
|
152
|
+
auto it1 = sk.begin();
|
153
|
+
auto it2 = sk2.begin();
|
154
|
+
while (it1 != sk.end()) {
|
155
|
+
REQUIRE(it1->first[0] == it2->first[0]);
|
156
|
+
REQUIRE(it1->second == it2->second);
|
157
|
+
++it1;
|
158
|
+
++it2;
|
159
|
+
}
|
160
|
+
|
161
|
+
// estimation mode
|
162
|
+
size_t n = 1031;
|
163
|
+
for (uint32_t i = k; i < n; ++i) {
|
164
|
+
double val = static_cast<double>(i);
|
165
|
+
sk.update(std::vector<double>({val, std::sqrt(val), -val}));
|
166
|
+
}
|
167
|
+
REQUIRE(sk.is_estimation_mode());
|
168
|
+
|
169
|
+
bytes = sk.serialize();
|
170
|
+
sk2 = density_sketch<double>::deserialize(bytes.data(), bytes.size());
|
171
|
+
REQUIRE(!sk2.is_empty());
|
172
|
+
REQUIRE(sk2.is_estimation_mode());
|
173
|
+
REQUIRE(sk.get_k() == sk2.get_k());
|
174
|
+
REQUIRE(sk.get_dim() == sk2.get_dim());
|
175
|
+
REQUIRE(sk.get_n() == sk2.get_n());
|
176
|
+
REQUIRE(sk.get_num_retained() == sk2.get_num_retained());
|
177
|
+
it1 = sk.begin();
|
178
|
+
it2 = sk2.begin();
|
179
|
+
while (it1 != sk.end()) {
|
180
|
+
REQUIRE(it1->first[0] == it2->first[0]);
|
181
|
+
REQUIRE(it1->second == it2->second);
|
182
|
+
++it1;
|
183
|
+
++it2;
|
184
|
+
}
|
185
|
+
}
|
186
|
+
|
187
|
+
TEST_CASE("serialize stream", "[density_sketch]") {
|
188
|
+
uint16_t k = 10;
|
189
|
+
uint32_t dim = 3;
|
190
|
+
density_sketch<float> sk(k, dim);
|
191
|
+
|
192
|
+
for (uint16_t i = 0; i < k; ++i) {
|
193
|
+
float val = static_cast<float>(i);
|
194
|
+
sk.update(std::vector<float>({val, std::sin(val), std::cos(val)}));
|
195
|
+
}
|
196
|
+
REQUIRE(!sk.is_estimation_mode());
|
197
|
+
|
198
|
+
// exact mode
|
199
|
+
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
200
|
+
sk.serialize(s);
|
201
|
+
auto sk2 = density_sketch<float>::deserialize(s);
|
202
|
+
REQUIRE(!sk2.is_empty());
|
203
|
+
REQUIRE(!sk2.is_estimation_mode());
|
204
|
+
REQUIRE(sk.get_k() == sk2.get_k());
|
205
|
+
REQUIRE(sk.get_dim() == sk2.get_dim());
|
206
|
+
REQUIRE(sk.get_n() == sk2.get_n());
|
207
|
+
REQUIRE(sk.get_num_retained() == sk2.get_num_retained());
|
208
|
+
auto it1 = sk.begin();
|
209
|
+
auto it2 = sk2.begin();
|
210
|
+
while (it1 != sk.end()) {
|
211
|
+
REQUIRE(it1->first[0] == it2->first[0]);
|
212
|
+
REQUIRE(it1->second == it2->second);
|
213
|
+
++it1;
|
214
|
+
++it2;
|
215
|
+
}
|
216
|
+
|
217
|
+
// estimation mode
|
218
|
+
size_t n = 1031;
|
219
|
+
for (uint32_t i = k; i < n; ++i) {
|
220
|
+
float val = static_cast<float>(i);
|
221
|
+
sk.update(std::vector<float>({val, std::sqrt(val), -val}));
|
222
|
+
}
|
223
|
+
REQUIRE(sk.is_estimation_mode());
|
224
|
+
|
225
|
+
std::stringstream s2(std::ios::in | std::ios::out | std::ios::binary);
|
226
|
+
sk.serialize(s2);
|
227
|
+
sk2 = density_sketch<float>::deserialize(s2);
|
228
|
+
REQUIRE(!sk2.is_empty());
|
229
|
+
REQUIRE(sk2.is_estimation_mode());
|
230
|
+
REQUIRE(sk.get_k() == sk2.get_k());
|
231
|
+
REQUIRE(sk.get_dim() == sk2.get_dim());
|
232
|
+
REQUIRE(sk.get_n() == sk2.get_n());
|
233
|
+
REQUIRE(sk.get_num_retained() == sk2.get_num_retained());
|
234
|
+
it1 = sk.begin();
|
235
|
+
it2 = sk2.begin();
|
236
|
+
while (it1 != sk.end()) {
|
237
|
+
REQUIRE(it1->first[0] == it2->first[0]);
|
238
|
+
REQUIRE(it1->second == it2->second);
|
239
|
+
++it1;
|
240
|
+
++it2;
|
241
|
+
}
|
242
|
+
}
|
243
|
+
|
244
|
+
} /* namespace datasketches */
|
@@ -91,8 +91,14 @@ private:
|
|
91
91
|
|
92
92
|
// This iterator uses strides based on golden ratio to avoid clustering during merge
|
93
93
|
template<typename K, typename V, typename H, typename E, typename A>
|
94
|
-
class reverse_purge_hash_map<K, V, H, E, A>::iterator
|
94
|
+
class reverse_purge_hash_map<K, V, H, E, A>::iterator {
|
95
95
|
public:
|
96
|
+
using iterator_category = std::input_iterator_tag;
|
97
|
+
using value_type = std::pair<K&, V>;
|
98
|
+
using difference_type = void;
|
99
|
+
using pointer = void;
|
100
|
+
using reference = const value_type;
|
101
|
+
|
96
102
|
friend class reverse_purge_hash_map<K, V, H, E, A>;
|
97
103
|
iterator& operator++() {
|
98
104
|
++count;
|
@@ -107,8 +113,8 @@ public:
|
|
107
113
|
iterator operator++(int) { iterator tmp(*this); operator++(); return tmp; }
|
108
114
|
bool operator==(const iterator& rhs) const { return count == rhs.count; }
|
109
115
|
bool operator!=(const iterator& rhs) const { return count != rhs.count; }
|
110
|
-
|
111
|
-
return
|
116
|
+
reference operator*() const {
|
117
|
+
return value_type(map->keys_[index], map->values_[index]);
|
112
118
|
}
|
113
119
|
private:
|
114
120
|
static constexpr double GOLDEN_RATIO_RECIPROCAL = 0.6180339887498949; // = (sqrt(5) - 1) / 2
|
@@ -51,6 +51,22 @@ Hll4Array<A>::Hll4Array(const Hll4Array<A>& that) :
|
|
51
51
|
}
|
52
52
|
}
|
53
53
|
|
54
|
+
template<typename A>
|
55
|
+
Hll4Array<A>::Hll4Array(const HllArray<A>& other) :
|
56
|
+
HllArray<A>(other.getLgConfigK(), target_hll_type::HLL_4, other.isStartFullSize(), other.getAllocator()),
|
57
|
+
auxHashMap_(nullptr)
|
58
|
+
{
|
59
|
+
const int numBytes = this->hll4ArrBytes(this->lgConfigK_);
|
60
|
+
this->hllByteArr_.resize(numBytes, 0);
|
61
|
+
this->oooFlag_ = other.isOutOfOrderFlag();
|
62
|
+
|
63
|
+
for (const auto coupon : other) { // all = false, so skip empty values
|
64
|
+
internalCouponUpdate(coupon); // updates KxQ registers
|
65
|
+
}
|
66
|
+
this->hipAccum_ = other.getHipAccum();
|
67
|
+
this->rebuild_kxq_curmin_ = false;
|
68
|
+
}
|
69
|
+
|
54
70
|
template<typename A>
|
55
71
|
Hll4Array<A>::~Hll4Array() {
|
56
72
|
// hllByteArr deleted in parent
|
@@ -114,10 +130,9 @@ uint8_t Hll4Array<A>::getSlot(uint32_t slotNo) const {
|
|
114
130
|
}
|
115
131
|
|
116
132
|
template<typename A>
|
117
|
-
uint8_t Hll4Array<A>::
|
118
|
-
const uint8_t value = getSlot(index);
|
133
|
+
uint8_t Hll4Array<A>::adjustRawValue(uint32_t slot, uint8_t value) const {
|
119
134
|
if (value != hll_constants::AUX_TOKEN) return value + this->curMin_;
|
120
|
-
return auxHashMap_->mustFindValueFor(
|
135
|
+
return auxHashMap_->mustFindValueFor(slot);
|
121
136
|
}
|
122
137
|
|
123
138
|
template<typename A>
|
@@ -210,7 +225,7 @@ void Hll4Array<A>::internalHll4Update(uint32_t slotNo, uint8_t newVal) {
|
|
210
225
|
|
211
226
|
// we just increased a pair value, so it might be time to change curMin
|
212
227
|
if (actualOldValue == this->curMin_) { // 908
|
213
|
-
this->
|
228
|
+
--(this->numAtCurMin_);
|
214
229
|
while (this->numAtCurMin_ == 0) {
|
215
230
|
shiftToBiggerCurMin(); // increases curMin by 1, builds a new aux table
|
216
231
|
// shifts values in 4-bit table and recounts curMin
|
@@ -328,13 +343,6 @@ typename HllArray<A>::const_iterator Hll4Array<A>::end() const {
|
|
328
343
|
this->tgtHllType_, auxHashMap_, this->curMin_, false);
|
329
344
|
}
|
330
345
|
|
331
|
-
template<typename A>
|
332
|
-
void Hll4Array<A>::mergeHll(const HllArray<A>& src) {
|
333
|
-
for (const auto coupon: src) {
|
334
|
-
internalCouponUpdate(coupon);
|
335
|
-
}
|
336
|
-
}
|
337
|
-
|
338
346
|
}
|
339
347
|
|
340
348
|
#endif // _HLL4ARRAY_INTERNAL_HPP_
|
@@ -25,14 +25,12 @@
|
|
25
25
|
|
26
26
|
namespace datasketches {
|
27
27
|
|
28
|
-
template<typename A>
|
29
|
-
class Hll4Iterator;
|
30
|
-
|
31
28
|
template<typename A>
|
32
29
|
class Hll4Array final : public HllArray<A> {
|
33
30
|
public:
|
34
31
|
explicit Hll4Array(uint8_t lgConfigK, bool startFullSize, const A& allocator);
|
35
32
|
explicit Hll4Array(const Hll4Array<A>& that);
|
33
|
+
explicit Hll4Array(const HllArray<A>& that);
|
36
34
|
|
37
35
|
virtual ~Hll4Array();
|
38
36
|
virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
|
@@ -41,13 +39,12 @@ class Hll4Array final : public HllArray<A> {
|
|
41
39
|
|
42
40
|
inline uint8_t getSlot(uint32_t slotNo) const;
|
43
41
|
inline void putSlot(uint32_t slotNo, uint8_t value);
|
44
|
-
inline uint8_t
|
42
|
+
inline uint8_t adjustRawValue(uint32_t index, uint8_t value) const;
|
45
43
|
|
46
44
|
virtual uint32_t getUpdatableSerializationBytes() const;
|
47
45
|
virtual uint32_t getHllByteArrBytes() const;
|
48
46
|
|
49
47
|
virtual HllSketchImpl<A>* couponUpdate(uint32_t coupon) final;
|
50
|
-
void mergeHll(const HllArray<A>& src);
|
51
48
|
|
52
49
|
virtual AuxHashMap<A>* getAuxHashMap() const;
|
53
50
|
// does *not* delete old map if overwriting
|
@@ -34,6 +34,25 @@ HllArray<A>(lgConfigK, target_hll_type::HLL_6, startFullSize, allocator)
|
|
34
34
|
this->hllByteArr_.resize(numBytes, 0);
|
35
35
|
}
|
36
36
|
|
37
|
+
template<typename A>
|
38
|
+
Hll6Array<A>::Hll6Array(const HllArray<A>& other) :
|
39
|
+
HllArray<A>(other.getLgConfigK(), target_hll_type::HLL_6, other.isStartFullSize(), other.getAllocator())
|
40
|
+
{
|
41
|
+
const int numBytes = this->hll6ArrBytes(this->lgConfigK_);
|
42
|
+
this->hllByteArr_.resize(numBytes, 0);
|
43
|
+
this->oooFlag_ = other.isOutOfOrderFlag();
|
44
|
+
uint32_t num_zeros = 1 << this->lgConfigK_;
|
45
|
+
|
46
|
+
for (const auto coupon : other) { // all = false, so skip empty values
|
47
|
+
num_zeros--;
|
48
|
+
internalCouponUpdate(coupon); // updates KxQ registers
|
49
|
+
}
|
50
|
+
|
51
|
+
this->numAtCurMin_ = num_zeros;
|
52
|
+
this->hipAccum_ = other.getHipAccum();
|
53
|
+
this->rebuild_kxq_curmin_ = false;
|
54
|
+
}
|
55
|
+
|
37
56
|
template<typename A>
|
38
57
|
std::function<void(HllSketchImpl<A>*)> Hll6Array<A>::get_deleter() const {
|
39
58
|
return [](HllSketchImpl<A>* ptr) {
|
@@ -101,13 +120,6 @@ void Hll6Array<A>::internalCouponUpdate(uint32_t coupon) {
|
|
101
120
|
}
|
102
121
|
}
|
103
122
|
|
104
|
-
template<typename A>
|
105
|
-
void Hll6Array<A>::mergeHll(const HllArray<A>& src) {
|
106
|
-
for (const auto coupon: src) {
|
107
|
-
internalCouponUpdate(coupon);
|
108
|
-
}
|
109
|
-
}
|
110
|
-
|
111
123
|
}
|
112
124
|
|
113
125
|
#endif // _HLL6ARRAY_INTERNAL_HPP_
|
@@ -31,6 +31,7 @@ template<typename A>
|
|
31
31
|
class Hll6Array final : public HllArray<A> {
|
32
32
|
public:
|
33
33
|
Hll6Array(uint8_t lgConfigK, bool startFullSize, const A& allocator);
|
34
|
+
explicit Hll6Array(const HllArray<A>& that);
|
34
35
|
|
35
36
|
virtual ~Hll6Array() = default;
|
36
37
|
virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
|
@@ -41,7 +42,6 @@ class Hll6Array final : public HllArray<A> {
|
|
41
42
|
inline void putSlot(uint32_t slotNo, uint8_t value);
|
42
43
|
|
43
44
|
virtual HllSketchImpl<A>* couponUpdate(uint32_t coupon) final;
|
44
|
-
void mergeHll(const HllArray<A>& src);
|
45
45
|
|
46
46
|
virtual uint32_t getHllByteArrBytes() const;
|
47
47
|
|
@@ -32,6 +32,25 @@ HllArray<A>(lgConfigK, target_hll_type::HLL_8, startFullSize, allocator)
|
|
32
32
|
this->hllByteArr_.resize(numBytes, 0);
|
33
33
|
}
|
34
34
|
|
35
|
+
template<typename A>
|
36
|
+
Hll8Array<A>::Hll8Array(const HllArray<A>& other):
|
37
|
+
HllArray<A>(other.getLgConfigK(), target_hll_type::HLL_8, other.isStartFullSize(), other.getAllocator())
|
38
|
+
{
|
39
|
+
const int numBytes = this->hll8ArrBytes(this->lgConfigK_);
|
40
|
+
this->hllByteArr_.resize(numBytes, 0);
|
41
|
+
this->oooFlag_ = other.isOutOfOrderFlag();
|
42
|
+
uint32_t num_zeros = 1 << this->lgConfigK_;
|
43
|
+
|
44
|
+
for (const auto coupon : other) { // all = false, so skip empty values
|
45
|
+
num_zeros--;
|
46
|
+
internalCouponUpdate(coupon); // updates KxQ registers
|
47
|
+
}
|
48
|
+
|
49
|
+
this->numAtCurMin_ = num_zeros;
|
50
|
+
this->hipAccum_ = other.getHipAccum();
|
51
|
+
this->rebuild_kxq_curmin_ = false;
|
52
|
+
}
|
53
|
+
|
35
54
|
template<typename A>
|
36
55
|
std::function<void(HllSketchImpl<A>*)> Hll8Array<A>::get_deleter() const {
|
37
56
|
return [](HllSketchImpl<A>* ptr) {
|
@@ -77,13 +96,11 @@ void Hll8Array<A>::internalCouponUpdate(uint32_t coupon) {
|
|
77
96
|
const uint32_t slotNo = HllUtil<A>::getLow26(coupon) & configKmask;
|
78
97
|
const uint8_t newVal = HllUtil<A>::getValue(coupon);
|
79
98
|
|
80
|
-
const uint8_t curVal =
|
99
|
+
const uint8_t curVal = this->hllByteArr_[slotNo];
|
81
100
|
if (newVal > curVal) {
|
82
|
-
|
101
|
+
this->hllByteArr_[slotNo] = newVal;
|
83
102
|
this->hipAndKxQIncrementalUpdate(curVal, newVal);
|
84
|
-
|
85
|
-
this->numAtCurMin_--; // interpret numAtCurMin as num zeros
|
86
|
-
}
|
103
|
+
this->numAtCurMin_ -= curVal == 0; // interpret numAtCurMin as num zeros
|
87
104
|
}
|
88
105
|
}
|
89
106
|
|
@@ -97,49 +114,88 @@ void Hll8Array<A>::mergeList(const CouponList<A>& src) {
|
|
97
114
|
template<typename A>
|
98
115
|
void Hll8Array<A>::mergeHll(const HllArray<A>& src) {
|
99
116
|
// at this point src_k >= dst_k
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
const uint8_t old_v = this->hllByteArr_[j];
|
108
|
-
if (new_v > old_v) {
|
109
|
-
this->hllByteArr_[j] = new_v;
|
110
|
-
this->hipAndKxQIncrementalUpdate(old_v, new_v);
|
111
|
-
if (old_v == 0) {
|
112
|
-
this->numAtCurMin_--;
|
113
|
-
}
|
117
|
+
// we can optimize further when the k values are equal
|
118
|
+
if (this->getLgConfigK() == src.getLgConfigK()) {
|
119
|
+
if (src.getTgtHllType() == target_hll_type::HLL_8) {
|
120
|
+
uint32_t i = 0;
|
121
|
+
for (const auto value: src.getHllArray()) {
|
122
|
+
this->hllByteArr_[i] = std::max(this->hllByteArr_[i], value);
|
123
|
+
++i;
|
114
124
|
}
|
115
|
-
}
|
116
|
-
|
117
|
-
|
118
|
-
const uint8_t
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
125
|
+
} else if (src.getTgtHllType() == target_hll_type::HLL_6) {
|
126
|
+
const uint32_t src_k = 1 << src.getLgConfigK();
|
127
|
+
uint32_t i = 0;
|
128
|
+
const uint8_t* ptr = src.getHllArray().data();
|
129
|
+
while (i < src_k) {
|
130
|
+
uint8_t value = *ptr & 0x3f;
|
131
|
+
this->hllByteArr_[i] = std::max(this->hllByteArr_[i], value);
|
132
|
+
++i;
|
133
|
+
value = *ptr++ >> 6;
|
134
|
+
value |= (*ptr & 0x0f) << 2;
|
135
|
+
this->hllByteArr_[i] = std::max(this->hllByteArr_[i], value);
|
136
|
+
++i;
|
137
|
+
value = *ptr++ >> 4;
|
138
|
+
value |= (*ptr & 3) << 4;
|
139
|
+
this->hllByteArr_[i] = std::max(this->hllByteArr_[i], value);
|
140
|
+
++i;
|
141
|
+
value = *ptr++ >> 2;
|
142
|
+
this->hllByteArr_[i] = std::max(this->hllByteArr_[i], value);
|
143
|
+
++i;
|
144
|
+
}
|
145
|
+
} else { // HLL_4
|
146
|
+
const auto& src4 = static_cast<const Hll4Array<A>&>(src);
|
147
|
+
uint32_t i = 0;
|
148
|
+
for (const auto byte: src.getHllArray()) {
|
149
|
+
this->hllByteArr_[i] = std::max(this->hllByteArr_[i], src4.adjustRawValue(i, byte & hll_constants::loNibbleMask));
|
150
|
+
++i;
|
151
|
+
this->hllByteArr_[i] = std::max(this->hllByteArr_[i], src4.adjustRawValue(i, byte >> 4));
|
152
|
+
++i;
|
127
153
|
}
|
128
154
|
}
|
129
|
-
} else {
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
155
|
+
} else {
|
156
|
+
// src_k > dst_k
|
157
|
+
const uint32_t dst_mask = (1 << this->getLgConfigK()) - 1;
|
158
|
+
// special treatment below to optimize performance
|
159
|
+
if (src.getTgtHllType() == target_hll_type::HLL_8) {
|
160
|
+
uint32_t i = 0;
|
161
|
+
for (const auto value: src.getHllArray()) {
|
162
|
+
processValue(i++, dst_mask, value);
|
163
|
+
}
|
164
|
+
} else if (src.getTgtHllType() == target_hll_type::HLL_6) {
|
165
|
+
const uint32_t src_k = 1 << src.getLgConfigK();
|
166
|
+
uint32_t i = 0;
|
167
|
+
const uint8_t* ptr = src.getHllArray().data();
|
168
|
+
while (i < src_k) {
|
169
|
+
uint8_t value = *ptr & 0x3f;
|
170
|
+
processValue(i++, dst_mask, value);
|
171
|
+
value = *ptr++ >> 6;
|
172
|
+
value |= (*ptr & 0x0f) << 2;
|
173
|
+
processValue(i++, dst_mask, value);
|
174
|
+
value = *ptr++ >> 4;
|
175
|
+
value |= (*ptr & 3) << 4;
|
176
|
+
processValue(i++, dst_mask, value);
|
177
|
+
value = *ptr++ >> 2;
|
178
|
+
processValue(i++, dst_mask, value);
|
179
|
+
}
|
180
|
+
} else { // HLL_4
|
181
|
+
const auto& src4 = static_cast<const Hll4Array<A>&>(src);
|
182
|
+
uint32_t i = 0;
|
183
|
+
for (const auto byte: src.getHllArray()) {
|
184
|
+
processValue(i, dst_mask, src4.adjustRawValue(i, byte & hll_constants::loNibbleMask));
|
185
|
+
++i;
|
186
|
+
processValue(i, dst_mask, src4.adjustRawValue(i, byte >> 4));
|
187
|
+
++i;
|
140
188
|
}
|
141
189
|
}
|
142
190
|
}
|
191
|
+
this->setRebuildKxqCurminFlag(true);
|
192
|
+
}
|
193
|
+
|
194
|
+
|
195
|
+
template<typename A>
|
196
|
+
void Hll8Array<A>::processValue(uint32_t slot, uint32_t mask, uint8_t new_val) {
|
197
|
+
const size_t index = slot & mask;
|
198
|
+
this->hllByteArr_[index] = std::max(this->hllByteArr_[index], new_val);
|
143
199
|
}
|
144
200
|
|
145
201
|
}
|
@@ -31,6 +31,7 @@ template<typename A>
|
|
31
31
|
class Hll8Array final : public HllArray<A> {
|
32
32
|
public:
|
33
33
|
Hll8Array(uint8_t lgConfigK, bool startFullSize, const A& allocator);
|
34
|
+
explicit Hll8Array(const HllArray<A>& that);
|
34
35
|
|
35
36
|
virtual ~Hll8Array() = default;
|
36
37
|
virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
|
@@ -48,6 +49,7 @@ class Hll8Array final : public HllArray<A> {
|
|
48
49
|
|
49
50
|
private:
|
50
51
|
inline void internalCouponUpdate(uint32_t coupon);
|
52
|
+
inline void processValue(uint32_t slot, uint32_t mask, uint8_t new_val);
|
51
53
|
};
|
52
54
|
|
53
55
|
}
|