datasketches 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +7 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +24 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +14 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +121 -87
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +65 -80
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +28 -28
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +34 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +72 -62
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +68 -45
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +6 -6
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +9 -9
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +47 -56
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +34 -42
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +42 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +107 -58
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +4 -4
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +2 -0
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -28
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +51 -64
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +12 -12
- metadata +8 -3
@@ -35,13 +35,13 @@ public:
|
|
35
35
|
using CompactSketch = compact_theta_sketch_alloc<Allocator>;
|
36
36
|
using resize_factor = theta_constants::resize_factor;
|
37
37
|
|
38
|
-
struct
|
39
|
-
|
38
|
+
struct nop_policy {
|
39
|
+
void operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
|
40
|
+
unused(internal_entry);
|
40
41
|
unused(incoming_entry);
|
41
|
-
return internal_entry;
|
42
42
|
}
|
43
43
|
};
|
44
|
-
using State = theta_union_base<Entry, ExtractKey,
|
44
|
+
using State = theta_union_base<Entry, ExtractKey, nop_policy, Sketch, CompactSketch, Allocator>;
|
45
45
|
|
46
46
|
// No constructor here. Use builder instead.
|
47
47
|
class builder;
|
@@ -43,7 +43,7 @@ void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
|
|
43
43
|
if (sketch.get_theta64() < union_theta_) union_theta_ = sketch.get_theta64();
|
44
44
|
for (auto& entry: sketch) {
|
45
45
|
const uint64_t hash = EK()(entry);
|
46
|
-
if (hash < union_theta_) {
|
46
|
+
if (hash < union_theta_ && hash < table_.theta_) {
|
47
47
|
auto result = table_.find(hash);
|
48
48
|
if (!result.second) {
|
49
49
|
table_.insert(result.first, conditional_forward<SS>(entry));
|
@@ -24,7 +24,7 @@ namespace datasketches {
|
|
24
24
|
|
25
25
|
template<typename A>
|
26
26
|
theta_union_alloc<A>::theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const A& allocator):
|
27
|
-
state_(lg_cur_size, lg_nom_size, rf, theta, seed,
|
27
|
+
state_(lg_cur_size, lg_nom_size, rf, theta, seed, nop_policy(), allocator)
|
28
28
|
{}
|
29
29
|
|
30
30
|
template<typename A>
|
@@ -53,6 +53,8 @@ struct theta_update_sketch_base {
|
|
53
53
|
inline uint64_t hash_and_screen(const void* data, size_t length);
|
54
54
|
|
55
55
|
inline std::pair<iterator, bool> find(uint64_t key) const;
|
56
|
+
static inline std::pair<iterator, bool> find(Entry* entries, uint8_t lg_size, uint64_t key);
|
57
|
+
|
56
58
|
|
57
59
|
template<typename FwdEntry>
|
58
60
|
inline void insert(iterator it, FwdEntry&& entry);
|
@@ -39,7 +39,7 @@ seed_(seed),
|
|
39
39
|
entries_(nullptr)
|
40
40
|
{
|
41
41
|
if (lg_cur_size > 0) {
|
42
|
-
const size_t size =
|
42
|
+
const size_t size = 1ULL << lg_cur_size;
|
43
43
|
entries_ = allocator_.allocate(size);
|
44
44
|
for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
|
45
45
|
}
|
@@ -58,7 +58,7 @@ seed_(other.seed_),
|
|
58
58
|
entries_(nullptr)
|
59
59
|
{
|
60
60
|
if (other.entries_ != nullptr) {
|
61
|
-
const size_t size =
|
61
|
+
const size_t size = 1ULL << lg_cur_size_;
|
62
62
|
entries_ = allocator_.allocate(size);
|
63
63
|
for (size_t i = 0; i < size; ++i) {
|
64
64
|
if (EK()(other.entries_[i]) != 0) {
|
@@ -89,7 +89,7 @@ template<typename EN, typename EK, typename A>
|
|
89
89
|
theta_update_sketch_base<EN, EK, A>::~theta_update_sketch_base()
|
90
90
|
{
|
91
91
|
if (entries_ != nullptr) {
|
92
|
-
const size_t size =
|
92
|
+
const size_t size = 1ULL << lg_cur_size_;
|
93
93
|
for (size_t i = 0; i < size; ++i) {
|
94
94
|
if (EK()(entries_[i]) != 0) entries_[i].~EN();
|
95
95
|
}
|
@@ -136,18 +136,23 @@ uint64_t theta_update_sketch_base<EN, EK, A>::hash_and_screen(const void* data,
|
|
136
136
|
|
137
137
|
template<typename EN, typename EK, typename A>
|
138
138
|
auto theta_update_sketch_base<EN, EK, A>::find(uint64_t key) const -> std::pair<iterator, bool> {
|
139
|
-
|
140
|
-
|
141
|
-
|
139
|
+
return find(entries_, lg_cur_size_, key);
|
140
|
+
}
|
141
|
+
|
142
|
+
template<typename EN, typename EK, typename A>
|
143
|
+
auto theta_update_sketch_base<EN, EK, A>::find(EN* entries, uint8_t lg_size, uint64_t key) -> std::pair<iterator, bool> {
|
144
|
+
const uint32_t size = 1 << lg_size;
|
145
|
+
const uint32_t mask = size - 1;
|
146
|
+
const uint32_t stride = get_stride(key, lg_size);
|
142
147
|
uint32_t index = static_cast<uint32_t>(key) & mask;
|
143
148
|
// search for duplicate or zero
|
144
149
|
const uint32_t loop_index = index;
|
145
150
|
do {
|
146
|
-
const uint64_t probe = EK()(
|
151
|
+
const uint64_t probe = EK()(entries[index]);
|
147
152
|
if (probe == 0) {
|
148
|
-
return std::pair<iterator, bool>(&
|
153
|
+
return std::pair<iterator, bool>(&entries[index], false);
|
149
154
|
} else if (probe == key) {
|
150
|
-
return std::pair<iterator, bool>(&
|
155
|
+
return std::pair<iterator, bool>(&entries[index], true);
|
151
156
|
}
|
152
157
|
index = (index + stride) & mask;
|
153
158
|
} while (index != loop_index);
|
@@ -175,13 +180,13 @@ auto theta_update_sketch_base<EN, EK, A>::begin() const -> iterator {
|
|
175
180
|
|
176
181
|
template<typename EN, typename EK, typename A>
|
177
182
|
auto theta_update_sketch_base<EN, EK, A>::end() const -> iterator {
|
178
|
-
return &entries_[
|
183
|
+
return &entries_[1ULL << lg_cur_size_];
|
179
184
|
}
|
180
185
|
|
181
186
|
template<typename EN, typename EK, typename A>
|
182
187
|
uint32_t theta_update_sketch_base<EN, EK, A>::get_capacity(uint8_t lg_cur_size, uint8_t lg_nom_size) {
|
183
188
|
const double fraction = (lg_cur_size <= lg_nom_size) ? RESIZE_THRESHOLD : REBUILD_THRESHOLD;
|
184
|
-
return std::floor(fraction * (1 << lg_cur_size));
|
189
|
+
return static_cast<uint32_t>(std::floor(fraction * (1 << lg_cur_size)));
|
185
190
|
}
|
186
191
|
|
187
192
|
template<typename EN, typename EK, typename A>
|
@@ -192,29 +197,29 @@ uint32_t theta_update_sketch_base<EN, EK, A>::get_stride(uint64_t key, uint8_t l
|
|
192
197
|
|
193
198
|
template<typename EN, typename EK, typename A>
|
194
199
|
void theta_update_sketch_base<EN, EK, A>::resize() {
|
195
|
-
const size_t old_size =
|
196
|
-
const uint8_t
|
197
|
-
const
|
198
|
-
|
199
|
-
|
200
|
-
EN* old_entries = entries_;
|
201
|
-
entries_ = allocator_.allocate(new_size);
|
202
|
-
for (size_t i = 0; i < new_size; ++i) EK()(entries_[i]) = 0;
|
203
|
-
num_entries_ = 0;
|
200
|
+
const size_t old_size = 1ULL << lg_cur_size_;
|
201
|
+
const uint8_t lg_new_size = std::min<uint8_t>(lg_cur_size_ + static_cast<uint8_t>(rf_), lg_nom_size_ + 1);
|
202
|
+
const size_t new_size = 1ULL << lg_new_size;
|
203
|
+
EN* new_entries = allocator_.allocate(new_size);
|
204
|
+
for (size_t i = 0; i < new_size; ++i) EK()(new_entries[i]) = 0;
|
204
205
|
for (size_t i = 0; i < old_size; ++i) {
|
205
|
-
const uint64_t key = EK()(
|
206
|
+
const uint64_t key = EK()(entries_[i]);
|
206
207
|
if (key != 0) {
|
207
|
-
|
208
|
-
|
208
|
+
// always finds an empty slot in a larger table
|
209
|
+
new (find(new_entries, lg_new_size, key).first) EN(std::move(entries_[i]));
|
210
|
+
entries_[i].~EN();
|
211
|
+
EK()(entries_[i]) = 0;
|
209
212
|
}
|
210
213
|
}
|
211
|
-
|
214
|
+
std::swap(entries_, new_entries);
|
215
|
+
lg_cur_size_ = lg_new_size;
|
216
|
+
allocator_.deallocate(new_entries, old_size);
|
212
217
|
}
|
213
218
|
|
214
219
|
// assumes number of entries > nominal size
|
215
220
|
template<typename EN, typename EK, typename A>
|
216
221
|
void theta_update_sketch_base<EN, EK, A>::rebuild() {
|
217
|
-
const size_t size =
|
222
|
+
const size_t size = 1ULL << lg_cur_size_;
|
218
223
|
const uint32_t nominal_size = 1 << lg_nom_size_;
|
219
224
|
|
220
225
|
// empty entries have uninitialized payloads
|
@@ -227,10 +232,10 @@ void theta_update_sketch_base<EN, EK, A>::rebuild() {
|
|
227
232
|
const size_t num_old_entries = num_entries_;
|
228
233
|
entries_ = allocator_.allocate(size);
|
229
234
|
for (size_t i = 0; i < size; ++i) EK()(entries_[i]) = 0;
|
230
|
-
num_entries_ =
|
235
|
+
num_entries_ = nominal_size;
|
231
236
|
// relies on consolidating non-empty entries to the front
|
232
237
|
for (size_t i = 0; i < nominal_size; ++i) {
|
233
|
-
|
238
|
+
new (find(EK()(old_entries[i])).first) EN(std::move(old_entries[i]));
|
234
239
|
old_entries[i].~EN();
|
235
240
|
}
|
236
241
|
for (size_t i = nominal_size; i < num_old_entries; ++i) old_entries[i].~EN();
|
@@ -301,7 +306,7 @@ Derived& theta_base_builder<Derived, Allocator>::set_seed(uint64_t seed) {
|
|
301
306
|
|
302
307
|
template<typename Derived, typename Allocator>
|
303
308
|
uint64_t theta_base_builder<Derived, Allocator>::starting_theta() const {
|
304
|
-
if (p_ < 1) return theta_constants::MAX_THETA * p_;
|
309
|
+
if (p_ < 1) return static_cast<uint64_t>(theta_constants::MAX_THETA * p_);
|
305
310
|
return theta_constants::MAX_THETA;
|
306
311
|
}
|
307
312
|
|
@@ -37,7 +37,7 @@ TEST_CASE("theta a-not-b: empty", "[theta_a_not_b]") {
|
|
37
37
|
TEST_CASE("theta a-not-b: non empty no retained keys", "[theta_a_not_b]") {
|
38
38
|
update_theta_sketch a = update_theta_sketch::builder().build();
|
39
39
|
a.update(1);
|
40
|
-
update_theta_sketch b = update_theta_sketch::builder().set_p(0.
|
40
|
+
update_theta_sketch b = update_theta_sketch::builder().set_p(0.001f).build();
|
41
41
|
theta_a_not_b a_not_b;
|
42
42
|
|
43
43
|
// B is still empty
|
@@ -167,6 +167,28 @@ TEST_CASE("theta a-not-b: estimation mode half overlap", "[theta_a_not_b]") {
|
|
167
167
|
REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
|
168
168
|
}
|
169
169
|
|
170
|
+
TEST_CASE("theta a-not-b: estimation mode half overlap wrapped compact", "[theta_a_not_b]") {
|
171
|
+
update_theta_sketch a = update_theta_sketch::builder().build();
|
172
|
+
int value = 0;
|
173
|
+
for (int i = 0; i < 10000; i++) a.update(value++);
|
174
|
+
auto bytes_a = a.compact().serialize();
|
175
|
+
|
176
|
+
update_theta_sketch b = update_theta_sketch::builder().build();
|
177
|
+
value = 5000;
|
178
|
+
for (int i = 0; i < 10000; i++) b.update(value++);
|
179
|
+
auto bytes_b = b.compact().serialize();
|
180
|
+
|
181
|
+
theta_a_not_b a_not_b;
|
182
|
+
|
183
|
+
auto result = a_not_b.compute(
|
184
|
+
wrapped_compact_theta_sketch::wrap(bytes_a.data(), bytes_a.size()),
|
185
|
+
wrapped_compact_theta_sketch::wrap(bytes_b.data(), bytes_b.size())
|
186
|
+
);
|
187
|
+
REQUIRE_FALSE(result.is_empty());
|
188
|
+
REQUIRE(result.is_estimation_mode());
|
189
|
+
REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
|
190
|
+
}
|
191
|
+
|
170
192
|
TEST_CASE("theta a-not-b: estimation mode disjoint", "[theta_a_not_b]") {
|
171
193
|
update_theta_sketch a = update_theta_sketch::builder().build();
|
172
194
|
int value = 0;
|
@@ -48,7 +48,7 @@ TEST_CASE("theta intersection: empty", "[theta_intersection]") {
|
|
48
48
|
}
|
49
49
|
|
50
50
|
TEST_CASE("theta intersection: non empty no retained keys", "[theta_intersection]") {
|
51
|
-
update_theta_sketch sketch = update_theta_sketch::builder().set_p(0.
|
51
|
+
update_theta_sketch sketch = update_theta_sketch::builder().set_p(0.001f).build();
|
52
52
|
sketch.update(1);
|
53
53
|
theta_intersection intersection;
|
54
54
|
intersection.update(sketch);
|
@@ -174,6 +174,26 @@ TEST_CASE("theta intersection: estimation mode half overlap ordered", "[theta_in
|
|
174
174
|
REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
|
175
175
|
}
|
176
176
|
|
177
|
+
TEST_CASE("theta intersection: estimation mode half overlap ordered wrapped compact", "[theta_intersection]") {
|
178
|
+
update_theta_sketch sketch1 = update_theta_sketch::builder().build();
|
179
|
+
int value = 0;
|
180
|
+
for (int i = 0; i < 10000; i++) sketch1.update(value++);
|
181
|
+
auto bytes1 = sketch1.compact().serialize();
|
182
|
+
|
183
|
+
update_theta_sketch sketch2 = update_theta_sketch::builder().build();
|
184
|
+
value = 5000;
|
185
|
+
for (int i = 0; i < 10000; i++) sketch2.update(value++);
|
186
|
+
auto bytes2 = sketch2.compact().serialize();
|
187
|
+
|
188
|
+
theta_intersection intersection;
|
189
|
+
intersection.update(wrapped_compact_theta_sketch::wrap(bytes1.data(), bytes1.size()));
|
190
|
+
intersection.update(wrapped_compact_theta_sketch::wrap(bytes2.data(), bytes2.size()));
|
191
|
+
compact_theta_sketch result = intersection.get_result();
|
192
|
+
REQUIRE_FALSE(result.is_empty());
|
193
|
+
REQUIRE(result.is_estimation_mode());
|
194
|
+
REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
|
195
|
+
}
|
196
|
+
|
177
197
|
TEST_CASE("theta intersection: estimation mode disjoint unordered", "[theta_intersection]") {
|
178
198
|
update_theta_sketch sketch1 = update_theta_sketch::builder().build();
|
179
199
|
int value = 0;
|
@@ -100,6 +100,28 @@ TEST_CASE("theta jaccard: half overlap estimation mode", "[theta_sketch]") {
|
|
100
100
|
REQUIRE(jc[2] == Approx(0.33).margin(0.01));
|
101
101
|
}
|
102
102
|
|
103
|
+
TEST_CASE("theta jaccard: half overlap estimation mode custom seed", "[theta_sketch]") {
|
104
|
+
const uint64_t seed = 123;
|
105
|
+
auto sk_a = update_theta_sketch::builder().set_seed(seed).build();
|
106
|
+
auto sk_b = update_theta_sketch::builder().set_seed(seed).build();
|
107
|
+
for (int i = 0; i < 10000; ++i) {
|
108
|
+
sk_a.update(i);
|
109
|
+
sk_b.update(i + 5000);
|
110
|
+
}
|
111
|
+
|
112
|
+
// update sketches
|
113
|
+
auto jc = theta_jaccard_similarity::jaccard(sk_a, sk_b, seed);
|
114
|
+
REQUIRE(jc[0] == Approx(0.33).margin(0.01));
|
115
|
+
REQUIRE(jc[1] == Approx(0.33).margin(0.01));
|
116
|
+
REQUIRE(jc[2] == Approx(0.33).margin(0.01));
|
117
|
+
|
118
|
+
// compact sketches
|
119
|
+
jc = theta_jaccard_similarity::jaccard(sk_a.compact(), sk_b.compact(), seed);
|
120
|
+
REQUIRE(jc[0] == Approx(0.33).margin(0.01));
|
121
|
+
REQUIRE(jc[1] == Approx(0.33).margin(0.01));
|
122
|
+
REQUIRE(jc[2] == Approx(0.33).margin(0.01));
|
123
|
+
}
|
124
|
+
|
103
125
|
/**
|
104
126
|
* The distribution is quite tight, about +/- 0.7%, which is pretty good since the accuracy of the
|
105
127
|
* underlying sketch is about +/- 1.56%.
|
@@ -107,7 +129,7 @@ TEST_CASE("theta jaccard: half overlap estimation mode", "[theta_sketch]") {
|
|
107
129
|
TEST_CASE("theta jaccard: similarity test", "[theta_sketch]") {
|
108
130
|
const int8_t min_lg_k = 12;
|
109
131
|
const int u1 = 1 << 20;
|
110
|
-
const int u2 = u1 * 0.95;
|
132
|
+
const int u2 = static_cast<int>(u1 * 0.95);
|
111
133
|
const double threshold = 0.943;
|
112
134
|
|
113
135
|
auto expected = update_theta_sketch::builder().set_lg_k(min_lg_k).build();
|
@@ -120,6 +142,23 @@ TEST_CASE("theta jaccard: similarity test", "[theta_sketch]") {
|
|
120
142
|
REQUIRE(theta_jaccard_similarity::similarity_test(actual, actual, threshold));
|
121
143
|
}
|
122
144
|
|
145
|
+
TEST_CASE("theta jaccard: similarity test custom seed", "[theta_sketch]") {
|
146
|
+
const int8_t min_lg_k = 12;
|
147
|
+
const int u1 = 1 << 20;
|
148
|
+
const int u2 = static_cast<int>(u1 * 0.95);
|
149
|
+
const double threshold = 0.943;
|
150
|
+
const uint64_t seed = 1234;
|
151
|
+
|
152
|
+
auto expected = update_theta_sketch::builder().set_lg_k(min_lg_k).set_seed(seed).build();
|
153
|
+
for (int i = 0; i < u1; ++i) expected.update(i);
|
154
|
+
|
155
|
+
auto actual = update_theta_sketch::builder().set_lg_k(min_lg_k).set_seed(seed).build();
|
156
|
+
for (int i = 0; i < u2; ++i) actual.update(i);
|
157
|
+
|
158
|
+
REQUIRE(theta_jaccard_similarity::similarity_test(actual, expected, threshold, seed));
|
159
|
+
REQUIRE(theta_jaccard_similarity::similarity_test(actual, actual, threshold, seed));
|
160
|
+
}
|
161
|
+
|
123
162
|
/**
|
124
163
|
* The distribution is much looser here, about +/- 14%. This is due to the fact that intersections loose accuracy
|
125
164
|
* as the ratio of intersection to the union becomes a small number.
|
@@ -127,7 +166,7 @@ TEST_CASE("theta jaccard: similarity test", "[theta_sketch]") {
|
|
127
166
|
TEST_CASE("theta jaccard: dissimilarity test", "[theta_sketch]") {
|
128
167
|
const int8_t min_lg_k = 12;
|
129
168
|
const int u1 = 1 << 20;
|
130
|
-
const int u2 = u1 * 0.05;
|
169
|
+
const int u2 = static_cast<int>(u1 * 0.05);
|
131
170
|
const double threshold = 0.061;
|
132
171
|
|
133
172
|
auto expected = update_theta_sketch::builder().set_lg_k(min_lg_k).build();
|
@@ -140,4 +179,21 @@ TEST_CASE("theta jaccard: dissimilarity test", "[theta_sketch]") {
|
|
140
179
|
REQUIRE_FALSE(theta_jaccard_similarity::dissimilarity_test(actual, actual, threshold));
|
141
180
|
}
|
142
181
|
|
182
|
+
TEST_CASE("theta jaccard: dissimilarity test custom seed", "[theta_sketch]") {
|
183
|
+
const int8_t min_lg_k = 12;
|
184
|
+
const int u1 = 1 << 20;
|
185
|
+
const int u2 = static_cast<int>(u1 * 0.05);
|
186
|
+
const double threshold = 0.061;
|
187
|
+
const uint64_t seed = 1234;
|
188
|
+
|
189
|
+
auto expected = update_theta_sketch::builder().set_lg_k(min_lg_k).set_seed(seed).build();
|
190
|
+
for (int i = 0; i < u1; ++i) expected.update(i);
|
191
|
+
|
192
|
+
auto actual = update_theta_sketch::builder().set_lg_k(min_lg_k).set_seed(seed).build();
|
193
|
+
for (int i = 0; i < u2; ++i) actual.update(i);
|
194
|
+
|
195
|
+
REQUIRE(theta_jaccard_similarity::dissimilarity_test(actual, expected, threshold, seed));
|
196
|
+
REQUIRE_FALSE(theta_jaccard_similarity::dissimilarity_test(actual, actual, threshold, seed));
|
197
|
+
}
|
198
|
+
|
143
199
|
} /* namespace datasketches */
|
@@ -50,7 +50,7 @@ TEST_CASE("theta sketch: empty", "[theta_sketch]") {
|
|
50
50
|
}
|
51
51
|
|
52
52
|
TEST_CASE("theta sketch: non empty no retained keys", "[theta_sketch]") {
|
53
|
-
update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.
|
53
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.001f).build();
|
54
54
|
update_sketch.update(1);
|
55
55
|
//std::cerr << update_sketch.to_string();
|
56
56
|
REQUIRE(update_sketch.get_num_retained() == 0);
|
@@ -238,4 +238,40 @@ TEST_CASE("theta sketch: deserialize compact single item buffer overrun", "[thet
|
|
238
238
|
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
|
239
239
|
}
|
240
240
|
|
241
|
+
TEST_CASE("theta sketch: conversion constructor and wrapped compact", "[theta_sketch]") {
|
242
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
243
|
+
const int n = 8192;
|
244
|
+
for (int i = 0; i < n; i++) update_sketch.update(i);
|
245
|
+
|
246
|
+
// unordered
|
247
|
+
auto unordered_compact1 = update_sketch.compact(false);
|
248
|
+
compact_theta_sketch unordered_compact2(update_sketch, false);
|
249
|
+
auto it = unordered_compact1.begin();
|
250
|
+
for (auto entry: unordered_compact2) {
|
251
|
+
REQUIRE(*it == entry);
|
252
|
+
++it;
|
253
|
+
}
|
254
|
+
|
255
|
+
// ordered
|
256
|
+
auto ordered_compact1 = update_sketch.compact();
|
257
|
+
compact_theta_sketch ordered_compact2(update_sketch, true);
|
258
|
+
it = ordered_compact1.begin();
|
259
|
+
for (auto entry: ordered_compact2) {
|
260
|
+
REQUIRE(*it == entry);
|
261
|
+
++it;
|
262
|
+
}
|
263
|
+
|
264
|
+
// wrapped compact
|
265
|
+
auto bytes = ordered_compact1.serialize();
|
266
|
+
auto ordered_compact3 = wrapped_compact_theta_sketch::wrap(bytes.data(), bytes.size());
|
267
|
+
it = ordered_compact1.begin();
|
268
|
+
for (auto entry: ordered_compact3) {
|
269
|
+
REQUIRE(*it == entry);
|
270
|
+
++it;
|
271
|
+
}
|
272
|
+
|
273
|
+
// seed mismatch
|
274
|
+
REQUIRE_THROWS_AS(wrapped_compact_theta_sketch::wrap(bytes.data(), bytes.size(), 0), std::invalid_argument);
|
275
|
+
}
|
276
|
+
|
241
277
|
} /* namespace datasketches */
|
@@ -39,7 +39,7 @@ TEST_CASE("theta union: empty", "[theta_union]") {
|
|
39
39
|
}
|
40
40
|
|
41
41
|
TEST_CASE("theta union: non empty no retained keys", "[theta_union]") {
|
42
|
-
update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.
|
42
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.001f).build();
|
43
43
|
update_sketch.update(1);
|
44
44
|
theta_union u = theta_union::builder().build();
|
45
45
|
u.update(update_sketch);
|
@@ -65,7 +65,27 @@ TEST_CASE("theta union: exact mode half overlap", "[theta_union]") {
|
|
65
65
|
compact_theta_sketch sketch3 = u.get_result();
|
66
66
|
REQUIRE_FALSE(sketch3.is_empty());
|
67
67
|
REQUIRE_FALSE(sketch3.is_estimation_mode());
|
68
|
-
REQUIRE(sketch3.get_estimate() ==
|
68
|
+
REQUIRE(sketch3.get_estimate() == 1500.0);
|
69
|
+
}
|
70
|
+
|
71
|
+
TEST_CASE("theta union: exact mode half overlap wrapped compact", "[theta_union]") {
|
72
|
+
update_theta_sketch sketch1 = update_theta_sketch::builder().build();
|
73
|
+
int value = 0;
|
74
|
+
for (int i = 0; i < 1000; i++) sketch1.update(value++);
|
75
|
+
auto bytes1 = sketch1.compact().serialize();
|
76
|
+
|
77
|
+
update_theta_sketch sketch2 = update_theta_sketch::builder().build();
|
78
|
+
value = 500;
|
79
|
+
for (int i = 0; i < 1000; i++) sketch2.update(value++);
|
80
|
+
auto bytes2 = sketch2.compact().serialize();
|
81
|
+
|
82
|
+
theta_union u = theta_union::builder().build();
|
83
|
+
u.update(wrapped_compact_theta_sketch::wrap(bytes1.data(), bytes1.size()));
|
84
|
+
u.update(wrapped_compact_theta_sketch::wrap(bytes2.data(), bytes2.size()));
|
85
|
+
compact_theta_sketch sketch3 = u.get_result();
|
86
|
+
REQUIRE_FALSE(sketch3.is_empty());
|
87
|
+
REQUIRE_FALSE(sketch3.is_estimation_mode());
|
88
|
+
REQUIRE(sketch3.get_estimate() == 1500.0);
|
69
89
|
}
|
70
90
|
|
71
91
|
TEST_CASE("theta union: estimation mode half overlap", "[theta_union]") {
|
@@ -70,33 +70,33 @@ uint8_t compact_array_of_doubles_sketch_alloc<A>::get_num_values() const {
|
|
70
70
|
template<typename A>
|
71
71
|
void compact_array_of_doubles_sketch_alloc<A>::serialize(std::ostream& os) const {
|
72
72
|
const uint8_t preamble_longs = 1;
|
73
|
-
|
73
|
+
write(os, preamble_longs);
|
74
74
|
const uint8_t serial_version = SERIAL_VERSION;
|
75
|
-
|
75
|
+
write(os, serial_version);
|
76
76
|
const uint8_t family = SKETCH_FAMILY;
|
77
|
-
|
77
|
+
write(os, family);
|
78
78
|
const uint8_t type = SKETCH_TYPE;
|
79
|
-
|
79
|
+
write(os, type);
|
80
80
|
const uint8_t flags_byte(
|
81
81
|
(this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
|
82
82
|
(this->get_num_retained() > 0 ? 1 << flags::HAS_ENTRIES : 0) |
|
83
83
|
(this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
|
84
84
|
);
|
85
|
-
|
86
|
-
|
85
|
+
write(os, flags_byte);
|
86
|
+
write(os, num_values_);
|
87
87
|
const uint16_t seed_hash = this->get_seed_hash();
|
88
|
-
|
89
|
-
|
88
|
+
write(os, seed_hash);
|
89
|
+
write(os, this->theta_);
|
90
90
|
if (this->get_num_retained() > 0) {
|
91
|
-
const uint32_t num_entries = this->entries_.size();
|
92
|
-
|
91
|
+
const uint32_t num_entries = static_cast<uint32_t>(this->entries_.size());
|
92
|
+
write(os, num_entries);
|
93
93
|
const uint32_t unused32 = 0;
|
94
|
-
|
94
|
+
write(os, unused32);
|
95
95
|
for (const auto& it: this->entries_) {
|
96
|
-
|
96
|
+
write(os, it.first);
|
97
97
|
}
|
98
98
|
for (const auto& it: this->entries_) {
|
99
|
-
|
99
|
+
write(os, it.second.data(), it.second.size() * sizeof(double));
|
100
100
|
}
|
101
101
|
}
|
102
102
|
}
|
@@ -110,30 +110,29 @@ auto compact_array_of_doubles_sketch_alloc<A>::serialize(unsigned header_size_by
|
|
110
110
|
vector_bytes bytes(size, 0, this->entries_.get_allocator());
|
111
111
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
112
112
|
|
113
|
-
ptr += copy_to_mem(
|
113
|
+
ptr += copy_to_mem(preamble_longs, ptr);
|
114
114
|
const uint8_t serial_version = SERIAL_VERSION;
|
115
|
-
ptr += copy_to_mem(
|
115
|
+
ptr += copy_to_mem(serial_version, ptr);
|
116
116
|
const uint8_t family = SKETCH_FAMILY;
|
117
|
-
ptr += copy_to_mem(
|
117
|
+
ptr += copy_to_mem(family, ptr);
|
118
118
|
const uint8_t type = SKETCH_TYPE;
|
119
|
-
ptr += copy_to_mem(
|
119
|
+
ptr += copy_to_mem(type, ptr);
|
120
120
|
const uint8_t flags_byte(
|
121
121
|
(this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
|
122
122
|
(this->get_num_retained() ? 1 << flags::HAS_ENTRIES : 0) |
|
123
123
|
(this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
|
124
124
|
);
|
125
|
-
ptr += copy_to_mem(
|
126
|
-
ptr += copy_to_mem(
|
125
|
+
ptr += copy_to_mem(flags_byte, ptr);
|
126
|
+
ptr += copy_to_mem(num_values_, ptr);
|
127
127
|
const uint16_t seed_hash = this->get_seed_hash();
|
128
|
-
ptr += copy_to_mem(
|
129
|
-
ptr += copy_to_mem(
|
128
|
+
ptr += copy_to_mem(seed_hash, ptr);
|
129
|
+
ptr += copy_to_mem((this->theta_), ptr);
|
130
130
|
if (this->get_num_retained() > 0) {
|
131
|
-
const uint32_t num_entries = this->entries_.size();
|
132
|
-
ptr += copy_to_mem(
|
133
|
-
|
134
|
-
ptr += copy_to_mem(&unused32, ptr, sizeof(unused32));
|
131
|
+
const uint32_t num_entries = static_cast<uint32_t>(this->entries_.size());
|
132
|
+
ptr += copy_to_mem(num_entries, ptr);
|
133
|
+
ptr += sizeof(uint32_t); // unused
|
135
134
|
for (const auto& it: this->entries_) {
|
136
|
-
ptr += copy_to_mem(
|
135
|
+
ptr += copy_to_mem(it.first, ptr);
|
137
136
|
}
|
138
137
|
for (const auto& it: this->entries_) {
|
139
138
|
ptr += copy_to_mem(it.second.data(), ptr, it.second.size() * sizeof(double));
|
@@ -144,40 +143,30 @@ auto compact_array_of_doubles_sketch_alloc<A>::serialize(unsigned header_size_by
|
|
144
143
|
|
145
144
|
template<typename A>
|
146
145
|
compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) {
|
147
|
-
uint8_t
|
148
|
-
|
149
|
-
uint8_t
|
150
|
-
|
151
|
-
uint8_t
|
152
|
-
|
153
|
-
|
154
|
-
is.read(reinterpret_cast<char*>(&type), sizeof(type));
|
155
|
-
uint8_t flags_byte;
|
156
|
-
is.read(reinterpret_cast<char*>(&flags_byte), sizeof(flags_byte));
|
157
|
-
uint8_t num_values;
|
158
|
-
is.read(reinterpret_cast<char*>(&num_values), sizeof(num_values));
|
159
|
-
uint16_t seed_hash;
|
160
|
-
is.read(reinterpret_cast<char*>(&seed_hash), sizeof(seed_hash));
|
146
|
+
read<uint8_t>(is); // unused
|
147
|
+
const auto serial_version = read<uint8_t>(is);
|
148
|
+
const auto family = read<uint8_t>(is);
|
149
|
+
const auto type = read<uint8_t>(is);
|
150
|
+
const auto flags_byte = read<uint8_t>(is);
|
151
|
+
const auto num_values = read<uint8_t>(is);
|
152
|
+
const auto seed_hash = read<uint16_t>(is);
|
161
153
|
checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
|
162
154
|
checker<true>::check_sketch_family(family, SKETCH_FAMILY);
|
163
155
|
checker<true>::check_sketch_type(type, SKETCH_TYPE);
|
164
156
|
const bool has_entries = flags_byte & (1 << flags::HAS_ENTRIES);
|
165
157
|
if (has_entries) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
|
166
158
|
|
167
|
-
|
168
|
-
is.read(reinterpret_cast<char*>(&theta), sizeof(theta));
|
159
|
+
const auto theta = read<uint64_t>(is);
|
169
160
|
std::vector<Entry, AllocEntry> entries(allocator);
|
170
161
|
if (has_entries) {
|
171
|
-
|
172
|
-
|
173
|
-
uint32_t unused32;
|
174
|
-
is.read(reinterpret_cast<char*>(&unused32), sizeof(unused32));
|
162
|
+
const auto num_entries = read<uint32_t>(is);
|
163
|
+
read<uint32_t>(is); // unused
|
175
164
|
entries.reserve(num_entries);
|
176
165
|
std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator);
|
177
|
-
|
166
|
+
read(is, keys.data(), num_entries * sizeof(uint64_t));
|
178
167
|
for (size_t i = 0; i < num_entries; ++i) {
|
179
168
|
aod<A> summary(num_values, allocator);
|
180
|
-
|
169
|
+
read(is, summary.data(), num_values * sizeof(double));
|
181
170
|
entries.push_back(Entry(keys[i], std::move(summary)));
|
182
171
|
}
|
183
172
|
}
|
@@ -191,20 +180,19 @@ template<typename A>
|
|
191
180
|
compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) {
|
192
181
|
ensure_minimum_memory(size, 16);
|
193
182
|
const char* ptr = static_cast<const char*>(bytes);
|
194
|
-
uint8_t
|
195
|
-
ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
|
183
|
+
ptr += sizeof(uint8_t); // unused
|
196
184
|
uint8_t serial_version;
|
197
|
-
ptr += copy_from_mem(ptr,
|
185
|
+
ptr += copy_from_mem(ptr, serial_version);
|
198
186
|
uint8_t family;
|
199
|
-
ptr += copy_from_mem(ptr,
|
187
|
+
ptr += copy_from_mem(ptr, family);
|
200
188
|
uint8_t type;
|
201
|
-
ptr += copy_from_mem(ptr,
|
189
|
+
ptr += copy_from_mem(ptr, type);
|
202
190
|
uint8_t flags_byte;
|
203
|
-
ptr += copy_from_mem(ptr,
|
191
|
+
ptr += copy_from_mem(ptr, flags_byte);
|
204
192
|
uint8_t num_values;
|
205
|
-
ptr += copy_from_mem(ptr,
|
193
|
+
ptr += copy_from_mem(ptr, num_values);
|
206
194
|
uint16_t seed_hash;
|
207
|
-
ptr += copy_from_mem(ptr,
|
195
|
+
ptr += copy_from_mem(ptr, seed_hash);
|
208
196
|
checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
|
209
197
|
checker<true>::check_sketch_family(family, SKETCH_FAMILY);
|
210
198
|
checker<true>::check_sketch_type(type, SKETCH_TYPE);
|
@@ -212,14 +200,13 @@ compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A
|
|
212
200
|
if (has_entries) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
|
213
201
|
|
214
202
|
uint64_t theta;
|
215
|
-
ptr += copy_from_mem(ptr,
|
203
|
+
ptr += copy_from_mem(ptr, theta);
|
216
204
|
std::vector<Entry, AllocEntry> entries(allocator);
|
217
205
|
if (has_entries) {
|
218
206
|
ensure_minimum_memory(size, 24);
|
219
207
|
uint32_t num_entries;
|
220
|
-
ptr += copy_from_mem(ptr,
|
221
|
-
uint32_t
|
222
|
-
ptr += copy_from_mem(ptr, &unused32, sizeof(unused32));
|
208
|
+
ptr += copy_from_mem(ptr, num_entries);
|
209
|
+
ptr += sizeof(uint32_t); // unused
|
223
210
|
ensure_minimum_memory(size, 24 + (sizeof(uint64_t) + sizeof(double) * num_values) * num_entries);
|
224
211
|
entries.reserve(num_entries);
|
225
212
|
std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator);
|