datasketches 0.2.0 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/README.md +7 -7
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
- data/vendor/datasketches-cpp/python/README.md +50 -50
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +10 -7
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
- metadata +18 -7
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -21,8 +21,8 @@ namespace datasketches {
|
|
21
21
|
|
22
22
|
template<typename A>
|
23
23
|
update_array_of_doubles_sketch_alloc<A>::update_array_of_doubles_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
|
24
|
-
uint64_t theta, uint64_t seed, const array_of_doubles_update_policy<A>& policy, const A& allocator):
|
25
|
-
Base(lg_cur_size, lg_nom_size, rf, theta, seed, policy, allocator) {}
|
24
|
+
float p, uint64_t theta, uint64_t seed, const array_of_doubles_update_policy<A>& policy, const A& allocator):
|
25
|
+
Base(lg_cur_size, lg_nom_size, rf, p, theta, seed, policy, allocator) {}
|
26
26
|
|
27
27
|
|
28
28
|
template<typename A>
|
@@ -43,7 +43,7 @@ tuple_base_builder<builder, array_of_doubles_update_policy<A>, A>(policy, alloca
|
|
43
43
|
|
44
44
|
template<typename A>
|
45
45
|
update_array_of_doubles_sketch_alloc<A> update_array_of_doubles_sketch_alloc<A>::builder::build() const {
|
46
|
-
return update_array_of_doubles_sketch_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
46
|
+
return update_array_of_doubles_sketch_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
47
47
|
}
|
48
48
|
|
49
49
|
// compact sketch
|
@@ -70,33 +70,33 @@ uint8_t compact_array_of_doubles_sketch_alloc<A>::get_num_values() const {
|
|
70
70
|
template<typename A>
|
71
71
|
void compact_array_of_doubles_sketch_alloc<A>::serialize(std::ostream& os) const {
|
72
72
|
const uint8_t preamble_longs = 1;
|
73
|
-
|
73
|
+
write(os, preamble_longs);
|
74
74
|
const uint8_t serial_version = SERIAL_VERSION;
|
75
|
-
|
75
|
+
write(os, serial_version);
|
76
76
|
const uint8_t family = SKETCH_FAMILY;
|
77
|
-
|
77
|
+
write(os, family);
|
78
78
|
const uint8_t type = SKETCH_TYPE;
|
79
|
-
|
79
|
+
write(os, type);
|
80
80
|
const uint8_t flags_byte(
|
81
81
|
(this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
|
82
82
|
(this->get_num_retained() > 0 ? 1 << flags::HAS_ENTRIES : 0) |
|
83
83
|
(this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
|
84
84
|
);
|
85
|
-
|
86
|
-
|
85
|
+
write(os, flags_byte);
|
86
|
+
write(os, num_values_);
|
87
87
|
const uint16_t seed_hash = this->get_seed_hash();
|
88
|
-
|
89
|
-
|
88
|
+
write(os, seed_hash);
|
89
|
+
write(os, this->theta_);
|
90
90
|
if (this->get_num_retained() > 0) {
|
91
|
-
const uint32_t num_entries = this->entries_.size();
|
92
|
-
|
91
|
+
const uint32_t num_entries = static_cast<uint32_t>(this->entries_.size());
|
92
|
+
write(os, num_entries);
|
93
93
|
const uint32_t unused32 = 0;
|
94
|
-
|
94
|
+
write(os, unused32);
|
95
95
|
for (const auto& it: this->entries_) {
|
96
|
-
|
96
|
+
write(os, it.first);
|
97
97
|
}
|
98
98
|
for (const auto& it: this->entries_) {
|
99
|
-
|
99
|
+
write(os, it.second.data(), it.second.size() * sizeof(double));
|
100
100
|
}
|
101
101
|
}
|
102
102
|
}
|
@@ -110,30 +110,29 @@ auto compact_array_of_doubles_sketch_alloc<A>::serialize(unsigned header_size_by
|
|
110
110
|
vector_bytes bytes(size, 0, this->entries_.get_allocator());
|
111
111
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
112
112
|
|
113
|
-
ptr += copy_to_mem(
|
113
|
+
ptr += copy_to_mem(preamble_longs, ptr);
|
114
114
|
const uint8_t serial_version = SERIAL_VERSION;
|
115
|
-
ptr += copy_to_mem(
|
115
|
+
ptr += copy_to_mem(serial_version, ptr);
|
116
116
|
const uint8_t family = SKETCH_FAMILY;
|
117
|
-
ptr += copy_to_mem(
|
117
|
+
ptr += copy_to_mem(family, ptr);
|
118
118
|
const uint8_t type = SKETCH_TYPE;
|
119
|
-
ptr += copy_to_mem(
|
119
|
+
ptr += copy_to_mem(type, ptr);
|
120
120
|
const uint8_t flags_byte(
|
121
121
|
(this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
|
122
122
|
(this->get_num_retained() ? 1 << flags::HAS_ENTRIES : 0) |
|
123
123
|
(this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
|
124
124
|
);
|
125
|
-
ptr += copy_to_mem(
|
126
|
-
ptr += copy_to_mem(
|
125
|
+
ptr += copy_to_mem(flags_byte, ptr);
|
126
|
+
ptr += copy_to_mem(num_values_, ptr);
|
127
127
|
const uint16_t seed_hash = this->get_seed_hash();
|
128
|
-
ptr += copy_to_mem(
|
129
|
-
ptr += copy_to_mem(
|
128
|
+
ptr += copy_to_mem(seed_hash, ptr);
|
129
|
+
ptr += copy_to_mem((this->theta_), ptr);
|
130
130
|
if (this->get_num_retained() > 0) {
|
131
|
-
const uint32_t num_entries = this->entries_.size();
|
132
|
-
ptr += copy_to_mem(
|
133
|
-
|
134
|
-
ptr += copy_to_mem(&unused32, ptr, sizeof(unused32));
|
131
|
+
const uint32_t num_entries = static_cast<uint32_t>(this->entries_.size());
|
132
|
+
ptr += copy_to_mem(num_entries, ptr);
|
133
|
+
ptr += sizeof(uint32_t); // unused
|
135
134
|
for (const auto& it: this->entries_) {
|
136
|
-
ptr += copy_to_mem(
|
135
|
+
ptr += copy_to_mem(it.first, ptr);
|
137
136
|
}
|
138
137
|
for (const auto& it: this->entries_) {
|
139
138
|
ptr += copy_to_mem(it.second.data(), ptr, it.second.size() * sizeof(double));
|
@@ -144,40 +143,30 @@ auto compact_array_of_doubles_sketch_alloc<A>::serialize(unsigned header_size_by
|
|
144
143
|
|
145
144
|
template<typename A>
|
146
145
|
compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) {
|
147
|
-
uint8_t
|
148
|
-
|
149
|
-
uint8_t
|
150
|
-
|
151
|
-
uint8_t
|
152
|
-
|
153
|
-
|
154
|
-
is.read(reinterpret_cast<char*>(&type), sizeof(type));
|
155
|
-
uint8_t flags_byte;
|
156
|
-
is.read(reinterpret_cast<char*>(&flags_byte), sizeof(flags_byte));
|
157
|
-
uint8_t num_values;
|
158
|
-
is.read(reinterpret_cast<char*>(&num_values), sizeof(num_values));
|
159
|
-
uint16_t seed_hash;
|
160
|
-
is.read(reinterpret_cast<char*>(&seed_hash), sizeof(seed_hash));
|
146
|
+
read<uint8_t>(is); // unused
|
147
|
+
const auto serial_version = read<uint8_t>(is);
|
148
|
+
const auto family = read<uint8_t>(is);
|
149
|
+
const auto type = read<uint8_t>(is);
|
150
|
+
const auto flags_byte = read<uint8_t>(is);
|
151
|
+
const auto num_values = read<uint8_t>(is);
|
152
|
+
const auto seed_hash = read<uint16_t>(is);
|
161
153
|
checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
|
162
154
|
checker<true>::check_sketch_family(family, SKETCH_FAMILY);
|
163
155
|
checker<true>::check_sketch_type(type, SKETCH_TYPE);
|
164
156
|
const bool has_entries = flags_byte & (1 << flags::HAS_ENTRIES);
|
165
157
|
if (has_entries) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
|
166
158
|
|
167
|
-
|
168
|
-
is.read(reinterpret_cast<char*>(&theta), sizeof(theta));
|
159
|
+
const auto theta = read<uint64_t>(is);
|
169
160
|
std::vector<Entry, AllocEntry> entries(allocator);
|
170
161
|
if (has_entries) {
|
171
|
-
|
172
|
-
|
173
|
-
uint32_t unused32;
|
174
|
-
is.read(reinterpret_cast<char*>(&unused32), sizeof(unused32));
|
162
|
+
const auto num_entries = read<uint32_t>(is);
|
163
|
+
read<uint32_t>(is); // unused
|
175
164
|
entries.reserve(num_entries);
|
176
165
|
std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator);
|
177
|
-
|
166
|
+
read(is, keys.data(), num_entries * sizeof(uint64_t));
|
178
167
|
for (size_t i = 0; i < num_entries; ++i) {
|
179
168
|
aod<A> summary(num_values, allocator);
|
180
|
-
|
169
|
+
read(is, summary.data(), num_values * sizeof(double));
|
181
170
|
entries.push_back(Entry(keys[i], std::move(summary)));
|
182
171
|
}
|
183
172
|
}
|
@@ -191,20 +180,19 @@ template<typename A>
|
|
191
180
|
compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) {
|
192
181
|
ensure_minimum_memory(size, 16);
|
193
182
|
const char* ptr = static_cast<const char*>(bytes);
|
194
|
-
uint8_t
|
195
|
-
ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
|
183
|
+
ptr += sizeof(uint8_t); // unused
|
196
184
|
uint8_t serial_version;
|
197
|
-
ptr += copy_from_mem(ptr,
|
185
|
+
ptr += copy_from_mem(ptr, serial_version);
|
198
186
|
uint8_t family;
|
199
|
-
ptr += copy_from_mem(ptr,
|
187
|
+
ptr += copy_from_mem(ptr, family);
|
200
188
|
uint8_t type;
|
201
|
-
ptr += copy_from_mem(ptr,
|
189
|
+
ptr += copy_from_mem(ptr, type);
|
202
190
|
uint8_t flags_byte;
|
203
|
-
ptr += copy_from_mem(ptr,
|
191
|
+
ptr += copy_from_mem(ptr, flags_byte);
|
204
192
|
uint8_t num_values;
|
205
|
-
ptr += copy_from_mem(ptr,
|
193
|
+
ptr += copy_from_mem(ptr, num_values);
|
206
194
|
uint16_t seed_hash;
|
207
|
-
ptr += copy_from_mem(ptr,
|
195
|
+
ptr += copy_from_mem(ptr, seed_hash);
|
208
196
|
checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
|
209
197
|
checker<true>::check_sketch_family(family, SKETCH_FAMILY);
|
210
198
|
checker<true>::check_sketch_type(type, SKETCH_TYPE);
|
@@ -212,14 +200,13 @@ compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A
|
|
212
200
|
if (has_entries) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
|
213
201
|
|
214
202
|
uint64_t theta;
|
215
|
-
ptr += copy_from_mem(ptr,
|
203
|
+
ptr += copy_from_mem(ptr, theta);
|
216
204
|
std::vector<Entry, AllocEntry> entries(allocator);
|
217
205
|
if (has_entries) {
|
218
206
|
ensure_minimum_memory(size, 24);
|
219
207
|
uint32_t num_entries;
|
220
|
-
ptr += copy_from_mem(ptr,
|
221
|
-
uint32_t
|
222
|
-
ptr += copy_from_mem(ptr, &unused32, sizeof(unused32));
|
208
|
+
ptr += copy_from_mem(ptr, num_entries);
|
209
|
+
ptr += sizeof(uint32_t); // unused
|
223
210
|
ensure_minimum_memory(size, 24 + (sizeof(uint64_t) + sizeof(double) * num_values) * num_entries);
|
224
211
|
entries.reserve(num_entries);
|
225
212
|
std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator);
|
@@ -61,7 +61,7 @@ public:
|
|
61
61
|
|
62
62
|
private:
|
63
63
|
// for builder
|
64
|
-
array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
64
|
+
array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
65
65
|
};
|
66
66
|
|
67
67
|
template<typename Allocator>
|
@@ -20,8 +20,8 @@
|
|
20
20
|
namespace datasketches {
|
21
21
|
|
22
22
|
template<typename A>
|
23
|
-
array_of_doubles_union_alloc<A>::array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const A& allocator):
|
24
|
-
Base(lg_cur_size, lg_nom_size, rf, theta, seed, policy, allocator)
|
23
|
+
array_of_doubles_union_alloc<A>::array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const A& allocator):
|
24
|
+
Base(lg_cur_size, lg_nom_size, rf, p, theta, seed, policy, allocator)
|
25
25
|
{}
|
26
26
|
|
27
27
|
template<typename A>
|
@@ -37,7 +37,7 @@ tuple_base_builder<builder, Policy, A>(policy, allocator) {}
|
|
37
37
|
|
38
38
|
template<typename A>
|
39
39
|
array_of_doubles_union_alloc<A> array_of_doubles_union_alloc<A>::builder::build() const {
|
40
|
-
return array_of_doubles_union_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
40
|
+
return array_of_doubles_union_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
41
41
|
}
|
42
42
|
|
43
43
|
} /* namespace datasketches */
|
@@ -153,8 +153,7 @@ public:
|
|
153
153
|
virtual const_iterator end() const = 0;
|
154
154
|
|
155
155
|
protected:
|
156
|
-
|
157
|
-
virtual void print_specifics(ostrstream& os) const = 0;
|
156
|
+
virtual void print_specifics(std::ostringstream& os) const = 0;
|
158
157
|
|
159
158
|
static uint16_t get_seed_hash(uint64_t seed);
|
160
159
|
|
@@ -325,6 +324,11 @@ public:
|
|
325
324
|
*/
|
326
325
|
void trim();
|
327
326
|
|
327
|
+
/**
|
328
|
+
* Reset the sketch to the initial empty state
|
329
|
+
*/
|
330
|
+
void reset();
|
331
|
+
|
328
332
|
/**
|
329
333
|
* Converts this sketch to a compact sketch (ordered or unordered).
|
330
334
|
* @param ordered optional flag to specify if ordered sketch should be produced
|
@@ -342,10 +346,9 @@ protected:
|
|
342
346
|
tuple_map map_;
|
343
347
|
|
344
348
|
// for builder
|
345
|
-
update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
349
|
+
update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
346
350
|
|
347
|
-
|
348
|
-
virtual void print_specifics(ostrstream& os) const;
|
351
|
+
virtual void print_specifics(std::ostringstream& os) const;
|
349
352
|
};
|
350
353
|
|
351
354
|
// compact sketch
|
@@ -367,9 +370,11 @@ public:
|
|
367
370
|
using vector_bytes = std::vector<uint8_t, AllocBytes>;
|
368
371
|
using comparator = compare_by_key<ExtractKey>;
|
369
372
|
|
370
|
-
static const uint8_t
|
373
|
+
static const uint8_t SERIAL_VERSION_LEGACY = 1;
|
374
|
+
static const uint8_t SERIAL_VERSION = 3;
|
371
375
|
static const uint8_t SKETCH_FAMILY = 9;
|
372
|
-
static const uint8_t SKETCH_TYPE =
|
376
|
+
static const uint8_t SKETCH_TYPE = 1;
|
377
|
+
static const uint8_t SKETCH_TYPE_LEGACY = 5;
|
373
378
|
enum flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED };
|
374
379
|
|
375
380
|
// Instances of this type can be obtained:
|
@@ -473,8 +478,7 @@ protected:
|
|
473
478
|
bool destroy_;
|
474
479
|
};
|
475
480
|
|
476
|
-
|
477
|
-
virtual void print_specifics(ostrstream& os) const;
|
481
|
+
virtual void print_specifics(std::ostringstream& os) const;
|
478
482
|
|
479
483
|
};
|
480
484
|
|
@@ -53,7 +53,9 @@ double tuple_sketch<S, A>::get_upper_bound(uint8_t num_std_devs) const {
|
|
53
53
|
|
54
54
|
template<typename S, typename A>
|
55
55
|
string<A> tuple_sketch<S, A>::to_string(bool detail) const {
|
56
|
-
|
56
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
57
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
58
|
+
std::ostringstream os;
|
57
59
|
os << "### Tuple sketch summary:" << std::endl;
|
58
60
|
os << " num retained entries : " << get_num_retained() << std::endl;
|
59
61
|
os << " seed hash : " << get_seed_hash() << std::endl;
|
@@ -74,15 +76,15 @@ string<A> tuple_sketch<S, A>::to_string(bool detail) const {
|
|
74
76
|
}
|
75
77
|
os << "### End retained entries" << std::endl;
|
76
78
|
}
|
77
|
-
return os.str();
|
79
|
+
return string<A>(os.str().c_str(), get_allocator());
|
78
80
|
}
|
79
81
|
|
80
82
|
// update sketch
|
81
83
|
|
82
84
|
template<typename S, typename U, typename P, typename A>
|
83
|
-
update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
|
85
|
+
update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
|
84
86
|
policy_(policy),
|
85
|
-
map_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator)
|
87
|
+
map_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator)
|
86
88
|
{}
|
87
89
|
|
88
90
|
template<typename S, typename U, typename P, typename A>
|
@@ -97,12 +99,12 @@ bool update_tuple_sketch<S, U, P, A>::is_empty() const {
|
|
97
99
|
|
98
100
|
template<typename S, typename U, typename P, typename A>
|
99
101
|
bool update_tuple_sketch<S, U, P, A>::is_ordered() const {
|
100
|
-
return false
|
102
|
+
return map_.num_entries_ > 1 ? false : true;;
|
101
103
|
}
|
102
104
|
|
103
105
|
template<typename S, typename U, typename P, typename A>
|
104
106
|
uint64_t update_tuple_sketch<S, U, P, A>::get_theta64() const {
|
105
|
-
return map_.theta_;
|
107
|
+
return is_empty() ? theta_constants::MAX_THETA : map_.theta_;
|
106
108
|
}
|
107
109
|
|
108
110
|
template<typename S, typename U, typename P, typename A>
|
@@ -212,6 +214,11 @@ void update_tuple_sketch<S, U, P, A>::trim() {
|
|
212
214
|
map_.trim();
|
213
215
|
}
|
214
216
|
|
217
|
+
template<typename S, typename U, typename P, typename A>
|
218
|
+
void update_tuple_sketch<S, U, P, A>::reset() {
|
219
|
+
map_.reset();
|
220
|
+
}
|
221
|
+
|
215
222
|
template<typename S, typename U, typename P, typename A>
|
216
223
|
auto update_tuple_sketch<S, U, P, A>::begin() -> iterator {
|
217
224
|
return iterator(map_.entries_, 1 << map_.lg_cur_size_, 0);
|
@@ -238,7 +245,7 @@ compact_tuple_sketch<S, A> update_tuple_sketch<S, U, P, A>::compact(bool ordered
|
|
238
245
|
}
|
239
246
|
|
240
247
|
template<typename S, typename U, typename P, typename A>
|
241
|
-
void update_tuple_sketch<S, U, P, A>::print_specifics(
|
248
|
+
void update_tuple_sketch<S, U, P, A>::print_specifics(std::ostringstream& os) const {
|
242
249
|
os << " lg nominal size : " << (int) map_.lg_nom_size_ << std::endl;
|
243
250
|
os << " lg current size : " << (int) map_.lg_cur_size_ << std::endl;
|
244
251
|
os << " resize factor : " << (1 << map_.rf_) << std::endl;
|
@@ -250,7 +257,7 @@ template<typename S, typename A>
|
|
250
257
|
compact_tuple_sketch<S, A>::compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
|
251
258
|
std::vector<Entry, AllocEntry>&& entries):
|
252
259
|
is_empty_(is_empty),
|
253
|
-
is_ordered_(is_ordered),
|
260
|
+
is_ordered_(is_ordered || (entries.size() <= 1ULL)),
|
254
261
|
seed_hash_(seed_hash),
|
255
262
|
theta_(theta),
|
256
263
|
entries_(std::move(entries))
|
@@ -315,7 +322,7 @@ uint64_t compact_tuple_sketch<S, A>::get_theta64() const {
|
|
315
322
|
|
316
323
|
template<typename S, typename A>
|
317
324
|
uint32_t compact_tuple_sketch<S, A>::get_num_retained() const {
|
318
|
-
return entries_.size();
|
325
|
+
return static_cast<uint32_t>(entries_.size());
|
319
326
|
}
|
320
327
|
|
321
328
|
template<typename S, typename A>
|
@@ -347,36 +354,36 @@ template<typename SerDe>
|
|
347
354
|
void compact_tuple_sketch<S, A>::serialize(std::ostream& os, const SerDe& sd) const {
|
348
355
|
const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
|
349
356
|
const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
|
350
|
-
|
357
|
+
write(os, preamble_longs);
|
351
358
|
const uint8_t serial_version = SERIAL_VERSION;
|
352
|
-
|
359
|
+
write(os, serial_version);
|
353
360
|
const uint8_t family = SKETCH_FAMILY;
|
354
|
-
|
361
|
+
write(os, family);
|
355
362
|
const uint8_t type = SKETCH_TYPE;
|
356
|
-
|
363
|
+
write(os, type);
|
357
364
|
const uint8_t unused8 = 0;
|
358
|
-
|
365
|
+
write(os, unused8);
|
359
366
|
const uint8_t flags_byte(
|
360
367
|
(1 << flags::IS_COMPACT) |
|
361
368
|
(1 << flags::IS_READ_ONLY) |
|
362
369
|
(this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
|
363
370
|
(this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
|
364
371
|
);
|
365
|
-
|
372
|
+
write(os, flags_byte);
|
366
373
|
const uint16_t seed_hash = get_seed_hash();
|
367
|
-
|
374
|
+
write(os, seed_hash);
|
368
375
|
if (!this->is_empty()) {
|
369
376
|
if (!is_single_item) {
|
370
|
-
const uint32_t num_entries = entries_.size();
|
371
|
-
|
377
|
+
const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
|
378
|
+
write(os, num_entries);
|
372
379
|
const uint32_t unused32 = 0;
|
373
|
-
|
380
|
+
write(os, unused32);
|
374
381
|
if (this->is_estimation_mode()) {
|
375
|
-
|
382
|
+
write(os, this->theta_);
|
376
383
|
}
|
377
384
|
}
|
378
385
|
for (const auto& it: entries_) {
|
379
|
-
|
386
|
+
write(os, it.first);
|
380
387
|
sd.serialize(os, &it.second, 1);
|
381
388
|
}
|
382
389
|
}
|
@@ -393,36 +400,34 @@ auto compact_tuple_sketch<S, A>::serialize(unsigned header_size_bytes, const Ser
|
|
393
400
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
394
401
|
const uint8_t* end_ptr = ptr + size;
|
395
402
|
|
396
|
-
ptr += copy_to_mem(
|
403
|
+
ptr += copy_to_mem(preamble_longs, ptr);
|
397
404
|
const uint8_t serial_version = SERIAL_VERSION;
|
398
|
-
ptr += copy_to_mem(
|
405
|
+
ptr += copy_to_mem(serial_version, ptr);
|
399
406
|
const uint8_t family = SKETCH_FAMILY;
|
400
|
-
ptr += copy_to_mem(
|
407
|
+
ptr += copy_to_mem(family, ptr);
|
401
408
|
const uint8_t type = SKETCH_TYPE;
|
402
|
-
ptr += copy_to_mem(
|
403
|
-
|
404
|
-
ptr += copy_to_mem(&unused8, ptr, sizeof(unused8));
|
409
|
+
ptr += copy_to_mem(type, ptr);
|
410
|
+
ptr += sizeof(uint8_t); // unused
|
405
411
|
const uint8_t flags_byte(
|
406
412
|
(1 << flags::IS_COMPACT) |
|
407
413
|
(1 << flags::IS_READ_ONLY) |
|
408
414
|
(this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
|
409
415
|
(this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
|
410
416
|
);
|
411
|
-
ptr += copy_to_mem(
|
417
|
+
ptr += copy_to_mem(flags_byte, ptr);
|
412
418
|
const uint16_t seed_hash = get_seed_hash();
|
413
|
-
ptr += copy_to_mem(
|
419
|
+
ptr += copy_to_mem(seed_hash, ptr);
|
414
420
|
if (!this->is_empty()) {
|
415
421
|
if (!is_single_item) {
|
416
|
-
const uint32_t num_entries = entries_.size();
|
417
|
-
ptr += copy_to_mem(
|
418
|
-
|
419
|
-
ptr += copy_to_mem(&unused32, ptr, sizeof(unused32));
|
422
|
+
const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
|
423
|
+
ptr += copy_to_mem(num_entries, ptr);
|
424
|
+
ptr += sizeof(uint32_t); // unused
|
420
425
|
if (this->is_estimation_mode()) {
|
421
|
-
ptr += copy_to_mem(
|
426
|
+
ptr += copy_to_mem(theta_, ptr);
|
422
427
|
}
|
423
428
|
}
|
424
429
|
for (const auto& it: entries_) {
|
425
|
-
ptr += copy_to_mem(
|
430
|
+
ptr += copy_to_mem(it.first, ptr);
|
426
431
|
ptr += sd.serialize(ptr, end_ptr - ptr, &it.second, 1);
|
427
432
|
}
|
428
433
|
}
|
@@ -432,23 +437,22 @@ auto compact_tuple_sketch<S, A>::serialize(unsigned header_size_bytes, const Ser
|
|
432
437
|
template<typename S, typename A>
|
433
438
|
template<typename SerDe>
|
434
439
|
compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream& is, uint64_t seed, const SerDe& sd, const A& allocator) {
|
435
|
-
|
436
|
-
|
437
|
-
uint8_t
|
438
|
-
|
439
|
-
uint8_t
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
is.read(reinterpret_cast<char*>(&flags_byte), sizeof(flags_byte));
|
447
|
-
uint16_t seed_hash;
|
448
|
-
is.read(reinterpret_cast<char*>(&seed_hash), sizeof(seed_hash));
|
449
|
-
checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
|
440
|
+
const auto preamble_longs = read<uint8_t>(is);
|
441
|
+
const auto serial_version = read<uint8_t>(is);
|
442
|
+
const auto family = read<uint8_t>(is);
|
443
|
+
const auto type = read<uint8_t>(is);
|
444
|
+
read<uint8_t>(is); // unused
|
445
|
+
const auto flags_byte = read<uint8_t>(is);
|
446
|
+
const auto seed_hash = read<uint16_t>(is);
|
447
|
+
if (serial_version != SERIAL_VERSION && serial_version != SERIAL_VERSION_LEGACY) {
|
448
|
+
throw std::invalid_argument("serial version mismatch: expected " + std::to_string(SERIAL_VERSION) + " or "
|
449
|
+
+ std::to_string(SERIAL_VERSION_LEGACY) + ", actual " + std::to_string(serial_version));
|
450
|
+
}
|
450
451
|
checker<true>::check_sketch_family(family, SKETCH_FAMILY);
|
451
|
-
|
452
|
+
if (type != SKETCH_TYPE && type != SKETCH_TYPE_LEGACY) {
|
453
|
+
throw std::invalid_argument("sketch type mismatch: expected " + std::to_string(SKETCH_TYPE) + " or "
|
454
|
+
+ std::to_string(SKETCH_TYPE_LEGACY) + ", actual " + std::to_string(type));
|
455
|
+
}
|
452
456
|
const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
|
453
457
|
if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
|
454
458
|
|
@@ -458,11 +462,10 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream&
|
|
458
462
|
if (preamble_longs == 1) {
|
459
463
|
num_entries = 1;
|
460
464
|
} else {
|
461
|
-
|
462
|
-
uint32_t
|
463
|
-
is.read(reinterpret_cast<char*>(&unused32), sizeof(unused32));
|
465
|
+
num_entries = read<uint32_t>(is);
|
466
|
+
read<uint32_t>(is); // unused
|
464
467
|
if (preamble_longs > 2) {
|
465
|
-
|
468
|
+
theta = read<uint64_t>(is);
|
466
469
|
}
|
467
470
|
}
|
468
471
|
}
|
@@ -472,8 +475,7 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream&
|
|
472
475
|
entries.reserve(num_entries);
|
473
476
|
std::unique_ptr<S, deleter_of_summaries> summary(alloc.allocate(1), deleter_of_summaries(1, false, allocator));
|
474
477
|
for (size_t i = 0; i < num_entries; ++i) {
|
475
|
-
|
476
|
-
is.read(reinterpret_cast<char*>(&key), sizeof(uint64_t));
|
478
|
+
const auto key = read<uint64_t>(is);
|
477
479
|
sd.deserialize(is, summary.get(), 1);
|
478
480
|
entries.push_back(Entry(key, std::move(*summary)));
|
479
481
|
(*summary).~S();
|
@@ -491,22 +493,27 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
|
|
491
493
|
const char* ptr = static_cast<const char*>(bytes);
|
492
494
|
const char* base = ptr;
|
493
495
|
uint8_t preamble_longs;
|
494
|
-
ptr += copy_from_mem(ptr,
|
496
|
+
ptr += copy_from_mem(ptr, preamble_longs);
|
495
497
|
uint8_t serial_version;
|
496
|
-
ptr += copy_from_mem(ptr,
|
498
|
+
ptr += copy_from_mem(ptr, serial_version);
|
497
499
|
uint8_t family;
|
498
|
-
ptr += copy_from_mem(ptr,
|
500
|
+
ptr += copy_from_mem(ptr, family);
|
499
501
|
uint8_t type;
|
500
|
-
ptr += copy_from_mem(ptr,
|
501
|
-
uint8_t
|
502
|
-
ptr += copy_from_mem(ptr, &unused8, sizeof(unused8));
|
502
|
+
ptr += copy_from_mem(ptr, type);
|
503
|
+
ptr += sizeof(uint8_t); // unused
|
503
504
|
uint8_t flags_byte;
|
504
|
-
ptr += copy_from_mem(ptr,
|
505
|
+
ptr += copy_from_mem(ptr, flags_byte);
|
505
506
|
uint16_t seed_hash;
|
506
|
-
ptr += copy_from_mem(ptr,
|
507
|
-
|
507
|
+
ptr += copy_from_mem(ptr, seed_hash);
|
508
|
+
if (serial_version != SERIAL_VERSION && serial_version != SERIAL_VERSION_LEGACY) {
|
509
|
+
throw std::invalid_argument("serial version mismatch: expected " + std::to_string(SERIAL_VERSION) + " or "
|
510
|
+
+ std::to_string(SERIAL_VERSION_LEGACY) + ", actual " + std::to_string(serial_version));
|
511
|
+
}
|
508
512
|
checker<true>::check_sketch_family(family, SKETCH_FAMILY);
|
509
|
-
|
513
|
+
if (type != SKETCH_TYPE && type != SKETCH_TYPE_LEGACY) {
|
514
|
+
throw std::invalid_argument("sketch type mismatch: expected " + std::to_string(SKETCH_TYPE) + " or "
|
515
|
+
+ std::to_string(SKETCH_TYPE_LEGACY) + ", actual " + std::to_string(type));
|
516
|
+
}
|
510
517
|
const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
|
511
518
|
if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
|
512
519
|
|
@@ -518,12 +525,11 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
|
|
518
525
|
num_entries = 1;
|
519
526
|
} else {
|
520
527
|
ensure_minimum_memory(size, 8); // read the first prelong before this method
|
521
|
-
ptr += copy_from_mem(ptr,
|
522
|
-
uint32_t
|
523
|
-
ptr += copy_from_mem(ptr, &unused32, sizeof(unused32));
|
528
|
+
ptr += copy_from_mem(ptr, num_entries);
|
529
|
+
ptr += sizeof(uint32_t); // unused
|
524
530
|
if (preamble_longs > 2) {
|
525
531
|
ensure_minimum_memory(size, (preamble_longs - 1) << 3);
|
526
|
-
ptr += copy_from_mem(ptr,
|
532
|
+
ptr += copy_from_mem(ptr, theta);
|
527
533
|
}
|
528
534
|
}
|
529
535
|
}
|
@@ -536,7 +542,7 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
|
|
536
542
|
std::unique_ptr<S, deleter_of_summaries> summary(alloc.allocate(1), deleter_of_summaries(1, false, allocator));
|
537
543
|
for (size_t i = 0; i < num_entries; ++i) {
|
538
544
|
uint64_t key;
|
539
|
-
ptr += copy_from_mem(ptr,
|
545
|
+
ptr += copy_from_mem(ptr, key);
|
540
546
|
ptr += sd.deserialize(ptr, base + size - ptr, summary.get(), 1);
|
541
547
|
entries.push_back(Entry(key, std::move(*summary)));
|
542
548
|
(*summary).~S();
|
@@ -548,26 +554,26 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
|
|
548
554
|
|
549
555
|
template<typename S, typename A>
|
550
556
|
auto compact_tuple_sketch<S, A>::begin() -> iterator {
|
551
|
-
return iterator(entries_.data(), entries_.size(), 0);
|
557
|
+
return iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
|
552
558
|
}
|
553
559
|
|
554
560
|
template<typename S, typename A>
|
555
561
|
auto compact_tuple_sketch<S, A>::end() -> iterator {
|
556
|
-
return iterator(nullptr, 0, entries_.size());
|
562
|
+
return iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
|
557
563
|
}
|
558
564
|
|
559
565
|
template<typename S, typename A>
|
560
566
|
auto compact_tuple_sketch<S, A>::begin() const -> const_iterator {
|
561
|
-
return const_iterator(entries_.data(), entries_.size(), 0);
|
567
|
+
return const_iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
|
562
568
|
}
|
563
569
|
|
564
570
|
template<typename S, typename A>
|
565
571
|
auto compact_tuple_sketch<S, A>::end() const -> const_iterator {
|
566
|
-
return const_iterator(nullptr, 0, entries_.size());
|
572
|
+
return const_iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
|
567
573
|
}
|
568
574
|
|
569
575
|
template<typename S, typename A>
|
570
|
-
void compact_tuple_sketch<S, A>::print_specifics(
|
576
|
+
void compact_tuple_sketch<S, A>::print_specifics(std::ostringstream&) const {}
|
571
577
|
|
572
578
|
// builder
|
573
579
|
|
@@ -581,7 +587,7 @@ tuple_base_builder<builder, P, A>(policy, allocator) {}
|
|
581
587
|
|
582
588
|
template<typename S, typename U, typename P, typename A>
|
583
589
|
auto update_tuple_sketch<S, U, P, A>::builder::build() const -> update_tuple_sketch {
|
584
|
-
return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
590
|
+
return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
585
591
|
}
|
586
592
|
|
587
593
|
} /* namespace datasketches */
|