datasketches 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/README.md +7 -7
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
- data/vendor/datasketches-cpp/python/README.md +50 -50
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +10 -7
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
- metadata +18 -7
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
|
@@ -21,8 +21,8 @@ namespace datasketches {
|
|
|
21
21
|
|
|
22
22
|
template<typename A>
|
|
23
23
|
update_array_of_doubles_sketch_alloc<A>::update_array_of_doubles_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
|
|
24
|
-
uint64_t theta, uint64_t seed, const array_of_doubles_update_policy<A>& policy, const A& allocator):
|
|
25
|
-
Base(lg_cur_size, lg_nom_size, rf, theta, seed, policy, allocator) {}
|
|
24
|
+
float p, uint64_t theta, uint64_t seed, const array_of_doubles_update_policy<A>& policy, const A& allocator):
|
|
25
|
+
Base(lg_cur_size, lg_nom_size, rf, p, theta, seed, policy, allocator) {}
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
template<typename A>
|
|
@@ -43,7 +43,7 @@ tuple_base_builder<builder, array_of_doubles_update_policy<A>, A>(policy, alloca
|
|
|
43
43
|
|
|
44
44
|
template<typename A>
|
|
45
45
|
update_array_of_doubles_sketch_alloc<A> update_array_of_doubles_sketch_alloc<A>::builder::build() const {
|
|
46
|
-
return update_array_of_doubles_sketch_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
|
46
|
+
return update_array_of_doubles_sketch_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
|
47
47
|
}
|
|
48
48
|
|
|
49
49
|
// compact sketch
|
|
@@ -70,33 +70,33 @@ uint8_t compact_array_of_doubles_sketch_alloc<A>::get_num_values() const {
|
|
|
70
70
|
template<typename A>
|
|
71
71
|
void compact_array_of_doubles_sketch_alloc<A>::serialize(std::ostream& os) const {
|
|
72
72
|
const uint8_t preamble_longs = 1;
|
|
73
|
-
|
|
73
|
+
write(os, preamble_longs);
|
|
74
74
|
const uint8_t serial_version = SERIAL_VERSION;
|
|
75
|
-
|
|
75
|
+
write(os, serial_version);
|
|
76
76
|
const uint8_t family = SKETCH_FAMILY;
|
|
77
|
-
|
|
77
|
+
write(os, family);
|
|
78
78
|
const uint8_t type = SKETCH_TYPE;
|
|
79
|
-
|
|
79
|
+
write(os, type);
|
|
80
80
|
const uint8_t flags_byte(
|
|
81
81
|
(this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
|
|
82
82
|
(this->get_num_retained() > 0 ? 1 << flags::HAS_ENTRIES : 0) |
|
|
83
83
|
(this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
|
|
84
84
|
);
|
|
85
|
-
|
|
86
|
-
|
|
85
|
+
write(os, flags_byte);
|
|
86
|
+
write(os, num_values_);
|
|
87
87
|
const uint16_t seed_hash = this->get_seed_hash();
|
|
88
|
-
|
|
89
|
-
|
|
88
|
+
write(os, seed_hash);
|
|
89
|
+
write(os, this->theta_);
|
|
90
90
|
if (this->get_num_retained() > 0) {
|
|
91
|
-
const uint32_t num_entries = this->entries_.size();
|
|
92
|
-
|
|
91
|
+
const uint32_t num_entries = static_cast<uint32_t>(this->entries_.size());
|
|
92
|
+
write(os, num_entries);
|
|
93
93
|
const uint32_t unused32 = 0;
|
|
94
|
-
|
|
94
|
+
write(os, unused32);
|
|
95
95
|
for (const auto& it: this->entries_) {
|
|
96
|
-
|
|
96
|
+
write(os, it.first);
|
|
97
97
|
}
|
|
98
98
|
for (const auto& it: this->entries_) {
|
|
99
|
-
|
|
99
|
+
write(os, it.second.data(), it.second.size() * sizeof(double));
|
|
100
100
|
}
|
|
101
101
|
}
|
|
102
102
|
}
|
|
@@ -110,30 +110,29 @@ auto compact_array_of_doubles_sketch_alloc<A>::serialize(unsigned header_size_by
|
|
|
110
110
|
vector_bytes bytes(size, 0, this->entries_.get_allocator());
|
|
111
111
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
|
112
112
|
|
|
113
|
-
ptr += copy_to_mem(
|
|
113
|
+
ptr += copy_to_mem(preamble_longs, ptr);
|
|
114
114
|
const uint8_t serial_version = SERIAL_VERSION;
|
|
115
|
-
ptr += copy_to_mem(
|
|
115
|
+
ptr += copy_to_mem(serial_version, ptr);
|
|
116
116
|
const uint8_t family = SKETCH_FAMILY;
|
|
117
|
-
ptr += copy_to_mem(
|
|
117
|
+
ptr += copy_to_mem(family, ptr);
|
|
118
118
|
const uint8_t type = SKETCH_TYPE;
|
|
119
|
-
ptr += copy_to_mem(
|
|
119
|
+
ptr += copy_to_mem(type, ptr);
|
|
120
120
|
const uint8_t flags_byte(
|
|
121
121
|
(this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
|
|
122
122
|
(this->get_num_retained() ? 1 << flags::HAS_ENTRIES : 0) |
|
|
123
123
|
(this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
|
|
124
124
|
);
|
|
125
|
-
ptr += copy_to_mem(
|
|
126
|
-
ptr += copy_to_mem(
|
|
125
|
+
ptr += copy_to_mem(flags_byte, ptr);
|
|
126
|
+
ptr += copy_to_mem(num_values_, ptr);
|
|
127
127
|
const uint16_t seed_hash = this->get_seed_hash();
|
|
128
|
-
ptr += copy_to_mem(
|
|
129
|
-
ptr += copy_to_mem(
|
|
128
|
+
ptr += copy_to_mem(seed_hash, ptr);
|
|
129
|
+
ptr += copy_to_mem((this->theta_), ptr);
|
|
130
130
|
if (this->get_num_retained() > 0) {
|
|
131
|
-
const uint32_t num_entries = this->entries_.size();
|
|
132
|
-
ptr += copy_to_mem(
|
|
133
|
-
|
|
134
|
-
ptr += copy_to_mem(&unused32, ptr, sizeof(unused32));
|
|
131
|
+
const uint32_t num_entries = static_cast<uint32_t>(this->entries_.size());
|
|
132
|
+
ptr += copy_to_mem(num_entries, ptr);
|
|
133
|
+
ptr += sizeof(uint32_t); // unused
|
|
135
134
|
for (const auto& it: this->entries_) {
|
|
136
|
-
ptr += copy_to_mem(
|
|
135
|
+
ptr += copy_to_mem(it.first, ptr);
|
|
137
136
|
}
|
|
138
137
|
for (const auto& it: this->entries_) {
|
|
139
138
|
ptr += copy_to_mem(it.second.data(), ptr, it.second.size() * sizeof(double));
|
|
@@ -144,40 +143,30 @@ auto compact_array_of_doubles_sketch_alloc<A>::serialize(unsigned header_size_by
|
|
|
144
143
|
|
|
145
144
|
template<typename A>
|
|
146
145
|
compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) {
|
|
147
|
-
uint8_t
|
|
148
|
-
|
|
149
|
-
uint8_t
|
|
150
|
-
|
|
151
|
-
uint8_t
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
is.read(reinterpret_cast<char*>(&type), sizeof(type));
|
|
155
|
-
uint8_t flags_byte;
|
|
156
|
-
is.read(reinterpret_cast<char*>(&flags_byte), sizeof(flags_byte));
|
|
157
|
-
uint8_t num_values;
|
|
158
|
-
is.read(reinterpret_cast<char*>(&num_values), sizeof(num_values));
|
|
159
|
-
uint16_t seed_hash;
|
|
160
|
-
is.read(reinterpret_cast<char*>(&seed_hash), sizeof(seed_hash));
|
|
146
|
+
read<uint8_t>(is); // unused
|
|
147
|
+
const auto serial_version = read<uint8_t>(is);
|
|
148
|
+
const auto family = read<uint8_t>(is);
|
|
149
|
+
const auto type = read<uint8_t>(is);
|
|
150
|
+
const auto flags_byte = read<uint8_t>(is);
|
|
151
|
+
const auto num_values = read<uint8_t>(is);
|
|
152
|
+
const auto seed_hash = read<uint16_t>(is);
|
|
161
153
|
checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
|
|
162
154
|
checker<true>::check_sketch_family(family, SKETCH_FAMILY);
|
|
163
155
|
checker<true>::check_sketch_type(type, SKETCH_TYPE);
|
|
164
156
|
const bool has_entries = flags_byte & (1 << flags::HAS_ENTRIES);
|
|
165
157
|
if (has_entries) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
|
|
166
158
|
|
|
167
|
-
|
|
168
|
-
is.read(reinterpret_cast<char*>(&theta), sizeof(theta));
|
|
159
|
+
const auto theta = read<uint64_t>(is);
|
|
169
160
|
std::vector<Entry, AllocEntry> entries(allocator);
|
|
170
161
|
if (has_entries) {
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
uint32_t unused32;
|
|
174
|
-
is.read(reinterpret_cast<char*>(&unused32), sizeof(unused32));
|
|
162
|
+
const auto num_entries = read<uint32_t>(is);
|
|
163
|
+
read<uint32_t>(is); // unused
|
|
175
164
|
entries.reserve(num_entries);
|
|
176
165
|
std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator);
|
|
177
|
-
|
|
166
|
+
read(is, keys.data(), num_entries * sizeof(uint64_t));
|
|
178
167
|
for (size_t i = 0; i < num_entries; ++i) {
|
|
179
168
|
aod<A> summary(num_values, allocator);
|
|
180
|
-
|
|
169
|
+
read(is, summary.data(), num_values * sizeof(double));
|
|
181
170
|
entries.push_back(Entry(keys[i], std::move(summary)));
|
|
182
171
|
}
|
|
183
172
|
}
|
|
@@ -191,20 +180,19 @@ template<typename A>
|
|
|
191
180
|
compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) {
|
|
192
181
|
ensure_minimum_memory(size, 16);
|
|
193
182
|
const char* ptr = static_cast<const char*>(bytes);
|
|
194
|
-
uint8_t
|
|
195
|
-
ptr += copy_from_mem(ptr, &preamble_longs, sizeof(preamble_longs));
|
|
183
|
+
ptr += sizeof(uint8_t); // unused
|
|
196
184
|
uint8_t serial_version;
|
|
197
|
-
ptr += copy_from_mem(ptr,
|
|
185
|
+
ptr += copy_from_mem(ptr, serial_version);
|
|
198
186
|
uint8_t family;
|
|
199
|
-
ptr += copy_from_mem(ptr,
|
|
187
|
+
ptr += copy_from_mem(ptr, family);
|
|
200
188
|
uint8_t type;
|
|
201
|
-
ptr += copy_from_mem(ptr,
|
|
189
|
+
ptr += copy_from_mem(ptr, type);
|
|
202
190
|
uint8_t flags_byte;
|
|
203
|
-
ptr += copy_from_mem(ptr,
|
|
191
|
+
ptr += copy_from_mem(ptr, flags_byte);
|
|
204
192
|
uint8_t num_values;
|
|
205
|
-
ptr += copy_from_mem(ptr,
|
|
193
|
+
ptr += copy_from_mem(ptr, num_values);
|
|
206
194
|
uint16_t seed_hash;
|
|
207
|
-
ptr += copy_from_mem(ptr,
|
|
195
|
+
ptr += copy_from_mem(ptr, seed_hash);
|
|
208
196
|
checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
|
|
209
197
|
checker<true>::check_sketch_family(family, SKETCH_FAMILY);
|
|
210
198
|
checker<true>::check_sketch_type(type, SKETCH_TYPE);
|
|
@@ -212,14 +200,13 @@ compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A
|
|
|
212
200
|
if (has_entries) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
|
|
213
201
|
|
|
214
202
|
uint64_t theta;
|
|
215
|
-
ptr += copy_from_mem(ptr,
|
|
203
|
+
ptr += copy_from_mem(ptr, theta);
|
|
216
204
|
std::vector<Entry, AllocEntry> entries(allocator);
|
|
217
205
|
if (has_entries) {
|
|
218
206
|
ensure_minimum_memory(size, 24);
|
|
219
207
|
uint32_t num_entries;
|
|
220
|
-
ptr += copy_from_mem(ptr,
|
|
221
|
-
uint32_t
|
|
222
|
-
ptr += copy_from_mem(ptr, &unused32, sizeof(unused32));
|
|
208
|
+
ptr += copy_from_mem(ptr, num_entries);
|
|
209
|
+
ptr += sizeof(uint32_t); // unused
|
|
223
210
|
ensure_minimum_memory(size, 24 + (sizeof(uint64_t) + sizeof(double) * num_values) * num_entries);
|
|
224
211
|
entries.reserve(num_entries);
|
|
225
212
|
std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator);
|
|
@@ -61,7 +61,7 @@ public:
|
|
|
61
61
|
|
|
62
62
|
private:
|
|
63
63
|
// for builder
|
|
64
|
-
array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
|
64
|
+
array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
|
65
65
|
};
|
|
66
66
|
|
|
67
67
|
template<typename Allocator>
|
|
@@ -20,8 +20,8 @@
|
|
|
20
20
|
namespace datasketches {
|
|
21
21
|
|
|
22
22
|
template<typename A>
|
|
23
|
-
array_of_doubles_union_alloc<A>::array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const A& allocator):
|
|
24
|
-
Base(lg_cur_size, lg_nom_size, rf, theta, seed, policy, allocator)
|
|
23
|
+
array_of_doubles_union_alloc<A>::array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const A& allocator):
|
|
24
|
+
Base(lg_cur_size, lg_nom_size, rf, p, theta, seed, policy, allocator)
|
|
25
25
|
{}
|
|
26
26
|
|
|
27
27
|
template<typename A>
|
|
@@ -37,7 +37,7 @@ tuple_base_builder<builder, Policy, A>(policy, allocator) {}
|
|
|
37
37
|
|
|
38
38
|
template<typename A>
|
|
39
39
|
array_of_doubles_union_alloc<A> array_of_doubles_union_alloc<A>::builder::build() const {
|
|
40
|
-
return array_of_doubles_union_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
|
40
|
+
return array_of_doubles_union_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
|
41
41
|
}
|
|
42
42
|
|
|
43
43
|
} /* namespace datasketches */
|
|
@@ -153,8 +153,7 @@ public:
|
|
|
153
153
|
virtual const_iterator end() const = 0;
|
|
154
154
|
|
|
155
155
|
protected:
|
|
156
|
-
|
|
157
|
-
virtual void print_specifics(ostrstream& os) const = 0;
|
|
156
|
+
virtual void print_specifics(std::ostringstream& os) const = 0;
|
|
158
157
|
|
|
159
158
|
static uint16_t get_seed_hash(uint64_t seed);
|
|
160
159
|
|
|
@@ -325,6 +324,11 @@ public:
|
|
|
325
324
|
*/
|
|
326
325
|
void trim();
|
|
327
326
|
|
|
327
|
+
/**
|
|
328
|
+
* Reset the sketch to the initial empty state
|
|
329
|
+
*/
|
|
330
|
+
void reset();
|
|
331
|
+
|
|
328
332
|
/**
|
|
329
333
|
* Converts this sketch to a compact sketch (ordered or unordered).
|
|
330
334
|
* @param ordered optional flag to specify if ordered sketch should be produced
|
|
@@ -342,10 +346,9 @@ protected:
|
|
|
342
346
|
tuple_map map_;
|
|
343
347
|
|
|
344
348
|
// for builder
|
|
345
|
-
update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
|
349
|
+
update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
|
346
350
|
|
|
347
|
-
|
|
348
|
-
virtual void print_specifics(ostrstream& os) const;
|
|
351
|
+
virtual void print_specifics(std::ostringstream& os) const;
|
|
349
352
|
};
|
|
350
353
|
|
|
351
354
|
// compact sketch
|
|
@@ -367,9 +370,11 @@ public:
|
|
|
367
370
|
using vector_bytes = std::vector<uint8_t, AllocBytes>;
|
|
368
371
|
using comparator = compare_by_key<ExtractKey>;
|
|
369
372
|
|
|
370
|
-
static const uint8_t
|
|
373
|
+
static const uint8_t SERIAL_VERSION_LEGACY = 1;
|
|
374
|
+
static const uint8_t SERIAL_VERSION = 3;
|
|
371
375
|
static const uint8_t SKETCH_FAMILY = 9;
|
|
372
|
-
static const uint8_t SKETCH_TYPE =
|
|
376
|
+
static const uint8_t SKETCH_TYPE = 1;
|
|
377
|
+
static const uint8_t SKETCH_TYPE_LEGACY = 5;
|
|
373
378
|
enum flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED };
|
|
374
379
|
|
|
375
380
|
// Instances of this type can be obtained:
|
|
@@ -473,8 +478,7 @@ protected:
|
|
|
473
478
|
bool destroy_;
|
|
474
479
|
};
|
|
475
480
|
|
|
476
|
-
|
|
477
|
-
virtual void print_specifics(ostrstream& os) const;
|
|
481
|
+
virtual void print_specifics(std::ostringstream& os) const;
|
|
478
482
|
|
|
479
483
|
};
|
|
480
484
|
|
|
@@ -53,7 +53,9 @@ double tuple_sketch<S, A>::get_upper_bound(uint8_t num_std_devs) const {
|
|
|
53
53
|
|
|
54
54
|
template<typename S, typename A>
|
|
55
55
|
string<A> tuple_sketch<S, A>::to_string(bool detail) const {
|
|
56
|
-
|
|
56
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
|
57
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
|
58
|
+
std::ostringstream os;
|
|
57
59
|
os << "### Tuple sketch summary:" << std::endl;
|
|
58
60
|
os << " num retained entries : " << get_num_retained() << std::endl;
|
|
59
61
|
os << " seed hash : " << get_seed_hash() << std::endl;
|
|
@@ -74,15 +76,15 @@ string<A> tuple_sketch<S, A>::to_string(bool detail) const {
|
|
|
74
76
|
}
|
|
75
77
|
os << "### End retained entries" << std::endl;
|
|
76
78
|
}
|
|
77
|
-
return os.str();
|
|
79
|
+
return string<A>(os.str().c_str(), get_allocator());
|
|
78
80
|
}
|
|
79
81
|
|
|
80
82
|
// update sketch
|
|
81
83
|
|
|
82
84
|
template<typename S, typename U, typename P, typename A>
|
|
83
|
-
update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
|
|
85
|
+
update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
|
|
84
86
|
policy_(policy),
|
|
85
|
-
map_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator)
|
|
87
|
+
map_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator)
|
|
86
88
|
{}
|
|
87
89
|
|
|
88
90
|
template<typename S, typename U, typename P, typename A>
|
|
@@ -97,12 +99,12 @@ bool update_tuple_sketch<S, U, P, A>::is_empty() const {
|
|
|
97
99
|
|
|
98
100
|
template<typename S, typename U, typename P, typename A>
|
|
99
101
|
bool update_tuple_sketch<S, U, P, A>::is_ordered() const {
|
|
100
|
-
return false
|
|
102
|
+
return map_.num_entries_ > 1 ? false : true;;
|
|
101
103
|
}
|
|
102
104
|
|
|
103
105
|
template<typename S, typename U, typename P, typename A>
|
|
104
106
|
uint64_t update_tuple_sketch<S, U, P, A>::get_theta64() const {
|
|
105
|
-
return map_.theta_;
|
|
107
|
+
return is_empty() ? theta_constants::MAX_THETA : map_.theta_;
|
|
106
108
|
}
|
|
107
109
|
|
|
108
110
|
template<typename S, typename U, typename P, typename A>
|
|
@@ -212,6 +214,11 @@ void update_tuple_sketch<S, U, P, A>::trim() {
|
|
|
212
214
|
map_.trim();
|
|
213
215
|
}
|
|
214
216
|
|
|
217
|
+
template<typename S, typename U, typename P, typename A>
|
|
218
|
+
void update_tuple_sketch<S, U, P, A>::reset() {
|
|
219
|
+
map_.reset();
|
|
220
|
+
}
|
|
221
|
+
|
|
215
222
|
template<typename S, typename U, typename P, typename A>
|
|
216
223
|
auto update_tuple_sketch<S, U, P, A>::begin() -> iterator {
|
|
217
224
|
return iterator(map_.entries_, 1 << map_.lg_cur_size_, 0);
|
|
@@ -238,7 +245,7 @@ compact_tuple_sketch<S, A> update_tuple_sketch<S, U, P, A>::compact(bool ordered
|
|
|
238
245
|
}
|
|
239
246
|
|
|
240
247
|
template<typename S, typename U, typename P, typename A>
|
|
241
|
-
void update_tuple_sketch<S, U, P, A>::print_specifics(
|
|
248
|
+
void update_tuple_sketch<S, U, P, A>::print_specifics(std::ostringstream& os) const {
|
|
242
249
|
os << " lg nominal size : " << (int) map_.lg_nom_size_ << std::endl;
|
|
243
250
|
os << " lg current size : " << (int) map_.lg_cur_size_ << std::endl;
|
|
244
251
|
os << " resize factor : " << (1 << map_.rf_) << std::endl;
|
|
@@ -250,7 +257,7 @@ template<typename S, typename A>
|
|
|
250
257
|
compact_tuple_sketch<S, A>::compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
|
|
251
258
|
std::vector<Entry, AllocEntry>&& entries):
|
|
252
259
|
is_empty_(is_empty),
|
|
253
|
-
is_ordered_(is_ordered),
|
|
260
|
+
is_ordered_(is_ordered || (entries.size() <= 1ULL)),
|
|
254
261
|
seed_hash_(seed_hash),
|
|
255
262
|
theta_(theta),
|
|
256
263
|
entries_(std::move(entries))
|
|
@@ -315,7 +322,7 @@ uint64_t compact_tuple_sketch<S, A>::get_theta64() const {
|
|
|
315
322
|
|
|
316
323
|
template<typename S, typename A>
|
|
317
324
|
uint32_t compact_tuple_sketch<S, A>::get_num_retained() const {
|
|
318
|
-
return entries_.size();
|
|
325
|
+
return static_cast<uint32_t>(entries_.size());
|
|
319
326
|
}
|
|
320
327
|
|
|
321
328
|
template<typename S, typename A>
|
|
@@ -347,36 +354,36 @@ template<typename SerDe>
|
|
|
347
354
|
void compact_tuple_sketch<S, A>::serialize(std::ostream& os, const SerDe& sd) const {
|
|
348
355
|
const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
|
|
349
356
|
const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
|
|
350
|
-
|
|
357
|
+
write(os, preamble_longs);
|
|
351
358
|
const uint8_t serial_version = SERIAL_VERSION;
|
|
352
|
-
|
|
359
|
+
write(os, serial_version);
|
|
353
360
|
const uint8_t family = SKETCH_FAMILY;
|
|
354
|
-
|
|
361
|
+
write(os, family);
|
|
355
362
|
const uint8_t type = SKETCH_TYPE;
|
|
356
|
-
|
|
363
|
+
write(os, type);
|
|
357
364
|
const uint8_t unused8 = 0;
|
|
358
|
-
|
|
365
|
+
write(os, unused8);
|
|
359
366
|
const uint8_t flags_byte(
|
|
360
367
|
(1 << flags::IS_COMPACT) |
|
|
361
368
|
(1 << flags::IS_READ_ONLY) |
|
|
362
369
|
(this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
|
|
363
370
|
(this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
|
|
364
371
|
);
|
|
365
|
-
|
|
372
|
+
write(os, flags_byte);
|
|
366
373
|
const uint16_t seed_hash = get_seed_hash();
|
|
367
|
-
|
|
374
|
+
write(os, seed_hash);
|
|
368
375
|
if (!this->is_empty()) {
|
|
369
376
|
if (!is_single_item) {
|
|
370
|
-
const uint32_t num_entries = entries_.size();
|
|
371
|
-
|
|
377
|
+
const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
|
|
378
|
+
write(os, num_entries);
|
|
372
379
|
const uint32_t unused32 = 0;
|
|
373
|
-
|
|
380
|
+
write(os, unused32);
|
|
374
381
|
if (this->is_estimation_mode()) {
|
|
375
|
-
|
|
382
|
+
write(os, this->theta_);
|
|
376
383
|
}
|
|
377
384
|
}
|
|
378
385
|
for (const auto& it: entries_) {
|
|
379
|
-
|
|
386
|
+
write(os, it.first);
|
|
380
387
|
sd.serialize(os, &it.second, 1);
|
|
381
388
|
}
|
|
382
389
|
}
|
|
@@ -393,36 +400,34 @@ auto compact_tuple_sketch<S, A>::serialize(unsigned header_size_bytes, const Ser
|
|
|
393
400
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
|
394
401
|
const uint8_t* end_ptr = ptr + size;
|
|
395
402
|
|
|
396
|
-
ptr += copy_to_mem(
|
|
403
|
+
ptr += copy_to_mem(preamble_longs, ptr);
|
|
397
404
|
const uint8_t serial_version = SERIAL_VERSION;
|
|
398
|
-
ptr += copy_to_mem(
|
|
405
|
+
ptr += copy_to_mem(serial_version, ptr);
|
|
399
406
|
const uint8_t family = SKETCH_FAMILY;
|
|
400
|
-
ptr += copy_to_mem(
|
|
407
|
+
ptr += copy_to_mem(family, ptr);
|
|
401
408
|
const uint8_t type = SKETCH_TYPE;
|
|
402
|
-
ptr += copy_to_mem(
|
|
403
|
-
|
|
404
|
-
ptr += copy_to_mem(&unused8, ptr, sizeof(unused8));
|
|
409
|
+
ptr += copy_to_mem(type, ptr);
|
|
410
|
+
ptr += sizeof(uint8_t); // unused
|
|
405
411
|
const uint8_t flags_byte(
|
|
406
412
|
(1 << flags::IS_COMPACT) |
|
|
407
413
|
(1 << flags::IS_READ_ONLY) |
|
|
408
414
|
(this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
|
|
409
415
|
(this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
|
|
410
416
|
);
|
|
411
|
-
ptr += copy_to_mem(
|
|
417
|
+
ptr += copy_to_mem(flags_byte, ptr);
|
|
412
418
|
const uint16_t seed_hash = get_seed_hash();
|
|
413
|
-
ptr += copy_to_mem(
|
|
419
|
+
ptr += copy_to_mem(seed_hash, ptr);
|
|
414
420
|
if (!this->is_empty()) {
|
|
415
421
|
if (!is_single_item) {
|
|
416
|
-
const uint32_t num_entries = entries_.size();
|
|
417
|
-
ptr += copy_to_mem(
|
|
418
|
-
|
|
419
|
-
ptr += copy_to_mem(&unused32, ptr, sizeof(unused32));
|
|
422
|
+
const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
|
|
423
|
+
ptr += copy_to_mem(num_entries, ptr);
|
|
424
|
+
ptr += sizeof(uint32_t); // unused
|
|
420
425
|
if (this->is_estimation_mode()) {
|
|
421
|
-
ptr += copy_to_mem(
|
|
426
|
+
ptr += copy_to_mem(theta_, ptr);
|
|
422
427
|
}
|
|
423
428
|
}
|
|
424
429
|
for (const auto& it: entries_) {
|
|
425
|
-
ptr += copy_to_mem(
|
|
430
|
+
ptr += copy_to_mem(it.first, ptr);
|
|
426
431
|
ptr += sd.serialize(ptr, end_ptr - ptr, &it.second, 1);
|
|
427
432
|
}
|
|
428
433
|
}
|
|
@@ -432,23 +437,22 @@ auto compact_tuple_sketch<S, A>::serialize(unsigned header_size_bytes, const Ser
|
|
|
432
437
|
template<typename S, typename A>
|
|
433
438
|
template<typename SerDe>
|
|
434
439
|
compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream& is, uint64_t seed, const SerDe& sd, const A& allocator) {
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
uint8_t
|
|
438
|
-
|
|
439
|
-
uint8_t
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
is.read(reinterpret_cast<char*>(&flags_byte), sizeof(flags_byte));
|
|
447
|
-
uint16_t seed_hash;
|
|
448
|
-
is.read(reinterpret_cast<char*>(&seed_hash), sizeof(seed_hash));
|
|
449
|
-
checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
|
|
440
|
+
const auto preamble_longs = read<uint8_t>(is);
|
|
441
|
+
const auto serial_version = read<uint8_t>(is);
|
|
442
|
+
const auto family = read<uint8_t>(is);
|
|
443
|
+
const auto type = read<uint8_t>(is);
|
|
444
|
+
read<uint8_t>(is); // unused
|
|
445
|
+
const auto flags_byte = read<uint8_t>(is);
|
|
446
|
+
const auto seed_hash = read<uint16_t>(is);
|
|
447
|
+
if (serial_version != SERIAL_VERSION && serial_version != SERIAL_VERSION_LEGACY) {
|
|
448
|
+
throw std::invalid_argument("serial version mismatch: expected " + std::to_string(SERIAL_VERSION) + " or "
|
|
449
|
+
+ std::to_string(SERIAL_VERSION_LEGACY) + ", actual " + std::to_string(serial_version));
|
|
450
|
+
}
|
|
450
451
|
checker<true>::check_sketch_family(family, SKETCH_FAMILY);
|
|
451
|
-
|
|
452
|
+
if (type != SKETCH_TYPE && type != SKETCH_TYPE_LEGACY) {
|
|
453
|
+
throw std::invalid_argument("sketch type mismatch: expected " + std::to_string(SKETCH_TYPE) + " or "
|
|
454
|
+
+ std::to_string(SKETCH_TYPE_LEGACY) + ", actual " + std::to_string(type));
|
|
455
|
+
}
|
|
452
456
|
const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
|
|
453
457
|
if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
|
|
454
458
|
|
|
@@ -458,11 +462,10 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream&
|
|
|
458
462
|
if (preamble_longs == 1) {
|
|
459
463
|
num_entries = 1;
|
|
460
464
|
} else {
|
|
461
|
-
|
|
462
|
-
uint32_t
|
|
463
|
-
is.read(reinterpret_cast<char*>(&unused32), sizeof(unused32));
|
|
465
|
+
num_entries = read<uint32_t>(is);
|
|
466
|
+
read<uint32_t>(is); // unused
|
|
464
467
|
if (preamble_longs > 2) {
|
|
465
|
-
|
|
468
|
+
theta = read<uint64_t>(is);
|
|
466
469
|
}
|
|
467
470
|
}
|
|
468
471
|
}
|
|
@@ -472,8 +475,7 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream&
|
|
|
472
475
|
entries.reserve(num_entries);
|
|
473
476
|
std::unique_ptr<S, deleter_of_summaries> summary(alloc.allocate(1), deleter_of_summaries(1, false, allocator));
|
|
474
477
|
for (size_t i = 0; i < num_entries; ++i) {
|
|
475
|
-
|
|
476
|
-
is.read(reinterpret_cast<char*>(&key), sizeof(uint64_t));
|
|
478
|
+
const auto key = read<uint64_t>(is);
|
|
477
479
|
sd.deserialize(is, summary.get(), 1);
|
|
478
480
|
entries.push_back(Entry(key, std::move(*summary)));
|
|
479
481
|
(*summary).~S();
|
|
@@ -491,22 +493,27 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
|
|
|
491
493
|
const char* ptr = static_cast<const char*>(bytes);
|
|
492
494
|
const char* base = ptr;
|
|
493
495
|
uint8_t preamble_longs;
|
|
494
|
-
ptr += copy_from_mem(ptr,
|
|
496
|
+
ptr += copy_from_mem(ptr, preamble_longs);
|
|
495
497
|
uint8_t serial_version;
|
|
496
|
-
ptr += copy_from_mem(ptr,
|
|
498
|
+
ptr += copy_from_mem(ptr, serial_version);
|
|
497
499
|
uint8_t family;
|
|
498
|
-
ptr += copy_from_mem(ptr,
|
|
500
|
+
ptr += copy_from_mem(ptr, family);
|
|
499
501
|
uint8_t type;
|
|
500
|
-
ptr += copy_from_mem(ptr,
|
|
501
|
-
uint8_t
|
|
502
|
-
ptr += copy_from_mem(ptr, &unused8, sizeof(unused8));
|
|
502
|
+
ptr += copy_from_mem(ptr, type);
|
|
503
|
+
ptr += sizeof(uint8_t); // unused
|
|
503
504
|
uint8_t flags_byte;
|
|
504
|
-
ptr += copy_from_mem(ptr,
|
|
505
|
+
ptr += copy_from_mem(ptr, flags_byte);
|
|
505
506
|
uint16_t seed_hash;
|
|
506
|
-
ptr += copy_from_mem(ptr,
|
|
507
|
-
|
|
507
|
+
ptr += copy_from_mem(ptr, seed_hash);
|
|
508
|
+
if (serial_version != SERIAL_VERSION && serial_version != SERIAL_VERSION_LEGACY) {
|
|
509
|
+
throw std::invalid_argument("serial version mismatch: expected " + std::to_string(SERIAL_VERSION) + " or "
|
|
510
|
+
+ std::to_string(SERIAL_VERSION_LEGACY) + ", actual " + std::to_string(serial_version));
|
|
511
|
+
}
|
|
508
512
|
checker<true>::check_sketch_family(family, SKETCH_FAMILY);
|
|
509
|
-
|
|
513
|
+
if (type != SKETCH_TYPE && type != SKETCH_TYPE_LEGACY) {
|
|
514
|
+
throw std::invalid_argument("sketch type mismatch: expected " + std::to_string(SKETCH_TYPE) + " or "
|
|
515
|
+
+ std::to_string(SKETCH_TYPE_LEGACY) + ", actual " + std::to_string(type));
|
|
516
|
+
}
|
|
510
517
|
const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
|
|
511
518
|
if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
|
|
512
519
|
|
|
@@ -518,12 +525,11 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
|
|
|
518
525
|
num_entries = 1;
|
|
519
526
|
} else {
|
|
520
527
|
ensure_minimum_memory(size, 8); // read the first prelong before this method
|
|
521
|
-
ptr += copy_from_mem(ptr,
|
|
522
|
-
uint32_t
|
|
523
|
-
ptr += copy_from_mem(ptr, &unused32, sizeof(unused32));
|
|
528
|
+
ptr += copy_from_mem(ptr, num_entries);
|
|
529
|
+
ptr += sizeof(uint32_t); // unused
|
|
524
530
|
if (preamble_longs > 2) {
|
|
525
531
|
ensure_minimum_memory(size, (preamble_longs - 1) << 3);
|
|
526
|
-
ptr += copy_from_mem(ptr,
|
|
532
|
+
ptr += copy_from_mem(ptr, theta);
|
|
527
533
|
}
|
|
528
534
|
}
|
|
529
535
|
}
|
|
@@ -536,7 +542,7 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
|
|
|
536
542
|
std::unique_ptr<S, deleter_of_summaries> summary(alloc.allocate(1), deleter_of_summaries(1, false, allocator));
|
|
537
543
|
for (size_t i = 0; i < num_entries; ++i) {
|
|
538
544
|
uint64_t key;
|
|
539
|
-
ptr += copy_from_mem(ptr,
|
|
545
|
+
ptr += copy_from_mem(ptr, key);
|
|
540
546
|
ptr += sd.deserialize(ptr, base + size - ptr, summary.get(), 1);
|
|
541
547
|
entries.push_back(Entry(key, std::move(*summary)));
|
|
542
548
|
(*summary).~S();
|
|
@@ -548,26 +554,26 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
|
|
|
548
554
|
|
|
549
555
|
template<typename S, typename A>
|
|
550
556
|
auto compact_tuple_sketch<S, A>::begin() -> iterator {
|
|
551
|
-
return iterator(entries_.data(), entries_.size(), 0);
|
|
557
|
+
return iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
|
|
552
558
|
}
|
|
553
559
|
|
|
554
560
|
template<typename S, typename A>
|
|
555
561
|
auto compact_tuple_sketch<S, A>::end() -> iterator {
|
|
556
|
-
return iterator(nullptr, 0, entries_.size());
|
|
562
|
+
return iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
|
|
557
563
|
}
|
|
558
564
|
|
|
559
565
|
template<typename S, typename A>
|
|
560
566
|
auto compact_tuple_sketch<S, A>::begin() const -> const_iterator {
|
|
561
|
-
return const_iterator(entries_.data(), entries_.size(), 0);
|
|
567
|
+
return const_iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
|
|
562
568
|
}
|
|
563
569
|
|
|
564
570
|
template<typename S, typename A>
|
|
565
571
|
auto compact_tuple_sketch<S, A>::end() const -> const_iterator {
|
|
566
|
-
return const_iterator(nullptr, 0, entries_.size());
|
|
572
|
+
return const_iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
|
|
567
573
|
}
|
|
568
574
|
|
|
569
575
|
template<typename S, typename A>
|
|
570
|
-
void compact_tuple_sketch<S, A>::print_specifics(
|
|
576
|
+
void compact_tuple_sketch<S, A>::print_specifics(std::ostringstream&) const {}
|
|
571
577
|
|
|
572
578
|
// builder
|
|
573
579
|
|
|
@@ -581,7 +587,7 @@ tuple_base_builder<builder, P, A>(policy, allocator) {}
|
|
|
581
587
|
|
|
582
588
|
template<typename S, typename U, typename P, typename A>
|
|
583
589
|
auto update_tuple_sketch<S, U, P, A>::builder::build() const -> update_tuple_sketch {
|
|
584
|
-
return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
|
590
|
+
return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
|
585
591
|
}
|
|
586
592
|
|
|
587
593
|
} /* namespace datasketches */
|