datasketches 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +7 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +24 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +14 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +121 -87
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +65 -80
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +28 -28
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +34 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +72 -62
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +68 -45
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +6 -6
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +9 -9
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +47 -56
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +34 -42
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +42 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +107 -58
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +4 -4
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +2 -0
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -28
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +51 -64
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +12 -12
- metadata +8 -3
|
@@ -79,7 +79,7 @@ class CMakeBuild(build_ext):
|
|
|
79
79
|
|
|
80
80
|
setup(
|
|
81
81
|
name='datasketches',
|
|
82
|
-
version='3.
|
|
82
|
+
version='3.1.0',
|
|
83
83
|
author='Apache DataSketches Developers',
|
|
84
84
|
author_email='dev@datasketches.apache.org',
|
|
85
85
|
description='A wrapper for the C++ Apache DataSketches library',
|
|
@@ -90,7 +90,7 @@ public:
|
|
|
90
90
|
* @param f the inclusion probability used to produce the set with size <i>a</i>.
|
|
91
91
|
* @return the approximate lower bound
|
|
92
92
|
*/
|
|
93
|
-
static double estimate_of_a(uint64_t a,
|
|
93
|
+
static double estimate_of_a(uint64_t a, double f) {
|
|
94
94
|
check_inputs(a, 1, f);
|
|
95
95
|
return a / f;
|
|
96
96
|
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef COMPACT_THETA_SKETCH_PARSER_HPP_
|
|
21
|
+
#define COMPACT_THETA_SKETCH_PARSER_HPP_
|
|
22
|
+
|
|
23
|
+
#include <stdint.h>
|
|
24
|
+
|
|
25
|
+
namespace datasketches {
|
|
26
|
+
|
|
27
|
+
template<bool dummy>
|
|
28
|
+
class compact_theta_sketch_parser {
|
|
29
|
+
public:
|
|
30
|
+
struct compact_theta_sketch_data {
|
|
31
|
+
bool is_empty;
|
|
32
|
+
bool is_ordered;
|
|
33
|
+
uint16_t seed_hash;
|
|
34
|
+
uint32_t num_entries;
|
|
35
|
+
uint64_t theta;
|
|
36
|
+
const uint64_t* entries;
|
|
37
|
+
};
|
|
38
|
+
|
|
39
|
+
static compact_theta_sketch_data parse(const void* ptr, size_t size, uint64_t seed, bool dump_on_error = false);
|
|
40
|
+
|
|
41
|
+
private:
|
|
42
|
+
// offsets are in sizeof(type)
|
|
43
|
+
static const size_t COMPACT_SKETCH_PRE_LONGS_BYTE = 0;
|
|
44
|
+
static const size_t COMPACT_SKETCH_SERIAL_VERSION_BYTE = 1;
|
|
45
|
+
static const size_t COMPACT_SKETCH_TYPE_BYTE = 2;
|
|
46
|
+
static const size_t COMPACT_SKETCH_FLAGS_BYTE = 5;
|
|
47
|
+
static const size_t COMPACT_SKETCH_SEED_HASH_U16 = 3;
|
|
48
|
+
static const size_t COMPACT_SKETCH_NUM_ENTRIES_U32 = 2;
|
|
49
|
+
static const size_t COMPACT_SKETCH_SINGLE_ENTRY_U64 = 1;
|
|
50
|
+
static const size_t COMPACT_SKETCH_ENTRIES_EXACT_U64 = 2;
|
|
51
|
+
static const size_t COMPACT_SKETCH_THETA_U64 = 2;
|
|
52
|
+
static const size_t COMPACT_SKETCH_ENTRIES_ESTIMATION_U64 = 3;
|
|
53
|
+
|
|
54
|
+
static const uint8_t COMPACT_SKETCH_IS_EMPTY_FLAG = 2;
|
|
55
|
+
static const uint8_t COMPACT_SKETCH_IS_ORDERED_FLAG = 4;
|
|
56
|
+
|
|
57
|
+
static const uint8_t COMPACT_SKETCH_SERIAL_VERSION = 3;
|
|
58
|
+
static const uint8_t COMPACT_SKETCH_TYPE = 3;
|
|
59
|
+
|
|
60
|
+
static std::string hex_dump(const uint8_t* ptr, size_t size);
|
|
61
|
+
};
|
|
62
|
+
|
|
63
|
+
} /* namespace datasketches */
|
|
64
|
+
|
|
65
|
+
#include "compact_theta_sketch_parser_impl.hpp"
|
|
66
|
+
|
|
67
|
+
#endif
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef COMPACT_THETA_SKETCH_PARSER_IMPL_HPP_
|
|
21
|
+
#define COMPACT_THETA_SKETCH_PARSER_IMPL_HPP_
|
|
22
|
+
|
|
23
|
+
#include <iostream>
|
|
24
|
+
#include <iomanip>
|
|
25
|
+
|
|
26
|
+
namespace datasketches {
|
|
27
|
+
|
|
28
|
+
template<bool dummy>
|
|
29
|
+
auto compact_theta_sketch_parser<dummy>::parse(const void* ptr, size_t size, uint64_t seed, bool dump_on_error) -> compact_theta_sketch_data {
|
|
30
|
+
if (size < 8) throw std::invalid_argument("at least 8 bytes expected, actual " + std::to_string(size)
|
|
31
|
+
+ (dump_on_error ? (", sketch dump: " + hex_dump(reinterpret_cast<const uint8_t*>(ptr), size)) : ""));
|
|
32
|
+
checker<true>::check_serial_version(reinterpret_cast<const uint8_t*>(ptr)[COMPACT_SKETCH_SERIAL_VERSION_BYTE], COMPACT_SKETCH_SERIAL_VERSION);
|
|
33
|
+
checker<true>::check_sketch_type(reinterpret_cast<const uint8_t*>(ptr)[COMPACT_SKETCH_TYPE_BYTE], COMPACT_SKETCH_TYPE);
|
|
34
|
+
uint64_t theta = theta_constants::MAX_THETA;
|
|
35
|
+
const uint16_t seed_hash = reinterpret_cast<const uint16_t*>(ptr)[COMPACT_SKETCH_SEED_HASH_U16];
|
|
36
|
+
checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
|
|
37
|
+
if (reinterpret_cast<const uint8_t*>(ptr)[COMPACT_SKETCH_FLAGS_BYTE] & (1 << COMPACT_SKETCH_IS_EMPTY_FLAG)) {
|
|
38
|
+
return {true, true, seed_hash, 0, theta, nullptr};
|
|
39
|
+
}
|
|
40
|
+
const bool has_theta = reinterpret_cast<const uint8_t*>(ptr)[COMPACT_SKETCH_PRE_LONGS_BYTE] > 2;
|
|
41
|
+
if (has_theta) {
|
|
42
|
+
if (size < 16) throw std::invalid_argument("at least 16 bytes expected, actual " + std::to_string(size));
|
|
43
|
+
theta = reinterpret_cast<const uint64_t*>(ptr)[COMPACT_SKETCH_THETA_U64];
|
|
44
|
+
}
|
|
45
|
+
if (reinterpret_cast<const uint8_t*>(ptr)[COMPACT_SKETCH_PRE_LONGS_BYTE] == 1) {
|
|
46
|
+
return {false, true, seed_hash, 1, theta, reinterpret_cast<const uint64_t*>(ptr) + COMPACT_SKETCH_SINGLE_ENTRY_U64};
|
|
47
|
+
}
|
|
48
|
+
const uint32_t num_entries = reinterpret_cast<const uint32_t*>(ptr)[COMPACT_SKETCH_NUM_ENTRIES_U32];
|
|
49
|
+
const size_t entries_start_u64 = has_theta ? COMPACT_SKETCH_ENTRIES_ESTIMATION_U64 : COMPACT_SKETCH_ENTRIES_EXACT_U64;
|
|
50
|
+
const uint64_t* entries = reinterpret_cast<const uint64_t*>(ptr) + entries_start_u64;
|
|
51
|
+
const size_t expected_size_bytes = (entries_start_u64 + num_entries) * sizeof(uint64_t);
|
|
52
|
+
if (size < expected_size_bytes) {
|
|
53
|
+
throw std::invalid_argument(std::to_string(expected_size_bytes) + " bytes expected, actual " + std::to_string(size)
|
|
54
|
+
+ (dump_on_error ? (", sketch dump: " + hex_dump(reinterpret_cast<const uint8_t*>(ptr), size)) : ""));
|
|
55
|
+
}
|
|
56
|
+
const bool is_ordered = reinterpret_cast<const uint8_t*>(ptr)[COMPACT_SKETCH_FLAGS_BYTE] & (1 << COMPACT_SKETCH_IS_ORDERED_FLAG);
|
|
57
|
+
return {false, is_ordered, seed_hash, num_entries, theta, entries};
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
template<bool dummy>
|
|
61
|
+
std::string compact_theta_sketch_parser<dummy>::hex_dump(const uint8_t* ptr, size_t size) {
|
|
62
|
+
std::stringstream s;
|
|
63
|
+
s << std::hex << std::setfill('0') << std::uppercase;
|
|
64
|
+
for (size_t i = 0; i < size; ++i) s << std::setw(2) << (ptr[i] & 0xff);
|
|
65
|
+
return s.str();
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
} /* namespace datasketches */
|
|
69
|
+
|
|
70
|
+
#endif
|
|
@@ -33,14 +33,19 @@ public:
|
|
|
33
33
|
using Sketch = theta_sketch_alloc<Allocator>;
|
|
34
34
|
using CompactSketch = compact_theta_sketch_alloc<Allocator>;
|
|
35
35
|
|
|
36
|
-
struct
|
|
37
|
-
|
|
36
|
+
struct nop_policy {
|
|
37
|
+
void operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
|
|
38
38
|
unused(incoming_entry);
|
|
39
|
-
|
|
39
|
+
unused(internal_entry);
|
|
40
40
|
}
|
|
41
41
|
};
|
|
42
|
-
using State = theta_intersection_base<Entry, ExtractKey,
|
|
42
|
+
using State = theta_intersection_base<Entry, ExtractKey, nop_policy, Sketch, CompactSketch, Allocator>;
|
|
43
43
|
|
|
44
|
+
/*
|
|
45
|
+
* Constructor
|
|
46
|
+
* @param seed for the hash function that was used to create the sketch
|
|
47
|
+
* @param allocator to use for allocating and deallocating memory
|
|
48
|
+
*/
|
|
44
49
|
explicit theta_intersection_alloc(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
|
|
45
50
|
|
|
46
51
|
/**
|
|
@@ -24,7 +24,7 @@ namespace datasketches {
|
|
|
24
24
|
|
|
25
25
|
template<typename A>
|
|
26
26
|
theta_intersection_alloc<A>::theta_intersection_alloc(uint64_t seed, const A& allocator):
|
|
27
|
-
state_(seed,
|
|
27
|
+
state_(seed, nop_policy(), allocator)
|
|
28
28
|
{}
|
|
29
29
|
|
|
30
30
|
template<typename A>
|
|
@@ -46,20 +46,21 @@ public:
|
|
|
46
46
|
*
|
|
47
47
|
* @param sketch_a given sketch A
|
|
48
48
|
* @param sketch_b given sketch B
|
|
49
|
+
* @param seed for the hash function that was used to create the sketch
|
|
49
50
|
* @return a double array {LowerBound, Estimate, UpperBound} of the Jaccard index.
|
|
50
51
|
* The Upper and Lower bounds are for a confidence interval of 95.4% or +/- 2 standard deviations.
|
|
51
52
|
*/
|
|
52
53
|
template<typename SketchA, typename SketchB>
|
|
53
|
-
static std::array<double, 3> jaccard(const SketchA& sketch_a, const SketchB& sketch_b) {
|
|
54
|
+
static std::array<double, 3> jaccard(const SketchA& sketch_a, const SketchB& sketch_b, uint64_t seed = DEFAULT_SEED) {
|
|
54
55
|
if (reinterpret_cast<const void*>(&sketch_a) == reinterpret_cast<const void*>(&sketch_b)) return {1, 1, 1};
|
|
55
56
|
if (sketch_a.is_empty() && sketch_b.is_empty()) return {1, 1, 1};
|
|
56
57
|
if (sketch_a.is_empty() || sketch_b.is_empty()) return {0, 0, 0};
|
|
57
58
|
|
|
58
|
-
auto union_ab = compute_union(sketch_a, sketch_b);
|
|
59
|
+
auto union_ab = compute_union(sketch_a, sketch_b, seed);
|
|
59
60
|
if (identical_sets(sketch_a, sketch_b, union_ab)) return {1, 1, 1};
|
|
60
61
|
|
|
61
62
|
// intersection
|
|
62
|
-
Intersection i;
|
|
63
|
+
Intersection i(seed);
|
|
63
64
|
i.update(sketch_a);
|
|
64
65
|
i.update(sketch_b);
|
|
65
66
|
i.update(union_ab); // ensures that intersection is a subset of the union
|
|
@@ -76,15 +77,16 @@ public:
|
|
|
76
77
|
* Returns true if the two given sketches are equivalent.
|
|
77
78
|
* @param sketch_a the given sketch A
|
|
78
79
|
* @param sketch_b the given sketch B
|
|
80
|
+
* @param seed for the hash function that was used to create the sketch
|
|
79
81
|
* @return true if the two given sketches are exactly equal
|
|
80
82
|
*/
|
|
81
83
|
template<typename SketchA, typename SketchB>
|
|
82
|
-
static bool exactly_equal(const SketchA& sketch_a, const SketchB& sketch_b) {
|
|
84
|
+
static bool exactly_equal(const SketchA& sketch_a, const SketchB& sketch_b, uint64_t seed = DEFAULT_SEED) {
|
|
83
85
|
if (reinterpret_cast<const void*>(&sketch_a) == reinterpret_cast<const void*>(&sketch_b)) return true;
|
|
84
86
|
if (sketch_a.is_empty() && sketch_b.is_empty()) return true;
|
|
85
87
|
if (sketch_a.is_empty() || sketch_b.is_empty()) return false;
|
|
86
88
|
|
|
87
|
-
auto union_ab = compute_union(sketch_a, sketch_b);
|
|
89
|
+
auto union_ab = compute_union(sketch_a, sketch_b, seed);
|
|
88
90
|
if (identical_sets(sketch_a, sketch_b, union_ab)) return true;
|
|
89
91
|
return false;
|
|
90
92
|
}
|
|
@@ -99,12 +101,13 @@ public:
|
|
|
99
101
|
* @param actual the sketch to be tested
|
|
100
102
|
* @param expected the reference sketch that is considered to be correct
|
|
101
103
|
* @param threshold a real value between zero and one
|
|
104
|
+
* @param seed for the hash function that was used to create the sketch
|
|
102
105
|
* @return true if the similarity of the two sketches is greater than the given threshold
|
|
103
106
|
* with at least 97.7% confidence
|
|
104
107
|
*/
|
|
105
108
|
template<typename SketchA, typename SketchB>
|
|
106
|
-
static bool similarity_test(const SketchA& actual, const SketchB& expected, double threshold) {
|
|
107
|
-
auto jc = jaccard(actual, expected);
|
|
109
|
+
static bool similarity_test(const SketchA& actual, const SketchB& expected, double threshold, uint64_t seed = DEFAULT_SEED) {
|
|
110
|
+
auto jc = jaccard(actual, expected, seed);
|
|
108
111
|
return jc[0] >= threshold;
|
|
109
112
|
}
|
|
110
113
|
|
|
@@ -118,23 +121,24 @@ public:
|
|
|
118
121
|
* @param actual the sketch to be tested
|
|
119
122
|
* @param expected the reference sketch that is considered to be correct
|
|
120
123
|
* @param threshold a real value between zero and one
|
|
124
|
+
* @param seed for the hash function that was used to create the sketch
|
|
121
125
|
* @return true if the dissimilarity of the two sketches is greater than the given threshold
|
|
122
126
|
* with at least 97.7% confidence
|
|
123
127
|
*/
|
|
124
128
|
template<typename SketchA, typename SketchB>
|
|
125
|
-
static bool dissimilarity_test(const SketchA& actual, const SketchB& expected, double threshold) {
|
|
126
|
-
auto jc = jaccard(actual, expected);
|
|
129
|
+
static bool dissimilarity_test(const SketchA& actual, const SketchB& expected, double threshold, uint64_t seed = DEFAULT_SEED) {
|
|
130
|
+
auto jc = jaccard(actual, expected, seed);
|
|
127
131
|
return jc[2] <= threshold;
|
|
128
132
|
}
|
|
129
133
|
|
|
130
134
|
private:
|
|
131
135
|
|
|
132
136
|
template<typename SketchA, typename SketchB>
|
|
133
|
-
static typename Union::CompactSketch compute_union(const SketchA& sketch_a, const SketchB& sketch_b) {
|
|
134
|
-
const
|
|
135
|
-
const
|
|
136
|
-
const
|
|
137
|
-
auto u = typename Union::builder().set_lg_k(lg_k).build();
|
|
137
|
+
static typename Union::CompactSketch compute_union(const SketchA& sketch_a, const SketchB& sketch_b, uint64_t seed) {
|
|
138
|
+
const auto count_a = sketch_a.get_num_retained();
|
|
139
|
+
const auto count_b = sketch_b.get_num_retained();
|
|
140
|
+
const uint8_t lg_k = std::min(std::max(log2(ceiling_power_of_2(count_a + count_b)), theta_constants::MIN_LG_K), theta_constants::MAX_LG_K);
|
|
141
|
+
auto u = typename Union::builder().set_lg_k(lg_k).set_seed(seed).build();
|
|
138
142
|
u.update(sketch_a);
|
|
139
143
|
u.update(sketch_b);
|
|
140
144
|
return u.get_result(false);
|
|
@@ -311,7 +311,8 @@ public:
|
|
|
311
311
|
// - as a result of a set operation
|
|
312
312
|
// - by deserializing a previously serialized compact sketch
|
|
313
313
|
|
|
314
|
-
|
|
314
|
+
template<typename Other>
|
|
315
|
+
compact_theta_sketch_alloc(const Other& other, bool ordered);
|
|
315
316
|
compact_theta_sketch_alloc(const compact_theta_sketch_alloc&) = default;
|
|
316
317
|
compact_theta_sketch_alloc(compact_theta_sketch_alloc&&) noexcept = default;
|
|
317
318
|
virtual ~compact_theta_sketch_alloc() = default;
|
|
@@ -387,10 +388,50 @@ public:
|
|
|
387
388
|
update_theta_sketch_alloc build() const;
|
|
388
389
|
};
|
|
389
390
|
|
|
391
|
+
// This is to wrap a buffer containing a serialized compact sketch and use it in a set operation avoiding some cost of deserialization.
|
|
392
|
+
// It does not take the ownership of the buffer.
|
|
393
|
+
|
|
394
|
+
template<typename Allocator = std::allocator<uint64_t>>
|
|
395
|
+
class wrapped_compact_theta_sketch_alloc {
|
|
396
|
+
public:
|
|
397
|
+
using const_iterator = const uint64_t*;
|
|
398
|
+
|
|
399
|
+
Allocator get_allocator() const;
|
|
400
|
+
bool is_empty() const;
|
|
401
|
+
bool is_ordered() const;
|
|
402
|
+
uint64_t get_theta64() const;
|
|
403
|
+
uint32_t get_num_retained() const;
|
|
404
|
+
uint16_t get_seed_hash() const;
|
|
405
|
+
|
|
406
|
+
const_iterator begin() const;
|
|
407
|
+
const_iterator end() const;
|
|
408
|
+
|
|
409
|
+
/**
|
|
410
|
+
* This method wraps a serialized compact sketch as an array of bytes.
|
|
411
|
+
* @param bytes pointer to the array of bytes
|
|
412
|
+
* @param size the size of the array
|
|
413
|
+
* @param seed the seed for the hash function that was used to create the sketch
|
|
414
|
+
* @return an instance of the sketch
|
|
415
|
+
*/
|
|
416
|
+
static const wrapped_compact_theta_sketch_alloc wrap(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED, bool dump_on_error = false);
|
|
417
|
+
|
|
418
|
+
private:
|
|
419
|
+
bool is_empty_;
|
|
420
|
+
bool is_ordered_;
|
|
421
|
+
uint16_t seed_hash_;
|
|
422
|
+
uint32_t num_entries_;
|
|
423
|
+
uint64_t theta_;
|
|
424
|
+
const uint64_t* entries_;
|
|
425
|
+
|
|
426
|
+
wrapped_compact_theta_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint32_t num_entries,
|
|
427
|
+
uint64_t theta, const uint64_t* entries);
|
|
428
|
+
};
|
|
429
|
+
|
|
390
430
|
// aliases with default allocator for convenience
|
|
391
431
|
using theta_sketch = theta_sketch_alloc<std::allocator<uint64_t>>;
|
|
392
432
|
using update_theta_sketch = update_theta_sketch_alloc<std::allocator<uint64_t>>;
|
|
393
433
|
using compact_theta_sketch = compact_theta_sketch_alloc<std::allocator<uint64_t>>;
|
|
434
|
+
using wrapped_compact_theta_sketch = wrapped_compact_theta_sketch_alloc<std::allocator<uint64_t>>;
|
|
394
435
|
|
|
395
436
|
} /* namespace datasketches */
|
|
396
437
|
|
|
@@ -26,6 +26,7 @@
|
|
|
26
26
|
#include "serde.hpp"
|
|
27
27
|
#include "binomial_bounds.hpp"
|
|
28
28
|
#include "theta_helpers.hpp"
|
|
29
|
+
#include "compact_theta_sketch_parser.hpp"
|
|
29
30
|
|
|
30
31
|
namespace datasketches {
|
|
31
32
|
|
|
@@ -246,7 +247,8 @@ update_theta_sketch_alloc<A> update_theta_sketch_alloc<A>::builder::build() cons
|
|
|
246
247
|
// compact sketch
|
|
247
248
|
|
|
248
249
|
template<typename A>
|
|
249
|
-
|
|
250
|
+
template<typename Other>
|
|
251
|
+
compact_theta_sketch_alloc<A>::compact_theta_sketch_alloc(const Other& other, bool ordered):
|
|
250
252
|
is_empty_(other.is_empty()),
|
|
251
253
|
is_ordered_(other.is_ordered() || ordered),
|
|
252
254
|
seed_hash_(other.get_seed_hash()),
|
|
@@ -290,7 +292,7 @@ uint64_t compact_theta_sketch_alloc<A>::get_theta64() const {
|
|
|
290
292
|
|
|
291
293
|
template<typename A>
|
|
292
294
|
uint32_t compact_theta_sketch_alloc<A>::get_num_retained() const {
|
|
293
|
-
return entries_.size();
|
|
295
|
+
return static_cast<uint32_t>(entries_.size());
|
|
294
296
|
}
|
|
295
297
|
|
|
296
298
|
template<typename A>
|
|
@@ -300,22 +302,22 @@ uint16_t compact_theta_sketch_alloc<A>::get_seed_hash() const {
|
|
|
300
302
|
|
|
301
303
|
template<typename A>
|
|
302
304
|
auto compact_theta_sketch_alloc<A>::begin() -> iterator {
|
|
303
|
-
return iterator(entries_.data(), entries_.size(), 0);
|
|
305
|
+
return iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
|
|
304
306
|
}
|
|
305
307
|
|
|
306
308
|
template<typename A>
|
|
307
309
|
auto compact_theta_sketch_alloc<A>::end() -> iterator {
|
|
308
|
-
return iterator(nullptr, 0, entries_.size());
|
|
310
|
+
return iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
|
|
309
311
|
}
|
|
310
312
|
|
|
311
313
|
template<typename A>
|
|
312
314
|
auto compact_theta_sketch_alloc<A>::begin() const -> const_iterator {
|
|
313
|
-
return const_iterator(entries_.data(), entries_.size(), 0);
|
|
315
|
+
return const_iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
|
|
314
316
|
}
|
|
315
317
|
|
|
316
318
|
template<typename A>
|
|
317
319
|
auto compact_theta_sketch_alloc<A>::end() const -> const_iterator {
|
|
318
|
-
return const_iterator(nullptr, 0, entries_.size());
|
|
320
|
+
return const_iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
|
|
319
321
|
}
|
|
320
322
|
|
|
321
323
|
template<typename A>
|
|
@@ -325,33 +327,33 @@ template<typename A>
|
|
|
325
327
|
void compact_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
|
|
326
328
|
const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
|
|
327
329
|
const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
|
|
328
|
-
|
|
330
|
+
write(os, preamble_longs);
|
|
329
331
|
const uint8_t serial_version = SERIAL_VERSION;
|
|
330
|
-
|
|
332
|
+
write(os, serial_version);
|
|
331
333
|
const uint8_t type = SKETCH_TYPE;
|
|
332
|
-
|
|
334
|
+
write(os, type);
|
|
333
335
|
const uint16_t unused16 = 0;
|
|
334
|
-
|
|
336
|
+
write(os, unused16);
|
|
335
337
|
const uint8_t flags_byte(
|
|
336
338
|
(1 << flags::IS_COMPACT) |
|
|
337
339
|
(1 << flags::IS_READ_ONLY) |
|
|
338
340
|
(this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
|
|
339
341
|
(this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
|
|
340
342
|
);
|
|
341
|
-
|
|
343
|
+
write(os, flags_byte);
|
|
342
344
|
const uint16_t seed_hash = get_seed_hash();
|
|
343
|
-
|
|
345
|
+
write(os, seed_hash);
|
|
344
346
|
if (!this->is_empty()) {
|
|
345
347
|
if (!is_single_item) {
|
|
346
|
-
const uint32_t num_entries = entries_.size();
|
|
347
|
-
|
|
348
|
+
const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
|
|
349
|
+
write(os, num_entries);
|
|
348
350
|
const uint32_t unused32 = 0;
|
|
349
|
-
|
|
351
|
+
write(os, unused32);
|
|
350
352
|
if (this->is_estimation_mode()) {
|
|
351
|
-
|
|
353
|
+
write(os, this->theta_);
|
|
352
354
|
}
|
|
353
355
|
}
|
|
354
|
-
|
|
356
|
+
write(os, entries_.data(), entries_.size() * sizeof(uint64_t));
|
|
355
357
|
}
|
|
356
358
|
}
|
|
357
359
|
|
|
@@ -364,30 +366,28 @@ auto compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const
|
|
|
364
366
|
vector_bytes bytes(size, 0, entries_.get_allocator());
|
|
365
367
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
|
366
368
|
|
|
367
|
-
ptr += copy_to_mem(
|
|
369
|
+
ptr += copy_to_mem(preamble_longs, ptr);
|
|
368
370
|
const uint8_t serial_version = SERIAL_VERSION;
|
|
369
|
-
ptr += copy_to_mem(
|
|
371
|
+
ptr += copy_to_mem(serial_version, ptr);
|
|
370
372
|
const uint8_t type = SKETCH_TYPE;
|
|
371
|
-
ptr += copy_to_mem(
|
|
372
|
-
|
|
373
|
-
ptr += copy_to_mem(&unused16, ptr, sizeof(unused16));
|
|
373
|
+
ptr += copy_to_mem(type, ptr);
|
|
374
|
+
ptr += sizeof(uint16_t); // unused
|
|
374
375
|
const uint8_t flags_byte(
|
|
375
376
|
(1 << flags::IS_COMPACT) |
|
|
376
377
|
(1 << flags::IS_READ_ONLY) |
|
|
377
378
|
(this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
|
|
378
379
|
(this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
|
|
379
380
|
);
|
|
380
|
-
ptr += copy_to_mem(
|
|
381
|
+
ptr += copy_to_mem(flags_byte, ptr);
|
|
381
382
|
const uint16_t seed_hash = get_seed_hash();
|
|
382
|
-
ptr += copy_to_mem(
|
|
383
|
+
ptr += copy_to_mem(seed_hash, ptr);
|
|
383
384
|
if (!this->is_empty()) {
|
|
384
385
|
if (!is_single_item) {
|
|
385
|
-
const uint32_t num_entries = entries_.size();
|
|
386
|
-
ptr += copy_to_mem(
|
|
387
|
-
|
|
388
|
-
ptr += copy_to_mem(&unused32, ptr, sizeof(unused32));
|
|
386
|
+
const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
|
|
387
|
+
ptr += copy_to_mem(num_entries, ptr);
|
|
388
|
+
ptr += sizeof(uint32_t);
|
|
389
389
|
if (this->is_estimation_mode()) {
|
|
390
|
-
ptr += copy_to_mem(
|
|
390
|
+
ptr += copy_to_mem(theta_, ptr);
|
|
391
391
|
}
|
|
392
392
|
}
|
|
393
393
|
ptr += copy_to_mem(entries_.data(), ptr, entries_.size() * sizeof(uint64_t));
|
|
@@ -397,18 +397,12 @@ auto compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const
|
|
|
397
397
|
|
|
398
398
|
template<typename A>
|
|
399
399
|
compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) {
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
uint8_t
|
|
403
|
-
|
|
404
|
-
uint8_t
|
|
405
|
-
|
|
406
|
-
uint16_t unused16;
|
|
407
|
-
is.read(reinterpret_cast<char*>(&unused16), sizeof(unused16));
|
|
408
|
-
uint8_t flags_byte;
|
|
409
|
-
is.read(reinterpret_cast<char*>(&flags_byte), sizeof(flags_byte));
|
|
410
|
-
uint16_t seed_hash;
|
|
411
|
-
is.read(reinterpret_cast<char*>(&seed_hash), sizeof(seed_hash));
|
|
400
|
+
const auto preamble_longs = read<uint8_t>(is);
|
|
401
|
+
const auto serial_version = read<uint8_t>(is);
|
|
402
|
+
const auto type = read<uint8_t>(is);
|
|
403
|
+
read<uint16_t>(is); // unused
|
|
404
|
+
const auto flags_byte = read<uint8_t>(is);
|
|
405
|
+
const auto seed_hash = read<uint16_t>(is);
|
|
412
406
|
checker<true>::check_sketch_type(type, SKETCH_TYPE);
|
|
413
407
|
checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
|
|
414
408
|
const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
|
|
@@ -420,16 +414,15 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(std::is
|
|
|
420
414
|
if (preamble_longs == 1) {
|
|
421
415
|
num_entries = 1;
|
|
422
416
|
} else {
|
|
423
|
-
|
|
424
|
-
uint32_t
|
|
425
|
-
is.read(reinterpret_cast<char*>(&unused32), sizeof(unused32));
|
|
417
|
+
num_entries = read<uint32_t>(is);
|
|
418
|
+
read<uint32_t>(is); // unused
|
|
426
419
|
if (preamble_longs > 2) {
|
|
427
|
-
|
|
420
|
+
theta = read<uint64_t>(is);
|
|
428
421
|
}
|
|
429
422
|
}
|
|
430
423
|
}
|
|
431
424
|
std::vector<uint64_t, A> entries(num_entries, 0, allocator);
|
|
432
|
-
if (!is_empty)
|
|
425
|
+
if (!is_empty) read(is, entries.data(), sizeof(uint64_t) * entries.size());
|
|
433
426
|
|
|
434
427
|
const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
|
|
435
428
|
if (!is.good()) throw std::runtime_error("error reading from std::istream");
|
|
@@ -442,17 +435,16 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const v
|
|
|
442
435
|
const char* ptr = static_cast<const char*>(bytes);
|
|
443
436
|
const char* base = ptr;
|
|
444
437
|
uint8_t preamble_longs;
|
|
445
|
-
ptr += copy_from_mem(ptr,
|
|
438
|
+
ptr += copy_from_mem(ptr, preamble_longs);
|
|
446
439
|
uint8_t serial_version;
|
|
447
|
-
ptr += copy_from_mem(ptr,
|
|
440
|
+
ptr += copy_from_mem(ptr, serial_version);
|
|
448
441
|
uint8_t type;
|
|
449
|
-
ptr += copy_from_mem(ptr,
|
|
450
|
-
uint16_t
|
|
451
|
-
ptr += copy_from_mem(ptr, &unused16, sizeof(unused16));
|
|
442
|
+
ptr += copy_from_mem(ptr, type);
|
|
443
|
+
ptr += sizeof(uint16_t); // unused
|
|
452
444
|
uint8_t flags_byte;
|
|
453
|
-
ptr += copy_from_mem(ptr,
|
|
445
|
+
ptr += copy_from_mem(ptr, flags_byte);
|
|
454
446
|
uint16_t seed_hash;
|
|
455
|
-
ptr += copy_from_mem(ptr,
|
|
447
|
+
ptr += copy_from_mem(ptr, seed_hash);
|
|
456
448
|
checker<true>::check_sketch_type(type, SKETCH_TYPE);
|
|
457
449
|
checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
|
|
458
450
|
const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
|
|
@@ -465,12 +457,11 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const v
|
|
|
465
457
|
num_entries = 1;
|
|
466
458
|
} else {
|
|
467
459
|
ensure_minimum_memory(size, 8); // read the first prelong before this method
|
|
468
|
-
ptr += copy_from_mem(ptr,
|
|
469
|
-
uint32_t
|
|
470
|
-
ptr += copy_from_mem(ptr, &unused32, sizeof(unused32));
|
|
460
|
+
ptr += copy_from_mem(ptr, num_entries);
|
|
461
|
+
ptr += sizeof(uint32_t); // unused
|
|
471
462
|
if (preamble_longs > 2) {
|
|
472
463
|
ensure_minimum_memory(size, (preamble_longs - 1) << 3);
|
|
473
|
-
ptr += copy_from_mem(ptr,
|
|
464
|
+
ptr += copy_from_mem(ptr, theta);
|
|
474
465
|
}
|
|
475
466
|
}
|
|
476
467
|
}
|
|
@@ -483,7 +474,65 @@ compact_theta_sketch_alloc<A> compact_theta_sketch_alloc<A>::deserialize(const v
|
|
|
483
474
|
return compact_theta_sketch_alloc(is_empty, is_ordered, seed_hash, theta, std::move(entries));
|
|
484
475
|
}
|
|
485
476
|
|
|
477
|
+
// wrapped compact sketch
|
|
478
|
+
|
|
479
|
+
template<typename A>
|
|
480
|
+
wrapped_compact_theta_sketch_alloc<A>::wrapped_compact_theta_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint32_t num_entries,
|
|
481
|
+
uint64_t theta, const uint64_t* entries):
|
|
482
|
+
is_empty_(is_empty),
|
|
483
|
+
is_ordered_(is_ordered),
|
|
484
|
+
seed_hash_(seed_hash),
|
|
485
|
+
num_entries_(num_entries),
|
|
486
|
+
theta_(theta),
|
|
487
|
+
entries_(entries)
|
|
488
|
+
{}
|
|
489
|
+
|
|
490
|
+
template<typename A>
|
|
491
|
+
const wrapped_compact_theta_sketch_alloc<A> wrapped_compact_theta_sketch_alloc<A>::wrap(const void* bytes, size_t size, uint64_t seed, bool dump_on_error) {
|
|
492
|
+
auto data = compact_theta_sketch_parser<true>::parse(bytes, size, seed, dump_on_error);
|
|
493
|
+
return wrapped_compact_theta_sketch_alloc(data.is_empty, data.is_ordered, data.seed_hash, data.num_entries, data.theta, data.entries);
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
template<typename A>
|
|
497
|
+
A wrapped_compact_theta_sketch_alloc<A>::get_allocator() const {
|
|
498
|
+
return A();
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
template<typename A>
|
|
502
|
+
bool wrapped_compact_theta_sketch_alloc<A>::is_empty() const {
|
|
503
|
+
return is_empty_;
|
|
504
|
+
}
|
|
505
|
+
|
|
506
|
+
template<typename A>
|
|
507
|
+
bool wrapped_compact_theta_sketch_alloc<A>::is_ordered() const {
|
|
508
|
+
return is_ordered_;
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
template<typename A>
|
|
512
|
+
uint64_t wrapped_compact_theta_sketch_alloc<A>::get_theta64() const {
|
|
513
|
+
return theta_;
|
|
514
|
+
}
|
|
515
|
+
|
|
516
|
+
template<typename A>
|
|
517
|
+
uint32_t wrapped_compact_theta_sketch_alloc<A>::get_num_retained() const {
|
|
518
|
+
return static_cast<uint32_t>(num_entries_);
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
template<typename A>
|
|
522
|
+
uint16_t wrapped_compact_theta_sketch_alloc<A>::get_seed_hash() const {
|
|
523
|
+
return seed_hash_;
|
|
524
|
+
}
|
|
525
|
+
|
|
526
|
+
template<typename A>
|
|
527
|
+
auto wrapped_compact_theta_sketch_alloc<A>::begin() const -> const_iterator {
|
|
528
|
+
return entries_;
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
template<typename A>
|
|
532
|
+
auto wrapped_compact_theta_sketch_alloc<A>::end() const -> const_iterator {
|
|
533
|
+
return entries_ + num_entries_;
|
|
534
|
+
}
|
|
535
|
+
|
|
486
536
|
} /* namespace datasketches */
|
|
487
537
|
|
|
488
538
|
#endif
|
|
489
|
-
|