datasketches 0.1.2 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/ext/datasketches/cpc_wrapper.cpp +12 -13
- data/ext/datasketches/ext.cpp +1 -1
- data/ext/datasketches/ext.h +4 -0
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/fi_wrapper.cpp +6 -8
- data/ext/datasketches/hll_wrapper.cpp +13 -14
- data/ext/datasketches/kll_wrapper.cpp +28 -76
- data/ext/datasketches/theta_wrapper.cpp +27 -41
- data/ext/datasketches/vo_wrapper.cpp +4 -6
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +10 -0
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +4 -4
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +18 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +13 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +20 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +116 -105
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +22 -6
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +140 -101
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +20 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -16
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +6 -6
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +21 -21
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +102 -105
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +141 -125
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +5 -5
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +81 -109
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +25 -24
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +89 -105
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +130 -165
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +21 -22
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +88 -83
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +34 -45
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +7 -8
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +41 -52
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +7 -8
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +220 -251
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +42 -42
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +36 -38
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +15 -14
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +47 -44
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +62 -87
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +121 -128
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +25 -53
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +8 -8
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +36 -36
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +28 -28
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +37 -37
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +57 -61
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +40 -25
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +50 -6
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +164 -136
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +178 -88
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +12 -6
- data/vendor/datasketches-cpp/python/README.md +52 -49
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -6
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +4 -2
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +38 -28
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -2
- data/vendor/datasketches-cpp/python/tests/kll_test.py +5 -5
- data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
- data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +18 -8
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +488 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +19 -13
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +130 -127
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +41 -49
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -44
- data/vendor/datasketches-cpp/setup.py +11 -6
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +3 -2
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +11 -4
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +26 -28
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
- data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
- data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +24 -36
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +163 -256
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +250 -651
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +6 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +10 -21
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +44 -30
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +60 -5
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +74 -235
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +57 -70
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +18 -21
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +13 -16
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +7 -6
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +3 -3
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +13 -16
- metadata +51 -36
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -28,43 +28,42 @@ namespace datasketches {
|
|
28
28
|
template<typename A>
|
29
29
|
class AuxHashMap;
|
30
30
|
|
31
|
-
template<typename A
|
31
|
+
template<typename A>
|
32
32
|
class HllArray : public HllSketchImpl<A> {
|
33
33
|
public:
|
34
|
-
|
35
|
-
explicit HllArray(const HllArray<A>& that);
|
34
|
+
HllArray(uint8_t lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator);
|
36
35
|
|
37
|
-
static HllArray* newHll(const void* bytes, size_t len);
|
38
|
-
static HllArray* newHll(std::istream& is);
|
36
|
+
static HllArray* newHll(const void* bytes, size_t len, const A& allocator);
|
37
|
+
static HllArray* newHll(std::istream& is, const A& allocator);
|
39
38
|
|
40
39
|
virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const;
|
41
40
|
virtual void serialize(std::ostream& os, bool compact) const;
|
42
41
|
|
43
|
-
virtual ~HllArray();
|
42
|
+
virtual ~HllArray() = default;
|
44
43
|
virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const = 0;
|
45
44
|
|
46
45
|
virtual HllArray* copy() const = 0;
|
47
46
|
virtual HllArray* copyAs(target_hll_type tgtHllType) const;
|
48
47
|
|
49
|
-
virtual HllSketchImpl<A>* couponUpdate(
|
48
|
+
virtual HllSketchImpl<A>* couponUpdate(uint32_t coupon) = 0;
|
50
49
|
|
51
50
|
virtual double getEstimate() const;
|
52
51
|
virtual double getCompositeEstimate() const;
|
53
|
-
virtual double getLowerBound(
|
54
|
-
virtual double getUpperBound(
|
52
|
+
virtual double getLowerBound(uint8_t numStdDev) const;
|
53
|
+
virtual double getUpperBound(uint8_t numStdDev) const;
|
55
54
|
|
56
55
|
inline void addToHipAccum(double delta);
|
57
56
|
|
58
57
|
inline void decNumAtCurMin();
|
59
58
|
|
60
|
-
inline
|
61
|
-
inline
|
59
|
+
inline uint8_t getCurMin() const;
|
60
|
+
inline uint32_t getNumAtCurMin() const;
|
62
61
|
inline double getHipAccum() const;
|
63
62
|
|
64
|
-
virtual
|
63
|
+
virtual uint32_t getHllByteArrBytes() const = 0;
|
65
64
|
|
66
|
-
virtual
|
67
|
-
virtual
|
65
|
+
virtual uint32_t getUpdatableSerializationBytes() const;
|
66
|
+
virtual uint32_t getCompactSerializationBytes() const;
|
68
67
|
|
69
68
|
virtual bool isOutOfOrderFlag() const;
|
70
69
|
virtual bool isEmpty() const;
|
@@ -75,19 +74,19 @@ class HllArray : public HllSketchImpl<A> {
|
|
75
74
|
inline double getKxQ0() const;
|
76
75
|
inline double getKxQ1() const;
|
77
76
|
|
78
|
-
virtual
|
79
|
-
virtual
|
77
|
+
virtual uint32_t getMemDataStart() const;
|
78
|
+
virtual uint8_t getPreInts() const;
|
80
79
|
|
81
|
-
void putCurMin(
|
80
|
+
void putCurMin(uint8_t curMin);
|
82
81
|
void putHipAccum(double hipAccum);
|
83
82
|
inline void putKxQ0(double kxq0);
|
84
83
|
inline void putKxQ1(double kxq1);
|
85
|
-
void putNumAtCurMin(
|
84
|
+
void putNumAtCurMin(uint32_t numAtCurMin);
|
86
85
|
|
87
|
-
static
|
88
|
-
static
|
89
|
-
static
|
90
|
-
static
|
86
|
+
static uint32_t hllArrBytes(target_hll_type tgtHllType, uint8_t lgConfigK);
|
87
|
+
static uint32_t hll4ArrBytes(uint8_t lgConfigK);
|
88
|
+
static uint32_t hll6ArrBytes(uint8_t lgConfigK);
|
89
|
+
static uint32_t hll8ArrBytes(uint8_t lgConfigK);
|
91
90
|
|
92
91
|
virtual AuxHashMap<A>* getAuxHashMap() const;
|
93
92
|
|
@@ -95,18 +94,20 @@ class HllArray : public HllSketchImpl<A> {
|
|
95
94
|
virtual const_iterator begin(bool all = false) const;
|
96
95
|
virtual const_iterator end() const;
|
97
96
|
|
97
|
+
virtual A getAllocator() const;
|
98
|
+
|
98
99
|
protected:
|
99
100
|
void hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue);
|
100
|
-
double getHllBitMapEstimate(
|
101
|
-
double getHllRawEstimate(
|
101
|
+
double getHllBitMapEstimate() const;
|
102
|
+
double getHllRawEstimate() const;
|
102
103
|
|
103
|
-
double
|
104
|
-
double
|
105
|
-
double
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
bool
|
104
|
+
double hipAccum_;
|
105
|
+
double kxq0_;
|
106
|
+
double kxq1_;
|
107
|
+
vector_u8<A> hllByteArr_; //init by sub-classes
|
108
|
+
uint8_t curMin_; //always zero for Hll6 and Hll8, only tracked by Hll4Array
|
109
|
+
uint32_t numAtCurMin_; //interpreted as num zeros when curMin == 0
|
110
|
+
bool oooFlag_; //Out-Of-Order Flag
|
110
111
|
|
111
112
|
friend class HllSketchImplFactory<A>;
|
112
113
|
};
|
@@ -114,21 +115,20 @@ class HllArray : public HllSketchImpl<A> {
|
|
114
115
|
template<typename A>
|
115
116
|
class HllArray<A>::const_iterator: public std::iterator<std::input_iterator_tag, uint32_t> {
|
116
117
|
public:
|
117
|
-
const_iterator(const uint8_t* array,
|
118
|
-
//const_iterator(const uint8_t* array, size_t array_slze, size_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset);
|
118
|
+
const_iterator(const uint8_t* array, uint32_t array_slze, uint32_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset, bool all);
|
119
119
|
const_iterator& operator++();
|
120
120
|
bool operator!=(const const_iterator& other) const;
|
121
121
|
uint32_t operator*() const;
|
122
122
|
private:
|
123
|
-
const uint8_t*
|
124
|
-
|
125
|
-
|
126
|
-
target_hll_type
|
127
|
-
const AuxHashMap<A>*
|
128
|
-
uint8_t
|
129
|
-
bool
|
130
|
-
uint8_t
|
131
|
-
static inline uint8_t get_value(const uint8_t* array,
|
123
|
+
const uint8_t* array_;
|
124
|
+
uint32_t array_size_;
|
125
|
+
uint32_t index_;
|
126
|
+
target_hll_type hll_type_;
|
127
|
+
const AuxHashMap<A>* exceptions_;
|
128
|
+
uint8_t offset_;
|
129
|
+
bool all_;
|
130
|
+
uint8_t value_; // cached value to avoid computing in operator++ and in operator*()
|
131
|
+
static inline uint8_t get_value(const uint8_t* array, uint32_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset);
|
132
132
|
};
|
133
133
|
|
134
134
|
}
|
@@ -42,28 +42,26 @@ typedef union {
|
|
42
42
|
} longDoubleUnion;
|
43
43
|
|
44
44
|
template<typename A>
|
45
|
-
hll_sketch_alloc<A>::hll_sketch_alloc(
|
45
|
+
hll_sketch_alloc<A>::hll_sketch_alloc(uint8_t lg_config_k, target_hll_type tgt_type, bool start_full_size, const A& allocator) {
|
46
46
|
HllUtil<A>::checkLgK(lg_config_k);
|
47
47
|
if (start_full_size) {
|
48
|
-
sketch_impl = HllSketchImplFactory<A>::newHll(lg_config_k, tgt_type, start_full_size);
|
48
|
+
sketch_impl = HllSketchImplFactory<A>::newHll(lg_config_k, tgt_type, start_full_size, allocator);
|
49
49
|
} else {
|
50
50
|
typedef typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>> clAlloc;
|
51
|
-
sketch_impl = new (clAlloc().allocate(1)) CouponList<A>(lg_config_k, tgt_type, hll_mode::LIST);
|
51
|
+
sketch_impl = new (clAlloc(allocator).allocate(1)) CouponList<A>(lg_config_k, tgt_type, hll_mode::LIST, allocator);
|
52
52
|
}
|
53
53
|
}
|
54
54
|
|
55
55
|
template<typename A>
|
56
|
-
hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(std::istream& is) {
|
57
|
-
HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(is);
|
58
|
-
hll_sketch_alloc<A>
|
59
|
-
return sketch;
|
56
|
+
hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(std::istream& is, const A& allocator) {
|
57
|
+
HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(is, allocator);
|
58
|
+
return hll_sketch_alloc<A>(impl);
|
60
59
|
}
|
61
60
|
|
62
61
|
template<typename A>
|
63
|
-
hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(const void* bytes, size_t len) {
|
64
|
-
HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(bytes, len);
|
65
|
-
hll_sketch_alloc<A>
|
66
|
-
return sketch;
|
62
|
+
hll_sketch_alloc<A> hll_sketch_alloc<A>::deserialize(const void* bytes, size_t len, const A& allocator) {
|
63
|
+
HllSketchImpl<A>* impl = HllSketchImplFactory<A>::deserialize(bytes, len, allocator);
|
64
|
+
return hll_sketch_alloc<A>(impl);
|
67
65
|
}
|
68
66
|
|
69
67
|
template<typename A>
|
@@ -124,7 +122,7 @@ void hll_sketch_alloc<A>::update(const std::string& datum) {
|
|
124
122
|
}
|
125
123
|
|
126
124
|
template<typename A>
|
127
|
-
void hll_sketch_alloc<A>::update(
|
125
|
+
void hll_sketch_alloc<A>::update(uint64_t datum) {
|
128
126
|
// no sign extension with 64 bits so no need to cast to signed value
|
129
127
|
HashState hashResult;
|
130
128
|
HllUtil<A>::hash(&datum, sizeof(uint64_t), DEFAULT_SEED, hashResult);
|
@@ -132,53 +130,53 @@ void hll_sketch_alloc<A>::update(const uint64_t datum) {
|
|
132
130
|
}
|
133
131
|
|
134
132
|
template<typename A>
|
135
|
-
void hll_sketch_alloc<A>::update(
|
133
|
+
void hll_sketch_alloc<A>::update(uint32_t datum) {
|
136
134
|
update(static_cast<int32_t>(datum));
|
137
135
|
}
|
138
136
|
|
139
137
|
template<typename A>
|
140
|
-
void hll_sketch_alloc<A>::update(
|
138
|
+
void hll_sketch_alloc<A>::update(uint16_t datum) {
|
141
139
|
update(static_cast<int16_t>(datum));
|
142
140
|
}
|
143
141
|
|
144
142
|
template<typename A>
|
145
|
-
void hll_sketch_alloc<A>::update(
|
143
|
+
void hll_sketch_alloc<A>::update(uint8_t datum) {
|
146
144
|
update(static_cast<int8_t>(datum));
|
147
145
|
}
|
148
146
|
|
149
147
|
template<typename A>
|
150
|
-
void hll_sketch_alloc<A>::update(
|
148
|
+
void hll_sketch_alloc<A>::update(int64_t datum) {
|
151
149
|
HashState hashResult;
|
152
150
|
HllUtil<A>::hash(&datum, sizeof(int64_t), DEFAULT_SEED, hashResult);
|
153
151
|
coupon_update(HllUtil<A>::coupon(hashResult));
|
154
152
|
}
|
155
153
|
|
156
154
|
template<typename A>
|
157
|
-
void hll_sketch_alloc<A>::update(
|
158
|
-
int64_t val = static_cast<int64_t>(datum);
|
155
|
+
void hll_sketch_alloc<A>::update(int32_t datum) {
|
156
|
+
const int64_t val = static_cast<int64_t>(datum);
|
159
157
|
HashState hashResult;
|
160
158
|
HllUtil<A>::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult);
|
161
159
|
coupon_update(HllUtil<A>::coupon(hashResult));
|
162
160
|
}
|
163
161
|
|
164
162
|
template<typename A>
|
165
|
-
void hll_sketch_alloc<A>::update(
|
166
|
-
int64_t val = static_cast<int64_t>(datum);
|
163
|
+
void hll_sketch_alloc<A>::update(int16_t datum) {
|
164
|
+
const int64_t val = static_cast<int64_t>(datum);
|
167
165
|
HashState hashResult;
|
168
166
|
HllUtil<A>::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult);
|
169
167
|
coupon_update(HllUtil<A>::coupon(hashResult));
|
170
168
|
}
|
171
169
|
|
172
170
|
template<typename A>
|
173
|
-
void hll_sketch_alloc<A>::update(
|
174
|
-
int64_t val = static_cast<int64_t>(datum);
|
171
|
+
void hll_sketch_alloc<A>::update(int8_t datum) {
|
172
|
+
const int64_t val = static_cast<int64_t>(datum);
|
175
173
|
HashState hashResult;
|
176
174
|
HllUtil<A>::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult);
|
177
175
|
coupon_update(HllUtil<A>::coupon(hashResult));
|
178
176
|
}
|
179
177
|
|
180
178
|
template<typename A>
|
181
|
-
void hll_sketch_alloc<A>::update(
|
179
|
+
void hll_sketch_alloc<A>::update(double datum) {
|
182
180
|
longDoubleUnion d;
|
183
181
|
d.doubleBytes = static_cast<double>(datum);
|
184
182
|
if (datum == 0.0) {
|
@@ -192,7 +190,7 @@ void hll_sketch_alloc<A>::update(const double datum) {
|
|
192
190
|
}
|
193
191
|
|
194
192
|
template<typename A>
|
195
|
-
void hll_sketch_alloc<A>::update(
|
193
|
+
void hll_sketch_alloc<A>::update(float datum) {
|
196
194
|
longDoubleUnion d;
|
197
195
|
d.doubleBytes = static_cast<double>(datum);
|
198
196
|
if (datum == 0.0) {
|
@@ -206,7 +204,7 @@ void hll_sketch_alloc<A>::update(const float datum) {
|
|
206
204
|
}
|
207
205
|
|
208
206
|
template<typename A>
|
209
|
-
void hll_sketch_alloc<A>::update(const void* data,
|
207
|
+
void hll_sketch_alloc<A>::update(const void* data, size_t lengthBytes) {
|
210
208
|
if (data == nullptr) { return; }
|
211
209
|
HashState hashResult;
|
212
210
|
HllUtil<A>::hash(data, lengthBytes, DEFAULT_SEED, hashResult);
|
@@ -214,8 +212,8 @@ void hll_sketch_alloc<A>::update(const void* data, const size_t lengthBytes) {
|
|
214
212
|
}
|
215
213
|
|
216
214
|
template<typename A>
|
217
|
-
void hll_sketch_alloc<A>::coupon_update(
|
218
|
-
if (coupon ==
|
215
|
+
void hll_sketch_alloc<A>::coupon_update(uint32_t coupon) {
|
216
|
+
if (coupon == hll_constants::EMPTY) { return; }
|
219
217
|
HllSketchImpl<A>* result = this->sketch_impl->couponUpdate(coupon);
|
220
218
|
if (result != this->sketch_impl) {
|
221
219
|
this->sketch_impl->get_deleter()(this->sketch_impl);
|
@@ -354,12 +352,12 @@ double hll_sketch_alloc<A>::get_composite_estimate() const {
|
|
354
352
|
}
|
355
353
|
|
356
354
|
template<typename A>
|
357
|
-
double hll_sketch_alloc<A>::get_lower_bound(
|
355
|
+
double hll_sketch_alloc<A>::get_lower_bound(uint8_t numStdDev) const {
|
358
356
|
return sketch_impl->getLowerBound(numStdDev);
|
359
357
|
}
|
360
358
|
|
361
359
|
template<typename A>
|
362
|
-
double hll_sketch_alloc<A>::get_upper_bound(
|
360
|
+
double hll_sketch_alloc<A>::get_upper_bound(uint8_t numStdDev) const {
|
363
361
|
return sketch_impl->getUpperBound(numStdDev);
|
364
362
|
}
|
365
363
|
|
@@ -369,7 +367,7 @@ hll_mode hll_sketch_alloc<A>::get_current_mode() const {
|
|
369
367
|
}
|
370
368
|
|
371
369
|
template<typename A>
|
372
|
-
|
370
|
+
uint8_t hll_sketch_alloc<A>::get_lg_config_k() const {
|
373
371
|
return sketch_impl->getLgConfigK();
|
374
372
|
}
|
375
373
|
|
@@ -389,12 +387,12 @@ bool hll_sketch_alloc<A>::is_estimation_mode() const {
|
|
389
387
|
}
|
390
388
|
|
391
389
|
template<typename A>
|
392
|
-
|
390
|
+
uint32_t hll_sketch_alloc<A>::get_updatable_serialization_bytes() const {
|
393
391
|
return sketch_impl->getUpdatableSerializationBytes();
|
394
392
|
}
|
395
393
|
|
396
394
|
template<typename A>
|
397
|
-
|
395
|
+
uint32_t hll_sketch_alloc<A>::get_compact_serialization_bytes() const {
|
398
396
|
return sketch_impl->getCompactSerializationBytes();
|
399
397
|
}
|
400
398
|
|
@@ -437,23 +435,23 @@ std::string hll_sketch_alloc<A>::mode_as_string() const {
|
|
437
435
|
}
|
438
436
|
|
439
437
|
template<typename A>
|
440
|
-
|
438
|
+
uint32_t hll_sketch_alloc<A>::get_max_updatable_serialization_bytes(uint8_t lg_config_k,
|
441
439
|
const target_hll_type tgtHllType) {
|
442
|
-
|
440
|
+
uint32_t arrBytes;
|
443
441
|
if (tgtHllType == target_hll_type::HLL_4) {
|
444
|
-
const
|
442
|
+
const uint32_t auxBytes = 4 << hll_constants::LG_AUX_ARR_INTS[lg_config_k];
|
445
443
|
arrBytes = HllArray<A>::hll4ArrBytes(lg_config_k) + auxBytes;
|
446
444
|
} else if (tgtHllType == target_hll_type::HLL_6) {
|
447
445
|
arrBytes = HllArray<A>::hll6ArrBytes(lg_config_k);
|
448
446
|
} else { //HLL_8
|
449
447
|
arrBytes = HllArray<A>::hll8ArrBytes(lg_config_k);
|
450
448
|
}
|
451
|
-
return
|
449
|
+
return hll_constants::HLL_BYTE_ARR_START + arrBytes;
|
452
450
|
}
|
453
451
|
|
454
452
|
template<typename A>
|
455
|
-
double hll_sketch_alloc<A>::get_rel_err(
|
456
|
-
|
453
|
+
double hll_sketch_alloc<A>::get_rel_err(bool upperBound, bool unioned,
|
454
|
+
uint8_t lg_config_k, uint8_t numStdDev) {
|
457
455
|
return HllUtil<A>::getRelErr(upperBound, unioned, lg_config_k, numStdDev);
|
458
456
|
}
|
459
457
|
|
@@ -26,12 +26,12 @@
|
|
26
26
|
namespace datasketches {
|
27
27
|
|
28
28
|
template<typename A>
|
29
|
-
HllSketchImpl<A>::HllSketchImpl(
|
30
|
-
|
31
|
-
:
|
32
|
-
|
33
|
-
|
34
|
-
|
29
|
+
HllSketchImpl<A>::HllSketchImpl(uint8_t lgConfigK, target_hll_type tgtHllType,
|
30
|
+
hll_mode mode, bool startFullSize)
|
31
|
+
: lgConfigK_(lgConfigK),
|
32
|
+
tgtHllType_(tgtHllType),
|
33
|
+
mode_(mode),
|
34
|
+
startFullSize_(startFullSize)
|
35
35
|
{
|
36
36
|
}
|
37
37
|
|
@@ -40,7 +40,7 @@ HllSketchImpl<A>::~HllSketchImpl() {
|
|
40
40
|
}
|
41
41
|
|
42
42
|
template<typename A>
|
43
|
-
target_hll_type HllSketchImpl<A>::extractTgtHllType(
|
43
|
+
target_hll_type HllSketchImpl<A>::extractTgtHllType(uint8_t modeByte) {
|
44
44
|
switch ((modeByte >> 2) & 0x3) {
|
45
45
|
case 0:
|
46
46
|
return target_hll_type::HLL_4;
|
@@ -54,7 +54,7 @@ target_hll_type HllSketchImpl<A>::extractTgtHllType(const uint8_t modeByte) {
|
|
54
54
|
}
|
55
55
|
|
56
56
|
template<typename A>
|
57
|
-
hll_mode HllSketchImpl<A>::extractCurMode(
|
57
|
+
hll_mode HllSketchImpl<A>::extractCurMode(uint8_t modeByte) {
|
58
58
|
switch (modeByte & 0x3) {
|
59
59
|
case 0:
|
60
60
|
return hll_mode::LIST;
|
@@ -68,12 +68,12 @@ hll_mode HllSketchImpl<A>::extractCurMode(const uint8_t modeByte) {
|
|
68
68
|
}
|
69
69
|
|
70
70
|
template<typename A>
|
71
|
-
uint8_t HllSketchImpl<A>::makeFlagsByte(
|
72
|
-
uint8_t flags
|
73
|
-
flags |= (isEmpty() ?
|
74
|
-
flags |= (compact ?
|
75
|
-
flags |= (isOutOfOrderFlag() ?
|
76
|
-
flags |= (
|
71
|
+
uint8_t HllSketchImpl<A>::makeFlagsByte(bool compact) const {
|
72
|
+
uint8_t flags = 0;
|
73
|
+
flags |= (isEmpty() ? hll_constants::EMPTY_FLAG_MASK : 0);
|
74
|
+
flags |= (compact ? hll_constants::COMPACT_FLAG_MASK : 0);
|
75
|
+
flags |= (isOutOfOrderFlag() ? hll_constants::OUT_OF_ORDER_FLAG_MASK : 0);
|
76
|
+
flags |= (startFullSize_ ? hll_constants::FULL_SIZE_FLAG_MASK : 0);
|
77
77
|
return flags;
|
78
78
|
}
|
79
79
|
|
@@ -92,7 +92,7 @@ template<typename A>
|
|
92
92
|
uint8_t HllSketchImpl<A>::makeModeByte() const {
|
93
93
|
uint8_t byte = 0;
|
94
94
|
|
95
|
-
switch (
|
95
|
+
switch (mode_) {
|
96
96
|
case LIST:
|
97
97
|
byte = 0;
|
98
98
|
break;
|
@@ -104,7 +104,7 @@ uint8_t HllSketchImpl<A>::makeModeByte() const {
|
|
104
104
|
break;
|
105
105
|
}
|
106
106
|
|
107
|
-
switch (
|
107
|
+
switch (tgtHllType_) {
|
108
108
|
case HLL_4:
|
109
109
|
byte |= (0 << 2); // for completeness
|
110
110
|
break;
|
@@ -121,27 +121,27 @@ uint8_t HllSketchImpl<A>::makeModeByte() const {
|
|
121
121
|
|
122
122
|
template<typename A>
|
123
123
|
HllSketchImpl<A>* HllSketchImpl<A>::reset() {
|
124
|
-
return HllSketchImplFactory<A>::reset(this,
|
124
|
+
return HllSketchImplFactory<A>::reset(this, startFullSize_);
|
125
125
|
}
|
126
126
|
|
127
127
|
template<typename A>
|
128
128
|
target_hll_type HllSketchImpl<A>::getTgtHllType() const {
|
129
|
-
return
|
129
|
+
return tgtHllType_;
|
130
130
|
}
|
131
131
|
|
132
132
|
template<typename A>
|
133
|
-
|
134
|
-
return
|
133
|
+
uint8_t HllSketchImpl<A>::getLgConfigK() const {
|
134
|
+
return lgConfigK_;
|
135
135
|
}
|
136
136
|
|
137
137
|
template<typename A>
|
138
138
|
hll_mode HllSketchImpl<A>::getCurMode() const {
|
139
|
-
return
|
139
|
+
return mode_;
|
140
140
|
}
|
141
141
|
|
142
142
|
template<typename A>
|
143
143
|
bool HllSketchImpl<A>::isStartFullSize() const {
|
144
|
-
return
|
144
|
+
return startFullSize_;
|
145
145
|
}
|
146
146
|
|
147
147
|
}
|
@@ -27,10 +27,10 @@
|
|
27
27
|
|
28
28
|
namespace datasketches {
|
29
29
|
|
30
|
-
template<typename A
|
30
|
+
template<typename A>
|
31
31
|
class HllSketchImpl {
|
32
32
|
public:
|
33
|
-
HllSketchImpl(
|
33
|
+
HllSketchImpl(uint8_t lgConfigK, target_hll_type tgtHllType, hll_mode mode, bool startFullSize);
|
34
34
|
virtual ~HllSketchImpl();
|
35
35
|
|
36
36
|
virtual void serialize(std::ostream& os, bool compact) const = 0;
|
@@ -42,30 +42,31 @@ class HllSketchImpl {
|
|
42
42
|
|
43
43
|
virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const = 0;
|
44
44
|
|
45
|
-
virtual HllSketchImpl* couponUpdate(
|
45
|
+
virtual HllSketchImpl* couponUpdate(uint32_t coupon) = 0;
|
46
46
|
|
47
47
|
hll_mode getCurMode() const;
|
48
48
|
|
49
49
|
virtual double getEstimate() const = 0;
|
50
50
|
virtual double getCompositeEstimate() const = 0;
|
51
|
-
virtual double getUpperBound(
|
52
|
-
virtual double getLowerBound(
|
51
|
+
virtual double getUpperBound(uint8_t numStdDev) const = 0;
|
52
|
+
virtual double getLowerBound(uint8_t numStdDev) const = 0;
|
53
53
|
|
54
|
-
inline
|
54
|
+
inline uint8_t getLgConfigK() const;
|
55
55
|
|
56
|
-
virtual
|
56
|
+
virtual uint32_t getMemDataStart() const = 0;
|
57
57
|
|
58
|
-
virtual
|
58
|
+
virtual uint8_t getPreInts() const = 0;
|
59
59
|
|
60
60
|
target_hll_type getTgtHllType() const;
|
61
61
|
|
62
|
-
virtual
|
63
|
-
virtual
|
62
|
+
virtual uint32_t getUpdatableSerializationBytes() const = 0;
|
63
|
+
virtual uint32_t getCompactSerializationBytes() const = 0;
|
64
64
|
|
65
65
|
virtual bool isCompact() const = 0;
|
66
66
|
virtual bool isEmpty() const = 0;
|
67
67
|
virtual bool isOutOfOrderFlag() const = 0;
|
68
68
|
virtual void putOutOfOrderFlag(bool oooFlag) = 0;
|
69
|
+
virtual A getAllocator() const = 0;
|
69
70
|
bool isStartFullSize() const;
|
70
71
|
|
71
72
|
protected:
|
@@ -74,10 +75,10 @@ class HllSketchImpl {
|
|
74
75
|
uint8_t makeFlagsByte(bool compact) const;
|
75
76
|
uint8_t makeModeByte() const;
|
76
77
|
|
77
|
-
const
|
78
|
-
const target_hll_type
|
79
|
-
const hll_mode
|
80
|
-
const bool
|
78
|
+
const uint8_t lgConfigK_;
|
79
|
+
const target_hll_type tgtHllType_;
|
80
|
+
const hll_mode mode_;
|
81
|
+
const bool startFullSize_;
|
81
82
|
};
|
82
83
|
|
83
84
|
}
|