datasketches 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +7 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +24 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +14 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +121 -87
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +65 -80
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +28 -28
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +34 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +72 -62
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +68 -45
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +6 -6
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +9 -9
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +47 -56
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +34 -42
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +42 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +107 -58
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +4 -4
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +2 -0
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -28
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +51 -64
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +12 -12
- metadata +8 -3
|
@@ -31,7 +31,7 @@ class AuxHashMap;
|
|
|
31
31
|
template<typename A>
|
|
32
32
|
class HllArray : public HllSketchImpl<A> {
|
|
33
33
|
public:
|
|
34
|
-
HllArray(
|
|
34
|
+
HllArray(uint8_t lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator);
|
|
35
35
|
|
|
36
36
|
static HllArray* newHll(const void* bytes, size_t len, const A& allocator);
|
|
37
37
|
static HllArray* newHll(std::istream& is, const A& allocator);
|
|
@@ -45,25 +45,25 @@ class HllArray : public HllSketchImpl<A> {
|
|
|
45
45
|
virtual HllArray* copy() const = 0;
|
|
46
46
|
virtual HllArray* copyAs(target_hll_type tgtHllType) const;
|
|
47
47
|
|
|
48
|
-
virtual HllSketchImpl<A>* couponUpdate(
|
|
48
|
+
virtual HllSketchImpl<A>* couponUpdate(uint32_t coupon) = 0;
|
|
49
49
|
|
|
50
50
|
virtual double getEstimate() const;
|
|
51
51
|
virtual double getCompositeEstimate() const;
|
|
52
|
-
virtual double getLowerBound(
|
|
53
|
-
virtual double getUpperBound(
|
|
52
|
+
virtual double getLowerBound(uint8_t numStdDev) const;
|
|
53
|
+
virtual double getUpperBound(uint8_t numStdDev) const;
|
|
54
54
|
|
|
55
55
|
inline void addToHipAccum(double delta);
|
|
56
56
|
|
|
57
57
|
inline void decNumAtCurMin();
|
|
58
58
|
|
|
59
|
-
inline
|
|
60
|
-
inline
|
|
59
|
+
inline uint8_t getCurMin() const;
|
|
60
|
+
inline uint32_t getNumAtCurMin() const;
|
|
61
61
|
inline double getHipAccum() const;
|
|
62
62
|
|
|
63
|
-
virtual
|
|
63
|
+
virtual uint32_t getHllByteArrBytes() const = 0;
|
|
64
64
|
|
|
65
|
-
virtual
|
|
66
|
-
virtual
|
|
65
|
+
virtual uint32_t getUpdatableSerializationBytes() const;
|
|
66
|
+
virtual uint32_t getCompactSerializationBytes() const;
|
|
67
67
|
|
|
68
68
|
virtual bool isOutOfOrderFlag() const;
|
|
69
69
|
virtual bool isEmpty() const;
|
|
@@ -74,19 +74,19 @@ class HllArray : public HllSketchImpl<A> {
|
|
|
74
74
|
inline double getKxQ0() const;
|
|
75
75
|
inline double getKxQ1() const;
|
|
76
76
|
|
|
77
|
-
virtual
|
|
78
|
-
virtual
|
|
77
|
+
virtual uint32_t getMemDataStart() const;
|
|
78
|
+
virtual uint8_t getPreInts() const;
|
|
79
79
|
|
|
80
|
-
void putCurMin(
|
|
80
|
+
void putCurMin(uint8_t curMin);
|
|
81
81
|
void putHipAccum(double hipAccum);
|
|
82
82
|
inline void putKxQ0(double kxq0);
|
|
83
83
|
inline void putKxQ1(double kxq1);
|
|
84
|
-
void putNumAtCurMin(
|
|
84
|
+
void putNumAtCurMin(uint32_t numAtCurMin);
|
|
85
85
|
|
|
86
|
-
static
|
|
87
|
-
static
|
|
88
|
-
static
|
|
89
|
-
static
|
|
86
|
+
static uint32_t hllArrBytes(target_hll_type tgtHllType, uint8_t lgConfigK);
|
|
87
|
+
static uint32_t hll4ArrBytes(uint8_t lgConfigK);
|
|
88
|
+
static uint32_t hll6ArrBytes(uint8_t lgConfigK);
|
|
89
|
+
static uint32_t hll8ArrBytes(uint8_t lgConfigK);
|
|
90
90
|
|
|
91
91
|
virtual AuxHashMap<A>* getAuxHashMap() const;
|
|
92
92
|
|
|
@@ -98,16 +98,16 @@ class HllArray : public HllSketchImpl<A> {
|
|
|
98
98
|
|
|
99
99
|
protected:
|
|
100
100
|
void hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue);
|
|
101
|
-
double getHllBitMapEstimate(
|
|
102
|
-
double getHllRawEstimate(
|
|
101
|
+
double getHllBitMapEstimate() const;
|
|
102
|
+
double getHllRawEstimate() const;
|
|
103
103
|
|
|
104
|
-
double
|
|
105
|
-
double
|
|
106
|
-
double
|
|
107
|
-
vector_u8<A>
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
bool
|
|
104
|
+
double hipAccum_;
|
|
105
|
+
double kxq0_;
|
|
106
|
+
double kxq1_;
|
|
107
|
+
vector_u8<A> hllByteArr_; //init by sub-classes
|
|
108
|
+
uint8_t curMin_; //always zero for Hll6 and Hll8, only tracked by Hll4Array
|
|
109
|
+
uint32_t numAtCurMin_; //interpreted as num zeros when curMin == 0
|
|
110
|
+
bool oooFlag_; //Out-Of-Order Flag
|
|
111
111
|
|
|
112
112
|
friend class HllSketchImplFactory<A>;
|
|
113
113
|
};
|
|
@@ -115,20 +115,20 @@ class HllArray : public HllSketchImpl<A> {
|
|
|
115
115
|
template<typename A>
|
|
116
116
|
class HllArray<A>::const_iterator: public std::iterator<std::input_iterator_tag, uint32_t> {
|
|
117
117
|
public:
|
|
118
|
-
const_iterator(const uint8_t* array,
|
|
118
|
+
const_iterator(const uint8_t* array, uint32_t array_slze, uint32_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset, bool all);
|
|
119
119
|
const_iterator& operator++();
|
|
120
120
|
bool operator!=(const const_iterator& other) const;
|
|
121
121
|
uint32_t operator*() const;
|
|
122
122
|
private:
|
|
123
|
-
const uint8_t*
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
target_hll_type
|
|
127
|
-
const AuxHashMap<A>*
|
|
128
|
-
uint8_t
|
|
129
|
-
bool
|
|
130
|
-
uint8_t
|
|
131
|
-
static inline uint8_t get_value(const uint8_t* array,
|
|
123
|
+
const uint8_t* array_;
|
|
124
|
+
uint32_t array_size_;
|
|
125
|
+
uint32_t index_;
|
|
126
|
+
target_hll_type hll_type_;
|
|
127
|
+
const AuxHashMap<A>* exceptions_;
|
|
128
|
+
uint8_t offset_;
|
|
129
|
+
bool all_;
|
|
130
|
+
uint8_t value_; // cached value to avoid computing in operator++ and in operator*()
|
|
131
|
+
static inline uint8_t get_value(const uint8_t* array, uint32_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset);
|
|
132
132
|
};
|
|
133
133
|
|
|
134
134
|
}
|
|
@@ -42,7 +42,7 @@ typedef union {
|
|
|
42
42
|
} longDoubleUnion;
|
|
43
43
|
|
|
44
44
|
template<typename A>
|
|
45
|
-
hll_sketch_alloc<A>::hll_sketch_alloc(
|
|
45
|
+
hll_sketch_alloc<A>::hll_sketch_alloc(uint8_t lg_config_k, target_hll_type tgt_type, bool start_full_size, const A& allocator) {
|
|
46
46
|
HllUtil<A>::checkLgK(lg_config_k);
|
|
47
47
|
if (start_full_size) {
|
|
48
48
|
sketch_impl = HllSketchImplFactory<A>::newHll(lg_config_k, tgt_type, start_full_size, allocator);
|
|
@@ -122,7 +122,7 @@ void hll_sketch_alloc<A>::update(const std::string& datum) {
|
|
|
122
122
|
}
|
|
123
123
|
|
|
124
124
|
template<typename A>
|
|
125
|
-
void hll_sketch_alloc<A>::update(const uint64_t datum) {
|
|
125
|
+
void hll_sketch_alloc<A>::update(uint64_t datum) {
|
|
126
126
|
// no sign extension with 64 bits so no need to cast to signed value
|
|
127
127
|
HashState hashResult;
|
|
128
128
|
HllUtil<A>::hash(&datum, sizeof(uint64_t), DEFAULT_SEED, hashResult);
|
|
@@ -130,53 +130,53 @@ void hll_sketch_alloc<A>::update(const uint64_t datum) {
|
|
|
130
130
|
}
|
|
131
131
|
|
|
132
132
|
template<typename A>
|
|
133
|
-
void hll_sketch_alloc<A>::update(
|
|
133
|
+
void hll_sketch_alloc<A>::update(uint32_t datum) {
|
|
134
134
|
update(static_cast<int32_t>(datum));
|
|
135
135
|
}
|
|
136
136
|
|
|
137
137
|
template<typename A>
|
|
138
|
-
void hll_sketch_alloc<A>::update(
|
|
138
|
+
void hll_sketch_alloc<A>::update(uint16_t datum) {
|
|
139
139
|
update(static_cast<int16_t>(datum));
|
|
140
140
|
}
|
|
141
141
|
|
|
142
142
|
template<typename A>
|
|
143
|
-
void hll_sketch_alloc<A>::update(
|
|
143
|
+
void hll_sketch_alloc<A>::update(uint8_t datum) {
|
|
144
144
|
update(static_cast<int8_t>(datum));
|
|
145
145
|
}
|
|
146
146
|
|
|
147
147
|
template<typename A>
|
|
148
|
-
void hll_sketch_alloc<A>::update(
|
|
148
|
+
void hll_sketch_alloc<A>::update(int64_t datum) {
|
|
149
149
|
HashState hashResult;
|
|
150
150
|
HllUtil<A>::hash(&datum, sizeof(int64_t), DEFAULT_SEED, hashResult);
|
|
151
151
|
coupon_update(HllUtil<A>::coupon(hashResult));
|
|
152
152
|
}
|
|
153
153
|
|
|
154
154
|
template<typename A>
|
|
155
|
-
void hll_sketch_alloc<A>::update(
|
|
156
|
-
int64_t val = static_cast<int64_t>(datum);
|
|
155
|
+
void hll_sketch_alloc<A>::update(int32_t datum) {
|
|
156
|
+
const int64_t val = static_cast<int64_t>(datum);
|
|
157
157
|
HashState hashResult;
|
|
158
158
|
HllUtil<A>::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult);
|
|
159
159
|
coupon_update(HllUtil<A>::coupon(hashResult));
|
|
160
160
|
}
|
|
161
161
|
|
|
162
162
|
template<typename A>
|
|
163
|
-
void hll_sketch_alloc<A>::update(
|
|
164
|
-
int64_t val = static_cast<int64_t>(datum);
|
|
163
|
+
void hll_sketch_alloc<A>::update(int16_t datum) {
|
|
164
|
+
const int64_t val = static_cast<int64_t>(datum);
|
|
165
165
|
HashState hashResult;
|
|
166
166
|
HllUtil<A>::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult);
|
|
167
167
|
coupon_update(HllUtil<A>::coupon(hashResult));
|
|
168
168
|
}
|
|
169
169
|
|
|
170
170
|
template<typename A>
|
|
171
|
-
void hll_sketch_alloc<A>::update(
|
|
172
|
-
int64_t val = static_cast<int64_t>(datum);
|
|
171
|
+
void hll_sketch_alloc<A>::update(int8_t datum) {
|
|
172
|
+
const int64_t val = static_cast<int64_t>(datum);
|
|
173
173
|
HashState hashResult;
|
|
174
174
|
HllUtil<A>::hash(&val, sizeof(int64_t), DEFAULT_SEED, hashResult);
|
|
175
175
|
coupon_update(HllUtil<A>::coupon(hashResult));
|
|
176
176
|
}
|
|
177
177
|
|
|
178
178
|
template<typename A>
|
|
179
|
-
void hll_sketch_alloc<A>::update(const double datum) {
|
|
179
|
+
void hll_sketch_alloc<A>::update(double datum) {
|
|
180
180
|
longDoubleUnion d;
|
|
181
181
|
d.doubleBytes = static_cast<double>(datum);
|
|
182
182
|
if (datum == 0.0) {
|
|
@@ -190,7 +190,7 @@ void hll_sketch_alloc<A>::update(const double datum) {
|
|
|
190
190
|
}
|
|
191
191
|
|
|
192
192
|
template<typename A>
|
|
193
|
-
void hll_sketch_alloc<A>::update(const float datum) {
|
|
193
|
+
void hll_sketch_alloc<A>::update(float datum) {
|
|
194
194
|
longDoubleUnion d;
|
|
195
195
|
d.doubleBytes = static_cast<double>(datum);
|
|
196
196
|
if (datum == 0.0) {
|
|
@@ -204,7 +204,7 @@ void hll_sketch_alloc<A>::update(const float datum) {
|
|
|
204
204
|
}
|
|
205
205
|
|
|
206
206
|
template<typename A>
|
|
207
|
-
void hll_sketch_alloc<A>::update(const void* data, const size_t lengthBytes) {
|
|
207
|
+
void hll_sketch_alloc<A>::update(const void* data, size_t lengthBytes) {
|
|
208
208
|
if (data == nullptr) { return; }
|
|
209
209
|
HashState hashResult;
|
|
210
210
|
HllUtil<A>::hash(data, lengthBytes, DEFAULT_SEED, hashResult);
|
|
@@ -212,8 +212,8 @@ void hll_sketch_alloc<A>::update(const void* data, const size_t lengthBytes) {
|
|
|
212
212
|
}
|
|
213
213
|
|
|
214
214
|
template<typename A>
|
|
215
|
-
void hll_sketch_alloc<A>::coupon_update(
|
|
216
|
-
if (coupon ==
|
|
215
|
+
void hll_sketch_alloc<A>::coupon_update(uint32_t coupon) {
|
|
216
|
+
if (coupon == hll_constants::EMPTY) { return; }
|
|
217
217
|
HllSketchImpl<A>* result = this->sketch_impl->couponUpdate(coupon);
|
|
218
218
|
if (result != this->sketch_impl) {
|
|
219
219
|
this->sketch_impl->get_deleter()(this->sketch_impl);
|
|
@@ -352,12 +352,12 @@ double hll_sketch_alloc<A>::get_composite_estimate() const {
|
|
|
352
352
|
}
|
|
353
353
|
|
|
354
354
|
template<typename A>
|
|
355
|
-
double hll_sketch_alloc<A>::get_lower_bound(
|
|
355
|
+
double hll_sketch_alloc<A>::get_lower_bound(uint8_t numStdDev) const {
|
|
356
356
|
return sketch_impl->getLowerBound(numStdDev);
|
|
357
357
|
}
|
|
358
358
|
|
|
359
359
|
template<typename A>
|
|
360
|
-
double hll_sketch_alloc<A>::get_upper_bound(
|
|
360
|
+
double hll_sketch_alloc<A>::get_upper_bound(uint8_t numStdDev) const {
|
|
361
361
|
return sketch_impl->getUpperBound(numStdDev);
|
|
362
362
|
}
|
|
363
363
|
|
|
@@ -367,7 +367,7 @@ hll_mode hll_sketch_alloc<A>::get_current_mode() const {
|
|
|
367
367
|
}
|
|
368
368
|
|
|
369
369
|
template<typename A>
|
|
370
|
-
|
|
370
|
+
uint8_t hll_sketch_alloc<A>::get_lg_config_k() const {
|
|
371
371
|
return sketch_impl->getLgConfigK();
|
|
372
372
|
}
|
|
373
373
|
|
|
@@ -387,12 +387,12 @@ bool hll_sketch_alloc<A>::is_estimation_mode() const {
|
|
|
387
387
|
}
|
|
388
388
|
|
|
389
389
|
template<typename A>
|
|
390
|
-
|
|
390
|
+
uint32_t hll_sketch_alloc<A>::get_updatable_serialization_bytes() const {
|
|
391
391
|
return sketch_impl->getUpdatableSerializationBytes();
|
|
392
392
|
}
|
|
393
393
|
|
|
394
394
|
template<typename A>
|
|
395
|
-
|
|
395
|
+
uint32_t hll_sketch_alloc<A>::get_compact_serialization_bytes() const {
|
|
396
396
|
return sketch_impl->getCompactSerializationBytes();
|
|
397
397
|
}
|
|
398
398
|
|
|
@@ -435,23 +435,23 @@ std::string hll_sketch_alloc<A>::mode_as_string() const {
|
|
|
435
435
|
}
|
|
436
436
|
|
|
437
437
|
template<typename A>
|
|
438
|
-
|
|
438
|
+
uint32_t hll_sketch_alloc<A>::get_max_updatable_serialization_bytes(uint8_t lg_config_k,
|
|
439
439
|
const target_hll_type tgtHllType) {
|
|
440
|
-
|
|
440
|
+
uint32_t arrBytes;
|
|
441
441
|
if (tgtHllType == target_hll_type::HLL_4) {
|
|
442
|
-
const
|
|
442
|
+
const uint32_t auxBytes = 4 << hll_constants::LG_AUX_ARR_INTS[lg_config_k];
|
|
443
443
|
arrBytes = HllArray<A>::hll4ArrBytes(lg_config_k) + auxBytes;
|
|
444
444
|
} else if (tgtHllType == target_hll_type::HLL_6) {
|
|
445
445
|
arrBytes = HllArray<A>::hll6ArrBytes(lg_config_k);
|
|
446
446
|
} else { //HLL_8
|
|
447
447
|
arrBytes = HllArray<A>::hll8ArrBytes(lg_config_k);
|
|
448
448
|
}
|
|
449
|
-
return
|
|
449
|
+
return hll_constants::HLL_BYTE_ARR_START + arrBytes;
|
|
450
450
|
}
|
|
451
451
|
|
|
452
452
|
template<typename A>
|
|
453
|
-
double hll_sketch_alloc<A>::get_rel_err(
|
|
454
|
-
|
|
453
|
+
double hll_sketch_alloc<A>::get_rel_err(bool upperBound, bool unioned,
|
|
454
|
+
uint8_t lg_config_k, uint8_t numStdDev) {
|
|
455
455
|
return HllUtil<A>::getRelErr(upperBound, unioned, lg_config_k, numStdDev);
|
|
456
456
|
}
|
|
457
457
|
|
|
@@ -26,12 +26,12 @@
|
|
|
26
26
|
namespace datasketches {
|
|
27
27
|
|
|
28
28
|
template<typename A>
|
|
29
|
-
HllSketchImpl<A>::HllSketchImpl(
|
|
30
|
-
|
|
31
|
-
:
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
29
|
+
HllSketchImpl<A>::HllSketchImpl(uint8_t lgConfigK, target_hll_type tgtHllType,
|
|
30
|
+
hll_mode mode, bool startFullSize)
|
|
31
|
+
: lgConfigK_(lgConfigK),
|
|
32
|
+
tgtHllType_(tgtHllType),
|
|
33
|
+
mode_(mode),
|
|
34
|
+
startFullSize_(startFullSize)
|
|
35
35
|
{
|
|
36
36
|
}
|
|
37
37
|
|
|
@@ -40,7 +40,7 @@ HllSketchImpl<A>::~HllSketchImpl() {
|
|
|
40
40
|
}
|
|
41
41
|
|
|
42
42
|
template<typename A>
|
|
43
|
-
target_hll_type HllSketchImpl<A>::extractTgtHllType(const uint8_t modeByte) {
|
|
43
|
+
target_hll_type HllSketchImpl<A>::extractTgtHllType(uint8_t modeByte) {
|
|
44
44
|
switch ((modeByte >> 2) & 0x3) {
|
|
45
45
|
case 0:
|
|
46
46
|
return target_hll_type::HLL_4;
|
|
@@ -54,7 +54,7 @@ target_hll_type HllSketchImpl<A>::extractTgtHllType(const uint8_t modeByte) {
|
|
|
54
54
|
}
|
|
55
55
|
|
|
56
56
|
template<typename A>
|
|
57
|
-
hll_mode HllSketchImpl<A>::extractCurMode(const uint8_t modeByte) {
|
|
57
|
+
hll_mode HllSketchImpl<A>::extractCurMode(uint8_t modeByte) {
|
|
58
58
|
switch (modeByte & 0x3) {
|
|
59
59
|
case 0:
|
|
60
60
|
return hll_mode::LIST;
|
|
@@ -68,12 +68,12 @@ hll_mode HllSketchImpl<A>::extractCurMode(const uint8_t modeByte) {
|
|
|
68
68
|
}
|
|
69
69
|
|
|
70
70
|
template<typename A>
|
|
71
|
-
uint8_t HllSketchImpl<A>::makeFlagsByte(
|
|
72
|
-
uint8_t flags
|
|
73
|
-
flags |= (isEmpty() ?
|
|
74
|
-
flags |= (compact ?
|
|
75
|
-
flags |= (isOutOfOrderFlag() ?
|
|
76
|
-
flags |= (
|
|
71
|
+
uint8_t HllSketchImpl<A>::makeFlagsByte(bool compact) const {
|
|
72
|
+
uint8_t flags = 0;
|
|
73
|
+
flags |= (isEmpty() ? hll_constants::EMPTY_FLAG_MASK : 0);
|
|
74
|
+
flags |= (compact ? hll_constants::COMPACT_FLAG_MASK : 0);
|
|
75
|
+
flags |= (isOutOfOrderFlag() ? hll_constants::OUT_OF_ORDER_FLAG_MASK : 0);
|
|
76
|
+
flags |= (startFullSize_ ? hll_constants::FULL_SIZE_FLAG_MASK : 0);
|
|
77
77
|
return flags;
|
|
78
78
|
}
|
|
79
79
|
|
|
@@ -92,7 +92,7 @@ template<typename A>
|
|
|
92
92
|
uint8_t HllSketchImpl<A>::makeModeByte() const {
|
|
93
93
|
uint8_t byte = 0;
|
|
94
94
|
|
|
95
|
-
switch (
|
|
95
|
+
switch (mode_) {
|
|
96
96
|
case LIST:
|
|
97
97
|
byte = 0;
|
|
98
98
|
break;
|
|
@@ -104,7 +104,7 @@ uint8_t HllSketchImpl<A>::makeModeByte() const {
|
|
|
104
104
|
break;
|
|
105
105
|
}
|
|
106
106
|
|
|
107
|
-
switch (
|
|
107
|
+
switch (tgtHllType_) {
|
|
108
108
|
case HLL_4:
|
|
109
109
|
byte |= (0 << 2); // for completeness
|
|
110
110
|
break;
|
|
@@ -121,27 +121,27 @@ uint8_t HllSketchImpl<A>::makeModeByte() const {
|
|
|
121
121
|
|
|
122
122
|
template<typename A>
|
|
123
123
|
HllSketchImpl<A>* HllSketchImpl<A>::reset() {
|
|
124
|
-
return HllSketchImplFactory<A>::reset(this,
|
|
124
|
+
return HllSketchImplFactory<A>::reset(this, startFullSize_);
|
|
125
125
|
}
|
|
126
126
|
|
|
127
127
|
template<typename A>
|
|
128
128
|
target_hll_type HllSketchImpl<A>::getTgtHllType() const {
|
|
129
|
-
return
|
|
129
|
+
return tgtHllType_;
|
|
130
130
|
}
|
|
131
131
|
|
|
132
132
|
template<typename A>
|
|
133
|
-
|
|
134
|
-
return
|
|
133
|
+
uint8_t HllSketchImpl<A>::getLgConfigK() const {
|
|
134
|
+
return lgConfigK_;
|
|
135
135
|
}
|
|
136
136
|
|
|
137
137
|
template<typename A>
|
|
138
138
|
hll_mode HllSketchImpl<A>::getCurMode() const {
|
|
139
|
-
return
|
|
139
|
+
return mode_;
|
|
140
140
|
}
|
|
141
141
|
|
|
142
142
|
template<typename A>
|
|
143
143
|
bool HllSketchImpl<A>::isStartFullSize() const {
|
|
144
|
-
return
|
|
144
|
+
return startFullSize_;
|
|
145
145
|
}
|
|
146
146
|
|
|
147
147
|
}
|
|
@@ -30,7 +30,7 @@ namespace datasketches {
|
|
|
30
30
|
template<typename A>
|
|
31
31
|
class HllSketchImpl {
|
|
32
32
|
public:
|
|
33
|
-
HllSketchImpl(
|
|
33
|
+
HllSketchImpl(uint8_t lgConfigK, target_hll_type tgtHllType, hll_mode mode, bool startFullSize);
|
|
34
34
|
virtual ~HllSketchImpl();
|
|
35
35
|
|
|
36
36
|
virtual void serialize(std::ostream& os, bool compact) const = 0;
|
|
@@ -42,25 +42,25 @@ class HllSketchImpl {
|
|
|
42
42
|
|
|
43
43
|
virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const = 0;
|
|
44
44
|
|
|
45
|
-
virtual HllSketchImpl* couponUpdate(
|
|
45
|
+
virtual HllSketchImpl* couponUpdate(uint32_t coupon) = 0;
|
|
46
46
|
|
|
47
47
|
hll_mode getCurMode() const;
|
|
48
48
|
|
|
49
49
|
virtual double getEstimate() const = 0;
|
|
50
50
|
virtual double getCompositeEstimate() const = 0;
|
|
51
|
-
virtual double getUpperBound(
|
|
52
|
-
virtual double getLowerBound(
|
|
51
|
+
virtual double getUpperBound(uint8_t numStdDev) const = 0;
|
|
52
|
+
virtual double getLowerBound(uint8_t numStdDev) const = 0;
|
|
53
53
|
|
|
54
|
-
inline
|
|
54
|
+
inline uint8_t getLgConfigK() const;
|
|
55
55
|
|
|
56
|
-
virtual
|
|
56
|
+
virtual uint32_t getMemDataStart() const = 0;
|
|
57
57
|
|
|
58
|
-
virtual
|
|
58
|
+
virtual uint8_t getPreInts() const = 0;
|
|
59
59
|
|
|
60
60
|
target_hll_type getTgtHllType() const;
|
|
61
61
|
|
|
62
|
-
virtual
|
|
63
|
-
virtual
|
|
62
|
+
virtual uint32_t getUpdatableSerializationBytes() const = 0;
|
|
63
|
+
virtual uint32_t getCompactSerializationBytes() const = 0;
|
|
64
64
|
|
|
65
65
|
virtual bool isCompact() const = 0;
|
|
66
66
|
virtual bool isEmpty() const = 0;
|
|
@@ -75,10 +75,10 @@ class HllSketchImpl {
|
|
|
75
75
|
uint8_t makeFlagsByte(bool compact) const;
|
|
76
76
|
uint8_t makeModeByte() const;
|
|
77
77
|
|
|
78
|
-
const
|
|
79
|
-
const target_hll_type
|
|
80
|
-
const hll_mode
|
|
81
|
-
const bool
|
|
78
|
+
const uint8_t lgConfigK_;
|
|
79
|
+
const target_hll_type tgtHllType_;
|
|
80
|
+
const hll_mode mode_;
|
|
81
|
+
const bool startFullSize_;
|
|
82
82
|
};
|
|
83
83
|
|
|
84
84
|
}
|