datasketches 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +7 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +24 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +14 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +121 -87
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +65 -80
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +28 -28
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +34 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +72 -62
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +68 -45
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +6 -6
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +9 -9
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +47 -56
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +34 -42
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +42 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +107 -58
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +4 -4
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +2 -0
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -28
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +51 -64
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +12 -12
- metadata +8 -3
|
@@ -33,7 +33,7 @@ class HllSketchImplFactory;
|
|
|
33
33
|
template<typename A>
|
|
34
34
|
class CouponList : public HllSketchImpl<A> {
|
|
35
35
|
public:
|
|
36
|
-
CouponList(
|
|
36
|
+
CouponList(uint8_t lgConfigK, target_hll_type tgtHllType, hll_mode mode, const A& allocator);
|
|
37
37
|
CouponList(const CouponList& that, target_hll_type tgtHllType);
|
|
38
38
|
|
|
39
39
|
static CouponList* newList(const void* bytes, size_t len, const A& allocator);
|
|
@@ -47,15 +47,15 @@ class CouponList : public HllSketchImpl<A> {
|
|
|
47
47
|
virtual CouponList* copy() const;
|
|
48
48
|
virtual CouponList* copyAs(target_hll_type tgtHllType) const;
|
|
49
49
|
|
|
50
|
-
virtual HllSketchImpl<A>* couponUpdate(
|
|
50
|
+
virtual HllSketchImpl<A>* couponUpdate(uint32_t coupon);
|
|
51
51
|
|
|
52
52
|
virtual double getEstimate() const;
|
|
53
53
|
virtual double getCompositeEstimate() const;
|
|
54
|
-
virtual double getUpperBound(
|
|
55
|
-
virtual double getLowerBound(
|
|
54
|
+
virtual double getUpperBound(uint8_t numStdDev) const;
|
|
55
|
+
virtual double getLowerBound(uint8_t numStdDev) const;
|
|
56
56
|
|
|
57
57
|
virtual bool isEmpty() const;
|
|
58
|
-
virtual
|
|
58
|
+
virtual uint32_t getCouponCount() const;
|
|
59
59
|
|
|
60
60
|
coupon_iterator<A> begin(bool all = false) const;
|
|
61
61
|
coupon_iterator<A> end() const;
|
|
@@ -63,24 +63,24 @@ class CouponList : public HllSketchImpl<A> {
|
|
|
63
63
|
protected:
|
|
64
64
|
using ClAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>>;
|
|
65
65
|
|
|
66
|
-
using vector_int = std::vector<
|
|
66
|
+
using vector_int = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
|
|
67
67
|
|
|
68
68
|
HllSketchImpl<A>* promoteHeapListToSet(CouponList& list);
|
|
69
69
|
HllSketchImpl<A>* promoteHeapListOrSetToHll(CouponList& src);
|
|
70
70
|
|
|
71
|
-
virtual
|
|
72
|
-
virtual
|
|
73
|
-
virtual
|
|
74
|
-
virtual
|
|
71
|
+
virtual uint32_t getUpdatableSerializationBytes() const;
|
|
72
|
+
virtual uint32_t getCompactSerializationBytes() const;
|
|
73
|
+
virtual uint32_t getMemDataStart() const;
|
|
74
|
+
virtual uint8_t getPreInts() const;
|
|
75
75
|
virtual bool isCompact() const;
|
|
76
76
|
virtual bool isOutOfOrderFlag() const;
|
|
77
77
|
virtual void putOutOfOrderFlag(bool oooFlag);
|
|
78
78
|
|
|
79
79
|
virtual A getAllocator() const;
|
|
80
80
|
|
|
81
|
-
|
|
82
|
-
bool
|
|
83
|
-
vector_int
|
|
81
|
+
uint32_t couponCount_;
|
|
82
|
+
bool oooFlag_;
|
|
83
|
+
vector_int coupons_;
|
|
84
84
|
|
|
85
85
|
friend class HllSketchImplFactory<A>;
|
|
86
86
|
};
|
|
@@ -102,10 +102,8 @@ double CubicInterpolation<A>::usingXAndYTables(const double xArr[], const double
|
|
|
102
102
|
else if (offset == numEntries-2) { // corner case
|
|
103
103
|
return (interpolateUsingXAndYTables<A>(xArr, yArr, (offset-2), x));
|
|
104
104
|
}
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
}
|
|
108
|
-
throw std::logic_error("Exception should be unreachable");
|
|
105
|
+
// main case
|
|
106
|
+
return (interpolateUsingXAndYTables<A>(xArr, yArr, (offset-1), x));
|
|
109
107
|
}
|
|
110
108
|
|
|
111
109
|
// In C: again-two-registers cubic_interpolate_aux L1368
|
|
@@ -68,7 +68,7 @@ double HarmonicNumbers<A>::harmonicNumber(const uint64_t x_i) {
|
|
|
68
68
|
if (x_i < NUM_EXACT_HARMONIC_NUMBERS) {
|
|
69
69
|
return tableOfExactHarmonicNumbers[x_i];
|
|
70
70
|
} else {
|
|
71
|
-
double x = x_i;
|
|
71
|
+
double x = static_cast<double>(x_i);
|
|
72
72
|
double invSq = 1.0 / (x * x);
|
|
73
73
|
double sum = log(x) + EULER_MASCHERONI_CONSTANT + (1.0 / (2.0 * x));
|
|
74
74
|
/* note: the number of terms included from this series expansion is appropriate
|
|
@@ -30,12 +30,12 @@
|
|
|
30
30
|
namespace datasketches {
|
|
31
31
|
|
|
32
32
|
template<typename A>
|
|
33
|
-
Hll4Array<A>::Hll4Array(
|
|
33
|
+
Hll4Array<A>::Hll4Array(uint8_t lgConfigK, bool startFullSize, const A& allocator):
|
|
34
34
|
HllArray<A>(lgConfigK, target_hll_type::HLL_4, startFullSize, allocator),
|
|
35
|
-
|
|
35
|
+
auxHashMap_(nullptr)
|
|
36
36
|
{
|
|
37
|
-
const
|
|
38
|
-
this->
|
|
37
|
+
const uint32_t numBytes = this->hll4ArrBytes(lgConfigK);
|
|
38
|
+
this->hllByteArr_.resize(numBytes, 0);
|
|
39
39
|
}
|
|
40
40
|
|
|
41
41
|
template<typename A>
|
|
@@ -44,18 +44,18 @@ Hll4Array<A>::Hll4Array(const Hll4Array<A>& that) :
|
|
|
44
44
|
{
|
|
45
45
|
// can determine hllByteArr size in parent class, no need to allocate here
|
|
46
46
|
// but parent class doesn't handle the auxHashMap
|
|
47
|
-
if (that.
|
|
48
|
-
|
|
47
|
+
if (that.auxHashMap_ != nullptr) {
|
|
48
|
+
auxHashMap_ = that.auxHashMap_->copy();
|
|
49
49
|
} else {
|
|
50
|
-
|
|
50
|
+
auxHashMap_ = nullptr;
|
|
51
51
|
}
|
|
52
52
|
}
|
|
53
53
|
|
|
54
54
|
template<typename A>
|
|
55
55
|
Hll4Array<A>::~Hll4Array() {
|
|
56
56
|
// hllByteArr deleted in parent
|
|
57
|
-
if (
|
|
58
|
-
AuxHashMap<A>::make_deleter()(
|
|
57
|
+
if (auxHashMap_ != nullptr) {
|
|
58
|
+
AuxHashMap<A>::make_deleter()(auxHashMap_);
|
|
59
59
|
}
|
|
60
60
|
}
|
|
61
61
|
|
|
@@ -78,91 +78,91 @@ Hll4Array<A>* Hll4Array<A>::copy() const {
|
|
|
78
78
|
}
|
|
79
79
|
|
|
80
80
|
template<typename A>
|
|
81
|
-
|
|
81
|
+
uint32_t Hll4Array<A>::getUpdatableSerializationBytes() const {
|
|
82
82
|
AuxHashMap<A>* auxHashMap = getAuxHashMap();
|
|
83
|
-
|
|
83
|
+
uint32_t auxBytes;
|
|
84
84
|
if (auxHashMap == nullptr) {
|
|
85
|
-
auxBytes = 4 <<
|
|
85
|
+
auxBytes = 4 << hll_constants::LG_AUX_ARR_INTS[this->lgConfigK_];
|
|
86
86
|
} else {
|
|
87
87
|
auxBytes = 4 << auxHashMap->getLgAuxArrInts();
|
|
88
88
|
}
|
|
89
|
-
return
|
|
89
|
+
return hll_constants::HLL_BYTE_ARR_START + getHllByteArrBytes() + auxBytes;
|
|
90
90
|
}
|
|
91
91
|
|
|
92
92
|
template<typename A>
|
|
93
|
-
|
|
94
|
-
return this->hll4ArrBytes(this->
|
|
93
|
+
uint32_t Hll4Array<A>::getHllByteArrBytes() const {
|
|
94
|
+
return this->hll4ArrBytes(this->lgConfigK_);
|
|
95
95
|
}
|
|
96
96
|
|
|
97
97
|
template<typename A>
|
|
98
98
|
AuxHashMap<A>* Hll4Array<A>::getAuxHashMap() const {
|
|
99
|
-
return
|
|
99
|
+
return auxHashMap_;
|
|
100
100
|
}
|
|
101
101
|
|
|
102
102
|
template<typename A>
|
|
103
103
|
void Hll4Array<A>::putAuxHashMap(AuxHashMap<A>* auxHashMap) {
|
|
104
|
-
this->
|
|
104
|
+
this->auxHashMap_ = auxHashMap;
|
|
105
105
|
}
|
|
106
106
|
|
|
107
107
|
template<typename A>
|
|
108
|
-
uint8_t Hll4Array<A>::getSlot(
|
|
109
|
-
const uint8_t byte = this->
|
|
108
|
+
uint8_t Hll4Array<A>::getSlot(uint32_t slotNo) const {
|
|
109
|
+
const uint8_t byte = this->hllByteArr_[slotNo >> 1];
|
|
110
110
|
if ((slotNo & 1) > 0) { // odd?
|
|
111
111
|
return byte >> 4;
|
|
112
112
|
}
|
|
113
|
-
return byte &
|
|
113
|
+
return byte & hll_constants::loNibbleMask;
|
|
114
114
|
}
|
|
115
115
|
|
|
116
116
|
template<typename A>
|
|
117
117
|
uint8_t Hll4Array<A>::get_value(uint32_t index) const {
|
|
118
118
|
const uint8_t value = getSlot(index);
|
|
119
|
-
if (value !=
|
|
120
|
-
return
|
|
119
|
+
if (value != hll_constants::AUX_TOKEN) return value + this->curMin_;
|
|
120
|
+
return auxHashMap_->mustFindValueFor(index);
|
|
121
121
|
}
|
|
122
122
|
|
|
123
123
|
template<typename A>
|
|
124
|
-
HllSketchImpl<A>* Hll4Array<A>::couponUpdate(
|
|
124
|
+
HllSketchImpl<A>* Hll4Array<A>::couponUpdate(uint32_t coupon) {
|
|
125
125
|
internalCouponUpdate(coupon);
|
|
126
126
|
return this;
|
|
127
127
|
}
|
|
128
128
|
|
|
129
129
|
template<typename A>
|
|
130
|
-
void Hll4Array<A>::internalCouponUpdate(
|
|
131
|
-
const
|
|
132
|
-
if (newValue <= this->
|
|
130
|
+
void Hll4Array<A>::internalCouponUpdate(uint32_t coupon) {
|
|
131
|
+
const uint8_t newValue = HllUtil<A>::getValue(coupon);
|
|
132
|
+
if (newValue <= this->curMin_) {
|
|
133
133
|
return; // quick rejection, but only works for large N
|
|
134
134
|
}
|
|
135
|
-
const
|
|
136
|
-
const
|
|
135
|
+
const uint32_t configKmask = (1 << this->lgConfigK_) - 1;
|
|
136
|
+
const uint32_t slotNo = HllUtil<A>::getLow26(coupon) & configKmask;
|
|
137
137
|
internalHll4Update(slotNo, newValue);
|
|
138
138
|
}
|
|
139
139
|
|
|
140
140
|
template<typename A>
|
|
141
|
-
void Hll4Array<A>::putSlot(
|
|
142
|
-
const
|
|
143
|
-
const uint8_t oldValue = this->
|
|
141
|
+
void Hll4Array<A>::putSlot(uint32_t slotNo, uint8_t newValue) {
|
|
142
|
+
const uint32_t byteno = slotNo >> 1;
|
|
143
|
+
const uint8_t oldValue = this->hllByteArr_[byteno];
|
|
144
144
|
if ((slotNo & 1) == 0) { // set low nibble
|
|
145
|
-
this->
|
|
146
|
-
= ((oldValue &
|
|
145
|
+
this->hllByteArr_[byteno]
|
|
146
|
+
= ((oldValue & hll_constants::hiNibbleMask) | (newValue & hll_constants::loNibbleMask));
|
|
147
147
|
} else { // set high nibble
|
|
148
|
-
this->
|
|
149
|
-
= ((oldValue &
|
|
148
|
+
this->hllByteArr_[byteno]
|
|
149
|
+
= ((oldValue & hll_constants::loNibbleMask) | ((newValue << 4) & hll_constants::hiNibbleMask));
|
|
150
150
|
}
|
|
151
151
|
}
|
|
152
152
|
|
|
153
153
|
//In C: two-registers.c Line 836 in "hhb_abstract_set_slot_if_new_value_bigger" non-sparse
|
|
154
154
|
template<typename A>
|
|
155
|
-
void Hll4Array<A>::internalHll4Update(
|
|
155
|
+
void Hll4Array<A>::internalHll4Update(uint32_t slotNo, uint8_t newVal) {
|
|
156
156
|
|
|
157
|
-
const
|
|
157
|
+
const uint8_t rawStoredOldValue = getSlot(slotNo); // could be a 0
|
|
158
158
|
// this is provably a LB:
|
|
159
|
-
const
|
|
159
|
+
const uint8_t lbOnOldValue = rawStoredOldValue + this->curMin_; // lower bound, could be 0
|
|
160
160
|
|
|
161
161
|
if (newVal > lbOnOldValue) { // 842
|
|
162
162
|
// Note: if an AUX_TOKEN exists, then auxHashMap must already exist
|
|
163
163
|
// 846: rawStoredOldValue == AUX_TOKEN
|
|
164
|
-
const
|
|
165
|
-
? (lbOnOldValue) : (
|
|
164
|
+
const uint8_t actualOldValue = (rawStoredOldValue < hll_constants::AUX_TOKEN)
|
|
165
|
+
? (lbOnOldValue) : (auxHashMap_->mustFindValueFor(slotNo));
|
|
166
166
|
|
|
167
167
|
if (newVal > actualOldValue) { // 848: actualOldValue could still be 0; newValue > 0
|
|
168
168
|
// we know that the array will change, but we haven't actually updated yet
|
|
@@ -170,35 +170,36 @@ void Hll4Array<A>::internalHll4Update(const int slotNo, const int newVal) {
|
|
|
170
170
|
|
|
171
171
|
// newVal >= curMin
|
|
172
172
|
|
|
173
|
-
const
|
|
173
|
+
const uint8_t shiftedNewValue = newVal - this->curMin_; // 874
|
|
174
174
|
// redundant since we know newVal >= curMin,
|
|
175
175
|
// and lgConfigK bounds do not allow overflowing an int
|
|
176
176
|
//assert(shiftedNewValue >= 0);
|
|
177
177
|
|
|
178
|
-
if (rawStoredOldValue ==
|
|
178
|
+
if (rawStoredOldValue == hll_constants::AUX_TOKEN) { // 879
|
|
179
179
|
// Given that we have an AUX_TOKEN, there are 4 cases for how to
|
|
180
180
|
// actually modify the data structure
|
|
181
181
|
|
|
182
|
-
if (shiftedNewValue >=
|
|
182
|
+
if (shiftedNewValue >= hll_constants::AUX_TOKEN) { // case 1: 881
|
|
183
183
|
// the byte array already contains aux token
|
|
184
184
|
// This is the case where old and new values are both exceptions.
|
|
185
185
|
// The 4-bit array already is AUX_TOKEN, only need to update auxHashMap
|
|
186
|
-
|
|
186
|
+
auxHashMap_->mustReplace(slotNo, newVal);
|
|
187
187
|
}
|
|
188
188
|
else { // case 2: 885
|
|
189
189
|
// This is the hypothetical case where the old value is an exception and the new one is not,
|
|
190
190
|
// which is impossible given that curMin has not changed here and newVal > oldValue
|
|
191
191
|
}
|
|
192
192
|
} else { // rawStoredOldValue != AUX_TOKEN
|
|
193
|
-
if (shiftedNewValue >=
|
|
193
|
+
if (shiftedNewValue >= hll_constants::AUX_TOKEN) { // case 3: 892
|
|
194
194
|
// This is the case where the old value is not an exception and the new value is.
|
|
195
195
|
// The AUX_TOKEN must be stored in the 4-bit array and the new value
|
|
196
196
|
// added to the exception table
|
|
197
|
-
putSlot(slotNo,
|
|
198
|
-
if (
|
|
199
|
-
|
|
197
|
+
putSlot(slotNo, hll_constants::AUX_TOKEN);
|
|
198
|
+
if (auxHashMap_ == nullptr) {
|
|
199
|
+
auxHashMap_ = AuxHashMap<A>::newAuxHashMap(hll_constants::LG_AUX_ARR_INTS[this->lgConfigK_],
|
|
200
|
+
this->lgConfigK_, this->getAllocator());
|
|
200
201
|
}
|
|
201
|
-
|
|
202
|
+
auxHashMap_->mustAdd(slotNo, newVal);
|
|
202
203
|
}
|
|
203
204
|
else { // case 4: 897
|
|
204
205
|
// This is the case where neither the old value nor the new value is an exception.
|
|
@@ -208,9 +209,9 @@ void Hll4Array<A>::internalHll4Update(const int slotNo, const int newVal) {
|
|
|
208
209
|
}
|
|
209
210
|
|
|
210
211
|
// we just increased a pair value, so it might be time to change curMin
|
|
211
|
-
if (actualOldValue == this->
|
|
212
|
+
if (actualOldValue == this->curMin_) { // 908
|
|
212
213
|
this->decNumAtCurMin();
|
|
213
|
-
while (this->
|
|
214
|
+
while (this->numAtCurMin_ == 0) {
|
|
214
215
|
shiftToBiggerCurMin(); // increases curMin by 1, builds a new aux table
|
|
215
216
|
// shifts values in 4-bit table and recounts curMin
|
|
216
217
|
}
|
|
@@ -227,29 +228,29 @@ void Hll4Array<A>::internalHll4Update(const int slotNo, const int newVal) {
|
|
|
227
228
|
// In C: again-two-registers.c Lines 710 "hhb_shift_to_bigger_curmin"
|
|
228
229
|
template<typename A>
|
|
229
230
|
void Hll4Array<A>::shiftToBiggerCurMin() {
|
|
230
|
-
const
|
|
231
|
-
const
|
|
232
|
-
const
|
|
231
|
+
const uint8_t newCurMin = this->curMin_ + 1;
|
|
232
|
+
const uint32_t configK = 1 << this->lgConfigK_;
|
|
233
|
+
const uint32_t configKmask = configK - 1;
|
|
233
234
|
|
|
234
|
-
|
|
235
|
-
|
|
235
|
+
uint32_t numAtNewCurMin = 0;
|
|
236
|
+
uint32_t numAuxTokens = 0;
|
|
236
237
|
|
|
237
238
|
// Walk through the slots of 4-bit array decrementing stored values by one unless it
|
|
238
239
|
// equals AUX_TOKEN, where it is left alone but counted to be checked later.
|
|
239
240
|
// If oldStoredValue is 0 it is an error.
|
|
240
241
|
// If the decremented value is 0, we increment numAtNewCurMin.
|
|
241
242
|
// Because getNibble is masked to 4 bits oldStoredValue can never be > 15 or negative
|
|
242
|
-
for (
|
|
243
|
-
|
|
243
|
+
for (uint32_t i = 0; i < configK; i++) { //724
|
|
244
|
+
uint8_t oldStoredValue = getSlot(i);
|
|
244
245
|
if (oldStoredValue == 0) {
|
|
245
246
|
throw std::runtime_error("Array slots cannot be 0 at this point.");
|
|
246
247
|
}
|
|
247
|
-
if (oldStoredValue <
|
|
248
|
+
if (oldStoredValue < hll_constants::AUX_TOKEN) {
|
|
248
249
|
putSlot(i, --oldStoredValue);
|
|
249
250
|
if (oldStoredValue == 0) { numAtNewCurMin++; }
|
|
250
251
|
} else { //oldStoredValue == AUX_TOKEN
|
|
251
252
|
numAuxTokens++;
|
|
252
|
-
if (
|
|
253
|
+
if (auxHashMap_ == nullptr) {
|
|
253
254
|
throw std::logic_error("auxHashMap cannot be null at this point");
|
|
254
255
|
}
|
|
255
256
|
}
|
|
@@ -258,12 +259,12 @@ void Hll4Array<A>::shiftToBiggerCurMin() {
|
|
|
258
259
|
// If old AuxHashMap exists, walk through it updating some slots and build a new AuxHashMap
|
|
259
260
|
// if needed.
|
|
260
261
|
AuxHashMap<A>* newAuxMap = nullptr;
|
|
261
|
-
if (
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
262
|
+
if (auxHashMap_ != nullptr) {
|
|
263
|
+
uint32_t slotNum;
|
|
264
|
+
uint8_t oldActualVal;
|
|
265
|
+
uint8_t newShiftedVal;
|
|
265
266
|
|
|
266
|
-
for (auto coupon: *
|
|
267
|
+
for (const auto coupon: *auxHashMap_) {
|
|
267
268
|
slotNum = HllUtil<A>::getLow26(coupon) & configKmask;
|
|
268
269
|
oldActualVal = HllUtil<A>::getValue(coupon);
|
|
269
270
|
newShiftedVal = oldActualVal - newCurMin;
|
|
@@ -271,11 +272,11 @@ void Hll4Array<A>::shiftToBiggerCurMin() {
|
|
|
271
272
|
throw std::logic_error("oldActualVal < newCurMin when incrementing curMin");
|
|
272
273
|
}
|
|
273
274
|
|
|
274
|
-
if (getSlot(slotNum) !=
|
|
275
|
+
if (getSlot(slotNum) != hll_constants::AUX_TOKEN) {
|
|
275
276
|
throw std::logic_error("getSlot(slotNum) != AUX_TOKEN for item in auxiliary hash map");
|
|
276
277
|
}
|
|
277
278
|
// Array slot != AUX_TOKEN at getSlot(slotNum);
|
|
278
|
-
if (newShiftedVal <
|
|
279
|
+
if (newShiftedVal < hll_constants::AUX_TOKEN) { // 756
|
|
279
280
|
if (newShiftedVal != 14) {
|
|
280
281
|
throw std::logic_error("newShiftedVal != 14 for item in old auxHashMap despite curMin increment");
|
|
281
282
|
}
|
|
@@ -286,7 +287,8 @@ void Hll4Array<A>::shiftToBiggerCurMin() {
|
|
|
286
287
|
} else { //newShiftedVal >= AUX_TOKEN
|
|
287
288
|
// the former exception remains an exception, so must be added to the newAuxMap
|
|
288
289
|
if (newAuxMap == nullptr) {
|
|
289
|
-
newAuxMap = AuxHashMap<A>::newAuxHashMap(
|
|
290
|
+
newAuxMap = AuxHashMap<A>::newAuxHashMap(hll_constants::LG_AUX_ARR_INTS[this->lgConfigK_],
|
|
291
|
+
this->lgConfigK_, this->getAllocator());
|
|
290
292
|
}
|
|
291
293
|
newAuxMap->mustAdd(slotNum, oldActualVal);
|
|
292
294
|
}
|
|
@@ -305,28 +307,30 @@ void Hll4Array<A>::shiftToBiggerCurMin() {
|
|
|
305
307
|
}
|
|
306
308
|
}
|
|
307
309
|
|
|
308
|
-
if (
|
|
309
|
-
AuxHashMap<A>::make_deleter()(
|
|
310
|
+
if (auxHashMap_ != nullptr) {
|
|
311
|
+
AuxHashMap<A>::make_deleter()(auxHashMap_);
|
|
310
312
|
}
|
|
311
|
-
|
|
313
|
+
auxHashMap_ = newAuxMap;
|
|
312
314
|
|
|
313
|
-
this->
|
|
314
|
-
this->
|
|
315
|
+
this->curMin_ = newCurMin;
|
|
316
|
+
this->numAtCurMin_ = numAtNewCurMin;
|
|
315
317
|
}
|
|
316
318
|
|
|
317
319
|
template<typename A>
|
|
318
320
|
typename HllArray<A>::const_iterator Hll4Array<A>::begin(bool all) const {
|
|
319
|
-
return typename HllArray<A>::const_iterator(this->
|
|
321
|
+
return typename HllArray<A>::const_iterator(this->hllByteArr_.data(), 1 << this->lgConfigK_, 0, this->tgtHllType_,
|
|
322
|
+
auxHashMap_, this->curMin_, all);
|
|
320
323
|
}
|
|
321
324
|
|
|
322
325
|
template<typename A>
|
|
323
326
|
typename HllArray<A>::const_iterator Hll4Array<A>::end() const {
|
|
324
|
-
return typename HllArray<A>::const_iterator(this->
|
|
327
|
+
return typename HllArray<A>::const_iterator(this->hllByteArr_.data(), 1 << this->lgConfigK_, 1 << this->lgConfigK_,
|
|
328
|
+
this->tgtHllType_, auxHashMap_, this->curMin_, false);
|
|
325
329
|
}
|
|
326
330
|
|
|
327
331
|
template<typename A>
|
|
328
332
|
void Hll4Array<A>::mergeHll(const HllArray<A>& src) {
|
|
329
|
-
for (auto coupon: src) {
|
|
333
|
+
for (const auto coupon: src) {
|
|
330
334
|
internalCouponUpdate(coupon);
|
|
331
335
|
}
|
|
332
336
|
}
|
|
@@ -31,7 +31,7 @@ class Hll4Iterator;
|
|
|
31
31
|
template<typename A>
|
|
32
32
|
class Hll4Array final : public HllArray<A> {
|
|
33
33
|
public:
|
|
34
|
-
explicit Hll4Array(
|
|
34
|
+
explicit Hll4Array(uint8_t lgConfigK, bool startFullSize, const A& allocator);
|
|
35
35
|
explicit Hll4Array(const Hll4Array<A>& that);
|
|
36
36
|
|
|
37
37
|
virtual ~Hll4Array();
|
|
@@ -39,14 +39,14 @@ class Hll4Array final : public HllArray<A> {
|
|
|
39
39
|
|
|
40
40
|
virtual Hll4Array* copy() const;
|
|
41
41
|
|
|
42
|
-
inline uint8_t getSlot(
|
|
43
|
-
inline void putSlot(
|
|
42
|
+
inline uint8_t getSlot(uint32_t slotNo) const;
|
|
43
|
+
inline void putSlot(uint32_t slotNo, uint8_t value);
|
|
44
44
|
inline uint8_t get_value(uint32_t index) const;
|
|
45
45
|
|
|
46
|
-
virtual
|
|
47
|
-
virtual
|
|
46
|
+
virtual uint32_t getUpdatableSerializationBytes() const;
|
|
47
|
+
virtual uint32_t getHllByteArrBytes() const;
|
|
48
48
|
|
|
49
|
-
virtual HllSketchImpl<A>* couponUpdate(
|
|
49
|
+
virtual HllSketchImpl<A>* couponUpdate(uint32_t coupon) final;
|
|
50
50
|
void mergeHll(const HllArray<A>& src);
|
|
51
51
|
|
|
52
52
|
virtual AuxHashMap<A>* getAuxHashMap() const;
|
|
@@ -57,11 +57,11 @@ class Hll4Array final : public HllArray<A> {
|
|
|
57
57
|
virtual typename HllArray<A>::const_iterator end() const;
|
|
58
58
|
|
|
59
59
|
private:
|
|
60
|
-
void internalCouponUpdate(
|
|
61
|
-
void internalHll4Update(
|
|
60
|
+
void internalCouponUpdate(uint32_t coupon);
|
|
61
|
+
void internalHll4Update(uint32_t slotNo, uint8_t newVal);
|
|
62
62
|
void shiftToBiggerCurMin();
|
|
63
63
|
|
|
64
|
-
AuxHashMap<A>*
|
|
64
|
+
AuxHashMap<A>* auxHashMap_;
|
|
65
65
|
};
|
|
66
66
|
|
|
67
67
|
}
|