datasketches 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +7 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +24 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +14 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +121 -87
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +65 -80
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +28 -28
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +34 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +72 -62
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +68 -45
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +6 -6
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +9 -9
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +47 -56
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +34 -42
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +42 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +107 -58
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +4 -4
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +2 -0
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -28
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +51 -64
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +12 -12
- metadata +8 -3
|
@@ -39,7 +39,7 @@ public:
|
|
|
39
39
|
|
|
40
40
|
static CouponHashSet<A>* promoteListToSet(const CouponList<A>& list);
|
|
41
41
|
static HllArray<A>* promoteListOrSetToHll(const CouponList<A>& list);
|
|
42
|
-
static HllArray<A>* newHll(
|
|
42
|
+
static HllArray<A>* newHll(uint8_t lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator);
|
|
43
43
|
|
|
44
44
|
// resets the input impl, deleting the input pointer and returning a new pointer
|
|
45
45
|
static HllSketchImpl<A>* reset(HllSketchImpl<A>* impl, bool startFullSize);
|
|
@@ -53,7 +53,7 @@ template<typename A>
|
|
|
53
53
|
CouponHashSet<A>* HllSketchImplFactory<A>::promoteListToSet(const CouponList<A>& list) {
|
|
54
54
|
using ChsAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>>;
|
|
55
55
|
CouponHashSet<A>* chSet = new (ChsAlloc(list.getAllocator()).allocate(1)) CouponHashSet<A>(list.getLgConfigK(), list.getTgtHllType(), list.getAllocator());
|
|
56
|
-
for (auto coupon: list) {
|
|
56
|
+
for (const auto coupon: list) {
|
|
57
57
|
chSet->couponUpdate(coupon);
|
|
58
58
|
}
|
|
59
59
|
return chSet;
|
|
@@ -63,7 +63,7 @@ template<typename A>
|
|
|
63
63
|
HllArray<A>* HllSketchImplFactory<A>::promoteListOrSetToHll(const CouponList<A>& src) {
|
|
64
64
|
HllArray<A>* tgtHllArr = HllSketchImplFactory<A>::newHll(src.getLgConfigK(), src.getTgtHllType(), false, src.getAllocator());
|
|
65
65
|
tgtHllArr->putKxQ0(1 << src.getLgConfigK());
|
|
66
|
-
for (auto coupon: src) {
|
|
66
|
+
for (const auto coupon: src) {
|
|
67
67
|
tgtHllArr->couponUpdate(coupon);
|
|
68
68
|
}
|
|
69
69
|
tgtHllArr->putHipAccum(src.getEstimate());
|
|
@@ -75,12 +75,12 @@ template<typename A>
|
|
|
75
75
|
HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(std::istream& is, const A& allocator) {
|
|
76
76
|
// we'll hand off the sketch based on PreInts so we don't need
|
|
77
77
|
// to move the stream pointer back and forth -- perhaps somewhat fragile?
|
|
78
|
-
const
|
|
79
|
-
if (preInts ==
|
|
78
|
+
const uint8_t preInts = static_cast<uint8_t>(is.peek());
|
|
79
|
+
if (preInts == hll_constants::HLL_PREINTS) {
|
|
80
80
|
return HllArray<A>::newHll(is, allocator);
|
|
81
|
-
} else if (preInts ==
|
|
81
|
+
} else if (preInts == hll_constants::HASH_SET_PREINTS) {
|
|
82
82
|
return CouponHashSet<A>::newSet(is, allocator);
|
|
83
|
-
} else if (preInts ==
|
|
83
|
+
} else if (preInts == hll_constants::LIST_PREINTS) {
|
|
84
84
|
return CouponList<A>::newList(is, allocator);
|
|
85
85
|
} else {
|
|
86
86
|
throw std::invalid_argument("Attempt to deserialize unknown object type");
|
|
@@ -90,12 +90,12 @@ HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(std::istream& is, const A
|
|
|
90
90
|
template<typename A>
|
|
91
91
|
HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(const void* bytes, size_t len, const A& allocator) {
|
|
92
92
|
// read current mode directly
|
|
93
|
-
const
|
|
94
|
-
if (preInts ==
|
|
93
|
+
const uint8_t preInts = static_cast<const uint8_t*>(bytes)[0];
|
|
94
|
+
if (preInts == hll_constants::HLL_PREINTS) {
|
|
95
95
|
return HllArray<A>::newHll(bytes, len, allocator);
|
|
96
|
-
} else if (preInts ==
|
|
96
|
+
} else if (preInts == hll_constants::HASH_SET_PREINTS) {
|
|
97
97
|
return CouponHashSet<A>::newSet(bytes, len, allocator);
|
|
98
|
-
} else if (preInts ==
|
|
98
|
+
} else if (preInts == hll_constants::LIST_PREINTS) {
|
|
99
99
|
return CouponList<A>::newList(bytes, len, allocator);
|
|
100
100
|
} else {
|
|
101
101
|
throw std::invalid_argument("Attempt to deserialize unknown object type");
|
|
@@ -103,7 +103,7 @@ HllSketchImpl<A>* HllSketchImplFactory<A>::deserialize(const void* bytes, size_t
|
|
|
103
103
|
}
|
|
104
104
|
|
|
105
105
|
template<typename A>
|
|
106
|
-
HllArray<A>* HllSketchImplFactory<A>::newHll(
|
|
106
|
+
HllArray<A>* HllSketchImplFactory<A>::newHll(uint8_t lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator) {
|
|
107
107
|
switch (tgtHllType) {
|
|
108
108
|
case HLL_8:
|
|
109
109
|
using Hll8Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>>;
|
|
@@ -134,7 +134,7 @@ HllSketchImpl<A>* HllSketchImplFactory<A>::reset(HllSketchImpl<A>* impl, bool st
|
|
|
134
134
|
|
|
135
135
|
template<typename A>
|
|
136
136
|
Hll4Array<A>* HllSketchImplFactory<A>::convertToHll4(const HllArray<A>& srcHllArr) {
|
|
137
|
-
const
|
|
137
|
+
const uint8_t lgConfigK = srcHllArr.getLgConfigK();
|
|
138
138
|
using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
|
|
139
139
|
Hll4Array<A>* hll4Array = new (Hll4Alloc(srcHllArr.getAllocator()).allocate(1))
|
|
140
140
|
Hll4Array<A>(lgConfigK, srcHllArr.isStartFullSize(), srcHllArr.getAllocator());
|
|
@@ -146,7 +146,7 @@ Hll4Array<A>* HllSketchImplFactory<A>::convertToHll4(const HllArray<A>& srcHllAr
|
|
|
146
146
|
|
|
147
147
|
template<typename A>
|
|
148
148
|
Hll6Array<A>* HllSketchImplFactory<A>::convertToHll6(const HllArray<A>& srcHllArr) {
|
|
149
|
-
const
|
|
149
|
+
const uint8_t lgConfigK = srcHllArr.getLgConfigK();
|
|
150
150
|
using Hll6Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>>;
|
|
151
151
|
Hll6Array<A>* hll6Array = new (Hll6Alloc(srcHllArr.getAllocator()).allocate(1))
|
|
152
152
|
Hll6Array<A>(lgConfigK, srcHllArr.isStartFullSize(), srcHllArr.getAllocator());
|
|
@@ -158,7 +158,7 @@ Hll6Array<A>* HllSketchImplFactory<A>::convertToHll6(const HllArray<A>& srcHllAr
|
|
|
158
158
|
|
|
159
159
|
template<typename A>
|
|
160
160
|
Hll8Array<A>* HllSketchImplFactory<A>::convertToHll8(const HllArray<A>& srcHllArr) {
|
|
161
|
-
const
|
|
161
|
+
const uint8_t lgConfigK = srcHllArr.getLgConfigK();
|
|
162
162
|
using Hll8Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>>;
|
|
163
163
|
Hll8Array<A>* hll8Array = new (Hll8Alloc(srcHllArr.getAllocator()).allocate(1))
|
|
164
164
|
Hll8Array<A>(lgConfigK, srcHllArr.isStartFullSize(), srcHllArr.getAllocator());
|
|
@@ -32,151 +32,151 @@
|
|
|
32
32
|
namespace datasketches {
|
|
33
33
|
|
|
34
34
|
template<typename A>
|
|
35
|
-
hll_union_alloc<A>::hll_union_alloc(
|
|
36
|
-
|
|
37
|
-
|
|
35
|
+
hll_union_alloc<A>::hll_union_alloc(uint8_t lg_max_k, const A& allocator):
|
|
36
|
+
lg_max_k_(HllUtil<A>::checkLgK(lg_max_k)),
|
|
37
|
+
gadget_(lg_max_k, target_hll_type::HLL_8, false, allocator)
|
|
38
38
|
{}
|
|
39
39
|
|
|
40
40
|
template<typename A>
|
|
41
41
|
hll_sketch_alloc<A> hll_union_alloc<A>::get_result(target_hll_type target_type) const {
|
|
42
|
-
return hll_sketch_alloc<A>(
|
|
42
|
+
return hll_sketch_alloc<A>(gadget_, target_type);
|
|
43
43
|
}
|
|
44
44
|
|
|
45
45
|
template<typename A>
|
|
46
46
|
void hll_union_alloc<A>::update(const hll_sketch_alloc<A>& sketch) {
|
|
47
47
|
if (sketch.is_empty()) return;
|
|
48
|
-
union_impl(sketch,
|
|
48
|
+
union_impl(sketch, lg_max_k_);
|
|
49
49
|
}
|
|
50
50
|
|
|
51
51
|
template<typename A>
|
|
52
52
|
void hll_union_alloc<A>::update(hll_sketch_alloc<A>&& sketch) {
|
|
53
53
|
if (sketch.is_empty()) return;
|
|
54
|
-
if (
|
|
55
|
-
if (sketch.get_current_mode() == HLL || sketch.get_lg_config_k() ==
|
|
56
|
-
|
|
54
|
+
if (gadget_.is_empty() && sketch.get_target_type() == HLL_8 && sketch.get_lg_config_k() <= lg_max_k_) {
|
|
55
|
+
if (sketch.get_current_mode() == HLL || sketch.get_lg_config_k() == lg_max_k_) {
|
|
56
|
+
gadget_ = std::move(sketch);
|
|
57
57
|
}
|
|
58
58
|
}
|
|
59
|
-
union_impl(sketch,
|
|
59
|
+
union_impl(sketch, lg_max_k_);
|
|
60
60
|
}
|
|
61
61
|
|
|
62
62
|
template<typename A>
|
|
63
63
|
void hll_union_alloc<A>::update(const std::string& datum) {
|
|
64
|
-
|
|
64
|
+
gadget_.update(datum);
|
|
65
65
|
}
|
|
66
66
|
|
|
67
67
|
template<typename A>
|
|
68
|
-
void hll_union_alloc<A>::update(
|
|
69
|
-
|
|
68
|
+
void hll_union_alloc<A>::update(uint64_t datum) {
|
|
69
|
+
gadget_.update(datum);
|
|
70
70
|
}
|
|
71
71
|
|
|
72
72
|
template<typename A>
|
|
73
|
-
void hll_union_alloc<A>::update(
|
|
74
|
-
|
|
73
|
+
void hll_union_alloc<A>::update(uint32_t datum) {
|
|
74
|
+
gadget_.update(datum);
|
|
75
75
|
}
|
|
76
76
|
|
|
77
77
|
template<typename A>
|
|
78
|
-
void hll_union_alloc<A>::update(
|
|
79
|
-
|
|
78
|
+
void hll_union_alloc<A>::update(uint16_t datum) {
|
|
79
|
+
gadget_.update(datum);
|
|
80
80
|
}
|
|
81
81
|
|
|
82
82
|
template<typename A>
|
|
83
|
-
void hll_union_alloc<A>::update(
|
|
84
|
-
|
|
83
|
+
void hll_union_alloc<A>::update(uint8_t datum) {
|
|
84
|
+
gadget_.update(datum);
|
|
85
85
|
}
|
|
86
86
|
|
|
87
87
|
template<typename A>
|
|
88
|
-
void hll_union_alloc<A>::update(
|
|
89
|
-
|
|
88
|
+
void hll_union_alloc<A>::update(int64_t datum) {
|
|
89
|
+
gadget_.update(datum);
|
|
90
90
|
}
|
|
91
91
|
|
|
92
92
|
template<typename A>
|
|
93
|
-
void hll_union_alloc<A>::update(
|
|
94
|
-
|
|
93
|
+
void hll_union_alloc<A>::update(int32_t datum) {
|
|
94
|
+
gadget_.update(datum);
|
|
95
95
|
}
|
|
96
96
|
|
|
97
97
|
template<typename A>
|
|
98
|
-
void hll_union_alloc<A>::update(
|
|
99
|
-
|
|
98
|
+
void hll_union_alloc<A>::update(int16_t datum) {
|
|
99
|
+
gadget_.update(datum);
|
|
100
100
|
}
|
|
101
101
|
|
|
102
102
|
template<typename A>
|
|
103
|
-
void hll_union_alloc<A>::update(
|
|
104
|
-
|
|
103
|
+
void hll_union_alloc<A>::update(int8_t datum) {
|
|
104
|
+
gadget_.update(datum);
|
|
105
105
|
}
|
|
106
106
|
|
|
107
107
|
template<typename A>
|
|
108
|
-
void hll_union_alloc<A>::update(
|
|
109
|
-
|
|
108
|
+
void hll_union_alloc<A>::update(double datum) {
|
|
109
|
+
gadget_.update(datum);
|
|
110
110
|
}
|
|
111
111
|
|
|
112
112
|
template<typename A>
|
|
113
|
-
void hll_union_alloc<A>::update(
|
|
114
|
-
|
|
113
|
+
void hll_union_alloc<A>::update(float datum) {
|
|
114
|
+
gadget_.update(datum);
|
|
115
115
|
}
|
|
116
116
|
|
|
117
117
|
template<typename A>
|
|
118
|
-
void hll_union_alloc<A>::update(const void* data,
|
|
119
|
-
|
|
118
|
+
void hll_union_alloc<A>::update(const void* data, size_t length_bytes) {
|
|
119
|
+
gadget_.update(data, length_bytes);
|
|
120
120
|
}
|
|
121
121
|
|
|
122
122
|
template<typename A>
|
|
123
|
-
void hll_union_alloc<A>::coupon_update(
|
|
123
|
+
void hll_union_alloc<A>::coupon_update(uint32_t coupon) {
|
|
124
124
|
if (coupon == HllUtil<A>::EMPTY) { return; }
|
|
125
|
-
HllSketchImpl<A>* result =
|
|
126
|
-
if (result !=
|
|
127
|
-
if (
|
|
128
|
-
|
|
125
|
+
HllSketchImpl<A>* result = gadget_.sketch_impl->coupon_update(coupon);
|
|
126
|
+
if (result != gadget_.sketch_impl) {
|
|
127
|
+
if (gadget_.sketch_impl != nullptr) { gadget_.sketch_impl->get_deleter()(gadget_.sketch_impl); }
|
|
128
|
+
gadget_.sketch_impl = result;
|
|
129
129
|
}
|
|
130
130
|
}
|
|
131
131
|
|
|
132
132
|
template<typename A>
|
|
133
133
|
double hll_union_alloc<A>::get_estimate() const {
|
|
134
|
-
return
|
|
134
|
+
return gadget_.get_estimate();
|
|
135
135
|
}
|
|
136
136
|
|
|
137
137
|
template<typename A>
|
|
138
138
|
double hll_union_alloc<A>::get_composite_estimate() const {
|
|
139
|
-
return
|
|
139
|
+
return gadget_.get_composite_estimate();
|
|
140
140
|
}
|
|
141
141
|
|
|
142
142
|
template<typename A>
|
|
143
|
-
double hll_union_alloc<A>::get_lower_bound(
|
|
144
|
-
return
|
|
143
|
+
double hll_union_alloc<A>::get_lower_bound(uint8_t num_std_dev) const {
|
|
144
|
+
return gadget_.get_lower_bound(num_std_dev);
|
|
145
145
|
}
|
|
146
146
|
|
|
147
147
|
template<typename A>
|
|
148
|
-
double hll_union_alloc<A>::get_upper_bound(
|
|
149
|
-
return
|
|
148
|
+
double hll_union_alloc<A>::get_upper_bound(uint8_t num_std_dev) const {
|
|
149
|
+
return gadget_.get_upper_bound(num_std_dev);
|
|
150
150
|
}
|
|
151
151
|
|
|
152
152
|
template<typename A>
|
|
153
|
-
|
|
154
|
-
return
|
|
153
|
+
uint8_t hll_union_alloc<A>::get_lg_config_k() const {
|
|
154
|
+
return gadget_.get_lg_config_k();
|
|
155
155
|
}
|
|
156
156
|
|
|
157
157
|
template<typename A>
|
|
158
158
|
void hll_union_alloc<A>::reset() {
|
|
159
|
-
|
|
159
|
+
gadget_.reset();
|
|
160
160
|
}
|
|
161
161
|
|
|
162
162
|
template<typename A>
|
|
163
163
|
bool hll_union_alloc<A>::is_empty() const {
|
|
164
|
-
return
|
|
164
|
+
return gadget_.is_empty();
|
|
165
165
|
}
|
|
166
166
|
|
|
167
167
|
template<typename A>
|
|
168
168
|
bool hll_union_alloc<A>::is_out_of_order_flag() const {
|
|
169
|
-
return
|
|
169
|
+
return gadget_.is_out_of_order_flag();
|
|
170
170
|
}
|
|
171
171
|
|
|
172
172
|
template<typename A>
|
|
173
173
|
hll_mode hll_union_alloc<A>::get_current_mode() const {
|
|
174
|
-
return
|
|
174
|
+
return gadget_.get_current_mode();
|
|
175
175
|
}
|
|
176
176
|
|
|
177
177
|
template<typename A>
|
|
178
178
|
bool hll_union_alloc<A>::is_estimation_mode() const {
|
|
179
|
-
return
|
|
179
|
+
return gadget_.is_estimation_mode();
|
|
180
180
|
}
|
|
181
181
|
|
|
182
182
|
template<typename A>
|
|
@@ -185,13 +185,13 @@ target_hll_type hll_union_alloc<A>::get_target_type() const {
|
|
|
185
185
|
}
|
|
186
186
|
|
|
187
187
|
template<typename A>
|
|
188
|
-
double hll_union_alloc<A>::get_rel_err(
|
|
189
|
-
|
|
188
|
+
double hll_union_alloc<A>::get_rel_err(bool upper_bound, bool unioned,
|
|
189
|
+
uint8_t lg_config_k, uint8_t num_std_dev) {
|
|
190
190
|
return HllUtil<A>::getRelErr(upper_bound, unioned, lg_config_k, num_std_dev);
|
|
191
191
|
}
|
|
192
192
|
|
|
193
193
|
template<typename A>
|
|
194
|
-
HllSketchImpl<A>* hll_union_alloc<A>::copy_or_downsample(const HllSketchImpl<A>* src_impl,
|
|
194
|
+
HllSketchImpl<A>* hll_union_alloc<A>::copy_or_downsample(const HllSketchImpl<A>* src_impl, uint8_t tgt_lg_k) {
|
|
195
195
|
if (src_impl->getCurMode() != HLL) {
|
|
196
196
|
throw std::logic_error("Attempt to downsample non-HLL sketch");
|
|
197
197
|
}
|
|
@@ -210,7 +210,7 @@ HllSketchImpl<A>* hll_union_alloc<A>::copy_or_downsample(const HllSketchImpl<A>*
|
|
|
210
210
|
}
|
|
211
211
|
|
|
212
212
|
template<typename A>
|
|
213
|
-
inline HllSketchImpl<A>* hll_union_alloc<A>::leak_free_coupon_update(HllSketchImpl<A>* impl,
|
|
213
|
+
inline HllSketchImpl<A>* hll_union_alloc<A>::leak_free_coupon_update(HllSketchImpl<A>* impl, uint32_t coupon) {
|
|
214
214
|
HllSketchImpl<A>* result = impl->couponUpdate(coupon);
|
|
215
215
|
if (result != impl) {
|
|
216
216
|
impl->get_deleter()(impl);
|
|
@@ -219,13 +219,13 @@ inline HllSketchImpl<A>* hll_union_alloc<A>::leak_free_coupon_update(HllSketchIm
|
|
|
219
219
|
}
|
|
220
220
|
|
|
221
221
|
template<typename A>
|
|
222
|
-
void hll_union_alloc<A>::union_impl(const hll_sketch_alloc<A>& sketch,
|
|
222
|
+
void hll_union_alloc<A>::union_impl(const hll_sketch_alloc<A>& sketch, uint8_t lg_max_k) {
|
|
223
223
|
const HllSketchImpl<A>* src_impl = sketch.sketch_impl; //default
|
|
224
|
-
HllSketchImpl<A>* dst_impl =
|
|
224
|
+
HllSketchImpl<A>* dst_impl = gadget_.sketch_impl; //default
|
|
225
225
|
if (src_impl->getCurMode() == LIST || src_impl->getCurMode() == SET) {
|
|
226
226
|
if (dst_impl->isEmpty() && src_impl->getLgConfigK() == dst_impl->getLgConfigK()) {
|
|
227
227
|
dst_impl = src_impl->copyAs(HLL_8);
|
|
228
|
-
|
|
228
|
+
gadget_.sketch_impl->get_deleter()(gadget_.sketch_impl); // gadget to be replaced
|
|
229
229
|
} else {
|
|
230
230
|
const CouponList<A>* src = static_cast<const CouponList<A>*>(src_impl);
|
|
231
231
|
for (auto coupon: *src) {
|
|
@@ -239,11 +239,11 @@ void hll_union_alloc<A>::union_impl(const hll_sketch_alloc<A>& sketch, const int
|
|
|
239
239
|
const CouponList<A>* src = static_cast<const CouponList<A>*>(dst_impl);
|
|
240
240
|
dst_impl = copy_or_downsample(src_impl, lg_max_k);
|
|
241
241
|
static_cast<Hll8Array<A>*>(dst_impl)->mergeList(*src);
|
|
242
|
-
|
|
242
|
+
gadget_.sketch_impl->get_deleter()(gadget_.sketch_impl); // gadget to be replaced
|
|
243
243
|
} else { // gadget is HLL
|
|
244
244
|
if (src_impl->getLgConfigK() < dst_impl->getLgConfigK()) {
|
|
245
245
|
dst_impl = copy_or_downsample(dst_impl, sketch.get_lg_config_k());
|
|
246
|
-
|
|
246
|
+
gadget_.sketch_impl->get_deleter()(gadget_.sketch_impl); // gadget to be replaced
|
|
247
247
|
}
|
|
248
248
|
const HllArray<A>* src = static_cast<const HllArray<A>*>(src_impl);
|
|
249
249
|
static_cast<Hll8Array<A>*>(dst_impl)->mergeHll(*src);
|
|
@@ -252,9 +252,9 @@ void hll_union_alloc<A>::union_impl(const hll_sketch_alloc<A>& sketch, const int
|
|
|
252
252
|
}
|
|
253
253
|
} else { // src is HLL, gadget is empty
|
|
254
254
|
dst_impl = copy_or_downsample(src_impl, lg_max_k);
|
|
255
|
-
|
|
255
|
+
gadget_.sketch_impl->get_deleter()(gadget_.sketch_impl); // gadget to be replaced
|
|
256
256
|
}
|
|
257
|
-
|
|
257
|
+
gadget_.sketch_impl = dst_impl; // gadget replaced
|
|
258
258
|
}
|
|
259
259
|
|
|
260
260
|
}
|
|
@@ -34,140 +34,133 @@ namespace datasketches {
|
|
|
34
34
|
|
|
35
35
|
enum hll_mode { LIST = 0, SET, HLL };
|
|
36
36
|
|
|
37
|
+
namespace hll_constants {
|
|
38
|
+
|
|
39
|
+
// preamble stuff
|
|
40
|
+
static const uint8_t SER_VER = 1;
|
|
41
|
+
static const uint8_t FAMILY_ID = 7;
|
|
42
|
+
|
|
43
|
+
static const uint8_t EMPTY_FLAG_MASK = 4;
|
|
44
|
+
static const uint8_t COMPACT_FLAG_MASK = 8;
|
|
45
|
+
static const uint8_t OUT_OF_ORDER_FLAG_MASK = 16;
|
|
46
|
+
static const uint8_t FULL_SIZE_FLAG_MASK = 32;
|
|
47
|
+
|
|
48
|
+
static const uint32_t PREAMBLE_INTS_BYTE = 0;
|
|
49
|
+
static const uint32_t SER_VER_BYTE = 1;
|
|
50
|
+
static const uint32_t FAMILY_BYTE = 2;
|
|
51
|
+
static const uint32_t LG_K_BYTE = 3;
|
|
52
|
+
static const uint32_t LG_ARR_BYTE = 4;
|
|
53
|
+
static const uint32_t FLAGS_BYTE = 5;
|
|
54
|
+
static const uint32_t LIST_COUNT_BYTE = 6;
|
|
55
|
+
static const uint32_t HLL_CUR_MIN_BYTE = 6;
|
|
56
|
+
static const uint32_t MODE_BYTE = 7; // lo2bits = curMode, next 2 bits = tgtHllMode
|
|
57
|
+
|
|
58
|
+
// Coupon List
|
|
59
|
+
static const uint32_t LIST_INT_ARR_START = 8;
|
|
60
|
+
static const uint8_t LIST_PREINTS = 2;
|
|
61
|
+
// Coupon Hash Set
|
|
62
|
+
static const uint32_t HASH_SET_COUNT_INT = 8;
|
|
63
|
+
static const uint32_t HASH_SET_INT_ARR_START = 12;
|
|
64
|
+
static const uint8_t HASH_SET_PREINTS = 3;
|
|
65
|
+
// HLL
|
|
66
|
+
static const uint8_t HLL_PREINTS = 10;
|
|
67
|
+
static const uint32_t HLL_BYTE_ARR_START = 40;
|
|
68
|
+
static const uint32_t HIP_ACCUM_DOUBLE = 8;
|
|
69
|
+
static const uint32_t KXQ0_DOUBLE = 16;
|
|
70
|
+
static const uint32_t KXQ1_DOUBLE = 24;
|
|
71
|
+
static const uint32_t CUR_MIN_COUNT_INT = 32;
|
|
72
|
+
static const uint32_t AUX_COUNT_INT = 36;
|
|
73
|
+
|
|
74
|
+
static const uint32_t EMPTY_SKETCH_SIZE_BYTES = 8;
|
|
75
|
+
|
|
76
|
+
// other HllUtil stuff
|
|
77
|
+
static const uint8_t KEY_BITS_26 = 26;
|
|
78
|
+
static const uint8_t VAL_BITS_6 = 6;
|
|
79
|
+
static const uint32_t KEY_MASK_26 = (1 << KEY_BITS_26) - 1;
|
|
80
|
+
static const uint32_t VAL_MASK_6 = (1 << VAL_BITS_6) - 1;
|
|
81
|
+
static const uint32_t EMPTY = 0;
|
|
82
|
+
static const uint8_t MIN_LOG_K = 4;
|
|
83
|
+
static const uint8_t MAX_LOG_K = 21;
|
|
84
|
+
|
|
85
|
+
static const double HLL_HIP_RSE_FACTOR = 0.8325546; // sqrt(ln(2))
|
|
86
|
+
static const double HLL_NON_HIP_RSE_FACTOR = 1.03896; // sqrt((3 * ln(2)) - 1)
|
|
87
|
+
static const double COUPON_RSE_FACTOR = 0.409; // at transition point not the asymptote
|
|
88
|
+
static const double COUPON_RSE = COUPON_RSE_FACTOR / (1 << 13);
|
|
89
|
+
|
|
90
|
+
static const uint8_t LG_INIT_LIST_SIZE = 3;
|
|
91
|
+
static const uint8_t LG_INIT_SET_SIZE = 5;
|
|
92
|
+
static const uint32_t RESIZE_NUMER = 3;
|
|
93
|
+
static const uint32_t RESIZE_DENOM = 4;
|
|
94
|
+
|
|
95
|
+
static const uint8_t loNibbleMask = 0x0f;
|
|
96
|
+
static const uint8_t hiNibbleMask = 0xf0;
|
|
97
|
+
static const uint8_t AUX_TOKEN = 0xf;
|
|
98
|
+
|
|
99
|
+
/**
|
|
100
|
+
* Log2 table sizes for exceptions based on lgK from 0 to 26.
|
|
101
|
+
* However, only lgK from 4 to 21 are used.
|
|
102
|
+
*/
|
|
103
|
+
static const uint8_t LG_AUX_ARR_INTS[] = {
|
|
104
|
+
0, 2, 2, 2, 2, 2, 2, 3, 3, 3, // 0 - 9
|
|
105
|
+
4, 4, 5, 5, 6, 7, 8, 9, 10, 11, // 10-19
|
|
106
|
+
12, 13, 14, 15, 16, 17, 18 // 20-26
|
|
107
|
+
};
|
|
108
|
+
|
|
109
|
+
} // namespace hll_constants
|
|
110
|
+
|
|
111
|
+
|
|
37
112
|
// template provides internal consistency and allows static float values
|
|
38
113
|
// but we don't use the template parameter anywhere
|
|
39
114
|
template<typename A = std::allocator<uint8_t> >
|
|
40
115
|
class HllUtil final {
|
|
41
116
|
public:
|
|
42
|
-
|
|
43
|
-
static const
|
|
44
|
-
static const
|
|
45
|
-
|
|
46
|
-
static
|
|
47
|
-
static const int COMPACT_FLAG_MASK = 8;
|
|
48
|
-
static const int OUT_OF_ORDER_FLAG_MASK = 16;
|
|
49
|
-
static const int FULL_SIZE_FLAG_MASK = 32;
|
|
50
|
-
|
|
51
|
-
static const int PREAMBLE_INTS_BYTE = 0;
|
|
52
|
-
static const int SER_VER_BYTE = 1;
|
|
53
|
-
static const int FAMILY_BYTE = 2;
|
|
54
|
-
static const int LG_K_BYTE = 3;
|
|
55
|
-
static const int LG_ARR_BYTE = 4;
|
|
56
|
-
static const int FLAGS_BYTE = 5;
|
|
57
|
-
static const int LIST_COUNT_BYTE = 6;
|
|
58
|
-
static const int HLL_CUR_MIN_BYTE = 6;
|
|
59
|
-
static const int MODE_BYTE = 7; // lo2bits = curMode, next 2 bits = tgtHllMode
|
|
60
|
-
|
|
61
|
-
// Coupon List
|
|
62
|
-
static const int LIST_INT_ARR_START = 8;
|
|
63
|
-
static const int LIST_PREINTS = 2;
|
|
64
|
-
// Coupon Hash Set
|
|
65
|
-
static const int HASH_SET_COUNT_INT = 8;
|
|
66
|
-
static const int HASH_SET_INT_ARR_START = 12;
|
|
67
|
-
static const int HASH_SET_PREINTS = 3;
|
|
68
|
-
// HLL
|
|
69
|
-
static const int HLL_PREINTS = 10;
|
|
70
|
-
static const int HLL_BYTE_ARR_START = 40;
|
|
71
|
-
static const int HIP_ACCUM_DOUBLE = 8;
|
|
72
|
-
static const int KXQ0_DOUBLE = 16;
|
|
73
|
-
static const int KXQ1_DOUBLE = 24;
|
|
74
|
-
static const int CUR_MIN_COUNT_INT = 32;
|
|
75
|
-
static const int AUX_COUNT_INT = 36;
|
|
76
|
-
|
|
77
|
-
static const int EMPTY_SKETCH_SIZE_BYTES = 8;
|
|
78
|
-
|
|
79
|
-
// other HllUtil stuff
|
|
80
|
-
static const int KEY_BITS_26 = 26;
|
|
81
|
-
static const int VAL_BITS_6 = 6;
|
|
82
|
-
static const int KEY_MASK_26 = (1 << KEY_BITS_26) - 1;
|
|
83
|
-
static const int VAL_MASK_6 = (1 << VAL_BITS_6) - 1;
|
|
84
|
-
static const int EMPTY = 0;
|
|
85
|
-
static const int MIN_LOG_K = 4;
|
|
86
|
-
static const int MAX_LOG_K = 21;
|
|
87
|
-
|
|
88
|
-
static const double HLL_HIP_RSE_FACTOR; // sqrt(log(2.0)) = 0.8325546
|
|
89
|
-
static const double HLL_NON_HIP_RSE_FACTOR; // sqrt((3.0 * log(2.0)) - 1.0) = 1.03896
|
|
90
|
-
static const double COUPON_RSE_FACTOR; // 0.409 at transition point not the asymptote
|
|
91
|
-
static const double COUPON_RSE; // COUPON_RSE_FACTOR / (1 << 13);
|
|
92
|
-
|
|
93
|
-
static const int LG_INIT_LIST_SIZE = 3;
|
|
94
|
-
static const int LG_INIT_SET_SIZE = 5;
|
|
95
|
-
static const int RESIZE_NUMER = 3;
|
|
96
|
-
static const int RESIZE_DENOM = 4;
|
|
97
|
-
|
|
98
|
-
static const int loNibbleMask = 0x0f;
|
|
99
|
-
static const int hiNibbleMask = 0xf0;
|
|
100
|
-
static const int AUX_TOKEN = 0xf;
|
|
101
|
-
|
|
102
|
-
/**
|
|
103
|
-
* Log2 table sizes for exceptions based on lgK from 0 to 26.
|
|
104
|
-
* However, only lgK from 4 to 21 are used.
|
|
105
|
-
*/
|
|
106
|
-
static const int LG_AUX_ARR_INTS[];
|
|
107
|
-
|
|
108
|
-
static int coupon(const uint64_t hash[]);
|
|
109
|
-
static int coupon(const HashState& hashState);
|
|
110
|
-
static void hash(const void* key, int keyLen, uint64_t seed, HashState& result);
|
|
111
|
-
static int checkLgK(int lgK);
|
|
117
|
+
|
|
118
|
+
static uint32_t coupon(const uint64_t hash[]);
|
|
119
|
+
static uint32_t coupon(const HashState& hashState);
|
|
120
|
+
static void hash(const void* key, size_t keyLen, uint64_t seed, HashState& result);
|
|
121
|
+
static uint8_t checkLgK(uint8_t lgK);
|
|
112
122
|
static void checkMemSize(uint64_t minBytes, uint64_t capBytes);
|
|
113
|
-
static inline void checkNumStdDev(
|
|
114
|
-
static
|
|
115
|
-
static
|
|
116
|
-
static
|
|
117
|
-
static double invPow2(
|
|
118
|
-
static
|
|
119
|
-
static
|
|
120
|
-
static
|
|
121
|
-
static double getRelErr(bool upperBound, bool unioned,
|
|
122
|
-
int lgConfigK, int numStdDev);
|
|
123
|
+
static inline void checkNumStdDev(uint8_t numStdDev);
|
|
124
|
+
static uint32_t pair(uint32_t slotNo, uint8_t value);
|
|
125
|
+
static uint32_t getLow26(uint32_t coupon);
|
|
126
|
+
static uint8_t getValue(uint32_t coupon);
|
|
127
|
+
static double invPow2(uint8_t e);
|
|
128
|
+
static uint8_t ceilingPowerOf2(uint32_t n);
|
|
129
|
+
static uint8_t simpleIntLog2(uint32_t n); // n must be power of 2
|
|
130
|
+
static uint8_t computeLgArrInts(hll_mode mode, uint32_t count, uint8_t lgConfigK);
|
|
131
|
+
static double getRelErr(bool upperBound, bool unioned, uint8_t lgConfigK, uint8_t numStdDev);
|
|
123
132
|
};
|
|
124
133
|
|
|
125
134
|
template<typename A>
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
template<typename A>
|
|
132
|
-
const double HllUtil<A>::COUPON_RSE = COUPON_RSE_FACTOR / (1 << 13);
|
|
133
|
-
|
|
134
|
-
template<typename A>
|
|
135
|
-
const int HllUtil<A>::LG_AUX_ARR_INTS[] = {
|
|
136
|
-
0, 2, 2, 2, 2, 2, 2, 3, 3, 3, // 0 - 9
|
|
137
|
-
4, 4, 5, 5, 6, 7, 8, 9, 10, 11, // 10-19
|
|
138
|
-
12, 13, 14, 15, 16, 17, 18 // 20-26
|
|
139
|
-
};
|
|
140
|
-
|
|
141
|
-
template<typename A>
|
|
142
|
-
inline int HllUtil<A>::coupon(const uint64_t hash[]) {
|
|
143
|
-
int addr26 = (int) (hash[0] & KEY_MASK_26);
|
|
144
|
-
int lz = count_leading_zeros_in_u64(hash[1]);
|
|
145
|
-
int value = ((lz > 62 ? 62 : lz) + 1);
|
|
146
|
-
return (value << KEY_BITS_26) | addr26;
|
|
135
|
+
inline uint32_t HllUtil<A>::coupon(const uint64_t hash[]) {
|
|
136
|
+
uint32_t addr26 = hash[0] & hll_constants::KEY_MASK_26;
|
|
137
|
+
uint8_t lz = count_leading_zeros_in_u64(hash[1]);
|
|
138
|
+
uint8_t value = ((lz > 62 ? 62 : lz) + 1);
|
|
139
|
+
return (value << hll_constants::KEY_BITS_26) | addr26;
|
|
147
140
|
}
|
|
148
141
|
|
|
149
142
|
template<typename A>
|
|
150
|
-
inline
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
return (value << KEY_BITS_26) | addr26;
|
|
143
|
+
inline uint32_t HllUtil<A>::coupon(const HashState& hashState) {
|
|
144
|
+
uint32_t addr26 = (int) (hashState.h1 & hll_constants::KEY_MASK_26);
|
|
145
|
+
uint8_t lz = count_leading_zeros_in_u64(hashState.h2);
|
|
146
|
+
uint8_t value = ((lz > 62 ? 62 : lz) + 1);
|
|
147
|
+
return (value << hll_constants::KEY_BITS_26) | addr26;
|
|
155
148
|
}
|
|
156
149
|
|
|
157
150
|
template<typename A>
|
|
158
|
-
inline void HllUtil<A>::hash(const void* key,
|
|
151
|
+
inline void HllUtil<A>::hash(const void* key, size_t keyLen, uint64_t seed, HashState& result) {
|
|
159
152
|
MurmurHash3_x64_128(key, keyLen, seed, result);
|
|
160
153
|
}
|
|
161
154
|
|
|
162
155
|
template<typename A>
|
|
163
|
-
inline double HllUtil<A>::getRelErr(
|
|
164
|
-
|
|
156
|
+
inline double HllUtil<A>::getRelErr(bool upperBound, bool unioned,
|
|
157
|
+
uint8_t lgConfigK, uint8_t numStdDev) {
|
|
165
158
|
return RelativeErrorTables<A>::getRelErr(upperBound, unioned, lgConfigK, numStdDev);
|
|
166
159
|
}
|
|
167
160
|
|
|
168
161
|
template<typename A>
|
|
169
|
-
inline
|
|
170
|
-
if ((lgK >=
|
|
162
|
+
inline uint8_t HllUtil<A>::checkLgK(uint8_t lgK) {
|
|
163
|
+
if ((lgK >= hll_constants::MIN_LOG_K) && (lgK <= hll_constants::MAX_LOG_K)) {
|
|
171
164
|
return lgK;
|
|
172
165
|
} else {
|
|
173
166
|
throw std::invalid_argument("Invalid value of k: " + std::to_string(lgK));
|
|
@@ -175,36 +168,36 @@ inline int HllUtil<A>::checkLgK(const int lgK) {
|
|
|
175
168
|
}
|
|
176
169
|
|
|
177
170
|
template<typename A>
|
|
178
|
-
inline void HllUtil<A>::checkMemSize(
|
|
171
|
+
inline void HllUtil<A>::checkMemSize(uint64_t minBytes, uint64_t capBytes) {
|
|
179
172
|
if (capBytes < minBytes) {
|
|
180
173
|
throw std::invalid_argument("Given destination array is not large enough: " + std::to_string(capBytes));
|
|
181
174
|
}
|
|
182
175
|
}
|
|
183
176
|
|
|
184
177
|
template<typename A>
|
|
185
|
-
inline void HllUtil<A>::checkNumStdDev(
|
|
178
|
+
inline void HllUtil<A>::checkNumStdDev(uint8_t numStdDev) {
|
|
186
179
|
if ((numStdDev < 1) || (numStdDev > 3)) {
|
|
187
180
|
throw std::invalid_argument("NumStdDev may not be less than 1 or greater than 3.");
|
|
188
181
|
}
|
|
189
182
|
}
|
|
190
183
|
|
|
191
184
|
template<typename A>
|
|
192
|
-
inline
|
|
193
|
-
return (value <<
|
|
185
|
+
inline uint32_t HllUtil<A>::pair(uint32_t slotNo, uint8_t value) {
|
|
186
|
+
return (value << hll_constants::KEY_BITS_26) | (slotNo & hll_constants::KEY_MASK_26);
|
|
194
187
|
}
|
|
195
188
|
|
|
196
189
|
template<typename A>
|
|
197
|
-
inline
|
|
198
|
-
return coupon &
|
|
190
|
+
inline uint32_t HllUtil<A>::getLow26(uint32_t coupon) {
|
|
191
|
+
return coupon & hll_constants::KEY_MASK_26;
|
|
199
192
|
}
|
|
200
193
|
|
|
201
194
|
template<typename A>
|
|
202
|
-
inline
|
|
203
|
-
return coupon >>
|
|
195
|
+
inline uint8_t HllUtil<A>::getValue(uint32_t coupon) {
|
|
196
|
+
return coupon >> hll_constants::KEY_BITS_26;
|
|
204
197
|
}
|
|
205
198
|
|
|
206
199
|
template<typename A>
|
|
207
|
-
inline double HllUtil<A>::invPow2(
|
|
200
|
+
inline double HllUtil<A>::invPow2(uint8_t e) {
|
|
208
201
|
union {
|
|
209
202
|
long long longVal;
|
|
210
203
|
double doubleVal;
|
|
@@ -214,7 +207,7 @@ inline double HllUtil<A>::invPow2(const int e) {
|
|
|
214
207
|
}
|
|
215
208
|
|
|
216
209
|
template<typename A>
|
|
217
|
-
inline
|
|
210
|
+
inline uint8_t HllUtil<A>::simpleIntLog2(uint32_t n) {
|
|
218
211
|
if (n == 0) {
|
|
219
212
|
throw std::logic_error("cannot take log of 0");
|
|
220
213
|
}
|
|
@@ -222,16 +215,16 @@ inline uint32_t HllUtil<A>::simpleIntLog2(uint32_t n) {
|
|
|
222
215
|
}
|
|
223
216
|
|
|
224
217
|
template<typename A>
|
|
225
|
-
inline
|
|
218
|
+
inline uint8_t HllUtil<A>::computeLgArrInts(hll_mode mode, uint32_t count, uint8_t lgConfigK) {
|
|
226
219
|
// assume value missing and recompute
|
|
227
|
-
if (mode == LIST) { return
|
|
228
|
-
|
|
229
|
-
if ((
|
|
220
|
+
if (mode == LIST) { return hll_constants::LG_INIT_LIST_SIZE; }
|
|
221
|
+
uint32_t ceilPwr2 = ceiling_power_of_2(count);
|
|
222
|
+
if ((hll_constants::RESIZE_DENOM * count) > (hll_constants::RESIZE_NUMER * ceilPwr2)) { ceilPwr2 <<= 1;}
|
|
230
223
|
if (mode == SET) {
|
|
231
|
-
return
|
|
224
|
+
return std::max(hll_constants::LG_INIT_SET_SIZE, HllUtil<A>::simpleIntLog2(ceilPwr2));
|
|
232
225
|
}
|
|
233
226
|
//only used for HLL4
|
|
234
|
-
return
|
|
227
|
+
return std::max(hll_constants::LG_AUX_ARR_INTS[lgConfigK], HllUtil<A>::simpleIntLog2(ceilPwr2));
|
|
235
228
|
}
|
|
236
229
|
|
|
237
230
|
}
|