datasketches 0.2.0 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/README.md +7 -7
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
- data/vendor/datasketches-cpp/python/README.md +50 -50
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +10 -7
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
- metadata +18 -7
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -35,19 +35,19 @@
|
|
35
35
|
namespace datasketches {
|
36
36
|
|
37
37
|
template<typename A>
|
38
|
-
HllArray<A>::HllArray(
|
38
|
+
HllArray<A>::HllArray(uint8_t lgConfigK, target_hll_type tgtHllType, bool startFullSize, const A& allocator):
|
39
39
|
HllSketchImpl<A>(lgConfigK, tgtHllType, hll_mode::HLL, startFullSize),
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
40
|
+
hipAccum_(0.0),
|
41
|
+
kxq0_(1 << lgConfigK),
|
42
|
+
kxq1_(0.0),
|
43
|
+
hllByteArr_(allocator),
|
44
|
+
curMin_(0),
|
45
|
+
numAtCurMin_(1 << lgConfigK),
|
46
|
+
oooFlag_(false)
|
47
47
|
{}
|
48
48
|
|
49
49
|
template<typename A>
|
50
|
-
HllArray<A>* HllArray<A>::copyAs(
|
50
|
+
HllArray<A>* HllArray<A>::copyAs(target_hll_type tgtHllType) const {
|
51
51
|
if (tgtHllType == this->getTgtHllType()) {
|
52
52
|
return static_cast<HllArray*>(copy());
|
53
53
|
}
|
@@ -62,54 +62,54 @@ HllArray<A>* HllArray<A>::copyAs(const target_hll_type tgtHllType) const {
|
|
62
62
|
|
63
63
|
template<typename A>
|
64
64
|
HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len, const A& allocator) {
|
65
|
-
if (len <
|
65
|
+
if (len < hll_constants::HLL_BYTE_ARR_START) {
|
66
66
|
throw std::out_of_range("Input data length insufficient to hold HLL array");
|
67
67
|
}
|
68
68
|
|
69
69
|
const uint8_t* data = static_cast<const uint8_t*>(bytes);
|
70
|
-
if (data[
|
70
|
+
if (data[hll_constants::PREAMBLE_INTS_BYTE] != hll_constants::HLL_PREINTS) {
|
71
71
|
throw std::invalid_argument("Incorrect number of preInts in input stream");
|
72
72
|
}
|
73
|
-
if (data[
|
73
|
+
if (data[hll_constants::SER_VER_BYTE] != hll_constants::SER_VER) {
|
74
74
|
throw std::invalid_argument("Wrong ser ver in input stream");
|
75
75
|
}
|
76
|
-
if (data[
|
76
|
+
if (data[hll_constants::FAMILY_BYTE] != hll_constants::FAMILY_ID) {
|
77
77
|
throw std::invalid_argument("Input array is not an HLL sketch");
|
78
78
|
}
|
79
79
|
|
80
|
-
const hll_mode mode = HllSketchImpl<A>::extractCurMode(data[
|
80
|
+
const hll_mode mode = HllSketchImpl<A>::extractCurMode(data[hll_constants::MODE_BYTE]);
|
81
81
|
if (mode != HLL) {
|
82
|
-
throw std::invalid_argument("Calling HLL array
|
82
|
+
throw std::invalid_argument("Calling HLL array constructor with non-HLL mode data");
|
83
83
|
}
|
84
84
|
|
85
|
-
const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[
|
86
|
-
const bool oooFlag = ((data[
|
87
|
-
const bool comapctFlag = ((data[
|
88
|
-
const bool startFullSizeFlag = ((data[
|
85
|
+
const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[hll_constants::MODE_BYTE]);
|
86
|
+
const bool oooFlag = ((data[hll_constants::FLAGS_BYTE] & hll_constants::OUT_OF_ORDER_FLAG_MASK) ? true : false);
|
87
|
+
const bool comapctFlag = ((data[hll_constants::FLAGS_BYTE] & hll_constants::COMPACT_FLAG_MASK) ? true : false);
|
88
|
+
const bool startFullSizeFlag = ((data[hll_constants::FLAGS_BYTE] & hll_constants::FULL_SIZE_FLAG_MASK) ? true : false);
|
89
89
|
|
90
|
-
const
|
91
|
-
const
|
90
|
+
const uint8_t lgK = data[hll_constants::LG_K_BYTE];
|
91
|
+
const uint8_t curMin = data[hll_constants::HLL_CUR_MIN_BYTE];
|
92
92
|
|
93
|
-
const
|
94
|
-
if (len < static_cast<size_t>(
|
93
|
+
const uint32_t arrayBytes = hllArrBytes(tgtHllType, lgK);
|
94
|
+
if (len < static_cast<size_t>(hll_constants::HLL_BYTE_ARR_START + arrayBytes)) {
|
95
95
|
throw std::out_of_range("Input array too small to hold sketch image");
|
96
96
|
}
|
97
97
|
|
98
98
|
double hip, kxq0, kxq1;
|
99
|
-
std::memcpy(&hip, data +
|
100
|
-
std::memcpy(&kxq0, data +
|
101
|
-
std::memcpy(&kxq1, data +
|
99
|
+
std::memcpy(&hip, data + hll_constants::HIP_ACCUM_DOUBLE, sizeof(double));
|
100
|
+
std::memcpy(&kxq0, data + hll_constants::KXQ0_DOUBLE, sizeof(double));
|
101
|
+
std::memcpy(&kxq1, data + hll_constants::KXQ1_DOUBLE, sizeof(double));
|
102
102
|
|
103
|
-
|
104
|
-
std::memcpy(&numAtCurMin, data +
|
105
|
-
std::memcpy(&auxCount, data +
|
103
|
+
uint32_t numAtCurMin, auxCount;
|
104
|
+
std::memcpy(&numAtCurMin, data + hll_constants::CUR_MIN_COUNT_INT, sizeof(int));
|
105
|
+
std::memcpy(&auxCount, data + hll_constants::AUX_COUNT_INT, sizeof(int));
|
106
106
|
|
107
107
|
AuxHashMap<A>* auxHashMap = nullptr;
|
108
108
|
typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr;
|
109
109
|
aux_hash_map_ptr aux_ptr;
|
110
110
|
if (auxCount > 0) { // necessarily TgtHllType == HLL_4
|
111
|
-
|
112
|
-
const size_t offset =
|
111
|
+
uint8_t auxLgIntArrSize = data[4];
|
112
|
+
const size_t offset = hll_constants::HLL_BYTE_ARR_START + arrayBytes;
|
113
113
|
const uint8_t* auxDataStart = data + offset;
|
114
114
|
auxHashMap = AuxHashMap<A>::deserialize(auxDataStart, len - offset, lgK, auxCount, auxLgIntArrSize, comapctFlag, allocator);
|
115
115
|
aux_ptr = aux_hash_map_ptr(auxHashMap, auxHashMap->make_deleter());
|
@@ -123,7 +123,7 @@ HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len, const A& allocat
|
|
123
123
|
sketch->putKxQ1(kxq1);
|
124
124
|
sketch->putNumAtCurMin(numAtCurMin);
|
125
125
|
|
126
|
-
std::memcpy(sketch->
|
126
|
+
std::memcpy(sketch->hllByteArr_.data(), data + hll_constants::HLL_BYTE_ARR_START, arrayBytes);
|
127
127
|
|
128
128
|
if (auxHashMap != nullptr)
|
129
129
|
((Hll4Array<A>*)sketch)->putAuxHashMap(auxHashMap);
|
@@ -135,30 +135,30 @@ HllArray<A>* HllArray<A>::newHll(const void* bytes, size_t len, const A& allocat
|
|
135
135
|
template<typename A>
|
136
136
|
HllArray<A>* HllArray<A>::newHll(std::istream& is, const A& allocator) {
|
137
137
|
uint8_t listHeader[8];
|
138
|
-
|
138
|
+
read(is, listHeader, 8 * sizeof(uint8_t));
|
139
139
|
|
140
|
-
if (listHeader[
|
140
|
+
if (listHeader[hll_constants::PREAMBLE_INTS_BYTE] != hll_constants::HLL_PREINTS) {
|
141
141
|
throw std::invalid_argument("Incorrect number of preInts in input stream");
|
142
142
|
}
|
143
|
-
if (listHeader[
|
143
|
+
if (listHeader[hll_constants::SER_VER_BYTE] != hll_constants::SER_VER) {
|
144
144
|
throw std::invalid_argument("Wrong ser ver in input stream");
|
145
145
|
}
|
146
|
-
if (listHeader[
|
146
|
+
if (listHeader[hll_constants::FAMILY_BYTE] != hll_constants::FAMILY_ID) {
|
147
147
|
throw std::invalid_argument("Input stream is not an HLL sketch");
|
148
148
|
}
|
149
149
|
|
150
|
-
hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[
|
150
|
+
hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[hll_constants::MODE_BYTE]);
|
151
151
|
if (mode != HLL) {
|
152
152
|
throw std::invalid_argument("Calling HLL construtor with non-HLL mode data");
|
153
153
|
}
|
154
154
|
|
155
|
-
const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[
|
156
|
-
const bool oooFlag = ((listHeader[
|
157
|
-
const bool comapctFlag = ((listHeader[
|
158
|
-
const bool startFullSizeFlag = ((listHeader[
|
155
|
+
const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[hll_constants::MODE_BYTE]);
|
156
|
+
const bool oooFlag = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::OUT_OF_ORDER_FLAG_MASK) ? true : false);
|
157
|
+
const bool comapctFlag = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::COMPACT_FLAG_MASK) ? true : false);
|
158
|
+
const bool startFullSizeFlag = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::FULL_SIZE_FLAG_MASK) ? true : false);
|
159
159
|
|
160
|
-
const
|
161
|
-
const
|
160
|
+
const uint8_t lgK = listHeader[hll_constants::LG_K_BYTE];
|
161
|
+
const uint8_t curMin = listHeader[hll_constants::HLL_CUR_MIN_BYTE];
|
162
162
|
|
163
163
|
HllArray* sketch = HllSketchImplFactory<A>::newHll(lgK, tgtHllType, startFullSizeFlag, allocator);
|
164
164
|
typedef std::unique_ptr<HllArray<A>, std::function<void(HllSketchImpl<A>*)>> hll_array_ptr;
|
@@ -166,23 +166,21 @@ HllArray<A>* HllArray<A>::newHll(std::istream& is, const A& allocator) {
|
|
166
166
|
sketch->putCurMin(curMin);
|
167
167
|
sketch->putOutOfOrderFlag(oooFlag);
|
168
168
|
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
is.read((char*)&kxq1, sizeof(kxq1));
|
169
|
+
const auto hip = read<double>(is);
|
170
|
+
const auto kxq0 = read<double>(is);
|
171
|
+
const auto kxq1 = read<double>(is);
|
173
172
|
if (!oooFlag) sketch->putHipAccum(hip);
|
174
173
|
sketch->putKxQ0(kxq0);
|
175
174
|
sketch->putKxQ1(kxq1);
|
176
175
|
|
177
|
-
|
178
|
-
|
179
|
-
is.read((char*)&auxCount, sizeof(auxCount));
|
176
|
+
const auto numAtCurMin = read<uint32_t>(is);
|
177
|
+
const auto auxCount = read<uint32_t>(is);
|
180
178
|
sketch->putNumAtCurMin(numAtCurMin);
|
181
179
|
|
182
|
-
|
180
|
+
read(is, sketch->hllByteArr_.data(), sketch->getHllByteArrBytes());
|
183
181
|
|
184
182
|
if (auxCount > 0) { // necessarily TgtHllType == HLL_4
|
185
|
-
|
183
|
+
uint8_t auxLgIntArrSize = listHeader[4];
|
186
184
|
AuxHashMap<A>* auxHashMap = AuxHashMap<A>::deserialize(is, lgK, auxCount, auxLgIntArrSize, comapctFlag, allocator);
|
187
185
|
((Hll4Array<A>*)sketch)->putAuxHashMap(auxHashMap);
|
188
186
|
}
|
@@ -200,31 +198,31 @@ vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) co
|
|
200
198
|
uint8_t* bytes = byteArr.data() + header_size_bytes;
|
201
199
|
AuxHashMap<A>* auxHashMap = getAuxHashMap();
|
202
200
|
|
203
|
-
bytes[
|
204
|
-
bytes[
|
205
|
-
bytes[
|
206
|
-
bytes[
|
207
|
-
bytes[
|
208
|
-
bytes[
|
209
|
-
bytes[
|
210
|
-
bytes[
|
211
|
-
|
212
|
-
std::memcpy(bytes +
|
213
|
-
std::memcpy(bytes +
|
214
|
-
std::memcpy(bytes +
|
215
|
-
std::memcpy(bytes +
|
216
|
-
const
|
217
|
-
std::memcpy(bytes +
|
218
|
-
|
219
|
-
const
|
220
|
-
std::memcpy(bytes + getMemDataStart(),
|
201
|
+
bytes[hll_constants::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
|
202
|
+
bytes[hll_constants::SER_VER_BYTE] = static_cast<uint8_t>(hll_constants::SER_VER);
|
203
|
+
bytes[hll_constants::FAMILY_BYTE] = static_cast<uint8_t>(hll_constants::FAMILY_ID);
|
204
|
+
bytes[hll_constants::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK_);
|
205
|
+
bytes[hll_constants::LG_ARR_BYTE] = static_cast<uint8_t>(auxHashMap == nullptr ? 0 : auxHashMap->getLgAuxArrInts());
|
206
|
+
bytes[hll_constants::FLAGS_BYTE] = this->makeFlagsByte(compact);
|
207
|
+
bytes[hll_constants::HLL_CUR_MIN_BYTE] = static_cast<uint8_t>(curMin_);
|
208
|
+
bytes[hll_constants::MODE_BYTE] = this->makeModeByte();
|
209
|
+
|
210
|
+
std::memcpy(bytes + hll_constants::HIP_ACCUM_DOUBLE, &hipAccum_, sizeof(double));
|
211
|
+
std::memcpy(bytes + hll_constants::KXQ0_DOUBLE, &kxq0_, sizeof(double));
|
212
|
+
std::memcpy(bytes + hll_constants::KXQ1_DOUBLE, &kxq1_, sizeof(double));
|
213
|
+
std::memcpy(bytes + hll_constants::CUR_MIN_COUNT_INT, &numAtCurMin_, sizeof(uint32_t));
|
214
|
+
const uint32_t auxCount = (auxHashMap == nullptr ? 0 : auxHashMap->getAuxCount());
|
215
|
+
std::memcpy(bytes + hll_constants::AUX_COUNT_INT, &auxCount, sizeof(uint32_t));
|
216
|
+
|
217
|
+
const uint32_t hllByteArrBytes = getHllByteArrBytes();
|
218
|
+
std::memcpy(bytes + getMemDataStart(), hllByteArr_.data(), hllByteArrBytes);
|
221
219
|
|
222
220
|
// aux map if HLL_4
|
223
|
-
if (this->
|
221
|
+
if (this->tgtHllType_ == HLL_4) {
|
224
222
|
bytes += getMemDataStart() + hllByteArrBytes; // start of auxHashMap
|
225
223
|
if (auxHashMap != nullptr) {
|
226
224
|
if (compact) {
|
227
|
-
for (uint32_t coupon: *auxHashMap) {
|
225
|
+
for (const uint32_t coupon: *auxHashMap) {
|
228
226
|
std::memcpy(bytes, &coupon, sizeof(coupon));
|
229
227
|
bytes += sizeof(coupon);
|
230
228
|
}
|
@@ -233,8 +231,8 @@ vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) co
|
|
233
231
|
}
|
234
232
|
} else if (!compact) {
|
235
233
|
// if updatable, we write even if currently unused so the binary can be wrapped
|
236
|
-
|
237
|
-
std::fill_n(bytes, auxBytes, 0);
|
234
|
+
uint32_t auxBytes = 4 << hll_constants::LG_AUX_ARR_INTS[this->lgConfigK_];
|
235
|
+
std::fill_n(bytes, auxBytes, static_cast<uint8_t>(0));
|
238
236
|
}
|
239
237
|
}
|
240
238
|
|
@@ -242,64 +240,63 @@ vector_u8<A> HllArray<A>::serialize(bool compact, unsigned header_size_bytes) co
|
|
242
240
|
}
|
243
241
|
|
244
242
|
template<typename A>
|
245
|
-
void HllArray<A>::serialize(std::ostream& os,
|
243
|
+
void HllArray<A>::serialize(std::ostream& os, bool compact) const {
|
246
244
|
// header
|
247
|
-
const uint8_t preInts
|
248
|
-
|
249
|
-
const uint8_t serialVersion
|
250
|
-
|
251
|
-
const uint8_t familyId
|
252
|
-
|
253
|
-
const uint8_t lgKByte
|
254
|
-
|
245
|
+
const uint8_t preInts = getPreInts();
|
246
|
+
write(os, preInts);
|
247
|
+
const uint8_t serialVersion = hll_constants::SER_VER;
|
248
|
+
write(os, serialVersion);
|
249
|
+
const uint8_t familyId = hll_constants::FAMILY_ID;
|
250
|
+
write(os, familyId);
|
251
|
+
const uint8_t lgKByte = this->lgConfigK_;
|
252
|
+
write(os, lgKByte);
|
255
253
|
|
256
254
|
AuxHashMap<A>* auxHashMap = getAuxHashMap();
|
257
|
-
uint8_t lgArrByte
|
255
|
+
uint8_t lgArrByte = 0;
|
258
256
|
if (auxHashMap != nullptr) {
|
259
257
|
lgArrByte = auxHashMap->getLgAuxArrInts();
|
260
258
|
}
|
261
|
-
|
259
|
+
write(os, lgArrByte);
|
262
260
|
|
263
|
-
const uint8_t flagsByte
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
os.write((char*)&modeByte, sizeof(modeByte));
|
261
|
+
const uint8_t flagsByte = this->makeFlagsByte(compact);
|
262
|
+
write(os, flagsByte);
|
263
|
+
write(os, curMin_);
|
264
|
+
const uint8_t modeByte = this->makeModeByte();
|
265
|
+
write(os, modeByte);
|
269
266
|
|
270
267
|
// estimator data
|
271
|
-
|
272
|
-
|
273
|
-
|
268
|
+
write(os, hipAccum_);
|
269
|
+
write(os, kxq0_);
|
270
|
+
write(os, kxq1_);
|
274
271
|
|
275
272
|
// array data
|
276
|
-
|
273
|
+
write(os, numAtCurMin_);
|
277
274
|
|
278
|
-
const
|
279
|
-
|
280
|
-
|
275
|
+
const uint32_t auxCount = (auxHashMap == nullptr ? 0 : auxHashMap->getAuxCount());
|
276
|
+
write(os, auxCount);
|
277
|
+
write(os, hllByteArr_.data(), getHllByteArrBytes());
|
281
278
|
|
282
279
|
// aux map if HLL_4
|
283
|
-
if (this->
|
280
|
+
if (this->tgtHllType_ == HLL_4) {
|
284
281
|
if (auxHashMap != nullptr) {
|
285
282
|
if (compact) {
|
286
|
-
for (uint32_t coupon: *auxHashMap) {
|
287
|
-
|
283
|
+
for (const uint32_t coupon: *auxHashMap) {
|
284
|
+
write(os, coupon);
|
288
285
|
}
|
289
286
|
} else {
|
290
|
-
|
287
|
+
write(os, auxHashMap->getAuxIntArr(), auxHashMap->getUpdatableSizeBytes());
|
291
288
|
}
|
292
289
|
} else if (!compact) {
|
293
290
|
// if updatable, we write even if currently unused so the binary can be wrapped
|
294
|
-
|
295
|
-
std::fill_n(std::ostreambuf_iterator<char>(os), auxBytes, 0);
|
291
|
+
uint32_t auxBytes = 4 << hll_constants::LG_AUX_ARR_INTS[this->lgConfigK_];
|
292
|
+
std::fill_n(std::ostreambuf_iterator<char>(os), auxBytes, static_cast<char>(0));
|
296
293
|
}
|
297
294
|
}
|
298
295
|
}
|
299
296
|
|
300
297
|
template<typename A>
|
301
298
|
double HllArray<A>::getEstimate() const {
|
302
|
-
if (
|
299
|
+
if (oooFlag_) {
|
303
300
|
return getCompositeEstimate();
|
304
301
|
}
|
305
302
|
return getHipAccum();
|
@@ -321,50 +318,50 @@ double HllArray<A>::getEstimate() const {
|
|
321
318
|
* the very small values <= k where curMin = 0 still apply.
|
322
319
|
*/
|
323
320
|
template<typename A>
|
324
|
-
double HllArray<A>::getLowerBound(
|
321
|
+
double HllArray<A>::getLowerBound(uint8_t numStdDev) const {
|
325
322
|
HllUtil<A>::checkNumStdDev(numStdDev);
|
326
|
-
const
|
327
|
-
const double numNonZeros = ((
|
323
|
+
const uint32_t configK = 1 << this->lgConfigK_;
|
324
|
+
const double numNonZeros = ((curMin_ == 0) ? (configK - numAtCurMin_) : configK);
|
328
325
|
|
329
326
|
double estimate;
|
330
327
|
double rseFactor;
|
331
|
-
if (
|
328
|
+
if (oooFlag_) {
|
332
329
|
estimate = getCompositeEstimate();
|
333
|
-
rseFactor =
|
330
|
+
rseFactor = hll_constants::HLL_NON_HIP_RSE_FACTOR;
|
334
331
|
} else {
|
335
|
-
estimate =
|
336
|
-
rseFactor =
|
332
|
+
estimate = hipAccum_;
|
333
|
+
rseFactor = hll_constants::HLL_HIP_RSE_FACTOR;
|
337
334
|
}
|
338
335
|
|
339
336
|
double relErr;
|
340
|
-
if (this->
|
337
|
+
if (this->lgConfigK_ > 12) {
|
341
338
|
relErr = (numStdDev * rseFactor) / sqrt(configK);
|
342
339
|
} else {
|
343
|
-
relErr = HllUtil<A>::getRelErr(false,
|
340
|
+
relErr = HllUtil<A>::getRelErr(false, oooFlag_, this->lgConfigK_, numStdDev);
|
344
341
|
}
|
345
342
|
return fmax(estimate / (1.0 + relErr), numNonZeros);
|
346
343
|
}
|
347
344
|
|
348
345
|
template<typename A>
|
349
|
-
double HllArray<A>::getUpperBound(
|
346
|
+
double HllArray<A>::getUpperBound(uint8_t numStdDev) const {
|
350
347
|
HllUtil<A>::checkNumStdDev(numStdDev);
|
351
|
-
const
|
348
|
+
const uint32_t configK = 1 << this->lgConfigK_;
|
352
349
|
|
353
350
|
double estimate;
|
354
351
|
double rseFactor;
|
355
|
-
if (
|
352
|
+
if (oooFlag_) {
|
356
353
|
estimate = getCompositeEstimate();
|
357
|
-
rseFactor =
|
354
|
+
rseFactor = hll_constants::HLL_NON_HIP_RSE_FACTOR;
|
358
355
|
} else {
|
359
|
-
estimate =
|
360
|
-
rseFactor =
|
356
|
+
estimate = hipAccum_;
|
357
|
+
rseFactor = hll_constants::HLL_HIP_RSE_FACTOR;
|
361
358
|
}
|
362
359
|
|
363
360
|
double relErr;
|
364
|
-
if (this->
|
361
|
+
if (this->lgConfigK_ > 12) {
|
365
362
|
relErr = (-1.0) * (numStdDev * rseFactor) / sqrt(configK);
|
366
363
|
} else {
|
367
|
-
relErr = HllUtil<A>::getRelErr(true,
|
364
|
+
relErr = HllUtil<A>::getRelErr(true, oooFlag_, this->lgConfigK_, numStdDev);
|
368
365
|
}
|
369
366
|
return estimate / (1.0 + relErr);
|
370
367
|
}
|
@@ -378,21 +375,21 @@ double HllArray<A>::getUpperBound(const int numStdDev) const {
|
|
378
375
|
// Original C: again-two-registers.c hhb_get_composite_estimate L1489
|
379
376
|
template<typename A>
|
380
377
|
double HllArray<A>::getCompositeEstimate() const {
|
381
|
-
const double rawEst = getHllRawEstimate(
|
378
|
+
const double rawEst = getHllRawEstimate();
|
382
379
|
|
383
|
-
const double* xArr = CompositeInterpolationXTable<A>::get_x_arr(this->
|
384
|
-
const
|
385
|
-
const double yStride = CompositeInterpolationXTable<A>::get_y_stride(this->
|
380
|
+
const double* xArr = CompositeInterpolationXTable<A>::get_x_arr(this->lgConfigK_);
|
381
|
+
const uint32_t xArrLen = CompositeInterpolationXTable<A>::get_x_arr_length();
|
382
|
+
const double yStride = CompositeInterpolationXTable<A>::get_y_stride(this->lgConfigK_);
|
386
383
|
|
387
384
|
if (rawEst < xArr[0]) {
|
388
385
|
return 0;
|
389
386
|
}
|
390
387
|
|
391
|
-
const
|
388
|
+
const uint32_t xArrLenM1 = xArrLen - 1;
|
392
389
|
|
393
390
|
if (rawEst > xArr[xArrLenM1]) {
|
394
|
-
double finalY = yStride * xArrLenM1;
|
395
|
-
double factor = finalY / xArr[xArrLenM1];
|
391
|
+
const double finalY = yStride * xArrLenM1;
|
392
|
+
const double factor = finalY / xArr[xArrLenM1];
|
396
393
|
return rawEst * factor;
|
397
394
|
}
|
398
395
|
|
@@ -401,10 +398,9 @@ double HllArray<A>::getCompositeEstimate() const {
|
|
401
398
|
// We need to completely avoid the linear_counting estimator if it might have a crazy value.
|
402
399
|
// Empirical evidence suggests that the threshold 3*k will keep us safe if 2^4 <= k <= 2^21.
|
403
400
|
|
404
|
-
if (adjEst > (3 << this->
|
401
|
+
if (adjEst > (3 << this->lgConfigK_)) { return adjEst; }
|
405
402
|
|
406
|
-
const double linEst =
|
407
|
-
getHllBitMapEstimate(this->lgConfigK, curMin, numAtCurMin);
|
403
|
+
const double linEst = getHllBitMapEstimate();
|
408
404
|
|
409
405
|
// Bias is created when the value of an estimator is compared with a threshold to decide whether
|
410
406
|
// to use that estimator or a different one.
|
@@ -416,70 +412,70 @@ double HllArray<A>::getCompositeEstimate() const {
|
|
416
412
|
// The following constants comes from empirical measurements of the crossover point
|
417
413
|
// between the average error of the linear estimator and the adjusted hll estimator
|
418
414
|
double crossOver = 0.64;
|
419
|
-
if (this->
|
420
|
-
else if (this->
|
415
|
+
if (this->lgConfigK_ == 4) { crossOver = 0.718; }
|
416
|
+
else if (this->lgConfigK_ == 5) { crossOver = 0.672; }
|
421
417
|
|
422
|
-
return (avgEst > (crossOver * (1 << this->
|
418
|
+
return (avgEst > (crossOver * (1 << this->lgConfigK_))) ? adjEst : linEst;
|
423
419
|
}
|
424
420
|
|
425
421
|
template<typename A>
|
426
422
|
double HllArray<A>::getKxQ0() const {
|
427
|
-
return
|
423
|
+
return kxq0_;
|
428
424
|
}
|
429
425
|
|
430
426
|
template<typename A>
|
431
427
|
double HllArray<A>::getKxQ1() const {
|
432
|
-
return
|
428
|
+
return kxq1_;
|
433
429
|
}
|
434
430
|
|
435
431
|
template<typename A>
|
436
432
|
double HllArray<A>::getHipAccum() const {
|
437
|
-
return
|
433
|
+
return hipAccum_;
|
438
434
|
}
|
439
435
|
|
440
436
|
template<typename A>
|
441
|
-
|
442
|
-
return
|
437
|
+
uint8_t HllArray<A>::getCurMin() const {
|
438
|
+
return curMin_;
|
443
439
|
}
|
444
440
|
|
445
441
|
template<typename A>
|
446
|
-
|
447
|
-
return
|
442
|
+
uint32_t HllArray<A>::getNumAtCurMin() const {
|
443
|
+
return numAtCurMin_;
|
448
444
|
}
|
449
445
|
|
450
446
|
template<typename A>
|
451
|
-
void HllArray<A>::putKxQ0(
|
452
|
-
|
447
|
+
void HllArray<A>::putKxQ0(double kxq0) {
|
448
|
+
kxq0_ = kxq0;
|
453
449
|
}
|
454
450
|
|
455
451
|
template<typename A>
|
456
|
-
void HllArray<A>::putKxQ1(
|
457
|
-
|
452
|
+
void HllArray<A>::putKxQ1(double kxq1) {
|
453
|
+
kxq1_ = kxq1;
|
458
454
|
}
|
459
455
|
|
460
456
|
template<typename A>
|
461
|
-
void HllArray<A>::putHipAccum(
|
462
|
-
|
457
|
+
void HllArray<A>::putHipAccum(double hipAccum) {
|
458
|
+
hipAccum_ = hipAccum;
|
463
459
|
}
|
464
460
|
|
465
461
|
template<typename A>
|
466
|
-
void HllArray<A>::putCurMin(
|
467
|
-
|
462
|
+
void HllArray<A>::putCurMin(uint8_t curMin) {
|
463
|
+
curMin_ = curMin;
|
468
464
|
}
|
469
465
|
|
470
466
|
template<typename A>
|
471
|
-
void HllArray<A>::putNumAtCurMin(
|
472
|
-
|
467
|
+
void HllArray<A>::putNumAtCurMin(uint32_t numAtCurMin) {
|
468
|
+
numAtCurMin_ = numAtCurMin;
|
473
469
|
}
|
474
470
|
|
475
471
|
template<typename A>
|
476
472
|
void HllArray<A>::decNumAtCurMin() {
|
477
|
-
--
|
473
|
+
--numAtCurMin_;
|
478
474
|
}
|
479
475
|
|
480
476
|
template<typename A>
|
481
|
-
void HllArray<A>::addToHipAccum(
|
482
|
-
|
477
|
+
void HllArray<A>::addToHipAccum(double delta) {
|
478
|
+
hipAccum_ += delta;
|
483
479
|
}
|
484
480
|
|
485
481
|
template<typename A>
|
@@ -489,22 +485,22 @@ bool HllArray<A>::isCompact() const {
|
|
489
485
|
|
490
486
|
template<typename A>
|
491
487
|
bool HllArray<A>::isEmpty() const {
|
492
|
-
const
|
488
|
+
const uint32_t configK = 1 << this->lgConfigK_;
|
493
489
|
return (getCurMin() == 0) && (getNumAtCurMin() == configK);
|
494
490
|
}
|
495
491
|
|
496
492
|
template<typename A>
|
497
493
|
void HllArray<A>::putOutOfOrderFlag(bool flag) {
|
498
|
-
|
494
|
+
oooFlag_ = flag;
|
499
495
|
}
|
500
496
|
|
501
497
|
template<typename A>
|
502
498
|
bool HllArray<A>::isOutOfOrderFlag() const {
|
503
|
-
return
|
499
|
+
return oooFlag_;
|
504
500
|
}
|
505
501
|
|
506
502
|
template<typename A>
|
507
|
-
|
503
|
+
uint32_t HllArray<A>::hllArrBytes(target_hll_type tgtHllType, uint8_t lgConfigK) {
|
508
504
|
switch (tgtHllType) {
|
509
505
|
case HLL_4:
|
510
506
|
return hll4ArrBytes(lgConfigK);
|
@@ -518,41 +514,41 @@ int HllArray<A>::hllArrBytes(target_hll_type tgtHllType, int lgConfigK) {
|
|
518
514
|
}
|
519
515
|
|
520
516
|
template<typename A>
|
521
|
-
|
517
|
+
uint32_t HllArray<A>::hll4ArrBytes(uint8_t lgConfigK) {
|
522
518
|
return 1 << (lgConfigK - 1);
|
523
519
|
}
|
524
520
|
|
525
521
|
template<typename A>
|
526
|
-
|
527
|
-
const
|
522
|
+
uint32_t HllArray<A>::hll6ArrBytes(uint8_t lgConfigK) {
|
523
|
+
const uint32_t numSlots = 1 << lgConfigK;
|
528
524
|
return ((numSlots * 3) >> 2) + 1;
|
529
525
|
}
|
530
526
|
|
531
527
|
template<typename A>
|
532
|
-
|
528
|
+
uint32_t HllArray<A>::hll8ArrBytes(uint8_t lgConfigK) {
|
533
529
|
return 1 << lgConfigK;
|
534
530
|
}
|
535
531
|
|
536
532
|
template<typename A>
|
537
|
-
|
538
|
-
return
|
533
|
+
uint32_t HllArray<A>::getMemDataStart() const {
|
534
|
+
return hll_constants::HLL_BYTE_ARR_START;
|
539
535
|
}
|
540
536
|
|
541
537
|
template<typename A>
|
542
|
-
|
543
|
-
return
|
538
|
+
uint32_t HllArray<A>::getUpdatableSerializationBytes() const {
|
539
|
+
return hll_constants::HLL_BYTE_ARR_START + getHllByteArrBytes();
|
544
540
|
}
|
545
541
|
|
546
542
|
template<typename A>
|
547
|
-
|
543
|
+
uint32_t HllArray<A>::getCompactSerializationBytes() const {
|
548
544
|
AuxHashMap<A>* auxHashMap = getAuxHashMap();
|
549
|
-
const
|
550
|
-
return
|
545
|
+
const uint32_t auxCountBytes = ((auxHashMap == nullptr) ? 0 : auxHashMap->getCompactSizeBytes());
|
546
|
+
return hll_constants::HLL_BYTE_ARR_START + getHllByteArrBytes() + auxCountBytes;
|
551
547
|
}
|
552
548
|
|
553
549
|
template<typename A>
|
554
|
-
|
555
|
-
return
|
550
|
+
uint8_t HllArray<A>::getPreInts() const {
|
551
|
+
return hll_constants::HLL_PREINTS;
|
556
552
|
}
|
557
553
|
|
558
554
|
template<typename A>
|
@@ -562,14 +558,14 @@ AuxHashMap<A>* HllArray<A>::getAuxHashMap() const {
|
|
562
558
|
|
563
559
|
template<typename A>
|
564
560
|
void HllArray<A>::hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue) {
|
565
|
-
const
|
561
|
+
const uint32_t configK = 1 << this->getLgConfigK();
|
566
562
|
// update hip BEFORE updating kxq
|
567
|
-
if (!
|
563
|
+
if (!oooFlag_) hipAccum_ += configK / (kxq0_ + kxq1_);
|
568
564
|
// update kxq0 and kxq1; subtract first, then add
|
569
|
-
if (oldValue < 32) {
|
570
|
-
else {
|
571
|
-
if (newValue < 32) {
|
572
|
-
else {
|
565
|
+
if (oldValue < 32) { kxq0_ -= INVERSE_POWERS_OF_2[oldValue]; }
|
566
|
+
else { kxq1_ -= INVERSE_POWERS_OF_2[oldValue]; }
|
567
|
+
if (newValue < 32) { kxq0_ += INVERSE_POWERS_OF_2[newValue]; }
|
568
|
+
else { kxq1_ += INVERSE_POWERS_OF_2[newValue]; }
|
573
569
|
}
|
574
570
|
|
575
571
|
/**
|
@@ -579,91 +575,91 @@ void HllArray<A>::hipAndKxQIncrementalUpdate(uint8_t oldValue, uint8_t newValue)
|
|
579
575
|
*/
|
580
576
|
//In C: again-two-registers.c hhb_get_improved_linear_counting_estimate L1274
|
581
577
|
template<typename A>
|
582
|
-
double HllArray<A>::getHllBitMapEstimate(
|
583
|
-
const
|
584
|
-
const
|
578
|
+
double HllArray<A>::getHllBitMapEstimate() const {
|
579
|
+
const uint32_t configK = 1 << this->lgConfigK_;
|
580
|
+
const uint32_t numUnhitBuckets = curMin_ == 0 ? numAtCurMin_ : 0;
|
585
581
|
|
586
582
|
//This will eventually go away.
|
587
583
|
if (numUnhitBuckets == 0) {
|
588
584
|
return configK * log(configK / 0.5);
|
589
585
|
}
|
590
586
|
|
591
|
-
const
|
587
|
+
const uint32_t numHitBuckets = configK - numUnhitBuckets;
|
592
588
|
return HarmonicNumbers<A>::getBitMapEstimate(configK, numHitBuckets);
|
593
589
|
}
|
594
590
|
|
595
591
|
//In C: again-two-registers.c hhb_get_raw_estimate L1167
|
596
592
|
template<typename A>
|
597
|
-
double HllArray<A>::getHllRawEstimate(
|
598
|
-
const
|
593
|
+
double HllArray<A>::getHllRawEstimate() const {
|
594
|
+
const uint32_t configK = 1 << this->lgConfigK_;
|
599
595
|
double correctionFactor;
|
600
|
-
if (
|
601
|
-
else if (
|
602
|
-
else if (
|
596
|
+
if (this->lgConfigK_ == 4) { correctionFactor = 0.673; }
|
597
|
+
else if (this->lgConfigK_ == 5) { correctionFactor = 0.697; }
|
598
|
+
else if (this->lgConfigK_ == 6) { correctionFactor = 0.709; }
|
603
599
|
else { correctionFactor = 0.7213 / (1.0 + (1.079 / configK)); }
|
604
|
-
const double hyperEst = (correctionFactor * configK * configK) /
|
600
|
+
const double hyperEst = (correctionFactor * configK * configK) / (kxq0_ + kxq1_);
|
605
601
|
return hyperEst;
|
606
602
|
}
|
607
603
|
|
608
604
|
template<typename A>
|
609
605
|
typename HllArray<A>::const_iterator HllArray<A>::begin(bool all) const {
|
610
|
-
return const_iterator(
|
606
|
+
return const_iterator(hllByteArr_.data(), 1 << this->lgConfigK_, 0, this->tgtHllType_, nullptr, 0, all);
|
611
607
|
}
|
612
608
|
|
613
609
|
template<typename A>
|
614
610
|
typename HllArray<A>::const_iterator HllArray<A>::end() const {
|
615
|
-
return const_iterator(
|
611
|
+
return const_iterator(hllByteArr_.data(), 1 << this->lgConfigK_, 1 << this->lgConfigK_, this->tgtHllType_, nullptr, 0, false);
|
616
612
|
}
|
617
613
|
|
618
614
|
template<typename A>
|
619
|
-
HllArray<A>::const_iterator::const_iterator(const uint8_t* array,
|
620
|
-
|
615
|
+
HllArray<A>::const_iterator::const_iterator(const uint8_t* array, uint32_t array_size, uint32_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset, bool all):
|
616
|
+
array_(array), array_size_(array_size), index_(index), hll_type_(hll_type), exceptions_(exceptions), offset_(offset), all_(all)
|
621
617
|
{
|
622
|
-
while (
|
623
|
-
|
624
|
-
if (
|
625
|
-
|
618
|
+
while (index_ < array_size_) {
|
619
|
+
value_ = get_value(array_, index_, hll_type_, exceptions_, offset_);
|
620
|
+
if (all_ || value_ != hll_constants::EMPTY) break;
|
621
|
+
++index_;
|
626
622
|
}
|
627
623
|
}
|
628
624
|
|
629
625
|
template<typename A>
|
630
626
|
typename HllArray<A>::const_iterator& HllArray<A>::const_iterator::operator++() {
|
631
|
-
while (++
|
632
|
-
|
633
|
-
if (
|
627
|
+
while (++index_ < array_size_) {
|
628
|
+
value_ = get_value(array_, index_, hll_type_, exceptions_, offset_);
|
629
|
+
if (all_ || value_ != hll_constants::EMPTY) break;
|
634
630
|
}
|
635
631
|
return *this;
|
636
632
|
}
|
637
633
|
|
638
634
|
template<typename A>
|
639
635
|
bool HllArray<A>::const_iterator::operator!=(const const_iterator& other) const {
|
640
|
-
return
|
636
|
+
return index_ != other.index_;
|
641
637
|
}
|
642
638
|
|
643
639
|
template<typename A>
|
644
640
|
uint32_t HllArray<A>::const_iterator::operator*() const {
|
645
|
-
return HllUtil<A>::pair(
|
641
|
+
return HllUtil<A>::pair(index_, value_);
|
646
642
|
}
|
647
643
|
|
648
644
|
template<typename A>
|
649
|
-
uint8_t HllArray<A>::const_iterator::get_value(const uint8_t* array,
|
645
|
+
uint8_t HllArray<A>::const_iterator::get_value(const uint8_t* array, uint32_t index, target_hll_type hll_type, const AuxHashMap<A>* exceptions, uint8_t offset) {
|
650
646
|
if (hll_type == target_hll_type::HLL_4) {
|
651
647
|
uint8_t value = array[index >> 1];
|
652
648
|
if ((index & 1) > 0) { // odd
|
653
649
|
value >>= 4;
|
654
650
|
} else {
|
655
|
-
value &=
|
651
|
+
value &= hll_constants::loNibbleMask;
|
656
652
|
}
|
657
|
-
if (value ==
|
653
|
+
if (value == hll_constants::AUX_TOKEN) { // exception
|
658
654
|
return exceptions->mustFindValueFor(index);
|
659
655
|
}
|
660
656
|
return value + offset;
|
661
657
|
} else if (hll_type == target_hll_type::HLL_6) {
|
662
|
-
const
|
663
|
-
const
|
664
|
-
const
|
658
|
+
const size_t start_bit = index * 6;
|
659
|
+
const uint8_t shift = start_bit & 0x7;
|
660
|
+
const size_t byte_idx = start_bit >> 3;
|
665
661
|
const uint16_t two_byte_val = (array[byte_idx + 1] << 8) | array[byte_idx];
|
666
|
-
return (two_byte_val >> shift) &
|
662
|
+
return (two_byte_val >> shift) & hll_constants::VAL_MASK_6;
|
667
663
|
}
|
668
664
|
// HLL_8
|
669
665
|
return array[index];
|
@@ -671,7 +667,7 @@ uint8_t HllArray<A>::const_iterator::get_value(const uint8_t* array, size_t inde
|
|
671
667
|
|
672
668
|
template<typename A>
|
673
669
|
A HllArray<A>::getAllocator() const {
|
674
|
-
return
|
670
|
+
return hllByteArr_.get_allocator();
|
675
671
|
}
|
676
672
|
|
677
673
|
}
|