datasketches 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/README.md +7 -7
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
- data/vendor/datasketches-cpp/python/README.md +50 -50
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +10 -7
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
- metadata +18 -7
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
|
@@ -31,19 +31,19 @@
|
|
|
31
31
|
namespace datasketches {
|
|
32
32
|
|
|
33
33
|
template<typename A>
|
|
34
|
-
CouponList<A>::CouponList(
|
|
34
|
+
CouponList<A>::CouponList(uint8_t lgConfigK, target_hll_type tgtHllType, hll_mode mode, const A& allocator):
|
|
35
35
|
HllSketchImpl<A>(lgConfigK, tgtHllType, mode, false),
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
36
|
+
couponCount_(0),
|
|
37
|
+
oooFlag_(false),
|
|
38
|
+
coupons_(1ULL << (mode == hll_mode::LIST ? hll_constants::LG_INIT_LIST_SIZE : hll_constants::LG_INIT_SET_SIZE), 0, allocator)
|
|
39
39
|
{}
|
|
40
40
|
|
|
41
41
|
template<typename A>
|
|
42
42
|
CouponList<A>::CouponList(const CouponList& that, const target_hll_type tgtHllType):
|
|
43
|
-
HllSketchImpl<A>(that.
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
43
|
+
HllSketchImpl<A>(that.lgConfigK_, tgtHllType, that.mode_, false),
|
|
44
|
+
couponCount_(that.couponCount_),
|
|
45
|
+
oooFlag_(that.oooFlag_),
|
|
46
|
+
coupons_(that.coupons_)
|
|
47
47
|
{}
|
|
48
48
|
|
|
49
49
|
template<typename A>
|
|
@@ -58,48 +58,48 @@ std::function<void(HllSketchImpl<A>*)> CouponList<A>::get_deleter() const {
|
|
|
58
58
|
|
|
59
59
|
template<typename A>
|
|
60
60
|
CouponList<A>* CouponList<A>::copy() const {
|
|
61
|
-
ClAlloc cla(
|
|
61
|
+
ClAlloc cla(coupons_.get_allocator());
|
|
62
62
|
return new (cla.allocate(1)) CouponList<A>(*this);
|
|
63
63
|
}
|
|
64
64
|
|
|
65
65
|
template<typename A>
|
|
66
66
|
CouponList<A>* CouponList<A>::copyAs(target_hll_type tgtHllType) const {
|
|
67
|
-
ClAlloc cla(
|
|
67
|
+
ClAlloc cla(coupons_.get_allocator());
|
|
68
68
|
return new (cla.allocate(1)) CouponList<A>(*this, tgtHllType);
|
|
69
69
|
}
|
|
70
70
|
|
|
71
71
|
template<typename A>
|
|
72
72
|
CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len, const A& allocator) {
|
|
73
|
-
if (len <
|
|
73
|
+
if (len < hll_constants::LIST_INT_ARR_START) {
|
|
74
74
|
throw std::out_of_range("Input data length insufficient to hold CouponHashSet");
|
|
75
75
|
}
|
|
76
76
|
|
|
77
77
|
const uint8_t* data = static_cast<const uint8_t*>(bytes);
|
|
78
|
-
if (data[
|
|
78
|
+
if (data[hll_constants::PREAMBLE_INTS_BYTE] != hll_constants::LIST_PREINTS) {
|
|
79
79
|
throw std::invalid_argument("Incorrect number of preInts in input stream");
|
|
80
80
|
}
|
|
81
|
-
if (data[
|
|
81
|
+
if (data[hll_constants::SER_VER_BYTE] != hll_constants::SER_VER) {
|
|
82
82
|
throw std::invalid_argument("Wrong ser ver in input stream");
|
|
83
83
|
}
|
|
84
|
-
if (data[
|
|
84
|
+
if (data[hll_constants::FAMILY_BYTE] != hll_constants::FAMILY_ID) {
|
|
85
85
|
throw std::invalid_argument("Input stream is not an HLL sketch");
|
|
86
86
|
}
|
|
87
87
|
|
|
88
|
-
hll_mode mode = HllSketchImpl<A>::extractCurMode(data[
|
|
88
|
+
hll_mode mode = HllSketchImpl<A>::extractCurMode(data[hll_constants::MODE_BYTE]);
|
|
89
89
|
if (mode != LIST) {
|
|
90
90
|
throw std::invalid_argument("Calling list constructor with non-list mode data");
|
|
91
91
|
}
|
|
92
92
|
|
|
93
|
-
target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[
|
|
93
|
+
target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[hll_constants::MODE_BYTE]);
|
|
94
94
|
|
|
95
|
-
const
|
|
96
|
-
const bool compact = ((data[
|
|
97
|
-
const bool oooFlag = ((data[
|
|
98
|
-
const bool emptyFlag = ((data[
|
|
95
|
+
const uint8_t lgK = data[hll_constants::LG_K_BYTE];
|
|
96
|
+
const bool compact = ((data[hll_constants::FLAGS_BYTE] & hll_constants::COMPACT_FLAG_MASK) ? true : false);
|
|
97
|
+
const bool oooFlag = ((data[hll_constants::FLAGS_BYTE] & hll_constants::OUT_OF_ORDER_FLAG_MASK) ? true : false);
|
|
98
|
+
const bool emptyFlag = ((data[hll_constants::FLAGS_BYTE] & hll_constants::EMPTY_FLAG_MASK) ? true : false);
|
|
99
99
|
|
|
100
|
-
const
|
|
101
|
-
const
|
|
102
|
-
const size_t expectedLength =
|
|
100
|
+
const uint32_t couponCount = data[hll_constants::LIST_COUNT_BYTE];
|
|
101
|
+
const uint32_t couponsInArray = (compact ? couponCount : (1 << HllUtil<A>::computeLgArrInts(LIST, couponCount, lgK)));
|
|
102
|
+
const size_t expectedLength = hll_constants::LIST_INT_ARR_START + (couponsInArray * sizeof(uint32_t));
|
|
103
103
|
if (len < expectedLength) {
|
|
104
104
|
throw std::out_of_range("Byte array too short for sketch. Expected " + std::to_string(expectedLength)
|
|
105
105
|
+ ", found: " + std::to_string(len));
|
|
@@ -107,12 +107,12 @@ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len, const A& al
|
|
|
107
107
|
|
|
108
108
|
ClAlloc cla(allocator);
|
|
109
109
|
CouponList<A>* sketch = new (cla.allocate(1)) CouponList<A>(lgK, tgtHllType, mode, allocator);
|
|
110
|
-
sketch->
|
|
110
|
+
sketch->couponCount_ = couponCount;
|
|
111
111
|
sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST
|
|
112
112
|
|
|
113
113
|
if (!emptyFlag) {
|
|
114
114
|
// only need to read valid coupons, unlike in stream case
|
|
115
|
-
std::memcpy(sketch->
|
|
115
|
+
std::memcpy(sketch->coupons_.data(), data + hll_constants::LIST_INT_ARR_START, couponCount * sizeof(uint32_t));
|
|
116
116
|
}
|
|
117
117
|
|
|
118
118
|
return sketch;
|
|
@@ -121,44 +121,44 @@ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len, const A& al
|
|
|
121
121
|
template<typename A>
|
|
122
122
|
CouponList<A>* CouponList<A>::newList(std::istream& is, const A& allocator) {
|
|
123
123
|
uint8_t listHeader[8];
|
|
124
|
-
|
|
124
|
+
read(is, listHeader, 8 * sizeof(uint8_t));
|
|
125
125
|
|
|
126
|
-
if (listHeader[
|
|
126
|
+
if (listHeader[hll_constants::PREAMBLE_INTS_BYTE] != hll_constants::LIST_PREINTS) {
|
|
127
127
|
throw std::invalid_argument("Incorrect number of preInts in input stream");
|
|
128
128
|
}
|
|
129
|
-
if (listHeader[
|
|
129
|
+
if (listHeader[hll_constants::SER_VER_BYTE] != hll_constants::SER_VER) {
|
|
130
130
|
throw std::invalid_argument("Wrong ser ver in input stream");
|
|
131
131
|
}
|
|
132
|
-
if (listHeader[
|
|
132
|
+
if (listHeader[hll_constants::FAMILY_BYTE] != hll_constants::FAMILY_ID) {
|
|
133
133
|
throw std::invalid_argument("Input stream is not an HLL sketch");
|
|
134
134
|
}
|
|
135
135
|
|
|
136
|
-
hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[
|
|
136
|
+
hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[hll_constants::MODE_BYTE]);
|
|
137
137
|
if (mode != LIST) {
|
|
138
138
|
throw std::invalid_argument("Calling list constructor with non-list mode data");
|
|
139
139
|
}
|
|
140
140
|
|
|
141
|
-
const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[
|
|
141
|
+
const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[hll_constants::MODE_BYTE]);
|
|
142
142
|
|
|
143
|
-
const
|
|
144
|
-
const bool compact = ((listHeader[
|
|
145
|
-
const bool oooFlag = ((listHeader[
|
|
146
|
-
const bool emptyFlag = ((listHeader[
|
|
143
|
+
const uint8_t lgK = listHeader[hll_constants::LG_K_BYTE];
|
|
144
|
+
const bool compact = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::COMPACT_FLAG_MASK) ? true : false);
|
|
145
|
+
const bool oooFlag = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::OUT_OF_ORDER_FLAG_MASK) ? true : false);
|
|
146
|
+
const bool emptyFlag = ((listHeader[hll_constants::FLAGS_BYTE] & hll_constants::EMPTY_FLAG_MASK) ? true : false);
|
|
147
147
|
|
|
148
148
|
ClAlloc cla(allocator);
|
|
149
149
|
CouponList<A>* sketch = new (cla.allocate(1)) CouponList<A>(lgK, tgtHllType, mode, allocator);
|
|
150
150
|
using coupon_list_ptr = std::unique_ptr<CouponList<A>, std::function<void(HllSketchImpl<A>*)>>;
|
|
151
151
|
coupon_list_ptr ptr(sketch, sketch->get_deleter());
|
|
152
|
-
const
|
|
153
|
-
sketch->
|
|
152
|
+
const uint32_t couponCount = listHeader[hll_constants::LIST_COUNT_BYTE];
|
|
153
|
+
sketch->couponCount_ = couponCount;
|
|
154
154
|
sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST
|
|
155
155
|
|
|
156
156
|
if (!emptyFlag) {
|
|
157
157
|
// For stream processing, need to read entire number written to stream so read
|
|
158
158
|
// pointer ends up set correctly.
|
|
159
159
|
// If not compact, still need to read empty items even though in order.
|
|
160
|
-
const
|
|
161
|
-
|
|
160
|
+
const uint32_t numToRead = (compact ? couponCount : static_cast<uint32_t>(sketch->coupons_.size()));
|
|
161
|
+
read(is, sketch->coupons_.data(), numToRead * sizeof(uint32_t));
|
|
162
162
|
}
|
|
163
163
|
|
|
164
164
|
if (!is.good())
|
|
@@ -173,17 +173,17 @@ vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes)
|
|
|
173
173
|
vector_u8<A> byteArr(sketchSizeBytes, 0, getAllocator());
|
|
174
174
|
uint8_t* bytes = byteArr.data() + header_size_bytes;
|
|
175
175
|
|
|
176
|
-
bytes[
|
|
177
|
-
bytes[
|
|
178
|
-
bytes[
|
|
179
|
-
bytes[
|
|
180
|
-
bytes[
|
|
181
|
-
bytes[
|
|
182
|
-
bytes[
|
|
183
|
-
bytes[
|
|
184
|
-
|
|
185
|
-
if (this->
|
|
186
|
-
std::memcpy(bytes +
|
|
176
|
+
bytes[hll_constants::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
|
|
177
|
+
bytes[hll_constants::SER_VER_BYTE] = static_cast<uint8_t>(hll_constants::SER_VER);
|
|
178
|
+
bytes[hll_constants::FAMILY_BYTE] = static_cast<uint8_t>(hll_constants::FAMILY_ID);
|
|
179
|
+
bytes[hll_constants::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK_);
|
|
180
|
+
bytes[hll_constants::LG_ARR_BYTE] = count_trailing_zeros_in_u32(static_cast<uint32_t>(coupons_.size()));
|
|
181
|
+
bytes[hll_constants::FLAGS_BYTE] = this->makeFlagsByte(compact);
|
|
182
|
+
bytes[hll_constants::LIST_COUNT_BYTE] = static_cast<uint8_t>(this->mode_ == LIST ? couponCount_ : 0);
|
|
183
|
+
bytes[hll_constants::MODE_BYTE] = this->makeModeByte();
|
|
184
|
+
|
|
185
|
+
if (this->mode_ == SET) {
|
|
186
|
+
std::memcpy(bytes + hll_constants::HASH_SET_COUNT_INT, &couponCount_, sizeof(couponCount_));
|
|
187
187
|
}
|
|
188
188
|
|
|
189
189
|
// coupons
|
|
@@ -191,12 +191,12 @@ vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes)
|
|
|
191
191
|
const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
|
|
192
192
|
switch (sw) {
|
|
193
193
|
case 0: { // src updatable, dst updatable
|
|
194
|
-
std::memcpy(bytes + getMemDataStart(),
|
|
194
|
+
std::memcpy(bytes + getMemDataStart(), coupons_.data(), coupons_.size() * sizeof(uint32_t));
|
|
195
195
|
break;
|
|
196
196
|
}
|
|
197
197
|
case 1: { // src updatable, dst compact
|
|
198
198
|
bytes += getMemDataStart(); // reusing pointer for incremental writes
|
|
199
|
-
for (uint32_t coupon: *this) {
|
|
199
|
+
for (const uint32_t coupon: *this) {
|
|
200
200
|
std::memcpy(bytes, &coupon, sizeof(coupon));
|
|
201
201
|
bytes += sizeof(coupon);
|
|
202
202
|
}
|
|
@@ -213,33 +213,33 @@ vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes)
|
|
|
213
213
|
template<typename A>
|
|
214
214
|
void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
|
|
215
215
|
// header
|
|
216
|
-
const uint8_t preInts
|
|
217
|
-
|
|
218
|
-
const uint8_t serialVersion(
|
|
219
|
-
|
|
220
|
-
const uint8_t familyId(
|
|
221
|
-
|
|
222
|
-
const uint8_t lgKByte
|
|
223
|
-
|
|
224
|
-
const uint8_t lgArrIntsByte
|
|
225
|
-
|
|
226
|
-
const uint8_t flagsByte
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
if (this->
|
|
230
|
-
const uint8_t listCount(
|
|
231
|
-
|
|
216
|
+
const uint8_t preInts = getPreInts();
|
|
217
|
+
write(os, preInts);
|
|
218
|
+
const uint8_t serialVersion(hll_constants::SER_VER);
|
|
219
|
+
write(os, serialVersion);
|
|
220
|
+
const uint8_t familyId(hll_constants::FAMILY_ID);
|
|
221
|
+
write(os, familyId);
|
|
222
|
+
const uint8_t lgKByte = this->lgConfigK_;
|
|
223
|
+
write(os, lgKByte);
|
|
224
|
+
const uint8_t lgArrIntsByte = count_trailing_zeros_in_u32(static_cast<uint32_t>(coupons_.size()));
|
|
225
|
+
write(os, lgArrIntsByte);
|
|
226
|
+
const uint8_t flagsByte = this->makeFlagsByte(compact);
|
|
227
|
+
write(os, flagsByte);
|
|
228
|
+
|
|
229
|
+
if (this->mode_ == LIST) {
|
|
230
|
+
const uint8_t listCount = static_cast<uint8_t>(couponCount_);
|
|
231
|
+
write(os, listCount);
|
|
232
232
|
} else { // mode == SET
|
|
233
|
-
|
|
234
|
-
|
|
233
|
+
const uint8_t unused = 0;
|
|
234
|
+
write(os, unused);
|
|
235
235
|
}
|
|
236
236
|
|
|
237
|
-
const uint8_t modeByte
|
|
238
|
-
|
|
237
|
+
const uint8_t modeByte = this->makeModeByte();
|
|
238
|
+
write(os, modeByte);
|
|
239
239
|
|
|
240
|
-
if (this->
|
|
240
|
+
if (this->mode_ == SET) {
|
|
241
241
|
// writing as int, already stored as int
|
|
242
|
-
|
|
242
|
+
write(os, couponCount_);
|
|
243
243
|
}
|
|
244
244
|
|
|
245
245
|
// coupons
|
|
@@ -247,12 +247,12 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
|
|
|
247
247
|
const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
|
|
248
248
|
switch (sw) {
|
|
249
249
|
case 0: { // src updatable, dst updatable
|
|
250
|
-
|
|
250
|
+
write(os, coupons_.data(), coupons_.size() * sizeof(uint32_t));
|
|
251
251
|
break;
|
|
252
252
|
}
|
|
253
253
|
case 1: { // src updatable, dst compact
|
|
254
|
-
for (uint32_t coupon: *this) {
|
|
255
|
-
|
|
254
|
+
for (const uint32_t coupon: *this) {
|
|
255
|
+
write(os, coupon);
|
|
256
256
|
}
|
|
257
257
|
break;
|
|
258
258
|
}
|
|
@@ -265,14 +265,14 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
|
|
|
265
265
|
}
|
|
266
266
|
|
|
267
267
|
template<typename A>
|
|
268
|
-
HllSketchImpl<A>* CouponList<A>::couponUpdate(
|
|
269
|
-
for (size_t i = 0; i <
|
|
270
|
-
const
|
|
271
|
-
if (couponAtIdx ==
|
|
272
|
-
|
|
273
|
-
++
|
|
274
|
-
if (
|
|
275
|
-
if (this->
|
|
268
|
+
HllSketchImpl<A>* CouponList<A>::couponUpdate(uint32_t coupon) {
|
|
269
|
+
for (size_t i = 0; i < coupons_.size(); ++i) { // search for empty slot
|
|
270
|
+
const uint32_t couponAtIdx = coupons_[i];
|
|
271
|
+
if (couponAtIdx == hll_constants::EMPTY) {
|
|
272
|
+
coupons_[i] = coupon; // the actual update
|
|
273
|
+
++couponCount_;
|
|
274
|
+
if (couponCount_ == static_cast<uint32_t>(coupons_.size())) { // array full
|
|
275
|
+
if (this->lgConfigK_ < 8) {
|
|
276
276
|
return promoteHeapListOrSetToHll(*this);
|
|
277
277
|
}
|
|
278
278
|
return promoteHeapListToSet(*this);
|
|
@@ -293,71 +293,68 @@ double CouponList<A>::getCompositeEstimate() const { return getEstimate(); }
|
|
|
293
293
|
|
|
294
294
|
template<typename A>
|
|
295
295
|
double CouponList<A>::getEstimate() const {
|
|
296
|
-
const
|
|
297
|
-
|
|
298
|
-
return fmax(est, couponCount);
|
|
296
|
+
const double est = CubicInterpolation<A>::usingXAndYTables(couponCount_);
|
|
297
|
+
return fmax(est, couponCount_);
|
|
299
298
|
}
|
|
300
299
|
|
|
301
300
|
template<typename A>
|
|
302
|
-
double CouponList<A>::getLowerBound(
|
|
301
|
+
double CouponList<A>::getLowerBound(uint8_t numStdDev) const {
|
|
303
302
|
HllUtil<A>::checkNumStdDev(numStdDev);
|
|
304
|
-
const
|
|
305
|
-
const double
|
|
306
|
-
|
|
307
|
-
return fmax(tmp, couponCount);
|
|
303
|
+
const double est = CubicInterpolation<A>::usingXAndYTables(couponCount_);
|
|
304
|
+
const double tmp = est / (1.0 + (numStdDev * hll_constants::COUPON_RSE));
|
|
305
|
+
return fmax(tmp, couponCount_);
|
|
308
306
|
}
|
|
309
307
|
|
|
310
308
|
template<typename A>
|
|
311
|
-
double CouponList<A>::getUpperBound(
|
|
309
|
+
double CouponList<A>::getUpperBound(uint8_t numStdDev) const {
|
|
312
310
|
HllUtil<A>::checkNumStdDev(numStdDev);
|
|
313
|
-
const
|
|
314
|
-
const double
|
|
315
|
-
|
|
316
|
-
return fmax(tmp, couponCount);
|
|
311
|
+
const double est = CubicInterpolation<A>::usingXAndYTables(couponCount_);
|
|
312
|
+
const double tmp = est / (1.0 - (numStdDev * hll_constants::COUPON_RSE));
|
|
313
|
+
return fmax(tmp, couponCount_);
|
|
317
314
|
}
|
|
318
315
|
|
|
319
316
|
template<typename A>
|
|
320
317
|
bool CouponList<A>::isEmpty() const { return getCouponCount() == 0; }
|
|
321
318
|
|
|
322
319
|
template<typename A>
|
|
323
|
-
|
|
324
|
-
return getMemDataStart() +
|
|
320
|
+
uint32_t CouponList<A>::getUpdatableSerializationBytes() const {
|
|
321
|
+
return getMemDataStart() + static_cast<uint32_t>(coupons_.size()) * sizeof(uint32_t);
|
|
325
322
|
}
|
|
326
323
|
|
|
327
324
|
template<typename A>
|
|
328
|
-
|
|
329
|
-
return
|
|
325
|
+
uint32_t CouponList<A>::getCouponCount() const {
|
|
326
|
+
return couponCount_;
|
|
330
327
|
}
|
|
331
328
|
|
|
332
329
|
template<typename A>
|
|
333
|
-
|
|
334
|
-
return getMemDataStart() + (
|
|
330
|
+
uint32_t CouponList<A>::getCompactSerializationBytes() const {
|
|
331
|
+
return getMemDataStart() + (couponCount_ << 2);
|
|
335
332
|
}
|
|
336
333
|
|
|
337
334
|
template<typename A>
|
|
338
|
-
|
|
339
|
-
return
|
|
335
|
+
uint32_t CouponList<A>::getMemDataStart() const {
|
|
336
|
+
return hll_constants::LIST_INT_ARR_START;
|
|
340
337
|
}
|
|
341
338
|
|
|
342
339
|
template<typename A>
|
|
343
|
-
|
|
344
|
-
return
|
|
340
|
+
uint8_t CouponList<A>::getPreInts() const {
|
|
341
|
+
return hll_constants::LIST_PREINTS;
|
|
345
342
|
}
|
|
346
343
|
|
|
347
344
|
template<typename A>
|
|
348
345
|
bool CouponList<A>::isCompact() const { return false; }
|
|
349
346
|
|
|
350
347
|
template<typename A>
|
|
351
|
-
bool CouponList<A>::isOutOfOrderFlag() const { return
|
|
348
|
+
bool CouponList<A>::isOutOfOrderFlag() const { return oooFlag_; }
|
|
352
349
|
|
|
353
350
|
template<typename A>
|
|
354
351
|
void CouponList<A>::putOutOfOrderFlag(bool oooFlag) {
|
|
355
|
-
|
|
352
|
+
oooFlag_ = oooFlag;
|
|
356
353
|
}
|
|
357
354
|
|
|
358
355
|
template<typename A>
|
|
359
356
|
A CouponList<A>::getAllocator() const {
|
|
360
|
-
return
|
|
357
|
+
return coupons_.get_allocator();
|
|
361
358
|
}
|
|
362
359
|
|
|
363
360
|
template<typename A>
|
|
@@ -372,12 +369,12 @@ HllSketchImpl<A>* CouponList<A>::promoteHeapListOrSetToHll(CouponList& src) {
|
|
|
372
369
|
|
|
373
370
|
template<typename A>
|
|
374
371
|
coupon_iterator<A> CouponList<A>::begin(bool all) const {
|
|
375
|
-
return coupon_iterator<A>(
|
|
372
|
+
return coupon_iterator<A>(coupons_.data(), coupons_.size(), 0, all);
|
|
376
373
|
}
|
|
377
374
|
|
|
378
375
|
template<typename A>
|
|
379
376
|
coupon_iterator<A> CouponList<A>::end() const {
|
|
380
|
-
return coupon_iterator<A>(
|
|
377
|
+
return coupon_iterator<A>(coupons_.data(), coupons_.size(), coupons_.size(), false);
|
|
381
378
|
}
|
|
382
379
|
|
|
383
380
|
}
|
|
@@ -33,7 +33,7 @@ class HllSketchImplFactory;
|
|
|
33
33
|
template<typename A>
|
|
34
34
|
class CouponList : public HllSketchImpl<A> {
|
|
35
35
|
public:
|
|
36
|
-
CouponList(
|
|
36
|
+
CouponList(uint8_t lgConfigK, target_hll_type tgtHllType, hll_mode mode, const A& allocator);
|
|
37
37
|
CouponList(const CouponList& that, target_hll_type tgtHllType);
|
|
38
38
|
|
|
39
39
|
static CouponList* newList(const void* bytes, size_t len, const A& allocator);
|
|
@@ -47,15 +47,15 @@ class CouponList : public HllSketchImpl<A> {
|
|
|
47
47
|
virtual CouponList* copy() const;
|
|
48
48
|
virtual CouponList* copyAs(target_hll_type tgtHllType) const;
|
|
49
49
|
|
|
50
|
-
virtual HllSketchImpl<A>* couponUpdate(
|
|
50
|
+
virtual HllSketchImpl<A>* couponUpdate(uint32_t coupon);
|
|
51
51
|
|
|
52
52
|
virtual double getEstimate() const;
|
|
53
53
|
virtual double getCompositeEstimate() const;
|
|
54
|
-
virtual double getUpperBound(
|
|
55
|
-
virtual double getLowerBound(
|
|
54
|
+
virtual double getUpperBound(uint8_t numStdDev) const;
|
|
55
|
+
virtual double getLowerBound(uint8_t numStdDev) const;
|
|
56
56
|
|
|
57
57
|
virtual bool isEmpty() const;
|
|
58
|
-
virtual
|
|
58
|
+
virtual uint32_t getCouponCount() const;
|
|
59
59
|
|
|
60
60
|
coupon_iterator<A> begin(bool all = false) const;
|
|
61
61
|
coupon_iterator<A> end() const;
|
|
@@ -63,24 +63,24 @@ class CouponList : public HllSketchImpl<A> {
|
|
|
63
63
|
protected:
|
|
64
64
|
using ClAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>>;
|
|
65
65
|
|
|
66
|
-
using vector_int = std::vector<
|
|
66
|
+
using vector_int = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
|
|
67
67
|
|
|
68
68
|
HllSketchImpl<A>* promoteHeapListToSet(CouponList& list);
|
|
69
69
|
HllSketchImpl<A>* promoteHeapListOrSetToHll(CouponList& src);
|
|
70
70
|
|
|
71
|
-
virtual
|
|
72
|
-
virtual
|
|
73
|
-
virtual
|
|
74
|
-
virtual
|
|
71
|
+
virtual uint32_t getUpdatableSerializationBytes() const;
|
|
72
|
+
virtual uint32_t getCompactSerializationBytes() const;
|
|
73
|
+
virtual uint32_t getMemDataStart() const;
|
|
74
|
+
virtual uint8_t getPreInts() const;
|
|
75
75
|
virtual bool isCompact() const;
|
|
76
76
|
virtual bool isOutOfOrderFlag() const;
|
|
77
77
|
virtual void putOutOfOrderFlag(bool oooFlag);
|
|
78
78
|
|
|
79
79
|
virtual A getAllocator() const;
|
|
80
80
|
|
|
81
|
-
|
|
82
|
-
bool
|
|
83
|
-
vector_int
|
|
81
|
+
uint32_t couponCount_;
|
|
82
|
+
bool oooFlag_;
|
|
83
|
+
vector_int coupons_;
|
|
84
84
|
|
|
85
85
|
friend class HllSketchImplFactory<A>;
|
|
86
86
|
};
|
|
@@ -102,10 +102,8 @@ double CubicInterpolation<A>::usingXAndYTables(const double xArr[], const double
|
|
|
102
102
|
else if (offset == numEntries-2) { // corner case
|
|
103
103
|
return (interpolateUsingXAndYTables<A>(xArr, yArr, (offset-2), x));
|
|
104
104
|
}
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
}
|
|
108
|
-
throw std::logic_error("Exception should be unreachable");
|
|
105
|
+
// main case
|
|
106
|
+
return (interpolateUsingXAndYTables<A>(xArr, yArr, (offset-1), x));
|
|
109
107
|
}
|
|
110
108
|
|
|
111
109
|
// In C: again-two-registers cubic_interpolate_aux L1368
|
|
@@ -68,7 +68,7 @@ double HarmonicNumbers<A>::harmonicNumber(const uint64_t x_i) {
|
|
|
68
68
|
if (x_i < NUM_EXACT_HARMONIC_NUMBERS) {
|
|
69
69
|
return tableOfExactHarmonicNumbers[x_i];
|
|
70
70
|
} else {
|
|
71
|
-
double x = x_i;
|
|
71
|
+
double x = static_cast<double>(x_i);
|
|
72
72
|
double invSq = 1.0 / (x * x);
|
|
73
73
|
double sum = log(x) + EULER_MASCHERONI_CONSTANT + (1.0 / (2.0 * x));
|
|
74
74
|
/* note: the number of terms included from this series expansion is appropriate
|