datasketches 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +7 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +24 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +14 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +121 -87
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +65 -80
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +28 -28
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +34 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +72 -62
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +68 -45
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +6 -6
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +9 -9
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +47 -56
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +34 -42
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +42 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +107 -58
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +4 -4
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +2 -0
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -28
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +51 -64
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +12 -12
- metadata +8 -3
|
@@ -39,15 +39,15 @@ allocator_(allocator),
|
|
|
39
39
|
lg_cur_size_(lg_cur_size),
|
|
40
40
|
lg_max_size_(lg_max_size),
|
|
41
41
|
num_active_(0),
|
|
42
|
-
keys_(allocator_.allocate(
|
|
42
|
+
keys_(allocator_.allocate(1ULL << lg_cur_size)),
|
|
43
43
|
values_(nullptr),
|
|
44
44
|
states_(nullptr)
|
|
45
45
|
{
|
|
46
46
|
AllocV av(allocator_);
|
|
47
|
-
values_ = av.allocate(
|
|
47
|
+
values_ = av.allocate(1ULL << lg_cur_size);
|
|
48
48
|
AllocU16 au16(allocator_);
|
|
49
|
-
states_ = au16.allocate(
|
|
50
|
-
std::fill(states_, states_ + (
|
|
49
|
+
states_ = au16.allocate(1ULL << lg_cur_size);
|
|
50
|
+
std::fill(states_, states_ + (1ULL << lg_cur_size), static_cast<uint16_t>(0));
|
|
51
51
|
}
|
|
52
52
|
|
|
53
53
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
@@ -56,14 +56,14 @@ allocator_(other.allocator_),
|
|
|
56
56
|
lg_cur_size_(other.lg_cur_size_),
|
|
57
57
|
lg_max_size_(other.lg_max_size_),
|
|
58
58
|
num_active_(other.num_active_),
|
|
59
|
-
keys_(allocator_.allocate(
|
|
59
|
+
keys_(allocator_.allocate(1ULL << lg_cur_size_)),
|
|
60
60
|
values_(nullptr),
|
|
61
61
|
states_(nullptr)
|
|
62
62
|
{
|
|
63
63
|
AllocV av(allocator_);
|
|
64
|
-
values_ = av.allocate(
|
|
64
|
+
values_ = av.allocate(1ULL << lg_cur_size_);
|
|
65
65
|
AllocU16 au16(allocator_);
|
|
66
|
-
states_ = au16.allocate(
|
|
66
|
+
states_ = au16.allocate(1ULL << lg_cur_size_);
|
|
67
67
|
const uint32_t size = 1 << lg_cur_size_;
|
|
68
68
|
if (num_active_ > 0) {
|
|
69
69
|
auto num = num_active_;
|
|
@@ -177,7 +177,7 @@ uint8_t reverse_purge_hash_map<K, V, H, E, A>::get_lg_max_size() const {
|
|
|
177
177
|
|
|
178
178
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
179
179
|
uint32_t reverse_purge_hash_map<K, V, H, E, A>::get_capacity() const {
|
|
180
|
-
return (1 << lg_cur_size_) * LOAD_FACTOR;
|
|
180
|
+
return static_cast<uint32_t>((1 << lg_cur_size_) * LOAD_FACTOR);
|
|
181
181
|
}
|
|
182
182
|
|
|
183
183
|
template<typename K, typename V, typename H, typename E, typename A>
|
|
@@ -246,7 +246,7 @@ void reverse_purge_hash_map<K, V, H, E, A>::hash_delete(uint32_t delete_index) {
|
|
|
246
246
|
// if none are found, the status is changed
|
|
247
247
|
states_[delete_index] = 0; // mark as empty
|
|
248
248
|
keys_[delete_index].~K();
|
|
249
|
-
|
|
249
|
+
uint16_t drift = 1;
|
|
250
250
|
const uint32_t mask = (1 << lg_cur_size_) - 1;
|
|
251
251
|
uint32_t probe = (delete_index + drift) & mask; // map length must be a power of 2
|
|
252
252
|
// advance until we find a free location replacing locations as needed
|
|
@@ -322,7 +322,7 @@ void reverse_purge_hash_map<K, V, H, E, A>::resize(uint8_t lg_new_size) {
|
|
|
322
322
|
values_ = av.allocate(new_size);
|
|
323
323
|
AllocU16 au16(allocator_);
|
|
324
324
|
states_ = au16.allocate(new_size);
|
|
325
|
-
std::fill(states_, states_ + new_size, 0);
|
|
325
|
+
std::fill(states_, states_ + new_size, static_cast<uint16_t>(0));
|
|
326
326
|
num_active_ = 0;
|
|
327
327
|
lg_cur_size_ = lg_new_size;
|
|
328
328
|
for (uint32_t i = 0; i < old_size; i++) {
|
|
@@ -39,8 +39,8 @@ TEST_CASE("reverse purge hash map: one item", "[frequent_items_sketch]") {
|
|
|
39
39
|
TEST_CASE("reverse purge hash map: iterator", "[frequent_items_sketch]") {
|
|
40
40
|
reverse_purge_hash_map<int> map(3, 4, std::allocator<int>());
|
|
41
41
|
for (int i = 0; i < 11; i++) map.adjust_or_insert(i, 1); // this should fit with no purge
|
|
42
|
-
|
|
43
|
-
for (auto
|
|
42
|
+
uint64_t sum = 0;
|
|
43
|
+
for (auto it: map) sum += it.second;
|
|
44
44
|
REQUIRE(sum == 11);
|
|
45
45
|
}
|
|
46
46
|
|
|
@@ -26,15 +26,15 @@
|
|
|
26
26
|
namespace datasketches {
|
|
27
27
|
|
|
28
28
|
template<typename A>
|
|
29
|
-
AuxHashMap<A>::AuxHashMap(
|
|
29
|
+
AuxHashMap<A>::AuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator):
|
|
30
30
|
lgConfigK(lgConfigK),
|
|
31
31
|
lgAuxArrInts(lgAuxArrInts),
|
|
32
32
|
auxCount(0),
|
|
33
|
-
entries(
|
|
33
|
+
entries(1ULL << lgAuxArrInts, 0, allocator)
|
|
34
34
|
{}
|
|
35
35
|
|
|
36
36
|
template<typename A>
|
|
37
|
-
AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(
|
|
37
|
+
AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator) {
|
|
38
38
|
return new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgAuxArrInts, lgConfigK, allocator);
|
|
39
39
|
}
|
|
40
40
|
|
|
@@ -45,42 +45,42 @@ AuxHashMap<A>* AuxHashMap<A>::newAuxHashMap(const AuxHashMap& that) {
|
|
|
45
45
|
|
|
46
46
|
template<typename A>
|
|
47
47
|
AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
|
|
48
|
-
|
|
49
|
-
|
|
48
|
+
uint8_t lgConfigK,
|
|
49
|
+
uint32_t auxCount, uint8_t lgAuxArrInts,
|
|
50
50
|
bool srcCompact, const A& allocator) {
|
|
51
|
-
|
|
51
|
+
uint8_t lgArrInts = lgAuxArrInts;
|
|
52
52
|
if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
|
|
53
53
|
lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
|
|
54
54
|
} else { // updatable
|
|
55
55
|
lgArrInts = lgAuxArrInts;
|
|
56
56
|
}
|
|
57
57
|
|
|
58
|
-
|
|
58
|
+
const uint32_t configKmask = (1 << lgConfigK) - 1;
|
|
59
59
|
|
|
60
60
|
AuxHashMap<A>* auxHashMap;
|
|
61
|
-
const
|
|
61
|
+
const uint32_t* auxPtr = static_cast<const uint32_t*>(bytes);
|
|
62
62
|
if (srcCompact) {
|
|
63
63
|
if (len < auxCount * sizeof(int)) {
|
|
64
64
|
throw std::out_of_range("Input array too small to hold AuxHashMap image");
|
|
65
65
|
}
|
|
66
66
|
auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
|
|
67
|
-
for (
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
67
|
+
for (uint32_t i = 0; i < auxCount; ++i) {
|
|
68
|
+
const uint32_t pair = auxPtr[i];
|
|
69
|
+
const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
|
|
70
|
+
const uint8_t value = HllUtil<A>::getValue(pair);
|
|
71
71
|
auxHashMap->mustAdd(slotNo, value);
|
|
72
72
|
}
|
|
73
73
|
} else { // updatable
|
|
74
|
-
|
|
75
|
-
if (len < itemsToRead * sizeof(
|
|
74
|
+
uint32_t itemsToRead = 1 << lgAuxArrInts;
|
|
75
|
+
if (len < itemsToRead * sizeof(uint32_t)) {
|
|
76
76
|
throw std::out_of_range("Input array too small to hold AuxHashMap image");
|
|
77
77
|
}
|
|
78
78
|
auxHashMap = new (ahmAlloc(allocator).allocate(1)) AuxHashMap<A>(lgArrInts, lgConfigK, allocator);
|
|
79
|
-
for (
|
|
80
|
-
|
|
81
|
-
if (pair ==
|
|
82
|
-
|
|
83
|
-
|
|
79
|
+
for (uint32_t i = 0; i < itemsToRead; ++i) {
|
|
80
|
+
const uint32_t pair = auxPtr[i];
|
|
81
|
+
if (pair == hll_constants::EMPTY) { continue; }
|
|
82
|
+
const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
|
|
83
|
+
const uint8_t value = HllUtil<A>::getValue(pair);
|
|
84
84
|
auxHashMap->mustAdd(slotNo, value);
|
|
85
85
|
}
|
|
86
86
|
}
|
|
@@ -94,10 +94,10 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(const void* bytes, size_t len,
|
|
|
94
94
|
}
|
|
95
95
|
|
|
96
96
|
template<typename A>
|
|
97
|
-
AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is,
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
97
|
+
AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, uint8_t lgConfigK,
|
|
98
|
+
uint32_t auxCount, uint8_t lgAuxArrInts,
|
|
99
|
+
bool srcCompact, const A& allocator) {
|
|
100
|
+
uint8_t lgArrInts = lgAuxArrInts;
|
|
101
101
|
if (srcCompact) { // early compact versions didn't use LgArr byte field so ignore input
|
|
102
102
|
lgArrInts = HllUtil<A>::computeLgArrInts(HLL, auxCount, lgConfigK);
|
|
103
103
|
} else { // updatable
|
|
@@ -108,24 +108,22 @@ AuxHashMap<A>* AuxHashMap<A>::deserialize(std::istream& is, const int lgConfigK,
|
|
|
108
108
|
typedef std::unique_ptr<AuxHashMap<A>, std::function<void(AuxHashMap<A>*)>> aux_hash_map_ptr;
|
|
109
109
|
aux_hash_map_ptr aux_ptr(auxHashMap, auxHashMap->make_deleter());
|
|
110
110
|
|
|
111
|
-
|
|
111
|
+
const uint32_t configKmask = (1 << lgConfigK) - 1;
|
|
112
112
|
|
|
113
113
|
if (srcCompact) {
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
int value = HllUtil<A>::getValue(pair);
|
|
114
|
+
for (uint32_t i = 0; i < auxCount; ++i) {
|
|
115
|
+
const auto pair = read<int>(is);
|
|
116
|
+
uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
|
|
117
|
+
uint8_t value = HllUtil<A>::getValue(pair);
|
|
119
118
|
auxHashMap->mustAdd(slotNo, value);
|
|
120
119
|
}
|
|
121
120
|
} else { // updatable
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
int value = HllUtil<A>::getValue(pair);
|
|
121
|
+
const uint32_t itemsToRead = 1 << lgAuxArrInts;
|
|
122
|
+
for (uint32_t i = 0; i < itemsToRead; ++i) {
|
|
123
|
+
const auto pair = read<int>(is);
|
|
124
|
+
if (pair == hll_constants::EMPTY) { continue; }
|
|
125
|
+
const uint32_t slotNo = HllUtil<A>::getLow26(pair) & configKmask;
|
|
126
|
+
const uint8_t value = HllUtil<A>::getValue(pair);
|
|
129
127
|
auxHashMap->mustAdd(slotNo, value);
|
|
130
128
|
}
|
|
131
129
|
}
|
|
@@ -153,34 +151,34 @@ AuxHashMap<A>* AuxHashMap<A>::copy() const {
|
|
|
153
151
|
}
|
|
154
152
|
|
|
155
153
|
template<typename A>
|
|
156
|
-
|
|
154
|
+
uint32_t AuxHashMap<A>::getAuxCount() const {
|
|
157
155
|
return auxCount;
|
|
158
156
|
}
|
|
159
157
|
|
|
160
158
|
template<typename A>
|
|
161
|
-
|
|
159
|
+
uint32_t* AuxHashMap<A>::getAuxIntArr(){
|
|
162
160
|
return entries.data();
|
|
163
161
|
}
|
|
164
162
|
|
|
165
163
|
template<typename A>
|
|
166
|
-
|
|
164
|
+
uint8_t AuxHashMap<A>::getLgAuxArrInts() const {
|
|
167
165
|
return lgAuxArrInts;
|
|
168
166
|
}
|
|
169
167
|
|
|
170
168
|
template<typename A>
|
|
171
|
-
|
|
169
|
+
uint32_t AuxHashMap<A>::getCompactSizeBytes() const {
|
|
172
170
|
return auxCount << 2;
|
|
173
171
|
}
|
|
174
172
|
|
|
175
173
|
template<typename A>
|
|
176
|
-
|
|
174
|
+
uint32_t AuxHashMap<A>::getUpdatableSizeBytes() const {
|
|
177
175
|
return 4 << lgAuxArrInts;
|
|
178
176
|
}
|
|
179
177
|
|
|
180
178
|
template<typename A>
|
|
181
|
-
void AuxHashMap<A>::mustAdd(
|
|
182
|
-
const
|
|
183
|
-
const
|
|
179
|
+
void AuxHashMap<A>::mustAdd(uint32_t slotNo, uint8_t value) {
|
|
180
|
+
const int32_t index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
|
|
181
|
+
const uint32_t entry_pair = HllUtil<A>::pair(slotNo, value);
|
|
184
182
|
if (index >= 0) {
|
|
185
183
|
throw std::invalid_argument("Found a slotNo that should not be there: SlotNo: "
|
|
186
184
|
+ std::to_string(slotNo) + ", Value: " + std::to_string(value));
|
|
@@ -193,8 +191,8 @@ void AuxHashMap<A>::mustAdd(const int slotNo, const int value) {
|
|
|
193
191
|
}
|
|
194
192
|
|
|
195
193
|
template<typename A>
|
|
196
|
-
|
|
197
|
-
const
|
|
194
|
+
uint8_t AuxHashMap<A>::mustFindValueFor(uint32_t slotNo) const {
|
|
195
|
+
const int32_t index = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
|
|
198
196
|
if (index >= 0) {
|
|
199
197
|
return HllUtil<A>::getValue(entries[index]);
|
|
200
198
|
}
|
|
@@ -203,8 +201,8 @@ int AuxHashMap<A>::mustFindValueFor(const int slotNo) const {
|
|
|
203
201
|
}
|
|
204
202
|
|
|
205
203
|
template<typename A>
|
|
206
|
-
void AuxHashMap<A>::mustReplace(
|
|
207
|
-
const
|
|
204
|
+
void AuxHashMap<A>::mustReplace(uint32_t slotNo, uint8_t value) {
|
|
205
|
+
const int32_t idx = find(entries.data(), lgAuxArrInts, lgConfigK, slotNo);
|
|
208
206
|
if (idx >= 0) {
|
|
209
207
|
entries[idx] = HllUtil<A>::pair(slotNo, value);
|
|
210
208
|
return;
|
|
@@ -216,7 +214,7 @@ void AuxHashMap<A>::mustReplace(const int slotNo, const int value) {
|
|
|
216
214
|
|
|
217
215
|
template<typename A>
|
|
218
216
|
void AuxHashMap<A>::checkGrow() {
|
|
219
|
-
if ((
|
|
217
|
+
if ((hll_constants::RESIZE_DENOM * auxCount) > (hll_constants::RESIZE_NUMER * (1 << lgAuxArrInts))) {
|
|
220
218
|
growAuxSpace();
|
|
221
219
|
}
|
|
222
220
|
}
|
|
@@ -227,10 +225,10 @@ void AuxHashMap<A>::growAuxSpace() {
|
|
|
227
225
|
const int newArrLen = 1 << ++lgAuxArrInts;
|
|
228
226
|
vector_int entries_new(newArrLen, 0, entries.get_allocator());
|
|
229
227
|
for (size_t i = 0; i < entries.size(); ++i) {
|
|
230
|
-
const
|
|
231
|
-
if (fetched !=
|
|
228
|
+
const uint32_t fetched = entries[i];
|
|
229
|
+
if (fetched != hll_constants::EMPTY) {
|
|
232
230
|
// find empty in new array
|
|
233
|
-
const
|
|
231
|
+
const int32_t idx = find(entries_new.data(), lgAuxArrInts, lgConfigK, fetched & configKmask);
|
|
234
232
|
entries_new[~idx] = fetched;
|
|
235
233
|
}
|
|
236
234
|
}
|
|
@@ -243,21 +241,20 @@ void AuxHashMap<A>::growAuxSpace() {
|
|
|
243
241
|
//Continues searching.
|
|
244
242
|
//If the probe comes back to original index, throws an exception.
|
|
245
243
|
template<typename A>
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
const
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
const int loopIndex = probe;
|
|
244
|
+
int32_t AuxHashMap<A>::find(const uint32_t* auxArr, uint8_t lgAuxArrInts, uint8_t lgConfigK, uint32_t slotNo) {
|
|
245
|
+
const uint32_t auxArrMask = (1 << lgAuxArrInts) - 1;
|
|
246
|
+
const uint32_t configKmask = (1 << lgConfigK) - 1;
|
|
247
|
+
uint32_t probe = slotNo & auxArrMask;
|
|
248
|
+
const uint32_t loopIndex = probe;
|
|
252
249
|
do {
|
|
253
|
-
const
|
|
254
|
-
if (arrVal ==
|
|
250
|
+
const uint32_t arrVal = auxArr[probe];
|
|
251
|
+
if (arrVal == hll_constants::EMPTY) { //Compares on entire entry
|
|
255
252
|
return ~probe; //empty
|
|
256
253
|
}
|
|
257
254
|
else if (slotNo == (arrVal & configKmask)) { //Compares only on slotNo
|
|
258
255
|
return probe; //found given slotNo, return probe = index into aux array
|
|
259
256
|
}
|
|
260
|
-
const
|
|
257
|
+
const uint32_t stride = (slotNo >> lgAuxArrInts) | 1;
|
|
261
258
|
probe = (probe + stride) & auxArrMask;
|
|
262
259
|
} while (probe != loopIndex);
|
|
263
260
|
throw std::runtime_error("Key not found and no empty slots!");
|
|
@@ -265,12 +262,12 @@ int AuxHashMap<A>::find(const int* auxArr, const int lgAuxArrInts, const int lgC
|
|
|
265
262
|
|
|
266
263
|
template<typename A>
|
|
267
264
|
coupon_iterator<A> AuxHashMap<A>::begin(bool all) const {
|
|
268
|
-
return coupon_iterator<A>(entries.data(),
|
|
265
|
+
return coupon_iterator<A>(entries.data(), 1ULL << lgAuxArrInts, 0, all);
|
|
269
266
|
}
|
|
270
267
|
|
|
271
268
|
template<typename A>
|
|
272
269
|
coupon_iterator<A> AuxHashMap<A>::end() const {
|
|
273
|
-
return coupon_iterator<A>(entries.data(),
|
|
270
|
+
return coupon_iterator<A>(entries.data(), 1ULL << lgAuxArrInts, 1ULL << lgAuxArrInts, false);
|
|
274
271
|
}
|
|
275
272
|
|
|
276
273
|
}
|
|
@@ -31,49 +31,49 @@ namespace datasketches {
|
|
|
31
31
|
template<typename A>
|
|
32
32
|
class AuxHashMap final {
|
|
33
33
|
public:
|
|
34
|
-
AuxHashMap(
|
|
35
|
-
static AuxHashMap* newAuxHashMap(
|
|
34
|
+
AuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator);
|
|
35
|
+
static AuxHashMap* newAuxHashMap(uint8_t lgAuxArrInts, uint8_t lgConfigK, const A& allocator);
|
|
36
36
|
static AuxHashMap* newAuxHashMap(const AuxHashMap<A>& that);
|
|
37
37
|
|
|
38
38
|
static AuxHashMap* deserialize(const void* bytes, size_t len,
|
|
39
|
-
|
|
40
|
-
|
|
39
|
+
uint8_t lgConfigK,
|
|
40
|
+
uint32_t auxCount, uint8_t lgAuxArrInts,
|
|
41
41
|
bool srcCompact, const A& allocator);
|
|
42
|
-
static AuxHashMap* deserialize(std::istream& is,
|
|
43
|
-
|
|
42
|
+
static AuxHashMap* deserialize(std::istream& is, uint8_t lgConfigK,
|
|
43
|
+
uint32_t auxCount, uint8_t lgAuxArrInts,
|
|
44
44
|
bool srcCompact, const A& allocator);
|
|
45
45
|
virtual ~AuxHashMap() = default;
|
|
46
46
|
static std::function<void(AuxHashMap<A>*)> make_deleter();
|
|
47
47
|
|
|
48
48
|
AuxHashMap* copy() const;
|
|
49
|
-
|
|
50
|
-
|
|
49
|
+
uint32_t getUpdatableSizeBytes() const;
|
|
50
|
+
uint32_t getCompactSizeBytes() const;
|
|
51
51
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
52
|
+
uint32_t getAuxCount() const;
|
|
53
|
+
uint32_t* getAuxIntArr();
|
|
54
|
+
uint8_t getLgAuxArrInts() const;
|
|
55
55
|
|
|
56
56
|
coupon_iterator<A> begin(bool all = false) const;
|
|
57
57
|
coupon_iterator<A> end() const;
|
|
58
58
|
|
|
59
|
-
void mustAdd(
|
|
60
|
-
|
|
61
|
-
void mustReplace(
|
|
59
|
+
void mustAdd(uint32_t slotNo, uint8_t value);
|
|
60
|
+
uint8_t mustFindValueFor(uint32_t slotNo) const;
|
|
61
|
+
void mustReplace(uint32_t slotNo, uint8_t value);
|
|
62
62
|
|
|
63
63
|
private:
|
|
64
64
|
typedef typename std::allocator_traits<A>::template rebind_alloc<AuxHashMap<A>> ahmAlloc;
|
|
65
65
|
|
|
66
|
-
using vector_int = std::vector<
|
|
66
|
+
using vector_int = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
|
|
67
67
|
|
|
68
68
|
// static so it can be used when resizing
|
|
69
|
-
static
|
|
69
|
+
static int32_t find(const uint32_t* auxArr, uint8_t lgAuxArrInts, uint8_t lgConfigK, uint32_t slotNo);
|
|
70
70
|
|
|
71
71
|
void checkGrow();
|
|
72
72
|
void growAuxSpace();
|
|
73
73
|
|
|
74
|
-
const
|
|
75
|
-
|
|
76
|
-
|
|
74
|
+
const uint8_t lgConfigK;
|
|
75
|
+
uint8_t lgAuxArrInts;
|
|
76
|
+
uint32_t auxCount;
|
|
77
77
|
vector_int entries;
|
|
78
78
|
};
|
|
79
79
|
|
|
@@ -27,30 +27,30 @@
|
|
|
27
27
|
|
|
28
28
|
namespace datasketches {
|
|
29
29
|
|
|
30
|
-
static const
|
|
30
|
+
static const uint32_t numXArrValues = 257;
|
|
31
31
|
|
|
32
32
|
/**
|
|
33
33
|
* 18 Values, index 0 is LgK = 4, index 17 is LgK = 21.
|
|
34
34
|
*/
|
|
35
|
-
static const
|
|
35
|
+
static const uint32_t yStrides[] =
|
|
36
36
|
{1, 2, 3, 5, 10, 20, 40, 80, 160, 320, 640, 1280, 2560, 5120, 10240, 20480, 40960, 81920};
|
|
37
37
|
|
|
38
38
|
template<typename A>
|
|
39
|
-
|
|
40
|
-
if (logK <
|
|
41
|
-
throw std::invalid_argument("logK must be in range [" + std::to_string(
|
|
42
|
-
+ ", " + std::to_string(
|
|
39
|
+
uint32_t CompositeInterpolationXTable<A>::get_y_stride(uint8_t logK) {
|
|
40
|
+
if (logK < hll_constants::MIN_LOG_K || logK > hll_constants::MAX_LOG_K) {
|
|
41
|
+
throw std::invalid_argument("logK must be in range [" + std::to_string(hll_constants::MIN_LOG_K)
|
|
42
|
+
+ ", " + std::to_string(hll_constants::MAX_LOG_K) + "]. Found: "
|
|
43
43
|
+ std::to_string(logK));
|
|
44
44
|
}
|
|
45
|
-
return yStrides[logK -
|
|
45
|
+
return yStrides[logK - hll_constants::MIN_LOG_K];
|
|
46
46
|
}
|
|
47
47
|
|
|
48
48
|
template<typename A>
|
|
49
|
-
|
|
49
|
+
uint32_t CompositeInterpolationXTable<A>::get_x_arr_length() {
|
|
50
50
|
return numXArrValues;
|
|
51
51
|
}
|
|
52
52
|
|
|
53
|
-
static const double
|
|
53
|
+
static const double xArray[18][numXArrValues] = {
|
|
54
54
|
{
|
|
55
55
|
10.767999803534, 11.237701481774, 11.722738717438, 12.223246391222,
|
|
56
56
|
12.739366773787, 13.271184824495, 13.818759686650, 14.382159835785,
|
|
@@ -797,15 +797,15 @@ static const double xArr[18][numXArrValues] = {
|
|
|
797
797
|
};
|
|
798
798
|
|
|
799
799
|
template<typename A>
|
|
800
|
-
const double* CompositeInterpolationXTable<A>::get_x_arr(
|
|
801
|
-
if (logK <
|
|
802
|
-
throw std::invalid_argument("logK must be in range [" + std::to_string(
|
|
803
|
-
+ ", " + std::to_string(
|
|
800
|
+
const double* CompositeInterpolationXTable<A>::get_x_arr(uint8_t logK) {
|
|
801
|
+
if (logK < hll_constants::MIN_LOG_K || logK > hll_constants::MAX_LOG_K) {
|
|
802
|
+
throw std::invalid_argument("logK must be in range [" + std::to_string(hll_constants::MIN_LOG_K)
|
|
803
|
+
+ ", " + std::to_string(hll_constants::MAX_LOG_K) + "]. Found: "
|
|
804
804
|
+ std::to_string(logK));
|
|
805
805
|
}
|
|
806
|
-
return
|
|
806
|
+
return xArray[logK - hll_constants::MIN_LOG_K];
|
|
807
807
|
}
|
|
808
808
|
|
|
809
809
|
}
|
|
810
810
|
|
|
811
|
-
#endif // _COMPOSITEINTERPOLATIONXTABLE_INTERNAL_HPP_
|
|
811
|
+
#endif // _COMPOSITEINTERPOLATIONXTABLE_INTERNAL_HPP_
|