datasketches 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/datasketches/cpc_wrapper.cpp +1 -1
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +22 -20
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +25 -27
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +8 -6
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +11 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +5 -4
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +6 -0
- data/vendor/datasketches-cpp/count/CMakeLists.txt +42 -0
- data/vendor/datasketches-cpp/count/include/count_min.hpp +351 -0
- data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +517 -0
- data/vendor/datasketches-cpp/count/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +155 -0
- data/vendor/datasketches-cpp/count/test/count_min_test.cpp +306 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +16 -8
- data/vendor/datasketches-cpp/density/CMakeLists.txt +42 -0
- data/vendor/datasketches-cpp/density/include/density_sketch.hpp +236 -0
- data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +543 -0
- data/vendor/datasketches-cpp/density/test/CMakeLists.txt +35 -0
- data/vendor/datasketches-cpp/density/test/density_sketch_test.cpp +244 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +9 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +19 -11
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +2 -5
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +19 -7
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +98 -42
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +92 -59
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +16 -6
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +3 -21
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +8 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +14 -6
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +8 -2
- data/vendor/datasketches-cpp/hll/include/hll.hpp +9 -8
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +7 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -1
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +8 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +2 -2
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +6 -0
- data/vendor/datasketches-cpp/python/README.md +5 -5
- data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +87 -0
- data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +35 -0
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +15 -9
- data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +77 -0
- data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +205 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/include/kernel_function.hpp +98 -0
- data/vendor/datasketches-cpp/python/include/py_object_lt.hpp +37 -0
- data/vendor/datasketches-cpp/python/include/py_object_ostream.hpp +48 -0
- data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +104 -0
- data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +136 -0
- data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +101 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +16 -30
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +6 -0
- data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +95 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +127 -73
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +28 -36
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +108 -160
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +5 -4
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +99 -148
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +117 -178
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +67 -73
- data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +215 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/tests/count_min_test.py +86 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +10 -10
- data/vendor/datasketches-cpp/python/tests/density_test.py +93 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +41 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +19 -20
- data/vendor/datasketches-cpp/python/tests/kll_test.py +40 -6
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +39 -5
- data/vendor/datasketches-cpp/python/tests/req_test.py +38 -5
- data/vendor/datasketches-cpp/python/tests/theta_test.py +16 -14
- data/vendor/datasketches-cpp/python/tests/tuple_test.py +206 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +7 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +8 -3
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +4 -4
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +0 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +8 -3
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +2 -2
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +20 -6
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +30 -16
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -1
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +19 -15
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +33 -14
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -2
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +6279 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +14 -8
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +60 -46
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +4 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +58 -10
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +430 -130
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +16 -4
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +2 -2
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/bit_packing_test.cpp +80 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +42 -3
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +2 -1
- data/vendor/datasketches-cpp/version.cfg.in +1 -1
- metadata +31 -3
@@ -41,7 +41,7 @@ void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
|
|
41
41
|
if (sketch.is_empty()) return;
|
42
42
|
if (sketch.get_seed_hash() != compute_seed_hash(table_.seed_)) throw std::invalid_argument("seed hash mismatch");
|
43
43
|
table_.is_empty_ = false;
|
44
|
-
|
44
|
+
union_theta_ = std::min(union_theta_, sketch.get_theta64());
|
45
45
|
for (auto& entry: sketch) {
|
46
46
|
const uint64_t hash = EK()(entry);
|
47
47
|
if (hash < union_theta_ && hash < table_.theta_) {
|
@@ -55,7 +55,7 @@ void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
|
|
55
55
|
if (sketch.is_ordered()) break; // early stop
|
56
56
|
}
|
57
57
|
}
|
58
|
-
|
58
|
+
union_theta_ = std::min(union_theta_, table_.theta_);
|
59
59
|
}
|
60
60
|
|
61
61
|
template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
|
@@ -65,16 +65,16 @@ CS theta_union_base<EN, EK, P, S, CS, A>::get_result(bool ordered) const {
|
|
65
65
|
entries.reserve(table_.num_entries_);
|
66
66
|
uint64_t theta = std::min(union_theta_, table_.theta_);
|
67
67
|
const uint32_t nominal_num = 1 << table_.lg_nom_size_;
|
68
|
-
if (union_theta_ >=
|
68
|
+
if (union_theta_ >= table_.theta_) {
|
69
69
|
std::copy_if(table_.begin(), table_.end(), std::back_inserter(entries), key_not_zero<EN, EK>());
|
70
70
|
} else {
|
71
71
|
std::copy_if(table_.begin(), table_.end(), std::back_inserter(entries), key_not_zero_less_than<uint64_t, EN, EK>(theta));
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
72
|
+
}
|
73
|
+
if (entries.size() > nominal_num) {
|
74
|
+
std::nth_element(entries.begin(), entries.begin() + nominal_num, entries.end(), comparator());
|
75
|
+
theta = EK()(entries[nominal_num]);
|
76
|
+
entries.erase(entries.begin() + nominal_num, entries.end());
|
77
|
+
entries.shrink_to_fit();
|
78
78
|
}
|
79
79
|
if (ordered) std::sort(entries.begin(), entries.end(), comparator());
|
80
80
|
return CS(table_.is_empty_, ordered, compute_seed_hash(table_.seed_), theta, std::move(entries));
|
@@ -23,8 +23,8 @@
|
|
23
23
|
#include <vector>
|
24
24
|
#include <climits>
|
25
25
|
#include <cmath>
|
26
|
+
#include <iterator>
|
26
27
|
|
27
|
-
#include "common_defs.hpp"
|
28
28
|
#include "MurmurHash3.h"
|
29
29
|
#include "theta_comparators.hpp"
|
30
30
|
#include "theta_constants.hpp"
|
@@ -185,8 +185,14 @@ static inline uint64_t compute_hash(const void* data, size_t length, uint64_t se
|
|
185
185
|
// iterators
|
186
186
|
|
187
187
|
template<typename Entry, typename ExtractKey>
|
188
|
-
class theta_iterator
|
188
|
+
class theta_iterator {
|
189
189
|
public:
|
190
|
+
using iterator_category = std::input_iterator_tag;
|
191
|
+
using value_type = Entry;
|
192
|
+
using difference_type = std::ptrdiff_t;
|
193
|
+
using pointer = Entry*;
|
194
|
+
using reference = Entry&;
|
195
|
+
|
190
196
|
theta_iterator(Entry* entries, uint32_t size, uint32_t index);
|
191
197
|
theta_iterator& operator++();
|
192
198
|
theta_iterator operator++(int);
|
@@ -201,14 +207,20 @@ private:
|
|
201
207
|
};
|
202
208
|
|
203
209
|
template<typename Entry, typename ExtractKey>
|
204
|
-
class theta_const_iterator
|
210
|
+
class theta_const_iterator {
|
205
211
|
public:
|
212
|
+
using iterator_category = std::input_iterator_tag;
|
213
|
+
using value_type = const Entry;
|
214
|
+
using difference_type = std::ptrdiff_t;
|
215
|
+
using pointer = const Entry*;
|
216
|
+
using reference = const Entry&;
|
217
|
+
|
206
218
|
theta_const_iterator(const Entry* entries, uint32_t size, uint32_t index);
|
207
219
|
theta_const_iterator& operator++();
|
208
220
|
theta_const_iterator operator++(int);
|
209
221
|
bool operator==(const theta_const_iterator& other) const;
|
210
222
|
bool operator!=(const theta_const_iterator& other) const;
|
211
|
-
|
223
|
+
reference operator*() const;
|
212
224
|
|
213
225
|
private:
|
214
226
|
const Entry* entries_;
|
@@ -188,7 +188,7 @@ auto theta_update_sketch_base<EN, EK, A>::begin() const -> iterator {
|
|
188
188
|
|
189
189
|
template<typename EN, typename EK, typename A>
|
190
190
|
auto theta_update_sketch_base<EN, EK, A>::end() const -> iterator {
|
191
|
-
return
|
191
|
+
return entries_ + (1ULL << lg_cur_size_);
|
192
192
|
}
|
193
193
|
|
194
194
|
template<typename EN, typename EK, typename A>
|
@@ -382,7 +382,7 @@ bool theta_iterator<Entry, ExtractKey>::operator==(const theta_iterator& other)
|
|
382
382
|
}
|
383
383
|
|
384
384
|
template<typename Entry, typename ExtractKey>
|
385
|
-
auto theta_iterator<Entry, ExtractKey>::operator*() const ->
|
385
|
+
auto theta_iterator<Entry, ExtractKey>::operator*() const -> reference {
|
386
386
|
return entries_[index_];
|
387
387
|
}
|
388
388
|
|
@@ -0,0 +1,80 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#include <catch2/catch.hpp>
|
21
|
+
#include <bit_packing.hpp>
|
22
|
+
|
23
|
+
namespace datasketches {
|
24
|
+
|
25
|
+
// for every number of bits from 1 to 63
|
26
|
+
// generate pseudo-random data, pack, unpack and compare
|
27
|
+
|
28
|
+
// inverse golden ratio (0.618.. of max uint64_t)
|
29
|
+
static const uint64_t IGOLDEN64 = 0x9e3779b97f4a7c13ULL;
|
30
|
+
|
31
|
+
TEST_CASE("pack unpack bits") {
|
32
|
+
for (uint8_t bits = 1; bits <= 63; ++bits) {
|
33
|
+
const uint64_t mask = (1ULL << bits) - 1;
|
34
|
+
std::vector<uint64_t> input(8, 0);
|
35
|
+
const uint64_t igolden64 = IGOLDEN64;
|
36
|
+
uint64_t value = 0xaa55aa55aa55aa55ULL; // arbitrary starting value
|
37
|
+
for (int i = 0; i < 8; ++i) {
|
38
|
+
input[i] = value & mask;
|
39
|
+
value += igolden64;
|
40
|
+
}
|
41
|
+
std::vector<uint8_t> bytes(8 * sizeof(uint64_t), 0);
|
42
|
+
uint8_t offset = 0;
|
43
|
+
uint8_t* ptr = bytes.data();
|
44
|
+
for (int i = 0; i < 8; ++i) {
|
45
|
+
offset = pack_bits(input[i], bits, ptr, offset);
|
46
|
+
}
|
47
|
+
|
48
|
+
std::vector<uint64_t> output(8, 0);
|
49
|
+
offset = 0;
|
50
|
+
const uint8_t* cptr = bytes.data();
|
51
|
+
for (int i = 0; i < 8; ++i) {
|
52
|
+
offset = unpack_bits(output[i], bits, cptr, offset);
|
53
|
+
}
|
54
|
+
for (int i = 0; i < 8; ++i) {
|
55
|
+
REQUIRE((input[i] & mask) == output[i]);
|
56
|
+
}
|
57
|
+
}
|
58
|
+
}
|
59
|
+
|
60
|
+
TEST_CASE("pack unpack blocks") {
|
61
|
+
for (uint8_t bits = 1; bits <= 63; ++bits) {
|
62
|
+
const uint64_t mask = (1ULL << bits) - 1;
|
63
|
+
std::vector<uint64_t> input(8, 0);
|
64
|
+
const uint64_t igolden64 = IGOLDEN64;
|
65
|
+
uint64_t value = 0xaa55aa55aa55aa55ULL; // arbitrary starting value
|
66
|
+
for (int i = 0; i < 8; ++i) {
|
67
|
+
input[i] = value & mask;
|
68
|
+
value += igolden64;
|
69
|
+
}
|
70
|
+
std::vector<uint8_t> bytes(8 * sizeof(uint64_t), 0);
|
71
|
+
pack_bits_block8(input.data(), bytes.data(), bits);
|
72
|
+
std::vector<uint64_t> output(8, 0);
|
73
|
+
unpack_bits_block8(output.data(), bytes.data(), bits);
|
74
|
+
for (int i = 0; i < 8; ++i) {
|
75
|
+
REQUIRE((input[i] & mask) == output[i]);
|
76
|
+
}
|
77
|
+
}
|
78
|
+
}
|
79
|
+
|
80
|
+
} /* namespace datasketches */
|
@@ -607,7 +607,7 @@ TEST_CASE("theta sketch: wrap compact estimation from java", "[theta_sketch]") {
|
|
607
607
|
compact_theta_sketch compact_sketch = update_sketch.compact();
|
608
608
|
// the sketches are ordered, so the iteration sequence must match exactly
|
609
609
|
auto iter = sketch.begin();
|
610
|
-
for (const auto
|
610
|
+
for (const auto key: compact_sketch) {
|
611
611
|
REQUIRE(*iter == key);
|
612
612
|
++iter;
|
613
613
|
}
|
@@ -652,7 +652,7 @@ TEST_CASE("theta sketch: wrap compact v1 estimation from java", "[theta_sketch]"
|
|
652
652
|
compact_theta_sketch compact_sketch = update_sketch.compact();
|
653
653
|
// the sketches are ordered, so the iteration sequence must match exactly
|
654
654
|
auto iter = sketch.begin();
|
655
|
-
for (const auto
|
655
|
+
for (const auto key: compact_sketch) {
|
656
656
|
REQUIRE(*iter == key);
|
657
657
|
++iter;
|
658
658
|
}
|
@@ -697,7 +697,46 @@ TEST_CASE("theta sketch: wrap compact v2 estimation from java", "[theta_sketch]"
|
|
697
697
|
compact_theta_sketch compact_sketch = update_sketch.compact();
|
698
698
|
// the sketches are ordered, so the iteration sequence must match exactly
|
699
699
|
auto iter = sketch.begin();
|
700
|
-
for (const auto
|
700
|
+
for (const auto key: compact_sketch) {
|
701
|
+
REQUIRE(*iter == key);
|
702
|
+
++iter;
|
703
|
+
}
|
704
|
+
}
|
705
|
+
|
706
|
+
TEST_CASE("theta sketch: serialize deserialize compressed", "[theta_sketch]") {
|
707
|
+
auto update_sketch = update_theta_sketch::builder().build();
|
708
|
+
for (int i = 0; i < 10000; i++) update_sketch.update(i);
|
709
|
+
auto compact_sketch = update_sketch.compact();
|
710
|
+
|
711
|
+
auto bytes = compact_sketch.serialize_compressed();
|
712
|
+
{ // deserialize bytes
|
713
|
+
auto deserialized_sketch = compact_theta_sketch::deserialize(bytes.data(), bytes.size());
|
714
|
+
REQUIRE(deserialized_sketch.get_num_retained() == compact_sketch.get_num_retained());
|
715
|
+
REQUIRE(deserialized_sketch.get_theta() == compact_sketch.get_theta());
|
716
|
+
auto iter = deserialized_sketch.begin();
|
717
|
+
for (const auto key: compact_sketch) {
|
718
|
+
REQUIRE(*iter == key);
|
719
|
+
++iter;
|
720
|
+
}
|
721
|
+
}
|
722
|
+
{ // wrap bytes
|
723
|
+
auto wrapped_sketch = wrapped_compact_theta_sketch::wrap(bytes.data(), bytes.size());
|
724
|
+
REQUIRE(wrapped_sketch.get_num_retained() == compact_sketch.get_num_retained());
|
725
|
+
REQUIRE(wrapped_sketch.get_theta() == compact_sketch.get_theta());
|
726
|
+
auto iter = wrapped_sketch.begin();
|
727
|
+
for (const auto key: compact_sketch) {
|
728
|
+
REQUIRE(*iter == key);
|
729
|
+
++iter;
|
730
|
+
}
|
731
|
+
}
|
732
|
+
|
733
|
+
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
734
|
+
compact_sketch.serialize_compressed(s);
|
735
|
+
auto deserialized_sketch = compact_theta_sketch::deserialize(s);
|
736
|
+
REQUIRE(deserialized_sketch.get_num_retained() == compact_sketch.get_num_retained());
|
737
|
+
REQUIRE(deserialized_sketch.get_theta() == compact_sketch.get_theta());
|
738
|
+
auto iter = deserialized_sketch.begin();
|
739
|
+
for (const auto key: compact_sketch) {
|
701
740
|
REQUIRE(*iter == key);
|
702
741
|
++iter;
|
703
742
|
}
|
@@ -128,4 +128,29 @@ TEST_CASE("theta union: seed mismatch", "[theta_union]") {
|
|
128
128
|
REQUIRE_THROWS_AS(u.update(sketch), std::invalid_argument);
|
129
129
|
}
|
130
130
|
|
131
|
+
TEST_CASE("theta union: larger K", "[theta_union]") {
|
132
|
+
auto update_sketch1 = datasketches::update_theta_sketch::builder().set_lg_k(14).build();
|
133
|
+
for(int i = 0; i < 16384; ++i) update_sketch1.update(i);
|
134
|
+
|
135
|
+
auto update_sketch2 = datasketches::update_theta_sketch::builder().set_lg_k(14).build();
|
136
|
+
for(int i = 0; i < 26384; ++i) update_sketch2.update(i);
|
137
|
+
|
138
|
+
auto update_sketch3 = datasketches::update_theta_sketch::builder().set_lg_k(14).build();
|
139
|
+
for(int i = 0; i < 86384; ++i) update_sketch3.update(i);
|
140
|
+
|
141
|
+
auto union1 = datasketches::theta_union::builder().set_lg_k(16).build();
|
142
|
+
union1.update(update_sketch2);
|
143
|
+
union1.update(update_sketch1);
|
144
|
+
union1.update(update_sketch3);
|
145
|
+
auto result1 = union1.get_result();
|
146
|
+
REQUIRE(result1.get_estimate() == update_sketch3.get_estimate());
|
147
|
+
|
148
|
+
auto union2 = datasketches::theta_union::builder().set_lg_k(16).build();
|
149
|
+
union2.update(update_sketch1);
|
150
|
+
union2.update(update_sketch3);
|
151
|
+
union2.update(update_sketch2);
|
152
|
+
auto result2 = union2.get_result();
|
153
|
+
REQUIRE(result2.get_estimate() == update_sketch3.get_estimate());
|
154
|
+
}
|
155
|
+
|
131
156
|
} /* namespace datasketches */
|
@@ -32,7 +32,8 @@ bool tuple_sketch<S, A>::is_estimation_mode() const {
|
|
32
32
|
|
33
33
|
template<typename S, typename A>
|
34
34
|
double tuple_sketch<S, A>::get_theta() const {
|
35
|
-
return static_cast<double>(get_theta64()) /
|
35
|
+
return static_cast<double>(get_theta64()) /
|
36
|
+
static_cast<double>(theta_constants::MAX_THETA);
|
36
37
|
}
|
37
38
|
|
38
39
|
template<typename S, typename A>
|
@@ -1 +1 @@
|
|
1
|
-
4.0
|
1
|
+
4.1.0
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datasketches
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-05-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|
@@ -76,6 +76,12 @@ files:
|
|
76
76
|
- vendor/datasketches-cpp/common/test/test_allocator.cpp
|
77
77
|
- vendor/datasketches-cpp/common/test/test_allocator.hpp
|
78
78
|
- vendor/datasketches-cpp/common/test/test_type.hpp
|
79
|
+
- vendor/datasketches-cpp/count/CMakeLists.txt
|
80
|
+
- vendor/datasketches-cpp/count/include/count_min.hpp
|
81
|
+
- vendor/datasketches-cpp/count/include/count_min_impl.hpp
|
82
|
+
- vendor/datasketches-cpp/count/test/CMakeLists.txt
|
83
|
+
- vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp
|
84
|
+
- vendor/datasketches-cpp/count/test/count_min_test.cpp
|
79
85
|
- vendor/datasketches-cpp/cpc/CMakeLists.txt
|
80
86
|
- vendor/datasketches-cpp/cpc/include/compression_data.hpp
|
81
87
|
- vendor/datasketches-cpp/cpc/include/cpc_common.hpp
|
@@ -96,6 +102,11 @@ files:
|
|
96
102
|
- vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp
|
97
103
|
- vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp
|
98
104
|
- vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp
|
105
|
+
- vendor/datasketches-cpp/density/CMakeLists.txt
|
106
|
+
- vendor/datasketches-cpp/density/include/density_sketch.hpp
|
107
|
+
- vendor/datasketches-cpp/density/include/density_sketch_impl.hpp
|
108
|
+
- vendor/datasketches-cpp/density/test/CMakeLists.txt
|
109
|
+
- vendor/datasketches-cpp/density/test/density_sketch_test.cpp
|
99
110
|
- vendor/datasketches-cpp/fi/CMakeLists.txt
|
100
111
|
- vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp
|
101
112
|
- vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp
|
@@ -173,9 +184,18 @@ files:
|
|
173
184
|
- vendor/datasketches-cpp/pyproject.toml
|
174
185
|
- vendor/datasketches-cpp/python/CMakeLists.txt
|
175
186
|
- vendor/datasketches-cpp/python/README.md
|
187
|
+
- vendor/datasketches-cpp/python/datasketches/DensityWrapper.py
|
188
|
+
- vendor/datasketches-cpp/python/datasketches/KernelFunction.py
|
176
189
|
- vendor/datasketches-cpp/python/datasketches/PySerDe.py
|
190
|
+
- vendor/datasketches-cpp/python/datasketches/TuplePolicy.py
|
191
|
+
- vendor/datasketches-cpp/python/datasketches/TupleWrapper.py
|
177
192
|
- vendor/datasketches-cpp/python/datasketches/__init__.py
|
193
|
+
- vendor/datasketches-cpp/python/include/kernel_function.hpp
|
194
|
+
- vendor/datasketches-cpp/python/include/py_object_lt.hpp
|
195
|
+
- vendor/datasketches-cpp/python/include/py_object_ostream.hpp
|
178
196
|
- vendor/datasketches-cpp/python/include/py_serde.hpp
|
197
|
+
- vendor/datasketches-cpp/python/include/quantile_conditional.hpp
|
198
|
+
- vendor/datasketches-cpp/python/include/tuple_policy.hpp
|
179
199
|
- vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb
|
180
200
|
- vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb
|
181
201
|
- vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb
|
@@ -183,8 +203,10 @@ files:
|
|
183
203
|
- vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb
|
184
204
|
- vendor/datasketches-cpp/python/pybind11Path.cmd
|
185
205
|
- vendor/datasketches-cpp/python/src/__init__.py
|
206
|
+
- vendor/datasketches-cpp/python/src/count_wrapper.cpp
|
186
207
|
- vendor/datasketches-cpp/python/src/cpc_wrapper.cpp
|
187
208
|
- vendor/datasketches-cpp/python/src/datasketches.cpp
|
209
|
+
- vendor/datasketches-cpp/python/src/density_wrapper.cpp
|
188
210
|
- vendor/datasketches-cpp/python/src/fi_wrapper.cpp
|
189
211
|
- vendor/datasketches-cpp/python/src/hll_wrapper.cpp
|
190
212
|
- vendor/datasketches-cpp/python/src/kll_wrapper.cpp
|
@@ -193,16 +215,20 @@ files:
|
|
193
215
|
- vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp
|
194
216
|
- vendor/datasketches-cpp/python/src/req_wrapper.cpp
|
195
217
|
- vendor/datasketches-cpp/python/src/theta_wrapper.cpp
|
218
|
+
- vendor/datasketches-cpp/python/src/tuple_wrapper.cpp
|
196
219
|
- vendor/datasketches-cpp/python/src/vector_of_kll.cpp
|
197
220
|
- vendor/datasketches-cpp/python/src/vo_wrapper.cpp
|
198
221
|
- vendor/datasketches-cpp/python/tests/__init__.py
|
222
|
+
- vendor/datasketches-cpp/python/tests/count_min_test.py
|
199
223
|
- vendor/datasketches-cpp/python/tests/cpc_test.py
|
224
|
+
- vendor/datasketches-cpp/python/tests/density_test.py
|
200
225
|
- vendor/datasketches-cpp/python/tests/fi_test.py
|
201
226
|
- vendor/datasketches-cpp/python/tests/hll_test.py
|
202
227
|
- vendor/datasketches-cpp/python/tests/kll_test.py
|
203
228
|
- vendor/datasketches-cpp/python/tests/quantiles_test.py
|
204
229
|
- vendor/datasketches-cpp/python/tests/req_test.py
|
205
230
|
- vendor/datasketches-cpp/python/tests/theta_test.py
|
231
|
+
- vendor/datasketches-cpp/python/tests/tuple_test.py
|
206
232
|
- vendor/datasketches-cpp/python/tests/vector_of_kll_test.py
|
207
233
|
- vendor/datasketches-cpp/python/tests/vo_test.py
|
208
234
|
- vendor/datasketches-cpp/quantiles/CMakeLists.txt
|
@@ -249,6 +275,7 @@ files:
|
|
249
275
|
- vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk
|
250
276
|
- vendor/datasketches-cpp/setup.py
|
251
277
|
- vendor/datasketches-cpp/theta/CMakeLists.txt
|
278
|
+
- vendor/datasketches-cpp/theta/include/bit_packing.hpp
|
252
279
|
- vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp
|
253
280
|
- vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp
|
254
281
|
- vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp
|
@@ -275,6 +302,7 @@ files:
|
|
275
302
|
- vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp
|
276
303
|
- vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp
|
277
304
|
- vendor/datasketches-cpp/theta/test/CMakeLists.txt
|
305
|
+
- vendor/datasketches-cpp/theta/test/bit_packing_test.cpp
|
278
306
|
- vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp
|
279
307
|
- vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk
|
280
308
|
- vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk
|
@@ -342,7 +370,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
342
370
|
- !ruby/object:Gem::Version
|
343
371
|
version: '0'
|
344
372
|
requirements: []
|
345
|
-
rubygems_version: 3.4.
|
373
|
+
rubygems_version: 3.4.10
|
346
374
|
signing_key:
|
347
375
|
specification_version: 4
|
348
376
|
summary: Sketch data structures for Ruby
|