datasketches 0.2.0 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/README.md +7 -7
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
- data/vendor/datasketches-cpp/python/README.md +50 -50
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +10 -7
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
- metadata +18 -7
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -27,7 +27,7 @@ namespace datasketches {
|
|
27
27
|
using hll_sketch_test_alloc = hll_sketch_alloc<test_allocator<uint8_t>>;
|
28
28
|
using alloc = test_allocator<uint8_t>;
|
29
29
|
|
30
|
-
static void runCheckCopy(
|
30
|
+
static void runCheckCopy(uint8_t lgConfigK, target_hll_type tgtHllType) {
|
31
31
|
hll_sketch_test_alloc sk(lgConfigK, tgtHllType, false, 0);
|
32
32
|
|
33
33
|
for (int i = 0; i < 7; ++i) {
|
@@ -66,7 +66,7 @@ TEST_CASE("hll sketch: check copies", "[hll_sketch]") {
|
|
66
66
|
}
|
67
67
|
|
68
68
|
static void copyAs(target_hll_type srcType, target_hll_type dstType) {
|
69
|
-
|
69
|
+
uint8_t lgK = 8;
|
70
70
|
int n1 = 7;
|
71
71
|
int n2 = 24;
|
72
72
|
int n3 = 1000;
|
@@ -109,7 +109,7 @@ TEST_CASE("hll sketch: check copy as", "[hll_sketch]") {
|
|
109
109
|
TEST_CASE("hll sketch: check misc1", "[hll_sketch]") {
|
110
110
|
test_allocator_total_bytes = 0;
|
111
111
|
{
|
112
|
-
|
112
|
+
uint8_t lgConfigK = 8;
|
113
113
|
target_hll_type srcType = target_hll_type::HLL_8;
|
114
114
|
hll_sketch_test_alloc sk(lgConfigK, srcType, false, 0);
|
115
115
|
|
@@ -124,7 +124,7 @@ TEST_CASE("hll sketch: check misc1", "[hll_sketch]") {
|
|
124
124
|
sk.update(24); // HLL
|
125
125
|
REQUIRE(sk.get_updatable_serialization_bytes() == 40 + 256);
|
126
126
|
|
127
|
-
const
|
127
|
+
const auto hllBytes = hll_constants::HLL_BYTE_ARR_START + (1 << lgConfigK);
|
128
128
|
REQUIRE(sk.get_compact_serialization_bytes() == hllBytes);
|
129
129
|
REQUIRE(hll_sketch::get_max_updatable_serialization_bytes(lgConfigK, HLL_8) == hllBytes);
|
130
130
|
}
|
@@ -135,22 +135,22 @@ TEST_CASE("hll sketch: check num std dev", "[hll_sketch]") {
|
|
135
135
|
REQUIRE_THROWS_AS(HllUtil<>::checkNumStdDev(0), std::invalid_argument);
|
136
136
|
}
|
137
137
|
|
138
|
-
void checkSerializationSizes(
|
138
|
+
void checkSerializationSizes(uint8_t lgConfigK, target_hll_type tgtHllType) {
|
139
139
|
hll_sketch_test_alloc sk(lgConfigK, tgtHllType, false, 0);
|
140
140
|
int i;
|
141
141
|
|
142
142
|
// LIST
|
143
143
|
for (i = 0; i < 7; ++i) { sk.update(i); }
|
144
|
-
|
144
|
+
auto expected = hll_constants::LIST_INT_ARR_START + (i << 2);
|
145
145
|
REQUIRE(sk.get_compact_serialization_bytes() == expected);
|
146
|
-
expected =
|
146
|
+
expected = hll_constants::LIST_INT_ARR_START + (4 << hll_constants::LG_INIT_LIST_SIZE);
|
147
147
|
REQUIRE(sk.get_updatable_serialization_bytes() == expected);
|
148
148
|
|
149
149
|
// SET
|
150
150
|
for (i = 7; i < 24; ++i) { sk.update(i); }
|
151
|
-
expected =
|
151
|
+
expected = hll_constants::HASH_SET_INT_ARR_START + (i << 2);
|
152
152
|
REQUIRE(sk.get_compact_serialization_bytes() == expected);
|
153
|
-
expected =
|
153
|
+
expected = hll_constants::HASH_SET_INT_ARR_START + (4 << hll_constants::LG_INIT_SET_SIZE);
|
154
154
|
REQUIRE(sk.get_updatable_serialization_bytes() == expected);
|
155
155
|
}
|
156
156
|
|
@@ -178,7 +178,7 @@ TEST_CASE("hll sketch: exercise to string", "[hll_sketch]") {
|
|
178
178
|
|
179
179
|
// Creates and serializes then deserializes sketch.
|
180
180
|
// Returns true if deserialized sketch is compact.
|
181
|
-
static bool checkCompact(
|
181
|
+
static bool checkCompact(uint8_t lgK, const int n, const target_hll_type type, bool compact) {
|
182
182
|
hll_sketch_test_alloc sk(lgK, type, false, 0);
|
183
183
|
for (int i = 0; i < n; ++i) { sk.update(i); }
|
184
184
|
|
@@ -201,7 +201,7 @@ static bool checkCompact(const int lgK, const int n, const target_hll_type type,
|
|
201
201
|
TEST_CASE("hll sketch: check compact flag", "[hll_sketch]") {
|
202
202
|
test_allocator_total_bytes = 0;
|
203
203
|
{
|
204
|
-
|
204
|
+
uint8_t lgK = 8;
|
205
205
|
// unless/until we create non-updatable "direct" versions,
|
206
206
|
// deserialized image should never be compact
|
207
207
|
// LIST: follows serialization request
|
@@ -230,10 +230,10 @@ TEST_CASE("hll sketch: check compact flag", "[hll_sketch]") {
|
|
230
230
|
TEST_CASE("hll sketch: check k limits", "[hll_sketch]") {
|
231
231
|
test_allocator_total_bytes = 0;
|
232
232
|
{
|
233
|
-
hll_sketch_test_alloc sketch1(
|
234
|
-
hll_sketch_test_alloc sketch2(
|
235
|
-
REQUIRE_THROWS_AS(hll_sketch_test_alloc(
|
236
|
-
REQUIRE_THROWS_AS(hll_sketch_test_alloc(
|
233
|
+
hll_sketch_test_alloc sketch1(hll_constants::MIN_LOG_K, target_hll_type::HLL_8, false, 0);
|
234
|
+
hll_sketch_test_alloc sketch2(hll_constants::MAX_LOG_K, target_hll_type::HLL_4, false, 0);
|
235
|
+
REQUIRE_THROWS_AS(hll_sketch_test_alloc(hll_constants::MIN_LOG_K - 1, target_hll_type::HLL_4, false, 0), std::invalid_argument);
|
236
|
+
REQUIRE_THROWS_AS(hll_sketch_test_alloc(hll_constants::MAX_LOG_K + 1, target_hll_type::HLL_4, false, 0), std::invalid_argument);
|
237
237
|
}
|
238
238
|
REQUIRE(test_allocator_total_bytes == 0);
|
239
239
|
}
|
@@ -24,23 +24,19 @@
|
|
24
24
|
|
25
25
|
namespace datasketches {
|
26
26
|
|
27
|
-
static int min(int a, int b) {
|
28
|
-
return (a < b) ? a : b;
|
29
|
-
}
|
30
|
-
|
31
27
|
static void println(std::string& str) {
|
32
28
|
//std::cout << str << "\n";
|
33
29
|
}
|
34
30
|
|
35
31
|
static void basicUnion(uint64_t n1, uint64_t n2,
|
36
|
-
|
32
|
+
uint8_t lgk1, uint8_t lgk2, uint8_t lgMaxK,
|
37
33
|
target_hll_type type1, target_hll_type type2, target_hll_type resultType) {
|
38
34
|
uint64_t v = 0;
|
39
35
|
//int tot = n1 + n2;
|
40
36
|
|
41
37
|
hll_sketch h1(lgk1, type1);
|
42
38
|
hll_sketch h2(lgk2, type2);
|
43
|
-
|
39
|
+
uint8_t lgControlK = std::min(std::min(lgk1, lgk2), lgMaxK);
|
44
40
|
hll_sketch control(lgControlK, resultType);
|
45
41
|
|
46
42
|
for (uint64_t i = 0; i < n1; ++i) {
|
@@ -89,9 +85,9 @@ TEST_CASE("hll union: check unions", "[hll_union]") {
|
|
89
85
|
target_hll_type type2 = HLL_8;
|
90
86
|
target_hll_type resultType = HLL_8;
|
91
87
|
|
92
|
-
|
93
|
-
|
94
|
-
|
88
|
+
uint8_t lgK1 = 7;
|
89
|
+
uint8_t lgK2 = 7;
|
90
|
+
uint8_t lgMaxK = 7;
|
95
91
|
uint64_t n1 = 7;
|
96
92
|
uint64_t n2 = 7;
|
97
93
|
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, type1, type2, resultType);
|
@@ -108,7 +104,7 @@ TEST_CASE("hll union: check unions", "[hll_union]") {
|
|
108
104
|
n2 = 14;
|
109
105
|
basicUnion(n1, n2, lgK1, lgK2, lgMaxK, type1, type2, resultType);
|
110
106
|
|
111
|
-
|
107
|
+
uint8_t i = 0;
|
112
108
|
for (i = 7; i <= 13; ++i) {
|
113
109
|
lgK1 = i;
|
114
110
|
lgK2 = i;
|
@@ -184,9 +180,9 @@ TEST_CASE("hll union: check composite estimate", "[hll_union]") {
|
|
184
180
|
}
|
185
181
|
|
186
182
|
TEST_CASE("hll union: check config k limits", "[hll_union]") {
|
187
|
-
REQUIRE_THROWS_AS(hll_union(
|
183
|
+
REQUIRE_THROWS_AS(hll_union(hll_constants::MIN_LOG_K - 1), std::invalid_argument);
|
188
184
|
|
189
|
-
REQUIRE_THROWS_AS(hll_union(
|
185
|
+
REQUIRE_THROWS_AS(hll_union(hll_constants::MAX_LOG_K + 1), std::invalid_argument);
|
190
186
|
}
|
191
187
|
|
192
188
|
static double getBound(int lgK, bool ub, bool oooFlag, int numStdDev, double est) {
|
@@ -195,7 +191,7 @@ static double getBound(int lgK, bool ub, bool oooFlag, int numStdDev, double est
|
|
195
191
|
}
|
196
192
|
|
197
193
|
TEST_CASE("hll union: check ub lb", "[hll_union]") {
|
198
|
-
|
194
|
+
uint8_t lgK = 4;
|
199
195
|
int n = 1 << 20;
|
200
196
|
bool oooFlag = false;
|
201
197
|
|
@@ -223,7 +219,7 @@ TEST_CASE("hll union: check ub lb", "[hll_union]") {
|
|
223
219
|
}
|
224
220
|
|
225
221
|
TEST_CASE("hll union: check conversions", "[hll_union]") {
|
226
|
-
|
222
|
+
uint8_t lgK = 4;
|
227
223
|
hll_sketch sk1(lgK, HLL_8);
|
228
224
|
hll_sketch sk2(lgK, HLL_8);
|
229
225
|
int n = 1 << 20;
|
@@ -57,7 +57,7 @@ static int get_n(int lg_k, hll_mode mode) {
|
|
57
57
|
|
58
58
|
static long v = 0;
|
59
59
|
|
60
|
-
static hll_sketch build_sketch(
|
60
|
+
static hll_sketch build_sketch(uint8_t lg_k, target_hll_type hll_type, hll_mode mode) {
|
61
61
|
hll_sketch sk(lg_k, hll_type);
|
62
62
|
int n = get_n(lg_k, mode);
|
63
63
|
for (int i = 0; i < n; i++) sk.update(static_cast<uint64_t>(i + v));
|
@@ -67,7 +67,7 @@ static hll_sketch build_sketch(int lg_k, target_hll_type hll_type, hll_mode mode
|
|
67
67
|
|
68
68
|
// merges a sketch to an empty union and gets result of the same type, checks binary equivalence
|
69
69
|
static void union_one_update(bool compact) {
|
70
|
-
for (
|
70
|
+
for (uint8_t lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
|
71
71
|
for (int mode = 0; mode <= 2; mode++) { // List, Set, Hll
|
72
72
|
if ((lg_k < 8) && (mode == 1)) continue; // lg_k < 8 list transitions directly to HLL
|
73
73
|
for (int t = 0; t <= 2; t++) { // HLL_4, HLL_6, HLL_8
|
@@ -102,7 +102,7 @@ TEST_CASE("hll isomorphic: union one update serialize compact", "[hll_isomorphic
|
|
102
102
|
|
103
103
|
// converts a sketch to a different type and converts back to the original type to check binary equivalence
|
104
104
|
static void convert_back_and_forth(bool compact) {
|
105
|
-
for (
|
105
|
+
for (uint8_t lg_k = 4; lg_k <= 21; lg_k++) { // all lg_k
|
106
106
|
for (int mode = 0; mode <= 2; mode++) { // List, Set, Hll
|
107
107
|
if ((lg_k < 8) && (mode == 1)) continue; // lg_k < 8 list transitions directly to HLL
|
108
108
|
for (int t1 = 0; t1 <= 2; t1++) { // HLL_4, HLL_6, HLL_8
|
@@ -44,11 +44,11 @@ TEST_CASE("hll to/from byte array: double serialize", "[hll_byte_array]") {
|
|
44
44
|
auto ser2 = sk.serialize_updatable();
|
45
45
|
|
46
46
|
REQUIRE(ser1.size() == ser2.size());
|
47
|
-
|
47
|
+
size_t len = ser1.size();
|
48
48
|
uint8_t* b1 = ser1.data();
|
49
49
|
uint8_t* b2 = ser2.data();
|
50
50
|
|
51
|
-
for (
|
51
|
+
for (size_t i = 0; i < len; ++i) {
|
52
52
|
REQUIRE(b2[i] == b1[i]);
|
53
53
|
}
|
54
54
|
}
|
@@ -129,7 +129,7 @@ static void checkSketchEquality(hll_sketch& sk1, hll_sketch& sk2) {
|
|
129
129
|
REQUIRE(sk1.get_target_type() == sk2.get_target_type());
|
130
130
|
}
|
131
131
|
|
132
|
-
static void toFrom(const
|
132
|
+
static void toFrom(const uint8_t lgConfigK, const target_hll_type tgtHllType, const int n) {
|
133
133
|
hll_sketch src(lgConfigK, tgtHllType);
|
134
134
|
for (int i = 0; i < n; ++i) {
|
135
135
|
src.update(i);
|
@@ -157,7 +157,7 @@ static void toFrom(const int lgConfigK, const target_hll_type tgtHllType, const
|
|
157
157
|
TEST_CASE("hll to/from byte array: to from sketch", "[hll_byte_array]") {
|
158
158
|
for (int i = 0; i < 10; ++i) {
|
159
159
|
int n = nArr[i];
|
160
|
-
for (
|
160
|
+
for (uint8_t lgK = 4; lgK <= 13; ++lgK) {
|
161
161
|
toFrom(lgK, HLL_4, n);
|
162
162
|
toFrom(lgK, HLL_6, n);
|
163
163
|
toFrom(lgK, HLL_8, n);
|
@@ -32,27 +32,17 @@ target_include_directories(kll
|
|
32
32
|
target_link_libraries(kll INTERFACE common)
|
33
33
|
target_compile_features(kll INTERFACE cxx_std_11)
|
34
34
|
|
35
|
-
set(kll_HEADERS "")
|
36
|
-
list(APPEND kll_HEADERS "include/kll_sketch.hpp")
|
37
|
-
list(APPEND kll_HEADERS "include/kll_sketch_impl.hpp")
|
38
|
-
list(APPEND kll_HEADERS "include/kll_helper.hpp")
|
39
|
-
list(APPEND kll_HEADERS "include/kll_helper_impl.hpp")
|
40
|
-
list(APPEND kll_HEADERS "include/kll_quantile_calculator.hpp")
|
41
|
-
list(APPEND kll_HEADERS "include/kll_quantile_calculator_impl.hpp")
|
42
|
-
|
43
35
|
install(TARGETS kll
|
44
36
|
EXPORT ${PROJECT_NAME}
|
45
37
|
)
|
46
38
|
|
47
|
-
install(FILES
|
39
|
+
install(FILES
|
40
|
+
include/kll_sketch.hpp
|
41
|
+
include/kll_sketch_impl.hpp
|
42
|
+
include/kll_helper.hpp
|
43
|
+
include/kll_helper_impl.hpp
|
44
|
+
include/kll_quantile_calculator.hpp
|
45
|
+
include/kll_quantile_calculator_impl.hpp
|
46
|
+
include/kolmogorov_smirnov.hpp
|
47
|
+
include/kolmogorov_smirnov_impl.hpp
|
48
48
|
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|
49
|
-
|
50
|
-
target_sources(kll
|
51
|
-
INTERFACE
|
52
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/kll_helper.hpp
|
53
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/kll_helper_impl.hpp
|
54
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/kll_sketch.hpp
|
55
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/kll_sketch_impl.hpp
|
56
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/kll_quantile_calculator.hpp
|
57
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/kll_quantile_calculator_impl.hpp
|
58
|
-
)
|
@@ -26,7 +26,8 @@
|
|
26
26
|
|
27
27
|
namespace datasketches {
|
28
28
|
|
29
|
-
static std::independent_bits_engine<std::mt19937, 1, uint32_t>
|
29
|
+
static std::independent_bits_engine<std::mt19937, 1, uint32_t>
|
30
|
+
random_bit(static_cast<uint32_t>(std::chrono::system_clock::now().time_since_epoch().count()));
|
30
31
|
|
31
32
|
#ifdef KLL_VALIDATION
|
32
33
|
extern uint32_t kll_next_offset;
|
@@ -46,9 +47,9 @@ class kll_helper {
|
|
46
47
|
static inline uint8_t floor_of_log2_of_fraction(uint64_t numer, uint64_t denom);
|
47
48
|
static inline uint8_t ub_on_num_levels(uint64_t n);
|
48
49
|
static inline uint32_t compute_total_capacity(uint16_t k, uint8_t m, uint8_t num_levels);
|
49
|
-
static inline
|
50
|
-
static inline
|
51
|
-
static inline
|
50
|
+
static inline uint16_t level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid);
|
51
|
+
static inline uint16_t int_cap_aux(uint16_t k, uint8_t depth);
|
52
|
+
static inline uint16_t int_cap_aux_aux(uint16_t k, uint8_t depth);
|
52
53
|
static inline uint64_t sum_the_sample_weights(uint8_t num_levels, const uint32_t* levels);
|
53
54
|
|
54
55
|
/*
|
@@ -55,28 +55,28 @@ uint32_t kll_helper::compute_total_capacity(uint16_t k, uint8_t m, uint8_t num_l
|
|
55
55
|
return total;
|
56
56
|
}
|
57
57
|
|
58
|
-
|
58
|
+
uint16_t kll_helper::level_capacity(uint16_t k, uint8_t numLevels, uint8_t height, uint8_t min_wid) {
|
59
59
|
if (height >= numLevels) throw std::invalid_argument("height >= numLevels");
|
60
60
|
const uint8_t depth = numLevels - height - 1;
|
61
|
-
return std::max(
|
61
|
+
return std::max<uint16_t>(min_wid, int_cap_aux(k, depth));
|
62
62
|
}
|
63
63
|
|
64
|
-
|
64
|
+
uint16_t kll_helper::int_cap_aux(uint16_t k, uint8_t depth) {
|
65
65
|
if (depth > 60) throw std::invalid_argument("depth > 60");
|
66
66
|
if (depth <= 30) return int_cap_aux_aux(k, depth);
|
67
67
|
const uint8_t half = depth / 2;
|
68
68
|
const uint8_t rest = depth - half;
|
69
|
-
const
|
69
|
+
const uint16_t tmp = int_cap_aux_aux(k, half);
|
70
70
|
return int_cap_aux_aux(tmp, rest);
|
71
71
|
}
|
72
72
|
|
73
|
-
|
73
|
+
uint16_t kll_helper::int_cap_aux_aux(uint16_t k, uint8_t depth) {
|
74
74
|
if (depth > 30) throw std::invalid_argument("depth > 30");
|
75
75
|
const uint64_t twok = k << 1; // for rounding, we pre-multiply by 2
|
76
76
|
const uint64_t tmp = (uint64_t) (((uint64_t) twok << depth) / powers_of_three[depth]);
|
77
77
|
const uint64_t result = (tmp + 1) >> 1; // then here we add 1 and divide by 2
|
78
78
|
if (result > k) throw std::logic_error("result > k");
|
79
|
-
return result;
|
79
|
+
return static_cast<uint16_t>(result);
|
80
80
|
}
|
81
81
|
|
82
82
|
uint64_t kll_helper::sum_the_sample_weights(uint8_t num_levels, const uint32_t* levels) {
|
@@ -24,19 +24,27 @@
|
|
24
24
|
|
25
25
|
namespace datasketches {
|
26
26
|
|
27
|
+
// forward declaration
|
28
|
+
template<typename T, typename C, typename S, typename A> class kll_sketch;
|
29
|
+
|
27
30
|
template <typename T, typename C, typename A>
|
28
31
|
class kll_quantile_calculator {
|
29
32
|
public:
|
30
|
-
|
31
|
-
|
33
|
+
using Entry = std::pair<T, uint64_t>;
|
34
|
+
using AllocEntry = typename std::allocator_traits<A>::template rebind_alloc<Entry>;
|
35
|
+
using Container = std::vector<Entry, AllocEntry>;
|
36
|
+
using const_iterator = typename Container::const_iterator;
|
37
|
+
|
38
|
+
template<typename S>
|
39
|
+
kll_quantile_calculator(const kll_sketch<T, C, S, A>& sketch);
|
40
|
+
|
32
41
|
T get_quantile(double fraction) const;
|
42
|
+
const_iterator begin() const;
|
43
|
+
const_iterator end() const;
|
33
44
|
|
34
45
|
private:
|
35
46
|
using AllocU32 = typename std::allocator_traits<A>::template rebind_alloc<uint32_t>;
|
36
47
|
using vector_u32 = std::vector<uint32_t, AllocU32>;
|
37
|
-
using Entry = std::pair<T, uint64_t>;
|
38
|
-
using AllocEntry = typename std::allocator_traits<A>::template rebind_alloc<Entry>;
|
39
|
-
using Container = std::vector<Entry, AllocEntry>;
|
40
48
|
uint64_t n_;
|
41
49
|
vector_u32 levels_;
|
42
50
|
Container entries_;
|
@@ -45,7 +53,7 @@ class kll_quantile_calculator {
|
|
45
53
|
T approximately_answer_positional_query(uint64_t pos) const;
|
46
54
|
void convert_to_preceding_cummulative();
|
47
55
|
uint32_t chunk_containing_pos(uint64_t pos) const;
|
48
|
-
uint32_t search_for_chunk_containing_pos(uint64_t pos,
|
56
|
+
uint32_t search_for_chunk_containing_pos(uint64_t pos, uint64_t l, uint64_t r) const;
|
49
57
|
static void merge_sorted_blocks(Container& entries, const uint32_t* levels, uint8_t num_levels, uint32_t num_items);
|
50
58
|
static void merge_sorted_blocks_direct(Container& orig, Container& temp, const uint32_t* levels, uint8_t starting_level, uint8_t num_levels);
|
51
59
|
static void merge_sorted_blocks_reversed(Container& orig, Container& temp, const uint32_t* levels, uint8_t starting_level, uint8_t num_levels);
|
@@ -28,24 +28,38 @@
|
|
28
28
|
|
29
29
|
namespace datasketches {
|
30
30
|
|
31
|
-
template
|
32
|
-
|
33
|
-
|
31
|
+
template<typename T, typename C, typename A>
|
32
|
+
template<typename S>
|
33
|
+
kll_quantile_calculator<T, C, A>::kll_quantile_calculator(const kll_sketch<T, C, S, A>& sketch):
|
34
|
+
n_(sketch.n_), levels_(sketch.num_levels_ + 1, 0, sketch.allocator_), entries_(sketch.allocator_)
|
34
35
|
{
|
35
|
-
const uint32_t num_items =
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
36
|
+
const uint32_t num_items = sketch.levels_[sketch.num_levels_] - sketch.levels_[0];
|
37
|
+
if (num_items > 0) {
|
38
|
+
entries_.reserve(num_items);
|
39
|
+
populate_from_sketch(sketch.items_, sketch.levels_.data(), sketch.num_levels_);
|
40
|
+
if (!sketch.is_level_zero_sorted_) std::sort(entries_.begin(), entries_.begin() + levels_[1], compare_pair_by_first<C>());
|
41
|
+
merge_sorted_blocks(entries_, levels_.data(), static_cast<uint8_t>(levels_.size()) - 1, num_items);
|
42
|
+
if (!is_sorted(entries_.begin(), entries_.end(), compare_pair_by_first<C>())) throw std::logic_error("entries must be sorted");
|
43
|
+
convert_to_preceding_cummulative();
|
44
|
+
}
|
41
45
|
}
|
42
46
|
|
43
|
-
template
|
47
|
+
template<typename T, typename C, typename A>
|
44
48
|
T kll_quantile_calculator<T, C, A>::get_quantile(double fraction) const {
|
45
49
|
return approximately_answer_positional_query(pos_of_phi(fraction, n_));
|
46
50
|
}
|
47
51
|
|
48
|
-
template
|
52
|
+
template<typename T, typename C, typename A>
|
53
|
+
auto kll_quantile_calculator<T, C, A>::begin() const -> const_iterator {
|
54
|
+
return entries_.begin();
|
55
|
+
}
|
56
|
+
|
57
|
+
template<typename T, typename C, typename A>
|
58
|
+
auto kll_quantile_calculator<T, C, A>::end() const -> const_iterator {
|
59
|
+
return entries_.end();
|
60
|
+
}
|
61
|
+
|
62
|
+
template<typename T, typename C, typename A>
|
49
63
|
void kll_quantile_calculator<T, C, A>::populate_from_sketch(const T* items, const uint32_t* levels, uint8_t num_levels) {
|
50
64
|
size_t src_level = 0;
|
51
65
|
size_t dst_level = 0;
|
@@ -68,7 +82,7 @@ void kll_quantile_calculator<T, C, A>::populate_from_sketch(const T* items, cons
|
|
68
82
|
if (levels_.size() > static_cast<size_t>(dst_level + 1)) levels_.resize(dst_level + 1);
|
69
83
|
}
|
70
84
|
|
71
|
-
template
|
85
|
+
template<typename T, typename C, typename A>
|
72
86
|
T kll_quantile_calculator<T, C, A>::approximately_answer_positional_query(uint64_t pos) const {
|
73
87
|
if (pos >= n_) throw std::logic_error("position out of range");
|
74
88
|
const uint32_t num_items = levels_[levels_.size() - 1];
|
@@ -77,7 +91,7 @@ T kll_quantile_calculator<T, C, A>::approximately_answer_positional_query(uint64
|
|
77
91
|
return entries_[index].first;
|
78
92
|
}
|
79
93
|
|
80
|
-
template
|
94
|
+
template<typename T, typename C, typename A>
|
81
95
|
void kll_quantile_calculator<T, C, A>::convert_to_preceding_cummulative() {
|
82
96
|
uint64_t subtotal = 0;
|
83
97
|
for (auto& entry: entries_) {
|
@@ -87,13 +101,13 @@ void kll_quantile_calculator<T, C, A>::convert_to_preceding_cummulative() {
|
|
87
101
|
}
|
88
102
|
}
|
89
103
|
|
90
|
-
template
|
104
|
+
template<typename T, typename C, typename A>
|
91
105
|
uint64_t kll_quantile_calculator<T, C, A>::pos_of_phi(double phi, uint64_t n) {
|
92
|
-
const uint64_t pos = std::floor(phi * n);
|
106
|
+
const uint64_t pos = static_cast<uint64_t>(std::floor(phi * n));
|
93
107
|
return (pos == n) ? n - 1 : pos;
|
94
108
|
}
|
95
109
|
|
96
|
-
template
|
110
|
+
template<typename T, typename C, typename A>
|
97
111
|
uint32_t kll_quantile_calculator<T, C, A>::chunk_containing_pos(uint64_t pos) const {
|
98
112
|
if (entries_.size() < 1) throw std::logic_error("array too short");
|
99
113
|
if (pos < entries_[0].second) throw std::logic_error("position too small");
|
@@ -101,19 +115,19 @@ uint32_t kll_quantile_calculator<T, C, A>::chunk_containing_pos(uint64_t pos) co
|
|
101
115
|
return search_for_chunk_containing_pos(pos, 0, entries_.size());
|
102
116
|
}
|
103
117
|
|
104
|
-
template
|
105
|
-
uint32_t kll_quantile_calculator<T, C, A>::search_for_chunk_containing_pos(uint64_t pos,
|
118
|
+
template<typename T, typename C, typename A>
|
119
|
+
uint32_t kll_quantile_calculator<T, C, A>::search_for_chunk_containing_pos(uint64_t pos, uint64_t l, uint64_t r) const {
|
106
120
|
if (l + 1 == r) {
|
107
|
-
return l;
|
121
|
+
return static_cast<uint32_t>(l);
|
108
122
|
}
|
109
|
-
const
|
123
|
+
const uint64_t m = l + (r - l) / 2;
|
110
124
|
if (entries_[m].second <= pos) {
|
111
125
|
return search_for_chunk_containing_pos(pos, m, r);
|
112
126
|
}
|
113
127
|
return search_for_chunk_containing_pos(pos, l, m);
|
114
128
|
}
|
115
129
|
|
116
|
-
template
|
130
|
+
template<typename T, typename C, typename A>
|
117
131
|
void kll_quantile_calculator<T, C, A>::merge_sorted_blocks(Container& entries, const uint32_t* levels, uint8_t num_levels, uint32_t num_items) {
|
118
132
|
if (num_levels == 1) return;
|
119
133
|
Container temporary(entries.get_allocator());
|
@@ -121,7 +135,7 @@ void kll_quantile_calculator<T, C, A>::merge_sorted_blocks(Container& entries, c
|
|
121
135
|
merge_sorted_blocks_direct(entries, temporary, levels, 0, num_levels);
|
122
136
|
}
|
123
137
|
|
124
|
-
template
|
138
|
+
template<typename T, typename C, typename A>
|
125
139
|
void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_direct(Container& orig, Container& temp, const uint32_t* levels,
|
126
140
|
uint8_t starting_level, uint8_t num_levels) {
|
127
141
|
if (num_levels == 1) return;
|
@@ -129,10 +143,11 @@ void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_direct(Container& ori
|
|
129
143
|
const uint8_t num_levels_2 = num_levels - num_levels_1;
|
130
144
|
const uint8_t starting_level_1 = starting_level;
|
131
145
|
const uint8_t starting_level_2 = starting_level + num_levels_1;
|
132
|
-
const auto
|
146
|
+
const auto initial_size = temp.size();
|
133
147
|
merge_sorted_blocks_reversed(orig, temp, levels, starting_level_1, num_levels_1);
|
134
148
|
merge_sorted_blocks_reversed(orig, temp, levels, starting_level_2, num_levels_2);
|
135
149
|
const uint32_t num_items_1 = levels[starting_level_1 + num_levels_1] - levels[starting_level_1];
|
150
|
+
const auto chunk_begin = temp.begin() + initial_size;
|
136
151
|
std::merge(
|
137
152
|
std::make_move_iterator(chunk_begin), std::make_move_iterator(chunk_begin + num_items_1),
|
138
153
|
std::make_move_iterator(chunk_begin + num_items_1), std::make_move_iterator(temp.end()),
|
@@ -141,7 +156,7 @@ void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_direct(Container& ori
|
|
141
156
|
temp.erase(chunk_begin, temp.end());
|
142
157
|
}
|
143
158
|
|
144
|
-
template
|
159
|
+
template<typename T, typename C, typename A>
|
145
160
|
void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_reversed(Container& orig, Container& temp, const uint32_t* levels,
|
146
161
|
uint8_t starting_level, uint8_t num_levels) {
|
147
162
|
if (num_levels == 1) {
|
@@ -153,15 +153,23 @@ template<typename A> using vector_u32 = std::vector<uint32_t, AllocU32<A>>;
|
|
153
153
|
template<typename A> using AllocD = typename std::allocator_traits<A>::template rebind_alloc<double>;
|
154
154
|
template<typename A> using vector_d = std::vector<double, AllocD<A>>;
|
155
155
|
|
156
|
+
namespace kll_constants {
|
157
|
+
const uint16_t DEFAULT_K = 200;
|
158
|
+
}
|
159
|
+
|
156
160
|
template <typename T, typename C = std::less<T>, typename S = serde<T>, typename A = std::allocator<T>>
|
157
161
|
class kll_sketch {
|
158
162
|
public:
|
163
|
+
using value_type = T;
|
164
|
+
using comparator = C;
|
165
|
+
|
159
166
|
static const uint8_t DEFAULT_M = 8;
|
160
|
-
|
167
|
+
// TODO: Redundant and deprecated. Will be removed in the next major version.
|
168
|
+
static const uint16_t DEFAULT_K = kll_constants::DEFAULT_K;
|
161
169
|
static const uint16_t MIN_K = DEFAULT_M;
|
162
170
|
static const uint16_t MAX_K = (1 << 16) - 1;
|
163
171
|
|
164
|
-
explicit kll_sketch(uint16_t k = DEFAULT_K, const A& allocator = A());
|
172
|
+
explicit kll_sketch(uint16_t k = kll_constants::DEFAULT_K, const A& allocator = A());
|
165
173
|
kll_sketch(const kll_sketch& other);
|
166
174
|
kll_sketch(kll_sketch&& other) noexcept;
|
167
175
|
~kll_sketch();
|
@@ -296,7 +304,7 @@ class kll_sketch {
|
|
296
304
|
*
|
297
305
|
* @return array of approximations to the given number of evenly-spaced fractional ranks.
|
298
306
|
*/
|
299
|
-
std::vector<T, A> get_quantiles(
|
307
|
+
std::vector<T, A> get_quantiles(uint32_t num) const;
|
300
308
|
|
301
309
|
/**
|
302
310
|
* Returns an approximation to the normalized (fractional) rank of the given value from 0 to 1,
|
@@ -383,6 +391,33 @@ class kll_sketch {
|
|
383
391
|
template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
|
384
392
|
size_t get_serialized_size_bytes() const;
|
385
393
|
|
394
|
+
/**
|
395
|
+
* Returns upper bound on the serialized size of a sketch given a parameter <em>k</em> and stream
|
396
|
+
* length. The resulting size is an overestimate to make sure actual sketches don't exceed it.
|
397
|
+
* This method can be used if allocation of storage is necessary beforehand, but it is not
|
398
|
+
* optimal.
|
399
|
+
* This method is for arithmetic types (integral and floating point)
|
400
|
+
* @param k parameter that controls size of the sketch and accuracy of estimates
|
401
|
+
* @param n stream length
|
402
|
+
* @return upper bound on the serialized size
|
403
|
+
*/
|
404
|
+
template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
|
405
|
+
static size_t get_max_serialized_size_bytes(uint16_t k, uint64_t n);
|
406
|
+
|
407
|
+
/**
|
408
|
+
* Returns upper bound on the serialized size of a sketch given a parameter <em>k</em> and stream
|
409
|
+
* length. The resulting size is an overestimate to make sure actual sketches don't exceed it.
|
410
|
+
* This method can be used if allocation of storage is necessary beforehand, but it is not
|
411
|
+
* optimal.
|
412
|
+
* This method is for all other non-arithmetic types, and it takes a max size of an item as input.
|
413
|
+
* @param k parameter that controls size of the sketch and accuracy of estimates
|
414
|
+
* @param n stream length
|
415
|
+
* @param max_item_size_bytes maximum size of an item in bytes
|
416
|
+
* @return upper bound on the serialized size
|
417
|
+
*/
|
418
|
+
template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
|
419
|
+
static size_t get_max_serialized_size_bytes(uint16_t k, uint64_t n, size_t max_item_size_bytes);
|
420
|
+
|
386
421
|
/**
|
387
422
|
* This method serializes the sketch into a given stream in a binary form
|
388
423
|
* @param os output stream
|
@@ -391,7 +426,7 @@ class kll_sketch {
|
|
391
426
|
|
392
427
|
// This is a convenience alias for users
|
393
428
|
// The type returned by the following serialize method
|
394
|
-
|
429
|
+
using vector_bytes = vector_u8<A>;
|
395
430
|
|
396
431
|
/**
|
397
432
|
* This method serializes the sketch as a vector of bytes.
|
@@ -480,6 +515,8 @@ class kll_sketch {
|
|
480
515
|
T* max_value_;
|
481
516
|
bool is_level_zero_sorted_;
|
482
517
|
|
518
|
+
friend class kll_quantile_calculator<T, C, A>;
|
519
|
+
|
483
520
|
// for deserialization
|
484
521
|
class item_deleter;
|
485
522
|
class items_deleter;
|