datasketches 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +1 -1
- data/ext/datasketches/kll_wrapper.cpp +5 -1
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +4 -3
- data/vendor/datasketches-cpp/common/CMakeLists.txt +4 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +14 -0
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +2 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +25 -9
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +0 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +96 -42
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +105 -127
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +94 -25
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
- data/vendor/datasketches-cpp/pyproject.toml +1 -1
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/README.md +7 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +48 -13
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +1 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +10 -4
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
- data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
- data/vendor/datasketches-cpp/req/CMakeLists.txt +0 -2
- data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +62 -59
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +44 -7
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +31 -26
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +25 -9
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +8 -6
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +7 -45
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +29 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +2 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +16 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -0
- metadata +25 -9
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
|
@@ -21,6 +21,7 @@
|
|
|
21
21
|
#include <fstream>
|
|
22
22
|
#include <sstream>
|
|
23
23
|
#include <vector>
|
|
24
|
+
#include <stdexcept>
|
|
24
25
|
|
|
25
26
|
#include <catch.hpp>
|
|
26
27
|
#include <theta_sketch.hpp>
|
|
@@ -394,7 +395,13 @@ TEST_CASE("theta sketch: serialize deserialize stream and bytes equivalence", "[
|
|
|
394
395
|
}
|
|
395
396
|
}
|
|
396
397
|
|
|
397
|
-
TEST_CASE("theta sketch: deserialize compact single item buffer overrun", "[theta_sketch]") {
|
|
398
|
+
TEST_CASE("theta sketch: deserialize empty buffer overrun", "[theta_sketch]") {
|
|
399
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
400
|
+
auto bytes = update_sketch.compact().serialize();
|
|
401
|
+
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
|
|
402
|
+
}
|
|
403
|
+
|
|
404
|
+
TEST_CASE("theta sketch: deserialize single item buffer overrun", "[theta_sketch]") {
|
|
398
405
|
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
399
406
|
update_sketch.update(1);
|
|
400
407
|
auto bytes = update_sketch.compact().serialize();
|
|
@@ -402,6 +409,27 @@ TEST_CASE("theta sketch: deserialize compact single item buffer overrun", "[thet
|
|
|
402
409
|
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
|
|
403
410
|
}
|
|
404
411
|
|
|
412
|
+
TEST_CASE("theta sketch: deserialize exact mode buffer overrun", "[theta_sketch]") {
|
|
413
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
414
|
+
for (int i = 0; i < 1000; ++i) update_sketch.update(i);
|
|
415
|
+
auto bytes = update_sketch.compact().serialize();
|
|
416
|
+
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 7), std::out_of_range);
|
|
417
|
+
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 8), std::out_of_range);
|
|
418
|
+
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 16), std::out_of_range);
|
|
419
|
+
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
|
|
420
|
+
}
|
|
421
|
+
|
|
422
|
+
TEST_CASE("theta sketch: deserialize estimation mode buffer overrun", "[theta_sketch]") {
|
|
423
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
424
|
+
for (int i = 0; i < 10000; ++i) update_sketch.update(i);
|
|
425
|
+
auto bytes = update_sketch.compact().serialize();
|
|
426
|
+
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 7), std::out_of_range);
|
|
427
|
+
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 8), std::out_of_range);
|
|
428
|
+
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 16), std::out_of_range);
|
|
429
|
+
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 24), std::out_of_range);
|
|
430
|
+
REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
|
|
431
|
+
}
|
|
432
|
+
|
|
405
433
|
TEST_CASE("theta sketch: conversion constructor and wrapped compact", "[theta_sketch]") {
|
|
406
434
|
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
407
435
|
const int n = 8192;
|
|
@@ -398,9 +398,23 @@ public:
|
|
|
398
398
|
virtual uint32_t get_num_retained() const;
|
|
399
399
|
virtual uint16_t get_seed_hash() const;
|
|
400
400
|
|
|
401
|
+
/**
|
|
402
|
+
* This method serializes the sketch into a given stream in a binary form
|
|
403
|
+
* @param os output stream
|
|
404
|
+
* @param instance of a SerDe
|
|
405
|
+
*/
|
|
401
406
|
template<typename SerDe = serde<Summary>>
|
|
402
407
|
void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
|
|
403
408
|
|
|
409
|
+
/**
|
|
410
|
+
* This method serializes the sketch as a vector of bytes.
|
|
411
|
+
* An optional header can be reserved in front of the sketch.
|
|
412
|
+
* It is a blank space of a given size.
|
|
413
|
+
* This header is used in Datasketches PostgreSQL extension.
|
|
414
|
+
* @param header_size_bytes space to reserve in front of the sketch
|
|
415
|
+
* @param instance of a SerDe
|
|
416
|
+
* @return serialized sketch as a vector of bytes
|
|
417
|
+
*/
|
|
404
418
|
template<typename SerDe = serde<Summary>>
|
|
405
419
|
vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
|
|
406
420
|
|
|
@@ -414,6 +428,7 @@ public:
|
|
|
414
428
|
* @param is input stream
|
|
415
429
|
* @param seed the seed for the hash function that was used to create the sketch
|
|
416
430
|
* @param instance of a SerDe
|
|
431
|
+
* @param instance of an Allocator
|
|
417
432
|
* @return an instance of a sketch
|
|
418
433
|
*/
|
|
419
434
|
template<typename SerDe = serde<Summary>>
|
|
@@ -426,6 +441,7 @@ public:
|
|
|
426
441
|
* @param size the size of the array
|
|
427
442
|
* @param seed the seed for the hash function that was used to create the sketch
|
|
428
443
|
* @param instance of a SerDe
|
|
444
|
+
* @param instance of an Allocator
|
|
429
445
|
* @return an instance of the sketch
|
|
430
446
|
*/
|
|
431
447
|
template<typename SerDe = serde<Summary>>
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: datasketches
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.4
|
|
4
|
+
version: 0.2.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2022-05-21 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rice
|
|
@@ -62,7 +62,11 @@ files:
|
|
|
62
62
|
- vendor/datasketches-cpp/common/include/conditional_forward.hpp
|
|
63
63
|
- vendor/datasketches-cpp/common/include/count_zeros.hpp
|
|
64
64
|
- vendor/datasketches-cpp/common/include/inv_pow2_table.hpp
|
|
65
|
+
- vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp
|
|
66
|
+
- vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp
|
|
65
67
|
- vendor/datasketches-cpp/common/include/memory_operations.hpp
|
|
68
|
+
- vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp
|
|
69
|
+
- vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp
|
|
66
70
|
- vendor/datasketches-cpp/common/include/serde.hpp
|
|
67
71
|
- vendor/datasketches-cpp/common/test/CMakeLists.txt
|
|
68
72
|
- vendor/datasketches-cpp/common/test/catch.hpp
|
|
@@ -157,12 +161,8 @@ files:
|
|
|
157
161
|
- vendor/datasketches-cpp/kll/CMakeLists.txt
|
|
158
162
|
- vendor/datasketches-cpp/kll/include/kll_helper.hpp
|
|
159
163
|
- vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp
|
|
160
|
-
- vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp
|
|
161
|
-
- vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp
|
|
162
164
|
- vendor/datasketches-cpp/kll/include/kll_sketch.hpp
|
|
163
165
|
- vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp
|
|
164
|
-
- vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp
|
|
165
|
-
- vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp
|
|
166
166
|
- vendor/datasketches-cpp/kll/test/CMakeLists.txt
|
|
167
167
|
- vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp
|
|
168
168
|
- vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk
|
|
@@ -185,6 +185,8 @@ files:
|
|
|
185
185
|
- vendor/datasketches-cpp/python/src/fi_wrapper.cpp
|
|
186
186
|
- vendor/datasketches-cpp/python/src/hll_wrapper.cpp
|
|
187
187
|
- vendor/datasketches-cpp/python/src/kll_wrapper.cpp
|
|
188
|
+
- vendor/datasketches-cpp/python/src/ks_wrapper.cpp
|
|
189
|
+
- vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp
|
|
188
190
|
- vendor/datasketches-cpp/python/src/req_wrapper.cpp
|
|
189
191
|
- vendor/datasketches-cpp/python/src/theta_wrapper.cpp
|
|
190
192
|
- vendor/datasketches-cpp/python/src/vector_of_kll.cpp
|
|
@@ -194,16 +196,30 @@ files:
|
|
|
194
196
|
- vendor/datasketches-cpp/python/tests/fi_test.py
|
|
195
197
|
- vendor/datasketches-cpp/python/tests/hll_test.py
|
|
196
198
|
- vendor/datasketches-cpp/python/tests/kll_test.py
|
|
199
|
+
- vendor/datasketches-cpp/python/tests/quantiles_test.py
|
|
197
200
|
- vendor/datasketches-cpp/python/tests/req_test.py
|
|
198
201
|
- vendor/datasketches-cpp/python/tests/theta_test.py
|
|
199
202
|
- vendor/datasketches-cpp/python/tests/vector_of_kll_test.py
|
|
200
203
|
- vendor/datasketches-cpp/python/tests/vo_test.py
|
|
204
|
+
- vendor/datasketches-cpp/quantiles/CMakeLists.txt
|
|
205
|
+
- vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp
|
|
206
|
+
- vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp
|
|
207
|
+
- vendor/datasketches-cpp/quantiles/test/CMakeLists.txt
|
|
208
|
+
- vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk
|
|
209
|
+
- vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk
|
|
210
|
+
- vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk
|
|
211
|
+
- vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk
|
|
212
|
+
- vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk
|
|
213
|
+
- vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk
|
|
214
|
+
- vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk
|
|
215
|
+
- vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk
|
|
216
|
+
- vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp
|
|
217
|
+
- vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp
|
|
218
|
+
- vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp
|
|
201
219
|
- vendor/datasketches-cpp/req/CMakeLists.txt
|
|
202
220
|
- vendor/datasketches-cpp/req/include/req_common.hpp
|
|
203
221
|
- vendor/datasketches-cpp/req/include/req_compactor.hpp
|
|
204
222
|
- vendor/datasketches-cpp/req/include/req_compactor_impl.hpp
|
|
205
|
-
- vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp
|
|
206
|
-
- vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp
|
|
207
223
|
- vendor/datasketches-cpp/req/include/req_sketch.hpp
|
|
208
224
|
- vendor/datasketches-cpp/req/include/req_sketch_impl.hpp
|
|
209
225
|
- vendor/datasketches-cpp/req/test/CMakeLists.txt
|
|
@@ -319,7 +335,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
319
335
|
- !ruby/object:Gem::Version
|
|
320
336
|
version: '0'
|
|
321
337
|
requirements: []
|
|
322
|
-
rubygems_version: 3.3.
|
|
338
|
+
rubygems_version: 3.3.7
|
|
323
339
|
signing_key:
|
|
324
340
|
specification_version: 4
|
|
325
341
|
summary: Sketch data structures for Ruby
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
-
* or more contributor license agreements. See the NOTICE file
|
|
4
|
-
* distributed with this work for additional information
|
|
5
|
-
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
-
* to you under the Apache License, Version 2.0 (the
|
|
7
|
-
* "License"); you may not use this file except in compliance
|
|
8
|
-
* with the License. You may obtain a copy of the License at
|
|
9
|
-
*
|
|
10
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
-
*
|
|
12
|
-
* Unless required by applicable law or agreed to in writing,
|
|
13
|
-
* software distributed under the License is distributed on an
|
|
14
|
-
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
-
* KIND, either express or implied. See the License for the
|
|
16
|
-
* specific language governing permissions and limitations
|
|
17
|
-
* under the License.
|
|
18
|
-
*/
|
|
19
|
-
|
|
20
|
-
#ifndef KLL_QUANTILE_CALCULATOR_HPP_
|
|
21
|
-
#define KLL_QUANTILE_CALCULATOR_HPP_
|
|
22
|
-
|
|
23
|
-
#include <memory>
|
|
24
|
-
|
|
25
|
-
namespace datasketches {
|
|
26
|
-
|
|
27
|
-
// forward declaration
|
|
28
|
-
template<typename T, typename C, typename S, typename A> class kll_sketch;
|
|
29
|
-
|
|
30
|
-
template <typename T, typename C, typename A>
|
|
31
|
-
class kll_quantile_calculator {
|
|
32
|
-
public:
|
|
33
|
-
using Entry = std::pair<T, uint64_t>;
|
|
34
|
-
using AllocEntry = typename std::allocator_traits<A>::template rebind_alloc<Entry>;
|
|
35
|
-
using Container = std::vector<Entry, AllocEntry>;
|
|
36
|
-
using const_iterator = typename Container::const_iterator;
|
|
37
|
-
|
|
38
|
-
template<typename S>
|
|
39
|
-
kll_quantile_calculator(const kll_sketch<T, C, S, A>& sketch);
|
|
40
|
-
|
|
41
|
-
T get_quantile(double fraction) const;
|
|
42
|
-
const_iterator begin() const;
|
|
43
|
-
const_iterator end() const;
|
|
44
|
-
|
|
45
|
-
private:
|
|
46
|
-
using AllocU32 = typename std::allocator_traits<A>::template rebind_alloc<uint32_t>;
|
|
47
|
-
using vector_u32 = std::vector<uint32_t, AllocU32>;
|
|
48
|
-
uint64_t n_;
|
|
49
|
-
vector_u32 levels_;
|
|
50
|
-
Container entries_;
|
|
51
|
-
|
|
52
|
-
void populate_from_sketch(const T* items, const uint32_t* levels, uint8_t num_levels);
|
|
53
|
-
T approximately_answer_positional_query(uint64_t pos) const;
|
|
54
|
-
void convert_to_preceding_cummulative();
|
|
55
|
-
uint32_t chunk_containing_pos(uint64_t pos) const;
|
|
56
|
-
uint32_t search_for_chunk_containing_pos(uint64_t pos, uint64_t l, uint64_t r) const;
|
|
57
|
-
static void merge_sorted_blocks(Container& entries, const uint32_t* levels, uint8_t num_levels, uint32_t num_items);
|
|
58
|
-
static void merge_sorted_blocks_direct(Container& orig, Container& temp, const uint32_t* levels, uint8_t starting_level, uint8_t num_levels);
|
|
59
|
-
static void merge_sorted_blocks_reversed(Container& orig, Container& temp, const uint32_t* levels, uint8_t starting_level, uint8_t num_levels);
|
|
60
|
-
static uint64_t pos_of_phi(double phi, uint64_t n);
|
|
61
|
-
|
|
62
|
-
template<typename Comparator>
|
|
63
|
-
struct compare_pair_by_first {
|
|
64
|
-
template<typename Entry1, typename Entry2>
|
|
65
|
-
bool operator()(Entry1&& a, Entry2&& b) const {
|
|
66
|
-
return Comparator()(std::forward<Entry1>(a).first, std::forward<Entry2>(b).first);
|
|
67
|
-
}
|
|
68
|
-
};
|
|
69
|
-
};
|
|
70
|
-
|
|
71
|
-
} /* namespace datasketches */
|
|
72
|
-
|
|
73
|
-
#include "kll_quantile_calculator_impl.hpp"
|
|
74
|
-
|
|
75
|
-
#endif // KLL_QUANTILE_CALCULATOR_HPP_
|
|
@@ -1,184 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
-
* or more contributor license agreements. See the NOTICE file
|
|
4
|
-
* distributed with this work for additional information
|
|
5
|
-
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
-
* to you under the Apache License, Version 2.0 (the
|
|
7
|
-
* "License"); you may not use this file except in compliance
|
|
8
|
-
* with the License. You may obtain a copy of the License at
|
|
9
|
-
*
|
|
10
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
-
*
|
|
12
|
-
* Unless required by applicable law or agreed to in writing,
|
|
13
|
-
* software distributed under the License is distributed on an
|
|
14
|
-
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
-
* KIND, either express or implied. See the License for the
|
|
16
|
-
* specific language governing permissions and limitations
|
|
17
|
-
* under the License.
|
|
18
|
-
*/
|
|
19
|
-
|
|
20
|
-
#ifndef KLL_QUANTILE_CALCULATOR_IMPL_HPP_
|
|
21
|
-
#define KLL_QUANTILE_CALCULATOR_IMPL_HPP_
|
|
22
|
-
|
|
23
|
-
#include <memory>
|
|
24
|
-
#include <cmath>
|
|
25
|
-
#include <algorithm>
|
|
26
|
-
|
|
27
|
-
#include "kll_helper.hpp"
|
|
28
|
-
|
|
29
|
-
namespace datasketches {
|
|
30
|
-
|
|
31
|
-
template<typename T, typename C, typename A>
|
|
32
|
-
template<typename S>
|
|
33
|
-
kll_quantile_calculator<T, C, A>::kll_quantile_calculator(const kll_sketch<T, C, S, A>& sketch):
|
|
34
|
-
n_(sketch.n_), levels_(sketch.num_levels_ + 1, 0, sketch.allocator_), entries_(sketch.allocator_)
|
|
35
|
-
{
|
|
36
|
-
const uint32_t num_items = sketch.levels_[sketch.num_levels_] - sketch.levels_[0];
|
|
37
|
-
if (num_items > 0) {
|
|
38
|
-
entries_.reserve(num_items);
|
|
39
|
-
populate_from_sketch(sketch.items_, sketch.levels_.data(), sketch.num_levels_);
|
|
40
|
-
if (!sketch.is_level_zero_sorted_) std::sort(entries_.begin(), entries_.begin() + levels_[1], compare_pair_by_first<C>());
|
|
41
|
-
merge_sorted_blocks(entries_, levels_.data(), static_cast<uint8_t>(levels_.size()) - 1, num_items);
|
|
42
|
-
if (!is_sorted(entries_.begin(), entries_.end(), compare_pair_by_first<C>())) throw std::logic_error("entries must be sorted");
|
|
43
|
-
convert_to_preceding_cummulative();
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
template<typename T, typename C, typename A>
|
|
48
|
-
T kll_quantile_calculator<T, C, A>::get_quantile(double fraction) const {
|
|
49
|
-
return approximately_answer_positional_query(pos_of_phi(fraction, n_));
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
template<typename T, typename C, typename A>
|
|
53
|
-
auto kll_quantile_calculator<T, C, A>::begin() const -> const_iterator {
|
|
54
|
-
return entries_.begin();
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
template<typename T, typename C, typename A>
|
|
58
|
-
auto kll_quantile_calculator<T, C, A>::end() const -> const_iterator {
|
|
59
|
-
return entries_.end();
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
template<typename T, typename C, typename A>
|
|
63
|
-
void kll_quantile_calculator<T, C, A>::populate_from_sketch(const T* items, const uint32_t* levels, uint8_t num_levels) {
|
|
64
|
-
size_t src_level = 0;
|
|
65
|
-
size_t dst_level = 0;
|
|
66
|
-
uint64_t weight = 1;
|
|
67
|
-
uint32_t offset = levels[0];
|
|
68
|
-
while (src_level < num_levels) {
|
|
69
|
-
const uint32_t from_index(levels[src_level] - offset);
|
|
70
|
-
const uint32_t to_index(levels[src_level + 1] - offset); // exclusive
|
|
71
|
-
if (from_index < to_index) { // skip empty levels
|
|
72
|
-
for (uint32_t i = from_index; i < to_index; ++i) {
|
|
73
|
-
entries_.push_back(Entry(items[i + offset], weight));
|
|
74
|
-
}
|
|
75
|
-
levels_[dst_level] = from_index;
|
|
76
|
-
levels_[dst_level + 1] = to_index;
|
|
77
|
-
dst_level++;
|
|
78
|
-
}
|
|
79
|
-
src_level++;
|
|
80
|
-
weight *= 2;
|
|
81
|
-
}
|
|
82
|
-
if (levels_.size() > static_cast<size_t>(dst_level + 1)) levels_.resize(dst_level + 1);
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
template<typename T, typename C, typename A>
|
|
86
|
-
T kll_quantile_calculator<T, C, A>::approximately_answer_positional_query(uint64_t pos) const {
|
|
87
|
-
if (pos >= n_) throw std::logic_error("position out of range");
|
|
88
|
-
const uint32_t num_items = levels_[levels_.size() - 1];
|
|
89
|
-
if (pos > entries_[num_items - 1].second) return entries_[num_items - 1].first;
|
|
90
|
-
const uint32_t index = chunk_containing_pos(pos);
|
|
91
|
-
return entries_[index].first;
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
template<typename T, typename C, typename A>
|
|
95
|
-
void kll_quantile_calculator<T, C, A>::convert_to_preceding_cummulative() {
|
|
96
|
-
uint64_t subtotal = 0;
|
|
97
|
-
for (auto& entry: entries_) {
|
|
98
|
-
const uint64_t new_subtotal = subtotal + entry.second;
|
|
99
|
-
entry.second = subtotal;
|
|
100
|
-
subtotal = new_subtotal;
|
|
101
|
-
}
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
template<typename T, typename C, typename A>
|
|
105
|
-
uint64_t kll_quantile_calculator<T, C, A>::pos_of_phi(double phi, uint64_t n) {
|
|
106
|
-
const uint64_t pos = static_cast<uint64_t>(std::floor(phi * n));
|
|
107
|
-
return (pos == n) ? n - 1 : pos;
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
template<typename T, typename C, typename A>
|
|
111
|
-
uint32_t kll_quantile_calculator<T, C, A>::chunk_containing_pos(uint64_t pos) const {
|
|
112
|
-
if (entries_.size() < 1) throw std::logic_error("array too short");
|
|
113
|
-
if (pos < entries_[0].second) throw std::logic_error("position too small");
|
|
114
|
-
if (pos > entries_[entries_.size() - 1].second) throw std::logic_error("position too large");
|
|
115
|
-
return search_for_chunk_containing_pos(pos, 0, entries_.size());
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
template<typename T, typename C, typename A>
|
|
119
|
-
uint32_t kll_quantile_calculator<T, C, A>::search_for_chunk_containing_pos(uint64_t pos, uint64_t l, uint64_t r) const {
|
|
120
|
-
if (l + 1 == r) {
|
|
121
|
-
return static_cast<uint32_t>(l);
|
|
122
|
-
}
|
|
123
|
-
const uint64_t m = l + (r - l) / 2;
|
|
124
|
-
if (entries_[m].second <= pos) {
|
|
125
|
-
return search_for_chunk_containing_pos(pos, m, r);
|
|
126
|
-
}
|
|
127
|
-
return search_for_chunk_containing_pos(pos, l, m);
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
template<typename T, typename C, typename A>
|
|
131
|
-
void kll_quantile_calculator<T, C, A>::merge_sorted_blocks(Container& entries, const uint32_t* levels, uint8_t num_levels, uint32_t num_items) {
|
|
132
|
-
if (num_levels == 1) return;
|
|
133
|
-
Container temporary(entries.get_allocator());
|
|
134
|
-
temporary.reserve(num_items);
|
|
135
|
-
merge_sorted_blocks_direct(entries, temporary, levels, 0, num_levels);
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
template<typename T, typename C, typename A>
|
|
139
|
-
void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_direct(Container& orig, Container& temp, const uint32_t* levels,
|
|
140
|
-
uint8_t starting_level, uint8_t num_levels) {
|
|
141
|
-
if (num_levels == 1) return;
|
|
142
|
-
const uint8_t num_levels_1 = num_levels / 2;
|
|
143
|
-
const uint8_t num_levels_2 = num_levels - num_levels_1;
|
|
144
|
-
const uint8_t starting_level_1 = starting_level;
|
|
145
|
-
const uint8_t starting_level_2 = starting_level + num_levels_1;
|
|
146
|
-
const auto initial_size = temp.size();
|
|
147
|
-
merge_sorted_blocks_reversed(orig, temp, levels, starting_level_1, num_levels_1);
|
|
148
|
-
merge_sorted_blocks_reversed(orig, temp, levels, starting_level_2, num_levels_2);
|
|
149
|
-
const uint32_t num_items_1 = levels[starting_level_1 + num_levels_1] - levels[starting_level_1];
|
|
150
|
-
const auto chunk_begin = temp.begin() + initial_size;
|
|
151
|
-
std::merge(
|
|
152
|
-
std::make_move_iterator(chunk_begin), std::make_move_iterator(chunk_begin + num_items_1),
|
|
153
|
-
std::make_move_iterator(chunk_begin + num_items_1), std::make_move_iterator(temp.end()),
|
|
154
|
-
orig.begin() + levels[starting_level], compare_pair_by_first<C>()
|
|
155
|
-
);
|
|
156
|
-
temp.erase(chunk_begin, temp.end());
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
template<typename T, typename C, typename A>
|
|
160
|
-
void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_reversed(Container& orig, Container& temp, const uint32_t* levels,
|
|
161
|
-
uint8_t starting_level, uint8_t num_levels) {
|
|
162
|
-
if (num_levels == 1) {
|
|
163
|
-
std::move(orig.begin() + levels[starting_level], orig.begin() + levels[starting_level + 1], std::back_inserter(temp));
|
|
164
|
-
return;
|
|
165
|
-
}
|
|
166
|
-
const uint8_t num_levels_1 = num_levels / 2;
|
|
167
|
-
const uint8_t num_levels_2 = num_levels - num_levels_1;
|
|
168
|
-
const uint8_t starting_level_1 = starting_level;
|
|
169
|
-
const uint8_t starting_level_2 = starting_level + num_levels_1;
|
|
170
|
-
merge_sorted_blocks_direct(orig, temp, levels, starting_level_1, num_levels_1);
|
|
171
|
-
merge_sorted_blocks_direct(orig, temp, levels, starting_level_2, num_levels_2);
|
|
172
|
-
std::merge(
|
|
173
|
-
std::make_move_iterator(orig.begin() + levels[starting_level_1]),
|
|
174
|
-
std::make_move_iterator(orig.begin() + levels[starting_level_1 + num_levels_1]),
|
|
175
|
-
std::make_move_iterator(orig.begin() + levels[starting_level_2]),
|
|
176
|
-
std::make_move_iterator(orig.begin() + levels[starting_level_2 + num_levels_2]),
|
|
177
|
-
std::back_inserter(temp),
|
|
178
|
-
compare_pair_by_first<C>()
|
|
179
|
-
);
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
} /* namespace datasketches */
|
|
183
|
-
|
|
184
|
-
#endif // KLL_QUANTILE_CALCULATOR_IMPL_HPP_
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
-
* or more contributor license agreements. See the NOTICE file
|
|
4
|
-
* distributed with this work for additional information
|
|
5
|
-
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
-
* to you under the Apache License, Version 2.0 (the
|
|
7
|
-
* "License"); you may not use this file except in compliance
|
|
8
|
-
* with the License. You may obtain a copy of the License at
|
|
9
|
-
*
|
|
10
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
-
*
|
|
12
|
-
* Unless required by applicable law or agreed to in writing,
|
|
13
|
-
* software distributed under the License is distributed on an
|
|
14
|
-
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
-
* KIND, either express or implied. See the License for the
|
|
16
|
-
* specific language governing permissions and limitations
|
|
17
|
-
* under the License.
|
|
18
|
-
*/
|
|
19
|
-
|
|
20
|
-
#ifndef REQ_QUANTILE_CALCULATOR_HPP_
|
|
21
|
-
#define REQ_QUANTILE_CALCULATOR_HPP_
|
|
22
|
-
|
|
23
|
-
#include <functional>
|
|
24
|
-
|
|
25
|
-
namespace datasketches {
|
|
26
|
-
|
|
27
|
-
template<
|
|
28
|
-
typename T,
|
|
29
|
-
typename Comparator,
|
|
30
|
-
typename Allocator
|
|
31
|
-
>
|
|
32
|
-
class req_quantile_calculator {
|
|
33
|
-
public:
|
|
34
|
-
req_quantile_calculator(uint64_t n, const Allocator& allocator);
|
|
35
|
-
|
|
36
|
-
void add(const T* begin, const T* end, uint8_t lg_weight);
|
|
37
|
-
|
|
38
|
-
template<bool inclusive>
|
|
39
|
-
void convert_to_cummulative();
|
|
40
|
-
|
|
41
|
-
const T* get_quantile(double rank) const;
|
|
42
|
-
|
|
43
|
-
private:
|
|
44
|
-
using Entry = std::pair<const T*, uint64_t>;
|
|
45
|
-
using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
|
|
46
|
-
using Container = std::vector<Entry, AllocEntry>;
|
|
47
|
-
|
|
48
|
-
template<typename C>
|
|
49
|
-
struct compare_pairs_by_first_ptr {
|
|
50
|
-
bool operator()(const Entry& a, const Entry& b) {
|
|
51
|
-
return C()(*a.first, *b.first);
|
|
52
|
-
}
|
|
53
|
-
};
|
|
54
|
-
|
|
55
|
-
struct compare_pairs_by_second {
|
|
56
|
-
bool operator()(const Entry& a, const Entry& b) {
|
|
57
|
-
return a.second < b.second;
|
|
58
|
-
}
|
|
59
|
-
};
|
|
60
|
-
|
|
61
|
-
uint64_t n_;
|
|
62
|
-
Container entries_;
|
|
63
|
-
};
|
|
64
|
-
|
|
65
|
-
} /* namespace datasketches */
|
|
66
|
-
|
|
67
|
-
#include "req_quantile_calculator_impl.hpp"
|
|
68
|
-
|
|
69
|
-
#endif
|
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
-
* or more contributor license agreements. See the NOTICE file
|
|
4
|
-
* distributed with this work for additional information
|
|
5
|
-
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
-
* to you under the Apache License, Version 2.0 (the
|
|
7
|
-
* "License"); you may not use this file except in compliance
|
|
8
|
-
* with the License. You may obtain a copy of the License at
|
|
9
|
-
*
|
|
10
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
-
*
|
|
12
|
-
* Unless required by applicable law or agreed to in writing,
|
|
13
|
-
* software distributed under the License is distributed on an
|
|
14
|
-
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
-
* KIND, either express or implied. See the License for the
|
|
16
|
-
* specific language governing permissions and limitations
|
|
17
|
-
* under the License.
|
|
18
|
-
*/
|
|
19
|
-
|
|
20
|
-
#ifndef REQ_QUANTILE_CALCULATOR_IMPL_HPP_
|
|
21
|
-
#define REQ_QUANTILE_CALCULATOR_IMPL_HPP_
|
|
22
|
-
|
|
23
|
-
namespace datasketches {
|
|
24
|
-
|
|
25
|
-
template<typename T, typename C, typename A>
|
|
26
|
-
req_quantile_calculator<T, C, A>::req_quantile_calculator(uint64_t n, const A& allocator):
|
|
27
|
-
n_(n),
|
|
28
|
-
entries_(allocator)
|
|
29
|
-
{}
|
|
30
|
-
|
|
31
|
-
template<typename T, typename C, typename A>
|
|
32
|
-
void req_quantile_calculator<T, C, A>::add(const T* begin, const T* end, uint8_t lg_weight) {
|
|
33
|
-
if (entries_.capacity() < entries_.size() + std::distance(begin, end)) entries_.reserve(entries_.size() + std::distance(begin, end));
|
|
34
|
-
const size_t size_before = entries_.size();
|
|
35
|
-
for (auto it = begin; it != end; ++it) entries_.push_back(Entry(it, 1 << lg_weight));
|
|
36
|
-
if (size_before > 0) std::inplace_merge(entries_.begin(), entries_.begin() + size_before, entries_.end(), compare_pairs_by_first_ptr<C>());
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
template<typename T, typename C, typename A>
|
|
40
|
-
template<bool inclusive>
|
|
41
|
-
void req_quantile_calculator<T, C, A>::convert_to_cummulative() {
|
|
42
|
-
uint64_t subtotal = 0;
|
|
43
|
-
for (auto& entry: entries_) {
|
|
44
|
-
const uint64_t new_subtotal = subtotal + entry.second;
|
|
45
|
-
entry.second = inclusive ? new_subtotal : subtotal;
|
|
46
|
-
subtotal = new_subtotal;
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
template<typename T, typename C, typename A>
|
|
51
|
-
const T* req_quantile_calculator<T, C, A>::get_quantile(double rank) const {
|
|
52
|
-
uint64_t weight = static_cast<uint64_t>(rank * n_);
|
|
53
|
-
auto it = std::lower_bound(entries_.begin(), entries_.end(), Entry(nullptr, weight), compare_pairs_by_second());
|
|
54
|
-
if (it == entries_.end()) return entries_[entries_.size() - 1].first;
|
|
55
|
-
return it->first;
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
} /* namespace datasketches */
|
|
59
|
-
|
|
60
|
-
#endif
|