datasketches 0.2.2 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/README.md +8 -8
- data/ext/datasketches/kll_wrapper.cpp +5 -1
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +27 -5
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +18 -13
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +16 -0
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +10 -6
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +30 -12
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +5 -19
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +103 -44
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +110 -130
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +156 -23
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +17 -6
- data/vendor/datasketches-cpp/python/README.md +57 -50
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +49 -14
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +12 -5
- data/vendor/datasketches-cpp/python/tests/kll_test.py +12 -6
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
- data/vendor/datasketches-cpp/req/CMakeLists.txt +6 -21
- data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +66 -61
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +54 -12
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +45 -34
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +33 -15
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
- data/vendor/datasketches-cpp/setup.py +10 -7
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +92 -23
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +3 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +150 -93
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -2
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +9 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +39 -10
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +446 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +429 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -11
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +29 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +34 -14
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +8 -0
- metadata +34 -12
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
-
* or more contributor license agreements. See the NOTICE file
|
|
4
|
-
* distributed with this work for additional information
|
|
5
|
-
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
-
* to you under the Apache License, Version 2.0 (the
|
|
7
|
-
* "License"); you may not use this file except in compliance
|
|
8
|
-
* with the License. You may obtain a copy of the License at
|
|
9
|
-
*
|
|
10
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
-
*
|
|
12
|
-
* Unless required by applicable law or agreed to in writing,
|
|
13
|
-
* software distributed under the License is distributed on an
|
|
14
|
-
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
-
* KIND, either express or implied. See the License for the
|
|
16
|
-
* specific language governing permissions and limitations
|
|
17
|
-
* under the License.
|
|
18
|
-
*/
|
|
19
|
-
|
|
20
|
-
#ifndef KLL_QUANTILE_CALCULATOR_HPP_
|
|
21
|
-
#define KLL_QUANTILE_CALCULATOR_HPP_
|
|
22
|
-
|
|
23
|
-
#include <memory>
|
|
24
|
-
|
|
25
|
-
namespace datasketches {
|
|
26
|
-
|
|
27
|
-
// forward declaration
|
|
28
|
-
template<typename T, typename C, typename S, typename A> class kll_sketch;
|
|
29
|
-
|
|
30
|
-
template <typename T, typename C, typename A>
|
|
31
|
-
class kll_quantile_calculator {
|
|
32
|
-
public:
|
|
33
|
-
using Entry = std::pair<T, uint64_t>;
|
|
34
|
-
using AllocEntry = typename std::allocator_traits<A>::template rebind_alloc<Entry>;
|
|
35
|
-
using Container = std::vector<Entry, AllocEntry>;
|
|
36
|
-
using const_iterator = typename Container::const_iterator;
|
|
37
|
-
|
|
38
|
-
template<typename S>
|
|
39
|
-
kll_quantile_calculator(const kll_sketch<T, C, S, A>& sketch);
|
|
40
|
-
|
|
41
|
-
T get_quantile(double fraction) const;
|
|
42
|
-
const_iterator begin() const;
|
|
43
|
-
const_iterator end() const;
|
|
44
|
-
|
|
45
|
-
private:
|
|
46
|
-
using AllocU32 = typename std::allocator_traits<A>::template rebind_alloc<uint32_t>;
|
|
47
|
-
using vector_u32 = std::vector<uint32_t, AllocU32>;
|
|
48
|
-
uint64_t n_;
|
|
49
|
-
vector_u32 levels_;
|
|
50
|
-
Container entries_;
|
|
51
|
-
|
|
52
|
-
void populate_from_sketch(const T* items, const uint32_t* levels, uint8_t num_levels);
|
|
53
|
-
T approximately_answer_positional_query(uint64_t pos) const;
|
|
54
|
-
void convert_to_preceding_cummulative();
|
|
55
|
-
uint32_t chunk_containing_pos(uint64_t pos) const;
|
|
56
|
-
uint32_t search_for_chunk_containing_pos(uint64_t pos, uint64_t l, uint64_t r) const;
|
|
57
|
-
static void merge_sorted_blocks(Container& entries, const uint32_t* levels, uint8_t num_levels, uint32_t num_items);
|
|
58
|
-
static void merge_sorted_blocks_direct(Container& orig, Container& temp, const uint32_t* levels, uint8_t starting_level, uint8_t num_levels);
|
|
59
|
-
static void merge_sorted_blocks_reversed(Container& orig, Container& temp, const uint32_t* levels, uint8_t starting_level, uint8_t num_levels);
|
|
60
|
-
static uint64_t pos_of_phi(double phi, uint64_t n);
|
|
61
|
-
|
|
62
|
-
template<typename Comparator>
|
|
63
|
-
struct compare_pair_by_first {
|
|
64
|
-
template<typename Entry1, typename Entry2>
|
|
65
|
-
bool operator()(Entry1&& a, Entry2&& b) const {
|
|
66
|
-
return Comparator()(std::forward<Entry1>(a).first, std::forward<Entry2>(b).first);
|
|
67
|
-
}
|
|
68
|
-
};
|
|
69
|
-
};
|
|
70
|
-
|
|
71
|
-
} /* namespace datasketches */
|
|
72
|
-
|
|
73
|
-
#include "kll_quantile_calculator_impl.hpp"
|
|
74
|
-
|
|
75
|
-
#endif // KLL_QUANTILE_CALCULATOR_HPP_
|
|
@@ -1,184 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
-
* or more contributor license agreements. See the NOTICE file
|
|
4
|
-
* distributed with this work for additional information
|
|
5
|
-
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
-
* to you under the Apache License, Version 2.0 (the
|
|
7
|
-
* "License"); you may not use this file except in compliance
|
|
8
|
-
* with the License. You may obtain a copy of the License at
|
|
9
|
-
*
|
|
10
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
-
*
|
|
12
|
-
* Unless required by applicable law or agreed to in writing,
|
|
13
|
-
* software distributed under the License is distributed on an
|
|
14
|
-
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
-
* KIND, either express or implied. See the License for the
|
|
16
|
-
* specific language governing permissions and limitations
|
|
17
|
-
* under the License.
|
|
18
|
-
*/
|
|
19
|
-
|
|
20
|
-
#ifndef KLL_QUANTILE_CALCULATOR_IMPL_HPP_
|
|
21
|
-
#define KLL_QUANTILE_CALCULATOR_IMPL_HPP_
|
|
22
|
-
|
|
23
|
-
#include <memory>
|
|
24
|
-
#include <cmath>
|
|
25
|
-
#include <algorithm>
|
|
26
|
-
|
|
27
|
-
#include "kll_helper.hpp"
|
|
28
|
-
|
|
29
|
-
namespace datasketches {
|
|
30
|
-
|
|
31
|
-
template<typename T, typename C, typename A>
|
|
32
|
-
template<typename S>
|
|
33
|
-
kll_quantile_calculator<T, C, A>::kll_quantile_calculator(const kll_sketch<T, C, S, A>& sketch):
|
|
34
|
-
n_(sketch.n_), levels_(sketch.num_levels_ + 1, 0, sketch.allocator_), entries_(sketch.allocator_)
|
|
35
|
-
{
|
|
36
|
-
const uint32_t num_items = sketch.levels_[sketch.num_levels_] - sketch.levels_[0];
|
|
37
|
-
if (num_items > 0) {
|
|
38
|
-
entries_.reserve(num_items);
|
|
39
|
-
populate_from_sketch(sketch.items_, sketch.levels_.data(), sketch.num_levels_);
|
|
40
|
-
if (!sketch.is_level_zero_sorted_) std::sort(entries_.begin(), entries_.begin() + levels_[1], compare_pair_by_first<C>());
|
|
41
|
-
merge_sorted_blocks(entries_, levels_.data(), static_cast<uint8_t>(levels_.size()) - 1, num_items);
|
|
42
|
-
if (!is_sorted(entries_.begin(), entries_.end(), compare_pair_by_first<C>())) throw std::logic_error("entries must be sorted");
|
|
43
|
-
convert_to_preceding_cummulative();
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
template<typename T, typename C, typename A>
|
|
48
|
-
T kll_quantile_calculator<T, C, A>::get_quantile(double fraction) const {
|
|
49
|
-
return approximately_answer_positional_query(pos_of_phi(fraction, n_));
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
template<typename T, typename C, typename A>
|
|
53
|
-
auto kll_quantile_calculator<T, C, A>::begin() const -> const_iterator {
|
|
54
|
-
return entries_.begin();
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
template<typename T, typename C, typename A>
|
|
58
|
-
auto kll_quantile_calculator<T, C, A>::end() const -> const_iterator {
|
|
59
|
-
return entries_.end();
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
template<typename T, typename C, typename A>
|
|
63
|
-
void kll_quantile_calculator<T, C, A>::populate_from_sketch(const T* items, const uint32_t* levels, uint8_t num_levels) {
|
|
64
|
-
size_t src_level = 0;
|
|
65
|
-
size_t dst_level = 0;
|
|
66
|
-
uint64_t weight = 1;
|
|
67
|
-
uint32_t offset = levels[0];
|
|
68
|
-
while (src_level < num_levels) {
|
|
69
|
-
const uint32_t from_index(levels[src_level] - offset);
|
|
70
|
-
const uint32_t to_index(levels[src_level + 1] - offset); // exclusive
|
|
71
|
-
if (from_index < to_index) { // skip empty levels
|
|
72
|
-
for (uint32_t i = from_index; i < to_index; ++i) {
|
|
73
|
-
entries_.push_back(Entry(items[i + offset], weight));
|
|
74
|
-
}
|
|
75
|
-
levels_[dst_level] = from_index;
|
|
76
|
-
levels_[dst_level + 1] = to_index;
|
|
77
|
-
dst_level++;
|
|
78
|
-
}
|
|
79
|
-
src_level++;
|
|
80
|
-
weight *= 2;
|
|
81
|
-
}
|
|
82
|
-
if (levels_.size() > static_cast<size_t>(dst_level + 1)) levels_.resize(dst_level + 1);
|
|
83
|
-
}
|
|
84
|
-
|
|
85
|
-
template<typename T, typename C, typename A>
|
|
86
|
-
T kll_quantile_calculator<T, C, A>::approximately_answer_positional_query(uint64_t pos) const {
|
|
87
|
-
if (pos >= n_) throw std::logic_error("position out of range");
|
|
88
|
-
const uint32_t num_items = levels_[levels_.size() - 1];
|
|
89
|
-
if (pos > entries_[num_items - 1].second) return entries_[num_items - 1].first;
|
|
90
|
-
const uint32_t index = chunk_containing_pos(pos);
|
|
91
|
-
return entries_[index].first;
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
template<typename T, typename C, typename A>
|
|
95
|
-
void kll_quantile_calculator<T, C, A>::convert_to_preceding_cummulative() {
|
|
96
|
-
uint64_t subtotal = 0;
|
|
97
|
-
for (auto& entry: entries_) {
|
|
98
|
-
const uint64_t new_subtotal = subtotal + entry.second;
|
|
99
|
-
entry.second = subtotal;
|
|
100
|
-
subtotal = new_subtotal;
|
|
101
|
-
}
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
template<typename T, typename C, typename A>
|
|
105
|
-
uint64_t kll_quantile_calculator<T, C, A>::pos_of_phi(double phi, uint64_t n) {
|
|
106
|
-
const uint64_t pos = static_cast<uint64_t>(std::floor(phi * n));
|
|
107
|
-
return (pos == n) ? n - 1 : pos;
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
template<typename T, typename C, typename A>
|
|
111
|
-
uint32_t kll_quantile_calculator<T, C, A>::chunk_containing_pos(uint64_t pos) const {
|
|
112
|
-
if (entries_.size() < 1) throw std::logic_error("array too short");
|
|
113
|
-
if (pos < entries_[0].second) throw std::logic_error("position too small");
|
|
114
|
-
if (pos > entries_[entries_.size() - 1].second) throw std::logic_error("position too large");
|
|
115
|
-
return search_for_chunk_containing_pos(pos, 0, entries_.size());
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
template<typename T, typename C, typename A>
|
|
119
|
-
uint32_t kll_quantile_calculator<T, C, A>::search_for_chunk_containing_pos(uint64_t pos, uint64_t l, uint64_t r) const {
|
|
120
|
-
if (l + 1 == r) {
|
|
121
|
-
return static_cast<uint32_t>(l);
|
|
122
|
-
}
|
|
123
|
-
const uint64_t m = l + (r - l) / 2;
|
|
124
|
-
if (entries_[m].second <= pos) {
|
|
125
|
-
return search_for_chunk_containing_pos(pos, m, r);
|
|
126
|
-
}
|
|
127
|
-
return search_for_chunk_containing_pos(pos, l, m);
|
|
128
|
-
}
|
|
129
|
-
|
|
130
|
-
template<typename T, typename C, typename A>
|
|
131
|
-
void kll_quantile_calculator<T, C, A>::merge_sorted_blocks(Container& entries, const uint32_t* levels, uint8_t num_levels, uint32_t num_items) {
|
|
132
|
-
if (num_levels == 1) return;
|
|
133
|
-
Container temporary(entries.get_allocator());
|
|
134
|
-
temporary.reserve(num_items);
|
|
135
|
-
merge_sorted_blocks_direct(entries, temporary, levels, 0, num_levels);
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
template<typename T, typename C, typename A>
|
|
139
|
-
void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_direct(Container& orig, Container& temp, const uint32_t* levels,
|
|
140
|
-
uint8_t starting_level, uint8_t num_levels) {
|
|
141
|
-
if (num_levels == 1) return;
|
|
142
|
-
const uint8_t num_levels_1 = num_levels / 2;
|
|
143
|
-
const uint8_t num_levels_2 = num_levels - num_levels_1;
|
|
144
|
-
const uint8_t starting_level_1 = starting_level;
|
|
145
|
-
const uint8_t starting_level_2 = starting_level + num_levels_1;
|
|
146
|
-
const auto initial_size = temp.size();
|
|
147
|
-
merge_sorted_blocks_reversed(orig, temp, levels, starting_level_1, num_levels_1);
|
|
148
|
-
merge_sorted_blocks_reversed(orig, temp, levels, starting_level_2, num_levels_2);
|
|
149
|
-
const uint32_t num_items_1 = levels[starting_level_1 + num_levels_1] - levels[starting_level_1];
|
|
150
|
-
const auto chunk_begin = temp.begin() + initial_size;
|
|
151
|
-
std::merge(
|
|
152
|
-
std::make_move_iterator(chunk_begin), std::make_move_iterator(chunk_begin + num_items_1),
|
|
153
|
-
std::make_move_iterator(chunk_begin + num_items_1), std::make_move_iterator(temp.end()),
|
|
154
|
-
orig.begin() + levels[starting_level], compare_pair_by_first<C>()
|
|
155
|
-
);
|
|
156
|
-
temp.erase(chunk_begin, temp.end());
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
template<typename T, typename C, typename A>
|
|
160
|
-
void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_reversed(Container& orig, Container& temp, const uint32_t* levels,
|
|
161
|
-
uint8_t starting_level, uint8_t num_levels) {
|
|
162
|
-
if (num_levels == 1) {
|
|
163
|
-
std::move(orig.begin() + levels[starting_level], orig.begin() + levels[starting_level + 1], std::back_inserter(temp));
|
|
164
|
-
return;
|
|
165
|
-
}
|
|
166
|
-
const uint8_t num_levels_1 = num_levels / 2;
|
|
167
|
-
const uint8_t num_levels_2 = num_levels - num_levels_1;
|
|
168
|
-
const uint8_t starting_level_1 = starting_level;
|
|
169
|
-
const uint8_t starting_level_2 = starting_level + num_levels_1;
|
|
170
|
-
merge_sorted_blocks_direct(orig, temp, levels, starting_level_1, num_levels_1);
|
|
171
|
-
merge_sorted_blocks_direct(orig, temp, levels, starting_level_2, num_levels_2);
|
|
172
|
-
std::merge(
|
|
173
|
-
std::make_move_iterator(orig.begin() + levels[starting_level_1]),
|
|
174
|
-
std::make_move_iterator(orig.begin() + levels[starting_level_1 + num_levels_1]),
|
|
175
|
-
std::make_move_iterator(orig.begin() + levels[starting_level_2]),
|
|
176
|
-
std::make_move_iterator(orig.begin() + levels[starting_level_2 + num_levels_2]),
|
|
177
|
-
std::back_inserter(temp),
|
|
178
|
-
compare_pair_by_first<C>()
|
|
179
|
-
);
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
} /* namespace datasketches */
|
|
183
|
-
|
|
184
|
-
#endif // KLL_QUANTILE_CALCULATOR_IMPL_HPP_
|
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
-
* or more contributor license agreements. See the NOTICE file
|
|
4
|
-
* distributed with this work for additional information
|
|
5
|
-
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
-
* to you under the Apache License, Version 2.0 (the
|
|
7
|
-
* "License"); you may not use this file except in compliance
|
|
8
|
-
* with the License. You may obtain a copy of the License at
|
|
9
|
-
*
|
|
10
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
-
*
|
|
12
|
-
* Unless required by applicable law or agreed to in writing,
|
|
13
|
-
* software distributed under the License is distributed on an
|
|
14
|
-
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
-
* KIND, either express or implied. See the License for the
|
|
16
|
-
* specific language governing permissions and limitations
|
|
17
|
-
* under the License.
|
|
18
|
-
*/
|
|
19
|
-
|
|
20
|
-
#ifndef REQ_QUANTILE_CALCULATOR_HPP_
|
|
21
|
-
#define REQ_QUANTILE_CALCULATOR_HPP_
|
|
22
|
-
|
|
23
|
-
#include <functional>
|
|
24
|
-
|
|
25
|
-
namespace datasketches {
|
|
26
|
-
|
|
27
|
-
template<
|
|
28
|
-
typename T,
|
|
29
|
-
typename Comparator,
|
|
30
|
-
typename Allocator
|
|
31
|
-
>
|
|
32
|
-
class req_quantile_calculator {
|
|
33
|
-
public:
|
|
34
|
-
req_quantile_calculator(uint64_t n, const Allocator& allocator);
|
|
35
|
-
|
|
36
|
-
void add(const T* begin, const T* end, uint8_t lg_weight);
|
|
37
|
-
|
|
38
|
-
template<bool inclusive>
|
|
39
|
-
void convert_to_cummulative();
|
|
40
|
-
|
|
41
|
-
const T* get_quantile(double rank) const;
|
|
42
|
-
|
|
43
|
-
private:
|
|
44
|
-
using Entry = std::pair<const T*, uint64_t>;
|
|
45
|
-
using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
|
|
46
|
-
using Container = std::vector<Entry, AllocEntry>;
|
|
47
|
-
|
|
48
|
-
template<typename C>
|
|
49
|
-
struct compare_pairs_by_first_ptr {
|
|
50
|
-
bool operator()(const Entry& a, const Entry& b) {
|
|
51
|
-
return C()(*a.first, *b.first);
|
|
52
|
-
}
|
|
53
|
-
};
|
|
54
|
-
|
|
55
|
-
struct compare_pairs_by_second {
|
|
56
|
-
bool operator()(const Entry& a, const Entry& b) {
|
|
57
|
-
return a.second < b.second;
|
|
58
|
-
}
|
|
59
|
-
};
|
|
60
|
-
|
|
61
|
-
uint64_t n_;
|
|
62
|
-
Container entries_;
|
|
63
|
-
};
|
|
64
|
-
|
|
65
|
-
} /* namespace datasketches */
|
|
66
|
-
|
|
67
|
-
#include "req_quantile_calculator_impl.hpp"
|
|
68
|
-
|
|
69
|
-
#endif
|
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
/*
|
|
2
|
-
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
-
* or more contributor license agreements. See the NOTICE file
|
|
4
|
-
* distributed with this work for additional information
|
|
5
|
-
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
-
* to you under the Apache License, Version 2.0 (the
|
|
7
|
-
* "License"); you may not use this file except in compliance
|
|
8
|
-
* with the License. You may obtain a copy of the License at
|
|
9
|
-
*
|
|
10
|
-
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
-
*
|
|
12
|
-
* Unless required by applicable law or agreed to in writing,
|
|
13
|
-
* software distributed under the License is distributed on an
|
|
14
|
-
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
-
* KIND, either express or implied. See the License for the
|
|
16
|
-
* specific language governing permissions and limitations
|
|
17
|
-
* under the License.
|
|
18
|
-
*/
|
|
19
|
-
|
|
20
|
-
#ifndef REQ_QUANTILE_CALCULATOR_IMPL_HPP_
|
|
21
|
-
#define REQ_QUANTILE_CALCULATOR_IMPL_HPP_
|
|
22
|
-
|
|
23
|
-
namespace datasketches {
|
|
24
|
-
|
|
25
|
-
template<typename T, typename C, typename A>
|
|
26
|
-
req_quantile_calculator<T, C, A>::req_quantile_calculator(uint64_t n, const A& allocator):
|
|
27
|
-
n_(n),
|
|
28
|
-
entries_(allocator)
|
|
29
|
-
{}
|
|
30
|
-
|
|
31
|
-
template<typename T, typename C, typename A>
|
|
32
|
-
void req_quantile_calculator<T, C, A>::add(const T* begin, const T* end, uint8_t lg_weight) {
|
|
33
|
-
if (entries_.capacity() < entries_.size() + std::distance(begin, end)) entries_.reserve(entries_.size() + std::distance(begin, end));
|
|
34
|
-
const size_t size_before = entries_.size();
|
|
35
|
-
for (auto it = begin; it != end; ++it) entries_.push_back(Entry(it, 1 << lg_weight));
|
|
36
|
-
if (size_before > 0) std::inplace_merge(entries_.begin(), entries_.begin() + size_before, entries_.end(), compare_pairs_by_first_ptr<C>());
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
template<typename T, typename C, typename A>
|
|
40
|
-
template<bool inclusive>
|
|
41
|
-
void req_quantile_calculator<T, C, A>::convert_to_cummulative() {
|
|
42
|
-
uint64_t subtotal = 0;
|
|
43
|
-
for (auto& entry: entries_) {
|
|
44
|
-
const uint64_t new_subtotal = subtotal + entry.second;
|
|
45
|
-
entry.second = inclusive ? new_subtotal : subtotal;
|
|
46
|
-
subtotal = new_subtotal;
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
|
|
50
|
-
template<typename T, typename C, typename A>
|
|
51
|
-
const T* req_quantile_calculator<T, C, A>::get_quantile(double rank) const {
|
|
52
|
-
uint64_t weight = static_cast<uint64_t>(rank * n_);
|
|
53
|
-
auto it = std::lower_bound(entries_.begin(), entries_.end(), Entry(nullptr, weight), compare_pairs_by_second());
|
|
54
|
-
if (it == entries_.end()) return entries_[entries_.size() - 1].first;
|
|
55
|
-
return it->first;
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
} /* namespace datasketches */
|
|
59
|
-
|
|
60
|
-
#endif
|
|
Binary file
|
|
Binary file
|