datasketches 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/kll_wrapper.cpp +20 -20
- data/ext/datasketches/theta_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
- data/vendor/datasketches-cpp/MANIFEST.in +21 -2
- data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
- data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
- data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
- data/vendor/datasketches-cpp/pyproject.toml +17 -12
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
- data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/setup.py +14 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
- data/vendor/datasketches-cpp/tox.ini +26 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
- data/vendor/datasketches-cpp/version.cfg.in +1 -0
- metadata +14 -5
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
|
@@ -20,14 +20,12 @@
|
|
|
20
20
|
#ifndef KLL_SKETCH_HPP_
|
|
21
21
|
#define KLL_SKETCH_HPP_
|
|
22
22
|
|
|
23
|
-
#include <functional>
|
|
24
23
|
#include <memory>
|
|
25
24
|
#include <vector>
|
|
26
|
-
#include <cmath>
|
|
27
25
|
|
|
28
|
-
#include "quantile_sketch_sorted_view.hpp"
|
|
29
26
|
#include "common_defs.hpp"
|
|
30
27
|
#include "serde.hpp"
|
|
28
|
+
#include "quantiles_sorted_view.hpp"
|
|
31
29
|
|
|
32
30
|
namespace datasketches {
|
|
33
31
|
|
|
@@ -37,9 +35,9 @@ namespace datasketches {
|
|
|
37
35
|
* See <a href="https://arxiv.org/abs/1603.05346v2">Optimal Quantile Approximation in Streams</a>.
|
|
38
36
|
*
|
|
39
37
|
* <p>This is a stochastic streaming sketch that enables near real-time analysis of the
|
|
40
|
-
* approximate distribution of
|
|
41
|
-
* that the
|
|
42
|
-
* The analysis is obtained using <i>get_quantile()</i>
|
|
38
|
+
* approximate distribution of items from a very large stream in a single pass, requiring only
|
|
39
|
+
* that the items are comparable.
|
|
40
|
+
* The analysis is obtained using <i>get_quantile()</i> function or the
|
|
43
41
|
* inverse functions get_rank(), get_PMF() (Probability Mass Function), and get_CDF()
|
|
44
42
|
* (Cumulative Distribution Function).
|
|
45
43
|
*
|
|
@@ -47,14 +45,15 @@ namespace datasketches {
|
|
|
47
45
|
* with the equivalent Java implementation only when template parameter T = float
|
|
48
46
|
* (32-bit single precision values).
|
|
49
47
|
*
|
|
50
|
-
* <p>Given an input stream of <i>N</i>
|
|
51
|
-
*
|
|
52
|
-
* <i>N</i>
|
|
48
|
+
* <p>Given an input stream of <i>N</i> items, the <i>natural rank</i> of any specific
|
|
49
|
+
* item is defined as its index <i>(1 to N)</i> in inclusive mode
|
|
50
|
+
* or <i>(0 to N-1)</i> in exclusive mode
|
|
51
|
+
* in the hypothetical sorted stream of all <i>N</i> input items.
|
|
53
52
|
*
|
|
54
|
-
* <p>The <i>normalized rank</i> (<i>rank</i>) of any specific
|
|
55
|
-
* <i>
|
|
56
|
-
* Thus, the <i>normalized rank</i> is
|
|
57
|
-
* In the documentation for this sketch <i>
|
|
53
|
+
* <p>The <i>normalized rank</i> (<i>rank</i>) of any specific item is defined as its
|
|
54
|
+
* <i>natural rank</i> divided by <i>N</i>.
|
|
55
|
+
* Thus, the <i>normalized rank</i> is between zero and one.
|
|
56
|
+
* In the documentation for this sketch <i>natural rank</i> is never used so any
|
|
58
57
|
* reference to just <i>rank</i> should be interpreted to mean <i>normalized rank</i>.
|
|
59
58
|
*
|
|
60
59
|
* <p>This sketch is configured with a parameter <i>k</i>, which affects the size of the sketch
|
|
@@ -63,18 +62,18 @@ namespace datasketches {
|
|
|
63
62
|
* <p>The estimation error is commonly called <i>epsilon</i> (or <i>eps</i>) and is a fraction
|
|
64
63
|
* between zero and one. Larger values of <i>k</i> result in smaller values of epsilon.
|
|
65
64
|
* Epsilon is always with respect to the rank and cannot be applied to the
|
|
66
|
-
* corresponding
|
|
65
|
+
* corresponding items.
|
|
67
66
|
*
|
|
68
|
-
* <p>The relationship between the normalized rank and the corresponding
|
|
67
|
+
* <p>The relationship between the normalized rank and the corresponding items can be viewed
|
|
69
68
|
* as a two dimensional monotonic plot with the normalized rank on one axis and the
|
|
70
|
-
* corresponding
|
|
69
|
+
* corresponding items on the other axis. If the y-axis is specified as the item-axis and
|
|
71
70
|
* the x-axis as the normalized rank, then <i>y = get_quantile(x)</i> is a monotonically
|
|
72
71
|
* increasing function.
|
|
73
72
|
*
|
|
74
|
-
* <p>The
|
|
75
|
-
* corresponding
|
|
73
|
+
* <p>The function <i>get_quantile(rank)</i> translates ranks into
|
|
74
|
+
* corresponding quantiles. The functions <i>get_rank(item),
|
|
76
75
|
* get_CDF(...) (Cumulative Distribution Function), and get_PMF(...)
|
|
77
|
-
* (Probability Mass Function)</i> perform the opposite operation and translate
|
|
76
|
+
* (Probability Mass Function)</i> perform the opposite operation and translate items into ranks.
|
|
78
77
|
*
|
|
79
78
|
* <p>The <i>get_PMF(...)</i> function has about 13 to 47% worse rank error (depending
|
|
80
79
|
* on <i>k</i>) than the other queries because the mass of each "bin" of the PMF has
|
|
@@ -86,60 +85,60 @@ namespace datasketches {
|
|
|
86
85
|
*
|
|
87
86
|
* <p>A <i>get_quantile(rank)</i> query has the following guarantees:
|
|
88
87
|
* <ul>
|
|
89
|
-
* <li>Let <i>
|
|
90
|
-
* <li>The
|
|
91
|
-
* <li>Let <i>trueRank</i> be the true rank of <i>
|
|
92
|
-
* stream of all <i>N</i>
|
|
88
|
+
* <li>Let <i>q = get_quantile(r)</i> where <i>r</i> is the rank between zero and one.</li>
|
|
89
|
+
* <li>The quantile <i>q</i> will be an item from the input stream.</li>
|
|
90
|
+
* <li>Let <i>trueRank</i> be the true rank of <i>q</i> derived from the hypothetical sorted
|
|
91
|
+
* stream of all <i>N</i> items.</li>
|
|
93
92
|
* <li>Let <i>eps = get_normalized_rank_error(false)</i>.</li>
|
|
94
93
|
* <li>Then <i>r - eps ≤ trueRank ≤ r + eps</i> with a confidence of 99%. Note that the
|
|
95
|
-
* error is on the rank, not the
|
|
94
|
+
* error is on the rank, not the quantile.</li>
|
|
96
95
|
* </ul>
|
|
97
96
|
*
|
|
98
|
-
* <p>A <i>get_rank(
|
|
97
|
+
* <p>A <i>get_rank(item)</i> query has the following guarantees:
|
|
99
98
|
* <ul>
|
|
100
|
-
* <li>Let <i>r = get_rank(
|
|
99
|
+
* <li>Let <i>r = get_rank(i)</i> where <i>i</i> is an item between the min and max items of
|
|
101
100
|
* the input stream.</li>
|
|
102
|
-
* <li>Let <i>true_rank</i> be the true rank of <i>
|
|
103
|
-
* stream of all <i>N</i>
|
|
101
|
+
* <li>Let <i>trueRank</i> be the true rank of <i>i</i> derived from the hypothetical sorted
|
|
102
|
+
* stream of all <i>N</i> items.</li>
|
|
104
103
|
* <li>Let <i>eps = get_normalized_rank_error(false)</i>.</li>
|
|
105
104
|
* <li>Then <i>r - eps ≤ trueRank ≤ r + eps</i> with a confidence of 99%.</li>
|
|
106
105
|
* </ul>
|
|
107
106
|
*
|
|
108
107
|
* <p>A <i>get_PMF()</i> query has the following guarantees:
|
|
109
108
|
* <ul>
|
|
110
|
-
* <li>Let <i>{r1, r2, ..., r(m+1)} = get_PMF(
|
|
111
|
-
* between the min and max
|
|
112
|
-
* <li>Let <i>mass<sub>i</sub> = estimated mass between
|
|
113
|
-
* <li>Let <i>trueMass</i> be the true mass between the
|
|
114
|
-
*
|
|
109
|
+
* <li>Let <i>{r1, r2, ..., r(m+1)} = get_PMF(s1, s2, ..., sm)</i> where <i>s1, s2, ...</i> are
|
|
110
|
+
* split points (items from the input domain) between the min and max items of the input stream.</li>
|
|
111
|
+
* <li>Let <i>mass<sub>i</sub> = estimated mass between s<sub>i</sub> and s<sub>i+1</sub></i>.</li>
|
|
112
|
+
* <li>Let <i>trueMass</i> be the true mass between the items of <i>s<sub>i</sub>,
|
|
113
|
+
* s<sub>i+1</sub></i> derived from the hypothetical sorted stream of all <i>N</i> items.</li>
|
|
115
114
|
* <li>Let <i>eps = get_normalized_rank_error(true)</i>.</li>
|
|
116
115
|
* <li>then <i>mass - eps ≤ trueMass ≤ mass + eps</i> with a confidence of 99%.</li>
|
|
117
|
-
* <li>r(m+1) includes the mass of all points larger than
|
|
116
|
+
* <li>r(m+1) includes the mass of all points larger than sm.</li>
|
|
118
117
|
* </ul>
|
|
119
118
|
*
|
|
120
119
|
* <p>A <i>get_CDF(...)</i> query has the following guarantees:
|
|
121
120
|
* <ul>
|
|
122
|
-
* <li>Let <i>{r1, r2, ..., r(m+1)} = get_CDF(
|
|
123
|
-
* between the min and max
|
|
121
|
+
* <li>Let <i>{r1, r2, ..., r(m+1)} = get_CDF(s1, s2, ..., sm)</i> where <i>s1, s2, ...</i> are
|
|
122
|
+
* split points (items from the input domain) between the min and max items of the input stream.</li>
|
|
124
123
|
* <li>Let <i>mass<sub>i</sub> = r<sub>i+1</sub> - r<sub>i</sub></i>.</li>
|
|
125
|
-
* <li>Let <i>trueMass</i> be the true mass between the true ranks of <i>
|
|
126
|
-
*
|
|
124
|
+
* <li>Let <i>trueMass</i> be the true mass between the true ranks of <i>s<sub>i</sub>,
|
|
125
|
+
* s<sub>i+1</sub></i> derived from the hypothetical sorted stream of all <i>N</i> items.</li>
|
|
127
126
|
* <li>Let <i>eps = get_normalized_rank_error(true)</i>.</li>
|
|
128
127
|
* <li>then <i>mass - eps ≤ trueMass ≤ mass + eps</i> with a confidence of 99%.</li>
|
|
129
|
-
* <li>1 - r(m+1) includes the mass of all points larger than
|
|
128
|
+
* <li>1 - r(m+1) includes the mass of all points larger than sm.</li>
|
|
130
129
|
* </ul>
|
|
131
130
|
*
|
|
132
131
|
* <p>From the above, it might seem like we could make some estimates to bound the
|
|
133
|
-
* <em>
|
|
134
|
-
* let us derive error bounds or confidences around
|
|
132
|
+
* <em>item</em> returned from a call to <em>get_quantile()</em>. The sketch, however, does not
|
|
133
|
+
* let us derive error bounds or confidences around items. Because errors are independent, we
|
|
135
134
|
* can approximately bracket a value as shown below, but there are no error estimates available.
|
|
136
135
|
* Additionally, the interval may be quite large for certain distributions.
|
|
137
136
|
* <ul>
|
|
138
|
-
* <li>Let <i>
|
|
137
|
+
* <li>Let <i>q = get_quantile(r)</i>, the estimated quantile of rank <i>r</i>.</li>
|
|
139
138
|
* <li>Let <i>eps = get_normalized_rank_error(false)</i>.</li>
|
|
140
|
-
* <li>Let <i>
|
|
141
|
-
* <li>Let <i>
|
|
142
|
-
* <li>Then <i>
|
|
139
|
+
* <li>Let <i>q<sub>lo</sub></i> = estimated quantile of rank <i>(r - eps)</i>.</li>
|
|
140
|
+
* <li>Let <i>q<sub>hi</sub></i> = estimated quantile of rank <i>(r + eps)</i>.</li>
|
|
141
|
+
* <li>Then <i>q<sub>lo</sub> ≤ q ≤ q<sub>hi</sub></i>, with 99% confidence.</li>
|
|
143
142
|
* </ul>
|
|
144
143
|
*
|
|
145
144
|
* author Kevin Lang
|
|
@@ -147,13 +146,6 @@ namespace datasketches {
|
|
|
147
146
|
* author Lee Rhodes
|
|
148
147
|
*/
|
|
149
148
|
|
|
150
|
-
template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
|
|
151
|
-
template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
|
|
152
|
-
template<typename A> using AllocU32 = typename std::allocator_traits<A>::template rebind_alloc<uint32_t>;
|
|
153
|
-
template<typename A> using vector_u32 = std::vector<uint32_t, AllocU32<A>>;
|
|
154
|
-
template<typename A> using AllocD = typename std::allocator_traits<A>::template rebind_alloc<double>;
|
|
155
|
-
template<typename A> using vector_d = std::vector<double, AllocD<A>>;
|
|
156
|
-
|
|
157
149
|
namespace kll_constants {
|
|
158
150
|
const uint16_t DEFAULT_K = 200;
|
|
159
151
|
}
|
|
@@ -161,21 +153,19 @@ namespace kll_constants {
|
|
|
161
153
|
template <
|
|
162
154
|
typename T,
|
|
163
155
|
typename C = std::less<T>, // strict weak ordering function (see C++ named requirements: Compare)
|
|
164
|
-
typename S = serde<T>, // deprecated, to be removed in the next major version
|
|
165
156
|
typename A = std::allocator<T>
|
|
166
157
|
>
|
|
167
158
|
class kll_sketch {
|
|
168
159
|
public:
|
|
169
160
|
using value_type = T;
|
|
170
161
|
using comparator = C;
|
|
162
|
+
using vector_u32 = std::vector<uint32_t, typename std::allocator_traits<A>::template rebind_alloc<uint32_t>>;
|
|
171
163
|
|
|
172
164
|
static const uint8_t DEFAULT_M = 8;
|
|
173
|
-
// TODO: Redundant and deprecated. Will be removed in next major version.
|
|
174
|
-
static const uint16_t DEFAULT_K = kll_constants::DEFAULT_K;
|
|
175
165
|
static const uint16_t MIN_K = DEFAULT_M;
|
|
176
166
|
static const uint16_t MAX_K = (1 << 16) - 1;
|
|
177
167
|
|
|
178
|
-
explicit kll_sketch(uint16_t k = kll_constants::DEFAULT_K, const A& allocator = A());
|
|
168
|
+
explicit kll_sketch(uint16_t k = kll_constants::DEFAULT_K, const C& comparator = C(), const A& allocator = A());
|
|
179
169
|
kll_sketch(const kll_sketch& other);
|
|
180
170
|
kll_sketch(kll_sketch&& other) noexcept;
|
|
181
171
|
~kll_sketch();
|
|
@@ -185,17 +175,18 @@ class kll_sketch {
|
|
|
185
175
|
/*
|
|
186
176
|
* Type converting constructor.
|
|
187
177
|
* @param other sketch of a different type
|
|
178
|
+
* @param comparator instance of a Comparator
|
|
188
179
|
* @param allocator instance of an Allocator
|
|
189
180
|
*/
|
|
190
|
-
template<typename TT, typename CC, typename
|
|
191
|
-
explicit kll_sketch(const kll_sketch<TT, CC,
|
|
181
|
+
template<typename TT, typename CC, typename AA>
|
|
182
|
+
explicit kll_sketch(const kll_sketch<TT, CC, AA>& other, const C& comparator = C(), const A& allocator = A());
|
|
192
183
|
|
|
193
184
|
/**
|
|
194
185
|
* Updates this sketch with the given data item.
|
|
195
|
-
* @param
|
|
186
|
+
* @param item from a stream of items
|
|
196
187
|
*/
|
|
197
188
|
template<typename FwdT>
|
|
198
|
-
void update(FwdT&&
|
|
189
|
+
void update(FwdT&& item);
|
|
199
190
|
|
|
200
191
|
/**
|
|
201
192
|
* Merges another sketch into this one.
|
|
@@ -235,20 +226,18 @@ class kll_sketch {
|
|
|
235
226
|
bool is_estimation_mode() const;
|
|
236
227
|
|
|
237
228
|
/**
|
|
238
|
-
* Returns the min
|
|
239
|
-
*
|
|
240
|
-
*
|
|
241
|
-
* @return the min value of the stream
|
|
229
|
+
* Returns the min item of the stream.
|
|
230
|
+
* If the sketch is empty this throws std::runtime_error.
|
|
231
|
+
* @return the min item of the stream
|
|
242
232
|
*/
|
|
243
|
-
T
|
|
233
|
+
T get_min_item() const;
|
|
244
234
|
|
|
245
235
|
/**
|
|
246
|
-
* Returns the max
|
|
247
|
-
*
|
|
248
|
-
*
|
|
249
|
-
* @return the max value of the stream
|
|
236
|
+
* Returns the max item of the stream.
|
|
237
|
+
* If the sketch is empty this throws std::runtime_error.
|
|
238
|
+
* @return the max item of the stream
|
|
250
239
|
*/
|
|
251
|
-
T
|
|
240
|
+
T get_max_item() const;
|
|
252
241
|
|
|
253
242
|
/**
|
|
254
243
|
* Returns an instance of the comparator for this sketch.
|
|
@@ -257,134 +246,128 @@ class kll_sketch {
|
|
|
257
246
|
C get_comparator() const;
|
|
258
247
|
|
|
259
248
|
/**
|
|
260
|
-
* Returns an
|
|
261
|
-
*
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
*
|
|
267
|
-
*
|
|
268
|
-
*
|
|
269
|
-
*
|
|
249
|
+
* Returns an instance of the allocator for this sketch.
|
|
250
|
+
* @return allocator
|
|
251
|
+
*/
|
|
252
|
+
A get_allocator() const;
|
|
253
|
+
|
|
254
|
+
/**
|
|
255
|
+
* Returns an item from the sketch that is the best approximation to an item
|
|
256
|
+
* from the original stream with the given rank.
|
|
257
|
+
*
|
|
258
|
+
* <p>If the sketch is empty this throws std::runtime_error.
|
|
270
259
|
*
|
|
271
|
-
* @param
|
|
272
|
-
*
|
|
273
|
-
* If fraction = 0.0, the true minimum value of the stream is returned.
|
|
274
|
-
* If fraction = 1.0, the true maximum value of the stream is returned.
|
|
275
|
-
* If the parameter inclusive=true, the given rank is considered inclusive (includes the weight of an item)
|
|
260
|
+
* @param rank of an item in the hypothetical sorted stream.
|
|
261
|
+
* @param inclusive if true, the given rank is considered inclusive (includes weight of an item)
|
|
276
262
|
*
|
|
277
|
-
* @return
|
|
263
|
+
* @return approximate quantile associated with the given rank
|
|
278
264
|
*/
|
|
279
|
-
using quantile_return_type = typename
|
|
280
|
-
|
|
281
|
-
quantile_return_type get_quantile(double fraction) const;
|
|
265
|
+
using quantile_return_type = typename quantiles_sorted_view<T, C, A>::quantile_return_type;
|
|
266
|
+
quantile_return_type get_quantile(double rank, bool inclusive = true) const;
|
|
282
267
|
|
|
283
268
|
/**
|
|
284
|
-
* This is a more efficient multiple-query version of get_quantile().
|
|
285
|
-
* <p>
|
|
286
269
|
* This returns an array that could have been generated by using get_quantile() for each
|
|
287
|
-
*
|
|
288
|
-
*
|
|
289
|
-
*
|
|
290
|
-
* to get_quantile().
|
|
270
|
+
* rank separately.
|
|
271
|
+
*
|
|
272
|
+
* <p>If the sketch is empty this throws std::runtime_error.
|
|
291
273
|
*
|
|
292
|
-
*
|
|
274
|
+
* @param ranks given array of ranks in the hypothetical sorted stream.
|
|
275
|
+
* These ranks must be in the interval [0.0, 1.0].
|
|
276
|
+
* @param size the number of ranks in the array
|
|
277
|
+
* @param inclusive if true, the given ranks are considered inclusive (include weights of items)
|
|
293
278
|
*
|
|
294
|
-
* @
|
|
295
|
-
* These are also called normalized ranks or fractional ranks.
|
|
296
|
-
* These fractions must be in the interval [0.0, 1.0], inclusive.
|
|
297
|
-
* If the parameter inclusive=true, the given fractions are considered inclusive (include weights of items)
|
|
279
|
+
* @return array of approximate quantiles corresponding to the given ranks in the same order.
|
|
298
280
|
*
|
|
299
|
-
*
|
|
300
|
-
* in the input array.
|
|
281
|
+
* Deprecated. Will be removed in the next major version. Use get_quantile() instead.
|
|
301
282
|
*/
|
|
302
|
-
|
|
303
|
-
std::vector<T, A> get_quantiles(const double* fractions, uint32_t size) const;
|
|
283
|
+
std::vector<T, A> get_quantiles(const double* ranks, uint32_t size, bool inclusive = true) const;
|
|
304
284
|
|
|
305
285
|
/**
|
|
306
286
|
* This is a multiple-query version of get_quantile() that allows the caller to
|
|
307
|
-
* specify the number of evenly-spaced
|
|
287
|
+
* specify the number of evenly-spaced ranks.
|
|
308
288
|
*
|
|
309
|
-
* <p>If the sketch is empty this
|
|
289
|
+
* <p>If the sketch is empty this throws std::runtime_error.
|
|
310
290
|
*
|
|
311
|
-
* @param num an integer that specifies the number of evenly-spaced
|
|
312
|
-
* This must be an integer greater than 0. A value of 1 will return the
|
|
313
|
-
* A value of 2 will return
|
|
314
|
-
*
|
|
291
|
+
* @param num an integer that specifies the number of evenly-spaced ranks.
|
|
292
|
+
* This must be an integer greater than 0. A value of 1 will return the quantile of rank 0.
|
|
293
|
+
* A value of 2 will return quantiles of ranks 0 and 1. A value of 3 will return quantiles of ranks 0,
|
|
294
|
+
* 0.5 (median) and 1, etc.
|
|
295
|
+
* @param inclusive if true, the ranks are considered inclusive (include weights of items)
|
|
315
296
|
*
|
|
316
|
-
* @return array of
|
|
297
|
+
* @return array of approximate quantiles corresponding to the given number of evenly-spaced ranks.
|
|
298
|
+
*
|
|
299
|
+
* Deprecated. Will be removed in the next major version. Use get_quantile() instead.
|
|
317
300
|
*/
|
|
318
|
-
|
|
319
|
-
std::vector<T, A> get_quantiles(uint32_t num) const;
|
|
301
|
+
std::vector<T, A> get_quantiles(uint32_t num, bool inclusive = true) const;
|
|
320
302
|
|
|
321
303
|
/**
|
|
322
|
-
* Returns an approximation to the normalized
|
|
323
|
-
* inclusive.
|
|
324
|
-
* With the template parameter inclusive=true the weight of the given value is included into the rank.
|
|
325
|
-
* Otherwise the rank equals the sum of the weights of all values that are less than the given value
|
|
326
|
-
* according to the comparator C.
|
|
304
|
+
* Returns an approximation to the normalized rank of the given item from 0 to 1, inclusive.
|
|
327
305
|
*
|
|
328
306
|
* <p>The resulting approximation has a probabilistic guarantee that can be obtained from the
|
|
329
307
|
* get_normalized_rank_error(false) function.
|
|
330
308
|
*
|
|
331
|
-
* <p>If the sketch is empty this
|
|
309
|
+
* <p>If the sketch is empty this throws std::runtime_error.
|
|
310
|
+
*
|
|
311
|
+
* @param item to be ranked.
|
|
312
|
+
* @param inclusive if true the weight of the given item is included into the rank.
|
|
313
|
+
* Otherwise the rank equals the sum of the weights of all items that are less than the given item
|
|
314
|
+
* according to the comparator C.
|
|
332
315
|
*
|
|
333
|
-
* @
|
|
334
|
-
* @return an approximate rank of the given value
|
|
316
|
+
* @return an approximate rank of the given item
|
|
335
317
|
*/
|
|
336
|
-
|
|
337
|
-
double get_rank(const T& value) const;
|
|
318
|
+
double get_rank(const T& item, bool inclusive = true) const;
|
|
338
319
|
|
|
339
320
|
/**
|
|
340
321
|
* Returns an approximation to the Probability Mass Function (PMF) of the input stream
|
|
341
|
-
* given a set of split points (
|
|
322
|
+
* given a set of split points (items).
|
|
342
323
|
*
|
|
343
324
|
* <p>The resulting approximations have a probabilistic guarantee that can be obtained from the
|
|
344
325
|
* get_normalized_rank_error(true) function.
|
|
345
326
|
*
|
|
346
|
-
* <p>If the sketch is empty this
|
|
327
|
+
* <p>If the sketch is empty this throws std::runtime_error.
|
|
347
328
|
*
|
|
348
|
-
* @param split_points an array of <i>m</i> unique, monotonically increasing
|
|
349
|
-
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
|
|
350
|
-
*
|
|
351
|
-
*
|
|
352
|
-
*
|
|
353
|
-
*
|
|
329
|
+
* @param split_points an array of <i>m</i> unique, monotonically increasing items
|
|
330
|
+
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals (bins).
|
|
331
|
+
*
|
|
332
|
+
* @param size the number of split points in the array
|
|
333
|
+
*
|
|
334
|
+
* @param inclusive if true the rank of an item includes its own weight, and therefore
|
|
335
|
+
* if the sketch contains items equal to a split point, then in PMF such items are
|
|
336
|
+
* included into the interval to the left of split point. Otherwise they are included into the interval
|
|
337
|
+
* to the right of split point.
|
|
354
338
|
*
|
|
355
339
|
* @return an array of m+1 doubles each of which is an approximation
|
|
356
|
-
* to the fraction of the input stream
|
|
357
|
-
* If the template parameter inclusive=false, the definition of an "interval" is inclusive of the left split point and exclusive of the right
|
|
358
|
-
* split point, with the exception that the last interval will include the maximum value.
|
|
359
|
-
* If the template parameter inclusive=true, the definition of an "interval" is exclusive of the left split point and inclusive of the right
|
|
360
|
-
* split point.
|
|
340
|
+
* to the fraction of the input stream items (the mass) that fall into one of those intervals.
|
|
361
341
|
*/
|
|
362
|
-
|
|
363
|
-
|
|
342
|
+
using vector_double = typename quantiles_sorted_view<T, C, A>::vector_double;
|
|
343
|
+
vector_double get_PMF(const T* split_points, uint32_t size, bool inclusive = true) const;
|
|
364
344
|
|
|
365
345
|
/**
|
|
366
346
|
* Returns an approximation to the Cumulative Distribution Function (CDF), which is the
|
|
367
|
-
* cumulative analog of the PMF, of the input stream given a set of split points (
|
|
347
|
+
* cumulative analog of the PMF, of the input stream given a set of split points (items).
|
|
368
348
|
*
|
|
369
349
|
* <p>The resulting approximations have a probabilistic guarantee that can be obtained from the
|
|
370
350
|
* get_normalized_rank_error(false) function.
|
|
371
351
|
*
|
|
372
|
-
* <p>If the sketch is empty this
|
|
352
|
+
* <p>If the sketch is empty this throws std::runtime_error.
|
|
373
353
|
*
|
|
374
|
-
* @param split_points an array of <i>m</i> unique, monotonically increasing
|
|
354
|
+
* @param split_points an array of <i>m</i> unique, monotonically increasing items
|
|
375
355
|
* that divide the input domain into <i>m+1</i> consecutive disjoint intervals.
|
|
376
|
-
* The definition of an "interval" is inclusive of the left split point (or minimum value) and
|
|
377
|
-
* exclusive of the right split point, with the exception that the last interval will include
|
|
378
|
-
* the maximum value.
|
|
379
|
-
* It is not necessary to include either the min or max values in these split points.
|
|
380
356
|
*
|
|
381
|
-
* @
|
|
357
|
+
* @param size the number of split points in the array
|
|
358
|
+
*
|
|
359
|
+
* @param inclusive if true the rank of an item includes its own weight, and therefore
|
|
360
|
+
* if the sketch contains items equal to a split point, then in CDF such items are
|
|
361
|
+
* included into the interval to the left of split point. Otherwise they are included into
|
|
362
|
+
* the interval to the right of split point.
|
|
363
|
+
*
|
|
364
|
+
* @return an array of m+1 doubles, which are a consecutive approximation to the CDF
|
|
382
365
|
* of the input stream given the split_points. The value at array position j of the returned
|
|
383
366
|
* CDF array is the sum of the returned values in positions 0 through j of the returned PMF
|
|
384
|
-
* array.
|
|
367
|
+
* array. This can be viewed as an array of ranks of the given split points plus one more value
|
|
368
|
+
* that is always 1.
|
|
385
369
|
*/
|
|
386
|
-
|
|
387
|
-
vector_d<A> get_CDF(const T* split_points, uint32_t size) const;
|
|
370
|
+
vector_double get_CDF(const T* split_points, uint32_t size, bool inclusive = true) const;
|
|
388
371
|
|
|
389
372
|
/**
|
|
390
373
|
* Gets the approximate rank error of this sketch normalized as a fraction between zero and one.
|
|
@@ -398,19 +381,19 @@ class kll_sketch {
|
|
|
398
381
|
/**
|
|
399
382
|
* Computes size needed to serialize the current state of the sketch.
|
|
400
383
|
* This version is for fixed-size arithmetic types (integral and floating point).
|
|
401
|
-
* @param
|
|
384
|
+
* @param sd instance of a SerDe
|
|
402
385
|
* @return size in bytes needed to serialize this sketch
|
|
403
386
|
*/
|
|
404
|
-
template<typename TT = T, typename SerDe =
|
|
387
|
+
template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
|
|
405
388
|
size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
|
|
406
389
|
|
|
407
390
|
/**
|
|
408
391
|
* Computes size needed to serialize the current state of the sketch.
|
|
409
392
|
* This version is for all other types and can be expensive since every item needs to be looked at.
|
|
410
|
-
* @param
|
|
393
|
+
* @param sd instance of a SerDe
|
|
411
394
|
* @return size in bytes needed to serialize this sketch
|
|
412
395
|
*/
|
|
413
|
-
template<typename TT = T, typename SerDe =
|
|
396
|
+
template<typename TT = T, typename SerDe = serde<T>, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
|
|
414
397
|
size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
|
|
415
398
|
|
|
416
399
|
/**
|
|
@@ -443,14 +426,14 @@ class kll_sketch {
|
|
|
443
426
|
/**
|
|
444
427
|
* This method serializes the sketch into a given stream in a binary form
|
|
445
428
|
* @param os output stream
|
|
446
|
-
* @param instance of a SerDe
|
|
429
|
+
* @param sd instance of a SerDe
|
|
447
430
|
*/
|
|
448
|
-
template<typename SerDe =
|
|
431
|
+
template<typename SerDe = serde<T>>
|
|
449
432
|
void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
|
|
450
433
|
|
|
451
434
|
// This is a convenience alias for users
|
|
452
435
|
// The type returned by the following serialize method
|
|
453
|
-
using vector_bytes =
|
|
436
|
+
using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
|
|
454
437
|
|
|
455
438
|
/**
|
|
456
439
|
* This method serializes the sketch as a vector of bytes.
|
|
@@ -458,53 +441,36 @@ class kll_sketch {
|
|
|
458
441
|
* It is a blank space of a given size.
|
|
459
442
|
* This header is used in Datasketches PostgreSQL extension.
|
|
460
443
|
* @param header_size_bytes space to reserve in front of the sketch
|
|
461
|
-
* @param instance of a SerDe
|
|
444
|
+
* @param sd instance of a SerDe
|
|
462
445
|
* @return serialized sketch as a vector of bytes
|
|
463
446
|
*/
|
|
464
|
-
template<typename SerDe =
|
|
447
|
+
template<typename SerDe = serde<T>>
|
|
465
448
|
vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
|
|
466
449
|
|
|
467
450
|
/**
|
|
468
451
|
* This method deserializes a sketch from a given stream.
|
|
469
452
|
* @param is input stream
|
|
470
|
-
* @param
|
|
471
|
-
* @
|
|
472
|
-
*
|
|
473
|
-
* Deprecated, to be removed in the next major version
|
|
474
|
-
*/
|
|
475
|
-
static kll_sketch deserialize(std::istream& is, const A& allocator = A());
|
|
476
|
-
|
|
477
|
-
/**
|
|
478
|
-
* This method deserializes a sketch from a given stream.
|
|
479
|
-
* @param is input stream
|
|
480
|
-
* @param serde instance of a SerDe
|
|
453
|
+
* @param sd instance of a SerDe
|
|
454
|
+
* @param comparator instance of a Comparator
|
|
481
455
|
* @param allocator instance of an Allocator
|
|
482
456
|
* @return an instance of a sketch
|
|
483
457
|
*/
|
|
484
|
-
template<typename SerDe =
|
|
485
|
-
static kll_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(),
|
|
458
|
+
template<typename SerDe = serde<T>>
|
|
459
|
+
static kll_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(),
|
|
460
|
+
const C& comparator = C(), const A& allocator = A());
|
|
486
461
|
|
|
487
462
|
/**
|
|
488
463
|
* This method deserializes a sketch from a given array of bytes.
|
|
489
464
|
* @param bytes pointer to the array of bytes
|
|
490
465
|
* @param size the size of the array
|
|
466
|
+
* @param sd instance of a SerDe
|
|
467
|
+
* @param comparator instance of a Comparator
|
|
491
468
|
* @param allocator instance of an Allocator
|
|
492
469
|
* @return an instance of a sketch
|
|
493
|
-
*
|
|
494
|
-
* Deprecated, to be removed in the next major version
|
|
495
470
|
*/
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
* This method deserializes a sketch from a given array of bytes.
|
|
500
|
-
* @param bytes pointer to the array of bytes
|
|
501
|
-
* @param size the size of the array
|
|
502
|
-
* @param serde instance of a SerDe
|
|
503
|
-
* @param allocator instance of an Allocator
|
|
504
|
-
* @return an instance of a sketch
|
|
505
|
-
*/
|
|
506
|
-
template<typename SerDe = S>
|
|
507
|
-
static kll_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
|
|
471
|
+
template<typename SerDe = serde<T>>
|
|
472
|
+
static kll_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(),
|
|
473
|
+
const C& comparator = C(), const A& allocator = A());
|
|
508
474
|
|
|
509
475
|
/*
|
|
510
476
|
* Gets the normalized rank error given k and pmf.
|
|
@@ -526,14 +492,7 @@ class kll_sketch {
|
|
|
526
492
|
const_iterator begin() const;
|
|
527
493
|
const_iterator end() const;
|
|
528
494
|
|
|
529
|
-
|
|
530
|
-
quantile_sketch_sorted_view<T, C, A> get_sorted_view(bool cumulative) const;
|
|
531
|
-
|
|
532
|
-
#ifdef KLL_VALIDATION
|
|
533
|
-
uint8_t get_num_levels() { return num_levels_; }
|
|
534
|
-
uint32_t* get_levels() { return levels_; }
|
|
535
|
-
T* get_items() { return items_; }
|
|
536
|
-
#endif
|
|
495
|
+
quantiles_sorted_view<T, C, A> get_sorted_view() const;
|
|
537
496
|
|
|
538
497
|
private:
|
|
539
498
|
/* Serialized sketch layout:
|
|
@@ -559,28 +518,30 @@ class kll_sketch {
|
|
|
559
518
|
static const uint8_t PREAMBLE_INTS_SHORT = 2; // for empty and single item
|
|
560
519
|
static const uint8_t PREAMBLE_INTS_FULL = 5;
|
|
561
520
|
|
|
521
|
+
C comparator_;
|
|
562
522
|
A allocator_;
|
|
563
523
|
uint16_t k_;
|
|
564
524
|
uint8_t m_; // minimum buffer "width"
|
|
565
525
|
uint16_t min_k_; // for error estimation after merging with different k
|
|
566
|
-
uint64_t n_;
|
|
567
526
|
uint8_t num_levels_;
|
|
568
|
-
|
|
527
|
+
bool is_level_zero_sorted_;
|
|
528
|
+
uint64_t n_;
|
|
529
|
+
vector_u32 levels_;
|
|
569
530
|
T* items_;
|
|
570
531
|
uint32_t items_size_;
|
|
571
|
-
T*
|
|
572
|
-
T*
|
|
573
|
-
|
|
532
|
+
T* min_item_;
|
|
533
|
+
T* max_item_;
|
|
534
|
+
mutable quantiles_sorted_view<T, C, A>* sorted_view_;
|
|
574
535
|
|
|
575
536
|
// for deserialization
|
|
576
537
|
class item_deleter;
|
|
577
538
|
class items_deleter;
|
|
578
|
-
kll_sketch(uint16_t k, uint16_t min_k, uint64_t n, uint8_t num_levels, vector_u32
|
|
579
|
-
std::unique_ptr<T, items_deleter> items, uint32_t items_size, std::unique_ptr<T, item_deleter>
|
|
580
|
-
std::unique_ptr<T, item_deleter>
|
|
539
|
+
kll_sketch(uint16_t k, uint16_t min_k, uint64_t n, uint8_t num_levels, vector_u32&& levels,
|
|
540
|
+
std::unique_ptr<T, items_deleter> items, uint32_t items_size, std::unique_ptr<T, item_deleter> min_item,
|
|
541
|
+
std::unique_ptr<T, item_deleter> max_item, bool is_level_zero_sorted, const C& comparator);
|
|
581
542
|
|
|
582
543
|
// common update code
|
|
583
|
-
inline void update_min_max(const T&
|
|
544
|
+
inline void update_min_max(const T& item);
|
|
584
545
|
inline uint32_t internal_update();
|
|
585
546
|
|
|
586
547
|
// The following code is only valid in the special case of exactly reaching capacity while updating.
|
|
@@ -591,15 +552,6 @@ class kll_sketch {
|
|
|
591
552
|
void add_empty_top_level_to_completely_full_sketch();
|
|
592
553
|
void sort_level_zero();
|
|
593
554
|
|
|
594
|
-
template<bool inclusive>
|
|
595
|
-
vector_d<A> get_PMF_or_CDF(const T* split_points, uint32_t size, bool is_CDF) const;
|
|
596
|
-
template<bool inclusive>
|
|
597
|
-
void increment_buckets_unsorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
|
|
598
|
-
const T* split_points, uint32_t size, double* buckets) const;
|
|
599
|
-
template<bool inclusive>
|
|
600
|
-
void increment_buckets_sorted_level(uint32_t from_index, uint32_t to_index, uint64_t weight,
|
|
601
|
-
const T* split_points, uint32_t size, double* buckets) const;
|
|
602
|
-
|
|
603
555
|
template<typename O> void merge_higher_levels(O&& other, uint64_t final_n);
|
|
604
556
|
|
|
605
557
|
template<typename FwdSk>
|
|
@@ -616,43 +568,34 @@ class kll_sketch {
|
|
|
616
568
|
|
|
617
569
|
void check_sorting() const;
|
|
618
570
|
|
|
619
|
-
// implementations for floating point types
|
|
620
571
|
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
|
|
621
|
-
static
|
|
622
|
-
|
|
623
|
-
return value;
|
|
572
|
+
static inline bool check_update_item(TT item) {
|
|
573
|
+
return !std::isnan(item);
|
|
624
574
|
}
|
|
625
575
|
|
|
626
|
-
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
|
|
627
|
-
static inline bool check_update_value(TT value) {
|
|
628
|
-
return !std::isnan(value);
|
|
629
|
-
}
|
|
630
|
-
|
|
631
|
-
// implementations for all other types
|
|
632
576
|
template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
|
|
633
|
-
static
|
|
634
|
-
throw std::runtime_error("getting quantiles from empty sketch is not supported for this type of value");
|
|
635
|
-
}
|
|
636
|
-
|
|
637
|
-
template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
|
|
638
|
-
static inline bool check_update_value(TT) {
|
|
577
|
+
static inline bool check_update_item(TT) {
|
|
639
578
|
return true;
|
|
640
579
|
}
|
|
641
580
|
|
|
642
581
|
// for type converting constructor
|
|
643
|
-
template<typename TT, typename CC, typename
|
|
644
|
-
|
|
582
|
+
template<typename TT, typename CC, typename AA> friend class kll_sketch;
|
|
583
|
+
|
|
584
|
+
void setup_sorted_view() const; // modifies mutable state
|
|
585
|
+
void reset_sorted_view();
|
|
645
586
|
};
|
|
646
587
|
|
|
647
|
-
template<typename T, typename C, typename
|
|
648
|
-
class kll_sketch<T, C,
|
|
588
|
+
template<typename T, typename C, typename A>
|
|
589
|
+
class kll_sketch<T, C, A>::const_iterator: public std::iterator<std::input_iterator_tag, T> {
|
|
649
590
|
public:
|
|
650
|
-
|
|
591
|
+
using value_type = std::pair<const T&, const uint64_t>;
|
|
592
|
+
friend class kll_sketch<T, C, A>;
|
|
651
593
|
const_iterator& operator++();
|
|
652
594
|
const_iterator& operator++(int);
|
|
653
595
|
bool operator==(const const_iterator& other) const;
|
|
654
596
|
bool operator!=(const const_iterator& other) const;
|
|
655
|
-
const
|
|
597
|
+
const value_type operator*() const;
|
|
598
|
+
const return_value_holder<value_type> operator->() const;
|
|
656
599
|
private:
|
|
657
600
|
const T* items;
|
|
658
601
|
const uint32_t* levels;
|