datasketches 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/NOTICE +1 -1
- data/README.md +0 -2
- data/ext/datasketches/cpc_wrapper.cpp +2 -2
- data/ext/datasketches/kll_wrapper.cpp +0 -10
- data/lib/datasketches/version.rb +1 -1
- data/lib/datasketches.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
- data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
- data/vendor/datasketches-cpp/Doxyfile +2827 -0
- data/vendor/datasketches-cpp/LICENSE +0 -76
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +1 -3
- data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
- data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
- data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
- data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
- data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
- data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
- data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
- data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
- data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
- data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
- data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
- data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
- data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
- data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
- data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
- data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
- data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
- data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
- data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
- data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
- data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/version.cfg.in +1 -1
- metadata +47 -93
- data/vendor/datasketches-cpp/MANIFEST.in +0 -39
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/pyproject.toml +0 -23
- data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
- data/vendor/datasketches-cpp/python/README.md +0 -85
- data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
- data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
- data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
- data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
- data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
- data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
- data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
- data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
- data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
- data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
- data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
- data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
- data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
- data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
- data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
- data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +0 -110
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tox.ini +0 -26
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
|
@@ -27,9 +27,20 @@
|
|
|
27
27
|
#include "quantiles_sorted_view.hpp"
|
|
28
28
|
#include "common_defs.hpp"
|
|
29
29
|
#include "serde.hpp"
|
|
30
|
+
#include "optional.hpp"
|
|
30
31
|
|
|
31
32
|
namespace datasketches {
|
|
32
33
|
|
|
34
|
+
/// Constants for Quantiles sketch
|
|
35
|
+
namespace quantiles_constants {
|
|
36
|
+
/// default value of parameter K
|
|
37
|
+
const uint16_t DEFAULT_K = 128;
|
|
38
|
+
/// minimum value of parameter K
|
|
39
|
+
const uint16_t MIN_K = 2;
|
|
40
|
+
/// maximum value of parameter K
|
|
41
|
+
const uint16_t MAX_K = 1 << 15;
|
|
42
|
+
}
|
|
43
|
+
|
|
33
44
|
/**
|
|
34
45
|
* This is a stochastic streaming sketch that enables near-real time analysis of the
|
|
35
46
|
* approximate distribution from a very large stream in a single pass.
|
|
@@ -136,13 +147,6 @@ Table Guide for DoublesSketch Size in Bytes and Approximate Error:
|
|
|
136
147
|
* @author Alexander Saydakov
|
|
137
148
|
* @author Jon Malkin
|
|
138
149
|
*/
|
|
139
|
-
|
|
140
|
-
namespace quantiles_constants {
|
|
141
|
-
const uint16_t DEFAULT_K = 128;
|
|
142
|
-
const uint16_t MIN_K = 2;
|
|
143
|
-
const uint16_t MAX_K = 1 << 15;
|
|
144
|
-
}
|
|
145
|
-
|
|
146
150
|
template <typename T,
|
|
147
151
|
typename Comparator = std::less<T>, // strict weak ordering function (see C++ named requirements: Compare)
|
|
148
152
|
typename Allocator = std::allocator<T>>
|
|
@@ -151,13 +155,43 @@ public:
|
|
|
151
155
|
using value_type = T;
|
|
152
156
|
using allocator_type = Allocator;
|
|
153
157
|
using comparator = Comparator;
|
|
158
|
+
using quantile_return_type = typename quantiles_sorted_view<T, Comparator, Allocator>::quantile_return_type;
|
|
159
|
+
using vector_double = typename quantiles_sorted_view<T, Comparator, Allocator>::vector_double;
|
|
154
160
|
|
|
161
|
+
/**
|
|
162
|
+
* Constructor
|
|
163
|
+
* @param k affects the size of the sketch and its estimation error
|
|
164
|
+
* @param comparator strict weak ordering function (see C++ named requirements: Compare)
|
|
165
|
+
* @param allocator used to allocate memory
|
|
166
|
+
*/
|
|
155
167
|
explicit quantiles_sketch(uint16_t k = quantiles_constants::DEFAULT_K,
|
|
156
168
|
const Comparator& comparator = Comparator(), const Allocator& allocator = Allocator());
|
|
169
|
+
|
|
170
|
+
/**
|
|
171
|
+
* Copy constructor
|
|
172
|
+
* @param other sketch to be copied
|
|
173
|
+
*/
|
|
157
174
|
quantiles_sketch(const quantiles_sketch& other);
|
|
175
|
+
|
|
176
|
+
/** Move constructor
|
|
177
|
+
* @param other sketch to be moved
|
|
178
|
+
*/
|
|
158
179
|
quantiles_sketch(quantiles_sketch&& other) noexcept;
|
|
180
|
+
|
|
159
181
|
~quantiles_sketch();
|
|
182
|
+
|
|
183
|
+
/**
|
|
184
|
+
* Copy assignment
|
|
185
|
+
* @param other sketch to be copied
|
|
186
|
+
* @return reference to this sketch
|
|
187
|
+
*/
|
|
160
188
|
quantiles_sketch& operator=(const quantiles_sketch& other);
|
|
189
|
+
|
|
190
|
+
/**
|
|
191
|
+
* Move assignment
|
|
192
|
+
* @param other sketch to be moved
|
|
193
|
+
* @return reference to this sketch
|
|
194
|
+
*/
|
|
161
195
|
quantiles_sketch& operator=(quantiles_sketch&& other) noexcept;
|
|
162
196
|
|
|
163
197
|
/**
|
|
@@ -247,48 +281,13 @@ public:
|
|
|
247
281
|
* If the sketch is empty this throws std::runtime_error.
|
|
248
282
|
*
|
|
249
283
|
* @param rank the specified normalized rank in the hypothetical sorted stream.
|
|
250
|
-
*
|
|
284
|
+
* @param inclusive if true the weight of the given item is included into the rank.
|
|
285
|
+
* Otherwise the rank equals the sum of the weights of all items that are less than the given item
|
|
286
|
+
* according to the Comparator.
|
|
251
287
|
* @return the approximation to the item at the given rank
|
|
252
288
|
*/
|
|
253
|
-
using quantile_return_type = typename quantiles_sorted_view<T, Comparator, Allocator>::quantile_return_type;
|
|
254
289
|
quantile_return_type get_quantile(double rank, bool inclusive = true) const;
|
|
255
290
|
|
|
256
|
-
/**
|
|
257
|
-
* This is a multiple-query version of get_quantile().
|
|
258
|
-
* <p>
|
|
259
|
-
* This returns an array that could have been generated by using get_quantile() for each
|
|
260
|
-
* normalized rank separately.
|
|
261
|
-
*
|
|
262
|
-
* <p>If the sketch is empty this throws std::runtime_error.
|
|
263
|
-
*
|
|
264
|
-
* @param ranks given array of normalized ranks in the hypothetical sorted stream.
|
|
265
|
-
* These ranks must be in the interval [0.0, 1.0], inclusive.
|
|
266
|
-
* @param size the number of ranks in the array
|
|
267
|
-
*
|
|
268
|
-
* @return array of approximations to items associated with given ranks in the same order as given ranks
|
|
269
|
-
* in the input array.
|
|
270
|
-
*
|
|
271
|
-
* Deprecated. Will be removed in the next major version. Use get_quantile() instead.
|
|
272
|
-
*/
|
|
273
|
-
std::vector<T, Allocator> get_quantiles(const double* ranks, uint32_t size, bool inclusive = true) const;
|
|
274
|
-
|
|
275
|
-
/**
|
|
276
|
-
* This is a multiple-query version of get_quantile() that allows the caller to
|
|
277
|
-
* specify the number of evenly-spaced normalized ranks.
|
|
278
|
-
*
|
|
279
|
-
* <p>If the sketch is empty this throws std::runtime_error.
|
|
280
|
-
*
|
|
281
|
-
* @param num an integer that specifies the number of evenly-spaced ranks.
|
|
282
|
-
* This must be an integer greater than 0. A value of 1 is equivalent to get_quantiles([0]).
|
|
283
|
-
* A value of 2 is equivalent to get_quantiles([0, 1]). A value of 3 is equivalent to
|
|
284
|
-
* get_quantiles([0, 0.5, 1]), etc.
|
|
285
|
-
*
|
|
286
|
-
* @return array of approximations to items associated with the given number of evenly-spaced normalized ranks.
|
|
287
|
-
*
|
|
288
|
-
* Deprecated. Will be removed in the next major version. Use get_quantile() instead.
|
|
289
|
-
*/
|
|
290
|
-
std::vector<T, Allocator> get_quantiles(uint32_t num, bool inclusive = true) const;
|
|
291
|
-
|
|
292
291
|
/**
|
|
293
292
|
* Returns an approximation to the normalized rank of the given item from 0 to 1, inclusive.
|
|
294
293
|
*
|
|
@@ -300,7 +299,7 @@ public:
|
|
|
300
299
|
* @param item to be ranked
|
|
301
300
|
* @param inclusive if true the weight of the given item is included into the rank.
|
|
302
301
|
* Otherwise the rank equals the sum of the weights of all items that are less than the given item
|
|
303
|
-
* according to the
|
|
302
|
+
* according to the Comparator.
|
|
304
303
|
* @return an approximate normalized rank of the given item
|
|
305
304
|
*/
|
|
306
305
|
double get_rank(const T& item, bool inclusive = true) const;
|
|
@@ -327,7 +326,6 @@ public:
|
|
|
327
326
|
* @return an array of m+1 doubles each of which is an approximation
|
|
328
327
|
* to the fraction of the input stream items (the mass) that fall into one of those intervals.
|
|
329
328
|
*/
|
|
330
|
-
using vector_double = typename quantiles_sorted_view<T, Comparator, Allocator>::vector_double;
|
|
331
329
|
vector_double get_PMF(const T* split_points, uint32_t size, bool inclusive = true) const;
|
|
332
330
|
|
|
333
331
|
/**
|
|
@@ -451,9 +449,26 @@ public:
|
|
|
451
449
|
string<Allocator> to_string(bool print_levels = false, bool print_items = false) const;
|
|
452
450
|
|
|
453
451
|
class const_iterator;
|
|
452
|
+
|
|
453
|
+
/**
|
|
454
|
+
* Iterator pointing to the first item in the sketch.
|
|
455
|
+
* If the sketch is empty, the returned iterator must not be dereferenced or incremented.
|
|
456
|
+
* @return iterator pointing to the first item in the sketch
|
|
457
|
+
*/
|
|
454
458
|
const_iterator begin() const;
|
|
459
|
+
|
|
460
|
+
/**
|
|
461
|
+
* Iterator pointing to the past-the-end item in the sketch.
|
|
462
|
+
* The past-the-end item is the hypothetical item that would follow the last item.
|
|
463
|
+
* It does not point to any item, and must not be dereferenced or incremented.
|
|
464
|
+
* @return iterator pointing to the past-the-end item in the sketch
|
|
465
|
+
*/
|
|
455
466
|
const_iterator end() const;
|
|
456
467
|
|
|
468
|
+
/**
|
|
469
|
+
* Gets the sorted view of this sketch
|
|
470
|
+
* @return the sorted view of this sketch
|
|
471
|
+
*/
|
|
457
472
|
quantiles_sorted_view<T, Comparator, Allocator> get_sorted_view() const;
|
|
458
473
|
|
|
459
474
|
private:
|
|
@@ -493,19 +508,18 @@ private:
|
|
|
493
508
|
uint64_t bit_pattern_;
|
|
494
509
|
Level base_buffer_;
|
|
495
510
|
VectorLevels levels_;
|
|
496
|
-
T* min_item_;
|
|
497
|
-
T* max_item_;
|
|
511
|
+
optional<T> min_item_;
|
|
512
|
+
optional<T> max_item_;
|
|
498
513
|
mutable quantiles_sorted_view<T, Comparator, Allocator>* sorted_view_;
|
|
499
514
|
|
|
500
515
|
void setup_sorted_view() const; // modifies mutable state
|
|
501
516
|
void reset_sorted_view();
|
|
502
517
|
|
|
503
518
|
// for deserialization
|
|
504
|
-
class item_deleter;
|
|
505
519
|
class items_deleter;
|
|
506
520
|
quantiles_sketch(uint16_t k, uint64_t n, uint64_t bit_pattern,
|
|
507
521
|
Level&& base_buffer, VectorLevels&& levels,
|
|
508
|
-
|
|
522
|
+
optional<T>&& min_item, optional<T>&& max_item,
|
|
509
523
|
bool is_sorted, const Comparator& comparator = Comparator(), const Allocator& allocator = Allocator());
|
|
510
524
|
|
|
511
525
|
void grow_base_buffer();
|
|
@@ -576,6 +590,9 @@ private:
|
|
|
576
590
|
static inline bool check_update_item(TT) {
|
|
577
591
|
return true;
|
|
578
592
|
}
|
|
593
|
+
|
|
594
|
+
// for type converting constructor
|
|
595
|
+
template<typename From, typename FC, typename FA> friend class quantiles_sketch;
|
|
579
596
|
};
|
|
580
597
|
|
|
581
598
|
|
|
@@ -41,8 +41,8 @@ n_(0),
|
|
|
41
41
|
bit_pattern_(0),
|
|
42
42
|
base_buffer_(allocator_),
|
|
43
43
|
levels_(allocator_),
|
|
44
|
-
min_item_(nullptr),
|
|
45
|
-
max_item_(nullptr),
|
|
44
|
+
min_item_(),
|
|
45
|
+
max_item_(),
|
|
46
46
|
sorted_view_(nullptr)
|
|
47
47
|
{
|
|
48
48
|
check_k(k_);
|
|
@@ -59,12 +59,10 @@ n_(other.n_),
|
|
|
59
59
|
bit_pattern_(other.bit_pattern_),
|
|
60
60
|
base_buffer_(other.base_buffer_),
|
|
61
61
|
levels_(other.levels_),
|
|
62
|
-
min_item_(nullptr),
|
|
63
|
-
max_item_(nullptr),
|
|
62
|
+
min_item_(other.min_item_),
|
|
63
|
+
max_item_(other.max_item_),
|
|
64
64
|
sorted_view_(nullptr)
|
|
65
65
|
{
|
|
66
|
-
if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
|
|
67
|
-
if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
|
|
68
66
|
for (size_t i = 0; i < levels_.size(); ++i) {
|
|
69
67
|
if (levels_[i].capacity() != other.levels_[i].capacity()) {
|
|
70
68
|
levels_[i].reserve(other.levels_[i].capacity());
|
|
@@ -82,13 +80,10 @@ n_(other.n_),
|
|
|
82
80
|
bit_pattern_(other.bit_pattern_),
|
|
83
81
|
base_buffer_(std::move(other.base_buffer_)),
|
|
84
82
|
levels_(std::move(other.levels_)),
|
|
85
|
-
min_item_(other.min_item_),
|
|
86
|
-
max_item_(other.max_item_),
|
|
83
|
+
min_item_(std::move(other.min_item_)),
|
|
84
|
+
max_item_(std::move(other.max_item_)),
|
|
87
85
|
sorted_view_(nullptr)
|
|
88
|
-
{
|
|
89
|
-
other.min_item_ = nullptr;
|
|
90
|
-
other.max_item_ = nullptr;
|
|
91
|
-
}
|
|
86
|
+
{}
|
|
92
87
|
|
|
93
88
|
template<typename T, typename C, typename A>
|
|
94
89
|
quantiles_sketch<T, C, A>& quantiles_sketch<T, C, A>::operator=(const quantiles_sketch& other) {
|
|
@@ -126,7 +121,7 @@ quantiles_sketch<T, C, A>& quantiles_sketch<T, C, A>::operator=(quantiles_sketch
|
|
|
126
121
|
template<typename T, typename C, typename A>
|
|
127
122
|
quantiles_sketch<T, C, A>::quantiles_sketch(uint16_t k, uint64_t n, uint64_t bit_pattern,
|
|
128
123
|
Level&& base_buffer, VectorLevels&& levels,
|
|
129
|
-
|
|
124
|
+
optional<T>&& min_item, optional<T>&& max_item,
|
|
130
125
|
bool is_sorted, const C& comparator, const A& allocator):
|
|
131
126
|
comparator_(comparator),
|
|
132
127
|
allocator_(allocator),
|
|
@@ -136,13 +131,13 @@ n_(n),
|
|
|
136
131
|
bit_pattern_(bit_pattern),
|
|
137
132
|
base_buffer_(std::move(base_buffer)),
|
|
138
133
|
levels_(std::move(levels)),
|
|
139
|
-
min_item_(min_item.release()),
|
|
140
|
-
max_item_(max_item.release()),
|
|
134
|
+
min_item_(std::move(min_item)),
|
|
135
|
+
max_item_(std::move(max_item)),
|
|
141
136
|
sorted_view_(nullptr)
|
|
142
137
|
{
|
|
143
|
-
uint32_t item_count = base_buffer_.size();
|
|
138
|
+
uint32_t item_count = static_cast<uint32_t>(base_buffer_.size());
|
|
144
139
|
for (Level& lvl : levels_) {
|
|
145
|
-
item_count += lvl.size();
|
|
140
|
+
item_count += static_cast<uint32_t>(lvl.size());
|
|
146
141
|
}
|
|
147
142
|
if (item_count != compute_retained_items(k_, n_))
|
|
148
143
|
throw std::logic_error("Item count does not match value computed from k, n");
|
|
@@ -160,8 +155,8 @@ n_(other.get_n()),
|
|
|
160
155
|
bit_pattern_(compute_bit_pattern(other.get_k(), other.get_n())),
|
|
161
156
|
base_buffer_(allocator),
|
|
162
157
|
levels_(allocator),
|
|
163
|
-
min_item_(nullptr),
|
|
164
|
-
max_item_(nullptr),
|
|
158
|
+
min_item_(other.min_item_),
|
|
159
|
+
max_item_(other.max_item_),
|
|
165
160
|
sorted_view_(nullptr)
|
|
166
161
|
{
|
|
167
162
|
static_assert(std::is_constructible<T, From>::value,
|
|
@@ -170,9 +165,6 @@ sorted_view_(nullptr)
|
|
|
170
165
|
base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k_));
|
|
171
166
|
|
|
172
167
|
if (!other.is_empty()) {
|
|
173
|
-
min_item_ = new (allocator_.allocate(1)) T(other.get_min_item());
|
|
174
|
-
max_item_ = new (allocator_.allocate(1)) T(other.get_max_item());
|
|
175
|
-
|
|
176
168
|
// reserve space in levels
|
|
177
169
|
const uint8_t num_levels = compute_levels_needed(k_, n_);
|
|
178
170
|
levels_.reserve(num_levels);
|
|
@@ -212,14 +204,6 @@ sorted_view_(nullptr)
|
|
|
212
204
|
|
|
213
205
|
template<typename T, typename C, typename A>
|
|
214
206
|
quantiles_sketch<T, C, A>::~quantiles_sketch() {
|
|
215
|
-
if (min_item_ != nullptr) {
|
|
216
|
-
min_item_->~T();
|
|
217
|
-
allocator_.deallocate(min_item_, 1);
|
|
218
|
-
}
|
|
219
|
-
if (max_item_ != nullptr) {
|
|
220
|
-
max_item_->~T();
|
|
221
|
-
allocator_.deallocate(max_item_, 1);
|
|
222
|
-
}
|
|
223
207
|
reset_sorted_view();
|
|
224
208
|
}
|
|
225
209
|
|
|
@@ -228,8 +212,8 @@ template<typename FwdT>
|
|
|
228
212
|
void quantiles_sketch<T, C, A>::update(FwdT&& item) {
|
|
229
213
|
if (!check_update_item(item)) { return; }
|
|
230
214
|
if (is_empty()) {
|
|
231
|
-
min_item_ = new (allocator_.allocate(1)) T(item);
|
|
232
|
-
max_item_ = new (allocator_.allocate(1)) T(item);
|
|
215
|
+
min_item_.emplace(item);
|
|
216
|
+
max_item_.emplace(item);
|
|
233
217
|
} else {
|
|
234
218
|
if (comparator_(item, *min_item_)) *min_item_ = item;
|
|
235
219
|
if (comparator_(*max_item_, item)) *max_item_ = item;
|
|
@@ -263,17 +247,17 @@ void quantiles_sketch<T, C, A>::merge(FwdSk&& other) {
|
|
|
263
247
|
// other has data and is in estimation mode
|
|
264
248
|
if (is_estimation_mode()) {
|
|
265
249
|
if (k_ == other.get_k()) {
|
|
266
|
-
standard_merge(*this, other);
|
|
250
|
+
standard_merge(*this, std::forward<FwdSk>(other));
|
|
267
251
|
} else if (k_ > other.get_k()) {
|
|
268
|
-
quantiles_sketch sk_copy(other);
|
|
269
|
-
downsampling_merge(sk_copy, *this);
|
|
270
|
-
*this = sk_copy;
|
|
252
|
+
quantiles_sketch sk_copy(std::forward<FwdSk>(other));
|
|
253
|
+
downsampling_merge(sk_copy, std::move(*this));
|
|
254
|
+
*this = std::move(sk_copy);
|
|
271
255
|
} else { // k_ < other.get_k()
|
|
272
|
-
downsampling_merge(*this, other);
|
|
256
|
+
downsampling_merge(*this, std::forward<FwdSk>(other));
|
|
273
257
|
}
|
|
274
258
|
} else {
|
|
275
259
|
// exact or empty
|
|
276
|
-
quantiles_sketch sk_copy(other);
|
|
260
|
+
quantiles_sketch sk_copy(std::forward<FwdSk>(other));
|
|
277
261
|
if (k_ <= other.get_k()) {
|
|
278
262
|
if (!is_empty()) {
|
|
279
263
|
for (uint16_t i = 0; i < base_buffer_.size(); ++i) {
|
|
@@ -281,9 +265,9 @@ void quantiles_sketch<T, C, A>::merge(FwdSk&& other) {
|
|
|
281
265
|
}
|
|
282
266
|
}
|
|
283
267
|
} else { // k_ > other.get_k()
|
|
284
|
-
downsampling_merge(sk_copy, *this);
|
|
268
|
+
downsampling_merge(sk_copy, std::move(*this));
|
|
285
269
|
}
|
|
286
|
-
*this = sk_copy;
|
|
270
|
+
*this = std::move(sk_copy);
|
|
287
271
|
}
|
|
288
272
|
reset_sorted_view();
|
|
289
273
|
}
|
|
@@ -317,8 +301,8 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
|
|
|
317
301
|
write(os, n_);
|
|
318
302
|
|
|
319
303
|
// min and max
|
|
320
|
-
serde.serialize(os, min_item_, 1);
|
|
321
|
-
serde.serialize(os, max_item_, 1);
|
|
304
|
+
serde.serialize(os, &*min_item_, 1);
|
|
305
|
+
serde.serialize(os, &*max_item_, 1);
|
|
322
306
|
|
|
323
307
|
// base buffer items
|
|
324
308
|
serde.serialize(os, base_buffer_.data(), static_cast<unsigned>(base_buffer_.size()));
|
|
@@ -365,8 +349,8 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
|
|
|
365
349
|
ptr += copy_to_mem(n_, ptr);
|
|
366
350
|
|
|
367
351
|
// min and max
|
|
368
|
-
ptr += serde.serialize(ptr, end_ptr - ptr, min_item_, 1);
|
|
369
|
-
ptr += serde.serialize(ptr, end_ptr - ptr, max_item_, 1);
|
|
352
|
+
ptr += serde.serialize(ptr, end_ptr - ptr, &*min_item_, 1);
|
|
353
|
+
ptr += serde.serialize(ptr, end_ptr - ptr, &*max_item_, 1);
|
|
370
354
|
|
|
371
355
|
// base buffer items
|
|
372
356
|
if (base_buffer_.size() > 0)
|
|
@@ -409,19 +393,18 @@ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde
|
|
|
409
393
|
const bool is_compact = (serial_version == 2) | ((flags_byte & (1 << flags::IS_COMPACT)) > 0);
|
|
410
394
|
const bool is_sorted = (flags_byte & (1 << flags::IS_SORTED)) > 0;
|
|
411
395
|
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
|
396
|
+
optional<T> tmp; // space to deserialize min and max
|
|
397
|
+
optional<T> min_item;
|
|
398
|
+
optional<T> max_item;
|
|
399
|
+
|
|
400
|
+
serde.deserialize(is, &*tmp, 1);
|
|
401
|
+
// serde call did not throw, repackage and cleanup
|
|
402
|
+
min_item.emplace(*tmp);
|
|
403
|
+
(*tmp).~T();
|
|
404
|
+
serde.deserialize(is, &*tmp, 1);
|
|
405
|
+
// serde call did not throw, repackage and cleanup
|
|
406
|
+
max_item.emplace(*tmp);
|
|
407
|
+
(*tmp).~T();
|
|
425
408
|
|
|
426
409
|
if (serial_version == 1) {
|
|
427
410
|
read<uint64_t>(is); // no longer used
|
|
@@ -477,7 +460,7 @@ auto quantiles_sketch<T, C, A>::deserialize_array(std::istream& is, uint32_t num
|
|
|
477
460
|
items.get_deleter().set_destroy(true);
|
|
478
461
|
if (!is.good()) throw std::runtime_error("error reading from std::istream");
|
|
479
462
|
|
|
480
|
-
//
|
|
463
|
+
// successfully read, now put into a Level
|
|
481
464
|
Level level(allocator);
|
|
482
465
|
level.reserve(capacity);
|
|
483
466
|
level.insert(level.begin(),
|
|
@@ -524,19 +507,18 @@ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, cons
|
|
|
524
507
|
const bool is_compact = (serial_version == 2) | ((flags_byte & (1 << flags::IS_COMPACT)) > 0);
|
|
525
508
|
const bool is_sorted = (flags_byte & (1 << flags::IS_SORTED)) > 0;
|
|
526
509
|
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
|
510
|
+
optional<T> tmp; // space to deserialize min and max
|
|
511
|
+
optional<T> min_item;
|
|
512
|
+
optional<T> max_item;
|
|
513
|
+
|
|
514
|
+
ptr += serde.deserialize(ptr, end_ptr - ptr, &*tmp, 1);
|
|
515
|
+
// serde call did not throw, repackage and cleanup
|
|
516
|
+
min_item.emplace(*tmp);
|
|
517
|
+
(*tmp).~T();
|
|
518
|
+
ptr += serde.deserialize(ptr, end_ptr - ptr, &*tmp, 1);
|
|
519
|
+
// serde call did not throw, repackage and cleanup
|
|
520
|
+
max_item.emplace(*tmp);
|
|
521
|
+
(*tmp).~T();
|
|
540
522
|
|
|
541
523
|
if (serial_version == 1) {
|
|
542
524
|
uint64_t unused_long;
|
|
@@ -769,42 +751,6 @@ auto quantiles_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const
|
|
|
769
751
|
return sorted_view_->get_quantile(rank, inclusive);
|
|
770
752
|
}
|
|
771
753
|
|
|
772
|
-
template<typename T, typename C, typename A>
|
|
773
|
-
std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
|
|
774
|
-
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
775
|
-
std::vector<T, A> quantiles(allocator_);
|
|
776
|
-
quantiles.reserve(size);
|
|
777
|
-
|
|
778
|
-
// possible side-effect: sorting base buffer
|
|
779
|
-
setup_sorted_view();
|
|
780
|
-
|
|
781
|
-
for (uint32_t i = 0; i < size; ++i) {
|
|
782
|
-
const double rank = ranks[i];
|
|
783
|
-
if ((rank < 0.0) || (rank > 1.0)) {
|
|
784
|
-
throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
|
|
785
|
-
}
|
|
786
|
-
quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
|
|
787
|
-
}
|
|
788
|
-
return quantiles;
|
|
789
|
-
}
|
|
790
|
-
|
|
791
|
-
template<typename T, typename C, typename A>
|
|
792
|
-
std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(uint32_t num, bool inclusive) const {
|
|
793
|
-
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
794
|
-
if (num == 0) {
|
|
795
|
-
throw std::invalid_argument("num must be > 0");
|
|
796
|
-
}
|
|
797
|
-
vector_double ranks(num, 0, allocator_);
|
|
798
|
-
ranks[0] = 0.0;
|
|
799
|
-
for (size_t i = 1; i < num; i++) {
|
|
800
|
-
ranks[i] = static_cast<double>(i) / (num - 1);
|
|
801
|
-
}
|
|
802
|
-
if (num > 1) {
|
|
803
|
-
ranks[num - 1] = 1.0;
|
|
804
|
-
}
|
|
805
|
-
return get_quantiles(ranks.data(), num, inclusive);
|
|
806
|
-
}
|
|
807
|
-
|
|
808
754
|
template<typename T, typename C, typename A>
|
|
809
755
|
double quantiles_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
|
|
810
756
|
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
@@ -1012,7 +958,7 @@ void quantiles_sketch<T, C, A>::zip_buffer(Level& buf_in, Level& buf_out) {
|
|
|
1012
958
|
uint32_t rand_offset = next_offset;
|
|
1013
959
|
next_offset = 1 - next_offset;
|
|
1014
960
|
#else
|
|
1015
|
-
uint32_t rand_offset = random_bit();
|
|
961
|
+
uint32_t rand_offset = random_utils::random_bit();
|
|
1016
962
|
#endif
|
|
1017
963
|
if ((buf_in.size() != 2 * buf_out.capacity())
|
|
1018
964
|
|| (buf_out.size() > 0)) {
|
|
@@ -1127,15 +1073,14 @@ void quantiles_sketch<T, C, A>::standard_merge(quantiles_sketch& tgt, FwdSk&& sr
|
|
|
1127
1073
|
// update min and max items
|
|
1128
1074
|
// can't just check is_empty() since min and max might not have been set if
|
|
1129
1075
|
// there were no base buffer items added via update()
|
|
1130
|
-
if (tgt.min_item_
|
|
1131
|
-
tgt.min_item_
|
|
1076
|
+
if (!tgt.min_item_) {
|
|
1077
|
+
tgt.min_item_.emplace(conditional_forward<FwdSk>(*src.min_item_));
|
|
1132
1078
|
} else {
|
|
1133
1079
|
if (tgt.comparator_(*src.min_item_, *tgt.min_item_))
|
|
1134
1080
|
*tgt.min_item_ = conditional_forward<FwdSk>(*src.min_item_);
|
|
1135
1081
|
}
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
tgt.max_item_ = new (tgt.allocator_.allocate(1)) T(*src.max_item_);
|
|
1082
|
+
if (!tgt.max_item_) {
|
|
1083
|
+
tgt.max_item_.emplace(conditional_forward<FwdSk>(*src.max_item_));
|
|
1139
1084
|
} else {
|
|
1140
1085
|
if (tgt.comparator_(*tgt.max_item_, *src.max_item_))
|
|
1141
1086
|
*tgt.max_item_ = conditional_forward<FwdSk>(*src.max_item_);
|
|
@@ -1203,15 +1148,14 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
|
|
|
1203
1148
|
// update min and max items
|
|
1204
1149
|
// can't just check is_empty() since min and max might not have been set if
|
|
1205
1150
|
// there were no base buffer items added via update()
|
|
1206
|
-
if (tgt.min_item_
|
|
1207
|
-
tgt.min_item_
|
|
1151
|
+
if (!tgt.min_item_) {
|
|
1152
|
+
tgt.min_item_.emplace(conditional_forward<FwdSk>(*src.min_item_));
|
|
1208
1153
|
} else {
|
|
1209
1154
|
if (tgt.comparator_(*src.min_item_, *tgt.min_item_))
|
|
1210
1155
|
*tgt.min_item_ = conditional_forward<FwdSk>(*src.min_item_);
|
|
1211
1156
|
}
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
tgt.max_item_ = new (tgt.allocator_.allocate(1)) T(*src.max_item_);
|
|
1157
|
+
if (!tgt.max_item_) {
|
|
1158
|
+
tgt.max_item_.emplace(conditional_forward<FwdSk>(*src.max_item_));
|
|
1215
1159
|
} else {
|
|
1216
1160
|
if (tgt.comparator_(*tgt.max_item_, *src.max_item_))
|
|
1217
1161
|
*tgt.max_item_ = conditional_forward<FwdSk>(*src.max_item_);
|
|
@@ -1230,20 +1174,6 @@ uint8_t quantiles_sketch<T, C, A>::lowest_zero_bit_starting_at(uint64_t bits, ui
|
|
|
1230
1174
|
return pos;
|
|
1231
1175
|
}
|
|
1232
1176
|
|
|
1233
|
-
template<typename T, typename C, typename A>
|
|
1234
|
-
class quantiles_sketch<T, C, A>::item_deleter {
|
|
1235
|
-
public:
|
|
1236
|
-
item_deleter(const A& allocator): allocator_(allocator) {}
|
|
1237
|
-
void operator() (T* ptr) {
|
|
1238
|
-
if (ptr != nullptr) {
|
|
1239
|
-
ptr->~T();
|
|
1240
|
-
allocator_.deallocate(ptr, 1);
|
|
1241
|
-
}
|
|
1242
|
-
}
|
|
1243
|
-
private:
|
|
1244
|
-
A allocator_;
|
|
1245
|
-
};
|
|
1246
|
-
|
|
1247
1177
|
template<typename T, typename C, typename A>
|
|
1248
1178
|
class quantiles_sketch<T, C, A>::items_deleter {
|
|
1249
1179
|
public:
|
|
@@ -20,7 +20,6 @@ add_executable(quantiles_test)
|
|
|
20
20
|
target_link_libraries(quantiles_test quantiles common common_test_lib)
|
|
21
21
|
|
|
22
22
|
set_target_properties(quantiles_test PROPERTIES
|
|
23
|
-
CXX_STANDARD 11
|
|
24
23
|
CXX_STANDARD_REQUIRED YES
|
|
25
24
|
)
|
|
26
25
|
|
|
@@ -42,3 +41,17 @@ target_sources(quantiles_test
|
|
|
42
41
|
quantiles_compatibility_test.cpp
|
|
43
42
|
kolmogorov_smirnov_test.cpp
|
|
44
43
|
)
|
|
44
|
+
|
|
45
|
+
if (SERDE_COMPAT)
|
|
46
|
+
target_sources(quantiles_test
|
|
47
|
+
PRIVATE
|
|
48
|
+
quantiles_sketch_deserialize_from_java_test.cpp
|
|
49
|
+
)
|
|
50
|
+
endif()
|
|
51
|
+
|
|
52
|
+
if (GENERATE)
|
|
53
|
+
target_sources(quantiles_test
|
|
54
|
+
PRIVATE
|
|
55
|
+
quantiles_sketch_serialize_for_java.cpp
|
|
56
|
+
)
|
|
57
|
+
endif()
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch2/catch.hpp>
|
|
21
|
+
#include <fstream>
|
|
22
|
+
#include <quantiles_sketch.hpp>
|
|
23
|
+
|
|
24
|
+
namespace datasketches {
|
|
25
|
+
|
|
26
|
+
// assume the binary sketches for this test have been generated by datasketches-java code
|
|
27
|
+
// in the subdirectory called "java" in the root directory of this project
|
|
28
|
+
static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";
|
|
29
|
+
|
|
30
|
+
TEST_CASE("quantiles double", "[serde_compat]") {
|
|
31
|
+
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
|
|
32
|
+
for (const unsigned n: n_arr) {
|
|
33
|
+
std::ifstream is;
|
|
34
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
35
|
+
is.open(testBinaryInputPath + "quantiles_double_n" + std::to_string(n) + "_java.sk", std::ios::binary);
|
|
36
|
+
const auto sketch = quantiles_sketch<double>::deserialize(is);
|
|
37
|
+
REQUIRE(sketch.is_empty() == (n == 0));
|
|
38
|
+
REQUIRE(sketch.is_estimation_mode() == (n > quantiles_constants::DEFAULT_K));
|
|
39
|
+
REQUIRE(sketch.get_n() == n);
|
|
40
|
+
if (n > 0) {
|
|
41
|
+
REQUIRE(sketch.get_min_item() == 1.0);
|
|
42
|
+
REQUIRE(sketch.get_max_item() == static_cast<double>(n));
|
|
43
|
+
uint64_t weight = 0;
|
|
44
|
+
for (const auto pair: sketch) {
|
|
45
|
+
REQUIRE(pair.first >= sketch.get_min_item());
|
|
46
|
+
REQUIRE(pair.first <= sketch.get_max_item());
|
|
47
|
+
weight += pair.second;
|
|
48
|
+
}
|
|
49
|
+
REQUIRE(weight == sketch.get_n());
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
struct string_as_number_less {
|
|
55
|
+
bool operator()(const std::string& a, const std::string& b) const {
|
|
56
|
+
return std::stoi(a) < std::stoi(b);
|
|
57
|
+
}
|
|
58
|
+
};
|
|
59
|
+
|
|
60
|
+
TEST_CASE("quantiles string", "[serde_compat]") {
|
|
61
|
+
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
|
|
62
|
+
for (const unsigned n: n_arr) {
|
|
63
|
+
std::ifstream is;
|
|
64
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
65
|
+
is.open(testBinaryInputPath + "quantiles_string_n" + std::to_string(n) + "_java.sk", std::ios::binary);
|
|
66
|
+
const auto sketch = quantiles_sketch<std::string, string_as_number_less>::deserialize(is);
|
|
67
|
+
REQUIRE(sketch.is_empty() == (n == 0));
|
|
68
|
+
REQUIRE(sketch.is_estimation_mode() == (n > quantiles_constants::DEFAULT_K));
|
|
69
|
+
REQUIRE(sketch.get_n() == n);
|
|
70
|
+
if (n > 0) {
|
|
71
|
+
REQUIRE(sketch.get_min_item() == "1");
|
|
72
|
+
REQUIRE(sketch.get_max_item() == std::to_string(n));
|
|
73
|
+
uint64_t weight = 0;
|
|
74
|
+
for (const auto pair: sketch) {
|
|
75
|
+
REQUIRE(std::stoi(pair.first) >= std::stoi(sketch.get_min_item()));
|
|
76
|
+
REQUIRE(std::stoi(pair.first) <= std::stoi(sketch.get_max_item()));
|
|
77
|
+
weight += pair.second;
|
|
78
|
+
}
|
|
79
|
+
REQUIRE(weight == sketch.get_n());
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
} /* namespace datasketches */
|