datasketches 0.3.1 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/NOTICE +1 -1
- data/README.md +0 -2
- data/ext/datasketches/cpc_wrapper.cpp +3 -3
- data/ext/datasketches/kll_wrapper.cpp +0 -10
- data/lib/datasketches/version.rb +1 -1
- data/lib/datasketches.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +23 -20
- data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
- data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
- data/vendor/datasketches-cpp/Doxyfile +2827 -0
- data/vendor/datasketches-cpp/LICENSE +0 -76
- data/vendor/datasketches-cpp/README.md +1 -3
- data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +25 -27
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +15 -10
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +11 -2
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +5 -4
- data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
- data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +2 -2
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +6 -0
- data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
- data/vendor/datasketches-cpp/{python/src/__init__.py → count/CMakeLists.txt} +25 -1
- data/vendor/datasketches-cpp/count/include/count_min.hpp +405 -0
- data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +497 -0
- data/vendor/datasketches-cpp/{MANIFEST.in → count/test/CMakeLists.txt} +23 -20
- data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +155 -0
- data/vendor/datasketches-cpp/count/test/count_min_test.cpp +303 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +14 -11
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +16 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_serialize_for_java.cpp +38 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/{tox.ini → density/CMakeLists.txt} +24 -8
- data/vendor/datasketches-cpp/density/include/density_sketch.hpp +256 -0
- data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +543 -0
- data/vendor/datasketches-cpp/{python/datasketches/__init__.py → density/test/CMakeLists.txt} +15 -3
- data/vendor/datasketches-cpp/density/test/density_sketch_test.cpp +244 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +9 -3
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +19 -11
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +2 -5
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +19 -7
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +98 -42
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +94 -61
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +20 -8
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +3 -21
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +8 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +14 -18
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +8 -2
- data/vendor/datasketches-cpp/hll/include/hll.hpp +79 -65
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +7 -1
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
- data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
- data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +79 -53
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +61 -132
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +5 -40
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +76 -54
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +66 -136
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +15 -39
- data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -4
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +105 -26
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +50 -111
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
- data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +89 -32
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +33 -19
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +13 -10
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +23 -19
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
- data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +33 -51
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -20
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +6279 -0
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +14 -8
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +60 -46
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +180 -33
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +430 -130
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +10 -10
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +21 -6
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +13 -3
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +15 -1
- data/vendor/datasketches-cpp/theta/test/bit_packing_test.cpp +80 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +39 -188
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
- data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
- data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +2 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
- data/vendor/datasketches-cpp/{python/src/datasketches.cpp → tuple/test/tuple_sketch_serialize_for_java.cpp} +16 -30
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/version.cfg.in +1 -1
- metadata +61 -79
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/pyproject.toml +0 -23
- data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -81
- data/vendor/datasketches-cpp/python/README.md +0 -85
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -104
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -90
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -128
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -134
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -210
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -111
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -204
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -215
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -172
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
- data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -110
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -130
- data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -125
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -126
- data/vendor/datasketches-cpp/python/tests/req_test.py +0 -126
- data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -146
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -125
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +0 -110
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -27,9 +27,20 @@
|
|
27
27
|
#include "quantiles_sorted_view.hpp"
|
28
28
|
#include "common_defs.hpp"
|
29
29
|
#include "serde.hpp"
|
30
|
+
#include "optional.hpp"
|
30
31
|
|
31
32
|
namespace datasketches {
|
32
33
|
|
34
|
+
/// Constants for Quantiles sketch
|
35
|
+
namespace quantiles_constants {
|
36
|
+
/// default value of parameter K
|
37
|
+
const uint16_t DEFAULT_K = 128;
|
38
|
+
/// minimum value of parameter K
|
39
|
+
const uint16_t MIN_K = 2;
|
40
|
+
/// maximum value of parameter K
|
41
|
+
const uint16_t MAX_K = 1 << 15;
|
42
|
+
}
|
43
|
+
|
33
44
|
/**
|
34
45
|
* This is a stochastic streaming sketch that enables near-real time analysis of the
|
35
46
|
* approximate distribution from a very large stream in a single pass.
|
@@ -136,13 +147,6 @@ Table Guide for DoublesSketch Size in Bytes and Approximate Error:
|
|
136
147
|
* @author Alexander Saydakov
|
137
148
|
* @author Jon Malkin
|
138
149
|
*/
|
139
|
-
|
140
|
-
namespace quantiles_constants {
|
141
|
-
const uint16_t DEFAULT_K = 128;
|
142
|
-
const uint16_t MIN_K = 2;
|
143
|
-
const uint16_t MAX_K = 1 << 15;
|
144
|
-
}
|
145
|
-
|
146
150
|
template <typename T,
|
147
151
|
typename Comparator = std::less<T>, // strict weak ordering function (see C++ named requirements: Compare)
|
148
152
|
typename Allocator = std::allocator<T>>
|
@@ -151,13 +155,43 @@ public:
|
|
151
155
|
using value_type = T;
|
152
156
|
using allocator_type = Allocator;
|
153
157
|
using comparator = Comparator;
|
158
|
+
using quantile_return_type = typename quantiles_sorted_view<T, Comparator, Allocator>::quantile_return_type;
|
159
|
+
using vector_double = typename quantiles_sorted_view<T, Comparator, Allocator>::vector_double;
|
154
160
|
|
161
|
+
/**
|
162
|
+
* Constructor
|
163
|
+
* @param k affects the size of the sketch and its estimation error
|
164
|
+
* @param comparator strict weak ordering function (see C++ named requirements: Compare)
|
165
|
+
* @param allocator used to allocate memory
|
166
|
+
*/
|
155
167
|
explicit quantiles_sketch(uint16_t k = quantiles_constants::DEFAULT_K,
|
156
168
|
const Comparator& comparator = Comparator(), const Allocator& allocator = Allocator());
|
169
|
+
|
170
|
+
/**
|
171
|
+
* Copy constructor
|
172
|
+
* @param other sketch to be copied
|
173
|
+
*/
|
157
174
|
quantiles_sketch(const quantiles_sketch& other);
|
175
|
+
|
176
|
+
/** Move constructor
|
177
|
+
* @param other sketch to be moved
|
178
|
+
*/
|
158
179
|
quantiles_sketch(quantiles_sketch&& other) noexcept;
|
180
|
+
|
159
181
|
~quantiles_sketch();
|
182
|
+
|
183
|
+
/**
|
184
|
+
* Copy assignment
|
185
|
+
* @param other sketch to be copied
|
186
|
+
* @return reference to this sketch
|
187
|
+
*/
|
160
188
|
quantiles_sketch& operator=(const quantiles_sketch& other);
|
189
|
+
|
190
|
+
/**
|
191
|
+
* Move assignment
|
192
|
+
* @param other sketch to be moved
|
193
|
+
* @return reference to this sketch
|
194
|
+
*/
|
161
195
|
quantiles_sketch& operator=(quantiles_sketch&& other) noexcept;
|
162
196
|
|
163
197
|
/**
|
@@ -247,48 +281,13 @@ public:
|
|
247
281
|
* If the sketch is empty this throws std::runtime_error.
|
248
282
|
*
|
249
283
|
* @param rank the specified normalized rank in the hypothetical sorted stream.
|
250
|
-
*
|
284
|
+
* @param inclusive if true the weight of the given item is included into the rank.
|
285
|
+
* Otherwise the rank equals the sum of the weights of all items that are less than the given item
|
286
|
+
* according to the Comparator.
|
251
287
|
* @return the approximation to the item at the given rank
|
252
288
|
*/
|
253
|
-
using quantile_return_type = typename quantiles_sorted_view<T, Comparator, Allocator>::quantile_return_type;
|
254
289
|
quantile_return_type get_quantile(double rank, bool inclusive = true) const;
|
255
290
|
|
256
|
-
/**
|
257
|
-
* This is a multiple-query version of get_quantile().
|
258
|
-
* <p>
|
259
|
-
* This returns an array that could have been generated by using get_quantile() for each
|
260
|
-
* normalized rank separately.
|
261
|
-
*
|
262
|
-
* <p>If the sketch is empty this throws std::runtime_error.
|
263
|
-
*
|
264
|
-
* @param ranks given array of normalized ranks in the hypothetical sorted stream.
|
265
|
-
* These ranks must be in the interval [0.0, 1.0], inclusive.
|
266
|
-
* @param size the number of ranks in the array
|
267
|
-
*
|
268
|
-
* @return array of approximations to items associated with given ranks in the same order as given ranks
|
269
|
-
* in the input array.
|
270
|
-
*
|
271
|
-
* Deprecated. Will be removed in the next major version. Use get_quantile() instead.
|
272
|
-
*/
|
273
|
-
std::vector<T, Allocator> get_quantiles(const double* ranks, uint32_t size, bool inclusive = true) const;
|
274
|
-
|
275
|
-
/**
|
276
|
-
* This is a multiple-query version of get_quantile() that allows the caller to
|
277
|
-
* specify the number of evenly-spaced normalized ranks.
|
278
|
-
*
|
279
|
-
* <p>If the sketch is empty this throws std::runtime_error.
|
280
|
-
*
|
281
|
-
* @param num an integer that specifies the number of evenly-spaced ranks.
|
282
|
-
* This must be an integer greater than 0. A value of 1 is equivalent to get_quantiles([0]).
|
283
|
-
* A value of 2 is equivalent to get_quantiles([0, 1]). A value of 3 is equivalent to
|
284
|
-
* get_quantiles([0, 0.5, 1]), etc.
|
285
|
-
*
|
286
|
-
* @return array of approximations to items associated with the given number of evenly-spaced normalized ranks.
|
287
|
-
*
|
288
|
-
* Deprecated. Will be removed in the next major version. Use get_quantile() instead.
|
289
|
-
*/
|
290
|
-
std::vector<T, Allocator> get_quantiles(uint32_t num, bool inclusive = true) const;
|
291
|
-
|
292
291
|
/**
|
293
292
|
* Returns an approximation to the normalized rank of the given item from 0 to 1, inclusive.
|
294
293
|
*
|
@@ -300,7 +299,7 @@ public:
|
|
300
299
|
* @param item to be ranked
|
301
300
|
* @param inclusive if true the weight of the given item is included into the rank.
|
302
301
|
* Otherwise the rank equals the sum of the weights of all items that are less than the given item
|
303
|
-
* according to the
|
302
|
+
* according to the Comparator.
|
304
303
|
* @return an approximate normalized rank of the given item
|
305
304
|
*/
|
306
305
|
double get_rank(const T& item, bool inclusive = true) const;
|
@@ -327,7 +326,6 @@ public:
|
|
327
326
|
* @return an array of m+1 doubles each of which is an approximation
|
328
327
|
* to the fraction of the input stream items (the mass) that fall into one of those intervals.
|
329
328
|
*/
|
330
|
-
using vector_double = typename quantiles_sorted_view<T, Comparator, Allocator>::vector_double;
|
331
329
|
vector_double get_PMF(const T* split_points, uint32_t size, bool inclusive = true) const;
|
332
330
|
|
333
331
|
/**
|
@@ -451,9 +449,26 @@ public:
|
|
451
449
|
string<Allocator> to_string(bool print_levels = false, bool print_items = false) const;
|
452
450
|
|
453
451
|
class const_iterator;
|
452
|
+
|
453
|
+
/**
|
454
|
+
* Iterator pointing to the first item in the sketch.
|
455
|
+
* If the sketch is empty, the returned iterator must not be dereferenced or incremented.
|
456
|
+
* @return iterator pointing to the first item in the sketch
|
457
|
+
*/
|
454
458
|
const_iterator begin() const;
|
459
|
+
|
460
|
+
/**
|
461
|
+
* Iterator pointing to the past-the-end item in the sketch.
|
462
|
+
* The past-the-end item is the hypothetical item that would follow the last item.
|
463
|
+
* It does not point to any item, and must not be dereferenced or incremented.
|
464
|
+
* @return iterator pointing to the past-the-end item in the sketch
|
465
|
+
*/
|
455
466
|
const_iterator end() const;
|
456
467
|
|
468
|
+
/**
|
469
|
+
* Gets the sorted view of this sketch
|
470
|
+
* @return the sorted view of this sketch
|
471
|
+
*/
|
457
472
|
quantiles_sorted_view<T, Comparator, Allocator> get_sorted_view() const;
|
458
473
|
|
459
474
|
private:
|
@@ -493,19 +508,18 @@ private:
|
|
493
508
|
uint64_t bit_pattern_;
|
494
509
|
Level base_buffer_;
|
495
510
|
VectorLevels levels_;
|
496
|
-
T* min_item_;
|
497
|
-
T* max_item_;
|
511
|
+
optional<T> min_item_;
|
512
|
+
optional<T> max_item_;
|
498
513
|
mutable quantiles_sorted_view<T, Comparator, Allocator>* sorted_view_;
|
499
514
|
|
500
515
|
void setup_sorted_view() const; // modifies mutable state
|
501
516
|
void reset_sorted_view();
|
502
517
|
|
503
518
|
// for deserialization
|
504
|
-
class item_deleter;
|
505
519
|
class items_deleter;
|
506
520
|
quantiles_sketch(uint16_t k, uint64_t n, uint64_t bit_pattern,
|
507
521
|
Level&& base_buffer, VectorLevels&& levels,
|
508
|
-
|
522
|
+
optional<T>&& min_item, optional<T>&& max_item,
|
509
523
|
bool is_sorted, const Comparator& comparator = Comparator(), const Allocator& allocator = Allocator());
|
510
524
|
|
511
525
|
void grow_base_buffer();
|
@@ -576,19 +590,27 @@ private:
|
|
576
590
|
static inline bool check_update_item(TT) {
|
577
591
|
return true;
|
578
592
|
}
|
593
|
+
|
594
|
+
// for type converting constructor
|
595
|
+
template<typename From, typename FC, typename FA> friend class quantiles_sketch;
|
579
596
|
};
|
580
597
|
|
581
598
|
|
582
599
|
template<typename T, typename C, typename A>
|
583
|
-
class quantiles_sketch<T, C, A>::const_iterator
|
600
|
+
class quantiles_sketch<T, C, A>::const_iterator {
|
584
601
|
public:
|
602
|
+
using iterator_category = std::input_iterator_tag;
|
585
603
|
using value_type = std::pair<const T&, const uint64_t>;
|
604
|
+
using difference_type = void;
|
605
|
+
using pointer = const return_value_holder<value_type>;
|
606
|
+
using reference = const value_type;
|
607
|
+
|
586
608
|
const_iterator& operator++();
|
587
609
|
const_iterator& operator++(int);
|
588
610
|
bool operator==(const const_iterator& other) const;
|
589
611
|
bool operator!=(const const_iterator& other) const;
|
590
|
-
|
591
|
-
|
612
|
+
reference operator*() const;
|
613
|
+
pointer operator->() const;
|
592
614
|
private:
|
593
615
|
friend class quantiles_sketch<T, C, A>;
|
594
616
|
using Level = std::vector<T, A>;
|
@@ -41,8 +41,8 @@ n_(0),
|
|
41
41
|
bit_pattern_(0),
|
42
42
|
base_buffer_(allocator_),
|
43
43
|
levels_(allocator_),
|
44
|
-
min_item_(nullptr),
|
45
|
-
max_item_(nullptr),
|
44
|
+
min_item_(),
|
45
|
+
max_item_(),
|
46
46
|
sorted_view_(nullptr)
|
47
47
|
{
|
48
48
|
check_k(k_);
|
@@ -59,12 +59,10 @@ n_(other.n_),
|
|
59
59
|
bit_pattern_(other.bit_pattern_),
|
60
60
|
base_buffer_(other.base_buffer_),
|
61
61
|
levels_(other.levels_),
|
62
|
-
min_item_(nullptr),
|
63
|
-
max_item_(nullptr),
|
62
|
+
min_item_(other.min_item_),
|
63
|
+
max_item_(other.max_item_),
|
64
64
|
sorted_view_(nullptr)
|
65
65
|
{
|
66
|
-
if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
|
67
|
-
if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
|
68
66
|
for (size_t i = 0; i < levels_.size(); ++i) {
|
69
67
|
if (levels_[i].capacity() != other.levels_[i].capacity()) {
|
70
68
|
levels_[i].reserve(other.levels_[i].capacity());
|
@@ -82,13 +80,10 @@ n_(other.n_),
|
|
82
80
|
bit_pattern_(other.bit_pattern_),
|
83
81
|
base_buffer_(std::move(other.base_buffer_)),
|
84
82
|
levels_(std::move(other.levels_)),
|
85
|
-
min_item_(other.min_item_),
|
86
|
-
max_item_(other.max_item_),
|
83
|
+
min_item_(std::move(other.min_item_)),
|
84
|
+
max_item_(std::move(other.max_item_)),
|
87
85
|
sorted_view_(nullptr)
|
88
|
-
{
|
89
|
-
other.min_item_ = nullptr;
|
90
|
-
other.max_item_ = nullptr;
|
91
|
-
}
|
86
|
+
{}
|
92
87
|
|
93
88
|
template<typename T, typename C, typename A>
|
94
89
|
quantiles_sketch<T, C, A>& quantiles_sketch<T, C, A>::operator=(const quantiles_sketch& other) {
|
@@ -126,7 +121,7 @@ quantiles_sketch<T, C, A>& quantiles_sketch<T, C, A>::operator=(quantiles_sketch
|
|
126
121
|
template<typename T, typename C, typename A>
|
127
122
|
quantiles_sketch<T, C, A>::quantiles_sketch(uint16_t k, uint64_t n, uint64_t bit_pattern,
|
128
123
|
Level&& base_buffer, VectorLevels&& levels,
|
129
|
-
|
124
|
+
optional<T>&& min_item, optional<T>&& max_item,
|
130
125
|
bool is_sorted, const C& comparator, const A& allocator):
|
131
126
|
comparator_(comparator),
|
132
127
|
allocator_(allocator),
|
@@ -136,13 +131,13 @@ n_(n),
|
|
136
131
|
bit_pattern_(bit_pattern),
|
137
132
|
base_buffer_(std::move(base_buffer)),
|
138
133
|
levels_(std::move(levels)),
|
139
|
-
min_item_(min_item
|
140
|
-
max_item_(max_item
|
134
|
+
min_item_(std::move(min_item)),
|
135
|
+
max_item_(std::move(max_item)),
|
141
136
|
sorted_view_(nullptr)
|
142
137
|
{
|
143
|
-
uint32_t item_count = base_buffer_.size();
|
138
|
+
uint32_t item_count = static_cast<uint32_t>(base_buffer_.size());
|
144
139
|
for (Level& lvl : levels_) {
|
145
|
-
item_count += lvl.size();
|
140
|
+
item_count += static_cast<uint32_t>(lvl.size());
|
146
141
|
}
|
147
142
|
if (item_count != compute_retained_items(k_, n_))
|
148
143
|
throw std::logic_error("Item count does not match value computed from k, n");
|
@@ -160,8 +155,8 @@ n_(other.get_n()),
|
|
160
155
|
bit_pattern_(compute_bit_pattern(other.get_k(), other.get_n())),
|
161
156
|
base_buffer_(allocator),
|
162
157
|
levels_(allocator),
|
163
|
-
min_item_(nullptr),
|
164
|
-
max_item_(nullptr),
|
158
|
+
min_item_(other.min_item_),
|
159
|
+
max_item_(other.max_item_),
|
165
160
|
sorted_view_(nullptr)
|
166
161
|
{
|
167
162
|
static_assert(std::is_constructible<T, From>::value,
|
@@ -170,9 +165,6 @@ sorted_view_(nullptr)
|
|
170
165
|
base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k_));
|
171
166
|
|
172
167
|
if (!other.is_empty()) {
|
173
|
-
min_item_ = new (allocator_.allocate(1)) T(other.get_min_item());
|
174
|
-
max_item_ = new (allocator_.allocate(1)) T(other.get_max_item());
|
175
|
-
|
176
168
|
// reserve space in levels
|
177
169
|
const uint8_t num_levels = compute_levels_needed(k_, n_);
|
178
170
|
levels_.reserve(num_levels);
|
@@ -212,14 +204,6 @@ sorted_view_(nullptr)
|
|
212
204
|
|
213
205
|
template<typename T, typename C, typename A>
|
214
206
|
quantiles_sketch<T, C, A>::~quantiles_sketch() {
|
215
|
-
if (min_item_ != nullptr) {
|
216
|
-
min_item_->~T();
|
217
|
-
allocator_.deallocate(min_item_, 1);
|
218
|
-
}
|
219
|
-
if (max_item_ != nullptr) {
|
220
|
-
max_item_->~T();
|
221
|
-
allocator_.deallocate(max_item_, 1);
|
222
|
-
}
|
223
207
|
reset_sorted_view();
|
224
208
|
}
|
225
209
|
|
@@ -228,8 +212,8 @@ template<typename FwdT>
|
|
228
212
|
void quantiles_sketch<T, C, A>::update(FwdT&& item) {
|
229
213
|
if (!check_update_item(item)) { return; }
|
230
214
|
if (is_empty()) {
|
231
|
-
min_item_
|
232
|
-
max_item_
|
215
|
+
min_item_.emplace(item);
|
216
|
+
max_item_.emplace(item);
|
233
217
|
} else {
|
234
218
|
if (comparator_(item, *min_item_)) *min_item_ = item;
|
235
219
|
if (comparator_(*max_item_, item)) *max_item_ = item;
|
@@ -263,17 +247,17 @@ void quantiles_sketch<T, C, A>::merge(FwdSk&& other) {
|
|
263
247
|
// other has data and is in estimation mode
|
264
248
|
if (is_estimation_mode()) {
|
265
249
|
if (k_ == other.get_k()) {
|
266
|
-
standard_merge(*this, other);
|
250
|
+
standard_merge(*this, std::forward<FwdSk>(other));
|
267
251
|
} else if (k_ > other.get_k()) {
|
268
|
-
quantiles_sketch sk_copy(other);
|
269
|
-
downsampling_merge(sk_copy, *this);
|
270
|
-
*this = sk_copy;
|
252
|
+
quantiles_sketch sk_copy(std::forward<FwdSk>(other));
|
253
|
+
downsampling_merge(sk_copy, std::move(*this));
|
254
|
+
*this = std::move(sk_copy);
|
271
255
|
} else { // k_ < other.get_k()
|
272
|
-
downsampling_merge(*this, other);
|
256
|
+
downsampling_merge(*this, std::forward<FwdSk>(other));
|
273
257
|
}
|
274
258
|
} else {
|
275
259
|
// exact or empty
|
276
|
-
quantiles_sketch sk_copy(other);
|
260
|
+
quantiles_sketch sk_copy(std::forward<FwdSk>(other));
|
277
261
|
if (k_ <= other.get_k()) {
|
278
262
|
if (!is_empty()) {
|
279
263
|
for (uint16_t i = 0; i < base_buffer_.size(); ++i) {
|
@@ -281,9 +265,9 @@ void quantiles_sketch<T, C, A>::merge(FwdSk&& other) {
|
|
281
265
|
}
|
282
266
|
}
|
283
267
|
} else { // k_ > other.get_k()
|
284
|
-
downsampling_merge(sk_copy, *this);
|
268
|
+
downsampling_merge(sk_copy, std::move(*this));
|
285
269
|
}
|
286
|
-
*this = sk_copy;
|
270
|
+
*this = std::move(sk_copy);
|
287
271
|
}
|
288
272
|
reset_sorted_view();
|
289
273
|
}
|
@@ -317,8 +301,8 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
|
|
317
301
|
write(os, n_);
|
318
302
|
|
319
303
|
// min and max
|
320
|
-
serde.serialize(os, min_item_, 1);
|
321
|
-
serde.serialize(os, max_item_, 1);
|
304
|
+
serde.serialize(os, &*min_item_, 1);
|
305
|
+
serde.serialize(os, &*max_item_, 1);
|
322
306
|
|
323
307
|
// base buffer items
|
324
308
|
serde.serialize(os, base_buffer_.data(), static_cast<unsigned>(base_buffer_.size()));
|
@@ -365,8 +349,8 @@ auto quantiles_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerD
|
|
365
349
|
ptr += copy_to_mem(n_, ptr);
|
366
350
|
|
367
351
|
// min and max
|
368
|
-
ptr += serde.serialize(ptr, end_ptr - ptr, min_item_, 1);
|
369
|
-
ptr += serde.serialize(ptr, end_ptr - ptr, max_item_, 1);
|
352
|
+
ptr += serde.serialize(ptr, end_ptr - ptr, &*min_item_, 1);
|
353
|
+
ptr += serde.serialize(ptr, end_ptr - ptr, &*max_item_, 1);
|
370
354
|
|
371
355
|
// base buffer items
|
372
356
|
if (base_buffer_.size() > 0)
|
@@ -409,19 +393,18 @@ auto quantiles_sketch<T, C, A>::deserialize(std::istream &is, const SerDe& serde
|
|
409
393
|
const bool is_compact = (serial_version == 2) | ((flags_byte & (1 << flags::IS_COMPACT)) > 0);
|
410
394
|
const bool is_sorted = (flags_byte & (1 << flags::IS_SORTED)) > 0;
|
411
395
|
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
396
|
+
optional<T> tmp; // space to deserialize min and max
|
397
|
+
optional<T> min_item;
|
398
|
+
optional<T> max_item;
|
399
|
+
|
400
|
+
serde.deserialize(is, &*tmp, 1);
|
401
|
+
// serde call did not throw, repackage and cleanup
|
402
|
+
min_item.emplace(*tmp);
|
403
|
+
(*tmp).~T();
|
404
|
+
serde.deserialize(is, &*tmp, 1);
|
405
|
+
// serde call did not throw, repackage and cleanup
|
406
|
+
max_item.emplace(*tmp);
|
407
|
+
(*tmp).~T();
|
425
408
|
|
426
409
|
if (serial_version == 1) {
|
427
410
|
read<uint64_t>(is); // no longer used
|
@@ -477,7 +460,7 @@ auto quantiles_sketch<T, C, A>::deserialize_array(std::istream& is, uint32_t num
|
|
477
460
|
items.get_deleter().set_destroy(true);
|
478
461
|
if (!is.good()) throw std::runtime_error("error reading from std::istream");
|
479
462
|
|
480
|
-
//
|
463
|
+
// successfully read, now put into a Level
|
481
464
|
Level level(allocator);
|
482
465
|
level.reserve(capacity);
|
483
466
|
level.insert(level.begin(),
|
@@ -524,19 +507,18 @@ auto quantiles_sketch<T, C, A>::deserialize(const void* bytes, size_t size, cons
|
|
524
507
|
const bool is_compact = (serial_version == 2) | ((flags_byte & (1 << flags::IS_COMPACT)) > 0);
|
525
508
|
const bool is_sorted = (flags_byte & (1 << flags::IS_SORTED)) > 0;
|
526
509
|
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
510
|
+
optional<T> tmp; // space to deserialize min and max
|
511
|
+
optional<T> min_item;
|
512
|
+
optional<T> max_item;
|
513
|
+
|
514
|
+
ptr += serde.deserialize(ptr, end_ptr - ptr, &*tmp, 1);
|
515
|
+
// serde call did not throw, repackage and cleanup
|
516
|
+
min_item.emplace(*tmp);
|
517
|
+
(*tmp).~T();
|
518
|
+
ptr += serde.deserialize(ptr, end_ptr - ptr, &*tmp, 1);
|
519
|
+
// serde call did not throw, repackage and cleanup
|
520
|
+
max_item.emplace(*tmp);
|
521
|
+
(*tmp).~T();
|
540
522
|
|
541
523
|
if (serial_version == 1) {
|
542
524
|
uint64_t unused_long;
|
@@ -645,12 +627,12 @@ string<A> quantiles_sketch<T, C, A>::to_string(bool print_levels, bool print_ite
|
|
645
627
|
uint8_t level = 0;
|
646
628
|
os << " BB:" << std::endl;
|
647
629
|
for (const T& item : base_buffer_) {
|
648
|
-
os << " " <<
|
630
|
+
os << " " << item << std::endl;
|
649
631
|
}
|
650
632
|
for (uint8_t i = 0; i < levels_.size(); ++i) {
|
651
633
|
os << " level " << static_cast<unsigned int>(level) << ":" << std::endl;
|
652
634
|
for (const T& item : levels_[i]) {
|
653
|
-
os << " " <<
|
635
|
+
os << " " << item << std::endl;
|
654
636
|
}
|
655
637
|
}
|
656
638
|
os << "### End sketch data" << std::endl;
|
@@ -769,42 +751,6 @@ auto quantiles_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const
|
|
769
751
|
return sorted_view_->get_quantile(rank, inclusive);
|
770
752
|
}
|
771
753
|
|
772
|
-
template<typename T, typename C, typename A>
|
773
|
-
std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
|
774
|
-
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
775
|
-
std::vector<T, A> quantiles(allocator_);
|
776
|
-
quantiles.reserve(size);
|
777
|
-
|
778
|
-
// possible side-effect: sorting base buffer
|
779
|
-
setup_sorted_view();
|
780
|
-
|
781
|
-
for (uint32_t i = 0; i < size; ++i) {
|
782
|
-
const double rank = ranks[i];
|
783
|
-
if ((rank < 0.0) || (rank > 1.0)) {
|
784
|
-
throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
|
785
|
-
}
|
786
|
-
quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
|
787
|
-
}
|
788
|
-
return quantiles;
|
789
|
-
}
|
790
|
-
|
791
|
-
template<typename T, typename C, typename A>
|
792
|
-
std::vector<T, A> quantiles_sketch<T, C, A>::get_quantiles(uint32_t num, bool inclusive) const {
|
793
|
-
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
794
|
-
if (num == 0) {
|
795
|
-
throw std::invalid_argument("num must be > 0");
|
796
|
-
}
|
797
|
-
vector_double ranks(num, 0, allocator_);
|
798
|
-
ranks[0] = 0.0;
|
799
|
-
for (size_t i = 1; i < num; i++) {
|
800
|
-
ranks[i] = static_cast<double>(i) / (num - 1);
|
801
|
-
}
|
802
|
-
if (num > 1) {
|
803
|
-
ranks[num - 1] = 1.0;
|
804
|
-
}
|
805
|
-
return get_quantiles(ranks.data(), num, inclusive);
|
806
|
-
}
|
807
|
-
|
808
754
|
template<typename T, typename C, typename A>
|
809
755
|
double quantiles_sketch<T, C, A>::get_rank(const T& item, bool inclusive) const {
|
810
756
|
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
@@ -1012,7 +958,7 @@ void quantiles_sketch<T, C, A>::zip_buffer(Level& buf_in, Level& buf_out) {
|
|
1012
958
|
uint32_t rand_offset = next_offset;
|
1013
959
|
next_offset = 1 - next_offset;
|
1014
960
|
#else
|
1015
|
-
uint32_t rand_offset = random_bit();
|
961
|
+
uint32_t rand_offset = random_utils::random_bit();
|
1016
962
|
#endif
|
1017
963
|
if ((buf_in.size() != 2 * buf_out.capacity())
|
1018
964
|
|| (buf_out.size() > 0)) {
|
@@ -1127,15 +1073,14 @@ void quantiles_sketch<T, C, A>::standard_merge(quantiles_sketch& tgt, FwdSk&& sr
|
|
1127
1073
|
// update min and max items
|
1128
1074
|
// can't just check is_empty() since min and max might not have been set if
|
1129
1075
|
// there were no base buffer items added via update()
|
1130
|
-
if (tgt.min_item_
|
1131
|
-
tgt.min_item_
|
1076
|
+
if (!tgt.min_item_) {
|
1077
|
+
tgt.min_item_.emplace(conditional_forward<FwdSk>(*src.min_item_));
|
1132
1078
|
} else {
|
1133
1079
|
if (tgt.comparator_(*src.min_item_, *tgt.min_item_))
|
1134
1080
|
*tgt.min_item_ = conditional_forward<FwdSk>(*src.min_item_);
|
1135
1081
|
}
|
1136
|
-
|
1137
|
-
|
1138
|
-
tgt.max_item_ = new (tgt.allocator_.allocate(1)) T(*src.max_item_);
|
1082
|
+
if (!tgt.max_item_) {
|
1083
|
+
tgt.max_item_.emplace(conditional_forward<FwdSk>(*src.max_item_));
|
1139
1084
|
} else {
|
1140
1085
|
if (tgt.comparator_(*tgt.max_item_, *src.max_item_))
|
1141
1086
|
*tgt.max_item_ = conditional_forward<FwdSk>(*src.max_item_);
|
@@ -1203,15 +1148,14 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
|
|
1203
1148
|
// update min and max items
|
1204
1149
|
// can't just check is_empty() since min and max might not have been set if
|
1205
1150
|
// there were no base buffer items added via update()
|
1206
|
-
if (tgt.min_item_
|
1207
|
-
tgt.min_item_
|
1151
|
+
if (!tgt.min_item_) {
|
1152
|
+
tgt.min_item_.emplace(conditional_forward<FwdSk>(*src.min_item_));
|
1208
1153
|
} else {
|
1209
1154
|
if (tgt.comparator_(*src.min_item_, *tgt.min_item_))
|
1210
1155
|
*tgt.min_item_ = conditional_forward<FwdSk>(*src.min_item_);
|
1211
1156
|
}
|
1212
|
-
|
1213
|
-
|
1214
|
-
tgt.max_item_ = new (tgt.allocator_.allocate(1)) T(*src.max_item_);
|
1157
|
+
if (!tgt.max_item_) {
|
1158
|
+
tgt.max_item_.emplace(conditional_forward<FwdSk>(*src.max_item_));
|
1215
1159
|
} else {
|
1216
1160
|
if (tgt.comparator_(*tgt.max_item_, *src.max_item_))
|
1217
1161
|
*tgt.max_item_ = conditional_forward<FwdSk>(*src.max_item_);
|
@@ -1230,20 +1174,6 @@ uint8_t quantiles_sketch<T, C, A>::lowest_zero_bit_starting_at(uint64_t bits, ui
|
|
1230
1174
|
return pos;
|
1231
1175
|
}
|
1232
1176
|
|
1233
|
-
template<typename T, typename C, typename A>
|
1234
|
-
class quantiles_sketch<T, C, A>::item_deleter {
|
1235
|
-
public:
|
1236
|
-
item_deleter(const A& allocator): allocator_(allocator) {}
|
1237
|
-
void operator() (T* ptr) {
|
1238
|
-
if (ptr != nullptr) {
|
1239
|
-
ptr->~T();
|
1240
|
-
allocator_.deallocate(ptr, 1);
|
1241
|
-
}
|
1242
|
-
}
|
1243
|
-
private:
|
1244
|
-
A allocator_;
|
1245
|
-
};
|
1246
|
-
|
1247
1177
|
template<typename T, typename C, typename A>
|
1248
1178
|
class quantiles_sketch<T, C, A>::items_deleter {
|
1249
1179
|
public:
|
@@ -1354,12 +1284,12 @@ bool quantiles_sketch<T, C, A>::const_iterator::operator!=(const const_iterator&
|
|
1354
1284
|
}
|
1355
1285
|
|
1356
1286
|
template<typename T, typename C, typename A>
|
1357
|
-
auto quantiles_sketch<T, C, A>::const_iterator::operator*() const ->
|
1287
|
+
auto quantiles_sketch<T, C, A>::const_iterator::operator*() const -> reference {
|
1358
1288
|
return value_type(level_ == -1 ? base_buffer_[index_] : levels_[level_][index_], weight_);
|
1359
1289
|
}
|
1360
1290
|
|
1361
1291
|
template<typename T, typename C, typename A>
|
1362
|
-
auto quantiles_sketch<T, C, A>::const_iterator::operator->() const ->
|
1292
|
+
auto quantiles_sketch<T, C, A>::const_iterator::operator->() const -> pointer {
|
1363
1293
|
return **this;
|
1364
1294
|
}
|
1365
1295
|
|
@@ -20,7 +20,6 @@ add_executable(quantiles_test)
|
|
20
20
|
target_link_libraries(quantiles_test quantiles common common_test_lib)
|
21
21
|
|
22
22
|
set_target_properties(quantiles_test PROPERTIES
|
23
|
-
CXX_STANDARD 11
|
24
23
|
CXX_STANDARD_REQUIRED YES
|
25
24
|
)
|
26
25
|
|
@@ -42,3 +41,17 @@ target_sources(quantiles_test
|
|
42
41
|
quantiles_compatibility_test.cpp
|
43
42
|
kolmogorov_smirnov_test.cpp
|
44
43
|
)
|
44
|
+
|
45
|
+
if (SERDE_COMPAT)
|
46
|
+
target_sources(quantiles_test
|
47
|
+
PRIVATE
|
48
|
+
quantiles_sketch_deserialize_from_java_test.cpp
|
49
|
+
)
|
50
|
+
endif()
|
51
|
+
|
52
|
+
if (GENERATE)
|
53
|
+
target_sources(quantiles_test
|
54
|
+
PRIVATE
|
55
|
+
quantiles_sketch_serialize_for_java.cpp
|
56
|
+
)
|
57
|
+
endif()
|