datasketches 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/README.md +7 -7
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
- data/vendor/datasketches-cpp/python/README.md +50 -50
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +10 -7
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
- metadata +18 -7
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
|
@@ -56,9 +56,9 @@ void init_hll(py::module &m) {
|
|
|
56
56
|
.export_values();
|
|
57
57
|
|
|
58
58
|
py::class_<hll_sketch>(m, "hll_sketch")
|
|
59
|
-
.def(py::init<
|
|
60
|
-
.def(py::init<
|
|
61
|
-
.def(py::init<
|
|
59
|
+
.def(py::init<uint8_t>(), py::arg("lg_k"))
|
|
60
|
+
.def(py::init<uint8_t, target_hll_type>(), py::arg("lg_k"), py::arg("tgt_type"))
|
|
61
|
+
.def(py::init<uint8_t, target_hll_type, bool>(), py::arg("lg_k"), py::arg("tgt_type"), py::arg("start_max_size")=false)
|
|
62
62
|
.def_static("deserialize", &dspy::hll_sketch_deserialize,
|
|
63
63
|
"Reads a bytes object and returns the corresponding hll_sketch")
|
|
64
64
|
.def("serialize_compact", &dspy::hll_sketch_serialize_compact,
|
|
@@ -104,7 +104,7 @@ void init_hll(py::module &m) {
|
|
|
104
104
|
;
|
|
105
105
|
|
|
106
106
|
py::class_<hll_union>(m, "hll_union")
|
|
107
|
-
.def(py::init<
|
|
107
|
+
.def(py::init<uint8_t>(), py::arg("lg_max_k"))
|
|
108
108
|
.def_property_readonly("lg_config_k", &hll_union::get_lg_config_k, "Configured lg_k value for the union")
|
|
109
109
|
.def_property_readonly("tgt_type", &hll_union::get_target_type, "Returns the HLL type (4, 6, or 8) when in estimation mode")
|
|
110
110
|
.def("get_estimate", &hll_union::get_estimate,
|
|
@@ -116,7 +116,7 @@ void bind_kll_sketch(py::module &m, const char* name) {
|
|
|
116
116
|
using namespace datasketches;
|
|
117
117
|
|
|
118
118
|
py::class_<kll_sketch<T>>(m, name)
|
|
119
|
-
.def(py::init<uint16_t>(), py::arg("k")=
|
|
119
|
+
.def(py::init<uint16_t>(), py::arg("k")=kll_constants::DEFAULT_K)
|
|
120
120
|
.def(py::init<const kll_sketch<T>&>())
|
|
121
121
|
.def("update", (void (kll_sketch<T>::*)(const T&)) &kll_sketch<T>::update, py::arg("item"),
|
|
122
122
|
"Updates the sketch with the given value")
|
|
@@ -64,8 +64,8 @@ compact_theta_sketch compact_theta_sketch_deserialize(py::bytes skBytes, uint64_
|
|
|
64
64
|
return compact_theta_sketch::deserialize(skStr.c_str(), skStr.length(), seed);
|
|
65
65
|
}
|
|
66
66
|
|
|
67
|
-
py::list theta_jaccard_sim_computation(const theta_sketch& sketch_a, const theta_sketch& sketch_b) {
|
|
68
|
-
return py::cast(theta_jaccard_similarity::jaccard(sketch_a, sketch_b));
|
|
67
|
+
py::list theta_jaccard_sim_computation(const theta_sketch& sketch_a, const theta_sketch& sketch_b, uint64_t seed) {
|
|
68
|
+
return py::cast(theta_jaccard_similarity::jaccard(sketch_a, sketch_b, seed));
|
|
69
69
|
}
|
|
70
70
|
|
|
71
71
|
}
|
|
@@ -103,7 +103,7 @@ void init_theta(py::module &m) {
|
|
|
103
103
|
|
|
104
104
|
py::class_<update_theta_sketch, theta_sketch>(m, "update_theta_sketch")
|
|
105
105
|
.def(py::init(&dspy::update_theta_sketch_factory),
|
|
106
|
-
py::arg("lg_k")=
|
|
106
|
+
py::arg("lg_k")=theta_constants::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
|
|
107
107
|
.def(py::init<const update_theta_sketch&>())
|
|
108
108
|
.def("update", (void (update_theta_sketch::*)(int64_t)) &update_theta_sketch::update, py::arg("datum"),
|
|
109
109
|
"Updates the sketch with the given integral value")
|
|
@@ -127,7 +127,7 @@ void init_theta(py::module &m) {
|
|
|
127
127
|
|
|
128
128
|
py::class_<theta_union>(m, "theta_union")
|
|
129
129
|
.def(py::init(&dspy::theta_union_factory),
|
|
130
|
-
py::arg("lg_k")=
|
|
130
|
+
py::arg("lg_k")=theta_constants::DEFAULT_LG_K, py::arg("p")=1.0, py::arg("seed")=DEFAULT_SEED)
|
|
131
131
|
.def("update", &theta_union::update<const theta_sketch&>, py::arg("sketch"),
|
|
132
132
|
"Updates the union with the given sketch")
|
|
133
133
|
.def("get_result", &theta_union::get_result, py::arg("ordered")=true,
|
|
@@ -153,18 +153,18 @@ void init_theta(py::module &m) {
|
|
|
153
153
|
|
|
154
154
|
py::class_<theta_jaccard_similarity>(m, "theta_jaccard_similarity")
|
|
155
155
|
.def_static("jaccard", &dspy::theta_jaccard_sim_computation,
|
|
156
|
-
py::arg("sketch_a"), py::arg("sketch_b"),
|
|
156
|
+
py::arg("sketch_a"), py::arg("sketch_b"), py::arg("seed")=DEFAULT_SEED,
|
|
157
157
|
"Returns a list with {lower_bound, estimate, upper_bound} of the Jaccard similarity between sketches")
|
|
158
158
|
.def_static("exactly_equal", &theta_jaccard_similarity::exactly_equal<const theta_sketch&, const theta_sketch&>,
|
|
159
|
-
py::arg("sketch_a"), py::arg("sketch_b"),
|
|
159
|
+
py::arg("sketch_a"), py::arg("sketch_b"), py::arg("seed")=DEFAULT_SEED,
|
|
160
160
|
"Returns True if sketch_a and sketch_b are equivalent, otherwise False")
|
|
161
161
|
.def_static("similarity_test", &theta_jaccard_similarity::similarity_test<const theta_sketch&, const theta_sketch&>,
|
|
162
|
-
py::arg("actual"), py::arg("expected"), py::arg("threshold"),
|
|
162
|
+
py::arg("actual"), py::arg("expected"), py::arg("threshold"), py::arg("seed")=DEFAULT_SEED,
|
|
163
163
|
"Tests similarity of an actual sketch against an expected sketch. Computers the lower bound of the Jaccard "
|
|
164
164
|
"index J_{LB} of the actual and expected sketches. If J_{LB} >= threshold, then the sketches are considered "
|
|
165
165
|
"to be similar sith a confidence of 97.7% and returns True, otherwise False.")
|
|
166
166
|
.def_static("dissimilarity_test", &theta_jaccard_similarity::dissimilarity_test<const theta_sketch&, const theta_sketch&>,
|
|
167
|
-
py::arg("actual"), py::arg("expected"), py::arg("threshold"),
|
|
167
|
+
py::arg("actual"), py::arg("expected"), py::arg("threshold"), py::arg("seed")=DEFAULT_SEED,
|
|
168
168
|
"Tests dissimilarity of an actual sketch against an expected sketch. Computers the lower bound of the Jaccard "
|
|
169
169
|
"index J_{UB} of the actual and expected sketches. If J_{UB} <= threshold, then the sketches are considered "
|
|
170
170
|
"to be dissimilar sith a confidence of 97.7% and returns True, otherwise False.")
|
|
@@ -29,14 +29,20 @@ namespace py = pybind11;
|
|
|
29
29
|
|
|
30
30
|
namespace datasketches {
|
|
31
31
|
|
|
32
|
+
namespace vector_of_kll_constants {
|
|
33
|
+
static const uint32_t DEFAULT_K = kll_constants::DEFAULT_K;
|
|
34
|
+
static const uint32_t DEFAULT_D = 1;
|
|
35
|
+
}
|
|
36
|
+
|
|
32
37
|
// Wrapper class for Numpy compatibility
|
|
33
38
|
template <typename T, typename C = std::less<T>, typename S = serde<T>>
|
|
34
39
|
class vector_of_kll_sketches {
|
|
35
40
|
public:
|
|
36
|
-
|
|
37
|
-
static const uint32_t
|
|
41
|
+
// TODO: Redundant and deprecated. Will be removed in next major version release.
|
|
42
|
+
static const uint32_t DEFAULT_K = vector_of_kll_constants::DEFAULT_K;
|
|
43
|
+
static const uint32_t DEFAULT_D = vector_of_kll_constants::DEFAULT_D;
|
|
38
44
|
|
|
39
|
-
explicit vector_of_kll_sketches(uint32_t k = DEFAULT_K, uint32_t d = DEFAULT_D);
|
|
45
|
+
explicit vector_of_kll_sketches(uint32_t k = vector_of_kll_constants::DEFAULT_K, uint32_t d = vector_of_kll_constants::DEFAULT_D);
|
|
40
46
|
vector_of_kll_sketches(const vector_of_kll_sketches& other);
|
|
41
47
|
vector_of_kll_sketches(vector_of_kll_sketches&& other) noexcept;
|
|
42
48
|
vector_of_kll_sketches<T,C,S>& operator=(const vector_of_kll_sketches& other);
|
|
@@ -432,8 +438,8 @@ void bind_vector_of_kll_sketches(py::module &m, const char* name) {
|
|
|
432
438
|
using namespace datasketches;
|
|
433
439
|
|
|
434
440
|
py::class_<vector_of_kll_sketches<T>>(m, name)
|
|
435
|
-
.def(py::init<uint32_t, uint32_t>(), py::arg("k")=
|
|
436
|
-
py::arg("d")=
|
|
441
|
+
.def(py::init<uint32_t, uint32_t>(), py::arg("k")=vector_of_kll_constants::DEFAULT_K,
|
|
442
|
+
py::arg("d")=vector_of_kll_constants::DEFAULT_D)
|
|
437
443
|
.def(py::init<const vector_of_kll_sketches<T>&>())
|
|
438
444
|
// allow user to retrieve k or d, in case it's instantiated w/ defaults
|
|
439
445
|
.def("get_k", &vector_of_kll_sketches<T>::get_k,
|
|
@@ -32,7 +32,7 @@ namespace python {
|
|
|
32
32
|
template<typename T>
|
|
33
33
|
py::list vo_sketch_get_samples(const var_opt_sketch<T>& sk) {
|
|
34
34
|
py::list list;
|
|
35
|
-
for (auto
|
|
35
|
+
for (auto item : sk) {
|
|
36
36
|
py::tuple t = py::make_tuple(item.first, item.second);
|
|
37
37
|
list.append(t);
|
|
38
38
|
}
|
|
@@ -57,7 +57,7 @@ std::string vo_sketch_to_string(const var_opt_sketch<T>& sk, bool print_items) {
|
|
|
57
57
|
ss << sk.to_string();
|
|
58
58
|
ss << "### VarOpt Sketch Items" << std::endl;
|
|
59
59
|
int i = 0;
|
|
60
|
-
for (auto
|
|
60
|
+
for (auto item : sk) {
|
|
61
61
|
// item.second is always a double
|
|
62
62
|
// item.first is an arbitrary py::object, so get the value by
|
|
63
63
|
// using internal str() method then casting to C++ std::string
|
|
@@ -58,7 +58,7 @@ class HllTest(unittest.TestCase):
|
|
|
58
58
|
self.assertEqual(len(sk_bytes), result.get_compact_serialization_bytes())
|
|
59
59
|
new_hll = hll_sketch.deserialize(sk_bytes)
|
|
60
60
|
|
|
61
|
-
# the sketch can self-report its
|
|
61
|
+
# the sketch can self-report its configuration and status
|
|
62
62
|
self.assertEqual(new_hll.lg_config_k, k)
|
|
63
63
|
self.assertEqual(new_hll.tgt_type, tgt_hll_type.HLL_4)
|
|
64
64
|
self.assertFalse(new_hll.is_empty())
|
|
@@ -30,10 +30,10 @@ class KllTest(unittest.TestCase):
|
|
|
30
30
|
kll.update(0.0)
|
|
31
31
|
|
|
32
32
|
# 0 should be near the median
|
|
33
|
-
self.assertAlmostEqual(0.5, kll.get_rank(0.0), delta=0.
|
|
33
|
+
self.assertAlmostEqual(0.5, kll.get_rank(0.0), delta=0.035)
|
|
34
34
|
|
|
35
35
|
# the median should be near 0
|
|
36
|
-
self.assertAlmostEqual(0.0, kll.get_quantile(0.5), delta=0.
|
|
36
|
+
self.assertAlmostEqual(0.0, kll.get_quantile(0.5), delta=0.035)
|
|
37
37
|
|
|
38
38
|
# we also track the min/max independently from the rest of the data
|
|
39
39
|
# which lets us know the full observed data range
|
|
@@ -30,10 +30,10 @@ class reqTest(unittest.TestCase):
|
|
|
30
30
|
req.update(0.0)
|
|
31
31
|
|
|
32
32
|
# 0 should be near the median
|
|
33
|
-
self.assertAlmostEqual(0.5, req.get_rank(0.0), delta=0.
|
|
33
|
+
self.assertAlmostEqual(0.5, req.get_rank(0.0), delta=0.045)
|
|
34
34
|
|
|
35
35
|
# the median should be near 0
|
|
36
|
-
self.assertAlmostEqual(0.0, req.get_quantile(0.5), delta=0.
|
|
36
|
+
self.assertAlmostEqual(0.0, req.get_quantile(0.5), delta=0.045)
|
|
37
37
|
|
|
38
38
|
# we also track the min/max independently from the rest of the data
|
|
39
39
|
# which lets us know the full observed data range
|
|
@@ -39,9 +39,9 @@ class VectorOfKllSketchesTest(unittest.TestCase):
|
|
|
39
39
|
kll.update(dat)
|
|
40
40
|
|
|
41
41
|
# 0 should be near the median
|
|
42
|
-
np.testing.assert_allclose(0.5, kll.get_ranks(0.0), atol=0.
|
|
42
|
+
np.testing.assert_allclose(0.5, kll.get_ranks(0.0), atol=0.035)
|
|
43
43
|
# the median should be near 0
|
|
44
|
-
np.testing.assert_allclose(0.0, kll.get_quantiles(0.5), atol=0.
|
|
44
|
+
np.testing.assert_allclose(0.0, kll.get_quantiles(0.5), atol=0.035)
|
|
45
45
|
# we also track the min/max independently from the rest of the data
|
|
46
46
|
# which lets us know the full observed data range
|
|
47
47
|
np.testing.assert_allclose(kll.get_min_values(), smin)
|
|
@@ -118,9 +118,9 @@ class VectorOfKllSketchesTest(unittest.TestCase):
|
|
|
118
118
|
kll.update(dat)
|
|
119
119
|
|
|
120
120
|
# 0 should be near the median
|
|
121
|
-
np.testing.assert_allclose(0.5, kll.get_ranks(0.0), atol=0.
|
|
121
|
+
np.testing.assert_allclose(0.5, kll.get_ranks(0.0), atol=0.035)
|
|
122
122
|
# the median should be near 0
|
|
123
|
-
np.testing.assert_allclose(0.0, kll.get_quantiles(0.5), atol=0.
|
|
123
|
+
np.testing.assert_allclose(0.0, kll.get_quantiles(0.5), atol=0.035)
|
|
124
124
|
# we also track the min/max independently from the rest of the data
|
|
125
125
|
# which lets us know the full observed data range
|
|
126
126
|
np.testing.assert_allclose(kll.get_min_values(), smin)
|
|
@@ -46,7 +46,7 @@ class VoTest(unittest.TestCase):
|
|
|
46
46
|
self.assertEqual(len(items), k)
|
|
47
47
|
|
|
48
48
|
# we can also apply a predicate to the sketch to get an estimate
|
|
49
|
-
# (with
|
|
49
|
+
# (with optimally minimal variance) of the subset sum of items
|
|
50
50
|
# matching that predicate among the entire population
|
|
51
51
|
|
|
52
52
|
# we'll use a lambda here, but any function operating on a single
|
|
@@ -89,11 +89,11 @@ class VoTest(unittest.TestCase):
|
|
|
89
89
|
# the union and a sketch.
|
|
90
90
|
print(union)
|
|
91
91
|
|
|
92
|
-
# if we want to print the list of
|
|
92
|
+
# if we want to print the list of items, there must be a
|
|
93
93
|
# __str__() method for each item (which need not be the same
|
|
94
94
|
# type; they're all generic python objects when used from
|
|
95
95
|
# python), otherwise you may trigger an exception.
|
|
96
|
-
# to_string() is provided as a
|
|
96
|
+
# to_string() is provided as a convenience to avoid direct
|
|
97
97
|
# calls to __str__() with parameters.
|
|
98
98
|
print(result.to_string(True))
|
|
99
99
|
|
|
@@ -32,29 +32,16 @@ target_include_directories(req
|
|
|
32
32
|
target_link_libraries(req INTERFACE common)
|
|
33
33
|
target_compile_features(req INTERFACE cxx_std_11)
|
|
34
34
|
|
|
35
|
-
set(req_HEADERS "")
|
|
36
|
-
list(APPEND req_HEADERS "include/req_common.hpp")
|
|
37
|
-
list(APPEND req_HEADERS "include/req_sketch.hpp")
|
|
38
|
-
list(APPEND req_HEADERS "include/req_sketch_impl.hpp")
|
|
39
|
-
list(APPEND req_HEADERS "include/req_compactor.hpp")
|
|
40
|
-
list(APPEND req_HEADERS "include/req_compactor_impl.hpp")
|
|
41
|
-
list(APPEND req_HEADERS "include/req_quantile_calculator.hpp")
|
|
42
|
-
list(APPEND req_HEADERS "include/req_quantile_calculator_impl.hpp")
|
|
43
|
-
|
|
44
35
|
install(TARGETS req
|
|
45
36
|
EXPORT ${PROJECT_NAME}
|
|
46
37
|
)
|
|
47
38
|
|
|
48
|
-
install(FILES
|
|
39
|
+
install(FILES
|
|
40
|
+
include/req_common.hpp
|
|
41
|
+
include/req_sketch.hpp
|
|
42
|
+
include/req_sketch_impl.hpp
|
|
43
|
+
include/req_compactor.hpp
|
|
44
|
+
include/req_compactor_impl.hpp
|
|
45
|
+
include/req_quantile_calculator.hpp
|
|
46
|
+
include/req_quantile_calculator_impl.hpp
|
|
49
47
|
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|
|
50
|
-
|
|
51
|
-
target_sources(req
|
|
52
|
-
INTERFACE
|
|
53
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/req_common.hpp
|
|
54
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/req_sketch.hpp
|
|
55
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/req_sketch_impl.hpp
|
|
56
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/req_compactor.hpp
|
|
57
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/req_compactor_impl.hpp
|
|
58
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/req_quantile_calculator.hpp
|
|
59
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/req_quantile_calculator_impl.hpp
|
|
60
|
-
)
|
|
@@ -29,7 +29,8 @@
|
|
|
29
29
|
namespace datasketches {
|
|
30
30
|
|
|
31
31
|
// TODO: have a common random bit with KLL
|
|
32
|
-
static std::independent_bits_engine<std::mt19937, 1, unsigned>
|
|
32
|
+
static std::independent_bits_engine<std::mt19937, 1, unsigned>
|
|
33
|
+
req_random_bit(static_cast<unsigned>(std::chrono::system_clock::now().time_since_epoch().count()));
|
|
33
34
|
|
|
34
35
|
namespace req_constants {
|
|
35
36
|
static const uint16_t MIN_K = 4;
|
|
@@ -110,8 +110,8 @@ private:
|
|
|
110
110
|
|
|
111
111
|
bool ensure_enough_sections();
|
|
112
112
|
std::pair<uint32_t, uint32_t> compute_compaction_range(uint32_t secs_to_compact) const;
|
|
113
|
-
void grow(
|
|
114
|
-
void ensure_space(
|
|
113
|
+
void grow(uint32_t new_capacity);
|
|
114
|
+
void ensure_space(uint32_t num);
|
|
115
115
|
|
|
116
116
|
static uint32_t nearest_even(float value);
|
|
117
117
|
|
|
@@ -123,10 +123,10 @@ private:
|
|
|
123
123
|
req_compactor(bool hra, uint8_t lg_weight, bool sorted, float section_size_raw, uint8_t num_sections, uint64_t state, std::unique_ptr<T, items_deleter> items, uint32_t num_items, const Allocator& allocator);
|
|
124
124
|
|
|
125
125
|
template<typename S>
|
|
126
|
-
static std::unique_ptr<T, items_deleter> deserialize_items(std::istream& is, const S& serde, const Allocator& allocator,
|
|
126
|
+
static std::unique_ptr<T, items_deleter> deserialize_items(std::istream& is, const S& serde, const Allocator& allocator, uint32_t num);
|
|
127
127
|
|
|
128
128
|
template<typename S>
|
|
129
|
-
static std::pair<std::unique_ptr<T, items_deleter>, size_t> deserialize_items(const void* bytes, size_t size, const S& serde, const Allocator& allocator,
|
|
129
|
+
static std::pair<std::unique_ptr<T, items_deleter>, size_t> deserialize_items(const void* bytes, size_t size, const S& serde, const Allocator& allocator, uint32_t num);
|
|
130
130
|
|
|
131
131
|
};
|
|
132
132
|
|
|
@@ -38,7 +38,7 @@ lg_weight_(lg_weight),
|
|
|
38
38
|
hra_(hra),
|
|
39
39
|
coin_(false),
|
|
40
40
|
sorted_(sorted),
|
|
41
|
-
section_size_raw_(section_size),
|
|
41
|
+
section_size_raw_(static_cast<float>(section_size)),
|
|
42
42
|
section_size_(section_size),
|
|
43
43
|
num_sections_(req_constants::INIT_NUM_SECTIONS),
|
|
44
44
|
state_(0),
|
|
@@ -72,9 +72,9 @@ items_(nullptr)
|
|
|
72
72
|
{
|
|
73
73
|
if (other.items_ != nullptr) {
|
|
74
74
|
items_ = allocator_.allocate(capacity_);
|
|
75
|
-
const
|
|
76
|
-
const
|
|
77
|
-
for (
|
|
75
|
+
const uint32_t from = hra_ ? capacity_ - num_items_ : 0;
|
|
76
|
+
const uint32_t to = hra_ ? capacity_ : num_items_;
|
|
77
|
+
for (uint32_t i = from; i < to; ++i) new (items_ + i) T(other.items_[i]);
|
|
78
78
|
}
|
|
79
79
|
}
|
|
80
80
|
|
|
@@ -165,16 +165,16 @@ template<typename T, typename C, typename A>
|
|
|
165
165
|
template<typename FwdT>
|
|
166
166
|
void req_compactor<T, C, A>::append(FwdT&& item) {
|
|
167
167
|
if (num_items_ == capacity_) grow(capacity_ + get_nom_capacity());
|
|
168
|
-
const
|
|
168
|
+
const uint32_t i = hra_ ? capacity_ - num_items_ - 1 : num_items_;
|
|
169
169
|
new (items_ + i) T(std::forward<FwdT>(item));
|
|
170
170
|
++num_items_;
|
|
171
171
|
if (num_items_ > 1) sorted_ = false;
|
|
172
172
|
}
|
|
173
173
|
|
|
174
174
|
template<typename T, typename C, typename A>
|
|
175
|
-
void req_compactor<T, C, A>::grow(
|
|
175
|
+
void req_compactor<T, C, A>::grow(uint32_t new_capacity) {
|
|
176
176
|
T* new_items = allocator_.allocate(new_capacity);
|
|
177
|
-
|
|
177
|
+
uint32_t new_i = hra_ ? new_capacity - num_items_ : 0;
|
|
178
178
|
for (auto it = begin(); it != end(); ++it, ++new_i) {
|
|
179
179
|
new (new_items + new_i) T(std::move(*it));
|
|
180
180
|
(*it).~T();
|
|
@@ -185,7 +185,7 @@ void req_compactor<T, C, A>::grow(size_t new_capacity) {
|
|
|
185
185
|
}
|
|
186
186
|
|
|
187
187
|
template<typename T, typename C, typename A>
|
|
188
|
-
void req_compactor<T, C, A>::ensure_space(
|
|
188
|
+
void req_compactor<T, C, A>::ensure_space(uint32_t num) {
|
|
189
189
|
if (num_items_ + num > capacity_) grow(num_items_ + num + get_nom_capacity());
|
|
190
190
|
}
|
|
191
191
|
|
|
@@ -218,13 +218,13 @@ void req_compactor<T, C, A>::merge(FwdC&& other) {
|
|
|
218
218
|
while (ensure_enough_sections()) {}
|
|
219
219
|
ensure_space(other.get_num_items());
|
|
220
220
|
sort();
|
|
221
|
-
auto
|
|
221
|
+
auto offset = hra_ ? capacity_ - num_items_ : num_items_;
|
|
222
222
|
auto from = hra_ ? begin() - other.get_num_items() : end();
|
|
223
223
|
auto to = from + other.get_num_items();
|
|
224
224
|
auto other_it = other.begin();
|
|
225
225
|
for (auto it = from; it != to; ++it, ++other_it) new (it) T(conditional_forward<FwdC>(*other_it));
|
|
226
226
|
if (!other.sorted_) std::sort(from, to, C());
|
|
227
|
-
if (num_items_ > 0) std::inplace_merge(hra_ ? from : begin(),
|
|
227
|
+
if (num_items_ > 0) std::inplace_merge(hra_ ? from : begin(), items_ + offset, hra_ ? end() : to, C());
|
|
228
228
|
num_items_ += other.get_num_items();
|
|
229
229
|
}
|
|
230
230
|
|
|
@@ -240,7 +240,7 @@ template<typename T, typename C, typename A>
|
|
|
240
240
|
std::pair<uint32_t, uint32_t> req_compactor<T, C, A>::compact(req_compactor& next) {
|
|
241
241
|
const uint32_t starting_nom_capacity = get_nom_capacity();
|
|
242
242
|
// choose a part of the buffer to compact
|
|
243
|
-
const uint32_t secs_to_compact = std::min
|
|
243
|
+
const uint32_t secs_to_compact = std::min<uint32_t>(count_trailing_zeros_in_u64(~state_) + 1, num_sections_);
|
|
244
244
|
auto compaction_range = compute_compaction_range(secs_to_compact);
|
|
245
245
|
if (compaction_range.second - compaction_range.first < 2) throw std::logic_error("compaction range error");
|
|
246
246
|
|
|
@@ -267,9 +267,9 @@ std::pair<uint32_t, uint32_t> req_compactor<T, C, A>::compact(req_compactor& nex
|
|
|
267
267
|
|
|
268
268
|
template<typename T, typename C, typename A>
|
|
269
269
|
bool req_compactor<T, C, A>::ensure_enough_sections() {
|
|
270
|
-
const float ssr = section_size_raw_ /
|
|
270
|
+
const float ssr = section_size_raw_ / sqrtf(2);
|
|
271
271
|
const uint32_t ne = nearest_even(ssr);
|
|
272
|
-
if (state_ >= static_cast<uint64_t>(
|
|
272
|
+
if (state_ >= static_cast<uint64_t>(1ULL << (num_sections_ - 1)) && ne >= req_constants::MIN_K) {
|
|
273
273
|
section_size_raw_ = ssr;
|
|
274
274
|
section_size_ = ne;
|
|
275
275
|
num_sections_ <<= 1;
|
|
@@ -284,8 +284,8 @@ std::pair<uint32_t, uint32_t> req_compactor<T, C, A>::compute_compaction_range(u
|
|
|
284
284
|
uint32_t non_compact = get_nom_capacity() / 2 + (num_sections_ - secs_to_compact) * section_size_;
|
|
285
285
|
// make compacted region even
|
|
286
286
|
if (((num_items_ - non_compact) & 1) == 1) ++non_compact;
|
|
287
|
-
const
|
|
288
|
-
const
|
|
287
|
+
const uint32_t low = hra_ ? 0 : non_compact;
|
|
288
|
+
const uint32_t high = hra_ ? num_items_ - non_compact : num_items_;
|
|
289
289
|
return std::pair<uint32_t, uint32_t>(low, high);
|
|
290
290
|
}
|
|
291
291
|
|
|
@@ -309,19 +309,6 @@ void req_compactor<T, C, A>::promote_evens_or_odds(InIter from, InIter to, bool
|
|
|
309
309
|
}
|
|
310
310
|
}
|
|
311
311
|
|
|
312
|
-
// helpers for integral types
|
|
313
|
-
template<typename T>
|
|
314
|
-
static inline T read(std::istream& is) {
|
|
315
|
-
T value;
|
|
316
|
-
is.read(reinterpret_cast<char*>(&value), sizeof(T));
|
|
317
|
-
return value;
|
|
318
|
-
}
|
|
319
|
-
|
|
320
|
-
template<typename T>
|
|
321
|
-
static inline void write(std::ostream& os, T value) {
|
|
322
|
-
os.write(reinterpret_cast<const char*>(&value), sizeof(T));
|
|
323
|
-
}
|
|
324
|
-
|
|
325
312
|
// implementation for fixed-size arithmetic types (integral and floating point)
|
|
326
313
|
template<typename T, typename C, typename A>
|
|
327
314
|
template<typename S, typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
|
|
@@ -394,7 +381,7 @@ req_compactor<T, C, A> req_compactor<T, C, A>::deserialize(std::istream& is, con
|
|
|
394
381
|
|
|
395
382
|
template<typename T, typename C, typename A>
|
|
396
383
|
template<typename S>
|
|
397
|
-
auto req_compactor<T, C, A>::deserialize_items(std::istream& is, const S& serde, const A& allocator,
|
|
384
|
+
auto req_compactor<T, C, A>::deserialize_items(std::istream& is, const S& serde, const A& allocator, uint32_t num)
|
|
398
385
|
-> std::unique_ptr<T, items_deleter> {
|
|
399
386
|
A alloc(allocator);
|
|
400
387
|
std::unique_ptr<T, items_deleter> items(alloc.allocate(num), items_deleter(allocator, false, num));
|
|
@@ -402,7 +389,7 @@ auto req_compactor<T, C, A>::deserialize_items(std::istream& is, const S& serde,
|
|
|
402
389
|
// serde did not throw, enable destructors
|
|
403
390
|
items.get_deleter().set_destroy(true);
|
|
404
391
|
if (!is.good()) throw std::runtime_error("error reading from std::istream");
|
|
405
|
-
return
|
|
392
|
+
return items;
|
|
406
393
|
}
|
|
407
394
|
|
|
408
395
|
template<typename T, typename C, typename A>
|
|
@@ -443,7 +430,7 @@ std::pair<req_compactor<T, C, A>, size_t> req_compactor<T, C, A>::deserialize(co
|
|
|
443
430
|
|
|
444
431
|
template<typename T, typename C, typename A>
|
|
445
432
|
template<typename S>
|
|
446
|
-
auto req_compactor<T, C, A>::deserialize_items(const void* bytes, size_t size, const S& serde, const A& allocator,
|
|
433
|
+
auto req_compactor<T, C, A>::deserialize_items(const void* bytes, size_t size, const S& serde, const A& allocator, uint32_t num)
|
|
447
434
|
-> std::pair<std::unique_ptr<T, items_deleter>, size_t> {
|
|
448
435
|
const char* ptr = static_cast<const char*>(bytes);
|
|
449
436
|
const char* end_ptr = static_cast<const char*>(bytes) + size;
|
|
@@ -478,22 +465,22 @@ items_(items.release())
|
|
|
478
465
|
template<typename T, typename C, typename A>
|
|
479
466
|
class req_compactor<T, C, A>::items_deleter {
|
|
480
467
|
public:
|
|
481
|
-
items_deleter(const A& allocator, bool destroy,
|
|
468
|
+
items_deleter(const A& allocator, bool destroy, size_t num): allocator_(allocator), destroy_(destroy), num_(num) {}
|
|
482
469
|
void operator() (T* ptr) {
|
|
483
470
|
if (ptr != nullptr) {
|
|
484
|
-
if (
|
|
485
|
-
for (
|
|
471
|
+
if (destroy_) {
|
|
472
|
+
for (size_t i = 0; i < num_; ++i) {
|
|
486
473
|
ptr[i].~T();
|
|
487
474
|
}
|
|
488
475
|
}
|
|
489
|
-
|
|
476
|
+
allocator_.deallocate(ptr, num_);
|
|
490
477
|
}
|
|
491
478
|
}
|
|
492
|
-
void set_destroy(bool destroy) {
|
|
479
|
+
void set_destroy(bool destroy) { destroy_ = destroy; }
|
|
493
480
|
private:
|
|
494
|
-
A
|
|
495
|
-
bool
|
|
496
|
-
|
|
481
|
+
A allocator_;
|
|
482
|
+
bool destroy_;
|
|
483
|
+
size_t num_;
|
|
497
484
|
};
|
|
498
485
|
|
|
499
486
|
} /* namespace datasketches */
|
|
@@ -319,7 +319,7 @@ private:
|
|
|
319
319
|
|
|
320
320
|
// for deserialization
|
|
321
321
|
class item_deleter;
|
|
322
|
-
req_sketch(
|
|
322
|
+
req_sketch(uint16_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors);
|
|
323
323
|
|
|
324
324
|
static void check_preamble_ints(uint8_t preamble_ints, uint8_t num_levels);
|
|
325
325
|
static void check_serial_version(uint8_t serial_version);
|
|
@@ -28,7 +28,7 @@ namespace datasketches {
|
|
|
28
28
|
template<typename T, typename C, typename S, typename A>
|
|
29
29
|
req_sketch<T, C, S, A>::req_sketch(uint16_t k, bool hra, const A& allocator):
|
|
30
30
|
allocator_(allocator),
|
|
31
|
-
k_(std::max(static_cast<int>(k) & -2, static_cast<int>(req_constants::MIN_K))), //rounds down one if odd
|
|
31
|
+
k_(std::max<uint8_t>(static_cast<int>(k) & -2, static_cast<int>(req_constants::MIN_K))), //rounds down one if odd
|
|
32
32
|
hra_(hra),
|
|
33
33
|
max_nom_size_(0),
|
|
34
34
|
num_retained_(0),
|
|
@@ -401,7 +401,7 @@ void req_sketch<T, C, S, A>::serialize(std::ostream& os) const {
|
|
|
401
401
|
write(os, k_);
|
|
402
402
|
const uint8_t num_levels = is_empty() ? 0 : get_num_levels();
|
|
403
403
|
write(os, num_levels);
|
|
404
|
-
const uint8_t num_raw_items = raw_items ? n_ : 0;
|
|
404
|
+
const uint8_t num_raw_items = raw_items ? static_cast<uint8_t>(n_) : 0;
|
|
405
405
|
write(os, num_raw_items);
|
|
406
406
|
if (is_empty()) return;
|
|
407
407
|
if (is_estimation_mode()) {
|
|
@@ -440,7 +440,7 @@ auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const -> vect
|
|
|
440
440
|
ptr += copy_to_mem(k_, ptr);
|
|
441
441
|
const uint8_t num_levels = is_empty() ? 0 : get_num_levels();
|
|
442
442
|
ptr += copy_to_mem(num_levels, ptr);
|
|
443
|
-
const uint8_t num_raw_items = raw_items ? n_ : 0;
|
|
443
|
+
const uint8_t num_raw_items = raw_items ? static_cast<uint8_t>(n_) : 0;
|
|
444
444
|
ptr += copy_to_mem(num_raw_items, ptr);
|
|
445
445
|
if (!is_empty()) {
|
|
446
446
|
if (is_estimation_mode()) {
|
|
@@ -620,7 +620,7 @@ void req_sketch<T, C, S, A>::grow() {
|
|
|
620
620
|
|
|
621
621
|
template<typename T, typename C, typename S, typename A>
|
|
622
622
|
uint8_t req_sketch<T, C, S, A>::get_num_levels() const {
|
|
623
|
-
return compactors_.size();
|
|
623
|
+
return static_cast<uint8_t>(compactors_.size());
|
|
624
624
|
}
|
|
625
625
|
|
|
626
626
|
template<typename T, typename C, typename S, typename A>
|
|
@@ -653,7 +653,9 @@ void req_sketch<T, C, S, A>::compress() {
|
|
|
653
653
|
|
|
654
654
|
template<typename T, typename C, typename S, typename A>
|
|
655
655
|
string<A> req_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items) const {
|
|
656
|
-
|
|
656
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
|
657
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
|
658
|
+
std::ostringstream os;
|
|
657
659
|
os << "### REQ sketch summary:" << std::endl;
|
|
658
660
|
os << " K : " << k_ << std::endl;
|
|
659
661
|
os << " High Rank Acc : " << (hra_ ? "true" : "false") << std::endl;
|
|
@@ -693,7 +695,7 @@ string<A> req_sketch<T, C, S, A>::to_string(bool print_levels, bool print_items)
|
|
|
693
695
|
}
|
|
694
696
|
os << "### End sketch data" << std::endl;
|
|
695
697
|
}
|
|
696
|
-
return os.str();
|
|
698
|
+
return string<A>(os.str().c_str(), allocator_);
|
|
697
699
|
}
|
|
698
700
|
|
|
699
701
|
template<typename T, typename C, typename S, typename A>
|
|
@@ -711,7 +713,7 @@ class req_sketch<T, C, S, A>::item_deleter {
|
|
|
711
713
|
};
|
|
712
714
|
|
|
713
715
|
template<typename T, typename C, typename S, typename A>
|
|
714
|
-
req_sketch<T, C, S, A>::req_sketch(
|
|
716
|
+
req_sketch<T, C, S, A>::req_sketch(uint16_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors):
|
|
715
717
|
allocator_(compactors.get_allocator()),
|
|
716
718
|
k_(k),
|
|
717
719
|
hra_(hra),
|
|
@@ -766,9 +768,9 @@ auto req_sketch<T, C, S, A>::end() const -> const_iterator {
|
|
|
766
768
|
|
|
767
769
|
template<typename T, typename C, typename S, typename A>
|
|
768
770
|
req_sketch<T, C, S, A>::const_iterator::const_iterator(LevelsIterator begin, LevelsIterator end):
|
|
769
|
-
levels_it_(begin),
|
|
770
|
-
levels_end_(end),
|
|
771
|
-
compactor_it_((*levels_it_).begin())
|
|
771
|
+
levels_it_(begin),
|
|
772
|
+
levels_end_(end),
|
|
773
|
+
compactor_it_(begin == end ? nullptr : (*levels_it_).begin())
|
|
772
774
|
{}
|
|
773
775
|
|
|
774
776
|
template<typename T, typename C, typename S, typename A>
|
|
@@ -802,7 +804,7 @@ bool req_sketch<T, C, S, A>::const_iterator::operator!=(const const_iterator& ot
|
|
|
802
804
|
|
|
803
805
|
template<typename T, typename C, typename S, typename A>
|
|
804
806
|
std::pair<const T&, const uint64_t> req_sketch<T, C, S, A>::const_iterator::operator*() const {
|
|
805
|
-
return std::pair<const T&, const uint64_t>(*compactor_it_,
|
|
807
|
+
return std::pair<const T&, const uint64_t>(*compactor_it_, 1ULL << (*levels_it_).get_lg_weight());
|
|
806
808
|
}
|
|
807
809
|
|
|
808
810
|
} /* namespace datasketches */
|