datasketches 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +7 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +24 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +14 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +121 -87
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +65 -80
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +28 -28
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +34 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +72 -62
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +68 -45
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +6 -6
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +9 -9
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +47 -56
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +34 -42
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +42 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +107 -58
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +4 -4
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +2 -0
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -28
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +51 -64
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +12 -12
- metadata +8 -3
|
@@ -56,9 +56,9 @@ void init_hll(py::module &m) {
|
|
|
56
56
|
.export_values();
|
|
57
57
|
|
|
58
58
|
py::class_<hll_sketch>(m, "hll_sketch")
|
|
59
|
-
.def(py::init<
|
|
60
|
-
.def(py::init<
|
|
61
|
-
.def(py::init<
|
|
59
|
+
.def(py::init<uint8_t>(), py::arg("lg_k"))
|
|
60
|
+
.def(py::init<uint8_t, target_hll_type>(), py::arg("lg_k"), py::arg("tgt_type"))
|
|
61
|
+
.def(py::init<uint8_t, target_hll_type, bool>(), py::arg("lg_k"), py::arg("tgt_type"), py::arg("start_max_size")=false)
|
|
62
62
|
.def_static("deserialize", &dspy::hll_sketch_deserialize,
|
|
63
63
|
"Reads a bytes object and returns the corresponding hll_sketch")
|
|
64
64
|
.def("serialize_compact", &dspy::hll_sketch_serialize_compact,
|
|
@@ -104,7 +104,7 @@ void init_hll(py::module &m) {
|
|
|
104
104
|
;
|
|
105
105
|
|
|
106
106
|
py::class_<hll_union>(m, "hll_union")
|
|
107
|
-
.def(py::init<
|
|
107
|
+
.def(py::init<uint8_t>(), py::arg("lg_max_k"))
|
|
108
108
|
.def_property_readonly("lg_config_k", &hll_union::get_lg_config_k, "Configured lg_k value for the union")
|
|
109
109
|
.def_property_readonly("tgt_type", &hll_union::get_target_type, "Returns the HLL type (4, 6, or 8) when in estimation mode")
|
|
110
110
|
.def("get_estimate", &hll_union::get_estimate,
|
|
@@ -64,8 +64,8 @@ compact_theta_sketch compact_theta_sketch_deserialize(py::bytes skBytes, uint64_
|
|
|
64
64
|
return compact_theta_sketch::deserialize(skStr.c_str(), skStr.length(), seed);
|
|
65
65
|
}
|
|
66
66
|
|
|
67
|
-
py::list theta_jaccard_sim_computation(const theta_sketch& sketch_a, const theta_sketch& sketch_b) {
|
|
68
|
-
return py::cast(theta_jaccard_similarity::jaccard(sketch_a, sketch_b));
|
|
67
|
+
py::list theta_jaccard_sim_computation(const theta_sketch& sketch_a, const theta_sketch& sketch_b, uint64_t seed) {
|
|
68
|
+
return py::cast(theta_jaccard_similarity::jaccard(sketch_a, sketch_b, seed));
|
|
69
69
|
}
|
|
70
70
|
|
|
71
71
|
}
|
|
@@ -153,18 +153,18 @@ void init_theta(py::module &m) {
|
|
|
153
153
|
|
|
154
154
|
py::class_<theta_jaccard_similarity>(m, "theta_jaccard_similarity")
|
|
155
155
|
.def_static("jaccard", &dspy::theta_jaccard_sim_computation,
|
|
156
|
-
py::arg("sketch_a"), py::arg("sketch_b"),
|
|
156
|
+
py::arg("sketch_a"), py::arg("sketch_b"), py::arg("seed")=DEFAULT_SEED,
|
|
157
157
|
"Returns a list with {lower_bound, estimate, upper_bound} of the Jaccard similarity between sketches")
|
|
158
158
|
.def_static("exactly_equal", &theta_jaccard_similarity::exactly_equal<const theta_sketch&, const theta_sketch&>,
|
|
159
|
-
py::arg("sketch_a"), py::arg("sketch_b"),
|
|
159
|
+
py::arg("sketch_a"), py::arg("sketch_b"), py::arg("seed")=DEFAULT_SEED,
|
|
160
160
|
"Returns True if sketch_a and sketch_b are equivalent, otherwise False")
|
|
161
161
|
.def_static("similarity_test", &theta_jaccard_similarity::similarity_test<const theta_sketch&, const theta_sketch&>,
|
|
162
|
-
py::arg("actual"), py::arg("expected"), py::arg("threshold"),
|
|
162
|
+
py::arg("actual"), py::arg("expected"), py::arg("threshold"), py::arg("seed")=DEFAULT_SEED,
|
|
163
163
|
"Tests similarity of an actual sketch against an expected sketch. Computers the lower bound of the Jaccard "
|
|
164
164
|
"index J_{LB} of the actual and expected sketches. If J_{LB} >= threshold, then the sketches are considered "
|
|
165
165
|
"to be similar sith a confidence of 97.7% and returns True, otherwise False.")
|
|
166
166
|
.def_static("dissimilarity_test", &theta_jaccard_similarity::dissimilarity_test<const theta_sketch&, const theta_sketch&>,
|
|
167
|
-
py::arg("actual"), py::arg("expected"), py::arg("threshold"),
|
|
167
|
+
py::arg("actual"), py::arg("expected"), py::arg("threshold"), py::arg("seed")=DEFAULT_SEED,
|
|
168
168
|
"Tests dissimilarity of an actual sketch against an expected sketch. Computers the lower bound of the Jaccard "
|
|
169
169
|
"index J_{UB} of the actual and expected sketches. If J_{UB} <= threshold, then the sketches are considered "
|
|
170
170
|
"to be dissimilar sith a confidence of 97.7% and returns True, otherwise False.")
|
|
@@ -32,7 +32,7 @@ namespace python {
|
|
|
32
32
|
template<typename T>
|
|
33
33
|
py::list vo_sketch_get_samples(const var_opt_sketch<T>& sk) {
|
|
34
34
|
py::list list;
|
|
35
|
-
for (auto
|
|
35
|
+
for (auto item : sk) {
|
|
36
36
|
py::tuple t = py::make_tuple(item.first, item.second);
|
|
37
37
|
list.append(t);
|
|
38
38
|
}
|
|
@@ -57,7 +57,7 @@ std::string vo_sketch_to_string(const var_opt_sketch<T>& sk, bool print_items) {
|
|
|
57
57
|
ss << sk.to_string();
|
|
58
58
|
ss << "### VarOpt Sketch Items" << std::endl;
|
|
59
59
|
int i = 0;
|
|
60
|
-
for (auto
|
|
60
|
+
for (auto item : sk) {
|
|
61
61
|
// item.second is always a double
|
|
62
62
|
// item.first is an arbitrary py::object, so get the value by
|
|
63
63
|
// using internal str() method then casting to C++ std::string
|
|
@@ -58,7 +58,7 @@ class HllTest(unittest.TestCase):
|
|
|
58
58
|
self.assertEqual(len(sk_bytes), result.get_compact_serialization_bytes())
|
|
59
59
|
new_hll = hll_sketch.deserialize(sk_bytes)
|
|
60
60
|
|
|
61
|
-
# the sketch can self-report its
|
|
61
|
+
# the sketch can self-report its configuration and status
|
|
62
62
|
self.assertEqual(new_hll.lg_config_k, k)
|
|
63
63
|
self.assertEqual(new_hll.tgt_type, tgt_hll_type.HLL_4)
|
|
64
64
|
self.assertFalse(new_hll.is_empty())
|
|
@@ -46,7 +46,7 @@ class VoTest(unittest.TestCase):
|
|
|
46
46
|
self.assertEqual(len(items), k)
|
|
47
47
|
|
|
48
48
|
# we can also apply a predicate to the sketch to get an estimate
|
|
49
|
-
# (with
|
|
49
|
+
# (with optimally minimal variance) of the subset sum of items
|
|
50
50
|
# matching that predicate among the entire population
|
|
51
51
|
|
|
52
52
|
# we'll use a lambda here, but any function operating on a single
|
|
@@ -89,11 +89,11 @@ class VoTest(unittest.TestCase):
|
|
|
89
89
|
# the union and a sketch.
|
|
90
90
|
print(union)
|
|
91
91
|
|
|
92
|
-
# if we want to print the list of
|
|
92
|
+
# if we want to print the list of items, there must be a
|
|
93
93
|
# __str__() method for each item (which need not be the same
|
|
94
94
|
# type; they're all generic python objects when used from
|
|
95
95
|
# python), otherwise you may trigger an exception.
|
|
96
|
-
# to_string() is provided as a
|
|
96
|
+
# to_string() is provided as a convenience to avoid direct
|
|
97
97
|
# calls to __str__() with parameters.
|
|
98
98
|
print(result.to_string(True))
|
|
99
99
|
|
|
@@ -29,7 +29,8 @@
|
|
|
29
29
|
namespace datasketches {
|
|
30
30
|
|
|
31
31
|
// TODO: have a common random bit with KLL
|
|
32
|
-
static std::independent_bits_engine<std::mt19937, 1, unsigned>
|
|
32
|
+
static std::independent_bits_engine<std::mt19937, 1, unsigned>
|
|
33
|
+
req_random_bit(static_cast<unsigned>(std::chrono::system_clock::now().time_since_epoch().count()));
|
|
33
34
|
|
|
34
35
|
namespace req_constants {
|
|
35
36
|
static const uint16_t MIN_K = 4;
|
|
@@ -110,8 +110,8 @@ private:
|
|
|
110
110
|
|
|
111
111
|
bool ensure_enough_sections();
|
|
112
112
|
std::pair<uint32_t, uint32_t> compute_compaction_range(uint32_t secs_to_compact) const;
|
|
113
|
-
void grow(
|
|
114
|
-
void ensure_space(
|
|
113
|
+
void grow(uint32_t new_capacity);
|
|
114
|
+
void ensure_space(uint32_t num);
|
|
115
115
|
|
|
116
116
|
static uint32_t nearest_even(float value);
|
|
117
117
|
|
|
@@ -123,10 +123,10 @@ private:
|
|
|
123
123
|
req_compactor(bool hra, uint8_t lg_weight, bool sorted, float section_size_raw, uint8_t num_sections, uint64_t state, std::unique_ptr<T, items_deleter> items, uint32_t num_items, const Allocator& allocator);
|
|
124
124
|
|
|
125
125
|
template<typename S>
|
|
126
|
-
static std::unique_ptr<T, items_deleter> deserialize_items(std::istream& is, const S& serde, const Allocator& allocator,
|
|
126
|
+
static std::unique_ptr<T, items_deleter> deserialize_items(std::istream& is, const S& serde, const Allocator& allocator, uint32_t num);
|
|
127
127
|
|
|
128
128
|
template<typename S>
|
|
129
|
-
static std::pair<std::unique_ptr<T, items_deleter>, size_t> deserialize_items(const void* bytes, size_t size, const S& serde, const Allocator& allocator,
|
|
129
|
+
static std::pair<std::unique_ptr<T, items_deleter>, size_t> deserialize_items(const void* bytes, size_t size, const S& serde, const Allocator& allocator, uint32_t num);
|
|
130
130
|
|
|
131
131
|
};
|
|
132
132
|
|
|
@@ -38,7 +38,7 @@ lg_weight_(lg_weight),
|
|
|
38
38
|
hra_(hra),
|
|
39
39
|
coin_(false),
|
|
40
40
|
sorted_(sorted),
|
|
41
|
-
section_size_raw_(section_size),
|
|
41
|
+
section_size_raw_(static_cast<float>(section_size)),
|
|
42
42
|
section_size_(section_size),
|
|
43
43
|
num_sections_(req_constants::INIT_NUM_SECTIONS),
|
|
44
44
|
state_(0),
|
|
@@ -72,9 +72,9 @@ items_(nullptr)
|
|
|
72
72
|
{
|
|
73
73
|
if (other.items_ != nullptr) {
|
|
74
74
|
items_ = allocator_.allocate(capacity_);
|
|
75
|
-
const
|
|
76
|
-
const
|
|
77
|
-
for (
|
|
75
|
+
const uint32_t from = hra_ ? capacity_ - num_items_ : 0;
|
|
76
|
+
const uint32_t to = hra_ ? capacity_ : num_items_;
|
|
77
|
+
for (uint32_t i = from; i < to; ++i) new (items_ + i) T(other.items_[i]);
|
|
78
78
|
}
|
|
79
79
|
}
|
|
80
80
|
|
|
@@ -165,16 +165,16 @@ template<typename T, typename C, typename A>
|
|
|
165
165
|
template<typename FwdT>
|
|
166
166
|
void req_compactor<T, C, A>::append(FwdT&& item) {
|
|
167
167
|
if (num_items_ == capacity_) grow(capacity_ + get_nom_capacity());
|
|
168
|
-
const
|
|
168
|
+
const uint32_t i = hra_ ? capacity_ - num_items_ - 1 : num_items_;
|
|
169
169
|
new (items_ + i) T(std::forward<FwdT>(item));
|
|
170
170
|
++num_items_;
|
|
171
171
|
if (num_items_ > 1) sorted_ = false;
|
|
172
172
|
}
|
|
173
173
|
|
|
174
174
|
template<typename T, typename C, typename A>
|
|
175
|
-
void req_compactor<T, C, A>::grow(
|
|
175
|
+
void req_compactor<T, C, A>::grow(uint32_t new_capacity) {
|
|
176
176
|
T* new_items = allocator_.allocate(new_capacity);
|
|
177
|
-
|
|
177
|
+
uint32_t new_i = hra_ ? new_capacity - num_items_ : 0;
|
|
178
178
|
for (auto it = begin(); it != end(); ++it, ++new_i) {
|
|
179
179
|
new (new_items + new_i) T(std::move(*it));
|
|
180
180
|
(*it).~T();
|
|
@@ -185,7 +185,7 @@ void req_compactor<T, C, A>::grow(size_t new_capacity) {
|
|
|
185
185
|
}
|
|
186
186
|
|
|
187
187
|
template<typename T, typename C, typename A>
|
|
188
|
-
void req_compactor<T, C, A>::ensure_space(
|
|
188
|
+
void req_compactor<T, C, A>::ensure_space(uint32_t num) {
|
|
189
189
|
if (num_items_ + num > capacity_) grow(num_items_ + num + get_nom_capacity());
|
|
190
190
|
}
|
|
191
191
|
|
|
@@ -218,13 +218,13 @@ void req_compactor<T, C, A>::merge(FwdC&& other) {
|
|
|
218
218
|
while (ensure_enough_sections()) {}
|
|
219
219
|
ensure_space(other.get_num_items());
|
|
220
220
|
sort();
|
|
221
|
-
auto
|
|
221
|
+
auto offset = hra_ ? capacity_ - num_items_ : num_items_;
|
|
222
222
|
auto from = hra_ ? begin() - other.get_num_items() : end();
|
|
223
223
|
auto to = from + other.get_num_items();
|
|
224
224
|
auto other_it = other.begin();
|
|
225
225
|
for (auto it = from; it != to; ++it, ++other_it) new (it) T(conditional_forward<FwdC>(*other_it));
|
|
226
226
|
if (!other.sorted_) std::sort(from, to, C());
|
|
227
|
-
if (num_items_ > 0) std::inplace_merge(hra_ ? from : begin(),
|
|
227
|
+
if (num_items_ > 0) std::inplace_merge(hra_ ? from : begin(), items_ + offset, hra_ ? end() : to, C());
|
|
228
228
|
num_items_ += other.get_num_items();
|
|
229
229
|
}
|
|
230
230
|
|
|
@@ -240,7 +240,7 @@ template<typename T, typename C, typename A>
|
|
|
240
240
|
std::pair<uint32_t, uint32_t> req_compactor<T, C, A>::compact(req_compactor& next) {
|
|
241
241
|
const uint32_t starting_nom_capacity = get_nom_capacity();
|
|
242
242
|
// choose a part of the buffer to compact
|
|
243
|
-
const uint32_t secs_to_compact = std::min
|
|
243
|
+
const uint32_t secs_to_compact = std::min<uint32_t>(count_trailing_zeros_in_u64(~state_) + 1, num_sections_);
|
|
244
244
|
auto compaction_range = compute_compaction_range(secs_to_compact);
|
|
245
245
|
if (compaction_range.second - compaction_range.first < 2) throw std::logic_error("compaction range error");
|
|
246
246
|
|
|
@@ -267,9 +267,9 @@ std::pair<uint32_t, uint32_t> req_compactor<T, C, A>::compact(req_compactor& nex
|
|
|
267
267
|
|
|
268
268
|
template<typename T, typename C, typename A>
|
|
269
269
|
bool req_compactor<T, C, A>::ensure_enough_sections() {
|
|
270
|
-
const float ssr = section_size_raw_ /
|
|
270
|
+
const float ssr = section_size_raw_ / sqrtf(2);
|
|
271
271
|
const uint32_t ne = nearest_even(ssr);
|
|
272
|
-
if (state_ >= static_cast<uint64_t>(
|
|
272
|
+
if (state_ >= static_cast<uint64_t>(1ULL << (num_sections_ - 1)) && ne >= req_constants::MIN_K) {
|
|
273
273
|
section_size_raw_ = ssr;
|
|
274
274
|
section_size_ = ne;
|
|
275
275
|
num_sections_ <<= 1;
|
|
@@ -284,8 +284,8 @@ std::pair<uint32_t, uint32_t> req_compactor<T, C, A>::compute_compaction_range(u
|
|
|
284
284
|
uint32_t non_compact = get_nom_capacity() / 2 + (num_sections_ - secs_to_compact) * section_size_;
|
|
285
285
|
// make compacted region even
|
|
286
286
|
if (((num_items_ - non_compact) & 1) == 1) ++non_compact;
|
|
287
|
-
const
|
|
288
|
-
const
|
|
287
|
+
const uint32_t low = hra_ ? 0 : non_compact;
|
|
288
|
+
const uint32_t high = hra_ ? num_items_ - non_compact : num_items_;
|
|
289
289
|
return std::pair<uint32_t, uint32_t>(low, high);
|
|
290
290
|
}
|
|
291
291
|
|
|
@@ -309,19 +309,6 @@ void req_compactor<T, C, A>::promote_evens_or_odds(InIter from, InIter to, bool
|
|
|
309
309
|
}
|
|
310
310
|
}
|
|
311
311
|
|
|
312
|
-
// helpers for integral types
|
|
313
|
-
template<typename T>
|
|
314
|
-
static inline T read(std::istream& is) {
|
|
315
|
-
T value;
|
|
316
|
-
is.read(reinterpret_cast<char*>(&value), sizeof(T));
|
|
317
|
-
return value;
|
|
318
|
-
}
|
|
319
|
-
|
|
320
|
-
template<typename T>
|
|
321
|
-
static inline void write(std::ostream& os, T value) {
|
|
322
|
-
os.write(reinterpret_cast<const char*>(&value), sizeof(T));
|
|
323
|
-
}
|
|
324
|
-
|
|
325
312
|
// implementation for fixed-size arithmetic types (integral and floating point)
|
|
326
313
|
template<typename T, typename C, typename A>
|
|
327
314
|
template<typename S, typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
|
|
@@ -394,7 +381,7 @@ req_compactor<T, C, A> req_compactor<T, C, A>::deserialize(std::istream& is, con
|
|
|
394
381
|
|
|
395
382
|
template<typename T, typename C, typename A>
|
|
396
383
|
template<typename S>
|
|
397
|
-
auto req_compactor<T, C, A>::deserialize_items(std::istream& is, const S& serde, const A& allocator,
|
|
384
|
+
auto req_compactor<T, C, A>::deserialize_items(std::istream& is, const S& serde, const A& allocator, uint32_t num)
|
|
398
385
|
-> std::unique_ptr<T, items_deleter> {
|
|
399
386
|
A alloc(allocator);
|
|
400
387
|
std::unique_ptr<T, items_deleter> items(alloc.allocate(num), items_deleter(allocator, false, num));
|
|
@@ -402,7 +389,7 @@ auto req_compactor<T, C, A>::deserialize_items(std::istream& is, const S& serde,
|
|
|
402
389
|
// serde did not throw, enable destructors
|
|
403
390
|
items.get_deleter().set_destroy(true);
|
|
404
391
|
if (!is.good()) throw std::runtime_error("error reading from std::istream");
|
|
405
|
-
return
|
|
392
|
+
return items;
|
|
406
393
|
}
|
|
407
394
|
|
|
408
395
|
template<typename T, typename C, typename A>
|
|
@@ -443,7 +430,7 @@ std::pair<req_compactor<T, C, A>, size_t> req_compactor<T, C, A>::deserialize(co
|
|
|
443
430
|
|
|
444
431
|
template<typename T, typename C, typename A>
|
|
445
432
|
template<typename S>
|
|
446
|
-
auto req_compactor<T, C, A>::deserialize_items(const void* bytes, size_t size, const S& serde, const A& allocator,
|
|
433
|
+
auto req_compactor<T, C, A>::deserialize_items(const void* bytes, size_t size, const S& serde, const A& allocator, uint32_t num)
|
|
447
434
|
-> std::pair<std::unique_ptr<T, items_deleter>, size_t> {
|
|
448
435
|
const char* ptr = static_cast<const char*>(bytes);
|
|
449
436
|
const char* end_ptr = static_cast<const char*>(bytes) + size;
|
|
@@ -478,22 +465,22 @@ items_(items.release())
|
|
|
478
465
|
template<typename T, typename C, typename A>
|
|
479
466
|
class req_compactor<T, C, A>::items_deleter {
|
|
480
467
|
public:
|
|
481
|
-
items_deleter(const A& allocator, bool destroy,
|
|
468
|
+
items_deleter(const A& allocator, bool destroy, size_t num): allocator_(allocator), destroy_(destroy), num_(num) {}
|
|
482
469
|
void operator() (T* ptr) {
|
|
483
470
|
if (ptr != nullptr) {
|
|
484
|
-
if (
|
|
485
|
-
for (
|
|
471
|
+
if (destroy_) {
|
|
472
|
+
for (size_t i = 0; i < num_; ++i) {
|
|
486
473
|
ptr[i].~T();
|
|
487
474
|
}
|
|
488
475
|
}
|
|
489
|
-
|
|
476
|
+
allocator_.deallocate(ptr, num_);
|
|
490
477
|
}
|
|
491
478
|
}
|
|
492
|
-
void set_destroy(bool destroy) {
|
|
479
|
+
void set_destroy(bool destroy) { destroy_ = destroy; }
|
|
493
480
|
private:
|
|
494
|
-
A
|
|
495
|
-
bool
|
|
496
|
-
|
|
481
|
+
A allocator_;
|
|
482
|
+
bool destroy_;
|
|
483
|
+
size_t num_;
|
|
497
484
|
};
|
|
498
485
|
|
|
499
486
|
} /* namespace datasketches */
|
|
@@ -319,7 +319,7 @@ private:
|
|
|
319
319
|
|
|
320
320
|
// for deserialization
|
|
321
321
|
class item_deleter;
|
|
322
|
-
req_sketch(
|
|
322
|
+
req_sketch(uint16_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors);
|
|
323
323
|
|
|
324
324
|
static void check_preamble_ints(uint8_t preamble_ints, uint8_t num_levels);
|
|
325
325
|
static void check_serial_version(uint8_t serial_version);
|
|
@@ -28,7 +28,7 @@ namespace datasketches {
|
|
|
28
28
|
template<typename T, typename C, typename S, typename A>
|
|
29
29
|
req_sketch<T, C, S, A>::req_sketch(uint16_t k, bool hra, const A& allocator):
|
|
30
30
|
allocator_(allocator),
|
|
31
|
-
k_(std::max(static_cast<int>(k) & -2, static_cast<int>(req_constants::MIN_K))), //rounds down one if odd
|
|
31
|
+
k_(std::max<uint8_t>(static_cast<int>(k) & -2, static_cast<int>(req_constants::MIN_K))), //rounds down one if odd
|
|
32
32
|
hra_(hra),
|
|
33
33
|
max_nom_size_(0),
|
|
34
34
|
num_retained_(0),
|
|
@@ -401,7 +401,7 @@ void req_sketch<T, C, S, A>::serialize(std::ostream& os) const {
|
|
|
401
401
|
write(os, k_);
|
|
402
402
|
const uint8_t num_levels = is_empty() ? 0 : get_num_levels();
|
|
403
403
|
write(os, num_levels);
|
|
404
|
-
const uint8_t num_raw_items = raw_items ? n_ : 0;
|
|
404
|
+
const uint8_t num_raw_items = raw_items ? static_cast<uint8_t>(n_) : 0;
|
|
405
405
|
write(os, num_raw_items);
|
|
406
406
|
if (is_empty()) return;
|
|
407
407
|
if (is_estimation_mode()) {
|
|
@@ -440,7 +440,7 @@ auto req_sketch<T, C, S, A>::serialize(unsigned header_size_bytes) const -> vect
|
|
|
440
440
|
ptr += copy_to_mem(k_, ptr);
|
|
441
441
|
const uint8_t num_levels = is_empty() ? 0 : get_num_levels();
|
|
442
442
|
ptr += copy_to_mem(num_levels, ptr);
|
|
443
|
-
const uint8_t num_raw_items = raw_items ? n_ : 0;
|
|
443
|
+
const uint8_t num_raw_items = raw_items ? static_cast<uint8_t>(n_) : 0;
|
|
444
444
|
ptr += copy_to_mem(num_raw_items, ptr);
|
|
445
445
|
if (!is_empty()) {
|
|
446
446
|
if (is_estimation_mode()) {
|
|
@@ -620,7 +620,7 @@ void req_sketch<T, C, S, A>::grow() {
|
|
|
620
620
|
|
|
621
621
|
template<typename T, typename C, typename S, typename A>
|
|
622
622
|
uint8_t req_sketch<T, C, S, A>::get_num_levels() const {
|
|
623
|
-
return compactors_.size();
|
|
623
|
+
return static_cast<uint8_t>(compactors_.size());
|
|
624
624
|
}
|
|
625
625
|
|
|
626
626
|
template<typename T, typename C, typename S, typename A>
|
|
@@ -711,7 +711,7 @@ class req_sketch<T, C, S, A>::item_deleter {
|
|
|
711
711
|
};
|
|
712
712
|
|
|
713
713
|
template<typename T, typename C, typename S, typename A>
|
|
714
|
-
req_sketch<T, C, S, A>::req_sketch(
|
|
714
|
+
req_sketch<T, C, S, A>::req_sketch(uint16_t k, bool hra, uint64_t n, std::unique_ptr<T, item_deleter> min_value, std::unique_ptr<T, item_deleter> max_value, std::vector<Compactor, AllocCompactor>&& compactors):
|
|
715
715
|
allocator_(compactors.get_allocator()),
|
|
716
716
|
k_(k),
|
|
717
717
|
hra_(hra),
|
|
@@ -766,9 +766,9 @@ auto req_sketch<T, C, S, A>::end() const -> const_iterator {
|
|
|
766
766
|
|
|
767
767
|
template<typename T, typename C, typename S, typename A>
|
|
768
768
|
req_sketch<T, C, S, A>::const_iterator::const_iterator(LevelsIterator begin, LevelsIterator end):
|
|
769
|
-
levels_it_(begin),
|
|
770
|
-
levels_end_(end),
|
|
771
|
-
compactor_it_((*levels_it_).begin())
|
|
769
|
+
levels_it_(begin),
|
|
770
|
+
levels_end_(end),
|
|
771
|
+
compactor_it_(begin == end ? nullptr : (*levels_it_).begin())
|
|
772
772
|
{}
|
|
773
773
|
|
|
774
774
|
template<typename T, typename C, typename S, typename A>
|
|
@@ -802,7 +802,7 @@ bool req_sketch<T, C, S, A>::const_iterator::operator!=(const const_iterator& ot
|
|
|
802
802
|
|
|
803
803
|
template<typename T, typename C, typename S, typename A>
|
|
804
804
|
std::pair<const T&, const uint64_t> req_sketch<T, C, S, A>::const_iterator::operator*() const {
|
|
805
|
-
return std::pair<const T&, const uint64_t>(*compactor_it_,
|
|
805
|
+
return std::pair<const T&, const uint64_t>(*compactor_it_, 1ULL << (*levels_it_).get_lg_weight());
|
|
806
806
|
}
|
|
807
807
|
|
|
808
808
|
} /* namespace datasketches */
|
|
@@ -55,15 +55,15 @@ TEST_CASE("req sketch: empty", "[req_sketch]") {
|
|
|
55
55
|
|
|
56
56
|
TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
|
|
57
57
|
req_sketch<float> sketch(12, false);
|
|
58
|
-
sketch.update(1);
|
|
58
|
+
sketch.update(1.0f);
|
|
59
59
|
REQUIRE_FALSE(sketch.is_HRA());
|
|
60
60
|
REQUIRE_FALSE(sketch.is_empty());
|
|
61
61
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
62
62
|
REQUIRE(sketch.get_n() == 1);
|
|
63
63
|
REQUIRE(sketch.get_num_retained() == 1);
|
|
64
|
-
REQUIRE(sketch.get_rank(1) == 0);
|
|
65
|
-
REQUIRE(sketch.get_rank<true>(1) == 1);
|
|
66
|
-
REQUIRE(sketch.get_rank(1.
|
|
64
|
+
REQUIRE(sketch.get_rank(1.0f) == 0);
|
|
65
|
+
REQUIRE(sketch.get_rank<true>(1.0f) == 1);
|
|
66
|
+
REQUIRE(sketch.get_rank(1.1f) == 1);
|
|
67
67
|
REQUIRE(sketch.get_rank(std::numeric_limits<float>::infinity()) == 1);
|
|
68
68
|
REQUIRE(sketch.get_quantile(0) == 1);
|
|
69
69
|
REQUIRE(sketch.get_quantile(0.5) == 1);
|
|
@@ -86,43 +86,43 @@ TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
|
|
|
86
86
|
|
|
87
87
|
TEST_CASE("req sketch: repeated values", "[req_sketch]") {
|
|
88
88
|
req_sketch<float> sketch(12);
|
|
89
|
-
sketch.update(1);
|
|
90
|
-
sketch.update(1);
|
|
91
|
-
sketch.update(1);
|
|
92
|
-
sketch.update(2);
|
|
93
|
-
sketch.update(2);
|
|
94
|
-
sketch.update(2);
|
|
89
|
+
sketch.update(1.0f);
|
|
90
|
+
sketch.update(1.0f);
|
|
91
|
+
sketch.update(1.0f);
|
|
92
|
+
sketch.update(2.0f);
|
|
93
|
+
sketch.update(2.0f);
|
|
94
|
+
sketch.update(2.0f);
|
|
95
95
|
REQUIRE_FALSE(sketch.is_empty());
|
|
96
96
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
97
97
|
REQUIRE(sketch.get_n() == 6);
|
|
98
98
|
REQUIRE(sketch.get_num_retained() == 6);
|
|
99
|
-
REQUIRE(sketch.get_rank(1) == 0);
|
|
100
|
-
REQUIRE(sketch.get_rank<true>(1) == 0.5);
|
|
101
|
-
REQUIRE(sketch.get_rank(2) == 0.5);
|
|
102
|
-
REQUIRE(sketch.get_rank<true>(2) == 1);
|
|
99
|
+
REQUIRE(sketch.get_rank(1.0f) == 0);
|
|
100
|
+
REQUIRE(sketch.get_rank<true>(1.0f) == 0.5);
|
|
101
|
+
REQUIRE(sketch.get_rank(2.0f) == 0.5);
|
|
102
|
+
REQUIRE(sketch.get_rank<true>(2.0f) == 1);
|
|
103
103
|
}
|
|
104
104
|
|
|
105
105
|
TEST_CASE("req sketch: exact mode", "[req_sketch]") {
|
|
106
106
|
req_sketch<float> sketch(12);
|
|
107
|
-
for (size_t i = 1; i <= 10; ++i) sketch.update(i);
|
|
107
|
+
for (size_t i = 1; i <= 10; ++i) sketch.update(static_cast<float>(i));
|
|
108
108
|
REQUIRE_FALSE(sketch.is_empty());
|
|
109
109
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
110
110
|
REQUIRE(sketch.get_n() == 10);
|
|
111
111
|
REQUIRE(sketch.get_num_retained() == 10);
|
|
112
112
|
|
|
113
113
|
// like KLL
|
|
114
|
-
REQUIRE(sketch.get_rank(1) == 0);
|
|
115
|
-
REQUIRE(sketch.get_rank(2) == 0.1);
|
|
116
|
-
REQUIRE(sketch.get_rank(6) == 0.5);
|
|
117
|
-
REQUIRE(sketch.get_rank(9) == 0.8);
|
|
118
|
-
REQUIRE(sketch.get_rank(10) == 0.9);
|
|
114
|
+
REQUIRE(sketch.get_rank(1.0f) == 0);
|
|
115
|
+
REQUIRE(sketch.get_rank(2.0f) == 0.1);
|
|
116
|
+
REQUIRE(sketch.get_rank(6.0f) == 0.5);
|
|
117
|
+
REQUIRE(sketch.get_rank(9.0f) == 0.8);
|
|
118
|
+
REQUIRE(sketch.get_rank(10.0f) == 0.9);
|
|
119
119
|
|
|
120
120
|
// inclusive
|
|
121
|
-
REQUIRE(sketch.get_rank<true>(1) == 0.1);
|
|
122
|
-
REQUIRE(sketch.get_rank<true>(2) == 0.2);
|
|
123
|
-
REQUIRE(sketch.get_rank<true>(5) == 0.5);
|
|
124
|
-
REQUIRE(sketch.get_rank<true>(9) == 0.9);
|
|
125
|
-
REQUIRE(sketch.get_rank<true>(10) == 1);
|
|
121
|
+
REQUIRE(sketch.get_rank<true>(1.0f) == 0.1);
|
|
122
|
+
REQUIRE(sketch.get_rank<true>(2.0f) == 0.2);
|
|
123
|
+
REQUIRE(sketch.get_rank<true>(5.0f) == 0.5);
|
|
124
|
+
REQUIRE(sketch.get_rank<true>(9.0f) == 0.9);
|
|
125
|
+
REQUIRE(sketch.get_rank<true>(10.0f) == 1);
|
|
126
126
|
|
|
127
127
|
// like KLL
|
|
128
128
|
REQUIRE(sketch.get_quantile(0) == 1);
|
|
@@ -164,16 +164,16 @@ TEST_CASE("req sketch: exact mode", "[req_sketch]") {
|
|
|
164
164
|
TEST_CASE("req sketch: estimation mode", "[req_sketch]") {
|
|
165
165
|
req_sketch<float> sketch(12);
|
|
166
166
|
const size_t n = 100000;
|
|
167
|
-
for (size_t i = 0; i < n; ++i) sketch.update(i);
|
|
167
|
+
for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
|
|
168
168
|
REQUIRE_FALSE(sketch.is_empty());
|
|
169
169
|
REQUIRE(sketch.is_estimation_mode());
|
|
170
170
|
REQUIRE(sketch.get_n() == n);
|
|
171
171
|
// std::cout << sketch.to_string(true);
|
|
172
172
|
REQUIRE(sketch.get_num_retained() < n);
|
|
173
173
|
REQUIRE(sketch.get_rank(0) == 0);
|
|
174
|
-
REQUIRE(sketch.get_rank(n) == 1);
|
|
175
|
-
REQUIRE(sketch.get_rank(n / 2) == Approx(0.5).margin(0.01));
|
|
176
|
-
REQUIRE(sketch.get_rank(n - 1) == Approx(1).margin(0.01));
|
|
174
|
+
REQUIRE(sketch.get_rank(static_cast<float>(n)) == 1);
|
|
175
|
+
REQUIRE(sketch.get_rank(n / 2.0f) == Approx(0.5).margin(0.01));
|
|
176
|
+
REQUIRE(sketch.get_rank(n - 1.0f) == Approx(1).margin(0.01));
|
|
177
177
|
REQUIRE(sketch.get_min_value() == 0);
|
|
178
178
|
REQUIRE(sketch.get_max_value() == n - 1);
|
|
179
179
|
REQUIRE(sketch.get_rank_lower_bound(0.5, 1) < 0.5);
|
|
@@ -219,7 +219,7 @@ TEST_CASE("req sketch: byte serialize-deserialize empty", "[req_sketch]") {
|
|
|
219
219
|
|
|
220
220
|
TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]") {
|
|
221
221
|
req_sketch<float> sketch(12);
|
|
222
|
-
sketch.update(1);
|
|
222
|
+
sketch.update(1.0f);
|
|
223
223
|
|
|
224
224
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
225
225
|
sketch.serialize(s);
|
|
@@ -235,7 +235,7 @@ TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]"
|
|
|
235
235
|
|
|
236
236
|
TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]") {
|
|
237
237
|
req_sketch<float> sketch(12);
|
|
238
|
-
sketch.update(1);
|
|
238
|
+
sketch.update(1.0f);
|
|
239
239
|
|
|
240
240
|
auto bytes = sketch.serialize();
|
|
241
241
|
REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
|
|
@@ -253,7 +253,7 @@ TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]")
|
|
|
253
253
|
TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]") {
|
|
254
254
|
req_sketch<float> sketch(12);
|
|
255
255
|
const size_t n = 50;
|
|
256
|
-
for (size_t i = 0; i < n; ++i) sketch.update(i);
|
|
256
|
+
for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
|
|
257
257
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
258
258
|
|
|
259
259
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
@@ -271,7 +271,7 @@ TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]")
|
|
|
271
271
|
TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
|
|
272
272
|
req_sketch<float> sketch(12);
|
|
273
273
|
const size_t n = 50;
|
|
274
|
-
for (size_t i = 0; i < n; ++i) sketch.update(i);
|
|
274
|
+
for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
|
|
275
275
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
276
276
|
|
|
277
277
|
auto bytes = sketch.serialize();
|
|
@@ -290,7 +290,7 @@ TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
|
|
|
290
290
|
TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sketch]") {
|
|
291
291
|
req_sketch<float> sketch(12);
|
|
292
292
|
const size_t n = 100000;
|
|
293
|
-
for (size_t i = 0; i < n; ++i) sketch.update(i);
|
|
293
|
+
for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
|
|
294
294
|
REQUIRE(sketch.is_estimation_mode());
|
|
295
295
|
|
|
296
296
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
@@ -308,7 +308,7 @@ TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sket
|
|
|
308
308
|
TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch]") {
|
|
309
309
|
req_sketch<float> sketch(12);
|
|
310
310
|
const size_t n = 100000;
|
|
311
|
-
for (size_t i = 0; i < n; ++i) sketch.update(i);
|
|
311
|
+
for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
|
|
312
312
|
REQUIRE(sketch.is_estimation_mode());
|
|
313
313
|
|
|
314
314
|
auto bytes = sketch.serialize();
|
|
@@ -326,7 +326,7 @@ TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch
|
|
|
326
326
|
TEST_CASE("req sketch: serialize deserialize stream and bytes equivalence", "[req_sketch]") {
|
|
327
327
|
req_sketch<float> sketch(12);
|
|
328
328
|
const size_t n = 100000;
|
|
329
|
-
for (size_t i = 0; i < n; ++i) sketch.update(i);
|
|
329
|
+
for (size_t i = 0; i < n; ++i) sketch.update(static_cast<float>(i));
|
|
330
330
|
REQUIRE(sketch.is_estimation_mode());
|
|
331
331
|
|
|
332
332
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
@@ -373,8 +373,8 @@ TEST_CASE("req sketch: stream deserialize from Java - single item", "[req_sketch
|
|
|
373
373
|
REQUIRE(sketch.get_num_retained() == 1);
|
|
374
374
|
REQUIRE(sketch.get_min_value() == 1);
|
|
375
375
|
REQUIRE(sketch.get_max_value() == 1);
|
|
376
|
-
REQUIRE(sketch.get_rank(1) == 0);
|
|
377
|
-
REQUIRE(sketch.get_rank<true>(1) == 1);
|
|
376
|
+
REQUIRE(sketch.get_rank(1.0f) == 0);
|
|
377
|
+
REQUIRE(sketch.get_rank<true>(1.0f) == 1);
|
|
378
378
|
}
|
|
379
379
|
|
|
380
380
|
TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]") {
|
|
@@ -388,7 +388,7 @@ TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]"
|
|
|
388
388
|
REQUIRE(sketch.get_num_retained() == 4);
|
|
389
389
|
REQUIRE(sketch.get_min_value() == 0);
|
|
390
390
|
REQUIRE(sketch.get_max_value() == 3);
|
|
391
|
-
REQUIRE(sketch.get_rank(2) == 0.5);
|
|
391
|
+
REQUIRE(sketch.get_rank(2.0f) == 0.5);
|
|
392
392
|
}
|
|
393
393
|
|
|
394
394
|
TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]") {
|
|
@@ -402,7 +402,7 @@ TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]
|
|
|
402
402
|
REQUIRE(sketch.get_num_retained() == 100);
|
|
403
403
|
REQUIRE(sketch.get_min_value() == 0);
|
|
404
404
|
REQUIRE(sketch.get_max_value() == 99);
|
|
405
|
-
REQUIRE(sketch.get_rank(50) == 0.5);
|
|
405
|
+
REQUIRE(sketch.get_rank(50.0f) == 0.5);
|
|
406
406
|
}
|
|
407
407
|
|
|
408
408
|
TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sketch]") {
|
|
@@ -416,14 +416,14 @@ TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sk
|
|
|
416
416
|
REQUIRE(sketch.get_num_retained() == 2942);
|
|
417
417
|
REQUIRE(sketch.get_min_value() == 0);
|
|
418
418
|
REQUIRE(sketch.get_max_value() == 9999);
|
|
419
|
-
REQUIRE(sketch.get_rank(5000) == 0.5);
|
|
419
|
+
REQUIRE(sketch.get_rank(5000.0f) == 0.5);
|
|
420
420
|
}
|
|
421
421
|
|
|
422
422
|
TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
|
|
423
423
|
req_sketch<float> sketch1(40);
|
|
424
424
|
|
|
425
425
|
req_sketch<float> sketch2(40);
|
|
426
|
-
for (size_t i = 0; i < 1000; ++i) sketch2.update(i);
|
|
426
|
+
for (size_t i = 0; i < 1000; ++i) sketch2.update(static_cast<float>(i));
|
|
427
427
|
|
|
428
428
|
sketch1.merge(sketch2);
|
|
429
429
|
REQUIRE(sketch1.get_min_value() == 0);
|
|
@@ -431,15 +431,15 @@ TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
|
|
|
431
431
|
REQUIRE(sketch1.get_quantile(0.25) == Approx(250).margin(3));
|
|
432
432
|
REQUIRE(sketch1.get_quantile(0.5) == Approx(500).margin(3));
|
|
433
433
|
REQUIRE(sketch1.get_quantile(0.75) == Approx(750).margin(3));
|
|
434
|
-
REQUIRE(sketch1.get_rank(500) == Approx(0.5).margin(0.01));
|
|
434
|
+
REQUIRE(sketch1.get_rank(500.0f) == Approx(0.5).margin(0.01));
|
|
435
435
|
}
|
|
436
436
|
|
|
437
437
|
TEST_CASE("req sketch: merge", "[req_sketch]") {
|
|
438
438
|
req_sketch<float> sketch1(100);
|
|
439
|
-
for (size_t i = 0; i < 1000; ++i) sketch1.update(i);
|
|
439
|
+
for (size_t i = 0; i < 1000; ++i) sketch1.update(static_cast<float>(i));
|
|
440
440
|
|
|
441
441
|
req_sketch<float> sketch2(100);
|
|
442
|
-
for (size_t i = 1000; i < 2000; ++i) sketch2.update(i);
|
|
442
|
+
for (size_t i = 1000; i < 2000; ++i) sketch2.update(static_cast<float>(i));
|
|
443
443
|
|
|
444
444
|
sketch1.merge(sketch2);
|
|
445
445
|
REQUIRE(sketch1.get_min_value() == 0);
|
|
@@ -447,18 +447,18 @@ TEST_CASE("req sketch: merge", "[req_sketch]") {
|
|
|
447
447
|
REQUIRE(sketch1.get_quantile(0.25) == Approx(500).margin(3));
|
|
448
448
|
REQUIRE(sketch1.get_quantile(0.5) == Approx(1000).margin(1));
|
|
449
449
|
REQUIRE(sketch1.get_quantile(0.75) == Approx(1500).margin(1));
|
|
450
|
-
REQUIRE(sketch1.get_rank(1000) == Approx(0.5).margin(0.01));
|
|
450
|
+
REQUIRE(sketch1.get_rank(1000.0f) == Approx(0.5).margin(0.01));
|
|
451
451
|
}
|
|
452
452
|
|
|
453
453
|
TEST_CASE("req sketch: merge multiple", "[req_sketch]") {
|
|
454
454
|
req_sketch<float> sketch1(12);
|
|
455
|
-
for (size_t i = 0; i < 40; ++i) sketch1.update(i);
|
|
455
|
+
for (size_t i = 0; i < 40; ++i) sketch1.update(static_cast<float>(i));
|
|
456
456
|
|
|
457
457
|
req_sketch<float> sketch2(12);
|
|
458
|
-
for (size_t i = 40; i < 80; ++i) sketch2.update(i);
|
|
458
|
+
for (size_t i = 40; i < 80; ++i) sketch2.update(static_cast<float>(i));
|
|
459
459
|
|
|
460
460
|
req_sketch<float> sketch3(12);
|
|
461
|
-
for (size_t i = 80; i < 120; ++i) sketch3.update(i);
|
|
461
|
+
for (size_t i = 80; i < 120; ++i) sketch3.update(static_cast<float>(i));
|
|
462
462
|
|
|
463
463
|
req_sketch<float> sketch(12);
|
|
464
464
|
sketch.merge(sketch1);
|
|
@@ -467,15 +467,15 @@ TEST_CASE("req sketch: merge multiple", "[req_sketch]") {
|
|
|
467
467
|
REQUIRE(sketch.get_min_value() == 0);
|
|
468
468
|
REQUIRE(sketch.get_max_value() == 119);
|
|
469
469
|
REQUIRE(sketch.get_quantile(0.5) == Approx(60).margin(3));
|
|
470
|
-
REQUIRE(sketch.get_rank(60) == Approx(0.5).margin(0.01));
|
|
470
|
+
REQUIRE(sketch.get_rank(60.0f) == Approx(0.5).margin(0.01));
|
|
471
471
|
}
|
|
472
472
|
|
|
473
473
|
TEST_CASE("req sketch: merge incompatible HRA and LRA", "[req_sketch]") {
|
|
474
474
|
req_sketch<float> sketch1(12);
|
|
475
|
-
sketch1.update(1);
|
|
475
|
+
sketch1.update(1.0f);
|
|
476
476
|
|
|
477
477
|
req_sketch<float> sketch2(12, false);
|
|
478
|
-
sketch2.update(1);
|
|
478
|
+
sketch2.update(1.0f);
|
|
479
479
|
|
|
480
480
|
REQUIRE_THROWS_AS(sketch1.merge(sketch2), std::invalid_argument);
|
|
481
481
|
}
|