datasketches 0.2.5 → 0.2.6
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/datasketches/kll_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/NOTICE +6 -5
- data/vendor/datasketches-cpp/common/CMakeLists.txt +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +2 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +22 -9
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +47 -9
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +71 -6
- data/vendor/datasketches-cpp/python/README.md +6 -9
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +15 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +78 -14
- data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +2 -2
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +63 -0
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +6 -0
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +27 -0
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +11 -0
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +29 -2
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +69 -3
- data/vendor/datasketches-cpp/setup.py +1 -1
- metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: cf1ea0f9f2d12b0e46c2d4c7dec21f41992e711e73eca68ea1ef03a4bb711077
+  data.tar.gz: 92f56b63da0254962be47d8d3e00a6950a271053bf3152167f95e6fdb99528e6
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5841d4a70f1e852faa150f57ebfefc7b975de020782c41eebdad87a01d016be9bdf86f86173600632bf6f56300df0c9c4196251aa5df02a47ecd357ac844ef80
+  data.tar.gz: d6ae7c811e0e2c2008b912e29f86d1b99491c74cd878790dfd800811a007f0dbf9c49bb59db30345450ff82673381f2c036a84a57dc44a6f6751610d9be2ee88
data/CHANGELOG.md CHANGED

data/ext/datasketches/kll_wrapper.cpp CHANGED
@@ -55,12 +55,12 @@ void bind_kll_sketch(Rice::Module& m, const char* name) {
       })
     .define_method(
       "pmf",
-      [](kll_sketch<T>& self, std::vector<T
+      [](kll_sketch<T>& self, const std::vector<T>& split_points) {
        return self.get_PMF(&split_points[0], split_points.size());
       })
     .define_method(
       "cdf",
-      [](kll_sketch<T>& self, std::vector<T
+      [](kll_sketch<T>& self, const std::vector<T>& split_points) {
        return self.get_CDF(&split_points[0], split_points.size());
       })
     .define_method(
data/lib/datasketches/version.rb CHANGED

data/vendor/datasketches-cpp/NOTICE CHANGED
@@ -1,11 +1,12 @@
-Apache DataSketches
-Copyright
+Apache DataSketches C++ and Python
+Copyright 2022 The Apache Software Foundation
 
-Copyright 2015-2018 Yahoo
-Copyright 2019 Verizon Media
+Copyright 2015-2018 Yahoo Inc.
+Copyright 2019-2020 Verizon Media
+Copyright 2021 Yahoo Inc.
 
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).
 
 Prior to moving to ASF, the software for this project was developed at
-Yahoo
+Yahoo Inc. (https://developer.yahoo.com).
data/vendor/datasketches-cpp/common/CMakeLists.txt CHANGED
@@ -43,8 +43,8 @@ install(FILES
   include/conditional_forward.hpp
   include/ceiling_power_of_2.hpp
   include/bounds_binomial_proportions.hpp
-  include/kolmogorov_smirnov.hpp
-  include/kolmogorov_smirnov_impl.hpp
   include/quantile_sketch_sorted_view.hpp
   include/quantile_sketch_sorted_view_impl.hpp
+  include/kolmogorov_smirnov.hpp
+  include/kolmogorov_smirnov_impl.hpp
   DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp CHANGED
@@ -297,6 +297,7 @@ void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& sourc
   // changes the implied ordering of the pairs, so we must do it before sorting.
 
   const uint8_t pseudo_phase = determine_pseudo_phase(source.get_lg_k(), source.get_num_coupons());
+  if (pseudo_phase >= 16) throw std::logic_error("unexpected pseudo phase for sliding flavor");
   const uint8_t* permutation = column_permutations_for_encoding[pseudo_phase];
 
   const uint8_t offset = source.window_offset;
@@ -333,7 +334,7 @@ void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& sou
       lg_k, source.table_data.get_allocator());
 
   const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
-  if (pseudo_phase >= 16) throw std::logic_error("pseudo phase
+  if (pseudo_phase >= 16) throw std::logic_error("unexpected pseudo phase for sliding flavor");
   const uint8_t* permutation = column_permutations_for_decoding[pseudo_phase];
 
   uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k, num_coupons);
data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp CHANGED
@@ -230,7 +230,7 @@ kll_helper::compress_result kll_helper::general_compress(uint16_t k, uint8_t m,
       // move level over as is
       // make sure we are not moving data upwards
       if (raw_beg < out_levels[current_level]) throw std::logic_error("wrong move");
-      std::move(
+      std::move(items + raw_beg, items + raw_lim, items + out_levels[current_level]);
       out_levels[current_level + 1] = out_levels[current_level] + raw_pop;
     } else {
       // The sketch is too full AND this level is too full, so we compact it
@@ -251,7 +251,7 @@ kll_helper::compress_result kll_helper::general_compress(uint16_t k, uint8_t m,
 
       // level zero might not be sorted, so we must sort it if we wish to compact it
       if ((current_level == 0) && !is_level_zero_sorted) {
-        std::sort(
+        std::sort(items + adj_beg, items + adj_beg + adj_pop, C());
       }
 
       if (pop_above == 0) { // Level above is empty, so halve up
data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp CHANGED
@@ -170,7 +170,7 @@ class kll_sketch {
   using comparator = C;
 
   static const uint8_t DEFAULT_M = 8;
-  // TODO: Redundant and deprecated. Will be
+  // TODO: Redundant and deprecated. Will be removed in next major version.
   static const uint16_t DEFAULT_K = kll_constants::DEFAULT_K;
   static const uint16_t MIN_K = DEFAULT_M;
   static const uint16_t MAX_K = (1 << 16) - 1;
@@ -182,6 +182,14 @@ class kll_sketch {
   kll_sketch& operator=(const kll_sketch& other);
   kll_sketch& operator=(kll_sketch&& other);
 
+  /*
+   * Type converting constructor.
+   * @param other sketch of a different type
+   * @param allocator instance of an Allocator
+   */
+  template<typename TT, typename CC, typename SS, typename AA>
+  explicit kll_sketch(const kll_sketch<TT, CC, SS, AA>& other, const A& allocator = A());
+
   /**
    * Updates this sketch with the given data item.
    * @param value an item from a stream of items
@@ -390,7 +398,7 @@ class kll_sketch {
   /**
    * Computes size needed to serialize the current state of the sketch.
    * This version is for fixed-size arithmetic types (integral and floating point).
-   * @param instance of a SerDe
+   * @param serde instance of a SerDe
    * @return size in bytes needed to serialize this sketch
    */
   template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
@@ -399,7 +407,7 @@ class kll_sketch {
   /**
    * Computes size needed to serialize the current state of the sketch.
    * This version is for all other types and can be expensive since every item needs to be looked at.
-   * @param instance of a SerDe
+   * @param serde instance of a SerDe
    * @return size in bytes needed to serialize this sketch
    */
   template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
@@ -459,7 +467,7 @@ class kll_sketch {
   /**
    * This method deserializes a sketch from a given stream.
    * @param is input stream
-   * @param instance of an Allocator
+   * @param allocator instance of an Allocator
    * @return an instance of a sketch
    *
    * Deprecated, to be removed in the next major version
@@ -469,8 +477,8 @@ class kll_sketch {
   /**
    * This method deserializes a sketch from a given stream.
    * @param is input stream
-   * @param instance of a SerDe
-   * @param instance of an Allocator
+   * @param serde instance of a SerDe
+   * @param allocator instance of an Allocator
    * @return an instance of a sketch
    */
   template<typename SerDe = S>
@@ -480,7 +488,7 @@ class kll_sketch {
   * This method deserializes a sketch from a given array of bytes.
   * @param bytes pointer to the array of bytes
   * @param size the size of the array
-  * @param instance of an Allocator
+  * @param allocator instance of an Allocator
   * @return an instance of a sketch
   *
   * Deprecated, to be removed in the next major version
@@ -491,8 +499,8 @@ class kll_sketch {
   * This method deserializes a sketch from a given array of bytes.
   * @param bytes pointer to the array of bytes
   * @param size the size of the array
-  * @param instance of a SerDe
-  * @param instance of an Allocator
+  * @param serde instance of a SerDe
+  * @param allocator instance of an Allocator
   * @return an instance of a sketch
   */
   template<typename SerDe = S>
@@ -606,6 +614,8 @@ class kll_sketch {
   static void check_serial_version(uint8_t serial_version);
   static void check_family_id(uint8_t family_id);
 
+  void check_sorting() const;
+
   // implementations for floating point types
   template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
   static const TT& get_invalid_value() {
@@ -629,6 +639,9 @@ class kll_sketch {
     return true;
   }
 
+  // for type converting constructor
+  template<typename TT, typename CC, typename SS, typename AA>
+  friend class kll_sketch;
 };
 
 template<typename T, typename C, typename S, typename A>
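The new converting constructor has to read the private members of a kll_sketch instantiated with different template arguments, which is why the header also declares every other instantiation of the template as a friend. A minimal sketch of that C++ pattern, using a simplified hypothetical box<T> class rather than the sketch itself:

```cpp
#include <iostream>

// box<T> can read the private member of box<U> because every
// instantiation of the template is declared a friend.
template<typename T>
class box {
public:
  explicit box(T v): value_(v) {}

  // type converting constructor, mirroring the sketch's new constructor
  template<typename U>
  explicit box(const box<U>& other): value_(static_cast<T>(other.value_)) {}

  T get() const { return value_; }

private:
  T value_;

  // for the type converting constructor
  template<typename U> friend class box;
};

int main() {
  box<double> d(2.5);
  box<float> f(d);  // allowed: box<float> is a friend of box<double>
  std::cout << f.get() << "\n";
  return 0;
}
```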
data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp CHANGED
@@ -26,6 +26,7 @@
 #include <stdexcept>
 
 #include "conditional_forward.hpp"
+#include "count_zeros.hpp"
 #include "memory_operations.hpp"
 #include "kll_helper.hpp"
 
@@ -69,7 +70,7 @@ max_value_(nullptr),
 is_level_zero_sorted_(other.is_level_zero_sorted_)
 {
   items_ = allocator_.allocate(items_size_);
-
+  for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
   if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
   if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
 }
@@ -147,6 +148,33 @@ kll_sketch<T, C, S, A>::~kll_sketch() {
   }
 }
 
+template<typename T, typename C, typename S, typename A>
+template<typename TT, typename CC, typename SS, typename AA>
+kll_sketch<T, C, S, A>::kll_sketch(const kll_sketch<TT, CC, SS, AA>& other, const A& allocator):
+allocator_(allocator),
+k_(other.k_),
+m_(other.m_),
+min_k_(other.min_k_),
+n_(other.n_),
+num_levels_(other.num_levels_),
+levels_(other.levels_, allocator_),
+items_(nullptr),
+items_size_(other.items_size_),
+min_value_(nullptr),
+max_value_(nullptr),
+is_level_zero_sorted_(other.is_level_zero_sorted_)
+{
+  static_assert(
+    std::is_constructible<T, TT>::value,
+    "Type converting constructor requires new type to be constructible from existing type"
+  );
+  items_ = allocator_.allocate(items_size_);
+  for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
+  if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
+  if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
+  check_sorting();
+}
+
 template<typename T, typename C, typename S, typename A>
 template<typename FwdT>
 void kll_sketch<T, C, S, A>::update(FwdT&& value) {
@@ -305,8 +333,8 @@ double kll_sketch<T, C, S, A>::get_rank(const T& value) const {
   uint64_t weight = 1;
   uint64_t total = 0;
   while (level < num_levels_) {
-    const auto from_index
-    const auto to_index
+    const auto from_index = levels_[level];
+    const auto to_index = levels_[level + 1]; // exclusive
     for (uint32_t i = from_index; i < to_index; i++) {
       if (inclusive ? !C()(value, items_[i]) : C()(items_[i], value)) {
         total += weight;
@@ -694,7 +722,7 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
   // level zero might not be sorted, so we must sort it if we wish to compact it
   // sort_level_zero() is not used here because of the adjustment for odd number of items
   if ((level == 0) && !is_level_zero_sorted_) {
-    std::sort(
+    std::sort(items_ + adj_beg, items_ + adj_beg + adj_pop, C());
   }
   if (pop_above == 0) {
     kll_helper::randomly_halve_up(items_, adj_beg, adj_pop);
@@ -717,7 +745,7 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
   // so that the freed-up space can be used by level zero
   if (level > 0) {
     const uint32_t amount = raw_beg - levels_[0];
-    std::move_backward(
+    std::move_backward(items_ + levels_[0], items_ + levels_[0] + amount, items_ + levels_[0] + half_adj_pop + amount);
     for (uint8_t lvl = 0; lvl < level; lvl++) levels_[lvl] += half_adj_pop;
   }
   for (uint32_t i = 0; i < half_adj_pop; i++) items_[i + destroy_beg].~T();
@@ -775,22 +803,32 @@ void kll_sketch<T, C, S, A>::add_empty_top_level_to_completely_full_sketch() {
 template<typename T, typename C, typename S, typename A>
 void kll_sketch<T, C, S, A>::sort_level_zero() {
   if (!is_level_zero_sorted_) {
-    std::sort(
+    std::sort(items_ + levels_[0], items_ + levels_[1], C());
     is_level_zero_sorted_ = true;
   }
 }
 
+template<typename T, typename C, typename S, typename A>
+void kll_sketch<T, C, S, A>::check_sorting() const {
+  // not checking level 0
+  for (uint8_t level = 1; level < num_levels_; ++level) {
+    const auto from = items_ + levels_[level];
+    const auto to = items_ + levels_[level + 1];
+    if (!std::is_sorted(from, to, C())) {
+      throw std::logic_error("levels must be sorted");
+    }
+  }
+}
+
 template<typename T, typename C, typename S, typename A>
 template<bool inclusive>
 quantile_sketch_sorted_view<T, C, A> kll_sketch<T, C, S, A>::get_sorted_view(bool cumulative) const {
   const_cast<kll_sketch*>(this)->sort_level_zero(); // allow this side effect
   quantile_sketch_sorted_view<T, C, A> view(get_num_retained(), allocator_);
-  uint8_t level = 0;
-  while (level < num_levels_) {
+  for (uint8_t level = 0; level < num_levels_; ++level) {
     const auto from = items_ + levels_[level];
     const auto to = items_ + levels_[level + 1]; // exclusive
     view.add(from, to, 1 << level);
-    ++level;
   }
   if (cumulative) view.template convert_to_cummulative<inclusive>();
   return view;
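With the implementation above, converting between KLL sketches of compatible item types is a single constructor call, and the new check_sorting() guard verifies that every level above level zero is still sorted under the target comparator. A minimal usage sketch, assuming the installed headers are reachable as kll_sketch.hpp and mirroring the new unit tests:

```cpp
#include <kll_sketch.hpp>  // assumed include path for the installed headers

int main() {
  datasketches::kll_sketch<double> kll_double;
  for (int i = 0; i < 1000; ++i) kll_double.update(static_cast<double>(i));

  // Type converting copy: requires float to be constructible from double,
  // which the constructor's static_assert checks at compile time.
  datasketches::kll_sketch<float> kll_float(kll_double);

  // k, n and the number of retained items carry over unchanged.
  const bool ok = kll_float.get_n() == kll_double.get_n()
      && kll_float.get_num_retained() == kll_double.get_num_retained();
  return ok ? 0 : 1;
}
```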
data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp CHANGED
@@ -39,9 +39,9 @@ static std::string testBinaryInputPath = "test/";
 #endif
 
 // typical usage would be just kll_sketch<float> or kll_sketch<std::string>, but here we use test_allocator
-
+using kll_float_sketch = kll_sketch<float, std::less<float>, serde<float>, test_allocator<float>>;
 // let std::string use the default allocator for simplicity, otherwise we need to define "less" and "serde"
-
+using kll_string_sketch = kll_sketch<std::string, std::less<std::string>, serde<std::string>, test_allocator<std::string>>;
 
 TEST_CASE("kll sketch", "[kll_sketch]") {
 
@@ -75,7 +75,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
       (void) it; // to suppress "unused" warning
       FAIL("should be no iterations over an empty sketch");
     }
-  }
+  }
 
   SECTION("get bad quantile") {
     kll_float_sketch sketch(200, 0);
@@ -835,10 +835,75 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
     REQUIRE((*it).second == 3);
   }
   }
-
-
-
+
+  SECTION("type conversion: empty") {
+    kll_sketch<double> kll_double;
+    kll_sketch<float> kll_float(kll_double);
+    REQUIRE(kll_float.is_empty());
+    REQUIRE(kll_float.get_k() == kll_double.get_k());
+    REQUIRE(kll_float.get_n() == 0);
+    REQUIRE(kll_float.get_num_retained() == 0);
+  }
+
+  SECTION("type conversion: over k") {
+    kll_sketch<double> kll_double;
+    for (int i = 0; i < 1000; ++i) kll_double.update(static_cast<double>(i));
+    kll_sketch<float> kll_float(kll_double);
+    REQUIRE(!kll_float.is_empty());
+    REQUIRE(kll_float.get_k() == kll_double.get_k());
+    REQUIRE(kll_float.get_n() == kll_double.get_n());
+    REQUIRE(kll_float.get_num_retained() == kll_double.get_num_retained());
+
+    auto sv_float = kll_float.get_sorted_view(false);
+    auto sv_double = kll_double.get_sorted_view(false);
+    auto sv_float_it = sv_float.begin();
+    auto sv_double_it = sv_double.begin();
+    while (sv_float_it != sv_float.end()) {
+      REQUIRE(sv_double_it != sv_double.end());
+      auto float_pair = *sv_float_it;
+      auto double_pair = *sv_double_it;
+      REQUIRE(float_pair.first == Approx(double_pair.first).margin(0.01));
+      REQUIRE(float_pair.second == double_pair.second);
+      ++sv_float_it;
+      ++sv_double_it;
+    }
+    REQUIRE(sv_double_it == sv_double.end());
+  }
+
+  class A {
+    int val;
+  public:
+    A(int val): val(val) {}
+    int get_val() const { return val; }
+  };
+
+  struct less_A {
+    bool operator()(const A& a1, const A& a2) const { return a1.get_val() < a2.get_val(); }
+  };
+
+  class B {
+    int val;
+  public:
+    explicit B(const A& a): val(a.get_val()) {}
+    int get_val() const { return val; }
+  };
+
+  struct less_B {
+    bool operator()(const B& b1, const B& b2) const { return b1.get_val() < b2.get_val(); }
+  };
+
+  SECTION("type conversion: custom types") {
+    kll_sketch<A, less_A> sa;
+    sa.update(1);
+    sa.update(2);
+    sa.update(3);
+
+    kll_sketch<B, less_B> sb(sa);
+    REQUIRE(sb.get_n() == 3);
   }
+
+  // cleanup
+  REQUIRE(test_allocator_total_bytes == 0);
 }
 
 } /* namespace datasketches */
data/vendor/datasketches-cpp/python/README.md CHANGED
@@ -12,16 +12,18 @@ This package provides a variety of sketches as described below. Wherever a speci
 
 ## Building and Installation
 
-Once cloned, the library can be installed by running `
+Once cloned, the library can be installed by running `python3 -m pip install .` in the project root directory -- not the python subdirectory -- which will also install the necessary dependencies, namely numpy and [pybind11[global]](https://github.com/pybind/pybind11).
 
-If you prefer to call the `setup.py` build script directly, you must first install `pybind11[global]`, as well as any other dependencies listed under the build-system section in `pyproject.toml`.
+If you prefer to call the `setup.py` build script directly, which is discouraged, you must first install `pybind11[global]`, as well as any other dependencies listed under the build-system section in `pyproject.toml`.
 
-The library is also available from PyPI via `
+The library is also available from PyPI via `python3 -m pip install datasketches`.
 
 ## Usage
 
 Having installed the library, loading the Apache Datasketches Library in Python is simple: `import datasketches`.
 
+The unit tests are mostly structured in a tutorial style and can be used as a reference example for how to feed data into and query the different types of sketches.
+
 ## Available Sketch Classes
 
 - KLL (Absolute Error Quantiles)
@@ -74,12 +76,7 @@ The only developer-specific instructions relate to running unit tests.
 
 ### Unit tests
 
-The Python unit tests are run
-
-```bash
-python -m pip install --upgrade tox
-tox
-```
+The Python unit tests are run via `tox`, with no arguments, from the project root directory -- not the python subdirectory. Tox creates a temporary virtual environment in which to build and run the unit tests. In the event you are missing the necessary package, tox may be installed with `python3 -m pip install --upgrade tox`.
 
 ## License
 
data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp CHANGED
@@ -151,6 +151,7 @@ template <typename T,
 class quantiles_sketch {
 public:
   using value_type = T;
+  using allocator_type = Allocator;
   using comparator = Comparator;
   using vector_double = std::vector<double, typename std::allocator_traits<Allocator>::template rebind_alloc<double>>;
 
@@ -161,6 +162,14 @@ public:
   quantiles_sketch& operator=(const quantiles_sketch& other);
   quantiles_sketch& operator=(quantiles_sketch&& other) noexcept;
 
+  /**
+   * @brief Type converting constructor
+   * @param other quantiles sketch of a different type
+   * @param allocator instance of an Allocator
+   */
+  template<typename From, typename FC, typename FA>
+  explicit quantiles_sketch(const quantiles_sketch<From, FC, FA>& other, const Allocator& allocator = Allocator());
+
   /**
    * Updates this sketch with the given data item.
    * @param value an item from a stream of items
@@ -227,6 +236,12 @@ public:
   */
   Comparator get_comparator() const;
 
+  /**
+   * Returns the allocator for this sketch.
+   * @return allocator
+   */
+  allocator_type get_allocator() const;
+
   /**
    * Returns an approximation to the value of the data item
    * that would be preceded by the given fraction of a hypothetical sorted
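The new get_allocator() accessor makes it possible to construct further sketches with the same allocator instance as an existing one, which the updated quantiles tests rely on. A minimal usage sketch, assuming the default std::allocator-based instantiation and that the installed headers are reachable as quantiles_sketch.hpp:

```cpp
#include <cstdint>
#include <quantiles_sketch.hpp>  // assumed include path for the installed headers

int main() {
  const uint16_t k = 128;
  datasketches::quantiles_sketch<double> sk_double(k);

  // Reuse the allocator of an existing sketch when creating another one;
  // with std::allocator the float allocator is constructible from the double one.
  datasketches::quantiles_sketch<float> sk_float(k, sk_double.get_allocator());

  return sk_float.is_empty() ? 0 : 1;
}
```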
data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp CHANGED
@@ -138,6 +138,65 @@ is_sorted_(is_sorted)
   throw std::logic_error("Item count does not match value computed from k, n");
 }
 
+template<typename T, typename C, typename A>
+template<typename From, typename FC, typename FA>
+quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch<From, FC, FA>& other, const A& allocator) :
+allocator_(allocator),
+k_(other.get_k()),
+n_(other.get_n()),
+bit_pattern_(compute_bit_pattern(other.get_k(), other.get_n())),
+base_buffer_(allocator),
+levels_(allocator),
+min_value_(nullptr),
+max_value_(nullptr),
+is_sorted_(false)
+{
+  static_assert(std::is_constructible<T, From>::value,
+    "Type converting constructor requires new type to be constructible from existing type");
+
+  base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k_));
+
+  if (!other.is_empty()) {
+    min_value_ = new (allocator_.allocate(1)) T(other.get_min_value());
+    max_value_ = new (allocator_.allocate(1)) T(other.get_max_value());
+
+    // reserve space in levels
+    const uint8_t num_levels = compute_levels_needed(k_, n_);
+    levels_.reserve(num_levels);
+    for (int i = 0; i < num_levels; ++i) {
+      Level level(allocator);
+      level.reserve(k_);
+      levels_.push_back(std::move(level));
+    }
+
+    // iterate through points, assigning to the correct level as needed
+    for (auto pair : other) {
+      const uint64_t wt = pair.second;
+      if (wt == 1) {
+        base_buffer_.push_back(T(pair.first));
+        // resize where needed as if adding points via update()
+        if (base_buffer_.size() + 1 > base_buffer_.capacity()) {
+          const size_t new_size = std::max(std::min(static_cast<size_t>(2 * k_), 2 * base_buffer_.size()), static_cast<size_t>(1));
+          base_buffer_.reserve(new_size);
+        }
+      }
+      else {
+        const uint8_t idx = count_trailing_zeros_in_u64(pair.second) - 1;
+        levels_[idx].push_back(T(pair.first));
+      }
+    }
+
+    // validate that ordering within each level is preserved
+    // base_buffer_ can be considered unsorted for this purpose
+    for (int i = 0; i < num_levels; ++i) {
+      if (!std::is_sorted(levels_[i].begin(), levels_[i].end(), C())) {
+        throw std::logic_error("Copy construction across types produces invalid sorting");
+      }
+    }
+  }
+}
+
+
 template<typename T, typename C, typename A>
 quantiles_sketch<T, C, A>::~quantiles_sketch() {
   if (min_value_ != nullptr) {
@@ -238,7 +297,7 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
   );
   write(os, flags_byte);
   write(os, k_);
-  uint16_t unused = 0;
+  const uint16_t unused = 0;
   write(os, unused);
 
   if (!is_empty()) {
@@ -624,6 +683,11 @@ C quantiles_sketch<T, C, A>::get_comparator() const {
   return C();
 }
 
+template<typename T, typename C, typename A>
+A quantiles_sketch<T, C, A>::get_allocator() const {
+  return allocator_;
+}
+
 // implementation for fixed-size arithmetic types (integral and floating point)
 template<typename T, typename C, typename A>
 template<typename SerDe, typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
@@ -783,9 +847,9 @@ auto quantiles_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size) co
 
 template<typename T, typename C, typename A>
 uint32_t quantiles_sketch<T, C, A>::compute_retained_items(const uint16_t k, const uint64_t n) {
-  uint32_t bb_count = compute_base_buffer_items(k, n);
-  uint64_t bit_pattern = compute_bit_pattern(k, n);
-  uint32_t valid_levels = compute_valid_levels(bit_pattern);
+  const uint32_t bb_count = compute_base_buffer_items(k, n);
+  const uint64_t bit_pattern = compute_bit_pattern(k, n);
+  const uint32_t valid_levels = compute_valid_levels(bit_pattern);
   return bb_count + (k * valid_levels);
 }
 
@@ -843,11 +907,11 @@ void quantiles_sketch<T, C, A>::check_family_id(uint8_t family_id) {
 
 template<typename T, typename C, typename A>
 void quantiles_sketch<T, C, A>::check_header_validity(uint8_t preamble_longs, uint8_t flags_byte, uint8_t serial_version) {
-  bool empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
-  bool compact = (flags_byte & (1 << flags::IS_COMPACT)) > 0;
+  const bool empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
+  const bool compact = (flags_byte & (1 << flags::IS_COMPACT)) > 0;
 
-  uint8_t sw = (compact ? 1 : 0) + (2 * (empty ? 1 : 0))
-
+  const uint8_t sw = (compact ? 1 : 0) + (2 * (empty ? 1 : 0))
+      + (4 * (serial_version & 0xF)) + (32 * (preamble_longs & 0x3F));
   bool valid = true;
 
   switch (sw) { // exhaustive list and description of all valid cases
@@ -888,7 +952,7 @@ typename quantiles_sketch<T, C, A>::const_iterator quantiles_sketch<T, C, A>::en
 
 template<typename T, typename C, typename A>
 void quantiles_sketch<T, C, A>::grow_base_buffer() {
-  size_t new_size = std::max(std::min(static_cast<size_t>(2 * k_), 2 * base_buffer_.size()), static_cast<size_t>(1));
+  const size_t new_size = std::max(std::min(static_cast<size_t>(2 * k_), 2 * base_buffer_.size()), static_cast<size_t>(1));
   base_buffer_.reserve(new_size);
 }
 
@@ -912,7 +976,7 @@ void quantiles_sketch<T, C, A>::process_full_base_buffer() {
 
 template<typename T, typename C, typename A>
 bool quantiles_sketch<T, C, A>::grow_levels_if_needed() {
-  uint8_t levels_needed = compute_levels_needed(k_, n_);
+  const uint8_t levels_needed = compute_levels_needed(k_, n_);
   if (levels_needed == 0)
     return false; // don't need levels and might have small base buffer. Possible during merges.
 
@@ -992,7 +1056,7 @@ template<typename FwdV>
 void quantiles_sketch<T, C, A>::zip_buffer_with_stride(FwdV&& buf_in, Level& buf_out, uint16_t stride) {
   // Random offset in range [0, stride)
   std::uniform_int_distribution<uint16_t> dist(0, stride - 1);
-  uint16_t rand_offset = dist(random_utils::rand);
+  const uint16_t rand_offset = dist(random_utils::rand);
 
   if ((buf_in.size() != stride * buf_out.capacity())
       || (buf_out.size() > 0)) {
@@ -1000,7 +1064,7 @@ void quantiles_sketch<T, C, A>::zip_buffer_with_stride(FwdV&& buf_in, Level& buf
         "stride*buf_out.capacity() and empty buf_out");
   }
 
-  size_t k = buf_out.capacity();
+  const size_t k = buf_out.capacity();
   for (uint16_t i = rand_offset, o = 0; o < k; i += stride, ++o) {
     buf_out.push_back(conditional_forward<FwdV>(buf_in[i]));
   }
@@ -1117,7 +1181,7 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
   const uint16_t downsample_factor = src.get_k() / tgt.get_k();
   const uint8_t lg_sample_factor = count_trailing_zeros_in_u32(downsample_factor);
 
-  uint64_t new_n = src.get_n() + tgt.get_n();
+  const uint64_t new_n = src.get_n() + tgt.get_n();
 
   // move items from src's base buffer
   for (uint16_t i = 0; i < src.base_buffer_.size(); ++i) {
@@ -1125,7 +1189,7 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
   }
 
   // check (after moving raw items) if we need to extend levels array
-  uint8_t levels_needed = compute_levels_needed(tgt.get_k(), new_n);
+  const uint8_t levels_needed = compute_levels_needed(tgt.get_k(), new_n);
   if (levels_needed > tgt.levels_.size()) {
     tgt.levels_.reserve(levels_needed);
     while (tgt.levels_.size() < levels_needed) {
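The converting constructor above rebuilds the target sketch from the source's (item, weight) iterator: weight-1 items go back into the base buffer, and an item whose weight is 2^(i+1) belongs to level i, which is what the count_trailing_zeros_in_u64(weight) - 1 expression computes. A small illustrative sketch of that mapping, with a hypothetical stand-in for the library's trailing-zeros helper:

```cpp
#include <cstdint>
#include <iostream>

// Hypothetical stand-in for the library's count_trailing_zeros_in_u64 helper.
static uint8_t trailing_zeros(uint64_t v) {
  uint8_t n = 0;
  while ((v & 1) == 0) { v >>= 1; ++n; }
  return n;
}

int main() {
  // Weights reported by the quantiles sketch iterator: 1 for base-buffer items,
  // 2^(level + 1) for items retained at a given level.
  const uint64_t weights[] = {1, 2, 4, 8, 16};
  for (uint64_t weight : weights) {
    if (weight == 1) {
      std::cout << "weight 1 -> base buffer\n";
    } else {
      std::cout << "weight " << weight << " -> level "
                << static_cast<int>(trailing_zeros(weight)) - 1 << "\n";
    }
  }
  return 0;
}
```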
@@ -82,7 +82,7 @@ TEST_CASE("kolmogorov-smirnov slightly different distributions", "[quantiles_ske
|
|
82
82
|
const double delta = kolmogorov_smirnov::delta(sketch1, sketch2);
|
83
83
|
REQUIRE(delta == Approx(0.02).margin(0.01));
|
84
84
|
const double threshold = kolmogorov_smirnov::threshold(sketch1, sketch2, 0.05);
|
85
|
-
|
85
|
+
|
86
86
|
REQUIRE_FALSE(delta > threshold);
|
87
87
|
REQUIRE_FALSE(kolmogorov_smirnov::test(sketch1, sketch2, 0.05));
|
88
88
|
}
|
@@ -102,7 +102,7 @@ TEST_CASE("kolmogorov-smirnov slightly different distributions high resolution",
|
|
102
102
|
const double delta = kolmogorov_smirnov::delta(sketch1, sketch2);
|
103
103
|
REQUIRE(delta == Approx(0.02).margin(0.01));
|
104
104
|
const double threshold = kolmogorov_smirnov::threshold(sketch1, sketch2, 0.05);
|
105
|
-
|
105
|
+
|
106
106
|
REQUIRE(delta > threshold);
|
107
107
|
REQUIRE(kolmogorov_smirnov::test(sketch1, sketch2, 0.05));
|
108
108
|
}
|
@@ -903,6 +903,69 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
903
903
|
}
|
904
904
|
}
|
905
905
|
|
906
|
+
SECTION("Type converting copy constructor") {
|
907
|
+
const uint16_t k = 8;
|
908
|
+
const int n = 403;
|
909
|
+
quantiles_sketch<double> sk_double(k);
|
910
|
+
|
911
|
+
quantiles_sketch<float> sk_float(k, sk_double.get_allocator());
|
912
|
+
REQUIRE(sk_float.is_empty());
|
913
|
+
|
914
|
+
for (int i = 0; i < n; ++i) sk_double.update(i + .01);
|
915
|
+
|
916
|
+
quantiles_sketch<int> sk_int(sk_double);
|
917
|
+
REQUIRE(sk_double.get_n() == sk_int.get_n());
|
918
|
+
REQUIRE(sk_double.get_k() == sk_int.get_k());
|
919
|
+
REQUIRE(sk_double.get_num_retained() == sk_int.get_num_retained());
|
920
|
+
|
921
|
+
auto sv_double = sk_double.get_sorted_view(false);
|
922
|
+
std::vector<std::pair<double, uint64_t>> vec_double(sv_double.begin(), sv_double.end());
|
923
|
+
|
924
|
+
auto sv_int = sk_int.get_sorted_view(false);
|
925
|
+
std::vector<std::pair<int, uint64_t>> vec_int(sv_int.begin(), sv_int.end());
|
926
|
+
|
927
|
+
REQUIRE(vec_double.size() == vec_int.size());
|
928
|
+
|
929
|
+
for (size_t i = 0; i < vec_int.size(); ++i) {
|
930
|
+
// known truncation with conversion so approximate result
|
931
|
+
REQUIRE(vec_double[i].first == Approx(vec_int[i].first).margin(0.1));
|
932
|
+
// exact equality for weights
|
933
|
+
REQUIRE(vec_double[i].second == vec_int[i].second);
|
934
|
+
}
|
935
|
+
}
|
936
|
+
|
937
|
+
class A {
|
938
|
+
int val;
|
939
|
+
public:
|
940
|
+
A(int val): val(val) {}
|
941
|
+
int get_val() const { return val; }
|
942
|
+
};
|
943
|
+
|
944
|
+
struct less_A {
|
945
|
+
bool operator()(const A& a1, const A& a2) const { return a1.get_val() < a2.get_val(); }
|
946
|
+
};
|
947
|
+
|
948
|
+
class B {
|
949
|
+
int val;
|
950
|
+
public:
|
951
|
+
explicit B(const A& a): val(a.get_val()) {}
|
952
|
+
int get_val() const { return val; }
|
953
|
+
};
|
954
|
+
|
955
|
+
struct less_B {
|
956
|
+
bool operator()(const B& b1, const B& b2) const { return b1.get_val() < b2.get_val(); }
|
957
|
+
};
|
958
|
+
|
959
|
+
SECTION("type conversion: custom types") {
|
960
|
+
quantiles_sketch<A, less_A> sa;
|
961
|
+
sa.update(1);
|
962
|
+
sa.update(2);
|
963
|
+
sa.update(3);
|
964
|
+
|
965
|
+
quantiles_sketch<B, less_B> sb(sa);
|
966
|
+
REQUIRE(sb.get_n() == 3);
|
967
|
+
}
|
968
|
+
|
906
969
|
// cleanup
|
907
970
|
if (test_allocator_total_bytes != 0) {
|
908
971
|
REQUIRE(test_allocator_total_bytes == 0);
|
data/vendor/datasketches-cpp/req/include/req_compactor.hpp CHANGED
@@ -38,6 +38,9 @@ public:
   req_compactor& operator=(const req_compactor& other);
   req_compactor& operator=(req_compactor&& other);
 
+  template<typename TT, typename CC, typename AA>
+  req_compactor(const req_compactor<TT, CC, AA>& other, const Allocator& allocator);
+
   bool is_sorted() const;
   uint32_t get_num_items() const;
   uint32_t get_nom_capacity() const;
@@ -128,6 +131,9 @@ private:
   template<typename S>
   static std::pair<std::unique_ptr<T, items_deleter>, size_t> deserialize_items(const void* bytes, size_t size, const S& serde, const Allocator& allocator, uint32_t num);
 
+  // for type converting constructor
+  template<typename TT, typename CC, typename AA>
+  friend class req_compactor;
 };
 
 } /* namespace datasketches */
data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp CHANGED
@@ -132,6 +132,33 @@ req_compactor<T, C, A>& req_compactor<T, C, A>::operator=(req_compactor&& other)
   return *this;
 }
 
+template<typename T, typename C, typename A>
+template<typename TT, typename CC, typename AA>
+req_compactor<T, C, A>::req_compactor(const req_compactor<TT, CC, AA>& other, const A& allocator):
+allocator_(allocator),
+lg_weight_(other.lg_weight_),
+hra_(other.hra_),
+coin_(other.coin_),
+sorted_(other.sorted_),
+section_size_raw_(other.section_size_raw_),
+section_size_(other.section_size_),
+num_sections_(other.num_sections_),
+state_(other.state_),
+num_items_(other.num_items_),
+capacity_(other.capacity_),
+items_(nullptr)
+{
+  if (other.items_ != nullptr) {
+    items_ = allocator_.allocate(capacity_);
+    const uint32_t from = hra_ ? capacity_ - num_items_ : 0;
+    const uint32_t to = hra_ ? capacity_ : num_items_;
+    for (uint32_t i = from; i < to; ++i) new (items_ + i) T(other.items_[i]);
+    if (sorted_ && !std::is_sorted(items_ + from, items_ + to, C())) {
+      throw std::logic_error("items must be sorted");
+    }
+  }
+}
+
 template<typename T, typename C, typename A>
 bool req_compactor<T, C, A>::is_sorted() const {
   return sorted_;
data/vendor/datasketches-cpp/req/include/req_sketch.hpp CHANGED
@@ -58,6 +58,14 @@ public:
   req_sketch& operator=(const req_sketch& other);
   req_sketch& operator=(req_sketch&& other);
 
+  /*
+   * Type converting constructor.
+   * @param other sketch of a different type
+   * @param allocator instance of an Allocator
+   */
+  template<typename TT, typename CC, typename SS, typename AA>
+  explicit req_sketch(const req_sketch<TT, CC, SS, AA>& other, const Allocator& allocator = Allocator());
+
   /**
    * Returns configured parameter K
    * @return parameter K
@@ -408,6 +416,9 @@ private:
     }
   }
 
+  // for type converting constructor
+  template<typename TT, typename CC, typename SS, typename AA>
+  friend class req_sketch;
 };
 
 template<typename T, typename C, typename S, typename A>
data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp CHANGED
@@ -64,8 +64,8 @@ compactors_(other.compactors_),
 min_value_(nullptr),
 max_value_(nullptr)
 {
-  if (other.min_value_ != nullptr) min_value_ = new (
-  if (other.max_value_ != nullptr) max_value_ = new (
+  if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
+  if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
 }
 
 template<typename T, typename C, typename S, typename A>
@@ -113,6 +113,33 @@ req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(req_sketch&& other) {
   return *this;
 }
 
+template<typename T, typename C, typename S, typename A>
+template<typename TT, typename CC, typename SS, typename AA>
+req_sketch<T, C, S, A>::req_sketch(const req_sketch<TT, CC, SS, AA>& other, const A& allocator):
+allocator_(allocator),
+k_(other.k_),
+hra_(other.hra_),
+max_nom_size_(other.max_nom_size_),
+num_retained_(other.num_retained_),
+n_(other.n_),
+compactors_(allocator),
+min_value_(nullptr),
+max_value_(nullptr)
+{
+  static_assert(
+    std::is_constructible<T, TT>::value,
+    "Type converting constructor requires new type to be constructible from existing type"
+  );
+  compactors_.reserve(other.compactors_.size());
+  for (const auto& compactor: other.compactors_) {
+    compactors_.push_back(req_compactor<T, C, A>(compactor, allocator_));
+  }
+  if (!other.is_empty()) {
+    min_value_ = new (allocator_.allocate(1)) T(other.get_min_value());
+    max_value_ = new (allocator_.allocate(1)) T(other.get_max_value());
+  }
+}
+
 template<typename T, typename C, typename S, typename A>
 uint16_t req_sketch<T, C, S, A>::get_k() const {
   return k_;
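As with the KLL and quantiles variants, the REQ converting constructor only compiles when the target item type is constructible from the source item type; the static_assert turns a violation into a clear diagnostic. A minimal sketch of that requirement, mirroring the custom A/B types used in the tests below:

```cpp
#include <type_traits>

// Mirrors the A/B pattern from the tests: B must be constructible from A
// for req_sketch<B, less_B> to be built from req_sketch<A, less_A>.
class A {
  int val;
public:
  explicit A(int v): val(v) {}
  int get_val() const { return val; }
};

class B {
  int val;
public:
  explicit B(const A& a): val(a.get_val()) {}
  int get_val() const { return val; }
};

static_assert(std::is_constructible<B, A>::value,
              "B can be built from A, so the sketch conversion is allowed");

int main() { return 0; }
```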
data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp CHANGED
@@ -35,7 +35,7 @@ const std::string input_path = "test/";
 #endif
 
 TEST_CASE("req sketch: empty", "[req_sketch]") {
-  std::cout << "sizeof(req_float_sketch)=" << sizeof(req_sketch<float>) << "\n";
+  //std::cout << "sizeof(req_float_sketch)=" << sizeof(req_sketch<float>) << "\n";
   req_sketch<float> sketch(12);
   REQUIRE(sketch.get_k() == 12);
   REQUIRE(sketch.is_HRA());
@@ -245,7 +245,7 @@ TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]")
   auto bytes = sketch.serialize();
   REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
   auto sketch2 = req_sketch<float>::deserialize(bytes.data(), bytes.size());
-  std::cout << sketch2.to_string(true);
+  //std::cout << sketch2.to_string(true);
   REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
   REQUIRE(sketch2.is_empty() == sketch.is_empty());
   REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
@@ -282,7 +282,7 @@ TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
   auto bytes = sketch.serialize();
   REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
   auto sketch2 = req_sketch<float>::deserialize(bytes.data(), bytes.size());
-  std::cout << sketch2.to_string(true);
+  //std::cout << sketch2.to_string(true);
   REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
   REQUIRE(sketch2.is_empty() == sketch.is_empty());
   REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
@@ -485,6 +485,72 @@ TEST_CASE("req sketch: merge incompatible HRA and LRA", "[req_sketch]") {
   REQUIRE_THROWS_AS(sketch1.merge(sketch2), std::invalid_argument);
 }
 
+TEST_CASE("req sketch: type conversion - empty", "[req_sketch]") {
+  req_sketch<double> req_double(12);
+  req_sketch<float> req_float(req_double);
+  REQUIRE(req_float.is_empty());
+  REQUIRE(req_float.get_k() == req_double.get_k());
+  REQUIRE(req_float.get_n() == 0);
+  REQUIRE(req_float.get_num_retained() == 0);
+}
+
+TEST_CASE("req sketch: type conversion - several levels", "[req_sketch]") {
+  req_sketch<double> req_double(12);
+  for (int i = 0; i < 1000; ++i) req_double.update(static_cast<double>(i));
+  req_sketch<float> req_float(req_double);
+  REQUIRE(!req_float.is_empty());
+  REQUIRE(req_float.get_k() == req_double.get_k());
+  REQUIRE(req_float.get_n() == req_double.get_n());
+  REQUIRE(req_float.get_num_retained() == req_double.get_num_retained());
+
+  auto sv_float = req_float.get_sorted_view(false);
+  auto sv_double = req_double.get_sorted_view(false);
+  auto sv_float_it = sv_float.begin();
+  auto sv_double_it = sv_double.begin();
+  while (sv_float_it != sv_float.end()) {
+    REQUIRE(sv_double_it != sv_double.end());
+    auto float_pair = *sv_float_it;
+    auto double_pair = *sv_double_it;
+    REQUIRE(float_pair.first == Approx(double_pair.first).margin(0.01));
+    REQUIRE(float_pair.second == double_pair.second);
+    ++sv_float_it;
+    ++sv_double_it;
+  }
+  REQUIRE(sv_double_it == sv_double.end());
+}
+
+class A {
+  int val;
+public:
+  A(int val): val(val) {}
+  int get_val() const { return val; }
+};
+
+struct less_A {
+  bool operator()(const A& a1, const A& a2) const { return a1.get_val() < a2.get_val(); }
+};
+
+class B {
+  int val;
+public:
+  explicit B(const A& a): val(a.get_val()) {}
+  int get_val() const { return val; }
+};
+
+struct less_B {
+  bool operator()(const B& b1, const B& b2) const { return b1.get_val() < b2.get_val(); }
+};
+
+TEST_CASE("req sketch: type conversion - custom types") {
+  req_sketch<A, less_A> sa(4);
+  sa.update(1);
+  sa.update(2);
+  sa.update(3);
+
+  req_sketch<B, less_B> sb(sa);
+  REQUIRE(sb.get_n() == 3);
+}
+
 //TEST_CASE("for manual comparison with Java") {
 //  req_sketch<float> sketch(12, false);
 //  for (size_t i = 0; i < 100000; ++i) sketch.update(i);
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: datasketches
 version: !ruby/object:Gem::Version
-  version: 0.2.5
+  version: 0.2.6
 platform: ruby
 authors:
 - Andrew Kane
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-
+date: 2022-07-13 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rice