datasketches 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/datasketches/kll_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/NOTICE +6 -5
- data/vendor/datasketches-cpp/common/CMakeLists.txt +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +2 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +22 -9
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +47 -9
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +71 -6
- data/vendor/datasketches-cpp/python/README.md +6 -9
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +15 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +78 -14
- data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +2 -2
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +63 -0
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +6 -0
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +27 -0
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +11 -0
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +29 -2
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +69 -3
- data/vendor/datasketches-cpp/setup.py +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cf1ea0f9f2d12b0e46c2d4c7dec21f41992e711e73eca68ea1ef03a4bb711077
|
4
|
+
data.tar.gz: 92f56b63da0254962be47d8d3e00a6950a271053bf3152167f95e6fdb99528e6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5841d4a70f1e852faa150f57ebfefc7b975de020782c41eebdad87a01d016be9bdf86f86173600632bf6f56300df0c9c4196251aa5df02a47ecd357ac844ef80
|
7
|
+
data.tar.gz: d6ae7c811e0e2c2008b912e29f86d1b99491c74cd878790dfd800811a007f0dbf9c49bb59db30345450ff82673381f2c036a84a57dc44a6f6751610d9be2ee88
|
data/CHANGELOG.md
CHANGED
@@ -55,12 +55,12 @@ void bind_kll_sketch(Rice::Module& m, const char* name) {
|
|
55
55
|
})
|
56
56
|
.define_method(
|
57
57
|
"pmf",
|
58
|
-
[](kll_sketch<T>& self, std::vector<T
|
58
|
+
[](kll_sketch<T>& self, const std::vector<T>& split_points) {
|
59
59
|
return self.get_PMF(&split_points[0], split_points.size());
|
60
60
|
})
|
61
61
|
.define_method(
|
62
62
|
"cdf",
|
63
|
-
[](kll_sketch<T>& self, std::vector<T
|
63
|
+
[](kll_sketch<T>& self, const std::vector<T>& split_points) {
|
64
64
|
return self.get_CDF(&split_points[0], split_points.size());
|
65
65
|
})
|
66
66
|
.define_method(
|
data/lib/datasketches/version.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
|
-
Apache DataSketches
|
2
|
-
Copyright
|
1
|
+
Apache DataSketches C++ and Python
|
2
|
+
Copyright 2022 The Apache Software Foundation
|
3
3
|
|
4
|
-
Copyright 2015-2018 Yahoo
|
5
|
-
Copyright 2019 Verizon Media
|
4
|
+
Copyright 2015-2018 Yahoo Inc.
|
5
|
+
Copyright 2019-2020 Verizon Media
|
6
|
+
Copyright 2021 Yahoo Inc.
|
6
7
|
|
7
8
|
This product includes software developed at
|
8
9
|
The Apache Software Foundation (http://www.apache.org/).
|
9
10
|
|
10
11
|
Prior to moving to ASF, the software for this project was developed at
|
11
|
-
Yahoo
|
12
|
+
Yahoo Inc. (https://developer.yahoo.com).
|
@@ -43,8 +43,8 @@ install(FILES
|
|
43
43
|
include/conditional_forward.hpp
|
44
44
|
include/ceiling_power_of_2.hpp
|
45
45
|
include/bounds_binomial_proportions.hpp
|
46
|
-
include/kolmogorov_smirnov.hpp
|
47
|
-
include/kolmogorov_smirnov_impl.hpp
|
48
46
|
include/quantile_sketch_sorted_view.hpp
|
49
47
|
include/quantile_sketch_sorted_view_impl.hpp
|
48
|
+
include/kolmogorov_smirnov.hpp
|
49
|
+
include/kolmogorov_smirnov_impl.hpp
|
50
50
|
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|
@@ -297,6 +297,7 @@ void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& sourc
|
|
297
297
|
// changes the implied ordering of the pairs, so we must do it before sorting.
|
298
298
|
|
299
299
|
const uint8_t pseudo_phase = determine_pseudo_phase(source.get_lg_k(), source.get_num_coupons());
|
300
|
+
if (pseudo_phase >= 16) throw std::logic_error("unexpected pseudo phase for sliding flavor");
|
300
301
|
const uint8_t* permutation = column_permutations_for_encoding[pseudo_phase];
|
301
302
|
|
302
303
|
const uint8_t offset = source.window_offset;
|
@@ -333,7 +334,7 @@ void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& sou
|
|
333
334
|
lg_k, source.table_data.get_allocator());
|
334
335
|
|
335
336
|
const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
|
336
|
-
if (pseudo_phase >= 16) throw std::logic_error("pseudo phase
|
337
|
+
if (pseudo_phase >= 16) throw std::logic_error("unexpected pseudo phase for sliding flavor");
|
337
338
|
const uint8_t* permutation = column_permutations_for_decoding[pseudo_phase];
|
338
339
|
|
339
340
|
uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k, num_coupons);
|
@@ -230,7 +230,7 @@ kll_helper::compress_result kll_helper::general_compress(uint16_t k, uint8_t m,
|
|
230
230
|
// move level over as is
|
231
231
|
// make sure we are not moving data upwards
|
232
232
|
if (raw_beg < out_levels[current_level]) throw std::logic_error("wrong move");
|
233
|
-
std::move(
|
233
|
+
std::move(items + raw_beg, items + raw_lim, items + out_levels[current_level]);
|
234
234
|
out_levels[current_level + 1] = out_levels[current_level] + raw_pop;
|
235
235
|
} else {
|
236
236
|
// The sketch is too full AND this level is too full, so we compact it
|
@@ -251,7 +251,7 @@ kll_helper::compress_result kll_helper::general_compress(uint16_t k, uint8_t m,
|
|
251
251
|
|
252
252
|
// level zero might not be sorted, so we must sort it if we wish to compact it
|
253
253
|
if ((current_level == 0) && !is_level_zero_sorted) {
|
254
|
-
std::sort(
|
254
|
+
std::sort(items + adj_beg, items + adj_beg + adj_pop, C());
|
255
255
|
}
|
256
256
|
|
257
257
|
if (pop_above == 0) { // Level above is empty, so halve up
|
@@ -170,7 +170,7 @@ class kll_sketch {
|
|
170
170
|
using comparator = C;
|
171
171
|
|
172
172
|
static const uint8_t DEFAULT_M = 8;
|
173
|
-
// TODO: Redundant and deprecated. Will be
|
173
|
+
// TODO: Redundant and deprecated. Will be removed in next major version.
|
174
174
|
static const uint16_t DEFAULT_K = kll_constants::DEFAULT_K;
|
175
175
|
static const uint16_t MIN_K = DEFAULT_M;
|
176
176
|
static const uint16_t MAX_K = (1 << 16) - 1;
|
@@ -182,6 +182,14 @@ class kll_sketch {
|
|
182
182
|
kll_sketch& operator=(const kll_sketch& other);
|
183
183
|
kll_sketch& operator=(kll_sketch&& other);
|
184
184
|
|
185
|
+
/*
|
186
|
+
* Type converting constructor.
|
187
|
+
* @param other sketch of a different type
|
188
|
+
* @param allocator instance of an Allocator
|
189
|
+
*/
|
190
|
+
template<typename TT, typename CC, typename SS, typename AA>
|
191
|
+
explicit kll_sketch(const kll_sketch<TT, CC, SS, AA>& other, const A& allocator = A());
|
192
|
+
|
185
193
|
/**
|
186
194
|
* Updates this sketch with the given data item.
|
187
195
|
* @param value an item from a stream of items
|
@@ -390,7 +398,7 @@ class kll_sketch {
|
|
390
398
|
/**
|
391
399
|
* Computes size needed to serialize the current state of the sketch.
|
392
400
|
* This version is for fixed-size arithmetic types (integral and floating point).
|
393
|
-
* @param instance of a SerDe
|
401
|
+
* @param serde instance of a SerDe
|
394
402
|
* @return size in bytes needed to serialize this sketch
|
395
403
|
*/
|
396
404
|
template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
|
@@ -399,7 +407,7 @@ class kll_sketch {
|
|
399
407
|
/**
|
400
408
|
* Computes size needed to serialize the current state of the sketch.
|
401
409
|
* This version is for all other types and can be expensive since every item needs to be looked at.
|
402
|
-
* @param instance of a SerDe
|
410
|
+
* @param serde instance of a SerDe
|
403
411
|
* @return size in bytes needed to serialize this sketch
|
404
412
|
*/
|
405
413
|
template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
|
@@ -459,7 +467,7 @@ class kll_sketch {
|
|
459
467
|
/**
|
460
468
|
* This method deserializes a sketch from a given stream.
|
461
469
|
* @param is input stream
|
462
|
-
* @param instance of an Allocator
|
470
|
+
* @param allocator instance of an Allocator
|
463
471
|
* @return an instance of a sketch
|
464
472
|
*
|
465
473
|
* Deprecated, to be removed in the next major version
|
@@ -469,8 +477,8 @@ class kll_sketch {
|
|
469
477
|
/**
|
470
478
|
* This method deserializes a sketch from a given stream.
|
471
479
|
* @param is input stream
|
472
|
-
* @param instance of a SerDe
|
473
|
-
* @param instance of an Allocator
|
480
|
+
* @param serde instance of a SerDe
|
481
|
+
* @param allocator instance of an Allocator
|
474
482
|
* @return an instance of a sketch
|
475
483
|
*/
|
476
484
|
template<typename SerDe = S>
|
@@ -480,7 +488,7 @@ class kll_sketch {
|
|
480
488
|
* This method deserializes a sketch from a given array of bytes.
|
481
489
|
* @param bytes pointer to the array of bytes
|
482
490
|
* @param size the size of the array
|
483
|
-
* @param instance of an Allocator
|
491
|
+
* @param allocator instance of an Allocator
|
484
492
|
* @return an instance of a sketch
|
485
493
|
*
|
486
494
|
* Deprecated, to be removed in the next major version
|
@@ -491,8 +499,8 @@ class kll_sketch {
|
|
491
499
|
* This method deserializes a sketch from a given array of bytes.
|
492
500
|
* @param bytes pointer to the array of bytes
|
493
501
|
* @param size the size of the array
|
494
|
-
* @param instance of a SerDe
|
495
|
-
* @param instance of an Allocator
|
502
|
+
* @param serde instance of a SerDe
|
503
|
+
* @param allocator instance of an Allocator
|
496
504
|
* @return an instance of a sketch
|
497
505
|
*/
|
498
506
|
template<typename SerDe = S>
|
@@ -606,6 +614,8 @@ class kll_sketch {
|
|
606
614
|
static void check_serial_version(uint8_t serial_version);
|
607
615
|
static void check_family_id(uint8_t family_id);
|
608
616
|
|
617
|
+
void check_sorting() const;
|
618
|
+
|
609
619
|
// implementations for floating point types
|
610
620
|
template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
|
611
621
|
static const TT& get_invalid_value() {
|
@@ -629,6 +639,9 @@ class kll_sketch {
|
|
629
639
|
return true;
|
630
640
|
}
|
631
641
|
|
642
|
+
// for type converting constructor
|
643
|
+
template<typename TT, typename CC, typename SS, typename AA>
|
644
|
+
friend class kll_sketch;
|
632
645
|
};
|
633
646
|
|
634
647
|
template<typename T, typename C, typename S, typename A>
|
@@ -26,6 +26,7 @@
|
|
26
26
|
#include <stdexcept>
|
27
27
|
|
28
28
|
#include "conditional_forward.hpp"
|
29
|
+
#include "count_zeros.hpp"
|
29
30
|
#include "memory_operations.hpp"
|
30
31
|
#include "kll_helper.hpp"
|
31
32
|
|
@@ -69,7 +70,7 @@ max_value_(nullptr),
|
|
69
70
|
is_level_zero_sorted_(other.is_level_zero_sorted_)
|
70
71
|
{
|
71
72
|
items_ = allocator_.allocate(items_size_);
|
72
|
-
|
73
|
+
for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
|
73
74
|
if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
|
74
75
|
if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
|
75
76
|
}
|
@@ -147,6 +148,33 @@ kll_sketch<T, C, S, A>::~kll_sketch() {
|
|
147
148
|
}
|
148
149
|
}
|
149
150
|
|
151
|
+
template<typename T, typename C, typename S, typename A>
|
152
|
+
template<typename TT, typename CC, typename SS, typename AA>
|
153
|
+
kll_sketch<T, C, S, A>::kll_sketch(const kll_sketch<TT, CC, SS, AA>& other, const A& allocator):
|
154
|
+
allocator_(allocator),
|
155
|
+
k_(other.k_),
|
156
|
+
m_(other.m_),
|
157
|
+
min_k_(other.min_k_),
|
158
|
+
n_(other.n_),
|
159
|
+
num_levels_(other.num_levels_),
|
160
|
+
levels_(other.levels_, allocator_),
|
161
|
+
items_(nullptr),
|
162
|
+
items_size_(other.items_size_),
|
163
|
+
min_value_(nullptr),
|
164
|
+
max_value_(nullptr),
|
165
|
+
is_level_zero_sorted_(other.is_level_zero_sorted_)
|
166
|
+
{
|
167
|
+
static_assert(
|
168
|
+
std::is_constructible<T, TT>::value,
|
169
|
+
"Type converting constructor requires new type to be constructible from existing type"
|
170
|
+
);
|
171
|
+
items_ = allocator_.allocate(items_size_);
|
172
|
+
for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
|
173
|
+
if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
|
174
|
+
if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
|
175
|
+
check_sorting();
|
176
|
+
}
|
177
|
+
|
150
178
|
template<typename T, typename C, typename S, typename A>
|
151
179
|
template<typename FwdT>
|
152
180
|
void kll_sketch<T, C, S, A>::update(FwdT&& value) {
|
@@ -305,8 +333,8 @@ double kll_sketch<T, C, S, A>::get_rank(const T& value) const {
|
|
305
333
|
uint64_t weight = 1;
|
306
334
|
uint64_t total = 0;
|
307
335
|
while (level < num_levels_) {
|
308
|
-
const auto from_index
|
309
|
-
const auto to_index
|
336
|
+
const auto from_index = levels_[level];
|
337
|
+
const auto to_index = levels_[level + 1]; // exclusive
|
310
338
|
for (uint32_t i = from_index; i < to_index; i++) {
|
311
339
|
if (inclusive ? !C()(value, items_[i]) : C()(items_[i], value)) {
|
312
340
|
total += weight;
|
@@ -694,7 +722,7 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
|
|
694
722
|
// level zero might not be sorted, so we must sort it if we wish to compact it
|
695
723
|
// sort_level_zero() is not used here because of the adjustment for odd number of items
|
696
724
|
if ((level == 0) && !is_level_zero_sorted_) {
|
697
|
-
std::sort(
|
725
|
+
std::sort(items_ + adj_beg, items_ + adj_beg + adj_pop, C());
|
698
726
|
}
|
699
727
|
if (pop_above == 0) {
|
700
728
|
kll_helper::randomly_halve_up(items_, adj_beg, adj_pop);
|
@@ -717,7 +745,7 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
|
|
717
745
|
// so that the freed-up space can be used by level zero
|
718
746
|
if (level > 0) {
|
719
747
|
const uint32_t amount = raw_beg - levels_[0];
|
720
|
-
std::move_backward(
|
748
|
+
std::move_backward(items_ + levels_[0], items_ + levels_[0] + amount, items_ + levels_[0] + half_adj_pop + amount);
|
721
749
|
for (uint8_t lvl = 0; lvl < level; lvl++) levels_[lvl] += half_adj_pop;
|
722
750
|
}
|
723
751
|
for (uint32_t i = 0; i < half_adj_pop; i++) items_[i + destroy_beg].~T();
|
@@ -775,22 +803,32 @@ void kll_sketch<T, C, S, A>::add_empty_top_level_to_completely_full_sketch() {
|
|
775
803
|
template<typename T, typename C, typename S, typename A>
|
776
804
|
void kll_sketch<T, C, S, A>::sort_level_zero() {
|
777
805
|
if (!is_level_zero_sorted_) {
|
778
|
-
std::sort(
|
806
|
+
std::sort(items_ + levels_[0], items_ + levels_[1], C());
|
779
807
|
is_level_zero_sorted_ = true;
|
780
808
|
}
|
781
809
|
}
|
782
810
|
|
811
|
+
template<typename T, typename C, typename S, typename A>
|
812
|
+
void kll_sketch<T, C, S, A>::check_sorting() const {
|
813
|
+
// not checking level 0
|
814
|
+
for (uint8_t level = 1; level < num_levels_; ++level) {
|
815
|
+
const auto from = items_ + levels_[level];
|
816
|
+
const auto to = items_ + levels_[level + 1];
|
817
|
+
if (!std::is_sorted(from, to, C())) {
|
818
|
+
throw std::logic_error("levels must be sorted");
|
819
|
+
}
|
820
|
+
}
|
821
|
+
}
|
822
|
+
|
783
823
|
template<typename T, typename C, typename S, typename A>
|
784
824
|
template<bool inclusive>
|
785
825
|
quantile_sketch_sorted_view<T, C, A> kll_sketch<T, C, S, A>::get_sorted_view(bool cumulative) const {
|
786
826
|
const_cast<kll_sketch*>(this)->sort_level_zero(); // allow this side effect
|
787
827
|
quantile_sketch_sorted_view<T, C, A> view(get_num_retained(), allocator_);
|
788
|
-
uint8_t level = 0;
|
789
|
-
while (level < num_levels_) {
|
828
|
+
for (uint8_t level = 0; level < num_levels_; ++level) {
|
790
829
|
const auto from = items_ + levels_[level];
|
791
830
|
const auto to = items_ + levels_[level + 1]; // exclusive
|
792
831
|
view.add(from, to, 1 << level);
|
793
|
-
++level;
|
794
832
|
}
|
795
833
|
if (cumulative) view.template convert_to_cummulative<inclusive>();
|
796
834
|
return view;
|
@@ -39,9 +39,9 @@ static std::string testBinaryInputPath = "test/";
|
|
39
39
|
#endif
|
40
40
|
|
41
41
|
// typical usage would be just kll_sketch<float> or kll_sketch<std::string>, but here we use test_allocator
|
42
|
-
|
42
|
+
using kll_float_sketch = kll_sketch<float, std::less<float>, serde<float>, test_allocator<float>>;
|
43
43
|
// let std::string use the default allocator for simplicity, otherwise we need to define "less" and "serde"
|
44
|
-
|
44
|
+
using kll_string_sketch = kll_sketch<std::string, std::less<std::string>, serde<std::string>, test_allocator<std::string>>;
|
45
45
|
|
46
46
|
TEST_CASE("kll sketch", "[kll_sketch]") {
|
47
47
|
|
@@ -75,7 +75,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
75
75
|
(void) it; // to suppress "unused" warning
|
76
76
|
FAIL("should be no iterations over an empty sketch");
|
77
77
|
}
|
78
|
-
}
|
78
|
+
}
|
79
79
|
|
80
80
|
SECTION("get bad quantile") {
|
81
81
|
kll_float_sketch sketch(200, 0);
|
@@ -835,10 +835,75 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
|
|
835
835
|
REQUIRE((*it).second == 3);
|
836
836
|
}
|
837
837
|
}
|
838
|
-
|
839
|
-
|
840
|
-
|
838
|
+
|
839
|
+
SECTION("type conversion: empty") {
|
840
|
+
kll_sketch<double> kll_double;
|
841
|
+
kll_sketch<float> kll_float(kll_double);
|
842
|
+
REQUIRE(kll_float.is_empty());
|
843
|
+
REQUIRE(kll_float.get_k() == kll_double.get_k());
|
844
|
+
REQUIRE(kll_float.get_n() == 0);
|
845
|
+
REQUIRE(kll_float.get_num_retained() == 0);
|
846
|
+
}
|
847
|
+
|
848
|
+
SECTION("type conversion: over k") {
|
849
|
+
kll_sketch<double> kll_double;
|
850
|
+
for (int i = 0; i < 1000; ++i) kll_double.update(static_cast<double>(i));
|
851
|
+
kll_sketch<float> kll_float(kll_double);
|
852
|
+
REQUIRE(!kll_float.is_empty());
|
853
|
+
REQUIRE(kll_float.get_k() == kll_double.get_k());
|
854
|
+
REQUIRE(kll_float.get_n() == kll_double.get_n());
|
855
|
+
REQUIRE(kll_float.get_num_retained() == kll_double.get_num_retained());
|
856
|
+
|
857
|
+
auto sv_float = kll_float.get_sorted_view(false);
|
858
|
+
auto sv_double = kll_double.get_sorted_view(false);
|
859
|
+
auto sv_float_it = sv_float.begin();
|
860
|
+
auto sv_double_it = sv_double.begin();
|
861
|
+
while (sv_float_it != sv_float.end()) {
|
862
|
+
REQUIRE(sv_double_it != sv_double.end());
|
863
|
+
auto float_pair = *sv_float_it;
|
864
|
+
auto double_pair = *sv_double_it;
|
865
|
+
REQUIRE(float_pair.first == Approx(double_pair.first).margin(0.01));
|
866
|
+
REQUIRE(float_pair.second == double_pair.second);
|
867
|
+
++sv_float_it;
|
868
|
+
++sv_double_it;
|
869
|
+
}
|
870
|
+
REQUIRE(sv_double_it == sv_double.end());
|
871
|
+
}
|
872
|
+
|
873
|
+
class A {
|
874
|
+
int val;
|
875
|
+
public:
|
876
|
+
A(int val): val(val) {}
|
877
|
+
int get_val() const { return val; }
|
878
|
+
};
|
879
|
+
|
880
|
+
struct less_A {
|
881
|
+
bool operator()(const A& a1, const A& a2) const { return a1.get_val() < a2.get_val(); }
|
882
|
+
};
|
883
|
+
|
884
|
+
class B {
|
885
|
+
int val;
|
886
|
+
public:
|
887
|
+
explicit B(const A& a): val(a.get_val()) {}
|
888
|
+
int get_val() const { return val; }
|
889
|
+
};
|
890
|
+
|
891
|
+
struct less_B {
|
892
|
+
bool operator()(const B& b1, const B& b2) const { return b1.get_val() < b2.get_val(); }
|
893
|
+
};
|
894
|
+
|
895
|
+
SECTION("type conversion: custom types") {
|
896
|
+
kll_sketch<A, less_A> sa;
|
897
|
+
sa.update(1);
|
898
|
+
sa.update(2);
|
899
|
+
sa.update(3);
|
900
|
+
|
901
|
+
kll_sketch<B, less_B> sb(sa);
|
902
|
+
REQUIRE(sb.get_n() == 3);
|
841
903
|
}
|
904
|
+
|
905
|
+
// cleanup
|
906
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
842
907
|
}
|
843
908
|
|
844
909
|
} /* namespace datasketches */
|
@@ -12,16 +12,18 @@ This package provides a variety of sketches as described below. Wherever a speci
|
|
12
12
|
|
13
13
|
## Building and Installation
|
14
14
|
|
15
|
-
Once cloned, the library can be installed by running `
|
15
|
+
Once cloned, the library can be installed by running `python3 -m pip install .` in the project root directory -- not the python subdirectory -- which will also install the necessary dependencies, namely numpy and [pybind11[global]](https://github.com/pybind/pybind11).
|
16
16
|
|
17
|
-
If you prefer to call the `setup.py` build script directly, you must first install `pybind11[global]`, as well as any other dependencies listed under the build-system section in `pyproject.toml`.
|
17
|
+
If you prefer to call the `setup.py` build script directly, which is discouraged, you must first install `pybind11[global]`, as well as any other dependencies listed under the build-system section in `pyproject.toml`.
|
18
18
|
|
19
|
-
The library is also available from PyPI via `
|
19
|
+
The library is also available from PyPI via `python3 -m pip install datasketches`.
|
20
20
|
|
21
21
|
## Usage
|
22
22
|
|
23
23
|
Having installed the library, loading the Apache Datasketches Library in Python is simple: `import datasketches`.
|
24
24
|
|
25
|
+
The unit tests are mostly structured in a tutorial style and can be used as a reference example for how to feed data into and query the different types of sketches.
|
26
|
+
|
25
27
|
## Available Sketch Classes
|
26
28
|
|
27
29
|
- KLL (Absolute Error Quantiles)
|
@@ -74,12 +76,7 @@ The only developer-specific instructions relate to running unit tests.
|
|
74
76
|
|
75
77
|
### Unit tests
|
76
78
|
|
77
|
-
The Python unit tests are run
|
78
|
-
|
79
|
-
```bash
|
80
|
-
python -m pip install --upgrade tox
|
81
|
-
tox
|
82
|
-
```
|
79
|
+
The Python unit tests are run via `tox`, with no arguments, from the project root directory -- not the python subdirectory. Tox creates a temporary virtual environment in which to build and run the unit tests. In the event you are missing the necessary package, tox may be installed with `python3 -m pip install --upgrade tox`.
|
83
80
|
|
84
81
|
## License
|
85
82
|
|
@@ -151,6 +151,7 @@ template <typename T,
|
|
151
151
|
class quantiles_sketch {
|
152
152
|
public:
|
153
153
|
using value_type = T;
|
154
|
+
using allocator_type = Allocator;
|
154
155
|
using comparator = Comparator;
|
155
156
|
using vector_double = std::vector<double, typename std::allocator_traits<Allocator>::template rebind_alloc<double>>;
|
156
157
|
|
@@ -161,6 +162,14 @@ public:
|
|
161
162
|
quantiles_sketch& operator=(const quantiles_sketch& other);
|
162
163
|
quantiles_sketch& operator=(quantiles_sketch&& other) noexcept;
|
163
164
|
|
165
|
+
/**
|
166
|
+
* @brief Type converting constructor
|
167
|
+
* @param other quantiles sketch of a different type
|
168
|
+
* @param allocator instance of an Allocator
|
169
|
+
*/
|
170
|
+
template<typename From, typename FC, typename FA>
|
171
|
+
explicit quantiles_sketch(const quantiles_sketch<From, FC, FA>& other, const Allocator& allocator = Allocator());
|
172
|
+
|
164
173
|
/**
|
165
174
|
* Updates this sketch with the given data item.
|
166
175
|
* @param value an item from a stream of items
|
@@ -227,6 +236,12 @@ public:
|
|
227
236
|
*/
|
228
237
|
Comparator get_comparator() const;
|
229
238
|
|
239
|
+
/**
|
240
|
+
* Returns the allocator for this sketch.
|
241
|
+
* @return allocator
|
242
|
+
*/
|
243
|
+
allocator_type get_allocator() const;
|
244
|
+
|
230
245
|
/**
|
231
246
|
* Returns an approximation to the value of the data item
|
232
247
|
* that would be preceded by the given fraction of a hypothetical sorted
|
@@ -138,6 +138,65 @@ is_sorted_(is_sorted)
|
|
138
138
|
throw std::logic_error("Item count does not match value computed from k, n");
|
139
139
|
}
|
140
140
|
|
141
|
+
template<typename T, typename C, typename A>
|
142
|
+
template<typename From, typename FC, typename FA>
|
143
|
+
quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch<From, FC, FA>& other, const A& allocator) :
|
144
|
+
allocator_(allocator),
|
145
|
+
k_(other.get_k()),
|
146
|
+
n_(other.get_n()),
|
147
|
+
bit_pattern_(compute_bit_pattern(other.get_k(), other.get_n())),
|
148
|
+
base_buffer_(allocator),
|
149
|
+
levels_(allocator),
|
150
|
+
min_value_(nullptr),
|
151
|
+
max_value_(nullptr),
|
152
|
+
is_sorted_(false)
|
153
|
+
{
|
154
|
+
static_assert(std::is_constructible<T, From>::value,
|
155
|
+
"Type converting constructor requires new type to be constructible from existing type");
|
156
|
+
|
157
|
+
base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k_));
|
158
|
+
|
159
|
+
if (!other.is_empty()) {
|
160
|
+
min_value_ = new (allocator_.allocate(1)) T(other.get_min_value());
|
161
|
+
max_value_ = new (allocator_.allocate(1)) T(other.get_max_value());
|
162
|
+
|
163
|
+
// reserve space in levels
|
164
|
+
const uint8_t num_levels = compute_levels_needed(k_, n_);
|
165
|
+
levels_.reserve(num_levels);
|
166
|
+
for (int i = 0; i < num_levels; ++i) {
|
167
|
+
Level level(allocator);
|
168
|
+
level.reserve(k_);
|
169
|
+
levels_.push_back(std::move(level));
|
170
|
+
}
|
171
|
+
|
172
|
+
// iterate through points, assigning to the correct level as needed
|
173
|
+
for (auto pair : other) {
|
174
|
+
const uint64_t wt = pair.second;
|
175
|
+
if (wt == 1) {
|
176
|
+
base_buffer_.push_back(T(pair.first));
|
177
|
+
// resize where needed as if adding points via update()
|
178
|
+
if (base_buffer_.size() + 1 > base_buffer_.capacity()) {
|
179
|
+
const size_t new_size = std::max(std::min(static_cast<size_t>(2 * k_), 2 * base_buffer_.size()), static_cast<size_t>(1));
|
180
|
+
base_buffer_.reserve(new_size);
|
181
|
+
}
|
182
|
+
}
|
183
|
+
else {
|
184
|
+
const uint8_t idx = count_trailing_zeros_in_u64(pair.second) - 1;
|
185
|
+
levels_[idx].push_back(T(pair.first));
|
186
|
+
}
|
187
|
+
}
|
188
|
+
|
189
|
+
// validate that ordering within each level is preserved
|
190
|
+
// base_buffer_ can be considered unsorted for this purpose
|
191
|
+
for (int i = 0; i < num_levels; ++i) {
|
192
|
+
if (!std::is_sorted(levels_[i].begin(), levels_[i].end(), C())) {
|
193
|
+
throw std::logic_error("Copy construction across types produces invalid sorting");
|
194
|
+
}
|
195
|
+
}
|
196
|
+
}
|
197
|
+
}
|
198
|
+
|
199
|
+
|
141
200
|
template<typename T, typename C, typename A>
|
142
201
|
quantiles_sketch<T, C, A>::~quantiles_sketch() {
|
143
202
|
if (min_value_ != nullptr) {
|
@@ -238,7 +297,7 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
|
|
238
297
|
);
|
239
298
|
write(os, flags_byte);
|
240
299
|
write(os, k_);
|
241
|
-
uint16_t unused = 0;
|
300
|
+
const uint16_t unused = 0;
|
242
301
|
write(os, unused);
|
243
302
|
|
244
303
|
if (!is_empty()) {
|
@@ -624,6 +683,11 @@ C quantiles_sketch<T, C, A>::get_comparator() const {
|
|
624
683
|
return C();
|
625
684
|
}
|
626
685
|
|
686
|
+
template<typename T, typename C, typename A>
|
687
|
+
A quantiles_sketch<T, C, A>::get_allocator() const {
|
688
|
+
return allocator_;
|
689
|
+
}
|
690
|
+
|
627
691
|
// implementation for fixed-size arithmetic types (integral and floating point)
|
628
692
|
template<typename T, typename C, typename A>
|
629
693
|
template<typename SerDe, typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
|
@@ -783,9 +847,9 @@ auto quantiles_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size) co
|
|
783
847
|
|
784
848
|
template<typename T, typename C, typename A>
|
785
849
|
uint32_t quantiles_sketch<T, C, A>::compute_retained_items(const uint16_t k, const uint64_t n) {
|
786
|
-
uint32_t bb_count = compute_base_buffer_items(k, n);
|
787
|
-
uint64_t bit_pattern = compute_bit_pattern(k, n);
|
788
|
-
uint32_t valid_levels = compute_valid_levels(bit_pattern);
|
850
|
+
const uint32_t bb_count = compute_base_buffer_items(k, n);
|
851
|
+
const uint64_t bit_pattern = compute_bit_pattern(k, n);
|
852
|
+
const uint32_t valid_levels = compute_valid_levels(bit_pattern);
|
789
853
|
return bb_count + (k * valid_levels);
|
790
854
|
}
|
791
855
|
|
@@ -843,11 +907,11 @@ void quantiles_sketch<T, C, A>::check_family_id(uint8_t family_id) {
|
|
843
907
|
|
844
908
|
template<typename T, typename C, typename A>
|
845
909
|
void quantiles_sketch<T, C, A>::check_header_validity(uint8_t preamble_longs, uint8_t flags_byte, uint8_t serial_version) {
|
846
|
-
bool empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
|
847
|
-
bool compact = (flags_byte & (1 << flags::IS_COMPACT)) > 0;
|
910
|
+
const bool empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
|
911
|
+
const bool compact = (flags_byte & (1 << flags::IS_COMPACT)) > 0;
|
848
912
|
|
849
|
-
uint8_t sw = (compact ? 1 : 0) + (2 * (empty ? 1 : 0))
|
850
|
-
|
913
|
+
const uint8_t sw = (compact ? 1 : 0) + (2 * (empty ? 1 : 0))
|
914
|
+
+ (4 * (serial_version & 0xF)) + (32 * (preamble_longs & 0x3F));
|
851
915
|
bool valid = true;
|
852
916
|
|
853
917
|
switch (sw) { // exhaustive list and description of all valid cases
|
@@ -888,7 +952,7 @@ typename quantiles_sketch<T, C, A>::const_iterator quantiles_sketch<T, C, A>::en
|
|
888
952
|
|
889
953
|
template<typename T, typename C, typename A>
|
890
954
|
void quantiles_sketch<T, C, A>::grow_base_buffer() {
|
891
|
-
size_t new_size = std::max(std::min(static_cast<size_t>(2 * k_), 2 * base_buffer_.size()), static_cast<size_t>(1));
|
955
|
+
const size_t new_size = std::max(std::min(static_cast<size_t>(2 * k_), 2 * base_buffer_.size()), static_cast<size_t>(1));
|
892
956
|
base_buffer_.reserve(new_size);
|
893
957
|
}
|
894
958
|
|
@@ -912,7 +976,7 @@ void quantiles_sketch<T, C, A>::process_full_base_buffer() {
|
|
912
976
|
|
913
977
|
template<typename T, typename C, typename A>
|
914
978
|
bool quantiles_sketch<T, C, A>::grow_levels_if_needed() {
|
915
|
-
uint8_t levels_needed = compute_levels_needed(k_, n_);
|
979
|
+
const uint8_t levels_needed = compute_levels_needed(k_, n_);
|
916
980
|
if (levels_needed == 0)
|
917
981
|
return false; // don't need levels and might have small base buffer. Possible during merges.
|
918
982
|
|
@@ -992,7 +1056,7 @@ template<typename FwdV>
|
|
992
1056
|
void quantiles_sketch<T, C, A>::zip_buffer_with_stride(FwdV&& buf_in, Level& buf_out, uint16_t stride) {
|
993
1057
|
// Random offset in range [0, stride)
|
994
1058
|
std::uniform_int_distribution<uint16_t> dist(0, stride - 1);
|
995
|
-
uint16_t rand_offset = dist(random_utils::rand);
|
1059
|
+
const uint16_t rand_offset = dist(random_utils::rand);
|
996
1060
|
|
997
1061
|
if ((buf_in.size() != stride * buf_out.capacity())
|
998
1062
|
|| (buf_out.size() > 0)) {
|
@@ -1000,7 +1064,7 @@ void quantiles_sketch<T, C, A>::zip_buffer_with_stride(FwdV&& buf_in, Level& buf
|
|
1000
1064
|
"stride*buf_out.capacity() and empty buf_out");
|
1001
1065
|
}
|
1002
1066
|
|
1003
|
-
size_t k = buf_out.capacity();
|
1067
|
+
const size_t k = buf_out.capacity();
|
1004
1068
|
for (uint16_t i = rand_offset, o = 0; o < k; i += stride, ++o) {
|
1005
1069
|
buf_out.push_back(conditional_forward<FwdV>(buf_in[i]));
|
1006
1070
|
}
|
@@ -1117,7 +1181,7 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
|
|
1117
1181
|
const uint16_t downsample_factor = src.get_k() / tgt.get_k();
|
1118
1182
|
const uint8_t lg_sample_factor = count_trailing_zeros_in_u32(downsample_factor);
|
1119
1183
|
|
1120
|
-
uint64_t new_n = src.get_n() + tgt.get_n();
|
1184
|
+
const uint64_t new_n = src.get_n() + tgt.get_n();
|
1121
1185
|
|
1122
1186
|
// move items from src's base buffer
|
1123
1187
|
for (uint16_t i = 0; i < src.base_buffer_.size(); ++i) {
|
@@ -1125,7 +1189,7 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
|
|
1125
1189
|
}
|
1126
1190
|
|
1127
1191
|
// check (after moving raw items) if we need to extend levels array
|
1128
|
-
uint8_t levels_needed = compute_levels_needed(tgt.get_k(), new_n);
|
1192
|
+
const uint8_t levels_needed = compute_levels_needed(tgt.get_k(), new_n);
|
1129
1193
|
if (levels_needed > tgt.levels_.size()) {
|
1130
1194
|
tgt.levels_.reserve(levels_needed);
|
1131
1195
|
while (tgt.levels_.size() < levels_needed) {
|
@@ -82,7 +82,7 @@ TEST_CASE("kolmogorov-smirnov slightly different distributions", "[quantiles_ske
|
|
82
82
|
const double delta = kolmogorov_smirnov::delta(sketch1, sketch2);
|
83
83
|
REQUIRE(delta == Approx(0.02).margin(0.01));
|
84
84
|
const double threshold = kolmogorov_smirnov::threshold(sketch1, sketch2, 0.05);
|
85
|
-
|
85
|
+
|
86
86
|
REQUIRE_FALSE(delta > threshold);
|
87
87
|
REQUIRE_FALSE(kolmogorov_smirnov::test(sketch1, sketch2, 0.05));
|
88
88
|
}
|
@@ -102,7 +102,7 @@ TEST_CASE("kolmogorov-smirnov slightly different distributions high resolution",
|
|
102
102
|
const double delta = kolmogorov_smirnov::delta(sketch1, sketch2);
|
103
103
|
REQUIRE(delta == Approx(0.02).margin(0.01));
|
104
104
|
const double threshold = kolmogorov_smirnov::threshold(sketch1, sketch2, 0.05);
|
105
|
-
|
105
|
+
|
106
106
|
REQUIRE(delta > threshold);
|
107
107
|
REQUIRE(kolmogorov_smirnov::test(sketch1, sketch2, 0.05));
|
108
108
|
}
|
@@ -903,6 +903,69 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
|
|
903
903
|
}
|
904
904
|
}
|
905
905
|
|
906
|
+
SECTION("Type converting copy constructor") {
|
907
|
+
const uint16_t k = 8;
|
908
|
+
const int n = 403;
|
909
|
+
quantiles_sketch<double> sk_double(k);
|
910
|
+
|
911
|
+
quantiles_sketch<float> sk_float(k, sk_double.get_allocator());
|
912
|
+
REQUIRE(sk_float.is_empty());
|
913
|
+
|
914
|
+
for (int i = 0; i < n; ++i) sk_double.update(i + .01);
|
915
|
+
|
916
|
+
quantiles_sketch<int> sk_int(sk_double);
|
917
|
+
REQUIRE(sk_double.get_n() == sk_int.get_n());
|
918
|
+
REQUIRE(sk_double.get_k() == sk_int.get_k());
|
919
|
+
REQUIRE(sk_double.get_num_retained() == sk_int.get_num_retained());
|
920
|
+
|
921
|
+
auto sv_double = sk_double.get_sorted_view(false);
|
922
|
+
std::vector<std::pair<double, uint64_t>> vec_double(sv_double.begin(), sv_double.end());
|
923
|
+
|
924
|
+
auto sv_int = sk_int.get_sorted_view(false);
|
925
|
+
std::vector<std::pair<int, uint64_t>> vec_int(sv_int.begin(), sv_int.end());
|
926
|
+
|
927
|
+
REQUIRE(vec_double.size() == vec_int.size());
|
928
|
+
|
929
|
+
for (size_t i = 0; i < vec_int.size(); ++i) {
|
930
|
+
// known truncation with conversion so approximate result
|
931
|
+
REQUIRE(vec_double[i].first == Approx(vec_int[i].first).margin(0.1));
|
932
|
+
// exact equality for weights
|
933
|
+
REQUIRE(vec_double[i].second == vec_int[i].second);
|
934
|
+
}
|
935
|
+
}
|
936
|
+
|
937
|
+
class A {
|
938
|
+
int val;
|
939
|
+
public:
|
940
|
+
A(int val): val(val) {}
|
941
|
+
int get_val() const { return val; }
|
942
|
+
};
|
943
|
+
|
944
|
+
struct less_A {
|
945
|
+
bool operator()(const A& a1, const A& a2) const { return a1.get_val() < a2.get_val(); }
|
946
|
+
};
|
947
|
+
|
948
|
+
class B {
|
949
|
+
int val;
|
950
|
+
public:
|
951
|
+
explicit B(const A& a): val(a.get_val()) {}
|
952
|
+
int get_val() const { return val; }
|
953
|
+
};
|
954
|
+
|
955
|
+
struct less_B {
|
956
|
+
bool operator()(const B& b1, const B& b2) const { return b1.get_val() < b2.get_val(); }
|
957
|
+
};
|
958
|
+
|
959
|
+
SECTION("type conversion: custom types") {
|
960
|
+
quantiles_sketch<A, less_A> sa;
|
961
|
+
sa.update(1);
|
962
|
+
sa.update(2);
|
963
|
+
sa.update(3);
|
964
|
+
|
965
|
+
quantiles_sketch<B, less_B> sb(sa);
|
966
|
+
REQUIRE(sb.get_n() == 3);
|
967
|
+
}
|
968
|
+
|
906
969
|
// cleanup
|
907
970
|
if (test_allocator_total_bytes != 0) {
|
908
971
|
REQUIRE(test_allocator_total_bytes == 0);
|
@@ -38,6 +38,9 @@ public:
|
|
38
38
|
req_compactor& operator=(const req_compactor& other);
|
39
39
|
req_compactor& operator=(req_compactor&& other);
|
40
40
|
|
41
|
+
template<typename TT, typename CC, typename AA>
|
42
|
+
req_compactor(const req_compactor<TT, CC, AA>& other, const Allocator& allocator);
|
43
|
+
|
41
44
|
bool is_sorted() const;
|
42
45
|
uint32_t get_num_items() const;
|
43
46
|
uint32_t get_nom_capacity() const;
|
@@ -128,6 +131,9 @@ private:
|
|
128
131
|
template<typename S>
|
129
132
|
static std::pair<std::unique_ptr<T, items_deleter>, size_t> deserialize_items(const void* bytes, size_t size, const S& serde, const Allocator& allocator, uint32_t num);
|
130
133
|
|
134
|
+
// for type converting constructor
|
135
|
+
template<typename TT, typename CC, typename AA>
|
136
|
+
friend class req_compactor;
|
131
137
|
};
|
132
138
|
|
133
139
|
} /* namespace datasketches */
|
@@ -132,6 +132,33 @@ req_compactor<T, C, A>& req_compactor<T, C, A>::operator=(req_compactor&& other)
|
|
132
132
|
return *this;
|
133
133
|
}
|
134
134
|
|
135
|
+
template<typename T, typename C, typename A>
|
136
|
+
template<typename TT, typename CC, typename AA>
|
137
|
+
req_compactor<T, C, A>::req_compactor(const req_compactor<TT, CC, AA>& other, const A& allocator):
|
138
|
+
allocator_(allocator),
|
139
|
+
lg_weight_(other.lg_weight_),
|
140
|
+
hra_(other.hra_),
|
141
|
+
coin_(other.coin_),
|
142
|
+
sorted_(other.sorted_),
|
143
|
+
section_size_raw_(other.section_size_raw_),
|
144
|
+
section_size_(other.section_size_),
|
145
|
+
num_sections_(other.num_sections_),
|
146
|
+
state_(other.state_),
|
147
|
+
num_items_(other.num_items_),
|
148
|
+
capacity_(other.capacity_),
|
149
|
+
items_(nullptr)
|
150
|
+
{
|
151
|
+
if (other.items_ != nullptr) {
|
152
|
+
items_ = allocator_.allocate(capacity_);
|
153
|
+
const uint32_t from = hra_ ? capacity_ - num_items_ : 0;
|
154
|
+
const uint32_t to = hra_ ? capacity_ : num_items_;
|
155
|
+
for (uint32_t i = from; i < to; ++i) new (items_ + i) T(other.items_[i]);
|
156
|
+
if (sorted_ && !std::is_sorted(items_ + from, items_ + to, C())) {
|
157
|
+
throw std::logic_error("items must be sorted");
|
158
|
+
}
|
159
|
+
}
|
160
|
+
}
|
161
|
+
|
135
162
|
template<typename T, typename C, typename A>
|
136
163
|
bool req_compactor<T, C, A>::is_sorted() const {
|
137
164
|
return sorted_;
|
@@ -58,6 +58,14 @@ public:
|
|
58
58
|
req_sketch& operator=(const req_sketch& other);
|
59
59
|
req_sketch& operator=(req_sketch&& other);
|
60
60
|
|
61
|
+
/*
|
62
|
+
* Type converting constructor.
|
63
|
+
* @param other sketch of a different type
|
64
|
+
* @param allocator instance of an Allocator
|
65
|
+
*/
|
66
|
+
template<typename TT, typename CC, typename SS, typename AA>
|
67
|
+
explicit req_sketch(const req_sketch<TT, CC, SS, AA>& other, const Allocator& allocator = Allocator());
|
68
|
+
|
61
69
|
/**
|
62
70
|
* Returns configured parameter K
|
63
71
|
* @return parameter K
|
@@ -408,6 +416,9 @@ private:
|
|
408
416
|
}
|
409
417
|
}
|
410
418
|
|
419
|
+
// for type converting constructor
|
420
|
+
template<typename TT, typename CC, typename SS, typename AA>
|
421
|
+
friend class req_sketch;
|
411
422
|
};
|
412
423
|
|
413
424
|
template<typename T, typename C, typename S, typename A>
|
@@ -64,8 +64,8 @@ compactors_(other.compactors_),
|
|
64
64
|
min_value_(nullptr),
|
65
65
|
max_value_(nullptr)
|
66
66
|
{
|
67
|
-
if (other.min_value_ != nullptr) min_value_ = new (
|
68
|
-
if (other.max_value_ != nullptr) max_value_ = new (
|
67
|
+
if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
|
68
|
+
if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
|
69
69
|
}
|
70
70
|
|
71
71
|
template<typename T, typename C, typename S, typename A>
|
@@ -113,6 +113,33 @@ req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(req_sketch&& other) {
|
|
113
113
|
return *this;
|
114
114
|
}
|
115
115
|
|
116
|
+
template<typename T, typename C, typename S, typename A>
|
117
|
+
template<typename TT, typename CC, typename SS, typename AA>
|
118
|
+
req_sketch<T, C, S, A>::req_sketch(const req_sketch<TT, CC, SS, AA>& other, const A& allocator):
|
119
|
+
allocator_(allocator),
|
120
|
+
k_(other.k_),
|
121
|
+
hra_(other.hra_),
|
122
|
+
max_nom_size_(other.max_nom_size_),
|
123
|
+
num_retained_(other.num_retained_),
|
124
|
+
n_(other.n_),
|
125
|
+
compactors_(allocator),
|
126
|
+
min_value_(nullptr),
|
127
|
+
max_value_(nullptr)
|
128
|
+
{
|
129
|
+
static_assert(
|
130
|
+
std::is_constructible<T, TT>::value,
|
131
|
+
"Type converting constructor requires new type to be constructible from existing type"
|
132
|
+
);
|
133
|
+
compactors_.reserve(other.compactors_.size());
|
134
|
+
for (const auto& compactor: other.compactors_) {
|
135
|
+
compactors_.push_back(req_compactor<T, C, A>(compactor, allocator_));
|
136
|
+
}
|
137
|
+
if (!other.is_empty()) {
|
138
|
+
min_value_ = new (allocator_.allocate(1)) T(other.get_min_value());
|
139
|
+
max_value_ = new (allocator_.allocate(1)) T(other.get_max_value());
|
140
|
+
}
|
141
|
+
}
|
142
|
+
|
116
143
|
template<typename T, typename C, typename S, typename A>
|
117
144
|
uint16_t req_sketch<T, C, S, A>::get_k() const {
|
118
145
|
return k_;
|
@@ -35,7 +35,7 @@ const std::string input_path = "test/";
|
|
35
35
|
#endif
|
36
36
|
|
37
37
|
TEST_CASE("req sketch: empty", "[req_sketch]") {
|
38
|
-
std::cout << "sizeof(req_float_sketch)=" << sizeof(req_sketch<float>) << "\n";
|
38
|
+
//std::cout << "sizeof(req_float_sketch)=" << sizeof(req_sketch<float>) << "\n";
|
39
39
|
req_sketch<float> sketch(12);
|
40
40
|
REQUIRE(sketch.get_k() == 12);
|
41
41
|
REQUIRE(sketch.is_HRA());
|
@@ -245,7 +245,7 @@ TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]")
|
|
245
245
|
auto bytes = sketch.serialize();
|
246
246
|
REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
|
247
247
|
auto sketch2 = req_sketch<float>::deserialize(bytes.data(), bytes.size());
|
248
|
-
std::cout << sketch2.to_string(true);
|
248
|
+
//std::cout << sketch2.to_string(true);
|
249
249
|
REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
|
250
250
|
REQUIRE(sketch2.is_empty() == sketch.is_empty());
|
251
251
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
@@ -282,7 +282,7 @@ TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
|
|
282
282
|
auto bytes = sketch.serialize();
|
283
283
|
REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
|
284
284
|
auto sketch2 = req_sketch<float>::deserialize(bytes.data(), bytes.size());
|
285
|
-
std::cout << sketch2.to_string(true);
|
285
|
+
//std::cout << sketch2.to_string(true);
|
286
286
|
REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
|
287
287
|
REQUIRE(sketch2.is_empty() == sketch.is_empty());
|
288
288
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
@@ -485,6 +485,72 @@ TEST_CASE("req sketch: merge incompatible HRA and LRA", "[req_sketch]") {
|
|
485
485
|
REQUIRE_THROWS_AS(sketch1.merge(sketch2), std::invalid_argument);
|
486
486
|
}
|
487
487
|
|
488
|
+
TEST_CASE("req sketch: type conversion - empty", "[req_sketch]") {
|
489
|
+
req_sketch<double> req_double(12);
|
490
|
+
req_sketch<float> req_float(req_double);
|
491
|
+
REQUIRE(req_float.is_empty());
|
492
|
+
REQUIRE(req_float.get_k() == req_double.get_k());
|
493
|
+
REQUIRE(req_float.get_n() == 0);
|
494
|
+
REQUIRE(req_float.get_num_retained() == 0);
|
495
|
+
}
|
496
|
+
|
497
|
+
TEST_CASE("req sketch: type conversion - several levels", "[req_sketch]") {
|
498
|
+
req_sketch<double> req_double(12);
|
499
|
+
for (int i = 0; i < 1000; ++i) req_double.update(static_cast<double>(i));
|
500
|
+
req_sketch<float> req_float(req_double);
|
501
|
+
REQUIRE(!req_float.is_empty());
|
502
|
+
REQUIRE(req_float.get_k() == req_double.get_k());
|
503
|
+
REQUIRE(req_float.get_n() == req_double.get_n());
|
504
|
+
REQUIRE(req_float.get_num_retained() == req_double.get_num_retained());
|
505
|
+
|
506
|
+
auto sv_float = req_float.get_sorted_view(false);
|
507
|
+
auto sv_double = req_double.get_sorted_view(false);
|
508
|
+
auto sv_float_it = sv_float.begin();
|
509
|
+
auto sv_double_it = sv_double.begin();
|
510
|
+
while (sv_float_it != sv_float.end()) {
|
511
|
+
REQUIRE(sv_double_it != sv_double.end());
|
512
|
+
auto float_pair = *sv_float_it;
|
513
|
+
auto double_pair = *sv_double_it;
|
514
|
+
REQUIRE(float_pair.first == Approx(double_pair.first).margin(0.01));
|
515
|
+
REQUIRE(float_pair.second == double_pair.second);
|
516
|
+
++sv_float_it;
|
517
|
+
++sv_double_it;
|
518
|
+
}
|
519
|
+
REQUIRE(sv_double_it == sv_double.end());
|
520
|
+
}
|
521
|
+
|
522
|
+
class A {
|
523
|
+
int val;
|
524
|
+
public:
|
525
|
+
A(int val): val(val) {}
|
526
|
+
int get_val() const { return val; }
|
527
|
+
};
|
528
|
+
|
529
|
+
struct less_A {
|
530
|
+
bool operator()(const A& a1, const A& a2) const { return a1.get_val() < a2.get_val(); }
|
531
|
+
};
|
532
|
+
|
533
|
+
class B {
|
534
|
+
int val;
|
535
|
+
public:
|
536
|
+
explicit B(const A& a): val(a.get_val()) {}
|
537
|
+
int get_val() const { return val; }
|
538
|
+
};
|
539
|
+
|
540
|
+
struct less_B {
|
541
|
+
bool operator()(const B& b1, const B& b2) const { return b1.get_val() < b2.get_val(); }
|
542
|
+
};
|
543
|
+
|
544
|
+
TEST_CASE("req sketch: type conversion - custom types") {
|
545
|
+
req_sketch<A, less_A> sa(4);
|
546
|
+
sa.update(1);
|
547
|
+
sa.update(2);
|
548
|
+
sa.update(3);
|
549
|
+
|
550
|
+
req_sketch<B, less_B> sb(sa);
|
551
|
+
REQUIRE(sb.get_n() == 3);
|
552
|
+
}
|
553
|
+
|
488
554
|
//TEST_CASE("for manual comparison with Java") {
|
489
555
|
// req_sketch<float> sketch(12, false);
|
490
556
|
// for (size_t i = 0; i < 100000; ++i) sketch.update(i);
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datasketches
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.5
|
4
|
+
version: 0.2.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|