datasketches 0.2.5 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9eaa8a17efdbc591b3e56f94650e887babd30dc79d95db3a7986df0261184191
4
- data.tar.gz: 5544326a0edf165d87373a680d8bf5b80acba2894b9048f92cbdb261fcd66d57
3
+ metadata.gz: cf1ea0f9f2d12b0e46c2d4c7dec21f41992e711e73eca68ea1ef03a4bb711077
4
+ data.tar.gz: 92f56b63da0254962be47d8d3e00a6950a271053bf3152167f95e6fdb99528e6
5
5
  SHA512:
6
- metadata.gz: 5a28c093ecda083762367149800770f59fee8e630c0d983d3f29ed32d027fae2e2515dff243ee11bbd41f4875c7cea622f7bc5cc5d7e73176e785503ed19fc0b
7
- data.tar.gz: 6b210f2fdca1ae3cbd4e4cbf88e284855014b5a1e1c883085dc96a057da29e370005163ce628e54351c9127b00fae4b7b33a4ca63e6f4b90e0665e93b7742a66
6
+ metadata.gz: 5841d4a70f1e852faa150f57ebfefc7b975de020782c41eebdad87a01d016be9bdf86f86173600632bf6f56300df0c9c4196251aa5df02a47ecd357ac844ef80
7
+ data.tar.gz: d6ae7c811e0e2c2008b912e29f86d1b99491c74cd878790dfd800811a007f0dbf9c49bb59db30345450ff82673381f2c036a84a57dc44a6f6751610d9be2ee88
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.6 (2022-07-13)
2
+
3
+ - Updated DataSketches to 3.5.0
4
+
1
5
  ## 0.2.5 (2022-05-21)
2
6
 
3
7
  - Updated DataSketches to 3.4.0
@@ -55,12 +55,12 @@ void bind_kll_sketch(Rice::Module& m, const char* name) {
55
55
  })
56
56
  .define_method(
57
57
  "pmf",
58
- [](kll_sketch<T>& self, std::vector<T> split_points) {
58
+ [](kll_sketch<T>& self, const std::vector<T>& split_points) {
59
59
  return self.get_PMF(&split_points[0], split_points.size());
60
60
  })
61
61
  .define_method(
62
62
  "cdf",
63
- [](kll_sketch<T>& self, std::vector<T> split_points) {
63
+ [](kll_sketch<T>& self, const std::vector<T>& split_points) {
64
64
  return self.get_CDF(&split_points[0], split_points.size());
65
65
  })
66
66
  .define_method(
@@ -1,3 +1,3 @@
1
1
  module DataSketches
2
- VERSION = "0.2.5"
2
+ VERSION = "0.2.6"
3
3
  end
@@ -17,7 +17,7 @@
17
17
 
18
18
  cmake_minimum_required(VERSION 3.16.0)
19
19
  project(DataSketches
20
- VERSION 3.4.0
20
+ VERSION 3.5.0
21
21
  LANGUAGES CXX)
22
22
 
23
23
  include(GNUInstallDirs)
@@ -1,11 +1,12 @@
1
- Apache DataSketches-cpp
2
- Copyright 2020-2021 The Apache Software Foundation
1
+ Apache DataSketches C++ and Python
2
+ Copyright 2022 The Apache Software Foundation
3
3
 
4
- Copyright 2015-2018 Yahoo
5
- Copyright 2019 Verizon Media
4
+ Copyright 2015-2018 Yahoo Inc.
5
+ Copyright 2019-2020 Verizon Media
6
+ Copyright 2021 Yahoo Inc.
6
7
 
7
8
  This product includes software developed at
8
9
  The Apache Software Foundation (http://www.apache.org/).
9
10
 
10
11
  Prior to moving to ASF, the software for this project was developed at
11
- Yahoo (now Verizon Media) (https://developer.yahoo.com).
12
+ Yahoo Inc. (https://developer.yahoo.com).
@@ -43,8 +43,8 @@ install(FILES
43
43
  include/conditional_forward.hpp
44
44
  include/ceiling_power_of_2.hpp
45
45
  include/bounds_binomial_proportions.hpp
46
- include/kolmogorov_smirnov.hpp
47
- include/kolmogorov_smirnov_impl.hpp
48
46
  include/quantile_sketch_sorted_view.hpp
49
47
  include/quantile_sketch_sorted_view_impl.hpp
48
+ include/kolmogorov_smirnov.hpp
49
+ include/kolmogorov_smirnov_impl.hpp
50
50
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
@@ -297,6 +297,7 @@ void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& sourc
297
297
  // changes the implied ordering of the pairs, so we must do it before sorting.
298
298
 
299
299
  const uint8_t pseudo_phase = determine_pseudo_phase(source.get_lg_k(), source.get_num_coupons());
300
+ if (pseudo_phase >= 16) throw std::logic_error("unexpected pseudo phase for sliding flavor");
300
301
  const uint8_t* permutation = column_permutations_for_encoding[pseudo_phase];
301
302
 
302
303
  const uint8_t offset = source.window_offset;
@@ -333,7 +334,7 @@ void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& sou
333
334
  lg_k, source.table_data.get_allocator());
334
335
 
335
336
  const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
336
- if (pseudo_phase >= 16) throw std::logic_error("pseudo phase >= 16");
337
+ if (pseudo_phase >= 16) throw std::logic_error("unexpected pseudo phase for sliding flavor");
337
338
  const uint8_t* permutation = column_permutations_for_decoding[pseudo_phase];
338
339
 
339
340
  uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k, num_coupons);
@@ -230,7 +230,7 @@ kll_helper::compress_result kll_helper::general_compress(uint16_t k, uint8_t m,
230
230
  // move level over as is
231
231
  // make sure we are not moving data upwards
232
232
  if (raw_beg < out_levels[current_level]) throw std::logic_error("wrong move");
233
- std::move(&items[raw_beg], &items[raw_lim], &items[out_levels[current_level]]);
233
+ std::move(items + raw_beg, items + raw_lim, items + out_levels[current_level]);
234
234
  out_levels[current_level + 1] = out_levels[current_level] + raw_pop;
235
235
  } else {
236
236
  // The sketch is too full AND this level is too full, so we compact it
@@ -251,7 +251,7 @@ kll_helper::compress_result kll_helper::general_compress(uint16_t k, uint8_t m,
251
251
 
252
252
  // level zero might not be sorted, so we must sort it if we wish to compact it
253
253
  if ((current_level == 0) && !is_level_zero_sorted) {
254
- std::sort(&items[adj_beg], &items[adj_beg + adj_pop], C());
254
+ std::sort(items + adj_beg, items + adj_beg + adj_pop, C());
255
255
  }
256
256
 
257
257
  if (pop_above == 0) { // Level above is empty, so halve up
@@ -170,7 +170,7 @@ class kll_sketch {
170
170
  using comparator = C;
171
171
 
172
172
  static const uint8_t DEFAULT_M = 8;
173
- // TODO: Redundant and deprecated. Will be remove din next major version.
173
+ // TODO: Redundant and deprecated. Will be removed in next major version.
174
174
  static const uint16_t DEFAULT_K = kll_constants::DEFAULT_K;
175
175
  static const uint16_t MIN_K = DEFAULT_M;
176
176
  static const uint16_t MAX_K = (1 << 16) - 1;
@@ -182,6 +182,14 @@ class kll_sketch {
182
182
  kll_sketch& operator=(const kll_sketch& other);
183
183
  kll_sketch& operator=(kll_sketch&& other);
184
184
 
185
+ /*
186
+ * Type converting constructor.
187
+ * @param other sketch of a different type
188
+ * @param allocator instance of an Allocator
189
+ */
190
+ template<typename TT, typename CC, typename SS, typename AA>
191
+ explicit kll_sketch(const kll_sketch<TT, CC, SS, AA>& other, const A& allocator = A());
192
+
185
193
  /**
186
194
  * Updates this sketch with the given data item.
187
195
  * @param value an item from a stream of items
@@ -390,7 +398,7 @@ class kll_sketch {
390
398
  /**
391
399
  * Computes size needed to serialize the current state of the sketch.
392
400
  * This version is for fixed-size arithmetic types (integral and floating point).
393
- * @param instance of a SerDe
401
+ * @param serde instance of a SerDe
394
402
  * @return size in bytes needed to serialize this sketch
395
403
  */
396
404
  template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
@@ -399,7 +407,7 @@ class kll_sketch {
399
407
  /**
400
408
  * Computes size needed to serialize the current state of the sketch.
401
409
  * This version is for all other types and can be expensive since every item needs to be looked at.
402
- * @param instance of a SerDe
410
+ * @param serde instance of a SerDe
403
411
  * @return size in bytes needed to serialize this sketch
404
412
  */
405
413
  template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
@@ -459,7 +467,7 @@ class kll_sketch {
459
467
  /**
460
468
  * This method deserializes a sketch from a given stream.
461
469
  * @param is input stream
462
- * @param instance of an Allocator
470
+ * @param allocator instance of an Allocator
463
471
  * @return an instance of a sketch
464
472
  *
465
473
  * Deprecated, to be removed in the next major version
@@ -469,8 +477,8 @@ class kll_sketch {
469
477
  /**
470
478
  * This method deserializes a sketch from a given stream.
471
479
  * @param is input stream
472
- * @param instance of a SerDe
473
- * @param instance of an Allocator
480
+ * @param serde instance of a SerDe
481
+ * @param allocator instance of an Allocator
474
482
  * @return an instance of a sketch
475
483
  */
476
484
  template<typename SerDe = S>
@@ -480,7 +488,7 @@ class kll_sketch {
480
488
  * This method deserializes a sketch from a given array of bytes.
481
489
  * @param bytes pointer to the array of bytes
482
490
  * @param size the size of the array
483
- * @param instance of an Allocator
491
+ * @param allocator instance of an Allocator
484
492
  * @return an instance of a sketch
485
493
  *
486
494
  * Deprecated, to be removed in the next major version
@@ -491,8 +499,8 @@ class kll_sketch {
491
499
  * This method deserializes a sketch from a given array of bytes.
492
500
  * @param bytes pointer to the array of bytes
493
501
  * @param size the size of the array
494
- * @param instance of a SerDe
495
- * @param instance of an Allocator
502
+ * @param serde instance of a SerDe
503
+ * @param allocator instance of an Allocator
496
504
  * @return an instance of a sketch
497
505
  */
498
506
  template<typename SerDe = S>
@@ -606,6 +614,8 @@ class kll_sketch {
606
614
  static void check_serial_version(uint8_t serial_version);
607
615
  static void check_family_id(uint8_t family_id);
608
616
 
617
+ void check_sorting() const;
618
+
609
619
  // implementations for floating point types
610
620
  template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
611
621
  static const TT& get_invalid_value() {
@@ -629,6 +639,9 @@ class kll_sketch {
629
639
  return true;
630
640
  }
631
641
 
642
+ // for type converting constructor
643
+ template<typename TT, typename CC, typename SS, typename AA>
644
+ friend class kll_sketch;
632
645
  };
633
646
 
634
647
  template<typename T, typename C, typename S, typename A>
@@ -26,6 +26,7 @@
26
26
  #include <stdexcept>
27
27
 
28
28
  #include "conditional_forward.hpp"
29
+ #include "count_zeros.hpp"
29
30
  #include "memory_operations.hpp"
30
31
  #include "kll_helper.hpp"
31
32
 
@@ -69,7 +70,7 @@ max_value_(nullptr),
69
70
  is_level_zero_sorted_(other.is_level_zero_sorted_)
70
71
  {
71
72
  items_ = allocator_.allocate(items_size_);
72
- std::copy(&other.items_[levels_[0]], &other.items_[levels_[num_levels_]], &items_[levels_[0]]);
73
+ for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
73
74
  if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
74
75
  if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
75
76
  }
@@ -147,6 +148,33 @@ kll_sketch<T, C, S, A>::~kll_sketch() {
147
148
  }
148
149
  }
149
150
 
151
+ template<typename T, typename C, typename S, typename A>
152
+ template<typename TT, typename CC, typename SS, typename AA>
153
+ kll_sketch<T, C, S, A>::kll_sketch(const kll_sketch<TT, CC, SS, AA>& other, const A& allocator):
154
+ allocator_(allocator),
155
+ k_(other.k_),
156
+ m_(other.m_),
157
+ min_k_(other.min_k_),
158
+ n_(other.n_),
159
+ num_levels_(other.num_levels_),
160
+ levels_(other.levels_, allocator_),
161
+ items_(nullptr),
162
+ items_size_(other.items_size_),
163
+ min_value_(nullptr),
164
+ max_value_(nullptr),
165
+ is_level_zero_sorted_(other.is_level_zero_sorted_)
166
+ {
167
+ static_assert(
168
+ std::is_constructible<T, TT>::value,
169
+ "Type converting constructor requires new type to be constructible from existing type"
170
+ );
171
+ items_ = allocator_.allocate(items_size_);
172
+ for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
173
+ if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
174
+ if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
175
+ check_sorting();
176
+ }
177
+
150
178
  template<typename T, typename C, typename S, typename A>
151
179
  template<typename FwdT>
152
180
  void kll_sketch<T, C, S, A>::update(FwdT&& value) {
@@ -305,8 +333,8 @@ double kll_sketch<T, C, S, A>::get_rank(const T& value) const {
305
333
  uint64_t weight = 1;
306
334
  uint64_t total = 0;
307
335
  while (level < num_levels_) {
308
- const auto from_index(levels_[level]);
309
- const auto to_index(levels_[level + 1]); // exclusive
336
+ const auto from_index = levels_[level];
337
+ const auto to_index = levels_[level + 1]; // exclusive
310
338
  for (uint32_t i = from_index; i < to_index; i++) {
311
339
  if (inclusive ? !C()(value, items_[i]) : C()(items_[i], value)) {
312
340
  total += weight;
@@ -694,7 +722,7 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
694
722
  // level zero might not be sorted, so we must sort it if we wish to compact it
695
723
  // sort_level_zero() is not used here because of the adjustment for odd number of items
696
724
  if ((level == 0) && !is_level_zero_sorted_) {
697
- std::sort(&items_[adj_beg], &items_[adj_beg + adj_pop], C());
725
+ std::sort(items_ + adj_beg, items_ + adj_beg + adj_pop, C());
698
726
  }
699
727
  if (pop_above == 0) {
700
728
  kll_helper::randomly_halve_up(items_, adj_beg, adj_pop);
@@ -717,7 +745,7 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
717
745
  // so that the freed-up space can be used by level zero
718
746
  if (level > 0) {
719
747
  const uint32_t amount = raw_beg - levels_[0];
720
- std::move_backward(&items_[levels_[0]], &items_[levels_[0] + amount], &items_[levels_[0] + half_adj_pop + amount]);
748
+ std::move_backward(items_ + levels_[0], items_ + levels_[0] + amount, items_ + levels_[0] + half_adj_pop + amount);
721
749
  for (uint8_t lvl = 0; lvl < level; lvl++) levels_[lvl] += half_adj_pop;
722
750
  }
723
751
  for (uint32_t i = 0; i < half_adj_pop; i++) items_[i + destroy_beg].~T();
@@ -775,22 +803,32 @@ void kll_sketch<T, C, S, A>::add_empty_top_level_to_completely_full_sketch() {
775
803
  template<typename T, typename C, typename S, typename A>
776
804
  void kll_sketch<T, C, S, A>::sort_level_zero() {
777
805
  if (!is_level_zero_sorted_) {
778
- std::sort(&items_[levels_[0]], &items_[levels_[1]], C());
806
+ std::sort(items_ + levels_[0], items_ + levels_[1], C());
779
807
  is_level_zero_sorted_ = true;
780
808
  }
781
809
  }
782
810
 
811
+ template<typename T, typename C, typename S, typename A>
812
+ void kll_sketch<T, C, S, A>::check_sorting() const {
813
+ // not checking level 0
814
+ for (uint8_t level = 1; level < num_levels_; ++level) {
815
+ const auto from = items_ + levels_[level];
816
+ const auto to = items_ + levels_[level + 1];
817
+ if (!std::is_sorted(from, to, C())) {
818
+ throw std::logic_error("levels must be sorted");
819
+ }
820
+ }
821
+ }
822
+
783
823
  template<typename T, typename C, typename S, typename A>
784
824
  template<bool inclusive>
785
825
  quantile_sketch_sorted_view<T, C, A> kll_sketch<T, C, S, A>::get_sorted_view(bool cumulative) const {
786
826
  const_cast<kll_sketch*>(this)->sort_level_zero(); // allow this side effect
787
827
  quantile_sketch_sorted_view<T, C, A> view(get_num_retained(), allocator_);
788
- uint8_t level = 0;
789
- while (level < num_levels_) {
828
+ for (uint8_t level = 0; level < num_levels_; ++level) {
790
829
  const auto from = items_ + levels_[level];
791
830
  const auto to = items_ + levels_[level + 1]; // exclusive
792
831
  view.add(from, to, 1 << level);
793
- ++level;
794
832
  }
795
833
  if (cumulative) view.template convert_to_cummulative<inclusive>();
796
834
  return view;
@@ -39,9 +39,9 @@ static std::string testBinaryInputPath = "test/";
39
39
  #endif
40
40
 
41
41
  // typical usage would be just kll_sketch<float> or kll_sketch<std::string>, but here we use test_allocator
42
- typedef kll_sketch<float, std::less<float>, serde<float>, test_allocator<float>> kll_float_sketch;
42
+ using kll_float_sketch = kll_sketch<float, std::less<float>, serde<float>, test_allocator<float>>;
43
43
  // let std::string use the default allocator for simplicity, otherwise we need to define "less" and "serde"
44
- typedef kll_sketch<std::string, std::less<std::string>, serde<std::string>, test_allocator<std::string>> kll_string_sketch;
44
+ using kll_string_sketch = kll_sketch<std::string, std::less<std::string>, serde<std::string>, test_allocator<std::string>>;
45
45
 
46
46
  TEST_CASE("kll sketch", "[kll_sketch]") {
47
47
 
@@ -75,7 +75,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
75
75
  (void) it; // to suppress "unused" warning
76
76
  FAIL("should be no iterations over an empty sketch");
77
77
  }
78
- }
78
+ }
79
79
 
80
80
  SECTION("get bad quantile") {
81
81
  kll_float_sketch sketch(200, 0);
@@ -835,10 +835,75 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
835
835
  REQUIRE((*it).second == 3);
836
836
  }
837
837
  }
838
- // cleanup
839
- if (test_allocator_total_bytes != 0) {
840
- REQUIRE(test_allocator_total_bytes == 0);
838
+
839
+ SECTION("type conversion: empty") {
840
+ kll_sketch<double> kll_double;
841
+ kll_sketch<float> kll_float(kll_double);
842
+ REQUIRE(kll_float.is_empty());
843
+ REQUIRE(kll_float.get_k() == kll_double.get_k());
844
+ REQUIRE(kll_float.get_n() == 0);
845
+ REQUIRE(kll_float.get_num_retained() == 0);
846
+ }
847
+
848
+ SECTION("type conversion: over k") {
849
+ kll_sketch<double> kll_double;
850
+ for (int i = 0; i < 1000; ++i) kll_double.update(static_cast<double>(i));
851
+ kll_sketch<float> kll_float(kll_double);
852
+ REQUIRE(!kll_float.is_empty());
853
+ REQUIRE(kll_float.get_k() == kll_double.get_k());
854
+ REQUIRE(kll_float.get_n() == kll_double.get_n());
855
+ REQUIRE(kll_float.get_num_retained() == kll_double.get_num_retained());
856
+
857
+ auto sv_float = kll_float.get_sorted_view(false);
858
+ auto sv_double = kll_double.get_sorted_view(false);
859
+ auto sv_float_it = sv_float.begin();
860
+ auto sv_double_it = sv_double.begin();
861
+ while (sv_float_it != sv_float.end()) {
862
+ REQUIRE(sv_double_it != sv_double.end());
863
+ auto float_pair = *sv_float_it;
864
+ auto double_pair = *sv_double_it;
865
+ REQUIRE(float_pair.first == Approx(double_pair.first).margin(0.01));
866
+ REQUIRE(float_pair.second == double_pair.second);
867
+ ++sv_float_it;
868
+ ++sv_double_it;
869
+ }
870
+ REQUIRE(sv_double_it == sv_double.end());
871
+ }
872
+
873
+ class A {
874
+ int val;
875
+ public:
876
+ A(int val): val(val) {}
877
+ int get_val() const { return val; }
878
+ };
879
+
880
+ struct less_A {
881
+ bool operator()(const A& a1, const A& a2) const { return a1.get_val() < a2.get_val(); }
882
+ };
883
+
884
+ class B {
885
+ int val;
886
+ public:
887
+ explicit B(const A& a): val(a.get_val()) {}
888
+ int get_val() const { return val; }
889
+ };
890
+
891
+ struct less_B {
892
+ bool operator()(const B& b1, const B& b2) const { return b1.get_val() < b2.get_val(); }
893
+ };
894
+
895
+ SECTION("type conversion: custom types") {
896
+ kll_sketch<A, less_A> sa;
897
+ sa.update(1);
898
+ sa.update(2);
899
+ sa.update(3);
900
+
901
+ kll_sketch<B, less_B> sb(sa);
902
+ REQUIRE(sb.get_n() == 3);
841
903
  }
904
+
905
+ // cleanup
906
+ REQUIRE(test_allocator_total_bytes == 0);
842
907
  }
843
908
 
844
909
  } /* namespace datasketches */
@@ -12,16 +12,18 @@ This package provides a variety of sketches as described below. Wherever a speci
12
12
 
13
13
  ## Building and Installation
14
14
 
15
- Once cloned, the library can be installed by running `python -m pip install .` in the project root directory, which will also install the necessary dependencies, namely numpy and [pybind11[global]](https://github.com/pybind/pybind11).
15
+ Once cloned, the library can be installed by running `python3 -m pip install .` in the project root directory -- not the python subdirectory -- which will also install the necessary dependencies, namely numpy and [pybind11[global]](https://github.com/pybind/pybind11).
16
16
 
17
- If you prefer to call the `setup.py` build script directly, you must first install `pybind11[global]`, as well as any other dependencies listed under the build-system section in `pyproject.toml`.
17
+ If you prefer to call the `setup.py` build script directly, which is discouraged, you must first install `pybind11[global]`, as well as any other dependencies listed under the build-system section in `pyproject.toml`.
18
18
 
19
- The library is also available from PyPI via `python -m pip install datasketches`.
19
+ The library is also available from PyPI via `python3 -m pip install datasketches`.
20
20
 
21
21
  ## Usage
22
22
 
23
23
  Having installed the library, loading the Apache Datasketches Library in Python is simple: `import datasketches`.
24
24
 
25
+ The unit tests are mostly structured in a tutorial style and can be used as a reference example for how to feed data into and query the different types of sketches.
26
+
25
27
  ## Available Sketch Classes
26
28
 
27
29
  - KLL (Absolute Error Quantiles)
@@ -74,12 +76,7 @@ The only developer-specific instructions relate to running unit tests.
74
76
 
75
77
  ### Unit tests
76
78
 
77
- The Python unit tests are run with `tox`. To ensure you have all the needed package, from the package base directory run:
78
-
79
- ```bash
80
- python -m pip install --upgrade tox
81
- tox
82
- ```
79
+ The Python unit tests are run via `tox`, with no arguments, from the project root directory -- not the python subdirectory. Tox creates a temporary virtual environment in which to build and run the unit tests. In the event you are missing the necessary package, tox may be installed with `python3 -m pip install --upgrade tox`.
83
80
 
84
81
  ## License
85
82
 
@@ -151,6 +151,7 @@ template <typename T,
151
151
  class quantiles_sketch {
152
152
  public:
153
153
  using value_type = T;
154
+ using allocator_type = Allocator;
154
155
  using comparator = Comparator;
155
156
  using vector_double = std::vector<double, typename std::allocator_traits<Allocator>::template rebind_alloc<double>>;
156
157
 
@@ -161,6 +162,14 @@ public:
161
162
  quantiles_sketch& operator=(const quantiles_sketch& other);
162
163
  quantiles_sketch& operator=(quantiles_sketch&& other) noexcept;
163
164
 
165
+ /**
166
+ * @brief Type converting constructor
167
+ * @param other quantiles sketch of a different type
168
+ * @param allocator instance of an Allocator
169
+ */
170
+ template<typename From, typename FC, typename FA>
171
+ explicit quantiles_sketch(const quantiles_sketch<From, FC, FA>& other, const Allocator& allocator = Allocator());
172
+
164
173
  /**
165
174
  * Updates this sketch with the given data item.
166
175
  * @param value an item from a stream of items
@@ -227,6 +236,12 @@ public:
227
236
  */
228
237
  Comparator get_comparator() const;
229
238
 
239
+ /**
240
+ * Returns the allocator for this sketch.
241
+ * @return allocator
242
+ */
243
+ allocator_type get_allocator() const;
244
+
230
245
  /**
231
246
  * Returns an approximation to the value of the data item
232
247
  * that would be preceded by the given fraction of a hypothetical sorted
@@ -138,6 +138,65 @@ is_sorted_(is_sorted)
138
138
  throw std::logic_error("Item count does not match value computed from k, n");
139
139
  }
140
140
 
141
+ template<typename T, typename C, typename A>
142
+ template<typename From, typename FC, typename FA>
143
+ quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch<From, FC, FA>& other, const A& allocator) :
144
+ allocator_(allocator),
145
+ k_(other.get_k()),
146
+ n_(other.get_n()),
147
+ bit_pattern_(compute_bit_pattern(other.get_k(), other.get_n())),
148
+ base_buffer_(allocator),
149
+ levels_(allocator),
150
+ min_value_(nullptr),
151
+ max_value_(nullptr),
152
+ is_sorted_(false)
153
+ {
154
+ static_assert(std::is_constructible<T, From>::value,
155
+ "Type converting constructor requires new type to be constructible from existing type");
156
+
157
+ base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k_));
158
+
159
+ if (!other.is_empty()) {
160
+ min_value_ = new (allocator_.allocate(1)) T(other.get_min_value());
161
+ max_value_ = new (allocator_.allocate(1)) T(other.get_max_value());
162
+
163
+ // reserve space in levels
164
+ const uint8_t num_levels = compute_levels_needed(k_, n_);
165
+ levels_.reserve(num_levels);
166
+ for (int i = 0; i < num_levels; ++i) {
167
+ Level level(allocator);
168
+ level.reserve(k_);
169
+ levels_.push_back(std::move(level));
170
+ }
171
+
172
+ // iterate through points, assigning to the correct level as needed
173
+ for (auto pair : other) {
174
+ const uint64_t wt = pair.second;
175
+ if (wt == 1) {
176
+ base_buffer_.push_back(T(pair.first));
177
+ // resize where needed as if adding points via update()
178
+ if (base_buffer_.size() + 1 > base_buffer_.capacity()) {
179
+ const size_t new_size = std::max(std::min(static_cast<size_t>(2 * k_), 2 * base_buffer_.size()), static_cast<size_t>(1));
180
+ base_buffer_.reserve(new_size);
181
+ }
182
+ }
183
+ else {
184
+ const uint8_t idx = count_trailing_zeros_in_u64(pair.second) - 1;
185
+ levels_[idx].push_back(T(pair.first));
186
+ }
187
+ }
188
+
189
+ // validate that ordering within each level is preserved
190
+ // base_buffer_ can be considered unsorted for this purpose
191
+ for (int i = 0; i < num_levels; ++i) {
192
+ if (!std::is_sorted(levels_[i].begin(), levels_[i].end(), C())) {
193
+ throw std::logic_error("Copy construction across types produces invalid sorting");
194
+ }
195
+ }
196
+ }
197
+ }
198
+
199
+
141
200
  template<typename T, typename C, typename A>
142
201
  quantiles_sketch<T, C, A>::~quantiles_sketch() {
143
202
  if (min_value_ != nullptr) {
@@ -238,7 +297,7 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
238
297
  );
239
298
  write(os, flags_byte);
240
299
  write(os, k_);
241
- uint16_t unused = 0;
300
+ const uint16_t unused = 0;
242
301
  write(os, unused);
243
302
 
244
303
  if (!is_empty()) {
@@ -624,6 +683,11 @@ C quantiles_sketch<T, C, A>::get_comparator() const {
624
683
  return C();
625
684
  }
626
685
 
686
+ template<typename T, typename C, typename A>
687
+ A quantiles_sketch<T, C, A>::get_allocator() const {
688
+ return allocator_;
689
+ }
690
+
627
691
  // implementation for fixed-size arithmetic types (integral and floating point)
628
692
  template<typename T, typename C, typename A>
629
693
  template<typename SerDe, typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
@@ -783,9 +847,9 @@ auto quantiles_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size) co
783
847
 
784
848
  template<typename T, typename C, typename A>
785
849
  uint32_t quantiles_sketch<T, C, A>::compute_retained_items(const uint16_t k, const uint64_t n) {
786
- uint32_t bb_count = compute_base_buffer_items(k, n);
787
- uint64_t bit_pattern = compute_bit_pattern(k, n);
788
- uint32_t valid_levels = compute_valid_levels(bit_pattern);
850
+ const uint32_t bb_count = compute_base_buffer_items(k, n);
851
+ const uint64_t bit_pattern = compute_bit_pattern(k, n);
852
+ const uint32_t valid_levels = compute_valid_levels(bit_pattern);
789
853
  return bb_count + (k * valid_levels);
790
854
  }
791
855
 
@@ -843,11 +907,11 @@ void quantiles_sketch<T, C, A>::check_family_id(uint8_t family_id) {
843
907
 
844
908
  template<typename T, typename C, typename A>
845
909
  void quantiles_sketch<T, C, A>::check_header_validity(uint8_t preamble_longs, uint8_t flags_byte, uint8_t serial_version) {
846
- bool empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
847
- bool compact = (flags_byte & (1 << flags::IS_COMPACT)) > 0;
910
+ const bool empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
911
+ const bool compact = (flags_byte & (1 << flags::IS_COMPACT)) > 0;
848
912
 
849
- uint8_t sw = (compact ? 1 : 0) + (2 * (empty ? 1 : 0))
850
- + (4 * (serial_version & 0xF)) + (32 * (preamble_longs & 0x3F));
913
+ const uint8_t sw = (compact ? 1 : 0) + (2 * (empty ? 1 : 0))
914
+ + (4 * (serial_version & 0xF)) + (32 * (preamble_longs & 0x3F));
851
915
  bool valid = true;
852
916
 
853
917
  switch (sw) { // exhaustive list and description of all valid cases
@@ -888,7 +952,7 @@ typename quantiles_sketch<T, C, A>::const_iterator quantiles_sketch<T, C, A>::en
888
952
 
889
953
  template<typename T, typename C, typename A>
890
954
  void quantiles_sketch<T, C, A>::grow_base_buffer() {
891
- size_t new_size = std::max(std::min(static_cast<size_t>(2 * k_), 2 * base_buffer_.size()), static_cast<size_t>(1));
955
+ const size_t new_size = std::max(std::min(static_cast<size_t>(2 * k_), 2 * base_buffer_.size()), static_cast<size_t>(1));
892
956
  base_buffer_.reserve(new_size);
893
957
  }
894
958
 
@@ -912,7 +976,7 @@ void quantiles_sketch<T, C, A>::process_full_base_buffer() {
912
976
 
913
977
  template<typename T, typename C, typename A>
914
978
  bool quantiles_sketch<T, C, A>::grow_levels_if_needed() {
915
- uint8_t levels_needed = compute_levels_needed(k_, n_);
979
+ const uint8_t levels_needed = compute_levels_needed(k_, n_);
916
980
  if (levels_needed == 0)
917
981
  return false; // don't need levels and might have small base buffer. Possible during merges.
918
982
 
@@ -992,7 +1056,7 @@ template<typename FwdV>
992
1056
  void quantiles_sketch<T, C, A>::zip_buffer_with_stride(FwdV&& buf_in, Level& buf_out, uint16_t stride) {
993
1057
  // Random offset in range [0, stride)
994
1058
  std::uniform_int_distribution<uint16_t> dist(0, stride - 1);
995
- uint16_t rand_offset = dist(random_utils::rand);
1059
+ const uint16_t rand_offset = dist(random_utils::rand);
996
1060
 
997
1061
  if ((buf_in.size() != stride * buf_out.capacity())
998
1062
  || (buf_out.size() > 0)) {
@@ -1000,7 +1064,7 @@ void quantiles_sketch<T, C, A>::zip_buffer_with_stride(FwdV&& buf_in, Level& buf
1000
1064
  "stride*buf_out.capacity() and empty buf_out");
1001
1065
  }
1002
1066
 
1003
- size_t k = buf_out.capacity();
1067
+ const size_t k = buf_out.capacity();
1004
1068
  for (uint16_t i = rand_offset, o = 0; o < k; i += stride, ++o) {
1005
1069
  buf_out.push_back(conditional_forward<FwdV>(buf_in[i]));
1006
1070
  }
@@ -1117,7 +1181,7 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
1117
1181
  const uint16_t downsample_factor = src.get_k() / tgt.get_k();
1118
1182
  const uint8_t lg_sample_factor = count_trailing_zeros_in_u32(downsample_factor);
1119
1183
 
1120
- uint64_t new_n = src.get_n() + tgt.get_n();
1184
+ const uint64_t new_n = src.get_n() + tgt.get_n();
1121
1185
 
1122
1186
  // move items from src's base buffer
1123
1187
  for (uint16_t i = 0; i < src.base_buffer_.size(); ++i) {
@@ -1125,7 +1189,7 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
1125
1189
  }
1126
1190
 
1127
1191
  // check (after moving raw items) if we need to extend levels array
1128
- uint8_t levels_needed = compute_levels_needed(tgt.get_k(), new_n);
1192
+ const uint8_t levels_needed = compute_levels_needed(tgt.get_k(), new_n);
1129
1193
  if (levels_needed > tgt.levels_.size()) {
1130
1194
  tgt.levels_.reserve(levels_needed);
1131
1195
  while (tgt.levels_.size() < levels_needed) {
@@ -82,7 +82,7 @@ TEST_CASE("kolmogorov-smirnov slightly different distributions", "[quantiles_ske
82
82
  const double delta = kolmogorov_smirnov::delta(sketch1, sketch2);
83
83
  REQUIRE(delta == Approx(0.02).margin(0.01));
84
84
  const double threshold = kolmogorov_smirnov::threshold(sketch1, sketch2, 0.05);
85
- std::cout << "delta=" << delta << ", threshold=" << threshold << "\n";
85
+
86
86
  REQUIRE_FALSE(delta > threshold);
87
87
  REQUIRE_FALSE(kolmogorov_smirnov::test(sketch1, sketch2, 0.05));
88
88
  }
@@ -102,7 +102,7 @@ TEST_CASE("kolmogorov-smirnov slightly different distributions high resolution",
102
102
  const double delta = kolmogorov_smirnov::delta(sketch1, sketch2);
103
103
  REQUIRE(delta == Approx(0.02).margin(0.01));
104
104
  const double threshold = kolmogorov_smirnov::threshold(sketch1, sketch2, 0.05);
105
- std::cout << "delta=" << delta << ", threshold=" << threshold << "\n";
105
+
106
106
  REQUIRE(delta > threshold);
107
107
  REQUIRE(kolmogorov_smirnov::test(sketch1, sketch2, 0.05));
108
108
  }
@@ -903,6 +903,69 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
903
903
  }
904
904
  }
905
905
 
906
+ SECTION("Type converting copy constructor") {
907
+ const uint16_t k = 8;
908
+ const int n = 403;
909
+ quantiles_sketch<double> sk_double(k);
910
+
911
+ quantiles_sketch<float> sk_float(k, sk_double.get_allocator());
912
+ REQUIRE(sk_float.is_empty());
913
+
914
+ for (int i = 0; i < n; ++i) sk_double.update(i + .01);
915
+
916
+ quantiles_sketch<int> sk_int(sk_double);
917
+ REQUIRE(sk_double.get_n() == sk_int.get_n());
918
+ REQUIRE(sk_double.get_k() == sk_int.get_k());
919
+ REQUIRE(sk_double.get_num_retained() == sk_int.get_num_retained());
920
+
921
+ auto sv_double = sk_double.get_sorted_view(false);
922
+ std::vector<std::pair<double, uint64_t>> vec_double(sv_double.begin(), sv_double.end());
923
+
924
+ auto sv_int = sk_int.get_sorted_view(false);
925
+ std::vector<std::pair<int, uint64_t>> vec_int(sv_int.begin(), sv_int.end());
926
+
927
+ REQUIRE(vec_double.size() == vec_int.size());
928
+
929
+ for (size_t i = 0; i < vec_int.size(); ++i) {
930
+ // known truncation with conversion so approximate result
931
+ REQUIRE(vec_double[i].first == Approx(vec_int[i].first).margin(0.1));
932
+ // exact equality for weights
933
+ REQUIRE(vec_double[i].second == vec_int[i].second);
934
+ }
935
+ }
936
+
937
+ class A {
938
+ int val;
939
+ public:
940
+ A(int val): val(val) {}
941
+ int get_val() const { return val; }
942
+ };
943
+
944
+ struct less_A {
945
+ bool operator()(const A& a1, const A& a2) const { return a1.get_val() < a2.get_val(); }
946
+ };
947
+
948
+ class B {
949
+ int val;
950
+ public:
951
+ explicit B(const A& a): val(a.get_val()) {}
952
+ int get_val() const { return val; }
953
+ };
954
+
955
+ struct less_B {
956
+ bool operator()(const B& b1, const B& b2) const { return b1.get_val() < b2.get_val(); }
957
+ };
958
+
959
+ SECTION("type conversion: custom types") {
960
+ quantiles_sketch<A, less_A> sa;
961
+ sa.update(1);
962
+ sa.update(2);
963
+ sa.update(3);
964
+
965
+ quantiles_sketch<B, less_B> sb(sa);
966
+ REQUIRE(sb.get_n() == 3);
967
+ }
968
+
906
969
  // cleanup
907
970
  if (test_allocator_total_bytes != 0) {
908
971
  REQUIRE(test_allocator_total_bytes == 0);
@@ -38,6 +38,9 @@ public:
38
38
  req_compactor& operator=(const req_compactor& other);
39
39
  req_compactor& operator=(req_compactor&& other);
40
40
 
41
+ template<typename TT, typename CC, typename AA>
42
+ req_compactor(const req_compactor<TT, CC, AA>& other, const Allocator& allocator);
43
+
41
44
  bool is_sorted() const;
42
45
  uint32_t get_num_items() const;
43
46
  uint32_t get_nom_capacity() const;
@@ -128,6 +131,9 @@ private:
128
131
  template<typename S>
129
132
  static std::pair<std::unique_ptr<T, items_deleter>, size_t> deserialize_items(const void* bytes, size_t size, const S& serde, const Allocator& allocator, uint32_t num);
130
133
 
134
+ // for type converting constructor
135
+ template<typename TT, typename CC, typename AA>
136
+ friend class req_compactor;
131
137
  };
132
138
 
133
139
  } /* namespace datasketches */
@@ -132,6 +132,33 @@ req_compactor<T, C, A>& req_compactor<T, C, A>::operator=(req_compactor&& other)
132
132
  return *this;
133
133
  }
134
134
 
135
+ template<typename T, typename C, typename A>
136
+ template<typename TT, typename CC, typename AA>
137
+ req_compactor<T, C, A>::req_compactor(const req_compactor<TT, CC, AA>& other, const A& allocator):
138
+ allocator_(allocator),
139
+ lg_weight_(other.lg_weight_),
140
+ hra_(other.hra_),
141
+ coin_(other.coin_),
142
+ sorted_(other.sorted_),
143
+ section_size_raw_(other.section_size_raw_),
144
+ section_size_(other.section_size_),
145
+ num_sections_(other.num_sections_),
146
+ state_(other.state_),
147
+ num_items_(other.num_items_),
148
+ capacity_(other.capacity_),
149
+ items_(nullptr)
150
+ {
151
+ if (other.items_ != nullptr) {
152
+ items_ = allocator_.allocate(capacity_);
153
+ const uint32_t from = hra_ ? capacity_ - num_items_ : 0;
154
+ const uint32_t to = hra_ ? capacity_ : num_items_;
155
+ for (uint32_t i = from; i < to; ++i) new (items_ + i) T(other.items_[i]);
156
+ if (sorted_ && !std::is_sorted(items_ + from, items_ + to, C())) {
157
+ throw std::logic_error("items must be sorted");
158
+ }
159
+ }
160
+ }
161
+
135
162
  template<typename T, typename C, typename A>
136
163
  bool req_compactor<T, C, A>::is_sorted() const {
137
164
  return sorted_;
@@ -58,6 +58,14 @@ public:
58
58
  req_sketch& operator=(const req_sketch& other);
59
59
  req_sketch& operator=(req_sketch&& other);
60
60
 
61
+ /*
62
+ * Type converting constructor.
63
+ * @param other sketch of a different type
64
+ * @param allocator instance of an Allocator
65
+ */
66
+ template<typename TT, typename CC, typename SS, typename AA>
67
+ explicit req_sketch(const req_sketch<TT, CC, SS, AA>& other, const Allocator& allocator = Allocator());
68
+
61
69
  /**
62
70
  * Returns configured parameter K
63
71
  * @return parameter K
@@ -408,6 +416,9 @@ private:
408
416
  }
409
417
  }
410
418
 
419
+ // for type converting constructor
420
+ template<typename TT, typename CC, typename SS, typename AA>
421
+ friend class req_sketch;
411
422
  };
412
423
 
413
424
  template<typename T, typename C, typename S, typename A>
@@ -64,8 +64,8 @@ compactors_(other.compactors_),
64
64
  min_value_(nullptr),
65
65
  max_value_(nullptr)
66
66
  {
67
- if (other.min_value_ != nullptr) min_value_ = new (A().allocate(1)) T(*other.min_value_);
68
- if (other.max_value_ != nullptr) max_value_ = new (A().allocate(1)) T(*other.max_value_);
67
+ if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
68
+ if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
69
69
  }
70
70
 
71
71
  template<typename T, typename C, typename S, typename A>
@@ -113,6 +113,33 @@ req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(req_sketch&& other) {
113
113
  return *this;
114
114
  }
115
115
 
116
+ template<typename T, typename C, typename S, typename A>
117
+ template<typename TT, typename CC, typename SS, typename AA>
118
+ req_sketch<T, C, S, A>::req_sketch(const req_sketch<TT, CC, SS, AA>& other, const A& allocator):
119
+ allocator_(allocator),
120
+ k_(other.k_),
121
+ hra_(other.hra_),
122
+ max_nom_size_(other.max_nom_size_),
123
+ num_retained_(other.num_retained_),
124
+ n_(other.n_),
125
+ compactors_(allocator),
126
+ min_value_(nullptr),
127
+ max_value_(nullptr)
128
+ {
129
+ static_assert(
130
+ std::is_constructible<T, TT>::value,
131
+ "Type converting constructor requires new type to be constructible from existing type"
132
+ );
133
+ compactors_.reserve(other.compactors_.size());
134
+ for (const auto& compactor: other.compactors_) {
135
+ compactors_.push_back(req_compactor<T, C, A>(compactor, allocator_));
136
+ }
137
+ if (!other.is_empty()) {
138
+ min_value_ = new (allocator_.allocate(1)) T(other.get_min_value());
139
+ max_value_ = new (allocator_.allocate(1)) T(other.get_max_value());
140
+ }
141
+ }
142
+
116
143
  template<typename T, typename C, typename S, typename A>
117
144
  uint16_t req_sketch<T, C, S, A>::get_k() const {
118
145
  return k_;
@@ -35,7 +35,7 @@ const std::string input_path = "test/";
35
35
  #endif
36
36
 
37
37
  TEST_CASE("req sketch: empty", "[req_sketch]") {
38
- std::cout << "sizeof(req_float_sketch)=" << sizeof(req_sketch<float>) << "\n";
38
+ //std::cout << "sizeof(req_float_sketch)=" << sizeof(req_sketch<float>) << "\n";
39
39
  req_sketch<float> sketch(12);
40
40
  REQUIRE(sketch.get_k() == 12);
41
41
  REQUIRE(sketch.is_HRA());
@@ -245,7 +245,7 @@ TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]")
245
245
  auto bytes = sketch.serialize();
246
246
  REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
247
247
  auto sketch2 = req_sketch<float>::deserialize(bytes.data(), bytes.size());
248
- std::cout << sketch2.to_string(true);
248
+ //std::cout << sketch2.to_string(true);
249
249
  REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
250
250
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
251
251
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
@@ -282,7 +282,7 @@ TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
282
282
  auto bytes = sketch.serialize();
283
283
  REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
284
284
  auto sketch2 = req_sketch<float>::deserialize(bytes.data(), bytes.size());
285
- std::cout << sketch2.to_string(true);
285
+ //std::cout << sketch2.to_string(true);
286
286
  REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
287
287
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
288
288
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
@@ -485,6 +485,72 @@ TEST_CASE("req sketch: merge incompatible HRA and LRA", "[req_sketch]") {
485
485
  REQUIRE_THROWS_AS(sketch1.merge(sketch2), std::invalid_argument);
486
486
  }
487
487
 
488
+ TEST_CASE("req sketch: type conversion - empty", "[req_sketch]") {
489
+ req_sketch<double> req_double(12);
490
+ req_sketch<float> req_float(req_double);
491
+ REQUIRE(req_float.is_empty());
492
+ REQUIRE(req_float.get_k() == req_double.get_k());
493
+ REQUIRE(req_float.get_n() == 0);
494
+ REQUIRE(req_float.get_num_retained() == 0);
495
+ }
496
+
497
+ TEST_CASE("req sketch: type conversion - several levels", "[req_sketch]") {
498
+ req_sketch<double> req_double(12);
499
+ for (int i = 0; i < 1000; ++i) req_double.update(static_cast<double>(i));
500
+ req_sketch<float> req_float(req_double);
501
+ REQUIRE(!req_float.is_empty());
502
+ REQUIRE(req_float.get_k() == req_double.get_k());
503
+ REQUIRE(req_float.get_n() == req_double.get_n());
504
+ REQUIRE(req_float.get_num_retained() == req_double.get_num_retained());
505
+
506
+ auto sv_float = req_float.get_sorted_view(false);
507
+ auto sv_double = req_double.get_sorted_view(false);
508
+ auto sv_float_it = sv_float.begin();
509
+ auto sv_double_it = sv_double.begin();
510
+ while (sv_float_it != sv_float.end()) {
511
+ REQUIRE(sv_double_it != sv_double.end());
512
+ auto float_pair = *sv_float_it;
513
+ auto double_pair = *sv_double_it;
514
+ REQUIRE(float_pair.first == Approx(double_pair.first).margin(0.01));
515
+ REQUIRE(float_pair.second == double_pair.second);
516
+ ++sv_float_it;
517
+ ++sv_double_it;
518
+ }
519
+ REQUIRE(sv_double_it == sv_double.end());
520
+ }
521
+
522
+ class A {
523
+ int val;
524
+ public:
525
+ A(int val): val(val) {}
526
+ int get_val() const { return val; }
527
+ };
528
+
529
+ struct less_A {
530
+ bool operator()(const A& a1, const A& a2) const { return a1.get_val() < a2.get_val(); }
531
+ };
532
+
533
+ class B {
534
+ int val;
535
+ public:
536
+ explicit B(const A& a): val(a.get_val()) {}
537
+ int get_val() const { return val; }
538
+ };
539
+
540
+ struct less_B {
541
+ bool operator()(const B& b1, const B& b2) const { return b1.get_val() < b2.get_val(); }
542
+ };
543
+
544
+ TEST_CASE("req sketch: type conversion - custom types") {
545
+ req_sketch<A, less_A> sa(4);
546
+ sa.update(1);
547
+ sa.update(2);
548
+ sa.update(3);
549
+
550
+ req_sketch<B, less_B> sb(sa);
551
+ REQUIRE(sb.get_n() == 3);
552
+ }
553
+
488
554
  //TEST_CASE("for manual comparison with Java") {
489
555
  // req_sketch<float> sketch(12, false);
490
556
  // for (size_t i = 0; i < 100000; ++i) sketch.update(i);
@@ -81,7 +81,7 @@ class CMakeBuild(build_ext):
81
81
 
82
82
  setup(
83
83
  name='datasketches',
84
- version='3.4.0',
84
+ version='3.5.0',
85
85
  author='Apache Software Foundation',
86
86
  author_email='dev@datasketches.apache.org',
87
87
  description='The Apache DataSketches Library for Python',
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datasketches
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-21 00:00:00.000000000 Z
11
+ date: 2022-07-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice