datasketches 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9eaa8a17efdbc591b3e56f94650e887babd30dc79d95db3a7986df0261184191
4
- data.tar.gz: 5544326a0edf165d87373a680d8bf5b80acba2894b9048f92cbdb261fcd66d57
3
+ metadata.gz: cf1ea0f9f2d12b0e46c2d4c7dec21f41992e711e73eca68ea1ef03a4bb711077
4
+ data.tar.gz: 92f56b63da0254962be47d8d3e00a6950a271053bf3152167f95e6fdb99528e6
5
5
  SHA512:
6
- metadata.gz: 5a28c093ecda083762367149800770f59fee8e630c0d983d3f29ed32d027fae2e2515dff243ee11bbd41f4875c7cea622f7bc5cc5d7e73176e785503ed19fc0b
7
- data.tar.gz: 6b210f2fdca1ae3cbd4e4cbf88e284855014b5a1e1c883085dc96a057da29e370005163ce628e54351c9127b00fae4b7b33a4ca63e6f4b90e0665e93b7742a66
6
+ metadata.gz: 5841d4a70f1e852faa150f57ebfefc7b975de020782c41eebdad87a01d016be9bdf86f86173600632bf6f56300df0c9c4196251aa5df02a47ecd357ac844ef80
7
+ data.tar.gz: d6ae7c811e0e2c2008b912e29f86d1b99491c74cd878790dfd800811a007f0dbf9c49bb59db30345450ff82673381f2c036a84a57dc44a6f6751610d9be2ee88
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.6 (2022-07-13)
2
+
3
+ - Updated DataSketches to 3.5.0
4
+
1
5
  ## 0.2.5 (2022-05-21)
2
6
 
3
7
  - Updated DataSketches to 3.4.0
@@ -55,12 +55,12 @@ void bind_kll_sketch(Rice::Module& m, const char* name) {
55
55
  })
56
56
  .define_method(
57
57
  "pmf",
58
- [](kll_sketch<T>& self, std::vector<T> split_points) {
58
+ [](kll_sketch<T>& self, const std::vector<T>& split_points) {
59
59
  return self.get_PMF(&split_points[0], split_points.size());
60
60
  })
61
61
  .define_method(
62
62
  "cdf",
63
- [](kll_sketch<T>& self, std::vector<T> split_points) {
63
+ [](kll_sketch<T>& self, const std::vector<T>& split_points) {
64
64
  return self.get_CDF(&split_points[0], split_points.size());
65
65
  })
66
66
  .define_method(
@@ -1,3 +1,3 @@
1
1
  module DataSketches
2
- VERSION = "0.2.5"
2
+ VERSION = "0.2.6"
3
3
  end
@@ -17,7 +17,7 @@
17
17
 
18
18
  cmake_minimum_required(VERSION 3.16.0)
19
19
  project(DataSketches
20
- VERSION 3.4.0
20
+ VERSION 3.5.0
21
21
  LANGUAGES CXX)
22
22
 
23
23
  include(GNUInstallDirs)
@@ -1,11 +1,12 @@
1
- Apache DataSketches-cpp
2
- Copyright 2020-2021 The Apache Software Foundation
1
+ Apache DataSketches C++ and Python
2
+ Copyright 2022 The Apache Software Foundation
3
3
 
4
- Copyright 2015-2018 Yahoo
5
- Copyright 2019 Verizon Media
4
+ Copyright 2015-2018 Yahoo Inc.
5
+ Copyright 2019-2020 Verizon Media
6
+ Copyright 2021 Yahoo Inc.
6
7
 
7
8
  This product includes software developed at
8
9
  The Apache Software Foundation (http://www.apache.org/).
9
10
 
10
11
  Prior to moving to ASF, the software for this project was developed at
11
- Yahoo (now Verizon Media) (https://developer.yahoo.com).
12
+ Yahoo Inc. (https://developer.yahoo.com).
@@ -43,8 +43,8 @@ install(FILES
43
43
  include/conditional_forward.hpp
44
44
  include/ceiling_power_of_2.hpp
45
45
  include/bounds_binomial_proportions.hpp
46
- include/kolmogorov_smirnov.hpp
47
- include/kolmogorov_smirnov_impl.hpp
48
46
  include/quantile_sketch_sorted_view.hpp
49
47
  include/quantile_sketch_sorted_view_impl.hpp
48
+ include/kolmogorov_smirnov.hpp
49
+ include/kolmogorov_smirnov_impl.hpp
50
50
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
@@ -297,6 +297,7 @@ void cpc_compressor<A>::compress_sliding_flavor(const cpc_sketch_alloc<A>& sourc
297
297
  // changes the implied ordering of the pairs, so we must do it before sorting.
298
298
 
299
299
  const uint8_t pseudo_phase = determine_pseudo_phase(source.get_lg_k(), source.get_num_coupons());
300
+ if (pseudo_phase >= 16) throw std::logic_error("unexpected pseudo phase for sliding flavor");
300
301
  const uint8_t* permutation = column_permutations_for_encoding[pseudo_phase];
301
302
 
302
303
  const uint8_t offset = source.window_offset;
@@ -333,7 +334,7 @@ void cpc_compressor<A>::uncompress_sliding_flavor(const compressed_state<A>& sou
333
334
  lg_k, source.table_data.get_allocator());
334
335
 
335
336
  const uint8_t pseudo_phase = determine_pseudo_phase(lg_k, num_coupons);
336
- if (pseudo_phase >= 16) throw std::logic_error("pseudo phase >= 16");
337
+ if (pseudo_phase >= 16) throw std::logic_error("unexpected pseudo phase for sliding flavor");
337
338
  const uint8_t* permutation = column_permutations_for_decoding[pseudo_phase];
338
339
 
339
340
  uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k, num_coupons);
@@ -230,7 +230,7 @@ kll_helper::compress_result kll_helper::general_compress(uint16_t k, uint8_t m,
230
230
  // move level over as is
231
231
  // make sure we are not moving data upwards
232
232
  if (raw_beg < out_levels[current_level]) throw std::logic_error("wrong move");
233
- std::move(&items[raw_beg], &items[raw_lim], &items[out_levels[current_level]]);
233
+ std::move(items + raw_beg, items + raw_lim, items + out_levels[current_level]);
234
234
  out_levels[current_level + 1] = out_levels[current_level] + raw_pop;
235
235
  } else {
236
236
  // The sketch is too full AND this level is too full, so we compact it
@@ -251,7 +251,7 @@ kll_helper::compress_result kll_helper::general_compress(uint16_t k, uint8_t m,
251
251
 
252
252
  // level zero might not be sorted, so we must sort it if we wish to compact it
253
253
  if ((current_level == 0) && !is_level_zero_sorted) {
254
- std::sort(&items[adj_beg], &items[adj_beg + adj_pop], C());
254
+ std::sort(items + adj_beg, items + adj_beg + adj_pop, C());
255
255
  }
256
256
 
257
257
  if (pop_above == 0) { // Level above is empty, so halve up
@@ -170,7 +170,7 @@ class kll_sketch {
170
170
  using comparator = C;
171
171
 
172
172
  static const uint8_t DEFAULT_M = 8;
173
- // TODO: Redundant and deprecated. Will be remove din next major version.
173
+ // TODO: Redundant and deprecated. Will be removed in next major version.
174
174
  static const uint16_t DEFAULT_K = kll_constants::DEFAULT_K;
175
175
  static const uint16_t MIN_K = DEFAULT_M;
176
176
  static const uint16_t MAX_K = (1 << 16) - 1;
@@ -182,6 +182,14 @@ class kll_sketch {
182
182
  kll_sketch& operator=(const kll_sketch& other);
183
183
  kll_sketch& operator=(kll_sketch&& other);
184
184
 
185
+ /*
186
+ * Type converting constructor.
187
+ * @param other sketch of a different type
188
+ * @param allocator instance of an Allocator
189
+ */
190
+ template<typename TT, typename CC, typename SS, typename AA>
191
+ explicit kll_sketch(const kll_sketch<TT, CC, SS, AA>& other, const A& allocator = A());
192
+
185
193
  /**
186
194
  * Updates this sketch with the given data item.
187
195
  * @param value an item from a stream of items
@@ -390,7 +398,7 @@ class kll_sketch {
390
398
  /**
391
399
  * Computes size needed to serialize the current state of the sketch.
392
400
  * This version is for fixed-size arithmetic types (integral and floating point).
393
- * @param instance of a SerDe
401
+ * @param serde instance of a SerDe
394
402
  * @return size in bytes needed to serialize this sketch
395
403
  */
396
404
  template<typename TT = T, typename SerDe = S, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
@@ -399,7 +407,7 @@ class kll_sketch {
399
407
  /**
400
408
  * Computes size needed to serialize the current state of the sketch.
401
409
  * This version is for all other types and can be expensive since every item needs to be looked at.
402
- * @param instance of a SerDe
410
+ * @param serde instance of a SerDe
403
411
  * @return size in bytes needed to serialize this sketch
404
412
  */
405
413
  template<typename TT = T, typename SerDe = S, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
@@ -459,7 +467,7 @@ class kll_sketch {
459
467
  /**
460
468
  * This method deserializes a sketch from a given stream.
461
469
  * @param is input stream
462
- * @param instance of an Allocator
470
+ * @param allocator instance of an Allocator
463
471
  * @return an instance of a sketch
464
472
  *
465
473
  * Deprecated, to be removed in the next major version
@@ -469,8 +477,8 @@ class kll_sketch {
469
477
  /**
470
478
  * This method deserializes a sketch from a given stream.
471
479
  * @param is input stream
472
- * @param instance of a SerDe
473
- * @param instance of an Allocator
480
+ * @param serde instance of a SerDe
481
+ * @param allocator instance of an Allocator
474
482
  * @return an instance of a sketch
475
483
  */
476
484
  template<typename SerDe = S>
@@ -480,7 +488,7 @@ class kll_sketch {
480
488
  * This method deserializes a sketch from a given array of bytes.
481
489
  * @param bytes pointer to the array of bytes
482
490
  * @param size the size of the array
483
- * @param instance of an Allocator
491
+ * @param allocator instance of an Allocator
484
492
  * @return an instance of a sketch
485
493
  *
486
494
  * Deprecated, to be removed in the next major version
@@ -491,8 +499,8 @@ class kll_sketch {
491
499
  * This method deserializes a sketch from a given array of bytes.
492
500
  * @param bytes pointer to the array of bytes
493
501
  * @param size the size of the array
494
- * @param instance of a SerDe
495
- * @param instance of an Allocator
502
+ * @param serde instance of a SerDe
503
+ * @param allocator instance of an Allocator
496
504
  * @return an instance of a sketch
497
505
  */
498
506
  template<typename SerDe = S>
@@ -606,6 +614,8 @@ class kll_sketch {
606
614
  static void check_serial_version(uint8_t serial_version);
607
615
  static void check_family_id(uint8_t family_id);
608
616
 
617
+ void check_sorting() const;
618
+
609
619
  // implementations for floating point types
610
620
  template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
611
621
  static const TT& get_invalid_value() {
@@ -629,6 +639,9 @@ class kll_sketch {
629
639
  return true;
630
640
  }
631
641
 
642
+ // for type converting constructor
643
+ template<typename TT, typename CC, typename SS, typename AA>
644
+ friend class kll_sketch;
632
645
  };
633
646
 
634
647
  template<typename T, typename C, typename S, typename A>
@@ -26,6 +26,7 @@
26
26
  #include <stdexcept>
27
27
 
28
28
  #include "conditional_forward.hpp"
29
+ #include "count_zeros.hpp"
29
30
  #include "memory_operations.hpp"
30
31
  #include "kll_helper.hpp"
31
32
 
@@ -69,7 +70,7 @@ max_value_(nullptr),
69
70
  is_level_zero_sorted_(other.is_level_zero_sorted_)
70
71
  {
71
72
  items_ = allocator_.allocate(items_size_);
72
- std::copy(&other.items_[levels_[0]], &other.items_[levels_[num_levels_]], &items_[levels_[0]]);
73
+ for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
73
74
  if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
74
75
  if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
75
76
  }
@@ -147,6 +148,33 @@ kll_sketch<T, C, S, A>::~kll_sketch() {
147
148
  }
148
149
  }
149
150
 
151
+ template<typename T, typename C, typename S, typename A>
152
+ template<typename TT, typename CC, typename SS, typename AA>
153
+ kll_sketch<T, C, S, A>::kll_sketch(const kll_sketch<TT, CC, SS, AA>& other, const A& allocator):
154
+ allocator_(allocator),
155
+ k_(other.k_),
156
+ m_(other.m_),
157
+ min_k_(other.min_k_),
158
+ n_(other.n_),
159
+ num_levels_(other.num_levels_),
160
+ levels_(other.levels_, allocator_),
161
+ items_(nullptr),
162
+ items_size_(other.items_size_),
163
+ min_value_(nullptr),
164
+ max_value_(nullptr),
165
+ is_level_zero_sorted_(other.is_level_zero_sorted_)
166
+ {
167
+ static_assert(
168
+ std::is_constructible<T, TT>::value,
169
+ "Type converting constructor requires new type to be constructible from existing type"
170
+ );
171
+ items_ = allocator_.allocate(items_size_);
172
+ for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
173
+ if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
174
+ if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
175
+ check_sorting();
176
+ }
177
+
150
178
  template<typename T, typename C, typename S, typename A>
151
179
  template<typename FwdT>
152
180
  void kll_sketch<T, C, S, A>::update(FwdT&& value) {
@@ -305,8 +333,8 @@ double kll_sketch<T, C, S, A>::get_rank(const T& value) const {
305
333
  uint64_t weight = 1;
306
334
  uint64_t total = 0;
307
335
  while (level < num_levels_) {
308
- const auto from_index(levels_[level]);
309
- const auto to_index(levels_[level + 1]); // exclusive
336
+ const auto from_index = levels_[level];
337
+ const auto to_index = levels_[level + 1]; // exclusive
310
338
  for (uint32_t i = from_index; i < to_index; i++) {
311
339
  if (inclusive ? !C()(value, items_[i]) : C()(items_[i], value)) {
312
340
  total += weight;
@@ -694,7 +722,7 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
694
722
  // level zero might not be sorted, so we must sort it if we wish to compact it
695
723
  // sort_level_zero() is not used here because of the adjustment for odd number of items
696
724
  if ((level == 0) && !is_level_zero_sorted_) {
697
- std::sort(&items_[adj_beg], &items_[adj_beg + adj_pop], C());
725
+ std::sort(items_ + adj_beg, items_ + adj_beg + adj_pop, C());
698
726
  }
699
727
  if (pop_above == 0) {
700
728
  kll_helper::randomly_halve_up(items_, adj_beg, adj_pop);
@@ -717,7 +745,7 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
717
745
  // so that the freed-up space can be used by level zero
718
746
  if (level > 0) {
719
747
  const uint32_t amount = raw_beg - levels_[0];
720
- std::move_backward(&items_[levels_[0]], &items_[levels_[0] + amount], &items_[levels_[0] + half_adj_pop + amount]);
748
+ std::move_backward(items_ + levels_[0], items_ + levels_[0] + amount, items_ + levels_[0] + half_adj_pop + amount);
721
749
  for (uint8_t lvl = 0; lvl < level; lvl++) levels_[lvl] += half_adj_pop;
722
750
  }
723
751
  for (uint32_t i = 0; i < half_adj_pop; i++) items_[i + destroy_beg].~T();
@@ -775,22 +803,32 @@ void kll_sketch<T, C, S, A>::add_empty_top_level_to_completely_full_sketch() {
775
803
  template<typename T, typename C, typename S, typename A>
776
804
  void kll_sketch<T, C, S, A>::sort_level_zero() {
777
805
  if (!is_level_zero_sorted_) {
778
- std::sort(&items_[levels_[0]], &items_[levels_[1]], C());
806
+ std::sort(items_ + levels_[0], items_ + levels_[1], C());
779
807
  is_level_zero_sorted_ = true;
780
808
  }
781
809
  }
782
810
 
811
+ template<typename T, typename C, typename S, typename A>
812
+ void kll_sketch<T, C, S, A>::check_sorting() const {
813
+ // not checking level 0
814
+ for (uint8_t level = 1; level < num_levels_; ++level) {
815
+ const auto from = items_ + levels_[level];
816
+ const auto to = items_ + levels_[level + 1];
817
+ if (!std::is_sorted(from, to, C())) {
818
+ throw std::logic_error("levels must be sorted");
819
+ }
820
+ }
821
+ }
822
+
783
823
  template<typename T, typename C, typename S, typename A>
784
824
  template<bool inclusive>
785
825
  quantile_sketch_sorted_view<T, C, A> kll_sketch<T, C, S, A>::get_sorted_view(bool cumulative) const {
786
826
  const_cast<kll_sketch*>(this)->sort_level_zero(); // allow this side effect
787
827
  quantile_sketch_sorted_view<T, C, A> view(get_num_retained(), allocator_);
788
- uint8_t level = 0;
789
- while (level < num_levels_) {
828
+ for (uint8_t level = 0; level < num_levels_; ++level) {
790
829
  const auto from = items_ + levels_[level];
791
830
  const auto to = items_ + levels_[level + 1]; // exclusive
792
831
  view.add(from, to, 1 << level);
793
- ++level;
794
832
  }
795
833
  if (cumulative) view.template convert_to_cummulative<inclusive>();
796
834
  return view;
@@ -39,9 +39,9 @@ static std::string testBinaryInputPath = "test/";
39
39
  #endif
40
40
 
41
41
  // typical usage would be just kll_sketch<float> or kll_sketch<std::string>, but here we use test_allocator
42
- typedef kll_sketch<float, std::less<float>, serde<float>, test_allocator<float>> kll_float_sketch;
42
+ using kll_float_sketch = kll_sketch<float, std::less<float>, serde<float>, test_allocator<float>>;
43
43
  // let std::string use the default allocator for simplicity, otherwise we need to define "less" and "serde"
44
- typedef kll_sketch<std::string, std::less<std::string>, serde<std::string>, test_allocator<std::string>> kll_string_sketch;
44
+ using kll_string_sketch = kll_sketch<std::string, std::less<std::string>, serde<std::string>, test_allocator<std::string>>;
45
45
 
46
46
  TEST_CASE("kll sketch", "[kll_sketch]") {
47
47
 
@@ -75,7 +75,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
75
75
  (void) it; // to suppress "unused" warning
76
76
  FAIL("should be no iterations over an empty sketch");
77
77
  }
78
- }
78
+ }
79
79
 
80
80
  SECTION("get bad quantile") {
81
81
  kll_float_sketch sketch(200, 0);
@@ -835,10 +835,75 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
835
835
  REQUIRE((*it).second == 3);
836
836
  }
837
837
  }
838
- // cleanup
839
- if (test_allocator_total_bytes != 0) {
840
- REQUIRE(test_allocator_total_bytes == 0);
838
+
839
+ SECTION("type conversion: empty") {
840
+ kll_sketch<double> kll_double;
841
+ kll_sketch<float> kll_float(kll_double);
842
+ REQUIRE(kll_float.is_empty());
843
+ REQUIRE(kll_float.get_k() == kll_double.get_k());
844
+ REQUIRE(kll_float.get_n() == 0);
845
+ REQUIRE(kll_float.get_num_retained() == 0);
846
+ }
847
+
848
+ SECTION("type conversion: over k") {
849
+ kll_sketch<double> kll_double;
850
+ for (int i = 0; i < 1000; ++i) kll_double.update(static_cast<double>(i));
851
+ kll_sketch<float> kll_float(kll_double);
852
+ REQUIRE(!kll_float.is_empty());
853
+ REQUIRE(kll_float.get_k() == kll_double.get_k());
854
+ REQUIRE(kll_float.get_n() == kll_double.get_n());
855
+ REQUIRE(kll_float.get_num_retained() == kll_double.get_num_retained());
856
+
857
+ auto sv_float = kll_float.get_sorted_view(false);
858
+ auto sv_double = kll_double.get_sorted_view(false);
859
+ auto sv_float_it = sv_float.begin();
860
+ auto sv_double_it = sv_double.begin();
861
+ while (sv_float_it != sv_float.end()) {
862
+ REQUIRE(sv_double_it != sv_double.end());
863
+ auto float_pair = *sv_float_it;
864
+ auto double_pair = *sv_double_it;
865
+ REQUIRE(float_pair.first == Approx(double_pair.first).margin(0.01));
866
+ REQUIRE(float_pair.second == double_pair.second);
867
+ ++sv_float_it;
868
+ ++sv_double_it;
869
+ }
870
+ REQUIRE(sv_double_it == sv_double.end());
871
+ }
872
+
873
+ class A {
874
+ int val;
875
+ public:
876
+ A(int val): val(val) {}
877
+ int get_val() const { return val; }
878
+ };
879
+
880
+ struct less_A {
881
+ bool operator()(const A& a1, const A& a2) const { return a1.get_val() < a2.get_val(); }
882
+ };
883
+
884
+ class B {
885
+ int val;
886
+ public:
887
+ explicit B(const A& a): val(a.get_val()) {}
888
+ int get_val() const { return val; }
889
+ };
890
+
891
+ struct less_B {
892
+ bool operator()(const B& b1, const B& b2) const { return b1.get_val() < b2.get_val(); }
893
+ };
894
+
895
+ SECTION("type conversion: custom types") {
896
+ kll_sketch<A, less_A> sa;
897
+ sa.update(1);
898
+ sa.update(2);
899
+ sa.update(3);
900
+
901
+ kll_sketch<B, less_B> sb(sa);
902
+ REQUIRE(sb.get_n() == 3);
841
903
  }
904
+
905
+ // cleanup
906
+ REQUIRE(test_allocator_total_bytes == 0);
842
907
  }
843
908
 
844
909
  } /* namespace datasketches */
@@ -12,16 +12,18 @@ This package provides a variety of sketches as described below. Wherever a speci
12
12
 
13
13
  ## Building and Installation
14
14
 
15
- Once cloned, the library can be installed by running `python -m pip install .` in the project root directory, which will also install the necessary dependencies, namely numpy and [pybind11[global]](https://github.com/pybind/pybind11).
15
+ Once cloned, the library can be installed by running `python3 -m pip install .` in the project root directory -- not the python subdirectory -- which will also install the necessary dependencies, namely numpy and [pybind11[global]](https://github.com/pybind/pybind11).
16
16
 
17
- If you prefer to call the `setup.py` build script directly, you must first install `pybind11[global]`, as well as any other dependencies listed under the build-system section in `pyproject.toml`.
17
+ If you prefer to call the `setup.py` build script directly, which is discouraged, you must first install `pybind11[global]`, as well as any other dependencies listed under the build-system section in `pyproject.toml`.
18
18
 
19
- The library is also available from PyPI via `python -m pip install datasketches`.
19
+ The library is also available from PyPI via `python3 -m pip install datasketches`.
20
20
 
21
21
  ## Usage
22
22
 
23
23
  Having installed the library, loading the Apache Datasketches Library in Python is simple: `import datasketches`.
24
24
 
25
+ The unit tests are mostly structured in a tutorial style and can be used as a reference example for how to feed data into and query the different types of sketches.
26
+
25
27
  ## Available Sketch Classes
26
28
 
27
29
  - KLL (Absolute Error Quantiles)
@@ -74,12 +76,7 @@ The only developer-specific instructions relate to running unit tests.
74
76
 
75
77
  ### Unit tests
76
78
 
77
- The Python unit tests are run with `tox`. To ensure you have all the needed package, from the package base directory run:
78
-
79
- ```bash
80
- python -m pip install --upgrade tox
81
- tox
82
- ```
79
+ The Python unit tests are run via `tox`, with no arguments, from the project root directory -- not the python subdirectory. Tox creates a temporary virtual environment in which to build and run teh unit tests. In the event you are missing the necessary pacakge, tox may be installed with `python3 -m pip install --upgrade tox`.
83
80
 
84
81
  ## License
85
82
 
@@ -151,6 +151,7 @@ template <typename T,
151
151
  class quantiles_sketch {
152
152
  public:
153
153
  using value_type = T;
154
+ using allocator_type = Allocator;
154
155
  using comparator = Comparator;
155
156
  using vector_double = std::vector<double, typename std::allocator_traits<Allocator>::template rebind_alloc<double>>;
156
157
 
@@ -161,6 +162,14 @@ public:
161
162
  quantiles_sketch& operator=(const quantiles_sketch& other);
162
163
  quantiles_sketch& operator=(quantiles_sketch&& other) noexcept;
163
164
 
165
+ /**
166
+ * @brief Type converting constructor
167
+ * @param other quantiles sketch of a different type
168
+ * @param allocator instance of an Allocator
169
+ */
170
+ template<typename From, typename FC, typename FA>
171
+ explicit quantiles_sketch(const quantiles_sketch<From, FC, FA>& other, const Allocator& allocator = Allocator());
172
+
164
173
  /**
165
174
  * Updates this sketch with the given data item.
166
175
  * @param value an item from a stream of items
@@ -227,6 +236,12 @@ public:
227
236
  */
228
237
  Comparator get_comparator() const;
229
238
 
239
+ /**
240
+ * Returns the allocator for this sketch.
241
+ * @return allocator
242
+ */
243
+ allocator_type get_allocator() const;
244
+
230
245
  /**
231
246
  * Returns an approximation to the value of the data item
232
247
  * that would be preceded by the given fraction of a hypothetical sorted
@@ -138,6 +138,65 @@ is_sorted_(is_sorted)
138
138
  throw std::logic_error("Item count does not match value computed from k, n");
139
139
  }
140
140
 
141
+ template<typename T, typename C, typename A>
142
+ template<typename From, typename FC, typename FA>
143
+ quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch<From, FC, FA>& other, const A& allocator) :
144
+ allocator_(allocator),
145
+ k_(other.get_k()),
146
+ n_(other.get_n()),
147
+ bit_pattern_(compute_bit_pattern(other.get_k(), other.get_n())),
148
+ base_buffer_(allocator),
149
+ levels_(allocator),
150
+ min_value_(nullptr),
151
+ max_value_(nullptr),
152
+ is_sorted_(false)
153
+ {
154
+ static_assert(std::is_constructible<T, From>::value,
155
+ "Type converting constructor requires new type to be constructible from existing type");
156
+
157
+ base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k_));
158
+
159
+ if (!other.is_empty()) {
160
+ min_value_ = new (allocator_.allocate(1)) T(other.get_min_value());
161
+ max_value_ = new (allocator_.allocate(1)) T(other.get_max_value());
162
+
163
+ // reserve space in levels
164
+ const uint8_t num_levels = compute_levels_needed(k_, n_);
165
+ levels_.reserve(num_levels);
166
+ for (int i = 0; i < num_levels; ++i) {
167
+ Level level(allocator);
168
+ level.reserve(k_);
169
+ levels_.push_back(std::move(level));
170
+ }
171
+
172
+ // iterate through points, assigning to the correct level as needed
173
+ for (auto pair : other) {
174
+ const uint64_t wt = pair.second;
175
+ if (wt == 1) {
176
+ base_buffer_.push_back(T(pair.first));
177
+ // resize where needed as if adding points via update()
178
+ if (base_buffer_.size() + 1 > base_buffer_.capacity()) {
179
+ const size_t new_size = std::max(std::min(static_cast<size_t>(2 * k_), 2 * base_buffer_.size()), static_cast<size_t>(1));
180
+ base_buffer_.reserve(new_size);
181
+ }
182
+ }
183
+ else {
184
+ const uint8_t idx = count_trailing_zeros_in_u64(pair.second) - 1;
185
+ levels_[idx].push_back(T(pair.first));
186
+ }
187
+ }
188
+
189
+ // validate that ordering within each level is preserved
190
+ // base_buffer_ can be considered unsorted for this purpose
191
+ for (int i = 0; i < num_levels; ++i) {
192
+ if (!std::is_sorted(levels_[i].begin(), levels_[i].end(), C())) {
193
+ throw std::logic_error("Copy construction across types produces invalid sorting");
194
+ }
195
+ }
196
+ }
197
+ }
198
+
199
+
141
200
  template<typename T, typename C, typename A>
142
201
  quantiles_sketch<T, C, A>::~quantiles_sketch() {
143
202
  if (min_value_ != nullptr) {
@@ -238,7 +297,7 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
238
297
  );
239
298
  write(os, flags_byte);
240
299
  write(os, k_);
241
- uint16_t unused = 0;
300
+ const uint16_t unused = 0;
242
301
  write(os, unused);
243
302
 
244
303
  if (!is_empty()) {
@@ -624,6 +683,11 @@ C quantiles_sketch<T, C, A>::get_comparator() const {
624
683
  return C();
625
684
  }
626
685
 
686
+ template<typename T, typename C, typename A>
687
+ A quantiles_sketch<T, C, A>::get_allocator() const {
688
+ return allocator_;
689
+ }
690
+
627
691
  // implementation for fixed-size arithmetic types (integral and floating point)
628
692
  template<typename T, typename C, typename A>
629
693
  template<typename SerDe, typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
@@ -783,9 +847,9 @@ auto quantiles_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size) co
783
847
 
784
848
  template<typename T, typename C, typename A>
785
849
  uint32_t quantiles_sketch<T, C, A>::compute_retained_items(const uint16_t k, const uint64_t n) {
786
- uint32_t bb_count = compute_base_buffer_items(k, n);
787
- uint64_t bit_pattern = compute_bit_pattern(k, n);
788
- uint32_t valid_levels = compute_valid_levels(bit_pattern);
850
+ const uint32_t bb_count = compute_base_buffer_items(k, n);
851
+ const uint64_t bit_pattern = compute_bit_pattern(k, n);
852
+ const uint32_t valid_levels = compute_valid_levels(bit_pattern);
789
853
  return bb_count + (k * valid_levels);
790
854
  }
791
855
 
@@ -843,11 +907,11 @@ void quantiles_sketch<T, C, A>::check_family_id(uint8_t family_id) {
843
907
 
844
908
  template<typename T, typename C, typename A>
845
909
  void quantiles_sketch<T, C, A>::check_header_validity(uint8_t preamble_longs, uint8_t flags_byte, uint8_t serial_version) {
846
- bool empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
847
- bool compact = (flags_byte & (1 << flags::IS_COMPACT)) > 0;
910
+ const bool empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
911
+ const bool compact = (flags_byte & (1 << flags::IS_COMPACT)) > 0;
848
912
 
849
- uint8_t sw = (compact ? 1 : 0) + (2 * (empty ? 1 : 0))
850
- + (4 * (serial_version & 0xF)) + (32 * (preamble_longs & 0x3F));
913
+ const uint8_t sw = (compact ? 1 : 0) + (2 * (empty ? 1 : 0))
914
+ + (4 * (serial_version & 0xF)) + (32 * (preamble_longs & 0x3F));
851
915
  bool valid = true;
852
916
 
853
917
  switch (sw) { // exhaustive list and description of all valid cases
@@ -888,7 +952,7 @@ typename quantiles_sketch<T, C, A>::const_iterator quantiles_sketch<T, C, A>::en
888
952
 
889
953
  template<typename T, typename C, typename A>
890
954
  void quantiles_sketch<T, C, A>::grow_base_buffer() {
891
- size_t new_size = std::max(std::min(static_cast<size_t>(2 * k_), 2 * base_buffer_.size()), static_cast<size_t>(1));
955
+ const size_t new_size = std::max(std::min(static_cast<size_t>(2 * k_), 2 * base_buffer_.size()), static_cast<size_t>(1));
892
956
  base_buffer_.reserve(new_size);
893
957
  }
894
958
 
@@ -912,7 +976,7 @@ void quantiles_sketch<T, C, A>::process_full_base_buffer() {
912
976
 
913
977
  template<typename T, typename C, typename A>
914
978
  bool quantiles_sketch<T, C, A>::grow_levels_if_needed() {
915
- uint8_t levels_needed = compute_levels_needed(k_, n_);
979
+ const uint8_t levels_needed = compute_levels_needed(k_, n_);
916
980
  if (levels_needed == 0)
917
981
  return false; // don't need levels and might have small base buffer. Possible during merges.
918
982
 
@@ -992,7 +1056,7 @@ template<typename FwdV>
992
1056
  void quantiles_sketch<T, C, A>::zip_buffer_with_stride(FwdV&& buf_in, Level& buf_out, uint16_t stride) {
993
1057
  // Random offset in range [0, stride)
994
1058
  std::uniform_int_distribution<uint16_t> dist(0, stride - 1);
995
- uint16_t rand_offset = dist(random_utils::rand);
1059
+ const uint16_t rand_offset = dist(random_utils::rand);
996
1060
 
997
1061
  if ((buf_in.size() != stride * buf_out.capacity())
998
1062
  || (buf_out.size() > 0)) {
@@ -1000,7 +1064,7 @@ void quantiles_sketch<T, C, A>::zip_buffer_with_stride(FwdV&& buf_in, Level& buf
1000
1064
  "stride*buf_out.capacity() and empty buf_out");
1001
1065
  }
1002
1066
 
1003
- size_t k = buf_out.capacity();
1067
+ const size_t k = buf_out.capacity();
1004
1068
  for (uint16_t i = rand_offset, o = 0; o < k; i += stride, ++o) {
1005
1069
  buf_out.push_back(conditional_forward<FwdV>(buf_in[i]));
1006
1070
  }
@@ -1117,7 +1181,7 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
1117
1181
  const uint16_t downsample_factor = src.get_k() / tgt.get_k();
1118
1182
  const uint8_t lg_sample_factor = count_trailing_zeros_in_u32(downsample_factor);
1119
1183
 
1120
- uint64_t new_n = src.get_n() + tgt.get_n();
1184
+ const uint64_t new_n = src.get_n() + tgt.get_n();
1121
1185
 
1122
1186
  // move items from src's base buffer
1123
1187
  for (uint16_t i = 0; i < src.base_buffer_.size(); ++i) {
@@ -1125,7 +1189,7 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
1125
1189
  }
1126
1190
 
1127
1191
  // check (after moving raw items) if we need to extend levels array
1128
- uint8_t levels_needed = compute_levels_needed(tgt.get_k(), new_n);
1192
+ const uint8_t levels_needed = compute_levels_needed(tgt.get_k(), new_n);
1129
1193
  if (levels_needed > tgt.levels_.size()) {
1130
1194
  tgt.levels_.reserve(levels_needed);
1131
1195
  while (tgt.levels_.size() < levels_needed) {
@@ -82,7 +82,7 @@ TEST_CASE("kolmogorov-smirnov slightly different distributions", "[quantiles_ske
82
82
  const double delta = kolmogorov_smirnov::delta(sketch1, sketch2);
83
83
  REQUIRE(delta == Approx(0.02).margin(0.01));
84
84
  const double threshold = kolmogorov_smirnov::threshold(sketch1, sketch2, 0.05);
85
- std::cout << "delta=" << delta << ", threshold=" << threshold << "\n";
85
+
86
86
  REQUIRE_FALSE(delta > threshold);
87
87
  REQUIRE_FALSE(kolmogorov_smirnov::test(sketch1, sketch2, 0.05));
88
88
  }
@@ -102,7 +102,7 @@ TEST_CASE("kolmogorov-smirnov slightly different distributions high resolution",
102
102
  const double delta = kolmogorov_smirnov::delta(sketch1, sketch2);
103
103
  REQUIRE(delta == Approx(0.02).margin(0.01));
104
104
  const double threshold = kolmogorov_smirnov::threshold(sketch1, sketch2, 0.05);
105
- std::cout << "delta=" << delta << ", threshold=" << threshold << "\n";
105
+
106
106
  REQUIRE(delta > threshold);
107
107
  REQUIRE(kolmogorov_smirnov::test(sketch1, sketch2, 0.05));
108
108
  }
@@ -903,6 +903,69 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
903
903
  }
904
904
  }
905
905
 
906
+ SECTION("Type converting copy constructor") {
907
+ const uint16_t k = 8;
908
+ const int n = 403;
909
+ quantiles_sketch<double> sk_double(k);
910
+
911
+ quantiles_sketch<float> sk_float(k, sk_double.get_allocator());
912
+ REQUIRE(sk_float.is_empty());
913
+
914
+ for (int i = 0; i < n; ++i) sk_double.update(i + .01);
915
+
916
+ quantiles_sketch<int> sk_int(sk_double);
917
+ REQUIRE(sk_double.get_n() == sk_int.get_n());
918
+ REQUIRE(sk_double.get_k() == sk_int.get_k());
919
+ REQUIRE(sk_double.get_num_retained() == sk_int.get_num_retained());
920
+
921
+ auto sv_double = sk_double.get_sorted_view(false);
922
+ std::vector<std::pair<double, uint64_t>> vec_double(sv_double.begin(), sv_double.end());
923
+
924
+ auto sv_int = sk_int.get_sorted_view(false);
925
+ std::vector<std::pair<int, uint64_t>> vec_int(sv_int.begin(), sv_int.end());
926
+
927
+ REQUIRE(vec_double.size() == vec_int.size());
928
+
929
+ for (size_t i = 0; i < vec_int.size(); ++i) {
930
+ // known truncation with conversion so approximate result
931
+ REQUIRE(vec_double[i].first == Approx(vec_int[i].first).margin(0.1));
932
+ // exact equality for weights
933
+ REQUIRE(vec_double[i].second == vec_int[i].second);
934
+ }
935
+ }
936
+
937
+ class A {
938
+ int val;
939
+ public:
940
+ A(int val): val(val) {}
941
+ int get_val() const { return val; }
942
+ };
943
+
944
+ struct less_A {
945
+ bool operator()(const A& a1, const A& a2) const { return a1.get_val() < a2.get_val(); }
946
+ };
947
+
948
+ class B {
949
+ int val;
950
+ public:
951
+ explicit B(const A& a): val(a.get_val()) {}
952
+ int get_val() const { return val; }
953
+ };
954
+
955
+ struct less_B {
956
+ bool operator()(const B& b1, const B& b2) const { return b1.get_val() < b2.get_val(); }
957
+ };
958
+
959
+ SECTION("type conversion: custom types") {
960
+ quantiles_sketch<A, less_A> sa;
961
+ sa.update(1);
962
+ sa.update(2);
963
+ sa.update(3);
964
+
965
+ quantiles_sketch<B, less_B> sb(sa);
966
+ REQUIRE(sb.get_n() == 3);
967
+ }
968
+
906
969
  // cleanup
907
970
  if (test_allocator_total_bytes != 0) {
908
971
  REQUIRE(test_allocator_total_bytes == 0);
@@ -38,6 +38,9 @@ public:
38
38
  req_compactor& operator=(const req_compactor& other);
39
39
  req_compactor& operator=(req_compactor&& other);
40
40
 
41
+ template<typename TT, typename CC, typename AA>
42
+ req_compactor(const req_compactor<TT, CC, AA>& other, const Allocator& allocator);
43
+
41
44
  bool is_sorted() const;
42
45
  uint32_t get_num_items() const;
43
46
  uint32_t get_nom_capacity() const;
@@ -128,6 +131,9 @@ private:
128
131
  template<typename S>
129
132
  static std::pair<std::unique_ptr<T, items_deleter>, size_t> deserialize_items(const void* bytes, size_t size, const S& serde, const Allocator& allocator, uint32_t num);
130
133
 
134
+ // for type converting constructor
135
+ template<typename TT, typename CC, typename AA>
136
+ friend class req_compactor;
131
137
  };
132
138
 
133
139
  } /* namespace datasketches */
@@ -132,6 +132,33 @@ req_compactor<T, C, A>& req_compactor<T, C, A>::operator=(req_compactor&& other)
132
132
  return *this;
133
133
  }
134
134
 
135
+ template<typename T, typename C, typename A>
136
+ template<typename TT, typename CC, typename AA>
137
+ req_compactor<T, C, A>::req_compactor(const req_compactor<TT, CC, AA>& other, const A& allocator):
138
+ allocator_(allocator),
139
+ lg_weight_(other.lg_weight_),
140
+ hra_(other.hra_),
141
+ coin_(other.coin_),
142
+ sorted_(other.sorted_),
143
+ section_size_raw_(other.section_size_raw_),
144
+ section_size_(other.section_size_),
145
+ num_sections_(other.num_sections_),
146
+ state_(other.state_),
147
+ num_items_(other.num_items_),
148
+ capacity_(other.capacity_),
149
+ items_(nullptr)
150
+ {
151
+ if (other.items_ != nullptr) {
152
+ items_ = allocator_.allocate(capacity_);
153
+ const uint32_t from = hra_ ? capacity_ - num_items_ : 0;
154
+ const uint32_t to = hra_ ? capacity_ : num_items_;
155
+ for (uint32_t i = from; i < to; ++i) new (items_ + i) T(other.items_[i]);
156
+ if (sorted_ && !std::is_sorted(items_ + from, items_ + to, C())) {
157
+ throw std::logic_error("items must be sorted");
158
+ }
159
+ }
160
+ }
161
+
135
162
  template<typename T, typename C, typename A>
136
163
  bool req_compactor<T, C, A>::is_sorted() const {
137
164
  return sorted_;
@@ -58,6 +58,14 @@ public:
58
58
  req_sketch& operator=(const req_sketch& other);
59
59
  req_sketch& operator=(req_sketch&& other);
60
60
 
61
+ /*
62
+ * Type converting constructor.
63
+ * @param other sketch of a different type
64
+ * @param allocator instance of an Allocator
65
+ */
66
+ template<typename TT, typename CC, typename SS, typename AA>
67
+ explicit req_sketch(const req_sketch<TT, CC, SS, AA>& other, const Allocator& allocator = Allocator());
68
+
61
69
  /**
62
70
  * Returns configured parameter K
63
71
  * @return parameter K
@@ -408,6 +416,9 @@ private:
408
416
  }
409
417
  }
410
418
 
419
+ // for type converting constructor
420
+ template<typename TT, typename CC, typename SS, typename AA>
421
+ friend class req_sketch;
411
422
  };
412
423
 
413
424
  template<typename T, typename C, typename S, typename A>
@@ -64,8 +64,8 @@ compactors_(other.compactors_),
64
64
  min_value_(nullptr),
65
65
  max_value_(nullptr)
66
66
  {
67
- if (other.min_value_ != nullptr) min_value_ = new (A().allocate(1)) T(*other.min_value_);
68
- if (other.max_value_ != nullptr) max_value_ = new (A().allocate(1)) T(*other.max_value_);
67
+ if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
68
+ if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
69
69
  }
70
70
 
71
71
  template<typename T, typename C, typename S, typename A>
@@ -113,6 +113,33 @@ req_sketch<T, C, S, A>& req_sketch<T, C, S, A>::operator=(req_sketch&& other) {
113
113
  return *this;
114
114
  }
115
115
 
116
+ template<typename T, typename C, typename S, typename A>
117
+ template<typename TT, typename CC, typename SS, typename AA>
118
+ req_sketch<T, C, S, A>::req_sketch(const req_sketch<TT, CC, SS, AA>& other, const A& allocator):
119
+ allocator_(allocator),
120
+ k_(other.k_),
121
+ hra_(other.hra_),
122
+ max_nom_size_(other.max_nom_size_),
123
+ num_retained_(other.num_retained_),
124
+ n_(other.n_),
125
+ compactors_(allocator),
126
+ min_value_(nullptr),
127
+ max_value_(nullptr)
128
+ {
129
+ static_assert(
130
+ std::is_constructible<T, TT>::value,
131
+ "Type converting constructor requires new type to be constructible from existing type"
132
+ );
133
+ compactors_.reserve(other.compactors_.size());
134
+ for (const auto& compactor: other.compactors_) {
135
+ compactors_.push_back(req_compactor<T, C, A>(compactor, allocator_));
136
+ }
137
+ if (!other.is_empty()) {
138
+ min_value_ = new (allocator_.allocate(1)) T(other.get_min_value());
139
+ max_value_ = new (allocator_.allocate(1)) T(other.get_max_value());
140
+ }
141
+ }
142
+
116
143
  template<typename T, typename C, typename S, typename A>
117
144
  uint16_t req_sketch<T, C, S, A>::get_k() const {
118
145
  return k_;
@@ -35,7 +35,7 @@ const std::string input_path = "test/";
35
35
  #endif
36
36
 
37
37
  TEST_CASE("req sketch: empty", "[req_sketch]") {
38
- std::cout << "sizeof(req_float_sketch)=" << sizeof(req_sketch<float>) << "\n";
38
+ //std::cout << "sizeof(req_float_sketch)=" << sizeof(req_sketch<float>) << "\n";
39
39
  req_sketch<float> sketch(12);
40
40
  REQUIRE(sketch.get_k() == 12);
41
41
  REQUIRE(sketch.is_HRA());
@@ -245,7 +245,7 @@ TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]")
245
245
  auto bytes = sketch.serialize();
246
246
  REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
247
247
  auto sketch2 = req_sketch<float>::deserialize(bytes.data(), bytes.size());
248
- std::cout << sketch2.to_string(true);
248
+ //std::cout << sketch2.to_string(true);
249
249
  REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
250
250
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
251
251
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
@@ -282,7 +282,7 @@ TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
282
282
  auto bytes = sketch.serialize();
283
283
  REQUIRE(bytes.size() == sketch.get_serialized_size_bytes());
284
284
  auto sketch2 = req_sketch<float>::deserialize(bytes.data(), bytes.size());
285
- std::cout << sketch2.to_string(true);
285
+ //std::cout << sketch2.to_string(true);
286
286
  REQUIRE(bytes.size() == sketch2.get_serialized_size_bytes());
287
287
  REQUIRE(sketch2.is_empty() == sketch.is_empty());
288
288
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
@@ -485,6 +485,72 @@ TEST_CASE("req sketch: merge incompatible HRA and LRA", "[req_sketch]") {
485
485
  REQUIRE_THROWS_AS(sketch1.merge(sketch2), std::invalid_argument);
486
486
  }
487
487
 
488
+ TEST_CASE("req sketch: type conversion - empty", "[req_sketch]") {
489
+ req_sketch<double> req_double(12);
490
+ req_sketch<float> req_float(req_double);
491
+ REQUIRE(req_float.is_empty());
492
+ REQUIRE(req_float.get_k() == req_double.get_k());
493
+ REQUIRE(req_float.get_n() == 0);
494
+ REQUIRE(req_float.get_num_retained() == 0);
495
+ }
496
+
497
+ TEST_CASE("req sketch: type conversion - several levels", "[req_sketch]") {
498
+ req_sketch<double> req_double(12);
499
+ for (int i = 0; i < 1000; ++i) req_double.update(static_cast<double>(i));
500
+ req_sketch<float> req_float(req_double);
501
+ REQUIRE(!req_float.is_empty());
502
+ REQUIRE(req_float.get_k() == req_double.get_k());
503
+ REQUIRE(req_float.get_n() == req_double.get_n());
504
+ REQUIRE(req_float.get_num_retained() == req_double.get_num_retained());
505
+
506
+ auto sv_float = req_float.get_sorted_view(false);
507
+ auto sv_double = req_double.get_sorted_view(false);
508
+ auto sv_float_it = sv_float.begin();
509
+ auto sv_double_it = sv_double.begin();
510
+ while (sv_float_it != sv_float.end()) {
511
+ REQUIRE(sv_double_it != sv_double.end());
512
+ auto float_pair = *sv_float_it;
513
+ auto double_pair = *sv_double_it;
514
+ REQUIRE(float_pair.first == Approx(double_pair.first).margin(0.01));
515
+ REQUIRE(float_pair.second == double_pair.second);
516
+ ++sv_float_it;
517
+ ++sv_double_it;
518
+ }
519
+ REQUIRE(sv_double_it == sv_double.end());
520
+ }
521
+
522
+ class A {
523
+ int val;
524
+ public:
525
+ A(int val): val(val) {}
526
+ int get_val() const { return val; }
527
+ };
528
+
529
+ struct less_A {
530
+ bool operator()(const A& a1, const A& a2) const { return a1.get_val() < a2.get_val(); }
531
+ };
532
+
533
+ class B {
534
+ int val;
535
+ public:
536
+ explicit B(const A& a): val(a.get_val()) {}
537
+ int get_val() const { return val; }
538
+ };
539
+
540
+ struct less_B {
541
+ bool operator()(const B& b1, const B& b2) const { return b1.get_val() < b2.get_val(); }
542
+ };
543
+
544
+ TEST_CASE("req sketch: type conversion - custom types") {
545
+ req_sketch<A, less_A> sa(4);
546
+ sa.update(1);
547
+ sa.update(2);
548
+ sa.update(3);
549
+
550
+ req_sketch<B, less_B> sb(sa);
551
+ REQUIRE(sb.get_n() == 3);
552
+ }
553
+
488
554
  //TEST_CASE("for manual comparison with Java") {
489
555
  // req_sketch<float> sketch(12, false);
490
556
  // for (size_t i = 0; i < 100000; ++i) sketch.update(i);
@@ -81,7 +81,7 @@ class CMakeBuild(build_ext):
81
81
 
82
82
  setup(
83
83
  name='datasketches',
84
- version='3.4.0',
84
+ version='3.5.0',
85
85
  author='Apache Software Foundation',
86
86
  author_email='dev@datasketches.apache.org',
87
87
  description='The Apache DataSketches Library for Python',
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datasketches
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-05-21 00:00:00.000000000 Z
11
+ date: 2022-07-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice