datasketches 0.2.5 → 0.2.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/LICENSE +4 -6
  4. data/NOTICE +6 -5
  5. data/ext/datasketches/kll_wrapper.cpp +2 -2
  6. data/lib/datasketches/version.rb +1 -1
  7. data/vendor/datasketches-cpp/CMakeLists.txt +1 -1
  8. data/vendor/datasketches-cpp/LICENSE +4 -6
  9. data/vendor/datasketches-cpp/MANIFEST.in +0 -2
  10. data/vendor/datasketches-cpp/NOTICE +6 -5
  11. data/vendor/datasketches-cpp/common/CMakeLists.txt +2 -2
  12. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +13 -2
  13. data/vendor/datasketches-cpp/common/test/catch_runner.cpp +22 -1
  14. data/vendor/datasketches-cpp/common/test/integration_test.cpp +1 -1
  15. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +2 -1
  16. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +1 -1
  17. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +1 -1
  18. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -1
  19. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +1 -1
  21. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +1 -1
  22. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -1
  23. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +1 -1
  24. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
  25. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -1
  26. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +1 -1
  27. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +1 -1
  28. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -1
  29. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +1 -1
  30. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +1 -1
  31. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +1 -1
  32. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -1
  33. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +1 -1
  34. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
  35. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +22 -9
  36. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +47 -9
  37. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +1 -1
  38. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +72 -7
  39. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +1 -1
  40. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
  41. data/vendor/datasketches-cpp/pyproject.toml +0 -1
  42. data/vendor/datasketches-cpp/python/README.md +6 -9
  43. data/vendor/datasketches-cpp/python/pybind11Path.cmd +1 -1
  44. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +15 -0
  45. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +78 -14
  46. data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +3 -3
  47. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +1 -1
  48. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +64 -1
  49. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +6 -0
  50. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +27 -0
  51. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +11 -0
  52. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +29 -2
  53. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +1 -1
  54. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +70 -4
  55. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +1 -1
  56. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -1
  57. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -1
  58. data/vendor/datasketches-cpp/setup.py +2 -3
  59. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +1 -1
  60. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +1 -1
  61. data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +1 -1
  62. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +1 -1
  63. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +1 -1
  64. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +1 -1
  65. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +25 -31
  66. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
  67. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -1
  68. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -1
  69. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +1 -1
  70. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
  71. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
  72. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -1
  73. metadata +2 -4
  74. data/vendor/datasketches-cpp/common/test/catch.hpp +0 -17618
  75. data/vendor/datasketches-cpp/common/test/test_runner.cpp +0 -29
@@ -26,6 +26,7 @@
26
26
  #include <stdexcept>
27
27
 
28
28
  #include "conditional_forward.hpp"
29
+ #include "count_zeros.hpp"
29
30
  #include "memory_operations.hpp"
30
31
  #include "kll_helper.hpp"
31
32
 
@@ -69,7 +70,7 @@ max_value_(nullptr),
69
70
  is_level_zero_sorted_(other.is_level_zero_sorted_)
70
71
  {
71
72
  items_ = allocator_.allocate(items_size_);
72
- std::copy(&other.items_[levels_[0]], &other.items_[levels_[num_levels_]], &items_[levels_[0]]);
73
+ for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
73
74
  if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
74
75
  if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
75
76
  }
@@ -147,6 +148,33 @@ kll_sketch<T, C, S, A>::~kll_sketch() {
147
148
  }
148
149
  }
149
150
 
151
+ template<typename T, typename C, typename S, typename A>
152
+ template<typename TT, typename CC, typename SS, typename AA>
153
+ kll_sketch<T, C, S, A>::kll_sketch(const kll_sketch<TT, CC, SS, AA>& other, const A& allocator):
154
+ allocator_(allocator),
155
+ k_(other.k_),
156
+ m_(other.m_),
157
+ min_k_(other.min_k_),
158
+ n_(other.n_),
159
+ num_levels_(other.num_levels_),
160
+ levels_(other.levels_, allocator_),
161
+ items_(nullptr),
162
+ items_size_(other.items_size_),
163
+ min_value_(nullptr),
164
+ max_value_(nullptr),
165
+ is_level_zero_sorted_(other.is_level_zero_sorted_)
166
+ {
167
+ static_assert(
168
+ std::is_constructible<T, TT>::value,
169
+ "Type converting constructor requires new type to be constructible from existing type"
170
+ );
171
+ items_ = allocator_.allocate(items_size_);
172
+ for (auto i = levels_[0]; i < levels_[num_levels_]; ++i) new (&items_[i]) T(other.items_[i]);
173
+ if (other.min_value_ != nullptr) min_value_ = new (allocator_.allocate(1)) T(*other.min_value_);
174
+ if (other.max_value_ != nullptr) max_value_ = new (allocator_.allocate(1)) T(*other.max_value_);
175
+ check_sorting();
176
+ }
177
+
150
178
  template<typename T, typename C, typename S, typename A>
151
179
  template<typename FwdT>
152
180
  void kll_sketch<T, C, S, A>::update(FwdT&& value) {
@@ -305,8 +333,8 @@ double kll_sketch<T, C, S, A>::get_rank(const T& value) const {
305
333
  uint64_t weight = 1;
306
334
  uint64_t total = 0;
307
335
  while (level < num_levels_) {
308
- const auto from_index(levels_[level]);
309
- const auto to_index(levels_[level + 1]); // exclusive
336
+ const auto from_index = levels_[level];
337
+ const auto to_index = levels_[level + 1]; // exclusive
310
338
  for (uint32_t i = from_index; i < to_index; i++) {
311
339
  if (inclusive ? !C()(value, items_[i]) : C()(items_[i], value)) {
312
340
  total += weight;
@@ -694,7 +722,7 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
694
722
  // level zero might not be sorted, so we must sort it if we wish to compact it
695
723
  // sort_level_zero() is not used here because of the adjustment for odd number of items
696
724
  if ((level == 0) && !is_level_zero_sorted_) {
697
- std::sort(&items_[adj_beg], &items_[adj_beg + adj_pop], C());
725
+ std::sort(items_ + adj_beg, items_ + adj_beg + adj_pop, C());
698
726
  }
699
727
  if (pop_above == 0) {
700
728
  kll_helper::randomly_halve_up(items_, adj_beg, adj_pop);
@@ -717,7 +745,7 @@ void kll_sketch<T, C, S, A>::compress_while_updating(void) {
717
745
  // so that the freed-up space can be used by level zero
718
746
  if (level > 0) {
719
747
  const uint32_t amount = raw_beg - levels_[0];
720
- std::move_backward(&items_[levels_[0]], &items_[levels_[0] + amount], &items_[levels_[0] + half_adj_pop + amount]);
748
+ std::move_backward(items_ + levels_[0], items_ + levels_[0] + amount, items_ + levels_[0] + half_adj_pop + amount);
721
749
  for (uint8_t lvl = 0; lvl < level; lvl++) levels_[lvl] += half_adj_pop;
722
750
  }
723
751
  for (uint32_t i = 0; i < half_adj_pop; i++) items_[i + destroy_beg].~T();
@@ -775,22 +803,32 @@ void kll_sketch<T, C, S, A>::add_empty_top_level_to_completely_full_sketch() {
775
803
  template<typename T, typename C, typename S, typename A>
776
804
  void kll_sketch<T, C, S, A>::sort_level_zero() {
777
805
  if (!is_level_zero_sorted_) {
778
- std::sort(&items_[levels_[0]], &items_[levels_[1]], C());
806
+ std::sort(items_ + levels_[0], items_ + levels_[1], C());
779
807
  is_level_zero_sorted_ = true;
780
808
  }
781
809
  }
782
810
 
811
+ template<typename T, typename C, typename S, typename A>
812
+ void kll_sketch<T, C, S, A>::check_sorting() const {
813
+ // not checking level 0
814
+ for (uint8_t level = 1; level < num_levels_; ++level) {
815
+ const auto from = items_ + levels_[level];
816
+ const auto to = items_ + levels_[level + 1];
817
+ if (!std::is_sorted(from, to, C())) {
818
+ throw std::logic_error("levels must be sorted");
819
+ }
820
+ }
821
+ }
822
+
783
823
  template<typename T, typename C, typename S, typename A>
784
824
  template<bool inclusive>
785
825
  quantile_sketch_sorted_view<T, C, A> kll_sketch<T, C, S, A>::get_sorted_view(bool cumulative) const {
786
826
  const_cast<kll_sketch*>(this)->sort_level_zero(); // allow this side effect
787
827
  quantile_sketch_sorted_view<T, C, A> view(get_num_retained(), allocator_);
788
- uint8_t level = 0;
789
- while (level < num_levels_) {
828
+ for (uint8_t level = 0; level < num_levels_; ++level) {
790
829
  const auto from = items_ + levels_[level];
791
830
  const auto to = items_ + levels_[level + 1]; // exclusive
792
831
  view.add(from, to, 1 << level);
793
- ++level;
794
832
  }
795
833
  if (cumulative) view.template convert_to_cummulative<inclusive>();
796
834
  return view;
@@ -17,7 +17,7 @@
17
17
  * under the License.
18
18
  */
19
19
 
20
- #include <catch.hpp>
20
+ #include <catch2/catch.hpp>
21
21
  #include <sstream>
22
22
 
23
23
  #include <kll_sketch.hpp>
@@ -17,7 +17,7 @@
17
17
  * under the License.
18
18
  */
19
19
 
20
- #include <catch.hpp>
20
+ #include <catch2/catch.hpp>
21
21
  #include <cmath>
22
22
  #include <cstring>
23
23
  #include <sstream>
@@ -39,9 +39,9 @@ static std::string testBinaryInputPath = "test/";
39
39
  #endif
40
40
 
41
41
  // typical usage would be just kll_sketch<float> or kll_sketch<std::string>, but here we use test_allocator
42
- typedef kll_sketch<float, std::less<float>, serde<float>, test_allocator<float>> kll_float_sketch;
42
+ using kll_float_sketch = kll_sketch<float, std::less<float>, serde<float>, test_allocator<float>>;
43
43
  // let std::string use the default allocator for simplicity, otherwise we need to define "less" and "serde"
44
- typedef kll_sketch<std::string, std::less<std::string>, serde<std::string>, test_allocator<std::string>> kll_string_sketch;
44
+ using kll_string_sketch = kll_sketch<std::string, std::less<std::string>, serde<std::string>, test_allocator<std::string>>;
45
45
 
46
46
  TEST_CASE("kll sketch", "[kll_sketch]") {
47
47
 
@@ -75,7 +75,7 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
75
75
  (void) it; // to suppress "unused" warning
76
76
  FAIL("should be no iterations over an empty sketch");
77
77
  }
78
- }
78
+ }
79
79
 
80
80
  SECTION("get bad quantile") {
81
81
  kll_float_sketch sketch(200, 0);
@@ -835,10 +835,75 @@ TEST_CASE("kll sketch", "[kll_sketch]") {
835
835
  REQUIRE((*it).second == 3);
836
836
  }
837
837
  }
838
- // cleanup
839
- if (test_allocator_total_bytes != 0) {
840
- REQUIRE(test_allocator_total_bytes == 0);
838
+
839
+ SECTION("type conversion: empty") {
840
+ kll_sketch<double> kll_double;
841
+ kll_sketch<float> kll_float(kll_double);
842
+ REQUIRE(kll_float.is_empty());
843
+ REQUIRE(kll_float.get_k() == kll_double.get_k());
844
+ REQUIRE(kll_float.get_n() == 0);
845
+ REQUIRE(kll_float.get_num_retained() == 0);
846
+ }
847
+
848
+ SECTION("type conversion: over k") {
849
+ kll_sketch<double> kll_double;
850
+ for (int i = 0; i < 1000; ++i) kll_double.update(static_cast<double>(i));
851
+ kll_sketch<float> kll_float(kll_double);
852
+ REQUIRE(!kll_float.is_empty());
853
+ REQUIRE(kll_float.get_k() == kll_double.get_k());
854
+ REQUIRE(kll_float.get_n() == kll_double.get_n());
855
+ REQUIRE(kll_float.get_num_retained() == kll_double.get_num_retained());
856
+
857
+ auto sv_float = kll_float.get_sorted_view(false);
858
+ auto sv_double = kll_double.get_sorted_view(false);
859
+ auto sv_float_it = sv_float.begin();
860
+ auto sv_double_it = sv_double.begin();
861
+ while (sv_float_it != sv_float.end()) {
862
+ REQUIRE(sv_double_it != sv_double.end());
863
+ auto float_pair = *sv_float_it;
864
+ auto double_pair = *sv_double_it;
865
+ REQUIRE(float_pair.first == Approx(double_pair.first).margin(0.01));
866
+ REQUIRE(float_pair.second == double_pair.second);
867
+ ++sv_float_it;
868
+ ++sv_double_it;
869
+ }
870
+ REQUIRE(sv_double_it == sv_double.end());
871
+ }
872
+
873
+ class A {
874
+ int val;
875
+ public:
876
+ A(int val): val(val) {}
877
+ int get_val() const { return val; }
878
+ };
879
+
880
+ struct less_A {
881
+ bool operator()(const A& a1, const A& a2) const { return a1.get_val() < a2.get_val(); }
882
+ };
883
+
884
+ class B {
885
+ int val;
886
+ public:
887
+ explicit B(const A& a): val(a.get_val()) {}
888
+ int get_val() const { return val; }
889
+ };
890
+
891
+ struct less_B {
892
+ bool operator()(const B& b1, const B& b2) const { return b1.get_val() < b2.get_val(); }
893
+ };
894
+
895
+ SECTION("type conversion: custom types") {
896
+ kll_sketch<A, less_A> sa;
897
+ sa.update(1);
898
+ sa.update(2);
899
+ sa.update(3);
900
+
901
+ kll_sketch<B, less_B> sb(sa);
902
+ REQUIRE(sb.get_n() == 3);
841
903
  }
904
+
905
+ // cleanup
906
+ REQUIRE(test_allocator_total_bytes == 0);
842
907
  }
843
908
 
844
909
  } /* namespace datasketches */
@@ -17,7 +17,7 @@
17
17
  * under the License.
18
18
  */
19
19
 
20
- #include <catch.hpp>
20
+ #include <catch2/catch.hpp>
21
21
 
22
22
  #include <kll_sketch.hpp>
23
23
  #include <kll_helper.hpp>
@@ -17,7 +17,7 @@
17
17
  * under the License.
18
18
  */
19
19
 
20
- #include <catch.hpp>
20
+ #include <catch2/catch.hpp>
21
21
 
22
22
  #include <random>
23
23
 
@@ -2,7 +2,6 @@
2
2
  requires = ["wheel",
3
3
  "setuptools >= 30.3.0",
4
4
  "cmake >= 3.16",
5
- "pip >= 10.0",
6
5
  "pybind11[global] >= 2.6.0"]
7
6
  build-backend = "setuptools.build_meta"
8
7
 
@@ -12,16 +12,18 @@ This package provides a variety of sketches as described below. Wherever a speci
12
12
 
13
13
  ## Building and Installation
14
14
 
15
- Once cloned, the library can be installed by running `python -m pip install .` in the project root directory, which will also install the necessary dependencies, namely numpy and [pybind11[global]](https://github.com/pybind/pybind11).
15
+ Once cloned, the library can be installed by running `python3 -m pip install .` in the project root directory -- not the python subdirectory -- which will also install the necessary dependencies, namely numpy and [pybind11[global]](https://github.com/pybind/pybind11).
16
16
 
17
- If you prefer to call the `setup.py` build script directly, you must first install `pybind11[global]`, as well as any other dependencies listed under the build-system section in `pyproject.toml`.
17
+ If you prefer to call the `setup.py` build script directly, which is discouraged, you must first install `pybind11[global]`, as well as any other dependencies listed under the build-system section in `pyproject.toml`.
18
18
 
19
- The library is also available from PyPI via `python -m pip install datasketches`.
19
+ The library is also available from PyPI via `python3 -m pip install datasketches`.
20
20
 
21
21
  ## Usage
22
22
 
23
23
  Having installed the library, loading the Apache Datasketches Library in Python is simple: `import datasketches`.
24
24
 
25
+ The unit tests are mostly structured in a tutorial style and can be used as a reference example for how to feed data into and query the different types of sketches.
26
+
25
27
  ## Available Sketch Classes
26
28
 
27
29
  - KLL (Absolute Error Quantiles)
@@ -74,12 +76,7 @@ The only developer-specific instructions relate to running unit tests.
74
76
 
75
77
  ### Unit tests
76
78
 
77
- The Python unit tests are run with `tox`. To ensure you have all the needed package, from the package base directory run:
78
-
79
- ```bash
80
- python -m pip install --upgrade tox
81
- tox
82
- ```
79
+ The Python unit tests are run via `tox`, with no arguments, from the project root directory -- not the python subdirectory. Tox creates a temporary virtual environment in which to build and run the unit tests. In the event you are missing the necessary package, tox may be installed with `python3 -m pip install --upgrade tox`.
83
80
 
84
81
  ## License
85
82
 
@@ -1,3 +1,3 @@
1
1
  @echo off
2
2
  :: Takes path to the Python interpreter and returns the path to pybind11
3
- %1 -m pip show pybind11 | %1 -c "import sys,re;[sys.stdout.write(re.sub('^Location:\\s+','',line)) for line in sys.stdin if re.search('^Location:\\s+',line)]"
3
+ %1 -c "import pybind11,sys;sys.stdout.write(pybind11.get_cmake_dir())"
@@ -151,6 +151,7 @@ template <typename T,
151
151
  class quantiles_sketch {
152
152
  public:
153
153
  using value_type = T;
154
+ using allocator_type = Allocator;
154
155
  using comparator = Comparator;
155
156
  using vector_double = std::vector<double, typename std::allocator_traits<Allocator>::template rebind_alloc<double>>;
156
157
 
@@ -161,6 +162,14 @@ public:
161
162
  quantiles_sketch& operator=(const quantiles_sketch& other);
162
163
  quantiles_sketch& operator=(quantiles_sketch&& other) noexcept;
163
164
 
165
+ /**
166
+ * @brief Type converting constructor
167
+ * @param other quantiles sketch of a different type
168
+ * @param allocator instance of an Allocator
169
+ */
170
+ template<typename From, typename FC, typename FA>
171
+ explicit quantiles_sketch(const quantiles_sketch<From, FC, FA>& other, const Allocator& allocator = Allocator());
172
+
164
173
  /**
165
174
  * Updates this sketch with the given data item.
166
175
  * @param value an item from a stream of items
@@ -227,6 +236,12 @@ public:
227
236
  */
228
237
  Comparator get_comparator() const;
229
238
 
239
+ /**
240
+ * Returns the allocator for this sketch.
241
+ * @return allocator
242
+ */
243
+ allocator_type get_allocator() const;
244
+
230
245
  /**
231
246
  * Returns an approximation to the value of the data item
232
247
  * that would be preceded by the given fraction of a hypothetical sorted
@@ -138,6 +138,65 @@ is_sorted_(is_sorted)
138
138
  throw std::logic_error("Item count does not match value computed from k, n");
139
139
  }
140
140
 
141
+ template<typename T, typename C, typename A>
142
+ template<typename From, typename FC, typename FA>
143
+ quantiles_sketch<T, C, A>::quantiles_sketch(const quantiles_sketch<From, FC, FA>& other, const A& allocator) :
144
+ allocator_(allocator),
145
+ k_(other.get_k()),
146
+ n_(other.get_n()),
147
+ bit_pattern_(compute_bit_pattern(other.get_k(), other.get_n())),
148
+ base_buffer_(allocator),
149
+ levels_(allocator),
150
+ min_value_(nullptr),
151
+ max_value_(nullptr),
152
+ is_sorted_(false)
153
+ {
154
+ static_assert(std::is_constructible<T, From>::value,
155
+ "Type converting constructor requires new type to be constructible from existing type");
156
+
157
+ base_buffer_.reserve(2 * std::min(quantiles_constants::MIN_K, k_));
158
+
159
+ if (!other.is_empty()) {
160
+ min_value_ = new (allocator_.allocate(1)) T(other.get_min_value());
161
+ max_value_ = new (allocator_.allocate(1)) T(other.get_max_value());
162
+
163
+ // reserve space in levels
164
+ const uint8_t num_levels = compute_levels_needed(k_, n_);
165
+ levels_.reserve(num_levels);
166
+ for (int i = 0; i < num_levels; ++i) {
167
+ Level level(allocator);
168
+ level.reserve(k_);
169
+ levels_.push_back(std::move(level));
170
+ }
171
+
172
+ // iterate through points, assigning to the correct level as needed
173
+ for (auto pair : other) {
174
+ const uint64_t wt = pair.second;
175
+ if (wt == 1) {
176
+ base_buffer_.push_back(T(pair.first));
177
+ // resize where needed as if adding points via update()
178
+ if (base_buffer_.size() + 1 > base_buffer_.capacity()) {
179
+ const size_t new_size = std::max(std::min(static_cast<size_t>(2 * k_), 2 * base_buffer_.size()), static_cast<size_t>(1));
180
+ base_buffer_.reserve(new_size);
181
+ }
182
+ }
183
+ else {
184
+ const uint8_t idx = count_trailing_zeros_in_u64(pair.second) - 1;
185
+ levels_[idx].push_back(T(pair.first));
186
+ }
187
+ }
188
+
189
+ // validate that ordering within each level is preserved
190
+ // base_buffer_ can be considered unsorted for this purpose
191
+ for (int i = 0; i < num_levels; ++i) {
192
+ if (!std::is_sorted(levels_[i].begin(), levels_[i].end(), C())) {
193
+ throw std::logic_error("Copy construction across types produces invalid sorting");
194
+ }
195
+ }
196
+ }
197
+ }
198
+
199
+
141
200
  template<typename T, typename C, typename A>
142
201
  quantiles_sketch<T, C, A>::~quantiles_sketch() {
143
202
  if (min_value_ != nullptr) {
@@ -238,7 +297,7 @@ void quantiles_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& serde)
238
297
  );
239
298
  write(os, flags_byte);
240
299
  write(os, k_);
241
- uint16_t unused = 0;
300
+ const uint16_t unused = 0;
242
301
  write(os, unused);
243
302
 
244
303
  if (!is_empty()) {
@@ -624,6 +683,11 @@ C quantiles_sketch<T, C, A>::get_comparator() const {
624
683
  return C();
625
684
  }
626
685
 
686
+ template<typename T, typename C, typename A>
687
+ A quantiles_sketch<T, C, A>::get_allocator() const {
688
+ return allocator_;
689
+ }
690
+
627
691
  // implementation for fixed-size arithmetic types (integral and floating point)
628
692
  template<typename T, typename C, typename A>
629
693
  template<typename SerDe, typename TT, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
@@ -783,9 +847,9 @@ auto quantiles_sketch<T, C, A>::get_CDF(const T* split_points, uint32_t size) co
783
847
 
784
848
  template<typename T, typename C, typename A>
785
849
  uint32_t quantiles_sketch<T, C, A>::compute_retained_items(const uint16_t k, const uint64_t n) {
786
- uint32_t bb_count = compute_base_buffer_items(k, n);
787
- uint64_t bit_pattern = compute_bit_pattern(k, n);
788
- uint32_t valid_levels = compute_valid_levels(bit_pattern);
850
+ const uint32_t bb_count = compute_base_buffer_items(k, n);
851
+ const uint64_t bit_pattern = compute_bit_pattern(k, n);
852
+ const uint32_t valid_levels = compute_valid_levels(bit_pattern);
789
853
  return bb_count + (k * valid_levels);
790
854
  }
791
855
 
@@ -843,11 +907,11 @@ void quantiles_sketch<T, C, A>::check_family_id(uint8_t family_id) {
843
907
 
844
908
  template<typename T, typename C, typename A>
845
909
  void quantiles_sketch<T, C, A>::check_header_validity(uint8_t preamble_longs, uint8_t flags_byte, uint8_t serial_version) {
846
- bool empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
847
- bool compact = (flags_byte & (1 << flags::IS_COMPACT)) > 0;
910
+ const bool empty = (flags_byte & (1 << flags::IS_EMPTY)) > 0;
911
+ const bool compact = (flags_byte & (1 << flags::IS_COMPACT)) > 0;
848
912
 
849
- uint8_t sw = (compact ? 1 : 0) + (2 * (empty ? 1 : 0))
850
- + (4 * (serial_version & 0xF)) + (32 * (preamble_longs & 0x3F));
913
+ const uint8_t sw = (compact ? 1 : 0) + (2 * (empty ? 1 : 0))
914
+ + (4 * (serial_version & 0xF)) + (32 * (preamble_longs & 0x3F));
851
915
  bool valid = true;
852
916
 
853
917
  switch (sw) { // exhaustive list and description of all valid cases
@@ -888,7 +952,7 @@ typename quantiles_sketch<T, C, A>::const_iterator quantiles_sketch<T, C, A>::en
888
952
 
889
953
  template<typename T, typename C, typename A>
890
954
  void quantiles_sketch<T, C, A>::grow_base_buffer() {
891
- size_t new_size = std::max(std::min(static_cast<size_t>(2 * k_), 2 * base_buffer_.size()), static_cast<size_t>(1));
955
+ const size_t new_size = std::max(std::min(static_cast<size_t>(2 * k_), 2 * base_buffer_.size()), static_cast<size_t>(1));
892
956
  base_buffer_.reserve(new_size);
893
957
  }
894
958
 
@@ -912,7 +976,7 @@ void quantiles_sketch<T, C, A>::process_full_base_buffer() {
912
976
 
913
977
  template<typename T, typename C, typename A>
914
978
  bool quantiles_sketch<T, C, A>::grow_levels_if_needed() {
915
- uint8_t levels_needed = compute_levels_needed(k_, n_);
979
+ const uint8_t levels_needed = compute_levels_needed(k_, n_);
916
980
  if (levels_needed == 0)
917
981
  return false; // don't need levels and might have small base buffer. Possible during merges.
918
982
 
@@ -992,7 +1056,7 @@ template<typename FwdV>
992
1056
  void quantiles_sketch<T, C, A>::zip_buffer_with_stride(FwdV&& buf_in, Level& buf_out, uint16_t stride) {
993
1057
  // Random offset in range [0, stride)
994
1058
  std::uniform_int_distribution<uint16_t> dist(0, stride - 1);
995
- uint16_t rand_offset = dist(random_utils::rand);
1059
+ const uint16_t rand_offset = dist(random_utils::rand);
996
1060
 
997
1061
  if ((buf_in.size() != stride * buf_out.capacity())
998
1062
  || (buf_out.size() > 0)) {
@@ -1000,7 +1064,7 @@ void quantiles_sketch<T, C, A>::zip_buffer_with_stride(FwdV&& buf_in, Level& buf
1000
1064
  "stride*buf_out.capacity() and empty buf_out");
1001
1065
  }
1002
1066
 
1003
- size_t k = buf_out.capacity();
1067
+ const size_t k = buf_out.capacity();
1004
1068
  for (uint16_t i = rand_offset, o = 0; o < k; i += stride, ++o) {
1005
1069
  buf_out.push_back(conditional_forward<FwdV>(buf_in[i]));
1006
1070
  }
@@ -1117,7 +1181,7 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
1117
1181
  const uint16_t downsample_factor = src.get_k() / tgt.get_k();
1118
1182
  const uint8_t lg_sample_factor = count_trailing_zeros_in_u32(downsample_factor);
1119
1183
 
1120
- uint64_t new_n = src.get_n() + tgt.get_n();
1184
+ const uint64_t new_n = src.get_n() + tgt.get_n();
1121
1185
 
1122
1186
  // move items from src's base buffer
1123
1187
  for (uint16_t i = 0; i < src.base_buffer_.size(); ++i) {
@@ -1125,7 +1189,7 @@ void quantiles_sketch<T, C, A>::downsampling_merge(quantiles_sketch& tgt, FwdSk&
1125
1189
  }
1126
1190
 
1127
1191
  // check (after moving raw items) if we need to extend levels array
1128
- uint8_t levels_needed = compute_levels_needed(tgt.get_k(), new_n);
1192
+ const uint8_t levels_needed = compute_levels_needed(tgt.get_k(), new_n);
1129
1193
  if (levels_needed > tgt.levels_.size()) {
1130
1194
  tgt.levels_.reserve(levels_needed);
1131
1195
  while (tgt.levels_.size() < levels_needed) {
@@ -17,7 +17,7 @@
17
17
  * under the License.
18
18
  */
19
19
 
20
- #include <catch.hpp>
20
+ #include <catch2/catch.hpp>
21
21
 
22
22
  #include <random>
23
23
 
@@ -82,7 +82,7 @@ TEST_CASE("kolmogorov-smirnov slightly different distributions", "[quantiles_ske
82
82
  const double delta = kolmogorov_smirnov::delta(sketch1, sketch2);
83
83
  REQUIRE(delta == Approx(0.02).margin(0.01));
84
84
  const double threshold = kolmogorov_smirnov::threshold(sketch1, sketch2, 0.05);
85
- std::cout << "delta=" << delta << ", threshold=" << threshold << "\n";
85
+
86
86
  REQUIRE_FALSE(delta > threshold);
87
87
  REQUIRE_FALSE(kolmogorov_smirnov::test(sketch1, sketch2, 0.05));
88
88
  }
@@ -102,7 +102,7 @@ TEST_CASE("kolmogorov-smirnov slightly different distributions high resolution",
102
102
  const double delta = kolmogorov_smirnov::delta(sketch1, sketch2);
103
103
  REQUIRE(delta == Approx(0.02).margin(0.01));
104
104
  const double threshold = kolmogorov_smirnov::threshold(sketch1, sketch2, 0.05);
105
- std::cout << "delta=" << delta << ", threshold=" << threshold << "\n";
105
+
106
106
  REQUIRE(delta > threshold);
107
107
  REQUIRE(kolmogorov_smirnov::test(sketch1, sketch2, 0.05));
108
108
  }
@@ -17,7 +17,7 @@
17
17
  * under the License.
18
18
  */
19
19
 
20
- #include <catch.hpp>
20
+ #include <catch2/catch.hpp>
21
21
  #include <cmath>
22
22
  #include <sstream>
23
23
  #include <fstream>
@@ -17,7 +17,7 @@
17
17
  * under the License.
18
18
  */
19
19
 
20
- #include <catch.hpp>
20
+ #include <catch2/catch.hpp>
21
21
  #include <cmath>
22
22
  #include <sstream>
23
23
  #include <fstream>
@@ -903,6 +903,69 @@ TEST_CASE("quantiles sketch", "[quantiles_sketch]") {
903
903
  }
904
904
  }
905
905
 
906
+ SECTION("Type converting copy constructor") {
907
+ const uint16_t k = 8;
908
+ const int n = 403;
909
+ quantiles_sketch<double> sk_double(k);
910
+
911
+ quantiles_sketch<float> sk_float(k, sk_double.get_allocator());
912
+ REQUIRE(sk_float.is_empty());
913
+
914
+ for (int i = 0; i < n; ++i) sk_double.update(i + .01);
915
+
916
+ quantiles_sketch<int> sk_int(sk_double);
917
+ REQUIRE(sk_double.get_n() == sk_int.get_n());
918
+ REQUIRE(sk_double.get_k() == sk_int.get_k());
919
+ REQUIRE(sk_double.get_num_retained() == sk_int.get_num_retained());
920
+
921
+ auto sv_double = sk_double.get_sorted_view(false);
922
+ std::vector<std::pair<double, uint64_t>> vec_double(sv_double.begin(), sv_double.end());
923
+
924
+ auto sv_int = sk_int.get_sorted_view(false);
925
+ std::vector<std::pair<int, uint64_t>> vec_int(sv_int.begin(), sv_int.end());
926
+
927
+ REQUIRE(vec_double.size() == vec_int.size());
928
+
929
+ for (size_t i = 0; i < vec_int.size(); ++i) {
930
+ // known truncation with conversion so approximate result
931
+ REQUIRE(vec_double[i].first == Approx(vec_int[i].first).margin(0.1));
932
+ // exact equality for weights
933
+ REQUIRE(vec_double[i].second == vec_int[i].second);
934
+ }
935
+ }
936
+
937
+ class A {
938
+ int val;
939
+ public:
940
+ A(int val): val(val) {}
941
+ int get_val() const { return val; }
942
+ };
943
+
944
+ struct less_A {
945
+ bool operator()(const A& a1, const A& a2) const { return a1.get_val() < a2.get_val(); }
946
+ };
947
+
948
+ class B {
949
+ int val;
950
+ public:
951
+ explicit B(const A& a): val(a.get_val()) {}
952
+ int get_val() const { return val; }
953
+ };
954
+
955
+ struct less_B {
956
+ bool operator()(const B& b1, const B& b2) const { return b1.get_val() < b2.get_val(); }
957
+ };
958
+
959
+ SECTION("type conversion: custom types") {
960
+ quantiles_sketch<A, less_A> sa;
961
+ sa.update(1);
962
+ sa.update(2);
963
+ sa.update(3);
964
+
965
+ quantiles_sketch<B, less_B> sb(sa);
966
+ REQUIRE(sb.get_n() == 3);
967
+ }
968
+
906
969
  // cleanup
907
970
  if (test_allocator_total_bytes != 0) {
908
971
  REQUIRE(test_allocator_total_bytes == 0);
@@ -38,6 +38,9 @@ public:
38
38
  req_compactor& operator=(const req_compactor& other);
39
39
  req_compactor& operator=(req_compactor&& other);
40
40
 
41
+ template<typename TT, typename CC, typename AA>
42
+ req_compactor(const req_compactor<TT, CC, AA>& other, const Allocator& allocator);
43
+
41
44
  bool is_sorted() const;
42
45
  uint32_t get_num_items() const;
43
46
  uint32_t get_nom_capacity() const;
@@ -128,6 +131,9 @@ private:
128
131
  template<typename S>
129
132
  static std::pair<std::unique_ptr<T, items_deleter>, size_t> deserialize_items(const void* bytes, size_t size, const S& serde, const Allocator& allocator, uint32_t num);
130
133
 
134
+ // for type converting constructor
135
+ template<typename TT, typename CC, typename AA>
136
+ friend class req_compactor;
131
137
  };
132
138
 
133
139
  } /* namespace datasketches */