datasketches 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +1 -1
  4. data/ext/datasketches/kll_wrapper.cpp +5 -1
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +4 -3
  7. data/vendor/datasketches-cpp/common/CMakeLists.txt +4 -0
  8. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +14 -0
  10. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
  11. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
  12. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
  13. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
  14. data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
  15. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
  16. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
  17. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +2 -0
  18. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
  19. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
  20. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
  21. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +25 -9
  22. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
  24. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
  25. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
  26. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
  27. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
  28. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
  29. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
  30. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
  31. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
  32. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
  33. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
  34. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
  35. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
  36. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
  37. data/vendor/datasketches-cpp/kll/CMakeLists.txt +0 -4
  38. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
  39. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
  40. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +96 -42
  41. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +105 -127
  42. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +94 -25
  43. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
  44. data/vendor/datasketches-cpp/pyproject.toml +1 -1
  45. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  46. data/vendor/datasketches-cpp/python/README.md +7 -0
  47. data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
  48. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
  49. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +48 -13
  50. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
  51. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
  52. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
  53. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +1 -0
  54. data/vendor/datasketches-cpp/python/tests/kll_test.py +10 -4
  55. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
  56. data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
  57. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
  58. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
  59. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
  60. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
  61. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
  62. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
  63. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
  64. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
  65. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
  66. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
  67. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
  68. data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
  69. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
  70. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
  71. data/vendor/datasketches-cpp/req/CMakeLists.txt +0 -2
  72. data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
  73. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
  74. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
  75. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +62 -59
  76. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
  77. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +44 -7
  78. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +31 -26
  79. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
  80. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +25 -9
  81. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
  82. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
  83. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
  84. data/vendor/datasketches-cpp/setup.py +1 -1
  85. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  86. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +8 -6
  87. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +1 -0
  88. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -0
  89. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +7 -45
  90. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -0
  91. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +1 -0
  92. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
  93. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
  94. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +1 -0
  95. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +29 -1
  96. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +2 -0
  97. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +16 -0
  98. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +1 -0
  99. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
  100. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
  101. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -0
  102. metadata +25 -9
  103. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
  104. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
  105. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
  106. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 897dbc30f97ce17f0415630b6347a0092dac05196b0ef61e80939410d65cdf17
4
- data.tar.gz: 61302f9cadde8a8badc97b455eb5c32d913c3b1fea8ed571e2da93a29e65afa9
3
+ metadata.gz: 9eaa8a17efdbc591b3e56f94650e887babd30dc79d95db3a7986df0261184191
4
+ data.tar.gz: 5544326a0edf165d87373a680d8bf5b80acba2894b9048f92cbdb261fcd66d57
5
5
  SHA512:
6
- metadata.gz: 4d541ba7f96a86f3f8de44f069f6e39d51ba6f28fa5d8c8d1d99a8434a95c5fe1a26470e6b062f348808fe5c0a444134d0dc96385437b4cb946c4a92044a2a5c
7
- data.tar.gz: bc1bdacb7cbe69f9bb1382fd2ac7019bec04baf444dc963d63a594e989fd201d9eb9aadd0e463ac4efef8f7ba53915a594d8fb00f74ae295674b9024269a0406
6
+ metadata.gz: 5a28c093ecda083762367149800770f59fee8e630c0d983d3f29ed32d027fae2e2515dff243ee11bbd41f4875c7cea622f7bc5cc5d7e73176e785503ed19fc0b
7
+ data.tar.gz: 6b210f2fdca1ae3cbd4e4cbf88e284855014b5a1e1c883085dc96a057da29e370005163ce628e54351c9127b00fae4b7b33a4ca63e6f4b90e0665e93b7742a66
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.5 (2022-05-21)
2
+
3
+ - Updated DataSketches to 3.4.0
4
+
1
5
  ## 0.2.4 (2021-12-28)
2
6
 
3
7
  - Updated DataSketches to 3.3.0
data/README.md CHANGED
@@ -9,7 +9,7 @@
9
9
  Add this line to your application’s Gemfile:
10
10
 
11
11
  ```ruby
12
- gem 'datasketches'
12
+ gem "datasketches"
13
13
  ```
14
14
 
15
15
  ## Sketch Families
@@ -33,7 +33,11 @@ void bind_kll_sketch(Rice::Module& m, const char* name) {
33
33
  .define_method("estimation_mode?", &kll_sketch<T>::is_estimation_mode)
34
34
  .define_method("min_value", &kll_sketch<T>::get_min_value)
35
35
  .define_method("max_value", &kll_sketch<T>::get_max_value)
36
- .define_method("quantile", &kll_sketch<T>::get_quantile)
36
+ .define_method(
37
+ "quantile",
38
+ [](kll_sketch<T>& self, double fraction) {
39
+ return self.get_quantile(fraction);
40
+ })
37
41
  .define_method(
38
42
  "quantiles",
39
43
  [](kll_sketch<T>& self, Rice::Object obj) {
@@ -1,3 +1,3 @@
1
1
  module DataSketches
2
- VERSION = "0.2.4"
2
+ VERSION = "0.2.5"
3
3
  end
@@ -15,9 +15,9 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- cmake_minimum_required(VERSION 3.12.0)
18
+ cmake_minimum_required(VERSION 3.16.0)
19
19
  project(DataSketches
20
- VERSION 3.2.0
20
+ VERSION 3.4.0
21
21
  LANGUAGES CXX)
22
22
 
23
23
  include(GNUInstallDirs)
@@ -106,12 +106,13 @@ add_subdirectory(theta)
106
106
  add_subdirectory(sampling)
107
107
  add_subdirectory(tuple)
108
108
  add_subdirectory(req)
109
+ add_subdirectory(quantiles)
109
110
 
110
111
  if (WITH_PYTHON)
111
112
  add_subdirectory(python)
112
113
  endif()
113
114
 
114
- target_link_libraries(datasketches INTERFACE hll cpc kll fi theta sampling)
115
+ target_link_libraries(datasketches INTERFACE hll cpc kll fi theta sampling req quantiles)
115
116
 
116
117
  if (COVERAGE)
117
118
  find_program(LCOV_PATH NAMES "lcov")
@@ -43,4 +43,8 @@ install(FILES
43
43
  include/conditional_forward.hpp
44
44
  include/ceiling_power_of_2.hpp
45
45
  include/bounds_binomial_proportions.hpp
46
+ include/kolmogorov_smirnov.hpp
47
+ include/kolmogorov_smirnov_impl.hpp
48
+ include/quantile_sketch_sorted_view.hpp
49
+ include/quantile_sketch_sorted_view_impl.hpp
46
50
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
@@ -22,6 +22,7 @@
22
22
 
23
23
  #include <algorithm>
24
24
  #include <cmath>
25
+ #include <stdexcept>
25
26
 
26
27
  /*
27
28
  * This class enables the estimation of error bounds given a sample set size, the sampling
@@ -24,6 +24,8 @@
24
24
  #include <string>
25
25
  #include <memory>
26
26
  #include <iostream>
27
+ #include <random>
28
+ #include <chrono>
27
29
 
28
30
  namespace datasketches {
29
31
 
@@ -34,6 +36,18 @@ enum resize_factor { X1 = 0, X2, X4, X8 };
34
36
  template<typename A> using AllocChar = typename std::allocator_traits<A>::template rebind_alloc<char>;
35
37
  template<typename A> using string = std::basic_string<char, std::char_traits<char>, AllocChar<A>>;
36
38
 
39
+ // random bit
40
+ static std::independent_bits_engine<std::mt19937, 1, uint32_t>
41
+ random_bit(static_cast<uint32_t>(std::chrono::system_clock::now().time_since_epoch().count()));
42
+
43
+ // common random declarations
44
+ namespace random_utils {
45
+ static std::random_device rd; // possibly unsafe in MinGW with GCC < 9.2
46
+ static std::mt19937_64 rand(rd());
47
+ static std::uniform_real_distribution<> next_double(0.0, 1.0);
48
+ }
49
+
50
+
37
51
  // utility function to hide unused compiler warning
38
52
  // usually has no additional cost
39
53
  template<typename T> void unused(T&&...) {}
@@ -25,7 +25,8 @@ namespace datasketches {
25
25
  class kolmogorov_smirnov {
26
26
  public:
27
27
  /**
28
- * Computes the raw delta area between two KLL quantile sketches for the Kolmogorov-Smirnov Test.
28
+ * Computes the raw delta area between two quantile sketches for the Kolmogorov-Smirnov Test.
29
+ * Will work for a type-matched pair of KLL or Quantiles sketches of the same parameterized type T.
29
30
  * @param sketch1 KLL sketch 1
30
31
  * @param sketch2 KLL sketch 2
31
32
  * @return the raw delta between two KLL quantile sketches
@@ -37,6 +38,7 @@ public:
37
38
  * Computes the adjusted delta area threshold for the Kolmogorov-Smirnov Test.
38
39
  * Adjusts the computed threshold by the error epsilons of the two given sketches.
39
40
  * See <a href="https://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test">Kolmogorov–Smirnov Test</a>
41
+ * Will work for a type-matched pair of KLL or Quantiles sketches of the same parameterized type T.
40
42
  * @param sketch1 KLL sketch 1
41
43
  * @param sketch2 KLL sketch 2
42
44
  * @param p Target p-value. Typically .001 to .1, e.g., .05.
@@ -46,7 +48,8 @@ public:
46
48
  static double threshold(const Sketch& sketch1, const Sketch& sketch2, double p);
47
49
 
48
50
  /**
49
- * Performs the Kolmogorov-Smirnov Test between two KLL quantiles sketches.
51
+ * Performs the Kolmogorov-Smirnov Test between two quantile sketches.
52
+ * Will work for a type-matched pair of KLL or Quantiles sketches of the same parameterized type T.
50
53
  * Note: if the given sketches have insufficient data or if the sketch sizes are too small,
51
54
  * this will return false.
52
55
  * @param sketch1 KLL sketch 1
@@ -57,7 +60,6 @@ public:
57
60
  */
58
61
  template<typename Sketch>
59
62
  static bool test(const Sketch& sketch1, const Sketch& sketch2, double p);
60
-
61
63
  };
62
64
 
63
65
  } /* namespace datasketches */
@@ -20,39 +20,36 @@
20
20
  #ifndef KOLMOGOROV_SMIRNOV_IMPL_HPP_
21
21
  #define KOLMOGOROV_SMIRNOV_IMPL_HPP_
22
22
 
23
- namespace datasketches {
23
+ #include <cmath>
24
+ #include <algorithm>
24
25
 
25
- // type resolver
26
- template<typename T, typename C, typename S, typename A>
27
- kll_quantile_calculator<T, C, A> make_quantile_calculator(const kll_sketch<T, C, S, A>& sketch) {
28
- return kll_quantile_calculator<T, C, A>(sketch);
29
- }
26
+ namespace datasketches {
30
27
 
31
28
  template<typename Sketch>
32
29
  double kolmogorov_smirnov::delta(const Sketch& sketch1, const Sketch& sketch2) {
33
- using Comparator = typename Sketch::comparator;
34
- auto calc1 = make_quantile_calculator(sketch1);
35
- auto calc2 = make_quantile_calculator(sketch2);
36
- auto it1 = calc1.begin();
37
- auto it2 = calc2.begin();
30
+ auto comparator = sketch1.get_comparator(); // assuming the same comparator in sketch2
31
+ auto view1 = sketch1.get_sorted_view(true);
32
+ auto view2 = sketch2.get_sorted_view(true);
33
+ auto it1 = view1.begin();
34
+ auto it2 = view2.begin();
38
35
  const auto n1 = sketch1.get_n();
39
36
  const auto n2 = sketch2.get_n();
40
37
  double delta = 0;
41
- while (it1 != calc1.end() && it2 != calc2.end()) {
38
+ while (it1 != view1.end() && it2 != view2.end()) {
42
39
  const double norm_cum_wt1 = static_cast<double>((*it1).second) / n1;
43
40
  const double norm_cum_wt2 = static_cast<double>((*it2).second) / n2;
44
41
  delta = std::max(delta, std::abs(norm_cum_wt1 - norm_cum_wt2));
45
- if (Comparator()((*it1).first, (*it2).first)) {
42
+ if (comparator((*it1).first, (*it2).first)) {
46
43
  ++it1;
47
- } else if (Comparator()((*it2).first, (*it1).first)) {
44
+ } else if (comparator((*it2).first, (*it1).first)) {
48
45
  ++it2;
49
46
  } else {
50
47
  ++it1;
51
48
  ++it2;
52
49
  }
53
50
  }
54
- const double norm_cum_wt1 = it1 == calc1.end() ? 1 : static_cast<double>((*it1).second) / n1;
55
- const double norm_cum_wt2 = it2 == calc2.end() ? 1 : static_cast<double>((*it2).second) / n2;
51
+ const double norm_cum_wt1 = it1 == view1.end() ? 1 : static_cast<double>((*it1).second) / n1;
52
+ const double norm_cum_wt2 = it2 == view2.end() ? 1 : static_cast<double>((*it2).second) / n2;
56
53
  delta = std::max(delta, std::abs(norm_cum_wt1 - norm_cum_wt2));
57
54
  return delta;
58
55
  }
@@ -0,0 +1,121 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef QUANTILE_SKETCH_SORTED_VIEW_HPP_
21
+ #define QUANTILE_SKETCH_SORTED_VIEW_HPP_
22
+
23
+ #include <functional>
24
+
25
+ namespace datasketches {
26
+
27
+ template<
28
+ typename T,
29
+ typename Comparator, // strict weak ordering function (see C++ named requirements: Compare)
30
+ typename Allocator
31
+ >
32
+ class quantile_sketch_sorted_view {
33
+ public:
34
+ using Entry = typename std::conditional<std::is_arithmetic<T>::value, std::pair<T, uint64_t>, std::pair<const T*, uint64_t>>::type;
35
+ using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
36
+ using Container = std::vector<Entry, AllocEntry>;
37
+
38
+ quantile_sketch_sorted_view(uint32_t num, const Allocator& allocator);
39
+
40
+ template<typename Iterator>
41
+ void add(Iterator begin, Iterator end, uint64_t weight);
42
+
43
+ template<bool inclusive>
44
+ void convert_to_cummulative();
45
+
46
+ class const_iterator;
47
+ const_iterator begin() const;
48
+ const_iterator end() const;
49
+
50
+ size_t size() const;
51
+
52
+ // makes sense only with cumulative weight
53
+ using quantile_return_type = typename std::conditional<std::is_arithmetic<T>::value, T, const T&>::type;
54
+ quantile_return_type get_quantile(double rank) const;
55
+
56
+ private:
57
+ static inline const T& deref_helper(const T* t) { return *t; }
58
+ static inline T deref_helper(T t) { return t; }
59
+
60
+ struct compare_pairs_by_first {
61
+ bool operator()(const Entry& a, const Entry& b) const {
62
+ return Comparator()(deref_helper(a.first), deref_helper(b.first));
63
+ }
64
+ };
65
+
66
+ struct compare_pairs_by_second {
67
+ bool operator()(const Entry& a, const Entry& b) const {
68
+ return a.second < b.second;
69
+ }
70
+ };
71
+
72
+ template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
73
+ static inline T ref_helper(const T& t) { return t; }
74
+
75
+ template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
76
+ static inline const T* ref_helper(const T& t) { return std::addressof(t); }
77
+
78
+ template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
79
+ static inline Entry make_dummy_entry(uint64_t weight) { return Entry(0, weight); }
80
+
81
+ template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
82
+ static inline Entry make_dummy_entry(uint64_t weight) { return Entry(nullptr, weight); }
83
+
84
+ uint64_t total_weight_;
85
+ Container entries_;
86
+ };
87
+
88
+ template<typename T, typename C, typename A>
89
+ class quantile_sketch_sorted_view<T, C, A>::const_iterator: public quantile_sketch_sorted_view<T, C, A>::Container::const_iterator {
90
+ public:
91
+ using Base = typename quantile_sketch_sorted_view<T, C, A>::Container::const_iterator;
92
+ using value_type = typename std::conditional<std::is_arithmetic<T>::value, typename Base::value_type, std::pair<const T&, const uint64_t>>::type;
93
+
94
+ const_iterator(const Base& it): Base(it) {}
95
+
96
+ template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
97
+ value_type operator*() const { return Base::operator*(); }
98
+
99
+ template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
100
+ value_type operator*() const { return value_type(*(Base::operator*().first), Base::operator*().second); }
101
+
102
+ class return_value_holder {
103
+ public:
104
+ return_value_holder(value_type value): value_(value) {}
105
+ const value_type* operator->() const { return &value_; }
106
+ private:
107
+ value_type value_;
108
+ };
109
+
110
+ template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
111
+ const value_type* operator->() const { return Base::operator->(); }
112
+
113
+ template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
114
+ return_value_holder operator->() const { return **this; }
115
+ };
116
+
117
+ } /* namespace datasketches */
118
+
119
+ #include "quantile_sketch_sorted_view_impl.hpp"
120
+
121
+ #endif
@@ -0,0 +1,91 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef QUANTILE_SKETCH_SORTED_VIEW_IMPL_HPP_
21
+ #define QUANTILE_SKETCH_SORTED_VIEW_IMPL_HPP_
22
+
23
+ #include <algorithm>
24
+ #include <stdexcept>
25
+
26
+ namespace datasketches {
27
+
28
+ template<typename T, typename C, typename A>
29
+ quantile_sketch_sorted_view<T, C, A>::quantile_sketch_sorted_view(uint32_t num, const A& allocator):
30
+ total_weight_(0),
31
+ entries_(allocator)
32
+ {
33
+ entries_.reserve(num);
34
+ }
35
+
36
+ template<typename T, typename C, typename A>
37
+ template<typename Iterator>
38
+ void quantile_sketch_sorted_view<T, C, A>::add(Iterator first, Iterator last, uint64_t weight) {
39
+ const size_t size_before = entries_.size();
40
+ for (auto it = first; it != last; ++it) entries_.push_back(Entry(ref_helper(*it), weight));
41
+ if (size_before > 0) {
42
+ Container tmp(entries_.get_allocator());
43
+ tmp.reserve(entries_.capacity());
44
+ std::merge(
45
+ entries_.begin(), entries_.begin() + size_before,
46
+ entries_.begin() + size_before, entries_.end(),
47
+ std::back_inserter(tmp), compare_pairs_by_first()
48
+ );
49
+ std::swap(tmp, entries_);
50
+ }
51
+ }
52
+
53
+ template<typename T, typename C, typename A>
54
+ template<bool inclusive>
55
+ void quantile_sketch_sorted_view<T, C, A>::convert_to_cummulative() {
56
+ uint64_t subtotal = 0;
57
+ for (auto& entry: entries_) {
58
+ const uint64_t new_subtotal = subtotal + entry.second;
59
+ entry.second = inclusive ? new_subtotal : subtotal;
60
+ subtotal = new_subtotal;
61
+ }
62
+ total_weight_ = subtotal;
63
+ }
64
+
65
+ template<typename T, typename C, typename A>
66
+ auto quantile_sketch_sorted_view<T, C, A>::get_quantile(double rank) const -> quantile_return_type {
67
+ if (total_weight_ == 0) throw std::invalid_argument("supported for cumulative weight only");
68
+ uint64_t weight = static_cast<uint64_t>(rank * total_weight_);
69
+ auto it = std::lower_bound(entries_.begin(), entries_.end(), make_dummy_entry<T>(weight), compare_pairs_by_second());
70
+ if (it == entries_.end()) return deref_helper(entries_[entries_.size() - 1].first);
71
+ return deref_helper(it->first);
72
+ }
73
+
74
+ template<typename T, typename C, typename A>
75
+ auto quantile_sketch_sorted_view<T, C, A>::begin() const -> const_iterator {
76
+ return entries_.begin();
77
+ }
78
+
79
+ template<typename T, typename C, typename A>
80
+ auto quantile_sketch_sorted_view<T, C, A>::end() const -> const_iterator {
81
+ return entries_.end();
82
+ }
83
+
84
+ template<typename T, typename C, typename A>
85
+ size_t quantile_sketch_sorted_view<T, C, A>::size() const {
86
+ return entries_.size();
87
+ }
88
+
89
+ } /* namespace datasketches */
90
+
91
+ #endif
@@ -20,7 +20,9 @@
20
20
  #ifndef CLASS_TEST_TYPE_HPP_
21
21
  #define CLASS_TEST_TYPE_HPP_
22
22
 
23
+ #include <cstring>
23
24
  #include <iostream>
25
+ #include "memory_operations.hpp"
24
26
 
25
27
  namespace datasketches {
26
28
 
@@ -23,6 +23,7 @@
23
23
  #define CPC_COMPRESSOR_IMPL_HPP_
24
24
 
25
25
  #include <memory>
26
+ #include <stdexcept>
26
27
 
27
28
  #include "compression_data.hpp"
28
29
  #include "cpc_util.hpp"
@@ -23,6 +23,7 @@
23
23
  #define CPC_CONFIDENCE_HPP_
24
24
 
25
25
  #include <cmath>
26
+ #include <stdexcept>
26
27
 
27
28
  #include "cpc_sketch.hpp"
28
29
 
@@ -22,6 +22,8 @@
22
22
 
23
23
  #include "count_zeros.hpp"
24
24
 
25
+ #include <stdexcept>
26
+
25
27
  namespace datasketches {
26
28
 
27
29
  template<typename A>
@@ -21,6 +21,7 @@
21
21
  #include <cstring>
22
22
  #include <sstream>
23
23
  #include <fstream>
24
+ #include <stdexcept>
24
25
 
25
26
  #include <catch.hpp>
26
27
 
@@ -21,6 +21,8 @@
21
21
 
22
22
  #include "cpc_union.hpp"
23
23
 
24
+ #include <stdexcept>
25
+
24
26
  namespace datasketches {
25
27
 
26
28
  static const double RELATIVE_ERROR_FOR_LG_K_11 = 0.02;
@@ -46,7 +46,7 @@ template<
46
46
  typename W = uint64_t,
47
47
  typename H = std::hash<T>,
48
48
  typename E = std::equal_to<T>,
49
- typename S = serde<T>,
49
+ typename S = serde<T>, // deprecated, to be removed in the next major version
50
50
  typename A = std::allocator<T>
51
51
  >
52
52
  class frequent_items_sketch {
@@ -225,46 +225,78 @@ public:
225
225
  /**
226
226
  * Computes size needed to serialize the current state of the sketch.
227
227
  * This can be expensive since every item needs to be looked at.
228
+ * @param sd instance of a SerDe
228
229
  * @return size in bytes needed to serialize this sketch
229
230
  */
230
- size_t get_serialized_size_bytes() const;
231
+ template<typename SerDe = S>
232
+ size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
231
233
 
232
234
  /**
233
235
  * This method serializes the sketch into a given stream in a binary form
234
236
  * @param os output stream
237
+ * @param sd instance of a SerDe
235
238
  */
236
- void serialize(std::ostream& os) const;
239
+ template<typename SerDe = S>
240
+ void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
237
241
 
238
242
  // This is a convenience alias for users
239
243
  // The type returned by the following serialize method
240
244
  using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
241
245
 
242
-
243
246
  /**
244
247
  * This method serializes the sketch as a vector of bytes.
245
248
  * An optional header can be reserved in front of the sketch.
246
249
  * It is a blank space of a given size.
247
250
  * This header is used in Datasketches PostgreSQL extension.
248
251
  * @param header_size_bytes space to reserve in front of the sketch
252
+ * @param sd instance of a SerDe
249
253
  * @return serialized sketch as a vector of bytes
250
254
  */
251
- vector_bytes serialize(unsigned header_size_bytes = 0) const;
255
+ template<typename SerDe = S>
256
+ vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
252
257
 
253
258
  /**
254
259
  * This method deserializes a sketch from a given stream.
255
260
  * @param is input stream
261
+ * @param allocator instance of an Allocator
256
262
  * @return an instance of the sketch
263
+ *
264
+ * Deprecated, to be removed in the next major version
257
265
  */
258
266
  static frequent_items_sketch deserialize(std::istream& is, const A& allocator = A());
259
267
 
268
+ /**
269
+ * This method deserializes a sketch from a given stream.
270
+ * @param is input stream
271
+ * @param sd instance of a SerDe
272
+ * @param allocator instance of an Allocator
273
+ * @return an instance of the sketch
274
+ */
275
+ template<typename SerDe = S>
276
+ static frequent_items_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
277
+
260
278
  /**
261
279
  * This method deserializes a sketch from a given array of bytes.
262
280
  * @param bytes pointer to the array of bytes
263
281
  * @param size the size of the array
282
+ * @param allocator instance of an Allocator
264
283
  * @return an instance of the sketch
284
+ *
285
+ * Deprecated, to be removed in the next major version
265
286
  */
266
287
  static frequent_items_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
267
288
 
289
+ /**
290
+ * This method deserializes a sketch from a given array of bytes.
291
+ * @param bytes pointer to the array of bytes
292
+ * @param size the size of the array
293
+ * @param sd instance of a SerDe
294
+ * @param allocator instance of an Allocator
295
+ * @return an instance of the sketch
296
+ */
297
+ template<typename SerDe = S>
298
+ static frequent_items_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
299
+
268
300
  /**
269
301
  * Returns a human readable summary of this sketch
270
302
  * @param print_items if true include the list of items retained by the sketch