datasketches 0.2.4 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (106)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +1 -1
  4. data/ext/datasketches/kll_wrapper.cpp +5 -1
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +4 -3
  7. data/vendor/datasketches-cpp/common/CMakeLists.txt +4 -0
  8. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +14 -0
  10. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
  11. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
  12. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
  13. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
  14. data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
  15. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
  16. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
  17. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +2 -0
  18. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
  19. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
  20. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
  21. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +25 -9
  22. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
  24. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
  25. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
  26. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
  27. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
  28. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
  29. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
  30. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
  31. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
  32. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
  33. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
  34. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
  35. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
  36. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
  37. data/vendor/datasketches-cpp/kll/CMakeLists.txt +0 -4
  38. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
  39. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
  40. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +96 -42
  41. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +105 -127
  42. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +94 -25
  43. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
  44. data/vendor/datasketches-cpp/pyproject.toml +1 -1
  45. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  46. data/vendor/datasketches-cpp/python/README.md +7 -0
  47. data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
  48. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
  49. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +48 -13
  50. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
  51. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
  52. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
  53. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +1 -0
  54. data/vendor/datasketches-cpp/python/tests/kll_test.py +10 -4
  55. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
  56. data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
  57. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
  58. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
  59. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
  60. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
  61. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
  62. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
  63. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
  64. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
  65. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
  66. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
  67. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
  68. data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
  69. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
  70. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
  71. data/vendor/datasketches-cpp/req/CMakeLists.txt +0 -2
  72. data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
  73. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
  74. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
  75. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +62 -59
  76. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
  77. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +44 -7
  78. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +31 -26
  79. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
  80. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +25 -9
  81. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
  82. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
  83. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
  84. data/vendor/datasketches-cpp/setup.py +1 -1
  85. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  86. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +8 -6
  87. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +1 -0
  88. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -0
  89. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +7 -45
  90. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -0
  91. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +1 -0
  92. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
  93. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
  94. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +1 -0
  95. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +29 -1
  96. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +2 -0
  97. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +16 -0
  98. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +1 -0
  99. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
  100. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
  101. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -0
  102. metadata +25 -9
  103. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
  104. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
  105. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
  106. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 897dbc30f97ce17f0415630b6347a0092dac05196b0ef61e80939410d65cdf17
4
- data.tar.gz: 61302f9cadde8a8badc97b455eb5c32d913c3b1fea8ed571e2da93a29e65afa9
3
+ metadata.gz: 9eaa8a17efdbc591b3e56f94650e887babd30dc79d95db3a7986df0261184191
4
+ data.tar.gz: 5544326a0edf165d87373a680d8bf5b80acba2894b9048f92cbdb261fcd66d57
5
5
  SHA512:
6
- metadata.gz: 4d541ba7f96a86f3f8de44f069f6e39d51ba6f28fa5d8c8d1d99a8434a95c5fe1a26470e6b062f348808fe5c0a444134d0dc96385437b4cb946c4a92044a2a5c
7
- data.tar.gz: bc1bdacb7cbe69f9bb1382fd2ac7019bec04baf444dc963d63a594e989fd201d9eb9aadd0e463ac4efef8f7ba53915a594d8fb00f74ae295674b9024269a0406
6
+ metadata.gz: 5a28c093ecda083762367149800770f59fee8e630c0d983d3f29ed32d027fae2e2515dff243ee11bbd41f4875c7cea622f7bc5cc5d7e73176e785503ed19fc0b
7
+ data.tar.gz: 6b210f2fdca1ae3cbd4e4cbf88e284855014b5a1e1c883085dc96a057da29e370005163ce628e54351c9127b00fae4b7b33a4ca63e6f4b90e0665e93b7742a66
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.5 (2022-05-21)
2
+
3
+ - Updated DataSketches to 3.4.0
4
+
1
5
  ## 0.2.4 (2021-12-28)
2
6
 
3
7
  - Updated DataSketches to 3.3.0
data/README.md CHANGED
@@ -9,7 +9,7 @@
9
9
  Add this line to your application’s Gemfile:
10
10
 
11
11
  ```ruby
12
- gem 'datasketches'
12
+ gem "datasketches"
13
13
  ```
14
14
 
15
15
  ## Sketch Families
data/ext/datasketches/kll_wrapper.cpp CHANGED
@@ -33,7 +33,11 @@ void bind_kll_sketch(Rice::Module& m, const char* name) {
33
33
  .define_method("estimation_mode?", &kll_sketch<T>::is_estimation_mode)
34
34
  .define_method("min_value", &kll_sketch<T>::get_min_value)
35
35
  .define_method("max_value", &kll_sketch<T>::get_max_value)
36
- .define_method("quantile", &kll_sketch<T>::get_quantile)
36
+ .define_method(
37
+ "quantile",
38
+ [](kll_sketch<T>& self, double fraction) {
39
+ return self.get_quantile(fraction);
40
+ })
37
41
  .define_method(
38
42
  "quantiles",
39
43
  [](kll_sketch<T>& self, Rice::Object obj) {
data/lib/datasketches/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module DataSketches
2
- VERSION = "0.2.4"
2
+ VERSION = "0.2.5"
3
3
  end
data/vendor/datasketches-cpp/CMakeLists.txt CHANGED
@@ -15,9 +15,9 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- cmake_minimum_required(VERSION 3.12.0)
18
+ cmake_minimum_required(VERSION 3.16.0)
19
19
  project(DataSketches
20
- VERSION 3.2.0
20
+ VERSION 3.4.0
21
21
  LANGUAGES CXX)
22
22
 
23
23
  include(GNUInstallDirs)
@@ -106,12 +106,13 @@ add_subdirectory(theta)
106
106
  add_subdirectory(sampling)
107
107
  add_subdirectory(tuple)
108
108
  add_subdirectory(req)
109
+ add_subdirectory(quantiles)
109
110
 
110
111
  if (WITH_PYTHON)
111
112
  add_subdirectory(python)
112
113
  endif()
113
114
 
114
- target_link_libraries(datasketches INTERFACE hll cpc kll fi theta sampling)
115
+ target_link_libraries(datasketches INTERFACE hll cpc kll fi theta sampling req quantiles)
115
116
 
116
117
  if (COVERAGE)
117
118
  find_program(LCOV_PATH NAMES "lcov")
data/vendor/datasketches-cpp/common/CMakeLists.txt CHANGED
@@ -43,4 +43,8 @@ install(FILES
43
43
  include/conditional_forward.hpp
44
44
  include/ceiling_power_of_2.hpp
45
45
  include/bounds_binomial_proportions.hpp
46
+ include/kolmogorov_smirnov.hpp
47
+ include/kolmogorov_smirnov_impl.hpp
48
+ include/quantile_sketch_sorted_view.hpp
49
+ include/quantile_sketch_sorted_view_impl.hpp
46
50
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp CHANGED
@@ -22,6 +22,7 @@
22
22
 
23
23
  #include <algorithm>
24
24
  #include <cmath>
25
+ #include <stdexcept>
25
26
 
26
27
  /*
27
28
  * This class enables the estimation of error bounds given a sample set size, the sampling
data/vendor/datasketches-cpp/common/include/common_defs.hpp CHANGED
@@ -24,6 +24,8 @@
24
24
  #include <string>
25
25
  #include <memory>
26
26
  #include <iostream>
27
+ #include <random>
28
+ #include <chrono>
27
29
 
28
30
  namespace datasketches {
29
31
 
@@ -34,6 +36,18 @@ enum resize_factor { X1 = 0, X2, X4, X8 };
34
36
  template<typename A> using AllocChar = typename std::allocator_traits<A>::template rebind_alloc<char>;
35
37
  template<typename A> using string = std::basic_string<char, std::char_traits<char>, AllocChar<A>>;
36
38
 
39
+ // random bit
40
+ static std::independent_bits_engine<std::mt19937, 1, uint32_t>
41
+ random_bit(static_cast<uint32_t>(std::chrono::system_clock::now().time_since_epoch().count()));
42
+
43
+ // common random declarations
44
+ namespace random_utils {
45
+ static std::random_device rd; // possibly unsafe in MinGW with GCC < 9.2
46
+ static std::mt19937_64 rand(rd());
47
+ static std::uniform_real_distribution<> next_double(0.0, 1.0);
48
+ }
49
+
50
+
37
51
  // utility function to hide unused compiler warning
38
52
  // usually has no additional cost
39
53
  template<typename T> void unused(T&&...) {}
data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp RENAMED
@@ -25,7 +25,8 @@ namespace datasketches {
25
25
  class kolmogorov_smirnov {
26
26
  public:
27
27
  /**
28
- * Computes the raw delta area between two KLL quantile sketches for the Kolmogorov-Smirnov Test.
28
+ * Computes the raw delta area between two quantile sketches for the Kolmogorov-Smirnov Test.
29
+ * Will work for a type-matched pair of KLL or Quantiles sketches of the same parameterized type T.
29
30
  * @param sketch1 KLL sketch 1
30
31
  * @param sketch2 KLL sketch 2
31
32
  * @return the raw delta between two KLL quantile sketches
@@ -37,6 +38,7 @@ public:
37
38
  * Computes the adjusted delta area threshold for the Kolmogorov-Smirnov Test.
38
39
  * Adjusts the computed threshold by the error epsilons of the two given sketches.
39
40
  * See <a href="https://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test">Kolmogorov–Smirnov Test</a>
41
+ * Will work for a type-matched pair of KLL or Quantiles sketches of the same parameterized type T.
40
42
  * @param sketch1 KLL sketch 1
41
43
  * @param sketch2 KLL sketch 2
42
44
  * @param p Target p-value. Typically .001 to .1, e.g., .05.
@@ -46,7 +48,8 @@ public:
46
48
  static double threshold(const Sketch& sketch1, const Sketch& sketch2, double p);
47
49
 
48
50
  /**
49
- * Performs the Kolmogorov-Smirnov Test between two KLL quantiles sketches.
51
+ * Performs the Kolmogorov-Smirnov Test between two quantile sketches.
52
+ * Will work for a type-matched pair of KLL or Quantiles sketches of the same parameterized type T.
50
53
  * Note: if the given sketches have insufficient data or if the sketch sizes are too small,
51
54
  * this will return false.
52
55
  * @param sketch1 KLL sketch 1
@@ -57,7 +60,6 @@ public:
57
60
  */
58
61
  template<typename Sketch>
59
62
  static bool test(const Sketch& sketch1, const Sketch& sketch2, double p);
60
-
61
63
  };
62
64
 
63
65
  } /* namespace datasketches */
data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp RENAMED
@@ -20,39 +20,36 @@
20
20
  #ifndef KOLMOGOROV_SMIRNOV_IMPL_HPP_
21
21
  #define KOLMOGOROV_SMIRNOV_IMPL_HPP_
22
22
 
23
- namespace datasketches {
23
+ #include <cmath>
24
+ #include <algorithm>
24
25
 
25
- // type resolver
26
- template<typename T, typename C, typename S, typename A>
27
- kll_quantile_calculator<T, C, A> make_quantile_calculator(const kll_sketch<T, C, S, A>& sketch) {
28
- return kll_quantile_calculator<T, C, A>(sketch);
29
- }
26
+ namespace datasketches {
30
27
 
31
28
  template<typename Sketch>
32
29
  double kolmogorov_smirnov::delta(const Sketch& sketch1, const Sketch& sketch2) {
33
- using Comparator = typename Sketch::comparator;
34
- auto calc1 = make_quantile_calculator(sketch1);
35
- auto calc2 = make_quantile_calculator(sketch2);
36
- auto it1 = calc1.begin();
37
- auto it2 = calc2.begin();
30
+ auto comparator = sketch1.get_comparator(); // assuming the same comparator in sketch2
31
+ auto view1 = sketch1.get_sorted_view(true);
32
+ auto view2 = sketch2.get_sorted_view(true);
33
+ auto it1 = view1.begin();
34
+ auto it2 = view2.begin();
38
35
  const auto n1 = sketch1.get_n();
39
36
  const auto n2 = sketch2.get_n();
40
37
  double delta = 0;
41
- while (it1 != calc1.end() && it2 != calc2.end()) {
38
+ while (it1 != view1.end() && it2 != view2.end()) {
42
39
  const double norm_cum_wt1 = static_cast<double>((*it1).second) / n1;
43
40
  const double norm_cum_wt2 = static_cast<double>((*it2).second) / n2;
44
41
  delta = std::max(delta, std::abs(norm_cum_wt1 - norm_cum_wt2));
45
- if (Comparator()((*it1).first, (*it2).first)) {
42
+ if (comparator((*it1).first, (*it2).first)) {
46
43
  ++it1;
47
- } else if (Comparator()((*it2).first, (*it1).first)) {
44
+ } else if (comparator((*it2).first, (*it1).first)) {
48
45
  ++it2;
49
46
  } else {
50
47
  ++it1;
51
48
  ++it2;
52
49
  }
53
50
  }
54
- const double norm_cum_wt1 = it1 == calc1.end() ? 1 : static_cast<double>((*it1).second) / n1;
55
- const double norm_cum_wt2 = it2 == calc2.end() ? 1 : static_cast<double>((*it2).second) / n2;
51
+ const double norm_cum_wt1 = it1 == view1.end() ? 1 : static_cast<double>((*it1).second) / n1;
52
+ const double norm_cum_wt2 = it2 == view2.end() ? 1 : static_cast<double>((*it2).second) / n2;
56
53
  delta = std::max(delta, std::abs(norm_cum_wt1 - norm_cum_wt2));
57
54
  return delta;
58
55
  }
data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp ADDED
@@ -0,0 +1,121 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef QUANTILE_SKETCH_SORTED_VIEW_HPP_
21
+ #define QUANTILE_SKETCH_SORTED_VIEW_HPP_
22
+
23
+ #include <functional>
24
+
25
+ namespace datasketches {
26
+
27
+ template<
28
+ typename T,
29
+ typename Comparator, // strict weak ordering function (see C++ named requirements: Compare)
30
+ typename Allocator
31
+ >
32
+ class quantile_sketch_sorted_view {
33
+ public:
34
+ using Entry = typename std::conditional<std::is_arithmetic<T>::value, std::pair<T, uint64_t>, std::pair<const T*, uint64_t>>::type;
35
+ using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
36
+ using Container = std::vector<Entry, AllocEntry>;
37
+
38
+ quantile_sketch_sorted_view(uint32_t num, const Allocator& allocator);
39
+
40
+ template<typename Iterator>
41
+ void add(Iterator begin, Iterator end, uint64_t weight);
42
+
43
+ template<bool inclusive>
44
+ void convert_to_cummulative();
45
+
46
+ class const_iterator;
47
+ const_iterator begin() const;
48
+ const_iterator end() const;
49
+
50
+ size_t size() const;
51
+
52
+ // makes sense only with cumulative weight
53
+ using quantile_return_type = typename std::conditional<std::is_arithmetic<T>::value, T, const T&>::type;
54
+ quantile_return_type get_quantile(double rank) const;
55
+
56
+ private:
57
+ static inline const T& deref_helper(const T* t) { return *t; }
58
+ static inline T deref_helper(T t) { return t; }
59
+
60
+ struct compare_pairs_by_first {
61
+ bool operator()(const Entry& a, const Entry& b) const {
62
+ return Comparator()(deref_helper(a.first), deref_helper(b.first));
63
+ }
64
+ };
65
+
66
+ struct compare_pairs_by_second {
67
+ bool operator()(const Entry& a, const Entry& b) const {
68
+ return a.second < b.second;
69
+ }
70
+ };
71
+
72
+ template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
73
+ static inline T ref_helper(const T& t) { return t; }
74
+
75
+ template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
76
+ static inline const T* ref_helper(const T& t) { return std::addressof(t); }
77
+
78
+ template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
79
+ static inline Entry make_dummy_entry(uint64_t weight) { return Entry(0, weight); }
80
+
81
+ template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
82
+ static inline Entry make_dummy_entry(uint64_t weight) { return Entry(nullptr, weight); }
83
+
84
+ uint64_t total_weight_;
85
+ Container entries_;
86
+ };
87
+
88
+ template<typename T, typename C, typename A>
89
+ class quantile_sketch_sorted_view<T, C, A>::const_iterator: public quantile_sketch_sorted_view<T, C, A>::Container::const_iterator {
90
+ public:
91
+ using Base = typename quantile_sketch_sorted_view<T, C, A>::Container::const_iterator;
92
+ using value_type = typename std::conditional<std::is_arithmetic<T>::value, typename Base::value_type, std::pair<const T&, const uint64_t>>::type;
93
+
94
+ const_iterator(const Base& it): Base(it) {}
95
+
96
+ template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
97
+ value_type operator*() const { return Base::operator*(); }
98
+
99
+ template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
100
+ value_type operator*() const { return value_type(*(Base::operator*().first), Base::operator*().second); }
101
+
102
+ class return_value_holder {
103
+ public:
104
+ return_value_holder(value_type value): value_(value) {}
105
+ const value_type* operator->() const { return &value_; }
106
+ private:
107
+ value_type value_;
108
+ };
109
+
110
+ template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
111
+ const value_type* operator->() const { return Base::operator->(); }
112
+
113
+ template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
114
+ return_value_holder operator->() const { return **this; }
115
+ };
116
+
117
+ } /* namespace datasketches */
118
+
119
+ #include "quantile_sketch_sorted_view_impl.hpp"
120
+
121
+ #endif
data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp ADDED
@@ -0,0 +1,91 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef QUANTILE_SKETCH_SORTED_VIEW_IMPL_HPP_
21
+ #define QUANTILE_SKETCH_SORTED_VIEW_IMPL_HPP_
22
+
23
+ #include <algorithm>
24
+ #include <stdexcept>
25
+
26
+ namespace datasketches {
27
+
28
+ template<typename T, typename C, typename A>
29
+ quantile_sketch_sorted_view<T, C, A>::quantile_sketch_sorted_view(uint32_t num, const A& allocator):
30
+ total_weight_(0),
31
+ entries_(allocator)
32
+ {
33
+ entries_.reserve(num);
34
+ }
35
+
36
+ template<typename T, typename C, typename A>
37
+ template<typename Iterator>
38
+ void quantile_sketch_sorted_view<T, C, A>::add(Iterator first, Iterator last, uint64_t weight) {
39
+ const size_t size_before = entries_.size();
40
+ for (auto it = first; it != last; ++it) entries_.push_back(Entry(ref_helper(*it), weight));
41
+ if (size_before > 0) {
42
+ Container tmp(entries_.get_allocator());
43
+ tmp.reserve(entries_.capacity());
44
+ std::merge(
45
+ entries_.begin(), entries_.begin() + size_before,
46
+ entries_.begin() + size_before, entries_.end(),
47
+ std::back_inserter(tmp), compare_pairs_by_first()
48
+ );
49
+ std::swap(tmp, entries_);
50
+ }
51
+ }
52
+
53
+ template<typename T, typename C, typename A>
54
+ template<bool inclusive>
55
+ void quantile_sketch_sorted_view<T, C, A>::convert_to_cummulative() {
56
+ uint64_t subtotal = 0;
57
+ for (auto& entry: entries_) {
58
+ const uint64_t new_subtotal = subtotal + entry.second;
59
+ entry.second = inclusive ? new_subtotal : subtotal;
60
+ subtotal = new_subtotal;
61
+ }
62
+ total_weight_ = subtotal;
63
+ }
64
+
65
+ template<typename T, typename C, typename A>
66
+ auto quantile_sketch_sorted_view<T, C, A>::get_quantile(double rank) const -> quantile_return_type {
67
+ if (total_weight_ == 0) throw std::invalid_argument("supported for cumulative weight only");
68
+ uint64_t weight = static_cast<uint64_t>(rank * total_weight_);
69
+ auto it = std::lower_bound(entries_.begin(), entries_.end(), make_dummy_entry<T>(weight), compare_pairs_by_second());
70
+ if (it == entries_.end()) return deref_helper(entries_[entries_.size() - 1].first);
71
+ return deref_helper(it->first);
72
+ }
73
+
74
+ template<typename T, typename C, typename A>
75
+ auto quantile_sketch_sorted_view<T, C, A>::begin() const -> const_iterator {
76
+ return entries_.begin();
77
+ }
78
+
79
+ template<typename T, typename C, typename A>
80
+ auto quantile_sketch_sorted_view<T, C, A>::end() const -> const_iterator {
81
+ return entries_.end();
82
+ }
83
+
84
+ template<typename T, typename C, typename A>
85
+ size_t quantile_sketch_sorted_view<T, C, A>::size() const {
86
+ return entries_.size();
87
+ }
88
+
89
+ } /* namespace datasketches */
90
+
91
+ #endif
data/vendor/datasketches-cpp/common/test/test_type.hpp CHANGED
@@ -20,7 +20,9 @@
20
20
  #ifndef CLASS_TEST_TYPE_HPP_
21
21
  #define CLASS_TEST_TYPE_HPP_
22
22
 
23
+ #include <cstring>
23
24
  #include <iostream>
25
+ #include "memory_operations.hpp"
24
26
 
25
27
  namespace datasketches {
26
28
 
data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp CHANGED
@@ -23,6 +23,7 @@
23
23
  #define CPC_COMPRESSOR_IMPL_HPP_
24
24
 
25
25
  #include <memory>
26
+ #include <stdexcept>
26
27
 
27
28
  #include "compression_data.hpp"
28
29
  #include "cpc_util.hpp"
data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp CHANGED
@@ -23,6 +23,7 @@
23
23
  #define CPC_CONFIDENCE_HPP_
24
24
 
25
25
  #include <cmath>
26
+ #include <stdexcept>
26
27
 
27
28
  #include "cpc_sketch.hpp"
28
29
 
data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp CHANGED
@@ -22,6 +22,8 @@
22
22
 
23
23
  #include "count_zeros.hpp"
24
24
 
25
+ #include <stdexcept>
26
+
25
27
  namespace datasketches {
26
28
 
27
29
  template<typename A>
data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp CHANGED
@@ -21,6 +21,7 @@
21
21
  #include <cstring>
22
22
  #include <sstream>
23
23
  #include <fstream>
24
+ #include <stdexcept>
24
25
 
25
26
  #include <catch.hpp>
26
27
 
data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp CHANGED
@@ -21,6 +21,8 @@
21
21
 
22
22
  #include "cpc_union.hpp"
23
23
 
24
+ #include <stdexcept>
25
+
24
26
  namespace datasketches {
25
27
 
26
28
  static const double RELATIVE_ERROR_FOR_LG_K_11 = 0.02;
data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp CHANGED
@@ -46,7 +46,7 @@ template<
46
46
  typename W = uint64_t,
47
47
  typename H = std::hash<T>,
48
48
  typename E = std::equal_to<T>,
49
- typename S = serde<T>,
49
+ typename S = serde<T>, // deprecated, to be removed in the next major version
50
50
  typename A = std::allocator<T>
51
51
  >
52
52
  class frequent_items_sketch {
@@ -225,46 +225,78 @@ public:
225
225
  /**
226
226
  * Computes size needed to serialize the current state of the sketch.
227
227
  * This can be expensive since every item needs to be looked at.
228
+ * @param sd instance of a SerDe
228
229
  * @return size in bytes needed to serialize this sketch
229
230
  */
230
- size_t get_serialized_size_bytes() const;
231
+ template<typename SerDe = S>
232
+ size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
231
233
 
232
234
  /**
233
235
  * This method serializes the sketch into a given stream in a binary form
234
236
  * @param os output stream
237
+ * @param sd instance of a SerDe
235
238
  */
236
- void serialize(std::ostream& os) const;
239
+ template<typename SerDe = S>
240
+ void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
237
241
 
238
242
  // This is a convenience alias for users
239
243
  // The type returned by the following serialize method
240
244
  using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
241
245
 
242
-
243
246
  /**
244
247
  * This method serializes the sketch as a vector of bytes.
245
248
  * An optional header can be reserved in front of the sketch.
246
249
  * It is a blank space of a given size.
247
250
  * This header is used in Datasketches PostgreSQL extension.
248
251
  * @param header_size_bytes space to reserve in front of the sketch
252
+ * @param sd instance of a SerDe
249
253
  * @return serialized sketch as a vector of bytes
250
254
  */
251
- vector_bytes serialize(unsigned header_size_bytes = 0) const;
255
+ template<typename SerDe = S>
256
+ vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
252
257
 
253
258
  /**
254
259
  * This method deserializes a sketch from a given stream.
255
260
  * @param is input stream
261
+ * @param allocator instance of an Allocator
256
262
  * @return an instance of the sketch
263
+ *
264
+ * Deprecated, to be removed in the next major version
257
265
  */
258
266
  static frequent_items_sketch deserialize(std::istream& is, const A& allocator = A());
259
267
 
268
+ /**
269
+ * This method deserializes a sketch from a given stream.
270
+ * @param is input stream
271
+ * @param sd instance of a SerDe
272
+ * @param allocator instance of an Allocator
273
+ * @return an instance of the sketch
274
+ */
275
+ template<typename SerDe = S>
276
+ static frequent_items_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
277
+
260
278
  /**
261
279
  * This method deserializes a sketch from a given array of bytes.
262
280
  * @param bytes pointer to the array of bytes
263
281
  * @param size the size of the array
282
+ * @param allocator instance of an Allocator
264
283
  * @return an instance of the sketch
284
+ *
285
+ * Deprecated, to be removed in the next major version
265
286
  */
266
287
  static frequent_items_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
267
288
 
289
+ /**
290
+ * This method deserializes a sketch from a given array of bytes.
291
+ * @param bytes pointer to the array of bytes
292
+ * @param size the size of the array
293
+ * @param sd instance of a SerDe
294
+ * @param allocator instance of an Allocator
295
+ * @return an instance of the sketch
296
+ */
297
+ template<typename SerDe = S>
298
+ static frequent_items_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
299
+
268
300
  /**
269
301
  * Returns a human readable summary of this sketch
270
302
  * @param print_items if true include the list of items retained by the sketch