datasketches 0.2.7 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4a7fef851c11ba93a002a6215f76170b973628f8b1f0a89fb89a5e655f6a421a
4
- data.tar.gz: a590d7cde413596640f8bf169db67742696d7e304097be8bc0820244ed32ebd5
3
+ metadata.gz: d8db863a37a8fa081bff6bf269666cdff6d4e8a4cf860a0fafac235858709f62
4
+ data.tar.gz: a858071aae33a8aeb5d92cec1f4fdf4cc3cb5d12b07ed629152c953674c06dff
5
5
  SHA512:
6
- metadata.gz: dea9986097b4e9e4c7aba8b6f69108dce21caa2f6fa1e8723a9bf8be2077b925507bf84bd92bc794c1831285963f8ecb8f4739797c3246a15a438a82816043d6
7
- data.tar.gz: 64f2ed1ed4656c09057892ae493a9a2c7178b3090dd84c24105b9e2f16d45f9aaed728c2bb6154e6518e018bbd240f5c2e562fa2edbee397b5deccb8b1dd11f5
6
+ metadata.gz: 03acf7acb3ecb617713d3549e289ed5829f55bd65a52c28eaf1a603cc2e9e577f7d7ffdebd230b01d259d1116ebbef06640a1a9b1d00baa8d0e970cd31357923
7
+ data.tar.gz: eb4730c27379a392b330cbdf9cf17d6d1207307e46ca2c0203b1e4eab237218115824249646f5916737af01b96b3451a373ce51c2020576af49a1da31a5070b5
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.3.0 (2022-12-08)
2
+
3
+ - Updated DataSketches to 4.0.0
4
+ - Dropped support for Ruby < 2.7
5
+
1
6
  ## 0.2.7 (2022-11-05)
2
7
 
3
8
  - Updated DataSketches to 3.5.1
@@ -26,43 +26,43 @@ namespace Rice::detail
26
26
  template<typename T>
27
27
  void bind_kll_sketch(Rice::Module& m, const char* name) {
28
28
  Rice::define_class_under<kll_sketch<T>>(m, name)
29
- .define_constructor(Rice::Constructor<kll_sketch<T>, uint16_t>(), Rice::Arg("k")=kll_sketch<T>::DEFAULT_K)
29
+ .define_constructor(Rice::Constructor<kll_sketch<T>, uint16_t>(), Rice::Arg("k")=datasketches::kll_constants::DEFAULT_K)
30
30
  .define_method("empty?", &kll_sketch<T>::is_empty)
31
31
  .define_method("n", &kll_sketch<T>::get_n)
32
32
  .define_method("num_retained", &kll_sketch<T>::get_num_retained)
33
33
  .define_method("estimation_mode?", &kll_sketch<T>::is_estimation_mode)
34
- .define_method("min_value", &kll_sketch<T>::get_min_value)
35
- .define_method("max_value", &kll_sketch<T>::get_max_value)
34
+ .define_method("min_value", &kll_sketch<T>::get_min_item)
35
+ .define_method("max_value", &kll_sketch<T>::get_max_item)
36
36
  .define_method(
37
37
  "quantile",
38
- [](kll_sketch<T>& self, double fraction) {
39
- return self.get_quantile(fraction);
40
- })
38
+ [](kll_sketch<T>& self, double rank, bool inclusive) {
39
+ return self.get_quantile(rank, inclusive);
40
+ }, Rice::Arg("rank"), Rice::Arg("inclusive")=false)
41
41
  .define_method(
42
42
  "quantiles",
43
- [](kll_sketch<T>& self, Rice::Object obj) {
43
+ [](kll_sketch<T>& self, Rice::Object obj, bool inclusive) {
44
44
  if (obj.is_a(rb_cArray)) {
45
- auto fractions = Rice::detail::From_Ruby<std::vector<double>>().convert(obj);
46
- return self.get_quantiles(&fractions[0], fractions.size());
45
+ auto ranks = Rice::detail::From_Ruby<std::vector<double>>().convert(obj);
46
+ return self.get_quantiles(&ranks[0], ranks.size(), inclusive);
47
47
  } else {
48
- return self.get_quantiles(Rice::detail::From_Ruby<size_t>().convert(obj));
48
+ return self.get_quantiles(Rice::detail::From_Ruby<size_t>().convert(obj), inclusive);
49
49
  }
50
- })
50
+ }, Rice::Arg("obj"), Rice::Arg("inclusive")=false)
51
51
  .define_method(
52
52
  "rank",
53
- [](kll_sketch<T>& self, const T item) {
54
- return self.get_rank(item);
55
- })
53
+ [](kll_sketch<T>& self, const T item, bool inclusive) {
54
+ return self.get_rank(item, inclusive);
55
+ }, Rice::Arg("item"), Rice::Arg("inclusive")=false)
56
56
  .define_method(
57
57
  "pmf",
58
- [](kll_sketch<T>& self, const std::vector<T>& split_points) {
59
- return self.get_PMF(&split_points[0], split_points.size());
60
- })
58
+ [](kll_sketch<T>& self, const std::vector<T>& split_points, bool inclusive) {
59
+ return self.get_PMF(&split_points[0], split_points.size(), inclusive);
60
+ }, Rice::Arg("split_points"), Rice::Arg("inclusive")=false)
61
61
  .define_method(
62
62
  "cdf",
63
- [](kll_sketch<T>& self, const std::vector<T>& split_points) {
64
- return self.get_CDF(&split_points[0], split_points.size());
65
- })
63
+ [](kll_sketch<T>& self, const std::vector<T>& split_points, bool inclusive) {
64
+ return self.get_CDF(&split_points[0], split_points.size(), inclusive);
65
+ }, Rice::Arg("split_points"), Rice::Arg("inclusive")=false)
66
66
  .define_method(
67
67
  "merge",
68
68
  [](kll_sketch<T>& self, const kll_sketch<T>& other) {
@@ -59,7 +59,7 @@ void init_theta(Rice::Module& m) {
59
59
  builder.set_seed(seed);
60
60
  return builder.build();
61
61
  },
62
- Arg("lg_k")=update_theta_sketch::builder::DEFAULT_LG_K, Arg("p")=1.0, Arg("seed")=DEFAULT_SEED)
62
+ Arg("lg_k")=datasketches::theta_constants::DEFAULT_LG_K, Arg("p")=1.0, Arg("seed")=DEFAULT_SEED)
63
63
  .define_method("compact", &update_theta_sketch::compact, Arg("ordered")=true)
64
64
  .define_method(
65
65
  "update",
@@ -88,7 +88,7 @@ void init_theta(Rice::Module& m) {
88
88
  builder.set_seed(seed);
89
89
  return builder.build();
90
90
  },
91
- Arg("lg_k")=update_theta_sketch::builder::DEFAULT_LG_K, Arg("p")=1.0, Arg("seed")=DEFAULT_SEED)
91
+ Arg("lg_k")=datasketches::theta_constants::DEFAULT_LG_K, Arg("p")=1.0, Arg("seed")=DEFAULT_SEED)
92
92
  .define_method("update", &theta_union::update<const theta_sketch&>)
93
93
  .define_method("result", &theta_union::get_result, Arg("ordered")=true);
94
94
 
@@ -1,3 +1,3 @@
1
1
  module DataSketches
2
- VERSION = "0.2.7"
2
+ VERSION = "0.3.0"
3
3
  end
@@ -16,10 +16,18 @@
16
16
  # under the License.
17
17
 
18
18
  cmake_minimum_required(VERSION 3.16.0)
19
+
20
+ string(TIMESTAMP DT %Y%m%d UTC)
21
+ string(TIMESTAMP HHMM %H%M UTC)
22
+ configure_file(version.cfg.in version.cfg @ONLY)
23
+ file(STRINGS ${CMAKE_CURRENT_BINARY_DIR}/version.cfg BASE_VERSION)
24
+
19
25
  project(DataSketches
20
- VERSION 3.5.1
26
+ VERSION ${BASE_VERSION}
21
27
  LANGUAGES CXX)
22
28
 
29
+ message("Configuring DataSketches version ${BASE_VERSION}")
30
+
23
31
  include(GNUInstallDirs)
24
32
  include(CMakeDependentOption)
25
33
 
@@ -1,11 +1,27 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
1
18
  global-include CMakeLists.txt
2
19
  global-include *.cpp
3
20
  global-include *.c
4
21
  global-include *.hpp
5
22
  global-include *.h
6
23
  global-include *.bin
7
-
8
- global-exclude .git*
24
+ global-include *.in
9
25
 
10
26
  graft cmake
11
27
  graft common
@@ -18,3 +34,6 @@ graft theta
18
34
  graft tuple
19
35
  graft sampling
20
36
  graft python
37
+
38
+ # exclusions appear after including subdirectories
39
+ prune build
@@ -17,6 +17,8 @@
17
17
 
18
18
  add_library(common INTERFACE)
19
19
 
20
+ configure_file(include/version.hpp.in include/version.hpp @ONLY)
21
+
20
22
  if (BUILD_TESTS)
21
23
  add_subdirectory(test)
22
24
  endif()
@@ -32,6 +34,7 @@ target_compile_features(common INTERFACE cxx_std_11)
32
34
  install(TARGETS common EXPORT ${PROJECT_NAME})
33
35
 
34
36
  install(FILES
37
+ ${CMAKE_CURRENT_BINARY_DIR}/include/version.hpp
35
38
  include/common_defs.hpp
36
39
  include/memory_operations.hpp
37
40
  include/MurmurHash3.h
@@ -43,8 +46,8 @@ install(FILES
43
46
  include/conditional_forward.hpp
44
47
  include/ceiling_power_of_2.hpp
45
48
  include/bounds_binomial_proportions.hpp
46
- include/quantile_sketch_sorted_view.hpp
47
- include/quantile_sketch_sorted_view_impl.hpp
49
+ include/quantiles_sorted_view.hpp
50
+ include/quantiles_sorted_view_impl.hpp
48
51
  include/kolmogorov_smirnov.hpp
49
52
  include/kolmogorov_smirnov_impl.hpp
50
53
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
@@ -86,6 +86,16 @@ static inline void write(std::ostream& os, const T* ptr, size_t size_bytes) {
86
86
  os.write(reinterpret_cast<const char*>(ptr), size_bytes);
87
87
  }
88
88
 
89
+ // wrapper for iterators to implement operator-> returning temporary value
90
+ template<typename T>
91
+ class return_value_holder {
92
+ public:
93
+ return_value_holder(T value): value_(value) {}
94
+ const T* operator->() const { return std::addressof(value_); }
95
+ private:
96
+ T value_;
97
+ };
98
+
89
99
  } // namespace
90
100
 
91
101
  #endif // _COMMON_DEFS_HPP_
@@ -28,16 +28,16 @@ namespace datasketches {
28
28
  template<typename Sketch>
29
29
  double kolmogorov_smirnov::delta(const Sketch& sketch1, const Sketch& sketch2) {
30
30
  auto comparator = sketch1.get_comparator(); // assuming the same comparator in sketch2
31
- auto view1 = sketch1.get_sorted_view(true);
32
- auto view2 = sketch2.get_sorted_view(true);
31
+ auto view1 = sketch1.get_sorted_view();
32
+ auto view2 = sketch2.get_sorted_view();
33
33
  auto it1 = view1.begin();
34
34
  auto it2 = view2.begin();
35
35
  const auto n1 = sketch1.get_n();
36
36
  const auto n2 = sketch2.get_n();
37
37
  double delta = 0;
38
38
  while (it1 != view1.end() && it2 != view2.end()) {
39
- const double norm_cum_wt1 = static_cast<double>((*it1).second) / n1;
40
- const double norm_cum_wt2 = static_cast<double>((*it2).second) / n2;
39
+ const double norm_cum_wt1 = static_cast<double>(it1.get_cumulative_weight(false)) / n1;
40
+ const double norm_cum_wt2 = static_cast<double>(it2.get_cumulative_weight(false)) / n2;
41
41
  delta = std::max(delta, std::abs(norm_cum_wt1 - norm_cum_wt2));
42
42
  if (comparator((*it1).first, (*it2).first)) {
43
43
  ++it1;
@@ -48,8 +48,8 @@ double kolmogorov_smirnov::delta(const Sketch& sketch1, const Sketch& sketch2) {
48
48
  ++it2;
49
49
  }
50
50
  }
51
- const double norm_cum_wt1 = it1 == view1.end() ? 1 : static_cast<double>((*it1).second) / n1;
52
- const double norm_cum_wt2 = it2 == view2.end() ? 1 : static_cast<double>((*it2).second) / n2;
51
+ const double norm_cum_wt1 = it1 == view1.end() ? 1 : static_cast<double>(it1.get_cumulative_weight(false)) / n1;
52
+ const double norm_cum_wt2 = it2 == view2.end() ? 1 : static_cast<double>(it2.get_cumulative_weight(false)) / n2;
53
53
  delta = std::max(delta, std::abs(norm_cum_wt1 - norm_cum_wt2));
54
54
  return delta;
55
55
  }
@@ -23,6 +23,7 @@
23
23
  #include <memory>
24
24
  #include <exception>
25
25
  #include <iostream>
26
+ #include <string>
26
27
 
27
28
  namespace datasketches {
28
29
 
@@ -17,10 +17,13 @@
17
17
  * under the License.
18
18
  */
19
19
 
20
- #ifndef QUANTILE_SKETCH_SORTED_VIEW_HPP_
21
- #define QUANTILE_SKETCH_SORTED_VIEW_HPP_
20
+ #ifndef QUANTILES_SORTED_VIEW_HPP_
21
+ #define QUANTILES_SORTED_VIEW_HPP_
22
22
 
23
23
  #include <functional>
24
+ #include <cmath>
25
+
26
+ #include "common_defs.hpp"
24
27
 
25
28
  namespace datasketches {
26
29
 
@@ -29,18 +32,17 @@ template<
29
32
  typename Comparator, // strict weak ordering function (see C++ named requirements: Compare)
30
33
  typename Allocator
31
34
  >
32
- class quantile_sketch_sorted_view {
35
+ class quantiles_sorted_view {
33
36
  public:
34
37
  using Entry = typename std::conditional<std::is_arithmetic<T>::value, std::pair<T, uint64_t>, std::pair<const T*, uint64_t>>::type;
35
38
  using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
36
39
  using Container = std::vector<Entry, AllocEntry>;
37
40
 
38
- quantile_sketch_sorted_view(uint32_t num, const Allocator& allocator);
41
+ quantiles_sorted_view(uint32_t num, const Comparator& comparator, const Allocator& allocator);
39
42
 
40
43
  template<typename Iterator>
41
44
  void add(Iterator begin, Iterator end, uint64_t weight);
42
45
 
43
- template<bool inclusive>
44
46
  void convert_to_cummulative();
45
47
 
46
48
  class const_iterator;
@@ -49,18 +51,29 @@ public:
49
51
 
50
52
  size_t size() const;
51
53
 
52
- // makes sense only with cumulative weight
54
+ double get_rank(const T& item, bool inclusive = true) const;
55
+
53
56
  using quantile_return_type = typename std::conditional<std::is_arithmetic<T>::value, T, const T&>::type;
54
- quantile_return_type get_quantile(double rank) const;
57
+ quantile_return_type get_quantile(double rank, bool inclusive = true) const;
58
+
59
+ using vector_double = std::vector<double, typename std::allocator_traits<Allocator>::template rebind_alloc<double>>;
60
+ vector_double get_CDF(const T* split_points, uint32_t size, bool inclusive = true) const;
61
+ vector_double get_PMF(const T* split_points, uint32_t size, bool inclusive = true) const;
55
62
 
56
63
  private:
64
+ Comparator comparator_;
65
+ uint64_t total_weight_;
66
+ Container entries_;
67
+
57
68
  static inline const T& deref_helper(const T* t) { return *t; }
58
69
  static inline T deref_helper(T t) { return t; }
59
70
 
60
71
  struct compare_pairs_by_first {
72
+ explicit compare_pairs_by_first(const Comparator& comparator): comparator_(comparator) {}
61
73
  bool operator()(const Entry& a, const Entry& b) const {
62
- return Comparator()(deref_helper(a.first), deref_helper(b.first));
74
+ return comparator_(deref_helper(a.first), deref_helper(b.first));
63
75
  }
76
+ Comparator comparator_;
64
77
  };
65
78
 
66
79
  struct compare_pairs_by_second {
@@ -81,41 +94,63 @@ private:
81
94
  template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
82
95
  static inline Entry make_dummy_entry(uint64_t weight) { return Entry(nullptr, weight); }
83
96
 
84
- uint64_t total_weight_;
85
- Container entries_;
97
+ template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
98
+ static inline void check_split_points(const T* items, uint32_t size) {
99
+ for (uint32_t i = 0; i < size ; i++) {
100
+ if (std::isnan(items[i])) {
101
+ throw std::invalid_argument("Values must not be NaN");
102
+ }
103
+ if ((i < (size - 1)) && !(Comparator()(items[i], items[i + 1]))) {
104
+ throw std::invalid_argument("Values must be unique and monotonically increasing");
105
+ }
106
+ }
107
+ }
108
+
109
+ template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
110
+ static inline void check_split_points(const T* items, uint32_t size) {
111
+ for (uint32_t i = 0; i < size ; i++) {
112
+ if ((i < (size - 1)) && !(Comparator()(items[i], items[i + 1]))) {
113
+ throw std::invalid_argument("Items must be unique and monotonically increasing");
114
+ }
115
+ }
116
+ }
86
117
  };
87
118
 
88
119
  template<typename T, typename C, typename A>
89
- class quantile_sketch_sorted_view<T, C, A>::const_iterator: public quantile_sketch_sorted_view<T, C, A>::Container::const_iterator {
120
+ class quantiles_sorted_view<T, C, A>::const_iterator: public quantiles_sorted_view<T, C, A>::Container::const_iterator {
90
121
  public:
91
- using Base = typename quantile_sketch_sorted_view<T, C, A>::Container::const_iterator;
122
+ using Base = typename quantiles_sorted_view<T, C, A>::Container::const_iterator;
92
123
  using value_type = typename std::conditional<std::is_arithmetic<T>::value, typename Base::value_type, std::pair<const T&, const uint64_t>>::type;
93
124
 
94
- const_iterator(const Base& it): Base(it) {}
125
+ const_iterator(const Base& it, const Base& begin): Base(it), begin(begin) {}
95
126
 
96
127
  template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
97
- value_type operator*() const { return Base::operator*(); }
128
+ const value_type operator*() const { return Base::operator*(); }
98
129
 
99
130
  template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
100
- value_type operator*() const { return value_type(*(Base::operator*().first), Base::operator*().second); }
101
-
102
- class return_value_holder {
103
- public:
104
- return_value_holder(value_type value): value_(value) {}
105
- const value_type* operator->() const { return &value_; }
106
- private:
107
- value_type value_;
108
- };
131
+ const value_type operator*() const { return value_type(*(Base::operator*().first), Base::operator*().second); }
109
132
 
110
133
  template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
111
134
  const value_type* operator->() const { return Base::operator->(); }
112
135
 
113
136
  template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
114
- return_value_holder operator->() const { return **this; }
137
+ const return_value_holder<value_type> operator->() const { return **this; }
138
+
139
+ uint64_t get_weight() const {
140
+ if (*this == begin) return Base::operator*().second;
141
+ return Base::operator*().second - (*this - 1).operator*().second;
142
+ }
143
+
144
+ uint64_t get_cumulative_weight(bool inclusive = true) const {
145
+ return inclusive ? Base::operator*().second : Base::operator*().second - get_weight();
146
+ }
147
+
148
+ private:
149
+ Base begin;
115
150
  };
116
151
 
117
152
  } /* namespace datasketches */
118
153
 
119
- #include "quantile_sketch_sorted_view_impl.hpp"
154
+ #include "quantiles_sorted_view_impl.hpp"
120
155
 
121
156
  #endif
@@ -0,0 +1,125 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef QUANTILES_SORTED_VIEW_IMPL_HPP_
21
+ #define QUANTILES_SORTED_VIEW_IMPL_HPP_
22
+
23
+ #include <algorithm>
24
+ #include <stdexcept>
25
+ #include <cmath>
26
+
27
+ namespace datasketches {
28
+
29
+ template<typename T, typename C, typename A>
30
+ quantiles_sorted_view<T, C, A>::quantiles_sorted_view(uint32_t num, const C& comparator, const A& allocator):
31
+ comparator_(comparator),
32
+ total_weight_(0),
33
+ entries_(allocator)
34
+ {
35
+ entries_.reserve(num);
36
+ }
37
+
38
+ template<typename T, typename C, typename A>
39
+ template<typename Iterator>
40
+ void quantiles_sorted_view<T, C, A>::add(Iterator first, Iterator last, uint64_t weight) {
41
+ const size_t size_before = entries_.size();
42
+ for (auto it = first; it != last; ++it) entries_.push_back(Entry(ref_helper(*it), weight));
43
+ if (size_before > 0) {
44
+ Container tmp(entries_.get_allocator());
45
+ tmp.reserve(entries_.capacity());
46
+ std::merge(
47
+ entries_.begin(), entries_.begin() + size_before,
48
+ entries_.begin() + size_before, entries_.end(),
49
+ std::back_inserter(tmp), compare_pairs_by_first(comparator_)
50
+ );
51
+ std::swap(tmp, entries_);
52
+ }
53
+ }
54
+
55
+ template<typename T, typename C, typename A>
56
+ void quantiles_sorted_view<T, C, A>::convert_to_cummulative() {
57
+ for (auto& entry: entries_) {
58
+ total_weight_ += entry.second;
59
+ entry.second = total_weight_;
60
+ }
61
+ }
62
+
63
+ template<typename T, typename C, typename A>
64
+ double quantiles_sorted_view<T, C, A>::get_rank(const T& item, bool inclusive) const {
65
+ if (entries_.empty()) throw std::runtime_error("operation is undefined for an empty sketch");
66
+ auto it = inclusive ?
67
+ std::upper_bound(entries_.begin(), entries_.end(), Entry(ref_helper(item), 0), compare_pairs_by_first(comparator_))
68
+ : std::lower_bound(entries_.begin(), entries_.end(), Entry(ref_helper(item), 0), compare_pairs_by_first(comparator_));
69
+ // we need item just before
70
+ if (it == entries_.begin()) return 0;
71
+ --it;
72
+ return static_cast<double>(it->second) / total_weight_;
73
+ }
74
+
75
+ template<typename T, typename C, typename A>
76
+ auto quantiles_sorted_view<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
77
+ if (entries_.empty()) throw std::runtime_error("operation is undefined for an empty sketch");
78
+ uint64_t weight = inclusive ? std::ceil(rank * total_weight_) : rank * total_weight_;
79
+ auto it = inclusive ?
80
+ std::lower_bound(entries_.begin(), entries_.end(), make_dummy_entry<T>(weight), compare_pairs_by_second())
81
+ : std::upper_bound(entries_.begin(), entries_.end(), make_dummy_entry<T>(weight), compare_pairs_by_second());
82
+ if (it == entries_.end()) return deref_helper(entries_[entries_.size() - 1].first);
83
+ return deref_helper(it->first);
84
+ }
85
+
86
+ template<typename T, typename C, typename A>
87
+ auto quantiles_sorted_view<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
88
+ if (entries_.empty()) throw std::runtime_error("operation is undefined for an empty sketch");
89
+ vector_double buckets(entries_.get_allocator());
90
+ if (entries_.size() == 0) return buckets;
91
+ check_split_points(split_points, size);
92
+ buckets.reserve(size + 1);
93
+ for (uint32_t i = 0; i < size; ++i) buckets.push_back(get_rank(split_points[i], inclusive));
94
+ buckets.push_back(1);
95
+ return buckets;
96
+ }
97
+
98
+ template<typename T, typename C, typename A>
99
+ auto quantiles_sorted_view<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
100
+ auto buckets = get_CDF(split_points, size, inclusive);
101
+ if (buckets.size() == 0) return buckets;
102
+ for (uint32_t i = size; i > 0; --i) {
103
+ buckets[i] -= buckets[i - 1];
104
+ }
105
+ return buckets;
106
+ }
107
+
108
+ template<typename T, typename C, typename A>
109
+ auto quantiles_sorted_view<T, C, A>::begin() const -> const_iterator {
110
+ return const_iterator(entries_.begin(), entries_.begin());
111
+ }
112
+
113
+ template<typename T, typename C, typename A>
114
+ auto quantiles_sorted_view<T, C, A>::end() const -> const_iterator {
115
+ return const_iterator(entries_.end(), entries_.begin());
116
+ }
117
+
118
+ template<typename T, typename C, typename A>
119
+ size_t quantiles_sorted_view<T, C, A>::size() const {
120
+ return entries_.size();
121
+ }
122
+
123
+ } /* namespace datasketches */
124
+
125
+ #endif
@@ -0,0 +1,36 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _VERSION_HPP_
21
+ #define _VERSION_HPP_
22
+
23
+ namespace datasketches {
24
+
25
+ // the configured options and settings for DataSketches
26
+ constexpr int VERSION_MAJOR {@DataSketches_VERSION_MAJOR@};
27
+ constexpr int VERSION_MINOR {@DataSketches_VERSION_MINOR@};
28
+ constexpr int VERSION_PATCH {@DataSketches_VERSION_PATCH@};
29
+ constexpr int VERSION_TWEAK {@DataSketches_VERSION_TWEAK@};
30
+
31
+ constexpr auto VERSION_STR = "@DataSketches_VERSION@";
32
+ constexpr auto SOURCE_URL = "https://github.com/apache/datasketches-cpp";
33
+
34
+ }
35
+
36
+ #endif // _VERSION_HPP_
@@ -19,7 +19,7 @@
19
19
  # and an integration test using the other parts of the library.
20
20
 
21
21
  # common dependencies for tests
22
- add_library(common_test OBJECT "")
22
+ add_library(common_test_lib OBJECT "")
23
23
 
24
24
  include(FetchContent)
25
25
 
@@ -31,19 +31,19 @@ FetchContent_Declare(
31
31
 
32
32
  FetchContent_MakeAvailable(Catch2)
33
33
 
34
- target_link_libraries(common_test PUBLIC Catch2::Catch2)
34
+ target_link_libraries(common_test_lib PUBLIC Catch2::Catch2)
35
35
 
36
- set_target_properties(common_test PROPERTIES
36
+ set_target_properties(common_test_lib PROPERTIES
37
37
  CXX_STANDARD 11
38
38
  CXX_STANDARD_REQUIRED YES
39
39
  )
40
40
 
41
- target_include_directories(common_test
41
+ target_include_directories(common_test_lib
42
42
  INTERFACE
43
43
  ${CMAKE_CURRENT_SOURCE_DIR}
44
44
  )
45
45
 
46
- target_sources(common_test
46
+ target_sources(common_test_lib
47
47
  INTERFACE
48
48
  ${CMAKE_CURRENT_SOURCE_DIR}/test_allocator.hpp
49
49
  ${CMAKE_CURRENT_SOURCE_DIR}/test_type.hpp
@@ -52,10 +52,29 @@ target_sources(common_test
52
52
  ${CMAKE_CURRENT_SOURCE_DIR}/test_allocator.cpp
53
53
  )
54
54
 
55
+ add_executable(common_test)
56
+
57
+ target_link_libraries(common_test common common_test_lib)
58
+
59
+ set_target_properties(common_test PROPERTIES
60
+ CXX_STANDARD 11
61
+ CXX_STANDARD_REQUIRED YES
62
+ )
63
+
64
+ add_test(
65
+ NAME common_test
66
+ COMMAND common_test
67
+ )
68
+
69
+ target_sources(common_test
70
+ PRIVATE
71
+ quantiles_sorted_view_test.cpp
72
+ )
73
+
55
74
  # now the integration test part
56
75
  add_executable(integration_test)
57
76
 
58
- target_link_libraries(integration_test cpc fi hll kll req sampling theta tuple common_test)
77
+ target_link_libraries(integration_test cpc fi hll kll req sampling theta tuple common_test_lib)
59
78
 
60
79
  set_target_properties(integration_test PROPERTIES
61
80
  CXX_STANDARD 11