datasketches 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4a7fef851c11ba93a002a6215f76170b973628f8b1f0a89fb89a5e655f6a421a
4
- data.tar.gz: a590d7cde413596640f8bf169db67742696d7e304097be8bc0820244ed32ebd5
3
+ metadata.gz: d8db863a37a8fa081bff6bf269666cdff6d4e8a4cf860a0fafac235858709f62
4
+ data.tar.gz: a858071aae33a8aeb5d92cec1f4fdf4cc3cb5d12b07ed629152c953674c06dff
5
5
  SHA512:
6
- metadata.gz: dea9986097b4e9e4c7aba8b6f69108dce21caa2f6fa1e8723a9bf8be2077b925507bf84bd92bc794c1831285963f8ecb8f4739797c3246a15a438a82816043d6
7
- data.tar.gz: 64f2ed1ed4656c09057892ae493a9a2c7178b3090dd84c24105b9e2f16d45f9aaed728c2bb6154e6518e018bbd240f5c2e562fa2edbee397b5deccb8b1dd11f5
6
+ metadata.gz: 03acf7acb3ecb617713d3549e289ed5829f55bd65a52c28eaf1a603cc2e9e577f7d7ffdebd230b01d259d1116ebbef06640a1a9b1d00baa8d0e970cd31357923
7
+ data.tar.gz: eb4730c27379a392b330cbdf9cf17d6d1207307e46ca2c0203b1e4eab237218115824249646f5916737af01b96b3451a373ce51c2020576af49a1da31a5070b5
data/CHANGELOG.md CHANGED
@@ -1,3 +1,8 @@
1
+ ## 0.3.0 (2022-12-08)
2
+
3
+ - Updated DataSketches to 4.0.0
4
+ - Dropped support for Ruby < 2.7
5
+
1
6
  ## 0.2.7 (2022-11-05)
2
7
 
3
8
  - Updated DataSketches to 3.5.1
@@ -26,43 +26,43 @@ namespace Rice::detail
26
26
  template<typename T>
27
27
  void bind_kll_sketch(Rice::Module& m, const char* name) {
28
28
  Rice::define_class_under<kll_sketch<T>>(m, name)
29
- .define_constructor(Rice::Constructor<kll_sketch<T>, uint16_t>(), Rice::Arg("k")=kll_sketch<T>::DEFAULT_K)
29
+ .define_constructor(Rice::Constructor<kll_sketch<T>, uint16_t>(), Rice::Arg("k")=datasketches::kll_constants::DEFAULT_K)
30
30
  .define_method("empty?", &kll_sketch<T>::is_empty)
31
31
  .define_method("n", &kll_sketch<T>::get_n)
32
32
  .define_method("num_retained", &kll_sketch<T>::get_num_retained)
33
33
  .define_method("estimation_mode?", &kll_sketch<T>::is_estimation_mode)
34
- .define_method("min_value", &kll_sketch<T>::get_min_value)
35
- .define_method("max_value", &kll_sketch<T>::get_max_value)
34
+ .define_method("min_value", &kll_sketch<T>::get_min_item)
35
+ .define_method("max_value", &kll_sketch<T>::get_max_item)
36
36
  .define_method(
37
37
  "quantile",
38
- [](kll_sketch<T>& self, double fraction) {
39
- return self.get_quantile(fraction);
40
- })
38
+ [](kll_sketch<T>& self, double rank, bool inclusive) {
39
+ return self.get_quantile(rank, inclusive);
40
+ }, Rice::Arg("rank"), Rice::Arg("inclusive")=false)
41
41
  .define_method(
42
42
  "quantiles",
43
- [](kll_sketch<T>& self, Rice::Object obj) {
43
+ [](kll_sketch<T>& self, Rice::Object obj, bool inclusive) {
44
44
  if (obj.is_a(rb_cArray)) {
45
- auto fractions = Rice::detail::From_Ruby<std::vector<double>>().convert(obj);
46
- return self.get_quantiles(&fractions[0], fractions.size());
45
+ auto ranks = Rice::detail::From_Ruby<std::vector<double>>().convert(obj);
46
+ return self.get_quantiles(&ranks[0], ranks.size(), inclusive);
47
47
  } else {
48
- return self.get_quantiles(Rice::detail::From_Ruby<size_t>().convert(obj));
48
+ return self.get_quantiles(Rice::detail::From_Ruby<size_t>().convert(obj), inclusive);
49
49
  }
50
- })
50
+ }, Rice::Arg("obj"), Rice::Arg("inclusive")=false)
51
51
  .define_method(
52
52
  "rank",
53
- [](kll_sketch<T>& self, const T item) {
54
- return self.get_rank(item);
55
- })
53
+ [](kll_sketch<T>& self, const T item, bool inclusive) {
54
+ return self.get_rank(item, inclusive);
55
+ }, Rice::Arg("item"), Rice::Arg("inclusive")=false)
56
56
  .define_method(
57
57
  "pmf",
58
- [](kll_sketch<T>& self, const std::vector<T>& split_points) {
59
- return self.get_PMF(&split_points[0], split_points.size());
60
- })
58
+ [](kll_sketch<T>& self, const std::vector<T>& split_points, bool inclusive) {
59
+ return self.get_PMF(&split_points[0], split_points.size(), inclusive);
60
+ }, Rice::Arg("split_points"), Rice::Arg("inclusive")=false)
61
61
  .define_method(
62
62
  "cdf",
63
- [](kll_sketch<T>& self, const std::vector<T>& split_points) {
64
- return self.get_CDF(&split_points[0], split_points.size());
65
- })
63
+ [](kll_sketch<T>& self, const std::vector<T>& split_points, bool inclusive) {
64
+ return self.get_CDF(&split_points[0], split_points.size(), inclusive);
65
+ }, Rice::Arg("split_points"), Rice::Arg("inclusive")=false)
66
66
  .define_method(
67
67
  "merge",
68
68
  [](kll_sketch<T>& self, const kll_sketch<T>& other) {
@@ -59,7 +59,7 @@ void init_theta(Rice::Module& m) {
59
59
  builder.set_seed(seed);
60
60
  return builder.build();
61
61
  },
62
- Arg("lg_k")=update_theta_sketch::builder::DEFAULT_LG_K, Arg("p")=1.0, Arg("seed")=DEFAULT_SEED)
62
+ Arg("lg_k")=datasketches::theta_constants::DEFAULT_LG_K, Arg("p")=1.0, Arg("seed")=DEFAULT_SEED)
63
63
  .define_method("compact", &update_theta_sketch::compact, Arg("ordered")=true)
64
64
  .define_method(
65
65
  "update",
@@ -88,7 +88,7 @@ void init_theta(Rice::Module& m) {
88
88
  builder.set_seed(seed);
89
89
  return builder.build();
90
90
  },
91
- Arg("lg_k")=update_theta_sketch::builder::DEFAULT_LG_K, Arg("p")=1.0, Arg("seed")=DEFAULT_SEED)
91
+ Arg("lg_k")=datasketches::theta_constants::DEFAULT_LG_K, Arg("p")=1.0, Arg("seed")=DEFAULT_SEED)
92
92
  .define_method("update", &theta_union::update<const theta_sketch&>)
93
93
  .define_method("result", &theta_union::get_result, Arg("ordered")=true);
94
94
 
@@ -1,3 +1,3 @@
1
1
  module DataSketches
2
- VERSION = "0.2.7"
2
+ VERSION = "0.3.0"
3
3
  end
@@ -16,10 +16,18 @@
16
16
  # under the License.
17
17
 
18
18
  cmake_minimum_required(VERSION 3.16.0)
19
+
20
+ string(TIMESTAMP DT %Y%m%d UTC)
21
+ string(TIMESTAMP HHMM %H%M UTC)
22
+ configure_file(version.cfg.in version.cfg @ONLY)
23
+ file(STRINGS ${CMAKE_CURRENT_BINARY_DIR}/version.cfg BASE_VERSION)
24
+
19
25
  project(DataSketches
20
- VERSION 3.5.1
26
+ VERSION ${BASE_VERSION}
21
27
  LANGUAGES CXX)
22
28
 
29
+ message("Configuring DataSketches version ${BASE_VERSION}")
30
+
23
31
  include(GNUInstallDirs)
24
32
  include(CMakeDependentOption)
25
33
 
@@ -1,11 +1,27 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
1
18
  global-include CMakeLists.txt
2
19
  global-include *.cpp
3
20
  global-include *.c
4
21
  global-include *.hpp
5
22
  global-include *.h
6
23
  global-include *.bin
7
-
8
- global-exclude .git*
24
+ global-include *.in
9
25
 
10
26
  graft cmake
11
27
  graft common
@@ -18,3 +34,6 @@ graft theta
18
34
  graft tuple
19
35
  graft sampling
20
36
  graft python
37
+
38
+ # exclusions appear after including subdirectories
39
+ prune build
@@ -17,6 +17,8 @@
17
17
 
18
18
  add_library(common INTERFACE)
19
19
 
20
+ configure_file(include/version.hpp.in include/version.hpp @ONLY)
21
+
20
22
  if (BUILD_TESTS)
21
23
  add_subdirectory(test)
22
24
  endif()
@@ -32,6 +34,7 @@ target_compile_features(common INTERFACE cxx_std_11)
32
34
  install(TARGETS common EXPORT ${PROJECT_NAME})
33
35
 
34
36
  install(FILES
37
+ ${CMAKE_CURRENT_BINARY_DIR}/include/version.hpp
35
38
  include/common_defs.hpp
36
39
  include/memory_operations.hpp
37
40
  include/MurmurHash3.h
@@ -43,8 +46,8 @@ install(FILES
43
46
  include/conditional_forward.hpp
44
47
  include/ceiling_power_of_2.hpp
45
48
  include/bounds_binomial_proportions.hpp
46
- include/quantile_sketch_sorted_view.hpp
47
- include/quantile_sketch_sorted_view_impl.hpp
49
+ include/quantiles_sorted_view.hpp
50
+ include/quantiles_sorted_view_impl.hpp
48
51
  include/kolmogorov_smirnov.hpp
49
52
  include/kolmogorov_smirnov_impl.hpp
50
53
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
@@ -86,6 +86,16 @@ static inline void write(std::ostream& os, const T* ptr, size_t size_bytes) {
86
86
  os.write(reinterpret_cast<const char*>(ptr), size_bytes);
87
87
  }
88
88
 
89
+ // wrapper for iterators to implement operator-> returning temporary value
90
+ template<typename T>
91
+ class return_value_holder {
92
+ public:
93
+ return_value_holder(T value): value_(value) {}
94
+ const T* operator->() const { return std::addressof(value_); }
95
+ private:
96
+ T value_;
97
+ };
98
+
89
99
  } // namespace
90
100
 
91
101
  #endif // _COMMON_DEFS_HPP_
@@ -28,16 +28,16 @@ namespace datasketches {
28
28
  template<typename Sketch>
29
29
  double kolmogorov_smirnov::delta(const Sketch& sketch1, const Sketch& sketch2) {
30
30
  auto comparator = sketch1.get_comparator(); // assuming the same comparator in sketch2
31
- auto view1 = sketch1.get_sorted_view(true);
32
- auto view2 = sketch2.get_sorted_view(true);
31
+ auto view1 = sketch1.get_sorted_view();
32
+ auto view2 = sketch2.get_sorted_view();
33
33
  auto it1 = view1.begin();
34
34
  auto it2 = view2.begin();
35
35
  const auto n1 = sketch1.get_n();
36
36
  const auto n2 = sketch2.get_n();
37
37
  double delta = 0;
38
38
  while (it1 != view1.end() && it2 != view2.end()) {
39
- const double norm_cum_wt1 = static_cast<double>((*it1).second) / n1;
40
- const double norm_cum_wt2 = static_cast<double>((*it2).second) / n2;
39
+ const double norm_cum_wt1 = static_cast<double>(it1.get_cumulative_weight(false)) / n1;
40
+ const double norm_cum_wt2 = static_cast<double>(it2.get_cumulative_weight(false)) / n2;
41
41
  delta = std::max(delta, std::abs(norm_cum_wt1 - norm_cum_wt2));
42
42
  if (comparator((*it1).first, (*it2).first)) {
43
43
  ++it1;
@@ -48,8 +48,8 @@ double kolmogorov_smirnov::delta(const Sketch& sketch1, const Sketch& sketch2) {
48
48
  ++it2;
49
49
  }
50
50
  }
51
- const double norm_cum_wt1 = it1 == view1.end() ? 1 : static_cast<double>((*it1).second) / n1;
52
- const double norm_cum_wt2 = it2 == view2.end() ? 1 : static_cast<double>((*it2).second) / n2;
51
+ const double norm_cum_wt1 = it1 == view1.end() ? 1 : static_cast<double>(it1.get_cumulative_weight(false)) / n1;
52
+ const double norm_cum_wt2 = it2 == view2.end() ? 1 : static_cast<double>(it2.get_cumulative_weight(false)) / n2;
53
53
  delta = std::max(delta, std::abs(norm_cum_wt1 - norm_cum_wt2));
54
54
  return delta;
55
55
  }
@@ -23,6 +23,7 @@
23
23
  #include <memory>
24
24
  #include <exception>
25
25
  #include <iostream>
26
+ #include <string>
26
27
 
27
28
  namespace datasketches {
28
29
 
@@ -17,10 +17,13 @@
17
17
  * under the License.
18
18
  */
19
19
 
20
- #ifndef QUANTILE_SKETCH_SORTED_VIEW_HPP_
21
- #define QUANTILE_SKETCH_SORTED_VIEW_HPP_
20
+ #ifndef QUANTILES_SORTED_VIEW_HPP_
21
+ #define QUANTILES_SORTED_VIEW_HPP_
22
22
 
23
23
  #include <functional>
24
+ #include <cmath>
25
+
26
+ #include "common_defs.hpp"
24
27
 
25
28
  namespace datasketches {
26
29
 
@@ -29,18 +32,17 @@ template<
29
32
  typename Comparator, // strict weak ordering function (see C++ named requirements: Compare)
30
33
  typename Allocator
31
34
  >
32
- class quantile_sketch_sorted_view {
35
+ class quantiles_sorted_view {
33
36
  public:
34
37
  using Entry = typename std::conditional<std::is_arithmetic<T>::value, std::pair<T, uint64_t>, std::pair<const T*, uint64_t>>::type;
35
38
  using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
36
39
  using Container = std::vector<Entry, AllocEntry>;
37
40
 
38
- quantile_sketch_sorted_view(uint32_t num, const Allocator& allocator);
41
+ quantiles_sorted_view(uint32_t num, const Comparator& comparator, const Allocator& allocator);
39
42
 
40
43
  template<typename Iterator>
41
44
  void add(Iterator begin, Iterator end, uint64_t weight);
42
45
 
43
- template<bool inclusive>
44
46
  void convert_to_cummulative();
45
47
 
46
48
  class const_iterator;
@@ -49,18 +51,29 @@ public:
49
51
 
50
52
  size_t size() const;
51
53
 
52
- // makes sense only with cumulative weight
54
+ double get_rank(const T& item, bool inclusive = true) const;
55
+
53
56
  using quantile_return_type = typename std::conditional<std::is_arithmetic<T>::value, T, const T&>::type;
54
- quantile_return_type get_quantile(double rank) const;
57
+ quantile_return_type get_quantile(double rank, bool inclusive = true) const;
58
+
59
+ using vector_double = std::vector<double, typename std::allocator_traits<Allocator>::template rebind_alloc<double>>;
60
+ vector_double get_CDF(const T* split_points, uint32_t size, bool inclusive = true) const;
61
+ vector_double get_PMF(const T* split_points, uint32_t size, bool inclusive = true) const;
55
62
 
56
63
  private:
64
+ Comparator comparator_;
65
+ uint64_t total_weight_;
66
+ Container entries_;
67
+
57
68
  static inline const T& deref_helper(const T* t) { return *t; }
58
69
  static inline T deref_helper(T t) { return t; }
59
70
 
60
71
  struct compare_pairs_by_first {
72
+ explicit compare_pairs_by_first(const Comparator& comparator): comparator_(comparator) {}
61
73
  bool operator()(const Entry& a, const Entry& b) const {
62
- return Comparator()(deref_helper(a.first), deref_helper(b.first));
74
+ return comparator_(deref_helper(a.first), deref_helper(b.first));
63
75
  }
76
+ Comparator comparator_;
64
77
  };
65
78
 
66
79
  struct compare_pairs_by_second {
@@ -81,41 +94,63 @@ private:
81
94
  template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
82
95
  static inline Entry make_dummy_entry(uint64_t weight) { return Entry(nullptr, weight); }
83
96
 
84
- uint64_t total_weight_;
85
- Container entries_;
97
+ template<typename TT = T, typename std::enable_if<std::is_floating_point<TT>::value, int>::type = 0>
98
+ static inline void check_split_points(const T* items, uint32_t size) {
99
+ for (uint32_t i = 0; i < size ; i++) {
100
+ if (std::isnan(items[i])) {
101
+ throw std::invalid_argument("Values must not be NaN");
102
+ }
103
+ if ((i < (size - 1)) && !(Comparator()(items[i], items[i + 1]))) {
104
+ throw std::invalid_argument("Values must be unique and monotonically increasing");
105
+ }
106
+ }
107
+ }
108
+
109
+ template<typename TT = T, typename std::enable_if<!std::is_floating_point<TT>::value, int>::type = 0>
110
+ static inline void check_split_points(const T* items, uint32_t size) {
111
+ for (uint32_t i = 0; i < size ; i++) {
112
+ if ((i < (size - 1)) && !(Comparator()(items[i], items[i + 1]))) {
113
+ throw std::invalid_argument("Items must be unique and monotonically increasing");
114
+ }
115
+ }
116
+ }
86
117
  };
87
118
 
88
119
  template<typename T, typename C, typename A>
89
- class quantile_sketch_sorted_view<T, C, A>::const_iterator: public quantile_sketch_sorted_view<T, C, A>::Container::const_iterator {
120
+ class quantiles_sorted_view<T, C, A>::const_iterator: public quantiles_sorted_view<T, C, A>::Container::const_iterator {
90
121
  public:
91
- using Base = typename quantile_sketch_sorted_view<T, C, A>::Container::const_iterator;
122
+ using Base = typename quantiles_sorted_view<T, C, A>::Container::const_iterator;
92
123
  using value_type = typename std::conditional<std::is_arithmetic<T>::value, typename Base::value_type, std::pair<const T&, const uint64_t>>::type;
93
124
 
94
- const_iterator(const Base& it): Base(it) {}
125
+ const_iterator(const Base& it, const Base& begin): Base(it), begin(begin) {}
95
126
 
96
127
  template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
97
- value_type operator*() const { return Base::operator*(); }
128
+ const value_type operator*() const { return Base::operator*(); }
98
129
 
99
130
  template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
100
- value_type operator*() const { return value_type(*(Base::operator*().first), Base::operator*().second); }
101
-
102
- class return_value_holder {
103
- public:
104
- return_value_holder(value_type value): value_(value) {}
105
- const value_type* operator->() const { return &value_; }
106
- private:
107
- value_type value_;
108
- };
131
+ const value_type operator*() const { return value_type(*(Base::operator*().first), Base::operator*().second); }
109
132
 
110
133
  template<typename TT = T, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type = 0>
111
134
  const value_type* operator->() const { return Base::operator->(); }
112
135
 
113
136
  template<typename TT = T, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type = 0>
114
- return_value_holder operator->() const { return **this; }
137
+ const return_value_holder<value_type> operator->() const { return **this; }
138
+
139
+ uint64_t get_weight() const {
140
+ if (*this == begin) return Base::operator*().second;
141
+ return Base::operator*().second - (*this - 1).operator*().second;
142
+ }
143
+
144
+ uint64_t get_cumulative_weight(bool inclusive = true) const {
145
+ return inclusive ? Base::operator*().second : Base::operator*().second - get_weight();
146
+ }
147
+
148
+ private:
149
+ Base begin;
115
150
  };
116
151
 
117
152
  } /* namespace datasketches */
118
153
 
119
- #include "quantile_sketch_sorted_view_impl.hpp"
154
+ #include "quantiles_sorted_view_impl.hpp"
120
155
 
121
156
  #endif
@@ -0,0 +1,125 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef QUANTILES_SORTED_VIEW_IMPL_HPP_
21
+ #define QUANTILES_SORTED_VIEW_IMPL_HPP_
22
+
23
+ #include <algorithm>
24
+ #include <stdexcept>
25
+ #include <cmath>
26
+
27
+ namespace datasketches {
28
+
29
+ template<typename T, typename C, typename A>
30
+ quantiles_sorted_view<T, C, A>::quantiles_sorted_view(uint32_t num, const C& comparator, const A& allocator):
31
+ comparator_(comparator),
32
+ total_weight_(0),
33
+ entries_(allocator)
34
+ {
35
+ entries_.reserve(num);
36
+ }
37
+
38
+ template<typename T, typename C, typename A>
39
+ template<typename Iterator>
40
+ void quantiles_sorted_view<T, C, A>::add(Iterator first, Iterator last, uint64_t weight) {
41
+ const size_t size_before = entries_.size();
42
+ for (auto it = first; it != last; ++it) entries_.push_back(Entry(ref_helper(*it), weight));
43
+ if (size_before > 0) {
44
+ Container tmp(entries_.get_allocator());
45
+ tmp.reserve(entries_.capacity());
46
+ std::merge(
47
+ entries_.begin(), entries_.begin() + size_before,
48
+ entries_.begin() + size_before, entries_.end(),
49
+ std::back_inserter(tmp), compare_pairs_by_first(comparator_)
50
+ );
51
+ std::swap(tmp, entries_);
52
+ }
53
+ }
54
+
55
+ template<typename T, typename C, typename A>
56
+ void quantiles_sorted_view<T, C, A>::convert_to_cummulative() {
57
+ for (auto& entry: entries_) {
58
+ total_weight_ += entry.second;
59
+ entry.second = total_weight_;
60
+ }
61
+ }
62
+
63
+ template<typename T, typename C, typename A>
64
+ double quantiles_sorted_view<T, C, A>::get_rank(const T& item, bool inclusive) const {
65
+ if (entries_.empty()) throw std::runtime_error("operation is undefined for an empty sketch");
66
+ auto it = inclusive ?
67
+ std::upper_bound(entries_.begin(), entries_.end(), Entry(ref_helper(item), 0), compare_pairs_by_first(comparator_))
68
+ : std::lower_bound(entries_.begin(), entries_.end(), Entry(ref_helper(item), 0), compare_pairs_by_first(comparator_));
69
+ // we need item just before
70
+ if (it == entries_.begin()) return 0;
71
+ --it;
72
+ return static_cast<double>(it->second) / total_weight_;
73
+ }
74
+
75
+ template<typename T, typename C, typename A>
76
+ auto quantiles_sorted_view<T, C, A>::get_quantile(double rank, bool inclusive) const -> quantile_return_type {
77
+ if (entries_.empty()) throw std::runtime_error("operation is undefined for an empty sketch");
78
+ uint64_t weight = inclusive ? std::ceil(rank * total_weight_) : rank * total_weight_;
79
+ auto it = inclusive ?
80
+ std::lower_bound(entries_.begin(), entries_.end(), make_dummy_entry<T>(weight), compare_pairs_by_second())
81
+ : std::upper_bound(entries_.begin(), entries_.end(), make_dummy_entry<T>(weight), compare_pairs_by_second());
82
+ if (it == entries_.end()) return deref_helper(entries_[entries_.size() - 1].first);
83
+ return deref_helper(it->first);
84
+ }
85
+
86
+ template<typename T, typename C, typename A>
87
+ auto quantiles_sorted_view<T, C, A>::get_CDF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
88
+ if (entries_.empty()) throw std::runtime_error("operation is undefined for an empty sketch");
89
+ vector_double buckets(entries_.get_allocator());
90
+ if (entries_.size() == 0) return buckets;
91
+ check_split_points(split_points, size);
92
+ buckets.reserve(size + 1);
93
+ for (uint32_t i = 0; i < size; ++i) buckets.push_back(get_rank(split_points[i], inclusive));
94
+ buckets.push_back(1);
95
+ return buckets;
96
+ }
97
+
98
+ template<typename T, typename C, typename A>
99
+ auto quantiles_sorted_view<T, C, A>::get_PMF(const T* split_points, uint32_t size, bool inclusive) const -> vector_double {
100
+ auto buckets = get_CDF(split_points, size, inclusive);
101
+ if (buckets.size() == 0) return buckets;
102
+ for (uint32_t i = size; i > 0; --i) {
103
+ buckets[i] -= buckets[i - 1];
104
+ }
105
+ return buckets;
106
+ }
107
+
108
+ template<typename T, typename C, typename A>
109
+ auto quantiles_sorted_view<T, C, A>::begin() const -> const_iterator {
110
+ return const_iterator(entries_.begin(), entries_.begin());
111
+ }
112
+
113
+ template<typename T, typename C, typename A>
114
+ auto quantiles_sorted_view<T, C, A>::end() const -> const_iterator {
115
+ return const_iterator(entries_.end(), entries_.begin());
116
+ }
117
+
118
+ template<typename T, typename C, typename A>
119
+ size_t quantiles_sorted_view<T, C, A>::size() const {
120
+ return entries_.size();
121
+ }
122
+
123
+ } /* namespace datasketches */
124
+
125
+ #endif
@@ -0,0 +1,36 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #ifndef _VERSION_HPP_
21
+ #define _VERSION_HPP_
22
+
23
+ namespace datasketches {
24
+
25
+ // the configured options and settings for DataSketches
26
+ constexpr int VERSION_MAJOR {@DataSketches_VERSION_MAJOR@};
27
+ constexpr int VERSION_MINOR {@DataSketches_VERSION_MINOR@};
28
+ constexpr int VERSION_PATCH {@DataSketches_VERSION_PATCH@};
29
+ constexpr int VERSION_TWEAK {@DataSketches_VERSION_TWEAK@};
30
+
31
+ constexpr auto VERSION_STR = "@DataSketches_VERSION@";
32
+ constexpr auto SOURCE_URL = "https://github.com/apache/datasketches-cpp";
33
+
34
+ }
35
+
36
+ #endif // _VERSION_HPP_
@@ -19,7 +19,7 @@
19
19
  # and an integration test using the other parts of the library.
20
20
 
21
21
  # common dependencies for tests
22
- add_library(common_test OBJECT "")
22
+ add_library(common_test_lib OBJECT "")
23
23
 
24
24
  include(FetchContent)
25
25
 
@@ -31,19 +31,19 @@ FetchContent_Declare(
31
31
 
32
32
  FetchContent_MakeAvailable(Catch2)
33
33
 
34
- target_link_libraries(common_test PUBLIC Catch2::Catch2)
34
+ target_link_libraries(common_test_lib PUBLIC Catch2::Catch2)
35
35
 
36
- set_target_properties(common_test PROPERTIES
36
+ set_target_properties(common_test_lib PROPERTIES
37
37
  CXX_STANDARD 11
38
38
  CXX_STANDARD_REQUIRED YES
39
39
  )
40
40
 
41
- target_include_directories(common_test
41
+ target_include_directories(common_test_lib
42
42
  INTERFACE
43
43
  ${CMAKE_CURRENT_SOURCE_DIR}
44
44
  )
45
45
 
46
- target_sources(common_test
46
+ target_sources(common_test_lib
47
47
  INTERFACE
48
48
  ${CMAKE_CURRENT_SOURCE_DIR}/test_allocator.hpp
49
49
  ${CMAKE_CURRENT_SOURCE_DIR}/test_type.hpp
@@ -52,10 +52,29 @@ target_sources(common_test
52
52
  ${CMAKE_CURRENT_SOURCE_DIR}/test_allocator.cpp
53
53
  )
54
54
 
55
+ add_executable(common_test)
56
+
57
+ target_link_libraries(common_test common common_test_lib)
58
+
59
+ set_target_properties(common_test PROPERTIES
60
+ CXX_STANDARD 11
61
+ CXX_STANDARD_REQUIRED YES
62
+ )
63
+
64
+ add_test(
65
+ NAME common_test
66
+ COMMAND common_test
67
+ )
68
+
69
+ target_sources(common_test
70
+ PRIVATE
71
+ quantiles_sorted_view_test.cpp
72
+ )
73
+
55
74
  # now the integration test part
56
75
  add_executable(integration_test)
57
76
 
58
- target_link_libraries(integration_test cpc fi hll kll req sampling theta tuple common_test)
77
+ target_link_libraries(integration_test cpc fi hll kll req sampling theta tuple common_test_lib)
59
78
 
60
79
  set_target_properties(integration_test PROPERTIES
61
80
  CXX_STANDARD 11