datasketches 0.2.2 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/README.md +8 -8
  6. data/ext/datasketches/kll_wrapper.cpp +5 -1
  7. data/ext/datasketches/theta_wrapper.cpp +20 -4
  8. data/lib/datasketches/version.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +27 -5
  10. data/vendor/datasketches-cpp/LICENSE +40 -3
  11. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  12. data/vendor/datasketches-cpp/NOTICE +1 -1
  13. data/vendor/datasketches-cpp/README.md +76 -9
  14. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +18 -13
  16. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +16 -0
  18. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
  19. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
  20. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
  21. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
  22. data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
  23. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  24. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
  25. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
  26. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
  27. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +1 -1
  28. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
  29. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +10 -6
  31. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
  33. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
  34. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  35. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
  36. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +30 -12
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
  38. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
  41. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
  42. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
  44. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
  45. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
  46. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
  47. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
  48. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
  49. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
  50. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
  51. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
  52. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
  53. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
  54. data/vendor/datasketches-cpp/kll/CMakeLists.txt +5 -19
  55. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
  56. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
  57. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +103 -44
  58. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +110 -130
  59. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +156 -23
  60. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
  61. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  62. data/vendor/datasketches-cpp/python/CMakeLists.txt +17 -6
  63. data/vendor/datasketches-cpp/python/README.md +57 -50
  64. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  65. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  66. data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
  67. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
  68. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +49 -14
  69. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
  70. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
  71. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
  72. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +2 -2
  73. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +12 -5
  74. data/vendor/datasketches-cpp/python/tests/kll_test.py +12 -6
  75. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
  76. data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
  77. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  78. data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
  79. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
  80. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
  81. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
  82. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
  83. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
  84. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
  85. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
  86. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
  87. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
  88. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
  89. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
  90. data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
  91. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
  92. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
  93. data/vendor/datasketches-cpp/req/CMakeLists.txt +6 -21
  94. data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
  95. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
  96. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
  97. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +66 -61
  98. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
  99. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  100. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +54 -12
  101. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +45 -34
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +33 -15
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
  107. data/vendor/datasketches-cpp/setup.py +10 -7
  108. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  110. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +92 -23
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +7 -6
  114. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +3 -2
  115. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
  116. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +150 -93
  117. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
  118. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -2
  120. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  121. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +9 -5
  122. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +39 -10
  123. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  124. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
  125. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  126. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  127. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  130. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
  131. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +446 -0
  132. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +429 -1
  133. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -11
  134. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  135. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  136. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
  137. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  138. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +29 -9
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +34 -14
  141. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  142. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  143. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
  144. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
  145. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
  146. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
  147. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +8 -0
  148. metadata +34 -12
  149. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
  150. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
  151. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
  152. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
  153. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  154. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -1,75 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #ifndef KLL_QUANTILE_CALCULATOR_HPP_
21
- #define KLL_QUANTILE_CALCULATOR_HPP_
22
-
23
- #include <memory>
24
-
25
- namespace datasketches {
26
-
27
- // forward declaration
28
- template<typename T, typename C, typename S, typename A> class kll_sketch;
29
-
30
- template <typename T, typename C, typename A>
31
- class kll_quantile_calculator {
32
- public:
33
- using Entry = std::pair<T, uint64_t>;
34
- using AllocEntry = typename std::allocator_traits<A>::template rebind_alloc<Entry>;
35
- using Container = std::vector<Entry, AllocEntry>;
36
- using const_iterator = typename Container::const_iterator;
37
-
38
- template<typename S>
39
- kll_quantile_calculator(const kll_sketch<T, C, S, A>& sketch);
40
-
41
- T get_quantile(double fraction) const;
42
- const_iterator begin() const;
43
- const_iterator end() const;
44
-
45
- private:
46
- using AllocU32 = typename std::allocator_traits<A>::template rebind_alloc<uint32_t>;
47
- using vector_u32 = std::vector<uint32_t, AllocU32>;
48
- uint64_t n_;
49
- vector_u32 levels_;
50
- Container entries_;
51
-
52
- void populate_from_sketch(const T* items, const uint32_t* levels, uint8_t num_levels);
53
- T approximately_answer_positional_query(uint64_t pos) const;
54
- void convert_to_preceding_cummulative();
55
- uint32_t chunk_containing_pos(uint64_t pos) const;
56
- uint32_t search_for_chunk_containing_pos(uint64_t pos, uint64_t l, uint64_t r) const;
57
- static void merge_sorted_blocks(Container& entries, const uint32_t* levels, uint8_t num_levels, uint32_t num_items);
58
- static void merge_sorted_blocks_direct(Container& orig, Container& temp, const uint32_t* levels, uint8_t starting_level, uint8_t num_levels);
59
- static void merge_sorted_blocks_reversed(Container& orig, Container& temp, const uint32_t* levels, uint8_t starting_level, uint8_t num_levels);
60
- static uint64_t pos_of_phi(double phi, uint64_t n);
61
-
62
- template<typename Comparator>
63
- struct compare_pair_by_first {
64
- template<typename Entry1, typename Entry2>
65
- bool operator()(Entry1&& a, Entry2&& b) const {
66
- return Comparator()(std::forward<Entry1>(a).first, std::forward<Entry2>(b).first);
67
- }
68
- };
69
- };
70
-
71
- } /* namespace datasketches */
72
-
73
- #include "kll_quantile_calculator_impl.hpp"
74
-
75
- #endif // KLL_QUANTILE_CALCULATOR_HPP_
@@ -1,184 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #ifndef KLL_QUANTILE_CALCULATOR_IMPL_HPP_
21
- #define KLL_QUANTILE_CALCULATOR_IMPL_HPP_
22
-
23
- #include <memory>
24
- #include <cmath>
25
- #include <algorithm>
26
-
27
- #include "kll_helper.hpp"
28
-
29
- namespace datasketches {
30
-
31
- template<typename T, typename C, typename A>
32
- template<typename S>
33
- kll_quantile_calculator<T, C, A>::kll_quantile_calculator(const kll_sketch<T, C, S, A>& sketch):
34
- n_(sketch.n_), levels_(sketch.num_levels_ + 1, 0, sketch.allocator_), entries_(sketch.allocator_)
35
- {
36
- const uint32_t num_items = sketch.levels_[sketch.num_levels_] - sketch.levels_[0];
37
- if (num_items > 0) {
38
- entries_.reserve(num_items);
39
- populate_from_sketch(sketch.items_, sketch.levels_.data(), sketch.num_levels_);
40
- if (!sketch.is_level_zero_sorted_) std::sort(entries_.begin(), entries_.begin() + levels_[1], compare_pair_by_first<C>());
41
- merge_sorted_blocks(entries_, levels_.data(), static_cast<uint8_t>(levels_.size()) - 1, num_items);
42
- if (!is_sorted(entries_.begin(), entries_.end(), compare_pair_by_first<C>())) throw std::logic_error("entries must be sorted");
43
- convert_to_preceding_cummulative();
44
- }
45
- }
46
-
47
- template<typename T, typename C, typename A>
48
- T kll_quantile_calculator<T, C, A>::get_quantile(double fraction) const {
49
- return approximately_answer_positional_query(pos_of_phi(fraction, n_));
50
- }
51
-
52
- template<typename T, typename C, typename A>
53
- auto kll_quantile_calculator<T, C, A>::begin() const -> const_iterator {
54
- return entries_.begin();
55
- }
56
-
57
- template<typename T, typename C, typename A>
58
- auto kll_quantile_calculator<T, C, A>::end() const -> const_iterator {
59
- return entries_.end();
60
- }
61
-
62
- template<typename T, typename C, typename A>
63
- void kll_quantile_calculator<T, C, A>::populate_from_sketch(const T* items, const uint32_t* levels, uint8_t num_levels) {
64
- size_t src_level = 0;
65
- size_t dst_level = 0;
66
- uint64_t weight = 1;
67
- uint32_t offset = levels[0];
68
- while (src_level < num_levels) {
69
- const uint32_t from_index(levels[src_level] - offset);
70
- const uint32_t to_index(levels[src_level + 1] - offset); // exclusive
71
- if (from_index < to_index) { // skip empty levels
72
- for (uint32_t i = from_index; i < to_index; ++i) {
73
- entries_.push_back(Entry(items[i + offset], weight));
74
- }
75
- levels_[dst_level] = from_index;
76
- levels_[dst_level + 1] = to_index;
77
- dst_level++;
78
- }
79
- src_level++;
80
- weight *= 2;
81
- }
82
- if (levels_.size() > static_cast<size_t>(dst_level + 1)) levels_.resize(dst_level + 1);
83
- }
84
-
85
- template<typename T, typename C, typename A>
86
- T kll_quantile_calculator<T, C, A>::approximately_answer_positional_query(uint64_t pos) const {
87
- if (pos >= n_) throw std::logic_error("position out of range");
88
- const uint32_t num_items = levels_[levels_.size() - 1];
89
- if (pos > entries_[num_items - 1].second) return entries_[num_items - 1].first;
90
- const uint32_t index = chunk_containing_pos(pos);
91
- return entries_[index].first;
92
- }
93
-
94
- template<typename T, typename C, typename A>
95
- void kll_quantile_calculator<T, C, A>::convert_to_preceding_cummulative() {
96
- uint64_t subtotal = 0;
97
- for (auto& entry: entries_) {
98
- const uint64_t new_subtotal = subtotal + entry.second;
99
- entry.second = subtotal;
100
- subtotal = new_subtotal;
101
- }
102
- }
103
-
104
- template<typename T, typename C, typename A>
105
- uint64_t kll_quantile_calculator<T, C, A>::pos_of_phi(double phi, uint64_t n) {
106
- const uint64_t pos = static_cast<uint64_t>(std::floor(phi * n));
107
- return (pos == n) ? n - 1 : pos;
108
- }
109
-
110
- template<typename T, typename C, typename A>
111
- uint32_t kll_quantile_calculator<T, C, A>::chunk_containing_pos(uint64_t pos) const {
112
- if (entries_.size() < 1) throw std::logic_error("array too short");
113
- if (pos < entries_[0].second) throw std::logic_error("position too small");
114
- if (pos > entries_[entries_.size() - 1].second) throw std::logic_error("position too large");
115
- return search_for_chunk_containing_pos(pos, 0, entries_.size());
116
- }
117
-
118
- template<typename T, typename C, typename A>
119
- uint32_t kll_quantile_calculator<T, C, A>::search_for_chunk_containing_pos(uint64_t pos, uint64_t l, uint64_t r) const {
120
- if (l + 1 == r) {
121
- return static_cast<uint32_t>(l);
122
- }
123
- const uint64_t m = l + (r - l) / 2;
124
- if (entries_[m].second <= pos) {
125
- return search_for_chunk_containing_pos(pos, m, r);
126
- }
127
- return search_for_chunk_containing_pos(pos, l, m);
128
- }
129
-
130
- template<typename T, typename C, typename A>
131
- void kll_quantile_calculator<T, C, A>::merge_sorted_blocks(Container& entries, const uint32_t* levels, uint8_t num_levels, uint32_t num_items) {
132
- if (num_levels == 1) return;
133
- Container temporary(entries.get_allocator());
134
- temporary.reserve(num_items);
135
- merge_sorted_blocks_direct(entries, temporary, levels, 0, num_levels);
136
- }
137
-
138
- template<typename T, typename C, typename A>
139
- void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_direct(Container& orig, Container& temp, const uint32_t* levels,
140
- uint8_t starting_level, uint8_t num_levels) {
141
- if (num_levels == 1) return;
142
- const uint8_t num_levels_1 = num_levels / 2;
143
- const uint8_t num_levels_2 = num_levels - num_levels_1;
144
- const uint8_t starting_level_1 = starting_level;
145
- const uint8_t starting_level_2 = starting_level + num_levels_1;
146
- const auto initial_size = temp.size();
147
- merge_sorted_blocks_reversed(orig, temp, levels, starting_level_1, num_levels_1);
148
- merge_sorted_blocks_reversed(orig, temp, levels, starting_level_2, num_levels_2);
149
- const uint32_t num_items_1 = levels[starting_level_1 + num_levels_1] - levels[starting_level_1];
150
- const auto chunk_begin = temp.begin() + initial_size;
151
- std::merge(
152
- std::make_move_iterator(chunk_begin), std::make_move_iterator(chunk_begin + num_items_1),
153
- std::make_move_iterator(chunk_begin + num_items_1), std::make_move_iterator(temp.end()),
154
- orig.begin() + levels[starting_level], compare_pair_by_first<C>()
155
- );
156
- temp.erase(chunk_begin, temp.end());
157
- }
158
-
159
- template<typename T, typename C, typename A>
160
- void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_reversed(Container& orig, Container& temp, const uint32_t* levels,
161
- uint8_t starting_level, uint8_t num_levels) {
162
- if (num_levels == 1) {
163
- std::move(orig.begin() + levels[starting_level], orig.begin() + levels[starting_level + 1], std::back_inserter(temp));
164
- return;
165
- }
166
- const uint8_t num_levels_1 = num_levels / 2;
167
- const uint8_t num_levels_2 = num_levels - num_levels_1;
168
- const uint8_t starting_level_1 = starting_level;
169
- const uint8_t starting_level_2 = starting_level + num_levels_1;
170
- merge_sorted_blocks_direct(orig, temp, levels, starting_level_1, num_levels_1);
171
- merge_sorted_blocks_direct(orig, temp, levels, starting_level_2, num_levels_2);
172
- std::merge(
173
- std::make_move_iterator(orig.begin() + levels[starting_level_1]),
174
- std::make_move_iterator(orig.begin() + levels[starting_level_1 + num_levels_1]),
175
- std::make_move_iterator(orig.begin() + levels[starting_level_2]),
176
- std::make_move_iterator(orig.begin() + levels[starting_level_2 + num_levels_2]),
177
- std::back_inserter(temp),
178
- compare_pair_by_first<C>()
179
- );
180
- }
181
-
182
- } /* namespace datasketches */
183
-
184
- #endif // KLL_QUANTILE_CALCULATOR_IMPL_HPP_
@@ -1,69 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #ifndef REQ_QUANTILE_CALCULATOR_HPP_
21
- #define REQ_QUANTILE_CALCULATOR_HPP_
22
-
23
- #include <functional>
24
-
25
- namespace datasketches {
26
-
27
- template<
28
- typename T,
29
- typename Comparator,
30
- typename Allocator
31
- >
32
- class req_quantile_calculator {
33
- public:
34
- req_quantile_calculator(uint64_t n, const Allocator& allocator);
35
-
36
- void add(const T* begin, const T* end, uint8_t lg_weight);
37
-
38
- template<bool inclusive>
39
- void convert_to_cummulative();
40
-
41
- const T* get_quantile(double rank) const;
42
-
43
- private:
44
- using Entry = std::pair<const T*, uint64_t>;
45
- using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
46
- using Container = std::vector<Entry, AllocEntry>;
47
-
48
- template<typename C>
49
- struct compare_pairs_by_first_ptr {
50
- bool operator()(const Entry& a, const Entry& b) {
51
- return C()(*a.first, *b.first);
52
- }
53
- };
54
-
55
- struct compare_pairs_by_second {
56
- bool operator()(const Entry& a, const Entry& b) {
57
- return a.second < b.second;
58
- }
59
- };
60
-
61
- uint64_t n_;
62
- Container entries_;
63
- };
64
-
65
- } /* namespace datasketches */
66
-
67
- #include "req_quantile_calculator_impl.hpp"
68
-
69
- #endif
@@ -1,60 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #ifndef REQ_QUANTILE_CALCULATOR_IMPL_HPP_
21
- #define REQ_QUANTILE_CALCULATOR_IMPL_HPP_
22
-
23
- namespace datasketches {
24
-
25
- template<typename T, typename C, typename A>
26
- req_quantile_calculator<T, C, A>::req_quantile_calculator(uint64_t n, const A& allocator):
27
- n_(n),
28
- entries_(allocator)
29
- {}
30
-
31
- template<typename T, typename C, typename A>
32
- void req_quantile_calculator<T, C, A>::add(const T* begin, const T* end, uint8_t lg_weight) {
33
- if (entries_.capacity() < entries_.size() + std::distance(begin, end)) entries_.reserve(entries_.size() + std::distance(begin, end));
34
- const size_t size_before = entries_.size();
35
- for (auto it = begin; it != end; ++it) entries_.push_back(Entry(it, 1 << lg_weight));
36
- if (size_before > 0) std::inplace_merge(entries_.begin(), entries_.begin() + size_before, entries_.end(), compare_pairs_by_first_ptr<C>());
37
- }
38
-
39
- template<typename T, typename C, typename A>
40
- template<bool inclusive>
41
- void req_quantile_calculator<T, C, A>::convert_to_cummulative() {
42
- uint64_t subtotal = 0;
43
- for (auto& entry: entries_) {
44
- const uint64_t new_subtotal = subtotal + entry.second;
45
- entry.second = inclusive ? new_subtotal : subtotal;
46
- subtotal = new_subtotal;
47
- }
48
- }
49
-
50
- template<typename T, typename C, typename A>
51
- const T* req_quantile_calculator<T, C, A>::get_quantile(double rank) const {
52
- uint64_t weight = static_cast<uint64_t>(rank * n_);
53
- auto it = std::lower_bound(entries_.begin(), entries_.end(), Entry(nullptr, weight), compare_pairs_by_second());
54
- if (it == entries_.end()) return entries_[entries_.size() - 1].first;
55
- return it->first;
56
- }
57
-
58
- } /* namespace datasketches */
59
-
60
- #endif