datasketches 0.2.3 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (143) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/README.md +8 -8
  4. data/ext/datasketches/kll_wrapper.cpp +7 -3
  5. data/ext/datasketches/theta_wrapper.cpp +20 -4
  6. data/lib/datasketches/version.rb +1 -1
  7. data/vendor/datasketches-cpp/CMakeLists.txt +25 -5
  8. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  9. data/vendor/datasketches-cpp/NOTICE +6 -5
  10. data/vendor/datasketches-cpp/README.md +76 -9
  11. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  12. data/vendor/datasketches-cpp/common/CMakeLists.txt +18 -13
  13. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
  14. data/vendor/datasketches-cpp/common/include/common_defs.hpp +14 -0
  15. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
  16. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
  17. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
  18. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
  19. data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
  20. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  21. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +3 -1
  22. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
  23. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
  24. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +10 -6
  25. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  26. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
  27. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
  28. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  29. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
  30. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +29 -11
  31. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
  32. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
  33. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  34. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
  35. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
  36. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
  37. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
  38. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
  39. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
  40. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
  41. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
  42. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
  43. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
  44. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
  45. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
  46. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
  47. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
  48. data/vendor/datasketches-cpp/kll/CMakeLists.txt +5 -19
  49. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
  50. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +5 -2
  51. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +108 -41
  52. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +150 -132
  53. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +165 -31
  54. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
  55. data/vendor/datasketches-cpp/pyproject.toml +1 -1
  56. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  57. data/vendor/datasketches-cpp/python/README.md +13 -9
  58. data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
  59. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
  60. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +48 -13
  61. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
  62. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
  63. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
  64. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +1 -0
  65. data/vendor/datasketches-cpp/python/tests/kll_test.py +10 -4
  66. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
  67. data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
  68. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +656 -0
  69. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1373 -0
  70. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
  71. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
  72. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
  73. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
  74. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
  75. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
  76. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
  77. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
  78. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
  79. data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
  80. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
  81. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +975 -0
  82. data/vendor/datasketches-cpp/req/CMakeLists.txt +6 -21
  83. data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
  84. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +6 -0
  85. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +30 -2
  86. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +73 -23
  87. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +95 -63
  88. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +74 -3
  89. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  90. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +44 -7
  91. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +44 -33
  92. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +33 -15
  94. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
  95. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
  96. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
  97. data/vendor/datasketches-cpp/setup.py +1 -1
  98. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  99. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  100. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +92 -23
  101. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  102. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +7 -6
  103. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +3 -2
  104. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
  105. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +150 -93
  106. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
  107. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  108. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -2
  109. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  110. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -4
  111. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +34 -9
  112. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  113. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
  114. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  115. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  116. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  117. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  118. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  119. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
  120. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +446 -0
  121. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +429 -1
  122. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -11
  123. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  124. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  125. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
  126. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  127. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  128. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +29 -9
  129. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +34 -14
  130. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  131. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  132. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
  133. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
  134. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
  135. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
  136. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +8 -0
  137. metadata +33 -12
  138. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
  139. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
  140. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
  141. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
  142. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  143. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -1,75 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #ifndef KLL_QUANTILE_CALCULATOR_HPP_
21
- #define KLL_QUANTILE_CALCULATOR_HPP_
22
-
23
- #include <memory>
24
-
25
- namespace datasketches {
26
-
27
- // forward declaration
28
- template<typename T, typename C, typename S, typename A> class kll_sketch;
29
-
30
- template <typename T, typename C, typename A>
31
- class kll_quantile_calculator {
32
- public:
33
- using Entry = std::pair<T, uint64_t>;
34
- using AllocEntry = typename std::allocator_traits<A>::template rebind_alloc<Entry>;
35
- using Container = std::vector<Entry, AllocEntry>;
36
- using const_iterator = typename Container::const_iterator;
37
-
38
- template<typename S>
39
- kll_quantile_calculator(const kll_sketch<T, C, S, A>& sketch);
40
-
41
- T get_quantile(double fraction) const;
42
- const_iterator begin() const;
43
- const_iterator end() const;
44
-
45
- private:
46
- using AllocU32 = typename std::allocator_traits<A>::template rebind_alloc<uint32_t>;
47
- using vector_u32 = std::vector<uint32_t, AllocU32>;
48
- uint64_t n_;
49
- vector_u32 levels_;
50
- Container entries_;
51
-
52
- void populate_from_sketch(const T* items, const uint32_t* levels, uint8_t num_levels);
53
- T approximately_answer_positional_query(uint64_t pos) const;
54
- void convert_to_preceding_cummulative();
55
- uint32_t chunk_containing_pos(uint64_t pos) const;
56
- uint32_t search_for_chunk_containing_pos(uint64_t pos, uint64_t l, uint64_t r) const;
57
- static void merge_sorted_blocks(Container& entries, const uint32_t* levels, uint8_t num_levels, uint32_t num_items);
58
- static void merge_sorted_blocks_direct(Container& orig, Container& temp, const uint32_t* levels, uint8_t starting_level, uint8_t num_levels);
59
- static void merge_sorted_blocks_reversed(Container& orig, Container& temp, const uint32_t* levels, uint8_t starting_level, uint8_t num_levels);
60
- static uint64_t pos_of_phi(double phi, uint64_t n);
61
-
62
- template<typename Comparator>
63
- struct compare_pair_by_first {
64
- template<typename Entry1, typename Entry2>
65
- bool operator()(Entry1&& a, Entry2&& b) const {
66
- return Comparator()(std::forward<Entry1>(a).first, std::forward<Entry2>(b).first);
67
- }
68
- };
69
- };
70
-
71
- } /* namespace datasketches */
72
-
73
- #include "kll_quantile_calculator_impl.hpp"
74
-
75
- #endif // KLL_QUANTILE_CALCULATOR_HPP_
@@ -1,184 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #ifndef KLL_QUANTILE_CALCULATOR_IMPL_HPP_
21
- #define KLL_QUANTILE_CALCULATOR_IMPL_HPP_
22
-
23
- #include <memory>
24
- #include <cmath>
25
- #include <algorithm>
26
-
27
- #include "kll_helper.hpp"
28
-
29
- namespace datasketches {
30
-
31
- template<typename T, typename C, typename A>
32
- template<typename S>
33
- kll_quantile_calculator<T, C, A>::kll_quantile_calculator(const kll_sketch<T, C, S, A>& sketch):
34
- n_(sketch.n_), levels_(sketch.num_levels_ + 1, 0, sketch.allocator_), entries_(sketch.allocator_)
35
- {
36
- const uint32_t num_items = sketch.levels_[sketch.num_levels_] - sketch.levels_[0];
37
- if (num_items > 0) {
38
- entries_.reserve(num_items);
39
- populate_from_sketch(sketch.items_, sketch.levels_.data(), sketch.num_levels_);
40
- if (!sketch.is_level_zero_sorted_) std::sort(entries_.begin(), entries_.begin() + levels_[1], compare_pair_by_first<C>());
41
- merge_sorted_blocks(entries_, levels_.data(), static_cast<uint8_t>(levels_.size()) - 1, num_items);
42
- if (!is_sorted(entries_.begin(), entries_.end(), compare_pair_by_first<C>())) throw std::logic_error("entries must be sorted");
43
- convert_to_preceding_cummulative();
44
- }
45
- }
46
-
47
- template<typename T, typename C, typename A>
48
- T kll_quantile_calculator<T, C, A>::get_quantile(double fraction) const {
49
- return approximately_answer_positional_query(pos_of_phi(fraction, n_));
50
- }
51
-
52
- template<typename T, typename C, typename A>
53
- auto kll_quantile_calculator<T, C, A>::begin() const -> const_iterator {
54
- return entries_.begin();
55
- }
56
-
57
- template<typename T, typename C, typename A>
58
- auto kll_quantile_calculator<T, C, A>::end() const -> const_iterator {
59
- return entries_.end();
60
- }
61
-
62
- template<typename T, typename C, typename A>
63
- void kll_quantile_calculator<T, C, A>::populate_from_sketch(const T* items, const uint32_t* levels, uint8_t num_levels) {
64
- size_t src_level = 0;
65
- size_t dst_level = 0;
66
- uint64_t weight = 1;
67
- uint32_t offset = levels[0];
68
- while (src_level < num_levels) {
69
- const uint32_t from_index(levels[src_level] - offset);
70
- const uint32_t to_index(levels[src_level + 1] - offset); // exclusive
71
- if (from_index < to_index) { // skip empty levels
72
- for (uint32_t i = from_index; i < to_index; ++i) {
73
- entries_.push_back(Entry(items[i + offset], weight));
74
- }
75
- levels_[dst_level] = from_index;
76
- levels_[dst_level + 1] = to_index;
77
- dst_level++;
78
- }
79
- src_level++;
80
- weight *= 2;
81
- }
82
- if (levels_.size() > static_cast<size_t>(dst_level + 1)) levels_.resize(dst_level + 1);
83
- }
84
-
85
- template<typename T, typename C, typename A>
86
- T kll_quantile_calculator<T, C, A>::approximately_answer_positional_query(uint64_t pos) const {
87
- if (pos >= n_) throw std::logic_error("position out of range");
88
- const uint32_t num_items = levels_[levels_.size() - 1];
89
- if (pos > entries_[num_items - 1].second) return entries_[num_items - 1].first;
90
- const uint32_t index = chunk_containing_pos(pos);
91
- return entries_[index].first;
92
- }
93
-
94
- template<typename T, typename C, typename A>
95
- void kll_quantile_calculator<T, C, A>::convert_to_preceding_cummulative() {
96
- uint64_t subtotal = 0;
97
- for (auto& entry: entries_) {
98
- const uint64_t new_subtotal = subtotal + entry.second;
99
- entry.second = subtotal;
100
- subtotal = new_subtotal;
101
- }
102
- }
103
-
104
- template<typename T, typename C, typename A>
105
- uint64_t kll_quantile_calculator<T, C, A>::pos_of_phi(double phi, uint64_t n) {
106
- const uint64_t pos = static_cast<uint64_t>(std::floor(phi * n));
107
- return (pos == n) ? n - 1 : pos;
108
- }
109
-
110
- template<typename T, typename C, typename A>
111
- uint32_t kll_quantile_calculator<T, C, A>::chunk_containing_pos(uint64_t pos) const {
112
- if (entries_.size() < 1) throw std::logic_error("array too short");
113
- if (pos < entries_[0].second) throw std::logic_error("position too small");
114
- if (pos > entries_[entries_.size() - 1].second) throw std::logic_error("position too large");
115
- return search_for_chunk_containing_pos(pos, 0, entries_.size());
116
- }
117
-
118
- template<typename T, typename C, typename A>
119
- uint32_t kll_quantile_calculator<T, C, A>::search_for_chunk_containing_pos(uint64_t pos, uint64_t l, uint64_t r) const {
120
- if (l + 1 == r) {
121
- return static_cast<uint32_t>(l);
122
- }
123
- const uint64_t m = l + (r - l) / 2;
124
- if (entries_[m].second <= pos) {
125
- return search_for_chunk_containing_pos(pos, m, r);
126
- }
127
- return search_for_chunk_containing_pos(pos, l, m);
128
- }
129
-
130
- template<typename T, typename C, typename A>
131
- void kll_quantile_calculator<T, C, A>::merge_sorted_blocks(Container& entries, const uint32_t* levels, uint8_t num_levels, uint32_t num_items) {
132
- if (num_levels == 1) return;
133
- Container temporary(entries.get_allocator());
134
- temporary.reserve(num_items);
135
- merge_sorted_blocks_direct(entries, temporary, levels, 0, num_levels);
136
- }
137
-
138
- template<typename T, typename C, typename A>
139
- void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_direct(Container& orig, Container& temp, const uint32_t* levels,
140
- uint8_t starting_level, uint8_t num_levels) {
141
- if (num_levels == 1) return;
142
- const uint8_t num_levels_1 = num_levels / 2;
143
- const uint8_t num_levels_2 = num_levels - num_levels_1;
144
- const uint8_t starting_level_1 = starting_level;
145
- const uint8_t starting_level_2 = starting_level + num_levels_1;
146
- const auto initial_size = temp.size();
147
- merge_sorted_blocks_reversed(orig, temp, levels, starting_level_1, num_levels_1);
148
- merge_sorted_blocks_reversed(orig, temp, levels, starting_level_2, num_levels_2);
149
- const uint32_t num_items_1 = levels[starting_level_1 + num_levels_1] - levels[starting_level_1];
150
- const auto chunk_begin = temp.begin() + initial_size;
151
- std::merge(
152
- std::make_move_iterator(chunk_begin), std::make_move_iterator(chunk_begin + num_items_1),
153
- std::make_move_iterator(chunk_begin + num_items_1), std::make_move_iterator(temp.end()),
154
- orig.begin() + levels[starting_level], compare_pair_by_first<C>()
155
- );
156
- temp.erase(chunk_begin, temp.end());
157
- }
158
-
159
- template<typename T, typename C, typename A>
160
- void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_reversed(Container& orig, Container& temp, const uint32_t* levels,
161
- uint8_t starting_level, uint8_t num_levels) {
162
- if (num_levels == 1) {
163
- std::move(orig.begin() + levels[starting_level], orig.begin() + levels[starting_level + 1], std::back_inserter(temp));
164
- return;
165
- }
166
- const uint8_t num_levels_1 = num_levels / 2;
167
- const uint8_t num_levels_2 = num_levels - num_levels_1;
168
- const uint8_t starting_level_1 = starting_level;
169
- const uint8_t starting_level_2 = starting_level + num_levels_1;
170
- merge_sorted_blocks_direct(orig, temp, levels, starting_level_1, num_levels_1);
171
- merge_sorted_blocks_direct(orig, temp, levels, starting_level_2, num_levels_2);
172
- std::merge(
173
- std::make_move_iterator(orig.begin() + levels[starting_level_1]),
174
- std::make_move_iterator(orig.begin() + levels[starting_level_1 + num_levels_1]),
175
- std::make_move_iterator(orig.begin() + levels[starting_level_2]),
176
- std::make_move_iterator(orig.begin() + levels[starting_level_2 + num_levels_2]),
177
- std::back_inserter(temp),
178
- compare_pair_by_first<C>()
179
- );
180
- }
181
-
182
- } /* namespace datasketches */
183
-
184
- #endif // KLL_QUANTILE_CALCULATOR_IMPL_HPP_
@@ -1,69 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #ifndef REQ_QUANTILE_CALCULATOR_HPP_
21
- #define REQ_QUANTILE_CALCULATOR_HPP_
22
-
23
- #include <functional>
24
-
25
- namespace datasketches {
26
-
27
- template<
28
- typename T,
29
- typename Comparator,
30
- typename Allocator
31
- >
32
- class req_quantile_calculator {
33
- public:
34
- req_quantile_calculator(uint64_t n, const Allocator& allocator);
35
-
36
- void add(const T* begin, const T* end, uint8_t lg_weight);
37
-
38
- template<bool inclusive>
39
- void convert_to_cummulative();
40
-
41
- const T* get_quantile(double rank) const;
42
-
43
- private:
44
- using Entry = std::pair<const T*, uint64_t>;
45
- using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
46
- using Container = std::vector<Entry, AllocEntry>;
47
-
48
- template<typename C>
49
- struct compare_pairs_by_first_ptr {
50
- bool operator()(const Entry& a, const Entry& b) {
51
- return C()(*a.first, *b.first);
52
- }
53
- };
54
-
55
- struct compare_pairs_by_second {
56
- bool operator()(const Entry& a, const Entry& b) {
57
- return a.second < b.second;
58
- }
59
- };
60
-
61
- uint64_t n_;
62
- Container entries_;
63
- };
64
-
65
- } /* namespace datasketches */
66
-
67
- #include "req_quantile_calculator_impl.hpp"
68
-
69
- #endif
@@ -1,60 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #ifndef REQ_QUANTILE_CALCULATOR_IMPL_HPP_
21
- #define REQ_QUANTILE_CALCULATOR_IMPL_HPP_
22
-
23
- namespace datasketches {
24
-
25
- template<typename T, typename C, typename A>
26
- req_quantile_calculator<T, C, A>::req_quantile_calculator(uint64_t n, const A& allocator):
27
- n_(n),
28
- entries_(allocator)
29
- {}
30
-
31
- template<typename T, typename C, typename A>
32
- void req_quantile_calculator<T, C, A>::add(const T* begin, const T* end, uint8_t lg_weight) {
33
- if (entries_.capacity() < entries_.size() + std::distance(begin, end)) entries_.reserve(entries_.size() + std::distance(begin, end));
34
- const size_t size_before = entries_.size();
35
- for (auto it = begin; it != end; ++it) entries_.push_back(Entry(it, 1 << lg_weight));
36
- if (size_before > 0) std::inplace_merge(entries_.begin(), entries_.begin() + size_before, entries_.end(), compare_pairs_by_first_ptr<C>());
37
- }
38
-
39
- template<typename T, typename C, typename A>
40
- template<bool inclusive>
41
- void req_quantile_calculator<T, C, A>::convert_to_cummulative() {
42
- uint64_t subtotal = 0;
43
- for (auto& entry: entries_) {
44
- const uint64_t new_subtotal = subtotal + entry.second;
45
- entry.second = inclusive ? new_subtotal : subtotal;
46
- subtotal = new_subtotal;
47
- }
48
- }
49
-
50
- template<typename T, typename C, typename A>
51
- const T* req_quantile_calculator<T, C, A>::get_quantile(double rank) const {
52
- uint64_t weight = static_cast<uint64_t>(rank * n_);
53
- auto it = std::lower_bound(entries_.begin(), entries_.end(), Entry(nullptr, weight), compare_pairs_by_second());
54
- if (it == entries_.end()) return entries_[entries_.size() - 1].first;
55
- return it->first;
56
- }
57
-
58
- } /* namespace datasketches */
59
-
60
- #endif