datasketches 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +1 -1
  4. data/ext/datasketches/kll_wrapper.cpp +5 -1
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +4 -3
  7. data/vendor/datasketches-cpp/common/CMakeLists.txt +4 -0
  8. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +14 -0
  10. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
  11. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
  12. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
  13. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
  14. data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
  15. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
  16. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
  17. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +2 -0
  18. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
  19. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
  20. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
  21. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +25 -9
  22. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
  24. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
  25. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
  26. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
  27. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
  28. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
  29. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
  30. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
  31. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
  32. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
  33. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
  34. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
  35. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
  36. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
  37. data/vendor/datasketches-cpp/kll/CMakeLists.txt +0 -4
  38. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
  39. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
  40. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +96 -42
  41. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +105 -127
  42. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +94 -25
  43. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
  44. data/vendor/datasketches-cpp/pyproject.toml +1 -1
  45. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  46. data/vendor/datasketches-cpp/python/README.md +7 -0
  47. data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
  48. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
  49. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +48 -13
  50. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
  51. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
  52. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
  53. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +1 -0
  54. data/vendor/datasketches-cpp/python/tests/kll_test.py +10 -4
  55. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
  56. data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
  57. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
  58. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
  59. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
  60. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
  61. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
  62. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
  63. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
  64. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
  65. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
  66. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
  67. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
  68. data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
  69. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
  70. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
  71. data/vendor/datasketches-cpp/req/CMakeLists.txt +0 -2
  72. data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
  73. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
  74. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
  75. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +62 -59
  76. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
  77. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +44 -7
  78. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +31 -26
  79. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
  80. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +25 -9
  81. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
  82. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
  83. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
  84. data/vendor/datasketches-cpp/setup.py +1 -1
  85. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  86. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +8 -6
  87. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +1 -0
  88. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -0
  89. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +7 -45
  90. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -0
  91. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +1 -0
  92. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
  93. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
  94. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +1 -0
  95. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +29 -1
  96. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +2 -0
  97. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +16 -0
  98. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +1 -0
  99. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
  100. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
  101. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -0
  102. metadata +25 -9
  103. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
  104. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
  105. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
  106. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
@@ -21,6 +21,7 @@
21
21
  #include <fstream>
22
22
  #include <sstream>
23
23
  #include <vector>
24
+ #include <stdexcept>
24
25
 
25
26
  #include <catch.hpp>
26
27
  #include <theta_sketch.hpp>
@@ -394,7 +395,13 @@ TEST_CASE("theta sketch: serialize deserialize stream and bytes equivalence", "[
394
395
  }
395
396
  }
396
397
 
397
- TEST_CASE("theta sketch: deserialize compact single item buffer overrun", "[theta_sketch]") {
398
+ TEST_CASE("theta sketch: deserialize empty buffer overrun", "[theta_sketch]") {
399
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
400
+ auto bytes = update_sketch.compact().serialize();
401
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
402
+ }
403
+
404
+ TEST_CASE("theta sketch: deserialize single item buffer overrun", "[theta_sketch]") {
398
405
  update_theta_sketch update_sketch = update_theta_sketch::builder().build();
399
406
  update_sketch.update(1);
400
407
  auto bytes = update_sketch.compact().serialize();
@@ -402,6 +409,27 @@ TEST_CASE("theta sketch: deserialize compact single item buffer overrun", "[thet
402
409
  REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
403
410
  }
404
411
 
412
+ TEST_CASE("theta sketch: deserialize exact mode buffer overrun", "[theta_sketch]") {
413
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
414
+ for (int i = 0; i < 1000; ++i) update_sketch.update(i);
415
+ auto bytes = update_sketch.compact().serialize();
416
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 7), std::out_of_range);
417
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 8), std::out_of_range);
418
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 16), std::out_of_range);
419
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
420
+ }
421
+
422
+ TEST_CASE("theta sketch: deserialize estimation mode buffer overrun", "[theta_sketch]") {
423
+ update_theta_sketch update_sketch = update_theta_sketch::builder().build();
424
+ for (int i = 0; i < 10000; ++i) update_sketch.update(i);
425
+ auto bytes = update_sketch.compact().serialize();
426
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 7), std::out_of_range);
427
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 8), std::out_of_range);
428
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 16), std::out_of_range);
429
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), 24), std::out_of_range);
430
+ REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(bytes.data(), bytes.size() - 1), std::out_of_range);
431
+ }
432
+
405
433
  TEST_CASE("theta sketch: conversion constructor and wrapped compact", "[theta_sketch]") {
406
434
  update_theta_sketch update_sketch = update_theta_sketch::builder().build();
407
435
  const int n = 8192;
@@ -21,6 +21,8 @@
21
21
 
22
22
  #include <theta_union.hpp>
23
23
 
24
+ #include <stdexcept>
25
+
24
26
  namespace datasketches {
25
27
 
26
28
  TEST_CASE("theta union: empty", "[theta_union]") {
@@ -398,9 +398,23 @@ public:
398
398
  virtual uint32_t get_num_retained() const;
399
399
  virtual uint16_t get_seed_hash() const;
400
400
 
401
+ /**
402
+ * This method serializes the sketch into a given stream in a binary form
403
+ * @param os output stream
404
+ * @param instance of a SerDe
405
+ */
401
406
  template<typename SerDe = serde<Summary>>
402
407
  void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
403
408
 
409
+ /**
410
+ * This method serializes the sketch as a vector of bytes.
411
+ * An optional header can be reserved in front of the sketch.
412
+ * It is a blank space of a given size.
413
+ * This header is used in Datasketches PostgreSQL extension.
414
+ * @param header_size_bytes space to reserve in front of the sketch
415
+ * @param instance of a SerDe
416
+ * @return serialized sketch as a vector of bytes
417
+ */
404
418
  template<typename SerDe = serde<Summary>>
405
419
  vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
406
420
 
@@ -414,6 +428,7 @@ public:
414
428
  * @param is input stream
415
429
  * @param seed the seed for the hash function that was used to create the sketch
416
430
  * @param instance of a SerDe
431
+ * @param instance of an Allocator
417
432
  * @return an instance of a sketch
418
433
  */
419
434
  template<typename SerDe = serde<Summary>>
@@ -426,6 +441,7 @@ public:
426
441
  * @param size the size of the array
427
442
  * @param seed the seed for the hash function that was used to create the sketch
428
443
  * @param instance of a SerDe
444
+ * @param instance of an Allocator
429
445
  * @return an instance of the sketch
430
446
  */
431
447
  template<typename SerDe = serde<Summary>>
@@ -18,6 +18,7 @@
18
18
  */
19
19
 
20
20
  #include <sstream>
21
+ #include <stdexcept>
21
22
 
22
23
  #include "binomial_bounds.hpp"
23
24
  #include "theta_helpers.hpp"
@@ -22,6 +22,7 @@
22
22
  #include <catch.hpp>
23
23
  #include <tuple_a_not_b.hpp>
24
24
  #include <theta_sketch.hpp>
25
+ #include <stdexcept>
25
26
 
26
27
  namespace datasketches {
27
28
 
@@ -22,6 +22,7 @@
22
22
  #include <catch.hpp>
23
23
  #include <tuple_intersection.hpp>
24
24
  #include <theta_sketch.hpp>
25
+ #include <stdexcept>
25
26
 
26
27
  namespace datasketches {
27
28
 
@@ -18,6 +18,7 @@
18
18
  */
19
19
 
20
20
  #include <iostream>
21
+ #include <stdexcept>
21
22
 
22
23
  #include <catch.hpp>
23
24
  #include <tuple_union.hpp>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datasketches
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-12-28 00:00:00.000000000 Z
11
+ date: 2022-05-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice
@@ -62,7 +62,11 @@ files:
62
62
  - vendor/datasketches-cpp/common/include/conditional_forward.hpp
63
63
  - vendor/datasketches-cpp/common/include/count_zeros.hpp
64
64
  - vendor/datasketches-cpp/common/include/inv_pow2_table.hpp
65
+ - vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp
66
+ - vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp
65
67
  - vendor/datasketches-cpp/common/include/memory_operations.hpp
68
+ - vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp
69
+ - vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp
66
70
  - vendor/datasketches-cpp/common/include/serde.hpp
67
71
  - vendor/datasketches-cpp/common/test/CMakeLists.txt
68
72
  - vendor/datasketches-cpp/common/test/catch.hpp
@@ -157,12 +161,8 @@ files:
157
161
  - vendor/datasketches-cpp/kll/CMakeLists.txt
158
162
  - vendor/datasketches-cpp/kll/include/kll_helper.hpp
159
163
  - vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp
160
- - vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp
161
- - vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp
162
164
  - vendor/datasketches-cpp/kll/include/kll_sketch.hpp
163
165
  - vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp
164
- - vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp
165
- - vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp
166
166
  - vendor/datasketches-cpp/kll/test/CMakeLists.txt
167
167
  - vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp
168
168
  - vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk
@@ -185,6 +185,8 @@ files:
185
185
  - vendor/datasketches-cpp/python/src/fi_wrapper.cpp
186
186
  - vendor/datasketches-cpp/python/src/hll_wrapper.cpp
187
187
  - vendor/datasketches-cpp/python/src/kll_wrapper.cpp
188
+ - vendor/datasketches-cpp/python/src/ks_wrapper.cpp
189
+ - vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp
188
190
  - vendor/datasketches-cpp/python/src/req_wrapper.cpp
189
191
  - vendor/datasketches-cpp/python/src/theta_wrapper.cpp
190
192
  - vendor/datasketches-cpp/python/src/vector_of_kll.cpp
@@ -194,16 +196,30 @@ files:
194
196
  - vendor/datasketches-cpp/python/tests/fi_test.py
195
197
  - vendor/datasketches-cpp/python/tests/hll_test.py
196
198
  - vendor/datasketches-cpp/python/tests/kll_test.py
199
+ - vendor/datasketches-cpp/python/tests/quantiles_test.py
197
200
  - vendor/datasketches-cpp/python/tests/req_test.py
198
201
  - vendor/datasketches-cpp/python/tests/theta_test.py
199
202
  - vendor/datasketches-cpp/python/tests/vector_of_kll_test.py
200
203
  - vendor/datasketches-cpp/python/tests/vo_test.py
204
+ - vendor/datasketches-cpp/quantiles/CMakeLists.txt
205
+ - vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp
206
+ - vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp
207
+ - vendor/datasketches-cpp/quantiles/test/CMakeLists.txt
208
+ - vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk
209
+ - vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk
210
+ - vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk
211
+ - vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk
212
+ - vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk
213
+ - vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk
214
+ - vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk
215
+ - vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk
216
+ - vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp
217
+ - vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp
218
+ - vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp
201
219
  - vendor/datasketches-cpp/req/CMakeLists.txt
202
220
  - vendor/datasketches-cpp/req/include/req_common.hpp
203
221
  - vendor/datasketches-cpp/req/include/req_compactor.hpp
204
222
  - vendor/datasketches-cpp/req/include/req_compactor_impl.hpp
205
- - vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp
206
- - vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp
207
223
  - vendor/datasketches-cpp/req/include/req_sketch.hpp
208
224
  - vendor/datasketches-cpp/req/include/req_sketch_impl.hpp
209
225
  - vendor/datasketches-cpp/req/test/CMakeLists.txt
@@ -319,7 +335,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
319
335
  - !ruby/object:Gem::Version
320
336
  version: '0'
321
337
  requirements: []
322
- rubygems_version: 3.3.3
338
+ rubygems_version: 3.3.7
323
339
  signing_key:
324
340
  specification_version: 4
325
341
  summary: Sketch data structures for Ruby
@@ -1,75 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #ifndef KLL_QUANTILE_CALCULATOR_HPP_
21
- #define KLL_QUANTILE_CALCULATOR_HPP_
22
-
23
- #include <memory>
24
-
25
- namespace datasketches {
26
-
27
- // forward declaration
28
- template<typename T, typename C, typename S, typename A> class kll_sketch;
29
-
30
- template <typename T, typename C, typename A>
31
- class kll_quantile_calculator {
32
- public:
33
- using Entry = std::pair<T, uint64_t>;
34
- using AllocEntry = typename std::allocator_traits<A>::template rebind_alloc<Entry>;
35
- using Container = std::vector<Entry, AllocEntry>;
36
- using const_iterator = typename Container::const_iterator;
37
-
38
- template<typename S>
39
- kll_quantile_calculator(const kll_sketch<T, C, S, A>& sketch);
40
-
41
- T get_quantile(double fraction) const;
42
- const_iterator begin() const;
43
- const_iterator end() const;
44
-
45
- private:
46
- using AllocU32 = typename std::allocator_traits<A>::template rebind_alloc<uint32_t>;
47
- using vector_u32 = std::vector<uint32_t, AllocU32>;
48
- uint64_t n_;
49
- vector_u32 levels_;
50
- Container entries_;
51
-
52
- void populate_from_sketch(const T* items, const uint32_t* levels, uint8_t num_levels);
53
- T approximately_answer_positional_query(uint64_t pos) const;
54
- void convert_to_preceding_cummulative();
55
- uint32_t chunk_containing_pos(uint64_t pos) const;
56
- uint32_t search_for_chunk_containing_pos(uint64_t pos, uint64_t l, uint64_t r) const;
57
- static void merge_sorted_blocks(Container& entries, const uint32_t* levels, uint8_t num_levels, uint32_t num_items);
58
- static void merge_sorted_blocks_direct(Container& orig, Container& temp, const uint32_t* levels, uint8_t starting_level, uint8_t num_levels);
59
- static void merge_sorted_blocks_reversed(Container& orig, Container& temp, const uint32_t* levels, uint8_t starting_level, uint8_t num_levels);
60
- static uint64_t pos_of_phi(double phi, uint64_t n);
61
-
62
- template<typename Comparator>
63
- struct compare_pair_by_first {
64
- template<typename Entry1, typename Entry2>
65
- bool operator()(Entry1&& a, Entry2&& b) const {
66
- return Comparator()(std::forward<Entry1>(a).first, std::forward<Entry2>(b).first);
67
- }
68
- };
69
- };
70
-
71
- } /* namespace datasketches */
72
-
73
- #include "kll_quantile_calculator_impl.hpp"
74
-
75
- #endif // KLL_QUANTILE_CALCULATOR_HPP_
@@ -1,184 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #ifndef KLL_QUANTILE_CALCULATOR_IMPL_HPP_
21
- #define KLL_QUANTILE_CALCULATOR_IMPL_HPP_
22
-
23
- #include <memory>
24
- #include <cmath>
25
- #include <algorithm>
26
-
27
- #include "kll_helper.hpp"
28
-
29
- namespace datasketches {
30
-
31
- template<typename T, typename C, typename A>
32
- template<typename S>
33
- kll_quantile_calculator<T, C, A>::kll_quantile_calculator(const kll_sketch<T, C, S, A>& sketch):
34
- n_(sketch.n_), levels_(sketch.num_levels_ + 1, 0, sketch.allocator_), entries_(sketch.allocator_)
35
- {
36
- const uint32_t num_items = sketch.levels_[sketch.num_levels_] - sketch.levels_[0];
37
- if (num_items > 0) {
38
- entries_.reserve(num_items);
39
- populate_from_sketch(sketch.items_, sketch.levels_.data(), sketch.num_levels_);
40
- if (!sketch.is_level_zero_sorted_) std::sort(entries_.begin(), entries_.begin() + levels_[1], compare_pair_by_first<C>());
41
- merge_sorted_blocks(entries_, levels_.data(), static_cast<uint8_t>(levels_.size()) - 1, num_items);
42
- if (!is_sorted(entries_.begin(), entries_.end(), compare_pair_by_first<C>())) throw std::logic_error("entries must be sorted");
43
- convert_to_preceding_cummulative();
44
- }
45
- }
46
-
47
- template<typename T, typename C, typename A>
48
- T kll_quantile_calculator<T, C, A>::get_quantile(double fraction) const {
49
- return approximately_answer_positional_query(pos_of_phi(fraction, n_));
50
- }
51
-
52
- template<typename T, typename C, typename A>
53
- auto kll_quantile_calculator<T, C, A>::begin() const -> const_iterator {
54
- return entries_.begin();
55
- }
56
-
57
- template<typename T, typename C, typename A>
58
- auto kll_quantile_calculator<T, C, A>::end() const -> const_iterator {
59
- return entries_.end();
60
- }
61
-
62
- template<typename T, typename C, typename A>
63
- void kll_quantile_calculator<T, C, A>::populate_from_sketch(const T* items, const uint32_t* levels, uint8_t num_levels) {
64
- size_t src_level = 0;
65
- size_t dst_level = 0;
66
- uint64_t weight = 1;
67
- uint32_t offset = levels[0];
68
- while (src_level < num_levels) {
69
- const uint32_t from_index(levels[src_level] - offset);
70
- const uint32_t to_index(levels[src_level + 1] - offset); // exclusive
71
- if (from_index < to_index) { // skip empty levels
72
- for (uint32_t i = from_index; i < to_index; ++i) {
73
- entries_.push_back(Entry(items[i + offset], weight));
74
- }
75
- levels_[dst_level] = from_index;
76
- levels_[dst_level + 1] = to_index;
77
- dst_level++;
78
- }
79
- src_level++;
80
- weight *= 2;
81
- }
82
- if (levels_.size() > static_cast<size_t>(dst_level + 1)) levels_.resize(dst_level + 1);
83
- }
84
-
85
- template<typename T, typename C, typename A>
86
- T kll_quantile_calculator<T, C, A>::approximately_answer_positional_query(uint64_t pos) const {
87
- if (pos >= n_) throw std::logic_error("position out of range");
88
- const uint32_t num_items = levels_[levels_.size() - 1];
89
- if (pos > entries_[num_items - 1].second) return entries_[num_items - 1].first;
90
- const uint32_t index = chunk_containing_pos(pos);
91
- return entries_[index].first;
92
- }
93
-
94
- template<typename T, typename C, typename A>
95
- void kll_quantile_calculator<T, C, A>::convert_to_preceding_cummulative() {
96
- uint64_t subtotal = 0;
97
- for (auto& entry: entries_) {
98
- const uint64_t new_subtotal = subtotal + entry.second;
99
- entry.second = subtotal;
100
- subtotal = new_subtotal;
101
- }
102
- }
103
-
104
- template<typename T, typename C, typename A>
105
- uint64_t kll_quantile_calculator<T, C, A>::pos_of_phi(double phi, uint64_t n) {
106
- const uint64_t pos = static_cast<uint64_t>(std::floor(phi * n));
107
- return (pos == n) ? n - 1 : pos;
108
- }
109
-
110
- template<typename T, typename C, typename A>
111
- uint32_t kll_quantile_calculator<T, C, A>::chunk_containing_pos(uint64_t pos) const {
112
- if (entries_.size() < 1) throw std::logic_error("array too short");
113
- if (pos < entries_[0].second) throw std::logic_error("position too small");
114
- if (pos > entries_[entries_.size() - 1].second) throw std::logic_error("position too large");
115
- return search_for_chunk_containing_pos(pos, 0, entries_.size());
116
- }
117
-
118
- template<typename T, typename C, typename A>
119
- uint32_t kll_quantile_calculator<T, C, A>::search_for_chunk_containing_pos(uint64_t pos, uint64_t l, uint64_t r) const {
120
- if (l + 1 == r) {
121
- return static_cast<uint32_t>(l);
122
- }
123
- const uint64_t m = l + (r - l) / 2;
124
- if (entries_[m].second <= pos) {
125
- return search_for_chunk_containing_pos(pos, m, r);
126
- }
127
- return search_for_chunk_containing_pos(pos, l, m);
128
- }
129
-
130
- template<typename T, typename C, typename A>
131
- void kll_quantile_calculator<T, C, A>::merge_sorted_blocks(Container& entries, const uint32_t* levels, uint8_t num_levels, uint32_t num_items) {
132
- if (num_levels == 1) return;
133
- Container temporary(entries.get_allocator());
134
- temporary.reserve(num_items);
135
- merge_sorted_blocks_direct(entries, temporary, levels, 0, num_levels);
136
- }
137
-
138
- template<typename T, typename C, typename A>
139
- void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_direct(Container& orig, Container& temp, const uint32_t* levels,
140
- uint8_t starting_level, uint8_t num_levels) {
141
- if (num_levels == 1) return;
142
- const uint8_t num_levels_1 = num_levels / 2;
143
- const uint8_t num_levels_2 = num_levels - num_levels_1;
144
- const uint8_t starting_level_1 = starting_level;
145
- const uint8_t starting_level_2 = starting_level + num_levels_1;
146
- const auto initial_size = temp.size();
147
- merge_sorted_blocks_reversed(orig, temp, levels, starting_level_1, num_levels_1);
148
- merge_sorted_blocks_reversed(orig, temp, levels, starting_level_2, num_levels_2);
149
- const uint32_t num_items_1 = levels[starting_level_1 + num_levels_1] - levels[starting_level_1];
150
- const auto chunk_begin = temp.begin() + initial_size;
151
- std::merge(
152
- std::make_move_iterator(chunk_begin), std::make_move_iterator(chunk_begin + num_items_1),
153
- std::make_move_iterator(chunk_begin + num_items_1), std::make_move_iterator(temp.end()),
154
- orig.begin() + levels[starting_level], compare_pair_by_first<C>()
155
- );
156
- temp.erase(chunk_begin, temp.end());
157
- }
158
-
159
- template<typename T, typename C, typename A>
160
- void kll_quantile_calculator<T, C, A>::merge_sorted_blocks_reversed(Container& orig, Container& temp, const uint32_t* levels,
161
- uint8_t starting_level, uint8_t num_levels) {
162
- if (num_levels == 1) {
163
- std::move(orig.begin() + levels[starting_level], orig.begin() + levels[starting_level + 1], std::back_inserter(temp));
164
- return;
165
- }
166
- const uint8_t num_levels_1 = num_levels / 2;
167
- const uint8_t num_levels_2 = num_levels - num_levels_1;
168
- const uint8_t starting_level_1 = starting_level;
169
- const uint8_t starting_level_2 = starting_level + num_levels_1;
170
- merge_sorted_blocks_direct(orig, temp, levels, starting_level_1, num_levels_1);
171
- merge_sorted_blocks_direct(orig, temp, levels, starting_level_2, num_levels_2);
172
- std::merge(
173
- std::make_move_iterator(orig.begin() + levels[starting_level_1]),
174
- std::make_move_iterator(orig.begin() + levels[starting_level_1 + num_levels_1]),
175
- std::make_move_iterator(orig.begin() + levels[starting_level_2]),
176
- std::make_move_iterator(orig.begin() + levels[starting_level_2 + num_levels_2]),
177
- std::back_inserter(temp),
178
- compare_pair_by_first<C>()
179
- );
180
- }
181
-
182
- } /* namespace datasketches */
183
-
184
- #endif // KLL_QUANTILE_CALCULATOR_IMPL_HPP_
@@ -1,69 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #ifndef REQ_QUANTILE_CALCULATOR_HPP_
21
- #define REQ_QUANTILE_CALCULATOR_HPP_
22
-
23
- #include <functional>
24
-
25
- namespace datasketches {
26
-
27
- template<
28
- typename T,
29
- typename Comparator,
30
- typename Allocator
31
- >
32
- class req_quantile_calculator {
33
- public:
34
- req_quantile_calculator(uint64_t n, const Allocator& allocator);
35
-
36
- void add(const T* begin, const T* end, uint8_t lg_weight);
37
-
38
- template<bool inclusive>
39
- void convert_to_cummulative();
40
-
41
- const T* get_quantile(double rank) const;
42
-
43
- private:
44
- using Entry = std::pair<const T*, uint64_t>;
45
- using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
46
- using Container = std::vector<Entry, AllocEntry>;
47
-
48
- template<typename C>
49
- struct compare_pairs_by_first_ptr {
50
- bool operator()(const Entry& a, const Entry& b) {
51
- return C()(*a.first, *b.first);
52
- }
53
- };
54
-
55
- struct compare_pairs_by_second {
56
- bool operator()(const Entry& a, const Entry& b) {
57
- return a.second < b.second;
58
- }
59
- };
60
-
61
- uint64_t n_;
62
- Container entries_;
63
- };
64
-
65
- } /* namespace datasketches */
66
-
67
- #include "req_quantile_calculator_impl.hpp"
68
-
69
- #endif
@@ -1,60 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #ifndef REQ_QUANTILE_CALCULATOR_IMPL_HPP_
21
- #define REQ_QUANTILE_CALCULATOR_IMPL_HPP_
22
-
23
- namespace datasketches {
24
-
25
- template<typename T, typename C, typename A>
26
- req_quantile_calculator<T, C, A>::req_quantile_calculator(uint64_t n, const A& allocator):
27
- n_(n),
28
- entries_(allocator)
29
- {}
30
-
31
- template<typename T, typename C, typename A>
32
- void req_quantile_calculator<T, C, A>::add(const T* begin, const T* end, uint8_t lg_weight) {
33
- if (entries_.capacity() < entries_.size() + std::distance(begin, end)) entries_.reserve(entries_.size() + std::distance(begin, end));
34
- const size_t size_before = entries_.size();
35
- for (auto it = begin; it != end; ++it) entries_.push_back(Entry(it, 1 << lg_weight));
36
- if (size_before > 0) std::inplace_merge(entries_.begin(), entries_.begin() + size_before, entries_.end(), compare_pairs_by_first_ptr<C>());
37
- }
38
-
39
- template<typename T, typename C, typename A>
40
- template<bool inclusive>
41
- void req_quantile_calculator<T, C, A>::convert_to_cummulative() {
42
- uint64_t subtotal = 0;
43
- for (auto& entry: entries_) {
44
- const uint64_t new_subtotal = subtotal + entry.second;
45
- entry.second = inclusive ? new_subtotal : subtotal;
46
- subtotal = new_subtotal;
47
- }
48
- }
49
-
50
- template<typename T, typename C, typename A>
51
- const T* req_quantile_calculator<T, C, A>::get_quantile(double rank) const {
52
- uint64_t weight = static_cast<uint64_t>(rank * n_);
53
- auto it = std::lower_bound(entries_.begin(), entries_.end(), Entry(nullptr, weight), compare_pairs_by_second());
54
- if (it == entries_.end()) return entries_[entries_.size() - 1].first;
55
- return it->first;
56
- }
57
-
58
- } /* namespace datasketches */
59
-
60
- #endif