datasketches 0.2.2 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (154) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/README.md +8 -8
  6. data/ext/datasketches/kll_wrapper.cpp +5 -1
  7. data/ext/datasketches/theta_wrapper.cpp +20 -4
  8. data/lib/datasketches/version.rb +1 -1
  9. data/vendor/datasketches-cpp/CMakeLists.txt +27 -5
  10. data/vendor/datasketches-cpp/LICENSE +40 -3
  11. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  12. data/vendor/datasketches-cpp/NOTICE +1 -1
  13. data/vendor/datasketches-cpp/README.md +76 -9
  14. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  15. data/vendor/datasketches-cpp/common/CMakeLists.txt +18 -13
  16. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
  17. data/vendor/datasketches-cpp/common/include/common_defs.hpp +16 -0
  18. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
  19. data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
  20. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
  21. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
  22. data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
  23. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  24. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
  25. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
  26. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
  27. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +1 -1
  28. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
  29. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +10 -6
  31. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
  33. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
  34. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  35. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
  36. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +30 -12
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
  38. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
  41. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
  42. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
  44. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
  45. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
  46. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
  47. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
  48. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
  49. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
  50. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
  51. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
  52. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
  53. data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
  54. data/vendor/datasketches-cpp/kll/CMakeLists.txt +5 -19
  55. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
  56. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
  57. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +103 -44
  58. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +110 -130
  59. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +156 -23
  60. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
  61. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  62. data/vendor/datasketches-cpp/python/CMakeLists.txt +17 -6
  63. data/vendor/datasketches-cpp/python/README.md +57 -50
  64. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  65. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  66. data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
  67. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
  68. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +49 -14
  69. data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
  70. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
  71. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
  72. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +2 -2
  73. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +12 -5
  74. data/vendor/datasketches-cpp/python/tests/kll_test.py +12 -6
  75. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
  76. data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
  77. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  78. data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
  79. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
  80. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
  81. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
  82. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
  83. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
  84. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
  85. data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
  86. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
  87. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
  88. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
  89. data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
  90. data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
  91. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
  92. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
  93. data/vendor/datasketches-cpp/req/CMakeLists.txt +6 -21
  94. data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
  95. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
  96. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
  97. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +66 -61
  98. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
  99. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  100. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +54 -12
  101. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +45 -34
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +33 -15
  104. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
  105. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
  106. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
  107. data/vendor/datasketches-cpp/setup.py +10 -7
  108. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  109. data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
  110. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +92 -23
  111. data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
  112. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  113. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +7 -6
  114. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +3 -2
  115. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
  116. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +150 -93
  117. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
  118. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  119. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -2
  120. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  121. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +9 -5
  122. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +39 -10
  123. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  124. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
  125. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  126. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  127. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  128. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  129. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  130. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
  131. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +446 -0
  132. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +429 -1
  133. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -11
  134. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  135. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  136. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
  137. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  138. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +29 -9
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +34 -14
  141. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  142. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  143. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
  144. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
  145. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
  146. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
  147. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +8 -0
  148. metadata +34 -12
  149. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
  150. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
  151. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
  152. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
  153. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  154. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5c578044053c564421893cc4433f7fe557f23ba9d8a1995fc2a2c5f07742721a
4
- data.tar.gz: f4122bd75e19fede015b01a5e5ad8e6130f75babe9c9160cc56f378480a16cee
3
+ metadata.gz: 9eaa8a17efdbc591b3e56f94650e887babd30dc79d95db3a7986df0261184191
4
+ data.tar.gz: 5544326a0edf165d87373a680d8bf5b80acba2894b9048f92cbdb261fcd66d57
5
5
  SHA512:
6
- metadata.gz: 2d7c4d7306f28356557a816a78033b909561ccd8f843281a2b756e88cbdcb9936da7995ff80871a19e229675ead812aca00d6c639d63a6532998c3c1b35aa953
7
- data.tar.gz: fdf0fe1d14e04bfddef9df1ae7958f6571a7f689865aa02e81713d4b250afeeeb8c90a168ce855728d9c831aff6d3ea71df91c71b7269a760c19488c42c92658
6
+ metadata.gz: 5a28c093ecda083762367149800770f59fee8e630c0d983d3f29ed32d027fae2e2515dff243ee11bbd41f4875c7cea622f7bc5cc5d7e73176e785503ed19fc0b
7
+ data.tar.gz: 6b210f2fdca1ae3cbd4e4cbf88e284855014b5a1e1c883085dc96a057da29e370005163ce628e54351c9127b00fae4b7b33a4ca63e6f4b90e0665e93b7742a66
data/CHANGELOG.md CHANGED
@@ -1,3 +1,15 @@
1
+ ## 0.2.5 (2022-05-21)
2
+
3
+ - Updated DataSketches to 3.4.0
4
+
5
+ ## 0.2.4 (2021-12-28)
6
+
7
+ - Updated DataSketches to 3.3.0
8
+
9
+ ## 0.2.3 (2021-09-29)
10
+
11
+ - Updated DataSketches to 3.2.0
12
+
1
13
  ## 0.2.2 (2021-07-17)
2
14
 
3
15
  - Updated DataSketches to 3.1.0
data/LICENSE CHANGED
@@ -284,11 +284,48 @@ APPENDIX B: Additional licenses relevant to this product.
284
284
  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
285
285
  DEALINGS IN THE SOFTWARE.
286
286
  -------------------------------------------------------------
287
- Code Locations
287
+ Code Locations:
288
288
  * https://github.com/apache/datasketches-cpp/blob/master/common/test/catch.hpp
289
289
  that is adapted from the above.
290
290
 
291
291
 
292
+ =============================================================
293
+ BSD License
294
+ =============================================================
295
+ Original source code:
296
+ https://github.com/pybind/pybind11/blob/master/LICENSE
297
+
298
+ Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
299
+
300
+ Redistribution and use in source and binary forms, with or without
301
+ modification, are permitted provided that the following conditions are met:
302
+
303
+ 1. Redistributions of source code must retain the above copyright notice, this
304
+ list of conditions and the following disclaimer.
305
+
306
+ 2. Redistributions in binary form must reproduce the above copyright notice,
307
+ this list of conditions and the following disclaimer in the documentation
308
+ and/or other materials provided with the distribution.
309
+
310
+ 3. Neither the name of the copyright holder nor the names of its contributors
311
+ may be used to endorse or promote products derived from this software
312
+ without specific prior written permission.
313
+
314
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
315
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
316
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
317
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
318
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
319
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
320
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
321
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
322
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
323
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
324
+ -------------------------------------------------------------
325
+ Code Locations:
326
+ Found only in the convenience binaries distributed from PyPI, which rely
327
+ on pybind11 code during compilation.
328
+
292
329
 
293
330
  =============================================================
294
331
  Public Domain
@@ -297,7 +334,7 @@ APPENDIX B: Additional licenses relevant to this product.
297
334
  https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
298
335
  Placed in the Public Domain by Austin Appleby
299
336
 
300
- Code Locations
337
+ Code Locations:
301
338
  common/include/MurmurHash3.h
302
339
  that is adapted from the above.
303
340
  -------------------------------------------------------------
@@ -305,6 +342,6 @@ APPENDIX B: Additional licenses relevant to this product.
305
342
  * https://graphics.stanford.edu/~seander/bithacks.html
306
343
  * Placed in the Public Domain by Sean Eron Anderson
307
344
 
308
- Code Locations
345
+ Code Locations:
309
346
  * common/include/ceiling_power_of_2.hpp
310
347
  that is adapted from the above.
data/NOTICE CHANGED
@@ -1,5 +1,5 @@
1
1
  Apache DataSketches-cpp
2
- Copyright 2020 The Apache Software Foundation
2
+ Copyright 2020-2021 The Apache Software Foundation
3
3
 
4
4
  Copyright 2015-2018 Yahoo
5
5
  Copyright 2019 Verizon Media
data/README.md CHANGED
@@ -1,15 +1,15 @@
1
- # DataSketches
1
+ # DataSketches Ruby
2
2
 
3
3
  [DataSketches](https://datasketches.apache.org/) - sketch data structures - for Ruby
4
4
 
5
- [![Build Status](https://github.com/ankane/datasketches/workflows/build/badge.svg?branch=master)](https://github.com/ankane/datasketches/actions)
5
+ [![Build Status](https://github.com/ankane/datasketches-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/datasketches-ruby/actions)
6
6
 
7
7
  ## Installation
8
8
 
9
9
  Add this line to your application’s Gemfile:
10
10
 
11
11
  ```ruby
12
- gem 'datasketches'
12
+ gem "datasketches"
13
13
  ```
14
14
 
15
15
  ## Sketch Families
@@ -292,22 +292,22 @@ This library is modeled after the DataSketches [Python API](https://github.com/a
292
292
 
293
293
  ## History
294
294
 
295
- View the [changelog](https://github.com/ankane/datasketches/blob/master/CHANGELOG.md)
295
+ View the [changelog](https://github.com/ankane/datasketches-ruby/blob/master/CHANGELOG.md)
296
296
 
297
297
  ## Contributing
298
298
 
299
299
  Everyone is encouraged to help improve this project. Here are a few ways you can help:
300
300
 
301
- - [Report bugs](https://github.com/ankane/datasketches/issues)
302
- - Fix bugs and [submit pull requests](https://github.com/ankane/datasketches/pulls)
301
+ - [Report bugs](https://github.com/ankane/datasketches-ruby/issues)
302
+ - Fix bugs and [submit pull requests](https://github.com/ankane/datasketches-ruby/pulls)
303
303
  - Write, clarify, or fix documentation
304
304
  - Suggest or add new features
305
305
 
306
306
  To get started with development:
307
307
 
308
308
  ```sh
309
- git clone --recursive https://github.com/ankane/datasketches.git
310
- cd datasketches
309
+ git clone --recursive https://github.com/ankane/datasketches-ruby.git
310
+ cd datasketches-ruby
311
311
  bundle install
312
312
  bundle exec rake compile
313
313
  bundle exec rake test
@@ -33,7 +33,11 @@ void bind_kll_sketch(Rice::Module& m, const char* name) {
33
33
  .define_method("estimation_mode?", &kll_sketch<T>::is_estimation_mode)
34
34
  .define_method("min_value", &kll_sketch<T>::get_min_value)
35
35
  .define_method("max_value", &kll_sketch<T>::get_max_value)
36
- .define_method("quantile", &kll_sketch<T>::get_quantile)
36
+ .define_method(
37
+ "quantile",
38
+ [](kll_sketch<T>& self, double fraction) {
39
+ return self.get_quantile(fraction);
40
+ })
37
41
  .define_method(
38
42
  "quantiles",
39
43
  [](kll_sketch<T>& self, Rice::Object obj) {
@@ -20,10 +20,26 @@ using Rice::Arg;
20
20
 
21
21
  void init_theta(Rice::Module& m) {
22
22
  Rice::define_class_under<theta_sketch>(m, "ThetaSketch")
23
- .define_method("empty?", &theta_sketch::is_empty)
24
- .define_method("estimate", &theta_sketch::get_estimate)
25
- .define_method("lower_bound", &theta_sketch::get_lower_bound)
26
- .define_method("upper_bound", &theta_sketch::get_upper_bound);
23
+ .define_method(
24
+ "empty?",
25
+ [](theta_sketch& self) {
26
+ return self.is_empty();
27
+ })
28
+ .define_method(
29
+ "estimate",
30
+ [](theta_sketch& self) {
31
+ return self.get_estimate();
32
+ })
33
+ .define_method(
34
+ "lower_bound",
35
+ [](theta_sketch& self, uint8_t num_std_devs) {
36
+ return self.get_lower_bound(num_std_devs);
37
+ })
38
+ .define_method(
39
+ "upper_bound",
40
+ [](theta_sketch& self, uint8_t num_std_devs) {
41
+ return self.get_upper_bound(num_std_devs);
42
+ });
27
43
 
28
44
  Rice::define_class_under<compact_theta_sketch, theta_sketch>(m, "CompactThetaSketch")
29
45
  .define_singleton_function(
@@ -1,3 +1,3 @@
1
1
  module DataSketches
2
- VERSION = "0.2.2"
2
+ VERSION = "0.2.5"
3
3
  end
@@ -15,9 +15,9 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
- cmake_minimum_required(VERSION 3.12.0)
18
+ cmake_minimum_required(VERSION 3.16.0)
19
19
  project(DataSketches
20
- VERSION 0.12.0
20
+ VERSION 3.4.0
21
21
  LANGUAGES CXX)
22
22
 
23
23
  include(GNUInstallDirs)
@@ -35,6 +35,8 @@ set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
35
35
  #set(CMAKE_VERBOSE_MAKEFILE ON)
36
36
  set(CMAKE_MACOSX_RPATH ON)
37
37
 
38
+ set(CMAKE_CXX_STANDARD 11)
39
+
38
40
  # enable compiler warnings globally
39
41
  # derived from https://foonathan.net/blog/2018/10/17/cmake-warnings.html
40
42
  # and https://arne-mertz.de/2018/07/cmake-properties-options/
@@ -104,12 +106,13 @@ add_subdirectory(theta)
104
106
  add_subdirectory(sampling)
105
107
  add_subdirectory(tuple)
106
108
  add_subdirectory(req)
109
+ add_subdirectory(quantiles)
107
110
 
108
111
  if (WITH_PYTHON)
109
112
  add_subdirectory(python)
110
113
  endif()
111
114
 
112
- target_link_libraries(datasketches INTERFACE hll cpc kll fi theta sampling)
115
+ target_link_libraries(datasketches INTERFACE hll cpc kll fi theta sampling req quantiles)
113
116
 
114
117
  if (COVERAGE)
115
118
  find_program(LCOV_PATH NAMES "lcov")
@@ -124,11 +127,30 @@ endif()
124
127
 
125
128
  # # Installation
126
129
  install(TARGETS datasketches
127
- EXPORT ${PROJCT_NAME}
130
+ EXPORT ${PROJECT_NAME}
128
131
  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
129
132
  INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
130
133
  )
131
134
 
135
+ # Packaging
136
+ include(CMakePackageConfigHelpers)
137
+ write_basic_package_version_file(
138
+ "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
139
+ VERSION ${PROJECT_VERSION}
140
+ COMPATIBILITY SameMajorVersion
141
+ )
142
+ configure_package_config_file(
143
+ cmake/DataSketchesConfig.cmake.in
144
+ "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
145
+ INSTALL_DESTINATION lib/DataSketches/cmake
146
+ PATH_VARS CMAKE_INSTALL_INCLUDEDIR
147
+ )
148
+ install(EXPORT ${PROJECT_NAME} DESTINATION lib/DataSketches/cmake)
149
+ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
150
+ "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
151
+ DESTINATION lib/DataSketches/cmake)
152
+
153
+
132
154
  #set(CPACK_PROJECT_NAME ${PROJECT_NAME})
133
155
  #set(CPACK_PROJECT_VERSION ${PROJECT_VERSION})
134
- #include(CPack)
156
+ include(CPack)
@@ -284,11 +284,48 @@ APPENDIX B: Additional licenses relevant to this product.
284
284
  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
285
285
  DEALINGS IN THE SOFTWARE.
286
286
  -------------------------------------------------------------
287
- Code Locations
287
+ Code Locations:
288
288
  * https://github.com/apache/datasketches-cpp/blob/master/common/test/catch.hpp
289
289
  that is adapted from the above.
290
290
 
291
291
 
292
+ =============================================================
293
+ BSD License
294
+ =============================================================
295
+ Original source code:
296
+ https://github.com/pybind/pybind11/blob/master/LICENSE
297
+
298
+ Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
299
+
300
+ Redistribution and use in source and binary forms, with or without
301
+ modification, are permitted provided that the following conditions are met:
302
+
303
+ 1. Redistributions of source code must retain the above copyright notice, this
304
+ list of conditions and the following disclaimer.
305
+
306
+ 2. Redistributions in binary form must reproduce the above copyright notice,
307
+ this list of conditions and the following disclaimer in the documentation
308
+ and/or other materials provided with the distribution.
309
+
310
+ 3. Neither the name of the copyright holder nor the names of its contributors
311
+ may be used to endorse or promote products derived from this software
312
+ without specific prior written permission.
313
+
314
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
315
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
316
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
317
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
318
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
319
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
320
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
321
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
322
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
323
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
324
+ -------------------------------------------------------------
325
+ Code Locations:
326
+ Found only in the convenience binaries distributed from PyPI, which rely
327
+ on pybind11 code during compilation.
328
+
292
329
 
293
330
  =============================================================
294
331
  Public Domain
@@ -297,7 +334,7 @@ APPENDIX B: Additional licenses relevant to this product.
297
334
  https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
298
335
  Placed in the Public Domain by Austin Appleby
299
336
 
300
- Code Locations
337
+ Code Locations:
301
338
  common/include/MurmurHash3.h
302
339
  that is adapted from the above.
303
340
  -------------------------------------------------------------
@@ -305,7 +342,7 @@ APPENDIX B: Additional licenses relevant to this product.
305
342
  * https://graphics.stanford.edu/~seander/bithacks.html
306
343
  * Placed in the Public Domain by Sean Eron Anderson
307
344
 
308
- Code Locations
345
+ Code Locations:
309
346
  * common/include/ceiling_power_of_2.hpp
310
347
  that is adapted from the above.
311
348
 
@@ -9,11 +9,14 @@ global-exclude .git*
9
9
 
10
10
  recursive-include python/pybind11 *
11
11
 
12
+ graft cmake
12
13
  graft common
13
14
  graft cpc
14
15
  graft fi
15
16
  graft hll
16
17
  graft kll
18
+ graft req
17
19
  graft theta
20
+ graft tuple
18
21
  graft sampling
19
22
  graft python
@@ -1,5 +1,5 @@
1
1
  Apache DataSketches-cpp
2
- Copyright 2020 The Apache Software Foundation
2
+ Copyright 2020-2021 The Apache Software Foundation
3
3
 
4
4
  Copyright 2015-2018 Yahoo
5
5
  Copyright 2019 Verizon Media
@@ -25,18 +25,85 @@ Installing the latest cmake on OSX: brew install cmake
25
25
  Building and running unit tests using cmake for OSX and Linux:
26
26
 
27
27
  ```
28
- $ cd build
29
- $ cmake ..
30
- $ make
31
- $ make test
28
+ $ cmake -S . -B build/Release -DCMAKE_BUILD_TYPE=Release
29
+ $ cmake --build build/Release -t all test
32
30
  ```
33
31
 
34
32
  Building and running unit tests using cmake for Windows from the command line:
35
33
 
36
34
  ```
37
- $ cd build
38
- $ cmake ..
39
- $ cd ..
40
- $ cmake --build build --config Release
41
- $ cmake --build build --config Release --target RUN_TESTS
35
+ $ cd build
36
+ $ cmake ..
37
+ $ cd ..
38
+ $ cmake --build build --config Release
39
+ $ cmake --build build --config Release --target RUN_TESTS
42
40
  ```
41
+
42
+ To install a local distribution (OSX and Linux), use the following command. The
43
+ CMAKE_INSTALL_PREFIX variable controls the destination. If not specified, it
44
+ defaults to installing in /usr/local (/usr/local/include, /usr/local/lib, etc.). In the command below,
45
+ the installation will be in /tmp/install/DataSketches (/tmp/install/DataSketches/include,
46
+ /tmp/install/DataSketches/lib, etc)
47
+
48
+ ```
49
+ $ cmake -S . -B build/Release -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/tmp/install/DataSketches
50
+ $ cmake --build build/Release -t install
51
+ ```
52
+
53
+ To generate an installable package using CMake's built-in CPack packaging tool,
54
+ use the following command. The type of packaging is controlled by the CPACK_GENERATOR
55
+ variable (semicolon-separated list). CMake usually supports packaging types such as RPM,
56
+ DEB, STGZ, TGZ, TZ, ZIP, etc.
57
+
58
+ ```
59
+ $ cmake3 -S . -B build/Release -DCMAKE_BUILD_TYPE=Release -DCPACK_GENERATOR="RPM;STGZ;TGZ"
60
+ $ cmake3 --build build/Release -t package
61
+ ```
62
+
63
+ The DataSketches project can be included in other projects' CMakeLists.txt files in one of two ways.
64
+ If DataSketches has been installed on the host (using an RPM, DEB, "make install" into /usr/local, or some
65
+ other way), then CMake's `find_package` command can be used like this:
66
+
67
+ ```
68
+ find_package(DataSketches 3.2 REQUIRED)
69
+ target_link_libraries(my_dependent_target PUBLIC ${DATASKETCHES_LIB})
70
+ ```
71
+
72
+ When used with find_package, DataSketches exports several variables, including
73
+
74
+ - `DATASKETCHES_VERSION`: The version number of the datasketches package that was imported.
75
+ - `DATASKETCHES_INCLUDE_DIR`: The directory that should be added to access DataSketches include files.
76
+ Because cmake automatically includes the interface directories for included target libraries when
77
+ using `target_link_libraries`, under normal circumstances there will be no need to include this directly.
78
+ - `DATASKETCHES_LIB`: The name of the DataSketches target to include as a dependency. Projects pulling
79
+ in DataSketches should reference this with `target_link_libraries` in order to set up all the correct dependencies
80
+ and include paths.
81
+
82
+ If you don't have DataSketches installed locally, dependent projects can pull it directly
83
+ from GitHub using CMake's `ExternalProject` module. The code would look something like this:
84
+
85
+ ```
86
+ cmake_policy(SET CMP0097 NEW)
87
+ include(ExternalProject)
88
+ ExternalProject_Add(datasketches
89
+ GIT_REPOSITORY https://github.com/apache/datasketches-cpp.git
90
+ GIT_TAG 3.2.0
91
+ GIT_SHALLOW true
92
+ GIT_SUBMODULES ""
93
+ INSTALL_DIR /tmp/datasketches-prefix
94
+ CMAKE_ARGS -DBUILD_TESTS=OFF -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=/tmp/datasketches-prefix
95
+
96
+ # Override the install command to add DESTDIR
97
+ # This is necessary to work around an oddity in the RPM (but not other) package
98
+ # generation, as CMake otherwise picks up the Datasketch files when building
99
+ # an RPM for a dependent package. (RPM scans the directory for files in addition to installing
100
+ # those files referenced in an "install" rule in the cmake file)
101
+ INSTALL_COMMAND env DESTDIR= ${CMAKE_COMMAND} --build . --target install
102
+ )
103
+ ExternalProject_Get_property(datasketches INSTALL_DIR)
104
+ set(datasketches_INSTALL_DIR ${INSTALL_DIR})
105
+ message("Source dir of datasketches = ${datasketches_INSTALL_DIR}")
106
+ target_include_directories(my_dependent_target
107
+ PRIVATE ${datasketches_INSTALL_DIR}/include/DataSketches)
108
+ add_dependencies(my_dependent_target datasketches)
109
+ ```
@@ -0,0 +1,10 @@
1
+ set(DATASKETCHES_VERSION "@PROJECT_VERSION@")
2
+
3
+ @PACKAGE_INIT@
4
+
5
+ include("${CMAKE_CURRENT_LIST_DIR}/DataSketches.cmake")
6
+
7
+ set_and_check(DATASKETCHES_INCLUDE_DIR "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@/DataSketches")
8
+ set(DATASKETCHES_LIB "datasketches")
9
+
10
+ check_required_components("@PROJECT_NAME@")
@@ -29,17 +29,22 @@ target_include_directories(common
29
29
 
30
30
  target_compile_features(common INTERFACE cxx_std_11)
31
31
 
32
- target_sources(common
33
- INTERFACE
34
- ${CMAKE_CURRENT_SOURCE_DIR}/include/common_defs.hpp
35
- ${CMAKE_CURRENT_SOURCE_DIR}/include/memory_operations.hpp
36
- ${CMAKE_CURRENT_SOURCE_DIR}/include/MurmurHash3.h
37
- ${CMAKE_CURRENT_SOURCE_DIR}/include/serde.hpp
38
- ${CMAKE_CURRENT_SOURCE_DIR}/include/count_zeros.hpp
39
- ${CMAKE_CURRENT_SOURCE_DIR}/include/inv_pow2_table.hpp
40
- ${CMAKE_CURRENT_SOURCE_DIR}/include/binomial_bounds.hpp
41
- ${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_back_inserter.hpp
42
- ${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_forward.hpp
43
- ${CMAKE_CURRENT_SOURCE_DIR}/include/ceiling_power_of_2.hpp
44
- )
32
+ install(TARGETS common EXPORT ${PROJECT_NAME})
45
33
 
34
+ install(FILES
35
+ include/common_defs.hpp
36
+ include/memory_operations.hpp
37
+ include/MurmurHash3.h
38
+ include/serde.hpp
39
+ include/count_zeros.hpp
40
+ include/inv_pow2_table.hpp
41
+ include/binomial_bounds.hpp
42
+ include/conditional_back_inserter.hpp
43
+ include/conditional_forward.hpp
44
+ include/ceiling_power_of_2.hpp
45
+ include/bounds_binomial_proportions.hpp
46
+ include/kolmogorov_smirnov.hpp
47
+ include/kolmogorov_smirnov_impl.hpp
48
+ include/quantile_sketch_sorted_view.hpp
49
+ include/quantile_sketch_sorted_view_impl.hpp
50
+ DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
@@ -22,6 +22,7 @@
22
22
 
23
23
  #include <algorithm>
24
24
  #include <cmath>
25
+ #include <stdexcept>
25
26
 
26
27
  /*
27
28
  * This class enables the estimation of error bounds given a sample set size, the sampling
@@ -24,14 +24,30 @@
24
24
  #include <string>
25
25
  #include <memory>
26
26
  #include <iostream>
27
+ #include <random>
28
+ #include <chrono>
27
29
 
28
30
  namespace datasketches {
29
31
 
30
32
  static const uint64_t DEFAULT_SEED = 9001;
31
33
 
34
+ enum resize_factor { X1 = 0, X2, X4, X8 };
35
+
32
36
  template<typename A> using AllocChar = typename std::allocator_traits<A>::template rebind_alloc<char>;
33
37
  template<typename A> using string = std::basic_string<char, std::char_traits<char>, AllocChar<A>>;
34
38
 
39
+ // random bit
40
+ static std::independent_bits_engine<std::mt19937, 1, uint32_t>
41
+ random_bit(static_cast<uint32_t>(std::chrono::system_clock::now().time_since_epoch().count()));
42
+
43
+ // common random declarations
44
+ namespace random_utils {
45
+ static std::random_device rd; // possibly unsafe in MinGW with GCC < 9.2
46
+ static std::mt19937_64 rand(rd());
47
+ static std::uniform_real_distribution<> next_double(0.0, 1.0);
48
+ }
49
+
50
+
35
51
  // utility function to hide unused compiler warning
36
52
  // usually has no additional cost
37
53
  template<typename T> void unused(T&&...) {}
@@ -25,7 +25,8 @@ namespace datasketches {
25
25
  class kolmogorov_smirnov {
26
26
  public:
27
27
  /**
28
- * Computes the raw delta area between two KLL quantile sketches for the Kolmogorov-Smirnov Test.
28
+ * Computes the raw delta area between two quantile sketches for the Kolmogorov-Smirnov Test.
29
+ * Will work for a type-matched pair of KLL or Quantiles sketches of the same parameterized type T.
29
30
  * @param sketch1 first sketch (KLL or Quantiles)
30
31
  * @param sketch2 second sketch, of the same type as sketch1
31
32
  * @return the raw delta between the two quantile sketches
@@ -37,6 +38,7 @@ public:
37
38
  * Computes the adjusted delta area threshold for the Kolmogorov-Smirnov Test.
38
39
  * Adjusts the computed threshold by the error epsilons of the two given sketches.
39
40
  * See <a href="https://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test">Kolmogorov–Smirnov Test</a>
41
+ * Will work for a type-matched pair of KLL or Quantiles sketches of the same parameterized type T.
40
42
  * @param sketch1 first sketch (KLL or Quantiles)
41
43
  * @param sketch2 second sketch, of the same type as sketch1
42
44
  * @param p Target p-value. Typically .001 to .1, e.g., .05.
@@ -46,7 +48,8 @@ public:
46
48
  static double threshold(const Sketch& sketch1, const Sketch& sketch2, double p);
47
49
 
48
50
  /**
49
- * Performs the Kolmogorov-Smirnov Test between two KLL quantiles sketches.
51
+ * Performs the Kolmogorov-Smirnov Test between two quantile sketches.
52
+ * Will work for a type-matched pair of KLL or Quantiles sketches of the same parameterized type T.
50
53
  * Note: if the given sketches have insufficient data or if the sketch sizes are too small,
51
54
  * this will return false.
52
55
  * @param sketch1 first sketch (KLL or Quantiles)
@@ -57,7 +60,6 @@ public:
57
60
  */
58
61
  template<typename Sketch>
59
62
  static bool test(const Sketch& sketch1, const Sketch& sketch2, double p);
60
-
61
63
  };
62
64
 
63
65
  } /* namespace datasketches */
@@ -20,39 +20,36 @@
20
20
  #ifndef KOLMOGOROV_SMIRNOV_IMPL_HPP_
21
21
  #define KOLMOGOROV_SMIRNOV_IMPL_HPP_
22
22
 
23
- namespace datasketches {
23
+ #include <cmath>
24
+ #include <algorithm>
24
25
 
25
- // type resolver
26
- template<typename T, typename C, typename S, typename A>
27
- kll_quantile_calculator<T, C, A> make_quantile_calculator(const kll_sketch<T, C, S, A>& sketch) {
28
- return kll_quantile_calculator<T, C, A>(sketch);
29
- }
26
+ namespace datasketches {
30
27
 
31
28
  template<typename Sketch>
32
29
  double kolmogorov_smirnov::delta(const Sketch& sketch1, const Sketch& sketch2) {
33
- using Comparator = typename Sketch::comparator;
34
- auto calc1 = make_quantile_calculator(sketch1);
35
- auto calc2 = make_quantile_calculator(sketch2);
36
- auto it1 = calc1.begin();
37
- auto it2 = calc2.begin();
30
+ auto comparator = sketch1.get_comparator(); // assuming the same comparator in sketch2
31
+ auto view1 = sketch1.get_sorted_view(true);
32
+ auto view2 = sketch2.get_sorted_view(true);
33
+ auto it1 = view1.begin();
34
+ auto it2 = view2.begin();
38
35
  const auto n1 = sketch1.get_n();
39
36
  const auto n2 = sketch2.get_n();
40
37
  double delta = 0;
41
- while (it1 != calc1.end() && it2 != calc2.end()) {
38
+ while (it1 != view1.end() && it2 != view2.end()) {
42
39
  const double norm_cum_wt1 = static_cast<double>((*it1).second) / n1;
43
40
  const double norm_cum_wt2 = static_cast<double>((*it2).second) / n2;
44
41
  delta = std::max(delta, std::abs(norm_cum_wt1 - norm_cum_wt2));
45
- if (Comparator()((*it1).first, (*it2).first)) {
42
+ if (comparator((*it1).first, (*it2).first)) {
46
43
  ++it1;
47
- } else if (Comparator()((*it2).first, (*it1).first)) {
44
+ } else if (comparator((*it2).first, (*it1).first)) {
48
45
  ++it2;
49
46
  } else {
50
47
  ++it1;
51
48
  ++it2;
52
49
  }
53
50
  }
54
- const double norm_cum_wt1 = it1 == calc1.end() ? 1 : static_cast<double>((*it1).second) / n1;
55
- const double norm_cum_wt2 = it2 == calc2.end() ? 1 : static_cast<double>((*it2).second) / n2;
51
+ const double norm_cum_wt1 = it1 == view1.end() ? 1 : static_cast<double>((*it1).second) / n1;
52
+ const double norm_cum_wt2 = it2 == view2.end() ? 1 : static_cast<double>((*it2).second) / n2;
56
53
  delta = std::max(delta, std::abs(norm_cum_wt1 - norm_cum_wt2));
57
54
  return delta;
58
55
  }