datasketches 0.1.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (205)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +17 -0
  3. data/LICENSE +40 -3
  4. data/NOTICE +1 -1
  5. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  6. data/ext/datasketches/ext.cpp +1 -1
  7. data/ext/datasketches/ext.h +4 -0
  8. data/ext/datasketches/extconf.rb +1 -1
  9. data/ext/datasketches/fi_wrapper.cpp +6 -8
  10. data/ext/datasketches/hll_wrapper.cpp +13 -14
  11. data/ext/datasketches/kll_wrapper.cpp +28 -76
  12. data/ext/datasketches/theta_wrapper.cpp +27 -41
  13. data/ext/datasketches/vo_wrapper.cpp +4 -6
  14. data/lib/datasketches/version.rb +1 -1
  15. data/vendor/datasketches-cpp/CMakeLists.txt +10 -0
  16. data/vendor/datasketches-cpp/LICENSE +40 -3
  17. data/vendor/datasketches-cpp/NOTICE +1 -1
  18. data/vendor/datasketches-cpp/README.md +4 -4
  19. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +18 -7
  20. data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
  21. data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
  22. data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
  23. data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
  24. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
  25. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  26. data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
  27. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  28. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  29. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  30. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +13 -3
  31. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +20 -20
  32. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +116 -105
  33. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +22 -6
  34. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +140 -101
  35. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  36. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +20 -20
  37. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -16
  38. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +6 -6
  39. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +10 -10
  40. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +21 -21
  41. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  42. data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
  43. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  44. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
  45. data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
  46. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  47. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +102 -105
  48. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  49. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +141 -125
  50. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  51. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +5 -5
  52. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  53. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +81 -109
  54. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +25 -24
  55. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
  56. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +5 -5
  57. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +89 -105
  58. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +13 -13
  59. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +130 -165
  60. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +21 -22
  61. data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
  62. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  63. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
  64. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  65. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +88 -83
  66. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
  67. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +34 -45
  68. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +7 -8
  69. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +41 -52
  70. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +7 -8
  71. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +220 -251
  72. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +42 -42
  73. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +36 -38
  74. data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
  75. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +15 -14
  76. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +47 -44
  77. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +62 -87
  78. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +121 -128
  79. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  80. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
  81. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
  82. data/vendor/datasketches-cpp/hll/include/hll.hpp +25 -53
  83. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +8 -8
  84. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +36 -36
  85. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +28 -28
  86. data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
  87. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +37 -37
  88. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +57 -61
  89. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
  90. data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
  91. data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
  92. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
  93. data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
  94. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
  95. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +40 -25
  96. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +50 -6
  97. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +164 -136
  98. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
  99. data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
  100. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
  101. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  102. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +178 -88
  103. data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
  104. data/vendor/datasketches-cpp/pyproject.toml +4 -2
  105. data/vendor/datasketches-cpp/python/CMakeLists.txt +12 -6
  106. data/vendor/datasketches-cpp/python/README.md +52 -49
  107. data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
  108. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
  109. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  110. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -6
  111. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +4 -2
  112. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  113. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +38 -28
  114. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
  115. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
  116. data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -2
  117. data/vendor/datasketches-cpp/python/tests/kll_test.py +5 -5
  118. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  119. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  120. data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
  121. data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
  122. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  123. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +18 -8
  124. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  125. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +488 -0
  126. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  127. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  128. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  129. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  130. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  131. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  132. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  133. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  134. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  135. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  136. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  137. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  138. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +19 -13
  139. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +130 -127
  140. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  141. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +41 -49
  142. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  143. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  144. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
  145. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -44
  146. data/vendor/datasketches-cpp/setup.py +11 -6
  147. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  148. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +3 -2
  149. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  150. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
  151. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
  152. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  153. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  154. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  155. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +11 -4
  156. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  157. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +26 -28
  158. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  159. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  160. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  161. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  162. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +24 -36
  163. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  164. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  165. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +163 -256
  166. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +250 -651
  167. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  168. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  169. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +6 -1
  170. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  171. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +10 -21
  172. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +44 -30
  173. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  174. data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
  175. data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
  176. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +60 -5
  177. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +74 -235
  178. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
  179. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  180. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
  181. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  182. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  183. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +57 -70
  184. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  185. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
  186. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +18 -21
  187. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +13 -16
  188. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +7 -6
  189. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +3 -3
  190. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
  191. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +13 -16
  192. metadata +51 -36
  193. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  194. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  195. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  196. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  197. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  198. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  199. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  200. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  201. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  202. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  203. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  204. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  205. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -35,6 +35,8 @@ set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
35
35
  #set(CMAKE_VERBOSE_MAKEFILE ON)
36
36
  set(CMAKE_MACOSX_RPATH ON)
37
37
 
38
+ set(CMAKE_CXX_STANDARD 11)
39
+
38
40
  # enable compiler warnings globally
39
41
  # derived from https://foonathan.net/blog/2018/10/17/cmake-warnings.html
40
42
  # and https://arne-mertz.de/2018/07/cmake-properties-options/
@@ -70,6 +72,13 @@ if(COVERAGE AND CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
70
72
  add_link_options(--coverage)
71
73
  endif()
72
74
 
75
+ option(SANITIZE "Run sanitization checks (g++/clang only)" OFF)
76
+ if(SANITIZE AND CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
77
+ add_compile_options(-fsanitize=${SANITIZE})
78
+ add_link_options(-fsanitize=${SANITIZE})
79
+ endif()
80
+
81
+
73
82
  # set default build type to Release
74
83
  # Derived from: https://blog.kitware.com/cmake-and-the-default-build-type/
75
84
  set(default_build_type "Release")
@@ -96,6 +105,7 @@ add_subdirectory(fi)
96
105
  add_subdirectory(theta)
97
106
  add_subdirectory(sampling)
98
107
  add_subdirectory(tuple)
108
+ add_subdirectory(req)
99
109
 
100
110
  if (WITH_PYTHON)
101
111
  add_subdirectory(python)
@@ -284,11 +284,48 @@ APPENDIX B: Additional licenses relevant to this product.
284
284
  ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
285
285
  DEALINGS IN THE SOFTWARE.
286
286
  -------------------------------------------------------------
287
- Code Locations
287
+ Code Locations:
288
288
  * https://github.com/apache/datasketches-cpp/blob/master/common/test/catch.hpp
289
289
  that is adapted from the above.
290
290
 
291
291
 
292
+ =============================================================
293
+ BSD License
294
+ =============================================================
295
+ Original source code:
296
+ https://github.com/pybind/pybind11/blob/master/LICENSE
297
+
298
+ Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
299
+
300
+ Redistribution and use in source and binary forms, with or without
301
+ modification, are permitted provided that the following conditions are met:
302
+
303
+ 1. Redistributions of source code must retain the above copyright notice, this
304
+ list of conditions and the following disclaimer.
305
+
306
+ 2. Redistributions in binary form must reproduce the above copyright notice,
307
+ this list of conditions and the following disclaimer in the documentation
308
+ and/or other materials provided with the distribution.
309
+
310
+ 3. Neither the name of the copyright holder nor the names of its contributors
311
+ may be used to endorse or promote products derived from this software
312
+ without specific prior written permission.
313
+
314
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
315
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
316
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
317
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
318
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
319
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
320
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
321
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
322
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
323
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
324
+ -------------------------------------------------------------
325
+ Code Locations:
326
+ Found only in the convenience binaries distributed from PyPI, which rely
327
+ on pybind11 code during compilation.
328
+
292
329
 
293
330
  =============================================================
294
331
  Public Domain
@@ -297,7 +334,7 @@ APPENDIX B: Additional licenses relevant to this product.
297
334
  https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
298
335
  Placed in the Public Domain by Austin Appleby
299
336
 
300
- Code Locations
337
+ Code Locations:
301
338
  common/include/MurmurHash3.h
302
339
  that is adapted from the above.
303
340
  -------------------------------------------------------------
@@ -305,7 +342,7 @@ APPENDIX B: Additional licenses relevant to this product.
305
342
  * https://graphics.stanford.edu/~seander/bithacks.html
306
343
  * Placed in the Public Domain by Sean Eron Anderson
307
344
 
308
- Code Locations
345
+ Code Locations:
309
346
  * common/include/ceiling_power_of_2.hpp
310
347
  that is adapted from the above.
311
348
 
@@ -1,5 +1,5 @@
1
1
  Apache DataSketches-cpp
2
- Copyright 2020 The Apache Software Foundation
2
+ Copyright 2020-2021 The Apache Software Foundation
3
3
 
4
4
  Copyright 2015-2018 Yahoo
5
5
  Copyright 2019 Verizon Media
@@ -1,18 +1,18 @@
1
- # DataSketches Core C++ Library Component
2
- This is the core C++ component of the DataSketches library. It contains all of the key sketching algorithms that are in the Java component and can be accessed directly from user applications.
1
+ # Apache DataSketches Core C++ Library Component
2
+ This is the core C++ component of the Apache DataSketches library. It contains all of the key sketching algorithms that are in the Java component and can be accessed directly from user applications.
3
3
 
4
4
  This component is also a dependency of other components of the library that create adaptors for target systems, such as PostgreSQL.
5
5
 
6
6
  Note that we have a parallel core component for Java implementations of the same sketch algorithms,
7
7
  [datasketches-java](https://github.com/apache/datasketches-java).
8
8
 
9
- Please visit the main [DataSketches website](https://datasketches.apache.org) for more information.
9
+ Please visit the main [Apache DataSketches website](https://datasketches.apache.org) for more information.
10
10
 
11
11
  If you are interested in making contributions to this site please see our [Community](https://datasketches.apache.org/docs/Community/) page for how to contact us.
12
12
 
13
13
  ---
14
14
 
15
- This code requires C++11. It was tested with GCC 4.8.5 (standard in RedHat at the time of this writing), GCC 8.2.0 and Apple LLVM version 10.0.1 (clang-1001.0.46.4)
15
+ This code requires C++11.
16
16
 
17
17
  This includes Python bindings. For the Python interface, see the README notes in [the python subdirectory](https://github.com/apache/datasketches-cpp/tree/master/python).
18
18
 
@@ -3,6 +3,7 @@
3
3
  // * Changed input seed in MurmurHash3_x64_128 to uint64_t
4
4
  // * Define and use HashState reference to return result
5
5
  // * Made entire hash function defined inline
6
+ // * Added compute_seed_hash
6
7
  //-----------------------------------------------------------------------------
7
8
  // MurmurHash3 was written by Austin Appleby, and is placed in the public
8
9
  // domain. The author hereby disclaims copyright to this source code.
@@ -15,6 +16,8 @@
15
16
  #ifndef _MURMURHASH3_H_
16
17
  #define _MURMURHASH3_H_
17
18
 
19
+ #include <cstring>
20
+
18
21
  //-----------------------------------------------------------------------------
19
22
  // Platform-specific functions and macros
20
23
 
@@ -75,9 +78,11 @@ typedef struct {
75
78
  // Block read - if your platform needs to do endian-swapping or can only
76
79
  // handle aligned reads, do the conversion here
77
80
 
78
- FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
81
+ FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, size_t i )
79
82
  {
80
- return p[i];
83
+ uint64_t res;
84
+ memcpy(&res, p + i, sizeof(res));
85
+ return res;
81
86
  }
82
87
 
83
88
  //-----------------------------------------------------------------------------
@@ -94,7 +99,7 @@ FORCE_INLINE uint64_t fmix64 ( uint64_t k )
94
99
  return k;
95
100
  }
96
101
 
97
- FORCE_INLINE void MurmurHash3_x64_128(const void* key, int lenBytes, uint64_t seed, HashState& out) {
102
+ FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t seed, HashState& out) {
98
103
  static const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
99
104
  static const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
100
105
 
@@ -105,13 +110,13 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, int lenBytes, uint64_t se
105
110
 
106
111
  // Number of full 128-bit blocks of 16 bytes.
107
112
  // Possible exclusion of a remainder of up to 15 bytes.
108
- const int nblocks = lenBytes >> 4; // bytes / 16
113
+ const size_t nblocks = lenBytes >> 4; // bytes / 16
109
114
 
110
115
  // Process the 128-bit blocks (the body) into the hash
111
116
  const uint64_t* blocks = (const uint64_t*)(data);
112
- for (int i = 0; i < nblocks; ++i) { // 16 bytes per block
113
- uint64_t k1 = getblock64(blocks,i*2+0);
114
- uint64_t k2 = getblock64(blocks,i*2+1);
117
+ for (size_t i = 0; i < nblocks; ++i) { // 16 bytes per block
118
+ uint64_t k1 = getblock64(blocks, i * 2 + 0);
119
+ uint64_t k2 = getblock64(blocks, i * 2 + 1);
115
120
 
116
121
  k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
117
122
  out.h1 = ROTL64(out.h1,27);
@@ -170,4 +175,10 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, int lenBytes, uint64_t se
170
175
 
171
176
  //-----------------------------------------------------------------------------
172
177
 
178
+ FORCE_INLINE uint16_t compute_seed_hash(uint64_t seed) {
179
+ HashState hashes;
180
+ MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
181
+ return static_cast<uint16_t>(hashes.h1 & 0xffff);
182
+ }
183
+
173
184
  #endif // _MURMURHASH3_H_
@@ -381,7 +381,7 @@ private:
381
381
  // The following computes an approximation to the lower bound of a Frequentist
382
382
  // confidence interval based on the tails of the Binomial distribution.
383
383
  static double compute_approx_binomial_lower_bound(unsigned long long num_samples, double theta, unsigned num_std_devs) {
384
- if (theta == 1) return num_samples;
384
+ if (theta == 1) return static_cast<double>(num_samples);
385
385
  if (num_samples == 0) return 0;
386
386
  if (num_samples == 1) {
387
387
  const double delta = delta_of_num_std_devs[num_std_devs];
@@ -395,24 +395,24 @@ private:
395
395
  }
396
396
  // at this point we know 2 <= num_samples <= 120
397
397
  if (theta > (1 - 1e-5)) { // empirically-determined threshold
398
- return num_samples;
398
+ return static_cast<double>(num_samples);
399
399
  }
400
400
  if (theta < (num_samples / 360.0)) { // empirically-determined threshold
401
401
  // here we use the Gaussian approximation, but with a modified num_std_devs
402
- const unsigned index = 3 * num_samples + (num_std_devs - 1);
402
+ const unsigned index = 3 * static_cast<unsigned>(num_samples) + (num_std_devs - 1);
403
403
  const double raw_lb = cont_classic_lb(num_samples, theta, lb_equiv_table[index]);
404
404
  return raw_lb - 0.5; // fake round down
405
405
  }
406
406
  // This is the most difficult range to approximate; we will compute an "exact" LB.
407
407
  // We know that est <= 360, so specialNStar() shouldn't be ridiculously slow.
408
408
  const double delta = delta_of_num_std_devs[num_std_devs];
409
- return special_n_star(num_samples, theta, delta); // no need to round
409
+ return static_cast<double>(special_n_star(num_samples, theta, delta)); // no need to round
410
410
  }
411
411
 
412
412
  // The following computes an approximation to the upper bound of a Frequentist
413
413
  // confidence interval based on the tails of the Binomial distribution.
414
414
  static double compute_approx_binomial_upper_bound(unsigned long long num_samples, double theta, unsigned num_std_devs) {
415
- if (theta == 1) return num_samples;
415
+ if (theta == 1) return static_cast<double>(num_samples);
416
416
  if (num_samples == 0) {
417
417
  const double delta = delta_of_num_std_devs[num_std_devs];
418
418
  const double raw_ub = std::log(delta) / std::log(1 - theta);
@@ -425,18 +425,18 @@ private:
425
425
  }
426
426
  // at this point we know 2 <= num_samples <= 120
427
427
  if (theta > (1 - 1e-5)) { // empirically-determined threshold
428
- return num_samples + 1;
428
+ return static_cast<double>(num_samples + 1);
429
429
  }
430
430
  if (theta < (num_samples / 360.0)) { // empirically-determined threshold
431
431
  // here we use the Gaussian approximation, but with a modified num_std_devs
432
- const unsigned index = 3 * num_samples + (num_std_devs - 1);
432
+ const unsigned index = 3 * static_cast<unsigned>(num_samples) + (num_std_devs - 1);
433
433
  const double raw_ub = cont_classic_ub(num_samples, theta, ub_equiv_table[index]);
434
434
  return raw_ub + 0.5; // fake round up
435
435
  }
436
436
  // This is the most difficult range to approximate; we will compute an "exact" UB.
437
437
  // We know that est <= 360, so specialNPrimeF() shouldn't be ridiculously slow.
438
438
  const double delta = delta_of_num_std_devs[num_std_devs];
439
- return special_n_prime_f(num_samples, theta, delta); // no need to round
439
+ return static_cast<double>(special_n_prime_f(num_samples, theta, delta)); // no need to round
440
440
  }
441
441
 
442
442
  static void check_theta(double theta) {
@@ -110,14 +110,14 @@ public:
110
110
  * @return the lower bound of the approximate Clopper-Pearson confidence interval for the
111
111
  * unknown success probability.
112
112
  */
113
- static inline double approximate_lower_bound_on_p(long n, long k, double num_std_devs) {
113
+ static inline double approximate_lower_bound_on_p(uint64_t n, uint64_t k, double num_std_devs) {
114
114
  check_inputs(n, k);
115
115
  if (n == 0) { return 0.0; } // the coin was never flipped, so we know nothing
116
116
  else if (k == 0) { return 0.0; }
117
117
  else if (k == 1) { return (exact_lower_bound_on_p_k_eq_1(n, delta_of_num_stdevs(num_std_devs))); }
118
118
  else if (k == n) { return (exact_lower_bound_on_p_k_eq_n(n, delta_of_num_stdevs(num_std_devs))); }
119
119
  else {
120
- double x = abramowitz_stegun_formula_26p5p22((n - k) + 1, k, (-1.0 * num_std_devs));
120
+ double x = abramowitz_stegun_formula_26p5p22((n - k) + 1.0, static_cast<double>(k), (-1.0 * num_std_devs));
121
121
  return (1.0 - x); // which is p
122
122
  }
123
123
  }
@@ -145,18 +145,18 @@ public:
145
145
  * @return the upper bound of the approximate Clopper-Pearson confidence interval for the
146
146
  * unknown success probability.
147
147
  */
148
- static inline double approximate_upper_bound_on_p(long n, long k, double num_std_devs) {
148
+ static inline double approximate_upper_bound_on_p(uint64_t n, uint64_t k, double num_std_devs) {
149
149
  check_inputs(n, k);
150
150
  if (n == 0) { return 1.0; } // the coin was never flipped, so we know nothing
151
151
  else if (k == n) { return 1.0; }
152
152
  else if (k == (n - 1)) {
153
- return (exactU_upper_bound_on_p_k_eq_minusone(n, delta_of_num_stdevs(num_std_devs)));
153
+ return (exact_upper_bound_on_p_k_eq_minusone(n, delta_of_num_stdevs(num_std_devs)));
154
154
  }
155
155
  else if (k == 0) {
156
156
  return (exact_upper_bound_on_p_k_eq_zero(n, delta_of_num_stdevs(num_std_devs)));
157
157
  }
158
158
  else {
159
- double x = abramowitz_stegun_formula_26p5p22(n - k, k + 1, num_std_devs);
159
+ double x = abramowitz_stegun_formula_26p5p22(static_cast<double>(n - k), k + 1.0, num_std_devs);
160
160
  return (1.0 - x); // which is p
161
161
  }
162
162
  }
@@ -167,7 +167,7 @@ public:
167
167
  * @param k is the number of successes. Must be non-negative, and cannot exceed n.
168
168
  * @return the estimate of the unknown binomial proportion.
169
169
  */
170
- static inline double estimate_unknown_p(long n, long k) {
170
+ static inline double estimate_unknown_p(uint64_t n, uint64_t k) {
171
171
  check_inputs(n, k);
172
172
  if (n == 0) { return 0.5; } // the coin was never flipped, so we know nothing
173
173
  else { return ((double) k / (double) n); }
@@ -193,9 +193,7 @@ public:
193
193
  }
194
194
 
195
195
  private:
196
- static inline void check_inputs(long n, long k) {
197
- if (n < 0) { throw std::invalid_argument("N must be non-negative"); }
198
- if (k < 0) { throw std::invalid_argument("K must be non-negative"); }
196
+ static inline void check_inputs(uint64_t n, uint64_t k) {
199
197
  if (k > n) { throw std::invalid_argument("K cannot exceed N"); }
200
198
  }
201
199
 
@@ -251,8 +249,7 @@ private:
251
249
  // and it is worth keeping it that way so that it will always be easy to verify
252
250
  // that the formula was typed in correctly.
253
251
 
254
- static inline double abramowitz_stegun_formula_26p5p22(double a, double b,
255
- double yp) {
252
+ static inline double abramowitz_stegun_formula_26p5p22(double a, double b, double yp) {
256
253
  const double b2m1 = (2.0 * b) - 1.0;
257
254
  const double a2m1 = (2.0 * a) - 1.0;
258
255
  const double lambda = ((yp * yp) - 3.0) / 6.0;
@@ -268,19 +265,19 @@ private:
268
265
 
269
266
  // Formulas for some special cases.
270
267
 
271
- static inline double exact_upper_bound_on_p_k_eq_zero(double n, double delta) {
268
+ static inline double exact_upper_bound_on_p_k_eq_zero(uint64_t n, double delta) {
272
269
  return (1.0 - pow(delta, (1.0 / n)));
273
270
  }
274
271
 
275
- static inline double exact_lower_bound_on_p_k_eq_n(double n, double delta) {
272
+ static inline double exact_lower_bound_on_p_k_eq_n(uint64_t n, double delta) {
276
273
  return (pow(delta, (1.0 / n)));
277
274
  }
278
275
 
279
- static inline double exact_lower_bound_on_p_k_eq_1(double n, double delta) {
276
+ static inline double exact_lower_bound_on_p_k_eq_1(uint64_t n, double delta) {
280
277
  return (1.0 - pow((1.0 - delta), (1.0 / n)));
281
278
  }
282
279
 
283
- static inline double exactU_upper_bound_on_p_k_eq_minusone(double n, double delta) {
280
+ static inline double exact_upper_bound_on_p_k_eq_minusone(uint64_t n, double delta) {
284
281
  return (pow((1.0 - delta), (1.0 / n)));
285
282
  }
286
283
 
@@ -23,11 +23,14 @@
23
23
  #include <cstdint>
24
24
  #include <string>
25
25
  #include <memory>
26
+ #include <iostream>
26
27
 
27
28
  namespace datasketches {
28
29
 
29
30
  static const uint64_t DEFAULT_SEED = 9001;
30
31
 
32
+ enum resize_factor { X1 = 0, X2, X4, X8 };
33
+
31
34
  template<typename A> using AllocChar = typename std::allocator_traits<A>::template rebind_alloc<char>;
32
35
  template<typename A> using string = std::basic_string<char, std::char_traits<char>, AllocChar<A>>;
33
36
 
@@ -46,6 +49,29 @@ constexpr uint8_t lg_size_from_count(uint32_t n, double load_factor) {
46
49
  return log2(n) + ((n > static_cast<uint32_t>((1 << (log2(n) + 1)) * load_factor)) ? 2 : 1);
47
50
  }
48
51
 
52
+ // stream helpers to hide casts
53
+ template<typename T>
54
+ static inline T read(std::istream& is) {
55
+ T value;
56
+ is.read(reinterpret_cast<char*>(&value), sizeof(T));
57
+ return value;
58
+ }
59
+
60
+ template<typename T>
61
+ static inline void read(std::istream& is, T* ptr, size_t size_bytes) {
62
+ is.read(reinterpret_cast<char*>(ptr), size_bytes);
63
+ }
64
+
65
+ template<typename T>
66
+ static inline void write(std::ostream& os, T& value) {
67
+ os.write(reinterpret_cast<const char*>(&value), sizeof(T));
68
+ }
69
+
70
+ template<typename T>
71
+ static inline void write(std::ostream& os, const T* ptr, size_t size_bytes) {
72
+ os.write(reinterpret_cast<const char*>(ptr), size_bytes);
73
+ }
74
+
49
75
  } // namespace
50
76
 
51
77
  #endif // _COMMON_DEFS_HPP_
@@ -38,29 +38,41 @@ fwd_type<T1, T2> conditional_forward(T2&& value) {
38
38
  // Forward container as iterators
39
39
 
40
40
  template<typename Container>
41
- auto forward_begin(Container&& c) ->
42
- typename std::enable_if<std::is_lvalue_reference<Container>::value, decltype(c.begin())>::type
41
+ auto forward_begin(Container&& c) -> typename std::enable_if<
42
+ std::is_lvalue_reference<Container>::value ||
43
+ std::is_same<typename std::remove_reference<Container>::type::const_iterator, decltype(c.begin())>::value,
44
+ decltype(c.begin())
45
+ >::type
43
46
  {
44
47
  return c.begin();
45
48
  }
46
49
 
47
50
  template<typename Container>
48
- auto forward_begin(Container&& c) ->
49
- typename std::enable_if<!std::is_lvalue_reference<Container>::value, decltype(std::make_move_iterator(c.begin()))>::type
51
+ auto forward_begin(Container&& c) -> typename std::enable_if<
52
+ !std::is_lvalue_reference<Container>::value &&
53
+ !std::is_same<typename std::remove_reference<Container>::type::const_iterator, decltype(c.begin())>::value,
54
+ decltype(std::make_move_iterator(c.begin()))
55
+ >::type
50
56
  {
51
57
  return std::make_move_iterator(c.begin());
52
58
  }
53
59
 
54
60
  template<typename Container>
55
- auto forward_end(Container&& c) ->
56
- typename std::enable_if<std::is_lvalue_reference<Container>::value, decltype(c.end())>::type
61
+ auto forward_end(Container&& c) -> typename std::enable_if<
62
+ std::is_lvalue_reference<Container>::value ||
63
+ std::is_same<typename std::remove_reference<Container>::type::const_iterator, decltype(c.begin())>::value,
64
+ decltype(c.end())
65
+ >::type
57
66
  {
58
67
  return c.end();
59
68
  }
60
69
 
61
70
  template<typename Container>
62
- auto forward_end(Container&& c) ->
63
- typename std::enable_if<!std::is_lvalue_reference<Container>::value, decltype(std::make_move_iterator(c.end()))>::type
71
+ auto forward_end(Container&& c) -> typename std::enable_if<
72
+ !std::is_lvalue_reference<Container>::value &&
73
+ !std::is_same<typename std::remove_reference<Container>::type::const_iterator, decltype(c.begin())>::value,
74
+ decltype(std::make_move_iterator(c.end()))
75
+ >::type
64
76
  {
65
77
  return std::make_move_iterator(c.end());
66
78
  }
@@ -94,7 +94,7 @@ static inline uint8_t count_leading_zeros_in_u64(uint64_t input) {
94
94
  static inline uint8_t count_trailing_zeros_in_u32(uint32_t input) {
95
95
  for (int i = 0; i < 4; i++) {
96
96
  const int byte = input & 0xff;
97
- if (byte != 0) return (i << 3) + byte_trailing_zeros_table[byte];
97
+ if (byte != 0) return static_cast<uint8_t>((i << 3) + byte_trailing_zeros_table[byte]);
98
98
  input >>= 8;
99
99
  }
100
100
  return 32;
@@ -103,7 +103,7 @@ static inline uint8_t count_trailing_zeros_in_u32(uint32_t input) {
103
103
  static inline uint8_t count_trailing_zeros_in_u64(uint64_t input) {
104
104
  for (int i = 0; i < 8; i++) {
105
105
  const int byte = input & 0xff;
106
- if (byte != 0) return (i << 3) + byte_trailing_zeros_table[byte];
106
+ if (byte != 0) return static_cast<uint8_t>((i << 3) + byte_trailing_zeros_table[byte]);
107
107
  input >>= 8;
108
108
  }
109
109
  return 64;
@@ -52,6 +52,18 @@ static inline size_t copy_to_mem(const void* src, void* dst, size_t size) {
52
52
  return size;
53
53
  }
54
54
 
55
+ template<typename T>
56
+ static inline size_t copy_to_mem(const T& item, void* dst) {
57
+ memcpy(dst, &item, sizeof(T));
58
+ return sizeof(T);
59
+ }
60
+
61
+ template<typename T>
62
+ static inline size_t copy_from_mem(const void* src, T& item) {
63
+ memcpy(&item, src, sizeof(T));
64
+ return sizeof(T);
65
+ }
66
+
55
67
  } // namespace
56
68
 
57
69
  #endif // _MEMORY_OPERATIONS_HPP_
@@ -51,7 +51,7 @@ struct serde<T, typename std::enable_if<std::is_arithmetic<T>::value>::type> {
51
51
  bool failure = false;
52
52
  try {
53
53
  os.write(reinterpret_cast<const char*>(items), sizeof(T) * num);
54
- } catch (std::ostream::failure& e) {
54
+ } catch (std::ostream::failure&) {
55
55
  failure = true;
56
56
  }
57
57
  if (failure || !os.good()) {
@@ -62,7 +62,7 @@ struct serde<T, typename std::enable_if<std::is_arithmetic<T>::value>::type> {
62
62
  bool failure = false;
63
63
  try {
64
64
  is.read((char*)items, sizeof(T) * num);
65
- } catch (std::istream::failure& e) {
65
+ } catch (std::istream::failure&) {
66
66
  failure = true;
67
67
  }
68
68
  if (failure || !is.good()) {
@@ -99,11 +99,11 @@ struct serde<std::string> {
99
99
  bool failure = false;
100
100
  try {
101
101
  for (; i < num && os.good(); i++) {
102
- uint32_t length = items[i].size();
102
+ uint32_t length = static_cast<uint32_t>(items[i].size());
103
103
  os.write((char*)&length, sizeof(length));
104
104
  os.write(items[i].c_str(), length);
105
105
  }
106
- } catch (std::ostream::failure& e) {
106
+ } catch (std::ostream::failure&) {
107
107
  failure = true;
108
108
  }
109
109
  if (failure || !os.good()) {
@@ -121,12 +121,12 @@ struct serde<std::string> {
121
121
  std::string str;
122
122
  str.reserve(length);
123
123
  for (uint32_t j = 0; j < length; j++) {
124
- str.push_back(is.get());
124
+ str.push_back(static_cast<char>(is.get()));
125
125
  }
126
126
  if (!is.good()) { break; }
127
127
  new (&items[i]) std::string(std::move(str));
128
128
  }
129
- } catch (std::istream::failure& e) {
129
+ } catch (std::istream::failure&) {
130
130
  failure = true;
131
131
  }
132
132
  if (failure || !is.good()) {
@@ -143,7 +143,7 @@ struct serde<std::string> {
143
143
  size_t serialize(void* ptr, size_t capacity, const std::string* items, unsigned num) const {
144
144
  size_t bytes_written = 0;
145
145
  for (unsigned i = 0; i < num; ++i) {
146
- const uint32_t length = items[i].size();
146
+ const uint32_t length = static_cast<uint32_t>(items[i].size());
147
147
  const size_t new_bytes = length + sizeof(length);
148
148
  check_memory_size(bytes_written + new_bytes, capacity);
149
149
  memcpy(ptr, &length, sizeof(length));
@@ -15,6 +15,10 @@
15
15
  # specific language governing permissions and limitations
16
16
  # under the License.
17
17
 
18
+ # two parts here, the common test code for other parts to use,
19
+ # and an integration test using the other parts of the library.
20
+
21
+ # common dependencies for tests
18
22
  add_library(common_test OBJECT "")
19
23
 
20
24
  set_target_properties(common_test PROPERTIES
@@ -36,3 +40,23 @@ target_sources(common_test
36
40
  ${CMAKE_CURRENT_SOURCE_DIR}/catch_runner.cpp
37
41
  ${CMAKE_CURRENT_SOURCE_DIR}/test_allocator.cpp
38
42
  )
43
+
44
+ # now the integration test part
45
+ add_executable(integration_test)
46
+
47
+ target_link_libraries(integration_test cpc fi hll kll req sampling theta tuple common_test)
48
+
49
+ set_target_properties(integration_test PROPERTIES
50
+ CXX_STANDARD 11
51
+ CXX_STANDARD_REQUIRED YES
52
+ )
53
+
54
+ add_test(
55
+ NAME integration_test
56
+ COMMAND integration_test
57
+ )
58
+
59
+ target_sources(integration_test
60
+ PRIVATE
61
+ integration_test.cpp
62
+ )
@@ -0,0 +1,77 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch.hpp>
21
+
22
+ #include "cpc_sketch.hpp"
23
+ #include "cpc_union.hpp"
24
+ #include "frequent_items_sketch.hpp"
25
+ #include "hll.hpp"
26
+ #include "kll_sketch.hpp"
27
+ #include "req_sketch.hpp"
28
+ #include "var_opt_sketch.hpp"
29
+ #include "var_opt_union.hpp"
30
+ #include "theta_sketch.hpp"
31
+ #include "theta_union.hpp"
32
+ #include "theta_intersection.hpp"
33
+ #include "theta_a_not_b.hpp"
34
+ #include "tuple_sketch.hpp"
35
+ #include "tuple_union.hpp"
36
+ #include "tuple_intersection.hpp"
37
+ #include "tuple_a_not_b.hpp"
38
+
39
+ namespace datasketches {
40
+
41
+ template<typename Summary>
42
+ struct subtracting_intersection_policy {
43
+ void operator()(Summary& summary, const Summary& other) const {
44
+ summary -= other;
45
+ }
46
+ };
47
+
48
+ using tuple_intersection_float = tuple_intersection<float, subtracting_intersection_policy<float>>;
49
+
50
+ TEST_CASE("integration: declare all sketches", "[integration]") {
51
+ cpc_sketch cpc(12);
52
+ cpc_union cpc_u(12);
53
+
54
+ frequent_items_sketch<std::string> fi(100);
55
+
56
+ hll_sketch hll(13);
57
+ hll_union hll_u(13);
58
+
59
+ kll_sketch<double> kll(200);
60
+
61
+ req_sketch<double> req(12);
62
+
63
+ var_opt_sketch<std::string> vo(100);
64
+ var_opt_union<std::string> vo_u(100);
65
+
66
+ update_theta_sketch theta = update_theta_sketch::builder().build();
67
+ theta_union theta_u = theta_union::builder().build();
68
+ theta_intersection theta_i;
69
+ theta_a_not_b theta_anb;
70
+
71
+ auto tuple = update_tuple_sketch<float>::builder().build();
72
+ auto tuple_u = tuple_union<float>::builder().build();
73
+ tuple_intersection_float tuple_i;
74
+ tuple_a_not_b<float> tuple_anb;
75
+ }
76
+
77
+ } /* namespace datasketches */