datasketches 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +7 -7
  4. data/ext/datasketches/theta_wrapper.cpp +20 -4
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +22 -3
  7. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  8. data/vendor/datasketches-cpp/README.md +76 -9
  9. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  10. data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
  11. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  12. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
  13. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -6
  14. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  15. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  16. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +4 -2
  17. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  18. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
  19. data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
  20. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +4 -2
  21. data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
  22. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +4 -2
  23. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  24. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +13 -7
  25. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +8 -6
  26. data/vendor/datasketches-cpp/setup.py +1 -1
  27. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  28. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +89 -22
  29. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  30. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
  31. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
  32. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
  33. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +146 -51
  34. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
  35. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  36. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +8 -2
  37. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  38. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -4
  39. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -9
  40. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  41. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  42. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  43. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  44. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  45. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  46. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
  47. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +400 -0
  48. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +23 -11
  49. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  50. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  51. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
  52. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  53. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  54. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
  55. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +33 -14
  56. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  57. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  58. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
  59. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
  60. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +7 -0
  61. metadata +11 -6
  62. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  63. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ea00e444de6dc1bebc2b8cf878a250f08717d55eaa55f63f6bec28f4be2af00d
4
- data.tar.gz: 161b9089e3b8d0dbd99cfb6cc0af463934c42ba85f4788a08306369966f28571
3
+ metadata.gz: 897dbc30f97ce17f0415630b6347a0092dac05196b0ef61e80939410d65cdf17
4
+ data.tar.gz: 61302f9cadde8a8badc97b455eb5c32d913c3b1fea8ed571e2da93a29e65afa9
5
5
  SHA512:
6
- metadata.gz: '09eede1e6e4c0fe57c0116c4e8873670192fea845783687ca34890bd9358af9dd19a535774ab7dd667055cf6acd0d3913f044dcf2274e0ec092b33307250a74a'
7
- data.tar.gz: b8bcaeb7af0d27e836f21941663229a2750922914c4f31f4ffbd6e3c3876320f9ce92916eb9730e02227b87e8f244bc08e5bc38541bf4ef4e3485203fff01942
6
+ metadata.gz: 4d541ba7f96a86f3f8de44f069f6e39d51ba6f28fa5d8c8d1d99a8434a95c5fe1a26470e6b062f348808fe5c0a444134d0dc96385437b4cb946c4a92044a2a5c
7
+ data.tar.gz: bc1bdacb7cbe69f9bb1382fd2ac7019bec04baf444dc963d63a594e989fd201d9eb9aadd0e463ac4efef8f7ba53915a594d8fb00f74ae295674b9024269a0406
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.4 (2021-12-28)
2
+
3
+ - Updated DataSketches to 3.3.0
4
+
1
5
  ## 0.2.3 (2021-09-29)
2
6
 
3
7
  - Updated DataSketches to 3.2.0
data/README.md CHANGED
@@ -1,8 +1,8 @@
1
- # DataSketches
1
+ # DataSketches Ruby
2
2
 
3
3
  [DataSketches](https://datasketches.apache.org/) - sketch data structures - for Ruby
4
4
 
5
- [![Build Status](https://github.com/ankane/datasketches/workflows/build/badge.svg?branch=master)](https://github.com/ankane/datasketches/actions)
5
+ [![Build Status](https://github.com/ankane/datasketches-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/datasketches-ruby/actions)
6
6
 
7
7
  ## Installation
8
8
 
@@ -292,22 +292,22 @@ This library is modeled after the DataSketches [Python API](https://github.com/a
292
292
 
293
293
  ## History
294
294
 
295
- View the [changelog](https://github.com/ankane/datasketches/blob/master/CHANGELOG.md)
295
+ View the [changelog](https://github.com/ankane/datasketches-ruby/blob/master/CHANGELOG.md)
296
296
 
297
297
  ## Contributing
298
298
 
299
299
  Everyone is encouraged to help improve this project. Here are a few ways you can help:
300
300
 
301
- - [Report bugs](https://github.com/ankane/datasketches/issues)
302
- - Fix bugs and [submit pull requests](https://github.com/ankane/datasketches/pulls)
301
+ - [Report bugs](https://github.com/ankane/datasketches-ruby/issues)
302
+ - Fix bugs and [submit pull requests](https://github.com/ankane/datasketches-ruby/pulls)
303
303
  - Write, clarify, or fix documentation
304
304
  - Suggest or add new features
305
305
 
306
306
  To get started with development:
307
307
 
308
308
  ```sh
309
- git clone --recursive https://github.com/ankane/datasketches.git
310
- cd datasketches
309
+ git clone --recursive https://github.com/ankane/datasketches-ruby.git
310
+ cd datasketches-ruby
311
311
  bundle install
312
312
  bundle exec rake compile
313
313
  bundle exec rake test
@@ -20,10 +20,26 @@ using Rice::Arg;
20
20
 
21
21
  void init_theta(Rice::Module& m) {
22
22
  Rice::define_class_under<theta_sketch>(m, "ThetaSketch")
23
- .define_method("empty?", &theta_sketch::is_empty)
24
- .define_method("estimate", &theta_sketch::get_estimate)
25
- .define_method("lower_bound", &theta_sketch::get_lower_bound)
26
- .define_method("upper_bound", &theta_sketch::get_upper_bound);
23
+ .define_method(
24
+ "empty?",
25
+ [](theta_sketch& self) {
26
+ return self.is_empty();
27
+ })
28
+ .define_method(
29
+ "estimate",
30
+ [](theta_sketch& self) {
31
+ return self.get_estimate();
32
+ })
33
+ .define_method(
34
+ "lower_bound",
35
+ [](theta_sketch& self, uint8_t num_std_devs) {
36
+ return self.get_lower_bound(num_std_devs);
37
+ })
38
+ .define_method(
39
+ "upper_bound",
40
+ [](theta_sketch& self, uint8_t num_std_devs) {
41
+ return self.get_upper_bound(num_std_devs);
42
+ });
27
43
 
28
44
  Rice::define_class_under<compact_theta_sketch, theta_sketch>(m, "CompactThetaSketch")
29
45
  .define_singleton_function(
@@ -1,3 +1,3 @@
1
1
  module DataSketches
2
- VERSION = "0.2.3"
2
+ VERSION = "0.2.4"
3
3
  end
@@ -17,7 +17,7 @@
17
17
 
18
18
  cmake_minimum_required(VERSION 3.12.0)
19
19
  project(DataSketches
20
- VERSION 0.12.0
20
+ VERSION 3.2.0
21
21
  LANGUAGES CXX)
22
22
 
23
23
  include(GNUInstallDirs)
@@ -126,11 +126,30 @@ endif()
126
126
 
127
127
  # # Installation
128
128
  install(TARGETS datasketches
129
- EXPORT ${PROJCT_NAME}
129
+ EXPORT ${PROJECT_NAME}
130
130
  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
131
131
  INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
132
132
  )
133
133
 
134
+ # Packaging
135
+ include(CMakePackageConfigHelpers)
136
+ write_basic_package_version_file(
137
+ "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
138
+ VERSION ${PROJECT_VERSION}
139
+ COMPATIBILITY SameMajorVersion
140
+ )
141
+ configure_package_config_file(
142
+ cmake/DataSketchesConfig.cmake.in
143
+ "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
144
+ INSTALL_DESTINATION lib/DataSketches/cmake
145
+ PATH_VARS CMAKE_INSTALL_INCLUDEDIR
146
+ )
147
+ install(EXPORT ${PROJECT_NAME} DESTINATION lib/DataSketches/cmake)
148
+ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
149
+ "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
150
+ DESTINATION lib/DataSketches/cmake)
151
+
152
+
134
153
  #set(CPACK_PROJECT_NAME ${PROJECT_NAME})
135
154
  #set(CPACK_PROJECT_VERSION ${PROJECT_VERSION})
136
- #include(CPack)
155
+ include(CPack)
@@ -9,11 +9,14 @@ global-exclude .git*
9
9
 
10
10
  recursive-include python/pybind11 *
11
11
 
12
+ graft cmake
12
13
  graft common
13
14
  graft cpc
14
15
  graft fi
15
16
  graft hll
16
17
  graft kll
18
+ graft req
17
19
  graft theta
20
+ graft tuple
18
21
  graft sampling
19
22
  graft python
@@ -25,18 +25,85 @@ Installing the latest cmake on OSX: brew install cmake
25
25
  Building and running unit tests using cmake for OSX and Linux:
26
26
 
27
27
  ```
28
- $ cd build
29
- $ cmake ..
30
- $ make
31
- $ make test
28
+ $ cmake -S . -B build/Release -DCMAKE_BUILD_TYPE=Release
29
+ $ cmake --build build/Release -t all test
32
30
  ```
33
31
 
34
32
  Building and running unit tests using cmake for Windows from the command line:
35
33
 
36
34
  ```
37
- $ cd build
38
- $ cmake ..
39
- $ cd ..
40
- $ cmake --build build --config Release
41
- $ cmake --build build --config Release --target RUN_TESTS
35
+ $ cd build
36
+ $ cmake ..
37
+ $ cd ..
38
+ $ cmake --build build --config Release
39
+ $ cmake --build build --config Release --target RUN_TESTS
42
40
  ```
41
+
42
+ To install a local distribution (OSX and Linux), use the following command. The
43
+ CMAKE_INSTALL_PREFIX variable controls the destination. If not specified, it
44
+ defaults to installing in /usr/local (/usr/local/include, /usr/local/lib, etc). In the command below,
45
+ the installation will be in /tmp/install/DataSketches (/tmp/install/DataSketches/include,
46
+ /tmp/install/DataSketches/lib, etc)
47
+
48
+ ```
49
+ $ cmake -S . -B build/Release -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/tmp/install/DataSketches
50
+ $ cmake --build build/Release -t install
51
+ ```
52
+
53
+ To generate an installable package using cmake's built-in cpack packaging tool,
54
+ use the following command. The type of packaging is controlled by the CPACK_GENERATOR
55
+ variable (semi-colon separated list). Cmake usually supports packaging types such as RPM,
56
+ DEB, STGZ, TGZ, TZ, ZIP, etc.
57
+
58
+ ```
59
+ $ cmake3 -S . -B build/Release -DCMAKE_BUILD_TYPE=Release -DCPACK_GENERATOR="RPM;STGZ;TGZ"
60
+ $ cmake3 --build build/Release -t package
61
+ ```
62
+
63
+ The DataSketches project can be included in other projects' CMakeLists.txt files in one of two ways.
64
+ If DataSketches has been installed on the host (using an RPM, DEB, "make install" into /usr/local, or some
65
+ other way), then CMake's `find_package` command can be used like this:
66
+
67
+ ```
68
+ find_package(DataSketches 3.2 REQUIRED)
69
+ target_link_libraries(my_dependent_target PUBLIC ${DATASKETCHES_LIB})
70
+ ```
71
+
72
+ When used with find_package, DataSketches exports several variables, including
73
+
74
+ - `DATASKETCHES_VERSION`: The version number of the datasketches package that was imported.
75
+ - `DATASKETCHES_INCLUDE_DIR`: The directory that should be added to access DataSketches include files.
76
+ Because cmake automatically includes the interface directories for included target libraries when
77
+ using `target_link_libraries`, under normal circumstances there will be no need to include this directly.
78
+ - `DATASKETCHES_LIB`: The name of the DataSketches target to include as a dependency. Projects pulling
79
+ in DataSketches should reference this with `target_link_libraries` in order to set up all the correct dependencies
80
+ and include paths.
81
+
82
+ If you don't have DataSketches installed locally, dependent projects can pull it directly
83
+ from GitHub using CMake's `ExternalProject` module. The code would look something like this:
84
+
85
+ ```
86
+ cmake_policy(SET CMP0097 NEW)
87
+ include(ExternalProject)
88
+ ExternalProject_Add(datasketches
89
+ GIT_REPOSITORY https://github.com/apache/datasketches-cpp.git
90
+ GIT_TAG 3.2.0
91
+ GIT_SHALLOW true
92
+ GIT_SUBMODULES ""
93
+ INSTALL_DIR /tmp/datasketches-prefix
94
+ CMAKE_ARGS -DBUILD_TESTS=OFF -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=/tmp/datasketches-prefix
95
+
96
+ # Override the install command to add DESTDIR
97
+ # This is necessary to work around an oddity in the RPM (but not other) package
98
+ # generation, as CMake otherwise picks up the Datasketch files when building
99
+ # an RPM for a dependent package. (RPM scans the directory for files in addition to installing
100
+ # those files referenced in an "install" rule in the cmake file)
101
+ INSTALL_COMMAND env DESTDIR= ${CMAKE_COMMAND} --build . --target install
102
+ )
103
+ ExternalProject_Get_property(datasketches INSTALL_DIR)
104
+ set(datasketches_INSTALL_DIR ${INSTALL_DIR})
105
+ message("Source dir of datasketches = ${datasketches_INSTALL_DIR}")
106
+ target_include_directories(my_dependent_target
107
+ PRIVATE ${datasketches_INSTALL_DIR}/include/DataSketches)
108
+ add_dependencies(my_dependent_target datasketches)
109
+ ```
@@ -0,0 +1,10 @@
1
+ set(DATASKETCHES_VERSION "@PROJECT_VERSION@")
2
+
3
+ @PACKAGE_INIT@
4
+
5
+ include("${CMAKE_CURRENT_LIST_DIR}/DataSketches.cmake")
6
+
7
+ set_and_check(DATASKETCHES_INCLUDE_DIR "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@/DataSketches")
8
+ set(DATASKETCHES_LIB "datasketches")
9
+
10
+ check_required_components("@PROJECT_NAME@")
@@ -29,17 +29,18 @@ target_include_directories(common
29
29
 
30
30
  target_compile_features(common INTERFACE cxx_std_11)
31
31
 
32
- target_sources(common
33
- INTERFACE
34
- ${CMAKE_CURRENT_SOURCE_DIR}/include/common_defs.hpp
35
- ${CMAKE_CURRENT_SOURCE_DIR}/include/memory_operations.hpp
36
- ${CMAKE_CURRENT_SOURCE_DIR}/include/MurmurHash3.h
37
- ${CMAKE_CURRENT_SOURCE_DIR}/include/serde.hpp
38
- ${CMAKE_CURRENT_SOURCE_DIR}/include/count_zeros.hpp
39
- ${CMAKE_CURRENT_SOURCE_DIR}/include/inv_pow2_table.hpp
40
- ${CMAKE_CURRENT_SOURCE_DIR}/include/binomial_bounds.hpp
41
- ${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_back_inserter.hpp
42
- ${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_forward.hpp
43
- ${CMAKE_CURRENT_SOURCE_DIR}/include/ceiling_power_of_2.hpp
44
- )
32
+ install(TARGETS common EXPORT ${PROJECT_NAME})
45
33
 
34
+ install(FILES
35
+ include/common_defs.hpp
36
+ include/memory_operations.hpp
37
+ include/MurmurHash3.h
38
+ include/serde.hpp
39
+ include/count_zeros.hpp
40
+ include/inv_pow2_table.hpp
41
+ include/binomial_bounds.hpp
42
+ include/conditional_back_inserter.hpp
43
+ include/conditional_forward.hpp
44
+ include/ceiling_power_of_2.hpp
45
+ include/bounds_binomial_proportions.hpp
46
+ DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
@@ -32,43 +32,23 @@ target_include_directories(cpc
32
32
  target_link_libraries(cpc INTERFACE common)
33
33
  target_compile_features(cpc INTERFACE cxx_std_11)
34
34
 
35
- set(cpc_HEADERS "")
36
- list(APPEND cpc_HEADERS "include/compression_data.hpp")
37
- list(APPEND cpc_HEADERS "include/cpc_common.hpp")
38
- list(APPEND cpc_HEADERS "include/cpc_compressor.hpp")
39
- list(APPEND cpc_HEADERS "include/cpc_compressor_impl.hpp")
40
- list(APPEND cpc_HEADERS "include/cpc_confidence.hpp")
41
- list(APPEND cpc_HEADERS "include/cpc_sketch.hpp")
42
- list(APPEND cpc_HEADERS "include/cpc_sketch_impl.hpp")
43
- list(APPEND cpc_HEADERS "include/cpc_union.hpp")
44
- list(APPEND cpc_HEADERS "include/cpc_union_impl.hpp")
45
- list(APPEND cpc_HEADERS "include/cpc_util.hpp")
46
- list(APPEND cpc_HEADERS "include/icon_estimator.hpp")
47
- list(APPEND cpc_HEADERS "include/kxp_byte_lookup.hpp")
48
- list(APPEND cpc_HEADERS "include/u32_table.hpp")
49
- list(APPEND cpc_HEADERS "include/u32_table_impl.hpp")
50
-
51
35
  install(TARGETS cpc
52
36
  EXPORT ${PROJECT_NAME}
53
37
  )
54
38
 
55
- install(FILES ${cpc_HEADERS}
39
+ install(FILES
40
+ include/compression_data.hpp
41
+ include/cpc_common.hpp
42
+ include/cpc_compressor.hpp
43
+ include/cpc_compressor_impl.hpp
44
+ include/cpc_confidence.hpp
45
+ include/cpc_sketch.hpp
46
+ include/cpc_sketch_impl.hpp
47
+ include/cpc_union.hpp
48
+ include/cpc_union_impl.hpp
49
+ include/cpc_util.hpp
50
+ include/icon_estimator.hpp
51
+ include/kxp_byte_lookup.hpp
52
+ include/u32_table.hpp
53
+ include/u32_table_impl.hpp
56
54
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
57
-
58
- target_sources(cpc
59
- INTERFACE
60
- ${CMAKE_CURRENT_SOURCE_DIR}/include/compression_data.hpp
61
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_common.hpp
62
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_compressor.hpp
63
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_compressor_impl.hpp
64
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_confidence.hpp
65
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_sketch.hpp
66
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_sketch_impl.hpp
67
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_union.hpp
68
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_union_impl.hpp
69
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_util.hpp
70
- ${CMAKE_CURRENT_SOURCE_DIR}/include/icon_estimator.hpp
71
- ${CMAKE_CURRENT_SOURCE_DIR}/include/kxp_byte_lookup.hpp
72
- ${CMAKE_CURRENT_SOURCE_DIR}/include/u32_table.hpp
73
- ${CMAKE_CURRENT_SOURCE_DIR}/include/u32_table_impl.hpp
74
- )
@@ -381,7 +381,9 @@ void cpc_sketch_alloc<A>::refresh_kxp(const uint64_t* bit_matrix) {
381
381
 
382
382
  template<typename A>
383
383
  string<A> cpc_sketch_alloc<A>::to_string() const {
384
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
384
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
385
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
386
+ std::ostringstream os;
385
387
  os << "### CPC sketch summary:" << std::endl;
386
388
  os << " lg_k : " << std::to_string(lg_k) << std::endl;
387
389
  os << " seed hash : " << std::hex << compute_seed_hash(seed) << std::dec << std::endl;
@@ -392,14 +394,14 @@ string<A> cpc_sketch_alloc<A>::to_string() const {
392
394
  os << " HIP estimate : " << hip_est_accum << std::endl;
393
395
  os << " kxp : " << kxp << std::endl;
394
396
  }
395
- os << " intresting col : " << std::to_string(first_interesting_column) << std::endl;
397
+ os << " interesting col: " << std::to_string(first_interesting_column) << std::endl;
396
398
  os << " table entries : " << surprising_value_table.get_num_items() << std::endl;
397
399
  os << " window : " << (sliding_window.size() == 0 ? "not " : "") << "allocated" << std::endl;
398
400
  if (sliding_window.size() > 0) {
399
401
  os << " window offset : " << std::to_string(window_offset) << std::endl;
400
402
  }
401
403
  os << "### End sketch summary" << std::endl;
402
- return os.str();
404
+ return string<A>(os.str().c_str(), sliding_window.get_allocator());
403
405
  }
404
406
 
405
407
  template<typename A>
@@ -34,7 +34,7 @@ bit_matrix(allocator)
34
34
  if (lg_k < CPC_MIN_LG_K || lg_k > CPC_MAX_LG_K) {
35
35
  throw std::invalid_argument("lg_k must be >= " + std::to_string(CPC_MIN_LG_K) + " and <= " + std::to_string(CPC_MAX_LG_K) + ": " + std::to_string(lg_k));
36
36
  }
37
- accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(lg_k, seed, allocator);
37
+ accumulator = new (AllocCpc(allocator).allocate(1)) cpc_sketch_alloc<A>(lg_k, seed, allocator);
38
38
  }
39
39
 
40
40
  template<typename A>
@@ -45,7 +45,7 @@ accumulator(other.accumulator),
45
45
  bit_matrix(other.bit_matrix)
46
46
  {
47
47
  if (accumulator != nullptr) {
48
- accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(*other.accumulator);
48
+ accumulator = new (AllocCpc(accumulator->get_allocator()).allocate(1)) cpc_sketch_alloc<A>(*other.accumulator);
49
49
  }
50
50
  }
51
51
 
@@ -62,8 +62,9 @@ bit_matrix(std::move(other.bit_matrix))
62
62
  template<typename A>
63
63
  cpc_union_alloc<A>::~cpc_union_alloc() {
64
64
  if (accumulator != nullptr) {
65
+ AllocCpc allocator(accumulator->get_allocator());
65
66
  accumulator->~cpc_sketch_alloc<A>();
66
- AllocCpc().deallocate(accumulator, 1);
67
+ allocator.deallocate(accumulator, 1);
67
68
  }
68
69
  }
69
70
 
@@ -181,7 +182,7 @@ template<typename A>
181
182
  cpc_sketch_alloc<A> cpc_union_alloc<A>::get_result_from_accumulator() const {
182
183
  if (lg_k != accumulator->get_lg_k()) throw std::logic_error("lg_k != accumulator->lg_k");
183
184
  if (accumulator->get_num_coupons() == 0) {
184
- return cpc_sketch_alloc<A>(lg_k, seed);
185
+ return cpc_sketch_alloc<A>(lg_k, seed, accumulator->get_allocator());
185
186
  }
186
187
  if (accumulator->determine_flavor() != cpc_sketch_alloc<A>::flavor::SPARSE) throw std::logic_error("wrong flavor");
187
188
  cpc_sketch_alloc<A> copy(*accumulator);
@@ -242,8 +243,9 @@ cpc_sketch_alloc<A> cpc_union_alloc<A>::get_result_from_bit_matrix() const {
242
243
  template<typename A>
243
244
  void cpc_union_alloc<A>::switch_to_bit_matrix() {
244
245
  bit_matrix = accumulator->build_bit_matrix();
246
+ AllocCpc allocator(accumulator->get_allocator());
245
247
  accumulator->~cpc_sketch_alloc<A>();
246
- AllocCpc().deallocate(accumulator, 1);
248
+ allocator.deallocate(accumulator, 1);
247
249
  accumulator = nullptr;
248
250
  }
249
251
 
@@ -324,7 +326,7 @@ void cpc_union_alloc<A>::reduce_k(uint8_t new_lg_k) {
324
326
  if (bit_matrix.size() > 0) throw std::logic_error("bit_matrix is not expected");
325
327
  if (!accumulator->is_empty()) {
326
328
  cpc_sketch_alloc<A> old_accumulator(*accumulator);
327
- *accumulator = cpc_sketch_alloc<A>(new_lg_k, seed);
329
+ *accumulator = cpc_sketch_alloc<A>(new_lg_k, seed, old_accumulator.get_allocator());
328
330
  walk_table_updating_sketch(old_accumulator.surprising_value_table);
329
331
  }
330
332
  lg_k = new_lg_k;
@@ -25,6 +25,7 @@
25
25
  #include <catch.hpp>
26
26
 
27
27
  #include "cpc_sketch.hpp"
28
+ #include "cpc_union.hpp"
28
29
  #include "test_allocator.hpp"
29
30
 
30
31
  namespace datasketches {
@@ -234,4 +235,20 @@ TEST_CASE("cpc sketch allocation: serialize deserialize sliding, bytes", "[cpc_s
234
235
  REQUIRE(test_allocator_net_allocations == 0);
235
236
  }
236
237
 
238
+ using cpc_union_test_alloc = cpc_union_alloc<test_allocator<uint8_t>>;
239
+
240
+ TEST_CASE("cpc sketch allocation: union") {
241
+ cpc_sketch_test_alloc s1(11, DEFAULT_SEED, 0);
242
+ s1.update(1);
243
+
244
+ cpc_sketch_test_alloc s2(11, DEFAULT_SEED, 0);
245
+ s2.update(2);
246
+
247
+ cpc_union_test_alloc u(11, DEFAULT_SEED, 0);
248
+ u.update(s1);
249
+ u.update(s2);
250
+ auto s3 = u.get_result();
251
+ REQUIRE_FALSE(s3.is_empty());
252
+ }
253
+
237
254
  } /* namespace datasketches */
@@ -32,23 +32,13 @@ target_include_directories(fi
32
32
  target_link_libraries(fi INTERFACE common)
33
33
  target_compile_features(fi INTERFACE cxx_std_11)
34
34
 
35
- set(fi_HEADERS "")
36
- list(APPEND fi_HEADERS "include/frequent_items_sketch.hpp")
37
- list(APPEND fi_HEADERS "include/frequent_items_sketch_impl.hpp")
38
- list(APPEND fi_HEADERS "include/reverse_purge_hash_map.hpp")
39
- list(APPEND fi_HEADERS "include/reverse_purge_hash_map_impl.hpp")
40
-
41
35
  install(TARGETS fi
42
36
  EXPORT ${PROJECT_NAME}
43
37
  )
44
38
 
45
- install(FILES ${fi_HEADERS}
39
+ install(FILES
40
+ include/frequent_items_sketch.hpp
41
+ include/frequent_items_sketch_impl.hpp
42
+ include/reverse_purge_hash_map.hpp
43
+ include/reverse_purge_hash_map_impl.hpp
46
44
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
47
-
48
- target_sources(fi
49
- INTERFACE
50
- ${CMAKE_CURRENT_SOURCE_DIR}/include/frequent_items_sketch.hpp
51
- ${CMAKE_CURRENT_SOURCE_DIR}/include/frequent_items_sketch_impl.hpp
52
- ${CMAKE_CURRENT_SOURCE_DIR}/include/reverse_purge_hash_map.hpp
53
- ${CMAKE_CURRENT_SOURCE_DIR}/include/reverse_purge_hash_map_impl.hpp
54
- )
@@ -421,7 +421,9 @@ void frequent_items_sketch<T, W, H, E, S, A>::check_size(uint8_t lg_cur_size, ui
421
421
 
422
422
  template<typename T, typename W, typename H, typename E, typename S, typename A>
423
423
  string<A> frequent_items_sketch<T, W, H, E, S, A>::to_string(bool print_items) const {
424
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
424
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
425
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
426
+ std::ostringstream os;
425
427
  os << "### Frequent items sketch summary:" << std::endl;
426
428
  os << " lg cur map size : " << (int) map.get_lg_cur_size() << std::endl;
427
429
  os << " lg max map size : " << (int) map.get_lg_max_size() << std::endl;
@@ -444,7 +446,7 @@ string<A> frequent_items_sketch<T, W, H, E, S, A>::to_string(bool print_items) c
444
446
  }
445
447
  os << "### End items" << std::endl;
446
448
  }
447
- return os.str();
449
+ return string<A>(os.str().c_str(), map.get_allocator());
448
450
  }
449
451
 
450
452
  // version for integral signed type
@@ -32,64 +32,41 @@ target_include_directories(hll
32
32
  target_link_libraries(hll INTERFACE common)
33
33
  target_compile_features(hll INTERFACE cxx_std_11)
34
34
 
35
- # TODO: would be useful if this didn't need to be reproduced in target_sources(), too
36
- set(hll_HEADERS "")
37
- list(APPEND hll_HEADERS "include/hll.hpp;include/AuxHashMap.hpp;include/CompositeInterpolationXTable.hpp")
38
- list(APPEND hll_HEADERS "include/hll.private.hpp;include/HllSketchImplFactory.hpp")
39
- list(APPEND hll_HEADERS "include/CouponHashSet.hpp;include/CouponList.hpp")
40
- list(APPEND hll_HEADERS "include/CubicInterpolation.hpp;include/HarmonicNumbers.hpp;include/Hll4Array.hpp")
41
- list(APPEND hll_HEADERS "include/Hll6Array.hpp;include/Hll8Array.hpp;include/HllArray.hpp")
42
- list(APPEND hll_HEADERS "include/HllSketchImpl.hpp")
43
- list(APPEND hll_HEADERS "include/HllUtil.hpp;include/coupon_iterator.hpp")
44
- list(APPEND hll_HEADERS "include/RelativeErrorTables.hpp;include/AuxHashMap-internal.hpp")
45
- list(APPEND hll_HEADERS "include/CompositeInterpolationXTable-internal.hpp")
46
- list(APPEND hll_HEADERS "include/CouponHashSet-internal.hpp;include/CouponList-internal.hpp")
47
- list(APPEND hll_HEADERS "include/CubicInterpolation-internal.hpp;include/HarmonicNumbers-internal.hpp")
48
- list(APPEND hll_HEADERS "include/Hll4Array-internal.hpp;include/Hll6Array-internal.hpp")
49
- list(APPEND hll_HEADERS "include/Hll8Array-internal.hpp;include/HllArray-internal.hpp")
50
- list(APPEND hll_HEADERS "include/HllSketch-internal.hpp")
51
- list(APPEND hll_HEADERS "include/HllSketchImpl-internal.hpp;include/HllUnion-internal.hpp")
52
- list(APPEND hll_HEADERS "include/coupon_iterator-internal.hpp;include/RelativeErrorTables-internal.hpp")
53
-
54
35
  install(TARGETS hll
55
36
  EXPORT ${PROJECT_NAME}
56
37
  )
57
38
 
58
- install(FILES ${hll_HEADERS}
39
+ install(FILES
40
+ include/hll.hpp
41
+ include/AuxHashMap.hpp
42
+ include/CompositeInterpolationXTable.hpp
43
+ include/hll.private.hpp
44
+ include/HllSketchImplFactory.hpp
45
+ include/CouponHashSet.hpp
46
+ include/CouponList.hpp
47
+ include/CubicInterpolation.hpp
48
+ include/HarmonicNumbers.hpp
49
+ include/Hll4Array.hpp
50
+ include/Hll6Array.hpp
51
+ include/Hll8Array.hpp
52
+ include/HllArray.hpp
53
+ include/HllSketchImpl.hpp
54
+ include/HllUtil.hpp
55
+ include/coupon_iterator.hpp
56
+ include/RelativeErrorTables.hpp
57
+ include/AuxHashMap-internal.hpp
58
+ include/CompositeInterpolationXTable-internal.hpp
59
+ include/CouponHashSet-internal.hpp
60
+ include/CouponList-internal.hpp
61
+ include/CubicInterpolation-internal.hpp
62
+ include/HarmonicNumbers-internal.hpp
63
+ include/Hll4Array-internal.hpp
64
+ include/Hll6Array-internal.hpp
65
+ include/Hll8Array-internal.hpp
66
+ include/HllArray-internal.hpp
67
+ include/HllSketch-internal.hpp
68
+ include/HllSketchImpl-internal.hpp
69
+ include/HllUnion-internal.hpp
70
+ include/coupon_iterator-internal.hpp
71
+ include/RelativeErrorTables-internal.hpp
59
72
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
60
-
61
- target_sources(hll
62
- INTERFACE
63
- ${CMAKE_CURRENT_SOURCE_DIR}/include/hll.hpp
64
- ${CMAKE_CURRENT_SOURCE_DIR}/include/hll.private.hpp
65
- ${CMAKE_CURRENT_SOURCE_DIR}/include/AuxHashMap.hpp
66
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CompositeInterpolationXTable.hpp
67
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CouponHashSet.hpp
68
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CouponList.hpp
69
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CubicInterpolation.hpp
70
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HarmonicNumbers.hpp
71
- ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll4Array.hpp
72
- ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll6Array.hpp
73
- ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll8Array.hpp
74
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllArray.hpp
75
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImpl.hpp
76
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImplFactory.hpp
77
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllUtil.hpp
78
- ${CMAKE_CURRENT_SOURCE_DIR}/include/RelativeErrorTables.hpp
79
- ${CMAKE_CURRENT_SOURCE_DIR}/include/coupon_iterator.hpp
80
- ${CMAKE_CURRENT_SOURCE_DIR}/include/AuxHashMap-internal.hpp
81
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CompositeInterpolationXTable-internal.hpp
82
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CouponHashSet-internal.hpp
83
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CouponList-internal.hpp
84
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CubicInterpolation-internal.hpp
85
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HarmonicNumbers-internal.hpp
86
- ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll4Array-internal.hpp
87
- ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll6Array-internal.hpp
88
- ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll8Array-internal.hpp
89
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllArray-internal.hpp
90
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketch-internal.hpp
91
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImpl-internal.hpp
92
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllUnion-internal.hpp
93
- ${CMAKE_CURRENT_SOURCE_DIR}/include/RelativeErrorTables-internal.hpp
94
- ${CMAKE_CURRENT_SOURCE_DIR}/include/coupon_iterator-internal.hpp
95
- )
@@ -246,10 +246,12 @@ string<A> hll_sketch_alloc<A>::to_string(const bool summary,
246
246
  const bool detail,
247
247
  const bool aux_detail,
248
248
  const bool all) const {
249
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
249
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
250
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
251
+ std::stringstream os;
250
252
  if (summary) {
251
253
  os << "### HLL sketch summary:" << std::endl
252
- << " Log Config K : " << get_lg_config_k() << std::endl
254
+ << " Log Config K : " << std::to_string(get_lg_config_k()) << std::endl
253
255
  << " Hll Target : " << type_as_string() << std::endl
254
256
  << " Current Mode : " << mode_as_string() << std::endl
255
257
  << " LB : " << get_lower_bound(1) << std::endl
@@ -258,7 +260,7 @@ string<A> hll_sketch_alloc<A>::to_string(const bool summary,
258
260
  << " OutOfOrder flag: " << (is_out_of_order_flag() ? "true" : "false") << std::endl;
259
261
  if (get_current_mode() == HLL) {
260
262
  HllArray<A>* hllArray = (HllArray<A>*) sketch_impl;
261
- os << " CurMin : " << hllArray->getCurMin() << std::endl
263
+ os << " CurMin : " << std::to_string(hllArray->getCurMin()) << std::endl
262
264
  << " NumAtCurMin : " << hllArray->getNumAtCurMin() << std::endl
263
265
  << " HipAccum : " << hllArray->getHipAccum() << std::endl
264
266
  << " KxQ0 : " << hllArray->getKxQ0() << std::endl
@@ -338,7 +340,7 @@ string<A> hll_sketch_alloc<A>::to_string(const bool summary,
338
340
  }
339
341
  }
340
342
 
341
- return os.str();
343
+ return string<A>(os.str().c_str(), sketch_impl->getAllocator());
342
344
  }
343
345
 
344
346
  template<typename A>