datasketches 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/README.md +7 -7
  4. data/ext/datasketches/theta_wrapper.cpp +20 -4
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +22 -3
  7. data/vendor/datasketches-cpp/MANIFEST.in +3 -0
  8. data/vendor/datasketches-cpp/README.md +76 -9
  9. data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
  10. data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
  11. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
  12. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
  13. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -6
  14. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
  15. data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
  16. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +4 -2
  17. data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
  18. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
  19. data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
  20. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +4 -2
  21. data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
  22. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +4 -2
  23. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
  24. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +13 -7
  25. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +8 -6
  26. data/vendor/datasketches-cpp/setup.py +1 -1
  27. data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
  28. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +89 -22
  29. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
  30. data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
  31. data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
  32. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
  33. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +146 -51
  34. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
  35. data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
  36. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +8 -2
  37. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
  38. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -4
  39. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -9
  40. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  41. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
  42. data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
  43. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
  44. data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
  45. data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
  46. data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
  47. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +400 -0
  48. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +23 -11
  49. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
  50. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
  51. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
  52. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
  53. data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
  54. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
  55. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +33 -14
  56. data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
  57. data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
  58. data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
  59. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
  60. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +7 -0
  61. metadata +11 -6
  62. data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
  63. data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ea00e444de6dc1bebc2b8cf878a250f08717d55eaa55f63f6bec28f4be2af00d
4
- data.tar.gz: 161b9089e3b8d0dbd99cfb6cc0af463934c42ba85f4788a08306369966f28571
3
+ metadata.gz: 897dbc30f97ce17f0415630b6347a0092dac05196b0ef61e80939410d65cdf17
4
+ data.tar.gz: 61302f9cadde8a8badc97b455eb5c32d913c3b1fea8ed571e2da93a29e65afa9
5
5
  SHA512:
6
- metadata.gz: '09eede1e6e4c0fe57c0116c4e8873670192fea845783687ca34890bd9358af9dd19a535774ab7dd667055cf6acd0d3913f044dcf2274e0ec092b33307250a74a'
7
- data.tar.gz: b8bcaeb7af0d27e836f21941663229a2750922914c4f31f4ffbd6e3c3876320f9ce92916eb9730e02227b87e8f244bc08e5bc38541bf4ef4e3485203fff01942
6
+ metadata.gz: 4d541ba7f96a86f3f8de44f069f6e39d51ba6f28fa5d8c8d1d99a8434a95c5fe1a26470e6b062f348808fe5c0a444134d0dc96385437b4cb946c4a92044a2a5c
7
+ data.tar.gz: bc1bdacb7cbe69f9bb1382fd2ac7019bec04baf444dc963d63a594e989fd201d9eb9aadd0e463ac4efef8f7ba53915a594d8fb00f74ae295674b9024269a0406
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.2.4 (2021-12-28)
2
+
3
+ - Updated DataSketches to 3.3.0
4
+
1
5
  ## 0.2.3 (2021-09-29)
2
6
 
3
7
  - Updated DataSketches to 3.2.0
data/README.md CHANGED
@@ -1,8 +1,8 @@
1
- # DataSketches
1
+ # DataSketches Ruby
2
2
 
3
3
  [DataSketches](https://datasketches.apache.org/) - sketch data structures - for Ruby
4
4
 
5
- [![Build Status](https://github.com/ankane/datasketches/workflows/build/badge.svg?branch=master)](https://github.com/ankane/datasketches/actions)
5
+ [![Build Status](https://github.com/ankane/datasketches-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/datasketches-ruby/actions)
6
6
 
7
7
  ## Installation
8
8
 
@@ -292,22 +292,22 @@ This library is modeled after the DataSketches [Python API](https://github.com/a
292
292
 
293
293
  ## History
294
294
 
295
- View the [changelog](https://github.com/ankane/datasketches/blob/master/CHANGELOG.md)
295
+ View the [changelog](https://github.com/ankane/datasketches-ruby/blob/master/CHANGELOG.md)
296
296
 
297
297
  ## Contributing
298
298
 
299
299
  Everyone is encouraged to help improve this project. Here are a few ways you can help:
300
300
 
301
- - [Report bugs](https://github.com/ankane/datasketches/issues)
302
- - Fix bugs and [submit pull requests](https://github.com/ankane/datasketches/pulls)
301
+ - [Report bugs](https://github.com/ankane/datasketches-ruby/issues)
302
+ - Fix bugs and [submit pull requests](https://github.com/ankane/datasketches-ruby/pulls)
303
303
  - Write, clarify, or fix documentation
304
304
  - Suggest or add new features
305
305
 
306
306
  To get started with development:
307
307
 
308
308
  ```sh
309
- git clone --recursive https://github.com/ankane/datasketches.git
310
- cd datasketches
309
+ git clone --recursive https://github.com/ankane/datasketches-ruby.git
310
+ cd datasketches-ruby
311
311
  bundle install
312
312
  bundle exec rake compile
313
313
  bundle exec rake test
@@ -20,10 +20,26 @@ using Rice::Arg;
20
20
 
21
21
  void init_theta(Rice::Module& m) {
22
22
  Rice::define_class_under<theta_sketch>(m, "ThetaSketch")
23
- .define_method("empty?", &theta_sketch::is_empty)
24
- .define_method("estimate", &theta_sketch::get_estimate)
25
- .define_method("lower_bound", &theta_sketch::get_lower_bound)
26
- .define_method("upper_bound", &theta_sketch::get_upper_bound);
23
+ .define_method(
24
+ "empty?",
25
+ [](theta_sketch& self) {
26
+ return self.is_empty();
27
+ })
28
+ .define_method(
29
+ "estimate",
30
+ [](theta_sketch& self) {
31
+ return self.get_estimate();
32
+ })
33
+ .define_method(
34
+ "lower_bound",
35
+ [](theta_sketch& self, uint8_t num_std_devs) {
36
+ return self.get_lower_bound(num_std_devs);
37
+ })
38
+ .define_method(
39
+ "upper_bound",
40
+ [](theta_sketch& self, uint8_t num_std_devs) {
41
+ return self.get_upper_bound(num_std_devs);
42
+ });
27
43
 
28
44
  Rice::define_class_under<compact_theta_sketch, theta_sketch>(m, "CompactThetaSketch")
29
45
  .define_singleton_function(
@@ -1,3 +1,3 @@
1
1
  module DataSketches
2
- VERSION = "0.2.3"
2
+ VERSION = "0.2.4"
3
3
  end
@@ -17,7 +17,7 @@
17
17
 
18
18
  cmake_minimum_required(VERSION 3.12.0)
19
19
  project(DataSketches
20
- VERSION 0.12.0
20
+ VERSION 3.2.0
21
21
  LANGUAGES CXX)
22
22
 
23
23
  include(GNUInstallDirs)
@@ -126,11 +126,30 @@ endif()
126
126
 
127
127
  # # Installation
128
128
  install(TARGETS datasketches
129
- EXPORT ${PROJCT_NAME}
129
+ EXPORT ${PROJECT_NAME}
130
130
  PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
131
131
  INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
132
132
  )
133
133
 
134
+ # Packaging
135
+ include(CMakePackageConfigHelpers)
136
+ write_basic_package_version_file(
137
+ "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
138
+ VERSION ${PROJECT_VERSION}
139
+ COMPATIBILITY SameMajorVersion
140
+ )
141
+ configure_package_config_file(
142
+ cmake/DataSketchesConfig.cmake.in
143
+ "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
144
+ INSTALL_DESTINATION lib/DataSketches/cmake
145
+ PATH_VARS CMAKE_INSTALL_INCLUDEDIR
146
+ )
147
+ install(EXPORT ${PROJECT_NAME} DESTINATION lib/DataSketches/cmake)
148
+ install(FILES "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
149
+ "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
150
+ DESTINATION lib/DataSketches/cmake)
151
+
152
+
134
153
  #set(CPACK_PROJECT_NAME ${PROJECT_NAME})
135
154
  #set(CPACK_PROJECT_VERSION ${PROJECT_VERSION})
136
- #include(CPack)
155
+ include(CPack)
@@ -9,11 +9,14 @@ global-exclude .git*
9
9
 
10
10
  recursive-include python/pybind11 *
11
11
 
12
+ graft cmake
12
13
  graft common
13
14
  graft cpc
14
15
  graft fi
15
16
  graft hll
16
17
  graft kll
18
+ graft req
17
19
  graft theta
20
+ graft tuple
18
21
  graft sampling
19
22
  graft python
@@ -25,18 +25,85 @@ Installing the latest cmake on OSX: brew install cmake
25
25
  Building and running unit tests using cmake for OSX and Linux:
26
26
 
27
27
  ```
28
- $ cd build
29
- $ cmake ..
30
- $ make
31
- $ make test
28
+ $ cmake -S . -B build/Release -DCMAKE_BUILD_TYPE=Release
29
+ $ cmake --build build/Release -t all test
32
30
  ```
33
31
 
34
32
  Building and running unit tests using cmake for Windows from the command line:
35
33
 
36
34
  ```
37
- $ cd build
38
- $ cmake ..
39
- $ cd ..
40
- $ cmake --build build --config Release
41
- $ cmake --build build --config Release --target RUN_TESTS
35
+ $ cd build
36
+ $ cmake ..
37
+ $ cd ..
38
+ $ cmake --build build --config Release
39
+ $ cmake --build build --config Release --target RUN_TESTS
42
40
  ```
41
+
42
+ To install a local distribution (OSX and Linux), use the following command. The
43
+ CMAKE_INSTALL_PREFIX variable controls the destination. If not specified, it
44
+ defaults to installing in /usr/local (/usr/local/include, /usr/local/lib, etc.). In the command below,
45
+ the installation will be in /tmp/install/DataSketches (/tmp/install/DataSketches/include,
46
+ /tmp/install/DataSketches/lib, etc)
47
+
48
+ ```
49
+ $ cmake -S . -B build/Release -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/tmp/install/DataSketches
50
+ $ cmake --build build/Release -t install
51
+ ```
52
+
53
+ To generate an installable package using cmake's built-in cpack packaging tool,
54
+ use the following command. The type of packaging is controlled by the CPACK_GENERATOR
55
+ variable (semicolon-separated list). CMake usually supports packaging types such as RPM,
56
+ DEB, STGZ, TGZ, TZ, ZIP, etc.
57
+
58
+ ```
59
+ $ cmake3 -S . -B build/Release -DCMAKE_BUILD_TYPE=Release -DCPACK_GENERATOR="RPM;STGZ;TGZ"
60
+ $ cmake3 --build build/Release -t package
61
+ ```
62
+
63
+ The DataSketches project can be included in other projects' CMakeLists.txt files in one of two ways.
64
+ If DataSketches has been installed on the host (using an RPM, DEB, "make install" into /usr/local, or some
65
+ other way), then CMake's `find_package` command can be used like this:
66
+
67
+ ```
68
+ find_package(DataSketches 3.2 REQUIRED)
69
+ target_link_libraries(my_dependent_target PUBLIC ${DATASKETCHES_LIB})
70
+ ```
71
+
72
+ When used with find_package, DataSketches exports several variables, including
73
+
74
+ - `DATASKETCHES_VERSION`: The version number of the datasketches package that was imported.
75
+ - `DATASKETCHES_INCLUDE_DIR`: The directory that should be added to access DataSketches include files.
76
+ Because cmake automatically includes the interface directories for included target libraries when
77
+ using `target_link_libraries`, under normal circumstances there will be no need to include this directly.
78
+ - `DATASKETCHES_LIB`: The name of the DataSketches target to include as a dependency. Projects pulling
79
+ in DataSketches should reference this with `target_link_libraries` in order to set up all the correct dependencies
80
+ and include paths.
81
+
82
+ If you don't have DataSketches installed locally, dependent projects can pull it directly
83
+ from GitHub using CMake's `ExternalProject` module. The code would look something like this:
84
+
85
+ ```
86
+ cmake_policy(SET CMP0097 NEW)
87
+ include(ExternalProject)
88
+ ExternalProject_Add(datasketches
89
+ GIT_REPOSITORY https://github.com/apache/datasketches-cpp.git
90
+ GIT_TAG 3.2.0
91
+ GIT_SHALLOW true
92
+ GIT_SUBMODULES ""
93
+ INSTALL_DIR /tmp/datasketches-prefix
94
+ CMAKE_ARGS -DBUILD_TESTS=OFF -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=/tmp/datasketches-prefix
95
+
96
+ # Override the install command to add DESTDIR
97
+ # This is necessary to work around an oddity in the RPM (but not other) package
98
+ # generation, as CMake otherwise picks up the Datasketch files when building
99
+ # an RPM for a dependent package. (RPM scans the directory for files in addition to installing
100
+ # those files referenced in an "install" rule in the cmake file)
101
+ INSTALL_COMMAND env DESTDIR= ${CMAKE_COMMAND} --build . --target install
102
+ )
103
+ ExternalProject_Get_property(datasketches INSTALL_DIR)
104
+ set(datasketches_INSTALL_DIR ${INSTALL_DIR})
105
+ message("Source dir of datasketches = ${datasketches_INSTALL_DIR}")
106
+ target_include_directories(my_dependent_target
107
+ PRIVATE ${datasketches_INSTALL_DIR}/include/DataSketches)
108
+ add_dependencies(my_dependent_target datasketches)
109
+ ```
@@ -0,0 +1,10 @@
1
+ set(DATASKETCHES_VERSION "@PROJECT_VERSION@")
2
+
3
+ @PACKAGE_INIT@
4
+
5
+ include("${CMAKE_CURRENT_LIST_DIR}/DataSketches.cmake")
6
+
7
+ set_and_check(DATASKETCHES_INCLUDE_DIR "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@/DataSketches")
8
+ set(DATASKETCHES_LIB "datasketches")
9
+
10
+ check_required_components("@PROJECT_NAME@")
@@ -29,17 +29,18 @@ target_include_directories(common
29
29
 
30
30
  target_compile_features(common INTERFACE cxx_std_11)
31
31
 
32
- target_sources(common
33
- INTERFACE
34
- ${CMAKE_CURRENT_SOURCE_DIR}/include/common_defs.hpp
35
- ${CMAKE_CURRENT_SOURCE_DIR}/include/memory_operations.hpp
36
- ${CMAKE_CURRENT_SOURCE_DIR}/include/MurmurHash3.h
37
- ${CMAKE_CURRENT_SOURCE_DIR}/include/serde.hpp
38
- ${CMAKE_CURRENT_SOURCE_DIR}/include/count_zeros.hpp
39
- ${CMAKE_CURRENT_SOURCE_DIR}/include/inv_pow2_table.hpp
40
- ${CMAKE_CURRENT_SOURCE_DIR}/include/binomial_bounds.hpp
41
- ${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_back_inserter.hpp
42
- ${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_forward.hpp
43
- ${CMAKE_CURRENT_SOURCE_DIR}/include/ceiling_power_of_2.hpp
44
- )
32
+ install(TARGETS common EXPORT ${PROJECT_NAME})
45
33
 
34
+ install(FILES
35
+ include/common_defs.hpp
36
+ include/memory_operations.hpp
37
+ include/MurmurHash3.h
38
+ include/serde.hpp
39
+ include/count_zeros.hpp
40
+ include/inv_pow2_table.hpp
41
+ include/binomial_bounds.hpp
42
+ include/conditional_back_inserter.hpp
43
+ include/conditional_forward.hpp
44
+ include/ceiling_power_of_2.hpp
45
+ include/bounds_binomial_proportions.hpp
46
+ DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
@@ -32,43 +32,23 @@ target_include_directories(cpc
32
32
  target_link_libraries(cpc INTERFACE common)
33
33
  target_compile_features(cpc INTERFACE cxx_std_11)
34
34
 
35
- set(cpc_HEADERS "")
36
- list(APPEND cpc_HEADERS "include/compression_data.hpp")
37
- list(APPEND cpc_HEADERS "include/cpc_common.hpp")
38
- list(APPEND cpc_HEADERS "include/cpc_compressor.hpp")
39
- list(APPEND cpc_HEADERS "include/cpc_compressor_impl.hpp")
40
- list(APPEND cpc_HEADERS "include/cpc_confidence.hpp")
41
- list(APPEND cpc_HEADERS "include/cpc_sketch.hpp")
42
- list(APPEND cpc_HEADERS "include/cpc_sketch_impl.hpp")
43
- list(APPEND cpc_HEADERS "include/cpc_union.hpp")
44
- list(APPEND cpc_HEADERS "include/cpc_union_impl.hpp")
45
- list(APPEND cpc_HEADERS "include/cpc_util.hpp")
46
- list(APPEND cpc_HEADERS "include/icon_estimator.hpp")
47
- list(APPEND cpc_HEADERS "include/kxp_byte_lookup.hpp")
48
- list(APPEND cpc_HEADERS "include/u32_table.hpp")
49
- list(APPEND cpc_HEADERS "include/u32_table_impl.hpp")
50
-
51
35
  install(TARGETS cpc
52
36
  EXPORT ${PROJECT_NAME}
53
37
  )
54
38
 
55
- install(FILES ${cpc_HEADERS}
39
+ install(FILES
40
+ include/compression_data.hpp
41
+ include/cpc_common.hpp
42
+ include/cpc_compressor.hpp
43
+ include/cpc_compressor_impl.hpp
44
+ include/cpc_confidence.hpp
45
+ include/cpc_sketch.hpp
46
+ include/cpc_sketch_impl.hpp
47
+ include/cpc_union.hpp
48
+ include/cpc_union_impl.hpp
49
+ include/cpc_util.hpp
50
+ include/icon_estimator.hpp
51
+ include/kxp_byte_lookup.hpp
52
+ include/u32_table.hpp
53
+ include/u32_table_impl.hpp
56
54
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
57
-
58
- target_sources(cpc
59
- INTERFACE
60
- ${CMAKE_CURRENT_SOURCE_DIR}/include/compression_data.hpp
61
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_common.hpp
62
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_compressor.hpp
63
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_compressor_impl.hpp
64
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_confidence.hpp
65
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_sketch.hpp
66
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_sketch_impl.hpp
67
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_union.hpp
68
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_union_impl.hpp
69
- ${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_util.hpp
70
- ${CMAKE_CURRENT_SOURCE_DIR}/include/icon_estimator.hpp
71
- ${CMAKE_CURRENT_SOURCE_DIR}/include/kxp_byte_lookup.hpp
72
- ${CMAKE_CURRENT_SOURCE_DIR}/include/u32_table.hpp
73
- ${CMAKE_CURRENT_SOURCE_DIR}/include/u32_table_impl.hpp
74
- )
@@ -381,7 +381,9 @@ void cpc_sketch_alloc<A>::refresh_kxp(const uint64_t* bit_matrix) {
381
381
 
382
382
  template<typename A>
383
383
  string<A> cpc_sketch_alloc<A>::to_string() const {
384
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
384
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
385
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
386
+ std::ostringstream os;
385
387
  os << "### CPC sketch summary:" << std::endl;
386
388
  os << " lg_k : " << std::to_string(lg_k) << std::endl;
387
389
  os << " seed hash : " << std::hex << compute_seed_hash(seed) << std::dec << std::endl;
@@ -392,14 +394,14 @@ string<A> cpc_sketch_alloc<A>::to_string() const {
392
394
  os << " HIP estimate : " << hip_est_accum << std::endl;
393
395
  os << " kxp : " << kxp << std::endl;
394
396
  }
395
- os << " intresting col : " << std::to_string(first_interesting_column) << std::endl;
397
+ os << " interesting col: " << std::to_string(first_interesting_column) << std::endl;
396
398
  os << " table entries : " << surprising_value_table.get_num_items() << std::endl;
397
399
  os << " window : " << (sliding_window.size() == 0 ? "not " : "") << "allocated" << std::endl;
398
400
  if (sliding_window.size() > 0) {
399
401
  os << " window offset : " << std::to_string(window_offset) << std::endl;
400
402
  }
401
403
  os << "### End sketch summary" << std::endl;
402
- return os.str();
404
+ return string<A>(os.str().c_str(), sliding_window.get_allocator());
403
405
  }
404
406
 
405
407
  template<typename A>
@@ -34,7 +34,7 @@ bit_matrix(allocator)
34
34
  if (lg_k < CPC_MIN_LG_K || lg_k > CPC_MAX_LG_K) {
35
35
  throw std::invalid_argument("lg_k must be >= " + std::to_string(CPC_MIN_LG_K) + " and <= " + std::to_string(CPC_MAX_LG_K) + ": " + std::to_string(lg_k));
36
36
  }
37
- accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(lg_k, seed, allocator);
37
+ accumulator = new (AllocCpc(allocator).allocate(1)) cpc_sketch_alloc<A>(lg_k, seed, allocator);
38
38
  }
39
39
 
40
40
  template<typename A>
@@ -45,7 +45,7 @@ accumulator(other.accumulator),
45
45
  bit_matrix(other.bit_matrix)
46
46
  {
47
47
  if (accumulator != nullptr) {
48
- accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(*other.accumulator);
48
+ accumulator = new (AllocCpc(accumulator->get_allocator()).allocate(1)) cpc_sketch_alloc<A>(*other.accumulator);
49
49
  }
50
50
  }
51
51
 
@@ -62,8 +62,9 @@ bit_matrix(std::move(other.bit_matrix))
62
62
  template<typename A>
63
63
  cpc_union_alloc<A>::~cpc_union_alloc() {
64
64
  if (accumulator != nullptr) {
65
+ AllocCpc allocator(accumulator->get_allocator());
65
66
  accumulator->~cpc_sketch_alloc<A>();
66
- AllocCpc().deallocate(accumulator, 1);
67
+ allocator.deallocate(accumulator, 1);
67
68
  }
68
69
  }
69
70
 
@@ -181,7 +182,7 @@ template<typename A>
181
182
  cpc_sketch_alloc<A> cpc_union_alloc<A>::get_result_from_accumulator() const {
182
183
  if (lg_k != accumulator->get_lg_k()) throw std::logic_error("lg_k != accumulator->lg_k");
183
184
  if (accumulator->get_num_coupons() == 0) {
184
- return cpc_sketch_alloc<A>(lg_k, seed);
185
+ return cpc_sketch_alloc<A>(lg_k, seed, accumulator->get_allocator());
185
186
  }
186
187
  if (accumulator->determine_flavor() != cpc_sketch_alloc<A>::flavor::SPARSE) throw std::logic_error("wrong flavor");
187
188
  cpc_sketch_alloc<A> copy(*accumulator);
@@ -242,8 +243,9 @@ cpc_sketch_alloc<A> cpc_union_alloc<A>::get_result_from_bit_matrix() const {
242
243
  template<typename A>
243
244
  void cpc_union_alloc<A>::switch_to_bit_matrix() {
244
245
  bit_matrix = accumulator->build_bit_matrix();
246
+ AllocCpc allocator(accumulator->get_allocator());
245
247
  accumulator->~cpc_sketch_alloc<A>();
246
- AllocCpc().deallocate(accumulator, 1);
248
+ allocator.deallocate(accumulator, 1);
247
249
  accumulator = nullptr;
248
250
  }
249
251
 
@@ -324,7 +326,7 @@ void cpc_union_alloc<A>::reduce_k(uint8_t new_lg_k) {
324
326
  if (bit_matrix.size() > 0) throw std::logic_error("bit_matrix is not expected");
325
327
  if (!accumulator->is_empty()) {
326
328
  cpc_sketch_alloc<A> old_accumulator(*accumulator);
327
- *accumulator = cpc_sketch_alloc<A>(new_lg_k, seed);
329
+ *accumulator = cpc_sketch_alloc<A>(new_lg_k, seed, old_accumulator.get_allocator());
328
330
  walk_table_updating_sketch(old_accumulator.surprising_value_table);
329
331
  }
330
332
  lg_k = new_lg_k;
@@ -25,6 +25,7 @@
25
25
  #include <catch.hpp>
26
26
 
27
27
  #include "cpc_sketch.hpp"
28
+ #include "cpc_union.hpp"
28
29
  #include "test_allocator.hpp"
29
30
 
30
31
  namespace datasketches {
@@ -234,4 +235,20 @@ TEST_CASE("cpc sketch allocation: serialize deserialize sliding, bytes", "[cpc_s
234
235
  REQUIRE(test_allocator_net_allocations == 0);
235
236
  }
236
237
 
238
+ using cpc_union_test_alloc = cpc_union_alloc<test_allocator<uint8_t>>;
239
+
240
+ TEST_CASE("cpc sketch allocation: union") {
241
+ cpc_sketch_test_alloc s1(11, DEFAULT_SEED, 0);
242
+ s1.update(1);
243
+
244
+ cpc_sketch_test_alloc s2(11, DEFAULT_SEED, 0);
245
+ s2.update(2);
246
+
247
+ cpc_union_test_alloc u(11, DEFAULT_SEED, 0);
248
+ u.update(s1);
249
+ u.update(s2);
250
+ auto s3 = u.get_result();
251
+ REQUIRE_FALSE(s3.is_empty());
252
+ }
253
+
237
254
  } /* namespace datasketches */
@@ -32,23 +32,13 @@ target_include_directories(fi
32
32
  target_link_libraries(fi INTERFACE common)
33
33
  target_compile_features(fi INTERFACE cxx_std_11)
34
34
 
35
- set(fi_HEADERS "")
36
- list(APPEND fi_HEADERS "include/frequent_items_sketch.hpp")
37
- list(APPEND fi_HEADERS "include/frequent_items_sketch_impl.hpp")
38
- list(APPEND fi_HEADERS "include/reverse_purge_hash_map.hpp")
39
- list(APPEND fi_HEADERS "include/reverse_purge_hash_map_impl.hpp")
40
-
41
35
  install(TARGETS fi
42
36
  EXPORT ${PROJECT_NAME}
43
37
  )
44
38
 
45
- install(FILES ${fi_HEADERS}
39
+ install(FILES
40
+ include/frequent_items_sketch.hpp
41
+ include/frequent_items_sketch_impl.hpp
42
+ include/reverse_purge_hash_map.hpp
43
+ include/reverse_purge_hash_map_impl.hpp
46
44
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
47
-
48
- target_sources(fi
49
- INTERFACE
50
- ${CMAKE_CURRENT_SOURCE_DIR}/include/frequent_items_sketch.hpp
51
- ${CMAKE_CURRENT_SOURCE_DIR}/include/frequent_items_sketch_impl.hpp
52
- ${CMAKE_CURRENT_SOURCE_DIR}/include/reverse_purge_hash_map.hpp
53
- ${CMAKE_CURRENT_SOURCE_DIR}/include/reverse_purge_hash_map_impl.hpp
54
- )
@@ -421,7 +421,9 @@ void frequent_items_sketch<T, W, H, E, S, A>::check_size(uint8_t lg_cur_size, ui
421
421
 
422
422
  template<typename T, typename W, typename H, typename E, typename S, typename A>
423
423
  string<A> frequent_items_sketch<T, W, H, E, S, A>::to_string(bool print_items) const {
424
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
424
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
425
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
426
+ std::ostringstream os;
425
427
  os << "### Frequent items sketch summary:" << std::endl;
426
428
  os << " lg cur map size : " << (int) map.get_lg_cur_size() << std::endl;
427
429
  os << " lg max map size : " << (int) map.get_lg_max_size() << std::endl;
@@ -444,7 +446,7 @@ string<A> frequent_items_sketch<T, W, H, E, S, A>::to_string(bool print_items) c
444
446
  }
445
447
  os << "### End items" << std::endl;
446
448
  }
447
- return os.str();
449
+ return string<A>(os.str().c_str(), map.get_allocator());
448
450
  }
449
451
 
450
452
  // version for integral signed type
@@ -32,64 +32,41 @@ target_include_directories(hll
32
32
  target_link_libraries(hll INTERFACE common)
33
33
  target_compile_features(hll INTERFACE cxx_std_11)
34
34
 
35
- # TODO: would be useful if this didn't need to be reproduced in target_sources(), too
36
- set(hll_HEADERS "")
37
- list(APPEND hll_HEADERS "include/hll.hpp;include/AuxHashMap.hpp;include/CompositeInterpolationXTable.hpp")
38
- list(APPEND hll_HEADERS "include/hll.private.hpp;include/HllSketchImplFactory.hpp")
39
- list(APPEND hll_HEADERS "include/CouponHashSet.hpp;include/CouponList.hpp")
40
- list(APPEND hll_HEADERS "include/CubicInterpolation.hpp;include/HarmonicNumbers.hpp;include/Hll4Array.hpp")
41
- list(APPEND hll_HEADERS "include/Hll6Array.hpp;include/Hll8Array.hpp;include/HllArray.hpp")
42
- list(APPEND hll_HEADERS "include/HllSketchImpl.hpp")
43
- list(APPEND hll_HEADERS "include/HllUtil.hpp;include/coupon_iterator.hpp")
44
- list(APPEND hll_HEADERS "include/RelativeErrorTables.hpp;include/AuxHashMap-internal.hpp")
45
- list(APPEND hll_HEADERS "include/CompositeInterpolationXTable-internal.hpp")
46
- list(APPEND hll_HEADERS "include/CouponHashSet-internal.hpp;include/CouponList-internal.hpp")
47
- list(APPEND hll_HEADERS "include/CubicInterpolation-internal.hpp;include/HarmonicNumbers-internal.hpp")
48
- list(APPEND hll_HEADERS "include/Hll4Array-internal.hpp;include/Hll6Array-internal.hpp")
49
- list(APPEND hll_HEADERS "include/Hll8Array-internal.hpp;include/HllArray-internal.hpp")
50
- list(APPEND hll_HEADERS "include/HllSketch-internal.hpp")
51
- list(APPEND hll_HEADERS "include/HllSketchImpl-internal.hpp;include/HllUnion-internal.hpp")
52
- list(APPEND hll_HEADERS "include/coupon_iterator-internal.hpp;include/RelativeErrorTables-internal.hpp")
53
-
54
35
  install(TARGETS hll
55
36
  EXPORT ${PROJECT_NAME}
56
37
  )
57
38
 
58
- install(FILES ${hll_HEADERS}
39
+ install(FILES
40
+ include/hll.hpp
41
+ include/AuxHashMap.hpp
42
+ include/CompositeInterpolationXTable.hpp
43
+ include/hll.private.hpp
44
+ include/HllSketchImplFactory.hpp
45
+ include/CouponHashSet.hpp
46
+ include/CouponList.hpp
47
+ include/CubicInterpolation.hpp
48
+ include/HarmonicNumbers.hpp
49
+ include/Hll4Array.hpp
50
+ include/Hll6Array.hpp
51
+ include/Hll8Array.hpp
52
+ include/HllArray.hpp
53
+ include/HllSketchImpl.hpp
54
+ include/HllUtil.hpp
55
+ include/coupon_iterator.hpp
56
+ include/RelativeErrorTables.hpp
57
+ include/AuxHashMap-internal.hpp
58
+ include/CompositeInterpolationXTable-internal.hpp
59
+ include/CouponHashSet-internal.hpp
60
+ include/CouponList-internal.hpp
61
+ include/CubicInterpolation-internal.hpp
62
+ include/HarmonicNumbers-internal.hpp
63
+ include/Hll4Array-internal.hpp
64
+ include/Hll6Array-internal.hpp
65
+ include/Hll8Array-internal.hpp
66
+ include/HllArray-internal.hpp
67
+ include/HllSketch-internal.hpp
68
+ include/HllSketchImpl-internal.hpp
69
+ include/HllUnion-internal.hpp
70
+ include/coupon_iterator-internal.hpp
71
+ include/RelativeErrorTables-internal.hpp
59
72
  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
60
-
61
- target_sources(hll
62
- INTERFACE
63
- ${CMAKE_CURRENT_SOURCE_DIR}/include/hll.hpp
64
- ${CMAKE_CURRENT_SOURCE_DIR}/include/hll.private.hpp
65
- ${CMAKE_CURRENT_SOURCE_DIR}/include/AuxHashMap.hpp
66
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CompositeInterpolationXTable.hpp
67
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CouponHashSet.hpp
68
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CouponList.hpp
69
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CubicInterpolation.hpp
70
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HarmonicNumbers.hpp
71
- ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll4Array.hpp
72
- ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll6Array.hpp
73
- ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll8Array.hpp
74
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllArray.hpp
75
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImpl.hpp
76
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImplFactory.hpp
77
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllUtil.hpp
78
- ${CMAKE_CURRENT_SOURCE_DIR}/include/RelativeErrorTables.hpp
79
- ${CMAKE_CURRENT_SOURCE_DIR}/include/coupon_iterator.hpp
80
- ${CMAKE_CURRENT_SOURCE_DIR}/include/AuxHashMap-internal.hpp
81
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CompositeInterpolationXTable-internal.hpp
82
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CouponHashSet-internal.hpp
83
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CouponList-internal.hpp
84
- ${CMAKE_CURRENT_SOURCE_DIR}/include/CubicInterpolation-internal.hpp
85
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HarmonicNumbers-internal.hpp
86
- ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll4Array-internal.hpp
87
- ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll6Array-internal.hpp
88
- ${CMAKE_CURRENT_SOURCE_DIR}/include/Hll8Array-internal.hpp
89
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllArray-internal.hpp
90
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketch-internal.hpp
91
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImpl-internal.hpp
92
- ${CMAKE_CURRENT_SOURCE_DIR}/include/HllUnion-internal.hpp
93
- ${CMAKE_CURRENT_SOURCE_DIR}/include/RelativeErrorTables-internal.hpp
94
- ${CMAKE_CURRENT_SOURCE_DIR}/include/coupon_iterator-internal.hpp
95
- )
@@ -246,10 +246,12 @@ string<A> hll_sketch_alloc<A>::to_string(const bool summary,
246
246
  const bool detail,
247
247
  const bool aux_detail,
248
248
  const bool all) const {
249
- std::basic_ostringstream<char, std::char_traits<char>, AllocChar<A>> os;
249
+ // Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
250
+ // The stream does not support passing an allocator instance, and alternatives are complicated.
251
+ std::stringstream os;
250
252
  if (summary) {
251
253
  os << "### HLL sketch summary:" << std::endl
252
- << " Log Config K : " << get_lg_config_k() << std::endl
254
+ << " Log Config K : " << std::to_string(get_lg_config_k()) << std::endl
253
255
  << " Hll Target : " << type_as_string() << std::endl
254
256
  << " Current Mode : " << mode_as_string() << std::endl
255
257
  << " LB : " << get_lower_bound(1) << std::endl
@@ -258,7 +260,7 @@ string<A> hll_sketch_alloc<A>::to_string(const bool summary,
258
260
  << " OutOfOrder flag: " << (is_out_of_order_flag() ? "true" : "false") << std::endl;
259
261
  if (get_current_mode() == HLL) {
260
262
  HllArray<A>* hllArray = (HllArray<A>*) sketch_impl;
261
- os << " CurMin : " << hllArray->getCurMin() << std::endl
263
+ os << " CurMin : " << std::to_string(hllArray->getCurMin()) << std::endl
262
264
  << " NumAtCurMin : " << hllArray->getNumAtCurMin() << std::endl
263
265
  << " HipAccum : " << hllArray->getHipAccum() << std::endl
264
266
  << " KxQ0 : " << hllArray->getKxQ0() << std::endl
@@ -338,7 +340,7 @@ string<A> hll_sketch_alloc<A>::to_string(const bool summary,
338
340
  }
339
341
  }
340
342
 
341
- return os.str();
343
+ return string<A>(os.str().c_str(), sketch_impl->getAllocator());
342
344
  }
343
345
 
344
346
  template<typename A>