datasketches 0.2.3 → 0.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +7 -7
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +22 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -6
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +4 -2
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +4 -2
- data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +4 -2
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +13 -7
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +8 -6
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +89 -22
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +146 -51
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +8 -2
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -4
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -9
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +400 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +23 -11
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +33 -14
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +7 -0
- metadata +11 -6
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 897dbc30f97ce17f0415630b6347a0092dac05196b0ef61e80939410d65cdf17
|
4
|
+
data.tar.gz: 61302f9cadde8a8badc97b455eb5c32d913c3b1fea8ed571e2da93a29e65afa9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4d541ba7f96a86f3f8de44f069f6e39d51ba6f28fa5d8c8d1d99a8434a95c5fe1a26470e6b062f348808fe5c0a444134d0dc96385437b4cb946c4a92044a2a5c
|
7
|
+
data.tar.gz: bc1bdacb7cbe69f9bb1382fd2ac7019bec04baf444dc963d63a594e989fd201d9eb9aadd0e463ac4efef8f7ba53915a594d8fb00f74ae295674b9024269a0406
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
# DataSketches
|
1
|
+
# DataSketches Ruby
|
2
2
|
|
3
3
|
[DataSketches](https://datasketches.apache.org/) - sketch data structures - for Ruby
|
4
4
|
|
5
|
-
[![Build Status](https://github.com/ankane/datasketches/workflows/build/badge.svg?branch=master)](https://github.com/ankane/datasketches/actions)
|
5
|
+
[![Build Status](https://github.com/ankane/datasketches-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/datasketches-ruby/actions)
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -292,22 +292,22 @@ This library is modeled after the DataSketches [Python API](https://github.com/a
|
|
292
292
|
|
293
293
|
## History
|
294
294
|
|
295
|
-
View the [changelog](https://github.com/ankane/datasketches/blob/master/CHANGELOG.md)
|
295
|
+
View the [changelog](https://github.com/ankane/datasketches-ruby/blob/master/CHANGELOG.md)
|
296
296
|
|
297
297
|
## Contributing
|
298
298
|
|
299
299
|
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
300
300
|
|
301
|
-
- [Report bugs](https://github.com/ankane/datasketches/issues)
|
302
|
-
- Fix bugs and [submit pull requests](https://github.com/ankane/datasketches/pulls)
|
301
|
+
- [Report bugs](https://github.com/ankane/datasketches-ruby/issues)
|
302
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/datasketches-ruby/pulls)
|
303
303
|
- Write, clarify, or fix documentation
|
304
304
|
- Suggest or add new features
|
305
305
|
|
306
306
|
To get started with development:
|
307
307
|
|
308
308
|
```sh
|
309
|
-
git clone --recursive https://github.com/ankane/datasketches.git
|
310
|
-
cd datasketches
|
309
|
+
git clone --recursive https://github.com/ankane/datasketches-ruby.git
|
310
|
+
cd datasketches-ruby
|
311
311
|
bundle install
|
312
312
|
bundle exec rake compile
|
313
313
|
bundle exec rake test
|
@@ -20,10 +20,26 @@ using Rice::Arg;
|
|
20
20
|
|
21
21
|
void init_theta(Rice::Module& m) {
|
22
22
|
Rice::define_class_under<theta_sketch>(m, "ThetaSketch")
|
23
|
-
.define_method(
|
24
|
-
|
25
|
-
|
26
|
-
|
23
|
+
.define_method(
|
24
|
+
"empty?",
|
25
|
+
[](theta_sketch& self) {
|
26
|
+
return self.is_empty();
|
27
|
+
})
|
28
|
+
.define_method(
|
29
|
+
"estimate",
|
30
|
+
[](theta_sketch& self) {
|
31
|
+
return self.get_estimate();
|
32
|
+
})
|
33
|
+
.define_method(
|
34
|
+
"lower_bound",
|
35
|
+
[](theta_sketch& self, uint8_t num_std_devs) {
|
36
|
+
return self.get_lower_bound(num_std_devs);
|
37
|
+
})
|
38
|
+
.define_method(
|
39
|
+
"upper_bound",
|
40
|
+
[](theta_sketch& self, uint8_t num_std_devs) {
|
41
|
+
return self.get_upper_bound(num_std_devs);
|
42
|
+
});
|
27
43
|
|
28
44
|
Rice::define_class_under<compact_theta_sketch, theta_sketch>(m, "CompactThetaSketch")
|
29
45
|
.define_singleton_function(
|
data/lib/datasketches/version.rb
CHANGED
@@ -17,7 +17,7 @@
|
|
17
17
|
|
18
18
|
cmake_minimum_required(VERSION 3.12.0)
|
19
19
|
project(DataSketches
|
20
|
-
VERSION
|
20
|
+
VERSION 3.2.0
|
21
21
|
LANGUAGES CXX)
|
22
22
|
|
23
23
|
include(GNUInstallDirs)
|
@@ -126,11 +126,30 @@ endif()
|
|
126
126
|
|
127
127
|
# # Installation
|
128
128
|
install(TARGETS datasketches
|
129
|
-
EXPORT ${
|
129
|
+
EXPORT ${PROJECT_NAME}
|
130
130
|
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
|
131
131
|
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
|
132
132
|
)
|
133
133
|
|
134
|
+
# Packaging
|
135
|
+
include(CMakePackageConfigHelpers)
|
136
|
+
write_basic_package_version_file(
|
137
|
+
"${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
|
138
|
+
VERSION ${PROJECT_VERSION}
|
139
|
+
COMPATIBILITY SameMajorVersion
|
140
|
+
)
|
141
|
+
configure_package_config_file(
|
142
|
+
cmake/DataSketchesConfig.cmake.in
|
143
|
+
"${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
|
144
|
+
INSTALL_DESTINATION lib/DataSketches/cmake
|
145
|
+
PATH_VARS CMAKE_INSTALL_INCLUDEDIR
|
146
|
+
)
|
147
|
+
install(EXPORT ${PROJECT_NAME} DESTINATION lib/DataSketches/cmake)
|
148
|
+
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
|
149
|
+
"${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
|
150
|
+
DESTINATION lib/DataSketches/cmake)
|
151
|
+
|
152
|
+
|
134
153
|
#set(CPACK_PROJECT_NAME ${PROJECT_NAME})
|
135
154
|
#set(CPACK_PROJECT_VERSION ${PROJECT_VERSION})
|
136
|
-
|
155
|
+
include(CPack)
|
@@ -25,18 +25,85 @@ Installing the latest cmake on OSX: brew install cmake
|
|
25
25
|
Building and running unit tests using cmake for OSX and Linux:
|
26
26
|
|
27
27
|
```
|
28
|
-
|
29
|
-
|
30
|
-
$ make
|
31
|
-
$ make test
|
28
|
+
$ cmake -S . -B build/Release -DCMAKE_BUILD_TYPE=Release
|
29
|
+
$ cmake --build build/Release -t all test
|
32
30
|
```
|
33
31
|
|
34
32
|
Building and running unit tests using cmake for Windows from the command line:
|
35
33
|
|
36
34
|
```
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
35
|
+
$ cd build
|
36
|
+
$ cmake ..
|
37
|
+
$ cd ..
|
38
|
+
$ cmake --build build --config Release
|
39
|
+
$ cmake --build build --config Release --target RUN_TESTS
|
42
40
|
```
|
41
|
+
|
42
|
+
To install a local distribution (OSX and Linux), use the following command. The
|
43
|
+
CMAKE_INSTALL_PREFIX variable controls the destination. If not specified, it
|
44
|
+
defaults to installing in /usr/local (/usr/local/include, /usr/local/lib, etc.). In the command below,
|
45
|
+
the installation will be in /tmp/install/DataSketches (/tmp/install/DataSketches/include,
|
46
|
+
/tmp/install/DataSketches/lib, etc)
|
47
|
+
|
48
|
+
```
|
49
|
+
$ cmake -S . -B build/Release -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/tmp/install/DataSketches
|
50
|
+
$ cmake --build build/Release -t install
|
51
|
+
```
|
52
|
+
|
53
|
+
To generate an installable package using CMake's built-in CPack packaging tool,
|
54
|
+
use the following command. The type of packaging is controlled by the CPACK_GENERATOR
|
55
|
+
variable (a semicolon-separated list). CMake usually supports packaging types such as RPM,
|
56
|
+
DEB, STGZ, TGZ, TZ, ZIP, etc.
|
57
|
+
|
58
|
+
```
|
59
|
+
$ cmake3 -S . -B build/Release -DCMAKE_BUILD_TYPE=Release -DCPACK_GENERATOR="RPM;STGZ;TGZ"
|
60
|
+
$ cmake3 --build build/Release -t package
|
61
|
+
```
|
62
|
+
|
63
|
+
The DataSketches project can be included in other projects' CMakeLists.txt files in one of two ways.
|
64
|
+
If DataSketches has been installed on the host (using an RPM, DEB, "make install" into /usr/local, or some
|
65
|
+
other way), then CMake's `find_package` command can be used like this:
|
66
|
+
|
67
|
+
```
|
68
|
+
find_package(DataSketches 3.2 REQUIRED)
|
69
|
+
target_link_libraries(my_dependent_target PUBLIC ${DATASKETCHES_LIB})
|
70
|
+
```
|
71
|
+
|
72
|
+
When used with find_package, DataSketches exports several variables, including
|
73
|
+
|
74
|
+
- `DATASKETCHES_VERSION`: The version number of the datasketches package that was imported.
|
75
|
+
- `DATASKETCHES_INCLUDE_DIR`: The directory that should be added to access DataSketches include files.
|
76
|
+
Because cmake automatically includes the interface directories for included target libraries when
|
77
|
+
using `target_link_libraries`, under normal circumstances there will be no need to include this directly.
|
78
|
+
- `DATASKETCHES_LIB`: The name of the DataSketches target to include as a dependency. Projects pulling
|
79
|
+
in DataSketches should reference this with `target_link_libraries` in order to set up all the correct dependencies
|
80
|
+
and include paths.
|
81
|
+
|
82
|
+
If you don't have DataSketches installed locally, dependent projects can pull it directly
|
83
|
+
from GitHub using CMake's `ExternalProject` module. The code would look something like this:
|
84
|
+
|
85
|
+
```
|
86
|
+
cmake_policy(SET CMP0097 NEW)
|
87
|
+
include(ExternalProject)
|
88
|
+
ExternalProject_Add(datasketches
|
89
|
+
GIT_REPOSITORY https://github.com/apache/datasketches-cpp.git
|
90
|
+
GIT_TAG 3.2.0
|
91
|
+
GIT_SHALLOW true
|
92
|
+
GIT_SUBMODULES ""
|
93
|
+
INSTALL_DIR /tmp/datasketches-prefix
|
94
|
+
CMAKE_ARGS -DBUILD_TESTS=OFF -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=/tmp/datasketches-prefix
|
95
|
+
|
96
|
+
# Override the install command to add DESTDIR
|
97
|
+
# This is necessary to work around an oddity in the RPM (but not other) package
|
98
|
+
# generation, as CMake otherwise picks up the Datasketch files when building
|
99
|
+
# an RPM for a dependent package. (RPM scans the directory for files in addition to installing
|
100
|
+
# those files referenced in an "install" rule in the cmake file)
|
101
|
+
INSTALL_COMMAND env DESTDIR= ${CMAKE_COMMAND} --build . --target install
|
102
|
+
)
|
103
|
+
ExternalProject_Get_property(datasketches INSTALL_DIR)
|
104
|
+
set(datasketches_INSTALL_DIR ${INSTALL_DIR})
|
105
|
+
message("Install dir of datasketches = ${datasketches_INSTALL_DIR}")
|
106
|
+
target_include_directories(my_dependent_target
|
107
|
+
PRIVATE ${datasketches_INSTALL_DIR}/include/DataSketches)
|
108
|
+
add_dependencies(my_dependent_target datasketches)
|
109
|
+
```
|
@@ -0,0 +1,10 @@
|
|
1
|
+
set(DATASKETCHES_VERSION "@PROJECT_VERSION@")
|
2
|
+
|
3
|
+
@PACKAGE_INIT@
|
4
|
+
|
5
|
+
include("${CMAKE_CURRENT_LIST_DIR}/DataSketches.cmake")
|
6
|
+
|
7
|
+
set_and_check(DATASKETCHES_INCLUDE_DIR "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@/DataSketches")
|
8
|
+
set(DATASKETCHES_LIB "datasketches")
|
9
|
+
|
10
|
+
check_required_components("@PROJECT_NAME@")
|
@@ -29,17 +29,18 @@ target_include_directories(common
|
|
29
29
|
|
30
30
|
target_compile_features(common INTERFACE cxx_std_11)
|
31
31
|
|
32
|
-
|
33
|
-
INTERFACE
|
34
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/common_defs.hpp
|
35
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/memory_operations.hpp
|
36
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/MurmurHash3.h
|
37
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/serde.hpp
|
38
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/count_zeros.hpp
|
39
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/inv_pow2_table.hpp
|
40
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/binomial_bounds.hpp
|
41
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_back_inserter.hpp
|
42
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_forward.hpp
|
43
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/ceiling_power_of_2.hpp
|
44
|
-
)
|
32
|
+
install(TARGETS common EXPORT ${PROJECT_NAME})
|
45
33
|
|
34
|
+
install(FILES
|
35
|
+
include/common_defs.hpp
|
36
|
+
include/memory_operations.hpp
|
37
|
+
include/MurmurHash3.h
|
38
|
+
include/serde.hpp
|
39
|
+
include/count_zeros.hpp
|
40
|
+
include/inv_pow2_table.hpp
|
41
|
+
include/binomial_bounds.hpp
|
42
|
+
include/conditional_back_inserter.hpp
|
43
|
+
include/conditional_forward.hpp
|
44
|
+
include/ceiling_power_of_2.hpp
|
45
|
+
include/bounds_binomial_proportions.hpp
|
46
|
+
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|
@@ -32,43 +32,23 @@ target_include_directories(cpc
|
|
32
32
|
target_link_libraries(cpc INTERFACE common)
|
33
33
|
target_compile_features(cpc INTERFACE cxx_std_11)
|
34
34
|
|
35
|
-
set(cpc_HEADERS "")
|
36
|
-
list(APPEND cpc_HEADERS "include/compression_data.hpp")
|
37
|
-
list(APPEND cpc_HEADERS "include/cpc_common.hpp")
|
38
|
-
list(APPEND cpc_HEADERS "include/cpc_compressor.hpp")
|
39
|
-
list(APPEND cpc_HEADERS "include/cpc_compressor_impl.hpp")
|
40
|
-
list(APPEND cpc_HEADERS "include/cpc_confidence.hpp")
|
41
|
-
list(APPEND cpc_HEADERS "include/cpc_sketch.hpp")
|
42
|
-
list(APPEND cpc_HEADERS "include/cpc_sketch_impl.hpp")
|
43
|
-
list(APPEND cpc_HEADERS "include/cpc_union.hpp")
|
44
|
-
list(APPEND cpc_HEADERS "include/cpc_union_impl.hpp")
|
45
|
-
list(APPEND cpc_HEADERS "include/cpc_util.hpp")
|
46
|
-
list(APPEND cpc_HEADERS "include/icon_estimator.hpp")
|
47
|
-
list(APPEND cpc_HEADERS "include/kxp_byte_lookup.hpp")
|
48
|
-
list(APPEND cpc_HEADERS "include/u32_table.hpp")
|
49
|
-
list(APPEND cpc_HEADERS "include/u32_table_impl.hpp")
|
50
|
-
|
51
35
|
install(TARGETS cpc
|
52
36
|
EXPORT ${PROJECT_NAME}
|
53
37
|
)
|
54
38
|
|
55
|
-
install(FILES
|
39
|
+
install(FILES
|
40
|
+
include/compression_data.hpp
|
41
|
+
include/cpc_common.hpp
|
42
|
+
include/cpc_compressor.hpp
|
43
|
+
include/cpc_compressor_impl.hpp
|
44
|
+
include/cpc_confidence.hpp
|
45
|
+
include/cpc_sketch.hpp
|
46
|
+
include/cpc_sketch_impl.hpp
|
47
|
+
include/cpc_union.hpp
|
48
|
+
include/cpc_union_impl.hpp
|
49
|
+
include/cpc_util.hpp
|
50
|
+
include/icon_estimator.hpp
|
51
|
+
include/kxp_byte_lookup.hpp
|
52
|
+
include/u32_table.hpp
|
53
|
+
include/u32_table_impl.hpp
|
56
54
|
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|
57
|
-
|
58
|
-
target_sources(cpc
|
59
|
-
INTERFACE
|
60
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/compression_data.hpp
|
61
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_common.hpp
|
62
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_compressor.hpp
|
63
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_compressor_impl.hpp
|
64
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_confidence.hpp
|
65
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_sketch.hpp
|
66
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_sketch_impl.hpp
|
67
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_union.hpp
|
68
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_union_impl.hpp
|
69
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_util.hpp
|
70
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/icon_estimator.hpp
|
71
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/kxp_byte_lookup.hpp
|
72
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/u32_table.hpp
|
73
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/u32_table_impl.hpp
|
74
|
-
)
|
@@ -381,7 +381,9 @@ void cpc_sketch_alloc<A>::refresh_kxp(const uint64_t* bit_matrix) {
|
|
381
381
|
|
382
382
|
template<typename A>
|
383
383
|
string<A> cpc_sketch_alloc<A>::to_string() const {
|
384
|
-
|
384
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
385
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
386
|
+
std::ostringstream os;
|
385
387
|
os << "### CPC sketch summary:" << std::endl;
|
386
388
|
os << " lg_k : " << std::to_string(lg_k) << std::endl;
|
387
389
|
os << " seed hash : " << std::hex << compute_seed_hash(seed) << std::dec << std::endl;
|
@@ -392,14 +394,14 @@ string<A> cpc_sketch_alloc<A>::to_string() const {
|
|
392
394
|
os << " HIP estimate : " << hip_est_accum << std::endl;
|
393
395
|
os << " kxp : " << kxp << std::endl;
|
394
396
|
}
|
395
|
-
os << "
|
397
|
+
os << " interesting col: " << std::to_string(first_interesting_column) << std::endl;
|
396
398
|
os << " table entries : " << surprising_value_table.get_num_items() << std::endl;
|
397
399
|
os << " window : " << (sliding_window.size() == 0 ? "not " : "") << "allocated" << std::endl;
|
398
400
|
if (sliding_window.size() > 0) {
|
399
401
|
os << " window offset : " << std::to_string(window_offset) << std::endl;
|
400
402
|
}
|
401
403
|
os << "### End sketch summary" << std::endl;
|
402
|
-
return os.str();
|
404
|
+
return string<A>(os.str().c_str(), sliding_window.get_allocator());
|
403
405
|
}
|
404
406
|
|
405
407
|
template<typename A>
|
@@ -34,7 +34,7 @@ bit_matrix(allocator)
|
|
34
34
|
if (lg_k < CPC_MIN_LG_K || lg_k > CPC_MAX_LG_K) {
|
35
35
|
throw std::invalid_argument("lg_k must be >= " + std::to_string(CPC_MIN_LG_K) + " and <= " + std::to_string(CPC_MAX_LG_K) + ": " + std::to_string(lg_k));
|
36
36
|
}
|
37
|
-
accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(lg_k, seed, allocator);
|
37
|
+
accumulator = new (AllocCpc(allocator).allocate(1)) cpc_sketch_alloc<A>(lg_k, seed, allocator);
|
38
38
|
}
|
39
39
|
|
40
40
|
template<typename A>
|
@@ -45,7 +45,7 @@ accumulator(other.accumulator),
|
|
45
45
|
bit_matrix(other.bit_matrix)
|
46
46
|
{
|
47
47
|
if (accumulator != nullptr) {
|
48
|
-
accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(*other.accumulator);
|
48
|
+
accumulator = new (AllocCpc(accumulator->get_allocator()).allocate(1)) cpc_sketch_alloc<A>(*other.accumulator);
|
49
49
|
}
|
50
50
|
}
|
51
51
|
|
@@ -62,8 +62,9 @@ bit_matrix(std::move(other.bit_matrix))
|
|
62
62
|
template<typename A>
|
63
63
|
cpc_union_alloc<A>::~cpc_union_alloc() {
|
64
64
|
if (accumulator != nullptr) {
|
65
|
+
AllocCpc allocator(accumulator->get_allocator());
|
65
66
|
accumulator->~cpc_sketch_alloc<A>();
|
66
|
-
|
67
|
+
allocator.deallocate(accumulator, 1);
|
67
68
|
}
|
68
69
|
}
|
69
70
|
|
@@ -181,7 +182,7 @@ template<typename A>
|
|
181
182
|
cpc_sketch_alloc<A> cpc_union_alloc<A>::get_result_from_accumulator() const {
|
182
183
|
if (lg_k != accumulator->get_lg_k()) throw std::logic_error("lg_k != accumulator->lg_k");
|
183
184
|
if (accumulator->get_num_coupons() == 0) {
|
184
|
-
return cpc_sketch_alloc<A>(lg_k, seed);
|
185
|
+
return cpc_sketch_alloc<A>(lg_k, seed, accumulator->get_allocator());
|
185
186
|
}
|
186
187
|
if (accumulator->determine_flavor() != cpc_sketch_alloc<A>::flavor::SPARSE) throw std::logic_error("wrong flavor");
|
187
188
|
cpc_sketch_alloc<A> copy(*accumulator);
|
@@ -242,8 +243,9 @@ cpc_sketch_alloc<A> cpc_union_alloc<A>::get_result_from_bit_matrix() const {
|
|
242
243
|
template<typename A>
|
243
244
|
void cpc_union_alloc<A>::switch_to_bit_matrix() {
|
244
245
|
bit_matrix = accumulator->build_bit_matrix();
|
246
|
+
AllocCpc allocator(accumulator->get_allocator());
|
245
247
|
accumulator->~cpc_sketch_alloc<A>();
|
246
|
-
|
248
|
+
allocator.deallocate(accumulator, 1);
|
247
249
|
accumulator = nullptr;
|
248
250
|
}
|
249
251
|
|
@@ -324,7 +326,7 @@ void cpc_union_alloc<A>::reduce_k(uint8_t new_lg_k) {
|
|
324
326
|
if (bit_matrix.size() > 0) throw std::logic_error("bit_matrix is not expected");
|
325
327
|
if (!accumulator->is_empty()) {
|
326
328
|
cpc_sketch_alloc<A> old_accumulator(*accumulator);
|
327
|
-
*accumulator = cpc_sketch_alloc<A>(new_lg_k, seed);
|
329
|
+
*accumulator = cpc_sketch_alloc<A>(new_lg_k, seed, old_accumulator.get_allocator());
|
328
330
|
walk_table_updating_sketch(old_accumulator.surprising_value_table);
|
329
331
|
}
|
330
332
|
lg_k = new_lg_k;
|
@@ -25,6 +25,7 @@
|
|
25
25
|
#include <catch.hpp>
|
26
26
|
|
27
27
|
#include "cpc_sketch.hpp"
|
28
|
+
#include "cpc_union.hpp"
|
28
29
|
#include "test_allocator.hpp"
|
29
30
|
|
30
31
|
namespace datasketches {
|
@@ -234,4 +235,20 @@ TEST_CASE("cpc sketch allocation: serialize deserialize sliding, bytes", "[cpc_s
|
|
234
235
|
REQUIRE(test_allocator_net_allocations == 0);
|
235
236
|
}
|
236
237
|
|
238
|
+
using cpc_union_test_alloc = cpc_union_alloc<test_allocator<uint8_t>>;
|
239
|
+
|
240
|
+
TEST_CASE("cpc sketch allocation: union") {
|
241
|
+
cpc_sketch_test_alloc s1(11, DEFAULT_SEED, 0);
|
242
|
+
s1.update(1);
|
243
|
+
|
244
|
+
cpc_sketch_test_alloc s2(11, DEFAULT_SEED, 0);
|
245
|
+
s2.update(2);
|
246
|
+
|
247
|
+
cpc_union_test_alloc u(11, DEFAULT_SEED, 0);
|
248
|
+
u.update(s1);
|
249
|
+
u.update(s2);
|
250
|
+
auto s3 = u.get_result();
|
251
|
+
REQUIRE_FALSE(s3.is_empty());
|
252
|
+
}
|
253
|
+
|
237
254
|
} /* namespace datasketches */
|
@@ -32,23 +32,13 @@ target_include_directories(fi
|
|
32
32
|
target_link_libraries(fi INTERFACE common)
|
33
33
|
target_compile_features(fi INTERFACE cxx_std_11)
|
34
34
|
|
35
|
-
set(fi_HEADERS "")
|
36
|
-
list(APPEND fi_HEADERS "include/frequent_items_sketch.hpp")
|
37
|
-
list(APPEND fi_HEADERS "include/frequent_items_sketch_impl.hpp")
|
38
|
-
list(APPEND fi_HEADERS "include/reverse_purge_hash_map.hpp")
|
39
|
-
list(APPEND fi_HEADERS "include/reverse_purge_hash_map_impl.hpp")
|
40
|
-
|
41
35
|
install(TARGETS fi
|
42
36
|
EXPORT ${PROJECT_NAME}
|
43
37
|
)
|
44
38
|
|
45
|
-
install(FILES
|
39
|
+
install(FILES
|
40
|
+
include/frequent_items_sketch.hpp
|
41
|
+
include/frequent_items_sketch_impl.hpp
|
42
|
+
include/reverse_purge_hash_map.hpp
|
43
|
+
include/reverse_purge_hash_map_impl.hpp
|
46
44
|
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|
47
|
-
|
48
|
-
target_sources(fi
|
49
|
-
INTERFACE
|
50
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/frequent_items_sketch.hpp
|
51
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/frequent_items_sketch_impl.hpp
|
52
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/reverse_purge_hash_map.hpp
|
53
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/reverse_purge_hash_map_impl.hpp
|
54
|
-
)
|
@@ -421,7 +421,9 @@ void frequent_items_sketch<T, W, H, E, S, A>::check_size(uint8_t lg_cur_size, ui
|
|
421
421
|
|
422
422
|
template<typename T, typename W, typename H, typename E, typename S, typename A>
|
423
423
|
string<A> frequent_items_sketch<T, W, H, E, S, A>::to_string(bool print_items) const {
|
424
|
-
|
424
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
425
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
426
|
+
std::ostringstream os;
|
425
427
|
os << "### Frequent items sketch summary:" << std::endl;
|
426
428
|
os << " lg cur map size : " << (int) map.get_lg_cur_size() << std::endl;
|
427
429
|
os << " lg max map size : " << (int) map.get_lg_max_size() << std::endl;
|
@@ -444,7 +446,7 @@ string<A> frequent_items_sketch<T, W, H, E, S, A>::to_string(bool print_items) c
|
|
444
446
|
}
|
445
447
|
os << "### End items" << std::endl;
|
446
448
|
}
|
447
|
-
return os.str();
|
449
|
+
return string<A>(os.str().c_str(), map.get_allocator());
|
448
450
|
}
|
449
451
|
|
450
452
|
// version for integral signed type
|
@@ -32,64 +32,41 @@ target_include_directories(hll
|
|
32
32
|
target_link_libraries(hll INTERFACE common)
|
33
33
|
target_compile_features(hll INTERFACE cxx_std_11)
|
34
34
|
|
35
|
-
# TODO: would be useful if this didn't need to be reproduced in target_sources(), too
|
36
|
-
set(hll_HEADERS "")
|
37
|
-
list(APPEND hll_HEADERS "include/hll.hpp;include/AuxHashMap.hpp;include/CompositeInterpolationXTable.hpp")
|
38
|
-
list(APPEND hll_HEADERS "include/hll.private.hpp;include/HllSketchImplFactory.hpp")
|
39
|
-
list(APPEND hll_HEADERS "include/CouponHashSet.hpp;include/CouponList.hpp")
|
40
|
-
list(APPEND hll_HEADERS "include/CubicInterpolation.hpp;include/HarmonicNumbers.hpp;include/Hll4Array.hpp")
|
41
|
-
list(APPEND hll_HEADERS "include/Hll6Array.hpp;include/Hll8Array.hpp;include/HllArray.hpp")
|
42
|
-
list(APPEND hll_HEADERS "include/HllSketchImpl.hpp")
|
43
|
-
list(APPEND hll_HEADERS "include/HllUtil.hpp;include/coupon_iterator.hpp")
|
44
|
-
list(APPEND hll_HEADERS "include/RelativeErrorTables.hpp;include/AuxHashMap-internal.hpp")
|
45
|
-
list(APPEND hll_HEADERS "include/CompositeInterpolationXTable-internal.hpp")
|
46
|
-
list(APPEND hll_HEADERS "include/CouponHashSet-internal.hpp;include/CouponList-internal.hpp")
|
47
|
-
list(APPEND hll_HEADERS "include/CubicInterpolation-internal.hpp;include/HarmonicNumbers-internal.hpp")
|
48
|
-
list(APPEND hll_HEADERS "include/Hll4Array-internal.hpp;include/Hll6Array-internal.hpp")
|
49
|
-
list(APPEND hll_HEADERS "include/Hll8Array-internal.hpp;include/HllArray-internal.hpp")
|
50
|
-
list(APPEND hll_HEADERS "include/HllSketch-internal.hpp")
|
51
|
-
list(APPEND hll_HEADERS "include/HllSketchImpl-internal.hpp;include/HllUnion-internal.hpp")
|
52
|
-
list(APPEND hll_HEADERS "include/coupon_iterator-internal.hpp;include/RelativeErrorTables-internal.hpp")
|
53
|
-
|
54
35
|
install(TARGETS hll
|
55
36
|
EXPORT ${PROJECT_NAME}
|
56
37
|
)
|
57
38
|
|
58
|
-
install(FILES
|
39
|
+
install(FILES
|
40
|
+
include/hll.hpp
|
41
|
+
include/AuxHashMap.hpp
|
42
|
+
include/CompositeInterpolationXTable.hpp
|
43
|
+
include/hll.private.hpp
|
44
|
+
include/HllSketchImplFactory.hpp
|
45
|
+
include/CouponHashSet.hpp
|
46
|
+
include/CouponList.hpp
|
47
|
+
include/CubicInterpolation.hpp
|
48
|
+
include/HarmonicNumbers.hpp
|
49
|
+
include/Hll4Array.hpp
|
50
|
+
include/Hll6Array.hpp
|
51
|
+
include/Hll8Array.hpp
|
52
|
+
include/HllArray.hpp
|
53
|
+
include/HllSketchImpl.hpp
|
54
|
+
include/HllUtil.hpp
|
55
|
+
include/coupon_iterator.hpp
|
56
|
+
include/RelativeErrorTables.hpp
|
57
|
+
include/AuxHashMap-internal.hpp
|
58
|
+
include/CompositeInterpolationXTable-internal.hpp
|
59
|
+
include/CouponHashSet-internal.hpp
|
60
|
+
include/CouponList-internal.hpp
|
61
|
+
include/CubicInterpolation-internal.hpp
|
62
|
+
include/HarmonicNumbers-internal.hpp
|
63
|
+
include/Hll4Array-internal.hpp
|
64
|
+
include/Hll6Array-internal.hpp
|
65
|
+
include/Hll8Array-internal.hpp
|
66
|
+
include/HllArray-internal.hpp
|
67
|
+
include/HllSketch-internal.hpp
|
68
|
+
include/HllSketchImpl-internal.hpp
|
69
|
+
include/HllUnion-internal.hpp
|
70
|
+
include/coupon_iterator-internal.hpp
|
71
|
+
include/RelativeErrorTables-internal.hpp
|
59
72
|
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|
60
|
-
|
61
|
-
target_sources(hll
|
62
|
-
INTERFACE
|
63
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/hll.hpp
|
64
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/hll.private.hpp
|
65
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/AuxHashMap.hpp
|
66
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CompositeInterpolationXTable.hpp
|
67
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CouponHashSet.hpp
|
68
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CouponList.hpp
|
69
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CubicInterpolation.hpp
|
70
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HarmonicNumbers.hpp
|
71
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll4Array.hpp
|
72
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll6Array.hpp
|
73
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll8Array.hpp
|
74
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllArray.hpp
|
75
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImpl.hpp
|
76
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImplFactory.hpp
|
77
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllUtil.hpp
|
78
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/RelativeErrorTables.hpp
|
79
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/coupon_iterator.hpp
|
80
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/AuxHashMap-internal.hpp
|
81
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CompositeInterpolationXTable-internal.hpp
|
82
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CouponHashSet-internal.hpp
|
83
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CouponList-internal.hpp
|
84
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CubicInterpolation-internal.hpp
|
85
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HarmonicNumbers-internal.hpp
|
86
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll4Array-internal.hpp
|
87
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll6Array-internal.hpp
|
88
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll8Array-internal.hpp
|
89
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllArray-internal.hpp
|
90
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketch-internal.hpp
|
91
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImpl-internal.hpp
|
92
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllUnion-internal.hpp
|
93
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/RelativeErrorTables-internal.hpp
|
94
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/coupon_iterator-internal.hpp
|
95
|
-
)
|
@@ -246,10 +246,12 @@ string<A> hll_sketch_alloc<A>::to_string(const bool summary,
|
|
246
246
|
const bool detail,
|
247
247
|
const bool aux_detail,
|
248
248
|
const bool all) const {
|
249
|
-
|
249
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
250
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
251
|
+
std::stringstream os;
|
250
252
|
if (summary) {
|
251
253
|
os << "### HLL sketch summary:" << std::endl
|
252
|
-
<< " Log Config K : " << get_lg_config_k() << std::endl
|
254
|
+
<< " Log Config K : " << std::to_string(get_lg_config_k()) << std::endl
|
253
255
|
<< " Hll Target : " << type_as_string() << std::endl
|
254
256
|
<< " Current Mode : " << mode_as_string() << std::endl
|
255
257
|
<< " LB : " << get_lower_bound(1) << std::endl
|
@@ -258,7 +260,7 @@ string<A> hll_sketch_alloc<A>::to_string(const bool summary,
|
|
258
260
|
<< " OutOfOrder flag: " << (is_out_of_order_flag() ? "true" : "false") << std::endl;
|
259
261
|
if (get_current_mode() == HLL) {
|
260
262
|
HllArray<A>* hllArray = (HllArray<A>*) sketch_impl;
|
261
|
-
os << " CurMin : " << hllArray->getCurMin() << std::endl
|
263
|
+
os << " CurMin : " << std::to_string(hllArray->getCurMin()) << std::endl
|
262
264
|
<< " NumAtCurMin : " << hllArray->getNumAtCurMin() << std::endl
|
263
265
|
<< " HipAccum : " << hllArray->getHipAccum() << std::endl
|
264
266
|
<< " KxQ0 : " << hllArray->getKxQ0() << std::endl
|
@@ -338,7 +340,7 @@ string<A> hll_sketch_alloc<A>::to_string(const bool summary,
|
|
338
340
|
}
|
339
341
|
}
|
340
342
|
|
341
|
-
return os.str();
|
343
|
+
return string<A>(os.str().c_str(), sketch_impl->getAllocator());
|
342
344
|
}
|
343
345
|
|
344
346
|
template<typename A>
|