datasketches 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +7 -7
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +22 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -6
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +4 -2
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +4 -2
- data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +4 -2
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +13 -7
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +8 -6
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +89 -22
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +146 -51
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +8 -2
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -4
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -9
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +400 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +23 -11
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +33 -14
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +7 -0
- metadata +11 -6
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 897dbc30f97ce17f0415630b6347a0092dac05196b0ef61e80939410d65cdf17
|
4
|
+
data.tar.gz: 61302f9cadde8a8badc97b455eb5c32d913c3b1fea8ed571e2da93a29e65afa9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4d541ba7f96a86f3f8de44f069f6e39d51ba6f28fa5d8c8d1d99a8434a95c5fe1a26470e6b062f348808fe5c0a444134d0dc96385437b4cb946c4a92044a2a5c
|
7
|
+
data.tar.gz: bc1bdacb7cbe69f9bb1382fd2ac7019bec04baf444dc963d63a594e989fd201d9eb9aadd0e463ac4efef8f7ba53915a594d8fb00f74ae295674b9024269a0406
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1
|
-
# DataSketches
|
1
|
+
# DataSketches Ruby
|
2
2
|
|
3
3
|
[DataSketches](https://datasketches.apache.org/) - sketch data structures - for Ruby
|
4
4
|
|
5
|
-
[](https://github.com/ankane/datasketches/actions)
|
5
|
+
[](https://github.com/ankane/datasketches-ruby/actions)
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -292,22 +292,22 @@ This library is modeled after the DataSketches [Python API](https://github.com/a
|
|
292
292
|
|
293
293
|
## History
|
294
294
|
|
295
|
-
View the [changelog](https://github.com/ankane/datasketches/blob/master/CHANGELOG.md)
|
295
|
+
View the [changelog](https://github.com/ankane/datasketches-ruby/blob/master/CHANGELOG.md)
|
296
296
|
|
297
297
|
## Contributing
|
298
298
|
|
299
299
|
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
300
300
|
|
301
|
-
- [Report bugs](https://github.com/ankane/datasketches/issues)
|
302
|
-
- Fix bugs and [submit pull requests](https://github.com/ankane/datasketches/pulls)
|
301
|
+
- [Report bugs](https://github.com/ankane/datasketches-ruby/issues)
|
302
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/datasketches-ruby/pulls)
|
303
303
|
- Write, clarify, or fix documentation
|
304
304
|
- Suggest or add new features
|
305
305
|
|
306
306
|
To get started with development:
|
307
307
|
|
308
308
|
```sh
|
309
|
-
git clone --recursive https://github.com/ankane/datasketches.git
|
310
|
-
cd datasketches
|
309
|
+
git clone --recursive https://github.com/ankane/datasketches-ruby.git
|
310
|
+
cd datasketches-ruby
|
311
311
|
bundle install
|
312
312
|
bundle exec rake compile
|
313
313
|
bundle exec rake test
|
@@ -20,10 +20,26 @@ using Rice::Arg;
|
|
20
20
|
|
21
21
|
void init_theta(Rice::Module& m) {
|
22
22
|
Rice::define_class_under<theta_sketch>(m, "ThetaSketch")
|
23
|
-
.define_method(
|
24
|
-
|
25
|
-
|
26
|
-
|
23
|
+
.define_method(
|
24
|
+
"empty?",
|
25
|
+
[](theta_sketch& self) {
|
26
|
+
return self.is_empty();
|
27
|
+
})
|
28
|
+
.define_method(
|
29
|
+
"estimate",
|
30
|
+
[](theta_sketch& self) {
|
31
|
+
return self.get_estimate();
|
32
|
+
})
|
33
|
+
.define_method(
|
34
|
+
"lower_bound",
|
35
|
+
[](theta_sketch& self, uint8_t num_std_devs) {
|
36
|
+
return self.get_lower_bound(num_std_devs);
|
37
|
+
})
|
38
|
+
.define_method(
|
39
|
+
"upper_bound",
|
40
|
+
[](theta_sketch& self, uint8_t num_std_devs) {
|
41
|
+
return self.get_upper_bound(num_std_devs);
|
42
|
+
});
|
27
43
|
|
28
44
|
Rice::define_class_under<compact_theta_sketch, theta_sketch>(m, "CompactThetaSketch")
|
29
45
|
.define_singleton_function(
|
data/lib/datasketches/version.rb
CHANGED
@@ -17,7 +17,7 @@
|
|
17
17
|
|
18
18
|
cmake_minimum_required(VERSION 3.12.0)
|
19
19
|
project(DataSketches
|
20
|
-
VERSION
|
20
|
+
VERSION 3.2.0
|
21
21
|
LANGUAGES CXX)
|
22
22
|
|
23
23
|
include(GNUInstallDirs)
|
@@ -126,11 +126,30 @@ endif()
|
|
126
126
|
|
127
127
|
# # Installation
|
128
128
|
install(TARGETS datasketches
|
129
|
-
EXPORT ${
|
129
|
+
EXPORT ${PROJECT_NAME}
|
130
130
|
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
|
131
131
|
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
|
132
132
|
)
|
133
133
|
|
134
|
+
# Packaging
|
135
|
+
include(CMakePackageConfigHelpers)
|
136
|
+
write_basic_package_version_file(
|
137
|
+
"${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
|
138
|
+
VERSION ${PROJECT_VERSION}
|
139
|
+
COMPATIBILITY SameMajorVersion
|
140
|
+
)
|
141
|
+
configure_package_config_file(
|
142
|
+
cmake/DataSketchesConfig.cmake.in
|
143
|
+
"${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
|
144
|
+
INSTALL_DESTINATION lib/DataSketches/cmake
|
145
|
+
PATH_VARS CMAKE_INSTALL_INCLUDEDIR
|
146
|
+
)
|
147
|
+
install(EXPORT ${PROJECT_NAME} DESTINATION lib/DataSketches/cmake)
|
148
|
+
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
|
149
|
+
"${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
|
150
|
+
DESTINATION lib/DataSketches/cmake)
|
151
|
+
|
152
|
+
|
134
153
|
#set(CPACK_PROJECT_NAME ${PROJECT_NAME})
|
135
154
|
#set(CPACK_PROJECT_VERSION ${PROJECT_VERSION})
|
136
|
-
|
155
|
+
include(CPack)
|
@@ -25,18 +25,85 @@ Installing the latest cmake on OSX: brew install cmake
|
|
25
25
|
Building and running unit tests using cmake for OSX and Linux:
|
26
26
|
|
27
27
|
```
|
28
|
-
|
29
|
-
|
30
|
-
$ make
|
31
|
-
$ make test
|
28
|
+
$ cmake -S . -B build/Release -DCMAKE_BUILD_TYPE=Release
|
29
|
+
$ cmake --build build/Release -t all test
|
32
30
|
```
|
33
31
|
|
34
32
|
Building and running unit tests using cmake for Windows from the command line:
|
35
33
|
|
36
34
|
```
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
35
|
+
$ cd build
|
36
|
+
$ cmake ..
|
37
|
+
$ cd ..
|
38
|
+
$ cmake --build build --config Release
|
39
|
+
$ cmake --build build --config Release --target RUN_TESTS
|
42
40
|
```
|
41
|
+
|
42
|
+
To install a local distribution (OSX and Linux), use the following command. The
|
43
|
+
CMAKE_INSTALL_PREFIX variable controls the destination. If not specified, it
|
44
|
+
defaults to installing in /usr/local (/usr/local/include, /usr/local/lib, etc). In the command below,
|
45
|
+
the installation will be in /tmp/install/DataSketches (/tmp/install/DataSketches/include,
|
46
|
+
/tmp/install/DataSketches/lib, etc)
|
47
|
+
|
48
|
+
```
|
49
|
+
$ cmake -S . -B build/Release -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/tmp/install/DataSketches
|
50
|
+
$ cmake --build build/Release -t install
|
51
|
+
```
|
52
|
+
|
53
|
+
To generate an installable package using CMake's built-in cpack packaging tool,
|
54
|
+
use the following command. The type of packaging is controlled by the CPACK_GENERATOR
|
55
|
+
variable (semicolon-separated list). CMake usually supports packaging types such as RPM,
|
56
|
+
DEB, STGZ, TGZ, TZ, ZIP, etc.
|
57
|
+
|
58
|
+
```
|
59
|
+
$ cmake3 -S . -B build/Release -DCMAKE_BUILD_TYPE=Release -DCPACK_GENERATOR="RPM;STGZ;TGZ"
|
60
|
+
$ cmake3 --build build/Release -t package
|
61
|
+
```
|
62
|
+
|
63
|
+
The DataSketches project can be included in other projects' CMakeLists.txt files in one of two ways.
|
64
|
+
If DataSketches has been installed on the host (using an RPM, DEB, "make install" into /usr/local, or some other
|
65
|
+
way), then CMake's `find_package` command can be used like this:
|
66
|
+
|
67
|
+
```
|
68
|
+
find_package(DataSketches 3.2 REQUIRED)
|
69
|
+
target_link_libraries(my_dependent_target PUBLIC ${DATASKETCHES_LIB})
|
70
|
+
```
|
71
|
+
|
72
|
+
When used with find_package, DataSketches exports several variables, including
|
73
|
+
|
74
|
+
- `DATASKETCHES_VERSION`: The version number of the datasketches package that was imported.
|
75
|
+
- `DATASKETCHES_INCLUDE_DIR`: The directory that should be added to access DataSketches include files.
|
76
|
+
Because cmake automatically includes the interface directories for included target libraries when
|
77
|
+
using `target_link_libraries`, under normal circumstances there will be no need to include this directly.
|
78
|
+
- `DATASKETCHES_LIB`: The name of the DataSketches target to include as a dependency. Projects pulling
|
79
|
+
in DataSketches should reference this with `target_link_libraries` in order to set up all the correct dependencies
|
80
|
+
and include paths.
|
81
|
+
|
82
|
+
If you don't have DataSketches installed locally, dependent projects can pull it directly
|
83
|
+
from GitHub using CMake's `ExternalProject` module. The code would look something like this:
|
84
|
+
|
85
|
+
```
|
86
|
+
cmake_policy(SET CMP0097 NEW)
|
87
|
+
include(ExternalProject)
|
88
|
+
ExternalProject_Add(datasketches
|
89
|
+
GIT_REPOSITORY https://github.com/apache/datasketches-cpp.git
|
90
|
+
GIT_TAG 3.2.0
|
91
|
+
GIT_SHALLOW true
|
92
|
+
GIT_SUBMODULES ""
|
93
|
+
INSTALL_DIR /tmp/datasketches-prefix
|
94
|
+
CMAKE_ARGS -DBUILD_TESTS=OFF -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=/tmp/datasketches-prefix
|
95
|
+
|
96
|
+
# Override the install command to add DESTDIR
|
97
|
+
# This is necessary to work around an oddity in the RPM (but not other) package
|
98
|
+
# generation, as CMake otherwise picks up the DataSketches files when building
|
99
|
+
# an RPM for a dependent package. (RPM scans the directory for files in addition to installing
|
100
|
+
# those files referenced in an "install" rule in the cmake file)
|
101
|
+
INSTALL_COMMAND env DESTDIR= ${CMAKE_COMMAND} --build . --target install
|
102
|
+
)
|
103
|
+
ExternalProject_Get_property(datasketches INSTALL_DIR)
|
104
|
+
set(datasketches_INSTALL_DIR ${INSTALL_DIR})
|
105
|
+
message("Install dir of datasketches = ${datasketches_INSTALL_DIR}")
|
106
|
+
target_include_directories(my_dependent_target
|
107
|
+
PRIVATE ${datasketches_INSTALL_DIR}/include/DataSketches)
|
108
|
+
add_dependencies(my_dependent_target datasketches)
|
109
|
+
```
|
@@ -0,0 +1,10 @@
|
|
1
|
+
set(DATASKETCHES_VERSION "@PROJECT_VERSION@")
|
2
|
+
|
3
|
+
@PACKAGE_INIT@
|
4
|
+
|
5
|
+
include("${CMAKE_CURRENT_LIST_DIR}/DataSketches.cmake")
|
6
|
+
|
7
|
+
set_and_check(DATASKETCHES_INCLUDE_DIR "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@/DataSketches")
|
8
|
+
set(DATASKETCHES_LIB "datasketches")
|
9
|
+
|
10
|
+
check_required_components("@PROJECT_NAME@")
|
@@ -29,17 +29,18 @@ target_include_directories(common
|
|
29
29
|
|
30
30
|
target_compile_features(common INTERFACE cxx_std_11)
|
31
31
|
|
32
|
-
|
33
|
-
INTERFACE
|
34
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/common_defs.hpp
|
35
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/memory_operations.hpp
|
36
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/MurmurHash3.h
|
37
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/serde.hpp
|
38
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/count_zeros.hpp
|
39
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/inv_pow2_table.hpp
|
40
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/binomial_bounds.hpp
|
41
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_back_inserter.hpp
|
42
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_forward.hpp
|
43
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/ceiling_power_of_2.hpp
|
44
|
-
)
|
32
|
+
install(TARGETS common EXPORT ${PROJECT_NAME})
|
45
33
|
|
34
|
+
install(FILES
|
35
|
+
include/common_defs.hpp
|
36
|
+
include/memory_operations.hpp
|
37
|
+
include/MurmurHash3.h
|
38
|
+
include/serde.hpp
|
39
|
+
include/count_zeros.hpp
|
40
|
+
include/inv_pow2_table.hpp
|
41
|
+
include/binomial_bounds.hpp
|
42
|
+
include/conditional_back_inserter.hpp
|
43
|
+
include/conditional_forward.hpp
|
44
|
+
include/ceiling_power_of_2.hpp
|
45
|
+
include/bounds_binomial_proportions.hpp
|
46
|
+
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|
@@ -32,43 +32,23 @@ target_include_directories(cpc
|
|
32
32
|
target_link_libraries(cpc INTERFACE common)
|
33
33
|
target_compile_features(cpc INTERFACE cxx_std_11)
|
34
34
|
|
35
|
-
set(cpc_HEADERS "")
|
36
|
-
list(APPEND cpc_HEADERS "include/compression_data.hpp")
|
37
|
-
list(APPEND cpc_HEADERS "include/cpc_common.hpp")
|
38
|
-
list(APPEND cpc_HEADERS "include/cpc_compressor.hpp")
|
39
|
-
list(APPEND cpc_HEADERS "include/cpc_compressor_impl.hpp")
|
40
|
-
list(APPEND cpc_HEADERS "include/cpc_confidence.hpp")
|
41
|
-
list(APPEND cpc_HEADERS "include/cpc_sketch.hpp")
|
42
|
-
list(APPEND cpc_HEADERS "include/cpc_sketch_impl.hpp")
|
43
|
-
list(APPEND cpc_HEADERS "include/cpc_union.hpp")
|
44
|
-
list(APPEND cpc_HEADERS "include/cpc_union_impl.hpp")
|
45
|
-
list(APPEND cpc_HEADERS "include/cpc_util.hpp")
|
46
|
-
list(APPEND cpc_HEADERS "include/icon_estimator.hpp")
|
47
|
-
list(APPEND cpc_HEADERS "include/kxp_byte_lookup.hpp")
|
48
|
-
list(APPEND cpc_HEADERS "include/u32_table.hpp")
|
49
|
-
list(APPEND cpc_HEADERS "include/u32_table_impl.hpp")
|
50
|
-
|
51
35
|
install(TARGETS cpc
|
52
36
|
EXPORT ${PROJECT_NAME}
|
53
37
|
)
|
54
38
|
|
55
|
-
install(FILES
|
39
|
+
install(FILES
|
40
|
+
include/compression_data.hpp
|
41
|
+
include/cpc_common.hpp
|
42
|
+
include/cpc_compressor.hpp
|
43
|
+
include/cpc_compressor_impl.hpp
|
44
|
+
include/cpc_confidence.hpp
|
45
|
+
include/cpc_sketch.hpp
|
46
|
+
include/cpc_sketch_impl.hpp
|
47
|
+
include/cpc_union.hpp
|
48
|
+
include/cpc_union_impl.hpp
|
49
|
+
include/cpc_util.hpp
|
50
|
+
include/icon_estimator.hpp
|
51
|
+
include/kxp_byte_lookup.hpp
|
52
|
+
include/u32_table.hpp
|
53
|
+
include/u32_table_impl.hpp
|
56
54
|
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|
57
|
-
|
58
|
-
target_sources(cpc
|
59
|
-
INTERFACE
|
60
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/compression_data.hpp
|
61
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_common.hpp
|
62
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_compressor.hpp
|
63
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_compressor_impl.hpp
|
64
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_confidence.hpp
|
65
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_sketch.hpp
|
66
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_sketch_impl.hpp
|
67
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_union.hpp
|
68
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_union_impl.hpp
|
69
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/cpc_util.hpp
|
70
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/icon_estimator.hpp
|
71
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/kxp_byte_lookup.hpp
|
72
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/u32_table.hpp
|
73
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/u32_table_impl.hpp
|
74
|
-
)
|
@@ -381,7 +381,9 @@ void cpc_sketch_alloc<A>::refresh_kxp(const uint64_t* bit_matrix) {
|
|
381
381
|
|
382
382
|
template<typename A>
|
383
383
|
string<A> cpc_sketch_alloc<A>::to_string() const {
|
384
|
-
|
384
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
385
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
386
|
+
std::ostringstream os;
|
385
387
|
os << "### CPC sketch summary:" << std::endl;
|
386
388
|
os << " lg_k : " << std::to_string(lg_k) << std::endl;
|
387
389
|
os << " seed hash : " << std::hex << compute_seed_hash(seed) << std::dec << std::endl;
|
@@ -392,14 +394,14 @@ string<A> cpc_sketch_alloc<A>::to_string() const {
|
|
392
394
|
os << " HIP estimate : " << hip_est_accum << std::endl;
|
393
395
|
os << " kxp : " << kxp << std::endl;
|
394
396
|
}
|
395
|
-
os << "
|
397
|
+
os << " interesting col: " << std::to_string(first_interesting_column) << std::endl;
|
396
398
|
os << " table entries : " << surprising_value_table.get_num_items() << std::endl;
|
397
399
|
os << " window : " << (sliding_window.size() == 0 ? "not " : "") << "allocated" << std::endl;
|
398
400
|
if (sliding_window.size() > 0) {
|
399
401
|
os << " window offset : " << std::to_string(window_offset) << std::endl;
|
400
402
|
}
|
401
403
|
os << "### End sketch summary" << std::endl;
|
402
|
-
return os.str();
|
404
|
+
return string<A>(os.str().c_str(), sliding_window.get_allocator());
|
403
405
|
}
|
404
406
|
|
405
407
|
template<typename A>
|
@@ -34,7 +34,7 @@ bit_matrix(allocator)
|
|
34
34
|
if (lg_k < CPC_MIN_LG_K || lg_k > CPC_MAX_LG_K) {
|
35
35
|
throw std::invalid_argument("lg_k must be >= " + std::to_string(CPC_MIN_LG_K) + " and <= " + std::to_string(CPC_MAX_LG_K) + ": " + std::to_string(lg_k));
|
36
36
|
}
|
37
|
-
accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(lg_k, seed, allocator);
|
37
|
+
accumulator = new (AllocCpc(allocator).allocate(1)) cpc_sketch_alloc<A>(lg_k, seed, allocator);
|
38
38
|
}
|
39
39
|
|
40
40
|
template<typename A>
|
@@ -45,7 +45,7 @@ accumulator(other.accumulator),
|
|
45
45
|
bit_matrix(other.bit_matrix)
|
46
46
|
{
|
47
47
|
if (accumulator != nullptr) {
|
48
|
-
accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(*other.accumulator);
|
48
|
+
accumulator = new (AllocCpc(accumulator->get_allocator()).allocate(1)) cpc_sketch_alloc<A>(*other.accumulator);
|
49
49
|
}
|
50
50
|
}
|
51
51
|
|
@@ -62,8 +62,9 @@ bit_matrix(std::move(other.bit_matrix))
|
|
62
62
|
template<typename A>
|
63
63
|
cpc_union_alloc<A>::~cpc_union_alloc() {
|
64
64
|
if (accumulator != nullptr) {
|
65
|
+
AllocCpc allocator(accumulator->get_allocator());
|
65
66
|
accumulator->~cpc_sketch_alloc<A>();
|
66
|
-
|
67
|
+
allocator.deallocate(accumulator, 1);
|
67
68
|
}
|
68
69
|
}
|
69
70
|
|
@@ -181,7 +182,7 @@ template<typename A>
|
|
181
182
|
cpc_sketch_alloc<A> cpc_union_alloc<A>::get_result_from_accumulator() const {
|
182
183
|
if (lg_k != accumulator->get_lg_k()) throw std::logic_error("lg_k != accumulator->lg_k");
|
183
184
|
if (accumulator->get_num_coupons() == 0) {
|
184
|
-
return cpc_sketch_alloc<A>(lg_k, seed);
|
185
|
+
return cpc_sketch_alloc<A>(lg_k, seed, accumulator->get_allocator());
|
185
186
|
}
|
186
187
|
if (accumulator->determine_flavor() != cpc_sketch_alloc<A>::flavor::SPARSE) throw std::logic_error("wrong flavor");
|
187
188
|
cpc_sketch_alloc<A> copy(*accumulator);
|
@@ -242,8 +243,9 @@ cpc_sketch_alloc<A> cpc_union_alloc<A>::get_result_from_bit_matrix() const {
|
|
242
243
|
template<typename A>
|
243
244
|
void cpc_union_alloc<A>::switch_to_bit_matrix() {
|
244
245
|
bit_matrix = accumulator->build_bit_matrix();
|
246
|
+
AllocCpc allocator(accumulator->get_allocator());
|
245
247
|
accumulator->~cpc_sketch_alloc<A>();
|
246
|
-
|
248
|
+
allocator.deallocate(accumulator, 1);
|
247
249
|
accumulator = nullptr;
|
248
250
|
}
|
249
251
|
|
@@ -324,7 +326,7 @@ void cpc_union_alloc<A>::reduce_k(uint8_t new_lg_k) {
|
|
324
326
|
if (bit_matrix.size() > 0) throw std::logic_error("bit_matrix is not expected");
|
325
327
|
if (!accumulator->is_empty()) {
|
326
328
|
cpc_sketch_alloc<A> old_accumulator(*accumulator);
|
327
|
-
*accumulator = cpc_sketch_alloc<A>(new_lg_k, seed);
|
329
|
+
*accumulator = cpc_sketch_alloc<A>(new_lg_k, seed, old_accumulator.get_allocator());
|
328
330
|
walk_table_updating_sketch(old_accumulator.surprising_value_table);
|
329
331
|
}
|
330
332
|
lg_k = new_lg_k;
|
@@ -25,6 +25,7 @@
|
|
25
25
|
#include <catch.hpp>
|
26
26
|
|
27
27
|
#include "cpc_sketch.hpp"
|
28
|
+
#include "cpc_union.hpp"
|
28
29
|
#include "test_allocator.hpp"
|
29
30
|
|
30
31
|
namespace datasketches {
|
@@ -234,4 +235,20 @@ TEST_CASE("cpc sketch allocation: serialize deserialize sliding, bytes", "[cpc_s
|
|
234
235
|
REQUIRE(test_allocator_net_allocations == 0);
|
235
236
|
}
|
236
237
|
|
238
|
+
using cpc_union_test_alloc = cpc_union_alloc<test_allocator<uint8_t>>;
|
239
|
+
|
240
|
+
TEST_CASE("cpc sketch allocation: union") {
|
241
|
+
cpc_sketch_test_alloc s1(11, DEFAULT_SEED, 0);
|
242
|
+
s1.update(1);
|
243
|
+
|
244
|
+
cpc_sketch_test_alloc s2(11, DEFAULT_SEED, 0);
|
245
|
+
s2.update(2);
|
246
|
+
|
247
|
+
cpc_union_test_alloc u(11, DEFAULT_SEED, 0);
|
248
|
+
u.update(s1);
|
249
|
+
u.update(s2);
|
250
|
+
auto s3 = u.get_result();
|
251
|
+
REQUIRE_FALSE(s3.is_empty());
|
252
|
+
}
|
253
|
+
|
237
254
|
} /* namespace datasketches */
|
@@ -32,23 +32,13 @@ target_include_directories(fi
|
|
32
32
|
target_link_libraries(fi INTERFACE common)
|
33
33
|
target_compile_features(fi INTERFACE cxx_std_11)
|
34
34
|
|
35
|
-
set(fi_HEADERS "")
|
36
|
-
list(APPEND fi_HEADERS "include/frequent_items_sketch.hpp")
|
37
|
-
list(APPEND fi_HEADERS "include/frequent_items_sketch_impl.hpp")
|
38
|
-
list(APPEND fi_HEADERS "include/reverse_purge_hash_map.hpp")
|
39
|
-
list(APPEND fi_HEADERS "include/reverse_purge_hash_map_impl.hpp")
|
40
|
-
|
41
35
|
install(TARGETS fi
|
42
36
|
EXPORT ${PROJECT_NAME}
|
43
37
|
)
|
44
38
|
|
45
|
-
install(FILES
|
39
|
+
install(FILES
|
40
|
+
include/frequent_items_sketch.hpp
|
41
|
+
include/frequent_items_sketch_impl.hpp
|
42
|
+
include/reverse_purge_hash_map.hpp
|
43
|
+
include/reverse_purge_hash_map_impl.hpp
|
46
44
|
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|
47
|
-
|
48
|
-
target_sources(fi
|
49
|
-
INTERFACE
|
50
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/frequent_items_sketch.hpp
|
51
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/frequent_items_sketch_impl.hpp
|
52
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/reverse_purge_hash_map.hpp
|
53
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/reverse_purge_hash_map_impl.hpp
|
54
|
-
)
|
@@ -421,7 +421,9 @@ void frequent_items_sketch<T, W, H, E, S, A>::check_size(uint8_t lg_cur_size, ui
|
|
421
421
|
|
422
422
|
template<typename T, typename W, typename H, typename E, typename S, typename A>
|
423
423
|
string<A> frequent_items_sketch<T, W, H, E, S, A>::to_string(bool print_items) const {
|
424
|
-
|
424
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
425
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
426
|
+
std::ostringstream os;
|
425
427
|
os << "### Frequent items sketch summary:" << std::endl;
|
426
428
|
os << " lg cur map size : " << (int) map.get_lg_cur_size() << std::endl;
|
427
429
|
os << " lg max map size : " << (int) map.get_lg_max_size() << std::endl;
|
@@ -444,7 +446,7 @@ string<A> frequent_items_sketch<T, W, H, E, S, A>::to_string(bool print_items) c
|
|
444
446
|
}
|
445
447
|
os << "### End items" << std::endl;
|
446
448
|
}
|
447
|
-
return os.str();
|
449
|
+
return string<A>(os.str().c_str(), map.get_allocator());
|
448
450
|
}
|
449
451
|
|
450
452
|
// version for integral signed type
|
@@ -32,64 +32,41 @@ target_include_directories(hll
|
|
32
32
|
target_link_libraries(hll INTERFACE common)
|
33
33
|
target_compile_features(hll INTERFACE cxx_std_11)
|
34
34
|
|
35
|
-
# TODO: would be useful if this didn't need to be reproduced in target_sources(), too
|
36
|
-
set(hll_HEADERS "")
|
37
|
-
list(APPEND hll_HEADERS "include/hll.hpp;include/AuxHashMap.hpp;include/CompositeInterpolationXTable.hpp")
|
38
|
-
list(APPEND hll_HEADERS "include/hll.private.hpp;include/HllSketchImplFactory.hpp")
|
39
|
-
list(APPEND hll_HEADERS "include/CouponHashSet.hpp;include/CouponList.hpp")
|
40
|
-
list(APPEND hll_HEADERS "include/CubicInterpolation.hpp;include/HarmonicNumbers.hpp;include/Hll4Array.hpp")
|
41
|
-
list(APPEND hll_HEADERS "include/Hll6Array.hpp;include/Hll8Array.hpp;include/HllArray.hpp")
|
42
|
-
list(APPEND hll_HEADERS "include/HllSketchImpl.hpp")
|
43
|
-
list(APPEND hll_HEADERS "include/HllUtil.hpp;include/coupon_iterator.hpp")
|
44
|
-
list(APPEND hll_HEADERS "include/RelativeErrorTables.hpp;include/AuxHashMap-internal.hpp")
|
45
|
-
list(APPEND hll_HEADERS "include/CompositeInterpolationXTable-internal.hpp")
|
46
|
-
list(APPEND hll_HEADERS "include/CouponHashSet-internal.hpp;include/CouponList-internal.hpp")
|
47
|
-
list(APPEND hll_HEADERS "include/CubicInterpolation-internal.hpp;include/HarmonicNumbers-internal.hpp")
|
48
|
-
list(APPEND hll_HEADERS "include/Hll4Array-internal.hpp;include/Hll6Array-internal.hpp")
|
49
|
-
list(APPEND hll_HEADERS "include/Hll8Array-internal.hpp;include/HllArray-internal.hpp")
|
50
|
-
list(APPEND hll_HEADERS "include/HllSketch-internal.hpp")
|
51
|
-
list(APPEND hll_HEADERS "include/HllSketchImpl-internal.hpp;include/HllUnion-internal.hpp")
|
52
|
-
list(APPEND hll_HEADERS "include/coupon_iterator-internal.hpp;include/RelativeErrorTables-internal.hpp")
|
53
|
-
|
54
35
|
install(TARGETS hll
|
55
36
|
EXPORT ${PROJECT_NAME}
|
56
37
|
)
|
57
38
|
|
58
|
-
install(FILES
|
39
|
+
install(FILES
|
40
|
+
include/hll.hpp
|
41
|
+
include/AuxHashMap.hpp
|
42
|
+
include/CompositeInterpolationXTable.hpp
|
43
|
+
include/hll.private.hpp
|
44
|
+
include/HllSketchImplFactory.hpp
|
45
|
+
include/CouponHashSet.hpp
|
46
|
+
include/CouponList.hpp
|
47
|
+
include/CubicInterpolation.hpp
|
48
|
+
include/HarmonicNumbers.hpp
|
49
|
+
include/Hll4Array.hpp
|
50
|
+
include/Hll6Array.hpp
|
51
|
+
include/Hll8Array.hpp
|
52
|
+
include/HllArray.hpp
|
53
|
+
include/HllSketchImpl.hpp
|
54
|
+
include/HllUtil.hpp
|
55
|
+
include/coupon_iterator.hpp
|
56
|
+
include/RelativeErrorTables.hpp
|
57
|
+
include/AuxHashMap-internal.hpp
|
58
|
+
include/CompositeInterpolationXTable-internal.hpp
|
59
|
+
include/CouponHashSet-internal.hpp
|
60
|
+
include/CouponList-internal.hpp
|
61
|
+
include/CubicInterpolation-internal.hpp
|
62
|
+
include/HarmonicNumbers-internal.hpp
|
63
|
+
include/Hll4Array-internal.hpp
|
64
|
+
include/Hll6Array-internal.hpp
|
65
|
+
include/Hll8Array-internal.hpp
|
66
|
+
include/HllArray-internal.hpp
|
67
|
+
include/HllSketch-internal.hpp
|
68
|
+
include/HllSketchImpl-internal.hpp
|
69
|
+
include/HllUnion-internal.hpp
|
70
|
+
include/coupon_iterator-internal.hpp
|
71
|
+
include/RelativeErrorTables-internal.hpp
|
59
72
|
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|
60
|
-
|
61
|
-
target_sources(hll
|
62
|
-
INTERFACE
|
63
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/hll.hpp
|
64
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/hll.private.hpp
|
65
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/AuxHashMap.hpp
|
66
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CompositeInterpolationXTable.hpp
|
67
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CouponHashSet.hpp
|
68
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CouponList.hpp
|
69
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CubicInterpolation.hpp
|
70
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HarmonicNumbers.hpp
|
71
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll4Array.hpp
|
72
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll6Array.hpp
|
73
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll8Array.hpp
|
74
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllArray.hpp
|
75
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImpl.hpp
|
76
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImplFactory.hpp
|
77
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllUtil.hpp
|
78
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/RelativeErrorTables.hpp
|
79
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/coupon_iterator.hpp
|
80
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/AuxHashMap-internal.hpp
|
81
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CompositeInterpolationXTable-internal.hpp
|
82
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CouponHashSet-internal.hpp
|
83
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CouponList-internal.hpp
|
84
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CubicInterpolation-internal.hpp
|
85
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HarmonicNumbers-internal.hpp
|
86
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll4Array-internal.hpp
|
87
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll6Array-internal.hpp
|
88
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll8Array-internal.hpp
|
89
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllArray-internal.hpp
|
90
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketch-internal.hpp
|
91
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImpl-internal.hpp
|
92
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllUnion-internal.hpp
|
93
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/RelativeErrorTables-internal.hpp
|
94
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/coupon_iterator-internal.hpp
|
95
|
-
)
|
@@ -246,10 +246,12 @@ string<A> hll_sketch_alloc<A>::to_string(const bool summary,
|
|
246
246
|
const bool detail,
|
247
247
|
const bool aux_detail,
|
248
248
|
const bool all) const {
|
249
|
-
|
249
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
250
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
251
|
+
std::stringstream os;
|
250
252
|
if (summary) {
|
251
253
|
os << "### HLL sketch summary:" << std::endl
|
252
|
-
<< " Log Config K : " << get_lg_config_k() << std::endl
|
254
|
+
<< " Log Config K : " << std::to_string(get_lg_config_k()) << std::endl
|
253
255
|
<< " Hll Target : " << type_as_string() << std::endl
|
254
256
|
<< " Current Mode : " << mode_as_string() << std::endl
|
255
257
|
<< " LB : " << get_lower_bound(1) << std::endl
|
@@ -258,7 +260,7 @@ string<A> hll_sketch_alloc<A>::to_string(const bool summary,
|
|
258
260
|
<< " OutOfOrder flag: " << (is_out_of_order_flag() ? "true" : "false") << std::endl;
|
259
261
|
if (get_current_mode() == HLL) {
|
260
262
|
HllArray<A>* hllArray = (HllArray<A>*) sketch_impl;
|
261
|
-
os << " CurMin : " << hllArray->getCurMin() << std::endl
|
263
|
+
os << " CurMin : " << std::to_string(hllArray->getCurMin()) << std::endl
|
262
264
|
<< " NumAtCurMin : " << hllArray->getNumAtCurMin() << std::endl
|
263
265
|
<< " HipAccum : " << hllArray->getHipAccum() << std::endl
|
264
266
|
<< " KxQ0 : " << hllArray->getKxQ0() << std::endl
|
@@ -338,7 +340,7 @@ string<A> hll_sketch_alloc<A>::to_string(const bool summary,
|
|
338
340
|
}
|
339
341
|
}
|
340
342
|
|
341
|
-
return os.str();
|
343
|
+
return string<A>(os.str().c_str(), sketch_impl->getAllocator());
|
342
344
|
}
|
343
345
|
|
344
346
|
template<typename A>
|