datasketches 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/ext/datasketches/cpc_wrapper.cpp +1 -1
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +22 -20
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +25 -27
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +8 -6
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +11 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +5 -4
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +6 -0
- data/vendor/datasketches-cpp/count/CMakeLists.txt +42 -0
- data/vendor/datasketches-cpp/count/include/count_min.hpp +351 -0
- data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +517 -0
- data/vendor/datasketches-cpp/count/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +155 -0
- data/vendor/datasketches-cpp/count/test/count_min_test.cpp +306 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +16 -8
- data/vendor/datasketches-cpp/density/CMakeLists.txt +42 -0
- data/vendor/datasketches-cpp/density/include/density_sketch.hpp +236 -0
- data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +543 -0
- data/vendor/datasketches-cpp/density/test/CMakeLists.txt +35 -0
- data/vendor/datasketches-cpp/density/test/density_sketch_test.cpp +244 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +9 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +19 -11
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +2 -5
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +19 -7
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +98 -42
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +92 -59
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +16 -6
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +3 -21
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +8 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +14 -6
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +8 -2
- data/vendor/datasketches-cpp/hll/include/hll.hpp +9 -8
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +7 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -1
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +8 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +2 -2
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +6 -0
- data/vendor/datasketches-cpp/python/README.md +5 -5
- data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +87 -0
- data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +35 -0
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +15 -9
- data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +77 -0
- data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +205 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/include/kernel_function.hpp +98 -0
- data/vendor/datasketches-cpp/python/include/py_object_lt.hpp +37 -0
- data/vendor/datasketches-cpp/python/include/py_object_ostream.hpp +48 -0
- data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +104 -0
- data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +136 -0
- data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +101 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +16 -30
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +6 -0
- data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +95 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +127 -73
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +28 -36
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +108 -160
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +5 -4
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +99 -148
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +117 -178
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +67 -73
- data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +215 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/tests/count_min_test.py +86 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +10 -10
- data/vendor/datasketches-cpp/python/tests/density_test.py +93 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +41 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +19 -20
- data/vendor/datasketches-cpp/python/tests/kll_test.py +40 -6
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +39 -5
- data/vendor/datasketches-cpp/python/tests/req_test.py +38 -5
- data/vendor/datasketches-cpp/python/tests/theta_test.py +16 -14
- data/vendor/datasketches-cpp/python/tests/tuple_test.py +206 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +7 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +8 -3
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +4 -4
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +0 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +8 -3
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +2 -2
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +20 -6
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +30 -16
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -1
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +19 -15
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +33 -14
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -2
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +6279 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +14 -8
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +60 -46
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +4 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +58 -10
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +430 -130
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +16 -4
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +2 -2
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/bit_packing_test.cpp +80 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +42 -3
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +2 -1
- data/vendor/datasketches-cpp/version.cfg.in +1 -1
- metadata +31 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e0c6cc871dc7c8726b31146fe287dd337e6aa5c1c70a234d2ad77b5bed74aca2
|
|
4
|
+
data.tar.gz: c5e89d9d28069aec80863fa6e1da339457dcc04f938d3b3506a3dfbb66899eee
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 36459e338671867ae63de590a6807f801891a9821336178e64bca0d9b1615c263aac5160b4f1cf74796c5756bfee4a36335585db6354f0d2e7a77765236e9730
|
|
7
|
+
data.tar.gz: dff58e6888788ce030484c6027f9a9bfee42b0315a18f6a2b02695336be482a7a255576efe0b128b2dbee99f50fd5184c788dbe799815a5a8839143d2b3642f3
|
data/CHANGELOG.md
CHANGED
data/lib/datasketches/version.rb
CHANGED
|
@@ -34,7 +34,7 @@ include(CMakeDependentOption)
|
|
|
34
34
|
### Require out-of-source builds
|
|
35
35
|
file(TO_CMAKE_PATH "${PROJECT_BINARY_DIR}/CMakeLists.txt" LOC_PATH)
|
|
36
36
|
if(EXISTS "${LOC_PATH}")
|
|
37
|
-
|
|
37
|
+
message(FATAL_ERROR "You cannot build in a source directory (or any directory with a CMakeLists.txt file). Please make a build subdirectory. Feel free to remove CMakeCache.txt and CMakeFiles.")
|
|
38
38
|
endif()
|
|
39
39
|
|
|
40
40
|
# Ensure builds on Windows export all symbols
|
|
@@ -69,7 +69,7 @@ set(CMAKE_CXX_EXTENSIONS OFF)
|
|
|
69
69
|
###### OPTIONS ######
|
|
70
70
|
# Enable testing
|
|
71
71
|
option(BUILD_TESTS "Build unit tests" ON)
|
|
72
|
-
if (BUILD_TESTS)
|
|
72
|
+
if (BUILD_TESTS)
|
|
73
73
|
enable_testing()
|
|
74
74
|
endif()
|
|
75
75
|
|
|
@@ -93,10 +93,10 @@ set(default_build_type "Release")
|
|
|
93
93
|
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
|
94
94
|
message(STATUS "Setting build type to '${default_build_type}' as none was specified.")
|
|
95
95
|
set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE
|
|
96
|
-
|
|
96
|
+
STRING "Choose the type of build." FORCE)
|
|
97
97
|
# Set the possible values of build type for cmake-gui
|
|
98
98
|
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
|
|
99
|
-
|
|
99
|
+
"Debug" "Release" "MinSizeRel" "RelWithDebInfo")
|
|
100
100
|
endif()
|
|
101
101
|
|
|
102
102
|
###### TARGETS ######
|
|
@@ -115,47 +115,49 @@ add_subdirectory(sampling)
|
|
|
115
115
|
add_subdirectory(tuple)
|
|
116
116
|
add_subdirectory(req)
|
|
117
117
|
add_subdirectory(quantiles)
|
|
118
|
+
add_subdirectory(count)
|
|
119
|
+
add_subdirectory(density)
|
|
118
120
|
|
|
119
121
|
if (WITH_PYTHON)
|
|
120
122
|
add_subdirectory(python)
|
|
121
123
|
endif()
|
|
122
124
|
|
|
123
|
-
target_link_libraries(datasketches INTERFACE hll cpc kll fi theta sampling req quantiles)
|
|
125
|
+
target_link_libraries(datasketches INTERFACE hll cpc kll fi theta sampling req quantiles count)
|
|
124
126
|
|
|
125
127
|
if (COVERAGE)
|
|
126
128
|
find_program(LCOV_PATH NAMES "lcov")
|
|
127
129
|
find_program(GENHTML_PATH NAMES "genhtml")
|
|
128
130
|
if (NOT LCOV_PATH-NOTFOUND AND NOT GENHTML_PATH-NOTFOUND)
|
|
129
131
|
add_custom_target(coverage_report
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
132
|
+
COMMAND ${LCOV_PATH} --capture --exclude '*/test/*' --exclude '/Library/*' --exclude '/usr/include/*' --directory . --output-file lcov.info
|
|
133
|
+
COMMAND ${GENHTML_PATH} --legend lcov.info --output-directory coverage --demangle-cpp)
|
|
134
|
+
endif()
|
|
133
135
|
endif()
|
|
134
136
|
|
|
135
137
|
|
|
136
138
|
# # Installation
|
|
137
139
|
install(TARGETS datasketches
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
)
|
|
140
|
+
EXPORT ${PROJECT_NAME}
|
|
141
|
+
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
|
|
142
|
+
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
|
|
143
|
+
)
|
|
142
144
|
|
|
143
145
|
# Packaging
|
|
144
146
|
include(CMakePackageConfigHelpers)
|
|
145
147
|
write_basic_package_version_file(
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
148
|
+
"${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
|
|
149
|
+
VERSION ${PROJECT_VERSION}
|
|
150
|
+
COMPATIBILITY SameMajorVersion
|
|
149
151
|
)
|
|
150
152
|
configure_package_config_file(
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
153
|
+
cmake/DataSketchesConfig.cmake.in
|
|
154
|
+
"${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
|
|
155
|
+
INSTALL_DESTINATION lib/DataSketches/cmake
|
|
156
|
+
PATH_VARS CMAKE_INSTALL_INCLUDEDIR
|
|
155
157
|
)
|
|
156
158
|
install(EXPORT ${PROJECT_NAME} DESTINATION lib/DataSketches/cmake)
|
|
157
159
|
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
|
|
158
|
-
|
|
160
|
+
"${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
|
|
159
161
|
DESTINATION lib/DataSketches/cmake)
|
|
160
162
|
|
|
161
163
|
|
|
@@ -29,14 +29,13 @@ typedef unsigned char uint8_t;
|
|
|
29
29
|
typedef unsigned int uint32_t;
|
|
30
30
|
typedef unsigned __int64 uint64_t;
|
|
31
31
|
|
|
32
|
-
#define
|
|
32
|
+
#define MURMUR3_FORCE_INLINE __forceinline
|
|
33
33
|
|
|
34
34
|
#include <stdlib.h>
|
|
35
35
|
|
|
36
|
-
#define
|
|
37
|
-
#define ROTL64(x,y) _rotl64(x,y)
|
|
36
|
+
#define MURMUR3_ROTL64(x,y) _rotl64(x,y)
|
|
38
37
|
|
|
39
|
-
#define
|
|
38
|
+
#define MURMUR3_BIG_CONSTANT(x) (x)
|
|
40
39
|
|
|
41
40
|
// Other compilers
|
|
42
41
|
|
|
@@ -44,22 +43,16 @@ typedef unsigned __int64 uint64_t;
|
|
|
44
43
|
|
|
45
44
|
#include <stdint.h>
|
|
46
45
|
|
|
47
|
-
#define
|
|
48
|
-
|
|
49
|
-
inline uint32_t rotl32 ( uint32_t x, int8_t r )
|
|
50
|
-
{
|
|
51
|
-
return (x << r) | (x >> (32 - r));
|
|
52
|
-
}
|
|
46
|
+
#define MURMUR3_FORCE_INLINE inline __attribute__((always_inline))
|
|
53
47
|
|
|
54
48
|
inline uint64_t rotl64 ( uint64_t x, int8_t r )
|
|
55
49
|
{
|
|
56
50
|
return (x << r) | (x >> (64 - r));
|
|
57
51
|
}
|
|
58
52
|
|
|
59
|
-
#define
|
|
60
|
-
#define ROTL64(x,y) rotl64(x,y)
|
|
53
|
+
#define MURMUR3_ROTL64(x,y) rotl64(x,y)
|
|
61
54
|
|
|
62
|
-
#define
|
|
55
|
+
#define MURMUR3_BIG_CONSTANT(x) (x##LLU)
|
|
63
56
|
|
|
64
57
|
#endif // !defined(_MSC_VER)
|
|
65
58
|
|
|
@@ -78,7 +71,7 @@ typedef struct {
|
|
|
78
71
|
// Block read - if your platform needs to do endian-swapping or can only
|
|
79
72
|
// handle aligned reads, do the conversion here
|
|
80
73
|
|
|
81
|
-
|
|
74
|
+
MURMUR3_FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, size_t i )
|
|
82
75
|
{
|
|
83
76
|
uint64_t res;
|
|
84
77
|
memcpy(&res, p + i, sizeof(res));
|
|
@@ -88,20 +81,21 @@ FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, size_t i )
|
|
|
88
81
|
//-----------------------------------------------------------------------------
|
|
89
82
|
// Finalization mix - force all bits of a hash block to avalanche
|
|
90
83
|
|
|
91
|
-
|
|
84
|
+
MURMUR3_FORCE_INLINE uint64_t fmix64 ( uint64_t k )
|
|
92
85
|
{
|
|
93
86
|
k ^= k >> 33;
|
|
94
|
-
k *=
|
|
87
|
+
k *= MURMUR3_BIG_CONSTANT(0xff51afd7ed558ccd);
|
|
95
88
|
k ^= k >> 33;
|
|
96
|
-
k *=
|
|
89
|
+
k *= MURMUR3_BIG_CONSTANT(0xc4ceb9fe1a85ec53);
|
|
97
90
|
k ^= k >> 33;
|
|
98
91
|
|
|
99
92
|
return k;
|
|
100
93
|
}
|
|
101
94
|
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
static const uint64_t
|
|
95
|
+
MURMUR3_FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes,
|
|
96
|
+
uint64_t seed, HashState& out) {
|
|
97
|
+
static const uint64_t c1 = MURMUR3_BIG_CONSTANT(0x87c37b91114253d5);
|
|
98
|
+
static const uint64_t c2 = MURMUR3_BIG_CONSTANT(0x4cf5ad432745937f);
|
|
105
99
|
|
|
106
100
|
const uint8_t* data = (const uint8_t*)key;
|
|
107
101
|
|
|
@@ -118,13 +112,13 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t
|
|
|
118
112
|
uint64_t k1 = getblock64(blocks, i * 2 + 0);
|
|
119
113
|
uint64_t k2 = getblock64(blocks, i * 2 + 1);
|
|
120
114
|
|
|
121
|
-
k1 *= c1; k1 =
|
|
122
|
-
out.h1 =
|
|
115
|
+
k1 *= c1; k1 = MURMUR3_ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
|
|
116
|
+
out.h1 = MURMUR3_ROTL64(out.h1,27);
|
|
123
117
|
out.h1 += out.h2;
|
|
124
118
|
out.h1 = out.h1*5+0x52dce729;
|
|
125
119
|
|
|
126
|
-
k2 *= c2; k2 =
|
|
127
|
-
out.h2 =
|
|
120
|
+
k2 *= c2; k2 = MURMUR3_ROTL64(k2,33); k2 *= c1; out.h2 ^= k2;
|
|
121
|
+
out.h2 = MURMUR3_ROTL64(out.h2,31);
|
|
128
122
|
out.h2 += out.h1;
|
|
129
123
|
out.h2 = out.h2*5+0x38495ab5;
|
|
130
124
|
}
|
|
@@ -144,7 +138,7 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t
|
|
|
144
138
|
case 11: k2 ^= ((uint64_t)tail[10]) << 16; // falls through
|
|
145
139
|
case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; // falls through
|
|
146
140
|
case 9: k2 ^= ((uint64_t)tail[ 8]) << 0;
|
|
147
|
-
k2 *= c2; k2 =
|
|
141
|
+
k2 *= c2; k2 = MURMUR3_ROTL64(k2,33); k2 *= c1; out.h2 ^= k2;
|
|
148
142
|
// falls through
|
|
149
143
|
case 8: k1 ^= ((uint64_t)tail[ 7]) << 56; // falls through
|
|
150
144
|
case 7: k1 ^= ((uint64_t)tail[ 6]) << 48; // falls through
|
|
@@ -154,7 +148,7 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t
|
|
|
154
148
|
case 3: k1 ^= ((uint64_t)tail[ 2]) << 16; // falls through
|
|
155
149
|
case 2: k1 ^= ((uint64_t)tail[ 1]) << 8; // falls through
|
|
156
150
|
case 1: k1 ^= ((uint64_t)tail[ 0]) << 0;
|
|
157
|
-
k1 *= c1; k1 =
|
|
151
|
+
k1 *= c1; k1 = MURMUR3_ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
|
|
158
152
|
};
|
|
159
153
|
|
|
160
154
|
//----------
|
|
@@ -175,10 +169,14 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t
|
|
|
175
169
|
|
|
176
170
|
//-----------------------------------------------------------------------------
|
|
177
171
|
|
|
178
|
-
|
|
172
|
+
MURMUR3_FORCE_INLINE uint16_t compute_seed_hash(uint64_t seed) {
|
|
179
173
|
HashState hashes;
|
|
180
174
|
MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
|
|
181
175
|
return static_cast<uint16_t>(hashes.h1 & 0xffff);
|
|
182
176
|
}
|
|
183
177
|
|
|
178
|
+
#undef MURMUR3_FORCE_INLINE
|
|
179
|
+
#undef MURMUR3_ROTL64
|
|
180
|
+
#undef MURMUR3_BIG_CONSTANT
|
|
181
|
+
|
|
184
182
|
#endif // _MURMURHASH3_H_
|
|
@@ -26,6 +26,7 @@
|
|
|
26
26
|
#include <iostream>
|
|
27
27
|
#include <random>
|
|
28
28
|
#include <chrono>
|
|
29
|
+
#include <thread>
|
|
29
30
|
|
|
30
31
|
namespace datasketches {
|
|
31
32
|
|
|
@@ -36,15 +37,16 @@ enum resize_factor { X1 = 0, X2, X4, X8 };
|
|
|
36
37
|
template<typename A> using AllocChar = typename std::allocator_traits<A>::template rebind_alloc<char>;
|
|
37
38
|
template<typename A> using string = std::basic_string<char, std::char_traits<char>, AllocChar<A>>;
|
|
38
39
|
|
|
39
|
-
// random bit
|
|
40
|
-
static std::independent_bits_engine<std::mt19937, 1, uint32_t>
|
|
41
|
-
random_bit(static_cast<uint32_t>(std::chrono::system_clock::now().time_since_epoch().count()
|
|
40
|
+
// thread-safe random bit
|
|
41
|
+
static thread_local std::independent_bits_engine<std::mt19937, 1, uint32_t>
|
|
42
|
+
random_bit(static_cast<uint32_t>(std::chrono::system_clock::now().time_since_epoch().count()
|
|
43
|
+
+ std::hash<std::thread::id>{}(std::this_thread::get_id())));
|
|
42
44
|
|
|
43
45
|
// common random declarations
|
|
44
46
|
namespace random_utils {
|
|
45
47
|
static std::random_device rd; // possibly unsafe in MinGW with GCC < 9.2
|
|
46
|
-
static std::mt19937_64 rand(rd());
|
|
47
|
-
static std::uniform_real_distribution<> next_double(0.0, 1.0);
|
|
48
|
+
static thread_local std::mt19937_64 rand(rd());
|
|
49
|
+
static thread_local std::uniform_real_distribution<> next_double(0.0, 1.0);
|
|
48
50
|
}
|
|
49
51
|
|
|
50
52
|
|
|
@@ -77,7 +79,7 @@ static inline void read(std::istream& is, T* ptr, size_t size_bytes) {
|
|
|
77
79
|
}
|
|
78
80
|
|
|
79
81
|
template<typename T>
|
|
80
|
-
static inline void write(std::ostream& os, T
|
|
82
|
+
static inline void write(std::ostream& os, T value) {
|
|
81
83
|
os.write(reinterpret_cast<const char*>(&value), sizeof(T));
|
|
82
84
|
}
|
|
83
85
|
|
|
@@ -91,6 +91,17 @@ static inline uint8_t count_leading_zeros_in_u64(uint64_t input) {
|
|
|
91
91
|
return 56 + byte_leading_zeros_table[(input ) & FCLZ_MASK_08];
|
|
92
92
|
}
|
|
93
93
|
|
|
94
|
+
static inline uint8_t count_leading_zeros_in_u32(uint32_t input) {
|
|
95
|
+
if (input > FCLZ_MASK_24)
|
|
96
|
+
return byte_leading_zeros_table[(input >> 24) & FCLZ_MASK_08];
|
|
97
|
+
if (input > FCLZ_MASK_16)
|
|
98
|
+
return 8 + byte_leading_zeros_table[(input >> 16) & FCLZ_MASK_08];
|
|
99
|
+
if (input > FCLZ_MASK_08)
|
|
100
|
+
return 16 + byte_leading_zeros_table[(input >> 8) & FCLZ_MASK_08];
|
|
101
|
+
if (true)
|
|
102
|
+
return 24 + byte_leading_zeros_table[(input ) & FCLZ_MASK_08];
|
|
103
|
+
}
|
|
104
|
+
|
|
94
105
|
static inline uint8_t count_trailing_zeros_in_u32(uint32_t input) {
|
|
95
106
|
for (int i = 0; i < 4; i++) {
|
|
96
107
|
const int byte = input & 0xff;
|
|
@@ -24,6 +24,7 @@
|
|
|
24
24
|
#include <exception>
|
|
25
25
|
#include <iostream>
|
|
26
26
|
#include <string>
|
|
27
|
+
#include <cstring>
|
|
27
28
|
|
|
28
29
|
namespace datasketches {
|
|
29
30
|
|
|
@@ -54,14 +55,14 @@ static inline size_t copy_to_mem(const void* src, void* dst, size_t size) {
|
|
|
54
55
|
}
|
|
55
56
|
|
|
56
57
|
template<typename T>
|
|
57
|
-
static inline size_t
|
|
58
|
-
memcpy(
|
|
58
|
+
static inline size_t copy_from_mem(const void* src, T& item) {
|
|
59
|
+
memcpy(&item, src, sizeof(T));
|
|
59
60
|
return sizeof(T);
|
|
60
61
|
}
|
|
61
62
|
|
|
62
63
|
template<typename T>
|
|
63
|
-
static inline size_t
|
|
64
|
-
memcpy(&item,
|
|
64
|
+
static inline size_t copy_to_mem(T item, void* dst) {
|
|
65
|
+
memcpy(dst, &item, sizeof(T));
|
|
65
66
|
return sizeof(T);
|
|
66
67
|
}
|
|
67
68
|
|
|
@@ -74,7 +74,7 @@ target_sources(common_test
|
|
|
74
74
|
# now the integration test part
|
|
75
75
|
add_executable(integration_test)
|
|
76
76
|
|
|
77
|
-
target_link_libraries(integration_test cpc fi hll kll req sampling theta tuple common_test_lib)
|
|
77
|
+
target_link_libraries(integration_test count cpc density fi hll kll req sampling theta tuple common_test_lib)
|
|
78
78
|
|
|
79
79
|
set_target_properties(integration_test PROPERTIES
|
|
80
80
|
CXX_STANDARD 11
|
|
@@ -19,8 +19,10 @@
|
|
|
19
19
|
|
|
20
20
|
#include <catch2/catch.hpp>
|
|
21
21
|
|
|
22
|
+
#include "count_min.hpp"
|
|
22
23
|
#include "cpc_sketch.hpp"
|
|
23
24
|
#include "cpc_union.hpp"
|
|
25
|
+
#include "density_sketch.hpp"
|
|
24
26
|
#include "frequent_items_sketch.hpp"
|
|
25
27
|
#include "hll.hpp"
|
|
26
28
|
#include "kll_sketch.hpp"
|
|
@@ -48,9 +50,13 @@ struct subtracting_intersection_policy {
|
|
|
48
50
|
using tuple_intersection_float = tuple_intersection<float, subtracting_intersection_policy<float>>;
|
|
49
51
|
|
|
50
52
|
TEST_CASE("integration: declare all sketches", "[integration]") {
|
|
53
|
+
count_min_sketch<double> cm(5, 128);
|
|
54
|
+
|
|
51
55
|
cpc_sketch cpc(12);
|
|
52
56
|
cpc_union cpc_u(12);
|
|
53
57
|
|
|
58
|
+
density_sketch<double> ds(32, 3);
|
|
59
|
+
|
|
54
60
|
frequent_items_sketch<std::string> fi(100);
|
|
55
61
|
|
|
56
62
|
hll_sketch hll(13);
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
add_library(count INTERFACE)
|
|
19
|
+
|
|
20
|
+
add_library(${PROJECT_NAME}::COUNT ALIAS count)
|
|
21
|
+
|
|
22
|
+
if (BUILD_TESTS)
|
|
23
|
+
add_subdirectory(test)
|
|
24
|
+
endif()
|
|
25
|
+
|
|
26
|
+
target_include_directories(count
|
|
27
|
+
INTERFACE
|
|
28
|
+
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
|
|
29
|
+
$<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
target_link_libraries(count INTERFACE common)
|
|
33
|
+
target_compile_features(count INTERFACE cxx_std_11)
|
|
34
|
+
|
|
35
|
+
install(TARGETS count
|
|
36
|
+
EXPORT ${PROJECT_NAME}
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
install(FILES
|
|
40
|
+
include/count_min.hpp
|
|
41
|
+
include/count_min_impl.hpp
|
|
42
|
+
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|