datasketches 0.3.0 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/ext/datasketches/cpc_wrapper.cpp +1 -1
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +22 -20
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +25 -27
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +8 -6
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +11 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +5 -4
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +6 -0
- data/vendor/datasketches-cpp/count/CMakeLists.txt +42 -0
- data/vendor/datasketches-cpp/count/include/count_min.hpp +351 -0
- data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +517 -0
- data/vendor/datasketches-cpp/count/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +155 -0
- data/vendor/datasketches-cpp/count/test/count_min_test.cpp +306 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +16 -8
- data/vendor/datasketches-cpp/density/CMakeLists.txt +42 -0
- data/vendor/datasketches-cpp/density/include/density_sketch.hpp +236 -0
- data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +543 -0
- data/vendor/datasketches-cpp/density/test/CMakeLists.txt +35 -0
- data/vendor/datasketches-cpp/density/test/density_sketch_test.cpp +244 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +9 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +19 -11
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +2 -5
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +19 -7
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +98 -42
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +92 -59
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +16 -6
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +3 -21
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +8 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +14 -6
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +8 -2
- data/vendor/datasketches-cpp/hll/include/hll.hpp +9 -8
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +7 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -1
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +8 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +2 -2
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +6 -0
- data/vendor/datasketches-cpp/python/README.md +5 -5
- data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +87 -0
- data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +35 -0
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +15 -9
- data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +77 -0
- data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +205 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/include/kernel_function.hpp +98 -0
- data/vendor/datasketches-cpp/python/include/py_object_lt.hpp +37 -0
- data/vendor/datasketches-cpp/python/include/py_object_ostream.hpp +48 -0
- data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +104 -0
- data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +136 -0
- data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +101 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +16 -30
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +6 -0
- data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +95 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +127 -73
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +28 -36
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +108 -160
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +5 -4
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +99 -148
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +117 -178
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +67 -73
- data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +215 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +2 -2
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/tests/count_min_test.py +86 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +10 -10
- data/vendor/datasketches-cpp/python/tests/density_test.py +93 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +41 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +19 -20
- data/vendor/datasketches-cpp/python/tests/kll_test.py +40 -6
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +39 -5
- data/vendor/datasketches-cpp/python/tests/req_test.py +38 -5
- data/vendor/datasketches-cpp/python/tests/theta_test.py +16 -14
- data/vendor/datasketches-cpp/python/tests/tuple_test.py +206 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +7 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +8 -3
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +4 -4
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +0 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +8 -3
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +2 -2
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +20 -6
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +30 -16
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -1
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +19 -15
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +33 -14
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -2
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +6279 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +14 -8
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +60 -46
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +4 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +58 -10
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +430 -130
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +16 -4
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +2 -2
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/bit_packing_test.cpp +80 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +42 -3
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +2 -1
- data/vendor/datasketches-cpp/version.cfg.in +1 -1
- metadata +31 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e0c6cc871dc7c8726b31146fe287dd337e6aa5c1c70a234d2ad77b5bed74aca2
|
4
|
+
data.tar.gz: c5e89d9d28069aec80863fa6e1da339457dcc04f938d3b3506a3dfbb66899eee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 36459e338671867ae63de590a6807f801891a9821336178e64bca0d9b1615c263aac5160b4f1cf74796c5756bfee4a36335585db6354f0d2e7a77765236e9730
|
7
|
+
data.tar.gz: dff58e6888788ce030484c6027f9a9bfee42b0315a18f6a2b02695336be482a7a255576efe0b128b2dbee99f50fd5184c788dbe799815a5a8839143d2b3642f3
|
data/CHANGELOG.md
CHANGED
data/lib/datasketches/version.rb
CHANGED
@@ -34,7 +34,7 @@ include(CMakeDependentOption)
|
|
34
34
|
### Require out-of-source builds
|
35
35
|
file(TO_CMAKE_PATH "${PROJECT_BINARY_DIR}/CMakeLists.txt" LOC_PATH)
|
36
36
|
if(EXISTS "${LOC_PATH}")
|
37
|
-
|
37
|
+
message(FATAL_ERROR "You cannot build in a source directory (or any directory with a CMakeLists.txt file). Please make a build subdirectory. Feel free to remove CMakeCache.txt and CMakeFiles.")
|
38
38
|
endif()
|
39
39
|
|
40
40
|
# Ensure builds on Windows export all symbols
|
@@ -69,7 +69,7 @@ set(CMAKE_CXX_EXTENSIONS OFF)
|
|
69
69
|
###### OPTIONS ######
|
70
70
|
# Enable testing
|
71
71
|
option(BUILD_TESTS "Build unit tests" ON)
|
72
|
-
if (BUILD_TESTS)
|
72
|
+
if (BUILD_TESTS)
|
73
73
|
enable_testing()
|
74
74
|
endif()
|
75
75
|
|
@@ -93,10 +93,10 @@ set(default_build_type "Release")
|
|
93
93
|
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
94
94
|
message(STATUS "Setting build type to '${default_build_type}' as none was specified.")
|
95
95
|
set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE
|
96
|
-
|
96
|
+
STRING "Choose the type of build." FORCE)
|
97
97
|
# Set the possible values of build type for cmake-gui
|
98
98
|
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
|
99
|
-
|
99
|
+
"Debug" "Release" "MinSizeRel" "RelWithDebInfo")
|
100
100
|
endif()
|
101
101
|
|
102
102
|
###### TARGETS ######
|
@@ -115,47 +115,49 @@ add_subdirectory(sampling)
|
|
115
115
|
add_subdirectory(tuple)
|
116
116
|
add_subdirectory(req)
|
117
117
|
add_subdirectory(quantiles)
|
118
|
+
add_subdirectory(count)
|
119
|
+
add_subdirectory(density)
|
118
120
|
|
119
121
|
if (WITH_PYTHON)
|
120
122
|
add_subdirectory(python)
|
121
123
|
endif()
|
122
124
|
|
123
|
-
target_link_libraries(datasketches INTERFACE hll cpc kll fi theta sampling req quantiles)
|
125
|
+
target_link_libraries(datasketches INTERFACE hll cpc kll fi theta sampling req quantiles count)
|
124
126
|
|
125
127
|
if (COVERAGE)
|
126
128
|
find_program(LCOV_PATH NAMES "lcov")
|
127
129
|
find_program(GENHTML_PATH NAMES "genhtml")
|
128
130
|
if (NOT LCOV_PATH-NOTFOUND AND NOT GENHTML_PATH-NOTFOUND)
|
129
131
|
add_custom_target(coverage_report
|
130
|
-
|
131
|
-
|
132
|
-
|
132
|
+
COMMAND ${LCOV_PATH} --capture --exclude '*/test/*' --exclude '/Library/*' --exclude '/usr/include/*' --directory . --output-file lcov.info
|
133
|
+
COMMAND ${GENHTML_PATH} --legend lcov.info --output-directory coverage --demangle-cpp)
|
134
|
+
endif()
|
133
135
|
endif()
|
134
136
|
|
135
137
|
|
136
138
|
# # Installation
|
137
139
|
install(TARGETS datasketches
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
)
|
140
|
+
EXPORT ${PROJECT_NAME}
|
141
|
+
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
|
142
|
+
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
|
143
|
+
)
|
142
144
|
|
143
145
|
# Packaging
|
144
146
|
include(CMakePackageConfigHelpers)
|
145
147
|
write_basic_package_version_file(
|
146
|
-
|
147
|
-
|
148
|
-
|
148
|
+
"${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
|
149
|
+
VERSION ${PROJECT_VERSION}
|
150
|
+
COMPATIBILITY SameMajorVersion
|
149
151
|
)
|
150
152
|
configure_package_config_file(
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
153
|
+
cmake/DataSketchesConfig.cmake.in
|
154
|
+
"${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
|
155
|
+
INSTALL_DESTINATION lib/DataSketches/cmake
|
156
|
+
PATH_VARS CMAKE_INSTALL_INCLUDEDIR
|
155
157
|
)
|
156
158
|
install(EXPORT ${PROJECT_NAME} DESTINATION lib/DataSketches/cmake)
|
157
159
|
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
|
158
|
-
|
160
|
+
"${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
|
159
161
|
DESTINATION lib/DataSketches/cmake)
|
160
162
|
|
161
163
|
|
@@ -29,14 +29,13 @@ typedef unsigned char uint8_t;
|
|
29
29
|
typedef unsigned int uint32_t;
|
30
30
|
typedef unsigned __int64 uint64_t;
|
31
31
|
|
32
|
-
#define
|
32
|
+
#define MURMUR3_FORCE_INLINE __forceinline
|
33
33
|
|
34
34
|
#include <stdlib.h>
|
35
35
|
|
36
|
-
#define
|
37
|
-
#define ROTL64(x,y) _rotl64(x,y)
|
36
|
+
#define MURMUR3_ROTL64(x,y) _rotl64(x,y)
|
38
37
|
|
39
|
-
#define
|
38
|
+
#define MURMUR3_BIG_CONSTANT(x) (x)
|
40
39
|
|
41
40
|
// Other compilers
|
42
41
|
|
@@ -44,22 +43,16 @@ typedef unsigned __int64 uint64_t;
|
|
44
43
|
|
45
44
|
#include <stdint.h>
|
46
45
|
|
47
|
-
#define
|
48
|
-
|
49
|
-
inline uint32_t rotl32 ( uint32_t x, int8_t r )
|
50
|
-
{
|
51
|
-
return (x << r) | (x >> (32 - r));
|
52
|
-
}
|
46
|
+
#define MURMUR3_FORCE_INLINE inline __attribute__((always_inline))
|
53
47
|
|
54
48
|
inline uint64_t rotl64 ( uint64_t x, int8_t r )
|
55
49
|
{
|
56
50
|
return (x << r) | (x >> (64 - r));
|
57
51
|
}
|
58
52
|
|
59
|
-
#define
|
60
|
-
#define ROTL64(x,y) rotl64(x,y)
|
53
|
+
#define MURMUR3_ROTL64(x,y) rotl64(x,y)
|
61
54
|
|
62
|
-
#define
|
55
|
+
#define MURMUR3_BIG_CONSTANT(x) (x##LLU)
|
63
56
|
|
64
57
|
#endif // !defined(_MSC_VER)
|
65
58
|
|
@@ -78,7 +71,7 @@ typedef struct {
|
|
78
71
|
// Block read - if your platform needs to do endian-swapping or can only
|
79
72
|
// handle aligned reads, do the conversion here
|
80
73
|
|
81
|
-
|
74
|
+
MURMUR3_FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, size_t i )
|
82
75
|
{
|
83
76
|
uint64_t res;
|
84
77
|
memcpy(&res, p + i, sizeof(res));
|
@@ -88,20 +81,21 @@ FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, size_t i )
|
|
88
81
|
//-----------------------------------------------------------------------------
|
89
82
|
// Finalization mix - force all bits of a hash block to avalanche
|
90
83
|
|
91
|
-
|
84
|
+
MURMUR3_FORCE_INLINE uint64_t fmix64 ( uint64_t k )
|
92
85
|
{
|
93
86
|
k ^= k >> 33;
|
94
|
-
k *=
|
87
|
+
k *= MURMUR3_BIG_CONSTANT(0xff51afd7ed558ccd);
|
95
88
|
k ^= k >> 33;
|
96
|
-
k *=
|
89
|
+
k *= MURMUR3_BIG_CONSTANT(0xc4ceb9fe1a85ec53);
|
97
90
|
k ^= k >> 33;
|
98
91
|
|
99
92
|
return k;
|
100
93
|
}
|
101
94
|
|
102
|
-
|
103
|
-
|
104
|
-
static const uint64_t
|
95
|
+
MURMUR3_FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes,
|
96
|
+
uint64_t seed, HashState& out) {
|
97
|
+
static const uint64_t c1 = MURMUR3_BIG_CONSTANT(0x87c37b91114253d5);
|
98
|
+
static const uint64_t c2 = MURMUR3_BIG_CONSTANT(0x4cf5ad432745937f);
|
105
99
|
|
106
100
|
const uint8_t* data = (const uint8_t*)key;
|
107
101
|
|
@@ -118,13 +112,13 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t
|
|
118
112
|
uint64_t k1 = getblock64(blocks, i * 2 + 0);
|
119
113
|
uint64_t k2 = getblock64(blocks, i * 2 + 1);
|
120
114
|
|
121
|
-
k1 *= c1; k1 =
|
122
|
-
out.h1 =
|
115
|
+
k1 *= c1; k1 = MURMUR3_ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
|
116
|
+
out.h1 = MURMUR3_ROTL64(out.h1,27);
|
123
117
|
out.h1 += out.h2;
|
124
118
|
out.h1 = out.h1*5+0x52dce729;
|
125
119
|
|
126
|
-
k2 *= c2; k2 =
|
127
|
-
out.h2 =
|
120
|
+
k2 *= c2; k2 = MURMUR3_ROTL64(k2,33); k2 *= c1; out.h2 ^= k2;
|
121
|
+
out.h2 = MURMUR3_ROTL64(out.h2,31);
|
128
122
|
out.h2 += out.h1;
|
129
123
|
out.h2 = out.h2*5+0x38495ab5;
|
130
124
|
}
|
@@ -144,7 +138,7 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t
|
|
144
138
|
case 11: k2 ^= ((uint64_t)tail[10]) << 16; // falls through
|
145
139
|
case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; // falls through
|
146
140
|
case 9: k2 ^= ((uint64_t)tail[ 8]) << 0;
|
147
|
-
k2 *= c2; k2 =
|
141
|
+
k2 *= c2; k2 = MURMUR3_ROTL64(k2,33); k2 *= c1; out.h2 ^= k2;
|
148
142
|
// falls through
|
149
143
|
case 8: k1 ^= ((uint64_t)tail[ 7]) << 56; // falls through
|
150
144
|
case 7: k1 ^= ((uint64_t)tail[ 6]) << 48; // falls through
|
@@ -154,7 +148,7 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t
|
|
154
148
|
case 3: k1 ^= ((uint64_t)tail[ 2]) << 16; // falls through
|
155
149
|
case 2: k1 ^= ((uint64_t)tail[ 1]) << 8; // falls through
|
156
150
|
case 1: k1 ^= ((uint64_t)tail[ 0]) << 0;
|
157
|
-
k1 *= c1; k1 =
|
151
|
+
k1 *= c1; k1 = MURMUR3_ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
|
158
152
|
};
|
159
153
|
|
160
154
|
//----------
|
@@ -175,10 +169,14 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t
|
|
175
169
|
|
176
170
|
//-----------------------------------------------------------------------------
|
177
171
|
|
178
|
-
|
172
|
+
MURMUR3_FORCE_INLINE uint16_t compute_seed_hash(uint64_t seed) {
|
179
173
|
HashState hashes;
|
180
174
|
MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
|
181
175
|
return static_cast<uint16_t>(hashes.h1 & 0xffff);
|
182
176
|
}
|
183
177
|
|
178
|
+
#undef MURMUR3_FORCE_INLINE
|
179
|
+
#undef MURMUR3_ROTL64
|
180
|
+
#undef MURMUR3_BIG_CONSTANT
|
181
|
+
|
184
182
|
#endif // _MURMURHASH3_H_
|
@@ -26,6 +26,7 @@
|
|
26
26
|
#include <iostream>
|
27
27
|
#include <random>
|
28
28
|
#include <chrono>
|
29
|
+
#include <thread>
|
29
30
|
|
30
31
|
namespace datasketches {
|
31
32
|
|
@@ -36,15 +37,16 @@ enum resize_factor { X1 = 0, X2, X4, X8 };
|
|
36
37
|
template<typename A> using AllocChar = typename std::allocator_traits<A>::template rebind_alloc<char>;
|
37
38
|
template<typename A> using string = std::basic_string<char, std::char_traits<char>, AllocChar<A>>;
|
38
39
|
|
39
|
-
// random bit
|
40
|
-
static std::independent_bits_engine<std::mt19937, 1, uint32_t>
|
41
|
-
random_bit(static_cast<uint32_t>(std::chrono::system_clock::now().time_since_epoch().count()
|
40
|
+
// thread-safe random bit
|
41
|
+
static thread_local std::independent_bits_engine<std::mt19937, 1, uint32_t>
|
42
|
+
random_bit(static_cast<uint32_t>(std::chrono::system_clock::now().time_since_epoch().count()
|
43
|
+
+ std::hash<std::thread::id>{}(std::this_thread::get_id())));
|
42
44
|
|
43
45
|
// common random declarations
|
44
46
|
namespace random_utils {
|
45
47
|
static std::random_device rd; // possibly unsafe in MinGW with GCC < 9.2
|
46
|
-
static std::mt19937_64 rand(rd());
|
47
|
-
static std::uniform_real_distribution<> next_double(0.0, 1.0);
|
48
|
+
static thread_local std::mt19937_64 rand(rd());
|
49
|
+
static thread_local std::uniform_real_distribution<> next_double(0.0, 1.0);
|
48
50
|
}
|
49
51
|
|
50
52
|
|
@@ -77,7 +79,7 @@ static inline void read(std::istream& is, T* ptr, size_t size_bytes) {
|
|
77
79
|
}
|
78
80
|
|
79
81
|
template<typename T>
|
80
|
-
static inline void write(std::ostream& os, T
|
82
|
+
static inline void write(std::ostream& os, T value) {
|
81
83
|
os.write(reinterpret_cast<const char*>(&value), sizeof(T));
|
82
84
|
}
|
83
85
|
|
@@ -91,6 +91,17 @@ static inline uint8_t count_leading_zeros_in_u64(uint64_t input) {
|
|
91
91
|
return 56 + byte_leading_zeros_table[(input ) & FCLZ_MASK_08];
|
92
92
|
}
|
93
93
|
|
94
|
+
static inline uint8_t count_leading_zeros_in_u32(uint32_t input) {
|
95
|
+
if (input > FCLZ_MASK_24)
|
96
|
+
return byte_leading_zeros_table[(input >> 24) & FCLZ_MASK_08];
|
97
|
+
if (input > FCLZ_MASK_16)
|
98
|
+
return 8 + byte_leading_zeros_table[(input >> 16) & FCLZ_MASK_08];
|
99
|
+
if (input > FCLZ_MASK_08)
|
100
|
+
return 16 + byte_leading_zeros_table[(input >> 8) & FCLZ_MASK_08];
|
101
|
+
if (true)
|
102
|
+
return 24 + byte_leading_zeros_table[(input ) & FCLZ_MASK_08];
|
103
|
+
}
|
104
|
+
|
94
105
|
static inline uint8_t count_trailing_zeros_in_u32(uint32_t input) {
|
95
106
|
for (int i = 0; i < 4; i++) {
|
96
107
|
const int byte = input & 0xff;
|
@@ -24,6 +24,7 @@
|
|
24
24
|
#include <exception>
|
25
25
|
#include <iostream>
|
26
26
|
#include <string>
|
27
|
+
#include <cstring>
|
27
28
|
|
28
29
|
namespace datasketches {
|
29
30
|
|
@@ -54,14 +55,14 @@ static inline size_t copy_to_mem(const void* src, void* dst, size_t size) {
|
|
54
55
|
}
|
55
56
|
|
56
57
|
template<typename T>
|
57
|
-
static inline size_t
|
58
|
-
memcpy(
|
58
|
+
static inline size_t copy_from_mem(const void* src, T& item) {
|
59
|
+
memcpy(&item, src, sizeof(T));
|
59
60
|
return sizeof(T);
|
60
61
|
}
|
61
62
|
|
62
63
|
template<typename T>
|
63
|
-
static inline size_t
|
64
|
-
memcpy(&item,
|
64
|
+
static inline size_t copy_to_mem(T item, void* dst) {
|
65
|
+
memcpy(dst, &item, sizeof(T));
|
65
66
|
return sizeof(T);
|
66
67
|
}
|
67
68
|
|
@@ -74,7 +74,7 @@ target_sources(common_test
|
|
74
74
|
# now the integration test part
|
75
75
|
add_executable(integration_test)
|
76
76
|
|
77
|
-
target_link_libraries(integration_test cpc fi hll kll req sampling theta tuple common_test_lib)
|
77
|
+
target_link_libraries(integration_test count cpc density fi hll kll req sampling theta tuple common_test_lib)
|
78
78
|
|
79
79
|
set_target_properties(integration_test PROPERTIES
|
80
80
|
CXX_STANDARD 11
|
@@ -19,8 +19,10 @@
|
|
19
19
|
|
20
20
|
#include <catch2/catch.hpp>
|
21
21
|
|
22
|
+
#include "count_min.hpp"
|
22
23
|
#include "cpc_sketch.hpp"
|
23
24
|
#include "cpc_union.hpp"
|
25
|
+
#include "density_sketch.hpp"
|
24
26
|
#include "frequent_items_sketch.hpp"
|
25
27
|
#include "hll.hpp"
|
26
28
|
#include "kll_sketch.hpp"
|
@@ -48,9 +50,13 @@ struct subtracting_intersection_policy {
|
|
48
50
|
using tuple_intersection_float = tuple_intersection<float, subtracting_intersection_policy<float>>;
|
49
51
|
|
50
52
|
TEST_CASE("integration: declare all sketches", "[integration]") {
|
53
|
+
count_min_sketch<double> cm(5, 128);
|
54
|
+
|
51
55
|
cpc_sketch cpc(12);
|
52
56
|
cpc_union cpc_u(12);
|
53
57
|
|
58
|
+
density_sketch<double> ds(32, 3);
|
59
|
+
|
54
60
|
frequent_items_sketch<std::string> fi(100);
|
55
61
|
|
56
62
|
hll_sketch hll(13);
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
add_library(count INTERFACE)
|
19
|
+
|
20
|
+
add_library(${PROJECT_NAME}::COUNT ALIAS count)
|
21
|
+
|
22
|
+
if (BUILD_TESTS)
|
23
|
+
add_subdirectory(test)
|
24
|
+
endif()
|
25
|
+
|
26
|
+
target_include_directories(count
|
27
|
+
INTERFACE
|
28
|
+
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
|
29
|
+
$<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
|
30
|
+
)
|
31
|
+
|
32
|
+
target_link_libraries(count INTERFACE common)
|
33
|
+
target_compile_features(count INTERFACE cxx_std_11)
|
34
|
+
|
35
|
+
install(TARGETS count
|
36
|
+
EXPORT ${PROJECT_NAME}
|
37
|
+
)
|
38
|
+
|
39
|
+
install(FILES
|
40
|
+
include/count_min.hpp
|
41
|
+
include/count_min_impl.hpp
|
42
|
+
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|