datasketches 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/datasketches/cpc_wrapper.cpp +1 -1
  4. data/lib/datasketches/version.rb +1 -1
  5. data/vendor/datasketches-cpp/CMakeLists.txt +22 -20
  6. data/vendor/datasketches-cpp/NOTICE +1 -1
  7. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +25 -27
  8. data/vendor/datasketches-cpp/common/include/common_defs.hpp +8 -6
  9. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +11 -0
  10. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +5 -4
  11. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  12. data/vendor/datasketches-cpp/common/test/integration_test.cpp +6 -0
  13. data/vendor/datasketches-cpp/count/CMakeLists.txt +42 -0
  14. data/vendor/datasketches-cpp/count/include/count_min.hpp +351 -0
  15. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +517 -0
  16. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +43 -0
  17. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +155 -0
  18. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +306 -0
  19. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +3 -3
  20. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +1 -1
  21. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +16 -8
  22. data/vendor/datasketches-cpp/density/CMakeLists.txt +42 -0
  23. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +236 -0
  24. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +543 -0
  25. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +35 -0
  26. data/vendor/datasketches-cpp/density/test/density_sketch_test.cpp +244 -0
  27. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +9 -3
  28. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +19 -11
  29. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +2 -5
  30. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +19 -7
  31. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +1 -1
  32. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +98 -42
  33. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -0
  34. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +92 -59
  35. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +16 -6
  36. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +3 -21
  37. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +8 -0
  38. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +14 -6
  39. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +1 -1
  40. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +8 -2
  41. data/vendor/datasketches-cpp/hll/include/hll.hpp +9 -8
  42. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +7 -1
  43. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -1
  44. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +8 -3
  45. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +2 -2
  46. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +2 -2
  47. data/vendor/datasketches-cpp/python/CMakeLists.txt +6 -0
  48. data/vendor/datasketches-cpp/python/README.md +5 -5
  49. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +87 -0
  50. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +35 -0
  51. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +15 -9
  52. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +77 -0
  53. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +205 -0
  54. data/vendor/datasketches-cpp/python/datasketches/__init__.py +17 -1
  55. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +98 -0
  56. data/vendor/datasketches-cpp/python/include/py_object_lt.hpp +37 -0
  57. data/vendor/datasketches-cpp/python/include/py_object_ostream.hpp +48 -0
  58. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +104 -0
  59. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +136 -0
  60. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +101 -0
  61. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +16 -30
  62. data/vendor/datasketches-cpp/python/src/datasketches.cpp +6 -0
  63. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +95 -0
  64. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +127 -73
  65. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +28 -36
  66. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +108 -160
  67. data/vendor/datasketches-cpp/python/src/py_serde.cpp +5 -4
  68. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +99 -148
  69. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +117 -178
  70. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +67 -73
  71. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +215 -0
  72. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +1 -1
  73. data/vendor/datasketches-cpp/python/tests/count_min_test.py +86 -0
  74. data/vendor/datasketches-cpp/python/tests/cpc_test.py +10 -10
  75. data/vendor/datasketches-cpp/python/tests/density_test.py +93 -0
  76. data/vendor/datasketches-cpp/python/tests/fi_test.py +41 -2
  77. data/vendor/datasketches-cpp/python/tests/hll_test.py +19 -20
  78. data/vendor/datasketches-cpp/python/tests/kll_test.py +40 -6
  79. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +39 -5
  80. data/vendor/datasketches-cpp/python/tests/req_test.py +38 -5
  81. data/vendor/datasketches-cpp/python/tests/theta_test.py +16 -14
  82. data/vendor/datasketches-cpp/python/tests/tuple_test.py +206 -0
  83. data/vendor/datasketches-cpp/python/tests/vo_test.py +7 -0
  84. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +8 -3
  85. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +4 -4
  86. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +1 -1
  87. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +0 -2
  88. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +8 -3
  89. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +2 -2
  90. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +20 -6
  91. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +30 -16
  92. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -1
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +19 -15
  94. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +33 -14
  95. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -2
  96. data/vendor/datasketches-cpp/setup.py +1 -1
  97. data/vendor/datasketches-cpp/theta/CMakeLists.txt +1 -0
  98. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +6279 -0
  99. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +14 -8
  100. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +60 -46
  101. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +4 -2
  102. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +58 -10
  103. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +430 -130
  104. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -9
  105. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +16 -4
  106. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +2 -2
  107. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  108. data/vendor/datasketches-cpp/theta/test/bit_packing_test.cpp +80 -0
  109. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +42 -3
  110. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -0
  111. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +2 -1
  112. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  113. metadata +31 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 33b2bd41940eaa4000c00002a75f4c3e3d9096d7904381d19ccd8b206a85d67e
4
- data.tar.gz: 32eef85b031864b51f2968b4ea3bceaafceb1293126ddb0c00c86c85e97a41b7
3
+ metadata.gz: e0c6cc871dc7c8726b31146fe287dd337e6aa5c1c70a234d2ad77b5bed74aca2
4
+ data.tar.gz: c5e89d9d28069aec80863fa6e1da339457dcc04f938d3b3506a3dfbb66899eee
5
5
  SHA512:
6
- metadata.gz: d473c544c65b50995ab61b81dcd2c7d061432977b19f324d9d3601344e4049db83f04cd223ceb65fcfc2ccb0bea2c54b56e7335d050df13d5d096a5795f9af2e
7
- data.tar.gz: bfc6db62462ffbbf24217ea7c6f82d7a14986371dd2236a21e941cd8c9e03a769a9055db6589a1477e97252336535d6a26cddbcd9f0a061e35eb68799b99473b
6
+ metadata.gz: 36459e338671867ae63de590a6807f801891a9821336178e64bca0d9b1615c263aac5160b4f1cf74796c5756bfee4a36335585db6354f0d2e7a77765236e9730
7
+ data.tar.gz: dff58e6888788ce030484c6027f9a9bfee42b0315a18f6a2b02695336be482a7a255576efe0b128b2dbee99f50fd5184c788dbe799815a5a8839143d2b3642f3
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.3.2 (2023-05-03)
2
+
3
+ - Updated DataSketches to 4.1.0
4
+
1
5
  ## 0.3.1 (2023-01-31)
2
6
 
3
7
  - Updated DataSketches to 4.0.1
@@ -61,7 +61,7 @@ void init_cpc(Rice::Module& m) {
61
61
  .define_method("result", &cpc_union::get_result)
62
62
  .define_method(
63
63
  "update",
64
- [](cpc_union& self, cpc_sketch& sketch) {
64
+ [](cpc_union& self, const cpc_sketch& sketch) {
65
65
  self.update(sketch);
66
66
  });
67
67
  }
@@ -1,3 +1,3 @@
1
1
  module DataSketches
2
- VERSION = "0.3.1"
2
+ VERSION = "0.3.2"
3
3
  end
@@ -34,7 +34,7 @@ include(CMakeDependentOption)
34
34
  ### Require out-of-source builds
35
35
  file(TO_CMAKE_PATH "${PROJECT_BINARY_DIR}/CMakeLists.txt" LOC_PATH)
36
36
  if(EXISTS "${LOC_PATH}")
37
- message(FATAL_ERROR "You cannot build in a source directory (or any directory with a CMakeLists.txt file). Please make a build subdirectory. Feel free to remove CMakeCache.txt and CMakeFiles.")
37
+ message(FATAL_ERROR "You cannot build in a source directory (or any directory with a CMakeLists.txt file). Please make a build subdirectory. Feel free to remove CMakeCache.txt and CMakeFiles.")
38
38
  endif()
39
39
 
40
40
  # Ensure builds on Windows export all symbols
@@ -69,7 +69,7 @@ set(CMAKE_CXX_EXTENSIONS OFF)
69
69
  ###### OPTIONS ######
70
70
  # Enable testing
71
71
  option(BUILD_TESTS "Build unit tests" ON)
72
- if (BUILD_TESTS)
72
+ if (BUILD_TESTS)
73
73
  enable_testing()
74
74
  endif()
75
75
 
@@ -93,10 +93,10 @@ set(default_build_type "Release")
93
93
  if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
94
94
  message(STATUS "Setting build type to '${default_build_type}' as none was specified.")
95
95
  set(CMAKE_BUILD_TYPE "${default_build_type}" CACHE
96
- STRING "Choose the type of build." FORCE)
96
+ STRING "Choose the type of build." FORCE)
97
97
  # Set the possible values of build type for cmake-gui
98
98
  set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS
99
- "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
99
+ "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
100
100
  endif()
101
101
 
102
102
  ###### TARGETS ######
@@ -115,47 +115,49 @@ add_subdirectory(sampling)
115
115
  add_subdirectory(tuple)
116
116
  add_subdirectory(req)
117
117
  add_subdirectory(quantiles)
118
+ add_subdirectory(count)
119
+ add_subdirectory(density)
118
120
 
119
121
  if (WITH_PYTHON)
120
122
  add_subdirectory(python)
121
123
  endif()
122
124
 
123
- target_link_libraries(datasketches INTERFACE hll cpc kll fi theta sampling req quantiles)
125
+ target_link_libraries(datasketches INTERFACE hll cpc kll fi theta sampling req quantiles count)
124
126
 
125
127
  if (COVERAGE)
126
128
  find_program(LCOV_PATH NAMES "lcov")
127
129
  find_program(GENHTML_PATH NAMES "genhtml")
128
130
  if (NOT LCOV_PATH-NOTFOUND AND NOT GENHTML_PATH-NOTFOUND)
129
131
  add_custom_target(coverage_report
130
- COMMAND ${LCOV_PATH} --capture --exclude '*/test/*' --exclude '/Library/*' --exclude '/usr/include/*' --directory . --output-file lcov.info
131
- COMMAND ${GENHTML_PATH} --legend lcov.info --output-directory coverage --demangle-cpp)
132
- endif()
132
+ COMMAND ${LCOV_PATH} --capture --exclude '*/test/*' --exclude '/Library/*' --exclude '/usr/include/*' --directory . --output-file lcov.info
133
+ COMMAND ${GENHTML_PATH} --legend lcov.info --output-directory coverage --demangle-cpp)
134
+ endif()
133
135
  endif()
134
136
 
135
137
 
136
138
  # # Installation
137
139
  install(TARGETS datasketches
138
- EXPORT ${PROJECT_NAME}
139
- PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
140
- INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
141
- )
140
+ EXPORT ${PROJECT_NAME}
141
+ PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
142
+ INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
143
+ )
142
144
 
143
145
  # Packaging
144
146
  include(CMakePackageConfigHelpers)
145
147
  write_basic_package_version_file(
146
- "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
147
- VERSION ${PROJECT_VERSION}
148
- COMPATIBILITY SameMajorVersion
148
+ "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
149
+ VERSION ${PROJECT_VERSION}
150
+ COMPATIBILITY SameMajorVersion
149
151
  )
150
152
  configure_package_config_file(
151
- cmake/DataSketchesConfig.cmake.in
152
- "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
153
- INSTALL_DESTINATION lib/DataSketches/cmake
154
- PATH_VARS CMAKE_INSTALL_INCLUDEDIR
153
+ cmake/DataSketchesConfig.cmake.in
154
+ "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
155
+ INSTALL_DESTINATION lib/DataSketches/cmake
156
+ PATH_VARS CMAKE_INSTALL_INCLUDEDIR
155
157
  )
156
158
  install(EXPORT ${PROJECT_NAME} DESTINATION lib/DataSketches/cmake)
157
159
  install(FILES "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
158
- "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
160
+ "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
159
161
  DESTINATION lib/DataSketches/cmake)
160
162
 
161
163
 
@@ -1,5 +1,5 @@
1
1
  Apache DataSketches C++ and Python
2
- Copyright 2023 The Apache Software Foundation
2
+ Copyright 2022 The Apache Software Foundation
3
3
 
4
4
  Copyright 2015-2018 Yahoo Inc.
5
5
  Copyright 2019-2020 Verizon Media
@@ -29,14 +29,13 @@ typedef unsigned char uint8_t;
29
29
  typedef unsigned int uint32_t;
30
30
  typedef unsigned __int64 uint64_t;
31
31
 
32
- #define FORCE_INLINE __forceinline
32
+ #define MURMUR3_FORCE_INLINE __forceinline
33
33
 
34
34
  #include <stdlib.h>
35
35
 
36
- #define ROTL32(x,y) _rotl(x,y)
37
- #define ROTL64(x,y) _rotl64(x,y)
36
+ #define MURMUR3_ROTL64(x,y) _rotl64(x,y)
38
37
 
39
- #define BIG_CONSTANT(x) (x)
38
+ #define MURMUR3_BIG_CONSTANT(x) (x)
40
39
 
41
40
  // Other compilers
42
41
 
@@ -44,22 +43,16 @@ typedef unsigned __int64 uint64_t;
44
43
 
45
44
  #include <stdint.h>
46
45
 
47
- #define FORCE_INLINE inline __attribute__((always_inline))
48
-
49
- inline uint32_t rotl32 ( uint32_t x, int8_t r )
50
- {
51
- return (x << r) | (x >> (32 - r));
52
- }
46
+ #define MURMUR3_FORCE_INLINE inline __attribute__((always_inline))
53
47
 
54
48
  inline uint64_t rotl64 ( uint64_t x, int8_t r )
55
49
  {
56
50
  return (x << r) | (x >> (64 - r));
57
51
  }
58
52
 
59
- #define ROTL32(x,y) rotl32(x,y)
60
- #define ROTL64(x,y) rotl64(x,y)
53
+ #define MURMUR3_ROTL64(x,y) rotl64(x,y)
61
54
 
62
- #define BIG_CONSTANT(x) (x##LLU)
55
+ #define MURMUR3_BIG_CONSTANT(x) (x##LLU)
63
56
 
64
57
  #endif // !defined(_MSC_VER)
65
58
 
@@ -78,7 +71,7 @@ typedef struct {
78
71
  // Block read - if your platform needs to do endian-swapping or can only
79
72
  // handle aligned reads, do the conversion here
80
73
 
81
- FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, size_t i )
74
+ MURMUR3_FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, size_t i )
82
75
  {
83
76
  uint64_t res;
84
77
  memcpy(&res, p + i, sizeof(res));
@@ -88,20 +81,21 @@ FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, size_t i )
88
81
  //-----------------------------------------------------------------------------
89
82
  // Finalization mix - force all bits of a hash block to avalanche
90
83
 
91
- FORCE_INLINE uint64_t fmix64 ( uint64_t k )
84
+ MURMUR3_FORCE_INLINE uint64_t fmix64 ( uint64_t k )
92
85
  {
93
86
  k ^= k >> 33;
94
- k *= BIG_CONSTANT(0xff51afd7ed558ccd);
87
+ k *= MURMUR3_BIG_CONSTANT(0xff51afd7ed558ccd);
95
88
  k ^= k >> 33;
96
- k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
89
+ k *= MURMUR3_BIG_CONSTANT(0xc4ceb9fe1a85ec53);
97
90
  k ^= k >> 33;
98
91
 
99
92
  return k;
100
93
  }
101
94
 
102
- FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t seed, HashState& out) {
103
- static const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
104
- static const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
95
+ MURMUR3_FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes,
96
+ uint64_t seed, HashState& out) {
97
+ static const uint64_t c1 = MURMUR3_BIG_CONSTANT(0x87c37b91114253d5);
98
+ static const uint64_t c2 = MURMUR3_BIG_CONSTANT(0x4cf5ad432745937f);
105
99
 
106
100
  const uint8_t* data = (const uint8_t*)key;
107
101
 
@@ -118,13 +112,13 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t
118
112
  uint64_t k1 = getblock64(blocks, i * 2 + 0);
119
113
  uint64_t k2 = getblock64(blocks, i * 2 + 1);
120
114
 
121
- k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
122
- out.h1 = ROTL64(out.h1,27);
115
+ k1 *= c1; k1 = MURMUR3_ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
116
+ out.h1 = MURMUR3_ROTL64(out.h1,27);
123
117
  out.h1 += out.h2;
124
118
  out.h1 = out.h1*5+0x52dce729;
125
119
 
126
- k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; out.h2 ^= k2;
127
- out.h2 = ROTL64(out.h2,31);
120
+ k2 *= c2; k2 = MURMUR3_ROTL64(k2,33); k2 *= c1; out.h2 ^= k2;
121
+ out.h2 = MURMUR3_ROTL64(out.h2,31);
128
122
  out.h2 += out.h1;
129
123
  out.h2 = out.h2*5+0x38495ab5;
130
124
  }
@@ -144,7 +138,7 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t
144
138
  case 11: k2 ^= ((uint64_t)tail[10]) << 16; // falls through
145
139
  case 10: k2 ^= ((uint64_t)tail[ 9]) << 8; // falls through
146
140
  case 9: k2 ^= ((uint64_t)tail[ 8]) << 0;
147
- k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; out.h2 ^= k2;
141
+ k2 *= c2; k2 = MURMUR3_ROTL64(k2,33); k2 *= c1; out.h2 ^= k2;
148
142
  // falls through
149
143
  case 8: k1 ^= ((uint64_t)tail[ 7]) << 56; // falls through
150
144
  case 7: k1 ^= ((uint64_t)tail[ 6]) << 48; // falls through
@@ -154,7 +148,7 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t
154
148
  case 3: k1 ^= ((uint64_t)tail[ 2]) << 16; // falls through
155
149
  case 2: k1 ^= ((uint64_t)tail[ 1]) << 8; // falls through
156
150
  case 1: k1 ^= ((uint64_t)tail[ 0]) << 0;
157
- k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
151
+ k1 *= c1; k1 = MURMUR3_ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
158
152
  };
159
153
 
160
154
  //----------
@@ -175,10 +169,14 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t
175
169
 
176
170
  //-----------------------------------------------------------------------------
177
171
 
178
- FORCE_INLINE uint16_t compute_seed_hash(uint64_t seed) {
172
+ MURMUR3_FORCE_INLINE uint16_t compute_seed_hash(uint64_t seed) {
179
173
  HashState hashes;
180
174
  MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
181
175
  return static_cast<uint16_t>(hashes.h1 & 0xffff);
182
176
  }
183
177
 
178
+ #undef MURMUR3_FORCE_INLINE
179
+ #undef MURMUR3_ROTL64
180
+ #undef MURMUR3_BIG_CONSTANT
181
+
184
182
  #endif // _MURMURHASH3_H_
@@ -26,6 +26,7 @@
26
26
  #include <iostream>
27
27
  #include <random>
28
28
  #include <chrono>
29
+ #include <thread>
29
30
 
30
31
  namespace datasketches {
31
32
 
@@ -36,15 +37,16 @@ enum resize_factor { X1 = 0, X2, X4, X8 };
36
37
  template<typename A> using AllocChar = typename std::allocator_traits<A>::template rebind_alloc<char>;
37
38
  template<typename A> using string = std::basic_string<char, std::char_traits<char>, AllocChar<A>>;
38
39
 
39
- // random bit
40
- static std::independent_bits_engine<std::mt19937, 1, uint32_t>
41
- random_bit(static_cast<uint32_t>(std::chrono::system_clock::now().time_since_epoch().count()));
40
+ // thread-safe random bit
41
+ static thread_local std::independent_bits_engine<std::mt19937, 1, uint32_t>
42
+ random_bit(static_cast<uint32_t>(std::chrono::system_clock::now().time_since_epoch().count()
43
+ + std::hash<std::thread::id>{}(std::this_thread::get_id())));
42
44
 
43
45
  // common random declarations
44
46
  namespace random_utils {
45
47
  static std::random_device rd; // possibly unsafe in MinGW with GCC < 9.2
46
- static std::mt19937_64 rand(rd());
47
- static std::uniform_real_distribution<> next_double(0.0, 1.0);
48
+ static thread_local std::mt19937_64 rand(rd());
49
+ static thread_local std::uniform_real_distribution<> next_double(0.0, 1.0);
48
50
  }
49
51
 
50
52
 
@@ -77,7 +79,7 @@ static inline void read(std::istream& is, T* ptr, size_t size_bytes) {
77
79
  }
78
80
 
79
81
  template<typename T>
80
- static inline void write(std::ostream& os, T& value) {
82
+ static inline void write(std::ostream& os, T value) {
81
83
  os.write(reinterpret_cast<const char*>(&value), sizeof(T));
82
84
  }
83
85
 
@@ -91,6 +91,17 @@ static inline uint8_t count_leading_zeros_in_u64(uint64_t input) {
91
91
  return 56 + byte_leading_zeros_table[(input ) & FCLZ_MASK_08];
92
92
  }
93
93
 
94
+ static inline uint8_t count_leading_zeros_in_u32(uint32_t input) {
95
+ if (input > FCLZ_MASK_24)
96
+ return byte_leading_zeros_table[(input >> 24) & FCLZ_MASK_08];
97
+ if (input > FCLZ_MASK_16)
98
+ return 8 + byte_leading_zeros_table[(input >> 16) & FCLZ_MASK_08];
99
+ if (input > FCLZ_MASK_08)
100
+ return 16 + byte_leading_zeros_table[(input >> 8) & FCLZ_MASK_08];
101
+ if (true)
102
+ return 24 + byte_leading_zeros_table[(input ) & FCLZ_MASK_08];
103
+ }
104
+
94
105
  static inline uint8_t count_trailing_zeros_in_u32(uint32_t input) {
95
106
  for (int i = 0; i < 4; i++) {
96
107
  const int byte = input & 0xff;
@@ -24,6 +24,7 @@
24
24
  #include <exception>
25
25
  #include <iostream>
26
26
  #include <string>
27
+ #include <cstring>
27
28
 
28
29
  namespace datasketches {
29
30
 
@@ -54,14 +55,14 @@ static inline size_t copy_to_mem(const void* src, void* dst, size_t size) {
54
55
  }
55
56
 
56
57
  template<typename T>
57
- static inline size_t copy_to_mem(const T& item, void* dst) {
58
- memcpy(dst, &item, sizeof(T));
58
+ static inline size_t copy_from_mem(const void* src, T& item) {
59
+ memcpy(&item, src, sizeof(T));
59
60
  return sizeof(T);
60
61
  }
61
62
 
62
63
  template<typename T>
63
- static inline size_t copy_from_mem(const void* src, T& item) {
64
- memcpy(&item, src, sizeof(T));
64
+ static inline size_t copy_to_mem(T item, void* dst) {
65
+ memcpy(dst, &item, sizeof(T));
65
66
  return sizeof(T);
66
67
  }
67
68
 
@@ -74,7 +74,7 @@ target_sources(common_test
74
74
  # now the integration test part
75
75
  add_executable(integration_test)
76
76
 
77
- target_link_libraries(integration_test cpc fi hll kll req sampling theta tuple common_test_lib)
77
+ target_link_libraries(integration_test count cpc density fi hll kll req sampling theta tuple common_test_lib)
78
78
 
79
79
  set_target_properties(integration_test PROPERTIES
80
80
  CXX_STANDARD 11
@@ -19,8 +19,10 @@
19
19
 
20
20
  #include <catch2/catch.hpp>
21
21
 
22
+ #include "count_min.hpp"
22
23
  #include "cpc_sketch.hpp"
23
24
  #include "cpc_union.hpp"
25
+ #include "density_sketch.hpp"
24
26
  #include "frequent_items_sketch.hpp"
25
27
  #include "hll.hpp"
26
28
  #include "kll_sketch.hpp"
@@ -48,9 +50,13 @@ struct subtracting_intersection_policy {
48
50
  using tuple_intersection_float = tuple_intersection<float, subtracting_intersection_policy<float>>;
49
51
 
50
52
  TEST_CASE("integration: declare all sketches", "[integration]") {
53
+ count_min_sketch<double> cm(5, 128);
54
+
51
55
  cpc_sketch cpc(12);
52
56
  cpc_union cpc_u(12);
53
57
 
58
+ density_sketch<double> ds(32, 3);
59
+
54
60
  frequent_items_sketch<std::string> fi(100);
55
61
 
56
62
  hll_sketch hll(13);
@@ -0,0 +1,42 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ add_library(count INTERFACE)
19
+
20
+ add_library(${PROJECT_NAME}::COUNT ALIAS count)
21
+
22
+ if (BUILD_TESTS)
23
+ add_subdirectory(test)
24
+ endif()
25
+
26
+ target_include_directories(count
27
+ INTERFACE
28
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
29
+ $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
30
+ )
31
+
32
+ target_link_libraries(count INTERFACE common)
33
+ target_compile_features(count INTERFACE cxx_std_11)
34
+
35
+ install(TARGETS count
36
+ EXPORT ${PROJECT_NAME}
37
+ )
38
+
39
+ install(FILES
40
+ include/count_min.hpp
41
+ include/count_min_impl.hpp
42
+ DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")