datasketches 0.4.2 → 0.4.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/NOTICE +1 -1
  4. data/README.md +1 -1
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  7. data/vendor/datasketches-cpp/NOTICE +2 -2
  8. data/vendor/datasketches-cpp/README.md +2 -3
  9. data/vendor/datasketches-cpp/common/CMakeLists.txt +0 -2
  10. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +5 -6
  11. data/vendor/datasketches-cpp/common/include/common_defs.hpp +17 -0
  12. data/vendor/datasketches-cpp/count/CMakeLists.txt +0 -1
  13. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +0 -1
  14. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -1
  15. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +10 -0
  16. data/vendor/datasketches-cpp/density/CMakeLists.txt +0 -1
  17. data/vendor/datasketches-cpp/fi/CMakeLists.txt +0 -1
  18. data/vendor/datasketches-cpp/hll/CMakeLists.txt +0 -1
  19. data/vendor/datasketches-cpp/kll/CMakeLists.txt +0 -1
  20. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +6 -5
  21. data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +0 -1
  22. data/vendor/datasketches-cpp/req/CMakeLists.txt +0 -1
  23. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +0 -1
  24. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +4 -4
  25. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +13 -16
  26. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +3 -1
  27. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +10 -11
  28. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +7 -4
  29. data/vendor/datasketches-cpp/tdigest/CMakeLists.txt +41 -0
  30. data/vendor/datasketches-cpp/tdigest/include/tdigest.hpp +254 -0
  31. data/vendor/datasketches-cpp/tdigest/include/tdigest_impl.hpp +595 -0
  32. data/vendor/datasketches-cpp/tdigest/test/CMakeLists.txt +56 -0
  33. data/vendor/datasketches-cpp/tdigest/test/tdigest_custom_allocator_test.cpp +43 -0
  34. data/vendor/datasketches-cpp/tdigest/test/tdigest_deserialize_from_java_test.cpp +54 -0
  35. data/vendor/datasketches-cpp/tdigest/test/tdigest_ref_k100_n10000_double.sk +0 -0
  36. data/vendor/datasketches-cpp/tdigest/test/tdigest_ref_k100_n10000_float.sk +0 -0
  37. data/vendor/datasketches-cpp/tdigest/test/tdigest_serialize_for_java.cpp +67 -0
  38. data/vendor/datasketches-cpp/tdigest/test/tdigest_test.cpp +447 -0
  39. data/vendor/datasketches-cpp/theta/CMakeLists.txt +0 -1
  40. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
  41. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +18 -1
  42. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +45 -21
  43. data/vendor/datasketches-cpp/theta/test/bit_packing_test.cpp +9 -8
  44. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +17 -0
  45. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +1 -1
  46. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +73 -2
  47. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +0 -1
  48. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -1
  49. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +33 -0
  50. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +61 -0
  51. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  52. metadata +13 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9131edb6c019db8cd0dbb98aaf1321ba213efe5911ea17dd37f4e2a6cd8e7125
4
- data.tar.gz: dc60612514895814bb7e920d8e66029b10abb820a6a6617e6426cc725482d6e2
3
+ metadata.gz: d80465f08285b46a56497ab21c0b77afa47eb183f49d463548113480a86a0128
4
+ data.tar.gz: 0e88f707d65bb9b40790c6e9a3a378395cddcc5ebd4ff367fc2f31523b6efc77
5
5
  SHA512:
6
- metadata.gz: b030673f22e3c02c7a1805a9c8378e305b4878c18c4c92c3cdccaa54ea2ea9a87871b6a858563a02be510f6371bfe1c34e76386ccad975da05a6bd09071c5ee9
7
- data.tar.gz: 11b1cbd76b5e47547b54ef39c195a2d9caa2a1a2a7ec3b8ed33be87b0e98adfa6cbef1ac8e6f5af6d4b57390731b10949f13620b4f916e15b7c73a449dfdfaa5
6
+ metadata.gz: 7b19e71cfeccb68714641f6a0cf84e24939f658dc52d328bd5cb05af8433e9622cfe4e4b51a1ed90929be273b1024989261cceae0447a7a328734a1e1c239509
7
+ data.tar.gz: ba51c3c7512c91bf77f6a309a28e4b2e7914812faa4bef78362eeecdc06ebcf4faaaf80931f63688acfe3ee5e27ebd1d325c9848ad3f14fc96181ea6cbf1b909
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.4.3 (2024-08-02)
2
+
3
+ - Updated DataSketches to 5.1.0
4
+
1
5
  ## 0.4.2 (2024-01-13)
2
6
 
3
7
  - Updated DataSketches to 5.0.2
data/NOTICE CHANGED
@@ -1,5 +1,5 @@
1
1
  Apache DataSketches C++ and Python
2
- Copyright 2023 The Apache Software Foundation
2
+ Copyright 2024 The Apache Software Foundation
3
3
 
4
4
  Copyright 2015-2018 Yahoo Inc.
5
5
  Copyright 2019-2020 Verizon Media
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [DataSketches](https://datasketches.apache.org/) - sketch data structures - for Ruby
4
4
 
5
- [![Build Status](https://github.com/ankane/datasketches-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/datasketches-ruby/actions)
5
+ [![Build Status](https://github.com/ankane/datasketches-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/datasketches-ruby/actions)
6
6
 
7
7
  ## Installation
8
8
 
@@ -1,3 +1,3 @@
1
1
  module DataSketches
2
- VERSION = "0.4.2"
2
+ VERSION = "0.4.3"
3
3
  end
@@ -118,6 +118,7 @@ add_subdirectory(req)
118
118
  add_subdirectory(quantiles)
119
119
  add_subdirectory(count)
120
120
  add_subdirectory(density)
121
+ add_subdirectory(tdigest)
121
122
 
122
123
  if (WITH_PYTHON)
123
124
  add_subdirectory(python)
@@ -1,5 +1,5 @@
1
- Apache DataSketches C++ and Python
2
- Copyright 2023 The Apache Software Foundation
1
+ Apache DataSketches C++
2
+ Copyright 2024 The Apache Software Foundation
3
3
 
4
4
  Copyright 2015-2018 Yahoo Inc.
5
5
  Copyright 2019-2020 Verizon Media
@@ -3,8 +3,7 @@ This is the core C++ component of the Apache DataSketches library. It contains
3
3
 
4
4
  This component is also a dependency of other components of the library that create adaptors for target systems, such as PostgreSQL.
5
5
 
6
- Note that we have a parallel core component for Java implementations of the same sketch algorithms,
7
- [datasketches-java](https://github.com/apache/datasketches-java).
6
+ Note that we have a parallel core component for [Java](https://github.com/apache/datasketches-java) and [Python](https://github.com/apache/datasketches-python) implementations of the same sketch algorithms.
8
7
 
9
8
  Please visit the main [Apache DataSketches website](https://datasketches.apache.org) for more information.
10
9
 
@@ -104,4 +103,4 @@ from GitHub using CMake's `ExternalProject` module. The code would look somethin
104
103
  target_include_directories(my_dependent_target
105
104
  PRIVATE ${datasketches_INSTALL_DIR}/include/DataSketches)
106
105
  add_dependencies(my_dependent_target datasketches)
107
- ```
106
+ ```
@@ -29,8 +29,6 @@ target_include_directories(common
29
29
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
30
30
  )
31
31
 
32
- target_compile_features(common INTERFACE cxx_std_11)
33
-
34
32
  install(TARGETS common EXPORT ${PROJECT_NAME})
35
33
 
36
34
  install(FILES
@@ -71,10 +71,10 @@ typedef struct {
71
71
  // Block read - if your platform needs to do endian-swapping or can only
72
72
  // handle aligned reads, do the conversion here
73
73
 
74
- MURMUR3_FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, size_t i )
74
+ MURMUR3_FORCE_INLINE uint64_t getblock64 ( const uint8_t * p, size_t i )
75
75
  {
76
76
  uint64_t res;
77
- memcpy(&res, p + i, sizeof(res));
77
+ memcpy(&res, p + i * sizeof(uint64_t), sizeof(res));
78
78
  return res;
79
79
  }
80
80
 
@@ -104,13 +104,12 @@ MURMUR3_FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes,
104
104
 
105
105
  // Number of full 128-bit blocks of 16 bytes.
106
106
  // Possible exclusion of a remainder of up to 15 bytes.
107
- const size_t nblocks = lenBytes >> 4; // bytes / 16
107
+ const size_t nblocks = lenBytes >> 4; // bytes / 16
108
108
 
109
109
  // Process the 128-bit blocks (the body) into the hash
110
- const uint64_t* blocks = (const uint64_t*)(data);
111
110
  for (size_t i = 0; i < nblocks; ++i) { // 16 bytes per block
112
- uint64_t k1 = getblock64(blocks, i * 2 + 0);
113
- uint64_t k2 = getblock64(blocks, i * 2 + 1);
111
+ uint64_t k1 = getblock64(data, i * 2 + 0);
112
+ uint64_t k2 = getblock64(data, i * 2 + 1);
114
113
 
115
114
  k1 *= c1; k1 = MURMUR3_ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
116
115
  out.h1 = MURMUR3_ROTL64(out.h1,27);
@@ -91,6 +91,23 @@ static inline void write(std::ostream& os, const T* ptr, size_t size_bytes) {
91
91
  os.write(reinterpret_cast<const char*>(ptr), size_bytes);
92
92
  }
93
93
 
94
+ template<typename T>
95
+ T byteswap(T value) {
96
+ char* ptr = static_cast<char*>(static_cast<void*>(&value));
97
+ const int len = sizeof(T);
98
+ for (size_t i = 0; i < len / 2; ++i) {
99
+ std::swap(ptr[i], ptr[len - i - 1]);
100
+ }
101
+ return value;
102
+ }
103
+
104
+ template<typename T>
105
+ static inline T read_big_endian(std::istream& is) {
106
+ T value;
107
+ is.read(reinterpret_cast<char*>(&value), sizeof(T));
108
+ return byteswap(value);
109
+ }
110
+
94
111
  // wrapper for iterators to implement operator-> returning temporary value
95
112
  template<typename T>
96
113
  class return_value_holder {
@@ -30,7 +30,6 @@ target_include_directories(count
30
30
  )
31
31
 
32
32
  target_link_libraries(count INTERFACE common)
33
- target_compile_features(count INTERFACE cxx_std_11)
34
33
 
35
34
  install(TARGETS count
36
35
  EXPORT ${PROJECT_NAME}
@@ -30,7 +30,6 @@ target_include_directories(cpc
30
30
  )
31
31
 
32
32
  target_link_libraries(cpc INTERFACE common)
33
- target_compile_features(cpc INTERFACE cxx_std_11)
34
33
 
35
34
  install(TARGETS cpc
36
35
  EXPORT ${PROJECT_NAME}
@@ -44,6 +44,10 @@ template<typename A> class cpc_compressor;
44
44
  template<typename A>
45
45
  inline cpc_compressor<A>& get_compressor();
46
46
 
47
+ // function called atexit to clean up compression tables
48
+ template<typename A>
49
+ void destroy_compressor();
50
+
47
51
  template<typename A>
48
52
  class cpc_compressor {
49
53
  public:
@@ -109,8 +113,10 @@ private:
109
113
  };
110
114
 
111
115
  cpc_compressor();
112
- template<typename T> friend cpc_compressor<T>& get_compressor();
116
+ friend cpc_compressor& get_compressor<A>();
117
+
113
118
  ~cpc_compressor();
119
+ friend void destroy_compressor<A>();
114
120
 
115
121
  void make_decoding_tables(); // call this at startup
116
122
  void free_decoding_tables(); // call this at the end
@@ -22,9 +22,11 @@
22
22
  #ifndef CPC_COMPRESSOR_IMPL_HPP_
23
23
  #define CPC_COMPRESSOR_IMPL_HPP_
24
24
 
25
+ #include <cstdlib>
25
26
  #include <memory>
26
27
  #include <stdexcept>
27
28
 
29
+ #include "common_defs.hpp"
28
30
  #include "compression_data.hpp"
29
31
  #include "cpc_util.hpp"
30
32
  #include "cpc_common.hpp"
@@ -36,9 +38,17 @@ namespace datasketches {
36
38
  template<typename A>
37
39
  cpc_compressor<A>& get_compressor() {
38
40
  static cpc_compressor<A>* instance = new cpc_compressor<A>(); // use new for global initialization
41
+ static int reg_result = std::atexit(destroy_compressor<A>); // just to clean up a little more nicely; don't worry if it fails
42
+ unused(reg_result);
39
43
  return *instance;
40
44
  }
41
45
 
46
+ // register to call compressor destructor at exit
47
+ template<typename A>
48
+ void destroy_compressor() {
49
+ delete std::addressof(get_compressor<A>());
50
+ }
51
+
42
52
  template<typename A>
43
53
  cpc_compressor<A>::cpc_compressor() {
44
54
  make_decoding_tables();
@@ -30,7 +30,6 @@ target_include_directories(density
30
30
  )
31
31
 
32
32
  target_link_libraries(density INTERFACE common)
33
- target_compile_features(density INTERFACE cxx_std_11)
34
33
 
35
34
  install(TARGETS density
36
35
  EXPORT ${PROJECT_NAME}
@@ -30,7 +30,6 @@ target_include_directories(fi
30
30
  )
31
31
 
32
32
  target_link_libraries(fi INTERFACE common)
33
- target_compile_features(fi INTERFACE cxx_std_11)
34
33
 
35
34
  install(TARGETS fi
36
35
  EXPORT ${PROJECT_NAME}
@@ -30,7 +30,6 @@ target_include_directories(hll
30
30
  )
31
31
 
32
32
  target_link_libraries(hll INTERFACE common)
33
- target_compile_features(hll INTERFACE cxx_std_11)
34
33
 
35
34
  install(TARGETS hll
36
35
  EXPORT ${PROJECT_NAME}
@@ -30,7 +30,6 @@ target_include_directories(kll
30
30
  )
31
31
 
32
32
  target_link_libraries(kll INTERFACE common)
33
- target_compile_features(kll INTERFACE cxx_std_11)
34
33
 
35
34
  install(TARGETS kll
36
35
  EXPORT ${PROJECT_NAME}
@@ -31,11 +31,14 @@ using alloc = test_allocator<test_type>;
31
31
 
32
32
  TEST_CASE("kll sketch custom type", "[kll_sketch]") {
33
33
 
34
- // setup section
35
34
  test_allocator_total_bytes = 0;
35
+ test_allocator_net_allocations = 0;
36
36
 
37
37
  SECTION("compact level zero") {
38
38
  kll_test_type_sketch sketch(8, test_type_less(), 0);
39
+ REQUIRE(test_allocator_total_bytes != 0);
40
+ REQUIRE(test_allocator_net_allocations != 0);
41
+
39
42
  REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
40
43
  REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
41
44
  REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
@@ -146,10 +149,8 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
146
149
  REQUIRE(sketch2.get_n() == 11);
147
150
  }
148
151
 
149
- // cleanup
150
- if (test_allocator_total_bytes != 0) {
151
- REQUIRE(test_allocator_total_bytes == 0);
152
- }
152
+ REQUIRE(test_allocator_total_bytes == 0);
153
+ REQUIRE(test_allocator_net_allocations == 0);
153
154
  }
154
155
 
155
156
  } /* namespace datasketches */
@@ -30,7 +30,6 @@ target_include_directories(quantiles
30
30
  )
31
31
 
32
32
  target_link_libraries(quantiles INTERFACE common)
33
- target_compile_features(quantiles INTERFACE cxx_std_11)
34
33
 
35
34
  install(TARGETS quantiles
36
35
  EXPORT ${PROJECT_NAME}
@@ -30,7 +30,6 @@ target_include_directories(req
30
30
  )
31
31
 
32
32
  target_link_libraries(req INTERFACE common)
33
- target_compile_features(req INTERFACE cxx_std_11)
34
33
 
35
34
  install(TARGETS req
36
35
  EXPORT ${PROJECT_NAME}
@@ -30,7 +30,6 @@ target_include_directories(sampling
30
30
  )
31
31
 
32
32
  target_link_libraries(sampling INTERFACE common)
33
- target_compile_features(sampling INTERFACE cxx_std_11)
34
33
 
35
34
  install(TARGETS sampling
36
35
  EXPORT ${PROJECT_NAME}
@@ -37,14 +37,14 @@ class ebpps_sample {
37
37
  public:
38
38
  explicit ebpps_sample(uint32_t k, const A& allocator = A());
39
39
 
40
- // constructor used to create a sample to merge one itme
41
- template<typename TT>
42
- ebpps_sample(TT&& item, double theta, const A& allocator = A());
43
-
44
40
  // for deserialization
45
41
  class items_deleter;
46
42
  ebpps_sample(std::vector<T, A>&& data, optional<T>&& partial_item, double c, const A& allocator = A());
47
43
 
44
+ // used instead of having a single-item constructor for update/merge calls
45
+ template<typename TT>
46
+ void replace_content(TT&& item, double theta);
47
+
48
48
  void reset();
49
49
  void downsample(double theta);
50
50
 
@@ -41,22 +41,6 @@ ebpps_sample<T,A>::ebpps_sample(uint32_t reserved_size, const A& allocator) :
41
41
  data_.reserve(reserved_size);
42
42
  }
43
43
 
44
- template<typename T, typename A>
45
- template<typename TT>
46
- ebpps_sample<T,A>::ebpps_sample(TT&& item, double theta, const A& allocator) :
47
- allocator_(allocator),
48
- c_(theta),
49
- partial_item_(),
50
- data_(allocator)
51
- {
52
- if (theta == 1.0) {
53
- data_.reserve(1);
54
- data_.emplace_back(std::forward<TT>(item));
55
- } else {
56
- partial_item_.emplace(std::forward<TT>(item));
57
- }
58
- }
59
-
60
44
  template<typename T, typename A>
61
45
  ebpps_sample<T,A>::ebpps_sample(std::vector<T, A>&& data, optional<T>&& partial_item, double c, const A& allocator) :
62
46
  allocator_(allocator),
@@ -65,6 +49,19 @@ ebpps_sample<T,A>::ebpps_sample(std::vector<T, A>&& data, optional<T>&& partial_
65
49
  data_(data, allocator)
66
50
  {}
67
51
 
52
+ template<typename T, typename A>
53
+ template<typename TT>
54
+ void ebpps_sample<T,A>::replace_content(TT&& item, double theta) {
55
+ c_ = theta;
56
+ data_.clear();
57
+ partial_item_.reset();
58
+ if (theta == 1.0) {
59
+ data_.emplace_back(std::forward<TT>(item));
60
+ } else {
61
+ partial_item_.emplace(std::forward<TT>(item));
62
+ }
63
+ }
64
+
68
65
  template<typename T, typename A>
69
66
  auto ebpps_sample<T,A>::get_sample() const -> result_type {
70
67
  double unused;
@@ -43,7 +43,7 @@ namespace ebpps_constants {
43
43
  * From: "Exact PPS Sampling with Bounded Sample Size",
44
44
  * B. Hentschel, P. J. Haas, Y. Tian. Information Processing Letters, 2023.
45
45
  *
46
- * This sketch samples data from a stream of items propotional to the weight of each item.
46
+ * This sketch samples data from a stream of items proportional to the weight of each item.
47
47
  * The sample guarantees the presence of an item in the result is proportional to that item's
48
48
  * portion of the total weight seen by the sketch, and returns a sample no larger than size k.
49
49
  *
@@ -256,6 +256,8 @@ class ebpps_sketch {
256
256
 
257
257
  ebpps_sample<T,A> sample_; // Object holding the current state of the sample
258
258
 
259
+ ebpps_sample<T,A> tmp_; // Temporary sample of size 1 used in updates
260
+
259
261
  // handles merge after ensuring other.cumulative_wt_ <= this->cumulative_wt_
260
262
  // so we can send items in individually
261
263
  template<typename O>
@@ -40,7 +40,8 @@ ebpps_sketch<T, A>::ebpps_sketch(uint32_t k, const A& allocator) :
40
40
  cumulative_wt_(0.0),
41
41
  wt_max_(0.0),
42
42
  rho_(1.0),
43
- sample_(check_k(k), allocator)
43
+ sample_(check_k(k), allocator),
44
+ tmp_(1, allocator)
44
45
  {}
45
46
 
46
47
  template<typename T, typename A>
@@ -53,7 +54,8 @@ ebpps_sketch<T,A>::ebpps_sketch(uint32_t k, uint64_t n, double cumulative_wt,
53
54
  cumulative_wt_(cumulative_wt),
54
55
  wt_max_(wt_max),
55
56
  rho_(rho),
56
- sample_(sample)
57
+ sample_(sample),
58
+ tmp_(1, allocator)
57
59
  {}
58
60
 
59
61
  template<typename T, typename A>
@@ -148,9 +150,8 @@ void ebpps_sketch<T, A>::internal_update(FwdItem&& item, double weight) {
148
150
  if (cumulative_wt_ > 0.0)
149
151
  sample_.downsample(new_rho / rho_);
150
152
 
151
- ebpps_sample<T,A> tmp(conditional_forward<FwdItem>(item), new_rho * weight, allocator_);
152
-
153
- sample_.merge(tmp);
153
+ tmp_.replace_content(conditional_forward<FwdItem>(item), new_rho * weight);
154
+ sample_.merge(tmp_);
154
155
 
155
156
  cumulative_wt_ = new_cum_wt;
156
157
  wt_max_ = new_wt_max;
@@ -240,9 +241,8 @@ void ebpps_sketch<T, A>::internal_merge(O&& sk) {
240
241
  if (cumulative_wt_ > 0.0)
241
242
  sample_.downsample(new_rho / rho_);
242
243
 
243
- ebpps_sample<T,A> tmp(conditional_forward<O>(items[i]), new_rho * avg_wt, allocator_);
244
-
245
- sample_.merge(tmp);
244
+ tmp_.replace_content(conditional_forward<O>(items[i]), new_rho * avg_wt);
245
+ sample_.merge(tmp_);
246
246
 
247
247
  cumulative_wt_ = new_cum_wt;
248
248
  rho_ = new_rho;
@@ -259,9 +259,8 @@ void ebpps_sketch<T, A>::internal_merge(O&& sk) {
259
259
  if (cumulative_wt_ > 0.0)
260
260
  sample_.downsample(new_rho / rho_);
261
261
 
262
- ebpps_sample<T,A> tmp(conditional_forward<O>(other_sample.get_partial_item()), new_rho * other_c_frac * avg_wt, allocator_);
263
-
264
- sample_.merge(tmp);
262
+ tmp_.replace_content(conditional_forward<O>(other_sample.get_partial_item()), new_rho * other_c_frac * avg_wt);
263
+ sample_.merge(tmp_);
265
264
 
266
265
  cumulative_wt_ = new_cum_wt;
267
266
  rho_ = new_rho;
@@ -42,14 +42,15 @@ TEST_CASE("ebpps sample: basic initialization", "[ebpps_sketch]") {
42
42
 
43
43
  TEST_CASE("ebpps sample: pre-initialized", "[ebpps_sketch]") {
44
44
  double theta = 1.0;
45
- ebpps_sample<int> sample = ebpps_sample<int>(-1, theta);
45
+ ebpps_sample<int> sample(1);
46
+ sample.replace_content(-1, theta);
46
47
  REQUIRE(sample.get_c() == theta);
47
48
  REQUIRE(sample.get_num_retained_items() == 1);
48
49
  REQUIRE(sample.get_sample().size() == 1);
49
50
  REQUIRE(sample.has_partial_item() == false);
50
51
 
51
52
  theta = 1e-300;
52
- sample = ebpps_sample<int>(-1, theta);
53
+ sample.replace_content(-1, theta);
53
54
  REQUIRE(sample.get_c() == theta);
54
55
  REQUIRE(sample.get_num_retained_items() == 1);
55
56
  REQUIRE(sample.get_sample().size() == 0); // assuming the random number is > 1e-300
@@ -57,7 +58,8 @@ TEST_CASE("ebpps sample: pre-initialized", "[ebpps_sketch]") {
57
58
  }
58
59
 
59
60
  TEST_CASE("ebpps sample: downsampling", "[ebpps_sketch]") {
60
- ebpps_sample<char> sample = ebpps_sample<char>('a', 1.0);
61
+ ebpps_sample<char> sample(1);
62
+ sample.replace_content('a', 1.0);
61
63
 
62
64
  sample.downsample(2.0); // no-op
63
65
  REQUIRE(sample.get_c() == 1.0);
@@ -121,8 +123,9 @@ TEST_CASE("ebpps sample: merge unit samples", "[ebpps_sketch]") {
121
123
  uint32_t k = 8;
122
124
  ebpps_sample<int> sample = ebpps_sample<int>(k);
123
125
 
126
+ ebpps_sample<int> s(1);
124
127
  for (uint32_t i = 1; i <= k; ++i) {
125
- ebpps_sample<int> s = ebpps_sample<int>(i, 1.0);
128
+ s.replace_content(i, 1.0);
126
129
  sample.merge(s);
127
130
  REQUIRE(sample.get_c() == static_cast<double>(i));
128
131
  REQUIRE(sample.get_num_retained_items() == i);
@@ -0,0 +1,41 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ add_library(tdigest INTERFACE)
19
+
20
+ add_library(${PROJECT_NAME}::TDIGEST ALIAS tdigest)
21
+
22
+ if (BUILD_TESTS)
23
+ add_subdirectory(test)
24
+ endif()
25
+
26
+ target_include_directories(tdigest
27
+ INTERFACE
28
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
29
+ $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
30
+ )
31
+
32
+ target_link_libraries(tdigest INTERFACE common)
33
+
34
+ install(TARGETS tdigest
35
+ EXPORT ${PROJECT_NAME}
36
+ )
37
+
38
+ install(FILES
39
+ include/tdigest.hpp
40
+ include/tdigest_impl.hpp
41
+ DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")