datasketches 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/NOTICE +1 -1
  4. data/README.md +1 -1
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  7. data/vendor/datasketches-cpp/NOTICE +2 -2
  8. data/vendor/datasketches-cpp/README.md +2 -3
  9. data/vendor/datasketches-cpp/common/CMakeLists.txt +0 -2
  10. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +5 -6
  11. data/vendor/datasketches-cpp/common/include/common_defs.hpp +17 -0
  12. data/vendor/datasketches-cpp/count/CMakeLists.txt +0 -1
  13. data/vendor/datasketches-cpp/cpc/CMakeLists.txt +0 -1
  14. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -1
  15. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +10 -0
  16. data/vendor/datasketches-cpp/density/CMakeLists.txt +0 -1
  17. data/vendor/datasketches-cpp/fi/CMakeLists.txt +0 -1
  18. data/vendor/datasketches-cpp/hll/CMakeLists.txt +0 -1
  19. data/vendor/datasketches-cpp/kll/CMakeLists.txt +0 -1
  20. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +6 -5
  21. data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +0 -1
  22. data/vendor/datasketches-cpp/req/CMakeLists.txt +0 -1
  23. data/vendor/datasketches-cpp/sampling/CMakeLists.txt +0 -1
  24. data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +4 -4
  25. data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +13 -16
  26. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +3 -1
  27. data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +10 -11
  28. data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +7 -4
  29. data/vendor/datasketches-cpp/tdigest/CMakeLists.txt +41 -0
  30. data/vendor/datasketches-cpp/tdigest/include/tdigest.hpp +254 -0
  31. data/vendor/datasketches-cpp/tdigest/include/tdigest_impl.hpp +595 -0
  32. data/vendor/datasketches-cpp/tdigest/test/CMakeLists.txt +56 -0
  33. data/vendor/datasketches-cpp/tdigest/test/tdigest_custom_allocator_test.cpp +43 -0
  34. data/vendor/datasketches-cpp/tdigest/test/tdigest_deserialize_from_java_test.cpp +54 -0
  35. data/vendor/datasketches-cpp/tdigest/test/tdigest_ref_k100_n10000_double.sk +0 -0
  36. data/vendor/datasketches-cpp/tdigest/test/tdigest_ref_k100_n10000_float.sk +0 -0
  37. data/vendor/datasketches-cpp/tdigest/test/tdigest_serialize_for_java.cpp +67 -0
  38. data/vendor/datasketches-cpp/tdigest/test/tdigest_test.cpp +447 -0
  39. data/vendor/datasketches-cpp/theta/CMakeLists.txt +0 -1
  40. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
  41. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +18 -1
  42. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +45 -21
  43. data/vendor/datasketches-cpp/theta/test/bit_packing_test.cpp +9 -8
  44. data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +17 -0
  45. data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +1 -1
  46. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +73 -2
  47. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +0 -1
  48. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -1
  49. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +33 -0
  50. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +61 -0
  51. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  52. metadata +13 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9131edb6c019db8cd0dbb98aaf1321ba213efe5911ea17dd37f4e2a6cd8e7125
4
- data.tar.gz: dc60612514895814bb7e920d8e66029b10abb820a6a6617e6426cc725482d6e2
3
+ metadata.gz: d80465f08285b46a56497ab21c0b77afa47eb183f49d463548113480a86a0128
4
+ data.tar.gz: 0e88f707d65bb9b40790c6e9a3a378395cddcc5ebd4ff367fc2f31523b6efc77
5
5
  SHA512:
6
- metadata.gz: b030673f22e3c02c7a1805a9c8378e305b4878c18c4c92c3cdccaa54ea2ea9a87871b6a858563a02be510f6371bfe1c34e76386ccad975da05a6bd09071c5ee9
7
- data.tar.gz: 11b1cbd76b5e47547b54ef39c195a2d9caa2a1a2a7ec3b8ed33be87b0e98adfa6cbef1ac8e6f5af6d4b57390731b10949f13620b4f916e15b7c73a449dfdfaa5
6
+ metadata.gz: 7b19e71cfeccb68714641f6a0cf84e24939f658dc52d328bd5cb05af8433e9622cfe4e4b51a1ed90929be273b1024989261cceae0447a7a328734a1e1c239509
7
+ data.tar.gz: ba51c3c7512c91bf77f6a309a28e4b2e7914812faa4bef78362eeecdc06ebcf4faaaf80931f63688acfe3ee5e27ebd1d325c9848ad3f14fc96181ea6cbf1b909
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.4.3 (2024-08-02)
2
+
3
+ - Updated DataSketches to 5.1.0
4
+
1
5
  ## 0.4.2 (2024-01-13)
2
6
 
3
7
  - Updated DataSketches to 5.0.2
data/NOTICE CHANGED
@@ -1,5 +1,5 @@
1
1
  Apache DataSketches C++ and Python
2
- Copyright 2023 The Apache Software Foundation
2
+ Copyright 2024 The Apache Software Foundation
3
3
 
4
4
  Copyright 2015-2018 Yahoo Inc.
5
5
  Copyright 2019-2020 Verizon Media
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [DataSketches](https://datasketches.apache.org/) - sketch data structures - for Ruby
4
4
 
5
- [![Build Status](https://github.com/ankane/datasketches-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/datasketches-ruby/actions)
5
+ [![Build Status](https://github.com/ankane/datasketches-ruby/actions/workflows/build.yml/badge.svg)](https://github.com/ankane/datasketches-ruby/actions)
6
6
 
7
7
  ## Installation
8
8
 
@@ -1,3 +1,3 @@
1
1
  module DataSketches
2
- VERSION = "0.4.2"
2
+ VERSION = "0.4.3"
3
3
  end
@@ -118,6 +118,7 @@ add_subdirectory(req)
118
118
  add_subdirectory(quantiles)
119
119
  add_subdirectory(count)
120
120
  add_subdirectory(density)
121
+ add_subdirectory(tdigest)
121
122
 
122
123
  if (WITH_PYTHON)
123
124
  add_subdirectory(python)
@@ -1,5 +1,5 @@
1
- Apache DataSketches C++ and Python
2
- Copyright 2023 The Apache Software Foundation
1
+ Apache DataSketches C++
2
+ Copyright 2024 The Apache Software Foundation
3
3
 
4
4
  Copyright 2015-2018 Yahoo Inc.
5
5
  Copyright 2019-2020 Verizon Media
@@ -3,8 +3,7 @@ This is the core C++ component of the Apache DataSketches library. It contains
3
3
 
4
4
  This component is also a dependency of other components of the library that create adaptors for target systems, such as PostgreSQL.
5
5
 
6
- Note that we have a parallel core component for Java implementations of the same sketch algorithms,
7
- [datasketches-java](https://github.com/apache/datasketches-java).
6
+ Note that we have parallel core components for [Java](https://github.com/apache/datasketches-java) and [Python](https://github.com/apache/datasketches-python) implementations of the same sketch algorithms.
8
7
 
9
8
  Please visit the main [Apache DataSketches website](https://datasketches.apache.org) for more information.
10
9
 
@@ -104,4 +103,4 @@ from GitHub using CMake's `ExternalProject` module. The code would look somethin
104
103
  target_include_directories(my_dependent_target
105
104
  PRIVATE ${datasketches_INSTALL_DIR}/include/DataSketches)
106
105
  add_dependencies(my_dependent_target datasketches)
107
- ```
106
+ ```
@@ -29,8 +29,6 @@ target_include_directories(common
29
29
  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
30
30
  )
31
31
 
32
- target_compile_features(common INTERFACE cxx_std_11)
33
-
34
32
  install(TARGETS common EXPORT ${PROJECT_NAME})
35
33
 
36
34
  install(FILES
@@ -71,10 +71,10 @@ typedef struct {
71
71
  // Block read - if your platform needs to do endian-swapping or can only
72
72
  // handle aligned reads, do the conversion here
73
73
 
74
- MURMUR3_FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, size_t i )
74
+ MURMUR3_FORCE_INLINE uint64_t getblock64 ( const uint8_t * p, size_t i )
75
75
  {
76
76
  uint64_t res;
77
- memcpy(&res, p + i, sizeof(res));
77
+ memcpy(&res, p + i * sizeof(uint64_t), sizeof(res));
78
78
  return res;
79
79
  }
80
80
 
@@ -104,13 +104,12 @@ MURMUR3_FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes,
104
104
 
105
105
  // Number of full 128-bit blocks of 16 bytes.
106
106
  // Possible exclusion of a remainder of up to 15 bytes.
107
- const size_t nblocks = lenBytes >> 4; // bytes / 16
107
+ const size_t nblocks = lenBytes >> 4; // bytes / 16
108
108
 
109
109
  // Process the 128-bit blocks (the body) into the hash
110
- const uint64_t* blocks = (const uint64_t*)(data);
111
110
  for (size_t i = 0; i < nblocks; ++i) { // 16 bytes per block
112
- uint64_t k1 = getblock64(blocks, i * 2 + 0);
113
- uint64_t k2 = getblock64(blocks, i * 2 + 1);
111
+ uint64_t k1 = getblock64(data, i * 2 + 0);
112
+ uint64_t k2 = getblock64(data, i * 2 + 1);
114
113
 
115
114
  k1 *= c1; k1 = MURMUR3_ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
116
115
  out.h1 = MURMUR3_ROTL64(out.h1,27);
@@ -91,6 +91,23 @@ static inline void write(std::ostream& os, const T* ptr, size_t size_bytes) {
91
91
  os.write(reinterpret_cast<const char*>(ptr), size_bytes);
92
92
  }
93
93
 
94
+ template<typename T>
95
+ T byteswap(T value) {
96
+ char* ptr = static_cast<char*>(static_cast<void*>(&value));
97
+ const int len = sizeof(T);
98
+ for (size_t i = 0; i < len / 2; ++i) {
99
+ std::swap(ptr[i], ptr[len - i - 1]);
100
+ }
101
+ return value;
102
+ }
103
+
104
+ template<typename T>
105
+ static inline T read_big_endian(std::istream& is) {
106
+ T value;
107
+ is.read(reinterpret_cast<char*>(&value), sizeof(T));
108
+ return byteswap(value);
109
+ }
110
+
94
111
  // wrapper for iterators to implement operator-> returning temporary value
95
112
  template<typename T>
96
113
  class return_value_holder {
@@ -30,7 +30,6 @@ target_include_directories(count
30
30
  )
31
31
 
32
32
  target_link_libraries(count INTERFACE common)
33
- target_compile_features(count INTERFACE cxx_std_11)
34
33
 
35
34
  install(TARGETS count
36
35
  EXPORT ${PROJECT_NAME}
@@ -30,7 +30,6 @@ target_include_directories(cpc
30
30
  )
31
31
 
32
32
  target_link_libraries(cpc INTERFACE common)
33
- target_compile_features(cpc INTERFACE cxx_std_11)
34
33
 
35
34
  install(TARGETS cpc
36
35
  EXPORT ${PROJECT_NAME}
@@ -44,6 +44,10 @@ template<typename A> class cpc_compressor;
44
44
  template<typename A>
45
45
  inline cpc_compressor<A>& get_compressor();
46
46
 
47
+ // function called atexit to clean up compression tables
48
+ template<typename A>
49
+ void destroy_compressor();
50
+
47
51
  template<typename A>
48
52
  class cpc_compressor {
49
53
  public:
@@ -109,8 +113,10 @@ private:
109
113
  };
110
114
 
111
115
  cpc_compressor();
112
- template<typename T> friend cpc_compressor<T>& get_compressor();
116
+ friend cpc_compressor& get_compressor<A>();
117
+
113
118
  ~cpc_compressor();
119
+ friend void destroy_compressor<A>();
114
120
 
115
121
  void make_decoding_tables(); // call this at startup
116
122
  void free_decoding_tables(); // call this at the end
@@ -22,9 +22,11 @@
22
22
  #ifndef CPC_COMPRESSOR_IMPL_HPP_
23
23
  #define CPC_COMPRESSOR_IMPL_HPP_
24
24
 
25
+ #include <cstdlib>
25
26
  #include <memory>
26
27
  #include <stdexcept>
27
28
 
29
+ #include "common_defs.hpp"
28
30
  #include "compression_data.hpp"
29
31
  #include "cpc_util.hpp"
30
32
  #include "cpc_common.hpp"
@@ -36,9 +38,17 @@ namespace datasketches {
36
38
  template<typename A>
37
39
  cpc_compressor<A>& get_compressor() {
38
40
  static cpc_compressor<A>* instance = new cpc_compressor<A>(); // use new for global initialization
41
+ static int reg_result = std::atexit(destroy_compressor<A>); // just to clean up a little more nicely; don't worry if it fails
42
+ unused(reg_result);
39
43
  return *instance;
40
44
  }
41
45
 
46
+ // deletes the compressor instance; registered with std::atexit in get_compressor()
47
+ template<typename A>
48
+ void destroy_compressor() {
49
+ delete std::addressof(get_compressor<A>());
50
+ }
51
+
42
52
  template<typename A>
43
53
  cpc_compressor<A>::cpc_compressor() {
44
54
  make_decoding_tables();
@@ -30,7 +30,6 @@ target_include_directories(density
30
30
  )
31
31
 
32
32
  target_link_libraries(density INTERFACE common)
33
- target_compile_features(density INTERFACE cxx_std_11)
34
33
 
35
34
  install(TARGETS density
36
35
  EXPORT ${PROJECT_NAME}
@@ -30,7 +30,6 @@ target_include_directories(fi
30
30
  )
31
31
 
32
32
  target_link_libraries(fi INTERFACE common)
33
- target_compile_features(fi INTERFACE cxx_std_11)
34
33
 
35
34
  install(TARGETS fi
36
35
  EXPORT ${PROJECT_NAME}
@@ -30,7 +30,6 @@ target_include_directories(hll
30
30
  )
31
31
 
32
32
  target_link_libraries(hll INTERFACE common)
33
- target_compile_features(hll INTERFACE cxx_std_11)
34
33
 
35
34
  install(TARGETS hll
36
35
  EXPORT ${PROJECT_NAME}
@@ -30,7 +30,6 @@ target_include_directories(kll
30
30
  )
31
31
 
32
32
  target_link_libraries(kll INTERFACE common)
33
- target_compile_features(kll INTERFACE cxx_std_11)
34
33
 
35
34
  install(TARGETS kll
36
35
  EXPORT ${PROJECT_NAME}
@@ -31,11 +31,14 @@ using alloc = test_allocator<test_type>;
31
31
 
32
32
  TEST_CASE("kll sketch custom type", "[kll_sketch]") {
33
33
 
34
- // setup section
35
34
  test_allocator_total_bytes = 0;
35
+ test_allocator_net_allocations = 0;
36
36
 
37
37
  SECTION("compact level zero") {
38
38
  kll_test_type_sketch sketch(8, test_type_less(), 0);
39
+ REQUIRE(test_allocator_total_bytes != 0);
40
+ REQUIRE(test_allocator_net_allocations != 0);
41
+
39
42
  REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
40
43
  REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
41
44
  REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
@@ -146,10 +149,8 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
146
149
  REQUIRE(sketch2.get_n() == 11);
147
150
  }
148
151
 
149
- // cleanup
150
- if (test_allocator_total_bytes != 0) {
151
- REQUIRE(test_allocator_total_bytes == 0);
152
- }
152
+ REQUIRE(test_allocator_total_bytes == 0);
153
+ REQUIRE(test_allocator_net_allocations == 0);
153
154
  }
154
155
 
155
156
  } /* namespace datasketches */
@@ -30,7 +30,6 @@ target_include_directories(quantiles
30
30
  )
31
31
 
32
32
  target_link_libraries(quantiles INTERFACE common)
33
- target_compile_features(quantiles INTERFACE cxx_std_11)
34
33
 
35
34
  install(TARGETS quantiles
36
35
  EXPORT ${PROJECT_NAME}
@@ -30,7 +30,6 @@ target_include_directories(req
30
30
  )
31
31
 
32
32
  target_link_libraries(req INTERFACE common)
33
- target_compile_features(req INTERFACE cxx_std_11)
34
33
 
35
34
  install(TARGETS req
36
35
  EXPORT ${PROJECT_NAME}
@@ -30,7 +30,6 @@ target_include_directories(sampling
30
30
  )
31
31
 
32
32
  target_link_libraries(sampling INTERFACE common)
33
- target_compile_features(sampling INTERFACE cxx_std_11)
34
33
 
35
34
  install(TARGETS sampling
36
35
  EXPORT ${PROJECT_NAME}
@@ -37,14 +37,14 @@ class ebpps_sample {
37
37
  public:
38
38
  explicit ebpps_sample(uint32_t k, const A& allocator = A());
39
39
 
40
- // constructor used to create a sample to merge one itme
41
- template<typename TT>
42
- ebpps_sample(TT&& item, double theta, const A& allocator = A());
43
-
44
40
  // for deserialization
45
41
  class items_deleter;
46
42
  ebpps_sample(std::vector<T, A>&& data, optional<T>&& partial_item, double c, const A& allocator = A());
47
43
 
44
+ // used instead of having a single-item constructor for update/merge calls
45
+ template<typename TT>
46
+ void replace_content(TT&& item, double theta);
47
+
48
48
  void reset();
49
49
  void downsample(double theta);
50
50
 
@@ -41,22 +41,6 @@ ebpps_sample<T,A>::ebpps_sample(uint32_t reserved_size, const A& allocator) :
41
41
  data_.reserve(reserved_size);
42
42
  }
43
43
 
44
- template<typename T, typename A>
45
- template<typename TT>
46
- ebpps_sample<T,A>::ebpps_sample(TT&& item, double theta, const A& allocator) :
47
- allocator_(allocator),
48
- c_(theta),
49
- partial_item_(),
50
- data_(allocator)
51
- {
52
- if (theta == 1.0) {
53
- data_.reserve(1);
54
- data_.emplace_back(std::forward<TT>(item));
55
- } else {
56
- partial_item_.emplace(std::forward<TT>(item));
57
- }
58
- }
59
-
60
44
  template<typename T, typename A>
61
45
  ebpps_sample<T,A>::ebpps_sample(std::vector<T, A>&& data, optional<T>&& partial_item, double c, const A& allocator) :
62
46
  allocator_(allocator),
@@ -65,6 +49,19 @@ ebpps_sample<T,A>::ebpps_sample(std::vector<T, A>&& data, optional<T>&& partial_
65
49
  data_(data, allocator)
66
50
  {}
67
51
 
52
+ template<typename T, typename A>
53
+ template<typename TT>
54
+ void ebpps_sample<T,A>::replace_content(TT&& item, double theta) {
55
+ c_ = theta;
56
+ data_.clear();
57
+ partial_item_.reset();
58
+ if (theta == 1.0) {
59
+ data_.emplace_back(std::forward<TT>(item));
60
+ } else {
61
+ partial_item_.emplace(std::forward<TT>(item));
62
+ }
63
+ }
64
+
68
65
  template<typename T, typename A>
69
66
  auto ebpps_sample<T,A>::get_sample() const -> result_type {
70
67
  double unused;
@@ -43,7 +43,7 @@ namespace ebpps_constants {
43
43
  * From: "Exact PPS Sampling with Bounded Sample Size",
44
44
  * B. Hentschel, P. J. Haas, Y. Tian. Information Processing Letters, 2023.
45
45
  *
46
- * This sketch samples data from a stream of items propotional to the weight of each item.
46
+ * This sketch samples data from a stream of items proportional to the weight of each item.
47
47
  * The sample guarantees the presence of an item in the result is proportional to that item's
48
48
  * portion of the total weight seen by the sketch, and returns a sample no larger than size k.
49
49
  *
@@ -256,6 +256,8 @@ class ebpps_sketch {
256
256
 
257
257
  ebpps_sample<T,A> sample_; // Object holding the current state of the sample
258
258
 
259
+ ebpps_sample<T,A> tmp_; // Temporary sample of size 1 used in updates
260
+
259
261
  // handles merge after ensuring other.cumulative_wt_ <= this->cumulative_wt_
260
262
  // so we can send items in individually
261
263
  template<typename O>
@@ -40,7 +40,8 @@ ebpps_sketch<T, A>::ebpps_sketch(uint32_t k, const A& allocator) :
40
40
  cumulative_wt_(0.0),
41
41
  wt_max_(0.0),
42
42
  rho_(1.0),
43
- sample_(check_k(k), allocator)
43
+ sample_(check_k(k), allocator),
44
+ tmp_(1, allocator)
44
45
  {}
45
46
 
46
47
  template<typename T, typename A>
@@ -53,7 +54,8 @@ ebpps_sketch<T,A>::ebpps_sketch(uint32_t k, uint64_t n, double cumulative_wt,
53
54
  cumulative_wt_(cumulative_wt),
54
55
  wt_max_(wt_max),
55
56
  rho_(rho),
56
- sample_(sample)
57
+ sample_(sample),
58
+ tmp_(1, allocator)
57
59
  {}
58
60
 
59
61
  template<typename T, typename A>
@@ -148,9 +150,8 @@ void ebpps_sketch<T, A>::internal_update(FwdItem&& item, double weight) {
148
150
  if (cumulative_wt_ > 0.0)
149
151
  sample_.downsample(new_rho / rho_);
150
152
 
151
- ebpps_sample<T,A> tmp(conditional_forward<FwdItem>(item), new_rho * weight, allocator_);
152
-
153
- sample_.merge(tmp);
153
+ tmp_.replace_content(conditional_forward<FwdItem>(item), new_rho * weight);
154
+ sample_.merge(tmp_);
154
155
 
155
156
  cumulative_wt_ = new_cum_wt;
156
157
  wt_max_ = new_wt_max;
@@ -240,9 +241,8 @@ void ebpps_sketch<T, A>::internal_merge(O&& sk) {
240
241
  if (cumulative_wt_ > 0.0)
241
242
  sample_.downsample(new_rho / rho_);
242
243
 
243
- ebpps_sample<T,A> tmp(conditional_forward<O>(items[i]), new_rho * avg_wt, allocator_);
244
-
245
- sample_.merge(tmp);
244
+ tmp_.replace_content(conditional_forward<O>(items[i]), new_rho * avg_wt);
245
+ sample_.merge(tmp_);
246
246
 
247
247
  cumulative_wt_ = new_cum_wt;
248
248
  rho_ = new_rho;
@@ -259,9 +259,8 @@ void ebpps_sketch<T, A>::internal_merge(O&& sk) {
259
259
  if (cumulative_wt_ > 0.0)
260
260
  sample_.downsample(new_rho / rho_);
261
261
 
262
- ebpps_sample<T,A> tmp(conditional_forward<O>(other_sample.get_partial_item()), new_rho * other_c_frac * avg_wt, allocator_);
263
-
264
- sample_.merge(tmp);
262
+ tmp_.replace_content(conditional_forward<O>(other_sample.get_partial_item()), new_rho * other_c_frac * avg_wt);
263
+ sample_.merge(tmp_);
265
264
 
266
265
  cumulative_wt_ = new_cum_wt;
267
266
  rho_ = new_rho;
@@ -42,14 +42,15 @@ TEST_CASE("ebpps sample: basic initialization", "[ebpps_sketch]") {
42
42
 
43
43
  TEST_CASE("ebpps sample: pre-initialized", "[ebpps_sketch]") {
44
44
  double theta = 1.0;
45
- ebpps_sample<int> sample = ebpps_sample<int>(-1, theta);
45
+ ebpps_sample<int> sample(1);
46
+ sample.replace_content(-1, theta);
46
47
  REQUIRE(sample.get_c() == theta);
47
48
  REQUIRE(sample.get_num_retained_items() == 1);
48
49
  REQUIRE(sample.get_sample().size() == 1);
49
50
  REQUIRE(sample.has_partial_item() == false);
50
51
 
51
52
  theta = 1e-300;
52
- sample = ebpps_sample<int>(-1, theta);
53
+ sample.replace_content(-1, theta);
53
54
  REQUIRE(sample.get_c() == theta);
54
55
  REQUIRE(sample.get_num_retained_items() == 1);
55
56
  REQUIRE(sample.get_sample().size() == 0); // assuming the random number is > 1e-300
@@ -57,7 +58,8 @@ TEST_CASE("ebpps sample: pre-initialized", "[ebpps_sketch]") {
57
58
  }
58
59
 
59
60
  TEST_CASE("ebpps sample: downsampling", "[ebpps_sketch]") {
60
- ebpps_sample<char> sample = ebpps_sample<char>('a', 1.0);
61
+ ebpps_sample<char> sample(1);
62
+ sample.replace_content('a', 1.0);
61
63
 
62
64
  sample.downsample(2.0); // no-op
63
65
  REQUIRE(sample.get_c() == 1.0);
@@ -121,8 +123,9 @@ TEST_CASE("ebpps sample: merge unit samples", "[ebpps_sketch]") {
121
123
  uint32_t k = 8;
122
124
  ebpps_sample<int> sample = ebpps_sample<int>(k);
123
125
 
126
+ ebpps_sample<int> s(1);
124
127
  for (uint32_t i = 1; i <= k; ++i) {
125
- ebpps_sample<int> s = ebpps_sample<int>(i, 1.0);
128
+ s.replace_content(i, 1.0);
126
129
  sample.merge(s);
127
130
  REQUIRE(sample.get_c() == static_cast<double>(i));
128
131
  REQUIRE(sample.get_num_retained_items() == i);
@@ -0,0 +1,41 @@
1
+ # Licensed to the Apache Software Foundation (ASF) under one
2
+ # or more contributor license agreements. See the NOTICE file
3
+ # distributed with this work for additional information
4
+ # regarding copyright ownership. The ASF licenses this file
5
+ # to you under the Apache License, Version 2.0 (the
6
+ # "License"); you may not use this file except in compliance
7
+ # with the License. You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing,
12
+ # software distributed under the License is distributed on an
13
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
+ # KIND, either express or implied. See the License for the
15
+ # specific language governing permissions and limitations
16
+ # under the License.
17
+
18
+ add_library(tdigest INTERFACE)
19
+
20
+ add_library(${PROJECT_NAME}::TDIGEST ALIAS tdigest)
21
+
22
+ if (BUILD_TESTS)
23
+ add_subdirectory(test)
24
+ endif()
25
+
26
+ target_include_directories(tdigest
27
+ INTERFACE
28
+ $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
29
+ $<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
30
+ )
31
+
32
+ target_link_libraries(tdigest INTERFACE common)
33
+
34
+ install(TARGETS tdigest
35
+ EXPORT ${PROJECT_NAME}
36
+ )
37
+
38
+ install(FILES
39
+ include/tdigest.hpp
40
+ include/tdigest_impl.hpp
41
+ DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")