datasketches 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/ext/datasketches/cpc_wrapper.cpp +1 -1
  4. data/lib/datasketches/version.rb +1 -1
  5. data/vendor/datasketches-cpp/CMakeLists.txt +22 -20
  6. data/vendor/datasketches-cpp/NOTICE +1 -1
  7. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +25 -27
  8. data/vendor/datasketches-cpp/common/include/common_defs.hpp +8 -6
  9. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +11 -0
  10. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +5 -4
  11. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  12. data/vendor/datasketches-cpp/common/test/integration_test.cpp +6 -0
  13. data/vendor/datasketches-cpp/count/CMakeLists.txt +42 -0
  14. data/vendor/datasketches-cpp/count/include/count_min.hpp +351 -0
  15. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +517 -0
  16. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +43 -0
  17. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +155 -0
  18. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +306 -0
  19. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +3 -3
  20. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +1 -1
  21. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +16 -8
  22. data/vendor/datasketches-cpp/density/CMakeLists.txt +42 -0
  23. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +236 -0
  24. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +543 -0
  25. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +35 -0
  26. data/vendor/datasketches-cpp/density/test/density_sketch_test.cpp +244 -0
  27. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +9 -3
  28. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +19 -11
  29. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +2 -5
  30. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +19 -7
  31. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +1 -1
  32. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +98 -42
  33. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -0
  34. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +92 -59
  35. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +16 -6
  36. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +3 -21
  37. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +8 -0
  38. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +14 -6
  39. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +1 -1
  40. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +8 -2
  41. data/vendor/datasketches-cpp/hll/include/hll.hpp +9 -8
  42. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +7 -1
  43. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -1
  44. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +8 -3
  45. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +2 -2
  46. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +2 -2
  47. data/vendor/datasketches-cpp/python/CMakeLists.txt +6 -0
  48. data/vendor/datasketches-cpp/python/README.md +5 -5
  49. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +87 -0
  50. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +35 -0
  51. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +15 -9
  52. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +77 -0
  53. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +205 -0
  54. data/vendor/datasketches-cpp/python/datasketches/__init__.py +17 -1
  55. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +98 -0
  56. data/vendor/datasketches-cpp/python/include/py_object_lt.hpp +37 -0
  57. data/vendor/datasketches-cpp/python/include/py_object_ostream.hpp +48 -0
  58. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +104 -0
  59. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +136 -0
  60. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +101 -0
  61. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +16 -30
  62. data/vendor/datasketches-cpp/python/src/datasketches.cpp +6 -0
  63. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +95 -0
  64. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +127 -73
  65. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +28 -36
  66. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +108 -160
  67. data/vendor/datasketches-cpp/python/src/py_serde.cpp +5 -4
  68. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +99 -148
  69. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +117 -178
  70. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +67 -73
  71. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +215 -0
  72. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +1 -1
  73. data/vendor/datasketches-cpp/python/tests/count_min_test.py +86 -0
  74. data/vendor/datasketches-cpp/python/tests/cpc_test.py +10 -10
  75. data/vendor/datasketches-cpp/python/tests/density_test.py +93 -0
  76. data/vendor/datasketches-cpp/python/tests/fi_test.py +41 -2
  77. data/vendor/datasketches-cpp/python/tests/hll_test.py +19 -20
  78. data/vendor/datasketches-cpp/python/tests/kll_test.py +40 -6
  79. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +39 -5
  80. data/vendor/datasketches-cpp/python/tests/req_test.py +38 -5
  81. data/vendor/datasketches-cpp/python/tests/theta_test.py +16 -14
  82. data/vendor/datasketches-cpp/python/tests/tuple_test.py +206 -0
  83. data/vendor/datasketches-cpp/python/tests/vo_test.py +7 -0
  84. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +8 -3
  85. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +4 -4
  86. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +1 -1
  87. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +0 -2
  88. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +8 -3
  89. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +2 -2
  90. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +20 -6
  91. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +30 -16
  92. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -1
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +19 -15
  94. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +33 -14
  95. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -2
  96. data/vendor/datasketches-cpp/setup.py +1 -1
  97. data/vendor/datasketches-cpp/theta/CMakeLists.txt +1 -0
  98. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +6279 -0
  99. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +14 -8
  100. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +60 -46
  101. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +4 -2
  102. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +58 -10
  103. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +430 -130
  104. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -9
  105. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +16 -4
  106. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +2 -2
  107. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  108. data/vendor/datasketches-cpp/theta/test/bit_packing_test.cpp +80 -0
  109. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +42 -3
  110. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -0
  111. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +2 -1
  112. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  113. metadata +31 -3
@@ -41,7 +41,7 @@ void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
41
41
  if (sketch.is_empty()) return;
42
42
  if (sketch.get_seed_hash() != compute_seed_hash(table_.seed_)) throw std::invalid_argument("seed hash mismatch");
43
43
  table_.is_empty_ = false;
44
- if (sketch.get_theta64() < union_theta_) union_theta_ = sketch.get_theta64();
44
+ union_theta_ = std::min(union_theta_, sketch.get_theta64());
45
45
  for (auto& entry: sketch) {
46
46
  const uint64_t hash = EK()(entry);
47
47
  if (hash < union_theta_ && hash < table_.theta_) {
@@ -55,7 +55,7 @@ void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
55
55
  if (sketch.is_ordered()) break; // early stop
56
56
  }
57
57
  }
58
- if (table_.theta_ < union_theta_) union_theta_ = table_.theta_;
58
+ union_theta_ = std::min(union_theta_, table_.theta_);
59
59
  }
60
60
 
61
61
  template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
@@ -65,16 +65,16 @@ CS theta_union_base<EN, EK, P, S, CS, A>::get_result(bool ordered) const {
65
65
  entries.reserve(table_.num_entries_);
66
66
  uint64_t theta = std::min(union_theta_, table_.theta_);
67
67
  const uint32_t nominal_num = 1 << table_.lg_nom_size_;
68
- if (union_theta_ >= theta && table_.num_entries_ <= nominal_num) {
68
+ if (union_theta_ >= table_.theta_) {
69
69
  std::copy_if(table_.begin(), table_.end(), std::back_inserter(entries), key_not_zero<EN, EK>());
70
70
  } else {
71
71
  std::copy_if(table_.begin(), table_.end(), std::back_inserter(entries), key_not_zero_less_than<uint64_t, EN, EK>(theta));
72
- if (entries.size() > nominal_num) {
73
- std::nth_element(entries.begin(), entries.begin() + nominal_num, entries.end(), comparator());
74
- theta = EK()(entries[nominal_num]);
75
- entries.erase(entries.begin() + nominal_num, entries.end());
76
- entries.shrink_to_fit();
77
- }
72
+ }
73
+ if (entries.size() > nominal_num) {
74
+ std::nth_element(entries.begin(), entries.begin() + nominal_num, entries.end(), comparator());
75
+ theta = EK()(entries[nominal_num]);
76
+ entries.erase(entries.begin() + nominal_num, entries.end());
77
+ entries.shrink_to_fit();
78
78
  }
79
79
  if (ordered) std::sort(entries.begin(), entries.end(), comparator());
80
80
  return CS(table_.is_empty_, ordered, compute_seed_hash(table_.seed_), theta, std::move(entries));
@@ -23,8 +23,8 @@
23
23
  #include <vector>
24
24
  #include <climits>
25
25
  #include <cmath>
26
+ #include <iterator>
26
27
 
27
- #include "common_defs.hpp"
28
28
  #include "MurmurHash3.h"
29
29
  #include "theta_comparators.hpp"
30
30
  #include "theta_constants.hpp"
@@ -185,8 +185,14 @@ static inline uint64_t compute_hash(const void* data, size_t length, uint64_t se
185
185
  // iterators
186
186
 
187
187
  template<typename Entry, typename ExtractKey>
188
- class theta_iterator: public std::iterator<std::input_iterator_tag, Entry> {
188
+ class theta_iterator {
189
189
  public:
190
+ using iterator_category = std::input_iterator_tag;
191
+ using value_type = Entry;
192
+ using difference_type = std::ptrdiff_t;
193
+ using pointer = Entry*;
194
+ using reference = Entry&;
195
+
190
196
  theta_iterator(Entry* entries, uint32_t size, uint32_t index);
191
197
  theta_iterator& operator++();
192
198
  theta_iterator operator++(int);
@@ -201,14 +207,20 @@ private:
201
207
  };
202
208
 
203
209
  template<typename Entry, typename ExtractKey>
204
- class theta_const_iterator: public std::iterator<std::input_iterator_tag, Entry> {
210
+ class theta_const_iterator {
205
211
  public:
212
+ using iterator_category = std::input_iterator_tag;
213
+ using value_type = const Entry;
214
+ using difference_type = std::ptrdiff_t;
215
+ using pointer = const Entry*;
216
+ using reference = const Entry&;
217
+
206
218
  theta_const_iterator(const Entry* entries, uint32_t size, uint32_t index);
207
219
  theta_const_iterator& operator++();
208
220
  theta_const_iterator operator++(int);
209
221
  bool operator==(const theta_const_iterator& other) const;
210
222
  bool operator!=(const theta_const_iterator& other) const;
211
- const Entry& operator*() const;
223
+ reference operator*() const;
212
224
 
213
225
  private:
214
226
  const Entry* entries_;
@@ -188,7 +188,7 @@ auto theta_update_sketch_base<EN, EK, A>::begin() const -> iterator {
188
188
 
189
189
  template<typename EN, typename EK, typename A>
190
190
  auto theta_update_sketch_base<EN, EK, A>::end() const -> iterator {
191
- return &entries_[1ULL << lg_cur_size_];
191
+ return entries_ + (1ULL << lg_cur_size_);
192
192
  }
193
193
 
194
194
  template<typename EN, typename EK, typename A>
@@ -382,7 +382,7 @@ bool theta_iterator<Entry, ExtractKey>::operator==(const theta_iterator& other)
382
382
  }
383
383
 
384
384
  template<typename Entry, typename ExtractKey>
385
- auto theta_iterator<Entry, ExtractKey>::operator*() const -> Entry& {
385
+ auto theta_iterator<Entry, ExtractKey>::operator*() const -> reference {
386
386
  return entries_[index_];
387
387
  }
388
388
 
@@ -44,4 +44,5 @@ target_sources(theta_test
44
44
  theta_a_not_b_test.cpp
45
45
  theta_jaccard_similarity_test.cpp
46
46
  theta_setop_test.cpp
47
+ bit_packing_test.cpp
47
48
  )
@@ -0,0 +1,80 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch2/catch.hpp>
21
+ #include <bit_packing.hpp>
22
+
23
+ namespace datasketches {
24
+
25
+ // for every number of bits from 1 to 63
26
+ // generate pseudo-random data, pack, unpack and compare
27
+
28
+ // inverse golden ratio (0.618.. of max uint64_t)
29
+ static const uint64_t IGOLDEN64 = 0x9e3779b97f4a7c13ULL;
30
+
31
+ TEST_CASE("pack unpack bits") {
32
+ for (uint8_t bits = 1; bits <= 63; ++bits) {
33
+ const uint64_t mask = (1ULL << bits) - 1;
34
+ std::vector<uint64_t> input(8, 0);
35
+ const uint64_t igolden64 = IGOLDEN64;
36
+ uint64_t value = 0xaa55aa55aa55aa55ULL; // arbitrary starting value
37
+ for (int i = 0; i < 8; ++i) {
38
+ input[i] = value & mask;
39
+ value += igolden64;
40
+ }
41
+ std::vector<uint8_t> bytes(8 * sizeof(uint64_t), 0);
42
+ uint8_t offset = 0;
43
+ uint8_t* ptr = bytes.data();
44
+ for (int i = 0; i < 8; ++i) {
45
+ offset = pack_bits(input[i], bits, ptr, offset);
46
+ }
47
+
48
+ std::vector<uint64_t> output(8, 0);
49
+ offset = 0;
50
+ const uint8_t* cptr = bytes.data();
51
+ for (int i = 0; i < 8; ++i) {
52
+ offset = unpack_bits(output[i], bits, cptr, offset);
53
+ }
54
+ for (int i = 0; i < 8; ++i) {
55
+ REQUIRE((input[i] & mask) == output[i]);
56
+ }
57
+ }
58
+ }
59
+
60
+ TEST_CASE("pack unpack blocks") {
61
+ for (uint8_t bits = 1; bits <= 63; ++bits) {
62
+ const uint64_t mask = (1ULL << bits) - 1;
63
+ std::vector<uint64_t> input(8, 0);
64
+ const uint64_t igolden64 = IGOLDEN64;
65
+ uint64_t value = 0xaa55aa55aa55aa55ULL; // arbitrary starting value
66
+ for (int i = 0; i < 8; ++i) {
67
+ input[i] = value & mask;
68
+ value += igolden64;
69
+ }
70
+ std::vector<uint8_t> bytes(8 * sizeof(uint64_t), 0);
71
+ pack_bits_block8(input.data(), bytes.data(), bits);
72
+ std::vector<uint64_t> output(8, 0);
73
+ unpack_bits_block8(output.data(), bytes.data(), bits);
74
+ for (int i = 0; i < 8; ++i) {
75
+ REQUIRE((input[i] & mask) == output[i]);
76
+ }
77
+ }
78
+ }
79
+
80
+ } /* namespace datasketches */
@@ -607,7 +607,7 @@ TEST_CASE("theta sketch: wrap compact estimation from java", "[theta_sketch]") {
607
607
  compact_theta_sketch compact_sketch = update_sketch.compact();
608
608
  // the sketches are ordered, so the iteration sequence must match exactly
609
609
  auto iter = sketch.begin();
610
- for (const auto& key: compact_sketch) {
610
+ for (const auto key: compact_sketch) {
611
611
  REQUIRE(*iter == key);
612
612
  ++iter;
613
613
  }
@@ -652,7 +652,7 @@ TEST_CASE("theta sketch: wrap compact v1 estimation from java", "[theta_sketch]"
652
652
  compact_theta_sketch compact_sketch = update_sketch.compact();
653
653
  // the sketches are ordered, so the iteration sequence must match exactly
654
654
  auto iter = sketch.begin();
655
- for (const auto& key: compact_sketch) {
655
+ for (const auto key: compact_sketch) {
656
656
  REQUIRE(*iter == key);
657
657
  ++iter;
658
658
  }
@@ -697,7 +697,46 @@ TEST_CASE("theta sketch: wrap compact v2 estimation from java", "[theta_sketch]"
697
697
  compact_theta_sketch compact_sketch = update_sketch.compact();
698
698
  // the sketches are ordered, so the iteration sequence must match exactly
699
699
  auto iter = sketch.begin();
700
- for (const auto& key: compact_sketch) {
700
+ for (const auto key: compact_sketch) {
701
+ REQUIRE(*iter == key);
702
+ ++iter;
703
+ }
704
+ }
705
+
706
+ TEST_CASE("theta sketch: serialize deserialize compressed", "[theta_sketch]") {
707
+ auto update_sketch = update_theta_sketch::builder().build();
708
+ for (int i = 0; i < 10000; i++) update_sketch.update(i);
709
+ auto compact_sketch = update_sketch.compact();
710
+
711
+ auto bytes = compact_sketch.serialize_compressed();
712
+ { // deserialize bytes
713
+ auto deserialized_sketch = compact_theta_sketch::deserialize(bytes.data(), bytes.size());
714
+ REQUIRE(deserialized_sketch.get_num_retained() == compact_sketch.get_num_retained());
715
+ REQUIRE(deserialized_sketch.get_theta() == compact_sketch.get_theta());
716
+ auto iter = deserialized_sketch.begin();
717
+ for (const auto key: compact_sketch) {
718
+ REQUIRE(*iter == key);
719
+ ++iter;
720
+ }
721
+ }
722
+ { // wrap bytes
723
+ auto wrapped_sketch = wrapped_compact_theta_sketch::wrap(bytes.data(), bytes.size());
724
+ REQUIRE(wrapped_sketch.get_num_retained() == compact_sketch.get_num_retained());
725
+ REQUIRE(wrapped_sketch.get_theta() == compact_sketch.get_theta());
726
+ auto iter = wrapped_sketch.begin();
727
+ for (const auto key: compact_sketch) {
728
+ REQUIRE(*iter == key);
729
+ ++iter;
730
+ }
731
+ }
732
+
733
+ std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
734
+ compact_sketch.serialize_compressed(s);
735
+ auto deserialized_sketch = compact_theta_sketch::deserialize(s);
736
+ REQUIRE(deserialized_sketch.get_num_retained() == compact_sketch.get_num_retained());
737
+ REQUIRE(deserialized_sketch.get_theta() == compact_sketch.get_theta());
738
+ auto iter = deserialized_sketch.begin();
739
+ for (const auto key: compact_sketch) {
701
740
  REQUIRE(*iter == key);
702
741
  ++iter;
703
742
  }
@@ -128,4 +128,29 @@ TEST_CASE("theta union: seed mismatch", "[theta_union]") {
128
128
  REQUIRE_THROWS_AS(u.update(sketch), std::invalid_argument);
129
129
  }
130
130
 
131
+ TEST_CASE("theta union: larger K", "[theta_union]") {
132
+ auto update_sketch1 = datasketches::update_theta_sketch::builder().set_lg_k(14).build();
133
+ for(int i = 0; i < 16384; ++i) update_sketch1.update(i);
134
+
135
+ auto update_sketch2 = datasketches::update_theta_sketch::builder().set_lg_k(14).build();
136
+ for(int i = 0; i < 26384; ++i) update_sketch2.update(i);
137
+
138
+ auto update_sketch3 = datasketches::update_theta_sketch::builder().set_lg_k(14).build();
139
+ for(int i = 0; i < 86384; ++i) update_sketch3.update(i);
140
+
141
+ auto union1 = datasketches::theta_union::builder().set_lg_k(16).build();
142
+ union1.update(update_sketch2);
143
+ union1.update(update_sketch1);
144
+ union1.update(update_sketch3);
145
+ auto result1 = union1.get_result();
146
+ REQUIRE(result1.get_estimate() == update_sketch3.get_estimate());
147
+
148
+ auto union2 = datasketches::theta_union::builder().set_lg_k(16).build();
149
+ union2.update(update_sketch1);
150
+ union2.update(update_sketch3);
151
+ union2.update(update_sketch2);
152
+ auto result2 = union2.get_result();
153
+ REQUIRE(result2.get_estimate() == update_sketch3.get_estimate());
154
+ }
155
+
131
156
  } /* namespace datasketches */
@@ -32,7 +32,8 @@ bool tuple_sketch<S, A>::is_estimation_mode() const {
32
32
 
33
33
  template<typename S, typename A>
34
34
  double tuple_sketch<S, A>::get_theta() const {
35
- return static_cast<double>(get_theta64()) / theta_constants::MAX_THETA;
35
+ return static_cast<double>(get_theta64()) /
36
+ static_cast<double>(theta_constants::MAX_THETA);
36
37
  }
37
38
 
38
39
  template<typename S, typename A>
@@ -1 +1 @@
1
- 4.0.1
1
+ 4.1.0
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datasketches
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-31 00:00:00.000000000 Z
11
+ date: 2023-05-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice
@@ -76,6 +76,12 @@ files:
76
76
  - vendor/datasketches-cpp/common/test/test_allocator.cpp
77
77
  - vendor/datasketches-cpp/common/test/test_allocator.hpp
78
78
  - vendor/datasketches-cpp/common/test/test_type.hpp
79
+ - vendor/datasketches-cpp/count/CMakeLists.txt
80
+ - vendor/datasketches-cpp/count/include/count_min.hpp
81
+ - vendor/datasketches-cpp/count/include/count_min_impl.hpp
82
+ - vendor/datasketches-cpp/count/test/CMakeLists.txt
83
+ - vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp
84
+ - vendor/datasketches-cpp/count/test/count_min_test.cpp
79
85
  - vendor/datasketches-cpp/cpc/CMakeLists.txt
80
86
  - vendor/datasketches-cpp/cpc/include/compression_data.hpp
81
87
  - vendor/datasketches-cpp/cpc/include/cpc_common.hpp
@@ -96,6 +102,11 @@ files:
96
102
  - vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp
97
103
  - vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp
98
104
  - vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp
105
+ - vendor/datasketches-cpp/density/CMakeLists.txt
106
+ - vendor/datasketches-cpp/density/include/density_sketch.hpp
107
+ - vendor/datasketches-cpp/density/include/density_sketch_impl.hpp
108
+ - vendor/datasketches-cpp/density/test/CMakeLists.txt
109
+ - vendor/datasketches-cpp/density/test/density_sketch_test.cpp
99
110
  - vendor/datasketches-cpp/fi/CMakeLists.txt
100
111
  - vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp
101
112
  - vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp
@@ -173,9 +184,18 @@ files:
173
184
  - vendor/datasketches-cpp/pyproject.toml
174
185
  - vendor/datasketches-cpp/python/CMakeLists.txt
175
186
  - vendor/datasketches-cpp/python/README.md
187
+ - vendor/datasketches-cpp/python/datasketches/DensityWrapper.py
188
+ - vendor/datasketches-cpp/python/datasketches/KernelFunction.py
176
189
  - vendor/datasketches-cpp/python/datasketches/PySerDe.py
190
+ - vendor/datasketches-cpp/python/datasketches/TuplePolicy.py
191
+ - vendor/datasketches-cpp/python/datasketches/TupleWrapper.py
177
192
  - vendor/datasketches-cpp/python/datasketches/__init__.py
193
+ - vendor/datasketches-cpp/python/include/kernel_function.hpp
194
+ - vendor/datasketches-cpp/python/include/py_object_lt.hpp
195
+ - vendor/datasketches-cpp/python/include/py_object_ostream.hpp
178
196
  - vendor/datasketches-cpp/python/include/py_serde.hpp
197
+ - vendor/datasketches-cpp/python/include/quantile_conditional.hpp
198
+ - vendor/datasketches-cpp/python/include/tuple_policy.hpp
179
199
  - vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb
180
200
  - vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb
181
201
  - vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb
@@ -183,8 +203,10 @@ files:
183
203
  - vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb
184
204
  - vendor/datasketches-cpp/python/pybind11Path.cmd
185
205
  - vendor/datasketches-cpp/python/src/__init__.py
206
+ - vendor/datasketches-cpp/python/src/count_wrapper.cpp
186
207
  - vendor/datasketches-cpp/python/src/cpc_wrapper.cpp
187
208
  - vendor/datasketches-cpp/python/src/datasketches.cpp
209
+ - vendor/datasketches-cpp/python/src/density_wrapper.cpp
188
210
  - vendor/datasketches-cpp/python/src/fi_wrapper.cpp
189
211
  - vendor/datasketches-cpp/python/src/hll_wrapper.cpp
190
212
  - vendor/datasketches-cpp/python/src/kll_wrapper.cpp
@@ -193,16 +215,20 @@ files:
193
215
  - vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp
194
216
  - vendor/datasketches-cpp/python/src/req_wrapper.cpp
195
217
  - vendor/datasketches-cpp/python/src/theta_wrapper.cpp
218
+ - vendor/datasketches-cpp/python/src/tuple_wrapper.cpp
196
219
  - vendor/datasketches-cpp/python/src/vector_of_kll.cpp
197
220
  - vendor/datasketches-cpp/python/src/vo_wrapper.cpp
198
221
  - vendor/datasketches-cpp/python/tests/__init__.py
222
+ - vendor/datasketches-cpp/python/tests/count_min_test.py
199
223
  - vendor/datasketches-cpp/python/tests/cpc_test.py
224
+ - vendor/datasketches-cpp/python/tests/density_test.py
200
225
  - vendor/datasketches-cpp/python/tests/fi_test.py
201
226
  - vendor/datasketches-cpp/python/tests/hll_test.py
202
227
  - vendor/datasketches-cpp/python/tests/kll_test.py
203
228
  - vendor/datasketches-cpp/python/tests/quantiles_test.py
204
229
  - vendor/datasketches-cpp/python/tests/req_test.py
205
230
  - vendor/datasketches-cpp/python/tests/theta_test.py
231
+ - vendor/datasketches-cpp/python/tests/tuple_test.py
206
232
  - vendor/datasketches-cpp/python/tests/vector_of_kll_test.py
207
233
  - vendor/datasketches-cpp/python/tests/vo_test.py
208
234
  - vendor/datasketches-cpp/quantiles/CMakeLists.txt
@@ -249,6 +275,7 @@ files:
249
275
  - vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk
250
276
  - vendor/datasketches-cpp/setup.py
251
277
  - vendor/datasketches-cpp/theta/CMakeLists.txt
278
+ - vendor/datasketches-cpp/theta/include/bit_packing.hpp
252
279
  - vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp
253
280
  - vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp
254
281
  - vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp
@@ -275,6 +302,7 @@ files:
275
302
  - vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp
276
303
  - vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp
277
304
  - vendor/datasketches-cpp/theta/test/CMakeLists.txt
305
+ - vendor/datasketches-cpp/theta/test/bit_packing_test.cpp
278
306
  - vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp
279
307
  - vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk
280
308
  - vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk
@@ -342,7 +370,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
342
370
  - !ruby/object:Gem::Version
343
371
  version: '0'
344
372
  requirements: []
345
- rubygems_version: 3.4.1
373
+ rubygems_version: 3.4.10
346
374
  signing_key:
347
375
  specification_version: 4
348
376
  summary: Sketch data structures for Ruby