datasketches 0.3.0 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/ext/datasketches/cpc_wrapper.cpp +1 -1
  4. data/lib/datasketches/version.rb +1 -1
  5. data/vendor/datasketches-cpp/CMakeLists.txt +22 -20
  6. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +25 -27
  7. data/vendor/datasketches-cpp/common/include/common_defs.hpp +8 -6
  8. data/vendor/datasketches-cpp/common/include/count_zeros.hpp +11 -0
  9. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +5 -4
  10. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
  11. data/vendor/datasketches-cpp/common/test/integration_test.cpp +6 -0
  12. data/vendor/datasketches-cpp/count/CMakeLists.txt +42 -0
  13. data/vendor/datasketches-cpp/count/include/count_min.hpp +351 -0
  14. data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +517 -0
  15. data/vendor/datasketches-cpp/count/test/CMakeLists.txt +43 -0
  16. data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +155 -0
  17. data/vendor/datasketches-cpp/count/test/count_min_test.cpp +306 -0
  18. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -1
  19. data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +3 -3
  20. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +1 -1
  21. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +16 -8
  22. data/vendor/datasketches-cpp/density/CMakeLists.txt +42 -0
  23. data/vendor/datasketches-cpp/density/include/density_sketch.hpp +236 -0
  24. data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +543 -0
  25. data/vendor/datasketches-cpp/density/test/CMakeLists.txt +35 -0
  26. data/vendor/datasketches-cpp/density/test/density_sketch_test.cpp +244 -0
  27. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +9 -3
  28. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +19 -11
  29. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +2 -5
  30. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +19 -7
  31. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +1 -1
  32. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +98 -42
  33. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -0
  34. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +92 -59
  35. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +16 -6
  36. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +3 -21
  37. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +8 -0
  38. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +14 -6
  39. data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +1 -1
  40. data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +8 -2
  41. data/vendor/datasketches-cpp/hll/include/hll.hpp +9 -8
  42. data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +7 -1
  43. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -1
  44. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +8 -3
  45. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +2 -2
  46. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +2 -2
  47. data/vendor/datasketches-cpp/python/CMakeLists.txt +6 -0
  48. data/vendor/datasketches-cpp/python/README.md +5 -5
  49. data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +87 -0
  50. data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +35 -0
  51. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +15 -9
  52. data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +77 -0
  53. data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +205 -0
  54. data/vendor/datasketches-cpp/python/datasketches/__init__.py +17 -1
  55. data/vendor/datasketches-cpp/python/include/kernel_function.hpp +98 -0
  56. data/vendor/datasketches-cpp/python/include/py_object_lt.hpp +37 -0
  57. data/vendor/datasketches-cpp/python/include/py_object_ostream.hpp +48 -0
  58. data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +104 -0
  59. data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +136 -0
  60. data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +101 -0
  61. data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +16 -30
  62. data/vendor/datasketches-cpp/python/src/datasketches.cpp +6 -0
  63. data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +95 -0
  64. data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +127 -73
  65. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +28 -36
  66. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +108 -160
  67. data/vendor/datasketches-cpp/python/src/py_serde.cpp +5 -4
  68. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +99 -148
  69. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +117 -178
  70. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +67 -73
  71. data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +215 -0
  72. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +2 -2
  73. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +1 -1
  74. data/vendor/datasketches-cpp/python/tests/count_min_test.py +86 -0
  75. data/vendor/datasketches-cpp/python/tests/cpc_test.py +10 -10
  76. data/vendor/datasketches-cpp/python/tests/density_test.py +93 -0
  77. data/vendor/datasketches-cpp/python/tests/fi_test.py +41 -2
  78. data/vendor/datasketches-cpp/python/tests/hll_test.py +19 -20
  79. data/vendor/datasketches-cpp/python/tests/kll_test.py +40 -6
  80. data/vendor/datasketches-cpp/python/tests/quantiles_test.py +39 -5
  81. data/vendor/datasketches-cpp/python/tests/req_test.py +38 -5
  82. data/vendor/datasketches-cpp/python/tests/theta_test.py +16 -14
  83. data/vendor/datasketches-cpp/python/tests/tuple_test.py +206 -0
  84. data/vendor/datasketches-cpp/python/tests/vo_test.py +7 -0
  85. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +8 -3
  86. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +4 -4
  87. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +1 -1
  88. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +0 -2
  89. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +8 -3
  90. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +2 -2
  91. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +20 -6
  92. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +30 -16
  93. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -1
  94. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +19 -15
  95. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +33 -14
  96. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -2
  97. data/vendor/datasketches-cpp/setup.py +1 -1
  98. data/vendor/datasketches-cpp/theta/CMakeLists.txt +1 -0
  99. data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +6279 -0
  100. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +14 -8
  101. data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +60 -46
  102. data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +4 -2
  103. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +58 -10
  104. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +430 -130
  105. data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -9
  106. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +16 -4
  107. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +2 -2
  108. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  109. data/vendor/datasketches-cpp/theta/test/bit_packing_test.cpp +80 -0
  110. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +42 -3
  111. data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -0
  112. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +2 -1
  113. data/vendor/datasketches-cpp/version.cfg.in +1 -1
  114. metadata +31 -3
@@ -41,7 +41,7 @@ void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
41
41
  if (sketch.is_empty()) return;
42
42
  if (sketch.get_seed_hash() != compute_seed_hash(table_.seed_)) throw std::invalid_argument("seed hash mismatch");
43
43
  table_.is_empty_ = false;
44
- if (sketch.get_theta64() < union_theta_) union_theta_ = sketch.get_theta64();
44
+ union_theta_ = std::min(union_theta_, sketch.get_theta64());
45
45
  for (auto& entry: sketch) {
46
46
  const uint64_t hash = EK()(entry);
47
47
  if (hash < union_theta_ && hash < table_.theta_) {
@@ -55,7 +55,7 @@ void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
55
55
  if (sketch.is_ordered()) break; // early stop
56
56
  }
57
57
  }
58
- if (table_.theta_ < union_theta_) union_theta_ = table_.theta_;
58
+ union_theta_ = std::min(union_theta_, table_.theta_);
59
59
  }
60
60
 
61
61
  template<typename EN, typename EK, typename P, typename S, typename CS, typename A>
@@ -65,16 +65,16 @@ CS theta_union_base<EN, EK, P, S, CS, A>::get_result(bool ordered) const {
65
65
  entries.reserve(table_.num_entries_);
66
66
  uint64_t theta = std::min(union_theta_, table_.theta_);
67
67
  const uint32_t nominal_num = 1 << table_.lg_nom_size_;
68
- if (union_theta_ >= theta && table_.num_entries_ <= nominal_num) {
68
+ if (union_theta_ >= table_.theta_) {
69
69
  std::copy_if(table_.begin(), table_.end(), std::back_inserter(entries), key_not_zero<EN, EK>());
70
70
  } else {
71
71
  std::copy_if(table_.begin(), table_.end(), std::back_inserter(entries), key_not_zero_less_than<uint64_t, EN, EK>(theta));
72
- if (entries.size() > nominal_num) {
73
- std::nth_element(entries.begin(), entries.begin() + nominal_num, entries.end(), comparator());
74
- theta = EK()(entries[nominal_num]);
75
- entries.erase(entries.begin() + nominal_num, entries.end());
76
- entries.shrink_to_fit();
77
- }
72
+ }
73
+ if (entries.size() > nominal_num) {
74
+ std::nth_element(entries.begin(), entries.begin() + nominal_num, entries.end(), comparator());
75
+ theta = EK()(entries[nominal_num]);
76
+ entries.erase(entries.begin() + nominal_num, entries.end());
77
+ entries.shrink_to_fit();
78
78
  }
79
79
  if (ordered) std::sort(entries.begin(), entries.end(), comparator());
80
80
  return CS(table_.is_empty_, ordered, compute_seed_hash(table_.seed_), theta, std::move(entries));
@@ -23,8 +23,8 @@
23
23
  #include <vector>
24
24
  #include <climits>
25
25
  #include <cmath>
26
+ #include <iterator>
26
27
 
27
- #include "common_defs.hpp"
28
28
  #include "MurmurHash3.h"
29
29
  #include "theta_comparators.hpp"
30
30
  #include "theta_constants.hpp"
@@ -185,8 +185,14 @@ static inline uint64_t compute_hash(const void* data, size_t length, uint64_t se
185
185
  // iterators
186
186
 
187
187
  template<typename Entry, typename ExtractKey>
188
- class theta_iterator: public std::iterator<std::input_iterator_tag, Entry> {
188
+ class theta_iterator {
189
189
  public:
190
+ using iterator_category = std::input_iterator_tag;
191
+ using value_type = Entry;
192
+ using difference_type = std::ptrdiff_t;
193
+ using pointer = Entry*;
194
+ using reference = Entry&;
195
+
190
196
  theta_iterator(Entry* entries, uint32_t size, uint32_t index);
191
197
  theta_iterator& operator++();
192
198
  theta_iterator operator++(int);
@@ -201,14 +207,20 @@ private:
201
207
  };
202
208
 
203
209
  template<typename Entry, typename ExtractKey>
204
- class theta_const_iterator: public std::iterator<std::input_iterator_tag, Entry> {
210
+ class theta_const_iterator {
205
211
  public:
212
+ using iterator_category = std::input_iterator_tag;
213
+ using value_type = const Entry;
214
+ using difference_type = std::ptrdiff_t;
215
+ using pointer = const Entry*;
216
+ using reference = const Entry&;
217
+
206
218
  theta_const_iterator(const Entry* entries, uint32_t size, uint32_t index);
207
219
  theta_const_iterator& operator++();
208
220
  theta_const_iterator operator++(int);
209
221
  bool operator==(const theta_const_iterator& other) const;
210
222
  bool operator!=(const theta_const_iterator& other) const;
211
- const Entry& operator*() const;
223
+ reference operator*() const;
212
224
 
213
225
  private:
214
226
  const Entry* entries_;
@@ -188,7 +188,7 @@ auto theta_update_sketch_base<EN, EK, A>::begin() const -> iterator {
188
188
 
189
189
  template<typename EN, typename EK, typename A>
190
190
  auto theta_update_sketch_base<EN, EK, A>::end() const -> iterator {
191
- return &entries_[1ULL << lg_cur_size_];
191
+ return entries_ + (1ULL << lg_cur_size_);
192
192
  }
193
193
 
194
194
  template<typename EN, typename EK, typename A>
@@ -382,7 +382,7 @@ bool theta_iterator<Entry, ExtractKey>::operator==(const theta_iterator& other)
382
382
  }
383
383
 
384
384
  template<typename Entry, typename ExtractKey>
385
- auto theta_iterator<Entry, ExtractKey>::operator*() const -> Entry& {
385
+ auto theta_iterator<Entry, ExtractKey>::operator*() const -> reference {
386
386
  return entries_[index_];
387
387
  }
388
388
 
@@ -44,4 +44,5 @@ target_sources(theta_test
44
44
  theta_a_not_b_test.cpp
45
45
  theta_jaccard_similarity_test.cpp
46
46
  theta_setop_test.cpp
47
+ bit_packing_test.cpp
47
48
  )
@@ -0,0 +1,80 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch2/catch.hpp>
21
+ #include <bit_packing.hpp>
22
+
23
+ namespace datasketches {
24
+
25
+ // for every number of bits from 1 to 63
26
+ // generate pseudo-random data, pack, unpack and compare
27
+
28
+ // inverse golden ratio (0.618.. of max uint64_t)
29
+ static const uint64_t IGOLDEN64 = 0x9e3779b97f4a7c13ULL;
30
+
31
+ TEST_CASE("pack unpack bits") {
32
+ for (uint8_t bits = 1; bits <= 63; ++bits) {
33
+ const uint64_t mask = (1ULL << bits) - 1;
34
+ std::vector<uint64_t> input(8, 0);
35
+ const uint64_t igolden64 = IGOLDEN64;
36
+ uint64_t value = 0xaa55aa55aa55aa55ULL; // arbitrary starting value
37
+ for (int i = 0; i < 8; ++i) {
38
+ input[i] = value & mask;
39
+ value += igolden64;
40
+ }
41
+ std::vector<uint8_t> bytes(8 * sizeof(uint64_t), 0);
42
+ uint8_t offset = 0;
43
+ uint8_t* ptr = bytes.data();
44
+ for (int i = 0; i < 8; ++i) {
45
+ offset = pack_bits(input[i], bits, ptr, offset);
46
+ }
47
+
48
+ std::vector<uint64_t> output(8, 0);
49
+ offset = 0;
50
+ const uint8_t* cptr = bytes.data();
51
+ for (int i = 0; i < 8; ++i) {
52
+ offset = unpack_bits(output[i], bits, cptr, offset);
53
+ }
54
+ for (int i = 0; i < 8; ++i) {
55
+ REQUIRE((input[i] & mask) == output[i]);
56
+ }
57
+ }
58
+ }
59
+
60
+ TEST_CASE("pack unpack blocks") {
61
+ for (uint8_t bits = 1; bits <= 63; ++bits) {
62
+ const uint64_t mask = (1ULL << bits) - 1;
63
+ std::vector<uint64_t> input(8, 0);
64
+ const uint64_t igolden64 = IGOLDEN64;
65
+ uint64_t value = 0xaa55aa55aa55aa55ULL; // arbitrary starting value
66
+ for (int i = 0; i < 8; ++i) {
67
+ input[i] = value & mask;
68
+ value += igolden64;
69
+ }
70
+ std::vector<uint8_t> bytes(8 * sizeof(uint64_t), 0);
71
+ pack_bits_block8(input.data(), bytes.data(), bits);
72
+ std::vector<uint64_t> output(8, 0);
73
+ unpack_bits_block8(output.data(), bytes.data(), bits);
74
+ for (int i = 0; i < 8; ++i) {
75
+ REQUIRE((input[i] & mask) == output[i]);
76
+ }
77
+ }
78
+ }
79
+
80
+ } /* namespace datasketches */
@@ -607,7 +607,7 @@ TEST_CASE("theta sketch: wrap compact estimation from java", "[theta_sketch]") {
607
607
  compact_theta_sketch compact_sketch = update_sketch.compact();
608
608
  // the sketches are ordered, so the iteration sequence must match exactly
609
609
  auto iter = sketch.begin();
610
- for (const auto& key: compact_sketch) {
610
+ for (const auto key: compact_sketch) {
611
611
  REQUIRE(*iter == key);
612
612
  ++iter;
613
613
  }
@@ -652,7 +652,7 @@ TEST_CASE("theta sketch: wrap compact v1 estimation from java", "[theta_sketch]"
652
652
  compact_theta_sketch compact_sketch = update_sketch.compact();
653
653
  // the sketches are ordered, so the iteration sequence must match exactly
654
654
  auto iter = sketch.begin();
655
- for (const auto& key: compact_sketch) {
655
+ for (const auto key: compact_sketch) {
656
656
  REQUIRE(*iter == key);
657
657
  ++iter;
658
658
  }
@@ -697,7 +697,46 @@ TEST_CASE("theta sketch: wrap compact v2 estimation from java", "[theta_sketch]"
697
697
  compact_theta_sketch compact_sketch = update_sketch.compact();
698
698
  // the sketches are ordered, so the iteration sequence must match exactly
699
699
  auto iter = sketch.begin();
700
- for (const auto& key: compact_sketch) {
700
+ for (const auto key: compact_sketch) {
701
+ REQUIRE(*iter == key);
702
+ ++iter;
703
+ }
704
+ }
705
+
706
+ TEST_CASE("theta sketch: serialize deserialize compressed", "[theta_sketch]") {
707
+ auto update_sketch = update_theta_sketch::builder().build();
708
+ for (int i = 0; i < 10000; i++) update_sketch.update(i);
709
+ auto compact_sketch = update_sketch.compact();
710
+
711
+ auto bytes = compact_sketch.serialize_compressed();
712
+ { // deserialize bytes
713
+ auto deserialized_sketch = compact_theta_sketch::deserialize(bytes.data(), bytes.size());
714
+ REQUIRE(deserialized_sketch.get_num_retained() == compact_sketch.get_num_retained());
715
+ REQUIRE(deserialized_sketch.get_theta() == compact_sketch.get_theta());
716
+ auto iter = deserialized_sketch.begin();
717
+ for (const auto key: compact_sketch) {
718
+ REQUIRE(*iter == key);
719
+ ++iter;
720
+ }
721
+ }
722
+ { // wrap bytes
723
+ auto wrapped_sketch = wrapped_compact_theta_sketch::wrap(bytes.data(), bytes.size());
724
+ REQUIRE(wrapped_sketch.get_num_retained() == compact_sketch.get_num_retained());
725
+ REQUIRE(wrapped_sketch.get_theta() == compact_sketch.get_theta());
726
+ auto iter = wrapped_sketch.begin();
727
+ for (const auto key: compact_sketch) {
728
+ REQUIRE(*iter == key);
729
+ ++iter;
730
+ }
731
+ }
732
+
733
+ std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
734
+ compact_sketch.serialize_compressed(s);
735
+ auto deserialized_sketch = compact_theta_sketch::deserialize(s);
736
+ REQUIRE(deserialized_sketch.get_num_retained() == compact_sketch.get_num_retained());
737
+ REQUIRE(deserialized_sketch.get_theta() == compact_sketch.get_theta());
738
+ auto iter = deserialized_sketch.begin();
739
+ for (const auto key: compact_sketch) {
701
740
  REQUIRE(*iter == key);
702
741
  ++iter;
703
742
  }
@@ -128,4 +128,29 @@ TEST_CASE("theta union: seed mismatch", "[theta_union]") {
128
128
  REQUIRE_THROWS_AS(u.update(sketch), std::invalid_argument);
129
129
  }
130
130
 
131
+ TEST_CASE("theta union: larger K", "[theta_union]") {
132
+ auto update_sketch1 = datasketches::update_theta_sketch::builder().set_lg_k(14).build();
133
+ for(int i = 0; i < 16384; ++i) update_sketch1.update(i);
134
+
135
+ auto update_sketch2 = datasketches::update_theta_sketch::builder().set_lg_k(14).build();
136
+ for(int i = 0; i < 26384; ++i) update_sketch2.update(i);
137
+
138
+ auto update_sketch3 = datasketches::update_theta_sketch::builder().set_lg_k(14).build();
139
+ for(int i = 0; i < 86384; ++i) update_sketch3.update(i);
140
+
141
+ auto union1 = datasketches::theta_union::builder().set_lg_k(16).build();
142
+ union1.update(update_sketch2);
143
+ union1.update(update_sketch1);
144
+ union1.update(update_sketch3);
145
+ auto result1 = union1.get_result();
146
+ REQUIRE(result1.get_estimate() == update_sketch3.get_estimate());
147
+
148
+ auto union2 = datasketches::theta_union::builder().set_lg_k(16).build();
149
+ union2.update(update_sketch1);
150
+ union2.update(update_sketch3);
151
+ union2.update(update_sketch2);
152
+ auto result2 = union2.get_result();
153
+ REQUIRE(result2.get_estimate() == update_sketch3.get_estimate());
154
+ }
155
+
131
156
  } /* namespace datasketches */
@@ -32,7 +32,8 @@ bool tuple_sketch<S, A>::is_estimation_mode() const {
32
32
 
33
33
  template<typename S, typename A>
34
34
  double tuple_sketch<S, A>::get_theta() const {
35
- return static_cast<double>(get_theta64()) / theta_constants::MAX_THETA;
35
+ return static_cast<double>(get_theta64()) /
36
+ static_cast<double>(theta_constants::MAX_THETA);
36
37
  }
37
38
 
38
39
  template<typename S, typename A>
@@ -1 +1 @@
1
- 4.0.0
1
+ 4.1.0
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: datasketches
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-12-09 00:00:00.000000000 Z
11
+ date: 2023-05-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice
@@ -76,6 +76,12 @@ files:
76
76
  - vendor/datasketches-cpp/common/test/test_allocator.cpp
77
77
  - vendor/datasketches-cpp/common/test/test_allocator.hpp
78
78
  - vendor/datasketches-cpp/common/test/test_type.hpp
79
+ - vendor/datasketches-cpp/count/CMakeLists.txt
80
+ - vendor/datasketches-cpp/count/include/count_min.hpp
81
+ - vendor/datasketches-cpp/count/include/count_min_impl.hpp
82
+ - vendor/datasketches-cpp/count/test/CMakeLists.txt
83
+ - vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp
84
+ - vendor/datasketches-cpp/count/test/count_min_test.cpp
79
85
  - vendor/datasketches-cpp/cpc/CMakeLists.txt
80
86
  - vendor/datasketches-cpp/cpc/include/compression_data.hpp
81
87
  - vendor/datasketches-cpp/cpc/include/cpc_common.hpp
@@ -96,6 +102,11 @@ files:
96
102
  - vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp
97
103
  - vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp
98
104
  - vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp
105
+ - vendor/datasketches-cpp/density/CMakeLists.txt
106
+ - vendor/datasketches-cpp/density/include/density_sketch.hpp
107
+ - vendor/datasketches-cpp/density/include/density_sketch_impl.hpp
108
+ - vendor/datasketches-cpp/density/test/CMakeLists.txt
109
+ - vendor/datasketches-cpp/density/test/density_sketch_test.cpp
99
110
  - vendor/datasketches-cpp/fi/CMakeLists.txt
100
111
  - vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp
101
112
  - vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp
@@ -173,9 +184,18 @@ files:
173
184
  - vendor/datasketches-cpp/pyproject.toml
174
185
  - vendor/datasketches-cpp/python/CMakeLists.txt
175
186
  - vendor/datasketches-cpp/python/README.md
187
+ - vendor/datasketches-cpp/python/datasketches/DensityWrapper.py
188
+ - vendor/datasketches-cpp/python/datasketches/KernelFunction.py
176
189
  - vendor/datasketches-cpp/python/datasketches/PySerDe.py
190
+ - vendor/datasketches-cpp/python/datasketches/TuplePolicy.py
191
+ - vendor/datasketches-cpp/python/datasketches/TupleWrapper.py
177
192
  - vendor/datasketches-cpp/python/datasketches/__init__.py
193
+ - vendor/datasketches-cpp/python/include/kernel_function.hpp
194
+ - vendor/datasketches-cpp/python/include/py_object_lt.hpp
195
+ - vendor/datasketches-cpp/python/include/py_object_ostream.hpp
178
196
  - vendor/datasketches-cpp/python/include/py_serde.hpp
197
+ - vendor/datasketches-cpp/python/include/quantile_conditional.hpp
198
+ - vendor/datasketches-cpp/python/include/tuple_policy.hpp
179
199
  - vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb
180
200
  - vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb
181
201
  - vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb
@@ -183,8 +203,10 @@ files:
183
203
  - vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb
184
204
  - vendor/datasketches-cpp/python/pybind11Path.cmd
185
205
  - vendor/datasketches-cpp/python/src/__init__.py
206
+ - vendor/datasketches-cpp/python/src/count_wrapper.cpp
186
207
  - vendor/datasketches-cpp/python/src/cpc_wrapper.cpp
187
208
  - vendor/datasketches-cpp/python/src/datasketches.cpp
209
+ - vendor/datasketches-cpp/python/src/density_wrapper.cpp
188
210
  - vendor/datasketches-cpp/python/src/fi_wrapper.cpp
189
211
  - vendor/datasketches-cpp/python/src/hll_wrapper.cpp
190
212
  - vendor/datasketches-cpp/python/src/kll_wrapper.cpp
@@ -193,16 +215,20 @@ files:
193
215
  - vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp
194
216
  - vendor/datasketches-cpp/python/src/req_wrapper.cpp
195
217
  - vendor/datasketches-cpp/python/src/theta_wrapper.cpp
218
+ - vendor/datasketches-cpp/python/src/tuple_wrapper.cpp
196
219
  - vendor/datasketches-cpp/python/src/vector_of_kll.cpp
197
220
  - vendor/datasketches-cpp/python/src/vo_wrapper.cpp
198
221
  - vendor/datasketches-cpp/python/tests/__init__.py
222
+ - vendor/datasketches-cpp/python/tests/count_min_test.py
199
223
  - vendor/datasketches-cpp/python/tests/cpc_test.py
224
+ - vendor/datasketches-cpp/python/tests/density_test.py
200
225
  - vendor/datasketches-cpp/python/tests/fi_test.py
201
226
  - vendor/datasketches-cpp/python/tests/hll_test.py
202
227
  - vendor/datasketches-cpp/python/tests/kll_test.py
203
228
  - vendor/datasketches-cpp/python/tests/quantiles_test.py
204
229
  - vendor/datasketches-cpp/python/tests/req_test.py
205
230
  - vendor/datasketches-cpp/python/tests/theta_test.py
231
+ - vendor/datasketches-cpp/python/tests/tuple_test.py
206
232
  - vendor/datasketches-cpp/python/tests/vector_of_kll_test.py
207
233
  - vendor/datasketches-cpp/python/tests/vo_test.py
208
234
  - vendor/datasketches-cpp/quantiles/CMakeLists.txt
@@ -249,6 +275,7 @@ files:
249
275
  - vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk
250
276
  - vendor/datasketches-cpp/setup.py
251
277
  - vendor/datasketches-cpp/theta/CMakeLists.txt
278
+ - vendor/datasketches-cpp/theta/include/bit_packing.hpp
252
279
  - vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp
253
280
  - vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp
254
281
  - vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp
@@ -275,6 +302,7 @@ files:
275
302
  - vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp
276
303
  - vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp
277
304
  - vendor/datasketches-cpp/theta/test/CMakeLists.txt
305
+ - vendor/datasketches-cpp/theta/test/bit_packing_test.cpp
278
306
  - vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp
279
307
  - vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk
280
308
  - vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk
@@ -342,7 +370,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
342
370
  - !ruby/object:Gem::Version
343
371
  version: '0'
344
372
  requirements: []
345
- rubygems_version: 3.3.7
373
+ rubygems_version: 3.4.10
346
374
  signing_key:
347
375
  specification_version: 4
348
376
  summary: Sketch data structures for Ruby