datasketches 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/cpc_wrapper.cpp +12 -13
  4. data/ext/datasketches/ext.cpp +1 -1
  5. data/ext/datasketches/ext.h +4 -0
  6. data/ext/datasketches/extconf.rb +1 -1
  7. data/ext/datasketches/fi_wrapper.cpp +6 -8
  8. data/ext/datasketches/hll_wrapper.cpp +13 -14
  9. data/ext/datasketches/kll_wrapper.cpp +28 -76
  10. data/ext/datasketches/theta_wrapper.cpp +27 -41
  11. data/ext/datasketches/vo_wrapper.cpp +4 -6
  12. data/lib/datasketches/version.rb +1 -1
  13. data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
  14. data/vendor/datasketches-cpp/README.md +4 -4
  15. data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
  16. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
  17. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
  18. data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
  19. data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
  20. data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
  21. data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
  22. data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
  23. data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
  24. data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
  25. data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
  26. data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
  27. data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
  28. data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
  29. data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
  30. data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
  31. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
  32. data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
  33. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
  34. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
  35. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
  36. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
  37. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
  38. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  39. data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
  40. data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
  41. data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
  42. data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
  43. data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
  44. data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
  45. data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
  46. data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
  47. data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
  48. data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
  49. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
  50. data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
  51. data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
  52. data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
  53. data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
  54. data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
  55. data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
  56. data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
  57. data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
  58. data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
  59. data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
  60. data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
  61. data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
  62. data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
  63. data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
  64. data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
  65. data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
  66. data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
  67. data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
  68. data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
  69. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
  70. data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
  71. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
  72. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
  73. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
  74. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
  75. data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
  76. data/vendor/datasketches-cpp/python/README.md +6 -3
  77. data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
  78. data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
  79. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
  80. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
  81. data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
  82. data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
  83. data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
  84. data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
  85. data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
  86. data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
  87. data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
  88. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
  89. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
  90. data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
  91. data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
  92. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
  93. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
  94. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
  95. data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
  96. data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
  97. data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
  98. data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
  99. data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
  100. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
  101. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
  102. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
  103. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
  104. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
  105. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
  106. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
  107. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
  108. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
  109. data/vendor/datasketches-cpp/setup.py +5 -3
  110. data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
  111. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
  112. data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
  113. data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
  114. data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
  115. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
  116. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
  117. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
  118. data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
  119. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
  120. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
  121. data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
  122. data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
  123. data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
  124. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
  125. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
  126. data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
  127. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
  128. data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
  129. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
  130. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
  131. data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
  132. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
  133. data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
  134. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
  135. data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
  136. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
  137. data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
  138. data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
  139. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
  140. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
  141. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
  142. data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
  143. data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
  144. data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
  145. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
  146. data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
  147. metadata +43 -34
  148. data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
  149. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
  150. data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
  151. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
  152. data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
  153. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
  154. data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
  155. data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
  156. data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
  157. data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
  158. data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
  159. data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
  160. data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -1,88 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #ifndef THETA_UNION_EXPERIMENTAL_HPP_
21
- #define THETA_UNION_EXPERIMENTAL_HPP_
22
-
23
- #include "serde.hpp"
24
- #include "tuple_sketch.hpp"
25
- #include "theta_union_base.hpp"
26
- #include "theta_sketch_experimental.hpp"
27
-
28
- namespace datasketches {
29
-
30
- // experimental theta union derived from the same base as tuple union
31
-
32
- template<typename Allocator = std::allocator<uint64_t>>
33
- class theta_union_experimental {
34
- public:
35
- using Entry = uint64_t;
36
- using ExtractKey = trivial_extract_key;
37
- using Sketch = theta_sketch_experimental<Allocator>;
38
- using CompactSketch = compact_theta_sketch_experimental<Allocator>;
39
- using resize_factor = theta_constants::resize_factor;
40
-
41
- struct pass_through_policy {
42
- uint64_t operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
43
- unused(incoming_entry);
44
- return internal_entry;
45
- }
46
- };
47
- using State = theta_union_base<Entry, ExtractKey, pass_through_policy, Sketch, CompactSketch, Allocator>;
48
-
49
- // No constructor here. Use builder instead.
50
- class builder;
51
-
52
- /**
53
- * This method is to update the union with a given sketch
54
- * @param sketch to update the union with
55
- */
56
- void update(const Sketch& sketch);
57
-
58
- /**
59
- * This method produces a copy of the current state of the union as a compact sketch.
60
- * @param ordered optional flag to specify if ordered sketch should be produced
61
- * @return the result of the union
62
- */
63
- CompactSketch get_result(bool ordered = true) const;
64
-
65
- private:
66
- State state_;
67
-
68
- // for builder
69
- theta_union_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Allocator& allocator);
70
- };
71
-
72
- template<typename A>
73
- class theta_union_experimental<A>::builder: public theta_base_builder<builder, A> {
74
- public:
75
- builder(const A& allocator = A());
76
-
77
- /**
78
- * This is to create an instance of the union with predefined parameters.
79
- * @return an instance of the union
80
- */
81
- theta_union_experimental<A> build() const;
82
- };
83
-
84
- } /* namespace datasketches */
85
-
86
- #include "theta_union_experimental_impl.hpp"
87
-
88
- #endif
@@ -1,47 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- namespace datasketches {
21
-
22
- template<typename A>
23
- theta_union_experimental<A>::theta_union_experimental(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const A& allocator):
24
- state_(lg_cur_size, lg_nom_size, rf, theta, seed, pass_through_policy(), allocator)
25
- {}
26
-
27
- template<typename A>
28
- void theta_union_experimental<A>::update(const Sketch& sketch) {
29
- state_.update(sketch);
30
- }
31
-
32
- template<typename A>
33
- auto theta_union_experimental<A>::get_result(bool ordered) const -> CompactSketch {
34
- return state_.get_result(ordered);
35
- }
36
-
37
- template<typename A>
38
- theta_union_experimental<A>::builder::builder(const A& allocator): theta_base_builder<builder, A>(allocator) {}
39
-
40
- template<typename A>
41
- auto theta_union_experimental<A>::builder::build() const -> theta_union_experimental {
42
- return theta_union_experimental(
43
- this->starting_sub_multiple(this->lg_k_ + 1, this->MIN_LG_K, static_cast<uint8_t>(this->rf_)),
44
- this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->allocator_);
45
- }
46
-
47
- } /* namespace datasketches */
@@ -1,250 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #include <catch.hpp>
21
-
22
- #include <theta_a_not_b_experimental.hpp>
23
-
24
- namespace datasketches {
25
-
26
- // These tests have been copied from the existing theta sketch implementation.
27
-
28
- using update_theta_sketch = update_theta_sketch_experimental<>;
29
- using compact_theta_sketch = compact_theta_sketch_experimental<>;
30
- using theta_a_not_b = theta_a_not_b_experimental<>;
31
-
32
- TEST_CASE("theta a-not-b: empty", "[theta_a_not_b]") {
33
- theta_a_not_b a_not_b;
34
- auto a = update_theta_sketch::builder().build();
35
- auto b = update_theta_sketch::builder().build();
36
- compact_theta_sketch result = a_not_b.compute(a, b);
37
- REQUIRE(result.get_num_retained() == 0);
38
- REQUIRE(result.is_empty());
39
- REQUIRE_FALSE(result.is_estimation_mode());
40
- REQUIRE(result.get_estimate() == 0.0);
41
- }
42
-
43
- TEST_CASE("theta a-not-b: non empty no retained keys", "[theta_a_not_b]") {
44
- update_theta_sketch a = update_theta_sketch::builder().build();
45
- a.update(1);
46
- update_theta_sketch b = update_theta_sketch::builder().set_p(0.001).build();
47
- theta_a_not_b a_not_b;
48
-
49
- // B is still empty
50
- compact_theta_sketch result = a_not_b.compute(a, b);
51
- REQUIRE_FALSE(result.is_empty());
52
- REQUIRE_FALSE(result.is_estimation_mode());
53
- REQUIRE(result.get_num_retained() == 1);
54
- REQUIRE(result.get_theta() == Approx(1).margin(1e-10));
55
- REQUIRE(result.get_estimate() == 1.0);
56
-
57
- // B is not empty in estimation mode and no entries
58
- b.update(1);
59
- REQUIRE(b.get_num_retained() == 0U);
60
-
61
- result = a_not_b.compute(a, b);
62
- REQUIRE_FALSE(result.is_empty());
63
- REQUIRE(result.is_estimation_mode());
64
- REQUIRE(result.get_num_retained() == 0);
65
- REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
66
- REQUIRE(result.get_estimate() == 0.0);
67
- }
68
-
69
- TEST_CASE("theta a-not-b: exact mode half overlap", "[theta_a_not_b]") {
70
- update_theta_sketch a = update_theta_sketch::builder().build();
71
- int value = 0;
72
- for (int i = 0; i < 1000; i++) a.update(value++);
73
-
74
- update_theta_sketch b = update_theta_sketch::builder().build();
75
- value = 500;
76
- for (int i = 0; i < 1000; i++) b.update(value++);
77
-
78
- theta_a_not_b a_not_b;
79
-
80
- // unordered inputs, ordered result
81
- compact_theta_sketch result = a_not_b.compute(a, b);
82
- REQUIRE_FALSE(result.is_empty());
83
- REQUIRE_FALSE(result.is_estimation_mode());
84
- REQUIRE(result.is_ordered());
85
- REQUIRE(result.get_estimate() == 500.0);
86
-
87
- // unordered inputs, unordered result
88
- result = a_not_b.compute(a, b, false);
89
- REQUIRE_FALSE(result.is_empty());
90
- REQUIRE_FALSE(result.is_estimation_mode());
91
- REQUIRE_FALSE(result.is_ordered());
92
- REQUIRE(result.get_estimate() == 500.0);
93
-
94
- // ordered inputs
95
- result = a_not_b.compute(a.compact(), b.compact());
96
- REQUIRE_FALSE(result.is_empty());
97
- REQUIRE_FALSE(result.is_estimation_mode());
98
- REQUIRE(result.is_ordered());
99
- REQUIRE(result.get_estimate() == 500.0);
100
-
101
- // A is ordered, so the result is ordered regardless
102
- result = a_not_b.compute(a.compact(), b, false);
103
- REQUIRE_FALSE(result.is_empty());
104
- REQUIRE_FALSE(result.is_estimation_mode());
105
- REQUIRE(result.is_ordered());
106
- REQUIRE(result.get_estimate() == 500.0);
107
- }
108
-
109
- TEST_CASE("theta a-not-b: exact mode disjoint", "[theta_a_not_b]") {
110
- update_theta_sketch a = update_theta_sketch::builder().build();
111
- int value = 0;
112
- for (int i = 0; i < 1000; i++) a.update(value++);
113
-
114
- update_theta_sketch b = update_theta_sketch::builder().build();
115
- for (int i = 0; i < 1000; i++) b.update(value++);
116
-
117
- theta_a_not_b a_not_b;
118
-
119
- // unordered inputs
120
- compact_theta_sketch result = a_not_b.compute(a, b);
121
- REQUIRE_FALSE(result.is_empty());
122
- REQUIRE_FALSE(result.is_estimation_mode());
123
- REQUIRE(result.get_estimate() == 1000.0);
124
-
125
- // ordered inputs
126
- result = a_not_b.compute(a.compact(), b.compact());
127
- REQUIRE_FALSE(result.is_empty());
128
- REQUIRE_FALSE(result.is_estimation_mode());
129
- REQUIRE(result.get_estimate() == 1000.0);
130
- }
131
-
132
- TEST_CASE("theta a-not-b: exact mode full overlap", "[theta_a_not_b]") {
133
- update_theta_sketch sketch = update_theta_sketch::builder().build();
134
- int value = 0;
135
- for (int i = 0; i < 1000; i++) sketch.update(value++);
136
-
137
- theta_a_not_b a_not_b;
138
-
139
- // unordered inputs
140
- compact_theta_sketch result = a_not_b.compute(sketch, sketch);
141
- REQUIRE(result.is_empty());
142
- REQUIRE_FALSE(result.is_estimation_mode());
143
- REQUIRE(result.get_estimate() == 0.0);
144
-
145
- // ordered inputs
146
- result = a_not_b.compute(sketch.compact(), sketch.compact());
147
- REQUIRE(result.is_empty());
148
- REQUIRE_FALSE(result.is_estimation_mode());
149
- REQUIRE(result.get_estimate() == 0.0);
150
- }
151
-
152
- TEST_CASE("theta a-not-b: estimation mode half overlap", "[theta_a_not_b]") {
153
- update_theta_sketch a = update_theta_sketch::builder().build();
154
- int value = 0;
155
- for (int i = 0; i < 10000; i++) a.update(value++);
156
-
157
- update_theta_sketch b = update_theta_sketch::builder().build();
158
- value = 5000;
159
- for (int i = 0; i < 10000; i++) b.update(value++);
160
-
161
- theta_a_not_b a_not_b;
162
-
163
- // unordered inputs
164
- compact_theta_sketch result = a_not_b.compute(a, b);
165
- REQUIRE_FALSE(result.is_empty());
166
- REQUIRE(result.is_estimation_mode());
167
- REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
168
-
169
- // ordered inputs
170
- result = a_not_b.compute(a.compact(), b.compact());
171
- REQUIRE_FALSE(result.is_empty());
172
- REQUIRE(result.is_estimation_mode());
173
- REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
174
- }
175
-
176
- TEST_CASE("theta a-not-b: estimation mode disjoint", "[theta_a_not_b]") {
177
- update_theta_sketch a = update_theta_sketch::builder().build();
178
- int value = 0;
179
- for (int i = 0; i < 10000; i++) a.update(value++);
180
-
181
- update_theta_sketch b = update_theta_sketch::builder().build();
182
- for (int i = 0; i < 10000; i++) b.update(value++);
183
-
184
- theta_a_not_b a_not_b;
185
-
186
- // unordered inputs
187
- compact_theta_sketch result = a_not_b.compute(a, b);
188
- REQUIRE_FALSE(result.is_empty());
189
- REQUIRE(result.is_estimation_mode());
190
- REQUIRE(result.get_estimate() == Approx(10000).margin(10000 * 0.02));
191
-
192
- // ordered inputs
193
- result = a_not_b.compute(a.compact(), b.compact());
194
- REQUIRE_FALSE(result.is_empty());
195
- REQUIRE(result.is_estimation_mode());
196
- REQUIRE(result.get_estimate() == Approx(10000).margin(10000 * 0.02));
197
- }
198
-
199
- TEST_CASE("theta a-not-b: estimation mode full overlap", "[theta_a_not_b]") {
200
- update_theta_sketch sketch = update_theta_sketch::builder().build();
201
- int value = 0;
202
- for (int i = 0; i < 10000; i++) sketch.update(value++);
203
-
204
- theta_a_not_b a_not_b;
205
-
206
- // unordered inputs
207
- compact_theta_sketch result = a_not_b.compute(sketch, sketch);
208
- REQUIRE_FALSE(result.is_empty());
209
- REQUIRE(result.is_estimation_mode());
210
- REQUIRE(result.get_estimate() == 0.0);
211
-
212
- // ordered inputs
213
- result = a_not_b.compute(sketch.compact(), sketch.compact());
214
- REQUIRE_FALSE(result.is_empty());
215
- REQUIRE(result.is_estimation_mode());
216
- REQUIRE(result.get_estimate() == 0.0);
217
- }
218
-
219
- TEST_CASE("theta a-not-b: seed mismatch", "[theta_a_not_b]") {
220
- update_theta_sketch sketch = update_theta_sketch::builder().build();
221
- sketch.update(1); // non-empty should not be ignored
222
- theta_a_not_b a_not_b(123);
223
- REQUIRE_THROWS_AS(a_not_b.compute(sketch, sketch), std::invalid_argument);
224
- }
225
-
226
- TEST_CASE("theta a-not-b: issue #152", "[theta_a_not_b]") {
227
- update_theta_sketch a = update_theta_sketch::builder().build();
228
- int value = 0;
229
- for (int i = 0; i < 10000; i++) a.update(value++);
230
-
231
- update_theta_sketch b = update_theta_sketch::builder().build();
232
- value = 5000;
233
- for (int i = 0; i < 25000; i++) b.update(value++);
234
-
235
- theta_a_not_b a_not_b;
236
-
237
- // unordered inputs
238
- compact_theta_sketch result = a_not_b.compute(a, b);
239
- REQUIRE_FALSE(result.is_empty());
240
- REQUIRE(result.is_estimation_mode());
241
- REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.03));
242
-
243
- // ordered inputs
244
- result = a_not_b.compute(a.compact(), b.compact());
245
- REQUIRE_FALSE(result.is_empty());
246
- REQUIRE(result.is_estimation_mode());
247
- REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.03));
248
- }
249
-
250
- } /* namespace datasketches */
@@ -1,224 +0,0 @@
1
- /*
2
- * Licensed to the Apache Software Foundation (ASF) under one
3
- * or more contributor license agreements. See the NOTICE file
4
- * distributed with this work for additional information
5
- * regarding copyright ownership. The ASF licenses this file
6
- * to you under the Apache License, Version 2.0 (the
7
- * "License"); you may not use this file except in compliance
8
- * with the License. You may obtain a copy of the License at
9
- *
10
- * http://www.apache.org/licenses/LICENSE-2.0
11
- *
12
- * Unless required by applicable law or agreed to in writing,
13
- * software distributed under the License is distributed on an
14
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
- * KIND, either express or implied. See the License for the
16
- * specific language governing permissions and limitations
17
- * under the License.
18
- */
19
-
20
- #include <catch.hpp>
21
-
22
- #include <theta_intersection_experimental.hpp>
23
-
24
- namespace datasketches {
25
-
26
- // These tests have been copied from the existing theta sketch implementation.
27
-
28
- using update_theta_sketch = update_theta_sketch_experimental<>;
29
- using compact_theta_sketch = compact_theta_sketch_experimental<>;
30
- using theta_intersection = theta_intersection_experimental<>;
31
-
32
- TEST_CASE("theta intersection: invalid", "[theta_intersection]") {
33
- theta_intersection intersection;
34
- REQUIRE_FALSE(intersection.has_result());
35
- REQUIRE_THROWS_AS(intersection.get_result(), std::invalid_argument);
36
- }
37
-
38
- TEST_CASE("theta intersection: empty", "[theta_intersection]") {
39
- theta_intersection intersection;
40
- update_theta_sketch sketch = update_theta_sketch::builder().build();
41
- intersection.update(sketch);
42
- compact_theta_sketch result = intersection.get_result();
43
- REQUIRE(result.get_num_retained() == 0);
44
- REQUIRE(result.is_empty());
45
- REQUIRE_FALSE(result.is_estimation_mode());
46
- REQUIRE(result.get_estimate() == 0.0);
47
-
48
- intersection.update(sketch);
49
- result = intersection.get_result();
50
- REQUIRE(result.get_num_retained() == 0);
51
- REQUIRE(result.is_empty());
52
- REQUIRE_FALSE(result.is_estimation_mode());
53
- REQUIRE(result.get_estimate() == 0.0);
54
- }
55
-
56
- TEST_CASE("theta intersection: non empty no retained keys", "[theta_intersection]") {
57
- update_theta_sketch sketch = update_theta_sketch::builder().set_p(0.001).build();
58
- sketch.update(1);
59
- theta_intersection intersection;
60
- intersection.update(sketch);
61
- compact_theta_sketch result = intersection.get_result();
62
- REQUIRE(result.get_num_retained() == 0);
63
- REQUIRE_FALSE(result.is_empty());
64
- REQUIRE(result.is_estimation_mode());
65
- REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
66
- REQUIRE(result.get_estimate() == 0.0);
67
-
68
- intersection.update(sketch);
69
- result = intersection.get_result();
70
- REQUIRE(result.get_num_retained() == 0);
71
- REQUIRE_FALSE(result.is_empty());
72
- REQUIRE(result.is_estimation_mode());
73
- REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
74
- REQUIRE(result.get_estimate() == 0.0);
75
- }
76
-
77
- TEST_CASE("theta intersection: exact mode half overlap unordered", "[theta_intersection]") {
78
- update_theta_sketch sketch1 = update_theta_sketch::builder().build();
79
- int value = 0;
80
- for (int i = 0; i < 1000; i++) sketch1.update(value++);
81
-
82
- update_theta_sketch sketch2 = update_theta_sketch::builder().build();
83
- value = 500;
84
- for (int i = 0; i < 1000; i++) sketch2.update(value++);
85
-
86
- theta_intersection intersection;
87
- intersection.update(sketch1);
88
- intersection.update(sketch2);
89
- compact_theta_sketch result = intersection.get_result();
90
- REQUIRE_FALSE(result.is_empty());
91
- REQUIRE_FALSE(result.is_estimation_mode());
92
- REQUIRE(result.get_estimate() == 500.0);
93
- }
94
-
95
- TEST_CASE("theta intersection: exact mode half overlap ordered", "[theta_intersection]") {
96
- update_theta_sketch sketch1 = update_theta_sketch::builder().build();
97
- int value = 0;
98
- for (int i = 0; i < 1000; i++) sketch1.update(value++);
99
-
100
- update_theta_sketch sketch2 = update_theta_sketch::builder().build();
101
- value = 500;
102
- for (int i = 0; i < 1000; i++) sketch2.update(value++);
103
-
104
- theta_intersection intersection;
105
- intersection.update(sketch1.compact());
106
- intersection.update(sketch2.compact());
107
- compact_theta_sketch result = intersection.get_result();
108
- REQUIRE_FALSE(result.is_empty());
109
- REQUIRE_FALSE(result.is_estimation_mode());
110
- REQUIRE(result.get_estimate() == 500.0);
111
- }
112
-
113
- TEST_CASE("theta intersection: exact mode disjoint unordered", "[theta_intersection]") {
114
- update_theta_sketch sketch1 = update_theta_sketch::builder().build();
115
- int value = 0;
116
- for (int i = 0; i < 1000; i++) sketch1.update(value++);
117
-
118
- update_theta_sketch sketch2 = update_theta_sketch::builder().build();
119
- for (int i = 0; i < 1000; i++) sketch2.update(value++);
120
-
121
- theta_intersection intersection;
122
- intersection.update(sketch1);
123
- intersection.update(sketch2);
124
- compact_theta_sketch result = intersection.get_result();
125
- REQUIRE(result.is_empty());
126
- REQUIRE_FALSE(result.is_estimation_mode());
127
- REQUIRE(result.get_estimate() == 0.0);
128
- }
129
-
130
- TEST_CASE("theta intersection: exact mode disjoint ordered", "[theta_intersection]") {
131
- update_theta_sketch sketch1 = update_theta_sketch::builder().build();
132
- int value = 0;
133
- for (int i = 0; i < 1000; i++) sketch1.update(value++);
134
-
135
- update_theta_sketch sketch2 = update_theta_sketch::builder().build();
136
- for (int i = 0; i < 1000; i++) sketch2.update(value++);
137
-
138
- theta_intersection intersection;
139
- intersection.update(sketch1.compact());
140
- intersection.update(sketch2.compact());
141
- compact_theta_sketch result = intersection.get_result();
142
- REQUIRE(result.is_empty());
143
- REQUIRE_FALSE(result.is_estimation_mode());
144
- REQUIRE(result.get_estimate() == 0.0);
145
- }
146
-
147
- TEST_CASE("theta intersection: estimation mode half overlap unordered", "[theta_intersection]") {
148
- update_theta_sketch sketch1 = update_theta_sketch::builder().build();
149
- int value = 0;
150
- for (int i = 0; i < 10000; i++) sketch1.update(value++);
151
-
152
- update_theta_sketch sketch2 = update_theta_sketch::builder().build();
153
- value = 5000;
154
- for (int i = 0; i < 10000; i++) sketch2.update(value++);
155
-
156
- theta_intersection intersection;
157
- intersection.update(sketch1);
158
- intersection.update(sketch2);
159
- compact_theta_sketch result = intersection.get_result();
160
- REQUIRE_FALSE(result.is_empty());
161
- REQUIRE(result.is_estimation_mode());
162
- REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
163
- }
164
-
165
- TEST_CASE("theta intersection: estimation mode half overlap ordered", "[theta_intersection]") {
166
- update_theta_sketch sketch1 = update_theta_sketch::builder().build();
167
- int value = 0;
168
- for (int i = 0; i < 10000; i++) sketch1.update(value++);
169
-
170
- update_theta_sketch sketch2 = update_theta_sketch::builder().build();
171
- value = 5000;
172
- for (int i = 0; i < 10000; i++) sketch2.update(value++);
173
-
174
- theta_intersection intersection;
175
- intersection.update(sketch1.compact());
176
- intersection.update(sketch2.compact());
177
- compact_theta_sketch result = intersection.get_result();
178
- REQUIRE_FALSE(result.is_empty());
179
- REQUIRE(result.is_estimation_mode());
180
- REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
181
- }
182
-
183
- TEST_CASE("theta intersection: estimation mode disjoint unordered", "[theta_intersection]") {
184
- update_theta_sketch sketch1 = update_theta_sketch::builder().build();
185
- int value = 0;
186
- for (int i = 0; i < 10000; i++) sketch1.update(value++);
187
-
188
- update_theta_sketch sketch2 = update_theta_sketch::builder().build();
189
- for (int i = 0; i < 10000; i++) sketch2.update(value++);
190
-
191
- theta_intersection intersection;
192
- intersection.update(sketch1);
193
- intersection.update(sketch2);
194
- compact_theta_sketch result = intersection.get_result();
195
- REQUIRE_FALSE(result.is_empty());
196
- REQUIRE(result.is_estimation_mode());
197
- REQUIRE(result.get_estimate() == 0.0);
198
- }
199
-
200
- TEST_CASE("theta intersection: estimation mode disjoint ordered", "[theta_intersection]") {
201
- update_theta_sketch sketch1 = update_theta_sketch::builder().build();
202
- int value = 0;
203
- for (int i = 0; i < 10000; i++) sketch1.update(value++);
204
-
205
- update_theta_sketch sketch2 = update_theta_sketch::builder().build();
206
- for (int i = 0; i < 10000; i++) sketch2.update(value++);
207
-
208
- theta_intersection intersection;
209
- intersection.update(sketch1.compact());
210
- intersection.update(sketch2.compact());
211
- compact_theta_sketch result = intersection.get_result();
212
- REQUIRE_FALSE(result.is_empty());
213
- REQUIRE(result.is_estimation_mode());
214
- REQUIRE(result.get_estimate() == 0.0);
215
- }
216
-
217
- TEST_CASE("theta intersection: seed mismatch", "[theta_intersection]") {
218
- update_theta_sketch sketch = update_theta_sketch::builder().build();
219
- sketch.update(1); // non-empty should not be ignored
220
- theta_intersection intersection(123);
221
- REQUIRE_THROWS_AS(intersection.update(sketch), std::invalid_argument);
222
- }
223
-
224
- } /* namespace datasketches */