datasketches 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +310 -0
- data/NOTICE +11 -0
- data/README.md +126 -0
- data/ext/datasketches/cpc_wrapper.cpp +50 -0
- data/ext/datasketches/ext.cpp +12 -0
- data/ext/datasketches/extconf.rb +11 -0
- data/ext/datasketches/hll_wrapper.cpp +69 -0
- data/lib/datasketches.rb +9 -0
- data/lib/datasketches/version.rb +3 -0
- data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
- data/vendor/datasketches-cpp/LICENSE +311 -0
- data/vendor/datasketches-cpp/MANIFEST.in +19 -0
- data/vendor/datasketches-cpp/NOTICE +11 -0
- data/vendor/datasketches-cpp/README.md +42 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
- data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
- data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
- data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
- data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
- data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
- data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
- data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
- data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
- data/vendor/datasketches-cpp/pyproject.toml +17 -0
- data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
- data/vendor/datasketches-cpp/python/README.md +78 -0
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
- data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +94 -0
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
- data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
- data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
- data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
- data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
- metadata +302 -0
|
Binary file
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch.hpp>
|
|
21
|
+
|
|
22
|
+
#include <theta_intersection.hpp>
|
|
23
|
+
|
|
24
|
+
namespace datasketches {
|
|
25
|
+
|
|
26
|
+
TEST_CASE("theta intersection: invalid", "[theta_intersection]") {
|
|
27
|
+
theta_intersection intersection;
|
|
28
|
+
REQUIRE_FALSE(intersection.has_result());
|
|
29
|
+
REQUIRE_THROWS_AS(intersection.get_result(), std::invalid_argument);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
TEST_CASE("theta intersection: empty", "[theta_intersection]") {
|
|
33
|
+
theta_intersection intersection;
|
|
34
|
+
update_theta_sketch sketch = update_theta_sketch::builder().build();
|
|
35
|
+
intersection.update(sketch);
|
|
36
|
+
compact_theta_sketch result = intersection.get_result();
|
|
37
|
+
REQUIRE(result.get_num_retained() == 0);
|
|
38
|
+
REQUIRE(result.is_empty());
|
|
39
|
+
REQUIRE_FALSE(result.is_estimation_mode());
|
|
40
|
+
REQUIRE(result.get_estimate() == 0.0);
|
|
41
|
+
|
|
42
|
+
intersection.update(sketch);
|
|
43
|
+
result = intersection.get_result();
|
|
44
|
+
REQUIRE(result.get_num_retained() == 0);
|
|
45
|
+
REQUIRE(result.is_empty());
|
|
46
|
+
REQUIRE_FALSE(result.is_estimation_mode());
|
|
47
|
+
REQUIRE(result.get_estimate() == 0.0);
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
TEST_CASE("theta intersection: non empty no retained keys", "[theta_intersection]") {
|
|
51
|
+
update_theta_sketch sketch = update_theta_sketch::builder().set_p(0.001).build();
|
|
52
|
+
sketch.update(1);
|
|
53
|
+
theta_intersection intersection;
|
|
54
|
+
intersection.update(sketch);
|
|
55
|
+
compact_theta_sketch result = intersection.get_result();
|
|
56
|
+
REQUIRE(result.get_num_retained() == 0);
|
|
57
|
+
REQUIRE_FALSE(result.is_empty());
|
|
58
|
+
REQUIRE(result.is_estimation_mode());
|
|
59
|
+
REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
|
|
60
|
+
REQUIRE(result.get_estimate() == 0.0);
|
|
61
|
+
|
|
62
|
+
intersection.update(sketch);
|
|
63
|
+
result = intersection.get_result();
|
|
64
|
+
REQUIRE(result.get_num_retained() == 0);
|
|
65
|
+
REQUIRE_FALSE(result.is_empty());
|
|
66
|
+
REQUIRE(result.is_estimation_mode());
|
|
67
|
+
REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
|
|
68
|
+
REQUIRE(result.get_estimate() == 0.0);
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
TEST_CASE("theta intersection: exact mode half overlap unordered", "[theta_intersection]") {
|
|
72
|
+
update_theta_sketch sketch1 = update_theta_sketch::builder().build();
|
|
73
|
+
int value = 0;
|
|
74
|
+
for (int i = 0; i < 1000; i++) sketch1.update(value++);
|
|
75
|
+
|
|
76
|
+
update_theta_sketch sketch2 = update_theta_sketch::builder().build();
|
|
77
|
+
value = 500;
|
|
78
|
+
for (int i = 0; i < 1000; i++) sketch2.update(value++);
|
|
79
|
+
|
|
80
|
+
theta_intersection intersection;
|
|
81
|
+
intersection.update(sketch1);
|
|
82
|
+
intersection.update(sketch2);
|
|
83
|
+
compact_theta_sketch result = intersection.get_result();
|
|
84
|
+
REQUIRE_FALSE(result.is_empty());
|
|
85
|
+
REQUIRE_FALSE(result.is_estimation_mode());
|
|
86
|
+
REQUIRE(result.get_estimate() == 500.0);
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
TEST_CASE("theta intersection: exact mode half overlap ordered", "[theta_intersection]") {
|
|
90
|
+
update_theta_sketch sketch1 = update_theta_sketch::builder().build();
|
|
91
|
+
int value = 0;
|
|
92
|
+
for (int i = 0; i < 1000; i++) sketch1.update(value++);
|
|
93
|
+
|
|
94
|
+
update_theta_sketch sketch2 = update_theta_sketch::builder().build();
|
|
95
|
+
value = 500;
|
|
96
|
+
for (int i = 0; i < 1000; i++) sketch2.update(value++);
|
|
97
|
+
|
|
98
|
+
theta_intersection intersection;
|
|
99
|
+
intersection.update(sketch1.compact());
|
|
100
|
+
intersection.update(sketch2.compact());
|
|
101
|
+
compact_theta_sketch result = intersection.get_result();
|
|
102
|
+
REQUIRE_FALSE(result.is_empty());
|
|
103
|
+
REQUIRE_FALSE(result.is_estimation_mode());
|
|
104
|
+
REQUIRE(result.get_estimate() == 500.0);
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
TEST_CASE("theta intersection: exact mode disjoint unordered", "[theta_intersection]") {
|
|
108
|
+
update_theta_sketch sketch1 = update_theta_sketch::builder().build();
|
|
109
|
+
int value = 0;
|
|
110
|
+
for (int i = 0; i < 1000; i++) sketch1.update(value++);
|
|
111
|
+
|
|
112
|
+
update_theta_sketch sketch2 = update_theta_sketch::builder().build();
|
|
113
|
+
for (int i = 0; i < 1000; i++) sketch2.update(value++);
|
|
114
|
+
|
|
115
|
+
theta_intersection intersection;
|
|
116
|
+
intersection.update(sketch1);
|
|
117
|
+
intersection.update(sketch2);
|
|
118
|
+
compact_theta_sketch result = intersection.get_result();
|
|
119
|
+
REQUIRE(result.is_empty());
|
|
120
|
+
REQUIRE_FALSE(result.is_estimation_mode());
|
|
121
|
+
REQUIRE(result.get_estimate() == 0.0);
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
TEST_CASE("theta intersection: exact mode disjoint ordered", "[theta_intersection]") {
|
|
125
|
+
update_theta_sketch sketch1 = update_theta_sketch::builder().build();
|
|
126
|
+
int value = 0;
|
|
127
|
+
for (int i = 0; i < 1000; i++) sketch1.update(value++);
|
|
128
|
+
|
|
129
|
+
update_theta_sketch sketch2 = update_theta_sketch::builder().build();
|
|
130
|
+
for (int i = 0; i < 1000; i++) sketch2.update(value++);
|
|
131
|
+
|
|
132
|
+
theta_intersection intersection;
|
|
133
|
+
intersection.update(sketch1.compact());
|
|
134
|
+
intersection.update(sketch2.compact());
|
|
135
|
+
compact_theta_sketch result = intersection.get_result();
|
|
136
|
+
REQUIRE(result.is_empty());
|
|
137
|
+
REQUIRE_FALSE(result.is_estimation_mode());
|
|
138
|
+
REQUIRE(result.get_estimate() == 0.0);
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
TEST_CASE("theta intersection: estimation mode half overlap unordered", "[theta_intersection]") {
|
|
142
|
+
update_theta_sketch sketch1 = update_theta_sketch::builder().build();
|
|
143
|
+
int value = 0;
|
|
144
|
+
for (int i = 0; i < 10000; i++) sketch1.update(value++);
|
|
145
|
+
|
|
146
|
+
update_theta_sketch sketch2 = update_theta_sketch::builder().build();
|
|
147
|
+
value = 5000;
|
|
148
|
+
for (int i = 0; i < 10000; i++) sketch2.update(value++);
|
|
149
|
+
|
|
150
|
+
theta_intersection intersection;
|
|
151
|
+
intersection.update(sketch1);
|
|
152
|
+
intersection.update(sketch2);
|
|
153
|
+
compact_theta_sketch result = intersection.get_result();
|
|
154
|
+
REQUIRE_FALSE(result.is_empty());
|
|
155
|
+
REQUIRE(result.is_estimation_mode());
|
|
156
|
+
REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
TEST_CASE("theta intersection: estimation mode half overlap ordered", "[theta_intersection]") {
|
|
160
|
+
update_theta_sketch sketch1 = update_theta_sketch::builder().build();
|
|
161
|
+
int value = 0;
|
|
162
|
+
for (int i = 0; i < 10000; i++) sketch1.update(value++);
|
|
163
|
+
|
|
164
|
+
update_theta_sketch sketch2 = update_theta_sketch::builder().build();
|
|
165
|
+
value = 5000;
|
|
166
|
+
for (int i = 0; i < 10000; i++) sketch2.update(value++);
|
|
167
|
+
|
|
168
|
+
theta_intersection intersection;
|
|
169
|
+
intersection.update(sketch1.compact());
|
|
170
|
+
intersection.update(sketch2.compact());
|
|
171
|
+
compact_theta_sketch result = intersection.get_result();
|
|
172
|
+
REQUIRE_FALSE(result.is_empty());
|
|
173
|
+
REQUIRE(result.is_estimation_mode());
|
|
174
|
+
REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
TEST_CASE("theta intersection: estimation mode disjoint unordered", "[theta_intersection]") {
|
|
178
|
+
update_theta_sketch sketch1 = update_theta_sketch::builder().build();
|
|
179
|
+
int value = 0;
|
|
180
|
+
for (int i = 0; i < 10000; i++) sketch1.update(value++);
|
|
181
|
+
|
|
182
|
+
update_theta_sketch sketch2 = update_theta_sketch::builder().build();
|
|
183
|
+
for (int i = 0; i < 10000; i++) sketch2.update(value++);
|
|
184
|
+
|
|
185
|
+
theta_intersection intersection;
|
|
186
|
+
intersection.update(sketch1);
|
|
187
|
+
intersection.update(sketch2);
|
|
188
|
+
compact_theta_sketch result = intersection.get_result();
|
|
189
|
+
REQUIRE_FALSE(result.is_empty());
|
|
190
|
+
REQUIRE(result.is_estimation_mode());
|
|
191
|
+
REQUIRE(result.get_estimate() == 0.0);
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
TEST_CASE("theta intersection: estimation mode disjoint ordered", "[theta_intersection]") {
|
|
195
|
+
update_theta_sketch sketch1 = update_theta_sketch::builder().build();
|
|
196
|
+
int value = 0;
|
|
197
|
+
for (int i = 0; i < 10000; i++) sketch1.update(value++);
|
|
198
|
+
|
|
199
|
+
update_theta_sketch sketch2 = update_theta_sketch::builder().build();
|
|
200
|
+
for (int i = 0; i < 10000; i++) sketch2.update(value++);
|
|
201
|
+
|
|
202
|
+
theta_intersection intersection;
|
|
203
|
+
intersection.update(sketch1.compact());
|
|
204
|
+
intersection.update(sketch2.compact());
|
|
205
|
+
compact_theta_sketch result = intersection.get_result();
|
|
206
|
+
REQUIRE_FALSE(result.is_empty());
|
|
207
|
+
REQUIRE(result.is_estimation_mode());
|
|
208
|
+
REQUIRE(result.get_estimate() == 0.0);
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
TEST_CASE("theta intersection: seed mismatch", "[theta_intersection]") {
|
|
212
|
+
update_theta_sketch sketch = update_theta_sketch::builder().build();
|
|
213
|
+
sketch.update(1); // non-empty should not be ignored
|
|
214
|
+
theta_intersection intersection(123);
|
|
215
|
+
REQUIRE_THROWS_AS(intersection.update(sketch), std::invalid_argument);
|
|
216
|
+
}
|
|
217
|
+
|
|
218
|
+
} /* namespace datasketches */
|
|
@@ -0,0 +1,438 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch.hpp>
|
|
21
|
+
#include <fstream>
|
|
22
|
+
#include <sstream>
|
|
23
|
+
|
|
24
|
+
#include <theta_sketch.hpp>
|
|
25
|
+
|
|
26
|
+
namespace datasketches {
|
|
27
|
+
|
|
28
|
+
#ifdef TEST_BINARY_INPUT_PATH
|
|
29
|
+
const std::string inputPath = TEST_BINARY_INPUT_PATH;
|
|
30
|
+
#else
|
|
31
|
+
const std::string inputPath = "test/";
|
|
32
|
+
#endif
|
|
33
|
+
|
|
34
|
+
TEST_CASE("theta sketch: empty", "[theta_sketch]") {
|
|
35
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
36
|
+
REQUIRE(update_sketch.is_empty());
|
|
37
|
+
REQUIRE_FALSE(update_sketch.is_estimation_mode());
|
|
38
|
+
REQUIRE(update_sketch.get_theta() == 1.0);
|
|
39
|
+
REQUIRE(update_sketch.get_estimate() == 0.0);
|
|
40
|
+
REQUIRE(update_sketch.get_lower_bound(1) == 0.0);
|
|
41
|
+
REQUIRE(update_sketch.get_upper_bound(1) == 0.0);
|
|
42
|
+
|
|
43
|
+
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
44
|
+
REQUIRE(compact_sketch.is_empty());
|
|
45
|
+
REQUIRE_FALSE(compact_sketch.is_estimation_mode());
|
|
46
|
+
REQUIRE(compact_sketch.get_theta() == 1.0);
|
|
47
|
+
REQUIRE(compact_sketch.get_estimate() == 0.0);
|
|
48
|
+
REQUIRE(compact_sketch.get_lower_bound(1) == 0.0);
|
|
49
|
+
REQUIRE(compact_sketch.get_upper_bound(1) == 0.0);
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
// With p set to 0.001, a single update is expected to leave nothing retained:
// the sketch is non-empty and in estimation mode, yet holds zero entries, so
// the estimate and lower bound are 0 while the upper bound is positive.
TEST_CASE("theta sketch: non empty no retained keys", "[theta_sketch]") {
  update_theta_sketch sampled = update_theta_sketch::builder().set_p(0.001).build();
  sampled.update(1);
  //std::cerr << sampled.to_string();
  REQUIRE(sampled.get_num_retained() == 0);
  REQUIRE_FALSE(sampled.is_empty());
  REQUIRE(sampled.is_estimation_mode());
  REQUIRE(sampled.get_estimate() == 0.0);
  REQUIRE(sampled.get_lower_bound(1) == 0.0);
  REQUIRE(sampled.get_upper_bound(1) > 0);

  // the compact form must agree with the update sketch
  compact_theta_sketch compacted = sampled.compact();
  REQUIRE(compacted.get_num_retained() == 0);
  REQUIRE_FALSE(compacted.is_empty());
  REQUIRE(compacted.is_estimation_mode());
  REQUIRE(compacted.get_estimate() == 0.0);
  REQUIRE(compacted.get_lower_bound(1) == 0.0);
  REQUIRE(compacted.get_upper_bound(1) > 0);
}
|
|
71
|
+
|
|
72
|
+
// One update into a default sketch: non-empty, not in estimation mode,
// theta of 1, and estimate and both bounds equal to exactly 1.
TEST_CASE("theta sketch: single item", "[theta_sketch]") {
  update_theta_sketch one_item = update_theta_sketch::builder().build();
  one_item.update(1);
  REQUIRE_FALSE(one_item.is_empty());
  REQUIRE_FALSE(one_item.is_estimation_mode());
  REQUIRE(one_item.get_theta() == 1.0);
  REQUIRE(one_item.get_estimate() == 1.0);
  REQUIRE(one_item.get_lower_bound(1) == 1.0);
  REQUIRE(one_item.get_upper_bound(1) == 1.0);

  // same expectations for the compact form
  compact_theta_sketch compacted = one_item.compact();
  REQUIRE_FALSE(compacted.is_empty());
  REQUIRE_FALSE(compacted.is_estimation_mode());
  REQUIRE(compacted.get_theta() == 1.0);
  REQUIRE(compacted.get_estimate() == 1.0);
  REQUIRE(compacted.get_lower_bound(1) == 1.0);
  REQUIRE(compacted.get_upper_bound(1) == 1.0);
}
|
|
90
|
+
|
|
91
|
+
// After 2000 distinct updates the default sketch must still not be in
// estimation mode: theta stays at 1 and the estimate and both bounds are
// exactly 2000.
TEST_CASE("theta sketch: resize exact", "[theta_sketch]") {
  update_theta_sketch sketch = update_theta_sketch::builder().build();
  for (int value = 0; value < 2000; ++value) {
    sketch.update(value);
  }
  REQUIRE_FALSE(sketch.is_empty());
  REQUIRE_FALSE(sketch.is_estimation_mode());
  REQUIRE(sketch.get_theta() == 1.0);
  REQUIRE(sketch.get_estimate() == 2000.0);
  REQUIRE(sketch.get_lower_bound(1) == 2000.0);
  REQUIRE(sketch.get_upper_bound(1) == 2000.0);

  // same expectations for the compact form
  compact_theta_sketch compacted = sketch.compact();
  REQUIRE_FALSE(compacted.is_empty());
  REQUIRE_FALSE(compacted.is_estimation_mode());
  REQUIRE(compacted.get_theta() == 1.0);
  REQUIRE(compacted.get_estimate() == 2000.0);
  REQUIRE(compacted.get_lower_bound(1) == 2000.0);
  REQUIRE(compacted.get_upper_bound(1) == 2000.0);
}
|
|
109
|
+
|
|
110
|
+
// Feeds 8000 distinct values into a sketch built with resize factor X1 and
// checks that it is in estimation mode: theta below 1, the estimate within 1%
// of the true count, and the 1-std-dev bounds bracketing the true count.
// Also checks that trim() leaves exactly 2^DEFAULT_LG_K retained entries.
TEST_CASE("theta sketch: estimation", "[theta_sketch]") {
  update_theta_sketch sketch = update_theta_sketch::builder()
      .set_resize_factor(update_theta_sketch::resize_factor::X1)
      .build();
  const int num_items = 8000;
  for (int value = 0; value < num_items; ++value) {
    sketch.update(value);
  }
  //std::cerr << sketch.to_string();
  REQUIRE_FALSE(sketch.is_empty());
  REQUIRE(sketch.is_estimation_mode());
  REQUIRE(sketch.get_theta() < 1.0);
  REQUIRE(sketch.get_estimate() == Approx(static_cast<double>(num_items)).margin(num_items * 0.01));
  REQUIRE(sketch.get_lower_bound(1) < num_items);
  REQUIRE(sketch.get_upper_bound(1) > num_items);

  // before trimming at least k entries are retained; afterwards exactly k
  const uint32_t default_k = 1 << update_theta_sketch::builder::DEFAULT_LG_K;
  REQUIRE(sketch.get_num_retained() >= default_k);
  sketch.trim();
  REQUIRE(sketch.get_num_retained() == default_k);

  // the compact form must be ordered and give the same estimation results
  compact_theta_sketch compacted = sketch.compact();
  REQUIRE_FALSE(compacted.is_empty());
  REQUIRE(compacted.is_ordered());
  REQUIRE(compacted.is_estimation_mode());
  REQUIRE(compacted.get_theta() < 1.0);
  REQUIRE(compacted.get_estimate() == Approx(static_cast<double>(num_items)).margin(num_items * 0.01));
  REQUIRE(compacted.get_lower_bound(1) < num_items);
  REQUIRE(compacted.get_upper_bound(1) > num_items);
}
|
|
136
|
+
|
|
137
|
+
// Reads theta_update_empty_from_java.sk through theta_sketch::deserialize and
// expects an empty sketch that is not in estimation mode.
TEST_CASE("theta sketch: deserialize update empty from java as base", "[theta_sketch]") {
  const std::string path = inputPath + "theta_update_empty_from_java.sk";
  std::ifstream input;
  // enable exceptions before opening so that a failed open throws
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(path, std::ios::binary);
  auto restored = theta_sketch::deserialize(input);
  REQUIRE(restored->is_empty());
  REQUIRE_FALSE(restored->is_estimation_mode());
  REQUIRE(restored->get_num_retained() == 0);
  REQUIRE(restored->get_theta() == 1.0);
  REQUIRE(restored->get_estimate() == 0.0);
  REQUIRE(restored->get_lower_bound(1) == 0.0);
  REQUIRE(restored->get_upper_bound(1) == 0.0);
}
|
|
150
|
+
|
|
151
|
+
// Reads theta_update_empty_from_java.sk through
// update_theta_sketch::deserialize and expects an empty sketch that is not in
// estimation mode.
TEST_CASE("theta sketch: deserialize update empty from java as subclass", "[theta_sketch]") {
  const std::string path = inputPath + "theta_update_empty_from_java.sk";
  std::ifstream input;
  // enable exceptions before opening so that a failed open throws
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(path, std::ios::binary);
  auto restored = update_theta_sketch::deserialize(input);
  REQUIRE(restored.is_empty());
  REQUIRE_FALSE(restored.is_estimation_mode());
  REQUIRE(restored.get_num_retained() == 0);
  REQUIRE(restored.get_theta() == 1.0);
  REQUIRE(restored.get_estimate() == 0.0);
  REQUIRE(restored.get_lower_bound(1) == 0.0);
  REQUIRE(restored.get_upper_bound(1) == 0.0);
}
|
|
164
|
+
|
|
165
|
+
// Reads theta_update_estimation_from_java.sk through
// theta_sketch::deserialize and expects an estimation-mode sketch with 5324
// retained entries whose estimate is within 1% of 10000 and whose
// 1-std-dev bounds bracket 10000.
TEST_CASE("theta sketch: deserialize update estimation from java as base", "[theta_sketch]") {
  const std::string path = inputPath + "theta_update_estimation_from_java.sk";
  std::ifstream input;
  // enable exceptions before opening so that a failed open throws
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(path, std::ios::binary);
  auto restored = theta_sketch::deserialize(input);
  REQUIRE_FALSE(restored->is_empty());
  REQUIRE(restored->is_estimation_mode());
  REQUIRE(restored->get_num_retained() == 5324);
  REQUIRE(restored->get_estimate() == Approx(10000.0).margin(10000 * 0.01));
  REQUIRE(restored->get_lower_bound(1) < 10000);
  REQUIRE(restored->get_upper_bound(1) > 10000);
}
|
|
177
|
+
|
|
178
|
+
// Reads theta_update_estimation_from_java.sk through
// update_theta_sketch::deserialize and expects an estimation-mode sketch with
// 5324 retained entries whose estimate is within 1% of 10000 and whose
// 1-std-dev bounds bracket 10000.
TEST_CASE("theta sketch: deserialize update estimation from java as subclass", "[theta_sketch]") {
  const std::string path = inputPath + "theta_update_estimation_from_java.sk";
  std::ifstream input;
  // enable exceptions before opening so that a failed open throws
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(path, std::ios::binary);
  auto restored = update_theta_sketch::deserialize(input);
  REQUIRE_FALSE(restored.is_empty());
  REQUIRE(restored.is_estimation_mode());
  REQUIRE(restored.get_num_retained() == 5324);
  REQUIRE(restored.get_estimate() == Approx(10000.0).margin(10000 * 0.01));
  REQUIRE(restored.get_lower_bound(1) < 10000);
  REQUIRE(restored.get_upper_bound(1) > 10000);
}
|
|
190
|
+
|
|
191
|
+
// Reads theta_compact_empty_from_java.sk through theta_sketch::deserialize
// and expects an empty sketch that is not in estimation mode.
TEST_CASE("theta sketch: deserialize compact empty from java as base", "[theta_sketch]") {
  const std::string path = inputPath + "theta_compact_empty_from_java.sk";
  std::ifstream input;
  // enable exceptions before opening so that a failed open throws
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(path, std::ios::binary);
  auto restored = theta_sketch::deserialize(input);
  REQUIRE(restored->is_empty());
  REQUIRE_FALSE(restored->is_estimation_mode());
  REQUIRE(restored->get_num_retained() == 0);
  REQUIRE(restored->get_theta() == 1.0);
  REQUIRE(restored->get_estimate() == 0.0);
  REQUIRE(restored->get_lower_bound(1) == 0.0);
  REQUIRE(restored->get_upper_bound(1) == 0.0);
}
|
|
204
|
+
|
|
205
|
+
// Reads theta_compact_empty_from_java.sk through
// compact_theta_sketch::deserialize and expects an empty sketch that is not
// in estimation mode.
TEST_CASE("theta sketch: deserialize compact empty from java as subclass", "[theta_sketch]") {
  const std::string path = inputPath + "theta_compact_empty_from_java.sk";
  std::ifstream input;
  // enable exceptions before opening so that a failed open throws
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(path, std::ios::binary);
  auto restored = compact_theta_sketch::deserialize(input);
  REQUIRE(restored.is_empty());
  REQUIRE_FALSE(restored.is_estimation_mode());
  REQUIRE(restored.get_num_retained() == 0);
  REQUIRE(restored.get_theta() == 1.0);
  REQUIRE(restored.get_estimate() == 0.0);
  REQUIRE(restored.get_lower_bound(1) == 0.0);
  REQUIRE(restored.get_upper_bound(1) == 0.0);
}
|
|
218
|
+
|
|
219
|
+
// Reads theta_compact_single_item_from_java.sk through
// theta_sketch::deserialize and expects one retained entry, theta of 1 and
// an estimate and bounds of exactly 1.
TEST_CASE("theta sketch: deserialize single item from java as base", "[theta_sketch]") {
  const std::string path = inputPath + "theta_compact_single_item_from_java.sk";
  std::ifstream input;
  // enable exceptions before opening so that a failed open throws
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(path, std::ios::binary);
  auto restored = theta_sketch::deserialize(input);
  REQUIRE_FALSE(restored->is_empty());
  REQUIRE_FALSE(restored->is_estimation_mode());
  REQUIRE(restored->get_num_retained() == 1);
  REQUIRE(restored->get_theta() == 1.0);
  REQUIRE(restored->get_estimate() == 1.0);
  REQUIRE(restored->get_lower_bound(1) == 1.0);
  REQUIRE(restored->get_upper_bound(1) == 1.0);
}
|
|
232
|
+
|
|
233
|
+
// Reads theta_compact_single_item_from_java.sk through
// compact_theta_sketch::deserialize and expects one retained entry, theta of
// 1 and an estimate and bounds of exactly 1.
TEST_CASE("theta sketch: deserialize single item from java as subclass", "[theta_sketch]") {
  const std::string path = inputPath + "theta_compact_single_item_from_java.sk";
  std::ifstream input;
  // enable exceptions before opening so that a failed open throws
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(path, std::ios::binary);
  auto restored = compact_theta_sketch::deserialize(input);
  REQUIRE_FALSE(restored.is_empty());
  REQUIRE_FALSE(restored.is_estimation_mode());
  REQUIRE(restored.get_num_retained() == 1);
  REQUIRE(restored.get_theta() == 1.0);
  REQUIRE(restored.get_estimate() == 1.0);
  REQUIRE(restored.get_lower_bound(1) == 1.0);
  REQUIRE(restored.get_upper_bound(1) == 1.0);
}
|
|
246
|
+
|
|
247
|
+
// Reads theta_compact_estimation_from_java.sk through
// theta_sketch::deserialize, checks it against recorded reference values,
// and then compares it with a sketch built locally from the values 0..8191.
TEST_CASE("theta sketch: deserialize compact estimation from java as base", "[theta_sketch]") {
  const double tolerance = 1e-10;
  const std::string path = inputPath + "theta_compact_estimation_from_java.sk";
  std::ifstream input;
  // enable exceptions before opening so that a failed open throws
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(path, std::ios::binary);
  auto from_java = theta_sketch::deserialize(input);
  REQUIRE_FALSE(from_java->is_empty());
  REQUIRE(from_java->is_estimation_mode());
  REQUIRE(from_java->is_ordered());
  REQUIRE(from_java->get_num_retained() == 4342);
  REQUIRE(from_java->get_theta() == Approx(0.531700444213199).margin(tolerance));
  REQUIRE(from_java->get_estimate() == Approx(8166.25234614053).margin(tolerance));
  REQUIRE(from_java->get_lower_bound(2) == Approx(7996.956955317471).margin(tolerance));
  REQUIRE(from_java->get_upper_bound(2) == Approx(8339.090301078124).margin(tolerance));

  // the same construction process in Java must have produced exactly the same sketch
  update_theta_sketch local = update_theta_sketch::builder().build();
  const int num_items = 8192;
  for (int value = 0; value < num_items; ++value) {
    local.update(value);
  }
  REQUIRE(from_java->get_num_retained() == local.get_num_retained());
  REQUIRE(from_java->get_theta() == Approx(local.get_theta()).margin(tolerance));
  REQUIRE(from_java->get_estimate() == Approx(local.get_estimate()).margin(tolerance));
  REQUIRE(from_java->get_lower_bound(1) == Approx(local.get_lower_bound(1)).margin(tolerance));
  REQUIRE(from_java->get_upper_bound(1) == Approx(local.get_upper_bound(1)).margin(tolerance));
  REQUIRE(from_java->get_lower_bound(2) == Approx(local.get_lower_bound(2)).margin(tolerance));
  REQUIRE(from_java->get_upper_bound(2) == Approx(local.get_upper_bound(2)).margin(tolerance));
  REQUIRE(from_java->get_lower_bound(3) == Approx(local.get_lower_bound(3)).margin(tolerance));
  REQUIRE(from_java->get_upper_bound(3) == Approx(local.get_upper_bound(3)).margin(tolerance));

  compact_theta_sketch local_compact = local.compact();
  // the sketches are ordered, so the iteration sequence must match exactly
  auto java_entry = from_java->begin();
  for (auto local_entry: local_compact) {
    REQUIRE(*java_entry == local_entry);
    ++java_entry;
  }
}
|
|
282
|
+
|
|
283
|
+
// Reads theta_compact_estimation_from_java.sk through
// compact_theta_sketch::deserialize, checks it against recorded reference
// values, and then compares it with a sketch built locally from the values
// 0..8191.
TEST_CASE("theta sketch: deserialize compact estimation from java as subclass", "[theta_sketch]") {
  const double tolerance = 1e-10;
  const std::string path = inputPath + "theta_compact_estimation_from_java.sk";
  std::ifstream input;
  // enable exceptions before opening so that a failed open throws
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(path, std::ios::binary);
  auto from_java = compact_theta_sketch::deserialize(input);
  REQUIRE_FALSE(from_java.is_empty());
  REQUIRE(from_java.is_estimation_mode());
  REQUIRE(from_java.get_num_retained() == 4342);
  REQUIRE(from_java.get_theta() == Approx(0.531700444213199).margin(tolerance));
  REQUIRE(from_java.get_estimate() == Approx(8166.25234614053).margin(tolerance));
  REQUIRE(from_java.get_lower_bound(2) == Approx(7996.956955317471).margin(tolerance));
  REQUIRE(from_java.get_upper_bound(2) == Approx(8339.090301078124).margin(tolerance));

  // a locally built sketch over the same values must report matching numbers
  update_theta_sketch local = update_theta_sketch::builder().build();
  const int num_items = 8192;
  for (int value = 0; value < num_items; ++value) {
    local.update(value);
  }
  REQUIRE(from_java.get_num_retained() == local.get_num_retained());
  REQUIRE(from_java.get_theta() == Approx(local.get_theta()).margin(tolerance));
  REQUIRE(from_java.get_estimate() == Approx(local.get_estimate()).margin(tolerance));
  REQUIRE(from_java.get_lower_bound(1) == Approx(local.get_lower_bound(1)).margin(tolerance));
  REQUIRE(from_java.get_upper_bound(1) == Approx(local.get_upper_bound(1)).margin(tolerance));
  REQUIRE(from_java.get_lower_bound(2) == Approx(local.get_lower_bound(2)).margin(tolerance));
  REQUIRE(from_java.get_upper_bound(2) == Approx(local.get_upper_bound(2)).margin(tolerance));
  REQUIRE(from_java.get_lower_bound(3) == Approx(local.get_lower_bound(3)).margin(tolerance));
  REQUIRE(from_java.get_upper_bound(3) == Approx(local.get_upper_bound(3)).margin(tolerance));
}
|
|
309
|
+
|
|
310
|
+
// Serializes a sketch both to a stream and to a byte vector, checks that the
// two images are byte-for-byte equal, and checks that deserializing either
// image (through the base class and through the concrete class) yields
// sketches that report the same values and iterate in the same sequence.
// This is done first for the update sketch and then for its compact form.
TEST_CASE("theta sketch: serialize deserialize stream and bytes equivalency", "[theta_sketch]") {
  update_theta_sketch source = update_theta_sketch::builder().build();
  const int num_items = 8192;
  for (int value = 0; value < num_items; ++value) {
    source.update(value);
  }

  // update sketch stream and bytes comparison
  {
    std::stringstream stream(std::ios::in | std::ios::out | std::ios::binary);
    source.serialize(stream);
    auto image = source.serialize();
    REQUIRE(image.size() == static_cast<size_t>(stream.tellp()));
    const char* image_chars = reinterpret_cast<const char*>(image.data());
    for (size_t pos = 0; pos < image.size(); ++pos) {
      REQUIRE(image_chars[pos] == static_cast<char>(stream.get()));
    }

    // deserialize as base class
    {
      stream.seekg(0); // rewind
      auto from_stream = theta_sketch::deserialize(stream);
      auto from_bytes = theta_sketch::deserialize(image.data(), image.size());
      // the stream read must have consumed exactly the serialized size
      REQUIRE(image.size() == static_cast<size_t>(stream.tellg()));
      REQUIRE(from_bytes->is_empty() == from_stream->is_empty());
      REQUIRE(from_bytes->is_ordered() == from_stream->is_ordered());
      REQUIRE(from_bytes->get_num_retained() == from_stream->get_num_retained());
      REQUIRE(from_bytes->get_theta() == from_stream->get_theta());
      REQUIRE(from_bytes->get_estimate() == from_stream->get_estimate());
      REQUIRE(from_bytes->get_lower_bound(1) == from_stream->get_lower_bound(1));
      REQUIRE(from_bytes->get_upper_bound(1) == from_stream->get_upper_bound(1));
      // hash tables must be identical since they are restored from dumps, and iteration is deterministic
      auto stream_entry = from_stream->begin();
      for (auto bytes_entry: *from_bytes) {
        REQUIRE(*stream_entry == bytes_entry);
        ++stream_entry;
      }
    }

    // deserialize as subclass
    {
      stream.seekg(0); // rewind
      update_theta_sketch from_stream = update_theta_sketch::deserialize(stream);
      update_theta_sketch from_bytes = update_theta_sketch::deserialize(image.data(), image.size());
      // the stream read must have consumed exactly the serialized size
      REQUIRE(image.size() == static_cast<size_t>(stream.tellg()));
      REQUIRE(from_bytes.is_empty() == from_stream.is_empty());
      REQUIRE(from_bytes.is_ordered() == from_stream.is_ordered());
      REQUIRE(from_bytes.get_num_retained() == from_stream.get_num_retained());
      REQUIRE(from_bytes.get_theta() == from_stream.get_theta());
      REQUIRE(from_bytes.get_estimate() == from_stream.get_estimate());
      REQUIRE(from_bytes.get_lower_bound(1) == from_stream.get_lower_bound(1));
      REQUIRE(from_bytes.get_upper_bound(1) == from_stream.get_upper_bound(1));
      // hash tables must be identical since they are restored from dumps, and iteration is deterministic
      auto stream_entry = from_stream.begin();
      for (auto bytes_entry: from_bytes) {
        REQUIRE(*stream_entry == bytes_entry);
        ++stream_entry;
      }
    }
  }

  // compact sketch stream and bytes comparison
  {
    std::stringstream stream(std::ios::in | std::ios::out | std::ios::binary);
    source.compact().serialize(stream);
    auto image = source.compact().serialize();
    REQUIRE(image.size() == static_cast<size_t>(stream.tellp()));
    const char* image_chars = reinterpret_cast<const char*>(image.data());
    for (size_t pos = 0; pos < image.size(); ++pos) {
      REQUIRE(image_chars[pos] == static_cast<char>(stream.get()));
    }

    // deserialize as base class
    {
      stream.seekg(0); // rewind
      auto from_stream = theta_sketch::deserialize(stream);
      auto from_bytes = theta_sketch::deserialize(image.data(), image.size());
      // the stream read must have consumed exactly the serialized size
      REQUIRE(image.size() == static_cast<size_t>(stream.tellg()));
      REQUIRE(from_bytes->is_empty() == from_stream->is_empty());
      REQUIRE(from_bytes->is_ordered() == from_stream->is_ordered());
      REQUIRE(from_bytes->get_num_retained() == from_stream->get_num_retained());
      REQUIRE(from_bytes->get_theta() == from_stream->get_theta());
      REQUIRE(from_bytes->get_estimate() == from_stream->get_estimate());
      REQUIRE(from_bytes->get_lower_bound(1) == from_stream->get_lower_bound(1));
      REQUIRE(from_bytes->get_upper_bound(1) == from_stream->get_upper_bound(1));
      // the sketches are ordered, so the iteration sequence must match exactly
      auto stream_entry = from_stream->begin();
      for (auto bytes_entry: *from_bytes) {
        REQUIRE(*stream_entry == bytes_entry);
        ++stream_entry;
      }
    }

    // deserialize as subclass
    {
      stream.seekg(0); // rewind
      compact_theta_sketch from_stream = compact_theta_sketch::deserialize(stream);
      compact_theta_sketch from_bytes = compact_theta_sketch::deserialize(image.data(), image.size());
      // the stream read must have consumed exactly the serialized size
      REQUIRE(image.size() == static_cast<size_t>(stream.tellg()));
      REQUIRE(from_bytes.is_empty() == from_stream.is_empty());
      REQUIRE(from_bytes.is_ordered() == from_stream.is_ordered());
      REQUIRE(from_bytes.get_num_retained() == from_stream.get_num_retained());
      REQUIRE(from_bytes.get_theta() == from_stream.get_theta());
      REQUIRE(from_bytes.get_estimate() == from_stream.get_estimate());
      REQUIRE(from_bytes.get_lower_bound(1) == from_stream.get_lower_bound(1));
      REQUIRE(from_bytes.get_upper_bound(1) == from_stream.get_upper_bound(1));
      // the sketches are ordered, so the iteration sequence must match exactly
      auto stream_entry = from_stream.begin();
      for (auto bytes_entry: from_bytes) {
        REQUIRE(*stream_entry == bytes_entry);
        ++stream_entry;
      }
    }
  }
}
|
|
421
|
+
|
|
422
|
+
// Deserializing a truncated image of a single-item update sketch must throw
// std::out_of_range, both for a 7-byte prefix and for the image with only
// its last byte missing.
TEST_CASE("theta sketch: deserialize update single item buffer overrun", "[theta_sketch]") {
  update_theta_sketch one_item = update_theta_sketch::builder().build();
  one_item.update(1);
  theta_sketch::vector_bytes image = one_item.serialize();
  // NOTE(review): 7 is presumably shorter than the smallest valid header - confirm
  REQUIRE_THROWS_AS(update_theta_sketch::deserialize(image.data(), 7), std::out_of_range);
  REQUIRE_THROWS_AS(update_theta_sketch::deserialize(image.data(), image.size() - 1), std::out_of_range);
}
|
|
429
|
+
|
|
430
|
+
// Deserializing a truncated image of a single-item compact sketch must throw
// std::out_of_range, both for a 7-byte prefix and for the image with only
// its last byte missing.
TEST_CASE("theta sketch: deserialize compact single item buffer overrun", "[theta_sketch]") {
  update_theta_sketch one_item = update_theta_sketch::builder().build();
  one_item.update(1);
  theta_sketch::vector_bytes image = one_item.compact().serialize();
  // NOTE(review): 7 is presumably shorter than the smallest valid header - confirm
  REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(image.data(), 7), std::out_of_range);
  REQUIRE_THROWS_AS(compact_theta_sketch::deserialize(image.data(), image.size() - 1), std::out_of_range);
}
|
|
437
|
+
|
|
438
|
+
} /* namespace datasketches */
|