datasketches 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +310 -0
- data/NOTICE +11 -0
- data/README.md +126 -0
- data/ext/datasketches/cpc_wrapper.cpp +50 -0
- data/ext/datasketches/ext.cpp +12 -0
- data/ext/datasketches/extconf.rb +11 -0
- data/ext/datasketches/hll_wrapper.cpp +69 -0
- data/lib/datasketches.rb +9 -0
- data/lib/datasketches/version.rb +3 -0
- data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
- data/vendor/datasketches-cpp/LICENSE +311 -0
- data/vendor/datasketches-cpp/MANIFEST.in +19 -0
- data/vendor/datasketches-cpp/NOTICE +11 -0
- data/vendor/datasketches-cpp/README.md +42 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
- data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
- data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
- data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
- data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
- data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
- data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
- data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
- data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
- data/vendor/datasketches-cpp/pyproject.toml +17 -0
- data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
- data/vendor/datasketches-cpp/python/README.md +78 -0
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
- data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +94 -0
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
- data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
- data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
- data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
- data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
- metadata +302 -0
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <iostream>
|
|
21
|
+
|
|
22
|
+
#include <catch.hpp>
|
|
23
|
+
#include <tuple_intersection.hpp>
|
|
24
|
+
#include <theta_sketch_experimental.hpp>
|
|
25
|
+
|
|
26
|
+
namespace datasketches {
|
|
27
|
+
|
|
28
|
+
/// Policy functor handed to tuple_intersection by the tests in this file.
/// Calling it folds `other` into `summary` using the summary type's
/// compound subtraction operator.
template<typename Summary>
struct subtracting_intersection_policy {
  /// Performs `summary -= other` in place; nothing is returned.
  void operator()(Summary& summary, const Summary& other) const { summary -= other; }
};
|
|
34
|
+
|
|
35
|
+
// Intersection type used by every test below: float summaries, combined with subtracting_intersection_policy.
using tuple_intersection_float = tuple_intersection<float, subtracting_intersection_policy<float>>;
|
|
36
|
+
|
|
37
|
+
// An intersection that was never updated must report that it has no result,
// and requesting the result anyway must throw std::invalid_argument.
TEST_CASE("tuple intersection: invalid", "[tuple_intersection]") {
  tuple_intersection_float intersection;

  REQUIRE_FALSE(intersection.has_result());
  REQUIRE_THROWS_AS(intersection.get_result(), std::invalid_argument);
}
|
|
42
|
+
|
|
43
|
+
// Feeding a never-updated sketch into the intersection, once and then a
// second time, must yield an empty, exact-mode result with a zero estimate.
TEST_CASE("tuple intersection: empty", "[tuple_intersection]") {
  using float_sketch = update_tuple_sketch<float>;

  auto empty_input = float_sketch::builder().build();
  tuple_intersection_float intersection;

  // first update
  intersection.update(empty_input);
  auto result = intersection.get_result();
  REQUIRE(result.get_num_retained() == 0);
  REQUIRE(result.is_empty());
  REQUIRE_FALSE(result.is_estimation_mode());
  REQUIRE(result.get_estimate() == 0.0);

  // same input again: the outcome must not change
  intersection.update(empty_input);
  result = intersection.get_result();
  REQUIRE(result.get_num_retained() == 0);
  REQUIRE(result.is_empty());
  REQUIRE_FALSE(result.is_estimation_mode());
  REQUIRE(result.get_estimate() == 0.0);
}
|
|
60
|
+
|
|
61
|
+
// The input sketch is built with p = 0.001 and receives a single update.
// The assertions expect it to be non-empty yet retain no entries, so the
// intersection result is in estimation mode with theta ~ 0.001 and a zero
// estimate -- both after the first update and after repeating it.
TEST_CASE("tuple intersection: non empty no retained keys", "[tuple_intersection]") {
  using float_sketch = update_tuple_sketch<float>;

  auto sampled_input = float_sketch::builder().set_p(0.001).build();
  sampled_input.update(1, 1);

  tuple_intersection_float intersection;

  // first update
  intersection.update(sampled_input);
  auto result = intersection.get_result();
  REQUIRE(result.get_num_retained() == 0);
  REQUIRE_FALSE(result.is_empty());
  REQUIRE(result.is_estimation_mode());
  REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
  REQUIRE(result.get_estimate() == 0.0);

  // same input again: the outcome must not change
  intersection.update(sampled_input);
  result = intersection.get_result();
  REQUIRE(result.get_num_retained() == 0);
  REQUIRE_FALSE(result.is_empty());
  REQUIRE(result.is_estimation_mode());
  REQUIRE(result.get_theta() == Approx(0.001).margin(1e-10));
  REQUIRE(result.get_estimate() == 0.0);
}
|
|
81
|
+
|
|
82
|
+
// Two sketches of 1000 keys each, sharing keys 500..999. The intersection
// is expected to stay in exact mode and report exactly 500, whether it is
// fed the update sketches directly or their compact forms.
TEST_CASE("tuple intersection: exact mode half overlap", "[tuple_intersection]") {
  using float_sketch = update_tuple_sketch<float>;

  // keys 0..999
  auto first = float_sketch::builder().build();
  for (int key = 0; key < 1000; ++key) first.update(key, 1);

  // keys 500..1499
  auto second = float_sketch::builder().build();
  for (int key = 500; key < 1500; ++key) second.update(key, 1);

  { // unordered
    tuple_intersection_float intersection;
    intersection.update(first);
    intersection.update(second);
    auto result = intersection.get_result();
    REQUIRE_FALSE(result.is_empty());
    REQUIRE_FALSE(result.is_estimation_mode());
    REQUIRE(result.get_estimate() == 500.0);
  }
  { // ordered
    tuple_intersection_float intersection;
    intersection.update(first.compact());
    intersection.update(second.compact());
    auto result = intersection.get_result();
    REQUIRE_FALSE(result.is_empty());
    REQUIRE_FALSE(result.is_estimation_mode());
    REQUIRE(result.get_estimate() == 500.0);
  }
}
|
|
110
|
+
|
|
111
|
+
// Two sketches of 1000 keys each with no key in common (0..999 versus
// 1000..1999). The intersection is expected to be empty, exact, and to
// estimate zero, for both update-sketch and compact-sketch inputs.
TEST_CASE("tuple intersection: exact mode disjoint", "[tuple_intersection]") {
  using float_sketch = update_tuple_sketch<float>;

  // keys 0..999
  auto first = float_sketch::builder().build();
  for (int key = 0; key < 1000; ++key) first.update(key, 1);

  // keys 1000..1999, continuing where the first sketch stopped
  auto second = float_sketch::builder().build();
  for (int key = 1000; key < 2000; ++key) second.update(key, 1);

  { // unordered
    tuple_intersection_float intersection;
    intersection.update(first);
    intersection.update(second);
    auto result = intersection.get_result();
    REQUIRE(result.is_empty());
    REQUIRE_FALSE(result.is_estimation_mode());
    REQUIRE(result.get_estimate() == 0.0);
  }
  { // ordered
    tuple_intersection_float intersection;
    intersection.update(first.compact());
    intersection.update(second.compact());
    auto result = intersection.get_result();
    REQUIRE(result.is_empty());
    REQUIRE_FALSE(result.is_estimation_mode());
    REQUIRE(result.get_estimate() == 0.0);
  }
}
|
|
138
|
+
|
|
139
|
+
// needed until promotion of experimental to replace existing theta sketch
|
|
140
|
+
using update_theta_sketch = update_theta_sketch_experimental<>;
|
|
141
|
+
|
|
142
|
+
TEST_CASE("mixed intersection: exact mode half overlap", "[tuple_intersection]") {
|
|
143
|
+
auto sketch1 = update_tuple_sketch<float>::builder().build();
|
|
144
|
+
int value = 0;
|
|
145
|
+
for (int i = 0; i < 1000; i++) sketch1.update(value++, 1);
|
|
146
|
+
|
|
147
|
+
auto sketch2 = update_theta_sketch::builder().build();
|
|
148
|
+
value = 500;
|
|
149
|
+
for (int i = 0; i < 1000; i++) sketch2.update(value++);
|
|
150
|
+
|
|
151
|
+
{ // unordered
|
|
152
|
+
tuple_intersection_float intersection;
|
|
153
|
+
intersection.update(sketch1);
|
|
154
|
+
intersection.update(compact_tuple_sketch<float>(sketch2, 1, false));
|
|
155
|
+
auto result = intersection.get_result();
|
|
156
|
+
REQUIRE_FALSE(result.is_empty());
|
|
157
|
+
REQUIRE_FALSE(result.is_estimation_mode());
|
|
158
|
+
REQUIRE(result.get_estimate() == 500.0);
|
|
159
|
+
}
|
|
160
|
+
{ // ordered
|
|
161
|
+
tuple_intersection_float intersection;
|
|
162
|
+
intersection.update(sketch1.compact());
|
|
163
|
+
intersection.update(compact_tuple_sketch<float>(sketch2.compact(), 1));
|
|
164
|
+
auto result = intersection.get_result();
|
|
165
|
+
REQUIRE_FALSE(result.is_empty());
|
|
166
|
+
REQUIRE_FALSE(result.is_estimation_mode());
|
|
167
|
+
REQUIRE(result.get_estimate() == 500.0);
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
// Two sketches of 10000 keys each, sharing keys 5000..9999. The result is
// expected to be in estimation mode, so the estimate is only checked to be
// within 2% of 5000, for both update-sketch and compact-sketch inputs.
TEST_CASE("tuple intersection: estimation mode half overlap", "[tuple_intersection]") {
  using float_sketch = update_tuple_sketch<float>;

  // keys 0..9999
  auto first = float_sketch::builder().build();
  for (int key = 0; key < 10000; ++key) first.update(key, 1);

  // keys 5000..14999
  auto second = float_sketch::builder().build();
  for (int key = 5000; key < 15000; ++key) second.update(key, 1);

  { // unordered
    tuple_intersection_float intersection;
    intersection.update(first);
    intersection.update(second);
    auto result = intersection.get_result();
    REQUIRE_FALSE(result.is_empty());
    REQUIRE(result.is_estimation_mode());
    REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
  }
  { // ordered
    tuple_intersection_float intersection;
    intersection.update(first.compact());
    intersection.update(second.compact());
    auto result = intersection.get_result();
    REQUIRE_FALSE(result.is_empty());
    REQUIRE(result.is_estimation_mode());
    REQUIRE(result.get_estimate() == Approx(5000).margin(5000 * 0.02));
  }
}
|
|
199
|
+
|
|
200
|
+
// Two sketches of 10000 keys each with no key in common (0..9999 versus
// 10000..19999). The result is expected to be non-empty and in estimation
// mode, with an estimate of exactly zero.
TEST_CASE("tuple intersection: estimation mode disjoint", "[tuple_intersection]") {
  using float_sketch = update_tuple_sketch<float>;

  // keys 0..9999
  auto first = float_sketch::builder().build();
  for (int key = 0; key < 10000; ++key) first.update(key, 1);

  // keys 10000..19999, continuing where the first sketch stopped
  auto second = float_sketch::builder().build();
  for (int key = 10000; key < 20000; ++key) second.update(key, 1);

  { // unordered
    tuple_intersection_float intersection;
    intersection.update(first);
    intersection.update(second);
    auto result = intersection.get_result();
    REQUIRE_FALSE(result.is_empty());
    REQUIRE(result.is_estimation_mode());
    REQUIRE(result.get_estimate() == 0.0);
  }
  { // ordered
    tuple_intersection_float intersection;
    intersection.update(first.compact());
    intersection.update(second.compact());
    auto result = intersection.get_result();
    REQUIRE_FALSE(result.is_empty());
    REQUIRE(result.is_estimation_mode());
    REQUIRE(result.get_estimate() == 0.0);
  }
}
|
|
227
|
+
|
|
228
|
+
// The sketch is built with the builder's defaults while the intersection is
// constructed with 123 (the seed, going by the test name). Updating with a
// sketch whose seed differs must throw std::invalid_argument.
TEST_CASE("tuple intersection: seed mismatch", "[tuple_intersection]") {
  using float_sketch = update_tuple_sketch<float>;

  auto sketch = float_sketch::builder().build();
  sketch.update(1, 1); // non-empty should not be ignored

  tuple_intersection_float intersection(123);
  REQUIRE_THROWS_AS(intersection.update(sketch), std::invalid_argument);
}
|
|
234
|
+
|
|
235
|
+
} /* namespace datasketches */
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <iostream>
|
|
21
|
+
|
|
22
|
+
#include <catch.hpp>
|
|
23
|
+
#include <jaccard_similarity.hpp>
|
|
24
|
+
|
|
25
|
+
namespace datasketches {
|
|
26
|
+
|
|
27
|
+
// Jaccard similarity helper used by every test below: float summaries with default_union_policy<float>.
using tuple_jaccard_similarity_float = tuple_jaccard_similarity<float, default_union_policy<float>>;
|
|
28
|
+
|
|
29
|
+
// Two sketches that never receive an update: jaccard() is expected to
// return {1, 1, 1} for both the update-sketch and compact forms, and
// exactly_equal() is expected to hold.
TEST_CASE("tuple jaccard: empty", "[tuple_sketch]") {
  using float_sketch = update_tuple_sketch<float>;

  auto sk_a = float_sketch::builder().build();
  auto sk_b = float_sketch::builder().build();

  // inputs passed as update sketches
  auto jc = tuple_jaccard_similarity_float::jaccard(sk_a, sk_b);
  REQUIRE(jc == std::array<double, 3>{1, 1, 1});

  // inputs passed as compact sketches
  jc = tuple_jaccard_similarity_float::jaccard(sk_a.compact(), sk_b.compact());
  REQUIRE(jc == std::array<double, 3>{1, 1, 1});

  REQUIRE(tuple_jaccard_similarity_float::exactly_equal(sk_a, sk_b));
}
|
|
43
|
+
|
|
44
|
+
// A single sketch holding keys 0..999 compared against itself: jaccard()
// is expected to return {1, 1, 1} for both the update-sketch and compact
// forms, and exactly_equal() is expected to hold.
TEST_CASE("tuple jaccard: same sketch exact mode", "[tuple_sketch]") {
  auto sk = update_tuple_sketch<float>::builder().build();
  int key = 0;
  while (key < 1000) {
    sk.update(key, 1);
    ++key;
  }

  // input passed as an update sketch
  auto jc = tuple_jaccard_similarity_float::jaccard(sk, sk);
  REQUIRE(jc == std::array<double, 3>{1, 1, 1});

  // input passed as a compact sketch
  jc = tuple_jaccard_similarity_float::jaccard(sk.compact(), sk.compact());
  REQUIRE(jc == std::array<double, 3>{1, 1, 1});

  REQUIRE(tuple_jaccard_similarity_float::exactly_equal(sk, sk));
}
|
|
58
|
+
|
|
59
|
+
// Two separate sketches populated with the same keys 0..999: jaccard() is
// expected to return {1, 1, 1}, and exactly_equal() is expected to hold for
// every update/compact pairing of the two inputs.
TEST_CASE("tuple jaccard: full overlap exact mode", "[tuple_sketch]") {
  using float_sketch = update_tuple_sketch<float>;

  auto sk_a = float_sketch::builder().build();
  for (int key = 0; key < 1000; ++key) sk_a.update(key, 1);

  auto sk_b = float_sketch::builder().build();
  for (int key = 0; key < 1000; ++key) sk_b.update(key, 1);

  // inputs passed as update sketches
  auto jc = tuple_jaccard_similarity_float::jaccard(sk_a, sk_b);
  REQUIRE(jc == std::array<double, 3>{1, 1, 1});

  // inputs passed as compact sketches
  jc = tuple_jaccard_similarity_float::jaccard(sk_a.compact(), sk_b.compact());
  REQUIRE(jc == std::array<double, 3>{1, 1, 1});

  // all four update/compact combinations
  REQUIRE(tuple_jaccard_similarity_float::exactly_equal(sk_a, sk_b));
  REQUIRE(tuple_jaccard_similarity_float::exactly_equal(sk_a.compact(), sk_b));
  REQUIRE(tuple_jaccard_similarity_float::exactly_equal(sk_a, sk_b.compact()));
  REQUIRE(tuple_jaccard_similarity_float::exactly_equal(sk_a.compact(), sk_b.compact()));
}
|
|
80
|
+
|
|
81
|
+
// Two sketches with non-overlapping key ranges (0..999 and 1000..1999): the
// test expects the result triple {0, 0, 0} for update and compact forms.
TEST_CASE("tuple jaccard: disjoint exact mode", "[tuple_sketch]") {
  const std::array<double, 3> all_zeros{0, 0, 0};
  auto first = update_tuple_sketch<float>::builder().build();
  auto second = update_tuple_sketch<float>::builder().build();
  for (int key = 0; key < 1000; ++key) {
    first.update(key, 1);
    second.update(key + 1000, 1);
  }

  // update sketches
  const auto from_update = tuple_jaccard_similarity_float::jaccard(first, second);
  REQUIRE(from_update == all_zeros);

  // compact sketches
  const auto from_compact = tuple_jaccard_similarity_float::jaccard(first.compact(), second.compact());
  REQUIRE(from_compact == all_zeros);
}
|
|
97
|
+
|
|
98
|
+
} /* namespace datasketches */
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <iostream>
|
|
21
|
+
|
|
22
|
+
#include <catch.hpp>
|
|
23
|
+
#include <tuple_sketch.hpp>
|
|
24
|
+
#include <test_allocator.hpp>
|
|
25
|
+
#include <test_type.hpp>
|
|
26
|
+
|
|
27
|
+
namespace datasketches {
|
|
28
|
+
|
|
29
|
+
// Switch for the diagnostic output in this file: code below prints trace
// messages only when this is true.
static const bool ALLOCATOR_TEST_DEBUG = false;
|
|
30
|
+
|
|
31
|
+
struct test_type_replace_policy {
|
|
32
|
+
test_type create() const { return test_type(0); }
|
|
33
|
+
void update(test_type& summary, const test_type& update) const {
|
|
34
|
+
if (ALLOCATOR_TEST_DEBUG) std::cerr << "policy::update lvalue begin" << std::endl;
|
|
35
|
+
summary = update;
|
|
36
|
+
if (ALLOCATOR_TEST_DEBUG) std::cerr << "policy::update lvalue end" << std::endl;
|
|
37
|
+
}
|
|
38
|
+
void update(test_type& summary, test_type&& update) const {
|
|
39
|
+
if (ALLOCATOR_TEST_DEBUG) std::cerr << "policy::update rvalue begin" << std::endl;
|
|
40
|
+
summary = std::move(update);
|
|
41
|
+
if (ALLOCATOR_TEST_DEBUG) std::cerr << "policy::update rvalue end" << std::endl;
|
|
42
|
+
}
|
|
43
|
+
};
|
|
44
|
+
|
|
45
|
+
using update_tuple_sketch_test =
|
|
46
|
+
update_tuple_sketch<test_type, test_type, test_type_replace_policy, test_allocator<test_type>>;
|
|
47
|
+
using compact_tuple_sketch_test =
|
|
48
|
+
compact_tuple_sketch<test_type, test_allocator<test_type>>;
|
|
49
|
+
|
|
50
|
+
// Drives a tuple sketch of test_type summaries through update, trim, compact,
// serialize/deserialize, copy and move using the test_allocator-based sketch
// types. The allocator counters are reset first and must be zero again once
// the inner scope has ended.
TEST_CASE("tuple sketch with test allocator: estimation mode", "[tuple_sketch]") {
  test_allocator_total_bytes = 0;
  test_allocator_net_allocations = 0;
  {
    auto update_sketch = update_tuple_sketch_test::builder().build();
    // Second pass revisits the same keys with value 2; the loops below expect
    // every retained summary to hold 2 afterwards.
    for (int i = 0; i < 10000; ++i) update_sketch.update(i, 1);
    for (int i = 0; i < 10000; ++i) update_sketch.update(i, 2);
    REQUIRE(!update_sketch.is_empty());
    REQUIRE(update_sketch.is_estimation_mode());
    unsigned count = 0;
    for (const auto& entry: update_sketch) {
      REQUIRE(entry.second.get_value() == 2);
      ++count;
    }
    REQUIRE(count == update_sketch.get_num_retained());

    update_sketch.trim();
    // 1u keeps both sides of the comparison unsigned.
    REQUIRE(update_sketch.get_num_retained() == (1u << update_sketch.get_lg_k()));

    auto compact_sketch = update_sketch.compact();
    REQUIRE(!compact_sketch.is_empty());
    REQUIRE(compact_sketch.is_estimation_mode());
    count = 0;
    for (const auto& entry: compact_sketch) {
      REQUIRE(entry.second.get_value() == 2);
      ++count;
    }
    // Check the compact sketch against its own retained count (this used to
    // compare against update_sketch), then check both forms agree.
    REQUIRE(count == compact_sketch.get_num_retained());
    REQUIRE(compact_sketch.get_num_retained() == update_sketch.get_num_retained());

    auto bytes = compact_sketch.serialize(0, test_type_serde());
    auto deserialized_sketch = compact_tuple_sketch_test::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED, test_type_serde());
    REQUIRE(deserialized_sketch.get_estimate() == compact_sketch.get_estimate());

    // update sketch copy
    if (ALLOCATOR_TEST_DEBUG) std::cout << update_sketch.to_string();
    update_tuple_sketch_test update_sketch_copy(update_sketch);
    update_sketch_copy = update_sketch;
    // update sketch move
    update_tuple_sketch_test update_sketch_moved(std::move(update_sketch_copy));
    update_sketch_moved = std::move(update_sketch);

    // compact sketch copy
    compact_tuple_sketch_test compact_sketch_copy(compact_sketch);
    compact_sketch_copy = compact_sketch;
    // compact sketch move
    compact_tuple_sketch_test compact_sketch_moved(std::move(compact_sketch_copy));
    compact_sketch_moved = std::move(compact_sketch);
  }
  // Everything created in the scope above is destroyed by now.
  REQUIRE(test_allocator_total_bytes == 0);
  REQUIRE(test_allocator_net_allocations == 0);
}
|
|
101
|
+
|
|
102
|
+
} /* namespace datasketches */
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <iostream>
|
|
21
|
+
#include <tuple>
|
|
22
|
+
|
|
23
|
+
namespace datasketches {
|
|
24
|
+
|
|
25
|
+
// Summary type made of three doubles; used by the stream operator and the
// "tuple of doubles" test in this file.
using three_doubles = std::tuple<double, double, double>;
|
|
26
|
+
|
|
27
|
+
// this is needed for a test below, but should be defined here
|
|
28
|
+
std::ostream& operator<<(std::ostream& os, const three_doubles& tuple) {
|
|
29
|
+
os << std::get<0>(tuple) << ", " << std::get<1>(tuple) << ", " << std::get<2>(tuple);
|
|
30
|
+
return os;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
#include <catch.hpp>
|
|
36
|
+
#include <tuple_sketch.hpp>
|
|
37
|
+
//#include <test_type.hpp>
|
|
38
|
+
|
|
39
|
+
namespace datasketches {
|
|
40
|
+
|
|
41
|
+
// Sets lg_k, sampling probability, resize factor and seed on the builder and
// checks that the built sketch reports each of them.
TEST_CASE("tuple sketch float: builder", "[tuple_sketch]") {
  auto configured = update_tuple_sketch<float>::builder();
  configured.set_lg_k(10);
  configured.set_p(0.5);
  configured.set_resize_factor(theta_constants::resize_factor::X2);
  configured.set_seed(123);

  auto built = configured.build();
  REQUIRE(built.get_lg_k() == 10);
  REQUIRE(built.get_theta() == 0.5);
  REQUIRE(built.get_rf() == theta_constants::resize_factor::X2);
  REQUIRE(built.get_seed_hash() == compute_seed_hash(123));
}
|
|
50
|
+
|
|
51
|
+
// A sketch that never received an update, checked in update and compact form:
// empty, not in estimation mode, estimate and bounds 0, theta 1, nothing
// retained. The update form is expected to report unordered, the compact form
// ordered.
TEST_CASE("tuple sketch float: empty", "[tuple_sketch]") {
  auto fresh = update_tuple_sketch<float>::builder().build();
  std::cout << "sizeof(update_tuple_sketch<float>)=" << sizeof(fresh) << std::endl;
  REQUIRE(fresh.is_empty());
  REQUIRE_FALSE(fresh.is_estimation_mode());
  REQUIRE(fresh.get_estimate() == 0);
  REQUIRE(fresh.get_lower_bound(1) == 0);
  REQUIRE(fresh.get_upper_bound(1) == 0);
  REQUIRE(fresh.get_theta() == 1);
  REQUIRE(fresh.get_num_retained() == 0);
  REQUIRE_FALSE(fresh.is_ordered());

  auto compacted = fresh.compact();
  std::cout << "sizeof(compact_tuple_sketch<float>)=" << sizeof(compacted) << std::endl;
  REQUIRE(compacted.is_empty());
  REQUIRE_FALSE(compacted.is_estimation_mode());
  REQUIRE(compacted.get_estimate() == 0);
  REQUIRE(compacted.get_lower_bound(1) == 0);
  REQUIRE(compacted.get_upper_bound(1) == 0);
  REQUIRE(compacted.get_theta() == 1);
  REQUIRE(compacted.get_num_retained() == 0);
  REQUIRE(compacted.is_ordered());
}
|
|
74
|
+
|
|
75
|
+
// Feeds three updates over two distinct keys into a float tuple sketch and
// checks the exact-mode results on the update sketch, on its compact form,
// and on copies deserialized from a stream, from bytes, and from bytes written
// into a stream. The assertions expect both retained summaries to equal 2.
TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
  auto update_sketch = update_tuple_sketch<float>::builder().build();
  update_sketch.update(1, 1);
  update_sketch.update(2, 2);
  update_sketch.update(1, 1);
  // std::cout << update_sketch.to_string(true);
  REQUIRE(!update_sketch.is_empty());
  REQUIRE(!update_sketch.is_estimation_mode());
  REQUIRE(update_sketch.get_estimate() == 2);
  REQUIRE(update_sketch.get_lower_bound(1) == 2);
  REQUIRE(update_sketch.get_upper_bound(1) == 2);
  REQUIRE(update_sketch.get_theta() == 1);
  REQUIRE(update_sketch.get_num_retained() == 2);
  REQUIRE(!update_sketch.is_ordered());
  int count = 0;
  for (const auto& entry: update_sketch) {
    REQUIRE(entry.second == 2);
    ++count;
  }
  REQUIRE(count == 2);

  auto compact_sketch = update_sketch.compact();
  // std::cout << compact_sketch.to_string(true);
  REQUIRE(!compact_sketch.is_empty());
  REQUIRE(!compact_sketch.is_estimation_mode());
  REQUIRE(compact_sketch.get_estimate() == 2);
  REQUIRE(compact_sketch.get_lower_bound(1) == 2);
  REQUIRE(compact_sketch.get_upper_bound(1) == 2);
  REQUIRE(compact_sketch.get_theta() == 1);
  REQUIRE(compact_sketch.get_num_retained() == 2);
  REQUIRE(compact_sketch.is_ordered());
  count = 0;
  for (const auto& entry: compact_sketch) {
    REQUIRE(entry.second == 2);
    ++count;
  }
  REQUIRE(count == 2);

  { // stream
    std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
    compact_sketch.serialize(s);
    auto deserialized_sketch = compact_tuple_sketch<float>::deserialize(s);
    REQUIRE(!deserialized_sketch.is_empty());
    REQUIRE(!deserialized_sketch.is_estimation_mode());
    REQUIRE(deserialized_sketch.get_estimate() == 2);
    REQUIRE(deserialized_sketch.get_lower_bound(1) == 2);
    REQUIRE(deserialized_sketch.get_upper_bound(1) == 2);
    REQUIRE(deserialized_sketch.get_theta() == 1);
    REQUIRE(deserialized_sketch.get_num_retained() == 2);
    REQUIRE(deserialized_sketch.is_ordered());
    // std::cout << "deserialized sketch:" << std::endl;
    // std::cout << deserialized_sketch.to_string(true);
  }
  { // bytes
    auto bytes = compact_sketch.serialize();
    auto deserialized_sketch = compact_tuple_sketch<float>::deserialize(bytes.data(), bytes.size());
    REQUIRE(!deserialized_sketch.is_empty());
    REQUIRE(!deserialized_sketch.is_estimation_mode());
    REQUIRE(deserialized_sketch.get_estimate() == 2);
    REQUIRE(deserialized_sketch.get_lower_bound(1) == 2);
    REQUIRE(deserialized_sketch.get_upper_bound(1) == 2);
    REQUIRE(deserialized_sketch.get_theta() == 1);
    REQUIRE(deserialized_sketch.get_num_retained() == 2);
    REQUIRE(deserialized_sketch.is_ordered());
    // std::cout << deserialized_sketch.to_string(true);
  }
  // mixed
  {
    auto bytes = compact_sketch.serialize();
    std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
    s.write(reinterpret_cast<const char*>(bytes.data()), bytes.size());
    auto deserialized_sketch = compact_tuple_sketch<float>::deserialize(s);
    // The loop below advances and dereferences `it` once per entry of
    // compact_sketch, so require equal sizes first; a shorter deserialized
    // sketch would otherwise have its end iterator dereferenced.
    REQUIRE(deserialized_sketch.get_num_retained() == compact_sketch.get_num_retained());
    auto it = deserialized_sketch.begin();
    for (const auto& entry: compact_sketch) {
      REQUIRE(entry.first == (*it).first);
      REQUIRE(entry.second == (*it).second);
      ++it;
    }
  }
}
|
|
155
|
+
|
|
156
|
+
// Update policy that keeps the largest value offered for a summary.
// T must be copyable and comparable with operator<.
template<typename T>
class max_value_policy {
public:
  // explicit, so that a bare T cannot silently convert into a policy object.
  explicit max_value_policy(const T& initial_value): initial_value(initial_value) {}

  // Returns a copy of the initial value supplied at construction.
  T create() const { return initial_value; }

  // Overwrites summary only when update is strictly greater. This yields the
  // same summary as std::max(summary, update) without relying on <algorithm>,
  // which this file does not include.
  void update(T& summary, const T& update) const {
    if (summary < update) summary = update;
  }

private:
  T initial_value;
};
|
|
165
|
+
|
|
166
|
+
// Update tuple sketch with float summaries and float updates that uses
// max_value_policy<float> as its policy.
using max_float_update_tuple_sketch = update_tuple_sketch<float, float, max_value_policy<float>>;
|
|
167
|
+
|
|
168
|
+
// Builds a sketch around max_value_policy<float> with initial value 5 and
// checks the number of retained entries and the sum of their summaries.
TEST_CASE("tuple sketch: float, custom policy", "[tuple_sketch]") {
  const max_value_policy<float> policy(5);
  auto sketch = max_float_update_tuple_sketch::builder(policy).build();

  sketch.update(1, 1);
  sketch.update(1, 2);
  sketch.update(2, 10);
  sketch.update(3, 3);
  sketch.update(3, 7);
  // std::cout << sketch.to_string(true);

  int entries = 0;
  float total = 0;
  for (const auto& entry: sketch) {
    ++entries;
    total += entry.second;
  }
  REQUIRE(entries == 3);
  REQUIRE(total == 22); // 5 + 10 + 7
}
|
|
185
|
+
|
|
186
|
+
// Update policy for a summary of three doubles: a summary starts at
// (0, 0, 0) and each update is added to it element by element.
struct three_doubles_update_policy {
  // Returns the all-zero starting summary.
  std::tuple<double, double, double> create() const {
    return std::make_tuple(0.0, 0.0, 0.0);
  }

  // Adds each element of update to the matching element of summary.
  void update(std::tuple<double, double, double>& summary, const std::tuple<double, double, double>& update) const {
    double& first = std::get<0>(summary);
    double& second = std::get<1>(summary);
    double& third = std::get<2>(summary);
    first += std::get<0>(update);
    second += std::get<1>(update);
    third += std::get<2>(update);
  }
};
|
|
196
|
+
|
|
197
|
+
// Stores one three_doubles summary through three_doubles_update_policy, reads
// it back through the iterator, and checks the compact form keeps one entry.
TEST_CASE("tuple sketch: tuple of doubles", "[tuple_sketch]") {
  using three_doubles_update_tuple_sketch = update_tuple_sketch<three_doubles, three_doubles, three_doubles_update_policy>;

  auto sketch = three_doubles_update_tuple_sketch::builder().build();
  sketch.update(1, three_doubles(1, 2, 3));
  // std::cout << sketch.to_string(true);

  const auto& first_entry = *sketch.begin();
  const auto& stored = first_entry.second;
  REQUIRE(std::get<0>(stored) == 1.0);
  REQUIRE(std::get<1>(stored) == 2.0);
  REQUIRE(std::get<2>(stored) == 3.0);

  auto compacted = sketch.compact();
  // std::cout << compacted.to_string(true);
  REQUIRE(compacted.get_num_retained() == 1);
}
|
|
211
|
+
|
|
212
|
+
// Presents the value 1 as a key in several types and checks how many entries
// the sketch retains after each update.
TEST_CASE("tuple sketch: float, update with different types of keys", "[tuple_sketch]") {
  auto sk = update_tuple_sketch<float>::builder().build();

  // Every integer type with value 1 is expected to map to one and the same entry.
  sk.update(uint64_t{1}, 1);
  REQUIRE(sk.get_num_retained() == 1);

  sk.update(int64_t{1}, 1);
  REQUIRE(sk.get_num_retained() == 1);

  sk.update(uint32_t{1}, 1);
  REQUIRE(sk.get_num_retained() == 1);

  sk.update(int32_t{1}, 1);
  REQUIRE(sk.get_num_retained() == 1);

  sk.update(uint16_t{1}, 1);
  REQUIRE(sk.get_num_retained() == 1);

  sk.update(int16_t{1}, 1);
  REQUIRE(sk.get_num_retained() == 1);

  sk.update(uint8_t{1}, 1);
  REQUIRE(sk.get_num_retained() == 1);

  sk.update(int8_t{1}, 1);
  REQUIRE(sk.get_num_retained() == 1);

  // A double key adds a second entry; the float key is expected to reuse it.
  sk.update(1.0, 1);
  REQUIRE(sk.get_num_retained() == 2);

  sk.update(1.0f, 1);
  REQUIRE(sk.get_num_retained() == 2);

  // A string key adds a third entry.
  sk.update("a", 1);
  REQUIRE(sk.get_num_retained() == 3);
}
|
|
248
|
+
|
|
249
|
+
} /* namespace datasketches */
|