datasketches 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +310 -0
- data/NOTICE +11 -0
- data/README.md +126 -0
- data/ext/datasketches/cpc_wrapper.cpp +50 -0
- data/ext/datasketches/ext.cpp +12 -0
- data/ext/datasketches/extconf.rb +11 -0
- data/ext/datasketches/hll_wrapper.cpp +69 -0
- data/lib/datasketches.rb +9 -0
- data/lib/datasketches/version.rb +3 -0
- data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
- data/vendor/datasketches-cpp/LICENSE +311 -0
- data/vendor/datasketches-cpp/MANIFEST.in +19 -0
- data/vendor/datasketches-cpp/NOTICE +11 -0
- data/vendor/datasketches-cpp/README.md +42 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
- data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
- data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
- data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
- data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
- data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
- data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
- data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
- data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
- data/vendor/datasketches-cpp/pyproject.toml +17 -0
- data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
- data/vendor/datasketches-cpp/python/README.md +78 -0
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
- data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +94 -0
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
- data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
- data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
- data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
- data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
- metadata +302 -0
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef TUPLE_UNION_HPP_
|
|
21
|
+
#define TUPLE_UNION_HPP_
|
|
22
|
+
|
|
23
|
+
#include "tuple_sketch.hpp"
|
|
24
|
+
#include "theta_union_base.hpp"
|
|
25
|
+
|
|
26
|
+
namespace datasketches {
|
|
27
|
+
|
|
28
|
+
// Default union policy for Summary types that define operator+=:
// the incoming summary is folded into the retained one via `summary += other`.
|
|
29
|
+
template<typename Summary>
|
|
30
|
+
struct default_union_policy {
|
|
31
|
+
// Combines `other` into `summary` in place; `other` is left untouched.
void operator()(Summary& summary, const Summary& other) const {
|
|
32
|
+
summary += other;
|
|
33
|
+
}
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
// Union operation for tuple sketches.
// All state is held in a theta_union_base instance (State) parameterized on
// (uint64_t, Summary) entries and on internal_policy, which adapts the
// user-supplied Policy from Summary arguments to Entry arguments.
// The only constructor is protected; instances are obtained from the nested builder.
template<
|
|
37
|
+
typename Summary,
|
|
38
|
+
typename Policy = default_union_policy<Summary>,
|
|
39
|
+
typename Allocator = std::allocator<Summary>
|
|
40
|
+
>
|
|
41
|
+
class tuple_union {
|
|
42
|
+
public:
|
|
43
|
+
// An entry pairs a uint64_t key (first) with its Summary (second).
using Entry = std::pair<uint64_t, Summary>;
|
|
44
|
+
using ExtractKey = pair_extract_key<uint64_t, Summary>;
|
|
45
|
+
using Sketch = tuple_sketch<Summary, Allocator>;
|
|
46
|
+
using CompactSketch = compact_tuple_sketch<Summary, Allocator>;
|
|
47
|
+
// The caller's allocator rebound so that it allocates Entry objects.
using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
|
|
48
|
+
using resize_factor = theta_constants::resize_factor;
|
|
49
|
+
|
|
50
|
+
// Reformulates the external policy, which operates on Summary,
|
|
51
|
+
// in terms of operations on Entry (only the .second member of each entry is passed on).
|
|
52
|
+
struct internal_policy {
|
|
53
|
+
// Stores a copy of the given policy.
internal_policy(const Policy& policy): policy_(policy) {}
|
|
54
|
+
// Incoming entry is read-only: its summary is passed to the policy by const reference.
void operator()(Entry& internal_entry, const Entry& incoming_entry) const {
|
|
55
|
+
policy_(internal_entry.second, incoming_entry.second);
|
|
56
|
+
}
|
|
57
|
+
// Incoming entry is an rvalue: its summary is passed to the policy via std::move.
void operator()(Entry& internal_entry, Entry&& incoming_entry) const {
|
|
58
|
+
policy_(internal_entry.second, std::move(incoming_entry.second));
|
|
59
|
+
}
|
|
60
|
+
// Read-only access to the wrapped policy.
const Policy& get_policy() const { return policy_; }
|
|
61
|
+
Policy policy_;
|
|
62
|
+
};
|
|
63
|
+
|
|
64
|
+
// Underlying implementation type that update() and get_result() delegate to.
using State = theta_union_base<Entry, ExtractKey, internal_policy, Sketch, CompactSketch, AllocEntry>;
|
|
65
|
+
|
|
66
|
+
// No public constructor here. Use builder instead.
|
|
67
|
+
class builder;
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Updates the union with a given sketch.
|
|
71
|
+
* @param sketch to update the union with
|
|
72
|
+
*/
|
|
73
|
+
template<typename FwdSketch>
|
|
74
|
+
void update(FwdSketch&& sketch);
|
|
75
|
+
|
|
76
|
+
/**
|
|
77
|
+
* This method produces a copy of the current state of the union as a compact sketch.
|
|
78
|
+
* @param ordered optional flag to specify if ordered sketch should be produced (defaults to true)
|
|
79
|
+
* @return the result of the union
|
|
80
|
+
*/
|
|
81
|
+
CompactSketch get_result(bool ordered = true) const;
|
|
82
|
+
|
|
83
|
+
protected:
|
|
84
|
+
// Holds the entire state of the union.
State state_;
|
|
85
|
+
|
|
86
|
+
// for builder
|
|
87
|
+
tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
|
88
|
+
};
|
|
89
|
+
|
|
90
|
+
// Builder for tuple_union. Derives from tuple_base_builder<builder, P, A>,
// which is declared outside this view (presumably in tuple_sketch.hpp — confirm).
template<typename S, typename P, typename A>
|
|
91
|
+
class tuple_union<S, P, A>::builder: public tuple_base_builder<builder, P, A> {
|
|
92
|
+
public:
|
|
93
|
+
/**
|
|
94
|
+
* Creates an instance of the builder with default parameters.
|
|
95
|
+
* @param policy user policy, default-constructed if omitted
|
|
* @param allocator allocator instance, default-constructed if omitted
*/
|
|
96
|
+
builder(const P& policy = P(), const A& allocator = A());
|
|
97
|
+
|
|
98
|
+
/**
|
|
99
|
+
* This is to create an instance of the union with predefined parameters.
|
|
100
|
+
* @return an instance of the union
|
|
101
|
+
*/
|
|
102
|
+
tuple_union build() const;
|
|
103
|
+
};
|
|
104
|
+
|
|
105
|
+
} /* namespace datasketches */
|
|
106
|
+
|
|
107
|
+
#include "tuple_union_impl.hpp"
|
|
108
|
+
|
|
109
|
+
#endif
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
namespace datasketches {
|
|
21
|
+
|
|
22
|
+
// Constructor (declared protected "for builder" in the class).
// Wraps the user policy in internal_policy and passes every argument on to state_.
template<typename S, typename P, typename A>
|
|
23
|
+
tuple_union<S, P, A>::tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
|
|
24
|
+
state_(lg_cur_size, lg_nom_size, rf, theta, seed, internal_policy(policy), allocator)
|
|
25
|
+
{}
|
|
26
|
+
|
|
27
|
+
// Updates the union with the given sketch by forwarding it, with its value
// category preserved, to the underlying state.
template<typename S, typename P, typename A>
|
|
28
|
+
template<typename SS>
|
|
29
|
+
void tuple_union<S, P, A>::update(SS&& sketch) {
|
|
30
|
+
state_.update(std::forward<SS>(sketch));
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
// Returns the compact sketch produced by the underlying state for the
// requested ordering; the union itself is not modified (const member).
template<typename S, typename P, typename A>
|
|
34
|
+
auto tuple_union<S, P, A>::get_result(bool ordered) const -> CompactSketch {
|
|
35
|
+
return state_.get_result(ordered);
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
// Builder constructor: hands the policy and allocator to the tuple_base_builder base class.
template<typename S, typename P, typename A>
|
|
39
|
+
tuple_union<S, P, A>::builder::builder(const P& policy, const A& allocator):
|
|
40
|
+
tuple_base_builder<builder, P, A>(policy, allocator) {}
|
|
41
|
+
|
|
42
|
+
// Creates a tuple_union from the builder's current settings. All values are read
// through this-> (starting_lg_size(), lg_k_, rf_, starting_theta(), seed_, policy_,
// allocator_); they are not declared in builder itself, so presumably come from the
// tuple_base_builder base — confirm.
template<typename S, typename P, typename A>
|
|
43
|
+
auto tuple_union<S, P, A>::builder::build() const -> tuple_union {
|
|
44
|
+
return tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
} /* namespace datasketches */
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
# Test executable for the tuple module; its sources are attached below via target_sources().
add_executable(tuple_test)
|
|
19
|
+
|
|
20
|
+
# Link against the tuple and common_test targets (defined outside this file).
target_link_libraries(tuple_test tuple common_test)
|
|
21
|
+
|
|
22
|
+
# Build the tests as C++11 and fail configuration if that standard is unavailable.
set_target_properties(tuple_test PROPERTIES
|
|
23
|
+
CXX_STANDARD 11
|
|
24
|
+
CXX_STANDARD_REQUIRED YES
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
# Pass this directory, with a trailing slash, to the tests as TEST_BINARY_INPUT_PATH.
# NOTE(review): the variable is named THETA_TEST_BINARY_PATH although this is the tuple
# test directory — looks carried over from the theta tests; confirm before renaming.
file(TO_CMAKE_PATH "${CMAKE_CURRENT_SOURCE_DIR}" THETA_TEST_BINARY_PATH)
|
|
28
|
+
string(APPEND THETA_TEST_BINARY_PATH "/")
|
|
29
|
+
target_compile_definitions(tuple_test
|
|
30
|
+
PRIVATE
|
|
31
|
+
TEST_BINARY_INPUT_PATH="${THETA_TEST_BINARY_PATH}"
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
# Register the executable as a test named tuple_test.
add_test(
|
|
35
|
+
NAME tuple_test
|
|
36
|
+
COMMAND tuple_test
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
# Test sources compiled into tuple_test.
target_sources(tuple_test
|
|
40
|
+
PRIVATE
|
|
41
|
+
tuple_sketch_test.cpp
|
|
42
|
+
tuple_sketch_allocation_test.cpp
|
|
43
|
+
tuple_union_test.cpp
|
|
44
|
+
tuple_intersection_test.cpp
|
|
45
|
+
tuple_a_not_b_test.cpp
|
|
46
|
+
array_of_doubles_sketch_test.cpp
|
|
47
|
+
theta_sketch_experimental_test.cpp
|
|
48
|
+
theta_union_experimental_test.cpp
|
|
49
|
+
theta_intersection_experimental_test.cpp
|
|
50
|
+
theta_a_not_b_experimental_test.cpp
|
|
51
|
+
theta_jaccard_similarity_test.cpp
|
|
52
|
+
tuple_jaccard_similarity_test.cpp
|
|
53
|
+
)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
̓�������
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
̓�������
|
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <iostream>
|
|
21
|
+
#include <fstream>
|
|
22
|
+
#include <sstream>
|
|
23
|
+
#include <array>
|
|
24
|
+
|
|
25
|
+
#include <catch.hpp>
|
|
26
|
+
#include <array_of_doubles_sketch.hpp>
|
|
27
|
+
#include <array_of_doubles_union.hpp>
|
|
28
|
+
#include <array_of_doubles_intersection.hpp>
|
|
29
|
+
#include <array_of_doubles_a_not_b.hpp>
|
|
30
|
+
|
|
31
|
+
namespace datasketches {
|
|
32
|
+
|
|
33
|
+
// Directory prefix prepended to the names of the binary test inputs: taken from the
// TEST_BINARY_INPUT_PATH compile definition when it is defined, otherwise "test/".
#ifdef TEST_BINARY_INPUT_PATH
|
|
34
|
+
const std::string inputPath = TEST_BINARY_INPUT_PATH;
|
|
35
|
+
#else
|
|
36
|
+
const std::string inputPath = "test/";
|
|
37
|
+
#endif
|
|
38
|
+
|
|
39
|
+
// Builds an empty update sketch with default builder settings, compacts it, and checks
// that it agrees with the sketch deserialized from aod_1_compact_empty_from_java.sk on
// retained count, theta, estimate, get_lower_bound(1) and get_upper_bound(1).
TEST_CASE("aod sketch: serialization compatibility with java - empty", "[tuple_sketch]") {
|
|
40
|
+
auto update_sketch = update_array_of_doubles_sketch::builder().build();
|
|
41
|
+
REQUIRE(update_sketch.is_empty());
|
|
42
|
+
REQUIRE(update_sketch.get_num_retained() == 0);
|
|
43
|
+
auto compact_sketch = update_sketch.compact();
|
|
44
|
+
|
|
45
|
+
// read binary sketch from Java
|
|
46
|
+
std::ifstream is;
|
|
47
|
+
// enable exceptions before open() so that a missing or unreadable file throws
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
48
|
+
is.open(inputPath + "aod_1_compact_empty_from_java.sk", std::ios::binary);
|
|
49
|
+
auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is);
|
|
50
|
+
REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained());
|
|
51
|
+
// floating-point results are compared with an absolute margin of 1e-10
REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10));
|
|
52
|
+
REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10));
|
|
53
|
+
REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10));
|
|
54
|
+
REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10));
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
// Same comparison as the plain empty case, but the update sketch is built with
// builder(3) and must report get_num_values() == 3; the number of values is also
// compared against the sketch deserialized from aod_3_compact_empty_from_java.sk.
TEST_CASE("aod sketch: serialization compatibility with java - empty configured for three values", "[tuple_sketch]") {
|
|
58
|
+
auto update_sketch = update_array_of_doubles_sketch::builder(3).build();
|
|
59
|
+
REQUIRE(update_sketch.is_empty());
|
|
60
|
+
REQUIRE(update_sketch.get_num_retained() == 0);
|
|
61
|
+
REQUIRE(update_sketch.get_num_values() == 3);
|
|
62
|
+
auto compact_sketch = update_sketch.compact();
|
|
63
|
+
|
|
64
|
+
// read binary sketch from Java
|
|
65
|
+
std::ifstream is;
|
|
66
|
+
// enable exceptions before open() so that a missing or unreadable file throws
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
67
|
+
is.open(inputPath + "aod_3_compact_empty_from_java.sk", std::ios::binary);
|
|
68
|
+
auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is);
|
|
69
|
+
REQUIRE(compact_sketch.get_num_values() == compact_sketch_from_java.get_num_values());
|
|
70
|
+
REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained());
|
|
71
|
+
// floating-point results are compared with an absolute margin of 1e-10
REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10));
|
|
72
|
+
REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10));
|
|
73
|
+
REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10));
|
|
74
|
+
REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10));
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
// Builds an update sketch with set_p(0.01), feeds it a single update (key 1, values {1}),
// and expects the sketch to be non-empty while retaining no entries. The compacted
// result is then compared with the sketch deserialized from
// aod_1_compact_non_empty_no_entries_from_java.sk.
TEST_CASE("aod sketch: serialization compatibility with java - non-empty no entries", "[tuple_sketch]") {
|
|
78
|
+
auto update_sketch = update_array_of_doubles_sketch::builder().set_p(0.01).build();
|
|
79
|
+
std::vector<double> a = {1};
|
|
80
|
+
update_sketch.update(1, a);
|
|
81
|
+
REQUIRE_FALSE(update_sketch.is_empty());
|
|
82
|
+
REQUIRE(update_sketch.get_num_retained() == 0);
|
|
83
|
+
auto compact_sketch = update_sketch.compact();
|
|
84
|
+
|
|
85
|
+
// read binary sketch from Java
|
|
86
|
+
std::ifstream is;
|
|
87
|
+
// enable exceptions before open() so that a missing or unreadable file throws
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
88
|
+
is.open(inputPath + "aod_1_compact_non_empty_no_entries_from_java.sk", std::ios::binary);
|
|
89
|
+
auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is);
|
|
90
|
+
REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained());
|
|
91
|
+
// floating-point results are compared with an absolute margin of 1e-10
REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10));
|
|
92
|
+
REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10));
|
|
93
|
+
REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10));
|
|
94
|
+
REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10));
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// Feeds keys 0..8191 (each with the single value 1) into a default-built
// sketch, compacts it, and compares it with the sketch deserialized from
// "aod_1_compact_estimation_from_java.sk": retained count, theta, estimate,
// bounds at arguments 1..3, and finally every entry in iteration order.
TEST_CASE("aod sketch: serialization compatibility with java - estimation mode", "[tuple_sketch]") {
|
|
98
|
+
auto update_sketch = update_array_of_doubles_sketch::builder().build();
|
|
99
|
+
std::vector<double> a = {1};
|
|
100
|
+
for (int i = 0; i < 8192; ++i) update_sketch.update(i, a);
|
|
101
|
+
auto compact_sketch = update_sketch.compact();
|
|
102
|
+
|
|
103
|
+
// read binary sketch from Java
|
|
104
|
+
std::ifstream is;
|
|
105
|
+
// make a failed open or read throw instead of failing silently
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
106
|
+
is.open(inputPath + "aod_1_compact_estimation_from_java.sk", std::ios::binary);
|
|
107
|
+
auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is);
|
|
108
|
+
REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained());
|
|
109
|
+
REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10));
|
|
110
|
+
REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10));
|
|
111
|
+
REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10));
|
|
112
|
+
REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10));
|
|
113
|
+
REQUIRE(compact_sketch.get_lower_bound(2) == Approx(compact_sketch_from_java.get_lower_bound(2)).margin(1e-10));
|
|
114
|
+
REQUIRE(compact_sketch.get_upper_bound(2) == Approx(compact_sketch_from_java.get_upper_bound(2)).margin(1e-10));
|
|
115
|
+
REQUIRE(compact_sketch.get_lower_bound(3) == Approx(compact_sketch_from_java.get_lower_bound(3)).margin(1e-10));
|
|
116
|
+
REQUIRE(compact_sketch.get_upper_bound(3) == Approx(compact_sketch_from_java.get_upper_bound(3)).margin(1e-10));
|
|
117
|
+
|
|
118
|
+
// sketch from Java is not ordered
|
|
119
|
+
// transform it to ordered so that iteration sequence would match exactly
|
|
120
|
+
compact_array_of_doubles_sketch ordered_sketch_from_java(compact_sketch_from_java, true);
|
|
121
|
+
auto it = ordered_sketch_from_java.begin();
|
|
122
|
+
// NOTE(review): 'it' is advanced without a check against end(); the
// num_retained REQUIRE above is what keeps both ranges the same length
for (const auto& entry: compact_sketch) {
|
|
123
|
+
REQUIRE(entry == *it);
|
|
124
|
+
++it;
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
// Feeds keys 0..999 (each with the values {1, 2}) into a sketch built via
// builder(2), asserts the compacted sketch is not in estimation mode, and
// compares it with the sketch deserialized from
// "aod_2_compact_exact_from_java.sk": retained count, theta, estimate, bounds
// at arguments 1..3, and every entry (key plus both values) in iteration order.
TEST_CASE("aod sketch: serialization compatibility with java - exact mode with two values", "[tuple_sketch]") {
|
|
129
|
+
auto update_sketch = update_array_of_doubles_sketch::builder(2).build();
|
|
130
|
+
std::vector<double> a = {1, 2};
|
|
131
|
+
for (int i = 0; i < 1000; ++i) update_sketch.update(i, a.data()); // pass vector as pointer
|
|
132
|
+
auto compact_sketch = update_sketch.compact();
|
|
133
|
+
REQUIRE_FALSE(compact_sketch.is_estimation_mode());
|
|
134
|
+
|
|
135
|
+
// read binary sketch from Java
|
|
136
|
+
std::ifstream is;
|
|
137
|
+
// make a failed open or read throw instead of failing silently
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
138
|
+
is.open(inputPath + "aod_2_compact_exact_from_java.sk", std::ios::binary);
|
|
139
|
+
auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is);
|
|
140
|
+
REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained());
|
|
141
|
+
REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10));
|
|
142
|
+
REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10));
|
|
143
|
+
REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10));
|
|
144
|
+
REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10));
|
|
145
|
+
REQUIRE(compact_sketch.get_lower_bound(2) == Approx(compact_sketch_from_java.get_lower_bound(2)).margin(1e-10));
|
|
146
|
+
REQUIRE(compact_sketch.get_upper_bound(2) == Approx(compact_sketch_from_java.get_upper_bound(2)).margin(1e-10));
|
|
147
|
+
REQUIRE(compact_sketch.get_lower_bound(3) == Approx(compact_sketch_from_java.get_lower_bound(3)).margin(1e-10));
|
|
148
|
+
REQUIRE(compact_sketch.get_upper_bound(3) == Approx(compact_sketch_from_java.get_upper_bound(3)).margin(1e-10));
|
|
149
|
+
|
|
150
|
+
// sketch from Java is not ordered
|
|
151
|
+
// transform it to ordered so that iteration sequence would match exactly
|
|
152
|
+
compact_array_of_doubles_sketch ordered_sketch_from_java(compact_sketch_from_java, true);
|
|
153
|
+
auto it = ordered_sketch_from_java.begin();
|
|
154
|
+
// NOTE(review): 'it' is advanced without a check against end(); the
// num_retained REQUIRE above is what keeps both ranges the same length
for (const auto& entry: compact_sketch) {
|
|
155
|
+
REQUIRE(entry.first == (*it).first);
|
|
156
|
+
REQUIRE(entry.second.size() == 2);
|
|
157
|
+
REQUIRE(entry.second[0] == (*it).second[0]);
|
|
158
|
+
REQUIRE(entry.second[1] == (*it).second[1]);
|
|
159
|
+
++it;
|
|
160
|
+
}
|
|
161
|
+
}
|
|
162
|
+
|
|
163
|
+
// Round-trips a compact sketch (keys 0..8191, values {1, 2}) through an
// in-memory std::stringstream using the stream serialize/deserialize
// overloads, then checks that retained count, theta, estimate, bounds at
// arguments 1..3 and every entry match between original and copy.
TEST_CASE("aod sketch: stream serialize deserialize - estimation mode", "[tuple_sketch]") {
|
|
164
|
+
auto update_sketch = update_array_of_doubles_sketch::builder(2).build();
|
|
165
|
+
std::vector<double> a = {1, 2};
|
|
166
|
+
for (int i = 0; i < 8192; ++i) update_sketch.update(i, a);
|
|
167
|
+
auto compact_sketch = update_sketch.compact();
|
|
168
|
+
|
|
169
|
+
std::stringstream ss;
|
|
170
|
+
// make a failed write or read on the stream throw instead of failing silently
ss.exceptions(std::ios::failbit | std::ios::badbit);
|
|
171
|
+
compact_sketch.serialize(ss);
|
|
172
|
+
auto deserialized_sketch = compact_array_of_doubles_sketch::deserialize(ss);
|
|
173
|
+
REQUIRE(compact_sketch.get_num_retained() == deserialized_sketch.get_num_retained());
|
|
174
|
+
REQUIRE(compact_sketch.get_theta() == Approx(deserialized_sketch.get_theta()).margin(1e-10));
|
|
175
|
+
REQUIRE(compact_sketch.get_estimate() == Approx(deserialized_sketch.get_estimate()).margin(1e-10));
|
|
176
|
+
REQUIRE(compact_sketch.get_lower_bound(1) == Approx(deserialized_sketch.get_lower_bound(1)).margin(1e-10));
|
|
177
|
+
REQUIRE(compact_sketch.get_upper_bound(1) == Approx(deserialized_sketch.get_upper_bound(1)).margin(1e-10));
|
|
178
|
+
REQUIRE(compact_sketch.get_lower_bound(2) == Approx(deserialized_sketch.get_lower_bound(2)).margin(1e-10));
|
|
179
|
+
REQUIRE(compact_sketch.get_upper_bound(2) == Approx(deserialized_sketch.get_upper_bound(2)).margin(1e-10));
|
|
180
|
+
REQUIRE(compact_sketch.get_lower_bound(3) == Approx(deserialized_sketch.get_lower_bound(3)).margin(1e-10));
|
|
181
|
+
REQUIRE(compact_sketch.get_upper_bound(3) == Approx(deserialized_sketch.get_upper_bound(3)).margin(1e-10));
|
|
182
|
+
// sketches must be ordered and the iteration sequence must match exactly
|
|
183
|
+
auto it = deserialized_sketch.begin();
|
|
184
|
+
// NOTE(review): 'it' is advanced without a check against end(); the
// num_retained REQUIRE above is what keeps both ranges the same length
for (const auto& entry: compact_sketch) {
|
|
185
|
+
REQUIRE(entry.first == (*it).first);
|
|
186
|
+
REQUIRE(entry.second.size() == 2);
|
|
187
|
+
REQUIRE(entry.second[0] == (*it).second[0]);
|
|
188
|
+
REQUIRE(entry.second[1] == (*it).second[1]);
|
|
189
|
+
++it;
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
// Serializes a compact sketch (keys 0..8191, values {1, 2}) with the
// no-argument serialize(), copies the resulting bytes into a
// std::stringstream, and deserializes with the stream overload. The two
// sketches must then agree on retained count, theta, estimate, bounds at
// arguments 1..3 and every entry.
TEST_CASE("aod sketch: bytes to stream serialize deserialize - estimation mode", "[tuple_sketch]") {
|
|
194
|
+
auto update_sketch = update_array_of_doubles_sketch::builder(2).build();
|
|
195
|
+
std::vector<double> a = {1, 2};
|
|
196
|
+
for (int i = 0; i < 8192; ++i) update_sketch.update(i, a);
|
|
197
|
+
auto compact_sketch = update_sketch.compact();
|
|
198
|
+
|
|
199
|
+
auto bytes = compact_sketch.serialize();
|
|
200
|
+
std::stringstream ss;
|
|
201
|
+
// make a failed write or read on the stream throw instead of failing silently
ss.exceptions(std::ios::failbit | std::ios::badbit);
|
|
202
|
+
// hand the serialized bytes to the stream so the stream-based deserialize can read them
ss.write(reinterpret_cast<const char*>(bytes.data()), bytes.size());
|
|
203
|
+
auto deserialized_sketch = compact_array_of_doubles_sketch::deserialize(ss);
|
|
204
|
+
REQUIRE(compact_sketch.get_num_retained() == deserialized_sketch.get_num_retained());
|
|
205
|
+
REQUIRE(compact_sketch.get_theta() == Approx(deserialized_sketch.get_theta()).margin(1e-10));
|
|
206
|
+
REQUIRE(compact_sketch.get_estimate() == Approx(deserialized_sketch.get_estimate()).margin(1e-10));
|
|
207
|
+
REQUIRE(compact_sketch.get_lower_bound(1) == Approx(deserialized_sketch.get_lower_bound(1)).margin(1e-10));
|
|
208
|
+
REQUIRE(compact_sketch.get_upper_bound(1) == Approx(deserialized_sketch.get_upper_bound(1)).margin(1e-10));
|
|
209
|
+
REQUIRE(compact_sketch.get_lower_bound(2) == Approx(deserialized_sketch.get_lower_bound(2)).margin(1e-10));
|
|
210
|
+
REQUIRE(compact_sketch.get_upper_bound(2) == Approx(deserialized_sketch.get_upper_bound(2)).margin(1e-10));
|
|
211
|
+
REQUIRE(compact_sketch.get_lower_bound(3) == Approx(deserialized_sketch.get_lower_bound(3)).margin(1e-10));
|
|
212
|
+
REQUIRE(compact_sketch.get_upper_bound(3) == Approx(deserialized_sketch.get_upper_bound(3)).margin(1e-10));
|
|
213
|
+
// sketches must be ordered and the iteration sequence must match exactly
|
|
214
|
+
auto it = deserialized_sketch.begin();
|
|
215
|
+
// NOTE(review): 'it' is advanced without a check against end(); the
// num_retained REQUIRE above is what keeps both ranges the same length
for (const auto& entry: compact_sketch) {
|
|
216
|
+
REQUIRE(entry.first == (*it).first);
|
|
217
|
+
REQUIRE(entry.second.size() == 2);
|
|
218
|
+
REQUIRE(entry.second[0] == (*it).second[0]);
|
|
219
|
+
REQUIRE(entry.second[1] == (*it).second[1]);
|
|
220
|
+
++it;
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
// Round-trips a compact sketch (keys 0..8191, values {1, 2}) through the
// byte-buffer API: serialize() produces the bytes and
// deserialize(pointer, size) reads them back. The two sketches must agree on
// retained count, theta, estimate, bounds at arguments 1..3 and every entry.
TEST_CASE("aod sketch: bytes serialize deserialize - estimation mode", "[tuple_sketch]") {
|
|
225
|
+
auto update_sketch = update_array_of_doubles_sketch::builder(2).build();
|
|
226
|
+
std::vector<double> a = {1, 2};
|
|
227
|
+
for (int i = 0; i < 8192; ++i) update_sketch.update(i, a);
|
|
228
|
+
auto compact_sketch = update_sketch.compact();
|
|
229
|
+
|
|
230
|
+
auto bytes = compact_sketch.serialize();
|
|
231
|
+
auto deserialized_sketch = compact_array_of_doubles_sketch::deserialize(bytes.data(), bytes.size());
|
|
232
|
+
REQUIRE(compact_sketch.get_num_retained() == deserialized_sketch.get_num_retained());
|
|
233
|
+
REQUIRE(compact_sketch.get_theta() == Approx(deserialized_sketch.get_theta()).margin(1e-10));
|
|
234
|
+
REQUIRE(compact_sketch.get_estimate() == Approx(deserialized_sketch.get_estimate()).margin(1e-10));
|
|
235
|
+
REQUIRE(compact_sketch.get_lower_bound(1) == Approx(deserialized_sketch.get_lower_bound(1)).margin(1e-10));
|
|
236
|
+
REQUIRE(compact_sketch.get_upper_bound(1) == Approx(deserialized_sketch.get_upper_bound(1)).margin(1e-10));
|
|
237
|
+
REQUIRE(compact_sketch.get_lower_bound(2) == Approx(deserialized_sketch.get_lower_bound(2)).margin(1e-10));
|
|
238
|
+
REQUIRE(compact_sketch.get_upper_bound(2) == Approx(deserialized_sketch.get_upper_bound(2)).margin(1e-10));
|
|
239
|
+
REQUIRE(compact_sketch.get_lower_bound(3) == Approx(deserialized_sketch.get_lower_bound(3)).margin(1e-10));
|
|
240
|
+
REQUIRE(compact_sketch.get_upper_bound(3) == Approx(deserialized_sketch.get_upper_bound(3)).margin(1e-10));
|
|
241
|
+
// sketches must be ordered and the iteration sequence must match exactly
|
|
242
|
+
auto it = deserialized_sketch.begin();
|
|
243
|
+
// NOTE(review): 'it' is advanced without a check against end(); the
// num_retained REQUIRE above is what keeps both ranges the same length
for (const auto& entry: compact_sketch) {
|
|
244
|
+
REQUIRE(entry.first == (*it).first);
|
|
245
|
+
REQUIRE(entry.second.size() == 2);
|
|
246
|
+
REQUIRE(entry.second[0] == (*it).second[0]);
|
|
247
|
+
REQUIRE(entry.second[1] == (*it).second[1]);
|
|
248
|
+
++it;
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
// Unions two sketches whose key ranges half-overlap (0..999 and 500..1499)
// and expects the result's estimate to be 1500 within a margin of 0.01.
TEST_CASE("aod union: half overlap", "[tuple_sketch]") {
|
|
253
|
+
std::vector<double> a = {1};
|
|
254
|
+
|
|
255
|
+
auto update_sketch1 = update_array_of_doubles_sketch::builder().build();
|
|
256
|
+
for (int i = 0; i < 1000; ++i) update_sketch1.update(i, a);
|
|
257
|
+
|
|
258
|
+
auto update_sketch2 = update_array_of_doubles_sketch::builder().build();
|
|
259
|
+
// keys 500..999 are shared with update_sketch1
for (int i = 500; i < 1500; ++i) update_sketch2.update(i, a);
|
|
260
|
+
|
|
261
|
+
auto u = array_of_doubles_union::builder().build();
|
|
262
|
+
u.update(update_sketch1);
|
|
263
|
+
u.update(update_sketch2);
|
|
264
|
+
auto result = u.get_result();
|
|
265
|
+
REQUIRE(result.get_estimate() == Approx(1500).margin(0.01));
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
// Intersects two sketches whose key ranges half-overlap (0..999 and
// 500..1499) and expects the result's estimate to be 500 within a margin of
// 0.01.
TEST_CASE("aod intersection: half overlap", "[tuple_sketch]") {
|
|
269
|
+
std::vector<double> a = {1};
|
|
270
|
+
|
|
271
|
+
auto update_sketch1 = update_array_of_doubles_sketch::builder().build();
|
|
272
|
+
for (int i = 0; i < 1000; ++i) update_sketch1.update(i, a);
|
|
273
|
+
|
|
274
|
+
auto update_sketch2 = update_array_of_doubles_sketch::builder().build();
|
|
275
|
+
// keys 500..999 are shared with update_sketch1
for (int i = 500; i < 1500; ++i) update_sketch2.update(i, a);
|
|
276
|
+
|
|
277
|
+
// the intersection is instantiated with the union policy type as its template argument
array_of_doubles_intersection<array_of_doubles_union_policy> intersection;
|
|
278
|
+
intersection.update(update_sketch1);
|
|
279
|
+
intersection.update(update_sketch2);
|
|
280
|
+
auto result = intersection.get_result();
|
|
281
|
+
REQUIRE(result.get_estimate() == Approx(500).margin(0.01));
|
|
282
|
+
}
|
|
283
|
+
|
|
284
|
+
// Computes A-not-B for two sketches whose key ranges half-overlap (A holds
// 0..999, B holds 500..1499) and expects the result's estimate to be 500
// within a margin of 0.01.
TEST_CASE("aod a-not-b: half overlap", "[tuple_sketch]") {
|
|
285
|
+
// a plain array is passed to update() here, where the sibling tests pass a std::vector
double a[1] = {1};
|
|
286
|
+
|
|
287
|
+
auto update_sketch1 = update_array_of_doubles_sketch::builder().build();
|
|
288
|
+
for (int i = 0; i < 1000; ++i) update_sketch1.update(i, a);
|
|
289
|
+
|
|
290
|
+
auto update_sketch2 = update_array_of_doubles_sketch::builder().build();
|
|
291
|
+
// keys 500..999 are shared with update_sketch1
for (int i = 500; i < 1500; ++i) update_sketch2.update(i, a);
|
|
292
|
+
|
|
293
|
+
array_of_doubles_a_not_b a_not_b;
|
|
294
|
+
auto result = a_not_b.compute(update_sketch1, update_sketch2);
|
|
295
|
+
REQUIRE(result.get_estimate() == Approx(500).margin(0.01));
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
} /* namespace datasketches */
|