datasketches 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +310 -0
- data/NOTICE +11 -0
- data/README.md +126 -0
- data/ext/datasketches/cpc_wrapper.cpp +50 -0
- data/ext/datasketches/ext.cpp +12 -0
- data/ext/datasketches/extconf.rb +11 -0
- data/ext/datasketches/hll_wrapper.cpp +69 -0
- data/lib/datasketches.rb +9 -0
- data/lib/datasketches/version.rb +3 -0
- data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
- data/vendor/datasketches-cpp/LICENSE +311 -0
- data/vendor/datasketches-cpp/MANIFEST.in +19 -0
- data/vendor/datasketches-cpp/NOTICE +11 -0
- data/vendor/datasketches-cpp/README.md +42 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
- data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
- data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
- data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
- data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
- data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
- data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
- data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
- data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
- data/vendor/datasketches-cpp/pyproject.toml +17 -0
- data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
- data/vendor/datasketches-cpp/python/README.md +78 -0
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
- data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +94 -0
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
- data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
- data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
- data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
- data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
- metadata +302 -0
@@ -0,0 +1,97 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#include <catch.hpp>
|
21
|
+
|
22
|
+
#include <theta_union.hpp>
|
23
|
+
|
24
|
+
namespace datasketches {
|
25
|
+
|
26
|
+
TEST_CASE("theta union: empty", "[theta_union]") {
|
27
|
+
update_theta_sketch sketch1 = update_theta_sketch::builder().build();
|
28
|
+
theta_union u = theta_union::builder().build();
|
29
|
+
compact_theta_sketch sketch2 = u.get_result();
|
30
|
+
REQUIRE(sketch2.get_num_retained() == 0);
|
31
|
+
REQUIRE(sketch2.is_empty());
|
32
|
+
REQUIRE_FALSE(sketch2.is_estimation_mode());
|
33
|
+
|
34
|
+
u.update(sketch1);
|
35
|
+
sketch2 = u.get_result();
|
36
|
+
REQUIRE(sketch2.get_num_retained() == 0);
|
37
|
+
REQUIRE(sketch2.is_empty());
|
38
|
+
REQUIRE_FALSE(sketch2.is_estimation_mode());
|
39
|
+
}
|
40
|
+
|
41
|
+
TEST_CASE("theta union: non empty no retained keys", "[theta_union]") {
|
42
|
+
update_theta_sketch update_sketch = update_theta_sketch::builder().set_p(0.001).build();
|
43
|
+
update_sketch.update(1);
|
44
|
+
theta_union u = theta_union::builder().build();
|
45
|
+
u.update(update_sketch);
|
46
|
+
compact_theta_sketch sketch = u.get_result();
|
47
|
+
REQUIRE(sketch.get_num_retained() == 0);
|
48
|
+
REQUIRE_FALSE(sketch.is_empty());
|
49
|
+
REQUIRE(sketch.is_estimation_mode());
|
50
|
+
REQUIRE(sketch.get_theta() == Approx(0.001).margin(1e-10));
|
51
|
+
}
|
52
|
+
|
53
|
+
TEST_CASE("theta union: exact mode half overlap", "[theta_union]") {
|
54
|
+
update_theta_sketch sketch1 = update_theta_sketch::builder().build();
|
55
|
+
int value = 0;
|
56
|
+
for (int i = 0; i < 1000; i++) sketch1.update(value++);
|
57
|
+
|
58
|
+
update_theta_sketch sketch2 = update_theta_sketch::builder().build();
|
59
|
+
value = 500;
|
60
|
+
for (int i = 0; i < 1000; i++) sketch2.update(value++);
|
61
|
+
|
62
|
+
theta_union u = theta_union::builder().build();
|
63
|
+
u.update(sketch1);
|
64
|
+
u.update(sketch2);
|
65
|
+
compact_theta_sketch sketch3 = u.get_result();
|
66
|
+
REQUIRE_FALSE(sketch3.is_empty());
|
67
|
+
REQUIRE_FALSE(sketch3.is_estimation_mode());
|
68
|
+
REQUIRE(sketch3.get_estimate() == Approx(1500).margin(1500 * 0.01));
|
69
|
+
}
|
70
|
+
|
71
|
+
TEST_CASE("theta union: estimation mode half overlap", "[theta_union]") {
|
72
|
+
update_theta_sketch sketch1 = update_theta_sketch::builder().build();
|
73
|
+
int value = 0;
|
74
|
+
for (int i = 0; i < 10000; i++) sketch1.update(value++);
|
75
|
+
|
76
|
+
update_theta_sketch sketch2 = update_theta_sketch::builder().build();
|
77
|
+
value = 5000;
|
78
|
+
for (int i = 0; i < 10000; i++) sketch2.update(value++);
|
79
|
+
|
80
|
+
theta_union u = theta_union::builder().build();
|
81
|
+
u.update(sketch1);
|
82
|
+
u.update(sketch2);
|
83
|
+
compact_theta_sketch sketch3 = u.get_result();
|
84
|
+
REQUIRE_FALSE(sketch3.is_empty());
|
85
|
+
REQUIRE(sketch3.is_estimation_mode());
|
86
|
+
REQUIRE(sketch3.get_estimate() == Approx(15000).margin(15000 * 0.01));
|
87
|
+
//std::cerr << sketch3.to_string(true);
|
88
|
+
}
|
89
|
+
|
90
|
+
TEST_CASE("theta union: seed mismatch", "[theta_union]") {
|
91
|
+
update_theta_sketch sketch = update_theta_sketch::builder().build();
|
92
|
+
sketch.update(1); // non-empty should not be ignored
|
93
|
+
theta_union u = theta_union::builder().set_seed(123).build();
|
94
|
+
REQUIRE_THROWS_AS(u.update(sketch), std::invalid_argument);
|
95
|
+
}
|
96
|
+
|
97
|
+
} /* namespace datasketches */
|
Binary file
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
add_library(tuple INTERFACE)
|
19
|
+
|
20
|
+
add_library(${PROJECT_NAME}::TUPLE ALIAS tuple)
|
21
|
+
|
22
|
+
if (BUILD_TESTS)
|
23
|
+
add_subdirectory(test)
|
24
|
+
endif()
|
25
|
+
|
26
|
+
target_include_directories(tuple
|
27
|
+
INTERFACE
|
28
|
+
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
|
29
|
+
$<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
|
30
|
+
)
|
31
|
+
|
32
|
+
target_link_libraries(tuple INTERFACE common)
|
33
|
+
target_compile_features(tuple INTERFACE cxx_std_11)
|
34
|
+
|
35
|
+
set(tuple_HEADERS "")
|
36
|
+
list(APPEND tuple_HEADERS "include/tuple_sketch.hpp;include/tuple_sketch_impl.hpp")
|
37
|
+
list(APPEND tuple_HEADERS "include/tuple_union.hpp;include/tuple_union_impl.hpp")
|
38
|
+
list(APPEND tuple_HEADERS "include/tuple_intersection.hpp;include/tuple_intersection_impl.hpp")
|
39
|
+
list(APPEND tuple_HEADERS "include/tuple_a_not_b.hpp;include/tuple_a_not_b_impl.hpp")
|
40
|
+
list(APPEND tuple_HEADERS "include/array_of_doubles_sketch.hpp;include/array_of_doubles_sketch_impl.hpp")
|
41
|
+
list(APPEND tuple_HEADERS "include/array_of_doubles_union.hpp;include/array_of_doubles_union_impl.hpp")
|
42
|
+
list(APPEND tuple_HEADERS "include/array_of_doubles_intersection.hpp;include/array_of_doubles_intersection_impl.hpp")
|
43
|
+
list(APPEND tuple_HEADERS "include/array_of_doubles_a_not_b.hpp;include/array_of_doubles_a_not_b_impl.hpp")
|
44
|
+
list(APPEND tuple_HEADERS "include/theta_update_sketch_base.hpp;include/theta_update_sketch_base_impl.hpp")
|
45
|
+
list(APPEND tuple_HEADERS "include/theta_union_base.hpp;include/theta_union_base_impl.hpp")
|
46
|
+
list(APPEND tuple_HEADERS "include/theta_intersection_base.hpp;include/theta_intersection_base_impl.hpp")
|
47
|
+
list(APPEND tuple_HEADERS "include/theta_set_difference_base.hpp;include/theta_set_difference_base_impl.hpp")
|
48
|
+
list(APPEND tuple_HEADERS "include/theta_sketch_experimental.hpp;include/theta_sketch_experimental_impl.hpp")
|
49
|
+
list(APPEND tuple_HEADERS "include/theta_union_experimental.hpp;include/theta_union_experimental_impl.hpp")
|
50
|
+
list(APPEND tuple_HEADERS "include/theta_intersection_experimental.hpp;include/theta_intersection_experimental_impl.hpp")
|
51
|
+
list(APPEND tuple_HEADERS "include/theta_a_not_b_experimental.hpp;include/theta_a_not_b_experimental_impl.hpp")
|
52
|
+
list(APPEND tuple_HEADERS "include/bounds_on_ratios_in_sampled_sets.hpp")
|
53
|
+
list(APPEND tuple_HEADERS "include/bounds_on_ratios_in_theta_sketched_sets.hpp")
|
54
|
+
list(APPEND tuple_HEADERS "include/jaccard_similarity.hpp")
|
55
|
+
list(APPEND tuple_HEADERS "include/theta_comparators.hpp")
|
56
|
+
list(APPEND tuple_HEADERS "include/theta_constants.hpp")
|
57
|
+
|
58
|
+
install(TARGETS tuple
|
59
|
+
EXPORT ${PROJECT_NAME}
|
60
|
+
)
|
61
|
+
|
62
|
+
install(FILES ${tuple_HEADERS}
|
63
|
+
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|
64
|
+
|
65
|
+
target_sources(tuple
|
66
|
+
INTERFACE
|
67
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_sketch.hpp
|
68
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_sketch_impl.hpp
|
69
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_union.hpp
|
70
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_union_impl.hpp
|
71
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_intersection.hpp
|
72
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_intersection_impl.hpp
|
73
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_a_not_b.hpp
|
74
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/tuple_a_not_b_impl.hpp
|
75
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch.hpp
|
76
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_sketch_impl.hpp
|
77
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_union.hpp
|
78
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_union_impl.hpp
|
79
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_intersection.hpp
|
80
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_intersection_impl.hpp
|
81
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_a_not_b.hpp
|
82
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/array_of_doubles_a_not_b_impl.hpp
|
83
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_update_sketch_base.hpp
|
84
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_update_sketch_base_impl.hpp
|
85
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_union_base.hpp
|
86
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_union_base_impl.hpp
|
87
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_intersection_base.hpp
|
88
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_intersection_base_impl.hpp
|
89
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_set_difference_base.hpp
|
90
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_set_difference_base_impl.hpp
|
91
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_sketch_experimental.hpp
|
92
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_sketch_experimental_impl.hpp
|
93
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_union_experimental.hpp
|
94
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_union_experimental_impl.hpp
|
95
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_intersection_experimental.hpp
|
96
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_intersection_experimental_impl.hpp
|
97
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_a_not_b_experimental.hpp
|
98
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_a_not_b_experimental_impl.hpp
|
99
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/bounds_on_ratios_in_sampled_sets.hpp
|
100
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/bounds_on_ratios_in_theta_sketched_sets.hpp
|
101
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/jaccard_similarity.hpp
|
102
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_comparators.hpp
|
103
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/theta_constants.hpp
|
104
|
+
)
|
@@ -0,0 +1,52 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#ifndef ARRAY_OF_DOUBLES_A_NOT_B_HPP_
|
21
|
+
#define ARRAY_OF_DOUBLES_A_NOT_B_HPP_
|
22
|
+
|
23
|
+
#include <vector>
|
24
|
+
#include <memory>
|
25
|
+
|
26
|
+
#include "array_of_doubles_sketch.hpp"
|
27
|
+
#include "tuple_a_not_b.hpp"
|
28
|
+
|
29
|
+
namespace datasketches {
|
30
|
+
|
31
|
+
template<typename Allocator = std::allocator<double>>
|
32
|
+
class array_of_doubles_a_not_b_alloc: tuple_a_not_b<aod<Allocator>, AllocAOD<Allocator>> {
|
33
|
+
public:
|
34
|
+
using Summary = aod<Allocator>;
|
35
|
+
using AllocSummary = AllocAOD<Allocator>;
|
36
|
+
using Base = tuple_a_not_b<Summary, AllocSummary>;
|
37
|
+
using CompactSketch = compact_array_of_doubles_sketch_alloc<Allocator>;
|
38
|
+
|
39
|
+
explicit array_of_doubles_a_not_b_alloc(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
|
40
|
+
|
41
|
+
template<typename FwdSketch, typename Sketch>
|
42
|
+
CompactSketch compute(FwdSketch&& a, const Sketch& b, bool ordered = true) const;
|
43
|
+
};
|
44
|
+
|
45
|
+
// alias with the default allocator for convenience
|
46
|
+
using array_of_doubles_a_not_b = array_of_doubles_a_not_b_alloc<>;
|
47
|
+
|
48
|
+
} /* namespace datasketches */
|
49
|
+
|
50
|
+
#include "array_of_doubles_a_not_b_impl.hpp"
|
51
|
+
|
52
|
+
#endif
|
@@ -0,0 +1,32 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
namespace datasketches {
|
21
|
+
|
22
|
+
template<typename A>
|
23
|
+
array_of_doubles_a_not_b_alloc<A>::array_of_doubles_a_not_b_alloc(uint64_t seed, const A& allocator):
|
24
|
+
Base(seed, allocator) {}
|
25
|
+
|
26
|
+
template<typename A>
|
27
|
+
template<typename FwdSketch, typename Sketch>
|
28
|
+
auto array_of_doubles_a_not_b_alloc<A>::compute(FwdSketch&& a, const Sketch& b, bool ordered) const -> CompactSketch {
|
29
|
+
return CompactSketch(a.get_num_values(), Base::compute(std::forward<FwdSketch>(a), b, ordered));
|
30
|
+
}
|
31
|
+
|
32
|
+
} /* namespace datasketches */
|
@@ -0,0 +1,52 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#ifndef ARRAY_OF_DOUBLES_INTERSECTION_HPP_
|
21
|
+
#define ARRAY_OF_DOUBLES_INTERSECTION_HPP_
|
22
|
+
|
23
|
+
#include <vector>
|
24
|
+
#include <memory>
|
25
|
+
|
26
|
+
#include "array_of_doubles_sketch.hpp"
|
27
|
+
#include "tuple_intersection.hpp"
|
28
|
+
|
29
|
+
namespace datasketches {
|
30
|
+
|
31
|
+
template<
|
32
|
+
typename Policy,
|
33
|
+
typename Allocator = std::allocator<double>
|
34
|
+
>
|
35
|
+
class array_of_doubles_intersection: public tuple_intersection<aod<Allocator>, Policy, AllocAOD<Allocator>> {
|
36
|
+
public:
|
37
|
+
using Summary = aod<Allocator>;
|
38
|
+
using AllocSummary = AllocAOD<Allocator>;
|
39
|
+
using Base = tuple_intersection<Summary, Policy, AllocSummary>;
|
40
|
+
using CompactSketch = compact_array_of_doubles_sketch_alloc<Allocator>;
|
41
|
+
using resize_factor = theta_constants::resize_factor;
|
42
|
+
|
43
|
+
explicit array_of_doubles_intersection(uint64_t seed = DEFAULT_SEED, const Policy& policy = Policy(), const Allocator& allocator = Allocator());
|
44
|
+
|
45
|
+
CompactSketch get_result(bool ordered = true) const;
|
46
|
+
};
|
47
|
+
|
48
|
+
} /* namespace datasketches */
|
49
|
+
|
50
|
+
#include "array_of_doubles_intersection_impl.hpp"
|
51
|
+
|
52
|
+
#endif
|
@@ -0,0 +1,31 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
namespace datasketches {
|
21
|
+
|
22
|
+
template<typename P, typename A>
|
23
|
+
array_of_doubles_intersection<P, A>::array_of_doubles_intersection(uint64_t seed, const P& policy, const A& allocator):
|
24
|
+
Base(seed, policy, allocator) {}
|
25
|
+
|
26
|
+
template<typename P, typename A>
|
27
|
+
auto array_of_doubles_intersection<P, A>::get_result(bool ordered) const -> CompactSketch {
|
28
|
+
return compact_array_of_doubles_sketch_alloc<A>(this->state_.get_policy().get_policy().get_num_values(), Base::get_result(ordered));
|
29
|
+
}
|
30
|
+
|
31
|
+
} /* namespace datasketches */
|
@@ -0,0 +1,179 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#ifndef ARRAY_OF_DOUBLES_SKETCH_HPP_
|
21
|
+
#define ARRAY_OF_DOUBLES_SKETCH_HPP_
|
22
|
+
|
23
|
+
#include <vector>
|
24
|
+
#include <memory>
|
25
|
+
|
26
|
+
#include "serde.hpp"
|
27
|
+
#include "tuple_sketch.hpp"
|
28
|
+
|
29
|
+
namespace datasketches {
|
30
|
+
|
31
|
+
// This sketch is the equivalent of ArrayOfDoublesSketch in Java
|
32
|
+
|
33
|
+
// This simple array of doubles is faster than std::vector and should be sufficient for this application
|
34
|
+
// Owning, fixed-size array of doubles whose storage comes from Allocator.
// The element count is limited to what fits in uint8_t.
// A moved-from aod is left empty (size 0, no storage), so it can still be
// safely copied, compared, assigned to and destroyed.
template<typename Allocator = std::allocator<double>>
class aod {
public:
  // Allocates size doubles and sets all of them to zero.
  explicit aod(uint8_t size, const Allocator& allocator = Allocator()):
  allocator_(allocator), size_(size), array_(allocator_.allocate(size_)) {
    std::fill(array_, array_ + size_, 0);
  }

  // Deep copy: new storage of the same size with the same values.
  aod(const aod& other):
  allocator_(other.allocator_),
  size_(other.size_),
  array_(allocator_.allocate(size_))
  {
    std::copy(other.array_, other.array_ + size_, array_);
  }

  // Takes over the storage of other.
  aod(aod&& other) noexcept:
  allocator_(std::move(other.allocator_)),
  size_(other.size_),
  array_(other.array_)
  {
    // reset the size together with the pointer: otherwise the moved-from object
    // would claim size_ elements behind a null pointer, and copying or comparing
    // it would dereference null
    other.size_ = 0;
    other.array_ = nullptr;
  }

  ~aod() {
    if (array_ != nullptr) allocator_.deallocate(array_, size_);
  }

  // Copy-and-swap: the previous storage is released when copy goes out of scope.
  aod& operator=(const aod& other) {
    aod copy(other);
    std::swap(allocator_, copy.allocator_);
    std::swap(size_, copy.size_);
    std::swap(array_, copy.array_);
    return *this;
  }

  // Swaps state with other; the previous storage is released by other's destructor.
  // noexcept so that containers can move (not copy) elements on reallocation.
  aod& operator=(aod&& other) noexcept {
    std::swap(allocator_, other.allocator_);
    std::swap(size_, other.size_);
    std::swap(array_, other.array_);
    return *this;
  }

  // Unchecked element access: index must be less than size().
  double& operator[](size_t index) { return array_[index]; }
  double operator[](size_t index) const { return array_[index]; }

  uint8_t size() const { return size_; }
  double* data() { return array_; }
  const double* data() const { return array_; }

  // Equal only if both arrays have the same size and the same values.
  // The size check also prevents reading past the end of a shorter other.
  bool operator==(const aod& other) const {
    if (size_ != other.size_) return false;
    return std::equal(array_, array_ + size_, other.array_);
  }

private:
  Allocator allocator_;
  uint8_t size_;
  double* array_;
};
|
85
|
+
|
86
|
+
template<typename A = std::allocator<double>>
|
87
|
+
class array_of_doubles_update_policy {
|
88
|
+
public:
|
89
|
+
array_of_doubles_update_policy(uint8_t num_values = 1, const A& allocator = A()):
|
90
|
+
allocator_(allocator), num_values_(num_values) {}
|
91
|
+
aod<A> create() const {
|
92
|
+
return aod<A>(num_values_, allocator_);
|
93
|
+
}
|
94
|
+
template<typename InputVector> // to allow any type with indexed access (such as double*)
|
95
|
+
void update(aod<A>& summary, const InputVector& update) const {
|
96
|
+
for (uint8_t i = 0; i < num_values_; ++i) summary[i] += update[i];
|
97
|
+
}
|
98
|
+
uint8_t get_num_values() const {
|
99
|
+
return num_values_;
|
100
|
+
}
|
101
|
+
|
102
|
+
private:
|
103
|
+
A allocator_;
|
104
|
+
uint8_t num_values_;
|
105
|
+
};
|
106
|
+
|
107
|
+
// forward declaration
|
108
|
+
template<typename A> class compact_array_of_doubles_sketch_alloc;
|
109
|
+
|
110
|
+
template<typename A> using AllocAOD = typename std::allocator_traits<A>::template rebind_alloc<aod<A>>;
|
111
|
+
|
112
|
+
template<typename A = std::allocator<double>>
|
113
|
+
class update_array_of_doubles_sketch_alloc: public update_tuple_sketch<aod<A>, aod<A>, array_of_doubles_update_policy<A>, AllocAOD<A>> {
|
114
|
+
public:
|
115
|
+
using Base = update_tuple_sketch<aod<A>, aod<A>, array_of_doubles_update_policy<A>, AllocAOD<A>>;
|
116
|
+
using resize_factor = typename Base::resize_factor;
|
117
|
+
|
118
|
+
class builder;
|
119
|
+
|
120
|
+
compact_array_of_doubles_sketch_alloc<A> compact(bool ordered = true) const;
|
121
|
+
uint8_t get_num_values() const;
|
122
|
+
|
123
|
+
private:
|
124
|
+
// for builder
|
125
|
+
update_array_of_doubles_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta,
|
126
|
+
uint64_t seed, const array_of_doubles_update_policy<A>& policy, const A& allocator);
|
127
|
+
};
|
128
|
+
|
129
|
+
// alias with the default allocator for convenience
|
130
|
+
using update_array_of_doubles_sketch = update_array_of_doubles_sketch_alloc<>;
|
131
|
+
|
132
|
+
template<typename A>
|
133
|
+
class update_array_of_doubles_sketch_alloc<A>::builder: public tuple_base_builder<builder, array_of_doubles_update_policy<A>, A> {
|
134
|
+
public:
|
135
|
+
builder(const array_of_doubles_update_policy<A>& policy = array_of_doubles_update_policy<A>(), const A& allocator = A());
|
136
|
+
update_array_of_doubles_sketch_alloc<A> build() const;
|
137
|
+
};
|
138
|
+
|
139
|
+
template<typename A = std::allocator<double>>
|
140
|
+
class compact_array_of_doubles_sketch_alloc: public compact_tuple_sketch<aod<A>, AllocAOD<A>> {
|
141
|
+
public:
|
142
|
+
using Base = compact_tuple_sketch<aod<A>, AllocAOD<A>>;
|
143
|
+
using Entry = typename Base::Entry;
|
144
|
+
using AllocEntry = typename Base::AllocEntry;
|
145
|
+
using AllocU64 = typename Base::AllocU64;
|
146
|
+
using vector_bytes = typename Base::vector_bytes;
|
147
|
+
|
148
|
+
static const uint8_t SERIAL_VERSION = 1;
|
149
|
+
static const uint8_t SKETCH_FAMILY = 9;
|
150
|
+
static const uint8_t SKETCH_TYPE = 3;
|
151
|
+
enum flags { UNUSED1, UNUSED2, IS_EMPTY, HAS_ENTRIES, IS_ORDERED };
|
152
|
+
|
153
|
+
template<typename Sketch>
|
154
|
+
compact_array_of_doubles_sketch_alloc(const Sketch& other, bool ordered = true);
|
155
|
+
|
156
|
+
uint8_t get_num_values() const;
|
157
|
+
|
158
|
+
void serialize(std::ostream& os) const;
|
159
|
+
vector_bytes serialize(unsigned header_size_bytes = 0) const;
|
160
|
+
|
161
|
+
static compact_array_of_doubles_sketch_alloc deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
|
162
|
+
static compact_array_of_doubles_sketch_alloc deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED,
|
163
|
+
const A& allocator = A());
|
164
|
+
|
165
|
+
// for internal use
|
166
|
+
compact_array_of_doubles_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries, uint8_t num_values);
|
167
|
+
compact_array_of_doubles_sketch_alloc(uint8_t num_values, Base&& base);
|
168
|
+
private:
|
169
|
+
uint8_t num_values_;
|
170
|
+
};
|
171
|
+
|
172
|
+
// alias with the default allocator for convenience
|
173
|
+
using compact_array_of_doubles_sketch = compact_array_of_doubles_sketch_alloc<>;
|
174
|
+
|
175
|
+
} /* namespace datasketches */
|
176
|
+
|
177
|
+
#include "array_of_doubles_sketch_impl.hpp"
|
178
|
+
|
179
|
+
#endif
|