datasketches 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +310 -0
- data/NOTICE +11 -0
- data/README.md +126 -0
- data/ext/datasketches/cpc_wrapper.cpp +50 -0
- data/ext/datasketches/ext.cpp +12 -0
- data/ext/datasketches/extconf.rb +11 -0
- data/ext/datasketches/hll_wrapper.cpp +69 -0
- data/lib/datasketches.rb +9 -0
- data/lib/datasketches/version.rb +3 -0
- data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
- data/vendor/datasketches-cpp/LICENSE +311 -0
- data/vendor/datasketches-cpp/MANIFEST.in +19 -0
- data/vendor/datasketches-cpp/NOTICE +11 -0
- data/vendor/datasketches-cpp/README.md +42 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
- data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
- data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
- data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
- data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
- data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
- data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
- data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
- data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
- data/vendor/datasketches-cpp/pyproject.toml +17 -0
- data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
- data/vendor/datasketches-cpp/python/README.md +78 -0
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
- data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +94 -0
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
- data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
- data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
- data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
- data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
- metadata +302 -0
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
// author Kevin Lang, Oath Research
|
|
21
|
+
|
|
22
|
+
#ifndef CPC_CONFIDENCE_HPP_
|
|
23
|
+
#define CPC_CONFIDENCE_HPP_
|
|
24
|
+
|
|
25
|
+
#include <cmath>
|
|
26
|
+
|
|
27
|
+
#include "cpc_sketch.hpp"
|
|
28
|
+
|
|
29
|
+
namespace datasketches {
|
|
30
|
+
|
|
31
|
+
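// ln 2.0 -- ICON error coefficient used by the confidence functions below when lg_k > 14
// (note: the identifier is spelled ERROT, not ERROR; the functions below reference it by that spelling)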
|
|
32
|
+
static const double ICON_ERROT_CONSTANT = 0.693147180559945286;
|
|
33
|
+
|
|
34
|
+
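// Coefficients scaled by 10000, one row per lgK (4..14) and one column per kappa (1..3),
// read as ICON_LOW_SIDE_DATA[3 * (lgK - 4) + (kappa - 1)] by get_icon_confidence_ub.
// 1, 2, 3, // kappa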
|
|
35
|
+
static const int16_t ICON_LOW_SIDE_DATA [33] = { // Empirically measured at N = 1000 * K.
|
|
36
|
+
6037, 5720, 5328, // 4 1000000
|
|
37
|
+
6411, 6262, 5682, // 5 1000000
|
|
38
|
+
6724, 6403, 6127, // 6 1000000
|
|
39
|
+
6665, 6411, 6208, // 7 1000000
|
|
40
|
+
6959, 6525, 6427, // 8 1000000
|
|
41
|
+
6892, 6665, 6619, // 9 1000000
|
|
42
|
+
6792, 6752, 6690, // 10 1000000
|
|
43
|
+
6899, 6818, 6708, // 11 1000000
|
|
44
|
+
6871, 6845, 6812, // 12 1046369
|
|
45
|
+
6909, 6861, 6828, // 13 1043411
|
|
46
|
+
6919, 6897, 6842, // 14 1000297
|
|
47
|
+
}; // lgK numtrials
|
|
48
|
+
|
|
49
|
+
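// Coefficients scaled by 10000, one row per lgK (4..14) and one column per kappa (1..3),
// read as ICON_HIGH_SIDE_DATA[3 * (lgK - 4) + (kappa - 1)] by get_icon_confidence_lb.
// 1, 2, 3, // kappa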
|
|
50
|
+
static const int16_t ICON_HIGH_SIDE_DATA [33] = { // Empirically measured at N = 1000 * K.
|
|
51
|
+
8031, 8559, 9309, // 4 1000000
|
|
52
|
+
7084, 7959, 8660, // 5 1000000
|
|
53
|
+
7141, 7514, 7876, // 6 1000000
|
|
54
|
+
7458, 7430, 7572, // 7 1000000
|
|
55
|
+
6892, 7141, 7497, // 8 1000000
|
|
56
|
+
6889, 7132, 7290, // 9 1000000
|
|
57
|
+
7075, 7118, 7185, // 10 1000000
|
|
58
|
+
7040, 7047, 7085, // 11 1000000
|
|
59
|
+
6993, 7019, 7053, // 12 1046369
|
|
60
|
+
6953, 7001, 6983, // 13 1043411
|
|
61
|
+
6944, 6966, 7004, // 14 1000297
|
|
62
|
+
}; // lgK numtrials
|
|
63
|
+
|
|
64
|
+
// sqrt((ln 2.0) / 2.0)
|
|
65
|
+
static const double HIP_ERROR_CONSTANT = 0.588705011257737332;
|
|
66
|
+
|
|
67
|
+
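// Coefficients scaled by 10000, one row per lgK (4..14) and one column per kappa (1..3),
// read as HIP_LOW_SIDE_DATA[3 * (lgK - 4) + (kappa - 1)] by get_hip_confidence_ub.
// 1, 2, 3, // kappa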
|
|
68
|
+
static const int16_t HIP_LOW_SIDE_DATA [33] = { // Empirically measured at N = 1000 * K.
|
|
69
|
+
5871, 5247, 4826, // 4 1000000
|
|
70
|
+
5877, 5403, 5070, // 5 1000000
|
|
71
|
+
5873, 5533, 5304, // 6 1000000
|
|
72
|
+
5878, 5632, 5464, // 7 1000000
|
|
73
|
+
5874, 5690, 5564, // 8 1000000
|
|
74
|
+
5880, 5745, 5619, // 9 1000000
|
|
75
|
+
5875, 5784, 5701, // 10 1000000
|
|
76
|
+
5866, 5789, 5742, // 11 1000000
|
|
77
|
+
5869, 5827, 5784, // 12 1046369
|
|
78
|
+
5876, 5860, 5827, // 13 1043411
|
|
79
|
+
5881, 5853, 5842, // 14 1000297
|
|
80
|
+
}; // lgK numtrials
|
|
81
|
+
|
|
82
|
+
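// Coefficients scaled by 10000, one row per lgK (4..14) and one column per kappa (1..3),
// read as HIP_HIGH_SIDE_DATA[3 * (lgK - 4) + (kappa - 1)] by get_hip_confidence_lb.
// 1, 2, 3, // kappa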
|
|
83
|
+
static const int16_t HIP_HIGH_SIDE_DATA [33] = { // Empirically measured at N = 1000 * K.
|
|
84
|
+
5855, 6688, 7391, // 4 1000000
|
|
85
|
+
5886, 6444, 6923, // 5 1000000
|
|
86
|
+
5885, 6254, 6594, // 6 1000000
|
|
87
|
+
5889, 6134, 6326, // 7 1000000
|
|
88
|
+
5900, 6072, 6203, // 8 1000000
|
|
89
|
+
5875, 6005, 6089, // 9 1000000
|
|
90
|
+
5871, 5980, 6040, // 10 1000000
|
|
91
|
+
5889, 5941, 6015, // 11 1000000
|
|
92
|
+
5871, 5926, 5973, // 12 1046369
|
|
93
|
+
5866, 5901, 5915, // 13 1043411
|
|
94
|
+
5880, 5914, 5953, // 14 1000297
|
|
95
|
+
}; // lgK numtrials
|
|
96
|
+
|
|
97
|
+
template<typename A>
|
|
98
|
+
double get_icon_confidence_lb(const cpc_sketch_alloc<A>& sketch, int kappa) {
|
|
99
|
+
if (sketch.get_num_coupons() == 0) return 0.0;
|
|
100
|
+
const int lg_k = sketch.get_lg_k();
|
|
101
|
+
const long k = 1 << lg_k;
|
|
102
|
+
if (lg_k < 4) throw std::logic_error("lgk < 4");
|
|
103
|
+
if (kappa < 1 || kappa > 3) throw std::invalid_argument("kappa must be between 1 and 3");
|
|
104
|
+
double x = ICON_ERROT_CONSTANT;
|
|
105
|
+
if (lg_k <= 14) x = ((double) ICON_HIGH_SIDE_DATA[3 * (lg_k - 4) + (kappa - 1)]) / 10000.0;
|
|
106
|
+
const double rel = x / sqrt(k);
|
|
107
|
+
const double eps = kappa * rel;
|
|
108
|
+
const double est = sketch.get_icon_estimate();
|
|
109
|
+
double result = est / (1.0 + eps);
|
|
110
|
+
const double check = sketch.get_num_coupons();
|
|
111
|
+
if (result < check) result = check;
|
|
112
|
+
return result;
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
template<typename A>
|
|
116
|
+
double get_icon_confidence_ub(const cpc_sketch_alloc<A>& sketch, int kappa) {
|
|
117
|
+
if (sketch.get_num_coupons() == 0) return 0.0;
|
|
118
|
+
const int lg_k = sketch.get_lg_k();
|
|
119
|
+
const long k = 1 << lg_k;
|
|
120
|
+
if (lg_k < 4) throw std::logic_error("lgk < 4");
|
|
121
|
+
if (kappa < 1 || kappa > 3) throw std::invalid_argument("kappa must be between 1 and 3");
|
|
122
|
+
double x = ICON_ERROT_CONSTANT;
|
|
123
|
+
if (lg_k <= 14) x = ((double) ICON_LOW_SIDE_DATA[3 * (lg_k - 4) + (kappa - 1)]) / 10000.0;
|
|
124
|
+
const double rel = x / sqrt(k);
|
|
125
|
+
const double eps = kappa * rel;
|
|
126
|
+
const double est = sketch.get_icon_estimate();
|
|
127
|
+
const double result = est / (1.0 - eps);
|
|
128
|
+
return ceil(result); // widening for coverage
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
template<typename A>
|
|
132
|
+
double get_hip_confidence_lb(const cpc_sketch_alloc<A>& sketch, int kappa) {
|
|
133
|
+
if (sketch.get_num_coupons() == 0) return 0.0;
|
|
134
|
+
const int lg_k = sketch.get_lg_k();
|
|
135
|
+
const long k = 1 << lg_k;
|
|
136
|
+
if (lg_k < 4) throw std::logic_error("lgk < 4");
|
|
137
|
+
if (kappa < 1 || kappa > 3) throw std::invalid_argument("kappa must be between 1 and 3");
|
|
138
|
+
double x = HIP_ERROR_CONSTANT;
|
|
139
|
+
if (lg_k <= 14) x = ((double) HIP_HIGH_SIDE_DATA[3 * (lg_k - 4) + (kappa - 1)]) / 10000.0;
|
|
140
|
+
const double rel = x / (sqrt((double) k));
|
|
141
|
+
const double eps = ((double) kappa) * rel;
|
|
142
|
+
const double est = sketch.get_hip_estimate();
|
|
143
|
+
double result = est / (1.0 + eps);
|
|
144
|
+
const double check = (double) sketch.get_num_coupons();
|
|
145
|
+
if (result < check) result = check;
|
|
146
|
+
return result;
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
template<typename A>
|
|
150
|
+
double get_hip_confidence_ub(const cpc_sketch_alloc<A>& sketch, int kappa) {
|
|
151
|
+
if (sketch.get_num_coupons() == 0) return 0.0;
|
|
152
|
+
const int lg_k = sketch.get_lg_k();
|
|
153
|
+
const long k = 1 << lg_k;
|
|
154
|
+
if (lg_k < 4) throw std::logic_error("lgk < 4");
|
|
155
|
+
if (kappa < 1 || kappa > 3) throw std::invalid_argument("kappa must be between 1 and 3");
|
|
156
|
+
double x = HIP_ERROR_CONSTANT;
|
|
157
|
+
if (lg_k <= 14) x = ((double) HIP_LOW_SIDE_DATA[3 * (lg_k - 4) + (kappa - 1)]) / 10000.0;
|
|
158
|
+
const double rel = x / sqrt(k);
|
|
159
|
+
const double eps = kappa * rel;
|
|
160
|
+
const double est = sketch.get_hip_estimate();
|
|
161
|
+
const double result = est / (1.0 - eps);
|
|
162
|
+
return ceil(result); // widening for coverage
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
} /* namespace datasketches */
|
|
166
|
+
|
|
167
|
+
#endif
|
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef CPC_SKETCH_HPP_
|
|
21
|
+
#define CPC_SKETCH_HPP_
|
|
22
|
+
|
|
23
|
+
#include <iostream>
|
|
24
|
+
#include <functional>
|
|
25
|
+
#include <string>
|
|
26
|
+
#include <vector>
|
|
27
|
+
|
|
28
|
+
#include "u32_table.hpp"
|
|
29
|
+
#include "cpc_common.hpp"
|
|
30
|
+
#include "cpc_compressor.hpp"
|
|
31
|
+
#include "cpc_confidence.hpp"
|
|
32
|
+
#include "common_defs.hpp"
|
|
33
|
+
|
|
34
|
+
namespace datasketches {
|
|
35
|
+
|
|
36
|
+
/*
|
|
37
|
+
* High performance C++ implementation of Compressed Probabilistic Counting (CPC) Sketch
|
|
38
|
+
*
|
|
39
|
+
* This is a very compact (in serialized form) distinct counting sketch.
|
|
40
|
+
* The theory is described in the following paper:
|
|
41
|
+
* https://arxiv.org/abs/1708.06839
|
|
42
|
+
*
|
|
43
|
+
* author Kevin Lang
|
|
44
|
+
* author Alexander Saydakov
|
|
45
|
+
*/
|
|
46
|
+
|
|
47
|
+
// forward-declarations
|
|
48
|
+
template<typename A> class cpc_sketch_alloc;
|
|
49
|
+
template<typename A> class cpc_union_alloc;
|
|
50
|
+
|
|
51
|
+
// alias with default allocator for convenience
|
|
52
|
+
typedef cpc_sketch_alloc<std::allocator<void>> cpc_sketch;
|
|
53
|
+
|
|
54
|
+
// allocation and initialization of global decompression (decoding) tables
|
|
55
|
+
// call this before anything else if you want to control the initialization time
|
|
56
|
+
// for instance, to have this happen outside of a transaction context
|
|
57
|
+
// otherwise initialization happens on the first use (serialization or deserialization)
|
|
58
|
+
// it is safe to call more than once assuming no race conditions
|
|
59
|
+
// this is not thread safe! neither is the rest of the library
|
|
60
|
+
template<typename A> void cpc_init();
|
|
61
|
+
|
|
62
|
+
template<typename A>
|
|
63
|
+
class cpc_sketch_alloc {
|
|
64
|
+
public:
|
|
65
|
+
/**
|
|
66
|
+
* Creates an instance of the sketch given the lg_k parameter and hash seed.
|
|
67
|
+
* @param lg_k base 2 logarithm of the number of bins in the sketch
|
|
68
|
+
* @param seed for hash function
|
|
69
|
+
*/
|
|
70
|
+
explicit cpc_sketch_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED);
|
|
71
|
+
|
|
72
|
+
/**
|
|
73
|
+
* @return configured lg_k of this sketch
|
|
74
|
+
*/
|
|
75
|
+
uint8_t get_lg_k() const;
|
|
76
|
+
|
|
77
|
+
/**
|
|
78
|
+
* @return true if this sketch represents an empty set
|
|
79
|
+
*/
|
|
80
|
+
bool is_empty() const;
|
|
81
|
+
|
|
82
|
+
/**
|
|
83
|
+
* @return estimate of the distinct count of the input stream
|
|
84
|
+
*/
|
|
85
|
+
double get_estimate() const;
|
|
86
|
+
|
|
87
|
+
/**
|
|
88
|
+
* Returns the approximate lower error bound given a parameter kappa (1, 2 or 3).
|
|
89
|
+
* This parameter is similar to the number of standard deviations of the normal distribution
|
|
90
|
+
* and corresponds to approximately 67%, 95% and 99% confidence intervals.
|
|
91
|
+
* @param kappa parameter to specify confidence interval (1, 2 or 3)
|
|
92
|
+
* @return the lower bound
|
|
93
|
+
*/
|
|
94
|
+
double get_lower_bound(unsigned kappa) const;
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* Returns the approximate upper error bound given a parameter kappa (1, 2 or 3).
|
|
98
|
+
* This parameter is similar to the number of standard deviations of the normal distribution
|
|
99
|
+
* and corresponds to approximately 67%, 95% and 99% confidence intervals.
|
|
100
|
+
* @param kappa parameter to specify confidence interval (1, 2 or 3)
|
|
101
|
+
* @return the upper bound
|
|
102
|
+
*/
|
|
103
|
+
double get_upper_bound(unsigned kappa) const;
|
|
104
|
+
|
|
105
|
+
/**
|
|
106
|
+
* Update this sketch with a given string.
|
|
107
|
+
* @param value string to update the sketch with
|
|
108
|
+
*/
|
|
109
|
+
void update(const std::string& value);
|
|
110
|
+
|
|
111
|
+
/**
|
|
112
|
+
* Update this sketch with a given unsigned 64-bit integer.
|
|
113
|
+
* @param value uint64_t to update the sketch with
|
|
114
|
+
*/
|
|
115
|
+
void update(uint64_t value);
|
|
116
|
+
|
|
117
|
+
/**
|
|
118
|
+
* Update this sketch with a given signed 64-bit integer.
|
|
119
|
+
* @param value int64_t to update the sketch with
|
|
120
|
+
*/
|
|
121
|
+
void update(int64_t value);
|
|
122
|
+
|
|
123
|
+
/**
|
|
124
|
+
* Update this sketch with a given unsigned 32-bit integer.
|
|
125
|
+
* For compatibility with Java implementation.
|
|
126
|
+
* @param value uint32_t to update the sketch with
|
|
127
|
+
*/
|
|
128
|
+
void update(uint32_t value);
|
|
129
|
+
|
|
130
|
+
/**
|
|
131
|
+
* Update this sketch with a given signed 32-bit integer.
|
|
132
|
+
* For compatibility with Java implementation.
|
|
133
|
+
* @param value int32_t to update the sketch with
|
|
134
|
+
*/
|
|
135
|
+
void update(int32_t value);
|
|
136
|
+
|
|
137
|
+
/**
|
|
138
|
+
* Update this sketch with a given unsigned 16-bit integer.
|
|
139
|
+
* For compatibility with Java implementation.
|
|
140
|
+
* @param value uint16_t to update the sketch with
|
|
141
|
+
*/
|
|
142
|
+
void update(uint16_t value);
|
|
143
|
+
|
|
144
|
+
/**
|
|
145
|
+
* Update this sketch with a given signed 16-bit integer.
|
|
146
|
+
* For compatibility with Java implementation.
|
|
147
|
+
* @param value int16_t to update the sketch with
|
|
148
|
+
*/
|
|
149
|
+
void update(int16_t value);
|
|
150
|
+
|
|
151
|
+
/**
|
|
152
|
+
* Update this sketch with a given unsigned 8-bit integer.
|
|
153
|
+
* For compatibility with Java implementation.
|
|
154
|
+
* @param value uint8_t to update the sketch with
|
|
155
|
+
*/
|
|
156
|
+
void update(uint8_t value);
|
|
157
|
+
|
|
158
|
+
/**
|
|
159
|
+
* Update this sketch with a given signed 8-bit integer.
|
|
160
|
+
* For compatibility with Java implementation.
|
|
161
|
+
* @param value int8_t to update the sketch with
|
|
162
|
+
*/
|
|
163
|
+
void update(int8_t value);
|
|
164
|
+
|
|
165
|
+
/**
|
|
166
|
+
* Update this sketch with a given double-precision floating point value.
|
|
167
|
+
* For compatibility with Java implementation.
|
|
168
|
+
* @param value double to update the sketch with
|
|
169
|
+
*/
|
|
170
|
+
void update(double value);
|
|
171
|
+
|
|
172
|
+
/**
|
|
173
|
+
* Update this sketch with a given floating point value.
|
|
174
|
+
* For compatibility with Java implementation.
|
|
175
|
+
* @param value float to update the sketch with
|
|
176
|
+
*/
|
|
177
|
+
void update(float value);
|
|
178
|
+
|
|
179
|
+
/**
|
|
180
|
+
* Update this sketch with given data of any type.
|
|
181
|
+
* This is a "universal" update that covers all cases above,
|
|
182
|
+
* but may produce different hashes.
|
|
183
|
+
* Be very careful to hash input values consistently using the same approach
|
|
184
|
+
* both over time and on different platforms
|
|
185
|
+
* and while passing sketches between C++ environment and Java environment.
|
|
186
|
+
* Otherwise two sketches that should represent overlapping sets will be disjoint.
|
|
187
|
+
* For instance, for signed 32-bit values call update(int32_t) method above,
|
|
188
|
+
* which does widening conversion to int64_t, if compatibility with Java is expected
|
|
189
|
+
* @param value pointer to the data
|
|
190
|
+
* @param size length of the data in bytes
|
|
191
|
+
*/
|
|
192
|
+
void update(const void* value, int size);
|
|
193
|
+
|
|
194
|
+
/**
|
|
195
|
+
* Returns a human-readable summary of this sketch
|
|
196
|
+
*/
|
|
197
|
+
string<A> to_string() const;
|
|
198
|
+
|
|
199
|
+
/**
|
|
200
|
+
* This method serializes the sketch into a given stream in a binary form
|
|
201
|
+
* @param os output stream
|
|
202
|
+
*/
|
|
203
|
+
void serialize(std::ostream& os) const;
|
|
204
|
+
|
|
205
|
+
// This is a convenience alias for users
|
|
206
|
+
// The type returned by the following serialize method
|
|
207
|
+
// Alias for vector_u8<A>; this is the return type of serialize(unsigned header_size_bytes).
using vector_bytes = vector_u8<A>;
|
|
208
|
+
|
|
209
|
+
/**
|
|
210
|
+
* This method serializes the sketch as a vector of bytes.
|
|
211
|
+
* An optional header can be reserved in front of the sketch.
|
|
212
|
+
* It is an uninitialized space of a given size.
|
|
213
|
+
* This header is used in Datasketches PostgreSQL extension.
|
|
214
|
+
* @param header_size_bytes space to reserve in front of the sketch
|
|
215
|
+
*/
|
|
216
|
+
vector_bytes serialize(unsigned header_size_bytes = 0) const;
|
|
217
|
+
|
|
218
|
+
/**
|
|
219
|
+
* This method deserializes a sketch from a given stream.
|
|
220
|
+
* @param is input stream
|
|
221
|
+
* @param seed the seed for the hash function that was used to create the sketch
|
|
222
|
+
* @return an instance of a sketch
|
|
223
|
+
*/
|
|
224
|
+
static cpc_sketch_alloc<A> deserialize(std::istream& is, uint64_t seed = DEFAULT_SEED);
|
|
225
|
+
|
|
226
|
+
/**
|
|
227
|
+
* This method deserializes a sketch from a given array of bytes.
|
|
228
|
+
* @param bytes pointer to the array of bytes
|
|
229
|
+
* @param size the size of the array
|
|
230
|
+
* @param seed the seed for the hash function that was used to create the sketch
|
|
231
|
+
* @return an instance of the sketch
|
|
232
|
+
*/
|
|
233
|
+
static cpc_sketch_alloc<A> deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED);
|
|
234
|
+
|
|
235
|
+
// for internal use
|
|
236
|
+
uint32_t get_num_coupons() const;
|
|
237
|
+
|
|
238
|
+
// for debugging
|
|
239
|
+
// this should catch some forms of corruption during serialization-deserialization
|
|
240
|
+
bool validate() const;
|
|
241
|
+
|
|
242
|
+
private:
|
|
243
|
+
static const uint8_t SERIAL_VERSION = 1;
|
|
244
|
+
static const uint8_t FAMILY = 16;
|
|
245
|
+
|
|
246
|
+
// Enumerators take the implicit values 0 through 4 in declaration order.
// NOTE(review): presumably each value is a bit position within the serialized
// flags byte -- confirm against cpc_sketch_impl.hpp.
enum flags { IS_BIG_ENDIAN, IS_COMPRESSED, HAS_HIP, HAS_TABLE, HAS_WINDOW };
|
|
247
|
+
|
|
248
|
+
// Note: except for brief transitional moments, these sketches always obey
|
|
249
|
+
// the following strict mapping between the flavor of a sketch and the
|
|
250
|
+
// number of coupons that it has collected
|
|
251
|
+
enum flavor {
|
|
252
|
+
EMPTY, // 0 == C < 1, where C is the number of coupons collected
|
|
253
|
+
SPARSE, // 1 <= C < 3K/32 (NOTE(review): K is presumably 2^lg_k -- confirm)
|
|
254
|
+
HYBRID, // 3K/32 <= C < K/2
|
|
255
|
+
PINNED, // K/2 <= C < 27K/8 [NB: 27/8 = 3 + 3/8]
|
|
256
|
+
SLIDING // 27K/8 <= C (no upper limit on C is stated for this flavor)
|
|
257
|
+
};
|
|
258
|
+
|
|
259
|
+
uint8_t lg_k;
|
|
260
|
+
uint64_t seed;
|
|
261
|
+
bool was_merged; // is the sketch the result of merging?
|
|
262
|
+
uint32_t num_coupons; // the number of coupons collected so far
|
|
263
|
+
|
|
264
|
+
u32_table<A> surprising_value_table;
|
|
265
|
+
vector_u8<A> sliding_window;
|
|
266
|
+
uint8_t window_offset; // derivable from num_coupons, but made explicit for speed
|
|
267
|
+
uint8_t first_interesting_column; // This is part of a speed optimization
|
|
268
|
+
|
|
269
|
+
double kxp;
|
|
270
|
+
double hip_est_accum;
|
|
271
|
+
|
|
272
|
+
// for deserialization and cpc_union::get_result()
|
|
273
|
+
cpc_sketch_alloc(uint8_t lg_k, uint32_t num_coupons, uint8_t first_interesting_column, u32_table<A>&& table,
|
|
274
|
+
vector_u8<A>&& window, bool has_hip, double kxp, double hip_est_accum, uint64_t seed);
|
|
275
|
+
|
|
276
|
+
inline void row_col_update(uint32_t row_col);
|
|
277
|
+
inline void update_sparse(uint32_t row_col);
|
|
278
|
+
inline void update_windowed(uint32_t row_col);
|
|
279
|
+
inline void update_hip(uint32_t row_col);
|
|
280
|
+
void promote_sparse_to_windowed();
|
|
281
|
+
void move_window();
|
|
282
|
+
void refresh_kxp(const uint64_t* bit_matrix);
|
|
283
|
+
|
|
284
|
+
friend double get_hip_confidence_lb<A>(const cpc_sketch_alloc<A>& sketch, int kappa);
|
|
285
|
+
friend double get_hip_confidence_ub<A>(const cpc_sketch_alloc<A>& sketch, int kappa);
|
|
286
|
+
friend double get_icon_confidence_lb<A>(const cpc_sketch_alloc<A>& sketch, int kappa);
|
|
287
|
+
friend double get_icon_confidence_ub<A>(const cpc_sketch_alloc<A>& sketch, int kappa);
|
|
288
|
+
double get_hip_estimate() const;
|
|
289
|
+
double get_icon_estimate() const;
|
|
290
|
+
|
|
291
|
+
inline flavor determine_flavor() const;
|
|
292
|
+
static inline flavor determine_flavor(uint8_t lg_k, uint64_t c);
|
|
293
|
+
|
|
294
|
+
static inline uint8_t determine_correct_offset(uint8_t lg_k, uint64_t c);
|
|
295
|
+
|
|
296
|
+
// this produces a full-size k-by-64 bit matrix
|
|
297
|
+
vector_u64<A> build_bit_matrix() const;
|
|
298
|
+
|
|
299
|
+
static uint8_t get_preamble_ints(uint32_t num_coupons, bool has_hip, bool has_table, bool has_window);
|
|
300
|
+
inline void write_hip(std::ostream& os) const;
|
|
301
|
+
inline size_t copy_hip_to_mem(void* dst) const;
|
|
302
|
+
|
|
303
|
+
friend cpc_compressor<A>;
|
|
304
|
+
friend cpc_union_alloc<A>;
|
|
305
|
+
};
|
|
306
|
+
|
|
307
|
+
} /* namespace datasketches */
|
|
308
|
+
|
|
309
|
+
#include "cpc_sketch_impl.hpp"
|
|
310
|
+
|
|
311
|
+
#endif
|