datasketches 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +310 -0
- data/NOTICE +11 -0
- data/README.md +126 -0
- data/ext/datasketches/cpc_wrapper.cpp +50 -0
- data/ext/datasketches/ext.cpp +12 -0
- data/ext/datasketches/extconf.rb +11 -0
- data/ext/datasketches/hll_wrapper.cpp +69 -0
- data/lib/datasketches.rb +9 -0
- data/lib/datasketches/version.rb +3 -0
- data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
- data/vendor/datasketches-cpp/LICENSE +311 -0
- data/vendor/datasketches-cpp/MANIFEST.in +19 -0
- data/vendor/datasketches-cpp/NOTICE +11 -0
- data/vendor/datasketches-cpp/README.md +42 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
- data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
- data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
- data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
- data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
- data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
- data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
- data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
- data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
- data/vendor/datasketches-cpp/pyproject.toml +17 -0
- data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
- data/vendor/datasketches-cpp/python/README.md +78 -0
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
- data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +94 -0
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
- data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
- data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
- data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
- data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
- metadata +302 -0
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
global-include CMakeLists.txt
|
|
2
|
+
global-include *.cpp
|
|
3
|
+
global-include *.c
|
|
4
|
+
global-include *.hpp
|
|
5
|
+
global-include *.h
|
|
6
|
+
global-include *.bin
|
|
7
|
+
|
|
8
|
+
global-exclude .git*
|
|
9
|
+
|
|
10
|
+
recursive-include python/pybind11 *
|
|
11
|
+
|
|
12
|
+
graft common
|
|
13
|
+
graft cpc
|
|
14
|
+
graft fi
|
|
15
|
+
graft hll
|
|
16
|
+
graft kll
|
|
17
|
+
graft theta
|
|
18
|
+
graft sampling
|
|
19
|
+
graft python
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
Apache DataSketches-cpp
|
|
2
|
+
Copyright 2020 The Apache Software Foundation
|
|
3
|
+
|
|
4
|
+
Copyright 2015-2018 Yahoo
|
|
5
|
+
Copyright 2019 Verizon Media
|
|
6
|
+
|
|
7
|
+
This product includes software developed at
|
|
8
|
+
The Apache Software Foundation (http://www.apache.org/).
|
|
9
|
+
|
|
10
|
+
Prior to moving to ASF, the software for this project was developed at
|
|
11
|
+
Yahoo (now Verizon Media) (https://developer.yahoo.com).
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# DataSketches Core C++ Library Component
|
|
2
|
+
This is the core C++ component of the DataSketches library. It contains all of the key sketching algorithms that are in the Java component and can be accessed directly from user applications.
|
|
3
|
+
|
|
4
|
+
This component is also a dependency of other components of the library that create adaptors for target systems, such as PostgreSQL.
|
|
5
|
+
|
|
6
|
+
Note that we have a parallel core component for Java implementations of the same sketch algorithms,
|
|
7
|
+
[datasketches-java](https://github.com/apache/datasketches-java).
|
|
8
|
+
|
|
9
|
+
Please visit the main [DataSketches website](https://datasketches.apache.org) for more information.
|
|
10
|
+
|
|
11
|
+
If you are interested in making contributions to this site please see our [Community](https://datasketches.apache.org/docs/Community/) page for how to contact us.
|
|
12
|
+
|
|
13
|
+
---
|
|
14
|
+
|
|
15
|
+
This code requires C++11. It was tested with GCC 4.8.5 (standard in Red Hat at the time of this writing), GCC 8.2.0 and Apple LLVM version 10.0.1 (clang-1001.0.46.4).
|
|
16
|
+
|
|
17
|
+
This includes Python bindings. For the Python interface, see the README notes in [the python subdirectory](https://github.com/apache/datasketches-cpp/tree/master/python).
|
|
18
|
+
|
|
19
|
+
This library is header-only. The build process provided is only for building unit tests and the python library.
|
|
20
|
+
|
|
21
|
+
Building the unit tests requires cmake 3.12.0 or higher.
|
|
22
|
+
|
|
23
|
+
Installing the latest cmake on OSX: brew install cmake
|
|
24
|
+
|
|
25
|
+
Building and running unit tests using cmake for OSX and Linux:
|
|
26
|
+
|
|
27
|
+
```
|
|
28
|
+
$ cd build
|
|
29
|
+
$ cmake ..
|
|
30
|
+
$ make
|
|
31
|
+
$ make test
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Building and running unit tests using cmake for Windows from the command line:
|
|
35
|
+
|
|
36
|
+
```
|
|
37
|
+
$ cd build
|
|
38
|
+
$ cmake ..
|
|
39
|
+
$ cd ..
|
|
40
|
+
$ cmake --build build --config Release
|
|
41
|
+
$ cmake --build build --config Release --target RUN_TESTS
|
|
42
|
+
```
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
|
3
|
+
# distributed with this work for additional information
|
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
|
6
|
+
# "License"); you may not use this file except in compliance
|
|
7
|
+
# with the License. You may obtain a copy of the License at
|
|
8
|
+
#
|
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
#
|
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
|
12
|
+
# software distributed under the License is distributed on an
|
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
14
|
+
# KIND, either express or implied. See the License for the
|
|
15
|
+
# specific language governing permissions and limitations
|
|
16
|
+
# under the License.
|
|
17
|
+
|
|
18
|
+
add_library(common INTERFACE)
|
|
19
|
+
|
|
20
|
+
if (BUILD_TESTS)
|
|
21
|
+
add_subdirectory(test)
|
|
22
|
+
endif()
|
|
23
|
+
|
|
24
|
+
target_include_directories(common
|
|
25
|
+
INTERFACE
|
|
26
|
+
$<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
|
|
27
|
+
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
target_compile_features(common INTERFACE cxx_std_11)
|
|
31
|
+
|
|
32
|
+
target_sources(common
|
|
33
|
+
INTERFACE
|
|
34
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/common_defs.hpp
|
|
35
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/memory_operations.hpp
|
|
36
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/MurmurHash3.h
|
|
37
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/serde.hpp
|
|
38
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/count_zeros.hpp
|
|
39
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/inv_pow2_table.hpp
|
|
40
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/binomial_bounds.hpp
|
|
41
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_back_inserter.hpp
|
|
42
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_forward.hpp
|
|
43
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/ceiling_power_of_2.hpp
|
|
44
|
+
)
|
|
45
|
+
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
// Minimally modified from Austin Appleby's code:
|
|
2
|
+
// * Removed MurmurHash3_x86_32 and MurmurHash3_x86_128
|
|
3
|
+
// * Changed input seed in MurmurHash3_x64_128 to uint64_t
|
|
4
|
+
// * Define and use HashState reference to return result
|
|
5
|
+
// * Made entire hash function defined inline
|
|
6
|
+
//-----------------------------------------------------------------------------
|
|
7
|
+
// MurmurHash3 was written by Austin Appleby, and is placed in the public
|
|
8
|
+
// domain. The author hereby disclaims copyright to this source code.
|
|
9
|
+
|
|
10
|
+
// Note - The x86 and x64 versions do _not_ produce the same results, as the
|
|
11
|
+
// algorithms are optimized for their respective platforms. You can still
|
|
12
|
+
// compile and run any of them on any platform, but your performance with the
|
|
13
|
+
// non-native version will be less than optimal.
|
|
14
|
+
|
|
15
|
+
#ifndef _MURMURHASH3_H_
|
|
16
|
+
#define _MURMURHASH3_H_
|
|
17
|
+
|
|
18
|
+
//-----------------------------------------------------------------------------
|
|
19
|
+
// Platform-specific functions and macros
|
|
20
|
+
|
|
21
|
+
// Microsoft Visual Studio
|
|
22
|
+
|
|
23
|
+
#if defined(_MSC_VER)
|
|
24
|
+
|
|
25
|
+
typedef unsigned char uint8_t;
|
|
26
|
+
typedef unsigned int uint32_t;
|
|
27
|
+
typedef unsigned __int64 uint64_t;
|
|
28
|
+
|
|
29
|
+
#define FORCE_INLINE __forceinline
|
|
30
|
+
|
|
31
|
+
#include <stdlib.h>
|
|
32
|
+
|
|
33
|
+
#define ROTL32(x,y) _rotl(x,y)
|
|
34
|
+
#define ROTL64(x,y) _rotl64(x,y)
|
|
35
|
+
|
|
36
|
+
#define BIG_CONSTANT(x) (x)
|
|
37
|
+
|
|
38
|
+
// Other compilers
|
|
39
|
+
|
|
40
|
+
#else // defined(_MSC_VER)
|
|
41
|
+
|
|
42
|
+
#include <stdint.h>
|
|
43
|
+
|
|
44
|
+
#define FORCE_INLINE inline __attribute__((always_inline))
|
|
45
|
+
|
|
46
|
+
// Rotates the 32-bit value x left by r bit positions.
//
// The rotation count is reduced modulo 32, so r == 0 (or any multiple of 32)
// returns x unchanged. The naive form (x << r) | (x >> (32 - r)) shifts by the
// full width of the type when r == 0, which is undefined behavior in C++.
// For every r in 1..31 the result is identical to the naive form.
inline uint32_t rotl32 ( uint32_t x, int8_t r )
{
  const unsigned left  = static_cast<unsigned>(r) & 31u;
  // Masking the complementary count keeps it in 0..31 even when left == 0.
  const unsigned right = (32u - left) & 31u;
  return (x << left) | (x >> right);
}
|
|
50
|
+
|
|
51
|
+
// Rotates the 64-bit value x left by r bit positions.
//
// The rotation count is reduced modulo 64, so r == 0 (or any multiple of 64)
// returns x unchanged. The naive form (x << r) | (x >> (64 - r)) shifts by the
// full width of the type when r == 0, which is undefined behavior in C++.
// For every r in 1..63 the result is identical to the naive form.
inline uint64_t rotl64 ( uint64_t x, int8_t r )
{
  const unsigned left  = static_cast<unsigned>(r) & 63u;
  // Masking the complementary count keeps it in 0..63 even when left == 0.
  const unsigned right = (64u - left) & 63u;
  return (x << left) | (x >> right);
}
|
|
55
|
+
|
|
56
|
+
#define ROTL32(x,y) rotl32(x,y)
|
|
57
|
+
#define ROTL64(x,y) rotl64(x,y)
|
|
58
|
+
|
|
59
|
+
#define BIG_CONSTANT(x) (x##LLU)
|
|
60
|
+
|
|
61
|
+
#endif // !defined(_MSC_VER)
|
|
62
|
+
|
|
63
|
+
//-----------------------------------------------------------------------------
|
|
64
|
+
|
|
65
|
+
//-----------------------------------------------------------------------------
|
|
66
|
+
// Return type - Using C++ reference for return type which should allow better
|
|
67
|
+
// compiler optimization than a void* pointer
|
|
68
|
+
// 128-bit hash result, held as two 64-bit halves. MurmurHash3_x64_128 writes
// its output into an instance of this type through a reference parameter.
struct HashState {
  uint64_t h1;  // first 64-bit half of the hash state
  uint64_t h2;  // second 64-bit half of the hash state
};
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
//-----------------------------------------------------------------------------
|
|
75
|
+
// Block read - if your platform needs to do endian-swapping or can only
|
|
76
|
+
// handle aligned reads, do the conversion here
|
|
77
|
+
|
|
78
|
+
// Returns the i-th 64-bit word of the buffer starting at p, in native byte
// order (the same value a plain p[i] load yields where that load is valid).
//
// The hash function obtains p by casting an arbitrary byte pointer, so the
// address is not guaranteed to be 8-byte aligned and the underlying object is
// not a uint64_t. Dereferencing p[i] directly is therefore undefined behavior
// (misaligned access / aliasing violation) and can fault on strict-alignment
// targets. Copying through unsigned char is always well defined; optimizing
// compilers typically turn this loop into a single load.
FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
{
  // Compute the byte offset in a wide type so large block indexes cannot
  // overflow int arithmetic.
  const unsigned char * src = reinterpret_cast<const unsigned char *>(p)
      + static_cast<long long>(i) * static_cast<long long>(sizeof(uint64_t));

  uint64_t word = 0;
  unsigned char * dst = reinterpret_cast<unsigned char *>(&word);
  for (unsigned b = 0; b < sizeof(uint64_t); ++b) {
    dst[b] = src[b];
  }
  return word;
}
|
|
82
|
+
|
|
83
|
+
//-----------------------------------------------------------------------------
|
|
84
|
+
// Finalization mix - force all bits of a hash block to avalanche
|
|
85
|
+
|
|
86
|
+
// Finalization mix: three xor-shift steps by 33 bits, interleaved with two
// multiplications by fixed 64-bit constants. Returns the mixed value.
FORCE_INLINE uint64_t fmix64 ( uint64_t k )
{
  const uint64_t firstMultiplier  = BIG_CONSTANT(0xff51afd7ed558ccd);
  const uint64_t secondMultiplier = BIG_CONSTANT(0xc4ceb9fe1a85ec53);

  k = (k ^ (k >> 33)) * firstMultiplier;
  k = (k ^ (k >> 33)) * secondMultiplier;
  return k ^ (k >> 33);
}
|
|
96
|
+
|
|
97
|
+
// Computes the 128-bit MurmurHash3 (x64 variant) of a byte buffer.
//
//   key      - pointer to the first byte of the input
//   lenBytes - number of input bytes
//   seed     - initial value for both 64-bit halves of the state
//   out      - receives the result in out.h1 / out.h2
//
// The input is consumed in three phases: full 16-byte blocks, a tail of up to
// 15 remaining bytes, and a finalization step that folds in the length.
FORCE_INLINE void MurmurHash3_x64_128(const void* key, int lenBytes, uint64_t seed, HashState& out) {
  const uint64_t mixA = BIG_CONSTANT(0x87c37b91114253d5);
  const uint64_t mixB = BIG_CONSTANT(0x4cf5ad432745937f);

  const uint8_t* const bytes = static_cast<const uint8_t*>(key);

  // Both halves of the state start from the seed.
  out.h1 = seed;
  out.h2 = seed;

  // Body: every complete 16-byte block is read as two 64-bit words.
  const int fullBlocks = lenBytes >> 4;
  const uint64_t* const words = reinterpret_cast<const uint64_t*>(bytes);
  for (int blk = 0; blk < fullBlocks; ++blk) {
    uint64_t lo = getblock64(words, blk * 2 + 0);
    uint64_t hi = getblock64(words, blk * 2 + 1);

    lo *= mixA;
    lo = ROTL64(lo, 31);
    lo *= mixB;
    out.h1 ^= lo;
    out.h1 = ROTL64(out.h1, 27);
    out.h1 += out.h2;
    out.h1 = out.h1 * 5 + 0x52dce729;

    hi *= mixB;
    hi = ROTL64(hi, 33);
    hi *= mixA;
    out.h2 ^= hi;
    out.h2 = ROTL64(out.h2, 31);
    out.h2 += out.h1;
    out.h2 = out.h2 * 5 + 0x38495ab5;
  }

  // Tail: the 0..15 bytes that follow the last complete block.
  const uint8_t* const rest = bytes + (fullBlocks << 4);
  uint64_t tailLo = 0;
  uint64_t tailHi = 0;

  // Every case deliberately falls through to the next one so that all
  // remaining bytes below the entry point are accumulated.
  switch (lenBytes & 15) {
    case 15: tailHi ^= static_cast<uint64_t>(rest[14]) << 48; // falls through
    case 14: tailHi ^= static_cast<uint64_t>(rest[13]) << 40; // falls through
    case 13: tailHi ^= static_cast<uint64_t>(rest[12]) << 32; // falls through
    case 12: tailHi ^= static_cast<uint64_t>(rest[11]) << 24; // falls through
    case 11: tailHi ^= static_cast<uint64_t>(rest[10]) << 16; // falls through
    case 10: tailHi ^= static_cast<uint64_t>(rest[9]) << 8;   // falls through
    case 9:
      tailHi ^= static_cast<uint64_t>(rest[8]);
      tailHi *= mixB;
      tailHi = ROTL64(tailHi, 33);
      tailHi *= mixA;
      out.h2 ^= tailHi;
      // falls through
    case 8: tailLo ^= static_cast<uint64_t>(rest[7]) << 56;   // falls through
    case 7: tailLo ^= static_cast<uint64_t>(rest[6]) << 48;   // falls through
    case 6: tailLo ^= static_cast<uint64_t>(rest[5]) << 40;   // falls through
    case 5: tailLo ^= static_cast<uint64_t>(rest[4]) << 32;   // falls through
    case 4: tailLo ^= static_cast<uint64_t>(rest[3]) << 24;   // falls through
    case 3: tailLo ^= static_cast<uint64_t>(rest[2]) << 16;   // falls through
    case 2: tailLo ^= static_cast<uint64_t>(rest[1]) << 8;    // falls through
    case 1:
      tailLo ^= static_cast<uint64_t>(rest[0]);
      tailLo *= mixA;
      tailLo = ROTL64(tailLo, 31);
      tailLo *= mixB;
      out.h1 ^= tailLo;
  }

  // Finalization: fold in the length, cross-add the halves, avalanche each
  // half with fmix64, then cross-add once more.
  out.h1 ^= lenBytes;
  out.h2 ^= lenBytes;

  out.h1 += out.h2;
  out.h2 += out.h1;

  out.h1 = fmix64(out.h1);
  out.h2 = fmix64(out.h2);

  out.h1 += out.h2;
  out.h2 += out.h1;
}
|
|
170
|
+
|
|
171
|
+
//-----------------------------------------------------------------------------
|
|
172
|
+
|
|
173
|
+
#endif // _MURMURHASH3_H_
|
|
@@ -0,0 +1,458 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef BINOMIAL_BOUNDS_HPP_
|
|
21
|
+
#define BINOMIAL_BOUNDS_HPP_
|
|
22
|
+
|
|
23
|
+
#include <algorithm>
|
|
24
|
+
#include <cmath>
|
|
25
|
+
|
|
26
|
+
/*
|
|
27
|
+
* This class enables the estimation of error bounds given a sample set size, the sampling
|
|
28
|
+
* probability theta, the number of standard deviations and a simple noDataSeen flag. This can
|
|
29
|
+
* be used to estimate error bounds for fixed threshold sampling as well as the error bounds
|
|
30
|
+
* calculations for sketches.
|
|
31
|
+
*
|
|
32
|
+
* author Alexander Saydakov
|
|
33
|
+
* author Lee Rhodes
|
|
34
|
+
* author Kevin Lang
|
|
35
|
+
*/
|
|
36
|
+
|
|
37
|
+
namespace datasketches {
|
|
38
|
+
|
|
39
|
+
// Tail probability (delta) associated with each supported number of standard deviations.
// Indexed directly by num_std_devs, which check_num_std_devs() restricts to 1, 2 or 3;
// entry 0 is only a placeholder so that the index needs no offset.
static constexpr double delta_of_num_std_devs[] = {
|
|
40
|
+
0.5000000000000000000, // not actually using this value
|
|
41
|
+
0.1586553191586026479,
|
|
42
|
+
0.0227502618904135701,
|
|
43
|
+
0.0013498126861731796
|
|
44
|
+
};
|
|
45
|
+
|
|
46
|
+
// Modified "number of standard deviations" values used by compute_approx_binomial_lower_bound()
// when it falls back to the continuity-corrected Gaussian approximation for a small sample count k.
// Layout: three consecutive entries per k (for num_std_devs = 1, 2, 3), k = 0..120, so the
// lookup is lb_equiv_table[3 * k + (num_std_devs - 1)].
// The k = 0 row holds placeholder values; the lower-bound code only indexes the table for
// 2 <= k <= 120 (k = 0 and k = 1 are handled before the lookup).
static constexpr double lb_equiv_table[] = {
|
|
47
|
+
1.0, 2.0, 3.0, // fake values for k = 0
|
|
48
|
+
0.78733703534118149, 3.14426768537558132, 13.56789685109913535, // k = 1
|
|
49
|
+
0.94091379266077979, 2.64699271711145911, 6.29302733018320737, // k = 2
|
|
50
|
+
0.96869128474958188, 2.46531676590527127, 4.97375283467403051, // k = 3
|
|
51
|
+
0.97933572521046131, 2.37418810664669877, 4.44899975481712318, // k = 4
|
|
52
|
+
0.98479165917274258, 2.31863116255024693, 4.16712379778553554, // k = 5
|
|
53
|
+
0.98806033915698777, 2.28075536565225434, 3.99010556144099837, // k = 6
|
|
54
|
+
0.99021896790580399, 2.25302005857281529, 3.86784477136922078, // k = 7
|
|
55
|
+
0.99174267079089873, 2.23168103978522936, 3.77784896945266269, // k = 8
|
|
56
|
+
0.99287147837287648, 2.21465899260871879, 3.70851932988722410, // k = 9
|
|
57
|
+
0.99373900046805375, 2.20070155496262032, 3.65326029076638292, // k = 10
|
|
58
|
+
0.99442519013851438, 2.18900651202670815, 3.60803817612955413, // k = 11
|
|
59
|
+
0.99498066823221620, 2.17903457780744247, 3.57024330407946877, // k = 12
|
|
60
|
+
0.99543899410224412, 2.17040883161922693, 3.53810982030634591, // k = 13
|
|
61
|
+
0.99582322541263579, 2.16285726913676513, 3.51039837124298515, // k = 14
|
|
62
|
+
0.99614973311747690, 2.15617827879603396, 3.48621230377099778, // k = 15
|
|
63
|
+
0.99643042892560629, 2.15021897666090922, 3.46488605693562590, // k = 16
|
|
64
|
+
0.99667418783778317, 2.14486114872480016, 3.44591466064832730, // k = 17
|
|
65
|
+
0.99688774875812669, 2.14001181420209718, 3.42890765690452781, // k = 18
|
|
66
|
+
0.99707632299691795, 2.13559675336844634, 3.41355809420343803, // k = 19
|
|
67
|
+
0.99724399084971083, 2.13155592217421486, 3.39962113251016262, // k = 20
|
|
68
|
+
0.99739400151915447, 2.12784018863251845, 3.38689892877548004, // k = 21
|
|
69
|
+
0.99752896842633731, 2.12440890875851096, 3.37522975271599535, // k = 22
|
|
70
|
+
0.99765101725122918, 2.12122815311133195, 3.36448003577621080, // k = 23
|
|
71
|
+
0.99776189496810730, 2.11826934724291505, 3.35453840911279144, // k = 24
|
|
72
|
+
0.99786304821586214, 2.11550823850916458, 3.34531123809287578, // k = 25
|
|
73
|
+
0.99795568665180667, 2.11292409529477254, 3.33671916527694634, // k = 26
|
|
74
|
+
0.99804083063483517, 2.11049908609763293, 3.32869446834217797, // k = 27
|
|
75
|
+
0.99811933910984862, 2.10821776918189130, 3.32117898316676019, // k = 28
|
|
76
|
+
0.99819195457286014, 2.10606671027090897, 3.31412243534683171, // k = 29
|
|
77
|
+
0.99825930555178388, 2.10403415237001923, 3.30748113008135647, // k = 30
|
|
78
|
+
0.99832193858154028, 2.10210975877822648, 3.30121691946897045, // k = 31
|
|
79
|
+
0.99838032666573895, 2.10028440670842542, 3.29529629751144171, // k = 32
|
|
80
|
+
0.99843488390555990, 2.09855000145353188, 3.28968974413223236, // k = 33
|
|
81
|
+
0.99848596721417948, 2.09689934193824001, 3.28437111460505093, // k = 34
|
|
82
|
+
0.99853390005924325, 2.09532599155502908, 3.27931717312372939, // k = 35
|
|
83
|
+
0.99857895741078551, 2.09382418262592296, 3.27450718840060517, // k = 36
|
|
84
|
+
0.99862138880970974, 2.09238872751677718, 3.26992261182860489, // k = 37
|
|
85
|
+
0.99866141580770318, 2.09101494715108061, 3.26554677962434425, // k = 38
|
|
86
|
+
0.99869923565267982, 2.08969860402822860, 3.26136468165239535, // k = 39
|
|
87
|
+
0.99873502010169091, 2.08843585627218431, 3.25736275677081721, // k = 40
|
|
88
|
+
0.99876893292508839, 2.08722321436752623, 3.25352872241415980, // k = 41
|
|
89
|
+
0.99880111078502409, 2.08605749165553789, 3.24985141664350863, // k = 42
|
|
90
|
+
0.99883168573342118, 2.08493577529222307, 3.24632068399498053, // k = 43
|
|
91
|
+
0.99886077231613513, 2.08385540129560809, 3.24292724848112357, // k = 44
|
|
92
|
+
0.99888847451828155, 2.08281392374021834, 3.23966263299664092, // k = 45
|
|
93
|
+
0.99891488795844907, 2.08180908991394631, 3.23651906111521726, // k = 46
|
|
94
|
+
0.99894010085196783, 2.08083882998420222, 3.23348939240611344, // k = 47
|
|
95
|
+
0.99896419358239541, 2.07990122528650545, 3.23056705515594444, // k = 48
|
|
96
|
+
0.99898723510594323, 2.07899450946285924, 3.22774598963252402, // k = 49
|
|
97
|
+
0.99900929266780736, 2.07811704477046533, 3.22502059972006805, // k = 50
|
|
98
|
+
0.99903043086155208, 2.07726730587160091, 3.22238570890294795, // k = 51
|
|
99
|
+
0.99905070073845081, 2.07644388314946582, 3.21983651940365689, // k = 52
|
|
100
|
+
0.99907015770423868, 2.07564546080757850, 3.21736857351049821, // k = 53
|
|
101
|
+
0.99908884779227947, 2.07487081196367740, 3.21497773796417619, // k = 54
|
|
102
|
+
0.99910681586905525, 2.07411879634256024, 3.21266015316183484, // k = 55
|
|
103
|
+
0.99912410177549305, 2.07338834403498140, 3.21041222805715165, // k = 56
|
|
104
|
+
0.99914074347179849, 2.07267845454973099, 3.20823061166797174, // k = 57
|
|
105
|
+
0.99915677607464204, 2.07198819052374006, 3.20611216970604573, // k = 58
|
|
106
|
+
0.99917223149395795, 2.07131667846186929, 3.20405396962596001, // k = 59
|
|
107
|
+
0.99918714153457699, 2.07066309019154460, 3.20205326110445299, // k = 60
|
|
108
|
+
0.99920153247185794, 2.07002665203046377, 3.20010746990493544, // k = 61
|
|
109
|
+
0.99921543193525508, 2.06940663431663552, 3.19821417453343315, // k = 62
|
|
110
|
+
0.99922886570365677, 2.06880235245998279, 3.19637109973109546, // k = 63
|
|
111
|
+
0.99924185357357942, 2.06821315729285971, 3.19457610621114441, // k = 64
|
|
112
|
+
0.99925441845175555, 2.06763843812092318, 3.19282717869864996, // k = 65
|
|
113
|
+
0.99926658263325407, 2.06707761824370095, 3.19112241228646099, // k = 66
|
|
114
|
+
0.99927836173816331, 2.06653015295219689, 3.18946001739936946, // k = 67
|
|
115
|
+
0.99928977431994781, 2.06599552505539918, 3.18783829446098821, // k = 68
|
|
116
|
+
0.99930083753795884, 2.06547324585920933, 3.18625564538041317, // k = 69
|
|
117
|
+
0.99931156864562354, 2.06496285191821016, 3.18471055124089730, // k = 70
|
|
118
|
+
0.99932197985521043, 2.06446390392778767, 3.18320157510865442, // k = 71
|
|
119
|
+
0.99933208559809827, 2.06397598606787369, 3.18172735837393361, // k = 72
|
|
120
|
+
0.99934190032416836, 2.06349869971447220, 3.18028661102792398, // k = 73
|
|
121
|
+
0.99935143390791836, 2.06303166975550312, 3.17887810481605015, // k = 74
|
|
122
|
+
0.99936070171270330, 2.06257453607466346, 3.17750067581857820, // k = 75
|
|
123
|
+
0.99936971103502970, 2.06212696042919674, 3.17615321728274580, // k = 76
|
|
124
|
+
0.99937847392385493, 2.06168861430600714, 3.17483467831510779, // k = 77
|
|
125
|
+
0.99938700168914352, 2.06125918927764928, 3.17354405480557489, // k = 78
|
|
126
|
+
0.99939530099953799, 2.06083838987589729, 3.17228039269048168, // k = 79
|
|
127
|
+
0.99940338278830154, 2.06042593411496000, 3.17104278166036124, // k = 80
|
|
128
|
+
0.99941125463777780, 2.06002155276328835, 3.16983035274597569, // k = 81
|
|
129
|
+
0.99941892470027938, 2.05962498741951094, 3.16864227952240185, // k = 82
|
|
130
|
+
0.99942640059737187, 2.05923599161263837, 3.16747776846497686, // k = 83
|
|
131
|
+
0.99943368842187397, 2.05885433061945378, 3.16633606416374391, // k = 84
|
|
132
|
+
0.99944079790603269, 2.05847977868873500, 3.16521644518826406, // k = 85
|
|
133
|
+
0.99944773295734990, 2.05811212058944193, 3.16411821883858124, // k = 86
|
|
134
|
+
0.99945450059186669, 2.05775114781260982, 3.16304072400711789, // k = 87
|
|
135
|
+
0.99946110646314423, 2.05739666442039493, 3.16198332650733960, // k = 88
|
|
136
|
+
0.99946755770463369, 2.05704847678819647, 3.16094541781455973, // k = 89
|
|
137
|
+
0.99947385746861528, 2.05670640500335367, 3.15992641851471490, // k = 90
|
|
138
|
+
0.99948001256305474, 2.05637027420314666, 3.15892576988736096, // k = 91
|
|
139
|
+
0.99948602689656241, 2.05603991286400856, 3.15794293484717059, // k = 92
|
|
140
|
+
0.99949190674294641, 2.05571516158917689, 3.15697740043813724, // k = 93
|
|
141
|
+
0.99949765436329585, 2.05539586490317561, 3.15602867309343083, // k = 94
|
|
142
|
+
0.99950327557880314, 2.05508187237845164, 3.15509627710042651, // k = 95
|
|
143
|
+
0.99950877461972709, 2.05477304104951486, 3.15417975753007340, // k = 96
|
|
144
|
+
0.99951415481862682, 2.05446923022574879, 3.15327867462917766, // k = 97
|
|
145
|
+
0.99951942042375208, 2.05417030908833453, 3.15239260700215596, // k = 98
|
|
146
|
+
0.99952457390890004, 2.05387614661762541, 3.15152114915238712, // k = 99
|
|
147
|
+
0.99952962005008317, 2.05358662050909402, 3.15066390921020911, // k = 100
|
|
148
|
+
0.99953456216121594, 2.05330161104427589, 3.14982051097524618, // k = 101
|
|
149
|
+
0.99953940176368405, 2.05302100378725072, 3.14899059183684926, // k = 102
|
|
150
|
+
0.99954414373920031, 2.05274468493067275, 3.14817379948561893, // k = 103
|
|
151
|
+
0.99954879047621148, 2.05247255013657082, 3.14736979964868624, // k = 104
|
|
152
|
+
0.99955334485656522, 2.05220449388099269, 3.14657826610371671, // k = 105
|
|
153
|
+
0.99955780993869325, 2.05194041831310869, 3.14579888316276879, // k = 106
|
|
154
|
+
0.99956218652590678, 2.05168022402710903, 3.14503134811607765, // k = 107
|
|
155
|
+
0.99956647932785359, 2.05142381889103831, 3.14427536967733090, // k = 108
|
|
156
|
+
0.99957069025060719, 2.05117111251445294, 3.14353066260227365, // k = 109
|
|
157
|
+
0.99957482032178291, 2.05092201793428330, 3.14279695558593630, // k = 110
|
|
158
|
+
0.99957887261450651, 2.05067645094720774, 3.14207398336887422, // k = 111
|
|
159
|
+
0.99958284988383639, 2.05043432833224415, 3.14136149076028914, // k = 112
|
|
160
|
+
0.99958675435604505, 2.05019557189746138, 3.14065923143530767, // k = 113
|
|
161
|
+
0.99959058650074439, 2.04996010556124020, 3.13996696426707445, // k = 114
|
|
162
|
+
0.99959434898201494, 2.04972785368377686, 3.13928445867830419, // k = 115
|
|
163
|
+
0.99959804437042976, 2.04949874512311681, 3.13861149103462367, // k = 116
|
|
164
|
+
0.99960167394553423, 2.04927271043337100, 3.13794784369528656, // k = 117
|
|
165
|
+
0.99960523957651048, 2.04904968140490951, 3.13729330661277572, // k = 118
|
|
166
|
+
0.99960874253329735, 2.04882959397491504, 3.13664767767019725, // k = 119
|
|
167
|
+
0.99961218434327748, 2.04861238220240693, 3.13601075688413289 // k = 120
|
|
168
|
+
};
|
|
169
|
+
|
|
170
|
+
// Modified "number of standard deviations" values used by compute_approx_binomial_upper_bound()
// when it falls back to the continuity-corrected Gaussian approximation for a small sample count k.
// Layout: three consecutive entries per k (for num_std_devs = 1, 2, 3), k = 0..120, so the
// lookup is ub_equiv_table[3 * k + (num_std_devs - 1)].
// The k = 0 row holds placeholder values; the upper-bound code only indexes the table for
// 1 <= k <= 120 (k = 0 is handled before the lookup).
static constexpr double ub_equiv_table[] = {
|
|
171
|
+
1.0, 2.0, 3.0, // fake values for k = 0
|
|
172
|
+
0.99067760836669549, 1.75460517119302040, 2.48055626001627161, // k = 1
|
|
173
|
+
0.99270518097577565, 1.78855957509907171, 2.53863835259832626, // k = 2
|
|
174
|
+
0.99402032633599902, 1.81047286499563143, 2.57811676180597260, // k = 3
|
|
175
|
+
0.99492607629539975, 1.82625928017762362, 2.60759550546498531, // k = 4
|
|
176
|
+
0.99558653966013821, 1.83839160339161367, 2.63086812358551470, // k = 5
|
|
177
|
+
0.99608981951632813, 1.84812399034444752, 2.64993712523727254, // k = 6
|
|
178
|
+
0.99648648035983456, 1.85617372053235385, 2.66598485907860550, // k = 7
|
|
179
|
+
0.99680750790483330, 1.86298655802610824, 2.67976541374471822, // k = 8
|
|
180
|
+
0.99707292880049181, 1.86885682585270274, 2.69178781407745760, // k = 9
|
|
181
|
+
0.99729614928489241, 1.87398826101983218, 2.70241106542158604, // k = 10
|
|
182
|
+
0.99748667952445658, 1.87852708449801753, 2.71189717290596377, // k = 11
|
|
183
|
+
0.99765127712748836, 1.88258159501103250, 2.72044290303773550, // k = 12
|
|
184
|
+
0.99779498340305395, 1.88623391878036273, 2.72819957382063194, // k = 13
|
|
185
|
+
0.99792160418357412, 1.88954778748873764, 2.73528576807902368, // k = 14
|
|
186
|
+
0.99803398604944960, 1.89257337682371940, 2.74179612106766513, // k = 15
|
|
187
|
+
0.99813449883217231, 1.89535099316557876, 2.74780718300419835, // k = 16
|
|
188
|
+
0.99822494122659577, 1.89791339232732525, 2.75338173141955167, // k = 17
|
|
189
|
+
0.99830679915913834, 1.90028752122407241, 2.75857186416826039, // k = 18
|
|
190
|
+
0.99838117410831728, 1.90249575897183831, 2.76342117562634826, // k = 19
|
|
191
|
+
0.99844913407071090, 1.90455689090418900, 2.76796659454200267, // k = 20
|
|
192
|
+
0.99851147736424650, 1.90648682834171268, 2.77223944710058845, // k = 21
|
|
193
|
+
0.99856879856019987, 1.90829917277082473, 2.77626682032629901, // k = 22
|
|
194
|
+
0.99862183849734265, 1.91000561415842185, 2.78007199816156003, // k = 23
|
|
195
|
+
0.99867096266018507, 1.91161621560812023, 2.78367524259661536, // k = 24
|
|
196
|
+
0.99871656986212543, 1.91313978579765376, 2.78709435016625662, // k = 25
|
|
197
|
+
0.99875907577771272, 1.91458400425526065, 2.79034488416175463, // k = 26
|
|
198
|
+
0.99879885565047744, 1.91595563175945927, 2.79344064132371273, // k = 27
|
|
199
|
+
0.99883610756373287, 1.91726064301425936, 2.79639384757751941, // k = 28
|
|
200
|
+
0.99887095169674467, 1.91850441099725799, 2.79921543574803877, // k = 29
|
|
201
|
+
0.99890379414739527, 1.91969155477030995, 2.80191513182441554, // k = 30
|
|
202
|
+
0.99893466279047516, 1.92082633358913313, 2.80450167352080371, // k = 31
|
|
203
|
+
0.99896392088177777, 1.92191254955568525, 2.80698295731653502, // k = 32
|
|
204
|
+
0.99899147889385631, 1.92295362479495680, 2.80936614404217266, // k = 33
|
|
205
|
+
0.99901764688726757, 1.92395267400968351, 2.81165765979318394, // k = 34
|
|
206
|
+
0.99904238606342233, 1.92491244978191389, 2.81386337393604435, // k = 35
|
|
207
|
+
0.99906590152386343, 1.92583552644848055, 2.81598868034527072, // k = 36
|
|
208
|
+
0.99908829040739988, 1.92672418013918900, 2.81803841726804194, // k = 37
|
|
209
|
+
0.99910959420023460, 1.92758051694144683, 2.82001709302821268, // k = 38
|
|
210
|
+
0.99912996403594434, 1.92840654943159961, 2.82192875763732332, // k = 39
|
|
211
|
+
0.99914930224576892, 1.92920397044028391, 2.82377730628954282, // k = 40
|
|
212
|
+
0.99916781270195543, 1.92997447498220254, 2.82556612075063640, // k = 41
|
|
213
|
+
0.99918553179077207, 1.93071949211818605, 2.82729843191989971, // k = 42
|
|
214
|
+
0.99920250730914972, 1.93144048613876862, 2.82897728689417249, // k = 43
|
|
215
|
+
0.99921873345181211, 1.93213870990595638, 2.83060537017752267, // k = 44
|
|
216
|
+
0.99923435180002684, 1.93281536508689555, 2.83218527795750674, // k = 45
|
|
217
|
+
0.99924930425362390, 1.93347145882316340, 2.83371938965598247, // k = 46
|
|
218
|
+
0.99926370394567243, 1.93410820221384938, 2.83520990872793277, // k = 47
|
|
219
|
+
0.99927750755296074, 1.93472643138986200, 2.83665891945119597, // k = 48
|
|
220
|
+
0.99929082941537217, 1.93532697329771963, 2.83806833931606661, // k = 49
|
|
221
|
+
0.99930366295501472, 1.93591074716263734, 2.83943997143404658, // k = 50
|
|
222
|
+
0.99931598804721489, 1.93647857274021362, 2.84077557836653227, // k = 51
|
|
223
|
+
0.99932789059798210, 1.93703110239354714, 2.84207662106302905, // k = 52
|
|
224
|
+
0.99933946180485123, 1.93756904936378760, 2.84334468086129277, // k = 53
|
|
225
|
+
0.99935053819703512, 1.93809302131219852, 2.84458116874117195, // k = 54
|
|
226
|
+
0.99936126637970801, 1.93860365411038060, 2.84578731838604426, // k = 55
|
|
227
|
+
0.99937166229284458, 1.93910149816429112, 2.84696443486512862, // k = 56
|
|
228
|
+
0.99938169190727422, 1.93958709548454067, 2.84811369085281285, // k = 57
|
|
229
|
+
0.99939136927613959, 1.94006085573701625, 2.84923617230361970, // k = 58
|
|
230
|
+
0.99940074328745254, 1.94052339623206649, 2.85033291216254270, // k = 59
|
|
231
|
+
0.99940993070470086, 1.94097508636855309, 2.85140492437699322, // k = 60
|
|
232
|
+
0.99941868577388959, 1.94141633372043998, 2.85245314430358121, // k = 61
|
|
233
|
+
0.99942734443487780, 1.94184757038001976, 2.85347839582286156, // k = 62
|
|
234
|
+
0.99943556385736088, 1.94226915100517772, 2.85448160365493209, // k = 63
|
|
235
|
+
0.99944374522542034, 1.94268143723749631, 2.85546346373061510, // k = 64
|
|
236
|
+
0.99945159955424856, 1.94308482059116727, 2.85642486111805738, // k = 65
|
|
237
|
+
0.99945915301904620, 1.94347956957849988, 2.85736639994965458, // k = 66
|
|
238
|
+
0.99946660663832176, 1.94386600964031686, 2.85828887832701639, // k = 67
|
|
239
|
+
0.99947383703224091, 1.94424436597356021, 2.85919278275500233, // k = 68
|
|
240
|
+
0.99948075442870277, 1.94461502153473020, 2.86007887186090670, // k = 69
|
|
241
|
+
0.99948766082269458, 1.94497821937304138, 2.86094774077355396, // k = 70
|
|
242
|
+
0.99949422748713346, 1.94533411296001191, 2.86179981848076181, // k = 71
|
|
243
|
+
0.99950070756119658, 1.94568300035135167, 2.86263579405672886, // k = 72
|
|
244
|
+
0.99950704321753392, 1.94602523449961495, 2.86345610449197352, // k = 73
|
|
245
|
+
0.99951320334216121, 1.94636083782822311, 2.86426125541271404, // k = 74
|
|
246
|
+
0.99951920293474927, 1.94669011080745236, 2.86505169255406145, // k = 75
|
|
247
|
+
0.99952501670378524, 1.94701327348536779, 2.86582788270862920, // k = 76
|
|
248
|
+
0.99953071209267819, 1.94733044372333097, 2.86659027602854621, // k = 77
|
|
249
|
+
0.99953632734991515, 1.94764180764266825, 2.86733927778843167, // k = 78
|
|
250
|
+
0.99954171164873173, 1.94794766430732125, 2.86807526143834934, // k = 79
|
|
251
|
+
0.99954699274462655, 1.94824807472994621, 2.86879864789403882, // k = 80
|
|
252
|
+
0.99955216611081710, 1.94854317889829076, 2.86950970901679625, // k = 81
|
|
253
|
+
0.99955730019613043, 1.94883320227168610, 2.87020887436986527, // k = 82
|
|
254
|
+
0.99956213770650493, 1.94911826561721568, 2.87089648477021342, // k = 83
|
|
255
|
+
0.99956704264963037, 1.94939848545763539, 2.87157281693902178, // k = 84
|
|
256
|
+
0.99957166306481327, 1.94967401618316671, 2.87223821840905202, // k = 85
|
|
257
|
+
0.99957632713136491, 1.94994497791333288, 2.87289293193450135, // k = 86
|
|
258
|
+
0.99958087233392234, 1.95021155752212394, 2.87353731228213860, // k = 87
|
|
259
|
+
0.99958532555996271, 1.95047376805584349, 2.87417154907075201, // k = 88
|
|
260
|
+
0.99958956246481989, 1.95073180380688882, 2.87479599765507032, // k = 89
|
|
261
|
+
0.99959389351869277, 1.95098572880579013, 2.87541081987382086, // k = 90
|
|
262
|
+
0.99959807862052230, 1.95123574036898617, 2.87601637401948551, // k = 91
|
|
263
|
+
0.99960214057801977, 1.95148186921983324, 2.87661283691068093, // k = 92
|
|
264
|
+
0.99960607527256684, 1.95172415829728152, 2.87720042968334155, // k = 93
|
|
265
|
+
0.99960996433179616, 1.95196280898670693, 2.87777936649376898, // k = 94
|
|
266
|
+
0.99961379137860717, 1.95219787713926962, 2.87834989933620022, // k = 95
|
|
267
|
+
0.99961756088146103, 1.95242944583677058, 2.87891216133900230, // k = 96
|
|
268
|
+
0.99962125605327401, 1.95265762420910960, 2.87946647367488140, // k = 97
|
|
269
|
+
0.99962486179100551, 1.95288245314810638, 2.88001290210658567, // k = 98
|
|
270
|
+
0.99962843240297161, 1.95310404286672679, 2.88055166523392359, // k = 99
|
|
271
|
+
0.99963187276145504, 1.95332251980147475, 2.88108300006589957, // k = 100
|
|
272
|
+
0.99963525453173929, 1.95353785898848287, 2.88160703591438505, // k = 101
|
|
273
|
+
0.99963855412988778, 1.95375019354571577, 2.88212393551896184, // k = 102
|
|
274
|
+
0.99964190254169694, 1.95395953472205974, 2.88263389761985422, // k = 103
|
|
275
|
+
0.99964506565942202, 1.95416607430155409, 2.88313700661564098, // k = 104
|
|
276
|
+
0.99964834424233118, 1.95436972855640079, 2.88363350163803034, // k = 105
|
|
277
|
+
0.99965136548857458, 1.95457068540693513, 2.88412349413960101, // k = 106
|
|
278
|
+
0.99965436594726498, 1.95476896383092935, 2.88460710620208260, // k = 107
|
|
279
|
+
0.99965736463468602, 1.95496457504532373, 2.88508450078833789, // k = 108
|
|
280
|
+
0.99966034130443404, 1.95515761150707590, 2.88555580586194083, // k = 109
|
|
281
|
+
0.99966326130828520, 1.95534810382198998, 2.88602118761679094, // k = 110
|
|
282
|
+
0.99966601446035952, 1.95553622237747504, 2.88648066384146773, // k = 111
|
|
283
|
+
0.99966887679593697, 1.95572186728168163, 2.88693444915907094, // k = 112
|
|
284
|
+
0.99967161286551232, 1.95590523410490391, 2.88738271495714116, // k = 113
|
|
285
|
+
0.99967435412270333, 1.95608626483223702, 2.88782540459769166, // k = 114
|
|
286
|
+
0.99967701261934394, 1.95626497627117146, 2.88826277189363623, // k = 115
|
|
287
|
+
0.99967963265157778, 1.95644153684824573, 2.88869486674335008, // k = 116
|
|
288
|
+
0.99968216317182623, 1.95661589936000269, 2.88912184353694101, // k = 117
|
|
289
|
+
0.99968479674396349, 1.95678821614791332, 2.88954376359643561, // k = 118
|
|
290
|
+
0.99968729031337489, 1.95695842061650183, 2.88996069422501023, // k = 119
|
|
291
|
+
0.99968963358631413, 1.95712651709766305, 2.89037285320668502 // k = 120
|
|
292
|
+
};
|
|
293
|
+
|
|
294
|
+
class binomial_bounds {
|
|
295
|
+
|
|
296
|
+
public:
|
|
297
|
+
static double get_lower_bound(unsigned long long num_samples, double theta, unsigned num_std_devs) {
|
|
298
|
+
check_theta(theta);
|
|
299
|
+
check_num_std_devs(num_std_devs);
|
|
300
|
+
const double estimate = num_samples / theta;
|
|
301
|
+
const double lb = compute_approx_binomial_lower_bound(num_samples, theta, num_std_devs);
|
|
302
|
+
return std::min(estimate, std::max(static_cast<double>(num_samples), lb));
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
static double get_upper_bound(unsigned long long num_samples, double theta, unsigned num_std_devs) {
|
|
306
|
+
check_theta(theta);
|
|
307
|
+
check_num_std_devs(num_std_devs);
|
|
308
|
+
const double estimate = num_samples / theta;
|
|
309
|
+
const double ub = compute_approx_binomial_upper_bound(num_samples, theta, num_std_devs);
|
|
310
|
+
return std::max(estimate, ub);
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
private:
|
|
314
|
+
// our "classic" bounds, but now with continuity correction
|
|
315
|
+
static double cont_classic_lb(unsigned long long num_samples, double theta, double num_std_devs) {
|
|
316
|
+
const double n_hat = (num_samples - 0.5) / theta;
|
|
317
|
+
const double b = num_std_devs * std::sqrt((1.0 - theta) / theta);
|
|
318
|
+
const double d = 0.5 * b * std::sqrt((b * b) + (4.0 * n_hat));
|
|
319
|
+
const double center = n_hat + (0.5 * (b * b));
|
|
320
|
+
return (center - d);
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
// our "classic" bounds, but now with continuity correction
|
|
324
|
+
static double cont_classic_ub(unsigned long long num_samples, double theta, double num_std_devs) {
|
|
325
|
+
const double n_hat = (num_samples + 0.5) / theta;
|
|
326
|
+
const double b = num_std_devs * std::sqrt((1.0 - theta) / theta);
|
|
327
|
+
const double d = 0.5 * b * std::sqrt((b * b) + (4.0 * n_hat));
|
|
328
|
+
const double center = n_hat + (0.5 * (b * b));
|
|
329
|
+
return (center + d);
|
|
330
|
+
}
|
|
331
|
+
|
|
332
|
+
// This is a special purpose calculator for NStar, using a computational
|
|
333
|
+
// strategy inspired by its Bayesian definition. It is only appropriate
|
|
334
|
+
// for a very limited set of inputs. However, the procedure compute_approx_binomial_lower_bound()
|
|
335
|
+
// below does in fact only call it for suitably limited inputs.
|
|
336
|
+
// Outside of this limited range, two different bad things will happen.
|
|
337
|
+
// First, because we are not using logarithms, the values of intermediate
|
|
338
|
+
// quantities will exceed the dynamic range of doubles. Second, even if that
|
|
339
|
+
// problem were fixed, the running time of this procedure is essentially linear
|
|
340
|
+
// in est = (numSamples / p), and that can be Very, Very Big.
|
|
341
|
+
static unsigned long long special_n_star(unsigned long long num_samples, double p, double delta) {
|
|
342
|
+
const double q = 1.0 - p;
|
|
343
|
+
// Use a different algorithm if the following is true; this one will be too slow, or worse.
|
|
344
|
+
if ((num_samples / p) >= 500.0) throw std::invalid_argument("out of range");
|
|
345
|
+
double cur_term = std::pow(p, num_samples); // curTerm = posteriorProbability (k, k, p)
|
|
346
|
+
if (cur_term <= 1e-100) throw std::logic_error("out of range"); // sanity check for non-use of logarithms
|
|
347
|
+
double tot = cur_term;
|
|
348
|
+
unsigned long long m = num_samples;
|
|
349
|
+
while (tot <= delta) { // this test can fail even the first time
|
|
350
|
+
cur_term = (cur_term * q * (m)) / ((m + 1) - num_samples);
|
|
351
|
+
tot += cur_term;
|
|
352
|
+
m += 1;
|
|
353
|
+
}
|
|
354
|
+
// we have reached a state where tot > delta, so back up one
|
|
355
|
+
return (m - 1);
|
|
356
|
+
}
|
|
357
|
+
|
|
358
|
+
// The following procedure has very limited applicability.
|
|
359
|
+
// The above remarks about special_n_star() also apply here.
|
|
360
|
+
static unsigned long long special_n_prime_b(unsigned long long num_samples, double p, double delta) {
|
|
361
|
+
const double q = 1.0 - p;
|
|
362
|
+
const double one_minus_delta = 1.0 - delta;
|
|
363
|
+
double cur_term = std::pow(p, num_samples); // curTerm = posteriorProbability (k, k, p)
|
|
364
|
+
if (cur_term <= 1e-100) throw std::logic_error("out of range"); // sanity check for non-use of logarithms
|
|
365
|
+
double tot = cur_term;
|
|
366
|
+
unsigned long long m = num_samples;
|
|
367
|
+
while (tot < one_minus_delta) {
|
|
368
|
+
cur_term = (cur_term * q * (m)) / ((m + 1) - num_samples);
|
|
369
|
+
tot += cur_term;
|
|
370
|
+
m += 1;
|
|
371
|
+
}
|
|
372
|
+
return m; // no need to back up
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
static unsigned long long special_n_prime_f(unsigned long long num_samples, double p, double delta) {
|
|
376
|
+
// Use a different algorithm if the following is true; this one will be too slow, or worse.
|
|
377
|
+
if ((num_samples / p) >= 500.0) throw std::invalid_argument("out of range"); //A super-small delta could also make it slow.
|
|
378
|
+
return special_n_prime_b(num_samples + 1, p, delta);
|
|
379
|
+
}
|
|
380
|
+
|
|
381
|
+
// The following computes an approximation to the lower bound of a Frequentist
|
|
382
|
+
// confidence interval based on the tails of the Binomial distribution.
|
|
383
|
+
static double compute_approx_binomial_lower_bound(unsigned long long num_samples, double theta, unsigned num_std_devs) {
|
|
384
|
+
if (theta == 1) return num_samples;
|
|
385
|
+
if (num_samples == 0) return 0;
|
|
386
|
+
if (num_samples == 1) {
|
|
387
|
+
const double delta = delta_of_num_std_devs[num_std_devs];
|
|
388
|
+
const double raw_lb = std::log(1 - delta) / std::log(1 - theta);
|
|
389
|
+
return std::floor(raw_lb); // round down
|
|
390
|
+
}
|
|
391
|
+
if (num_samples > 120) {
|
|
392
|
+
// plenty of samples, so gaussian approximation to binomial distribution isn't too bad
|
|
393
|
+
const double raw_lb = cont_classic_lb(num_samples, theta, num_std_devs);
|
|
394
|
+
return (raw_lb - 0.5); // fake round down
|
|
395
|
+
}
|
|
396
|
+
// at this point we know 2 <= num_samples <= 120
|
|
397
|
+
if (theta > (1 - 1e-5)) { // empirically-determined threshold
|
|
398
|
+
return num_samples;
|
|
399
|
+
}
|
|
400
|
+
if (theta < (num_samples / 360.0)) { // empirically-determined threshold
|
|
401
|
+
// here we use the Gaussian approximation, but with a modified num_std_devs
|
|
402
|
+
const unsigned index = 3 * num_samples + (num_std_devs - 1);
|
|
403
|
+
const double raw_lb = cont_classic_lb(num_samples, theta, lb_equiv_table[index]);
|
|
404
|
+
return raw_lb - 0.5; // fake round down
|
|
405
|
+
}
|
|
406
|
+
// This is the most difficult range to approximate; we will compute an "exact" LB.
|
|
407
|
+
// We know that est <= 360, so specialNStar() shouldn't be ridiculously slow.
|
|
408
|
+
const double delta = delta_of_num_std_devs[num_std_devs];
|
|
409
|
+
return special_n_star(num_samples, theta, delta); // no need to round
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
// The following computes an approximation to the upper bound of a Frequentist
|
|
413
|
+
// confidence interval based on the tails of the Binomial distribution.
|
|
414
|
+
static double compute_approx_binomial_upper_bound(unsigned long long num_samples, double theta, unsigned num_std_devs) {
|
|
415
|
+
if (theta == 1) return num_samples;
|
|
416
|
+
if (num_samples == 0) {
|
|
417
|
+
const double delta = delta_of_num_std_devs[num_std_devs];
|
|
418
|
+
const double raw_ub = std::log(delta) / std::log(1 - theta);
|
|
419
|
+
return std::ceil(raw_ub); // round up
|
|
420
|
+
}
|
|
421
|
+
if (num_samples > 120) {
|
|
422
|
+
// plenty of samples, so gaussian approximation to binomial distribution isn't too bad
|
|
423
|
+
const double raw_ub = cont_classic_ub(num_samples, theta, num_std_devs);
|
|
424
|
+
return (raw_ub + 0.5); // fake round up
|
|
425
|
+
}
|
|
426
|
+
// at this point we know 2 <= num_samples <= 120
|
|
427
|
+
if (theta > (1 - 1e-5)) { // empirically-determined threshold
|
|
428
|
+
return num_samples + 1;
|
|
429
|
+
}
|
|
430
|
+
if (theta < (num_samples / 360.0)) { // empirically-determined threshold
|
|
431
|
+
// here we use the Gaussian approximation, but with a modified num_std_devs
|
|
432
|
+
const unsigned index = 3 * num_samples + (num_std_devs - 1);
|
|
433
|
+
const double raw_ub = cont_classic_ub(num_samples, theta, ub_equiv_table[index]);
|
|
434
|
+
return raw_ub + 0.5; // fake round up
|
|
435
|
+
}
|
|
436
|
+
// This is the most difficult range to approximate; we will compute an "exact" UB.
|
|
437
|
+
// We know that est <= 360, so specialNPrimeF() shouldn't be ridiculously slow.
|
|
438
|
+
const double delta = delta_of_num_std_devs[num_std_devs];
|
|
439
|
+
return special_n_prime_f(num_samples, theta, delta); // no need to round
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
static void check_theta(double theta) {
|
|
443
|
+
if (theta < 0 || theta > 1) {
|
|
444
|
+
throw std::invalid_argument("theta must be in [0, 1]");
|
|
445
|
+
}
|
|
446
|
+
}
|
|
447
|
+
|
|
448
|
+
static void check_num_std_devs(unsigned num_std_devs) {
|
|
449
|
+
if (num_std_devs < 1 || num_std_devs > 3) {
|
|
450
|
+
throw std::invalid_argument("num_std_devs must be 1, 2 or 3");
|
|
451
|
+
}
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
};
|
|
455
|
+
|
|
456
|
+
} /* namespace datasketches */
|
|
457
|
+
|
|
458
|
+
# endif
|