datasketches 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE +310 -0
- data/NOTICE +11 -0
- data/README.md +126 -0
- data/ext/datasketches/cpc_wrapper.cpp +50 -0
- data/ext/datasketches/ext.cpp +12 -0
- data/ext/datasketches/extconf.rb +11 -0
- data/ext/datasketches/hll_wrapper.cpp +69 -0
- data/lib/datasketches.rb +9 -0
- data/lib/datasketches/version.rb +3 -0
- data/vendor/datasketches-cpp/CMakeLists.txt +126 -0
- data/vendor/datasketches-cpp/LICENSE +311 -0
- data/vendor/datasketches-cpp/MANIFEST.in +19 -0
- data/vendor/datasketches-cpp/NOTICE +11 -0
- data/vendor/datasketches-cpp/README.md +42 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +173 -0
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +458 -0
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +291 -0
- data/vendor/datasketches-cpp/common/include/ceiling_power_of_2.hpp +41 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +51 -0
- data/vendor/datasketches-cpp/common/include/conditional_back_inserter.hpp +68 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +70 -0
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +114 -0
- data/vendor/datasketches-cpp/common/include/inv_pow2_table.hpp +107 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +57 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +196 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +38 -0
- data/vendor/datasketches-cpp/common/test/catch.hpp +17618 -0
- data/vendor/datasketches-cpp/common/test/catch_runner.cpp +7 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.cpp +31 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +108 -0
- data/vendor/datasketches-cpp/common/test/test_runner.cpp +29 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +74 -0
- data/vendor/datasketches-cpp/cpc/include/compression_data.hpp +6022 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +62 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +147 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +742 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +167 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +311 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +102 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +346 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +137 -0
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +274 -0
- data/vendor/datasketches-cpp/cpc/include/kxp_byte_lookup.hpp +81 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +84 -0
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +266 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +67 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +381 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +149 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +54 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +319 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +484 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +114 -0
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +345 -0
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +84 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +360 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +47 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +92 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +303 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +83 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +811 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +40 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +291 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +59 -0
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +417 -0
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +91 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +233 -0
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +90 -0
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +48 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +335 -0
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +69 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +124 -0
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +55 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +158 -0
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +706 -0
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +136 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +462 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +149 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +85 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +170 -0
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +287 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +239 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables-internal.hpp +112 -0
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +46 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +56 -0
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +43 -0
- data/vendor/datasketches-cpp/hll/include/hll.hpp +669 -0
- data/vendor/datasketches-cpp/hll/include/hll.private.hpp +32 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +79 -0
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +51 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +130 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +181 -0
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +93 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +191 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +389 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +313 -0
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +141 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +44 -0
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +168 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +58 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +150 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +319 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +169 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +559 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +1131 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +154 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +685 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +229 -0
- data/vendor/datasketches-cpp/pyproject.toml +17 -0
- data/vendor/datasketches-cpp/python/CMakeLists.txt +61 -0
- data/vendor/datasketches-cpp/python/README.md +78 -0
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +345 -0
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +354 -0
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +346 -0
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +463 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +396 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +2 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +90 -0
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +40 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +123 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +136 -0
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +209 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +162 -0
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +488 -0
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +140 -0
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -0
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +64 -0
- data/vendor/datasketches-cpp/python/tests/fi_test.py +110 -0
- data/vendor/datasketches-cpp/python/tests/hll_test.py +131 -0
- data/vendor/datasketches-cpp/python/tests/kll_test.py +119 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +121 -0
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +148 -0
- data/vendor/datasketches-cpp/python/tests/vo_test.py +101 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +48 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +392 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +1752 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +239 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +645 -0
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +67 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +509 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +358 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +94 -0
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +57 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +73 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +83 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +88 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +130 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +533 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +939 -0
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +122 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +109 -0
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +45 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +244 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +218 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +438 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +97 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +104 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp +32 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +52 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +179 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +238 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_sampled_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/bounds_on_ratios_in_theta_sketched_sets.hpp +135 -0
- data/vendor/datasketches-cpp/tuple/include/jaccard_similarity.hpp +172 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +53 -0
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/theta_comparators.hpp +48 -0
- data/vendor/datasketches-cpp/tuple/include/theta_constants.hpp +34 -0
- data/vendor/datasketches-cpp/tuple/include/theta_helpers.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base.hpp +59 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_base_impl.hpp +121 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +78 -0
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base.hpp +54 -0
- data/vendor/datasketches-cpp/tuple/include/theta_set_difference_base_impl.hpp +80 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +393 -0
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +481 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base.hpp +60 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_base_impl.hpp +84 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +88 -0
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base.hpp +259 -0
- data/vendor/datasketches-cpp/tuple/include/theta_update_sketch_base_impl.hpp +389 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +57 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b_impl.hpp +33 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +104 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +496 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +587 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +109 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +53 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +1 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +298 -0
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +250 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +224 -0
- data/vendor/datasketches-cpp/tuple/test/theta_jaccard_similarity_test.cpp +144 -0
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +247 -0
- data/vendor/datasketches-cpp/tuple/test/theta_union_experimental_test.cpp +44 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +289 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +235 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +98 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +102 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +249 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +187 -0
- metadata +302 -0
@@ -0,0 +1,19 @@
|
|
1
|
+
global-include CMakeLists.txt
|
2
|
+
global-include *.cpp
|
3
|
+
global-include *.c
|
4
|
+
global-include *.hpp
|
5
|
+
global-include *.h
|
6
|
+
global-include *.bin
|
7
|
+
|
8
|
+
global-exclude .git*
|
9
|
+
|
10
|
+
recursive-include python/pybind11 *
|
11
|
+
|
12
|
+
graft common
|
13
|
+
graft cpc
|
14
|
+
graft fi
|
15
|
+
graft hll
|
16
|
+
graft kll
|
17
|
+
graft theta
|
18
|
+
graft sampling
|
19
|
+
graft python
|
@@ -0,0 +1,11 @@
|
|
1
|
+
Apache DataSketches-cpp
|
2
|
+
Copyright 2020 The Apache Software Foundation
|
3
|
+
|
4
|
+
Copyright 2015-2018 Yahoo
|
5
|
+
Copyright 2019 Verizon Media
|
6
|
+
|
7
|
+
This product includes software developed at
|
8
|
+
The Apache Software Foundation (http://www.apache.org/).
|
9
|
+
|
10
|
+
Prior to moving to ASF, the software for this project was developed at
|
11
|
+
Yahoo (now Verizon Media) (https://developer.yahoo.com).
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# DataSketches Core C++ Library Component
|
2
|
+
This is the core C++ component of the DataSketches library. It contains all of the key sketching algorithms that are in the Java component and can be accessed directly from user applications.
|
3
|
+
|
4
|
+
This component is also a dependency of other components of the library that create adaptors for target systems, such as PostgreSQL.
|
5
|
+
|
6
|
+
Note that we have a parallel core component for Java implementations of the same sketch algorithms,
|
7
|
+
[datasketches-java](https://github.com/apache/datasketches-java).
|
8
|
+
|
9
|
+
Please visit the main [DataSketches website](https://datasketches.apache.org) for more information.
|
10
|
+
|
11
|
+
If you are interested in contributing to this project, please see our [Community](https://datasketches.apache.org/docs/Community/) page for how to contact us.
|
12
|
+
|
13
|
+
---
|
14
|
+
|
15
|
+
This code requires C++11. It was tested with GCC 4.8.5 (standard in RedHat at the time of this writing), GCC 8.2.0, and Apple LLVM version 10.0.1 (clang-1001.0.46.4).
|
16
|
+
|
17
|
+
This includes Python bindings. For the Python interface, see the README notes in [the python subdirectory](https://github.com/apache/datasketches-cpp/tree/master/python).
|
18
|
+
|
19
|
+
This library is header-only. The build process provided is only for building unit tests and the python library.
|
20
|
+
|
21
|
+
Building the unit tests requires cmake 3.12.0 or higher.
|
22
|
+
|
23
|
+
Installing the latest cmake on OSX: `brew install cmake`
|
24
|
+
|
25
|
+
Building and running unit tests using cmake for OSX and Linux:
|
26
|
+
|
27
|
+
```
|
28
|
+
$ cd build
|
29
|
+
$ cmake ..
|
30
|
+
$ make
|
31
|
+
$ make test
|
32
|
+
```
|
33
|
+
|
34
|
+
Building and running unit tests using cmake for Windows from the command line:
|
35
|
+
|
36
|
+
```
|
37
|
+
$ cd build
|
38
|
+
$ cmake ..
|
39
|
+
$ cd ..
|
40
|
+
$ cmake --build build --config Release
|
41
|
+
$ cmake --build build --config Release --target RUN_TESTS
|
42
|
+
```
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# Licensed to the Apache Software Foundation (ASF) under one
|
2
|
+
# or more contributor license agreements. See the NOTICE file
|
3
|
+
# distributed with this work for additional information
|
4
|
+
# regarding copyright ownership. The ASF licenses this file
|
5
|
+
# to you under the Apache License, Version 2.0 (the
|
6
|
+
# "License"); you may not use this file except in compliance
|
7
|
+
# with the License. You may obtain a copy of the License at
|
8
|
+
#
|
9
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
10
|
+
#
|
11
|
+
# Unless required by applicable law or agreed to in writing,
|
12
|
+
# software distributed under the License is distributed on an
|
13
|
+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
14
|
+
# KIND, either express or implied. See the License for the
|
15
|
+
# specific language governing permissions and limitations
|
16
|
+
# under the License.
|
17
|
+
|
18
|
+
add_library(common INTERFACE)
|
19
|
+
|
20
|
+
if (BUILD_TESTS)
|
21
|
+
add_subdirectory(test)
|
22
|
+
endif()
|
23
|
+
|
24
|
+
target_include_directories(common
|
25
|
+
INTERFACE
|
26
|
+
$<INSTALL_INTERFACE:$<INSTALL_PREFIX>/include>
|
27
|
+
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
|
28
|
+
)
|
29
|
+
|
30
|
+
target_compile_features(common INTERFACE cxx_std_11)
|
31
|
+
|
32
|
+
target_sources(common
|
33
|
+
INTERFACE
|
34
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/common_defs.hpp
|
35
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/memory_operations.hpp
|
36
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/MurmurHash3.h
|
37
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/serde.hpp
|
38
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/count_zeros.hpp
|
39
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/inv_pow2_table.hpp
|
40
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/binomial_bounds.hpp
|
41
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_back_inserter.hpp
|
42
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_forward.hpp
|
43
|
+
${CMAKE_CURRENT_SOURCE_DIR}/include/ceiling_power_of_2.hpp
|
44
|
+
)
|
45
|
+
|
@@ -0,0 +1,173 @@
|
|
1
|
+
// Minimally modified from Austin Appleby's code:
|
2
|
+
// * Removed MurmurHash3_x86_32 and MurmurHash3_x86_128
|
3
|
+
// * Changed input seed in MurmurHash3_x64_128 to uint64_t
|
4
|
+
// * Define and use HashState reference to return result
|
5
|
+
// * Made entire hash function defined inline
|
6
|
+
//-----------------------------------------------------------------------------
|
7
|
+
// MurmurHash3 was written by Austin Appleby, and is placed in the public
|
8
|
+
// domain. The author hereby disclaims copyright to this source code.
|
9
|
+
|
10
|
+
// Note - The x86 and x64 versions do _not_ produce the same results, as the
|
11
|
+
// algorithms are optimized for their respective platforms. You can still
|
12
|
+
// compile and run any of them on any platform, but your performance with the
|
13
|
+
// non-native version will be less than optimal.
|
14
|
+
|
15
|
+
#ifndef _MURMURHASH3_H_
|
16
|
+
#define _MURMURHASH3_H_
|
17
|
+
|
18
|
+
//-----------------------------------------------------------------------------
|
19
|
+
// Platform-specific functions and macros
|
20
|
+
|
21
|
+
// Microsoft Visual Studio
|
22
|
+
|
23
|
+
#if defined(_MSC_VER)
|
24
|
+
|
25
|
+
typedef unsigned char uint8_t;
|
26
|
+
typedef unsigned int uint32_t;
|
27
|
+
typedef unsigned __int64 uint64_t;
|
28
|
+
|
29
|
+
#define FORCE_INLINE __forceinline
|
30
|
+
|
31
|
+
#include <stdlib.h>
|
32
|
+
|
33
|
+
#define ROTL32(x,y) _rotl(x,y)
|
34
|
+
#define ROTL64(x,y) _rotl64(x,y)
|
35
|
+
|
36
|
+
#define BIG_CONSTANT(x) (x)
|
37
|
+
|
38
|
+
// Other compilers
|
39
|
+
|
40
|
+
#else // defined(_MSC_VER)
|
41
|
+
|
42
|
+
#include <stdint.h>
|
43
|
+
|
44
|
+
#define FORCE_INLINE inline __attribute__((always_inline))
|
45
|
+
|
46
|
+
// Rotates the 32-bit value x left by r bit positions.
// The rotation count is reduced modulo 32 before shifting, so r == 0 (or any
// count outside 1..31) never turns into a shift by the full word width, which
// is undefined behavior in C++. Results for r in 1..31 are unchanged.
inline uint32_t rotl32 ( uint32_t x, int8_t r )
{
  const unsigned shift = static_cast<unsigned>(r) & 31u;
  // (32 - shift) & 31 maps shift == 0 to a right shift of 0 instead of 32.
  return (x << shift) | (x >> ((32u - shift) & 31u));
}
|
50
|
+
|
51
|
+
// Rotates the 64-bit value x left by r bit positions.
// The rotation count is reduced modulo 64 before shifting, so r == 0 (or any
// count outside 1..63) never turns into a shift by the full word width, which
// is undefined behavior in C++. Results for r in 1..63 are unchanged.
inline uint64_t rotl64 ( uint64_t x, int8_t r )
{
  const unsigned shift = static_cast<unsigned>(r) & 63u;
  // (64 - shift) & 63 maps shift == 0 to a right shift of 0 instead of 64.
  return (x << shift) | (x >> ((64u - shift) & 63u));
}
|
55
|
+
|
56
|
+
#define ROTL32(x,y) rotl32(x,y)
|
57
|
+
#define ROTL64(x,y) rotl64(x,y)
|
58
|
+
|
59
|
+
#define BIG_CONSTANT(x) (x##LLU)
|
60
|
+
|
61
|
+
#endif // !defined(_MSC_VER)
|
62
|
+
|
63
|
+
//-----------------------------------------------------------------------------
|
64
|
+
|
65
|
+
//-----------------------------------------------------------------------------
|
66
|
+
// Return type - Using C++ reference for return type which should allow better
|
67
|
+
// compiler optimization than a void* pointer
|
68
|
+
// Holds a 128-bit hash value as two 64-bit halves, h1 and h2.
// Declared as a named struct (instead of a typedef of an anonymous struct)
// so that the type can be forward-declared by name; existing uses of
// HashState are unaffected.
struct HashState {
  uint64_t h1;
  uint64_t h2;
};
|
72
|
+
|
73
|
+
|
74
|
+
//-----------------------------------------------------------------------------
|
75
|
+
// Block read - if your platform needs to do endian-swapping or can only
|
76
|
+
// handle aligned reads, do the conversion here
|
77
|
+
|
78
|
+
// Fetches the i-th 64-bit word of the array that starts at p.
// The word is returned exactly as stored: this function performs no byte
// swapping and no alignment adjustment.
FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, int i )
{
  const uint64_t * const word = p + i;
  return *word;
}
|
82
|
+
|
83
|
+
//-----------------------------------------------------------------------------
|
84
|
+
// Finalization mix - force all bits of a hash block to avalanche
|
85
|
+
|
86
|
+
// Final avalanche step for one 64-bit half of the hash state: three
// xor-with-right-shift-by-33 steps, with a multiplication by a fixed odd
// constant after each of the first two.
FORCE_INLINE uint64_t fmix64 ( uint64_t k )
{
  const uint64_t first_multiplier  = BIG_CONSTANT(0xff51afd7ed558ccd);
  const uint64_t second_multiplier = BIG_CONSTANT(0xc4ceb9fe1a85ec53);

  k = (k ^ (k >> 33)) * first_multiplier;
  k = (k ^ (k >> 33)) * second_multiplier;
  return k ^ (k >> 33);
}
|
96
|
+
|
97
|
+
// Computes the 128-bit x64 variant of MurmurHash3 over a byte buffer.
//   key      - start of the input bytes
//   lenBytes - number of input bytes
//   seed     - value loaded into both 64-bit halves of the state before hashing
//   out      - receives the result; out.h1 / out.h2 also serve as the running
//              state and are overwritten starting with the seed assignment
FORCE_INLINE void MurmurHash3_x64_128(const void* key, int lenBytes, uint64_t seed, HashState& out) {
  static const uint64_t mul1 = BIG_CONSTANT(0x87c37b91114253d5);
  static const uint64_t mul2 = BIG_CONSTANT(0x4cf5ad432745937f);

  const uint8_t* const bytes = (const uint8_t*)key;

  out.h1 = seed;
  out.h2 = seed;

  // Count of complete 16-byte blocks; up to 15 trailing bytes are handled
  // separately below.
  const int full_blocks = lenBytes >> 4;

  // Body: fold every complete 128-bit block into the state, two 64-bit
  // words at a time.
  const uint64_t* const words = (const uint64_t*)(bytes);
  int block = 0;
  while (block < full_blocks) {
    uint64_t lo = getblock64(words, block*2+0);
    uint64_t hi = getblock64(words, block*2+1);

    lo *= mul1;
    lo = ROTL64(lo,31);
    lo *= mul2;
    out.h1 ^= lo;
    out.h1 = ROTL64(out.h1,27);
    out.h1 += out.h2;
    out.h1 = out.h1*5+0x52dce729;

    hi *= mul2;
    hi = ROTL64(hi,33);
    hi *= mul1;
    out.h2 ^= hi;
    out.h2 = ROTL64(out.h2,31);
    out.h2 += out.h1;
    out.h2 = out.h2*5+0x38495ab5;

    ++block;
  }

  // Tail: the bytes left over after the last complete block.
  const uint8_t* const tail = bytes + (full_blocks << 4);
  const int remaining = lenBytes & 15;

  uint64_t tail_lo = 0;
  uint64_t tail_hi = 0;

  // Tail bytes 8..14 are packed (byte 8 in the lowest position) and mixed
  // into h2 first.
  if (remaining > 8) {
    for (int n = remaining; n > 8; --n) {
      tail_hi ^= ((uint64_t)tail[n - 1]) << (8 * (n - 9));
    }
    tail_hi *= mul2;
    tail_hi = ROTL64(tail_hi,33);
    tail_hi *= mul1;
    out.h2 ^= tail_hi;
  }

  // Tail bytes 0..7 are packed (byte 0 in the lowest position) and mixed
  // into h1.
  if (remaining > 0) {
    for (int n = (remaining < 8 ? remaining : 8); n > 0; --n) {
      tail_lo ^= ((uint64_t)tail[n - 1]) << (8 * (n - 1));
    }
    tail_lo *= mul1;
    tail_lo = ROTL64(tail_lo,31);
    tail_lo *= mul2;
    out.h1 ^= tail_lo;
  }

  // Finalization: fold in the length, cross-add the halves, avalanche each
  // half, then cross-add once more.
  const uint64_t length_bits = static_cast<uint64_t>(lenBytes);
  out.h1 ^= length_bits;
  out.h2 ^= length_bits;

  out.h1 += out.h2;
  out.h2 += out.h1;

  out.h1 = fmix64(out.h1);
  out.h2 = fmix64(out.h2);

  out.h1 += out.h2;
  out.h2 += out.h1;
}
|
170
|
+
|
171
|
+
//-----------------------------------------------------------------------------
|
172
|
+
|
173
|
+
#endif // _MURMURHASH3_H_
|
@@ -0,0 +1,458 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#ifndef BINOMIAL_BOUNDS_HPP_
|
21
|
+
#define BINOMIAL_BOUNDS_HPP_
|
22
|
+
|
23
|
+
#include <algorithm>
|
24
|
+
#include <cmath>
|
25
|
+
|
26
|
+
/*
|
27
|
+
* This class enables the estimation of error bounds given a sample set size, the sampling
|
28
|
+
* probability theta, the number of standard deviations and a simple noDataSeen flag. This can
|
29
|
+
* be used to estimate error bounds for fixed threshold sampling as well as the error bounds
|
30
|
+
* calculations for sketches.
|
31
|
+
*
|
32
|
+
* author Alexander Saydakov
|
33
|
+
* author Lee Rhodes
|
34
|
+
* author Kevin Lang
|
35
|
+
*/
|
36
|
+
|
37
|
+
namespace datasketches {
|
38
|
+
|
39
|
+
static constexpr double delta_of_num_std_devs[] = {
|
40
|
+
0.5000000000000000000, // not actually using this value
|
41
|
+
0.1586553191586026479,
|
42
|
+
0.0227502618904135701,
|
43
|
+
0.0013498126861731796
|
44
|
+
};
|
45
|
+
|
46
|
+
// Adjusted "number of standard deviations" values that
// compute_approx_binomial_lower_bound() feeds into the Gaussian approximation
// when the sample count is small (the table covers k = 0..120).
// Layout: three consecutive entries per sample count k, one for each of
// num_std_devs = 1, 2, 3, so the entry is found at index 3 * k + (num_std_devs - 1).
// The row for k = 0 holds fake values that only keep the indexing simple.
static constexpr double lb_equiv_table[] = {
|
47
|
+
1.0, 2.0, 3.0, // fake values for k = 0
|
48
|
+
0.78733703534118149, 3.14426768537558132, 13.56789685109913535, // k = 1
|
49
|
+
0.94091379266077979, 2.64699271711145911, 6.29302733018320737, // k = 2
|
50
|
+
0.96869128474958188, 2.46531676590527127, 4.97375283467403051, // k = 3
|
51
|
+
0.97933572521046131, 2.37418810664669877, 4.44899975481712318, // k = 4
|
52
|
+
0.98479165917274258, 2.31863116255024693, 4.16712379778553554, // k = 5
|
53
|
+
0.98806033915698777, 2.28075536565225434, 3.99010556144099837, // k = 6
|
54
|
+
0.99021896790580399, 2.25302005857281529, 3.86784477136922078, // k = 7
|
55
|
+
0.99174267079089873, 2.23168103978522936, 3.77784896945266269, // k = 8
|
56
|
+
0.99287147837287648, 2.21465899260871879, 3.70851932988722410, // k = 9
|
57
|
+
0.99373900046805375, 2.20070155496262032, 3.65326029076638292, // k = 10
|
58
|
+
0.99442519013851438, 2.18900651202670815, 3.60803817612955413, // k = 11
|
59
|
+
0.99498066823221620, 2.17903457780744247, 3.57024330407946877, // k = 12
|
60
|
+
0.99543899410224412, 2.17040883161922693, 3.53810982030634591, // k = 13
|
61
|
+
0.99582322541263579, 2.16285726913676513, 3.51039837124298515, // k = 14
|
62
|
+
0.99614973311747690, 2.15617827879603396, 3.48621230377099778, // k = 15
|
63
|
+
0.99643042892560629, 2.15021897666090922, 3.46488605693562590, // k = 16
|
64
|
+
0.99667418783778317, 2.14486114872480016, 3.44591466064832730, // k = 17
|
65
|
+
0.99688774875812669, 2.14001181420209718, 3.42890765690452781, // k = 18
|
66
|
+
0.99707632299691795, 2.13559675336844634, 3.41355809420343803, // k = 19
|
67
|
+
0.99724399084971083, 2.13155592217421486, 3.39962113251016262, // k = 20
|
68
|
+
0.99739400151915447, 2.12784018863251845, 3.38689892877548004, // k = 21
|
69
|
+
0.99752896842633731, 2.12440890875851096, 3.37522975271599535, // k = 22
|
70
|
+
0.99765101725122918, 2.12122815311133195, 3.36448003577621080, // k = 23
|
71
|
+
0.99776189496810730, 2.11826934724291505, 3.35453840911279144, // k = 24
|
72
|
+
0.99786304821586214, 2.11550823850916458, 3.34531123809287578, // k = 25
|
73
|
+
0.99795568665180667, 2.11292409529477254, 3.33671916527694634, // k = 26
|
74
|
+
0.99804083063483517, 2.11049908609763293, 3.32869446834217797, // k = 27
|
75
|
+
0.99811933910984862, 2.10821776918189130, 3.32117898316676019, // k = 28
|
76
|
+
0.99819195457286014, 2.10606671027090897, 3.31412243534683171, // k = 29
|
77
|
+
0.99825930555178388, 2.10403415237001923, 3.30748113008135647, // k = 30
|
78
|
+
0.99832193858154028, 2.10210975877822648, 3.30121691946897045, // k = 31
|
79
|
+
0.99838032666573895, 2.10028440670842542, 3.29529629751144171, // k = 32
|
80
|
+
0.99843488390555990, 2.09855000145353188, 3.28968974413223236, // k = 33
|
81
|
+
0.99848596721417948, 2.09689934193824001, 3.28437111460505093, // k = 34
|
82
|
+
0.99853390005924325, 2.09532599155502908, 3.27931717312372939, // k = 35
|
83
|
+
0.99857895741078551, 2.09382418262592296, 3.27450718840060517, // k = 36
|
84
|
+
0.99862138880970974, 2.09238872751677718, 3.26992261182860489, // k = 37
|
85
|
+
0.99866141580770318, 2.09101494715108061, 3.26554677962434425, // k = 38
|
86
|
+
0.99869923565267982, 2.08969860402822860, 3.26136468165239535, // k = 39
|
87
|
+
0.99873502010169091, 2.08843585627218431, 3.25736275677081721, // k = 40
|
88
|
+
0.99876893292508839, 2.08722321436752623, 3.25352872241415980, // k = 41
|
89
|
+
0.99880111078502409, 2.08605749165553789, 3.24985141664350863, // k = 42
|
90
|
+
0.99883168573342118, 2.08493577529222307, 3.24632068399498053, // k = 43
|
91
|
+
0.99886077231613513, 2.08385540129560809, 3.24292724848112357, // k = 44
|
92
|
+
0.99888847451828155, 2.08281392374021834, 3.23966263299664092, // k = 45
|
93
|
+
0.99891488795844907, 2.08180908991394631, 3.23651906111521726, // k = 46
|
94
|
+
0.99894010085196783, 2.08083882998420222, 3.23348939240611344, // k = 47
|
95
|
+
0.99896419358239541, 2.07990122528650545, 3.23056705515594444, // k = 48
|
96
|
+
0.99898723510594323, 2.07899450946285924, 3.22774598963252402, // k = 49
|
97
|
+
0.99900929266780736, 2.07811704477046533, 3.22502059972006805, // k = 50
|
98
|
+
0.99903043086155208, 2.07726730587160091, 3.22238570890294795, // k = 51
|
99
|
+
0.99905070073845081, 2.07644388314946582, 3.21983651940365689, // k = 52
|
100
|
+
0.99907015770423868, 2.07564546080757850, 3.21736857351049821, // k = 53
|
101
|
+
0.99908884779227947, 2.07487081196367740, 3.21497773796417619, // k = 54
|
102
|
+
0.99910681586905525, 2.07411879634256024, 3.21266015316183484, // k = 55
|
103
|
+
0.99912410177549305, 2.07338834403498140, 3.21041222805715165, // k = 56
|
104
|
+
0.99914074347179849, 2.07267845454973099, 3.20823061166797174, // k = 57
|
105
|
+
0.99915677607464204, 2.07198819052374006, 3.20611216970604573, // k = 58
|
106
|
+
0.99917223149395795, 2.07131667846186929, 3.20405396962596001, // k = 59
|
107
|
+
0.99918714153457699, 2.07066309019154460, 3.20205326110445299, // k = 60
|
108
|
+
0.99920153247185794, 2.07002665203046377, 3.20010746990493544, // k = 61
|
109
|
+
0.99921543193525508, 2.06940663431663552, 3.19821417453343315, // k = 62
|
110
|
+
0.99922886570365677, 2.06880235245998279, 3.19637109973109546, // k = 63
|
111
|
+
0.99924185357357942, 2.06821315729285971, 3.19457610621114441, // k = 64
|
112
|
+
0.99925441845175555, 2.06763843812092318, 3.19282717869864996, // k = 65
|
113
|
+
0.99926658263325407, 2.06707761824370095, 3.19112241228646099, // k = 66
|
114
|
+
0.99927836173816331, 2.06653015295219689, 3.18946001739936946, // k = 67
|
115
|
+
0.99928977431994781, 2.06599552505539918, 3.18783829446098821, // k = 68
|
116
|
+
0.99930083753795884, 2.06547324585920933, 3.18625564538041317, // k = 69
|
117
|
+
0.99931156864562354, 2.06496285191821016, 3.18471055124089730, // k = 70
|
118
|
+
0.99932197985521043, 2.06446390392778767, 3.18320157510865442, // k = 71
|
119
|
+
0.99933208559809827, 2.06397598606787369, 3.18172735837393361, // k = 72
|
120
|
+
0.99934190032416836, 2.06349869971447220, 3.18028661102792398, // k = 73
|
121
|
+
0.99935143390791836, 2.06303166975550312, 3.17887810481605015, // k = 74
|
122
|
+
0.99936070171270330, 2.06257453607466346, 3.17750067581857820, // k = 75
|
123
|
+
0.99936971103502970, 2.06212696042919674, 3.17615321728274580, // k = 76
|
124
|
+
0.99937847392385493, 2.06168861430600714, 3.17483467831510779, // k = 77
|
125
|
+
0.99938700168914352, 2.06125918927764928, 3.17354405480557489, // k = 78
|
126
|
+
0.99939530099953799, 2.06083838987589729, 3.17228039269048168, // k = 79
|
127
|
+
0.99940338278830154, 2.06042593411496000, 3.17104278166036124, // k = 80
|
128
|
+
0.99941125463777780, 2.06002155276328835, 3.16983035274597569, // k = 81
|
129
|
+
0.99941892470027938, 2.05962498741951094, 3.16864227952240185, // k = 82
|
130
|
+
0.99942640059737187, 2.05923599161263837, 3.16747776846497686, // k = 83
|
131
|
+
0.99943368842187397, 2.05885433061945378, 3.16633606416374391, // k = 84
|
132
|
+
0.99944079790603269, 2.05847977868873500, 3.16521644518826406, // k = 85
|
133
|
+
0.99944773295734990, 2.05811212058944193, 3.16411821883858124, // k = 86
|
134
|
+
0.99945450059186669, 2.05775114781260982, 3.16304072400711789, // k = 87
|
135
|
+
0.99946110646314423, 2.05739666442039493, 3.16198332650733960, // k = 88
|
136
|
+
0.99946755770463369, 2.05704847678819647, 3.16094541781455973, // k = 89
|
137
|
+
0.99947385746861528, 2.05670640500335367, 3.15992641851471490, // k = 90
|
138
|
+
0.99948001256305474, 2.05637027420314666, 3.15892576988736096, // k = 91
|
139
|
+
0.99948602689656241, 2.05603991286400856, 3.15794293484717059, // k = 92
|
140
|
+
0.99949190674294641, 2.05571516158917689, 3.15697740043813724, // k = 93
|
141
|
+
0.99949765436329585, 2.05539586490317561, 3.15602867309343083, // k = 94
|
142
|
+
0.99950327557880314, 2.05508187237845164, 3.15509627710042651, // k = 95
|
143
|
+
0.99950877461972709, 2.05477304104951486, 3.15417975753007340, // k = 96
|
144
|
+
0.99951415481862682, 2.05446923022574879, 3.15327867462917766, // k = 97
|
145
|
+
0.99951942042375208, 2.05417030908833453, 3.15239260700215596, // k = 98
|
146
|
+
0.99952457390890004, 2.05387614661762541, 3.15152114915238712, // k = 99
|
147
|
+
0.99952962005008317, 2.05358662050909402, 3.15066390921020911, // k = 100
|
148
|
+
0.99953456216121594, 2.05330161104427589, 3.14982051097524618, // k = 101
|
149
|
+
0.99953940176368405, 2.05302100378725072, 3.14899059183684926, // k = 102
|
150
|
+
0.99954414373920031, 2.05274468493067275, 3.14817379948561893, // k = 103
|
151
|
+
0.99954879047621148, 2.05247255013657082, 3.14736979964868624, // k = 104
|
152
|
+
0.99955334485656522, 2.05220449388099269, 3.14657826610371671, // k = 105
|
153
|
+
0.99955780993869325, 2.05194041831310869, 3.14579888316276879, // k = 106
|
154
|
+
0.99956218652590678, 2.05168022402710903, 3.14503134811607765, // k = 107
|
155
|
+
0.99956647932785359, 2.05142381889103831, 3.14427536967733090, // k = 108
|
156
|
+
0.99957069025060719, 2.05117111251445294, 3.14353066260227365, // k = 109
|
157
|
+
0.99957482032178291, 2.05092201793428330, 3.14279695558593630, // k = 110
|
158
|
+
0.99957887261450651, 2.05067645094720774, 3.14207398336887422, // k = 111
|
159
|
+
0.99958284988383639, 2.05043432833224415, 3.14136149076028914, // k = 112
|
160
|
+
0.99958675435604505, 2.05019557189746138, 3.14065923143530767, // k = 113
|
161
|
+
0.99959058650074439, 2.04996010556124020, 3.13996696426707445, // k = 114
|
162
|
+
0.99959434898201494, 2.04972785368377686, 3.13928445867830419, // k = 115
|
163
|
+
0.99959804437042976, 2.04949874512311681, 3.13861149103462367, // k = 116
|
164
|
+
0.99960167394553423, 2.04927271043337100, 3.13794784369528656, // k = 117
|
165
|
+
0.99960523957651048, 2.04904968140490951, 3.13729330661277572, // k = 118
|
166
|
+
0.99960874253329735, 2.04882959397491504, 3.13664767767019725, // k = 119
|
167
|
+
0.99961218434327748, 2.04861238220240693, 3.13601075688413289 // k = 120
|
168
|
+
};
|
169
|
+
|
170
|
+
// Adjusted "number of standard deviations" values that
// compute_approx_binomial_upper_bound() feeds into the Gaussian approximation
// when the sample count is small (the table covers k = 0..120).
// Layout: three consecutive entries per sample count k, one for each of
// num_std_devs = 1, 2, 3, so the entry is found at index 3 * k + (num_std_devs - 1).
// The row for k = 0 holds fake values that only keep the indexing simple.
static constexpr double ub_equiv_table[] = {
|
171
|
+
1.0, 2.0, 3.0, // fake values for k = 0
|
172
|
+
0.99067760836669549, 1.75460517119302040, 2.48055626001627161, // k = 1
|
173
|
+
0.99270518097577565, 1.78855957509907171, 2.53863835259832626, // k = 2
|
174
|
+
0.99402032633599902, 1.81047286499563143, 2.57811676180597260, // k = 3
|
175
|
+
0.99492607629539975, 1.82625928017762362, 2.60759550546498531, // k = 4
|
176
|
+
0.99558653966013821, 1.83839160339161367, 2.63086812358551470, // k = 5
|
177
|
+
0.99608981951632813, 1.84812399034444752, 2.64993712523727254, // k = 6
|
178
|
+
0.99648648035983456, 1.85617372053235385, 2.66598485907860550, // k = 7
|
179
|
+
0.99680750790483330, 1.86298655802610824, 2.67976541374471822, // k = 8
|
180
|
+
0.99707292880049181, 1.86885682585270274, 2.69178781407745760, // k = 9
|
181
|
+
0.99729614928489241, 1.87398826101983218, 2.70241106542158604, // k = 10
|
182
|
+
0.99748667952445658, 1.87852708449801753, 2.71189717290596377, // k = 11
|
183
|
+
0.99765127712748836, 1.88258159501103250, 2.72044290303773550, // k = 12
|
184
|
+
0.99779498340305395, 1.88623391878036273, 2.72819957382063194, // k = 13
|
185
|
+
0.99792160418357412, 1.88954778748873764, 2.73528576807902368, // k = 14
|
186
|
+
0.99803398604944960, 1.89257337682371940, 2.74179612106766513, // k = 15
|
187
|
+
0.99813449883217231, 1.89535099316557876, 2.74780718300419835, // k = 16
|
188
|
+
0.99822494122659577, 1.89791339232732525, 2.75338173141955167, // k = 17
|
189
|
+
0.99830679915913834, 1.90028752122407241, 2.75857186416826039, // k = 18
|
190
|
+
0.99838117410831728, 1.90249575897183831, 2.76342117562634826, // k = 19
|
191
|
+
0.99844913407071090, 1.90455689090418900, 2.76796659454200267, // k = 20
|
192
|
+
0.99851147736424650, 1.90648682834171268, 2.77223944710058845, // k = 21
|
193
|
+
0.99856879856019987, 1.90829917277082473, 2.77626682032629901, // k = 22
|
194
|
+
0.99862183849734265, 1.91000561415842185, 2.78007199816156003, // k = 23
|
195
|
+
0.99867096266018507, 1.91161621560812023, 2.78367524259661536, // k = 24
|
196
|
+
0.99871656986212543, 1.91313978579765376, 2.78709435016625662, // k = 25
|
197
|
+
0.99875907577771272, 1.91458400425526065, 2.79034488416175463, // k = 26
|
198
|
+
0.99879885565047744, 1.91595563175945927, 2.79344064132371273, // k = 27
|
199
|
+
0.99883610756373287, 1.91726064301425936, 2.79639384757751941, // k = 28
|
200
|
+
0.99887095169674467, 1.91850441099725799, 2.79921543574803877, // k = 29
|
201
|
+
0.99890379414739527, 1.91969155477030995, 2.80191513182441554, // k = 30
|
202
|
+
0.99893466279047516, 1.92082633358913313, 2.80450167352080371, // k = 31
|
203
|
+
0.99896392088177777, 1.92191254955568525, 2.80698295731653502, // k = 32
|
204
|
+
0.99899147889385631, 1.92295362479495680, 2.80936614404217266, // k = 33
|
205
|
+
0.99901764688726757, 1.92395267400968351, 2.81165765979318394, // k = 34
|
206
|
+
0.99904238606342233, 1.92491244978191389, 2.81386337393604435, // k = 35
|
207
|
+
0.99906590152386343, 1.92583552644848055, 2.81598868034527072, // k = 36
|
208
|
+
0.99908829040739988, 1.92672418013918900, 2.81803841726804194, // k = 37
|
209
|
+
0.99910959420023460, 1.92758051694144683, 2.82001709302821268, // k = 38
|
210
|
+
0.99912996403594434, 1.92840654943159961, 2.82192875763732332, // k = 39
|
211
|
+
0.99914930224576892, 1.92920397044028391, 2.82377730628954282, // k = 40
|
212
|
+
0.99916781270195543, 1.92997447498220254, 2.82556612075063640, // k = 41
|
213
|
+
0.99918553179077207, 1.93071949211818605, 2.82729843191989971, // k = 42
|
214
|
+
0.99920250730914972, 1.93144048613876862, 2.82897728689417249, // k = 43
|
215
|
+
0.99921873345181211, 1.93213870990595638, 2.83060537017752267, // k = 44
|
216
|
+
0.99923435180002684, 1.93281536508689555, 2.83218527795750674, // k = 45
|
217
|
+
0.99924930425362390, 1.93347145882316340, 2.83371938965598247, // k = 46
|
218
|
+
0.99926370394567243, 1.93410820221384938, 2.83520990872793277, // k = 47
|
219
|
+
0.99927750755296074, 1.93472643138986200, 2.83665891945119597, // k = 48
|
220
|
+
0.99929082941537217, 1.93532697329771963, 2.83806833931606661, // k = 49
|
221
|
+
0.99930366295501472, 1.93591074716263734, 2.83943997143404658, // k = 50
|
222
|
+
0.99931598804721489, 1.93647857274021362, 2.84077557836653227, // k = 51
|
223
|
+
0.99932789059798210, 1.93703110239354714, 2.84207662106302905, // k = 52
|
224
|
+
0.99933946180485123, 1.93756904936378760, 2.84334468086129277, // k = 53
|
225
|
+
0.99935053819703512, 1.93809302131219852, 2.84458116874117195, // k = 54
|
226
|
+
0.99936126637970801, 1.93860365411038060, 2.84578731838604426, // k = 55
|
227
|
+
0.99937166229284458, 1.93910149816429112, 2.84696443486512862, // k = 56
|
228
|
+
0.99938169190727422, 1.93958709548454067, 2.84811369085281285, // k = 57
|
229
|
+
0.99939136927613959, 1.94006085573701625, 2.84923617230361970, // k = 58
|
230
|
+
0.99940074328745254, 1.94052339623206649, 2.85033291216254270, // k = 59
|
231
|
+
0.99940993070470086, 1.94097508636855309, 2.85140492437699322, // k = 60
|
232
|
+
0.99941868577388959, 1.94141633372043998, 2.85245314430358121, // k = 61
|
233
|
+
0.99942734443487780, 1.94184757038001976, 2.85347839582286156, // k = 62
|
234
|
+
0.99943556385736088, 1.94226915100517772, 2.85448160365493209, // k = 63
|
235
|
+
0.99944374522542034, 1.94268143723749631, 2.85546346373061510, // k = 64
|
236
|
+
0.99945159955424856, 1.94308482059116727, 2.85642486111805738, // k = 65
|
237
|
+
0.99945915301904620, 1.94347956957849988, 2.85736639994965458, // k = 66
|
238
|
+
0.99946660663832176, 1.94386600964031686, 2.85828887832701639, // k = 67
|
239
|
+
0.99947383703224091, 1.94424436597356021, 2.85919278275500233, // k = 68
|
240
|
+
0.99948075442870277, 1.94461502153473020, 2.86007887186090670, // k = 69
|
241
|
+
0.99948766082269458, 1.94497821937304138, 2.86094774077355396, // k = 70
|
242
|
+
0.99949422748713346, 1.94533411296001191, 2.86179981848076181, // k = 71
|
243
|
+
0.99950070756119658, 1.94568300035135167, 2.86263579405672886, // k = 72
|
244
|
+
0.99950704321753392, 1.94602523449961495, 2.86345610449197352, // k = 73
|
245
|
+
0.99951320334216121, 1.94636083782822311, 2.86426125541271404, // k = 74
|
246
|
+
0.99951920293474927, 1.94669011080745236, 2.86505169255406145, // k = 75
|
247
|
+
0.99952501670378524, 1.94701327348536779, 2.86582788270862920, // k = 76
|
248
|
+
0.99953071209267819, 1.94733044372333097, 2.86659027602854621, // k = 77
|
249
|
+
0.99953632734991515, 1.94764180764266825, 2.86733927778843167, // k = 78
|
250
|
+
0.99954171164873173, 1.94794766430732125, 2.86807526143834934, // k = 79
|
251
|
+
0.99954699274462655, 1.94824807472994621, 2.86879864789403882, // k = 80
|
252
|
+
0.99955216611081710, 1.94854317889829076, 2.86950970901679625, // k = 81
|
253
|
+
0.99955730019613043, 1.94883320227168610, 2.87020887436986527, // k = 82
|
254
|
+
0.99956213770650493, 1.94911826561721568, 2.87089648477021342, // k = 83
|
255
|
+
0.99956704264963037, 1.94939848545763539, 2.87157281693902178, // k = 84
|
256
|
+
0.99957166306481327, 1.94967401618316671, 2.87223821840905202, // k = 85
|
257
|
+
0.99957632713136491, 1.94994497791333288, 2.87289293193450135, // k = 86
|
258
|
+
0.99958087233392234, 1.95021155752212394, 2.87353731228213860, // k = 87
|
259
|
+
0.99958532555996271, 1.95047376805584349, 2.87417154907075201, // k = 88
|
260
|
+
0.99958956246481989, 1.95073180380688882, 2.87479599765507032, // k = 89
|
261
|
+
0.99959389351869277, 1.95098572880579013, 2.87541081987382086, // k = 90
|
262
|
+
0.99959807862052230, 1.95123574036898617, 2.87601637401948551, // k = 91
|
263
|
+
0.99960214057801977, 1.95148186921983324, 2.87661283691068093, // k = 92
|
264
|
+
0.99960607527256684, 1.95172415829728152, 2.87720042968334155, // k = 93
|
265
|
+
0.99960996433179616, 1.95196280898670693, 2.87777936649376898, // k = 94
|
266
|
+
0.99961379137860717, 1.95219787713926962, 2.87834989933620022, // k = 95
|
267
|
+
0.99961756088146103, 1.95242944583677058, 2.87891216133900230, // k = 96
|
268
|
+
0.99962125605327401, 1.95265762420910960, 2.87946647367488140, // k = 97
|
269
|
+
0.99962486179100551, 1.95288245314810638, 2.88001290210658567, // k = 98
|
270
|
+
0.99962843240297161, 1.95310404286672679, 2.88055166523392359, // k = 99
|
271
|
+
0.99963187276145504, 1.95332251980147475, 2.88108300006589957, // k = 100
|
272
|
+
0.99963525453173929, 1.95353785898848287, 2.88160703591438505, // k = 101
|
273
|
+
0.99963855412988778, 1.95375019354571577, 2.88212393551896184, // k = 102
|
274
|
+
0.99964190254169694, 1.95395953472205974, 2.88263389761985422, // k = 103
|
275
|
+
0.99964506565942202, 1.95416607430155409, 2.88313700661564098, // k = 104
|
276
|
+
0.99964834424233118, 1.95436972855640079, 2.88363350163803034, // k = 105
|
277
|
+
0.99965136548857458, 1.95457068540693513, 2.88412349413960101, // k = 106
|
278
|
+
0.99965436594726498, 1.95476896383092935, 2.88460710620208260, // k = 107
|
279
|
+
0.99965736463468602, 1.95496457504532373, 2.88508450078833789, // k = 108
|
280
|
+
0.99966034130443404, 1.95515761150707590, 2.88555580586194083, // k = 109
|
281
|
+
0.99966326130828520, 1.95534810382198998, 2.88602118761679094, // k = 110
|
282
|
+
0.99966601446035952, 1.95553622237747504, 2.88648066384146773, // k = 111
|
283
|
+
0.99966887679593697, 1.95572186728168163, 2.88693444915907094, // k = 112
|
284
|
+
0.99967161286551232, 1.95590523410490391, 2.88738271495714116, // k = 113
|
285
|
+
0.99967435412270333, 1.95608626483223702, 2.88782540459769166, // k = 114
|
286
|
+
0.99967701261934394, 1.95626497627117146, 2.88826277189363623, // k = 115
|
287
|
+
0.99967963265157778, 1.95644153684824573, 2.88869486674335008, // k = 116
|
288
|
+
0.99968216317182623, 1.95661589936000269, 2.88912184353694101, // k = 117
|
289
|
+
0.99968479674396349, 1.95678821614791332, 2.88954376359643561, // k = 118
|
290
|
+
0.99968729031337489, 1.95695842061650183, 2.88996069422501023, // k = 119
|
291
|
+
0.99968963358631413, 1.95712651709766305, 2.89037285320668502 // k = 120
|
292
|
+
};
|
293
|
+
|
294
|
+
class binomial_bounds {
|
295
|
+
|
296
|
+
public:
|
297
|
+
static double get_lower_bound(unsigned long long num_samples, double theta, unsigned num_std_devs) {
|
298
|
+
check_theta(theta);
|
299
|
+
check_num_std_devs(num_std_devs);
|
300
|
+
const double estimate = num_samples / theta;
|
301
|
+
const double lb = compute_approx_binomial_lower_bound(num_samples, theta, num_std_devs);
|
302
|
+
return std::min(estimate, std::max(static_cast<double>(num_samples), lb));
|
303
|
+
}
|
304
|
+
|
305
|
+
static double get_upper_bound(unsigned long long num_samples, double theta, unsigned num_std_devs) {
|
306
|
+
check_theta(theta);
|
307
|
+
check_num_std_devs(num_std_devs);
|
308
|
+
const double estimate = num_samples / theta;
|
309
|
+
const double ub = compute_approx_binomial_upper_bound(num_samples, theta, num_std_devs);
|
310
|
+
return std::max(estimate, ub);
|
311
|
+
}
|
312
|
+
|
313
|
+
private:
|
314
|
+
// our "classic" bounds, but now with continuity correction
|
315
|
+
static double cont_classic_lb(unsigned long long num_samples, double theta, double num_std_devs) {
|
316
|
+
const double n_hat = (num_samples - 0.5) / theta;
|
317
|
+
const double b = num_std_devs * std::sqrt((1.0 - theta) / theta);
|
318
|
+
const double d = 0.5 * b * std::sqrt((b * b) + (4.0 * n_hat));
|
319
|
+
const double center = n_hat + (0.5 * (b * b));
|
320
|
+
return (center - d);
|
321
|
+
}
|
322
|
+
|
323
|
+
// our "classic" bounds, but now with continuity correction
|
324
|
+
static double cont_classic_ub(unsigned long long num_samples, double theta, double num_std_devs) {
|
325
|
+
const double n_hat = (num_samples + 0.5) / theta;
|
326
|
+
const double b = num_std_devs * std::sqrt((1.0 - theta) / theta);
|
327
|
+
const double d = 0.5 * b * std::sqrt((b * b) + (4.0 * n_hat));
|
328
|
+
const double center = n_hat + (0.5 * (b * b));
|
329
|
+
return (center + d);
|
330
|
+
}
|
331
|
+
|
332
|
+
// This is a special purpose calculator for NStar, using a computational
|
333
|
+
// strategy inspired by its Bayesian definition. It is only appropriate
|
334
|
+
// for a very limited set of inputs. However, the procedure compute_approx_binomial_lower_bound()
|
335
|
+
// below does in fact only call it for suitably limited inputs.
|
336
|
+
// Outside of this limited range, two different bad things will happen.
|
337
|
+
// First, because we are not using logarithms, the values of intermediate
|
338
|
+
// quantities will exceed the dynamic range of doubles. Second, even if that
|
339
|
+
// problem were fixed, the running time of this procedure is essentially linear
|
340
|
+
// in est = (numSamples / p), and that can be Very, Very Big.
|
341
|
+
static unsigned long long special_n_star(unsigned long long num_samples, double p, double delta) {
|
342
|
+
const double q = 1.0 - p;
|
343
|
+
// Use a different algorithm if the following is true; this one will be too slow, or worse.
|
344
|
+
if ((num_samples / p) >= 500.0) throw std::invalid_argument("out of range");
|
345
|
+
double cur_term = std::pow(p, num_samples); // curTerm = posteriorProbability (k, k, p)
|
346
|
+
if (cur_term <= 1e-100) throw std::logic_error("out of range"); // sanity check for non-use of logarithms
|
347
|
+
double tot = cur_term;
|
348
|
+
unsigned long long m = num_samples;
|
349
|
+
while (tot <= delta) { // this test can fail even the first time
|
350
|
+
cur_term = (cur_term * q * (m)) / ((m + 1) - num_samples);
|
351
|
+
tot += cur_term;
|
352
|
+
m += 1;
|
353
|
+
}
|
354
|
+
// we have reached a state where tot > delta, so back up one
|
355
|
+
return (m - 1);
|
356
|
+
}
|
357
|
+
|
358
|
+
// The following procedure has very limited applicability.
|
359
|
+
// The above remarks about special_n_star() also apply here.
|
360
|
+
static unsigned long long special_n_prime_b(unsigned long long num_samples, double p, double delta) {
|
361
|
+
const double q = 1.0 - p;
|
362
|
+
const double one_minus_delta = 1.0 - delta;
|
363
|
+
double cur_term = std::pow(p, num_samples); // curTerm = posteriorProbability (k, k, p)
|
364
|
+
if (cur_term <= 1e-100) throw std::logic_error("out of range"); // sanity check for non-use of logarithms
|
365
|
+
double tot = cur_term;
|
366
|
+
unsigned long long m = num_samples;
|
367
|
+
while (tot < one_minus_delta) {
|
368
|
+
cur_term = (cur_term * q * (m)) / ((m + 1) - num_samples);
|
369
|
+
tot += cur_term;
|
370
|
+
m += 1;
|
371
|
+
}
|
372
|
+
return m; // no need to back up
|
373
|
+
}
|
374
|
+
|
375
|
+
static unsigned long long special_n_prime_f(unsigned long long num_samples, double p, double delta) {
|
376
|
+
// Use a different algorithm if the following is true; this one will be too slow, or worse.
|
377
|
+
if ((num_samples / p) >= 500.0) throw std::invalid_argument("out of range"); //A super-small delta could also make it slow.
|
378
|
+
return special_n_prime_b(num_samples + 1, p, delta);
|
379
|
+
}
|
380
|
+
|
381
|
+
// The following computes an approximation to the lower bound of a Frequentist
|
382
|
+
// confidence interval based on the tails of the Binomial distribution.
|
383
|
+
static double compute_approx_binomial_lower_bound(unsigned long long num_samples, double theta, unsigned num_std_devs) {
|
384
|
+
if (theta == 1) return num_samples;
|
385
|
+
if (num_samples == 0) return 0;
|
386
|
+
if (num_samples == 1) {
|
387
|
+
const double delta = delta_of_num_std_devs[num_std_devs];
|
388
|
+
const double raw_lb = std::log(1 - delta) / std::log(1 - theta);
|
389
|
+
return std::floor(raw_lb); // round down
|
390
|
+
}
|
391
|
+
if (num_samples > 120) {
|
392
|
+
// plenty of samples, so gaussian approximation to binomial distribution isn't too bad
|
393
|
+
const double raw_lb = cont_classic_lb(num_samples, theta, num_std_devs);
|
394
|
+
return (raw_lb - 0.5); // fake round down
|
395
|
+
}
|
396
|
+
// at this point we know 2 <= num_samples <= 120
|
397
|
+
if (theta > (1 - 1e-5)) { // empirically-determined threshold
|
398
|
+
return num_samples;
|
399
|
+
}
|
400
|
+
if (theta < (num_samples / 360.0)) { // empirically-determined threshold
|
401
|
+
// here we use the Gaussian approximation, but with a modified num_std_devs
|
402
|
+
const unsigned index = 3 * num_samples + (num_std_devs - 1);
|
403
|
+
const double raw_lb = cont_classic_lb(num_samples, theta, lb_equiv_table[index]);
|
404
|
+
return raw_lb - 0.5; // fake round down
|
405
|
+
}
|
406
|
+
// This is the most difficult range to approximate; we will compute an "exact" LB.
|
407
|
+
// We know that est <= 360, so specialNStar() shouldn't be ridiculously slow.
|
408
|
+
const double delta = delta_of_num_std_devs[num_std_devs];
|
409
|
+
return special_n_star(num_samples, theta, delta); // no need to round
|
410
|
+
}
|
411
|
+
|
412
|
+
// The following computes an approximation to the upper bound of a Frequentist
|
413
|
+
// confidence interval based on the tails of the Binomial distribution.
|
414
|
+
static double compute_approx_binomial_upper_bound(unsigned long long num_samples, double theta, unsigned num_std_devs) {
|
415
|
+
if (theta == 1) return num_samples;
|
416
|
+
if (num_samples == 0) {
|
417
|
+
const double delta = delta_of_num_std_devs[num_std_devs];
|
418
|
+
const double raw_ub = std::log(delta) / std::log(1 - theta);
|
419
|
+
return std::ceil(raw_ub); // round up
|
420
|
+
}
|
421
|
+
if (num_samples > 120) {
|
422
|
+
// plenty of samples, so gaussian approximation to binomial distribution isn't too bad
|
423
|
+
const double raw_ub = cont_classic_ub(num_samples, theta, num_std_devs);
|
424
|
+
return (raw_ub + 0.5); // fake round up
|
425
|
+
}
|
426
|
+
// at this point we know 2 <= num_samples <= 120
|
427
|
+
if (theta > (1 - 1e-5)) { // empirically-determined threshold
|
428
|
+
return num_samples + 1;
|
429
|
+
}
|
430
|
+
if (theta < (num_samples / 360.0)) { // empirically-determined threshold
|
431
|
+
// here we use the Gaussian approximation, but with a modified num_std_devs
|
432
|
+
const unsigned index = 3 * num_samples + (num_std_devs - 1);
|
433
|
+
const double raw_ub = cont_classic_ub(num_samples, theta, ub_equiv_table[index]);
|
434
|
+
return raw_ub + 0.5; // fake round up
|
435
|
+
}
|
436
|
+
// This is the most difficult range to approximate; we will compute an "exact" UB.
|
437
|
+
// We know that est <= 360, so specialNPrimeF() shouldn't be ridiculously slow.
|
438
|
+
const double delta = delta_of_num_std_devs[num_std_devs];
|
439
|
+
return special_n_prime_f(num_samples, theta, delta); // no need to round
|
440
|
+
}
|
441
|
+
|
442
|
+
static void check_theta(double theta) {
|
443
|
+
if (theta < 0 || theta > 1) {
|
444
|
+
throw std::invalid_argument("theta must be in [0, 1]");
|
445
|
+
}
|
446
|
+
}
|
447
|
+
|
448
|
+
static void check_num_std_devs(unsigned num_std_devs) {
|
449
|
+
if (num_std_devs < 1 || num_std_devs > 3) {
|
450
|
+
throw std::invalid_argument("num_std_devs must be 1, 2 or 3");
|
451
|
+
}
|
452
|
+
}
|
453
|
+
|
454
|
+
};
|
455
|
+
|
456
|
+
} /* namespace datasketches */
|
457
|
+
|
458
|
+
# endif
|