datasketches 0.2.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/README.md +7 -7
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +31 -3
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +15 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +126 -90
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +22 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +69 -82
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +34 -32
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +41 -4
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +76 -64
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +133 -46
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +10 -6
- data/vendor/datasketches-cpp/python/README.md +50 -50
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +8 -8
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +13 -11
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +61 -64
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +42 -48
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +10 -7
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +137 -0
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +73 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +247 -103
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +10 -5
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -3
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +11 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +70 -37
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +437 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +41 -9
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +50 -63
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +84 -78
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +17 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +66 -28
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +19 -12
- metadata +18 -7
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 897dbc30f97ce17f0415630b6347a0092dac05196b0ef61e80939410d65cdf17
|
|
4
|
+
data.tar.gz: 61302f9cadde8a8badc97b455eb5c32d913c3b1fea8ed571e2da93a29e65afa9
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 4d541ba7f96a86f3f8de44f069f6e39d51ba6f28fa5d8c8d1d99a8434a95c5fe1a26470e6b062f348808fe5c0a444134d0dc96385437b4cb946c4a92044a2a5c
|
|
7
|
+
data.tar.gz: bc1bdacb7cbe69f9bb1382fd2ac7019bec04baf444dc963d63a594e989fd201d9eb9aadd0e463ac4efef8f7ba53915a594d8fb00f74ae295674b9024269a0406
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,19 @@
|
|
|
1
|
+
## 0.2.4 (2021-12-28)
|
|
2
|
+
|
|
3
|
+
- Updated DataSketches to 3.3.0
|
|
4
|
+
|
|
5
|
+
## 0.2.3 (2021-09-29)
|
|
6
|
+
|
|
7
|
+
- Updated DataSketches to 3.2.0
|
|
8
|
+
|
|
9
|
+
## 0.2.2 (2021-07-17)
|
|
10
|
+
|
|
11
|
+
- Updated DataSketches to 3.1.0
|
|
12
|
+
|
|
13
|
+
## 0.2.1 (2021-05-23)
|
|
14
|
+
|
|
15
|
+
- Improved performance
|
|
16
|
+
|
|
1
17
|
## 0.2.0 (2021-05-17)
|
|
2
18
|
|
|
3
19
|
- Updated DataSketches to 3.0.0
|
data/LICENSE
CHANGED
|
@@ -284,11 +284,48 @@ APPENDIX B: Additional licenses relevant to this product.
|
|
|
284
284
|
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
285
285
|
DEALINGS IN THE SOFTWARE.
|
|
286
286
|
-------------------------------------------------------------
|
|
287
|
-
Code Locations
|
|
287
|
+
Code Locations:
|
|
288
288
|
* https://github.com/apache/datasketches-cpp/blob/master/common/test/catch.hpp
|
|
289
289
|
that is adapted from the above.
|
|
290
290
|
|
|
291
291
|
|
|
292
|
+
=============================================================
|
|
293
|
+
BSD License
|
|
294
|
+
=============================================================
|
|
295
|
+
Original source code:
|
|
296
|
+
https://github.com/pybind/pybind11/blob/master/LICENSE
|
|
297
|
+
|
|
298
|
+
Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
|
|
299
|
+
|
|
300
|
+
Redistribution and use in source and binary forms, with or without
|
|
301
|
+
modification, are permitted provided that the following conditions are met:
|
|
302
|
+
|
|
303
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
304
|
+
list of conditions and the following disclaimer.
|
|
305
|
+
|
|
306
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
307
|
+
this list of conditions and the following disclaimer in the documentation
|
|
308
|
+
and/or other materials provided with the distribution.
|
|
309
|
+
|
|
310
|
+
3. Neither the name of the copyright holder nor the names of its contributors
|
|
311
|
+
may be used to endorse or promote products derived from this software
|
|
312
|
+
without specific prior written permission.
|
|
313
|
+
|
|
314
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
315
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
316
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
317
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
318
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
319
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
320
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
321
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
322
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
323
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
324
|
+
-------------------------------------------------------------
|
|
325
|
+
Code Locations:
|
|
326
|
+
Found only in the convenience binaries distributed from PyPI, which rely
|
|
327
|
+
on pybind11 code during compilation.
|
|
328
|
+
|
|
292
329
|
|
|
293
330
|
=============================================================
|
|
294
331
|
Public Domain
|
|
@@ -297,7 +334,7 @@ APPENDIX B: Additional licenses relevant to this product.
|
|
|
297
334
|
https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
|
|
298
335
|
Placed in the Public Domain by Austin Appleby
|
|
299
336
|
|
|
300
|
-
Code Locations
|
|
337
|
+
Code Locations:
|
|
301
338
|
common/include/MurmurHash3.h
|
|
302
339
|
that is adapted from the above.
|
|
303
340
|
-------------------------------------------------------------
|
|
@@ -305,6 +342,6 @@ APPENDIX B: Additional licenses relevant to this product.
|
|
|
305
342
|
* https://graphics.stanford.edu/~seander/bithacks.html
|
|
306
343
|
* Placed in the Public Domain by Sean Eron Anderson
|
|
307
344
|
|
|
308
|
-
Code Locations
|
|
345
|
+
Code Locations:
|
|
309
346
|
* common/include/ceiling_power_of_2.hpp
|
|
310
347
|
that is adapted from the above.
|
data/NOTICE
CHANGED
data/README.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
# DataSketches
|
|
1
|
+
# DataSketches Ruby
|
|
2
2
|
|
|
3
3
|
[DataSketches](https://datasketches.apache.org/) - sketch data structures - for Ruby
|
|
4
4
|
|
|
5
|
-
[](https://github.com/ankane/datasketches/actions)
|
|
5
|
+
[](https://github.com/ankane/datasketches-ruby/actions)
|
|
6
6
|
|
|
7
7
|
## Installation
|
|
8
8
|
|
|
@@ -292,22 +292,22 @@ This library is modeled after the DataSketches [Python API](https://github.com/a
|
|
|
292
292
|
|
|
293
293
|
## History
|
|
294
294
|
|
|
295
|
-
View the [changelog](https://github.com/ankane/datasketches/blob/master/CHANGELOG.md)
|
|
295
|
+
View the [changelog](https://github.com/ankane/datasketches-ruby/blob/master/CHANGELOG.md)
|
|
296
296
|
|
|
297
297
|
## Contributing
|
|
298
298
|
|
|
299
299
|
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
|
300
300
|
|
|
301
|
-
- [Report bugs](https://github.com/ankane/datasketches/issues)
|
|
302
|
-
- Fix bugs and [submit pull requests](https://github.com/ankane/datasketches/pulls)
|
|
301
|
+
- [Report bugs](https://github.com/ankane/datasketches-ruby/issues)
|
|
302
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/datasketches-ruby/pulls)
|
|
303
303
|
- Write, clarify, or fix documentation
|
|
304
304
|
- Suggest or add new features
|
|
305
305
|
|
|
306
306
|
To get started with development:
|
|
307
307
|
|
|
308
308
|
```sh
|
|
309
|
-
git clone --recursive https://github.com/ankane/datasketches.git
|
|
310
|
-
cd datasketches
|
|
309
|
+
git clone --recursive https://github.com/ankane/datasketches-ruby.git
|
|
310
|
+
cd datasketches-ruby
|
|
311
311
|
bundle install
|
|
312
312
|
bundle exec rake compile
|
|
313
313
|
bundle exec rake test
|
data/ext/datasketches/extconf.rb
CHANGED
|
@@ -20,10 +20,26 @@ using Rice::Arg;
|
|
|
20
20
|
|
|
21
21
|
void init_theta(Rice::Module& m) {
|
|
22
22
|
Rice::define_class_under<theta_sketch>(m, "ThetaSketch")
|
|
23
|
-
.define_method(
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
23
|
+
.define_method(
|
|
24
|
+
"empty?",
|
|
25
|
+
[](theta_sketch& self) {
|
|
26
|
+
return self.is_empty();
|
|
27
|
+
})
|
|
28
|
+
.define_method(
|
|
29
|
+
"estimate",
|
|
30
|
+
[](theta_sketch& self) {
|
|
31
|
+
return self.get_estimate();
|
|
32
|
+
})
|
|
33
|
+
.define_method(
|
|
34
|
+
"lower_bound",
|
|
35
|
+
[](theta_sketch& self, uint8_t num_std_devs) {
|
|
36
|
+
return self.get_lower_bound(num_std_devs);
|
|
37
|
+
})
|
|
38
|
+
.define_method(
|
|
39
|
+
"upper_bound",
|
|
40
|
+
[](theta_sketch& self, uint8_t num_std_devs) {
|
|
41
|
+
return self.get_upper_bound(num_std_devs);
|
|
42
|
+
});
|
|
27
43
|
|
|
28
44
|
Rice::define_class_under<compact_theta_sketch, theta_sketch>(m, "CompactThetaSketch")
|
|
29
45
|
.define_singleton_function(
|
data/lib/datasketches/version.rb
CHANGED
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
|
|
18
18
|
cmake_minimum_required(VERSION 3.12.0)
|
|
19
19
|
project(DataSketches
|
|
20
|
-
VERSION
|
|
20
|
+
VERSION 3.2.0
|
|
21
21
|
LANGUAGES CXX)
|
|
22
22
|
|
|
23
23
|
include(GNUInstallDirs)
|
|
@@ -35,6 +35,8 @@ set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
|
|
|
35
35
|
#set(CMAKE_VERBOSE_MAKEFILE ON)
|
|
36
36
|
set(CMAKE_MACOSX_RPATH ON)
|
|
37
37
|
|
|
38
|
+
set(CMAKE_CXX_STANDARD 11)
|
|
39
|
+
|
|
38
40
|
# enable compiler warnings globally
|
|
39
41
|
# derived from https://foonathan.net/blog/2018/10/17/cmake-warnings.html
|
|
40
42
|
# and https://arne-mertz.de/2018/07/cmake-properties-options/
|
|
@@ -70,6 +72,13 @@ if(COVERAGE AND CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
|
|
|
70
72
|
add_link_options(--coverage)
|
|
71
73
|
endif()
|
|
72
74
|
|
|
75
|
+
option(SANITIZE "Run sanitization checks (g++/clang only)" OFF)
|
|
76
|
+
if(SANITIZE AND CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
|
|
77
|
+
add_compile_options(-fsanitize=${SANITIZE})
|
|
78
|
+
add_link_options(-fsanitize=${SANITIZE})
|
|
79
|
+
endif()
|
|
80
|
+
|
|
81
|
+
|
|
73
82
|
# set default build type to Release
|
|
74
83
|
# Derived from: https://blog.kitware.com/cmake-and-the-default-build-type/
|
|
75
84
|
set(default_build_type "Release")
|
|
@@ -117,11 +126,30 @@ endif()
|
|
|
117
126
|
|
|
118
127
|
# # Installation
|
|
119
128
|
install(TARGETS datasketches
|
|
120
|
-
EXPORT ${
|
|
129
|
+
EXPORT ${PROJECT_NAME}
|
|
121
130
|
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
|
|
122
131
|
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
|
|
123
132
|
)
|
|
124
133
|
|
|
134
|
+
# Packaging
|
|
135
|
+
include(CMakePackageConfigHelpers)
|
|
136
|
+
write_basic_package_version_file(
|
|
137
|
+
"${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
|
|
138
|
+
VERSION ${PROJECT_VERSION}
|
|
139
|
+
COMPATIBILITY SameMajorVersion
|
|
140
|
+
)
|
|
141
|
+
configure_package_config_file(
|
|
142
|
+
cmake/DataSketchesConfig.cmake.in
|
|
143
|
+
"${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
|
|
144
|
+
INSTALL_DESTINATION lib/DataSketches/cmake
|
|
145
|
+
PATH_VARS CMAKE_INSTALL_INCLUDEDIR
|
|
146
|
+
)
|
|
147
|
+
install(EXPORT ${PROJECT_NAME} DESTINATION lib/DataSketches/cmake)
|
|
148
|
+
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
|
|
149
|
+
"${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
|
|
150
|
+
DESTINATION lib/DataSketches/cmake)
|
|
151
|
+
|
|
152
|
+
|
|
125
153
|
#set(CPACK_PROJECT_NAME ${PROJECT_NAME})
|
|
126
154
|
#set(CPACK_PROJECT_VERSION ${PROJECT_VERSION})
|
|
127
|
-
|
|
155
|
+
include(CPack)
|
|
@@ -284,11 +284,48 @@ APPENDIX B: Additional licenses relevant to this product.
|
|
|
284
284
|
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
285
285
|
DEALINGS IN THE SOFTWARE.
|
|
286
286
|
-------------------------------------------------------------
|
|
287
|
-
Code Locations
|
|
287
|
+
Code Locations:
|
|
288
288
|
* https://github.com/apache/datasketches-cpp/blob/master/common/test/catch.hpp
|
|
289
289
|
that is adapted from the above.
|
|
290
290
|
|
|
291
291
|
|
|
292
|
+
=============================================================
|
|
293
|
+
BSD License
|
|
294
|
+
=============================================================
|
|
295
|
+
Original source code:
|
|
296
|
+
https://github.com/pybind/pybind11/blob/master/LICENSE
|
|
297
|
+
|
|
298
|
+
Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
|
|
299
|
+
|
|
300
|
+
Redistribution and use in source and binary forms, with or without
|
|
301
|
+
modification, are permitted provided that the following conditions are met:
|
|
302
|
+
|
|
303
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
304
|
+
list of conditions and the following disclaimer.
|
|
305
|
+
|
|
306
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
307
|
+
this list of conditions and the following disclaimer in the documentation
|
|
308
|
+
and/or other materials provided with the distribution.
|
|
309
|
+
|
|
310
|
+
3. Neither the name of the copyright holder nor the names of its contributors
|
|
311
|
+
may be used to endorse or promote products derived from this software
|
|
312
|
+
without specific prior written permission.
|
|
313
|
+
|
|
314
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
315
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
316
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
317
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
318
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
319
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
320
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
321
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
322
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
323
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
324
|
+
-------------------------------------------------------------
|
|
325
|
+
Code Locations:
|
|
326
|
+
Found only in the convenience binaries distributed from PyPI, which rely
|
|
327
|
+
on pybind11 code during compilation.
|
|
328
|
+
|
|
292
329
|
|
|
293
330
|
=============================================================
|
|
294
331
|
Public Domain
|
|
@@ -297,7 +334,7 @@ APPENDIX B: Additional licenses relevant to this product.
|
|
|
297
334
|
https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
|
|
298
335
|
Placed in the Public Domain by Austin Appleby
|
|
299
336
|
|
|
300
|
-
Code Locations
|
|
337
|
+
Code Locations:
|
|
301
338
|
common/include/MurmurHash3.h
|
|
302
339
|
that is adapted from the above.
|
|
303
340
|
-------------------------------------------------------------
|
|
@@ -305,7 +342,7 @@ APPENDIX B: Additional licenses relevant to this product.
|
|
|
305
342
|
* https://graphics.stanford.edu/~seander/bithacks.html
|
|
306
343
|
* Placed in the Public Domain by Sean Eron Anderson
|
|
307
344
|
|
|
308
|
-
Code Locations
|
|
345
|
+
Code Locations:
|
|
309
346
|
* common/include/ceiling_power_of_2.hpp
|
|
310
347
|
that is adapted from the above.
|
|
311
348
|
|
|
@@ -25,18 +25,85 @@ Installing the latest cmake on OSX: brew install cmake
|
|
|
25
25
|
Building and running unit tests using cmake for OSX and Linux:
|
|
26
26
|
|
|
27
27
|
```
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
$ make
|
|
31
|
-
$ make test
|
|
28
|
+
$ cmake -S . -B build/Release -DCMAKE_BUILD_TYPE=Release
|
|
29
|
+
$ cmake --build build/Release -t all test
|
|
32
30
|
```
|
|
33
31
|
|
|
34
32
|
Building and running unit tests using cmake for Windows from the command line:
|
|
35
33
|
|
|
36
34
|
```
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
35
|
+
$ cd build
|
|
36
|
+
$ cmake ..
|
|
37
|
+
$ cd ..
|
|
38
|
+
$ cmake --build build --config Release
|
|
39
|
+
$ cmake --build build --config Release --target RUN_TESTS
|
|
42
40
|
```
|
|
41
|
+
|
|
42
|
+
To install a local distribution (OSX and Linux), use the following command. The
|
|
43
|
+
CMAKE_INSTALL_PREFIX variable controls the destination. If not specified, it
|
|
44
|
+
defaults to installing in /usr/local (/usr/local/include, /usr/local/lib, etc.). In the command below,
|
|
45
|
+
the installation will be in /tmp/install/DataSketches (/tmp/install/DataSketches/include,
|
|
46
|
+
/tmp/install/DataSketches/lib, etc)
|
|
47
|
+
|
|
48
|
+
```
|
|
49
|
+
$ cmake -S . -B build/Release -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/tmp/install/DataSketches
|
|
50
|
+
$ cmake --build build/Release -t install
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
To generate an installable package using CMake's built-in CPack packaging tool,
|
|
54
|
+
use the following command. The type of packaging is controlled by the CPACK_GENERATOR
|
|
55
|
+
variable (a semicolon-separated list). CMake usually supports packaging types such as RPM,
|
|
56
|
+
DEB, STGZ, TGZ, TZ, ZIP, etc.
|
|
57
|
+
|
|
58
|
+
```
|
|
59
|
+
$ cmake3 -S . -B build/Release -DCMAKE_BUILD_TYPE=Release -DCPACK_GENERATOR="RPM;STGZ;TGZ"
|
|
60
|
+
$ cmake3 --build build/Release -t package
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
The DataSketches project can be included in other projects' CMakeLists.txt files in one of two ways.
|
|
64
|
+
If DataSketches has been installed on the host (using an RPM, DEB, "make install" into /usr/local, or some other
|
|
65
|
+
way), then CMake's `find_package` command can be used like this:
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
find_package(DataSketches 3.2 REQUIRED)
|
|
69
|
+
target_link_libraries(my_dependent_target PUBLIC ${DATASKETCHES_LIB})
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
When used with find_package, DataSketches exports several variables, including
|
|
73
|
+
|
|
74
|
+
- `DATASKETCHES_VERSION`: The version number of the datasketches package that was imported.
|
|
75
|
+
- `DATASKETCHES_INCLUDE_DIR`: The directory that should be added to access DataSketches include files.
|
|
76
|
+
Because cmake automatically includes the interface directories for included target libraries when
|
|
77
|
+
using `target_link_libraries`, under normal circumstances there will be no need to include this directly.
|
|
78
|
+
- `DATASKETCHES_LIB`: The name of the DataSketches target to include as a dependency. Projects pulling
|
|
79
|
+
in DataSketches should reference this with `target_link_libraries` in order to set up all the correct dependencies
|
|
80
|
+
and include paths.
|
|
81
|
+
|
|
82
|
+
If you don't have DataSketches installed locally, dependent projects can pull it directly
|
|
83
|
+
from GitHub using CMake's `ExternalProject` module. The code would look something like this:
|
|
84
|
+
|
|
85
|
+
```
|
|
86
|
+
cmake_policy(SET CMP0097 NEW)
|
|
87
|
+
include(ExternalProject)
|
|
88
|
+
ExternalProject_Add(datasketches
|
|
89
|
+
GIT_REPOSITORY https://github.com/apache/datasketches-cpp.git
|
|
90
|
+
GIT_TAG 3.2.0
|
|
91
|
+
GIT_SHALLOW true
|
|
92
|
+
GIT_SUBMODULES ""
|
|
93
|
+
INSTALL_DIR /tmp/datasketches-prefix
|
|
94
|
+
CMAKE_ARGS -DBUILD_TESTS=OFF -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=/tmp/datasketches-prefix
|
|
95
|
+
|
|
96
|
+
# Override the install command to add DESTDIR
|
|
97
|
+
# This is necessary to work around an oddity in the RPM (but not other) package
|
|
98
|
+
# generation, as CMake otherwise picks up the Datasketch files when building
|
|
99
|
+
# an RPM for a dependent package. (RPM scans the directory for files in addition to installing
|
|
100
|
+
# those files referenced in an "install" rule in the cmake file)
|
|
101
|
+
INSTALL_COMMAND env DESTDIR= ${CMAKE_COMMAND} --build . --target install
|
|
102
|
+
)
|
|
103
|
+
ExternalProject_Get_property(datasketches INSTALL_DIR)
|
|
104
|
+
set(datasketches_INSTALL_DIR ${INSTALL_DIR})
|
|
105
|
+
message("Source dir of datasketches = ${datasketches_INSTALL_DIR}")
|
|
106
|
+
target_include_directories(my_dependent_target
|
|
107
|
+
PRIVATE ${datasketches_INSTALL_DIR}/include/DataSketches)
|
|
108
|
+
add_dependencies(my_dependent_target datasketches)
|
|
109
|
+
```
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
set(DATASKETCHES_VERSION "@PROJECT_VERSION@")
|
|
2
|
+
|
|
3
|
+
@PACKAGE_INIT@
|
|
4
|
+
|
|
5
|
+
include("${CMAKE_CURRENT_LIST_DIR}/DataSketches.cmake")
|
|
6
|
+
|
|
7
|
+
set_and_check(DATASKETCHES_INCLUDE_DIR "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@/DataSketches")
|
|
8
|
+
set(DATASKETCHES_LIB "datasketches")
|
|
9
|
+
|
|
10
|
+
check_required_components("@PROJECT_NAME@")
|
|
@@ -29,17 +29,18 @@ target_include_directories(common
|
|
|
29
29
|
|
|
30
30
|
target_compile_features(common INTERFACE cxx_std_11)
|
|
31
31
|
|
|
32
|
-
|
|
33
|
-
INTERFACE
|
|
34
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/common_defs.hpp
|
|
35
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/memory_operations.hpp
|
|
36
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/MurmurHash3.h
|
|
37
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/serde.hpp
|
|
38
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/count_zeros.hpp
|
|
39
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/inv_pow2_table.hpp
|
|
40
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/binomial_bounds.hpp
|
|
41
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_back_inserter.hpp
|
|
42
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_forward.hpp
|
|
43
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/ceiling_power_of_2.hpp
|
|
44
|
-
)
|
|
32
|
+
install(TARGETS common EXPORT ${PROJECT_NAME})
|
|
45
33
|
|
|
34
|
+
install(FILES
|
|
35
|
+
include/common_defs.hpp
|
|
36
|
+
include/memory_operations.hpp
|
|
37
|
+
include/MurmurHash3.h
|
|
38
|
+
include/serde.hpp
|
|
39
|
+
include/count_zeros.hpp
|
|
40
|
+
include/inv_pow2_table.hpp
|
|
41
|
+
include/binomial_bounds.hpp
|
|
42
|
+
include/conditional_back_inserter.hpp
|
|
43
|
+
include/conditional_forward.hpp
|
|
44
|
+
include/ceiling_power_of_2.hpp
|
|
45
|
+
include/bounds_binomial_proportions.hpp
|
|
46
|
+
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|
|
@@ -16,6 +16,8 @@
|
|
|
16
16
|
#ifndef _MURMURHASH3_H_
|
|
17
17
|
#define _MURMURHASH3_H_
|
|
18
18
|
|
|
19
|
+
#include <cstring>
|
|
20
|
+
|
|
19
21
|
//-----------------------------------------------------------------------------
|
|
20
22
|
// Platform-specific functions and macros
|
|
21
23
|
|
|
@@ -76,9 +78,11 @@ typedef struct {
|
|
|
76
78
|
// Block read - if your platform needs to do endian-swapping or can only
|
|
77
79
|
// handle aligned reads, do the conversion here
|
|
78
80
|
|
|
79
|
-
FORCE_INLINE uint64_t getblock64 ( const uint64_t * p,
|
|
81
|
+
FORCE_INLINE uint64_t getblock64 ( const uint64_t * p, size_t i )
|
|
80
82
|
{
|
|
81
|
-
|
|
83
|
+
uint64_t res;
|
|
84
|
+
memcpy(&res, p + i, sizeof(res));
|
|
85
|
+
return res;
|
|
82
86
|
}
|
|
83
87
|
|
|
84
88
|
//-----------------------------------------------------------------------------
|
|
@@ -95,7 +99,7 @@ FORCE_INLINE uint64_t fmix64 ( uint64_t k )
|
|
|
95
99
|
return k;
|
|
96
100
|
}
|
|
97
101
|
|
|
98
|
-
FORCE_INLINE void MurmurHash3_x64_128(const void* key,
|
|
102
|
+
FORCE_INLINE void MurmurHash3_x64_128(const void* key, size_t lenBytes, uint64_t seed, HashState& out) {
|
|
99
103
|
static const uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
|
|
100
104
|
static const uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
|
|
101
105
|
|
|
@@ -106,13 +110,13 @@ FORCE_INLINE void MurmurHash3_x64_128(const void* key, int lenBytes, uint64_t se
|
|
|
106
110
|
|
|
107
111
|
// Number of full 128-bit blocks of 16 bytes.
|
|
108
112
|
// Possible exclusion of a remainder of up to 15 bytes.
|
|
109
|
-
const
|
|
113
|
+
const size_t nblocks = lenBytes >> 4; // bytes / 16
|
|
110
114
|
|
|
111
115
|
// Process the 128-bit blocks (the body) into the hash
|
|
112
116
|
const uint64_t* blocks = (const uint64_t*)(data);
|
|
113
|
-
for (
|
|
114
|
-
uint64_t k1 = getblock64(blocks,i*2+0);
|
|
115
|
-
uint64_t k2 = getblock64(blocks,i*2+1);
|
|
117
|
+
for (size_t i = 0; i < nblocks; ++i) { // 16 bytes per block
|
|
118
|
+
uint64_t k1 = getblock64(blocks, i * 2 + 0);
|
|
119
|
+
uint64_t k2 = getblock64(blocks, i * 2 + 1);
|
|
116
120
|
|
|
117
121
|
k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; out.h1 ^= k1;
|
|
118
122
|
out.h1 = ROTL64(out.h1,27);
|
|
@@ -381,7 +381,7 @@ private:
|
|
|
381
381
|
// The following computes an approximation to the lower bound of a Frequentist
|
|
382
382
|
// confidence interval based on the tails of the Binomial distribution.
|
|
383
383
|
static double compute_approx_binomial_lower_bound(unsigned long long num_samples, double theta, unsigned num_std_devs) {
|
|
384
|
-
if (theta == 1) return num_samples;
|
|
384
|
+
if (theta == 1) return static_cast<double>(num_samples);
|
|
385
385
|
if (num_samples == 0) return 0;
|
|
386
386
|
if (num_samples == 1) {
|
|
387
387
|
const double delta = delta_of_num_std_devs[num_std_devs];
|
|
@@ -395,24 +395,24 @@ private:
|
|
|
395
395
|
}
|
|
396
396
|
// at this point we know 2 <= num_samples <= 120
|
|
397
397
|
if (theta > (1 - 1e-5)) { // empirically-determined threshold
|
|
398
|
-
return num_samples;
|
|
398
|
+
return static_cast<double>(num_samples);
|
|
399
399
|
}
|
|
400
400
|
if (theta < (num_samples / 360.0)) { // empirically-determined threshold
|
|
401
401
|
// here we use the Gaussian approximation, but with a modified num_std_devs
|
|
402
|
-
const unsigned index = 3 * num_samples + (num_std_devs - 1);
|
|
402
|
+
const unsigned index = 3 * static_cast<unsigned>(num_samples) + (num_std_devs - 1);
|
|
403
403
|
const double raw_lb = cont_classic_lb(num_samples, theta, lb_equiv_table[index]);
|
|
404
404
|
return raw_lb - 0.5; // fake round down
|
|
405
405
|
}
|
|
406
406
|
// This is the most difficult range to approximate; we will compute an "exact" LB.
|
|
407
407
|
// We know that est <= 360, so special_n_star() shouldn't be ridiculously slow.
|
|
408
408
|
const double delta = delta_of_num_std_devs[num_std_devs];
|
|
409
|
-
return special_n_star(num_samples, theta, delta); // no need to round
|
|
409
|
+
return static_cast<double>(special_n_star(num_samples, theta, delta)); // no need to round
|
|
410
410
|
}
|
|
411
411
|
|
|
412
412
|
// The following computes an approximation to the upper bound of a Frequentist
|
|
413
413
|
// confidence interval based on the tails of the Binomial distribution.
|
|
414
414
|
static double compute_approx_binomial_upper_bound(unsigned long long num_samples, double theta, unsigned num_std_devs) {
|
|
415
|
-
if (theta == 1) return num_samples;
|
|
415
|
+
if (theta == 1) return static_cast<double>(num_samples);
|
|
416
416
|
if (num_samples == 0) {
|
|
417
417
|
const double delta = delta_of_num_std_devs[num_std_devs];
|
|
418
418
|
const double raw_ub = std::log(delta) / std::log(1 - theta);
|
|
@@ -425,18 +425,18 @@ private:
|
|
|
425
425
|
}
|
|
426
426
|
// at this point we know 2 <= num_samples <= 120
|
|
427
427
|
if (theta > (1 - 1e-5)) { // empirically-determined threshold
|
|
428
|
-
return num_samples + 1;
|
|
428
|
+
return static_cast<double>(num_samples + 1);
|
|
429
429
|
}
|
|
430
430
|
if (theta < (num_samples / 360.0)) { // empirically-determined threshold
|
|
431
431
|
// here we use the Gaussian approximation, but with a modified num_std_devs
|
|
432
|
-
const unsigned index = 3 * num_samples + (num_std_devs - 1);
|
|
432
|
+
const unsigned index = 3 * static_cast<unsigned>(num_samples) + (num_std_devs - 1);
|
|
433
433
|
const double raw_ub = cont_classic_ub(num_samples, theta, ub_equiv_table[index]);
|
|
434
434
|
return raw_ub + 0.5; // fake round up
|
|
435
435
|
}
|
|
436
436
|
// This is the most difficult range to approximate; we will compute an "exact" UB.
|
|
437
437
|
// We know that est <= 360, so special_n_prime_f() shouldn't be ridiculously slow.
|
|
438
438
|
const double delta = delta_of_num_std_devs[num_std_devs];
|
|
439
|
-
return special_n_prime_f(num_samples, theta, delta); // no need to round
|
|
439
|
+
return static_cast<double>(special_n_prime_f(num_samples, theta, delta)); // no need to round
|
|
440
440
|
}
|
|
441
441
|
|
|
442
442
|
static void check_theta(double theta) {
|