datasketches 0.2.2 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/README.md +8 -8
- data/ext/datasketches/kll_wrapper.cpp +5 -1
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +27 -5
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +18 -13
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +16 -0
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +10 -6
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +30 -12
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +5 -19
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +103 -44
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +110 -130
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +156 -23
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +17 -6
- data/vendor/datasketches-cpp/python/README.md +57 -50
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +49 -14
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +12 -5
- data/vendor/datasketches-cpp/python/tests/kll_test.py +12 -6
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
- data/vendor/datasketches-cpp/req/CMakeLists.txt +6 -21
- data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +66 -61
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +54 -12
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +45 -34
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +33 -15
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
- data/vendor/datasketches-cpp/setup.py +10 -7
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +92 -23
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +3 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +150 -93
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -2
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +9 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +39 -10
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +446 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +429 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -11
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +29 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +34 -14
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +8 -0
- metadata +34 -12
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9eaa8a17efdbc591b3e56f94650e887babd30dc79d95db3a7986df0261184191
|
|
4
|
+
data.tar.gz: 5544326a0edf165d87373a680d8bf5b80acba2894b9048f92cbdb261fcd66d57
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 5a28c093ecda083762367149800770f59fee8e630c0d983d3f29ed32d027fae2e2515dff243ee11bbd41f4875c7cea622f7bc5cc5d7e73176e785503ed19fc0b
|
|
7
|
+
data.tar.gz: 6b210f2fdca1ae3cbd4e4cbf88e284855014b5a1e1c883085dc96a057da29e370005163ce628e54351c9127b00fae4b7b33a4ca63e6f4b90e0665e93b7742a66
|
data/CHANGELOG.md
CHANGED
data/LICENSE
CHANGED
|
@@ -284,11 +284,48 @@ APPENDIX B: Additional licenses relevant to this product.
|
|
|
284
284
|
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
285
285
|
DEALINGS IN THE SOFTWARE.
|
|
286
286
|
-------------------------------------------------------------
|
|
287
|
-
Code Locations
|
|
287
|
+
Code Locations:
|
|
288
288
|
* https://github.com/apache/datasketches-cpp/blob/master/common/test/catch.hpp
|
|
289
289
|
that is adapted from the above.
|
|
290
290
|
|
|
291
291
|
|
|
292
|
+
=============================================================
|
|
293
|
+
BSD License
|
|
294
|
+
=============================================================
|
|
295
|
+
Original source code:
|
|
296
|
+
https://github.com/pybind/pybind11/blob/master/LICENSE
|
|
297
|
+
|
|
298
|
+
Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
|
|
299
|
+
|
|
300
|
+
Redistribution and use in source and binary forms, with or without
|
|
301
|
+
modification, are permitted provided that the following conditions are met:
|
|
302
|
+
|
|
303
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
304
|
+
list of conditions and the following disclaimer.
|
|
305
|
+
|
|
306
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
307
|
+
this list of conditions and the following disclaimer in the documentation
|
|
308
|
+
and/or other materials provided with the distribution.
|
|
309
|
+
|
|
310
|
+
3. Neither the name of the copyright holder nor the names of its contributors
|
|
311
|
+
may be used to endorse or promote products derived from this software
|
|
312
|
+
without specific prior written permission.
|
|
313
|
+
|
|
314
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
315
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
316
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
317
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
318
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
319
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
320
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
321
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
322
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
323
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
324
|
+
-------------------------------------------------------------
|
|
325
|
+
Code Locations:
|
|
326
|
+
Found only in the convenience binaries distributed from PyPI, which rely
|
|
327
|
+
on pybind11 code during compilation.
|
|
328
|
+
|
|
292
329
|
|
|
293
330
|
=============================================================
|
|
294
331
|
Public Domain
|
|
@@ -297,7 +334,7 @@ APPENDIX B: Additional licenses relevant to this product.
|
|
|
297
334
|
https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
|
|
298
335
|
Placed in the Public Domain by Austin Appleby
|
|
299
336
|
|
|
300
|
-
Code Locations
|
|
337
|
+
Code Locations:
|
|
301
338
|
common/include/MurmurHash3.h
|
|
302
339
|
that is adapted from the above.
|
|
303
340
|
-------------------------------------------------------------
|
|
@@ -305,6 +342,6 @@ APPENDIX B: Additional licenses relevant to this product.
|
|
|
305
342
|
* https://graphics.stanford.edu/~seander/bithacks.html
|
|
306
343
|
* Placed in the Public Domain by Sean Eron Anderson
|
|
307
344
|
|
|
308
|
-
Code Locations
|
|
345
|
+
Code Locations:
|
|
309
346
|
* common/include/ceiling_power_of_2.hpp
|
|
310
347
|
that is adapted from the above.
|
data/NOTICE
CHANGED
data/README.md
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
# DataSketches
|
|
1
|
+
# DataSketches Ruby
|
|
2
2
|
|
|
3
3
|
[DataSketches](https://datasketches.apache.org/) - sketch data structures - for Ruby
|
|
4
4
|
|
|
5
|
-
[](https://github.com/ankane/datasketches/actions)
|
|
5
|
+
[](https://github.com/ankane/datasketches-ruby/actions)
|
|
6
6
|
|
|
7
7
|
## Installation
|
|
8
8
|
|
|
9
9
|
Add this line to your application’s Gemfile:
|
|
10
10
|
|
|
11
11
|
```ruby
|
|
12
|
-
gem
|
|
12
|
+
gem "datasketches"
|
|
13
13
|
```
|
|
14
14
|
|
|
15
15
|
## Sketch Families
|
|
@@ -292,22 +292,22 @@ This library is modeled after the DataSketches [Python API](https://github.com/a
|
|
|
292
292
|
|
|
293
293
|
## History
|
|
294
294
|
|
|
295
|
-
View the [changelog](https://github.com/ankane/datasketches/blob/master/CHANGELOG.md)
|
|
295
|
+
View the [changelog](https://github.com/ankane/datasketches-ruby/blob/master/CHANGELOG.md)
|
|
296
296
|
|
|
297
297
|
## Contributing
|
|
298
298
|
|
|
299
299
|
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
|
300
300
|
|
|
301
|
-
- [Report bugs](https://github.com/ankane/datasketches/issues)
|
|
302
|
-
- Fix bugs and [submit pull requests](https://github.com/ankane/datasketches/pulls)
|
|
301
|
+
- [Report bugs](https://github.com/ankane/datasketches-ruby/issues)
|
|
302
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/datasketches-ruby/pulls)
|
|
303
303
|
- Write, clarify, or fix documentation
|
|
304
304
|
- Suggest or add new features
|
|
305
305
|
|
|
306
306
|
To get started with development:
|
|
307
307
|
|
|
308
308
|
```sh
|
|
309
|
-
git clone --recursive https://github.com/ankane/datasketches.git
|
|
310
|
-
cd datasketches
|
|
309
|
+
git clone --recursive https://github.com/ankane/datasketches-ruby.git
|
|
310
|
+
cd datasketches-ruby
|
|
311
311
|
bundle install
|
|
312
312
|
bundle exec rake compile
|
|
313
313
|
bundle exec rake test
|
|
@@ -33,7 +33,11 @@ void bind_kll_sketch(Rice::Module& m, const char* name) {
|
|
|
33
33
|
.define_method("estimation_mode?", &kll_sketch<T>::is_estimation_mode)
|
|
34
34
|
.define_method("min_value", &kll_sketch<T>::get_min_value)
|
|
35
35
|
.define_method("max_value", &kll_sketch<T>::get_max_value)
|
|
36
|
-
.define_method(
|
|
36
|
+
.define_method(
|
|
37
|
+
"quantile",
|
|
38
|
+
[](kll_sketch<T>& self, double fraction) {
|
|
39
|
+
return self.get_quantile(fraction);
|
|
40
|
+
})
|
|
37
41
|
.define_method(
|
|
38
42
|
"quantiles",
|
|
39
43
|
[](kll_sketch<T>& self, Rice::Object obj) {
|
|
@@ -20,10 +20,26 @@ using Rice::Arg;
|
|
|
20
20
|
|
|
21
21
|
void init_theta(Rice::Module& m) {
|
|
22
22
|
Rice::define_class_under<theta_sketch>(m, "ThetaSketch")
|
|
23
|
-
.define_method(
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
23
|
+
.define_method(
|
|
24
|
+
"empty?",
|
|
25
|
+
[](theta_sketch& self) {
|
|
26
|
+
return self.is_empty();
|
|
27
|
+
})
|
|
28
|
+
.define_method(
|
|
29
|
+
"estimate",
|
|
30
|
+
[](theta_sketch& self) {
|
|
31
|
+
return self.get_estimate();
|
|
32
|
+
})
|
|
33
|
+
.define_method(
|
|
34
|
+
"lower_bound",
|
|
35
|
+
[](theta_sketch& self, uint8_t num_std_devs) {
|
|
36
|
+
return self.get_lower_bound(num_std_devs);
|
|
37
|
+
})
|
|
38
|
+
.define_method(
|
|
39
|
+
"upper_bound",
|
|
40
|
+
[](theta_sketch& self, uint8_t num_std_devs) {
|
|
41
|
+
return self.get_upper_bound(num_std_devs);
|
|
42
|
+
});
|
|
27
43
|
|
|
28
44
|
Rice::define_class_under<compact_theta_sketch, theta_sketch>(m, "CompactThetaSketch")
|
|
29
45
|
.define_singleton_function(
|
data/lib/datasketches/version.rb
CHANGED
|
@@ -15,9 +15,9 @@
|
|
|
15
15
|
# specific language governing permissions and limitations
|
|
16
16
|
# under the License.
|
|
17
17
|
|
|
18
|
-
cmake_minimum_required(VERSION 3.
|
|
18
|
+
cmake_minimum_required(VERSION 3.16.0)
|
|
19
19
|
project(DataSketches
|
|
20
|
-
VERSION
|
|
20
|
+
VERSION 3.4.0
|
|
21
21
|
LANGUAGES CXX)
|
|
22
22
|
|
|
23
23
|
include(GNUInstallDirs)
|
|
@@ -35,6 +35,8 @@ set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
|
|
|
35
35
|
#set(CMAKE_VERBOSE_MAKEFILE ON)
|
|
36
36
|
set(CMAKE_MACOSX_RPATH ON)
|
|
37
37
|
|
|
38
|
+
set(CMAKE_CXX_STANDARD 11)
|
|
39
|
+
|
|
38
40
|
# enable compiler warnings globally
|
|
39
41
|
# derived from https://foonathan.net/blog/2018/10/17/cmake-warnings.html
|
|
40
42
|
# and https://arne-mertz.de/2018/07/cmake-properties-options/
|
|
@@ -104,12 +106,13 @@ add_subdirectory(theta)
|
|
|
104
106
|
add_subdirectory(sampling)
|
|
105
107
|
add_subdirectory(tuple)
|
|
106
108
|
add_subdirectory(req)
|
|
109
|
+
add_subdirectory(quantiles)
|
|
107
110
|
|
|
108
111
|
if (WITH_PYTHON)
|
|
109
112
|
add_subdirectory(python)
|
|
110
113
|
endif()
|
|
111
114
|
|
|
112
|
-
target_link_libraries(datasketches INTERFACE hll cpc kll fi theta sampling)
|
|
115
|
+
target_link_libraries(datasketches INTERFACE hll cpc kll fi theta sampling req quantiles)
|
|
113
116
|
|
|
114
117
|
if (COVERAGE)
|
|
115
118
|
find_program(LCOV_PATH NAMES "lcov")
|
|
@@ -124,11 +127,30 @@ endif()
|
|
|
124
127
|
|
|
125
128
|
# # Installation
|
|
126
129
|
install(TARGETS datasketches
|
|
127
|
-
EXPORT ${
|
|
130
|
+
EXPORT ${PROJECT_NAME}
|
|
128
131
|
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
|
|
129
132
|
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/DataSketches
|
|
130
133
|
)
|
|
131
134
|
|
|
135
|
+
# Packaging
|
|
136
|
+
include(CMakePackageConfigHelpers)
|
|
137
|
+
write_basic_package_version_file(
|
|
138
|
+
"${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
|
|
139
|
+
VERSION ${PROJECT_VERSION}
|
|
140
|
+
COMPATIBILITY SameMajorVersion
|
|
141
|
+
)
|
|
142
|
+
configure_package_config_file(
|
|
143
|
+
cmake/DataSketchesConfig.cmake.in
|
|
144
|
+
"${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
|
|
145
|
+
INSTALL_DESTINATION lib/DataSketches/cmake
|
|
146
|
+
PATH_VARS CMAKE_INSTALL_INCLUDEDIR
|
|
147
|
+
)
|
|
148
|
+
install(EXPORT ${PROJECT_NAME} DESTINATION lib/DataSketches/cmake)
|
|
149
|
+
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfigVersion.cmake"
|
|
150
|
+
"${CMAKE_CURRENT_BINARY_DIR}/DataSketchesConfig.cmake"
|
|
151
|
+
DESTINATION lib/DataSketches/cmake)
|
|
152
|
+
|
|
153
|
+
|
|
132
154
|
#set(CPACK_PROJECT_NAME ${PROJECT_NAME})
|
|
133
155
|
#set(CPACK_PROJECT_VERSION ${PROJECT_VERSION})
|
|
134
|
-
|
|
156
|
+
include(CPack)
|
|
@@ -284,11 +284,48 @@ APPENDIX B: Additional licenses relevant to this product.
|
|
|
284
284
|
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
285
285
|
DEALINGS IN THE SOFTWARE.
|
|
286
286
|
-------------------------------------------------------------
|
|
287
|
-
Code Locations
|
|
287
|
+
Code Locations:
|
|
288
288
|
* https://github.com/apache/datasketches-cpp/blob/master/common/test/catch.hpp
|
|
289
289
|
that is adapted from the above.
|
|
290
290
|
|
|
291
291
|
|
|
292
|
+
=============================================================
|
|
293
|
+
BSD License
|
|
294
|
+
=============================================================
|
|
295
|
+
Original source code:
|
|
296
|
+
https://github.com/pybind/pybind11/blob/master/LICENSE
|
|
297
|
+
|
|
298
|
+
Copyright (c) 2016 Wenzel Jakob <wenzel.jakob@epfl.ch>, All rights reserved.
|
|
299
|
+
|
|
300
|
+
Redistribution and use in source and binary forms, with or without
|
|
301
|
+
modification, are permitted provided that the following conditions are met:
|
|
302
|
+
|
|
303
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
304
|
+
list of conditions and the following disclaimer.
|
|
305
|
+
|
|
306
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
307
|
+
this list of conditions and the following disclaimer in the documentation
|
|
308
|
+
and/or other materials provided with the distribution.
|
|
309
|
+
|
|
310
|
+
3. Neither the name of the copyright holder nor the names of its contributors
|
|
311
|
+
may be used to endorse or promote products derived from this software
|
|
312
|
+
without specific prior written permission.
|
|
313
|
+
|
|
314
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
315
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
316
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
317
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
318
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
319
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
320
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
321
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
322
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
323
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
324
|
+
-------------------------------------------------------------
|
|
325
|
+
Code Locations:
|
|
326
|
+
Found only in the convenience binaries distributed from PyPI, which rely
|
|
327
|
+
on pybind11 code during compilation.
|
|
328
|
+
|
|
292
329
|
|
|
293
330
|
=============================================================
|
|
294
331
|
Public Domain
|
|
@@ -297,7 +334,7 @@ APPENDIX B: Additional licenses relevant to this product.
|
|
|
297
334
|
https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
|
|
298
335
|
Placed in the Public Domain by Austin Appleby
|
|
299
336
|
|
|
300
|
-
Code Locations
|
|
337
|
+
Code Locations:
|
|
301
338
|
common/include/MurmurHash3.h
|
|
302
339
|
that is adapted from the above.
|
|
303
340
|
-------------------------------------------------------------
|
|
@@ -305,7 +342,7 @@ APPENDIX B: Additional licenses relevant to this product.
|
|
|
305
342
|
* https://graphics.stanford.edu/~seander/bithacks.html
|
|
306
343
|
* Placed in the Public Domain by Sean Eron Anderson
|
|
307
344
|
|
|
308
|
-
Code Locations
|
|
345
|
+
Code Locations:
|
|
309
346
|
* common/include/ceiling_power_of_2.hpp
|
|
310
347
|
that is adapted from the above.
|
|
311
348
|
|
|
@@ -25,18 +25,85 @@ Installing the latest cmake on OSX: brew install cmake
|
|
|
25
25
|
Building and running unit tests using cmake for OSX and Linux:
|
|
26
26
|
|
|
27
27
|
```
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
$ make
|
|
31
|
-
$ make test
|
|
28
|
+
$ cmake -S . -B build/Release -DCMAKE_BUILD_TYPE=Release
|
|
29
|
+
$ cmake --build build/Release -t all test
|
|
32
30
|
```
|
|
33
31
|
|
|
34
32
|
Building and running unit tests using cmake for Windows from the command line:
|
|
35
33
|
|
|
36
34
|
```
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
35
|
+
$ cd build
|
|
36
|
+
$ cmake ..
|
|
37
|
+
$ cd ..
|
|
38
|
+
$ cmake --build build --config Release
|
|
39
|
+
$ cmake --build build --config Release --target RUN_TESTS
|
|
42
40
|
```
|
|
41
|
+
|
|
42
|
+
To install a local distribution (OSX and Linux), use the following command. The
|
|
43
|
+
CMAKE_INSTALL_PREFIX variable controls the destination. If not specified, it
|
|
44
|
+
defaults to installing in /usr/local (/usr/local/include, /usr/local/lib, etc). In the command below,
|
|
45
|
+
the installation will be in /tmp/install/DataSketches (/tmp/install/DataSketches/include,
|
|
46
|
+
/tmp/install/DataSketches/lib, etc)
|
|
47
|
+
|
|
48
|
+
```
|
|
49
|
+
$ cmake -S . -B build/Release -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/tmp/install/DataSketches
|
|
50
|
+
$ cmake --build build/Release -t install
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
To generate an installable package using CMake's built-in CPack packaging tool,
|
|
54
|
+
use the following command. The type of packaging is controlled by the CPACK_GENERATOR
|
|
55
|
+
variable (a semicolon-separated list). CMake usually supports packaging types such as RPM,
|
|
56
|
+
DEB, STGZ, TGZ, TZ, ZIP, etc.
|
|
57
|
+
|
|
58
|
+
```
|
|
59
|
+
$ cmake3 -S . -B build/Release -DCMAKE_BUILD_TYPE=Release -DCPACK_GENERATOR="RPM;STGZ;TGZ"
|
|
60
|
+
$ cmake3 --build build/Release -t package
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
The DataSketches project can be included in other projects' CMakeLists.txt files in one of two ways.
|
|
64
|
+
If DataSketches has been installed on the host (using an RPM, DEB, "make install" into /usr/local, or some
|
|
65
|
+
other way), then CMake's `find_package` command can be used like this:
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
find_package(DataSketches 3.2 REQUIRED)
|
|
69
|
+
target_link_libraries(my_dependent_target PUBLIC ${DATASKETCHES_LIB})
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
When used with find_package, DataSketches exports several variables, including
|
|
73
|
+
|
|
74
|
+
- `DATASKETCHES_VERSION`: The version number of the datasketches package that was imported.
|
|
75
|
+
- `DATASKETCHES_INCLUDE_DIR`: The directory that should be added to access DataSketches include files.
|
|
76
|
+
Because cmake automatically includes the interface directories for included target libraries when
|
|
77
|
+
using `target_link_libraries`, under normal circumstances there will be no need to include this directly.
|
|
78
|
+
- `DATASKETCHES_LIB`: The name of the DataSketches target to include as a dependency. Projects pulling
|
|
79
|
+
in DataSketches should reference this with `target_link_libraries` in order to set up all the correct dependencies
|
|
80
|
+
and include paths.
|
|
81
|
+
|
|
82
|
+
If you don't have DataSketches installed locally, dependent projects can pull it directly
|
|
83
|
+
from GitHub using CMake's `ExternalProject` module. The code would look something like this:
|
|
84
|
+
|
|
85
|
+
```
|
|
86
|
+
cmake_policy(SET CMP0097 NEW)
|
|
87
|
+
include(ExternalProject)
|
|
88
|
+
ExternalProject_Add(datasketches
|
|
89
|
+
GIT_REPOSITORY https://github.com/apache/datasketches-cpp.git
|
|
90
|
+
GIT_TAG 3.2.0
|
|
91
|
+
GIT_SHALLOW true
|
|
92
|
+
GIT_SUBMODULES ""
|
|
93
|
+
INSTALL_DIR /tmp/datasketches-prefix
|
|
94
|
+
CMAKE_ARGS -DBUILD_TESTS=OFF -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=/tmp/datasketches-prefix
|
|
95
|
+
|
|
96
|
+
# Override the install command to add DESTDIR
|
|
97
|
+
# This is necessary to work around an oddity in the RPM (but not other) package
|
|
98
|
+
# generation, as CMake otherwise picks up the Datasketch files when building
|
|
99
|
+
# an RPM for a dependent package. (RPM scans the directory for files in addition to installing
|
|
100
|
+
# those files referenced in an "install" rule in the cmake file)
|
|
101
|
+
INSTALL_COMMAND env DESTDIR= ${CMAKE_COMMAND} --build . --target install
|
|
102
|
+
)
|
|
103
|
+
ExternalProject_Get_property(datasketches INSTALL_DIR)
|
|
104
|
+
set(datasketches_INSTALL_DIR ${INSTALL_DIR})
|
|
105
|
+
message("Source dir of datasketches = ${datasketches_INSTALL_DIR}")
|
|
106
|
+
target_include_directories(my_dependent_target
|
|
107
|
+
PRIVATE ${datasketches_INSTALL_DIR}/include/DataSketches)
|
|
108
|
+
add_dependencies(my_dependent_target datasketches)
|
|
109
|
+
```
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
set(DATASKETCHES_VERSION "@PROJECT_VERSION@")
|
|
2
|
+
|
|
3
|
+
@PACKAGE_INIT@
|
|
4
|
+
|
|
5
|
+
include("${CMAKE_CURRENT_LIST_DIR}/DataSketches.cmake")
|
|
6
|
+
|
|
7
|
+
set_and_check(DATASKETCHES_INCLUDE_DIR "@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@/DataSketches")
|
|
8
|
+
set(DATASKETCHES_LIB "datasketches")
|
|
9
|
+
|
|
10
|
+
check_required_components("@PROJECT_NAME@")
|
|
@@ -29,17 +29,22 @@ target_include_directories(common
|
|
|
29
29
|
|
|
30
30
|
target_compile_features(common INTERFACE cxx_std_11)
|
|
31
31
|
|
|
32
|
-
|
|
33
|
-
INTERFACE
|
|
34
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/common_defs.hpp
|
|
35
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/memory_operations.hpp
|
|
36
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/MurmurHash3.h
|
|
37
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/serde.hpp
|
|
38
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/count_zeros.hpp
|
|
39
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/inv_pow2_table.hpp
|
|
40
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/binomial_bounds.hpp
|
|
41
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_back_inserter.hpp
|
|
42
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/conditional_forward.hpp
|
|
43
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/ceiling_power_of_2.hpp
|
|
44
|
-
)
|
|
32
|
+
install(TARGETS common EXPORT ${PROJECT_NAME})
|
|
45
33
|
|
|
34
|
+
install(FILES
|
|
35
|
+
include/common_defs.hpp
|
|
36
|
+
include/memory_operations.hpp
|
|
37
|
+
include/MurmurHash3.h
|
|
38
|
+
include/serde.hpp
|
|
39
|
+
include/count_zeros.hpp
|
|
40
|
+
include/inv_pow2_table.hpp
|
|
41
|
+
include/binomial_bounds.hpp
|
|
42
|
+
include/conditional_back_inserter.hpp
|
|
43
|
+
include/conditional_forward.hpp
|
|
44
|
+
include/ceiling_power_of_2.hpp
|
|
45
|
+
include/bounds_binomial_proportions.hpp
|
|
46
|
+
include/kolmogorov_smirnov.hpp
|
|
47
|
+
include/kolmogorov_smirnov_impl.hpp
|
|
48
|
+
include/quantile_sketch_sorted_view.hpp
|
|
49
|
+
include/quantile_sketch_sorted_view_impl.hpp
|
|
50
|
+
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|
|
@@ -24,14 +24,30 @@
|
|
|
24
24
|
#include <string>
|
|
25
25
|
#include <memory>
|
|
26
26
|
#include <iostream>
|
|
27
|
+
#include <random>
|
|
28
|
+
#include <chrono>
|
|
27
29
|
|
|
28
30
|
namespace datasketches {
|
|
29
31
|
|
|
30
32
|
static const uint64_t DEFAULT_SEED = 9001;
|
|
31
33
|
|
|
34
|
+
enum resize_factor { X1 = 0, X2, X4, X8 };
|
|
35
|
+
|
|
32
36
|
template<typename A> using AllocChar = typename std::allocator_traits<A>::template rebind_alloc<char>;
|
|
33
37
|
template<typename A> using string = std::basic_string<char, std::char_traits<char>, AllocChar<A>>;
|
|
34
38
|
|
|
39
|
+
// random bit
|
|
40
|
+
static std::independent_bits_engine<std::mt19937, 1, uint32_t>
|
|
41
|
+
random_bit(static_cast<uint32_t>(std::chrono::system_clock::now().time_since_epoch().count()));
|
|
42
|
+
|
|
43
|
+
// common random declarations
|
|
44
|
+
namespace random_utils {
|
|
45
|
+
static std::random_device rd; // possibly unsafe in MinGW with GCC < 9.2
|
|
46
|
+
static std::mt19937_64 rand(rd());
|
|
47
|
+
static std::uniform_real_distribution<> next_double(0.0, 1.0);
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
|
|
35
51
|
// utility function to hide unused compiler warning
|
|
36
52
|
// usually has no additional cost
|
|
37
53
|
template<typename T> void unused(T&&...) {}
|
|
@@ -25,7 +25,8 @@ namespace datasketches {
|
|
|
25
25
|
class kolmogorov_smirnov {
|
|
26
26
|
public:
|
|
27
27
|
/**
|
|
28
|
-
* Computes the raw delta area between two
|
|
28
|
+
* Computes the raw delta area between two quantile sketches for the Kolmogorov-Smirnov Test.
|
|
29
|
+
* Will work for a type-matched pair of KLL or Quantiles sketches of the same parameterized type T.
|
|
29
30
|
* @param sketch1 first quantile sketch (KLL or Quantiles)
|
|
30
31
|
* @param sketch2 second quantile sketch (KLL or Quantiles), of the same type as sketch1
|
|
31
32
|
* @return the raw delta between the two quantile sketches
|
|
@@ -37,6 +38,7 @@ public:
|
|
|
37
38
|
* Computes the adjusted delta area threshold for the Kolmogorov-Smirnov Test.
|
|
38
39
|
* Adjusts the computed threshold by the error epsilons of the two given sketches.
|
|
39
40
|
* See <a href="https://en.wikipedia.org/wiki/Kolmogorov-Smirnov_test">Kolmogorov–Smirnov Test</a>
|
|
41
|
+
* Will work for a type-matched pair of KLL or Quantiles sketches of the same parameterized type T.
|
|
40
42
|
* @param sketch1 first quantile sketch (KLL or Quantiles)
|
|
41
43
|
* @param sketch2 second quantile sketch (KLL or Quantiles), of the same type as sketch1
|
|
42
44
|
* @param p Target p-value. Typically .001 to .1, e.g., .05.
|
|
@@ -46,7 +48,8 @@ public:
|
|
|
46
48
|
static double threshold(const Sketch& sketch1, const Sketch& sketch2, double p);
|
|
47
49
|
|
|
48
50
|
/**
|
|
49
|
-
* Performs the Kolmogorov-Smirnov Test between two
|
|
51
|
+
* Performs the Kolmogorov-Smirnov Test between two quantile sketches.
|
|
52
|
+
* Will work for a type-matched pair of KLL or Quantiles sketches of the same parameterized type T.
|
|
50
53
|
* Note: if the given sketches have insufficient data or if the sketch sizes are too small,
|
|
51
54
|
* this will return false.
|
|
52
55
|
* @param sketch1 first quantile sketch (KLL or Quantiles)
|
|
@@ -57,7 +60,6 @@ public:
|
|
|
57
60
|
*/
|
|
58
61
|
template<typename Sketch>
|
|
59
62
|
static bool test(const Sketch& sketch1, const Sketch& sketch2, double p);
|
|
60
|
-
|
|
61
63
|
};
|
|
62
64
|
|
|
63
65
|
} /* namespace datasketches */
|
|
@@ -20,39 +20,36 @@
|
|
|
20
20
|
#ifndef KOLMOGOROV_SMIRNOV_IMPL_HPP_
|
|
21
21
|
#define KOLMOGOROV_SMIRNOV_IMPL_HPP_
|
|
22
22
|
|
|
23
|
-
|
|
23
|
+
#include <cmath>
|
|
24
|
+
#include <algorithm>
|
|
24
25
|
|
|
25
|
-
|
|
26
|
-
template<typename T, typename C, typename S, typename A>
|
|
27
|
-
kll_quantile_calculator<T, C, A> make_quantile_calculator(const kll_sketch<T, C, S, A>& sketch) {
|
|
28
|
-
return kll_quantile_calculator<T, C, A>(sketch);
|
|
29
|
-
}
|
|
26
|
+
namespace datasketches {
|
|
30
27
|
|
|
31
28
|
template<typename Sketch>
|
|
32
29
|
double kolmogorov_smirnov::delta(const Sketch& sketch1, const Sketch& sketch2) {
|
|
33
|
-
|
|
34
|
-
auto
|
|
35
|
-
auto
|
|
36
|
-
auto it1 =
|
|
37
|
-
auto it2 =
|
|
30
|
+
auto comparator = sketch1.get_comparator(); // assuming the same comparator in sketch2
|
|
31
|
+
auto view1 = sketch1.get_sorted_view(true);
|
|
32
|
+
auto view2 = sketch2.get_sorted_view(true);
|
|
33
|
+
auto it1 = view1.begin();
|
|
34
|
+
auto it2 = view2.begin();
|
|
38
35
|
const auto n1 = sketch1.get_n();
|
|
39
36
|
const auto n2 = sketch2.get_n();
|
|
40
37
|
double delta = 0;
|
|
41
|
-
while (it1 !=
|
|
38
|
+
while (it1 != view1.end() && it2 != view2.end()) {
|
|
42
39
|
const double norm_cum_wt1 = static_cast<double>((*it1).second) / n1;
|
|
43
40
|
const double norm_cum_wt2 = static_cast<double>((*it2).second) / n2;
|
|
44
41
|
delta = std::max(delta, std::abs(norm_cum_wt1 - norm_cum_wt2));
|
|
45
|
-
if (
|
|
42
|
+
if (comparator((*it1).first, (*it2).first)) {
|
|
46
43
|
++it1;
|
|
47
|
-
} else if (
|
|
44
|
+
} else if (comparator((*it2).first, (*it1).first)) {
|
|
48
45
|
++it2;
|
|
49
46
|
} else {
|
|
50
47
|
++it1;
|
|
51
48
|
++it2;
|
|
52
49
|
}
|
|
53
50
|
}
|
|
54
|
-
const double norm_cum_wt1 = it1 ==
|
|
55
|
-
const double norm_cum_wt2 = it2 ==
|
|
51
|
+
const double norm_cum_wt1 = it1 == view1.end() ? 1 : static_cast<double>((*it1).second) / n1;
|
|
52
|
+
const double norm_cum_wt2 = it2 == view2.end() ? 1 : static_cast<double>((*it2).second) / n2;
|
|
56
53
|
delta = std::max(delta, std::abs(norm_cum_wt1 - norm_cum_wt2));
|
|
57
54
|
return delta;
|
|
58
55
|
}
|