datasketches 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/NOTICE +1 -1
- data/README.md +0 -2
- data/ext/datasketches/cpc_wrapper.cpp +2 -2
- data/ext/datasketches/kll_wrapper.cpp +0 -10
- data/lib/datasketches/version.rb +1 -1
- data/lib/datasketches.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
- data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
- data/vendor/datasketches-cpp/Doxyfile +2827 -0
- data/vendor/datasketches-cpp/LICENSE +0 -76
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +1 -3
- data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
- data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
- data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
- data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
- data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
- data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
- data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
- data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
- data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
- data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
- data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
- data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
- data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
- data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
- data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
- data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
- data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
- data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
- data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
- data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
- data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/version.cfg.in +1 -1
- metadata +47 -93
- data/vendor/datasketches-cpp/MANIFEST.in +0 -39
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/pyproject.toml +0 -23
- data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
- data/vendor/datasketches-cpp/python/README.md +0 -85
- data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
- data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
- data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
- data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
- data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
- data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
- data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
- data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
- data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
- data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
- data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
- data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
- data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
- data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
- data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
- data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +0 -110
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tox.ini +0 -26
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <ebpps_sketch.hpp>
|
|
21
|
+
|
|
22
|
+
#include <catch2/catch.hpp>
|
|
23
|
+
|
|
24
|
+
#include <vector>
|
|
25
|
+
#include <string>
|
|
26
|
+
#include <sstream>
|
|
27
|
+
#include <fstream>
|
|
28
|
+
#include <cmath>
|
|
29
|
+
#include <random>
|
|
30
|
+
#include <stdexcept>
|
|
31
|
+
|
|
32
|
+
// Directory prefix for binary test inputs: the TEST_BINARY_INPUT_PATH macro
// when it is defined, otherwise the relative "test/" directory.
#ifndef TEST_BINARY_INPUT_PATH
static std::string testBinaryInputPath = "test/";
#else
static std::string testBinaryInputPath = TEST_BINARY_INPUT_PATH;
#endif
|
|
37
|
+
|
|
38
|
+
namespace datasketches {
|
|
39
|
+
|
|
40
|
+
// Absolute tolerance passed to Approx(...).margin(...) when the tests in this
// file compare floating-point weights.
static constexpr double EPS = 1e-13;
|
|
41
|
+
|
|
42
|
+
// Returns a sketch constructed with the given k that has been updated with
// the integers 0, 1, ..., n-1, each with weight 1.0.
static ebpps_sketch<int> create_unweighted_sketch(uint32_t k, uint64_t n) {
  ebpps_sketch<int> sketch(k);
  uint64_t item = 0;
  while (item < n) {
    sketch.update(static_cast<int>(item), 1.0);
    ++item;
  }
  return sketch;
}
|
|
49
|
+
|
|
50
|
+
template<typename T, typename A>
|
|
51
|
+
static void check_if_equal(ebpps_sketch<T, A>& sk1, ebpps_sketch<T, A>& sk2) {
|
|
52
|
+
REQUIRE(sk1.get_k() == sk2.get_k());
|
|
53
|
+
REQUIRE(sk1.get_n() == sk2.get_n());
|
|
54
|
+
REQUIRE(sk1.get_c() == sk2.get_c());
|
|
55
|
+
REQUIRE(sk1.get_cumulative_weight() == sk2.get_cumulative_weight());
|
|
56
|
+
|
|
57
|
+
auto it1 = sk1.begin();
|
|
58
|
+
auto it2 = sk2.begin();
|
|
59
|
+
size_t count = 0;
|
|
60
|
+
|
|
61
|
+
while ((it1 != sk1.end()) && (it2 != sk2.end())) {
|
|
62
|
+
REQUIRE(*it1 == *it2);
|
|
63
|
+
++it1;
|
|
64
|
+
++it2;
|
|
65
|
+
++count;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
REQUIRE(((count == std::floor(sk1.get_c())) || (count == std::ceil(sk1.get_c()))));
|
|
69
|
+
|
|
70
|
+
// if c != floor(c) one sketch may not have reached the end,
|
|
71
|
+
// but that's not testable from the external API
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
// Constructing a sketch with k = 0 or with k above MAX_K must throw.
TEST_CASE("ebpps sketch: invalid k", "[ebpps_sketch]") {
  const auto above_max_k = ebpps_constants::MAX_K + 1;
  REQUIRE_THROWS_AS(ebpps_sketch<int>(0), std::invalid_argument);
  REQUIRE_THROWS_AS(ebpps_sketch<int>(above_max_k), std::invalid_argument);
}
|
|
78
|
+
|
|
79
|
+
// Checks how update() treats unusable weights: a zero weight leaves the
// sketch unchanged, while negative, infinite and NaN weights must throw
// std::invalid_argument.
TEST_CASE("ebpps sketch: invalid weights", "[ebpps_sketch]") {
  const uint32_t k = 100;
  ebpps_sketch<int> sk = create_unweighted_sketch(k, 3);
  REQUIRE(sk.get_n() == 3);
  REQUIRE(sk.get_cumulative_weight() == 3.0);

  // a zero weight is a no-op: n and the cumulative weight stay the same
  sk.update(-1, 0.0);
  REQUIRE(sk.get_n() == 3);
  REQUIRE(sk.get_cumulative_weight() == 3.0);

  REQUIRE_THROWS_AS(sk.update(-2, -1.0), std::invalid_argument);

  ebpps_sketch<float> sk2(k);
  REQUIRE_THROWS_AS(sk2.update(-2, std::numeric_limits<float>::infinity()), std::invalid_argument);
  // numeric_limits instead of the unqualified C function nanf(""): <cmath> is
  // only guaranteed to declare std::nanf, not ::nanf
  REQUIRE_THROWS_AS(sk2.update(-2, std::numeric_limits<float>::quiet_NaN()), std::invalid_argument);
}
|
|
94
|
+
|
|
95
|
+
// Feeds 0, k and 10*k unit-weight items into a sketch and checks the reported
// state and the range of the sampled values after each step.
TEST_CASE("ebpps sketch: insert items", "[ebpps_sketch]") {
  const uint32_t k = 5;

  // no items: every getter reports the empty state
  size_t n = 0;
  ebpps_sketch<int> sk = create_unweighted_sketch(k, n);
  REQUIRE(sk.get_allocator() == std::allocator<int>());
  REQUIRE(sk.get_k() == k);
  REQUIRE(sk.get_n() == 0);
  REQUIRE(sk.get_c() == 0.0);
  REQUIRE(sk.get_cumulative_weight() == 0.0);
  REQUIRE(sk.is_empty());

  // exactly k items
  n = k;
  sk = create_unweighted_sketch(k, n);
  REQUIRE_FALSE(sk.is_empty());
  REQUIRE(sk.get_n() == n);
  REQUIRE(sk.get_cumulative_weight() == static_cast<double>(n));
  for (const int sampled : sk.get_result()) {
    REQUIRE(sampled < static_cast<int>(n));
  }

  // ten times k items
  n = k * 10;
  sk = create_unweighted_sketch(k, n);
  REQUIRE_FALSE(sk.is_empty());
  REQUIRE(sk.get_n() == n);
  REQUIRE(sk.get_cumulative_weight() == static_cast<double>(n));

  // all weights are equal, so the sample is expected to hold exactly k items
  auto result = sk.get_result();
  REQUIRE(result.size() == sk.get_k());
  for (const int sampled : sk.get_result()) {
    REQUIRE(sampled < static_cast<int>(n));
  }
}
|
|
125
|
+
|
|
126
|
+
// Round-trips a string sketch through the byte-array and the stream
// serialization APIs in three states.
TEST_CASE("ebpps sketch: serialize/deserialize string", "[ebpps_sketch]") {
  // Because C <= k, this sketch has no exact-vs-estimation distinction like
  // other sketches do. The serialization cases that matter are: empty,
  // non-empty without a partial item, and non-empty with a partial item.
  const uint32_t k = 10;
  ebpps_sketch<std::string> sk(k);

  // case 1: empty sketch
  auto serialized = sk.serialize();
  REQUIRE(serialized.size() == sk.get_serialized_size_bytes());
  // one byte short of the full image must be rejected
  REQUIRE_THROWS_AS(ebpps_sketch<std::string>::deserialize(serialized.data(), serialized.size()-1), std::out_of_range);
  auto from_bytes = ebpps_sketch<std::string>::deserialize(serialized.data(), serialized.size());
  check_if_equal(sk, from_bytes);

  std::stringstream stream(std::ios::in | std::ios::out | std::ios::binary);
  sk.serialize(stream);
  auto from_stream = ebpps_sketch<std::string>::deserialize(stream);
  check_if_equal(sk, from_stream);
  check_if_equal(from_bytes, from_stream); // should be redundant

  for (uint32_t i = 0; i < k; ++i) {
    sk.update(std::to_string(i));
  }

  // case 2: non-empty, no partial item
  serialized = sk.serialize();
  REQUIRE(serialized.size() == sk.get_serialized_size_bytes());
  REQUIRE_THROWS_AS(ebpps_sketch<std::string>::deserialize(serialized.data(), serialized.size()-1), std::out_of_range);
  from_bytes = ebpps_sketch<std::string>::deserialize(serialized.data(), serialized.size());
  check_if_equal(sk, from_bytes);

  stream.str("");
  sk.serialize(stream);
  from_stream = ebpps_sketch<std::string>::deserialize(stream);
  check_if_equal(sk, from_stream);
  check_if_equal(from_bytes, from_stream); // should be redundant

  // case 3: non-empty with a partial item
  sk.update(std::to_string(2 * k), 2.5);
  REQUIRE(sk.get_cumulative_weight() == Approx(k + 2.5).margin(EPS));
  serialized = sk.serialize();
  REQUIRE(serialized.size() == sk.get_serialized_size_bytes());
  REQUIRE_THROWS_AS(ebpps_sketch<std::string>::deserialize(serialized.data(), serialized.size()-1), std::out_of_range);
  from_bytes = ebpps_sketch<std::string>::deserialize(serialized.data(), serialized.size());
  check_if_equal(sk, from_bytes);

  stream.str("");
  sk.serialize(stream);
  from_stream = ebpps_sketch<std::string>::deserialize(stream);
  check_if_equal(sk, from_stream);
  check_if_equal(from_bytes, from_stream); // should be redundant
}
|
|
177
|
+
|
|
178
|
+
// Round-trips a uint32_t sketch through the byte-array and the stream
// serialization APIs, then checks that reset() returns it to the empty state.
TEST_CASE("ebpps sketch: serialize/deserialize ints", "[ebpps_sketch]") {
  const uint32_t k = 10;
  ebpps_sketch<uint32_t> sk(k);

  // k default-weight items followed by one item of weight 3.5
  for (uint32_t item = 0; item < k; ++item) {
    sk.update(item);
  }
  sk.update(2 * k, 3.5);
  REQUIRE(sk.get_cumulative_weight() == Approx(k + 3.5).margin(EPS));

  // byte-array round trip; an image one byte short must be rejected
  auto serialized = sk.serialize();
  REQUIRE(serialized.size() == sk.get_serialized_size_bytes());
  REQUIRE_THROWS_AS(ebpps_sketch<uint32_t>::deserialize(serialized.data(), serialized.size()-1), std::out_of_range);
  auto from_bytes = ebpps_sketch<uint32_t>::deserialize(serialized.data(), serialized.size());
  check_if_equal(sk, from_bytes);

  // stream round trip
  std::stringstream stream(std::ios::in | std::ios::out | std::ios::binary);
  sk.serialize(stream);
  auto from_stream = ebpps_sketch<uint32_t>::deserialize(stream);
  check_if_equal(sk, from_stream);
  check_if_equal(from_bytes, from_stream); // should be redundant

  // reset keeps k and clears everything else
  sk.reset();
  REQUIRE(sk.get_k() == k);
  REQUIRE(sk.get_n() == 0);
  REQUIRE(sk.get_c() == 0.0);
  REQUIRE(sk.get_cumulative_weight() == 0.0);
  REQUIRE(sk.is_empty());
}
|
|
206
|
+
|
|
207
|
+
// Merges a sketch built with k into one built with k / 2, once from an lvalue
// and once from an rvalue, and checks k, n, c and the cumulative weight of
// the merge target.
TEST_CASE("ebpps sketch: merge large into small", "[ebpps_sketch]") {
  const uint32_t k = 100;
  const uint32_t half_k = k / 2;

  // lvalue merge
  ebpps_sketch<int> sk1(half_k);
  sk1.update(-1, k / 10.0); // a single heavy item, lighter than all of sk2
  ebpps_sketch<int> sk2 = create_unweighted_sketch(k, k);

  sk1.merge(sk2);
  REQUIRE(sk1.get_k() == half_k);
  REQUIRE(sk1.get_n() == k + 1);
  REQUIRE(sk1.get_c() < k);
  REQUIRE(sk1.get_cumulative_weight() == Approx(1.1 * k).margin(EPS));

  // the lvalue merge should have left sk2 unchanged
  REQUIRE(sk2.get_n() == k);
  REQUIRE(sk2.get_c() == Approx(k).margin(EPS));

  // rvalue merge
  sk1 = create_unweighted_sketch(half_k, 0);
  sk1.update(-1, k / 4.0);
  sk1.update(-2, k / 8.0);

  sk1.merge(std::move(sk2));
  REQUIRE(sk1.get_k() == half_k);
  REQUIRE(sk1.get_n() == k + 2);
  REQUIRE(sk1.get_c() < k);
  // cumulative weight is now (0.25 + 0.125 + 1.0) k = 1.375 k
  REQUIRE(sk1.get_cumulative_weight() == Approx(1.375 * k).margin(EPS));
}
|
|
236
|
+
|
|
237
|
+
// Merges a sketch built with k / 2 into one built with k, once from an lvalue
// and once from an rvalue, and checks k, n, c and the cumulative weight of
// the merge target.
TEST_CASE("ebpps sketch: merge small into large", "[ebpps_sketch]") {
  const uint32_t k = 100;
  const uint32_t half_k = k / 2;

  // lvalue merge
  ebpps_sketch<int> sk1 = create_unweighted_sketch(k, k);
  ebpps_sketch<int> sk2(half_k);
  sk2.update(-1, k / 10.0); // a single heavy item, lighter than all of sk1

  sk1.merge(sk2);
  REQUIRE(sk1.get_k() == half_k);
  REQUIRE(sk1.get_n() == k + 1);
  REQUIRE(sk1.get_c() < k);
  REQUIRE(sk1.get_cumulative_weight() == Approx(1.1 * k).margin(EPS));

  // rvalue merge
  sk1 = create_unweighted_sketch(k, 3 * k / 2);
  // the lvalue merge should have left sk2 unchanged
  REQUIRE(sk2.get_n() == 1);
  REQUIRE(sk2.get_c() == 1.0);
  sk2.update(-2, k / 10.0);

  sk1.merge(std::move(sk2));
  REQUIRE(sk1.get_k() == half_k);
  REQUIRE(sk1.get_n() == (3 * k / 2) + 2);
  REQUIRE(sk1.get_c() < k);
  // cumulative weight is now (1.5 + 0.2) k
  REQUIRE(sk1.get_cumulative_weight() == Approx(1.7 * k).margin(EPS));
}
|
|
265
|
+
|
|
266
|
+
}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch2/catch.hpp>
|
|
21
|
+
#include <fstream>
|
|
22
|
+
#include <var_opt_sketch.hpp>
|
|
23
|
+
|
|
24
|
+
namespace datasketches {
|
|
25
|
+
|
|
26
|
+
// assume the binary sketches for this test have been generated by datasketches-java code
|
|
27
|
+
// in the subdirectory called "java" in the root directory of this project
|
|
28
|
+
static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";
|
|
29
|
+
|
|
30
|
+
// Deserializes one Java-generated sketch file per stream length n and checks
// emptiness and the number of retained samples.
TEST_CASE("var opt sketch long", "[serde_compat]") {
  const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
  for (const unsigned n: n_arr) {
    std::ifstream is;
    // make a missing or unreadable file fail the test with an exception
    is.exceptions(std::ios::failbit | std::ios::badbit);
    is.open(testBinaryInputPath + "varopt_sketch_long_n" + std::to_string(n) + "_java.sk", std::ios::binary);
    // int64_t rather than long: a Java long is 64 bits wide, whereas C++ long
    // is only 32 bits on LLP64 platforms such as 64-bit Windows. This also
    // matches the item type used for the Java long sampling file in this file.
    // NOTE(review): presumably the default serde reads sizeof(T) bytes per item - confirm.
    const auto sketch = var_opt_sketch<int64_t>::deserialize(is);
    REQUIRE(sketch.is_empty() == (n == 0));
    // streams longer than 10 items are expected to retain 32 samples
    REQUIRE(sketch.get_num_samples() == (n > 10 ? 32 : n));
  }
}
|
|
41
|
+
|
|
42
|
+
// Deserializes a Java-generated string sketch and checks its parameters and
// the total weight reported by estimate_subset_sum.
TEST_CASE("var opt sketch: deserialize exact from java", "[serde_compat]") {
  const double EPS = 1e-13;
  std::ifstream is;
  // make a missing or unreadable file fail the test with an exception
  is.exceptions(std::ios::failbit | std::ios::badbit);
  is.open(testBinaryInputPath + "varopt_sketch_string_exact_java.sk", std::ios::binary);
  const auto sketch = var_opt_sketch<std::string>::deserialize(is);
  REQUIRE_FALSE(sketch.is_empty());
  REQUIRE(sketch.get_k() == 1024);
  REQUIRE(sketch.get_n() == 200);
  REQUIRE(sketch.get_num_samples() == 200);
  // the predicate accepts every item; it takes the string by const reference
  // so that evaluating it does not copy each item
  const subset_summary ss = sketch.estimate_subset_sum([](const std::string&){ return true; });

  // expected total weight: the sum of 1000 / i for i = 1..200
  double tgt_wt = 0.0;
  for (int i = 1; i <= 200; ++i) { tgt_wt += 1000.0 / i; }
  REQUIRE(ss.total_sketch_weight == Approx(tgt_wt).margin(EPS));
}
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
// Cross-language compatibility check: reads "varopt_sketch_long_sampling_java.sk"
// and verifies the sketch parameters as well as subset-sum estimates for all
// items, for the negative items and for the non-negative items.
TEST_CASE("var opt sketch: deserialize sampling from java", "[serde_compat]") {
  const double tolerance = 1e-13;

  std::ifstream input;
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(testBinaryInputPath + "varopt_sketch_long_sampling_java.sk", std::ios::binary);
  const auto sk = var_opt_sketch<int64_t>::deserialize(input);

  REQUIRE_FALSE(sk.is_empty());
  REQUIRE(sk.get_k() == 1024);
  REQUIRE(sk.get_n() == 2003);
  REQUIRE(sk.get_num_samples() == sk.get_k());

  // predicates used to slice the samples by sign of the item
  const auto accept_all = [](int64_t) { return true; };
  const auto is_negative = [](int64_t value) { return value < 0; };
  const auto is_non_negative = [](int64_t value) { return value >= 0; };

  const subset_summary everything = sk.estimate_subset_sum(accept_all);
  REQUIRE(everything.estimate == Approx(332000.0).margin(tolerance));
  REQUIRE(everything.total_sketch_weight == Approx(332000.0).margin(tolerance));

  const subset_summary negatives = sk.estimate_subset_sum(is_negative);
  REQUIRE(negatives.estimate == 330000.0); // heavy item, weight is exact

  const subset_summary non_negatives = sk.estimate_subset_sum(is_non_negative);
  REQUIRE(non_negatives.estimate == Approx(2000.0).margin(tolerance));
}
|
|
80
|
+
|
|
81
|
+
} /* namespace datasketches */
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch2/catch.hpp>
|
|
21
|
+
#include <fstream>
|
|
22
|
+
#include <var_opt_sketch.hpp>
|
|
23
|
+
|
|
24
|
+
namespace datasketches {
|
|
25
|
+
|
|
26
|
+
// Generates one serialized sketch per n, named "varopt_sketch_long_n<n>_cpp.sk",
// in the current working directory. Each sketch has k = 32 and is fed the
// items 1..n with the default weight.
TEST_CASE("varopt sketch long generate", "[serialize_for_java]") {
  const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
  for (const unsigned n: n_arr) {
    // int64_t rather than long: the width of long is platform-dependent (32 bits on
    // LLP64 platforms such as 64-bit Windows), whereas a Java long is always 64 bits
    var_opt_sketch<int64_t> sketch(32);
    for (unsigned i = 1; i <= n; ++i) sketch.update(i);
    std::ofstream os;
    // fail the test if the file cannot be created or written instead of
    // silently producing a missing or truncated file
    os.exceptions(std::ios::failbit | std::ios::badbit);
    os.open("varopt_sketch_long_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
    sketch.serialize(os);
  }
}
|
|
35
|
+
|
|
36
|
+
// Generates "varopt_sketch_string_exact_cpp.sk" in the current working directory:
// a sketch with k = 1024 fed with the strings "1".."200", item i having weight 1000 / i.
TEST_CASE("varopt sketch string exact", "[serialize_for_java]") {
  var_opt_sketch<std::string> sketch(1024);
  for (unsigned i = 1; i <= 200; ++i) sketch.update(std::to_string(i), 1000.0 / i);
  std::ofstream os;
  // fail the test if the file cannot be created or written instead of
  // silently producing a missing or truncated file
  os.exceptions(std::ios::failbit | std::ios::badbit);
  os.open("varopt_sketch_string_exact_cpp.sk", std::ios::binary);
  sketch.serialize(os);
}
|
|
42
|
+
|
|
43
|
+
// Generates "varopt_sketch_long_sampling_cpp.sk" in the current working directory:
// a sketch with k = 1024 fed with the items 0..1999 at the default weight plus
// three heavy negative items.
TEST_CASE("varopt sketch long sampling", "[serialize_for_java]") {
  // int64_t rather than long: the width of long is platform-dependent (32 bits on
  // LLP64 platforms such as 64-bit Windows), whereas a Java long is always 64 bits
  var_opt_sketch<int64_t> sketch(1024);
  for (unsigned i = 0; i < 2000; ++i) sketch.update(i);
  // negative heavy items to allow a simple predicate to filter
  sketch.update(-1, 100000.0);
  sketch.update(-2, 110000.0);
  sketch.update(-3, 120000.0);
  std::ofstream os;
  // fail the test if the file cannot be created or written instead of
  // silently producing a missing or truncated file
  os.exceptions(std::ios::failbit | std::ios::badbit);
  os.open("varopt_sketch_long_sampling_cpp.sk", std::ios::binary);
  sketch.serialize(os);
}
|
|
53
|
+
|
|
54
|
+
} /* namespace datasketches */
|
|
@@ -489,41 +489,4 @@ TEST_CASE("varopt sketch: estimate subset sum", "[var_opt_sketch]") {
|
|
|
489
489
|
REQUIRE(summary.estimate < total_weight); // exact mode, so know it must be strictly less
|
|
490
490
|
}
|
|
491
491
|
|
|
492
|
-
TEST_CASE("varopt sketch: deserialize exact from java", "[var_opt_sketch]") {
|
|
493
|
-
std::ifstream is;
|
|
494
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
495
|
-
is.open(testBinaryInputPath + "varopt_sketch_string_exact.sk", std::ios::binary);
|
|
496
|
-
var_opt_sketch<std::string> sketch = var_opt_sketch<std::string>::deserialize(is);
|
|
497
|
-
REQUIRE_FALSE(sketch.is_empty());
|
|
498
|
-
REQUIRE(sketch.get_k() == 1024);
|
|
499
|
-
REQUIRE(sketch.get_n() == 200);
|
|
500
|
-
REQUIRE(sketch.get_num_samples() == 200);
|
|
501
|
-
subset_summary ss = sketch.estimate_subset_sum([](std::string){ return true; });
|
|
502
|
-
|
|
503
|
-
double tgt_wt = 0.0;
|
|
504
|
-
for (int i = 1; i <= 200; ++i) { tgt_wt += 1000.0 / i; }
|
|
505
|
-
REQUIRE(ss.total_sketch_weight == Approx(tgt_wt).margin(EPS));
|
|
506
|
-
}
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
TEST_CASE("varopt sketch: deserialize sampling from java", "[var_opt_sketch]") {
|
|
510
|
-
std::ifstream is;
|
|
511
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
512
|
-
is.open(testBinaryInputPath + "varopt_sketch_long_sampling.sk", std::ios::binary);
|
|
513
|
-
var_opt_sketch<int64_t> sketch = var_opt_sketch<int64_t>::deserialize(is);
|
|
514
|
-
REQUIRE_FALSE(sketch.is_empty());
|
|
515
|
-
REQUIRE(sketch.get_k() == 1024);
|
|
516
|
-
REQUIRE(sketch.get_n() == 2003);
|
|
517
|
-
REQUIRE(sketch.get_num_samples() == sketch.get_k());
|
|
518
|
-
subset_summary ss = sketch.estimate_subset_sum([](int64_t){ return true; });
|
|
519
|
-
REQUIRE(ss.estimate == Approx(332000.0).margin(EPS));
|
|
520
|
-
REQUIRE(ss.total_sketch_weight == Approx(332000.0).margin(EPS));
|
|
521
|
-
|
|
522
|
-
ss = sketch.estimate_subset_sum([](int64_t x){ return x < 0; });
|
|
523
|
-
REQUIRE(ss.estimate == 330000.0); // heavy item, weight is exact
|
|
524
|
-
|
|
525
|
-
ss = sketch.estimate_subset_sum([](int64_t x){ return x >= 0; });
|
|
526
|
-
REQUIRE(ss.estimate == Approx(2000.0).margin(EPS));
|
|
527
|
-
}
|
|
528
|
-
|
|
529
492
|
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch2/catch.hpp>
|
|
21
|
+
#include <fstream>
|
|
22
|
+
#include <var_opt_union.hpp>
|
|
23
|
+
|
|
24
|
+
namespace datasketches {
|
|
25
|
+
|
|
26
|
+
// assume the binary sketches for this test have been generated by datasketches-java code
|
|
27
|
+
// in the subdirectory called "java" in the root directory of this project
|
|
28
|
+
static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";
|
|
29
|
+
|
|
30
|
+
// Cross-language compatibility check: reads "varopt_union_double_sampling_java.sk",
// extracts the union result and verifies n, the subset-sum estimate of the
// non-negative items, the total weight and the resulting k.
TEST_CASE("var opt union double", "[serde_compat]") {
  const double tolerance = 1e-13;
  const double light_weight = 96.0; // light items -- ignoring the heavy one

  std::ifstream input;
  input.exceptions(std::ios::failbit | std::ios::badbit);
  input.open(testBinaryInputPath + "varopt_union_double_sampling_java.sk", std::ios::binary);
  auto restored = var_opt_union<double>::deserialize(input);

  // must reduce k in the process
  const auto merged = restored.get_result();
  REQUIRE_FALSE(merged.is_empty());
  REQUIRE(merged.get_n() == 97);

  // selects the non-negative items only
  const auto is_non_negative = [](double value) { return value >= 0; };
  const subset_summary summary = merged.estimate_subset_sum(is_non_negative);
  REQUIRE(summary.estimate == Approx(light_weight).margin(tolerance));
  REQUIRE(summary.total_sketch_weight == Approx(light_weight + 1024.0).margin(tolerance));
  REQUIRE(merged.get_k() < 128);
}
|
|
49
|
+
|
|
50
|
+
} /* namespace datasketches */
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch2/catch.hpp>
|
|
21
|
+
#include <fstream>
|
|
22
|
+
#include <var_opt_union.hpp>
|
|
23
|
+
|
|
24
|
+
namespace datasketches {
|
|
25
|
+
|
|
26
|
+
// Builds a union of two small sampling sketches, sanity-checks the result and
// writes the serialized union to "varopt_union_double_sampling_cpp.sk" in the
// current working directory.
TEST_CASE("var opt union double sampling", "[serialize_for_java]") {
  const unsigned k_small = 16;
  const unsigned k_max = 128;
  const unsigned n1 = 32;
  const unsigned n2 = 64;

  // small k sketch, but sampling
  var_opt_sketch<double> sketch1(k_small);
  for (unsigned i = 0; i < n1; ++i) sketch1.update(i);
  // negative heavy item to allow a simple predicate to filter
  sketch1.update(-1, n1 * n1);

  // another one, but different n to get a different per-item weight
  var_opt_sketch<double> sketch2(k_small);
  for (unsigned i = 0; i < n2; ++i) sketch2.update(i);

  var_opt_union<double> u(k_max);
  u.update(sketch1);
  u.update(sketch2);

  // must reduce k in the process
  auto result = u.get_result();
  REQUIRE(result.get_k() < k_max);
  REQUIRE(result.get_k() >= k_small);
  // n1 light items + 1 heavy item + n2 light items
  REQUIRE(result.get_n() == 97);

  std::ofstream os;
  // fail the test if the file cannot be created or written instead of
  // silently producing a missing or truncated file
  os.exceptions(std::ios::failbit | std::ios::badbit);
  os.open("varopt_union_double_sampling_cpp.sk", std::ios::binary);
  u.serialize(os);
}
|
|
55
|
+
|
|
56
|
+
} /* namespace datasketches */
|
|
@@ -305,22 +305,4 @@ TEST_CASE("varopt union: serialize sampling", "[var_opt_union]") {
|
|
|
305
305
|
compare_serialization_deserialization(u);
|
|
306
306
|
}
|
|
307
307
|
|
|
308
|
-
TEST_CASE("varopt union: deserialize from java", "[var_opt_union]") {
|
|
309
|
-
std::ifstream is;
|
|
310
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
311
|
-
is.open(testBinaryInputPath + "varopt_union_double_sampling.sk", std::ios::binary);
|
|
312
|
-
var_opt_union<double> u = var_opt_union<double>::deserialize(is);
|
|
313
|
-
|
|
314
|
-
// must reduce k in the process, like in small_sampling_sketch()
|
|
315
|
-
var_opt_sketch<double> result = u.get_result();
|
|
316
|
-
REQUIRE_FALSE(result.is_empty());
|
|
317
|
-
REQUIRE(result.get_n() == 97);
|
|
318
|
-
|
|
319
|
-
double expected_wt = 96.0;// light items -- ignoring the heavy one
|
|
320
|
-
subset_summary ss = result.estimate_subset_sum([](double x){return x >= 0;});
|
|
321
|
-
REQUIRE(ss.estimate == Approx(expected_wt).margin(EPS));
|
|
322
|
-
REQUIRE(ss.total_sketch_weight == Approx(expected_wt + 1024.0).margin(EPS));
|
|
323
|
-
REQUIRE(result.get_k() < 128);
|
|
324
|
-
}
|
|
325
|
-
|
|
326
308
|
}
|