datasketches 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/NOTICE +1 -1
- data/README.md +0 -2
- data/ext/datasketches/cpc_wrapper.cpp +2 -2
- data/ext/datasketches/kll_wrapper.cpp +0 -10
- data/lib/datasketches/version.rb +1 -1
- data/lib/datasketches.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
- data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
- data/vendor/datasketches-cpp/Doxyfile +2827 -0
- data/vendor/datasketches-cpp/LICENSE +0 -76
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +1 -3
- data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
- data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
- data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
- data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
- data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
- data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
- data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
- data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
- data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
- data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
- data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
- data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
- data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
- data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
- data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
- data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
- data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
- data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
- data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
- data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
- data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/version.cfg.in +1 -1
- metadata +47 -93
- data/vendor/datasketches-cpp/MANIFEST.in +0 -39
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/pyproject.toml +0 -23
- data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
- data/vendor/datasketches-cpp/python/README.md +0 -85
- data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
- data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
- data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
- data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
- data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
- data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
- data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
- data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
- data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
- data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
- data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
- data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
- data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
- data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
- data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
- data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +0 -110
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tox.ini +0 -26
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
|
@@ -23,10 +23,8 @@
|
|
|
23
23
|
#include <array>
|
|
24
24
|
|
|
25
25
|
#include <catch2/catch.hpp>
|
|
26
|
-
|
|
27
|
-
#include
|
|
28
|
-
#include <array_of_doubles_intersection.hpp>
|
|
29
|
-
#include <array_of_doubles_a_not_b.hpp>
|
|
26
|
+
|
|
27
|
+
#include "array_of_doubles_sketch.hpp"
|
|
30
28
|
|
|
31
29
|
namespace datasketches {
|
|
32
30
|
|
|
@@ -47,130 +45,6 @@ TEST_CASE("aod sketch: reset", "[tuple_sketch]") {
|
|
|
47
45
|
REQUIRE(update_sketch.get_num_retained() == 0);
|
|
48
46
|
}
|
|
49
47
|
|
|
50
|
-
TEST_CASE("aod sketch: serialization compatibility with java - empty", "[tuple_sketch]") {
|
|
51
|
-
auto update_sketch = update_array_of_doubles_sketch::builder().build();
|
|
52
|
-
REQUIRE(update_sketch.is_empty());
|
|
53
|
-
REQUIRE(update_sketch.get_num_retained() == 0);
|
|
54
|
-
auto compact_sketch = update_sketch.compact();
|
|
55
|
-
|
|
56
|
-
// read binary sketch from Java
|
|
57
|
-
std::ifstream is;
|
|
58
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
59
|
-
is.open(inputPath + "aod_1_compact_empty_from_java.sk", std::ios::binary);
|
|
60
|
-
auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is);
|
|
61
|
-
REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained());
|
|
62
|
-
REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10));
|
|
63
|
-
REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10));
|
|
64
|
-
REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10));
|
|
65
|
-
REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10));
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
TEST_CASE("aod sketch: serialization compatibility with java - empty configured for three values", "[tuple_sketch]") {
|
|
69
|
-
auto update_sketch = update_array_of_doubles_sketch::builder(3).build();
|
|
70
|
-
REQUIRE(update_sketch.is_empty());
|
|
71
|
-
REQUIRE(update_sketch.get_num_retained() == 0);
|
|
72
|
-
REQUIRE(update_sketch.get_num_values() == 3);
|
|
73
|
-
auto compact_sketch = update_sketch.compact();
|
|
74
|
-
|
|
75
|
-
// read binary sketch from Java
|
|
76
|
-
std::ifstream is;
|
|
77
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
78
|
-
is.open(inputPath + "aod_3_compact_empty_from_java.sk", std::ios::binary);
|
|
79
|
-
auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is);
|
|
80
|
-
REQUIRE(compact_sketch.get_num_values() == compact_sketch_from_java.get_num_values());
|
|
81
|
-
REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained());
|
|
82
|
-
REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10));
|
|
83
|
-
REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10));
|
|
84
|
-
REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10));
|
|
85
|
-
REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10));
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
TEST_CASE("aod sketch: serialization compatibility with java - non-empty no entries", "[tuple_sketch]") {
|
|
89
|
-
auto update_sketch = update_array_of_doubles_sketch::builder().set_p(0.01f).build();
|
|
90
|
-
std::vector<double> a = {1};
|
|
91
|
-
update_sketch.update(1, a);
|
|
92
|
-
REQUIRE_FALSE(update_sketch.is_empty());
|
|
93
|
-
REQUIRE(update_sketch.get_num_retained() == 0);
|
|
94
|
-
auto compact_sketch = update_sketch.compact();
|
|
95
|
-
|
|
96
|
-
// read binary sketch from Java
|
|
97
|
-
std::ifstream is;
|
|
98
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
99
|
-
is.open(inputPath + "aod_1_compact_non_empty_no_entries_from_java.sk", std::ios::binary);
|
|
100
|
-
auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is);
|
|
101
|
-
REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained());
|
|
102
|
-
REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10));
|
|
103
|
-
REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10));
|
|
104
|
-
REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10));
|
|
105
|
-
REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10));
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
TEST_CASE("aod sketch: serialization compatibility with java - estimation mode", "[tuple_sketch]") {
|
|
109
|
-
auto update_sketch = update_array_of_doubles_sketch::builder().build();
|
|
110
|
-
std::vector<double> a = {1};
|
|
111
|
-
for (int i = 0; i < 8192; ++i) update_sketch.update(i, a);
|
|
112
|
-
auto compact_sketch = update_sketch.compact();
|
|
113
|
-
|
|
114
|
-
// read binary sketch from Java
|
|
115
|
-
std::ifstream is;
|
|
116
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
117
|
-
is.open(inputPath + "aod_1_compact_estimation_from_java.sk", std::ios::binary);
|
|
118
|
-
auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is);
|
|
119
|
-
REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained());
|
|
120
|
-
REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10));
|
|
121
|
-
REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10));
|
|
122
|
-
REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10));
|
|
123
|
-
REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10));
|
|
124
|
-
REQUIRE(compact_sketch.get_lower_bound(2) == Approx(compact_sketch_from_java.get_lower_bound(2)).margin(1e-10));
|
|
125
|
-
REQUIRE(compact_sketch.get_upper_bound(2) == Approx(compact_sketch_from_java.get_upper_bound(2)).margin(1e-10));
|
|
126
|
-
REQUIRE(compact_sketch.get_lower_bound(3) == Approx(compact_sketch_from_java.get_lower_bound(3)).margin(1e-10));
|
|
127
|
-
REQUIRE(compact_sketch.get_upper_bound(3) == Approx(compact_sketch_from_java.get_upper_bound(3)).margin(1e-10));
|
|
128
|
-
|
|
129
|
-
// sketch from Java is not ordered
|
|
130
|
-
// transform it to ordered so that iteration sequence would match exactly
|
|
131
|
-
compact_array_of_doubles_sketch ordered_sketch_from_java(compact_sketch_from_java, true);
|
|
132
|
-
auto it = ordered_sketch_from_java.begin();
|
|
133
|
-
for (const auto& entry: compact_sketch) {
|
|
134
|
-
REQUIRE(entry == *it);
|
|
135
|
-
++it;
|
|
136
|
-
}
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
TEST_CASE("aod sketch: serialization compatibility with java - exact mode with two values", "[tuple_sketch]") {
|
|
140
|
-
auto update_sketch = update_array_of_doubles_sketch::builder(2).build();
|
|
141
|
-
std::vector<double> a = {1, 2};
|
|
142
|
-
for (int i = 0; i < 1000; ++i) update_sketch.update(i, a.data()); // pass vector as pointer
|
|
143
|
-
auto compact_sketch = update_sketch.compact();
|
|
144
|
-
REQUIRE_FALSE(compact_sketch.is_estimation_mode());
|
|
145
|
-
|
|
146
|
-
// read binary sketch from Java
|
|
147
|
-
std::ifstream is;
|
|
148
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
149
|
-
is.open(inputPath + "aod_2_compact_exact_from_java.sk", std::ios::binary);
|
|
150
|
-
auto compact_sketch_from_java = compact_array_of_doubles_sketch::deserialize(is);
|
|
151
|
-
REQUIRE(compact_sketch.get_num_retained() == compact_sketch_from_java.get_num_retained());
|
|
152
|
-
REQUIRE(compact_sketch.get_theta() == Approx(compact_sketch_from_java.get_theta()).margin(1e-10));
|
|
153
|
-
REQUIRE(compact_sketch.get_estimate() == Approx(compact_sketch_from_java.get_estimate()).margin(1e-10));
|
|
154
|
-
REQUIRE(compact_sketch.get_lower_bound(1) == Approx(compact_sketch_from_java.get_lower_bound(1)).margin(1e-10));
|
|
155
|
-
REQUIRE(compact_sketch.get_upper_bound(1) == Approx(compact_sketch_from_java.get_upper_bound(1)).margin(1e-10));
|
|
156
|
-
REQUIRE(compact_sketch.get_lower_bound(2) == Approx(compact_sketch_from_java.get_lower_bound(2)).margin(1e-10));
|
|
157
|
-
REQUIRE(compact_sketch.get_upper_bound(2) == Approx(compact_sketch_from_java.get_upper_bound(2)).margin(1e-10));
|
|
158
|
-
REQUIRE(compact_sketch.get_lower_bound(3) == Approx(compact_sketch_from_java.get_lower_bound(3)).margin(1e-10));
|
|
159
|
-
REQUIRE(compact_sketch.get_upper_bound(3) == Approx(compact_sketch_from_java.get_upper_bound(3)).margin(1e-10));
|
|
160
|
-
|
|
161
|
-
// sketch from Java is not ordered
|
|
162
|
-
// transform it to ordered so that iteration sequence would match exactly
|
|
163
|
-
compact_array_of_doubles_sketch ordered_sketch_from_java(compact_sketch_from_java, true);
|
|
164
|
-
auto it = ordered_sketch_from_java.begin();
|
|
165
|
-
for (const auto& entry: compact_sketch) {
|
|
166
|
-
REQUIRE(entry.first == (*it).first);
|
|
167
|
-
REQUIRE(entry.second.size() == 2);
|
|
168
|
-
REQUIRE(entry.second[0] == (*it).second[0]);
|
|
169
|
-
REQUIRE(entry.second[1] == (*it).second[1]);
|
|
170
|
-
++it;
|
|
171
|
-
}
|
|
172
|
-
}
|
|
173
|
-
|
|
174
48
|
TEST_CASE("aod sketch: stream serialize deserialize - estimation mode", "[tuple_sketch]") {
|
|
175
49
|
auto update_sketch = update_array_of_doubles_sketch::builder(2).build();
|
|
176
50
|
std::vector<double> a = {1, 2};
|
|
@@ -290,7 +164,9 @@ TEST_CASE("aod intersection: half overlap", "[tuple_sketch]") {
|
|
|
290
164
|
auto update_sketch2 = update_array_of_doubles_sketch::builder().build();
|
|
291
165
|
for (int i = 500; i < 1500; ++i) update_sketch2.update(i, a);
|
|
292
166
|
|
|
293
|
-
|
|
167
|
+
// there is no default policy for intersection
|
|
168
|
+
// let's combine values the same way as in union for testing
|
|
169
|
+
array_of_doubles_intersection<default_array_of_doubles_union_policy> intersection;
|
|
294
170
|
intersection.update(update_sketch1);
|
|
295
171
|
intersection.update(update_sketch2);
|
|
296
172
|
auto result = intersection.get_result();
|
|
@@ -42,11 +42,11 @@ class always_one_policy {
|
|
|
42
42
|
public:
|
|
43
43
|
always_one_policy(): initial_value(1) {}
|
|
44
44
|
T create() const { return 1; }
|
|
45
|
-
void update(T&, const T&) const {
|
|
45
|
+
void update(T&, const T&) const {}
|
|
46
46
|
private:
|
|
47
47
|
T initial_value;
|
|
48
48
|
};
|
|
49
|
-
using always_one_tuple_sketch = datasketches::update_tuple_sketch<int, int, always_one_policy<int
|
|
49
|
+
using always_one_tuple_sketch = datasketches::update_tuple_sketch<int, int, always_one_policy<int>>;
|
|
50
50
|
|
|
51
51
|
template<typename T>
|
|
52
52
|
class update_sum_value_policy {
|
|
@@ -66,20 +66,20 @@ struct union_sum_value_policy {
|
|
|
66
66
|
}
|
|
67
67
|
};
|
|
68
68
|
|
|
69
|
-
using sum_union_tuple_sketch = datasketches::tuple_union<int,
|
|
69
|
+
using sum_union_tuple_sketch = datasketches::tuple_union<int, union_sum_value_policy<int>>;
|
|
70
70
|
|
|
71
71
|
|
|
72
|
-
class EngagementTest{
|
|
72
|
+
class EngagementTest {
|
|
73
73
|
public:
|
|
74
|
-
|
|
75
|
-
void test_always_one_update(){
|
|
74
|
+
uint8_t num_std_dev = 2;
|
|
75
|
+
void test_always_one_update() {
|
|
76
76
|
/*
|
|
77
77
|
* Tests that updates into an update_tuple_sketch sketch only keeps a 1 in the column for stored values.
|
|
78
78
|
*/
|
|
79
|
-
|
|
80
|
-
std::vector<datasketches::update_tuple_sketch<int, int, always_one_policy<int>>> sketch_array
|
|
79
|
+
uint8_t lgK = 8;
|
|
80
|
+
std::vector<datasketches::update_tuple_sketch<int, int, always_one_policy<int>>> sketch_array;
|
|
81
81
|
|
|
82
|
-
auto always_one_sketch = always_one_tuple_sketch::builder(always_one_policy<int>()).set_lg_k(lgK).build()
|
|
82
|
+
auto always_one_sketch = always_one_tuple_sketch::builder(always_one_policy<int>()).set_lg_k(lgK).build();
|
|
83
83
|
|
|
84
84
|
always_one_sketch.update(1, 1);
|
|
85
85
|
always_one_sketch.update(1, 2);
|
|
@@ -97,12 +97,12 @@ public:
|
|
|
97
97
|
REQUIRE(sum == 3); // we only keep 1 for every stored key.
|
|
98
98
|
}
|
|
99
99
|
|
|
100
|
-
void test_sum_update_policy(){
|
|
100
|
+
void test_sum_update_policy() {
|
|
101
101
|
/*
|
|
102
102
|
* Tests that updates into an sum_update_tuple_sketch sum the stored values on updates.
|
|
103
103
|
*/
|
|
104
|
-
|
|
105
|
-
auto sum_sketch = sum_update_tuple_sketch::builder().set_lg_k(lgK).build()
|
|
104
|
+
uint8_t lgK = 8;
|
|
105
|
+
auto sum_sketch = sum_update_tuple_sketch::builder().set_lg_k(lgK).build();
|
|
106
106
|
|
|
107
107
|
sum_sketch.update(1, 1);
|
|
108
108
|
sum_sketch.update(1, 2);
|
|
@@ -124,8 +124,8 @@ public:
|
|
|
124
124
|
* Tests that updates into two sketches of sum_update_tuple_sketch flavour, which have been unioned,
|
|
125
125
|
* cause the stored values of two of the same keys to be summed.
|
|
126
126
|
*/
|
|
127
|
-
auto sketch1 = sum_update_tuple_sketch::builder().build()
|
|
128
|
-
auto sketch2 = sum_update_tuple_sketch::builder().build()
|
|
127
|
+
auto sketch1 = sum_update_tuple_sketch::builder().build();
|
|
128
|
+
auto sketch2 = sum_update_tuple_sketch::builder().build();
|
|
129
129
|
|
|
130
130
|
sketch1.update(1, 1);
|
|
131
131
|
sketch1.update(2, 1);
|
|
@@ -135,10 +135,10 @@ public:
|
|
|
135
135
|
sketch2.update(2, 1);
|
|
136
136
|
sketch2.update(3, 7);
|
|
137
137
|
|
|
138
|
-
auto union_sketch = sum_union_tuple_sketch::builder().build()
|
|
139
|
-
union_sketch.update(sketch1)
|
|
140
|
-
union_sketch.update(sketch2)
|
|
141
|
-
auto union_result = union_sketch.get_result()
|
|
138
|
+
auto union_sketch = sum_union_tuple_sketch::builder().build();
|
|
139
|
+
union_sketch.update(sketch1);
|
|
140
|
+
union_sketch.update(sketch2);
|
|
141
|
+
auto union_result = union_sketch.get_result();
|
|
142
142
|
|
|
143
143
|
int num_retained = 0;
|
|
144
144
|
int sum = 0;
|
|
@@ -150,95 +150,95 @@ public:
|
|
|
150
150
|
REQUIRE(sum == 15); // 1:(1+2) + 2:(1+1) + 3:(3+7) = 15
|
|
151
151
|
}
|
|
152
152
|
|
|
153
|
-
void compute_engagement_histogram(){
|
|
153
|
+
void compute_engagement_histogram() {
|
|
154
154
|
/*
|
|
155
155
|
* Returns the estimated histogram from the synthetic data.
|
|
156
156
|
* On inspection one can verify this agrees with the
|
|
157
157
|
* https://github.com/apache/datasketches-java/blob/master/src/test/java/org/apache/datasketches/tuple/aninteger/EngagementTest.java
|
|
158
158
|
*/
|
|
159
|
-
|
|
160
|
-
const int days = 30
|
|
161
|
-
int v = 0
|
|
159
|
+
uint8_t lgK = 8;
|
|
160
|
+
const int days = 30;
|
|
161
|
+
int v = 0;
|
|
162
162
|
std::set<int> set_array[days];
|
|
163
|
-
std::vector<datasketches::update_tuple_sketch<int, int, always_one_policy<int>>> sketch_array
|
|
163
|
+
std::vector<datasketches::update_tuple_sketch<int, int, always_one_policy<int>>> sketch_array;
|
|
164
164
|
|
|
165
165
|
|
|
166
|
-
for(int i=0; i<days
|
|
167
|
-
auto builder = always_one_tuple_sketch::builder(always_one_policy<int>())
|
|
168
|
-
builder.set_lg_k(lgK)
|
|
169
|
-
auto sketch = builder.build()
|
|
166
|
+
for (int i = 0; i < days; ++i) {
|
|
167
|
+
auto builder = always_one_tuple_sketch::builder(always_one_policy<int>());
|
|
168
|
+
builder.set_lg_k(lgK);
|
|
169
|
+
auto sketch = builder.build();
|
|
170
170
|
sketch_array.push_back(sketch);
|
|
171
171
|
}
|
|
172
|
-
REQUIRE(sketch_array.size() == days)
|
|
172
|
+
REQUIRE(sketch_array.size() == days);
|
|
173
173
|
|
|
174
|
-
for(int i=0; i<=days; i
|
|
175
|
-
int32_t num_ids = get_num_ids(days, i)
|
|
176
|
-
int32_t num_days = get_num_days(days, i)
|
|
174
|
+
for (int i = 0; i <= days; ++i) {
|
|
175
|
+
int32_t num_ids = get_num_ids(days, i);
|
|
176
|
+
int32_t num_days = get_num_days(days, i);
|
|
177
177
|
|
|
178
|
-
int my_v = v
|
|
179
|
-
for(int d=0
|
|
180
|
-
for(int id = 0; id < num_ids; id
|
|
181
|
-
set_array[d].insert(my_v + id)
|
|
182
|
-
sketch_array[d].update(my_v + id, 1)
|
|
178
|
+
int my_v = v++;
|
|
179
|
+
for (int d = 0; d < num_days; ++d) {
|
|
180
|
+
for (int id = 0; id < num_ids; ++id) {
|
|
181
|
+
set_array[d].insert(my_v + id);
|
|
182
|
+
sketch_array[d].update(my_v + id, 1);
|
|
183
183
|
}
|
|
184
184
|
}
|
|
185
|
-
v += num_ids
|
|
185
|
+
v += num_ids;
|
|
186
186
|
}
|
|
187
|
-
union_ops(lgK, sketch_array)
|
|
187
|
+
union_ops(lgK, sketch_array);
|
|
188
188
|
}
|
|
189
189
|
private:
|
|
190
|
-
int32_t get_num_ids(int total_days, int index){
|
|
190
|
+
int32_t get_num_ids(int total_days, int index) {
|
|
191
191
|
/*
|
|
192
192
|
* Generates power law distributed synthetic data
|
|
193
193
|
*/
|
|
194
|
-
double d = total_days
|
|
195
|
-
double i = index
|
|
196
|
-
return int(round(exp(i * log(d) / d)))
|
|
194
|
+
double d = total_days;
|
|
195
|
+
double i = index;
|
|
196
|
+
return int(round(exp(i * log(d) / d)));
|
|
197
197
|
}
|
|
198
198
|
|
|
199
|
-
int32_t get_num_days(int total_days, int index){
|
|
200
|
-
double d = total_days
|
|
201
|
-
double i = index
|
|
202
|
-
return int(round(exp(
|
|
199
|
+
int32_t get_num_days(int total_days, int index) {
|
|
200
|
+
double d = total_days;
|
|
201
|
+
double i = index;
|
|
202
|
+
return int(round(exp((d-i) * log(d) / d )));
|
|
203
203
|
}
|
|
204
204
|
|
|
205
|
-
int32_t round_double_to_int(double x){
|
|
206
|
-
return int(std::round(x))
|
|
205
|
+
int32_t round_double_to_int(double x) {
|
|
206
|
+
return int(std::round(x));
|
|
207
207
|
}
|
|
208
208
|
|
|
209
|
-
void union_ops(
|
|
210
|
-
|
|
211
|
-
auto u = sum_union_tuple_sketch::builder().set_lg_k(lgk).build()
|
|
209
|
+
void union_ops(uint8_t lgk, std::vector<datasketches::update_tuple_sketch<int, int, always_one_policy<int>>> sketches) {
|
|
210
|
+
auto num_sketches = sketches.size();
|
|
211
|
+
auto u = sum_union_tuple_sketch::builder().set_lg_k(lgk).build();
|
|
212
212
|
|
|
213
|
-
for(auto sk:sketches){
|
|
214
|
-
u.update(sk)
|
|
213
|
+
for (auto sk: sketches) {
|
|
214
|
+
u.update(sk);
|
|
215
215
|
}
|
|
216
|
-
auto union_result = u.get_result()
|
|
217
|
-
std::vector<uint64_t> num_days_arr(num_sketches+1)
|
|
216
|
+
auto union_result = u.get_result();
|
|
217
|
+
std::vector<uint64_t> num_days_arr(num_sketches+1);
|
|
218
218
|
|
|
219
219
|
for (const auto& entry: union_result) {
|
|
220
|
-
int num_days_visited = entry.second
|
|
221
|
-
num_days_arr[num_days_visited]
|
|
220
|
+
int num_days_visited = entry.second;
|
|
221
|
+
++num_days_arr[num_days_visited];
|
|
222
222
|
}
|
|
223
223
|
|
|
224
|
-
|
|
224
|
+
uint64_t sum_visits = 0;
|
|
225
225
|
double theta = union_result.get_theta();
|
|
226
|
-
std::cout <<"\t\tEngagement Histogram.\t\t\t\n"
|
|
227
|
-
std::cout << "Number of Unique Visitors by Number of Days Visited" << std::endl
|
|
228
|
-
std::cout << "---------------------------------------------------" << std::endl
|
|
226
|
+
std::cout <<"\t\tEngagement Histogram.\t\t\t\n";
|
|
227
|
+
std::cout << "Number of Unique Visitors by Number of Days Visited" << std::endl;
|
|
228
|
+
std::cout << "---------------------------------------------------" << std::endl;
|
|
229
229
|
|
|
230
230
|
std::cout << std::setw(12) << "Days Visited"
|
|
231
231
|
<< std::setw(12) << "Estimate"
|
|
232
232
|
<< std::setw(12) << "LB"
|
|
233
233
|
<< std::setw(12) << "UB"
|
|
234
|
-
<< std:: endl
|
|
234
|
+
<< std:: endl;
|
|
235
235
|
|
|
236
|
-
for (
|
|
237
|
-
|
|
238
|
-
if(visitors_at_days_visited == 0)
|
|
239
|
-
sum_visits += visitors_at_days_visited * i
|
|
236
|
+
for (size_t i = 0; i < num_days_arr.size(); ++i) {
|
|
237
|
+
auto visitors_at_days_visited = num_days_arr[i];
|
|
238
|
+
if (visitors_at_days_visited == 0) continue;
|
|
239
|
+
sum_visits += visitors_at_days_visited * i;
|
|
240
240
|
|
|
241
|
-
double est_visitors_at_days_visited = visitors_at_days_visited / theta
|
|
241
|
+
double est_visitors_at_days_visited = visitors_at_days_visited / theta;
|
|
242
242
|
double lower_bound_at_days_visited = union_result.get_lower_bound(num_std_dev, visitors_at_days_visited);
|
|
243
243
|
double upper_bound_at_days_visited = union_result.get_upper_bound(num_std_dev, visitors_at_days_visited);
|
|
244
244
|
|
|
@@ -246,27 +246,25 @@ private:
|
|
|
246
246
|
<< std::setw(12) << est_visitors_at_days_visited
|
|
247
247
|
<< std::setw(12) << lower_bound_at_days_visited
|
|
248
248
|
<< std::setw(12) << upper_bound_at_days_visited
|
|
249
|
-
<< std:: endl
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
std::cout << std::endl << std::endl ;
|
|
249
|
+
<< std:: endl;
|
|
250
|
+
}
|
|
251
|
+
std::cout << std::endl << std::endl;
|
|
253
252
|
std::cout << std::setw(12) << "Totals"
|
|
254
253
|
<< std::setw(12) << "Estimate"
|
|
255
254
|
<< std::setw(12) << "LB"
|
|
256
255
|
<< std::setw(12) << "UB"
|
|
257
|
-
<< std:: endl
|
|
258
|
-
std::cout << "---------------------------------------------------" << std::endl
|
|
259
|
-
|
|
260
|
-
const double total_visitors = union_result.get_estimate() ;
|
|
261
|
-
const double lb_visitors = union_result.get_lower_bound(num_std_dev) ;
|
|
262
|
-
const double ub_visitors = union_result.get_upper_bound(num_std_dev) ;
|
|
256
|
+
<< std:: endl;
|
|
257
|
+
std::cout << "---------------------------------------------------" << std::endl;
|
|
263
258
|
|
|
259
|
+
const double total_visitors = union_result.get_estimate();
|
|
260
|
+
const double lb_visitors = union_result.get_lower_bound(num_std_dev);
|
|
261
|
+
const double ub_visitors = union_result.get_upper_bound(num_std_dev);
|
|
264
262
|
|
|
265
263
|
std::cout << std::setw(12) << "Visitors"
|
|
266
264
|
<< std::setw(12) << total_visitors
|
|
267
265
|
<< std::setw(12) << lb_visitors
|
|
268
266
|
<< std::setw(12) << ub_visitors
|
|
269
|
-
<< std:: endl
|
|
267
|
+
<< std:: endl;
|
|
270
268
|
|
|
271
269
|
// The total number of visits, however, is a scaled metric and takes advantage of the fact that
|
|
272
270
|
// the retained entries in the sketch is a uniform random sample of all unique visitors, and
|
|
@@ -275,25 +273,23 @@ private:
|
|
|
275
273
|
const double lb_visits = est_visits * lb_visitors / total_visitors;
|
|
276
274
|
const double ub_visits = est_visits * ub_visitors / total_visitors;
|
|
277
275
|
|
|
278
|
-
|
|
279
276
|
std::cout << std::setw(12) << "Visits"
|
|
280
277
|
<< std::setw(12) << est_visits
|
|
281
278
|
<< std::setw(12) << lb_visits
|
|
282
279
|
<< std::setw(12) << ub_visits
|
|
283
|
-
<< std:: endl
|
|
280
|
+
<< std:: endl;
|
|
284
281
|
}
|
|
285
282
|
|
|
286
283
|
};
|
|
287
284
|
|
|
288
285
|
namespace datasketches {
|
|
289
286
|
|
|
290
|
-
|
|
291
|
-
EngagementTest E
|
|
292
|
-
E.test_always_one_update()
|
|
293
|
-
E.test_sum_update_policy()
|
|
294
|
-
E.test_sum_union_policy()
|
|
295
|
-
E.compute_engagement_histogram()
|
|
296
|
-
}
|
|
297
|
-
|
|
287
|
+
TEST_CASE("engagement", "[engagement]") {
|
|
288
|
+
EngagementTest E;
|
|
289
|
+
E.test_always_one_update();
|
|
290
|
+
E.test_sum_update_policy();
|
|
291
|
+
E.test_sum_union_policy();
|
|
292
|
+
E.compute_engagement_histogram();
|
|
293
|
+
}
|
|
298
294
|
|
|
299
|
-
} /* namespace datasketches */
|
|
295
|
+
} /* namespace datasketches */
|
|
@@ -25,7 +25,9 @@
|
|
|
25
25
|
|
|
26
26
|
namespace datasketches {
|
|
27
27
|
|
|
28
|
-
|
|
28
|
+
// there is no default policy for intersection
|
|
29
|
+
// let's combine values the same way as in union for testing
|
|
30
|
+
using tuple_jaccard_similarity_float = tuple_jaccard_similarity<float, default_tuple_union_policy<float>>;
|
|
29
31
|
|
|
30
32
|
TEST_CASE("tuple jaccard: empty", "[tuple_sketch]") {
|
|
31
33
|
auto sk_a = update_tuple_sketch<float>::builder().build();
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch2/catch.hpp>
|
|
21
|
+
#include <fstream>
|
|
22
|
+
#include <tuple_sketch.hpp>
|
|
23
|
+
|
|
24
|
+
namespace datasketches {
|
|
25
|
+
|
|
26
|
+
// assume the binary sketches for this test have been generated by datasketches-java code
|
|
27
|
+
// in the subdirectory called "java" in the root directory of this project
|
|
28
|
+
static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";
|
|
29
|
+
|
|
30
|
+
TEST_CASE("tuple sketch int", "[serde_compat]") {
|
|
31
|
+
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
|
|
32
|
+
for (const unsigned n: n_arr) {
|
|
33
|
+
std::ifstream is;
|
|
34
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
35
|
+
is.open(testBinaryInputPath + "tuple_int_n" + std::to_string(n) + "_java.sk", std::ios::binary);
|
|
36
|
+
const auto sketch = compact_tuple_sketch<int>::deserialize(is);
|
|
37
|
+
REQUIRE(sketch.is_empty() == (n == 0));
|
|
38
|
+
REQUIRE(sketch.is_estimation_mode() == (n > 1000));
|
|
39
|
+
REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
|
|
40
|
+
for (const auto& entry: sketch) {
|
|
41
|
+
REQUIRE(entry.first < sketch.get_theta64());
|
|
42
|
+
REQUIRE(entry.second < static_cast<int>(n));
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
} /* namespace datasketches */
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch2/catch.hpp>
|
|
21
|
+
#include <fstream>
|
|
22
|
+
#include <tuple_sketch.hpp>
|
|
23
|
+
|
|
24
|
+
namespace datasketches {
|
|
25
|
+
|
|
26
|
+
TEST_CASE("tuple sketch int generate", "[serialize_for_java]") {
|
|
27
|
+
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
|
|
28
|
+
for (const unsigned n: n_arr) {
|
|
29
|
+
auto sketch = update_tuple_sketch<int>::builder().build();
|
|
30
|
+
for (unsigned i = 0; i < n; ++i) sketch.update(i, i);
|
|
31
|
+
REQUIRE(sketch.is_empty() == (n == 0));
|
|
32
|
+
REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
|
|
33
|
+
std::ofstream os("tuple_int_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
|
|
34
|
+
sketch.compact().serialize(os);
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
} /* namespace datasketches */
|
|
@@ -86,7 +86,7 @@ TEST_CASE("tuple sketch float: empty", "[tuple_sketch]") {
|
|
|
86
86
|
REQUIRE(update_sketch.compact(false).is_ordered());
|
|
87
87
|
}
|
|
88
88
|
|
|
89
|
-
TEST_CASE("tuple sketch: single item", "[
|
|
89
|
+
TEST_CASE("tuple sketch: single item", "[tuple_sketch]") {
|
|
90
90
|
auto update_sketch = update_tuple_sketch<float>::builder().build();
|
|
91
91
|
update_sketch.update(1, 1.0f);
|
|
92
92
|
REQUIRE_FALSE(update_sketch.is_empty());
|
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
5.0.0
|