datasketches 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/NOTICE +1 -1
- data/README.md +0 -2
- data/ext/datasketches/cpc_wrapper.cpp +2 -2
- data/ext/datasketches/kll_wrapper.cpp +0 -10
- data/lib/datasketches/version.rb +1 -1
- data/lib/datasketches.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
- data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
- data/vendor/datasketches-cpp/Doxyfile +2827 -0
- data/vendor/datasketches-cpp/LICENSE +0 -76
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +1 -3
- data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
- data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
- data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
- data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
- data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
- data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
- data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
- data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
- data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
- data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
- data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
- data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
- data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
- data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
- data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
- data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
- data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
- data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
- data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
- data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
- data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/version.cfg.in +1 -1
- metadata +47 -93
- data/vendor/datasketches-cpp/MANIFEST.in +0 -39
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/pyproject.toml +0 -23
- data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
- data/vendor/datasketches-cpp/python/README.md +0 -85
- data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
- data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
- data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
- data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
- data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
- data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
- data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
- data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
- data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
- data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
- data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
- data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
- data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
- data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
- data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
- data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +0 -110
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tox.ini +0 -26
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
|
@@ -26,6 +26,16 @@
|
|
|
26
26
|
|
|
27
27
|
namespace datasketches {
|
|
28
28
|
|
|
29
|
+
// forward declaration
|
|
30
|
+
template<typename A> class theta_union_alloc;
|
|
31
|
+
|
|
32
|
+
// alias with default allocator for convenience
|
|
33
|
+
using theta_union = theta_union_alloc<std::allocator<uint64_t>>;
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* Theta Union.
|
|
37
|
+
* Computes union of Theta sketches. There is no constructor. Use builder instead.
|
|
38
|
+
*/
|
|
29
39
|
template<typename Allocator = std::allocator<uint64_t>>
|
|
30
40
|
class theta_union_alloc {
|
|
31
41
|
public:
|
|
@@ -35,6 +45,7 @@ public:
|
|
|
35
45
|
using CompactSketch = compact_theta_sketch_alloc<Allocator>;
|
|
36
46
|
using resize_factor = theta_constants::resize_factor;
|
|
37
47
|
|
|
48
|
+
// there is no payload in Theta sketch entry
|
|
38
49
|
struct nop_policy {
|
|
39
50
|
void operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
|
|
40
51
|
unused(internal_entry);
|
|
@@ -47,22 +58,20 @@ public:
|
|
|
47
58
|
class builder;
|
|
48
59
|
|
|
49
60
|
/**
|
|
50
|
-
*
|
|
61
|
+
* Update the union with a given sketch
|
|
51
62
|
* @param sketch to update the union with
|
|
52
63
|
*/
|
|
53
64
|
template<typename FwdSketch>
|
|
54
65
|
void update(FwdSketch&& sketch);
|
|
55
66
|
|
|
56
67
|
/**
|
|
57
|
-
*
|
|
58
|
-
* @param ordered optional flag to specify if ordered sketch should be produced
|
|
68
|
+
* Produces a copy of the current state of the union as a compact sketch.
|
|
69
|
+
* @param ordered optional flag to specify if an ordered sketch should be produced
|
|
59
70
|
* @return the result of the union
|
|
60
71
|
*/
|
|
61
72
|
CompactSketch get_result(bool ordered = true) const;
|
|
62
73
|
|
|
63
|
-
|
|
64
|
-
* Reset the union to the initial empty state
|
|
65
|
-
*/
|
|
74
|
+
/// Reset the union to the initial empty state
|
|
66
75
|
void reset();
|
|
67
76
|
|
|
68
77
|
private:
|
|
@@ -72,21 +81,19 @@ private:
|
|
|
72
81
|
theta_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Allocator& allocator);
|
|
73
82
|
};
|
|
74
83
|
|
|
84
|
+
/// Theta union builder
|
|
75
85
|
template<typename A>
|
|
76
86
|
class theta_union_alloc<A>::builder: public theta_base_builder<builder, A> {
|
|
77
87
|
public:
|
|
78
88
|
builder(const A& allocator = A());
|
|
79
89
|
|
|
80
90
|
/**
|
|
81
|
-
*
|
|
91
|
+
* Create an instance of the union with predefined parameters.
|
|
82
92
|
* @return an instance of the union
|
|
83
93
|
*/
|
|
84
94
|
theta_union_alloc<A> build() const;
|
|
85
95
|
};
|
|
86
96
|
|
|
87
|
-
// alias with default allocator for convenience
|
|
88
|
-
using theta_union = theta_union_alloc<std::allocator<uint64_t>>;
|
|
89
|
-
|
|
90
97
|
} /* namespace datasketches */
|
|
91
98
|
|
|
92
99
|
#include "theta_union_impl.hpp"
|
|
@@ -42,7 +42,7 @@ void theta_union_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
|
|
|
42
42
|
if (sketch.get_seed_hash() != compute_seed_hash(table_.seed_)) throw std::invalid_argument("seed hash mismatch");
|
|
43
43
|
table_.is_empty_ = false;
|
|
44
44
|
union_theta_ = std::min(union_theta_, sketch.get_theta64());
|
|
45
|
-
for (auto
|
|
45
|
+
for (auto&& entry: sketch) {
|
|
46
46
|
const uint64_t hash = EK()(entry);
|
|
47
47
|
if (hash < union_theta_ && hash < table_.theta_) {
|
|
48
48
|
auto result = table_.find(hash);
|
|
@@ -28,9 +28,9 @@ state_(lg_cur_size, lg_nom_size, rf, p, theta, seed, nop_policy(), allocator)
|
|
|
28
28
|
{}
|
|
29
29
|
|
|
30
30
|
template<typename A>
|
|
31
|
-
template<typename
|
|
32
|
-
void theta_union_alloc<A>::update(
|
|
33
|
-
state_.update(std::forward<
|
|
31
|
+
template<typename FwdSketch>
|
|
32
|
+
void theta_union_alloc<A>::update(FwdSketch&& sketch) {
|
|
33
|
+
state_.update(std::forward<FwdSketch>(sketch));
|
|
34
34
|
}
|
|
35
35
|
|
|
36
36
|
template<typename A>
|
|
@@ -91,13 +91,14 @@ struct theta_update_sketch_base {
|
|
|
91
91
|
static void consolidate_non_empty(Entry* entries, size_t size, size_t num);
|
|
92
92
|
};
|
|
93
93
|
|
|
94
|
-
// builder
|
|
95
94
|
|
|
95
|
+
/// Theta base builder
|
|
96
96
|
template<typename Derived, typename Allocator>
|
|
97
97
|
class theta_base_builder {
|
|
98
98
|
public:
|
|
99
99
|
/**
|
|
100
100
|
* Creates and instance of the builder with default parameters.
|
|
101
|
+
* @param allocator instance of an Allocator to pass to created sketches
|
|
101
102
|
*/
|
|
102
103
|
theta_base_builder(const Allocator& allocator);
|
|
103
104
|
|
|
@@ -198,7 +199,8 @@ public:
|
|
|
198
199
|
theta_iterator operator++(int);
|
|
199
200
|
bool operator==(const theta_iterator& other) const;
|
|
200
201
|
bool operator!=(const theta_iterator& other) const;
|
|
201
|
-
|
|
202
|
+
reference operator*() const;
|
|
203
|
+
pointer operator->() const;
|
|
202
204
|
|
|
203
205
|
private:
|
|
204
206
|
Entry* entries_;
|
|
@@ -221,6 +223,7 @@ public:
|
|
|
221
223
|
bool operator==(const theta_const_iterator& other) const;
|
|
222
224
|
bool operator!=(const theta_const_iterator& other) const;
|
|
223
225
|
reference operator*() const;
|
|
226
|
+
pointer operator->() const;
|
|
224
227
|
|
|
225
228
|
private:
|
|
226
229
|
const Entry* entries_;
|
|
@@ -386,6 +386,11 @@ auto theta_iterator<Entry, ExtractKey>::operator*() const -> reference {
|
|
|
386
386
|
return entries_[index_];
|
|
387
387
|
}
|
|
388
388
|
|
|
389
|
+
template<typename Entry, typename ExtractKey>
|
|
390
|
+
auto theta_iterator<Entry, ExtractKey>::operator->() const -> pointer {
|
|
391
|
+
return entries_ + index_;
|
|
392
|
+
}
|
|
393
|
+
|
|
389
394
|
// const iterator
|
|
390
395
|
|
|
391
396
|
template<typename Entry, typename ExtractKey>
|
|
@@ -419,10 +424,15 @@ bool theta_const_iterator<Entry, ExtractKey>::operator==(const theta_const_itera
|
|
|
419
424
|
}
|
|
420
425
|
|
|
421
426
|
template<typename Entry, typename ExtractKey>
|
|
422
|
-
auto theta_const_iterator<Entry, ExtractKey>::operator*() const ->
|
|
427
|
+
auto theta_const_iterator<Entry, ExtractKey>::operator*() const -> reference {
|
|
423
428
|
return entries_[index_];
|
|
424
429
|
}
|
|
425
430
|
|
|
431
|
+
template<typename Entry, typename ExtractKey>
|
|
432
|
+
auto theta_const_iterator<Entry, ExtractKey>::operator->() const -> pointer {
|
|
433
|
+
return entries_ + index_;
|
|
434
|
+
}
|
|
435
|
+
|
|
426
436
|
} /* namespace datasketches */
|
|
427
437
|
|
|
428
438
|
#endif
|
|
@@ -20,7 +20,6 @@ add_executable(theta_test)
|
|
|
20
20
|
target_link_libraries(theta_test theta common_test_lib)
|
|
21
21
|
|
|
22
22
|
set_target_properties(theta_test PROPERTIES
|
|
23
|
-
CXX_STANDARD 11
|
|
24
23
|
CXX_STANDARD_REQUIRED YES
|
|
25
24
|
)
|
|
26
25
|
|
|
@@ -46,3 +45,17 @@ target_sources(theta_test
|
|
|
46
45
|
theta_setop_test.cpp
|
|
47
46
|
bit_packing_test.cpp
|
|
48
47
|
)
|
|
48
|
+
|
|
49
|
+
if (SERDE_COMPAT)
|
|
50
|
+
target_sources(theta_test
|
|
51
|
+
PRIVATE
|
|
52
|
+
theta_sketch_deserialize_from_java_test.cpp
|
|
53
|
+
)
|
|
54
|
+
endif()
|
|
55
|
+
|
|
56
|
+
if (GENERATE)
|
|
57
|
+
target_sources(theta_test
|
|
58
|
+
PRIVATE
|
|
59
|
+
theta_sketch_serialize_for_java.cpp
|
|
60
|
+
)
|
|
61
|
+
endif()
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch2/catch.hpp>
|
|
21
|
+
#include <fstream>
|
|
22
|
+
#include <theta_sketch.hpp>
|
|
23
|
+
|
|
24
|
+
namespace datasketches {
|
|
25
|
+
|
|
26
|
+
// assume the binary sketches for this test have been generated by datasketches-java code
|
|
27
|
+
// in the subdirectory called "java" in the root directory of this project
|
|
28
|
+
static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";
|
|
29
|
+
|
|
30
|
+
TEST_CASE("theta sketch", "[serde_compat]") {
|
|
31
|
+
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
|
|
32
|
+
for (const unsigned n: n_arr) {
|
|
33
|
+
std::ifstream is;
|
|
34
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
35
|
+
is.open(testBinaryInputPath + "theta_n" + std::to_string(n) + "_java.sk", std::ios::binary);
|
|
36
|
+
const auto sketch = compact_theta_sketch::deserialize(is);
|
|
37
|
+
REQUIRE(sketch.is_empty() == (n == 0));
|
|
38
|
+
REQUIRE(sketch.is_estimation_mode() == (n > 1000));
|
|
39
|
+
REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
|
|
40
|
+
for (const auto hash: sketch) {
|
|
41
|
+
REQUIRE(hash < sketch.get_theta64());
|
|
42
|
+
}
|
|
43
|
+
REQUIRE(sketch.is_ordered());
|
|
44
|
+
REQUIRE(std::is_sorted(sketch.begin(), sketch.end()));
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
TEST_CASE("theta sketch non-empty no entries", "[serde_compat]") {
|
|
49
|
+
std::ifstream is;
|
|
50
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
51
|
+
is.open(testBinaryInputPath + "theta_non_empty_no_entries_java.sk", std::ios::binary);
|
|
52
|
+
const auto sketch = compact_theta_sketch::deserialize(is);
|
|
53
|
+
REQUIRE_FALSE(sketch.is_empty());
|
|
54
|
+
REQUIRE(sketch.get_num_retained() == 0);
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
} /* namespace datasketches */
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch2/catch.hpp>
|
|
21
|
+
#include <fstream>
|
|
22
|
+
#include <theta_sketch.hpp>
|
|
23
|
+
|
|
24
|
+
namespace datasketches {
|
|
25
|
+
|
|
26
|
+
TEST_CASE("theta sketch generate", "[serialize_for_java]") {
|
|
27
|
+
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
|
|
28
|
+
for (const unsigned n: n_arr) {
|
|
29
|
+
auto sketch = update_theta_sketch::builder().build();
|
|
30
|
+
for (unsigned i = 0; i < n; ++i) sketch.update(i);
|
|
31
|
+
REQUIRE(sketch.is_empty() == (n == 0));
|
|
32
|
+
REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
|
|
33
|
+
std::ofstream os("theta_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
|
|
34
|
+
sketch.compact().serialize(os);
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
TEST_CASE("theta sketch generate compressed", "[serialize_for_java]") {
|
|
39
|
+
const unsigned n_arr[] = {10, 100, 1000, 10000, 100000, 1000000};
|
|
40
|
+
for (const unsigned n: n_arr) {
|
|
41
|
+
auto sketch = update_theta_sketch::builder().build();
|
|
42
|
+
for (unsigned i = 0; i < n; ++i) sketch.update(i);
|
|
43
|
+
REQUIRE_FALSE(sketch.is_empty());
|
|
44
|
+
REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
|
|
45
|
+
std::ofstream os("theta_compressed_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
|
|
46
|
+
sketch.compact().serialize(os);
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
TEST_CASE("theta sketch generate non-empty no entries", "[serialize_for_java]") {
|
|
51
|
+
auto sketch = update_theta_sketch::builder().set_p(0.01).build();
|
|
52
|
+
// here we rely on the fact that hash of 1 happens to be greater than 0.01 (when normalized)
|
|
53
|
+
// and therefore gets rejected
|
|
54
|
+
sketch.update(1);
|
|
55
|
+
REQUIRE_FALSE(sketch.is_empty());
|
|
56
|
+
REQUIRE(sketch.get_num_retained() == 0);
|
|
57
|
+
std::ofstream os("theta_non_empty_no_entries_cpp.sk", std::ios::binary);
|
|
58
|
+
sketch.compact().serialize(os);
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
} /* namespace datasketches */
|
|
@@ -167,20 +167,6 @@ TEST_CASE("theta sketch: estimation", "[theta_sketch]") {
|
|
|
167
167
|
REQUIRE(compact_sketch.get_upper_bound(1) > n);
|
|
168
168
|
}
|
|
169
169
|
|
|
170
|
-
TEST_CASE("theta sketch: deserialize compact empty from java", "[theta_sketch]") {
|
|
171
|
-
std::ifstream is;
|
|
172
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
173
|
-
is.open(inputPath + "theta_compact_empty_from_java.sk", std::ios::binary);
|
|
174
|
-
auto sketch = compact_theta_sketch::deserialize(is);
|
|
175
|
-
REQUIRE(sketch.is_empty());
|
|
176
|
-
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
177
|
-
REQUIRE(sketch.get_num_retained() == 0);
|
|
178
|
-
REQUIRE(sketch.get_theta() == 1.0);
|
|
179
|
-
REQUIRE(sketch.get_estimate() == 0.0);
|
|
180
|
-
REQUIRE(sketch.get_lower_bound(1) == 0.0);
|
|
181
|
-
REQUIRE(sketch.get_upper_bound(1) == 0.0);
|
|
182
|
-
}
|
|
183
|
-
|
|
184
170
|
TEST_CASE("theta sketch: deserialize compact v1 empty from java", "[theta_sketch]") {
|
|
185
171
|
std::ifstream is;
|
|
186
172
|
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
@@ -209,88 +195,6 @@ TEST_CASE("theta sketch: deserialize compact v2 empty from java", "[theta_sketch
|
|
|
209
195
|
REQUIRE(sketch.get_upper_bound(1) == 0.0);
|
|
210
196
|
}
|
|
211
197
|
|
|
212
|
-
TEST_CASE("theta sketch: deserialize single item from java", "[theta_sketch]") {
|
|
213
|
-
std::ifstream is;
|
|
214
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
215
|
-
is.open(inputPath + "theta_compact_single_item_from_java.sk", std::ios::binary);
|
|
216
|
-
auto sketch = compact_theta_sketch::deserialize(is);
|
|
217
|
-
REQUIRE_FALSE(sketch.is_empty());
|
|
218
|
-
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
219
|
-
REQUIRE(sketch.get_num_retained() == 1);
|
|
220
|
-
REQUIRE(sketch.get_theta() == 1.0);
|
|
221
|
-
REQUIRE(sketch.get_estimate() == 1.0);
|
|
222
|
-
REQUIRE(sketch.get_lower_bound(1) == 1.0);
|
|
223
|
-
REQUIRE(sketch.get_upper_bound(1) == 1.0);
|
|
224
|
-
}
|
|
225
|
-
|
|
226
|
-
TEST_CASE("theta sketch: deserialize compact exact from java", "[theta_sketch]") {
|
|
227
|
-
std::ifstream is;
|
|
228
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
229
|
-
is.open(inputPath + "theta_compact_exact_from_java.sk", std::ios::binary);
|
|
230
|
-
auto sketch = compact_theta_sketch::deserialize(is);
|
|
231
|
-
REQUIRE_FALSE(sketch.is_empty());
|
|
232
|
-
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
233
|
-
REQUIRE(sketch.is_ordered());
|
|
234
|
-
REQUIRE(sketch.get_num_retained() == 100);
|
|
235
|
-
|
|
236
|
-
// the same construction process in Java must have produced exactly the same sketch
|
|
237
|
-
auto update_sketch = update_theta_sketch::builder().build();
|
|
238
|
-
const int n = 100;
|
|
239
|
-
for (int i = 0; i < n; i++) update_sketch.update(i);
|
|
240
|
-
REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
|
|
241
|
-
REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
|
|
242
|
-
REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
|
|
243
|
-
REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
|
|
244
|
-
REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
|
|
245
|
-
REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
|
|
246
|
-
REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
|
|
247
|
-
REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
|
|
248
|
-
REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
|
|
249
|
-
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
250
|
-
// the sketches are ordered, so the iteration sequence must match exactly
|
|
251
|
-
auto iter = sketch.begin();
|
|
252
|
-
for (const auto& key: compact_sketch) {
|
|
253
|
-
REQUIRE(*iter == key);
|
|
254
|
-
++iter;
|
|
255
|
-
}
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
TEST_CASE("theta sketch: deserialize compact estimation from java", "[theta_sketch]") {
|
|
259
|
-
std::ifstream is;
|
|
260
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
261
|
-
is.open(inputPath + "theta_compact_estimation_from_java.sk", std::ios::binary);
|
|
262
|
-
auto sketch = compact_theta_sketch::deserialize(is);
|
|
263
|
-
REQUIRE_FALSE(sketch.is_empty());
|
|
264
|
-
REQUIRE(sketch.is_estimation_mode());
|
|
265
|
-
REQUIRE(sketch.is_ordered());
|
|
266
|
-
REQUIRE(sketch.get_num_retained() == 4342);
|
|
267
|
-
REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
|
|
268
|
-
REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
|
|
269
|
-
REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
|
|
270
|
-
REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
|
|
271
|
-
|
|
272
|
-
// the same construction process in Java must have produced exactly the same sketch
|
|
273
|
-
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
274
|
-
const int n = 8192;
|
|
275
|
-
for (int i = 0; i < n; i++) update_sketch.update(i);
|
|
276
|
-
REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
|
|
277
|
-
REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
|
|
278
|
-
REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
|
|
279
|
-
REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
|
|
280
|
-
REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
|
|
281
|
-
REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
|
|
282
|
-
REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
|
|
283
|
-
REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
|
|
284
|
-
REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
|
|
285
|
-
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
286
|
-
// the sketches are ordered, so the iteration sequence must match exactly
|
|
287
|
-
auto iter = sketch.begin();
|
|
288
|
-
for (const auto& key: compact_sketch) {
|
|
289
|
-
REQUIRE(*iter == key);
|
|
290
|
-
++iter;
|
|
291
|
-
}
|
|
292
|
-
}
|
|
293
|
-
|
|
294
198
|
TEST_CASE("theta sketch: deserialize compact v1 estimation from java", "[theta_sketch]") {
|
|
295
199
|
std::ifstream is;
|
|
296
200
|
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
@@ -473,30 +377,6 @@ TEST_CASE("theta sketch: conversion constructor and wrapped compact", "[theta_sk
|
|
|
473
377
|
REQUIRE_THROWS_AS(wrapped_compact_theta_sketch::wrap(bytes.data(), bytes.size(), 0), std::invalid_argument);
|
|
474
378
|
}
|
|
475
379
|
|
|
476
|
-
TEST_CASE("theta sketch: wrap compact empty from java", "[theta_sketch]") {
|
|
477
|
-
std::ifstream is;
|
|
478
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
479
|
-
is.open(inputPath + "theta_compact_empty_from_java.sk", std::ios::binary | std::ios::ate);
|
|
480
|
-
|
|
481
|
-
std::vector<uint8_t> buf;
|
|
482
|
-
if(is) {
|
|
483
|
-
auto size = is.tellg();
|
|
484
|
-
buf.reserve(size);
|
|
485
|
-
buf.assign(size, 0);
|
|
486
|
-
is.seekg(0, std::ios_base::beg);
|
|
487
|
-
is.read((char*)(buf.data()), buf.size());
|
|
488
|
-
}
|
|
489
|
-
|
|
490
|
-
auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
|
|
491
|
-
REQUIRE(sketch.is_empty());
|
|
492
|
-
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
493
|
-
REQUIRE(sketch.get_num_retained() == 0);
|
|
494
|
-
REQUIRE(sketch.get_theta() == 1.0);
|
|
495
|
-
REQUIRE(sketch.get_estimate() == 0.0);
|
|
496
|
-
REQUIRE(sketch.get_lower_bound(1) == 0.0);
|
|
497
|
-
REQUIRE(sketch.get_upper_bound(1) == 0.0);
|
|
498
|
-
}
|
|
499
|
-
|
|
500
380
|
TEST_CASE("theta sketch: wrap compact v1 empty from java", "[theta_sketch]") {
|
|
501
381
|
std::ifstream is;
|
|
502
382
|
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
@@ -545,74 +425,6 @@ TEST_CASE("theta sketch: wrap compact v2 empty from java", "[theta_sketch]") {
|
|
|
545
425
|
REQUIRE(sketch.get_upper_bound(1) == 0.0);
|
|
546
426
|
}
|
|
547
427
|
|
|
548
|
-
TEST_CASE("theta sketch: wrap single item from java", "[theta_sketch]") {
|
|
549
|
-
std::ifstream is;
|
|
550
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
551
|
-
is.open(inputPath + "theta_compact_single_item_from_java.sk", std::ios::binary | std::ios::ate);
|
|
552
|
-
std::vector<uint8_t> buf;
|
|
553
|
-
if(is) {
|
|
554
|
-
auto size = is.tellg();
|
|
555
|
-
buf.reserve(size);
|
|
556
|
-
buf.assign(size, 0);
|
|
557
|
-
is.seekg(0, std::ios_base::beg);
|
|
558
|
-
is.read((char*)(buf.data()), buf.size());
|
|
559
|
-
}
|
|
560
|
-
|
|
561
|
-
auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
|
|
562
|
-
REQUIRE_FALSE(sketch.is_empty());
|
|
563
|
-
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
564
|
-
REQUIRE(sketch.get_num_retained() == 1);
|
|
565
|
-
REQUIRE(sketch.get_theta() == 1.0);
|
|
566
|
-
REQUIRE(sketch.get_estimate() == 1.0);
|
|
567
|
-
REQUIRE(sketch.get_lower_bound(1) == 1.0);
|
|
568
|
-
REQUIRE(sketch.get_upper_bound(1) == 1.0);
|
|
569
|
-
}
|
|
570
|
-
|
|
571
|
-
TEST_CASE("theta sketch: wrap compact estimation from java", "[theta_sketch]") {
|
|
572
|
-
std::ifstream is;
|
|
573
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
574
|
-
is.open(inputPath + "theta_compact_estimation_from_java.sk", std::ios::binary | std::ios::ate);
|
|
575
|
-
std::vector<uint8_t> buf;
|
|
576
|
-
if(is) {
|
|
577
|
-
auto size = is.tellg();
|
|
578
|
-
buf.reserve(size);
|
|
579
|
-
buf.assign(size, 0);
|
|
580
|
-
is.seekg(0, std::ios_base::beg);
|
|
581
|
-
is.read((char*)(buf.data()), buf.size());
|
|
582
|
-
}
|
|
583
|
-
|
|
584
|
-
auto sketch = wrapped_compact_theta_sketch::wrap(buf.data(), buf.size());
|
|
585
|
-
REQUIRE_FALSE(sketch.is_empty());
|
|
586
|
-
REQUIRE(sketch.is_estimation_mode());
|
|
587
|
-
REQUIRE(sketch.is_ordered());
|
|
588
|
-
REQUIRE(sketch.get_num_retained() == 4342);
|
|
589
|
-
REQUIRE(sketch.get_theta() == Approx(0.531700444213199).margin(1e-10));
|
|
590
|
-
REQUIRE(sketch.get_estimate() == Approx(8166.25234614053).margin(1e-10));
|
|
591
|
-
REQUIRE(sketch.get_lower_bound(2) == Approx(7996.956955317471).margin(1e-10));
|
|
592
|
-
REQUIRE(sketch.get_upper_bound(2) == Approx(8339.090301078124).margin(1e-10));
|
|
593
|
-
|
|
594
|
-
// the same construction process in Java must have produced exactly the same sketch
|
|
595
|
-
update_theta_sketch update_sketch = update_theta_sketch::builder().build();
|
|
596
|
-
const int n = 8192;
|
|
597
|
-
for (int i = 0; i < n; i++) update_sketch.update(i);
|
|
598
|
-
REQUIRE(sketch.get_num_retained() == update_sketch.get_num_retained());
|
|
599
|
-
REQUIRE(sketch.get_theta() == Approx(update_sketch.get_theta()).margin(1e-10));
|
|
600
|
-
REQUIRE(sketch.get_estimate() == Approx(update_sketch.get_estimate()).margin(1e-10));
|
|
601
|
-
REQUIRE(sketch.get_lower_bound(1) == Approx(update_sketch.get_lower_bound(1)).margin(1e-10));
|
|
602
|
-
REQUIRE(sketch.get_upper_bound(1) == Approx(update_sketch.get_upper_bound(1)).margin(1e-10));
|
|
603
|
-
REQUIRE(sketch.get_lower_bound(2) == Approx(update_sketch.get_lower_bound(2)).margin(1e-10));
|
|
604
|
-
REQUIRE(sketch.get_upper_bound(2) == Approx(update_sketch.get_upper_bound(2)).margin(1e-10));
|
|
605
|
-
REQUIRE(sketch.get_lower_bound(3) == Approx(update_sketch.get_lower_bound(3)).margin(1e-10));
|
|
606
|
-
REQUIRE(sketch.get_upper_bound(3) == Approx(update_sketch.get_upper_bound(3)).margin(1e-10));
|
|
607
|
-
compact_theta_sketch compact_sketch = update_sketch.compact();
|
|
608
|
-
// the sketches are ordered, so the iteration sequence must match exactly
|
|
609
|
-
auto iter = sketch.begin();
|
|
610
|
-
for (const auto key: compact_sketch) {
|
|
611
|
-
REQUIRE(*iter == key);
|
|
612
|
-
++iter;
|
|
613
|
-
}
|
|
614
|
-
}
|
|
615
|
-
|
|
616
428
|
TEST_CASE("theta sketch: wrap compact v1 estimation from java", "[theta_sketch]") {
|
|
617
429
|
std::ifstream is;
|
|
618
430
|
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
@@ -47,11 +47,12 @@ install(FILES
|
|
|
47
47
|
include/tuple_a_not_b_impl.hpp
|
|
48
48
|
include/tuple_jaccard_similarity.hpp
|
|
49
49
|
include/array_of_doubles_sketch.hpp
|
|
50
|
-
include/
|
|
51
|
-
include/
|
|
52
|
-
include/
|
|
53
|
-
include/
|
|
54
|
-
include/
|
|
55
|
-
include/
|
|
56
|
-
include/
|
|
50
|
+
include/array_tuple_sketch.hpp
|
|
51
|
+
include/array_tuple_sketch_impl.hpp
|
|
52
|
+
include/array_tuple_union.hpp
|
|
53
|
+
include/array_tuple_union_impl.hpp
|
|
54
|
+
include/array_tuple_intersection.hpp
|
|
55
|
+
include/array_tuple_intersection_impl.hpp
|
|
56
|
+
include/array_tuple_a_not_b.hpp
|
|
57
|
+
include/array_tuple_a_not_b_impl.hpp
|
|
57
58
|
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|