datasketches 0.3.1 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -0
- data/NOTICE +1 -1
- data/README.md +0 -2
- data/ext/datasketches/cpc_wrapper.cpp +3 -3
- data/ext/datasketches/kll_wrapper.cpp +0 -10
- data/lib/datasketches/version.rb +1 -1
- data/lib/datasketches.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +23 -20
- data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
- data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
- data/vendor/datasketches-cpp/Doxyfile +2827 -0
- data/vendor/datasketches-cpp/LICENSE +0 -76
- data/vendor/datasketches-cpp/README.md +1 -3
- data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +25 -27
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +15 -10
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +11 -2
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +5 -4
- data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
- data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +2 -2
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +6 -0
- data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
- data/vendor/datasketches-cpp/{python/src/__init__.py → count/CMakeLists.txt} +25 -1
- data/vendor/datasketches-cpp/count/include/count_min.hpp +405 -0
- data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +497 -0
- data/vendor/datasketches-cpp/{MANIFEST.in → count/test/CMakeLists.txt} +23 -20
- data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +155 -0
- data/vendor/datasketches-cpp/count/test/count_min_test.cpp +303 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +14 -11
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +16 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_serialize_for_java.cpp +38 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/{tox.ini → density/CMakeLists.txt} +24 -8
- data/vendor/datasketches-cpp/density/include/density_sketch.hpp +256 -0
- data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +543 -0
- data/vendor/datasketches-cpp/{python/datasketches/__init__.py → density/test/CMakeLists.txt} +15 -3
- data/vendor/datasketches-cpp/density/test/density_sketch_test.cpp +244 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +9 -3
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +19 -11
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +2 -5
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +19 -7
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +98 -42
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +94 -61
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +20 -8
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +3 -21
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +8 -0
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +14 -18
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +8 -2
- data/vendor/datasketches-cpp/hll/include/hll.hpp +79 -65
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +7 -1
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
- data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
- data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +79 -53
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +61 -132
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +5 -40
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +76 -54
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +66 -136
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +15 -39
- data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -4
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +105 -26
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +50 -111
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
- data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +89 -32
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +33 -19
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +13 -10
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +23 -19
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
- data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +33 -51
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -20
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +6279 -0
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +14 -8
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +60 -46
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +180 -33
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +430 -130
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +10 -10
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +21 -6
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +13 -3
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +15 -1
- data/vendor/datasketches-cpp/theta/test/bit_packing_test.cpp +80 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +39 -188
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -0
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
- data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
- data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection_impl.hpp +31 -0
- data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +2 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
- data/vendor/datasketches-cpp/{python/src/datasketches.cpp → tuple/test/tuple_sketch_serialize_for_java.cpp} +16 -30
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/version.cfg.in +1 -1
- metadata +61 -79
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/pyproject.toml +0 -23
- data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -81
- data/vendor/datasketches-cpp/python/README.md +0 -85
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -104
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -90
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -128
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -134
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -210
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -111
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -204
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -215
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -172
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
- data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -110
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -130
- data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -125
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -126
- data/vendor/datasketches-cpp/python/tests/req_test.py +0 -126
- data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -146
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -125
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +0 -110
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -94,14 +94,14 @@ hll_sketch_alloc<A>::hll_sketch_alloc(HllSketchImpl<A>* that) :
|
|
94
94
|
{}
|
95
95
|
|
96
96
|
template<typename A>
|
97
|
-
hll_sketch_alloc<A
|
97
|
+
hll_sketch_alloc<A>& hll_sketch_alloc<A>::operator=(const hll_sketch_alloc<A>& other) {
|
98
98
|
sketch_impl->get_deleter()(sketch_impl);
|
99
99
|
sketch_impl = other.sketch_impl->copy();
|
100
100
|
return *this;
|
101
101
|
}
|
102
102
|
|
103
103
|
template<typename A>
|
104
|
-
hll_sketch_alloc<A
|
104
|
+
hll_sketch_alloc<A>& hll_sketch_alloc<A>::operator=(hll_sketch_alloc<A>&& other) {
|
105
105
|
std::swap(sketch_impl, other.sketch_impl);
|
106
106
|
return *this;
|
107
107
|
}
|
@@ -232,12 +232,12 @@ void hll_sketch_alloc<A>::serialize_updatable(std::ostream& os) const {
|
|
232
232
|
}
|
233
233
|
|
234
234
|
template<typename A>
|
235
|
-
|
235
|
+
auto hll_sketch_alloc<A>::serialize_compact(unsigned header_size_bytes) const -> vector_bytes {
|
236
236
|
return sketch_impl->serialize(true, header_size_bytes);
|
237
237
|
}
|
238
238
|
|
239
239
|
template<typename A>
|
240
|
-
|
240
|
+
auto hll_sketch_alloc<A>::serialize_updatable() const -> vector_bytes {
|
241
241
|
return sketch_impl->serialize(false, 0);
|
242
242
|
}
|
243
243
|
|
@@ -30,11 +30,13 @@ namespace datasketches {
|
|
30
30
|
template<typename A>
|
31
31
|
class HllSketchImpl {
|
32
32
|
public:
|
33
|
+
using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
|
34
|
+
|
33
35
|
HllSketchImpl(uint8_t lgConfigK, target_hll_type tgtHllType, hll_mode mode, bool startFullSize);
|
34
36
|
virtual ~HllSketchImpl();
|
35
37
|
|
36
38
|
virtual void serialize(std::ostream& os, bool compact) const = 0;
|
37
|
-
virtual
|
39
|
+
virtual vector_bytes serialize(bool compact, unsigned header_size_bytes) const = 0;
|
38
40
|
|
39
41
|
virtual HllSketchImpl* copy() const = 0;
|
40
42
|
virtual HllSketchImpl* copyAs(target_hll_type tgtHllType) const = 0;
|
@@ -136,38 +136,20 @@ HllSketchImpl<A>* HllSketchImplFactory<A>::reset(HllSketchImpl<A>* impl, bool st
|
|
136
136
|
|
137
137
|
template<typename A>
|
138
138
|
Hll4Array<A>* HllSketchImplFactory<A>::convertToHll4(const HllArray<A>& srcHllArr) {
|
139
|
-
const uint8_t lgConfigK = srcHllArr.getLgConfigK();
|
140
139
|
using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
|
141
|
-
|
142
|
-
Hll4Array<A>(lgConfigK, srcHllArr.isStartFullSize(), srcHllArr.getAllocator());
|
143
|
-
hll4Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
|
144
|
-
hll4Array->mergeHll(srcHllArr);
|
145
|
-
hll4Array->putHipAccum(srcHllArr.getHipAccum());
|
146
|
-
return hll4Array;
|
140
|
+
return new (Hll4Alloc(srcHllArr.getAllocator()).allocate(1)) Hll4Array<A>(srcHllArr);
|
147
141
|
}
|
148
142
|
|
149
143
|
template<typename A>
|
150
144
|
Hll6Array<A>* HllSketchImplFactory<A>::convertToHll6(const HllArray<A>& srcHllArr) {
|
151
|
-
const uint8_t lgConfigK = srcHllArr.getLgConfigK();
|
152
145
|
using Hll6Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll6Array<A>>;
|
153
|
-
|
154
|
-
Hll6Array<A>(lgConfigK, srcHllArr.isStartFullSize(), srcHllArr.getAllocator());
|
155
|
-
hll6Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
|
156
|
-
hll6Array->mergeHll(srcHllArr);
|
157
|
-
hll6Array->putHipAccum(srcHllArr.getHipAccum());
|
158
|
-
return hll6Array;
|
146
|
+
return new (Hll6Alloc(srcHllArr.getAllocator()).allocate(1)) Hll6Array<A>(srcHllArr);
|
159
147
|
}
|
160
148
|
|
161
149
|
template<typename A>
|
162
150
|
Hll8Array<A>* HllSketchImplFactory<A>::convertToHll8(const HllArray<A>& srcHllArr) {
|
163
|
-
const uint8_t lgConfigK = srcHllArr.getLgConfigK();
|
164
151
|
using Hll8Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll8Array<A>>;
|
165
|
-
|
166
|
-
Hll8Array<A>(lgConfigK, srcHllArr.isStartFullSize(), srcHllArr.getAllocator());
|
167
|
-
hll8Array->putOutOfOrderFlag(srcHllArr.isOutOfOrderFlag());
|
168
|
-
hll8Array->mergeHll(srcHllArr);
|
169
|
-
hll8Array->putHipAccum(srcHllArr.getHipAccum());
|
170
|
-
return hll8Array;
|
152
|
+
return new (Hll8Alloc(srcHllArr.getAllocator()).allocate(1)) Hll8Array<A>(srcHllArr);
|
171
153
|
}
|
172
154
|
|
173
155
|
}
|
@@ -131,21 +131,29 @@ void hll_union_alloc<A>::coupon_update(uint32_t coupon) {
|
|
131
131
|
|
132
132
|
template<typename A>
|
133
133
|
double hll_union_alloc<A>::get_estimate() const {
|
134
|
+
if (gadget_.sketch_impl->getCurMode() == hll_mode::HLL)
|
135
|
+
static_cast<HllArray<A>*>(gadget_.sketch_impl)->check_rebuild_kxq_cur_min();
|
134
136
|
return gadget_.get_estimate();
|
135
137
|
}
|
136
138
|
|
137
139
|
template<typename A>
|
138
140
|
double hll_union_alloc<A>::get_composite_estimate() const {
|
141
|
+
if (gadget_.sketch_impl->getCurMode() == hll_mode::HLL)
|
142
|
+
static_cast<HllArray<A>*>(gadget_.sketch_impl)->check_rebuild_kxq_cur_min();
|
139
143
|
return gadget_.get_composite_estimate();
|
140
144
|
}
|
141
145
|
|
142
146
|
template<typename A>
|
143
147
|
double hll_union_alloc<A>::get_lower_bound(uint8_t num_std_dev) const {
|
148
|
+
if (gadget_.sketch_impl->getCurMode() == hll_mode::HLL)
|
149
|
+
static_cast<HllArray<A>*>(gadget_.sketch_impl)->check_rebuild_kxq_cur_min();
|
144
150
|
return gadget_.get_lower_bound(num_std_dev);
|
145
151
|
}
|
146
152
|
|
147
153
|
template<typename A>
|
148
154
|
double hll_union_alloc<A>::get_upper_bound(uint8_t num_std_dev) const {
|
155
|
+
if (gadget_.sketch_impl->getCurMode() == hll_mode::HLL)
|
156
|
+
static_cast<HllArray<A>*>(gadget_.sketch_impl)->check_rebuild_kxq_cur_min();
|
149
157
|
return gadget_.get_upper_bound(num_std_dev);
|
150
158
|
}
|
151
159
|
|
@@ -124,8 +124,6 @@ public:
|
|
124
124
|
static uint32_t pair(uint32_t slotNo, uint8_t value);
|
125
125
|
static uint32_t getLow26(uint32_t coupon);
|
126
126
|
static uint8_t getValue(uint32_t coupon);
|
127
|
-
static double invPow2(uint8_t e);
|
128
|
-
static uint8_t ceilingPowerOf2(uint32_t n);
|
129
127
|
static uint8_t simpleIntLog2(uint32_t n); // n must be power of 2
|
130
128
|
static uint8_t computeLgArrInts(hll_mode mode, uint32_t count, uint8_t lgConfigK);
|
131
129
|
static double getRelErr(bool upperBound, bool unioned, uint8_t lgConfigK, uint8_t numStdDev);
|
@@ -152,12 +150,6 @@ inline void HllUtil<A>::hash(const void* key, size_t keyLen, uint64_t seed, Hash
|
|
152
150
|
MurmurHash3_x64_128(key, keyLen, seed, result);
|
153
151
|
}
|
154
152
|
|
155
|
-
template<typename A>
|
156
|
-
inline double HllUtil<A>::getRelErr(bool upperBound, bool unioned,
|
157
|
-
uint8_t lgConfigK, uint8_t numStdDev) {
|
158
|
-
return RelativeErrorTables<A>::getRelErr(upperBound, unioned, lgConfigK, numStdDev);
|
159
|
-
}
|
160
|
-
|
161
153
|
template<typename A>
|
162
154
|
inline uint8_t HllUtil<A>::checkLgK(uint8_t lgK) {
|
163
155
|
if ((lgK >= hll_constants::MIN_LOG_K) && (lgK <= hll_constants::MAX_LOG_K)) {
|
@@ -167,6 +159,20 @@ inline uint8_t HllUtil<A>::checkLgK(uint8_t lgK) {
|
|
167
159
|
}
|
168
160
|
}
|
169
161
|
|
162
|
+
template<typename A>
|
163
|
+
inline double HllUtil<A>::getRelErr(bool upperBound, bool unioned,
|
164
|
+
uint8_t lgConfigK, uint8_t numStdDev) {
|
165
|
+
checkLgK(lgConfigK);
|
166
|
+
if (lgConfigK > 12) {
|
167
|
+
const double rseFactor = unioned ?
|
168
|
+
hll_constants::HLL_NON_HIP_RSE_FACTOR : hll_constants::HLL_HIP_RSE_FACTOR;
|
169
|
+
const uint32_t configK = 1 << lgConfigK;
|
170
|
+
return (upperBound ? -1 : 1) * (numStdDev * rseFactor) / sqrt(configK);
|
171
|
+
} else {
|
172
|
+
return RelativeErrorTables<A>::getRelErr(upperBound, unioned, lgConfigK, numStdDev);
|
173
|
+
}
|
174
|
+
}
|
175
|
+
|
170
176
|
template<typename A>
|
171
177
|
inline void HllUtil<A>::checkMemSize(uint64_t minBytes, uint64_t capBytes) {
|
172
178
|
if (capBytes < minBytes) {
|
@@ -196,16 +202,6 @@ inline uint8_t HllUtil<A>::getValue(uint32_t coupon) {
|
|
196
202
|
return coupon >> hll_constants::KEY_BITS_26;
|
197
203
|
}
|
198
204
|
|
199
|
-
template<typename A>
|
200
|
-
inline double HllUtil<A>::invPow2(uint8_t e) {
|
201
|
-
union {
|
202
|
-
long long longVal;
|
203
|
-
double doubleVal;
|
204
|
-
} conv;
|
205
|
-
conv.longVal = (1023L - e) << 52;
|
206
|
-
return conv.doubleVal;
|
207
|
-
}
|
208
|
-
|
209
205
|
template<typename A>
|
210
206
|
inline uint8_t HllUtil<A>::simpleIntLog2(uint32_t n) {
|
211
207
|
if (n == 0) {
|
@@ -23,12 +23,18 @@
|
|
23
23
|
namespace datasketches {
|
24
24
|
|
25
25
|
template<typename A>
|
26
|
-
class coupon_iterator
|
26
|
+
class coupon_iterator {
|
27
27
|
public:
|
28
|
+
using iterator_category = std::input_iterator_tag;
|
29
|
+
using value_type = uint32_t;
|
30
|
+
using difference_type = void;
|
31
|
+
using pointer = uint32_t*;
|
32
|
+
using reference = uint32_t;
|
33
|
+
|
28
34
|
coupon_iterator(const uint32_t* array, size_t array_slze, size_t index, bool all);
|
29
35
|
coupon_iterator& operator++();
|
30
36
|
bool operator!=(const coupon_iterator& other) const;
|
31
|
-
|
37
|
+
reference operator*() const;
|
32
38
|
private:
|
33
39
|
const uint32_t* array_;
|
34
40
|
size_t array_size_;
|
@@ -23,46 +23,22 @@
|
|
23
23
|
#include "common_defs.hpp"
|
24
24
|
#include "HllUtil.hpp"
|
25
25
|
|
26
|
-
#include <memory>
|
27
26
|
#include <iostream>
|
27
|
+
#include <memory>
|
28
|
+
#include <string>
|
28
29
|
#include <vector>
|
29
30
|
|
30
31
|
namespace datasketches {
|
31
32
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
* trade-offs with accuracy, space and performance. These types are specified with the
|
41
|
-
* {@link TgtHllType} parameter.
|
42
|
-
*
|
43
|
-
* <p>In terms of accuracy, all three types, for the same <i>lg_config_k</i>, have the same error
|
44
|
-
* distribution as a function of <i>n</i>, the number of unique values fed to the sketch.
|
45
|
-
* The configuration parameter <i>lg_config_k</i> is the log-base-2 of <i>K</i>,
|
46
|
-
* where <i>K</i> is the number of buckets or slots for the sketch.
|
47
|
-
*
|
48
|
-
* <p>During warmup, when the sketch has only received a small number of unique items
|
49
|
-
* (up to about 10% of <i>K</i>), this implementation leverages a new class of estimator
|
50
|
-
* algorithms with significantly better accuracy.
|
51
|
-
*
|
52
|
-
* <p>This sketch also offers the capability of operating off-heap. Given a WritableMemory object
|
53
|
-
* created by the user, the sketch will perform all of its updates and internal phase transitions
|
54
|
-
* in that object, which can actually reside either on-heap or off-heap based on how it is
|
55
|
-
* configured. In large systems that must update and merge many millions of sketches, having the
|
56
|
-
* sketch operate off-heap avoids the serialization and deserialization costs of moving sketches
|
57
|
-
* to and from off-heap memory-mapped files, for example, and eliminates big garbage collection
|
58
|
-
* delays.
|
59
|
-
*
|
60
|
-
* author Jon Malkin
|
61
|
-
* author Lee Rhodes
|
62
|
-
* author Kevin Lang
|
63
|
-
*/
|
33
|
+
// forward declarations
|
34
|
+
template<typename A> class hll_sketch_alloc;
|
35
|
+
template<typename A> class hll_union_alloc;
|
36
|
+
|
37
|
+
/// HLL sketch alias with default allocator
|
38
|
+
using hll_sketch = hll_sketch_alloc<std::allocator<uint8_t>>;
|
39
|
+
/// HLL union alias with default allocator
|
40
|
+
using hll_union = hll_union_alloc<std::allocator<uint8_t>>;
|
64
41
|
|
65
|
-
|
66
42
|
/**
|
67
43
|
* Specifies the target type of HLL sketch to be created. It is a target in that the actual
|
68
44
|
* allocation of the HLL array is deferred until sufficient number of items have been received by
|
@@ -99,14 +75,41 @@ enum target_hll_type {
|
|
99
75
|
HLL_8 ///< 8 bits per entry (fastest, fixed size)
|
100
76
|
};
|
101
77
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
78
|
+
/**
|
79
|
+
* This is a high performance implementation of Phillipe Flajolet's HLL sketch but with
|
80
|
+
* significantly improved error behavior. If the ONLY use case for sketching is counting
|
81
|
+
* uniques and merging, the HLL sketch is a reasonable choice, although the highest performing in terms of accuracy for
|
82
|
+
* storage space consumed is CPC (Compressed Probabilistic Counting). For large enough counts, this HLL version (with HLL_4) can be 2 to
|
83
|
+
* 16 times smaller than the Theta sketch family for the same accuracy.
|
84
|
+
*
|
85
|
+
* <p>This implementation offers three different types of HLL sketch, each with different
|
86
|
+
* trade-offs with accuracy, space and performance. These types are specified with the
|
87
|
+
* {@link target_hll_type} parameter.
|
88
|
+
*
|
89
|
+
* <p>In terms of accuracy, all three types, for the same <i>lg_config_k</i>, have the same error
|
90
|
+
* distribution as a function of <i>n</i>, the number of unique values fed to the sketch.
|
91
|
+
* The configuration parameter <i>lg_config_k</i> is the log-base-2 of <i>K</i>,
|
92
|
+
* where <i>K</i> is the number of buckets or slots for the sketch.
|
93
|
+
*
|
94
|
+
* <p>During warmup, when the sketch has only received a small number of unique items
|
95
|
+
* (up to about 10% of <i>K</i>), this implementation leverages a new class of estimator
|
96
|
+
* algorithms with significantly better accuracy.
|
97
|
+
*
|
98
|
+
* <p>This sketch also offers the capability of operating off-heap. Given a WritableMemory object
|
99
|
+
* created by the user, the sketch will perform all of its updates and internal phase transitions
|
100
|
+
* in that object, which can actually reside either on-heap or off-heap based on how it is
|
101
|
+
* configured. In large systems that must update and merge many millions of sketches, having the
|
102
|
+
* sketch operate off-heap avoids the serialization and deserialization costs of moving sketches
|
103
|
+
* to and from off-heap memory-mapped files, for example, and eliminates big garbage collection
|
104
|
+
* delays.
|
105
|
+
*
|
106
|
+
* author Jon Malkin
|
107
|
+
* author Lee Rhodes
|
108
|
+
* author Kevin Lang
|
109
|
+
*/
|
107
110
|
|
108
|
-
|
109
|
-
template<typename A>
|
111
|
+
// forward declaration
|
112
|
+
template<typename A> class HllSketchImpl;
|
110
113
|
|
111
114
|
template<typename A = std::allocator<uint8_t> >
|
112
115
|
class hll_sketch_alloc final {
|
@@ -118,45 +121,60 @@ class hll_sketch_alloc final {
|
|
118
121
|
* @param start_full_size Indicates whether to start in HLL mode,
|
119
122
|
* keeping memory use constant (if HLL_6 or HLL_8) at the cost of
|
120
123
|
* starting out using much more memory
|
124
|
+
* @param allocator instance of an Allocator
|
121
125
|
*/
|
122
126
|
explicit hll_sketch_alloc(uint8_t lg_config_k, target_hll_type tgt_type = HLL_4, bool start_full_size = false, const A& allocator = A());
|
123
127
|
|
124
128
|
/**
|
125
129
|
* Copy constructor
|
130
|
+
* @param that sketch to be copied
|
126
131
|
*/
|
127
132
|
hll_sketch_alloc(const hll_sketch_alloc<A>& that);
|
128
133
|
|
129
134
|
/**
|
130
135
|
* Copy constructor to a new target type
|
136
|
+
* @param that sketch to be copied
|
137
|
+
* @param tgt_type target_hll_type
|
131
138
|
*/
|
132
139
|
hll_sketch_alloc(const hll_sketch_alloc<A>& that, target_hll_type tgt_type);
|
133
140
|
|
134
141
|
/**
|
135
142
|
* Move constructor
|
143
|
+
* @param that sketch to be moved
|
136
144
|
*/
|
137
145
|
hll_sketch_alloc(hll_sketch_alloc<A>&& that) noexcept;
|
138
146
|
|
139
147
|
/**
|
140
148
|
* Reconstructs a sketch from a serialized image on a stream.
|
141
149
|
* @param is An input stream with a binary image of a sketch
|
150
|
+
* @param allocator instance of an Allocator
|
142
151
|
*/
|
143
152
|
static hll_sketch_alloc deserialize(std::istream& is, const A& allocator = A());
|
144
153
|
|
145
154
|
/**
|
146
155
|
* Reconstructs a sketch from a serialized image in a byte array.
|
147
|
-
* @param
|
156
|
+
* @param bytes An input array with a binary image of a sketch
|
148
157
|
* @param len Length of the input array, in bytes
|
158
|
+
* @param allocator instance of an Allocator
|
149
159
|
*/
|
150
160
|
static hll_sketch_alloc deserialize(const void* bytes, size_t len, const A& allocator = A());
|
151
161
|
|
152
162
|
//! Class destructor
|
153
163
|
virtual ~hll_sketch_alloc();
|
154
164
|
|
155
|
-
|
156
|
-
|
165
|
+
/**
|
166
|
+
* Copy assignment operator
|
167
|
+
* @param other sketch to be copied
|
168
|
+
* @return reference to this sketch
|
169
|
+
*/
|
170
|
+
hll_sketch_alloc& operator=(const hll_sketch_alloc<A>& other);
|
157
171
|
|
158
|
-
|
159
|
-
|
172
|
+
/**
|
173
|
+
* Move assignment operator
|
174
|
+
* @param other sketch to be moved
|
175
|
+
* @return reference to this sketch
|
176
|
+
*/
|
177
|
+
hll_sketch_alloc& operator=(hll_sketch_alloc<A>&& other);
|
160
178
|
|
161
179
|
/**
|
162
180
|
* Resets the sketch to an empty state in coupon collection mode.
|
@@ -164,18 +182,22 @@ class hll_sketch_alloc final {
|
|
164
182
|
*/
|
165
183
|
void reset();
|
166
184
|
|
167
|
-
|
185
|
+
// This is a convenience alias for users
|
186
|
+
// The type returned by the following serialize method
|
187
|
+
using vector_bytes = std::vector<uint8_t, typename std::allocator_traits<A>::template rebind_alloc<uint8_t>>;
|
168
188
|
|
169
189
|
/**
|
170
190
|
* Serializes the sketch to a byte array, compacting data structures
|
171
191
|
* where feasible to eliminate unused storage in the serialized image.
|
172
192
|
* @param header_size_bytes Allows for PostgreSQL integration
|
193
|
+
* @return serialized sketch in binary form
|
173
194
|
*/
|
174
195
|
vector_bytes serialize_compact(unsigned header_size_bytes = 0) const;
|
175
196
|
|
176
197
|
/**
|
177
198
|
* Serializes the sketch to a byte array, retaining all internal
|
178
199
|
* data structures in their current form.
|
200
|
+
* @return serialized sketch in binary form
|
179
201
|
*/
|
180
202
|
vector_bytes serialize_updatable() const;
|
181
203
|
|
@@ -197,7 +219,7 @@ class hll_sketch_alloc final {
|
|
197
219
|
* Human readable summary with optional detail
|
198
220
|
* @param summary if true, output the sketch summary
|
199
221
|
* @param detail if true, output the internal data array
|
200
|
-
* @param
|
222
|
+
* @param aux_detail if true, output the internal Aux array, if it exists.
|
201
223
|
* @param all if true, outputs all entries including empty ones
|
202
224
|
* @return human readable string with optional detail.
|
203
225
|
*/
|
@@ -358,7 +380,7 @@ class hll_sketch_alloc final {
|
|
358
380
|
* value can be exceeded in extremely rare cases. If exceeded, it
|
359
381
|
* will be larger by only a few percent.
|
360
382
|
*
|
361
|
-
* @param
|
383
|
+
* @param lg_k The Log2 of K for the target HLL sketch. This value must be
|
362
384
|
* between 4 and 21 inclusively.
|
363
385
|
* @param tgt_type the desired Hll type
|
364
386
|
* @return the maximum size in bytes that this sketch can grow to.
|
@@ -391,8 +413,6 @@ class hll_sketch_alloc final {
|
|
391
413
|
bool is_out_of_order_flag() const;
|
392
414
|
bool is_estimation_mode() const;
|
393
415
|
|
394
|
-
typedef typename std::allocator_traits<A>::template rebind_alloc<hll_sketch_alloc> AllocHllSketch;
|
395
|
-
|
396
416
|
HllSketchImpl<A>* sketch_impl;
|
397
417
|
friend hll_union_alloc<A>;
|
398
418
|
};
|
@@ -412,8 +432,8 @@ class hll_sketch_alloc final {
|
|
412
432
|
* <p>Although the API for this union operator parallels many of the methods of the
|
413
433
|
* <i>HllSketch</i>, the behavior of the union operator has some fundamental differences.
|
414
434
|
*
|
415
|
-
* <p>First, the user cannot specify the #
|
416
|
-
* Instead, it is specified for the sketch returned with #get_result
|
435
|
+
* <p>First, the user cannot specify the #target_hll_type as an input parameter.
|
436
|
+
* Instead, it is specified for the sketch returned with #get_result.
|
417
437
|
*
|
418
438
|
* <p>Second, the internal effective value of log-base-2 of <i>k</i> for the union operation can
|
419
439
|
* change dynamically based on the smallest <i>lg_config_k</i> that the union operation has seen.
|
@@ -422,7 +442,6 @@ class hll_sketch_alloc final {
|
|
422
442
|
* author Lee Rhodes
|
423
443
|
* author Kevin Lang
|
424
444
|
*/
|
425
|
-
|
426
445
|
template<typename A = std::allocator<uint8_t> >
|
427
446
|
class hll_union_alloc {
|
428
447
|
public:
|
@@ -430,6 +449,7 @@ class hll_union_alloc {
|
|
430
449
|
* Construct an hll_union operator with the given maximum log2 of k.
|
431
450
|
* @param lg_max_k The maximum size, in log2, of k. The value must
|
432
451
|
* be between 7 and 21, inclusive.
|
452
|
+
* @param allocator instance of an Allocator
|
433
453
|
*/
|
434
454
|
explicit hll_union_alloc(uint8_t lg_max_k, const A& allocator = A());
|
435
455
|
|
@@ -494,21 +514,21 @@ class hll_union_alloc {
|
|
494
514
|
|
495
515
|
/**
|
496
516
|
* Returns the result of this union operator with the specified
|
497
|
-
* #
|
498
|
-
* @param The tgt_hll_type enum value of the desired result (Default: HLL_4)
|
517
|
+
* #target_hll_type.
|
518
|
+
* @param tgt_type The target_hll_type enum value of the desired result (Default: HLL_4)
|
499
519
|
* @return The result of this union with the specified target_hll_type
|
500
520
|
*/
|
501
521
|
hll_sketch_alloc<A> get_result(target_hll_type tgt_type = HLL_4) const;
|
502
522
|
|
503
523
|
/**
|
504
524
|
* Update this union operator with the given sketch.
|
505
|
-
* @param The given sketch.
|
525
|
+
* @param sketch The given sketch.
|
506
526
|
*/
|
507
527
|
void update(const hll_sketch_alloc<A>& sketch);
|
508
528
|
|
509
529
|
/**
|
510
530
|
* Update this union operator with the given temporary sketch.
|
511
|
-
* @param The given sketch.
|
531
|
+
* @param sketch The given sketch.
|
512
532
|
*/
|
513
533
|
void update(hll_sketch_alloc<A>&& sketch);
|
514
534
|
|
@@ -608,7 +628,7 @@ class hll_union_alloc {
|
|
608
628
|
* perform the union. This may involve swapping, down-sampling, transforming, and / or
|
609
629
|
* copying one of the arguments and may completely replace the internals of the union.
|
610
630
|
*
|
611
|
-
* @param
|
631
|
+
* @param sketch the given incoming sketch, which may not be modified.
|
612
632
|
* @param lg_max_k the maximum value of log2 K for this union.
|
613
633
|
*/
|
614
634
|
inline void union_impl(const hll_sketch_alloc<A>& sketch, uint8_t lg_max_k);
|
@@ -628,12 +648,6 @@ class hll_union_alloc {
|
|
628
648
|
hll_sketch_alloc<A> gadget_;
|
629
649
|
};
|
630
650
|
|
631
|
-
/// convenience alias for hll_sketch with default allocator
|
632
|
-
typedef hll_sketch_alloc<> hll_sketch;
|
633
|
-
|
634
|
-
/// convenience alias for hll_union with default allocator
|
635
|
-
typedef hll_union_alloc<> hll_union;
|
636
|
-
|
637
651
|
} // namespace datasketches
|
638
652
|
|
639
653
|
#include "hll.private.hpp"
|
@@ -20,7 +20,6 @@ add_executable(hll_test)
|
|
20
20
|
target_link_libraries(hll_test hll common_test_lib)
|
21
21
|
|
22
22
|
set_target_properties(hll_test PROPERTIES
|
23
|
-
CXX_STANDARD 11
|
24
23
|
CXX_STANDARD_REQUIRED YES
|
25
24
|
)
|
26
25
|
|
@@ -49,3 +48,17 @@ target_sources(hll_test
|
|
49
48
|
ToFromByteArrayTest.cpp
|
50
49
|
IsomorphicTest.cpp
|
51
50
|
)
|
51
|
+
|
52
|
+
if (SERDE_COMPAT)
|
53
|
+
target_sources(hll_test
|
54
|
+
PRIVATE
|
55
|
+
hll_sketch_deserialize_from_java_test.cpp
|
56
|
+
)
|
57
|
+
endif()
|
58
|
+
|
59
|
+
if (GENERATE)
|
60
|
+
target_sources(hll_test
|
61
|
+
PRIVATE
|
62
|
+
hll_sketch_serialize_for_java.cpp
|
63
|
+
)
|
64
|
+
endif()
|
@@ -53,11 +53,16 @@ static void basicUnion(uint64_t n1, uint64_t n2,
|
|
53
53
|
v += n2;
|
54
54
|
|
55
55
|
hll_union u(lgMaxK);
|
56
|
-
u.update(
|
56
|
+
u.update(h1);
|
57
57
|
u.update(h2);
|
58
58
|
|
59
59
|
hll_sketch result = u.get_result(resultType);
|
60
60
|
|
61
|
+
// ensure we check a direct union estimate, without first calling get_result()
|
62
|
+
u.reset();
|
63
|
+
u.update(std::move(h1));
|
64
|
+
u.update(h2);
|
65
|
+
|
61
66
|
// force non-HIP estimates to avoid issues with in- vs out-of-order
|
62
67
|
double uEst = result.get_composite_estimate();
|
63
68
|
double uUb = result.get_upper_bound(2);
|
@@ -74,6 +79,7 @@ static void basicUnion(uint64_t n1, uint64_t n2,
|
|
74
79
|
REQUIRE((uEst - uLb) >= 0.0);
|
75
80
|
|
76
81
|
REQUIRE(controlEst == uEst);
|
82
|
+
REQUIRE(controlEst == u.get_composite_estimate());
|
77
83
|
}
|
78
84
|
|
79
85
|
/**
|
@@ -53,74 +53,6 @@ TEST_CASE("hll to/from byte array: double serialize", "[hll_byte_array]") {
|
|
53
53
|
}
|
54
54
|
}
|
55
55
|
|
56
|
-
TEST_CASE("hll to/from byte array: deserialize from java", "[hll_byte_array]") {
|
57
|
-
std::string inputPath;
|
58
|
-
#ifdef TEST_BINARY_INPUT_PATH
|
59
|
-
inputPath = TEST_BINARY_INPUT_PATH;
|
60
|
-
#else
|
61
|
-
inputPath = "test/";
|
62
|
-
#endif
|
63
|
-
|
64
|
-
std::ifstream ifs;
|
65
|
-
ifs.open(inputPath + "list_from_java.sk", std::ios::binary);
|
66
|
-
hll_sketch sk = hll_sketch::deserialize(ifs);
|
67
|
-
REQUIRE(sk.is_empty() == false);
|
68
|
-
REQUIRE(sk.get_lg_config_k() == 8);
|
69
|
-
REQUIRE(sk.get_lower_bound(1) == 7.0);
|
70
|
-
REQUIRE(sk.get_estimate() == Approx(7.0).margin(1e-6));
|
71
|
-
REQUIRE(sk.get_upper_bound(1) == Approx(7.000350).margin(1e-5));
|
72
|
-
ifs.close();
|
73
|
-
|
74
|
-
ifs.open(inputPath + "compact_set_from_java.sk", std::ios::binary);
|
75
|
-
sk = hll_sketch::deserialize(ifs);
|
76
|
-
REQUIRE(sk.is_empty() == false);
|
77
|
-
REQUIRE(sk.get_lg_config_k() == 8);
|
78
|
-
REQUIRE(sk.get_lower_bound(1) == 24.0);
|
79
|
-
REQUIRE(sk.get_estimate() == Approx(24.0).margin(1e-5));
|
80
|
-
REQUIRE(sk.get_upper_bound(1) == Approx(24.001200).margin(1e-5));
|
81
|
-
ifs.close();
|
82
|
-
|
83
|
-
ifs.open(inputPath + "updatable_set_from_java.sk", std::ios::binary);
|
84
|
-
sk = hll_sketch::deserialize(ifs);
|
85
|
-
REQUIRE(sk.is_empty() == false);
|
86
|
-
REQUIRE(sk.get_lg_config_k() == 8);
|
87
|
-
REQUIRE(sk.get_lower_bound(1) == 24.0);
|
88
|
-
REQUIRE(sk.get_estimate() == Approx(24.0).margin(1e-5));
|
89
|
-
REQUIRE(sk.get_upper_bound(1) == Approx(24.001200).margin(1e-5));
|
90
|
-
ifs.close();
|
91
|
-
|
92
|
-
|
93
|
-
ifs.open(inputPath + "array6_from_java.sk", std::ios::binary);
|
94
|
-
sk = hll_sketch::deserialize(ifs);
|
95
|
-
REQUIRE(sk.is_empty() == false);
|
96
|
-
REQUIRE(sk.get_lg_config_k() == 8);
|
97
|
-
REQUIRE(sk.get_lower_bound(1) == Approx(9589.968564).margin(1e-5));
|
98
|
-
REQUIRE(sk.get_estimate() == Approx(10089.150211).margin(1e-5));
|
99
|
-
REQUIRE(sk.get_upper_bound(1) == Approx(10642.370492).margin(1e-5));
|
100
|
-
ifs.close();
|
101
|
-
|
102
|
-
|
103
|
-
ifs.open(inputPath + "compact_array4_from_java.sk", std::ios::binary);
|
104
|
-
sk = hll_sketch::deserialize(ifs);
|
105
|
-
REQUIRE(sk.is_empty() == false);
|
106
|
-
REQUIRE(sk.get_lg_config_k() == 8);
|
107
|
-
REQUIRE(sk.get_lower_bound(1) == Approx(9589.968564).margin(1e-5));
|
108
|
-
REQUIRE(sk.get_estimate() == Approx(10089.150211).margin(1e-5));
|
109
|
-
REQUIRE(sk.get_upper_bound(1) == Approx(10642.370492).margin(1e-5));
|
110
|
-
|
111
|
-
ifs.close();
|
112
|
-
|
113
|
-
|
114
|
-
ifs.open(inputPath + "updatable_array4_from_java.sk", std::ios::binary);
|
115
|
-
sk = hll_sketch::deserialize(ifs);
|
116
|
-
REQUIRE(sk.is_empty() == false);
|
117
|
-
REQUIRE(sk.get_lg_config_k() == 8);
|
118
|
-
REQUIRE(sk.get_lower_bound(1) == Approx(9589.968564).margin(1e-5));
|
119
|
-
REQUIRE(sk.get_estimate() == Approx(10089.150211).margin(1e-5));
|
120
|
-
REQUIRE(sk.get_upper_bound(1) == Approx(10642.370492).margin(1e-5));
|
121
|
-
ifs.close();
|
122
|
-
}
|
123
|
-
|
124
56
|
static void checkSketchEquality(hll_sketch& sk1, hll_sketch& sk2) {
|
125
57
|
REQUIRE(sk1.get_lg_config_k() == sk2.get_lg_config_k());
|
126
58
|
REQUIRE(sk1.get_lower_bound(1) == sk2.get_lower_bound(1));
|