datasketches 0.3.2 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/NOTICE +1 -1
- data/README.md +0 -2
- data/ext/datasketches/cpc_wrapper.cpp +2 -2
- data/ext/datasketches/kll_wrapper.cpp +0 -10
- data/lib/datasketches/version.rb +1 -1
- data/lib/datasketches.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
- data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
- data/vendor/datasketches-cpp/Doxyfile +2827 -0
- data/vendor/datasketches-cpp/LICENSE +0 -76
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +1 -3
- data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
- data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
- data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
- data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
- data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
- data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
- data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
- data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
- data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
- data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
- data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
- data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
- data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
- data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
- data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
- data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
- data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
- data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
- data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
- data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
- data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/version.cfg.in +1 -1
- metadata +47 -93
- data/vendor/datasketches-cpp/MANIFEST.in +0 -39
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/pyproject.toml +0 -23
- data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
- data/vendor/datasketches-cpp/python/README.md +0 -85
- data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
- data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
- data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
- data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
- data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
- data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
- data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
- data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
- data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
- data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
- data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
- data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
- data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
- data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
- data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
- data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +0 -110
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tox.ini +0 -26
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -29,6 +29,7 @@
|
|
29
29
|
namespace datasketches {
|
30
30
|
|
31
31
|
/**
|
32
|
+
* Bounds on ratios in sampled sets.
|
32
33
|
* This class is used to compute the bounds on the estimate of the ratio <i>|B| / |A|</i>, where:
|
33
34
|
* <ul>
|
34
35
|
* <li><i>|A|</i> is the unknown size of a set <i>A</i> of unique identifiers.</li>
|
@@ -28,6 +28,7 @@
|
|
28
28
|
namespace datasketches {
|
29
29
|
|
30
30
|
/**
|
31
|
+
* Bounds on ratios in Theta sketched sets.
|
31
32
|
* This is to compute the bounds on the estimate of the ratio <i>B / A</i>, where:
|
32
33
|
* <ul>
|
33
34
|
* <li><i>A</i> is a Theta Sketch of population <i>PopA</i>.</li>
|
@@ -50,8 +51,8 @@ class bounds_on_ratios_in_theta_sketched_sets {
|
|
50
51
|
public:
|
51
52
|
/**
|
52
53
|
* Gets the approximate lower bound for B over A based on a 95% confidence interval
|
53
|
-
* @param
|
54
|
-
* @param
|
54
|
+
* @param sketch_a the sketch A
|
55
|
+
* @param sketch_b the sketch B
|
55
56
|
* @return the approximate lower bound for B over A
|
56
57
|
*/
|
57
58
|
template<typename SketchA, typename SketchB>
|
@@ -72,8 +73,8 @@ public:
|
|
72
73
|
|
73
74
|
/**
|
74
75
|
* Gets the approximate upper bound for B over A based on a 95% confidence interval
|
75
|
-
* @param
|
76
|
-
* @param
|
76
|
+
* @param sketch_a the sketch A
|
77
|
+
* @param sketch_b the sketch B
|
77
78
|
* @return the approximate upper bound for B over A
|
78
79
|
*/
|
79
80
|
template<typename SketchA, typename SketchB>
|
@@ -94,8 +95,8 @@ public:
|
|
94
95
|
|
95
96
|
/**
|
96
97
|
* Gets the estimate for B over A
|
97
|
-
* @param
|
98
|
-
* @param
|
98
|
+
* @param sketch_a the sketch A
|
99
|
+
* @param sketch_b the sketch B
|
99
100
|
* @return the estimate for B over A
|
100
101
|
*/
|
101
102
|
template<typename SketchA, typename SketchB>
|
@@ -25,6 +25,16 @@
|
|
25
25
|
|
26
26
|
namespace datasketches {
|
27
27
|
|
28
|
+
// forward declaration
|
29
|
+
template<typename A> class theta_a_not_b_alloc;
|
30
|
+
|
31
|
+
// alias with default allocator for convenience
|
32
|
+
using theta_a_not_b = theta_a_not_b_alloc<std::allocator<uint64_t>>;
|
33
|
+
|
34
|
+
/**
|
35
|
+
* Theta A-not-B (set difference).
|
36
|
+
* Computes set difference of Theta sketches.
|
37
|
+
*/
|
28
38
|
template<typename Allocator = std::allocator<uint64_t>>
|
29
39
|
class theta_a_not_b_alloc {
|
30
40
|
public:
|
@@ -33,11 +43,19 @@ public:
|
|
33
43
|
using CompactSketch = compact_theta_sketch_alloc<Allocator>;
|
34
44
|
using State = theta_set_difference_base<Entry, ExtractKey, CompactSketch, Allocator>;
|
35
45
|
|
46
|
+
/**
|
47
|
+
* Constructor
|
48
|
+
* @param seed for the hash function that was used to create the sketch
|
49
|
+
* @param allocator to use for allocating and deallocating memory
|
50
|
+
*/
|
36
51
|
explicit theta_a_not_b_alloc(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
|
37
52
|
|
38
53
|
/**
|
39
|
-
* Computes the
|
40
|
-
* @
|
54
|
+
* Computes the A-not-B set operation given two sketches.
|
55
|
+
* @param a sketch A
|
56
|
+
* @param b sketch B
|
57
|
+
* @param ordered optional flag to specify if an ordered sketch should be produced
|
58
|
+
* @return the result of A-not-B as a compact sketch
|
41
59
|
*/
|
42
60
|
template<typename FwdSketch, typename Sketch>
|
43
61
|
CompactSketch compute(FwdSketch&& a, const Sketch& b, bool ordered = true) const;
|
@@ -46,9 +64,6 @@ private:
|
|
46
64
|
State state_;
|
47
65
|
};
|
48
66
|
|
49
|
-
// alias with default allocator for convenience
|
50
|
-
using theta_a_not_b = theta_a_not_b_alloc<std::allocator<uint64_t>>;
|
51
|
-
|
52
67
|
} /* namespace datasketches */
|
53
68
|
|
54
69
|
#include "theta_a_not_b_impl.hpp"
|
@@ -25,15 +25,21 @@
|
|
25
25
|
|
26
26
|
namespace datasketches {
|
27
27
|
|
28
|
+
/// Theta constants
|
28
29
|
namespace theta_constants {
|
30
|
+
/// hash table resize factor
|
29
31
|
using resize_factor = datasketches::resize_factor;
|
30
|
-
|
31
|
-
const
|
32
|
+
/// default resize factor
|
33
|
+
const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
|
34
|
+
|
35
|
+
/// max theta - signed max for compatibility with Java
|
36
|
+
const uint64_t MAX_THETA = LLONG_MAX;
|
37
|
+
/// min log2 of K
|
32
38
|
const uint8_t MIN_LG_K = 5;
|
39
|
+
/// max log2 of K
|
33
40
|
const uint8_t MAX_LG_K = 26;
|
34
|
-
|
41
|
+
/// default log2 of K
|
35
42
|
const uint8_t DEFAULT_LG_K = 12;
|
36
|
-
const resize_factor DEFAULT_RESIZE_FACTOR = resize_factor::X8;
|
37
43
|
}
|
38
44
|
|
39
45
|
} /* namespace datasketches */
|
@@ -57,7 +57,7 @@ public:
|
|
57
57
|
// consistent way of initializing theta from p
|
58
58
|
// avoids multiplication if p == 1 since it might not yield MAX_THETA exactly
|
59
59
|
static uint64_t starting_theta_from_p(float p) {
|
60
|
-
if (p < 1) return static_cast<
|
60
|
+
if (p < 1) return static_cast<uint64_t>(theta_constants::MAX_THETA * p);
|
61
61
|
return theta_constants::MAX_THETA;
|
62
62
|
}
|
63
63
|
|
@@ -25,6 +25,16 @@
|
|
25
25
|
|
26
26
|
namespace datasketches {
|
27
27
|
|
28
|
+
// forward declaration
|
29
|
+
template<typename A> class theta_intersection_alloc;
|
30
|
+
|
31
|
+
// alias with default allocator for convenience
|
32
|
+
using theta_intersection = theta_intersection_alloc<std::allocator<uint64_t>>;
|
33
|
+
|
34
|
+
/**
|
35
|
+
* Theta intersection.
|
36
|
+
* Computes intersection of Theta sketches.
|
37
|
+
*/
|
28
38
|
template<typename Allocator = std::allocator<uint64_t>>
|
29
39
|
class theta_intersection_alloc {
|
30
40
|
public:
|
@@ -33,6 +43,7 @@ public:
|
|
33
43
|
using Sketch = theta_sketch_alloc<Allocator>;
|
34
44
|
using CompactSketch = compact_theta_sketch_alloc<Allocator>;
|
35
45
|
|
46
|
+
// there is no payload in Theta sketch entry
|
36
47
|
struct nop_policy {
|
37
48
|
void operator()(uint64_t internal_entry, uint64_t incoming_entry) const {
|
38
49
|
unused(incoming_entry);
|
@@ -41,7 +52,7 @@ public:
|
|
41
52
|
};
|
42
53
|
using State = theta_intersection_base<Entry, ExtractKey, nop_policy, Sketch, CompactSketch, Allocator>;
|
43
54
|
|
44
|
-
|
55
|
+
/**
|
45
56
|
* Constructor
|
46
57
|
* @param seed for the hash function that was used to create the sketch
|
47
58
|
* @param allocator to use for allocating and deallocating memory
|
@@ -61,7 +72,7 @@ public:
|
|
61
72
|
* Produces a copy of the current state of the intersection.
|
62
73
|
* If update() was not called, the state is the infinite "universe",
|
63
74
|
* which is considered an undefined state, and throws an exception.
|
64
|
-
* @param ordered optional flag to specify if ordered sketch should be produced
|
75
|
+
* @param ordered optional flag to specify if an ordered sketch should be produced
|
65
76
|
* @return the result of the intersection
|
66
77
|
*/
|
67
78
|
CompactSketch get_result(bool ordered = true) const;
|
@@ -76,9 +87,6 @@ private:
|
|
76
87
|
State state_;
|
77
88
|
};
|
78
89
|
|
79
|
-
// alias with default allocator for convenience
|
80
|
-
using theta_intersection = theta_intersection_alloc<std::allocator<uint64_t>>;
|
81
|
-
|
82
90
|
} /* namespace datasketches */
|
83
91
|
|
84
92
|
#include "theta_intersection_impl.hpp"
|
@@ -49,8 +49,8 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
|
|
49
49
|
if (!is_valid_) { // first update, copy or move incoming sketch
|
50
50
|
is_valid_ = true;
|
51
51
|
const uint8_t lg_size = lg_size_from_count(sketch.get_num_retained(), theta_update_sketch_base<EN, EK, A>::REBUILD_THRESHOLD);
|
52
|
-
table_ = hash_table(lg_size, lg_size, resize_factor::X1, 1, table_.theta_, table_.seed_, table_.allocator_, table_.is_empty_);
|
53
|
-
for (auto
|
52
|
+
table_ = hash_table(lg_size, lg_size - 1, resize_factor::X1, 1, table_.theta_, table_.seed_, table_.allocator_, table_.is_empty_);
|
53
|
+
for (auto&& entry: sketch) {
|
54
54
|
auto result = table_.find(EK()(entry));
|
55
55
|
if (result.second) {
|
56
56
|
throw std::invalid_argument("duplicate key, possibly corrupted input sketch");
|
@@ -64,7 +64,7 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
|
|
64
64
|
matched_entries.reserve(max_matches);
|
65
65
|
uint32_t match_count = 0;
|
66
66
|
uint32_t count = 0;
|
67
|
-
for (auto
|
67
|
+
for (auto&& entry: sketch) {
|
68
68
|
if (EK()(entry) < table_.theta_) {
|
69
69
|
auto result = table_.find(EK()(entry));
|
70
70
|
if (result.second) {
|
@@ -88,8 +88,8 @@ void theta_intersection_base<EN, EK, P, S, CS, A>::update(SS&& sketch) {
|
|
88
88
|
if (table_.theta_ == theta_constants::MAX_THETA) table_.is_empty_ = true;
|
89
89
|
} else {
|
90
90
|
const uint8_t lg_size = lg_size_from_count(match_count, theta_update_sketch_base<EN, EK, A>::REBUILD_THRESHOLD);
|
91
|
-
table_ = hash_table(lg_size, lg_size, resize_factor::X1, 1, table_.theta_, table_.seed_, table_.allocator_, table_.is_empty_);
|
92
|
-
for (uint32_t i = 0; i < match_count; i
|
91
|
+
table_ = hash_table(lg_size, lg_size - 1, resize_factor::X1, 1, table_.theta_, table_.seed_, table_.allocator_, table_.is_empty_);
|
92
|
+
for (uint32_t i = 0; i < match_count; ++i) {
|
93
93
|
auto result = table_.find(EK()(matched_entries[i]));
|
94
94
|
table_.insert(result.first, std::move(matched_entries[i]));
|
95
95
|
}
|
@@ -28,9 +28,9 @@ state_(seed, nop_policy(), allocator)
|
|
28
28
|
{}
|
29
29
|
|
30
30
|
template<typename A>
|
31
|
-
template<typename
|
32
|
-
void theta_intersection_alloc<A>::update(
|
33
|
-
state_.update(std::forward<
|
31
|
+
template<typename FwdSketch>
|
32
|
+
void theta_intersection_alloc<A>::update(FwdSketch&& sketch) {
|
33
|
+
state_.update(std::forward<FwdSketch>(sketch));
|
34
34
|
}
|
35
35
|
|
36
36
|
template<typename A>
|
@@ -26,10 +26,11 @@
|
|
26
26
|
|
27
27
|
namespace datasketches {
|
28
28
|
|
29
|
+
/// Theta Jaccard similarity alias
|
29
30
|
template<typename Allocator = std::allocator<uint64_t>>
|
30
31
|
using theta_jaccard_similarity_alloc = jaccard_similarity_base<theta_union_alloc<Allocator>, theta_intersection_alloc<Allocator>, trivial_extract_key>;
|
31
32
|
|
32
|
-
|
33
|
+
/// Theta Jaccard similarity alias with default allocator
|
33
34
|
using theta_jaccard_similarity = theta_jaccard_similarity_alloc<std::allocator<uint64_t>>;
|
34
35
|
|
35
36
|
} /* namespace datasketches */
|
@@ -25,6 +25,22 @@
|
|
25
25
|
|
26
26
|
namespace datasketches {
|
27
27
|
|
28
|
+
// forward declarations
|
29
|
+
template<typename A> class theta_sketch_alloc;
|
30
|
+
template<typename A> class update_theta_sketch_alloc;
|
31
|
+
template<typename A> class compact_theta_sketch_alloc;
|
32
|
+
template<typename A> class wrapped_compact_theta_sketch_alloc;
|
33
|
+
|
34
|
+
/// Theta sketch alias with default allocator
|
35
|
+
using theta_sketch = theta_sketch_alloc<std::allocator<uint64_t>>;
|
36
|
+
/// Update Theta sketch alias with default allocator
|
37
|
+
using update_theta_sketch = update_theta_sketch_alloc<std::allocator<uint64_t>>;
|
38
|
+
/// Compact Theta sketch alias with default allocator
|
39
|
+
using compact_theta_sketch = compact_theta_sketch_alloc<std::allocator<uint64_t>>;
|
40
|
+
/// Wrapped Compact Theta sketch alias with default allocator
|
41
|
+
using wrapped_compact_theta_sketch = wrapped_compact_theta_sketch_alloc<std::allocator<uint64_t>>;
|
42
|
+
|
43
|
+
/// Abstract base class for Theta sketch
|
28
44
|
template<typename Allocator = std::allocator<uint64_t>>
|
29
45
|
class base_theta_sketch_alloc {
|
30
46
|
public:
|
@@ -106,6 +122,7 @@ protected:
|
|
106
122
|
virtual void print_items(std::ostringstream& os) const = 0;
|
107
123
|
};
|
108
124
|
|
125
|
+
/// Base class for the Theta Sketch, a generalization of the Kth Minimum Value (KMV) sketch.
|
109
126
|
template<typename Allocator = std::allocator<uint64_t>>
|
110
127
|
class theta_sketch_alloc: public base_theta_sketch_alloc<Allocator> {
|
111
128
|
public:
|
@@ -149,6 +166,11 @@ protected:
|
|
149
166
|
// forward declaration
|
150
167
|
template<typename A> class compact_theta_sketch_alloc;
|
151
168
|
|
169
|
+
/**
|
170
|
+
* Update Theta sketch.
|
171
|
+
* The purpose of this class is to build a Theta sketch from input data via the update() methods.
|
172
|
+
* There is no constructor. Use builder instead.
|
173
|
+
*/
|
152
174
|
template<typename Allocator = std::allocator<uint64_t>>
|
153
175
|
class update_theta_sketch_alloc: public theta_sketch_alloc<Allocator> {
|
154
176
|
public:
|
@@ -163,11 +185,33 @@ public:
|
|
163
185
|
// No constructor here. Use builder instead.
|
164
186
|
class builder;
|
165
187
|
|
166
|
-
|
167
|
-
|
188
|
+
/**
|
189
|
+
* Copy constructor
|
190
|
+
* @param other sketch to be copied
|
191
|
+
*/
|
192
|
+
update_theta_sketch_alloc(const update_theta_sketch_alloc& other) = default;
|
193
|
+
|
194
|
+
/**
|
195
|
+
* Move constructor
|
196
|
+
* @param other sketch to be moved
|
197
|
+
*/
|
198
|
+
update_theta_sketch_alloc(update_theta_sketch_alloc&& other) noexcept = default;
|
199
|
+
|
168
200
|
virtual ~update_theta_sketch_alloc() = default;
|
169
|
-
|
170
|
-
|
201
|
+
|
202
|
+
/**
|
203
|
+
* Copy assignment
|
204
|
+
* @param other sketch to be copied
|
205
|
+
* @return reference to this sketch
|
206
|
+
*/
|
207
|
+
update_theta_sketch_alloc& operator=(const update_theta_sketch_alloc& other) = default;
|
208
|
+
|
209
|
+
/**
|
210
|
+
* Move assignment
|
211
|
+
* @param other sketch to be moved
|
212
|
+
* @return reference to this sketch
|
213
|
+
*/
|
214
|
+
update_theta_sketch_alloc& operator=(update_theta_sketch_alloc&& other) = default;
|
171
215
|
|
172
216
|
virtual Allocator get_allocator() const;
|
173
217
|
virtual bool is_empty() const;
|
@@ -287,7 +331,7 @@ public:
|
|
287
331
|
|
288
332
|
/**
|
289
333
|
* Converts this sketch to a compact sketch (ordered or unordered).
|
290
|
-
* @param ordered optional flag to specify if ordered sketch should be produced
|
334
|
+
* @param ordered optional flag to specify if an ordered sketch should be produced
|
291
335
|
* @return compact sketch
|
292
336
|
*/
|
293
337
|
compact_theta_sketch_alloc<Allocator> compact(bool ordered = true) const;
|
@@ -307,8 +351,10 @@ private:
|
|
307
351
|
virtual void print_specifics(std::ostringstream& os) const;
|
308
352
|
};
|
309
353
|
|
310
|
-
|
311
|
-
|
354
|
+
/**
|
355
|
+
* Compact Theta sketch.
|
356
|
+
* This is an immutable form of the Theta sketch, the form that can be serialized and deserialized.
|
357
|
+
*/
|
312
358
|
template<typename Allocator = std::allocator<uint64_t>>
|
313
359
|
class compact_theta_sketch_alloc: public theta_sketch_alloc<Allocator> {
|
314
360
|
public:
|
@@ -327,13 +373,42 @@ public:
|
|
327
373
|
// - as a result of a set operation
|
328
374
|
// - by deserializing a previously serialized compact sketch
|
329
375
|
|
376
|
+
/**
|
377
|
+
* Converting constructor.
|
378
|
+
* Constructs a compact sketch from any other type of Theta sketch
|
379
|
+
* @param other sketch to be constructed from
|
380
|
+
* @param ordered if true, the resulting sketch is ordered
|
381
|
+
*/
|
330
382
|
template<typename Other>
|
331
383
|
compact_theta_sketch_alloc(const Other& other, bool ordered);
|
332
|
-
|
333
|
-
|
384
|
+
|
385
|
+
/**
|
386
|
+
* Copy constructor
|
387
|
+
* @param other sketch to be copied
|
388
|
+
*/
|
389
|
+
compact_theta_sketch_alloc(const compact_theta_sketch_alloc& other) = default;
|
390
|
+
|
391
|
+
/**
|
392
|
+
* Move constructor
|
393
|
+
* @param other sketch to be moved
|
394
|
+
*/
|
395
|
+
compact_theta_sketch_alloc(compact_theta_sketch_alloc&& other) noexcept = default;
|
396
|
+
|
334
397
|
virtual ~compact_theta_sketch_alloc() = default;
|
335
|
-
|
336
|
-
|
398
|
+
|
399
|
+
/**
|
400
|
+
* Copy assignment
|
401
|
+
* @param other sketch to be copied
|
402
|
+
* @return reference to this sketch
|
403
|
+
*/
|
404
|
+
compact_theta_sketch_alloc& operator=(const compact_theta_sketch_alloc& other) = default;
|
405
|
+
|
406
|
+
/**
|
407
|
+
* Move assignment
|
408
|
+
* @param other sketch to be moved
|
409
|
+
* @return reference to this sketch
|
410
|
+
*/
|
411
|
+
compact_theta_sketch_alloc& operator=(compact_theta_sketch_alloc&& other) = default;
|
337
412
|
|
338
413
|
virtual Allocator get_allocator() const;
|
339
414
|
virtual bool is_empty() const;
|
@@ -385,6 +460,7 @@ public:
|
|
385
460
|
* This method deserializes a sketch from a given stream.
|
386
461
|
* @param is input stream
|
387
462
|
* @param seed the seed for the hash function that was used to create the sketch
|
463
|
+
* @param allocator instance of an Allocator
|
388
464
|
* @return an instance of the sketch
|
389
465
|
*/
|
390
466
|
static compact_theta_sketch_alloc deserialize(std::istream& is,
|
@@ -395,14 +471,12 @@ public:
|
|
395
471
|
* @param bytes pointer to the array of bytes
|
396
472
|
* @param size the size of the array
|
397
473
|
* @param seed the seed for the hash function that was used to create the sketch
|
474
|
+
* @param allocator instance of an Allocator
|
398
475
|
* @return an instance of the sketch
|
399
476
|
*/
|
400
477
|
static compact_theta_sketch_alloc deserialize(const void* bytes, size_t size,
|
401
478
|
uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
|
402
479
|
|
403
|
-
// for internal use
|
404
|
-
compact_theta_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<uint64_t, Allocator>&& entries);
|
405
|
-
|
406
480
|
private:
|
407
481
|
enum flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED };
|
408
482
|
|
@@ -423,20 +497,33 @@ private:
|
|
423
497
|
static compact_theta_sketch_alloc deserialize_v4(uint8_t preamble_longs, std::istream& is, uint64_t seed, const Allocator& allocator);
|
424
498
|
|
425
499
|
virtual void print_specifics(std::ostringstream& os) const;
|
500
|
+
|
501
|
+
template<typename E, typename EK, typename P, typename S, typename CS, typename A> friend class theta_union_base;
|
502
|
+
template<typename E, typename EK, typename P, typename S, typename CS, typename A> friend class theta_intersection_base;
|
503
|
+
template<typename E, typename EK, typename CS, typename A> friend class theta_set_difference_base;
|
504
|
+
compact_theta_sketch_alloc(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<uint64_t, Allocator>&& entries);
|
426
505
|
};
|
427
506
|
|
507
|
+
/// Update Theta sketch builder
|
428
508
|
template<typename Allocator>
|
429
509
|
class update_theta_sketch_alloc<Allocator>::builder: public theta_base_builder<builder, Allocator> {
|
430
510
|
public:
|
511
|
+
/**
|
512
|
+
* Constructor
|
513
|
+
* @param allocator instance of an Allocator
|
514
|
+
*/
|
431
515
|
builder(const Allocator& allocator = Allocator());
|
516
|
+
/// @return instance of Update Theta sketch
|
432
517
|
update_theta_sketch_alloc build() const;
|
433
518
|
};
|
434
519
|
|
435
|
-
|
436
|
-
|
437
|
-
|
520
|
+
/**
|
521
|
+
* Wrapped Compact Theta sketch.
|
522
|
+
* Wraps a buffer containing a serialized compact sketch so that it can be used in a set operation, avoiding some of the cost of deserialization.
|
523
|
+
* It does not take ownership of the buffer.
|
524
|
+
*/
|
438
525
|
template<typename Allocator = std::allocator<uint64_t>>
|
439
|
-
class wrapped_compact_theta_sketch_alloc
|
526
|
+
class wrapped_compact_theta_sketch_alloc: public base_theta_sketch_alloc<Allocator> {
|
440
527
|
public:
|
441
528
|
class const_iterator;
|
442
529
|
|
@@ -447,7 +534,17 @@ public:
|
|
447
534
|
uint32_t get_num_retained() const;
|
448
535
|
uint16_t get_seed_hash() const;
|
449
536
|
|
537
|
+
/**
|
538
|
+
* Const iterator over hash values in this sketch.
|
539
|
+
* @return begin iterator
|
540
|
+
*/
|
450
541
|
const_iterator begin() const;
|
542
|
+
|
543
|
+
/**
|
544
|
+
* Const iterator pointing past the valid range.
|
545
|
+
* Not to be incremented or dereferenced.
|
546
|
+
* @return end iterator
|
547
|
+
*/
|
451
548
|
const_iterator end() const;
|
452
549
|
|
453
550
|
/**
|
@@ -455,6 +552,7 @@ public:
|
|
455
552
|
* @param bytes pointer to the array of bytes
|
456
553
|
* @param size the size of the array
|
457
554
|
* @param seed the seed for the hash function that was used to create the sketch
|
555
|
+
* @param dump_on_error if true, prints a hex dump of the input
|
458
556
|
* @return an instance of the sketch
|
459
557
|
*/
|
460
558
|
static const wrapped_compact_theta_sketch_alloc wrap(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED, bool dump_on_error = false);
|
@@ -471,15 +569,22 @@ private:
|
|
471
569
|
};
|
472
570
|
|
473
571
|
template<typename Allocator>
|
474
|
-
class wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator
|
572
|
+
class wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator {
|
475
573
|
public:
|
574
|
+
using iterator_category = std::input_iterator_tag;
|
575
|
+
using value_type = const uint64_t;
|
576
|
+
using difference_type = void;
|
577
|
+
using pointer = value_type*;
|
578
|
+
using reference = uint64_t;
|
579
|
+
|
476
580
|
const_iterator(const void* ptr, uint8_t entry_bits, uint32_t num_entries, uint32_t index);
|
477
581
|
const_iterator& operator++();
|
478
582
|
const_iterator operator++(int);
|
479
583
|
bool operator==(const const_iterator& other) const;
|
480
584
|
bool operator!=(const const_iterator& other) const;
|
481
|
-
|
482
|
-
|
585
|
+
reference operator*() const;
|
586
|
+
pointer operator->() const;
|
587
|
+
|
483
588
|
private:
|
484
589
|
const void* ptr_;
|
485
590
|
uint8_t entry_bits_;
|
@@ -492,12 +597,6 @@ private:
|
|
492
597
|
uint64_t buffer_[8];
|
493
598
|
};
|
494
599
|
|
495
|
-
// aliases with default allocator for convenience
|
496
|
-
using theta_sketch = theta_sketch_alloc<std::allocator<uint64_t>>;
|
497
|
-
using update_theta_sketch = update_theta_sketch_alloc<std::allocator<uint64_t>>;
|
498
|
-
using compact_theta_sketch = compact_theta_sketch_alloc<std::allocator<uint64_t>>;
|
499
|
-
using wrapped_compact_theta_sketch = wrapped_compact_theta_sketch_alloc<std::allocator<uint64_t>>;
|
500
|
-
|
501
600
|
} /* namespace datasketches */
|
502
601
|
|
503
602
|
#include "theta_sketch_impl.hpp"
|
@@ -357,7 +357,7 @@ void compact_theta_sketch_alloc<A>::serialize(std::ostream& os) const {
|
|
357
357
|
write(os, flags_byte);
|
358
358
|
write(os, get_seed_hash());
|
359
359
|
if (preamble_longs > 1) {
|
360
|
-
write<uint32_t>(
|
360
|
+
write(os, static_cast<uint32_t>(entries_.size()));
|
361
361
|
write<uint32_t>(os, 0); // unused
|
362
362
|
}
|
363
363
|
if (this->is_estimation_mode()) write(os, this->theta_);
|
@@ -385,7 +385,7 @@ auto compact_theta_sketch_alloc<A>::serialize(unsigned header_size_bytes) const
|
|
385
385
|
*ptr++ = flags_byte;
|
386
386
|
ptr += copy_to_mem(get_seed_hash(), ptr);
|
387
387
|
if (preamble_longs > 1) {
|
388
|
-
ptr += copy_to_mem<uint32_t>(entries_.size(), ptr);
|
388
|
+
ptr += copy_to_mem(static_cast<uint32_t>(entries_.size()), ptr);
|
389
389
|
ptr += sizeof(uint32_t); // unused
|
390
390
|
}
|
391
391
|
if (this->is_estimation_mode()) ptr += copy_to_mem(theta_, ptr);
|
@@ -432,7 +432,7 @@ void compact_theta_sketch_alloc<A>::serialize_version_4(std::ostream& os) const
|
|
432
432
|
const uint8_t entry_bits = 64 - compute_min_leading_zeros();
|
433
433
|
|
434
434
|
// store num_entries as whole bytes since whole-byte blocks will follow (most probably)
|
435
|
-
const uint8_t num_entries_bytes = whole_bytes_to_hold_bits<uint8_t>(32 - count_leading_zeros_in_u32(entries_.size()));
|
435
|
+
const uint8_t num_entries_bytes = whole_bytes_to_hold_bits<uint8_t>(32 - count_leading_zeros_in_u32(static_cast<uint32_t>(entries_.size())));
|
436
436
|
|
437
437
|
write(os, preamble_longs);
|
438
438
|
write(os, COMPRESSED_SERIAL_VERSION);
|
@@ -447,7 +447,7 @@ void compact_theta_sketch_alloc<A>::serialize_version_4(std::ostream& os) const
|
|
447
447
|
write(os, flags_byte);
|
448
448
|
write(os, get_seed_hash());
|
449
449
|
if (this->is_estimation_mode()) write(os, this->theta_);
|
450
|
-
uint32_t num_entries = entries_.size();
|
450
|
+
uint32_t num_entries = static_cast<uint32_t>(entries_.size());
|
451
451
|
for (unsigned i = 0; i < num_entries_bytes; ++i) {
|
452
452
|
write<uint8_t>(os, num_entries & 0xff);
|
453
453
|
num_entries >>= 8;
|
@@ -488,7 +488,7 @@ auto compact_theta_sketch_alloc<A>::serialize_version_4(unsigned header_size_byt
|
|
488
488
|
const size_t compressed_bits = entry_bits * entries_.size();
|
489
489
|
|
490
490
|
// store num_entries as whole bytes since whole-byte blocks will follow (most probably)
|
491
|
-
const uint8_t num_entries_bytes = whole_bytes_to_hold_bits<uint8_t>(32 - count_leading_zeros_in_u32(entries_.size()));
|
491
|
+
const uint8_t num_entries_bytes = whole_bytes_to_hold_bits<uint8_t>(32 - count_leading_zeros_in_u32(static_cast<uint32_t>(entries_.size())));
|
492
492
|
|
493
493
|
const size_t size = header_size_bytes + sizeof(uint64_t) * preamble_longs + num_entries_bytes
|
494
494
|
+ whole_bytes_to_hold_bits(compressed_bits);
|
@@ -510,7 +510,7 @@ auto compact_theta_sketch_alloc<A>::serialize_version_4(unsigned header_size_byt
|
|
510
510
|
if (this->is_estimation_mode()) {
|
511
511
|
ptr += copy_to_mem(theta_, ptr);
|
512
512
|
}
|
513
|
-
uint32_t num_entries = entries_.size();
|
513
|
+
uint32_t num_entries = static_cast<uint32_t>(entries_.size());
|
514
514
|
for (unsigned i = 0; i < num_entries_bytes; ++i) {
|
515
515
|
*ptr++ = num_entries & 0xff;
|
516
516
|
num_entries >>= 8;
|
@@ -869,13 +869,13 @@ bool wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator==(c
|
|
869
869
|
}
|
870
870
|
|
871
871
|
template<typename Allocator>
|
872
|
-
|
872
|
+
auto wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator*() const -> reference {
|
873
873
|
if (entry_bits_ == 64) return *reinterpret_cast<const uint64_t*>(ptr_);
|
874
874
|
return buffer_[buf_i_];
|
875
875
|
}
|
876
876
|
|
877
877
|
template<typename Allocator>
|
878
|
-
|
878
|
+
auto wrapped_compact_theta_sketch_alloc<Allocator>::const_iterator::operator->() const -> pointer {
|
879
879
|
if (entry_bits_ == 64) return reinterpret_cast<const uint64_t*>(ptr_);
|
880
880
|
return buffer_ + buf_i_;
|
881
881
|
}
|