datasketches 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/NOTICE +1 -1
- data/README.md +0 -2
- data/ext/datasketches/cpc_wrapper.cpp +2 -2
- data/ext/datasketches/kll_wrapper.cpp +0 -10
- data/lib/datasketches/version.rb +1 -1
- data/lib/datasketches.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
- data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
- data/vendor/datasketches-cpp/Doxyfile +2827 -0
- data/vendor/datasketches-cpp/LICENSE +0 -76
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +1 -3
- data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
- data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
- data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
- data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
- data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
- data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
- data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
- data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
- data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
- data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
- data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
- data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
- data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
- data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
- data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
- data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
- data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
- data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
- data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
- data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
- data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/version.cfg.in +1 -1
- metadata +47 -93
- data/vendor/datasketches-cpp/MANIFEST.in +0 -39
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/pyproject.toml +0 -23
- data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
- data/vendor/datasketches-cpp/python/README.md +0 -85
- data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
- data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
- data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
- data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
- data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
- data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
- data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
- data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
- data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
- data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
- data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
- data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
- data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
- data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
- data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
- data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +0 -110
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tox.ini +0 -26
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
|
@@ -19,56 +19,55 @@
|
|
|
19
19
|
|
|
20
20
|
namespace datasketches {
|
|
21
21
|
|
|
22
|
-
template<typename
|
|
23
|
-
|
|
24
|
-
float p, uint64_t theta, uint64_t seed, const
|
|
22
|
+
template<typename Array, typename Policy, typename Allocator>
|
|
23
|
+
update_array_tuple_sketch<Array, Policy, Allocator>::update_array_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
|
|
24
|
+
float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator):
|
|
25
25
|
Base(lg_cur_size, lg_nom_size, rf, p, theta, seed, policy, allocator) {}
|
|
26
26
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
uint8_t update_array_of_doubles_sketch_alloc<A>::get_num_values() const {
|
|
27
|
+
template<typename Array, typename Policy, typename Allocator>
|
|
28
|
+
uint8_t update_array_tuple_sketch<Array, Policy, Allocator>::get_num_values() const {
|
|
30
29
|
return this->policy_.get_num_values();
|
|
31
30
|
}
|
|
32
31
|
|
|
33
|
-
template<typename
|
|
34
|
-
|
|
35
|
-
return
|
|
32
|
+
template<typename Array, typename Policy, typename Allocator>
|
|
33
|
+
compact_array_tuple_sketch<Array, Allocator> update_array_tuple_sketch<Array, Policy, Allocator>::compact(bool ordered) const {
|
|
34
|
+
return compact_array_tuple_sketch<Array, Allocator>(*this, ordered);
|
|
36
35
|
}
|
|
37
36
|
|
|
38
37
|
// builder
|
|
39
38
|
|
|
40
|
-
template<typename
|
|
41
|
-
|
|
42
|
-
tuple_base_builder<builder,
|
|
39
|
+
template<typename Array, typename Policy, typename Allocator>
|
|
40
|
+
update_array_tuple_sketch<Array, Policy, Allocator>::builder::builder(const Policy& policy, const Allocator& allocator):
|
|
41
|
+
tuple_base_builder<builder, Policy, Allocator>(policy, allocator) {}
|
|
43
42
|
|
|
44
|
-
template<typename
|
|
45
|
-
|
|
46
|
-
return
|
|
43
|
+
template<typename Array, typename Policy, typename Allocator>
|
|
44
|
+
auto update_array_tuple_sketch<Array, Policy, Allocator>::builder::build() const -> update_array_tuple_sketch {
|
|
45
|
+
return update_array_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
|
47
46
|
}
|
|
48
47
|
|
|
49
48
|
// compact sketch
|
|
50
49
|
|
|
51
|
-
template<typename
|
|
50
|
+
template<typename Array, typename Allocator>
|
|
52
51
|
template<typename S>
|
|
53
|
-
|
|
52
|
+
compact_array_tuple_sketch<Array, Allocator>::compact_array_tuple_sketch(const S& other, bool ordered):
|
|
54
53
|
Base(other, ordered), num_values_(other.get_num_values()) {}
|
|
55
54
|
|
|
56
|
-
template<typename
|
|
57
|
-
|
|
55
|
+
template<typename Array, typename Allocator>
|
|
56
|
+
compact_array_tuple_sketch<Array, Allocator>::compact_array_tuple_sketch(bool is_empty, bool is_ordered,
|
|
58
57
|
uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries, uint8_t num_values):
|
|
59
58
|
Base(is_empty, is_ordered, seed_hash, theta, std::move(entries)), num_values_(num_values) {}
|
|
60
59
|
|
|
61
|
-
template<typename
|
|
62
|
-
|
|
60
|
+
template<typename Array, typename Allocator>
|
|
61
|
+
compact_array_tuple_sketch<Array, Allocator>::compact_array_tuple_sketch(uint8_t num_values, Base&& base):
|
|
63
62
|
Base(std::move(base)), num_values_(num_values) {}
|
|
64
63
|
|
|
65
|
-
template<typename
|
|
66
|
-
uint8_t
|
|
64
|
+
template<typename Array, typename Allocator>
|
|
65
|
+
uint8_t compact_array_tuple_sketch<Array, Allocator>::get_num_values() const {
|
|
67
66
|
return num_values_;
|
|
68
67
|
}
|
|
69
68
|
|
|
70
|
-
template<typename
|
|
71
|
-
void
|
|
69
|
+
template<typename Array, typename Allocator>
|
|
70
|
+
void compact_array_tuple_sketch<Array, Allocator>::serialize(std::ostream& os) const {
|
|
72
71
|
const uint8_t preamble_longs = 1;
|
|
73
72
|
write(os, preamble_longs);
|
|
74
73
|
const uint8_t serial_version = SERIAL_VERSION;
|
|
@@ -96,17 +95,17 @@ void compact_array_of_doubles_sketch_alloc<A>::serialize(std::ostream& os) const
|
|
|
96
95
|
write(os, it.first);
|
|
97
96
|
}
|
|
98
97
|
for (const auto& it: this->entries_) {
|
|
99
|
-
write(os, it.second.data(), it.second.size() * sizeof(
|
|
98
|
+
write(os, it.second.data(), it.second.size() * sizeof(typename Array::value_type));
|
|
100
99
|
}
|
|
101
100
|
}
|
|
102
101
|
}
|
|
103
102
|
|
|
104
|
-
template<typename
|
|
105
|
-
auto
|
|
103
|
+
template<typename Array, typename Allocator>
|
|
104
|
+
auto compact_array_tuple_sketch<Array, Allocator>::serialize(unsigned header_size_bytes) const -> vector_bytes {
|
|
106
105
|
const uint8_t preamble_longs = 1;
|
|
107
106
|
const size_t size = header_size_bytes + 16 // preamble and theta
|
|
108
107
|
+ (this->entries_.size() > 0 ? 8 : 0)
|
|
109
|
-
+ (sizeof(uint64_t) + sizeof(
|
|
108
|
+
+ (sizeof(uint64_t) + sizeof(typename Array::value_type) * num_values_) * this->entries_.size();
|
|
110
109
|
vector_bytes bytes(size, 0, this->entries_.get_allocator());
|
|
111
110
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
|
112
111
|
|
|
@@ -135,14 +134,14 @@ auto compact_array_of_doubles_sketch_alloc<A>::serialize(unsigned header_size_by
|
|
|
135
134
|
ptr += copy_to_mem(it.first, ptr);
|
|
136
135
|
}
|
|
137
136
|
for (const auto& it: this->entries_) {
|
|
138
|
-
ptr += copy_to_mem(it.second.data(), ptr, it.second.size() * sizeof(
|
|
137
|
+
ptr += copy_to_mem(it.second.data(), ptr, it.second.size() * sizeof(typename Array::value_type));
|
|
139
138
|
}
|
|
140
139
|
}
|
|
141
140
|
return bytes;
|
|
142
141
|
}
|
|
143
142
|
|
|
144
|
-
template<typename
|
|
145
|
-
|
|
143
|
+
template<typename Array, typename Allocator>
|
|
144
|
+
compact_array_tuple_sketch<Array, Allocator> compact_array_tuple_sketch<Array, Allocator>::deserialize(std::istream& is, uint64_t seed, const Allocator& allocator) {
|
|
146
145
|
read<uint8_t>(is); // unused
|
|
147
146
|
const auto serial_version = read<uint8_t>(is);
|
|
148
147
|
const auto family = read<uint8_t>(is);
|
|
@@ -165,19 +164,19 @@ compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A
|
|
|
165
164
|
std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator);
|
|
166
165
|
read(is, keys.data(), num_entries * sizeof(uint64_t));
|
|
167
166
|
for (size_t i = 0; i < num_entries; ++i) {
|
|
168
|
-
|
|
169
|
-
read(is, summary.data(), num_values * sizeof(
|
|
167
|
+
Array summary(num_values, 0, allocator);
|
|
168
|
+
read(is, summary.data(), num_values * sizeof(typename Array::value_type));
|
|
170
169
|
entries.push_back(Entry(keys[i], std::move(summary)));
|
|
171
170
|
}
|
|
172
171
|
}
|
|
173
172
|
if (!is.good()) throw std::runtime_error("error reading from std::istream");
|
|
174
173
|
const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
|
|
175
174
|
const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
|
|
176
|
-
return
|
|
175
|
+
return compact_array_tuple_sketch<Array, Allocator>(is_empty, is_ordered, seed_hash, theta, std::move(entries), num_values);
|
|
177
176
|
}
|
|
178
177
|
|
|
179
|
-
template<typename
|
|
180
|
-
|
|
178
|
+
template<typename Array, typename Allocator>
|
|
179
|
+
compact_array_tuple_sketch<Array, Allocator> compact_array_tuple_sketch<Array, Allocator>::deserialize(const void* bytes, size_t size, uint64_t seed, const Allocator& allocator) {
|
|
181
180
|
ensure_minimum_memory(size, 16);
|
|
182
181
|
const char* ptr = static_cast<const char*>(bytes);
|
|
183
182
|
ptr += sizeof(uint8_t); // unused
|
|
@@ -207,19 +206,19 @@ compact_array_of_doubles_sketch_alloc<A> compact_array_of_doubles_sketch_alloc<A
|
|
|
207
206
|
uint32_t num_entries;
|
|
208
207
|
ptr += copy_from_mem(ptr, num_entries);
|
|
209
208
|
ptr += sizeof(uint32_t); // unused
|
|
210
|
-
ensure_minimum_memory(size, 24 + (sizeof(uint64_t) + sizeof(
|
|
209
|
+
ensure_minimum_memory(size, 24 + (sizeof(uint64_t) + sizeof(typename Array::value_type) * num_values) * num_entries);
|
|
211
210
|
entries.reserve(num_entries);
|
|
212
211
|
std::vector<uint64_t, AllocU64> keys(num_entries, 0, allocator);
|
|
213
212
|
ptr += copy_from_mem(ptr, keys.data(), sizeof(uint64_t) * num_entries);
|
|
214
213
|
for (size_t i = 0; i < num_entries; ++i) {
|
|
215
|
-
|
|
216
|
-
ptr += copy_from_mem(ptr, summary.data(), num_values * sizeof(
|
|
214
|
+
Array summary(num_values, 0, allocator);
|
|
215
|
+
ptr += copy_from_mem(ptr, summary.data(), num_values * sizeof(typename Array::value_type));
|
|
217
216
|
entries.push_back(Entry(keys[i], std::move(summary)));
|
|
218
217
|
}
|
|
219
218
|
}
|
|
220
219
|
const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
|
|
221
220
|
const bool is_ordered = flags_byte & (1 << flags::IS_ORDERED);
|
|
222
|
-
return
|
|
221
|
+
return compact_array_tuple_sketch<Array, Allocator>(is_empty, is_ordered, seed_hash, theta, std::move(entries), num_values);
|
|
223
222
|
}
|
|
224
223
|
|
|
225
224
|
} /* namespace datasketches */
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#ifndef ARRAY_TUPLE_UNION_HPP_
|
|
21
|
+
#define ARRAY_TUPLE_UNION_HPP_
|
|
22
|
+
|
|
23
|
+
#include <vector>
|
|
24
|
+
#include <memory>
|
|
25
|
+
#include "array_tuple_sketch.hpp"
|
|
26
|
+
|
|
27
|
+
#include "tuple_union.hpp"
|
|
28
|
+
|
|
29
|
+
namespace datasketches {
|
|
30
|
+
|
|
31
|
+
/// default array tuple union policy
|
|
32
|
+
template<typename Array>
|
|
33
|
+
struct default_array_tuple_union_policy {
|
|
34
|
+
default_array_tuple_union_policy(uint8_t num_values = 1): num_values_(num_values) {}
|
|
35
|
+
|
|
36
|
+
void operator()(Array& array, const Array& other) const {
|
|
37
|
+
for (uint8_t i = 0; i < num_values_; ++i) {
|
|
38
|
+
array[i] += other[i];
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
uint8_t get_num_values() const {
|
|
42
|
+
return num_values_;
|
|
43
|
+
}
|
|
44
|
+
private:
|
|
45
|
+
uint8_t num_values_;
|
|
46
|
+
};
|
|
47
|
+
|
|
48
|
+
/// array tuple union
|
|
49
|
+
template<
|
|
50
|
+
typename Array,
|
|
51
|
+
typename Policy = default_array_tuple_union_policy<Array>,
|
|
52
|
+
typename Allocator = typename Array::allocator_type
|
|
53
|
+
>
|
|
54
|
+
class array_tuple_union: public tuple_union<Array, Policy, Allocator> {
|
|
55
|
+
public:
|
|
56
|
+
using value_type = typename Array::value_type;
|
|
57
|
+
using Base = tuple_union<Array, Policy, Allocator>;
|
|
58
|
+
using CompactSketch = compact_array_tuple_sketch<Array, Allocator>;
|
|
59
|
+
using resize_factor = theta_constants::resize_factor;
|
|
60
|
+
|
|
61
|
+
class builder;
|
|
62
|
+
|
|
63
|
+
CompactSketch get_result(bool ordered = true) const;
|
|
64
|
+
|
|
65
|
+
private:
|
|
66
|
+
// for builder
|
|
67
|
+
array_tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
|
68
|
+
};
|
|
69
|
+
|
|
70
|
+
template<typename Array, typename Policy, typename Allocator>
|
|
71
|
+
class array_tuple_union<Array, Policy, Allocator>::builder: public tuple_base_builder<builder, Policy, Allocator> {
|
|
72
|
+
public:
|
|
73
|
+
builder(const Policy& policy = Policy(), const Allocator& allocator = Allocator());
|
|
74
|
+
array_tuple_union build() const;
|
|
75
|
+
};
|
|
76
|
+
|
|
77
|
+
} /* namespace datasketches */
|
|
78
|
+
|
|
79
|
+
#include "array_tuple_union_impl.hpp"
|
|
80
|
+
|
|
81
|
+
#endif
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
namespace datasketches {
|
|
21
|
+
|
|
22
|
+
template<typename Array, typename Policy, typename Allocator>
|
|
23
|
+
array_tuple_union<Array, Policy, Allocator>::array_tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator):
|
|
24
|
+
Base(lg_cur_size, lg_nom_size, rf, p, theta, seed, policy, allocator)
|
|
25
|
+
{}
|
|
26
|
+
|
|
27
|
+
template<typename Array, typename Policy, typename Allocator>
|
|
28
|
+
auto array_tuple_union<Array, Policy, Allocator>::get_result(bool ordered) const -> CompactSketch {
|
|
29
|
+
return CompactSketch(this->state_.get_policy().get_external_policy().get_num_values(), Base::get_result(ordered));
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
// builder
|
|
33
|
+
|
|
34
|
+
template<typename Array, typename Policy, typename Allocator>
|
|
35
|
+
array_tuple_union<Array, Policy, Allocator>::builder::builder(const Policy& policy, const Allocator& allocator):
|
|
36
|
+
tuple_base_builder<builder, Policy, typename Array::allocator_type>(policy, allocator) {}
|
|
37
|
+
|
|
38
|
+
template<typename Array, typename Policy, typename Allocator>
|
|
39
|
+
auto array_tuple_union<Array, Policy, Allocator>::builder::build() const -> array_tuple_union {
|
|
40
|
+
return array_tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
} /* namespace datasketches */
|
|
@@ -25,6 +25,7 @@
|
|
|
25
25
|
|
|
26
26
|
namespace datasketches {
|
|
27
27
|
|
|
28
|
+
/// tuple A-not-B
|
|
28
29
|
template<
|
|
29
30
|
typename Summary,
|
|
30
31
|
typename Allocator = std::allocator<Summary>
|
|
@@ -37,11 +38,19 @@ public:
|
|
|
37
38
|
using AllocEntry = typename std::allocator_traits<Allocator>::template rebind_alloc<Entry>;
|
|
38
39
|
using State = theta_set_difference_base<Entry, ExtractKey, CompactSketch, AllocEntry>;
|
|
39
40
|
|
|
41
|
+
/**
|
|
42
|
+
* Constructor
|
|
43
|
+
* @param seed for the hash function that was used to create the sketch
|
|
44
|
+
* @param allocator to use for allocating and deallocating memory
|
|
45
|
+
*/
|
|
40
46
|
explicit tuple_a_not_b(uint64_t seed = DEFAULT_SEED, const Allocator& allocator = Allocator());
|
|
41
47
|
|
|
42
48
|
/**
|
|
43
|
-
* Computes the
|
|
44
|
-
* @
|
|
49
|
+
* Computes the A-not-B set operation given two sketches.
|
|
50
|
+
* @param a sketch A
|
|
51
|
+
* @param b sketch B
|
|
52
|
+
* @param ordered optional flag to specify if an ordered sketch should be produced
|
|
53
|
+
* @return the result of A-not-B as a compact sketch
|
|
45
54
|
*/
|
|
46
55
|
template<typename FwdSketch, typename Sketch>
|
|
47
56
|
CompactSketch compute(FwdSketch&& a, const Sketch& b, bool ordered = true) const;
|
|
@@ -28,16 +28,17 @@ namespace datasketches {
|
|
|
28
28
|
/*
|
|
29
29
|
// for types with defined + operation
|
|
30
30
|
template<typename Summary>
|
|
31
|
-
struct
|
|
31
|
+
struct example_tuple_intersection_policy {
|
|
32
32
|
void operator()(Summary& summary, const Summary& other) const {
|
|
33
33
|
summary += other;
|
|
34
34
|
}
|
|
35
|
-
void operator()(Summary& summary, Summary&& other) const {
|
|
36
|
-
summary += other;
|
|
37
|
-
}
|
|
38
35
|
};
|
|
39
36
|
*/
|
|
40
37
|
|
|
38
|
+
/**
|
|
39
|
+
* Tuple intersection.
|
|
40
|
+
* Computes intersection of Tuple sketches.
|
|
41
|
+
*/
|
|
41
42
|
template<
|
|
42
43
|
typename Summary,
|
|
43
44
|
typename Policy,
|
|
@@ -54,19 +55,25 @@ public:
|
|
|
54
55
|
// reformulate the external policy that operates on Summary
|
|
55
56
|
// in terms of operations on Entry
|
|
56
57
|
struct internal_policy {
|
|
57
|
-
internal_policy(const Policy&
|
|
58
|
+
internal_policy(const Policy& external_policy): external_policy_(external_policy) {}
|
|
58
59
|
void operator()(Entry& internal_entry, const Entry& incoming_entry) const {
|
|
59
|
-
|
|
60
|
+
external_policy_(internal_entry.second, incoming_entry.second);
|
|
60
61
|
}
|
|
61
62
|
void operator()(Entry& internal_entry, Entry&& incoming_entry) const {
|
|
62
|
-
|
|
63
|
+
external_policy_(internal_entry.second, std::move(incoming_entry.second));
|
|
63
64
|
}
|
|
64
|
-
const Policy&
|
|
65
|
-
Policy
|
|
65
|
+
const Policy& get_external_policy() const { return external_policy_; }
|
|
66
|
+
Policy external_policy_;
|
|
66
67
|
};
|
|
67
68
|
|
|
68
69
|
using State = theta_intersection_base<Entry, ExtractKey, internal_policy, Sketch, CompactSketch, AllocEntry>;
|
|
69
70
|
|
|
71
|
+
/**
|
|
72
|
+
* Constructor
|
|
73
|
+
* @param seed for the hash function that was used to create the sketch
|
|
74
|
+
* @param policy user-defined way of combining Summary during intersection
|
|
75
|
+
* @param allocator to use for allocating and deallocating memory
|
|
76
|
+
*/
|
|
70
77
|
explicit tuple_intersection(uint64_t seed = DEFAULT_SEED, const Policy& policy = Policy(), const Allocator& allocator = Allocator());
|
|
71
78
|
|
|
72
79
|
/**
|
|
@@ -82,7 +89,7 @@ public:
|
|
|
82
89
|
* Produces a copy of the current state of the intersection.
|
|
83
90
|
* If update() was not called, the state is the infinite "universe",
|
|
84
91
|
* which is considered an undefined state, and throws an exception.
|
|
85
|
-
* @param ordered optional flag to specify if ordered sketch should be produced
|
|
92
|
+
* @param ordered optional flag to specify if an ordered sketch should be produced
|
|
86
93
|
* @return the result of the intersection
|
|
87
94
|
*/
|
|
88
95
|
CompactSketch get_result(bool ordered = true) const;
|
|
@@ -26,10 +26,11 @@
|
|
|
26
26
|
|
|
27
27
|
namespace datasketches {
|
|
28
28
|
|
|
29
|
+
/// Tuple Jaccard similarity alias
|
|
29
30
|
template<
|
|
30
31
|
typename Summary,
|
|
31
32
|
typename IntersectionPolicy,
|
|
32
|
-
typename UnionPolicy =
|
|
33
|
+
typename UnionPolicy = default_tuple_union_policy<Summary>,
|
|
33
34
|
typename Allocator = std::allocator<Summary>>
|
|
34
35
|
using tuple_jaccard_similarity = jaccard_similarity_base<tuple_union<Summary, UnionPolicy, Allocator>, tuple_intersection<Summary, IntersectionPolicy, Allocator>, pair_extract_key<uint64_t, Summary>>;
|
|
35
36
|
|