datasketches 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/NOTICE +1 -1
- data/README.md +0 -2
- data/ext/datasketches/cpc_wrapper.cpp +2 -2
- data/ext/datasketches/kll_wrapper.cpp +0 -10
- data/lib/datasketches/version.rb +1 -1
- data/lib/datasketches.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
- data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
- data/vendor/datasketches-cpp/Doxyfile +2827 -0
- data/vendor/datasketches-cpp/LICENSE +0 -76
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +1 -3
- data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
- data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
- data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
- data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
- data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
- data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
- data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
- data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
- data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
- data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
- data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
- data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
- data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
- data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
- data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
- data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
- data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
- data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
- data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
- data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
- data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/version.cfg.in +1 -1
- metadata +47 -93
- data/vendor/datasketches-cpp/MANIFEST.in +0 -39
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/pyproject.toml +0 -23
- data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
- data/vendor/datasketches-cpp/python/README.md +0 -85
- data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
- data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
- data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
- data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
- data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
- data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
- data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
- data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
- data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
- data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
- data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
- data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
- data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
- data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
- data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
- data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +0 -110
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tox.ini +0 -26
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
|
@@ -35,8 +35,8 @@ max_nom_size_(0),
|
|
|
35
35
|
num_retained_(0),
|
|
36
36
|
n_(0),
|
|
37
37
|
compactors_(allocator),
|
|
38
|
-
min_item_(
|
|
39
|
-
max_item_(
|
|
38
|
+
min_item_(),
|
|
39
|
+
max_item_(),
|
|
40
40
|
sorted_view_(nullptr)
|
|
41
41
|
{
|
|
42
42
|
grow();
|
|
@@ -44,14 +44,6 @@ sorted_view_(nullptr)
|
|
|
44
44
|
|
|
45
45
|
template<typename T, typename C, typename A>
|
|
46
46
|
req_sketch<T, C, A>::~req_sketch() {
|
|
47
|
-
if (min_item_ != nullptr) {
|
|
48
|
-
min_item_->~T();
|
|
49
|
-
allocator_.deallocate(min_item_, 1);
|
|
50
|
-
}
|
|
51
|
-
if (max_item_ != nullptr) {
|
|
52
|
-
max_item_->~T();
|
|
53
|
-
allocator_.deallocate(max_item_, 1);
|
|
54
|
-
}
|
|
55
47
|
reset_sorted_view();
|
|
56
48
|
}
|
|
57
49
|
|
|
@@ -65,13 +57,10 @@ max_nom_size_(other.max_nom_size_),
|
|
|
65
57
|
num_retained_(other.num_retained_),
|
|
66
58
|
n_(other.n_),
|
|
67
59
|
compactors_(other.compactors_),
|
|
68
|
-
min_item_(
|
|
69
|
-
max_item_(
|
|
60
|
+
min_item_(other.min_item_),
|
|
61
|
+
max_item_(other.max_item_),
|
|
70
62
|
sorted_view_(nullptr)
|
|
71
|
-
{
|
|
72
|
-
if (other.min_item_ != nullptr) min_item_ = new (allocator_.allocate(1)) T(*other.min_item_);
|
|
73
|
-
if (other.max_item_ != nullptr) max_item_ = new (allocator_.allocate(1)) T(*other.max_item_);
|
|
74
|
-
}
|
|
63
|
+
{}
|
|
75
64
|
|
|
76
65
|
template<typename T, typename C, typename A>
|
|
77
66
|
req_sketch<T, C, A>::req_sketch(req_sketch&& other) noexcept :
|
|
@@ -83,13 +72,10 @@ max_nom_size_(other.max_nom_size_),
|
|
|
83
72
|
num_retained_(other.num_retained_),
|
|
84
73
|
n_(other.n_),
|
|
85
74
|
compactors_(std::move(other.compactors_)),
|
|
86
|
-
min_item_(other.min_item_),
|
|
87
|
-
max_item_(other.max_item_),
|
|
75
|
+
min_item_(std::move(other.min_item_)),
|
|
76
|
+
max_item_(std::move(other.max_item_)),
|
|
88
77
|
sorted_view_(nullptr)
|
|
89
|
-
{
|
|
90
|
-
other.min_item_ = nullptr;
|
|
91
|
-
other.max_item_ = nullptr;
|
|
92
|
-
}
|
|
78
|
+
{}
|
|
93
79
|
|
|
94
80
|
template<typename T, typename C, typename A>
|
|
95
81
|
req_sketch<T, C, A>& req_sketch<T, C, A>::operator=(const req_sketch& other) {
|
|
@@ -135,8 +121,8 @@ max_nom_size_(other.max_nom_size_),
|
|
|
135
121
|
num_retained_(other.num_retained_),
|
|
136
122
|
n_(other.n_),
|
|
137
123
|
compactors_(allocator),
|
|
138
|
-
min_item_(
|
|
139
|
-
max_item_(
|
|
124
|
+
min_item_(other.min_item_),
|
|
125
|
+
max_item_(other.max_item_),
|
|
140
126
|
sorted_view_(nullptr)
|
|
141
127
|
{
|
|
142
128
|
static_assert(
|
|
@@ -147,10 +133,6 @@ sorted_view_(nullptr)
|
|
|
147
133
|
for (const auto& compactor: other.compactors_) {
|
|
148
134
|
compactors_.push_back(req_compactor<T, C, A>(compactor, comparator_, allocator_));
|
|
149
135
|
}
|
|
150
|
-
if (!other.is_empty()) {
|
|
151
|
-
min_item_ = new (allocator_.allocate(1)) T(other.get_min_item());
|
|
152
|
-
max_item_ = new (allocator_.allocate(1)) T(other.get_max_item());
|
|
153
|
-
}
|
|
154
136
|
}
|
|
155
137
|
|
|
156
138
|
template<typename T, typename C, typename A>
|
|
@@ -188,8 +170,8 @@ template<typename FwdT>
|
|
|
188
170
|
void req_sketch<T, C, A>::update(FwdT&& item) {
|
|
189
171
|
if (!check_update_item(item)) { return; }
|
|
190
172
|
if (is_empty()) {
|
|
191
|
-
min_item_
|
|
192
|
-
max_item_
|
|
173
|
+
min_item_.emplace(item);
|
|
174
|
+
max_item_.emplace(item);
|
|
193
175
|
} else {
|
|
194
176
|
if (comparator_(item, *min_item_)) *min_item_ = item;
|
|
195
177
|
if (comparator_(*max_item_, item)) *max_item_ = item;
|
|
@@ -207,8 +189,8 @@ void req_sketch<T, C, A>::merge(FwdSk&& other) {
|
|
|
207
189
|
if (is_HRA() != other.is_HRA()) throw std::invalid_argument("merging HRA and LRA is not valid");
|
|
208
190
|
if (other.is_empty()) return;
|
|
209
191
|
if (is_empty()) {
|
|
210
|
-
min_item_
|
|
211
|
-
max_item_
|
|
192
|
+
min_item_.emplace(conditional_forward<FwdSk>(*other.min_item_));
|
|
193
|
+
max_item_.emplace(conditional_forward<FwdSk>(*other.max_item_));
|
|
212
194
|
} else {
|
|
213
195
|
if (comparator_(*other.min_item_, *min_item_)) *min_item_ = conditional_forward<FwdSk>(*other.min_item_);
|
|
214
196
|
if (comparator_(*max_item_, *other.max_item_)) *max_item_ = conditional_forward<FwdSk>(*other.max_item_);
|
|
@@ -283,25 +265,6 @@ auto req_sketch<T, C, A>::get_quantile(double rank, bool inclusive) const -> qua
|
|
|
283
265
|
return sorted_view_->get_quantile(rank, inclusive);
|
|
284
266
|
}
|
|
285
267
|
|
|
286
|
-
template<typename T, typename C, typename A>
|
|
287
|
-
std::vector<T, A> req_sketch<T, C, A>::get_quantiles(const double* ranks, uint32_t size, bool inclusive) const {
|
|
288
|
-
if (is_empty()) throw std::runtime_error("operation is undefined for an empty sketch");
|
|
289
|
-
std::vector<T, A> quantiles(allocator_);
|
|
290
|
-
quantiles.reserve(size);
|
|
291
|
-
|
|
292
|
-
// possible side-effect of sorting level zero
|
|
293
|
-
setup_sorted_view();
|
|
294
|
-
|
|
295
|
-
for (uint32_t i = 0; i < size; ++i) {
|
|
296
|
-
const double rank = ranks[i];
|
|
297
|
-
if ((rank < 0.0) || (rank > 1.0)) {
|
|
298
|
-
throw std::invalid_argument("Normalized rank cannot be less than 0 or greater than 1");
|
|
299
|
-
}
|
|
300
|
-
quantiles.push_back(sorted_view_->get_quantile(rank, inclusive));
|
|
301
|
-
}
|
|
302
|
-
return quantiles;
|
|
303
|
-
}
|
|
304
|
-
|
|
305
268
|
template<typename T, typename C, typename A>
|
|
306
269
|
quantiles_sorted_view<T, C, A> req_sketch<T, C, A>::get_sorted_view() const {
|
|
307
270
|
if (!compactors_[0].is_sorted()) {
|
|
@@ -310,7 +273,7 @@ quantiles_sorted_view<T, C, A> req_sketch<T, C, A>::get_sorted_view() const {
|
|
|
310
273
|
quantiles_sorted_view<T, C, A> view(get_num_retained(), comparator_, allocator_);
|
|
311
274
|
|
|
312
275
|
for (auto& compactor: compactors_) {
|
|
313
|
-
view.add(compactor.begin(), compactor.end(),
|
|
276
|
+
view.add(compactor.begin(), compactor.end(), 1ULL << compactor.get_lg_weight());
|
|
314
277
|
}
|
|
315
278
|
|
|
316
279
|
view.convert_to_cummulative();
|
|
@@ -426,8 +389,8 @@ void req_sketch<T, C, A>::serialize(std::ostream& os, const SerDe& sd) const {
|
|
|
426
389
|
if (is_empty()) return;
|
|
427
390
|
if (is_estimation_mode()) {
|
|
428
391
|
write(os, n_);
|
|
429
|
-
sd.serialize(os, min_item_, 1);
|
|
430
|
-
sd.serialize(os, max_item_, 1);
|
|
392
|
+
sd.serialize(os, &*min_item_, 1);
|
|
393
|
+
sd.serialize(os, &*max_item_, 1);
|
|
431
394
|
}
|
|
432
395
|
if (raw_items) {
|
|
433
396
|
sd.serialize(os, compactors_[0].begin(), num_raw_items);
|
|
@@ -466,8 +429,8 @@ auto req_sketch<T, C, A>::serialize(unsigned header_size_bytes, const SerDe& sd)
|
|
|
466
429
|
if (!is_empty()) {
|
|
467
430
|
if (is_estimation_mode()) {
|
|
468
431
|
ptr += copy_to_mem(n_, ptr);
|
|
469
|
-
ptr += sd.serialize(ptr, end_ptr - ptr, min_item_, 1);
|
|
470
|
-
ptr += sd.serialize(ptr, end_ptr - ptr, max_item_, 1);
|
|
432
|
+
ptr += sd.serialize(ptr, end_ptr - ptr, &*min_item_, 1);
|
|
433
|
+
ptr += sd.serialize(ptr, end_ptr - ptr, &*max_item_, 1);
|
|
471
434
|
}
|
|
472
435
|
if (raw_items) {
|
|
473
436
|
ptr += sd.serialize(ptr, end_ptr - ptr, compactors_[0].begin(), num_raw_items);
|
|
@@ -498,12 +461,9 @@ req_sketch<T, C, A> req_sketch<T, C, A>::deserialize(std::istream& is, const Ser
|
|
|
498
461
|
const bool hra = flags_byte & (1 << flags::IS_HIGH_RANK);
|
|
499
462
|
if (is_empty) return req_sketch(k, hra, comparator, allocator);
|
|
500
463
|
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
|
505
|
-
std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
|
|
506
|
-
std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
|
|
464
|
+
optional<T> tmp; // space to deserialize min and max
|
|
465
|
+
optional<T> min_item;
|
|
466
|
+
optional<T> max_item;
|
|
507
467
|
|
|
508
468
|
const bool raw_items = flags_byte & (1 << flags::RAW_ITEMS);
|
|
509
469
|
const bool is_level_0_sorted = flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED);
|
|
@@ -512,12 +472,14 @@ req_sketch<T, C, A> req_sketch<T, C, A>::deserialize(std::istream& is, const Ser
|
|
|
512
472
|
uint64_t n = 1;
|
|
513
473
|
if (num_levels > 1) {
|
|
514
474
|
n = read<uint64_t>(is);
|
|
515
|
-
sd.deserialize(is,
|
|
516
|
-
// serde call did not throw, repackage
|
|
517
|
-
min_item
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
475
|
+
sd.deserialize(is, &*tmp, 1);
|
|
476
|
+
// serde call did not throw, repackage and cleanup
|
|
477
|
+
min_item.emplace(*tmp);
|
|
478
|
+
(*tmp).~T();
|
|
479
|
+
sd.deserialize(is, &*tmp, 1);
|
|
480
|
+
// serde call did not throw, repackage and cleanup
|
|
481
|
+
max_item.emplace(*tmp);
|
|
482
|
+
(*tmp).~T();
|
|
521
483
|
}
|
|
522
484
|
|
|
523
485
|
if (raw_items) {
|
|
@@ -537,12 +499,8 @@ req_sketch<T, C, A> req_sketch<T, C, A>::deserialize(std::istream& is, const Ser
|
|
|
537
499
|
if (comparator(*it, *min_it)) min_it = it;
|
|
538
500
|
if (comparator(*max_it, *it)) max_it = it;
|
|
539
501
|
}
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
|
|
543
|
-
new (max_item_buffer.get()) T(*max_it);
|
|
544
|
-
// copy did not throw, repackage with destrtuctor
|
|
545
|
-
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
|
502
|
+
min_item.emplace(*min_it);
|
|
503
|
+
max_item.emplace(*max_it);
|
|
546
504
|
}
|
|
547
505
|
|
|
548
506
|
if (!is.good()) throw std::runtime_error("error reading from std::istream");
|
|
@@ -579,12 +537,9 @@ req_sketch<T, C, A> req_sketch<T, C, A>::deserialize(const void* bytes, size_t s
|
|
|
579
537
|
const bool hra = flags_byte & (1 << flags::IS_HIGH_RANK);
|
|
580
538
|
if (is_empty) return req_sketch(k, hra, comparator, allocator);
|
|
581
539
|
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
std::unique_ptr<T, decltype(item_buffer_deleter)> max_item_buffer(alloc.allocate(1), item_buffer_deleter);
|
|
586
|
-
std::unique_ptr<T, item_deleter> min_item(nullptr, item_deleter(allocator));
|
|
587
|
-
std::unique_ptr<T, item_deleter> max_item(nullptr, item_deleter(allocator));
|
|
540
|
+
optional<T> tmp; // space to deserialize min and max
|
|
541
|
+
optional<T> min_item;
|
|
542
|
+
optional<T> max_item;
|
|
588
543
|
|
|
589
544
|
const bool raw_items = flags_byte & (1 << flags::RAW_ITEMS);
|
|
590
545
|
const bool is_level_0_sorted = flags_byte & (1 << flags::IS_LEVEL_ZERO_SORTED);
|
|
@@ -594,12 +549,14 @@ req_sketch<T, C, A> req_sketch<T, C, A>::deserialize(const void* bytes, size_t s
|
|
|
594
549
|
if (num_levels > 1) {
|
|
595
550
|
ensure_minimum_memory(end_ptr - ptr, sizeof(n));
|
|
596
551
|
ptr += copy_from_mem(ptr, n);
|
|
597
|
-
ptr += sd.deserialize(ptr, end_ptr - ptr,
|
|
598
|
-
// serde call did not throw, repackage
|
|
599
|
-
min_item
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
552
|
+
ptr += sd.deserialize(ptr, end_ptr - ptr, &*tmp, 1);
|
|
553
|
+
// serde call did not throw, repackage and cleanup
|
|
554
|
+
min_item.emplace(*tmp);
|
|
555
|
+
(*tmp).~T();
|
|
556
|
+
ptr += sd.deserialize(ptr, end_ptr - ptr, &*tmp, 1);
|
|
557
|
+
// serde call did not throw, repackage and cleanup
|
|
558
|
+
max_item.emplace(*tmp);
|
|
559
|
+
(*tmp).~T();
|
|
603
560
|
}
|
|
604
561
|
|
|
605
562
|
if (raw_items) {
|
|
@@ -623,12 +580,8 @@ req_sketch<T, C, A> req_sketch<T, C, A>::deserialize(const void* bytes, size_t s
|
|
|
623
580
|
if (comparator(*it, *min_it)) min_it = it;
|
|
624
581
|
if (comparator(*max_it, *it)) max_it = it;
|
|
625
582
|
}
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
min_item = std::unique_ptr<T, item_deleter>(min_item_buffer.release(), item_deleter(allocator));
|
|
629
|
-
new (max_item_buffer.get()) T(*max_it);
|
|
630
|
-
// copy did not throw, repackage with destrtuctor
|
|
631
|
-
max_item = std::unique_ptr<T, item_deleter>(max_item_buffer.release(), item_deleter(allocator));
|
|
583
|
+
min_item.emplace(*min_it);
|
|
584
|
+
max_item.emplace(*max_it);
|
|
632
585
|
}
|
|
633
586
|
|
|
634
587
|
return req_sketch(k, hra, n, std::move(min_item), std::move(max_item), std::move(compactors), comparator);
|
|
@@ -721,23 +674,9 @@ string<A> req_sketch<T, C, A>::to_string(bool print_levels, bool print_items) co
|
|
|
721
674
|
return string<A>(os.str().c_str(), allocator_);
|
|
722
675
|
}
|
|
723
676
|
|
|
724
|
-
template<typename T, typename C, typename A>
|
|
725
|
-
class req_sketch<T, C, A>::item_deleter {
|
|
726
|
-
public:
|
|
727
|
-
item_deleter(const A& allocator): allocator_(allocator) {}
|
|
728
|
-
void operator() (T* ptr) {
|
|
729
|
-
if (ptr != nullptr) {
|
|
730
|
-
ptr->~T();
|
|
731
|
-
allocator_.deallocate(ptr, 1);
|
|
732
|
-
}
|
|
733
|
-
}
|
|
734
|
-
private:
|
|
735
|
-
A allocator_;
|
|
736
|
-
};
|
|
737
|
-
|
|
738
677
|
template<typename T, typename C, typename A>
|
|
739
678
|
req_sketch<T, C, A>::req_sketch(uint16_t k, bool hra, uint64_t n,
|
|
740
|
-
|
|
679
|
+
optional<T>&& min_item, optional<T>&& max_item,
|
|
741
680
|
std::vector<Compactor, AllocCompactor>&& compactors, const C& comparator):
|
|
742
681
|
comparator_(comparator),
|
|
743
682
|
allocator_(compactors.get_allocator()),
|
|
@@ -747,8 +686,8 @@ max_nom_size_(0),
|
|
|
747
686
|
num_retained_(0),
|
|
748
687
|
n_(n),
|
|
749
688
|
compactors_(std::move(compactors)),
|
|
750
|
-
min_item_(min_item
|
|
751
|
-
max_item_(max_item
|
|
689
|
+
min_item_(std::move(min_item)),
|
|
690
|
+
max_item_(std::move(max_item)),
|
|
752
691
|
sorted_view_(nullptr)
|
|
753
692
|
{
|
|
754
693
|
update_max_nom_size();
|
|
@@ -20,7 +20,6 @@ add_executable(req_test)
|
|
|
20
20
|
target_link_libraries(req_test req common_test_lib)
|
|
21
21
|
|
|
22
22
|
set_target_properties(req_test PROPERTIES
|
|
23
|
-
CXX_STANDARD 11
|
|
24
23
|
CXX_STANDARD_REQUIRED YES
|
|
25
24
|
)
|
|
26
25
|
|
|
@@ -41,3 +40,17 @@ target_sources(req_test
|
|
|
41
40
|
req_sketch_test.cpp
|
|
42
41
|
req_sketch_custom_type_test.cpp
|
|
43
42
|
)
|
|
43
|
+
|
|
44
|
+
if (SERDE_COMPAT)
|
|
45
|
+
target_sources(req_test
|
|
46
|
+
PRIVATE
|
|
47
|
+
req_sketch_deserialize_from_java_test.cpp
|
|
48
|
+
)
|
|
49
|
+
endif()
|
|
50
|
+
|
|
51
|
+
if (GENERATE)
|
|
52
|
+
target_sources(req_test
|
|
53
|
+
PRIVATE
|
|
54
|
+
req_sketch_serialize_for_java.cpp
|
|
55
|
+
)
|
|
56
|
+
endif()
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch2/catch.hpp>
|
|
21
|
+
#include <fstream>
|
|
22
|
+
#include <req_sketch.hpp>
|
|
23
|
+
|
|
24
|
+
namespace datasketches {
|
|
25
|
+
|
|
26
|
+
// assume the binary sketches for this test have been generated by datasketches-java code
|
|
27
|
+
// in the subdirectory called "java" in the root directory of this project
|
|
28
|
+
static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";
|
|
29
|
+
|
|
30
|
+
TEST_CASE("req float", "[serde_compat]") {
|
|
31
|
+
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
|
|
32
|
+
for (const unsigned n: n_arr) {
|
|
33
|
+
std::ifstream is;
|
|
34
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
35
|
+
is.open(testBinaryInputPath + "req_float_n" + std::to_string(n) + "_java.sk", std::ios::binary);
|
|
36
|
+
const auto sketch = req_sketch<float>::deserialize(is);
|
|
37
|
+
REQUIRE(sketch.is_HRA());
|
|
38
|
+
REQUIRE(sketch.is_empty() == (n == 0));
|
|
39
|
+
REQUIRE(sketch.is_estimation_mode() == (n > 10));
|
|
40
|
+
REQUIRE(sketch.get_n() == n);
|
|
41
|
+
if (n > 0) {
|
|
42
|
+
REQUIRE(sketch.get_min_item() == 1.0f);
|
|
43
|
+
REQUIRE(sketch.get_max_item() == static_cast<float>(n));
|
|
44
|
+
uint64_t weight = 0;
|
|
45
|
+
for (const auto pair: sketch) {
|
|
46
|
+
REQUIRE(pair.first >= sketch.get_min_item());
|
|
47
|
+
REQUIRE(pair.first <= sketch.get_max_item());
|
|
48
|
+
weight += pair.second;
|
|
49
|
+
}
|
|
50
|
+
REQUIRE(weight == sketch.get_n());
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
} /* namespace datasketches */
|
|
@@ -17,15 +17,20 @@
|
|
|
17
17
|
* under the License.
|
|
18
18
|
*/
|
|
19
19
|
|
|
20
|
-
|
|
20
|
+
#include <catch2/catch.hpp>
|
|
21
|
+
#include <fstream>
|
|
22
|
+
#include <req_sketch.hpp>
|
|
21
23
|
|
|
22
|
-
|
|
23
|
-
array_of_doubles_intersection<P, A>::array_of_doubles_intersection(uint64_t seed, const P& policy, const A& allocator):
|
|
24
|
-
Base(seed, policy, allocator) {}
|
|
24
|
+
namespace datasketches {
|
|
25
25
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
26
|
+
TEST_CASE("req sketch float generate", "[serialize_for_java]") {
|
|
27
|
+
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
|
|
28
|
+
for (const unsigned n: n_arr) {
|
|
29
|
+
req_sketch<float> sketch(12);
|
|
30
|
+
for (unsigned i = 1; i <= n; ++i) sketch.update(i);
|
|
31
|
+
std::ofstream os("req_float_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
|
|
32
|
+
sketch.serialize(os);
|
|
33
|
+
}
|
|
29
34
|
}
|
|
30
35
|
|
|
31
36
|
} /* namespace datasketches */
|
|
@@ -47,8 +47,6 @@ TEST_CASE("req sketch: empty", "[req_sketch]") {
|
|
|
47
47
|
REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
|
|
48
48
|
REQUIRE_THROWS_AS(sketch.get_rank(0), std::runtime_error);
|
|
49
49
|
REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
|
|
50
|
-
const double ranks[3] {0, 0.5, 1};
|
|
51
|
-
REQUIRE_THROWS_AS(sketch.get_quantiles(ranks, 3), std::runtime_error);
|
|
52
50
|
|
|
53
51
|
const float split_points[1] {0};
|
|
54
52
|
REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::runtime_error);
|
|
@@ -71,13 +69,6 @@ TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
|
|
|
71
69
|
REQUIRE(sketch.get_quantile(0.5, false) == 1);
|
|
72
70
|
REQUIRE(sketch.get_quantile(1, false) == 1);
|
|
73
71
|
|
|
74
|
-
const double ranks[3] {0, 0.5, 1};
|
|
75
|
-
auto quantiles = sketch.get_quantiles(ranks, 3);
|
|
76
|
-
REQUIRE(quantiles.size() == 3);
|
|
77
|
-
REQUIRE(quantiles[0] == 1);
|
|
78
|
-
REQUIRE(quantiles[1] == 1);
|
|
79
|
-
REQUIRE(quantiles[2] == 1);
|
|
80
|
-
|
|
81
72
|
unsigned count = 0;
|
|
82
73
|
for (auto pair: sketch) {
|
|
83
74
|
REQUIRE(pair.second == 1);
|
|
@@ -145,13 +136,6 @@ TEST_CASE("req sketch: exact mode", "[req_sketch]") {
|
|
|
145
136
|
REQUIRE(sketch.get_quantile(0.9) == 9);
|
|
146
137
|
REQUIRE(sketch.get_quantile(1) == 10);
|
|
147
138
|
|
|
148
|
-
const double ranks[3] {0, 0.5, 1};
|
|
149
|
-
auto quantiles = sketch.get_quantiles(ranks, 3);
|
|
150
|
-
REQUIRE(quantiles.size() == 3);
|
|
151
|
-
REQUIRE(quantiles[0] == 1);
|
|
152
|
-
REQUIRE(quantiles[1] == 5);
|
|
153
|
-
REQUIRE(quantiles[2] == 10);
|
|
154
|
-
|
|
155
139
|
const float splits[3] {2, 6, 9};
|
|
156
140
|
auto cdf = sketch.get_CDF(splits, 3, false);
|
|
157
141
|
REQUIRE(cdf[0] == 0.1);
|
|
@@ -356,76 +340,6 @@ TEST_CASE("req sketch: serialize deserialize stream and bytes equivalence", "[re
|
|
|
356
340
|
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
|
|
357
341
|
}
|
|
358
342
|
|
|
359
|
-
TEST_CASE("req sketch: stream deserialize from Java - empty", "[req_sketch]") {
|
|
360
|
-
std::ifstream is;
|
|
361
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
362
|
-
is.open(input_path + "req_float_empty_from_java.sk", std::ios::binary);
|
|
363
|
-
auto sketch = req_sketch<float>::deserialize(is);
|
|
364
|
-
REQUIRE(sketch.is_empty());
|
|
365
|
-
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
366
|
-
REQUIRE(sketch.get_n() == 0);
|
|
367
|
-
REQUIRE(sketch.get_num_retained() == 0);
|
|
368
|
-
REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
|
|
369
|
-
REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
|
|
370
|
-
}
|
|
371
|
-
|
|
372
|
-
TEST_CASE("req sketch: stream deserialize from Java - single item", "[req_sketch]") {
|
|
373
|
-
std::ifstream is;
|
|
374
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
375
|
-
is.open(input_path + "req_float_single_item_from_java.sk", std::ios::binary);
|
|
376
|
-
auto sketch = req_sketch<float>::deserialize(is);
|
|
377
|
-
REQUIRE_FALSE(sketch.is_empty());
|
|
378
|
-
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
379
|
-
REQUIRE(sketch.get_n() == 1);
|
|
380
|
-
REQUIRE(sketch.get_num_retained() == 1);
|
|
381
|
-
REQUIRE(sketch.get_min_item() == 1);
|
|
382
|
-
REQUIRE(sketch.get_max_item() == 1);
|
|
383
|
-
REQUIRE(sketch.get_rank(1.0f, false) == 0);
|
|
384
|
-
REQUIRE(sketch.get_rank(1.0f) == 1);
|
|
385
|
-
}
|
|
386
|
-
|
|
387
|
-
TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]") {
|
|
388
|
-
std::ifstream is;
|
|
389
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
390
|
-
is.open(input_path + "req_float_raw_items_from_java.sk", std::ios::binary);
|
|
391
|
-
auto sketch = req_sketch<float>::deserialize(is);
|
|
392
|
-
REQUIRE_FALSE(sketch.is_empty());
|
|
393
|
-
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
394
|
-
REQUIRE(sketch.get_n() == 4);
|
|
395
|
-
REQUIRE(sketch.get_num_retained() == 4);
|
|
396
|
-
REQUIRE(sketch.get_min_item() == 0);
|
|
397
|
-
REQUIRE(sketch.get_max_item() == 3);
|
|
398
|
-
REQUIRE(sketch.get_rank(2.0f, false) == 0.5);
|
|
399
|
-
}
|
|
400
|
-
|
|
401
|
-
TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]") {
|
|
402
|
-
std::ifstream is;
|
|
403
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
404
|
-
is.open(input_path + "req_float_exact_from_java.sk", std::ios::binary);
|
|
405
|
-
auto sketch = req_sketch<float>::deserialize(is);
|
|
406
|
-
REQUIRE_FALSE(sketch.is_empty());
|
|
407
|
-
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
408
|
-
REQUIRE(sketch.get_n() == 100);
|
|
409
|
-
REQUIRE(sketch.get_num_retained() == 100);
|
|
410
|
-
REQUIRE(sketch.get_min_item() == 0);
|
|
411
|
-
REQUIRE(sketch.get_max_item() == 99);
|
|
412
|
-
REQUIRE(sketch.get_rank(50.0f, false) == 0.5);
|
|
413
|
-
}
|
|
414
|
-
|
|
415
|
-
TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sketch]") {
|
|
416
|
-
std::ifstream is;
|
|
417
|
-
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
418
|
-
is.open(input_path + "req_float_estimation_from_java.sk", std::ios::binary);
|
|
419
|
-
auto sketch = req_sketch<float>::deserialize(is);
|
|
420
|
-
REQUIRE_FALSE(sketch.is_empty());
|
|
421
|
-
REQUIRE(sketch.is_estimation_mode());
|
|
422
|
-
REQUIRE(sketch.get_n() == 10000);
|
|
423
|
-
REQUIRE(sketch.get_num_retained() == 2942);
|
|
424
|
-
REQUIRE(sketch.get_min_item() == 0);
|
|
425
|
-
REQUIRE(sketch.get_max_item() == 9999);
|
|
426
|
-
REQUIRE(sketch.get_rank(5000.0f, false) == 0.5);
|
|
427
|
-
}
|
|
428
|
-
|
|
429
343
|
TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
|
|
430
344
|
req_sketch<float> sketch1(40);
|
|
431
345
|
|
|
@@ -555,11 +469,11 @@ TEST_CASE("req sketch: type conversion - custom types") {
|
|
|
555
469
|
|
|
556
470
|
TEST_CASE("get_rank equivalence") {
|
|
557
471
|
req_sketch<int> sketch(12);
|
|
558
|
-
const
|
|
559
|
-
for (
|
|
472
|
+
const int n = 1000;
|
|
473
|
+
for (int i = 0; i < n; ++i) sketch.update(i);
|
|
560
474
|
REQUIRE(sketch.get_n() == n);
|
|
561
475
|
auto view = sketch.get_sorted_view();
|
|
562
|
-
for (
|
|
476
|
+
for (int i = 0; i < n; ++i) {
|
|
563
477
|
REQUIRE(sketch.get_rank(i) == view.get_rank(i));
|
|
564
478
|
}
|
|
565
479
|
}
|
|
@@ -41,4 +41,8 @@ install(FILES
|
|
|
41
41
|
include/var_opt_sketch_impl.hpp
|
|
42
42
|
include/var_opt_union.hpp
|
|
43
43
|
include/var_opt_union_impl.hpp
|
|
44
|
+
include/ebpps_sample.hpp
|
|
45
|
+
include/ebpps_sample_impl.hpp
|
|
46
|
+
include/ebpps_sketch.hpp
|
|
47
|
+
include/ebpps_sketch_impl.hpp
|
|
44
48
|
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|