datasketches 0.3.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/NOTICE +1 -1
- data/README.md +0 -2
- data/ext/datasketches/cpc_wrapper.cpp +2 -2
- data/ext/datasketches/kll_wrapper.cpp +0 -10
- data/lib/datasketches/version.rb +1 -1
- data/lib/datasketches.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
- data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
- data/vendor/datasketches-cpp/Doxyfile +2827 -0
- data/vendor/datasketches-cpp/LICENSE +0 -76
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +1 -3
- data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
- data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
- data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
- data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
- data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
- data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
- data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
- data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
- data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
- data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
- data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
- data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
- data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
- data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
- data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
- data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
- data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
- data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
- data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
- data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
- data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/version.cfg.in +1 -1
- metadata +47 -93
- data/vendor/datasketches-cpp/MANIFEST.in +0 -39
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/pyproject.toml +0 -23
- data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
- data/vendor/datasketches-cpp/python/README.md +0 -85
- data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
- data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
- data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
- data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
- data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
- data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
- data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
- data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
- data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
- data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
- data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
- data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
- data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
- data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
- data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
- data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +0 -110
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tox.ini +0 -26
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
|
@@ -27,7 +27,7 @@
|
|
|
27
27
|
|
|
28
28
|
namespace datasketches {
|
|
29
29
|
|
|
30
|
-
// forward
|
|
30
|
+
// forward declarations
|
|
31
31
|
template<typename S, typename A> class tuple_sketch;
|
|
32
32
|
template<typename S, typename U, typename P, typename A> class update_tuple_sketch;
|
|
33
33
|
template<typename S, typename A> class compact_tuple_sketch;
|
|
@@ -43,6 +43,10 @@ struct pair_extract_key {
|
|
|
43
43
|
}
|
|
44
44
|
};
|
|
45
45
|
|
|
46
|
+
/**
|
|
47
|
+
* Base class for Tuple sketch.
|
|
48
|
+
* This is an extension of Theta sketch that allows keeping arbitrary Summary associated with each retained key.
|
|
49
|
+
*/
|
|
46
50
|
template<
|
|
47
51
|
typename Summary,
|
|
48
52
|
typename Allocator = std::allocator<Summary>
|
|
@@ -190,7 +194,7 @@ protected:
|
|
|
190
194
|
|
|
191
195
|
// for types with defined default constructor and + operation
|
|
192
196
|
template<typename Summary, typename Update>
|
|
193
|
-
struct
|
|
197
|
+
struct default_tuple_update_policy {
|
|
194
198
|
Summary create() const {
|
|
195
199
|
return Summary();
|
|
196
200
|
}
|
|
@@ -199,10 +203,15 @@ struct default_update_policy {
|
|
|
199
203
|
}
|
|
200
204
|
};
|
|
201
205
|
|
|
206
|
+
/**
|
|
207
|
+
* Update Tuple sketch.
|
|
208
|
+
* The purpose of this class is to build a Tuple sketch from input data via the update() methods.
|
|
209
|
+
* There is no constructor. Use builder instead.
|
|
210
|
+
*/
|
|
202
211
|
template<
|
|
203
212
|
typename Summary,
|
|
204
213
|
typename Update = Summary,
|
|
205
|
-
typename Policy =
|
|
214
|
+
typename Policy = default_tuple_update_policy<Summary, Update>,
|
|
206
215
|
typename Allocator = std::allocator<Summary>
|
|
207
216
|
>
|
|
208
217
|
class update_tuple_sketch: public tuple_sketch<Summary, Allocator> {
|
|
@@ -244,21 +253,24 @@ public:
|
|
|
244
253
|
|
|
245
254
|
/**
|
|
246
255
|
* Update this sketch with a given string.
|
|
247
|
-
* @param
|
|
256
|
+
* @param key string to update the sketch with
|
|
257
|
+
* @param value to update the sketch with
|
|
248
258
|
*/
|
|
249
259
|
template<typename FwdUpdate>
|
|
250
260
|
inline void update(const std::string& key, FwdUpdate&& value);
|
|
251
261
|
|
|
252
262
|
/**
|
|
253
263
|
* Update this sketch with a given unsigned 64-bit integer.
|
|
254
|
-
* @param
|
|
264
|
+
* @param key uint64_t to update the sketch with
|
|
265
|
+
* @param value to update the sketch with
|
|
255
266
|
*/
|
|
256
267
|
template<typename FwdUpdate>
|
|
257
268
|
inline void update(uint64_t key, FwdUpdate&& value);
|
|
258
269
|
|
|
259
270
|
/**
|
|
260
271
|
* Update this sketch with a given signed 64-bit integer.
|
|
261
|
-
* @param
|
|
272
|
+
* @param key int64_t to update the sketch with
|
|
273
|
+
* @param value to update the sketch with
|
|
262
274
|
*/
|
|
263
275
|
template<typename FwdUpdate>
|
|
264
276
|
inline void update(int64_t key, FwdUpdate&& value);
|
|
@@ -266,7 +278,8 @@ public:
|
|
|
266
278
|
/**
|
|
267
279
|
* Update this sketch with a given unsigned 32-bit integer.
|
|
268
280
|
* For compatibility with Java implementation.
|
|
269
|
-
* @param
|
|
281
|
+
* @param key uint32_t to update the sketch with
|
|
282
|
+
* @param value to update the sketch with
|
|
270
283
|
*/
|
|
271
284
|
template<typename FwdUpdate>
|
|
272
285
|
inline void update(uint32_t key, FwdUpdate&& value);
|
|
@@ -274,7 +287,8 @@ public:
|
|
|
274
287
|
/**
|
|
275
288
|
* Update this sketch with a given signed 32-bit integer.
|
|
276
289
|
* For compatibility with Java implementation.
|
|
277
|
-
* @param
|
|
290
|
+
* @param key int32_t to update the sketch with
|
|
291
|
+
* @param value to update the sketch with
|
|
278
292
|
*/
|
|
279
293
|
template<typename FwdUpdate>
|
|
280
294
|
inline void update(int32_t key, FwdUpdate&& value);
|
|
@@ -282,7 +296,8 @@ public:
|
|
|
282
296
|
/**
|
|
283
297
|
* Update this sketch with a given unsigned 16-bit integer.
|
|
284
298
|
* For compatibility with Java implementation.
|
|
285
|
-
* @param
|
|
299
|
+
* @param key uint16_t to update the sketch with
|
|
300
|
+
* @param value to update the sketch with
|
|
286
301
|
*/
|
|
287
302
|
template<typename FwdUpdate>
|
|
288
303
|
inline void update(uint16_t key, FwdUpdate&& value);
|
|
@@ -290,7 +305,8 @@ public:
|
|
|
290
305
|
/**
|
|
291
306
|
* Update this sketch with a given signed 16-bit integer.
|
|
292
307
|
* For compatibility with Java implementation.
|
|
293
|
-
* @param
|
|
308
|
+
* @param key int16_t to update the sketch with
|
|
309
|
+
* @param value to update the sketch with
|
|
294
310
|
*/
|
|
295
311
|
template<typename FwdUpdate>
|
|
296
312
|
inline void update(int16_t key, FwdUpdate&& value);
|
|
@@ -298,7 +314,8 @@ public:
|
|
|
298
314
|
/**
|
|
299
315
|
* Update this sketch with a given unsigned 8-bit integer.
|
|
300
316
|
* For compatibility with Java implementation.
|
|
301
|
-
* @param
|
|
317
|
+
* @param key uint8_t to update the sketch with
|
|
318
|
+
* @param value to update the sketch with
|
|
302
319
|
*/
|
|
303
320
|
template<typename FwdUpdate>
|
|
304
321
|
inline void update(uint8_t key, FwdUpdate&& value);
|
|
@@ -306,7 +323,8 @@ public:
|
|
|
306
323
|
/**
|
|
307
324
|
* Update this sketch with a given signed 8-bit integer.
|
|
308
325
|
* For compatibility with Java implementation.
|
|
309
|
-
* @param
|
|
326
|
+
* @param key int8_t to update the sketch with
|
|
327
|
+
* @param value to update the sketch with
|
|
310
328
|
*/
|
|
311
329
|
template<typename FwdUpdate>
|
|
312
330
|
inline void update(int8_t key, FwdUpdate&& value);
|
|
@@ -314,7 +332,8 @@ public:
|
|
|
314
332
|
/**
|
|
315
333
|
* Update this sketch with a given double-precision floating point value.
|
|
316
334
|
* For compatibility with Java implementation.
|
|
317
|
-
* @param
|
|
335
|
+
* @param key double to update the sketch with
|
|
336
|
+
* @param value to update the sketch with
|
|
318
337
|
*/
|
|
319
338
|
template<typename FwdUpdate>
|
|
320
339
|
inline void update(double key, FwdUpdate&& value);
|
|
@@ -322,7 +341,8 @@ public:
|
|
|
322
341
|
/**
|
|
323
342
|
* Update this sketch with a given floating point value.
|
|
324
343
|
* For compatibility with Java implementation.
|
|
325
|
-
* @param
|
|
344
|
+
* @param key float to update the sketch with
|
|
345
|
+
* @param value to update the sketch with
|
|
326
346
|
*/
|
|
327
347
|
template<typename FwdUpdate>
|
|
328
348
|
inline void update(float key, FwdUpdate&& value);
|
|
@@ -337,8 +357,9 @@ public:
|
|
|
337
357
|
* Otherwise two sketches that should represent overlapping sets will be disjoint
|
|
338
358
|
* For instance, for signed 32-bit values call update(int32_t) method above,
|
|
339
359
|
* which does widening conversion to int64_t, if compatibility with Java is expected
|
|
340
|
-
* @param
|
|
360
|
+
* @param key pointer to the data
|
|
341
361
|
* @param length of the data in bytes
|
|
362
|
+
* @param value to update the sketch with
|
|
342
363
|
*/
|
|
343
364
|
template<typename FwdUpdate>
|
|
344
365
|
void update(const void* key, size_t length, FwdUpdate&& value);
|
|
@@ -355,7 +376,7 @@ public:
|
|
|
355
376
|
|
|
356
377
|
/**
|
|
357
378
|
* Converts this sketch to a compact sketch (ordered or unordered).
|
|
358
|
-
* @param ordered optional flag to specify if ordered sketch should be produced
|
|
379
|
+
* @param ordered optional flag to specify if an ordered sketch should be produced
|
|
359
380
|
* @return compact sketch
|
|
360
381
|
*/
|
|
361
382
|
compact_tuple_sketch<Summary, Allocator> compact(bool ordered = true) const;
|
|
@@ -375,8 +396,10 @@ protected:
|
|
|
375
396
|
virtual void print_specifics(std::ostringstream& os) const;
|
|
376
397
|
};
|
|
377
398
|
|
|
378
|
-
|
|
379
|
-
|
|
399
|
+
/**
|
|
400
|
+
* Compact Tuple sketch.
|
|
401
|
+
* This is an immutable form of the Tuple sketch, the form that can be serialized and deserialized.
|
|
402
|
+
*/
|
|
380
403
|
template<
|
|
381
404
|
typename Summary,
|
|
382
405
|
typename Allocator = std::allocator<Summary>
|
|
@@ -406,13 +429,48 @@ public:
|
|
|
406
429
|
// - as a result of a set operation
|
|
407
430
|
// - by deserializing a previously serialized compact sketch
|
|
408
431
|
|
|
432
|
+
/**
|
|
433
|
+
* Copy constructor.
|
|
434
|
+
* Constructs a compact sketch from another sketch (either update or compact)
|
|
435
|
+
* @param other sketch to be copied
|
|
436
|
+
* @param ordered if true make the resulting sketch ordered
|
|
437
|
+
*/
|
|
409
438
|
compact_tuple_sketch(const Base& other, bool ordered);
|
|
410
|
-
|
|
439
|
+
|
|
440
|
+
/**
|
|
441
|
+
* Copy constructor.
|
|
442
|
+
* @param other sketch to be copied
|
|
443
|
+
*/
|
|
444
|
+
compact_tuple_sketch(const compact_tuple_sketch& other) = default;
|
|
445
|
+
|
|
446
|
+
/**
|
|
447
|
+
* Move constructor.
|
|
448
|
+
* @param other sketch to be moved
|
|
449
|
+
*/
|
|
411
450
|
compact_tuple_sketch(compact_tuple_sketch&&) noexcept;
|
|
451
|
+
|
|
412
452
|
virtual ~compact_tuple_sketch() = default;
|
|
413
|
-
compact_tuple_sketch& operator=(const compact_tuple_sketch&) = default;
|
|
414
|
-
compact_tuple_sketch& operator=(compact_tuple_sketch&&) = default;
|
|
415
453
|
|
|
454
|
+
/**
|
|
455
|
+
* Copy assignment
|
|
456
|
+
* @param other sketch to be copied
|
|
457
|
+
* @return reference to this sketch
|
|
458
|
+
*/
|
|
459
|
+
compact_tuple_sketch& operator=(const compact_tuple_sketch& other) = default;
|
|
460
|
+
|
|
461
|
+
/**
|
|
462
|
+
* Move assignment
|
|
463
|
+
* @param other sketch to be moved
|
|
464
|
+
* @return reference to this sketch
|
|
465
|
+
*/
|
|
466
|
+
compact_tuple_sketch& operator=(compact_tuple_sketch&& other) = default;
|
|
467
|
+
|
|
468
|
+
/**
|
|
469
|
+
* Constructor from Theta sketch
|
|
470
|
+
* @param other Theta sketch to be constructed from
|
|
471
|
+
* @param summary Summary instance to be associated with each entry
|
|
472
|
+
* @param ordered if true make the resulting sketch ordered
|
|
473
|
+
*/
|
|
416
474
|
compact_tuple_sketch(const theta_sketch_alloc<AllocU64>& other, const Summary& summary, bool ordered = true);
|
|
417
475
|
|
|
418
476
|
virtual Allocator get_allocator() const;
|
|
@@ -425,7 +483,7 @@ public:
|
|
|
425
483
|
/**
|
|
426
484
|
* This method serializes the sketch into a given stream in a binary form
|
|
427
485
|
* @param os output stream
|
|
428
|
-
* @param instance of a SerDe
|
|
486
|
+
* @param sd instance of a SerDe
|
|
429
487
|
*/
|
|
430
488
|
template<typename SerDe = serde<Summary>>
|
|
431
489
|
void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
|
|
@@ -436,7 +494,7 @@ public:
|
|
|
436
494
|
* It is a blank space of a given size.
|
|
437
495
|
* This header is used in Datasketches PostgreSQL extension.
|
|
438
496
|
* @param header_size_bytes space to reserve in front of the sketch
|
|
439
|
-
* @param instance of a SerDe
|
|
497
|
+
* @param sd instance of a SerDe
|
|
440
498
|
* @return serialized sketch as a vector of bytes
|
|
441
499
|
*/
|
|
442
500
|
template<typename SerDe = serde<Summary>>
|
|
@@ -451,8 +509,8 @@ public:
|
|
|
451
509
|
* This method deserializes a sketch from a given stream.
|
|
452
510
|
* @param is input stream
|
|
453
511
|
* @param seed the seed for the hash function that was used to create the sketch
|
|
454
|
-
* @param instance of a SerDe
|
|
455
|
-
* @param instance of an Allocator
|
|
512
|
+
* @param sd instance of a SerDe
|
|
513
|
+
* @param allocator instance of an Allocator
|
|
456
514
|
* @return an instance of a sketch
|
|
457
515
|
*/
|
|
458
516
|
template<typename SerDe = serde<Summary>>
|
|
@@ -464,17 +522,14 @@ public:
|
|
|
464
522
|
* @param bytes pointer to the array of bytes
|
|
465
523
|
* @param size the size of the array
|
|
466
524
|
* @param seed the seed for the hash function that was used to create the sketch
|
|
467
|
-
* @param instance of a SerDe
|
|
468
|
-
* @param instance of an Allocator
|
|
525
|
+
* @param sd instance of a SerDe
|
|
526
|
+
* @param allocator instance of an Allocator
|
|
469
527
|
* @return an instance of the sketch
|
|
470
528
|
*/
|
|
471
529
|
template<typename SerDe = serde<Summary>>
|
|
472
530
|
static compact_tuple_sketch deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED,
|
|
473
531
|
const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
|
|
474
532
|
|
|
475
|
-
// for internal use
|
|
476
|
-
compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries);
|
|
477
|
-
|
|
478
533
|
protected:
|
|
479
534
|
bool is_empty_;
|
|
480
535
|
bool is_ordered_;
|
|
@@ -520,10 +575,14 @@ protected:
|
|
|
520
575
|
|
|
521
576
|
virtual void print_specifics(std::ostringstream& os) const;
|
|
522
577
|
|
|
523
|
-
|
|
578
|
+
template<typename E, typename EK, typename P, typename S, typename CS, typename A> friend class theta_union_base;
|
|
579
|
+
template<typename E, typename EK, typename P, typename S, typename CS, typename A> friend class theta_intersection_base;
|
|
580
|
+
template<typename E, typename EK, typename CS, typename A> friend class theta_set_difference_base;
|
|
581
|
+
compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries);
|
|
524
582
|
|
|
525
|
-
|
|
583
|
+
};
|
|
526
584
|
|
|
585
|
+
/// Tuple base builder
|
|
527
586
|
template<typename Derived, typename Policy, typename Allocator>
|
|
528
587
|
class tuple_base_builder: public theta_base_builder<Derived, Allocator> {
|
|
529
588
|
public:
|
|
@@ -533,11 +592,15 @@ protected:
|
|
|
533
592
|
Policy policy_;
|
|
534
593
|
};
|
|
535
594
|
|
|
595
|
+
/// Update Tuple sketch builder
|
|
536
596
|
template<typename S, typename U, typename P, typename A>
|
|
537
597
|
class update_tuple_sketch<S, U, P, A>::builder: public tuple_base_builder<builder, P, A> {
|
|
538
598
|
public:
|
|
539
599
|
/**
|
|
600
|
+
* Constructor
|
|
540
601
|
* Creates an instance of the builder with default parameters.
|
|
602
|
+
* @param policy user-defined way of creating and updating Summary
|
|
603
|
+
* @param allocator instance of an Allocator to pass to created sketches
|
|
541
604
|
*/
|
|
542
605
|
builder(const P& policy = P(), const A& allocator = A());
|
|
543
606
|
|
|
@@ -27,15 +27,19 @@ namespace datasketches {
|
|
|
27
27
|
|
|
28
28
|
// for types with defined + operation
|
|
29
29
|
template<typename Summary>
|
|
30
|
-
struct
|
|
30
|
+
struct default_tuple_union_policy {
|
|
31
31
|
void operator()(Summary& summary, const Summary& other) const {
|
|
32
32
|
summary += other;
|
|
33
33
|
}
|
|
34
34
|
};
|
|
35
35
|
|
|
36
|
+
/**
|
|
37
|
+
* Tuple Union.
|
|
38
|
+
* Computes union of Tuple sketches. There is no constructor. Use builder instead.
|
|
39
|
+
*/
|
|
36
40
|
template<
|
|
37
41
|
typename Summary,
|
|
38
|
-
typename Policy =
|
|
42
|
+
typename Policy = default_tuple_union_policy<Summary>,
|
|
39
43
|
typename Allocator = std::allocator<Summary>
|
|
40
44
|
>
|
|
41
45
|
class tuple_union {
|
|
@@ -50,15 +54,15 @@ public:
|
|
|
50
54
|
// reformulate the external policy that operates on Summary
|
|
51
55
|
// in terms of operations on Entry
|
|
52
56
|
struct internal_policy {
|
|
53
|
-
internal_policy(const Policy&
|
|
57
|
+
internal_policy(const Policy& external_policy): external_policy_(external_policy) {}
|
|
54
58
|
void operator()(Entry& internal_entry, const Entry& incoming_entry) const {
|
|
55
|
-
|
|
59
|
+
external_policy_(internal_entry.second, incoming_entry.second);
|
|
56
60
|
}
|
|
57
61
|
void operator()(Entry& internal_entry, Entry&& incoming_entry) const {
|
|
58
|
-
|
|
62
|
+
external_policy_(internal_entry.second, std::move(incoming_entry.second));
|
|
59
63
|
}
|
|
60
|
-
const Policy&
|
|
61
|
-
Policy
|
|
64
|
+
const Policy& get_external_policy() const { return external_policy_; }
|
|
65
|
+
Policy external_policy_;
|
|
62
66
|
};
|
|
63
67
|
|
|
64
68
|
using State = theta_union_base<Entry, ExtractKey, internal_policy, Sketch, CompactSketch, AllocEntry>;
|
|
@@ -67,15 +71,15 @@ public:
|
|
|
67
71
|
class builder;
|
|
68
72
|
|
|
69
73
|
/**
|
|
70
|
-
*
|
|
74
|
+
* Update the union with a given sketch
|
|
71
75
|
* @param sketch to update the union with
|
|
72
76
|
*/
|
|
73
77
|
template<typename FwdSketch>
|
|
74
78
|
void update(FwdSketch&& sketch);
|
|
75
79
|
|
|
76
80
|
/**
|
|
77
|
-
*
|
|
78
|
-
* @param ordered optional flag to specify if ordered sketch should be produced
|
|
81
|
+
* Produces a copy of the current state of the union as a compact sketch.
|
|
82
|
+
* @param ordered optional flag to specify if an ordered sketch should be produced
|
|
79
83
|
* @return the result of the union
|
|
80
84
|
*/
|
|
81
85
|
CompactSketch get_result(bool ordered = true) const;
|
|
@@ -92,16 +96,20 @@ protected:
|
|
|
92
96
|
tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
|
93
97
|
};
|
|
94
98
|
|
|
99
|
+
/// Tuple union builder
|
|
95
100
|
template<typename S, typename P, typename A>
|
|
96
101
|
class tuple_union<S, P, A>::builder: public tuple_base_builder<builder, P, A> {
|
|
97
102
|
public:
|
|
98
103
|
/**
|
|
104
|
+
* Constructor.
|
|
99
105
|
* Creates and instance of the builder with default parameters.
|
|
106
|
+
* @param policy
|
|
107
|
+
* @param allocator
|
|
100
108
|
*/
|
|
101
109
|
builder(const P& policy = P(), const A& allocator = A());
|
|
102
110
|
|
|
103
111
|
/**
|
|
104
|
-
*
|
|
112
|
+
* Create an instance of the union with predefined parameters.
|
|
105
113
|
* @return an instance of the union
|
|
106
114
|
*/
|
|
107
115
|
tuple_union build() const;
|
|
@@ -20,7 +20,6 @@ add_executable(tuple_test)
|
|
|
20
20
|
target_link_libraries(tuple_test tuple common_test_lib)
|
|
21
21
|
|
|
22
22
|
set_target_properties(tuple_test PROPERTIES
|
|
23
|
-
CXX_STANDARD 11
|
|
24
23
|
CXX_STANDARD_REQUIRED YES
|
|
25
24
|
)
|
|
26
25
|
|
|
@@ -47,3 +46,19 @@ target_sources(tuple_test
|
|
|
47
46
|
array_of_doubles_sketch_test.cpp
|
|
48
47
|
engagement_test.cpp
|
|
49
48
|
)
|
|
49
|
+
|
|
50
|
+
if (SERDE_COMPAT)
|
|
51
|
+
target_sources(tuple_test
|
|
52
|
+
PRIVATE
|
|
53
|
+
aod_sketch_deserialize_from_java_test.cpp
|
|
54
|
+
tuple_sketch_deserialize_from_java_test.cpp
|
|
55
|
+
)
|
|
56
|
+
endif()
|
|
57
|
+
|
|
58
|
+
if (GENERATE)
|
|
59
|
+
target_sources(tuple_test
|
|
60
|
+
PRIVATE
|
|
61
|
+
aod_sketch_serialize_for_java.cpp
|
|
62
|
+
tuple_sketch_serialize_for_java.cpp
|
|
63
|
+
)
|
|
64
|
+
endif()
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch2/catch.hpp>
|
|
21
|
+
#include <fstream>
|
|
22
|
+
|
|
23
|
+
#include "array_of_doubles_sketch.hpp"
|
|
24
|
+
|
|
25
|
+
namespace datasketches {
|
|
26
|
+
|
|
27
|
+
// assume the binary sketches for this test have been generated by datasketches-java code
|
|
28
|
+
// in the subdirectory called "java" in the root directory of this project
|
|
29
|
+
static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";
|
|
30
|
+
|
|
31
|
+
TEST_CASE("aod sketch one value", "[serde_compat]") {
|
|
32
|
+
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
|
|
33
|
+
for (const unsigned n: n_arr) {
|
|
34
|
+
std::ifstream is;
|
|
35
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
36
|
+
is.open(testBinaryInputPath + "aod_1_n" + std::to_string(n) + "_java.sk", std::ios::binary);
|
|
37
|
+
const auto sketch = compact_array_of_doubles_sketch::deserialize(is);
|
|
38
|
+
REQUIRE(sketch.is_empty() == (n == 0));
|
|
39
|
+
REQUIRE(sketch.is_estimation_mode() == (n > 1000));
|
|
40
|
+
REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
|
|
41
|
+
REQUIRE(sketch.get_num_values() == 1);
|
|
42
|
+
for (const auto& entry: sketch) {
|
|
43
|
+
REQUIRE(entry.first < sketch.get_theta64());
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
TEST_CASE("aod sketch three values", "[serde_compat]") {
|
|
49
|
+
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
|
|
50
|
+
for (const unsigned n: n_arr) {
|
|
51
|
+
std::ifstream is;
|
|
52
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
53
|
+
is.open(testBinaryInputPath + "aod_3_n" + std::to_string(n) + "_java.sk", std::ios::binary);
|
|
54
|
+
const auto sketch = compact_array_of_doubles_sketch::deserialize(is);
|
|
55
|
+
REQUIRE(sketch.is_empty() == (n == 0));
|
|
56
|
+
REQUIRE(sketch.is_estimation_mode() == (n > 1000));
|
|
57
|
+
REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
|
|
58
|
+
REQUIRE(sketch.get_num_values() == 3);
|
|
59
|
+
for (const auto& entry: sketch) {
|
|
60
|
+
REQUIRE(entry.first < sketch.get_theta64());
|
|
61
|
+
REQUIRE(entry.second[0] == entry.second[1]);
|
|
62
|
+
REQUIRE(entry.second[0] == entry.second[2]);
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
TEST_CASE("aod sketch non-empty no entries", "[serde_compat]") {
|
|
68
|
+
std::ifstream is;
|
|
69
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
|
70
|
+
is.open(testBinaryInputPath + "aod_1_non_empty_no_entries_java.sk", std::ios::binary);
|
|
71
|
+
const auto sketch = compact_array_of_doubles_sketch::deserialize(is);
|
|
72
|
+
REQUIRE_FALSE(sketch.is_empty());
|
|
73
|
+
REQUIRE(sketch.get_num_retained() == 0);
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
} /* namespace datasketches */
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
#include <catch2/catch.hpp>
|
|
21
|
+
#include <fstream>
|
|
22
|
+
|
|
23
|
+
#include "array_of_doubles_sketch.hpp"
|
|
24
|
+
|
|
25
|
+
namespace datasketches {
|
|
26
|
+
|
|
27
|
+
TEST_CASE("aod sketch generate one value", "[serialize_for_java]") {
|
|
28
|
+
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
|
|
29
|
+
for (const unsigned n: n_arr) {
|
|
30
|
+
auto sketch = update_array_of_doubles_sketch::builder().build();
|
|
31
|
+
for (unsigned i = 0; i < n; ++i) sketch.update(i, std::vector<double>(1, i));
|
|
32
|
+
REQUIRE(sketch.is_empty() == (n == 0));
|
|
33
|
+
REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
|
|
34
|
+
std::ofstream os("aod_1_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
|
|
35
|
+
sketch.compact().serialize(os);
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
TEST_CASE("aod sketch generate three values", "[serialize_for_java]") {
|
|
40
|
+
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
|
|
41
|
+
for (const unsigned n: n_arr) {
|
|
42
|
+
auto sketch = update_array_of_doubles_sketch::builder(3).build();
|
|
43
|
+
for (unsigned i = 0; i < n; ++i) sketch.update(i, std::vector<double>(3, i));
|
|
44
|
+
REQUIRE(sketch.is_empty() == (n == 0));
|
|
45
|
+
REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
|
|
46
|
+
std::ofstream os("aod_3_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
|
|
47
|
+
sketch.compact().serialize(os);
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
TEST_CASE("aod sketch generate non-empty no entries", "[serialize_for_java]") {
|
|
52
|
+
auto sketch = update_array_of_doubles_sketch::builder().set_p(0.01).build();
|
|
53
|
+
// here we rely on the fact that hash of 1 happens to be greater than 0.01 (when normalized)
|
|
54
|
+
// and therefore gets rejected
|
|
55
|
+
sketch.update(1, std::vector<double>({1}));
|
|
56
|
+
REQUIRE_FALSE(sketch.is_empty());
|
|
57
|
+
REQUIRE(sketch.get_num_retained() == 0);
|
|
58
|
+
std::ofstream os("aod_1_non_empty_no_entries_cpp.sk", std::ios::binary);
|
|
59
|
+
sketch.compact().serialize(os);
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
} /* namespace datasketches */
|