datasketches 0.3.2 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/NOTICE +1 -1
- data/README.md +0 -2
- data/ext/datasketches/cpc_wrapper.cpp +2 -2
- data/ext/datasketches/kll_wrapper.cpp +0 -10
- data/lib/datasketches/version.rb +1 -1
- data/lib/datasketches.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/CODE_OF_CONDUCT.md +3 -0
- data/vendor/datasketches-cpp/CONTRIBUTING.md +50 -0
- data/vendor/datasketches-cpp/Doxyfile +2827 -0
- data/vendor/datasketches-cpp/LICENSE +0 -76
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +1 -3
- data/vendor/datasketches-cpp/common/CMakeLists.txt +12 -11
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +11 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +0 -2
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp +9 -6
- data/vendor/datasketches-cpp/common/include/optional.hpp +148 -0
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view.hpp +95 -2
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +1 -1
- data/vendor/datasketches-cpp/common/include/serde.hpp +69 -20
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/common/test/optional_test.cpp +85 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +14 -14
- data/vendor/datasketches-cpp/count/include/count_min.hpp +132 -78
- data/vendor/datasketches-cpp/count/include/count_min_impl.hpp +132 -152
- data/vendor/datasketches-cpp/count/test/CMakeLists.txt +11 -12
- data/vendor/datasketches-cpp/count/test/count_min_allocation_test.cpp +61 -61
- data/vendor/datasketches-cpp/count/test/count_min_test.cpp +175 -178
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +14 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +7 -4
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +17 -17
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +40 -40
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +13 -10
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +35 -11
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -2
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +5 -5
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +20 -7
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_deserialize_from_java_test.cpp +60 -0
- data/vendor/datasketches-cpp/{python/include/py_object_lt.hpp → cpc/test/cpc_sketch_serialize_for_java.cpp} +15 -14
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +4 -29
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/density/include/density_sketch.hpp +29 -9
- data/vendor/datasketches-cpp/density/include/density_sketch_impl.hpp +1 -1
- data/vendor/datasketches-cpp/density/test/CMakeLists.txt +0 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +21 -9
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +6 -4
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_deserialize_from_java_test.cpp +95 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_serialize_for_java.cpp +83 -0
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +3 -42
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +5 -3
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +4 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +3 -1
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +0 -12
- data/vendor/datasketches-cpp/hll/include/hll.hpp +70 -57
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +0 -68
- data/vendor/datasketches-cpp/hll/test/hll_sketch_deserialize_from_java_test.cpp +69 -0
- data/vendor/datasketches-cpp/hll/test/hll_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +2 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +71 -50
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +59 -130
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_deserialize_from_java_test.cpp +103 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +3 -38
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +68 -51
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +62 -132
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_deserialize_from_java_test.cpp +84 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_serialize_for_java.cpp +52 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +14 -38
- data/vendor/datasketches-cpp/req/include/req_common.hpp +7 -3
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +2 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +97 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +48 -109
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_deserialize_from_java_test.cpp +55 -0
- data/vendor/datasketches-cpp/{tuple/include/array_of_doubles_intersection_impl.hpp → req/test/req_sketch_serialize_for_java.cpp} +12 -7
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +3 -89
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +4 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample.hpp +210 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sample_impl.hpp +535 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch.hpp +281 -0
- data/vendor/datasketches-cpp/sampling/include/ebpps_sketch_impl.hpp +531 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +69 -26
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +10 -11
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +4 -4
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +55 -8
- data/vendor/datasketches-cpp/sampling/test/ebpps_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sample_test.cpp +137 -0
- data/vendor/datasketches-cpp/sampling/test/ebpps_sketch_test.cpp +266 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_deserialize_from_java_test.cpp +81 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_serialize_for_java.cpp +54 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +0 -37
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_deserialize_from_java_test.cpp +50 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_serialize_for_java.cpp +56 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -18
- data/vendor/datasketches-cpp/theta/include/bit_packing.hpp +2608 -2608
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_theta_sketched_sets.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +20 -5
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +10 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +13 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +126 -27
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +8 -8
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +17 -10
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -2
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +11 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +14 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_deserialize_from_java_test.cpp +57 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_serialize_for_java.cpp +61 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +0 -188
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +8 -7
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +19 -144
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b.hpp → array_tuple_a_not_b.hpp} +24 -16
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_a_not_b_impl.hpp → array_tuple_a_not_b_impl.hpp} +4 -4
- data/vendor/datasketches-cpp/tuple/include/array_tuple_intersection.hpp +65 -0
- data/vendor/datasketches-cpp/{python/include/py_object_ostream.hpp → tuple/include/array_tuple_intersection_impl.hpp} +7 -24
- data/vendor/datasketches-cpp/tuple/include/array_tuple_sketch.hpp +237 -0
- data/vendor/datasketches-cpp/tuple/include/{array_of_doubles_sketch_impl.hpp → array_tuple_sketch_impl.hpp} +40 -41
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union.hpp +81 -0
- data/vendor/datasketches-cpp/tuple/include/array_tuple_union_impl.hpp +43 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_a_not_b.hpp +11 -2
- data/vendor/datasketches-cpp/tuple/include/tuple_intersection.hpp +17 -10
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +2 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +95 -32
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +19 -11
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +16 -1
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_deserialize_from_java_test.cpp +76 -0
- data/vendor/datasketches-cpp/tuple/test/aod_sketch_serialize_for_java.cpp +62 -0
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +5 -129
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +85 -89
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +3 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_deserialize_from_java_test.cpp +47 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_serialize_for_java.cpp +38 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/version.cfg.in +1 -1
- metadata +47 -93
- data/vendor/datasketches-cpp/MANIFEST.in +0 -39
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/items_sketch_string_utf8_from_java.sk +0 -0
- data/vendor/datasketches-cpp/fi/test/longs_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/array6_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/compact_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/list_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_array4_from_java.sk +0 -0
- data/vendor/datasketches-cpp/hll/test/updatable_set_from_java.sk +0 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_from_java.sk +0 -0
- data/vendor/datasketches-cpp/pyproject.toml +0 -23
- data/vendor/datasketches-cpp/python/CMakeLists.txt +0 -87
- data/vendor/datasketches-cpp/python/README.md +0 -85
- data/vendor/datasketches-cpp/python/datasketches/DensityWrapper.py +0 -87
- data/vendor/datasketches-cpp/python/datasketches/KernelFunction.py +0 -35
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +0 -110
- data/vendor/datasketches-cpp/python/datasketches/TuplePolicy.py +0 -77
- data/vendor/datasketches-cpp/python/datasketches/TupleWrapper.py +0 -205
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +0 -38
- data/vendor/datasketches-cpp/python/include/kernel_function.hpp +0 -98
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +0 -113
- data/vendor/datasketches-cpp/python/include/quantile_conditional.hpp +0 -104
- data/vendor/datasketches-cpp/python/include/tuple_policy.hpp +0 -136
- data/vendor/datasketches-cpp/python/jupyter/CPCSketch.ipynb +0 -345
- data/vendor/datasketches-cpp/python/jupyter/FrequentItemsSketch.ipynb +0 -354
- data/vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb +0 -346
- data/vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb +0 -463
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +0 -403
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +0 -21
- data/vendor/datasketches-cpp/python/src/__init__.py +0 -18
- data/vendor/datasketches-cpp/python/src/count_wrapper.cpp +0 -101
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +0 -76
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +0 -58
- data/vendor/datasketches-cpp/python/src/density_wrapper.cpp +0 -95
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +0 -182
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -126
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +0 -158
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +0 -68
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +0 -112
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +0 -155
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +0 -154
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +0 -166
- data/vendor/datasketches-cpp/python/src/tuple_wrapper.cpp +0 -215
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +0 -490
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +0 -173
- data/vendor/datasketches-cpp/python/tests/__init__.py +0 -16
- data/vendor/datasketches-cpp/python/tests/count_min_test.py +0 -86
- data/vendor/datasketches-cpp/python/tests/cpc_test.py +0 -64
- data/vendor/datasketches-cpp/python/tests/density_test.py +0 -93
- data/vendor/datasketches-cpp/python/tests/fi_test.py +0 -149
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -129
- data/vendor/datasketches-cpp/python/tests/kll_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +0 -160
- data/vendor/datasketches-cpp/python/tests/req_test.py +0 -159
- data/vendor/datasketches-cpp/python/tests/theta_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/tuple_test.py +0 -206
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +0 -148
- data/vendor/datasketches-cpp/python/tests/vo_test.py +0 -132
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/binaries_from_java.txt +0 -67
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_long_sampling.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_sketch_string_exact.sk +0 -0
- data/vendor/datasketches-cpp/sampling/test/varopt_union_double_sampling.sk +0 -0
- data/vendor/datasketches-cpp/setup.py +0 -110
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tox.ini +0 -26
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_intersection.hpp +0 -52
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +0 -81
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_empty_from_java.sk +0 -1
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_1_compact_non_empty_no_entries_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_2_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/aod_3_compact_empty_from_java.sk +0 -1
@@ -27,7 +27,7 @@
|
|
27
27
|
|
28
28
|
namespace datasketches {
|
29
29
|
|
30
|
-
// forward
|
30
|
+
// forward declarations
|
31
31
|
template<typename S, typename A> class tuple_sketch;
|
32
32
|
template<typename S, typename U, typename P, typename A> class update_tuple_sketch;
|
33
33
|
template<typename S, typename A> class compact_tuple_sketch;
|
@@ -43,6 +43,10 @@ struct pair_extract_key {
|
|
43
43
|
}
|
44
44
|
};
|
45
45
|
|
46
|
+
/**
|
47
|
+
* Base class for Tuple sketch.
|
48
|
+
* This is an extension of Theta sketch that allows keeping arbitrary Summary associated with each retained key.
|
49
|
+
*/
|
46
50
|
template<
|
47
51
|
typename Summary,
|
48
52
|
typename Allocator = std::allocator<Summary>
|
@@ -190,7 +194,7 @@ protected:
|
|
190
194
|
|
191
195
|
// for types with defined default constructor and + operation
|
192
196
|
template<typename Summary, typename Update>
|
193
|
-
struct
|
197
|
+
struct default_tuple_update_policy {
|
194
198
|
Summary create() const {
|
195
199
|
return Summary();
|
196
200
|
}
|
@@ -199,10 +203,15 @@ struct default_update_policy {
|
|
199
203
|
}
|
200
204
|
};
|
201
205
|
|
206
|
+
/**
|
207
|
+
* Update Tuple sketch.
|
208
|
+
* The purpose of this class is to build a Tuple sketch from input data via the update() methods.
|
209
|
+
* There is no constructor. Use builder instead.
|
210
|
+
*/
|
202
211
|
template<
|
203
212
|
typename Summary,
|
204
213
|
typename Update = Summary,
|
205
|
-
typename Policy =
|
214
|
+
typename Policy = default_tuple_update_policy<Summary, Update>,
|
206
215
|
typename Allocator = std::allocator<Summary>
|
207
216
|
>
|
208
217
|
class update_tuple_sketch: public tuple_sketch<Summary, Allocator> {
|
@@ -244,21 +253,24 @@ public:
|
|
244
253
|
|
245
254
|
/**
|
246
255
|
* Update this sketch with a given string.
|
247
|
-
* @param
|
256
|
+
* @param key string to update the sketch with
|
257
|
+
* @param value to update the sketch with
|
248
258
|
*/
|
249
259
|
template<typename FwdUpdate>
|
250
260
|
inline void update(const std::string& key, FwdUpdate&& value);
|
251
261
|
|
252
262
|
/**
|
253
263
|
* Update this sketch with a given unsigned 64-bit integer.
|
254
|
-
* @param
|
264
|
+
* @param key uint64_t to update the sketch with
|
265
|
+
* @param value to update the sketch with
|
255
266
|
*/
|
256
267
|
template<typename FwdUpdate>
|
257
268
|
inline void update(uint64_t key, FwdUpdate&& value);
|
258
269
|
|
259
270
|
/**
|
260
271
|
* Update this sketch with a given signed 64-bit integer.
|
261
|
-
* @param
|
272
|
+
* @param key int64_t to update the sketch with
|
273
|
+
* @param value to update the sketch with
|
262
274
|
*/
|
263
275
|
template<typename FwdUpdate>
|
264
276
|
inline void update(int64_t key, FwdUpdate&& value);
|
@@ -266,7 +278,8 @@ public:
|
|
266
278
|
/**
|
267
279
|
* Update this sketch with a given unsigned 32-bit integer.
|
268
280
|
* For compatibility with Java implementation.
|
269
|
-
* @param
|
281
|
+
* @param key uint32_t to update the sketch with
|
282
|
+
* @param value to update the sketch with
|
270
283
|
*/
|
271
284
|
template<typename FwdUpdate>
|
272
285
|
inline void update(uint32_t key, FwdUpdate&& value);
|
@@ -274,7 +287,8 @@ public:
|
|
274
287
|
/**
|
275
288
|
* Update this sketch with a given signed 32-bit integer.
|
276
289
|
* For compatibility with Java implementation.
|
277
|
-
* @param
|
290
|
+
* @param key int32_t to update the sketch with
|
291
|
+
* @param value to update the sketch with
|
278
292
|
*/
|
279
293
|
template<typename FwdUpdate>
|
280
294
|
inline void update(int32_t key, FwdUpdate&& value);
|
@@ -282,7 +296,8 @@ public:
|
|
282
296
|
/**
|
283
297
|
* Update this sketch with a given unsigned 16-bit integer.
|
284
298
|
* For compatibility with Java implementation.
|
285
|
-
* @param
|
299
|
+
* @param key uint16_t to update the sketch with
|
300
|
+
* @param value to update the sketch with
|
286
301
|
*/
|
287
302
|
template<typename FwdUpdate>
|
288
303
|
inline void update(uint16_t key, FwdUpdate&& value);
|
@@ -290,7 +305,8 @@ public:
|
|
290
305
|
/**
|
291
306
|
* Update this sketch with a given signed 16-bit integer.
|
292
307
|
* For compatibility with Java implementation.
|
293
|
-
* @param
|
308
|
+
* @param key int16_t to update the sketch with
|
309
|
+
* @param value to update the sketch with
|
294
310
|
*/
|
295
311
|
template<typename FwdUpdate>
|
296
312
|
inline void update(int16_t key, FwdUpdate&& value);
|
@@ -298,7 +314,8 @@ public:
|
|
298
314
|
/**
|
299
315
|
* Update this sketch with a given unsigned 8-bit integer.
|
300
316
|
* For compatibility with Java implementation.
|
301
|
-
* @param
|
317
|
+
* @param key uint8_t to update the sketch with
|
318
|
+
* @param value to update the sketch with
|
302
319
|
*/
|
303
320
|
template<typename FwdUpdate>
|
304
321
|
inline void update(uint8_t key, FwdUpdate&& value);
|
@@ -306,7 +323,8 @@ public:
|
|
306
323
|
/**
|
307
324
|
* Update this sketch with a given signed 8-bit integer.
|
308
325
|
* For compatibility with Java implementation.
|
309
|
-
* @param
|
326
|
+
* @param key int8_t to update the sketch with
|
327
|
+
* @param value to update the sketch with
|
310
328
|
*/
|
311
329
|
template<typename FwdUpdate>
|
312
330
|
inline void update(int8_t key, FwdUpdate&& value);
|
@@ -314,7 +332,8 @@ public:
|
|
314
332
|
/**
|
315
333
|
* Update this sketch with a given double-precision floating point value.
|
316
334
|
* For compatibility with Java implementation.
|
317
|
-
* @param
|
335
|
+
* @param key double to update the sketch with
|
336
|
+
* @param value to update the sketch with
|
318
337
|
*/
|
319
338
|
template<typename FwdUpdate>
|
320
339
|
inline void update(double key, FwdUpdate&& value);
|
@@ -322,7 +341,8 @@ public:
|
|
322
341
|
/**
|
323
342
|
* Update this sketch with a given floating point value.
|
324
343
|
* For compatibility with Java implementation.
|
325
|
-
* @param
|
344
|
+
* @param key float to update the sketch with
|
345
|
+
* @param value to update the sketch with
|
326
346
|
*/
|
327
347
|
template<typename FwdUpdate>
|
328
348
|
inline void update(float key, FwdUpdate&& value);
|
@@ -337,8 +357,9 @@ public:
|
|
337
357
|
* Otherwise two sketches that should represent overlapping sets will be disjoint
|
338
358
|
* For instance, for signed 32-bit values call update(int32_t) method above,
|
339
359
|
* which does widening conversion to int64_t, if compatibility with Java is expected
|
340
|
-
* @param
|
360
|
+
* @param key pointer to the data
|
341
361
|
* @param length of the data in bytes
|
362
|
+
* @param value to update the sketch with
|
342
363
|
*/
|
343
364
|
template<typename FwdUpdate>
|
344
365
|
void update(const void* key, size_t length, FwdUpdate&& value);
|
@@ -355,7 +376,7 @@ public:
|
|
355
376
|
|
356
377
|
/**
|
357
378
|
* Converts this sketch to a compact sketch (ordered or unordered).
|
358
|
-
* @param ordered optional flag to specify if ordered sketch should be produced
|
379
|
+
* @param ordered optional flag to specify if an ordered sketch should be produced
|
359
380
|
* @return compact sketch
|
360
381
|
*/
|
361
382
|
compact_tuple_sketch<Summary, Allocator> compact(bool ordered = true) const;
|
@@ -375,8 +396,10 @@ protected:
|
|
375
396
|
virtual void print_specifics(std::ostringstream& os) const;
|
376
397
|
};
|
377
398
|
|
378
|
-
|
379
|
-
|
399
|
+
/**
|
400
|
+
* Compact Tuple sketch.
|
401
|
+
* This is an immutable form of the Tuple sketch, the form that can be serialized and deserialized.
|
402
|
+
*/
|
380
403
|
template<
|
381
404
|
typename Summary,
|
382
405
|
typename Allocator = std::allocator<Summary>
|
@@ -406,13 +429,48 @@ public:
|
|
406
429
|
// - as a result of a set operation
|
407
430
|
// - by deserializing a previously serialized compact sketch
|
408
431
|
|
432
|
+
/**
|
433
|
+
* Copy constructor.
|
434
|
+
* Constructs a compact sketch from another sketch (either update or compact)
|
435
|
+
* @param other sketch to be copied
|
436
|
+
* @param ordered if true make the resulting sketch ordered
|
437
|
+
*/
|
409
438
|
compact_tuple_sketch(const Base& other, bool ordered);
|
410
|
-
|
439
|
+
|
440
|
+
/**
|
441
|
+
* Copy constructor.
|
442
|
+
* @param other sketch to be copied
|
443
|
+
*/
|
444
|
+
compact_tuple_sketch(const compact_tuple_sketch& other) = default;
|
445
|
+
|
446
|
+
/**
|
447
|
+
* Move constructor.
|
448
|
+
* @param other sketch to be moved
|
449
|
+
*/
|
411
450
|
compact_tuple_sketch(compact_tuple_sketch&&) noexcept;
|
451
|
+
|
412
452
|
virtual ~compact_tuple_sketch() = default;
|
413
|
-
compact_tuple_sketch& operator=(const compact_tuple_sketch&) = default;
|
414
|
-
compact_tuple_sketch& operator=(compact_tuple_sketch&&) = default;
|
415
453
|
|
454
|
+
/**
|
455
|
+
* Copy assignment
|
456
|
+
* @param other sketch to be copied
|
457
|
+
* @return reference to this sketch
|
458
|
+
*/
|
459
|
+
compact_tuple_sketch& operator=(const compact_tuple_sketch& other) = default;
|
460
|
+
|
461
|
+
/**
|
462
|
+
* Move assignment
|
463
|
+
* @param other sketch to be moved
|
464
|
+
* @return reference to this sketch
|
465
|
+
*/
|
466
|
+
compact_tuple_sketch& operator=(compact_tuple_sketch&& other) = default;
|
467
|
+
|
468
|
+
/**
|
469
|
+
* Constructor from Theta sketch
|
470
|
+
* @param other Theta sketch to be constructed from
|
471
|
+
* @param summary Summary instance to be associated with each entry
|
472
|
+
* @param ordered if true make the resulting sketch ordered
|
473
|
+
*/
|
416
474
|
compact_tuple_sketch(const theta_sketch_alloc<AllocU64>& other, const Summary& summary, bool ordered = true);
|
417
475
|
|
418
476
|
virtual Allocator get_allocator() const;
|
@@ -425,7 +483,7 @@ public:
|
|
425
483
|
/**
|
426
484
|
* This method serializes the sketch into a given stream in a binary form
|
427
485
|
* @param os output stream
|
428
|
-
* @param instance of a SerDe
|
486
|
+
* @param sd instance of a SerDe
|
429
487
|
*/
|
430
488
|
template<typename SerDe = serde<Summary>>
|
431
489
|
void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
|
@@ -436,7 +494,7 @@ public:
|
|
436
494
|
* It is a blank space of a given size.
|
437
495
|
* This header is used in Datasketches PostgreSQL extension.
|
438
496
|
* @param header_size_bytes space to reserve in front of the sketch
|
439
|
-
* @param instance of a SerDe
|
497
|
+
* @param sd instance of a SerDe
|
440
498
|
* @return serialized sketch as a vector of bytes
|
441
499
|
*/
|
442
500
|
template<typename SerDe = serde<Summary>>
|
@@ -451,8 +509,8 @@ public:
|
|
451
509
|
* This method deserializes a sketch from a given stream.
|
452
510
|
* @param is input stream
|
453
511
|
* @param seed the seed for the hash function that was used to create the sketch
|
454
|
-
* @param instance of a SerDe
|
455
|
-
* @param instance of an Allocator
|
512
|
+
* @param sd instance of a SerDe
|
513
|
+
* @param allocator instance of an Allocator
|
456
514
|
* @return an instance of a sketch
|
457
515
|
*/
|
458
516
|
template<typename SerDe = serde<Summary>>
|
@@ -464,17 +522,14 @@ public:
|
|
464
522
|
* @param bytes pointer to the array of bytes
|
465
523
|
* @param size the size of the array
|
466
524
|
* @param seed the seed for the hash function that was used to create the sketch
|
467
|
-
* @param instance of a SerDe
|
468
|
-
* @param instance of an Allocator
|
525
|
+
* @param sd instance of a SerDe
|
526
|
+
* @param allocator instance of an Allocator
|
469
527
|
* @return an instance of the sketch
|
470
528
|
*/
|
471
529
|
template<typename SerDe = serde<Summary>>
|
472
530
|
static compact_tuple_sketch deserialize(const void* bytes, size_t size, uint64_t seed = DEFAULT_SEED,
|
473
531
|
const SerDe& sd = SerDe(), const Allocator& allocator = Allocator());
|
474
532
|
|
475
|
-
// for internal use
|
476
|
-
compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries);
|
477
|
-
|
478
533
|
protected:
|
479
534
|
bool is_empty_;
|
480
535
|
bool is_ordered_;
|
@@ -520,10 +575,14 @@ protected:
|
|
520
575
|
|
521
576
|
virtual void print_specifics(std::ostringstream& os) const;
|
522
577
|
|
523
|
-
|
578
|
+
template<typename E, typename EK, typename P, typename S, typename CS, typename A> friend class theta_union_base;
|
579
|
+
template<typename E, typename EK, typename P, typename S, typename CS, typename A> friend class theta_intersection_base;
|
580
|
+
template<typename E, typename EK, typename CS, typename A> friend class theta_set_difference_base;
|
581
|
+
compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta, std::vector<Entry, AllocEntry>&& entries);
|
524
582
|
|
525
|
-
|
583
|
+
};
|
526
584
|
|
585
|
+
/// Tuple base builder
|
527
586
|
template<typename Derived, typename Policy, typename Allocator>
|
528
587
|
class tuple_base_builder: public theta_base_builder<Derived, Allocator> {
|
529
588
|
public:
|
@@ -533,11 +592,15 @@ protected:
|
|
533
592
|
Policy policy_;
|
534
593
|
};
|
535
594
|
|
595
|
+
/// Update Tuple sketch builder
|
536
596
|
template<typename S, typename U, typename P, typename A>
|
537
597
|
class update_tuple_sketch<S, U, P, A>::builder: public tuple_base_builder<builder, P, A> {
|
538
598
|
public:
|
539
599
|
/**
|
600
|
+
* Constructor
|
540
601
|
* Creates an instance of the builder with default parameters.
|
602
|
+
* @param policy user-defined way of creating and updating Summary
|
603
|
+
* @param allocator instance of an Allocator to pass to created sketches
|
541
604
|
*/
|
542
605
|
builder(const P& policy = P(), const A& allocator = A());
|
543
606
|
|
@@ -27,15 +27,19 @@ namespace datasketches {
|
|
27
27
|
|
28
28
|
// for types with defined + operation
|
29
29
|
template<typename Summary>
|
30
|
-
struct
|
30
|
+
struct default_tuple_union_policy {
|
31
31
|
void operator()(Summary& summary, const Summary& other) const {
|
32
32
|
summary += other;
|
33
33
|
}
|
34
34
|
};
|
35
35
|
|
36
|
+
/**
|
37
|
+
* Tuple Union.
|
38
|
+
* Computes union of Tuple sketches. There is no constructor. Use builder instead.
|
39
|
+
*/
|
36
40
|
template<
|
37
41
|
typename Summary,
|
38
|
-
typename Policy =
|
42
|
+
typename Policy = default_tuple_union_policy<Summary>,
|
39
43
|
typename Allocator = std::allocator<Summary>
|
40
44
|
>
|
41
45
|
class tuple_union {
|
@@ -50,15 +54,15 @@ public:
|
|
50
54
|
// reformulate the external policy that operates on Summary
|
51
55
|
// in terms of operations on Entry
|
52
56
|
struct internal_policy {
|
53
|
-
internal_policy(const Policy&
|
57
|
+
internal_policy(const Policy& external_policy): external_policy_(external_policy) {}
|
54
58
|
void operator()(Entry& internal_entry, const Entry& incoming_entry) const {
|
55
|
-
|
59
|
+
external_policy_(internal_entry.second, incoming_entry.second);
|
56
60
|
}
|
57
61
|
void operator()(Entry& internal_entry, Entry&& incoming_entry) const {
|
58
|
-
|
62
|
+
external_policy_(internal_entry.second, std::move(incoming_entry.second));
|
59
63
|
}
|
60
|
-
const Policy&
|
61
|
-
Policy
|
64
|
+
const Policy& get_external_policy() const { return external_policy_; }
|
65
|
+
Policy external_policy_;
|
62
66
|
};
|
63
67
|
|
64
68
|
using State = theta_union_base<Entry, ExtractKey, internal_policy, Sketch, CompactSketch, AllocEntry>;
|
@@ -67,15 +71,15 @@ public:
|
|
67
71
|
class builder;
|
68
72
|
|
69
73
|
/**
|
70
|
-
*
|
74
|
+
* Update the union with a given sketch
|
71
75
|
* @param sketch to update the union with
|
72
76
|
*/
|
73
77
|
template<typename FwdSketch>
|
74
78
|
void update(FwdSketch&& sketch);
|
75
79
|
|
76
80
|
/**
|
77
|
-
*
|
78
|
-
* @param ordered optional flag to specify if ordered sketch should be produced
|
81
|
+
* Produces a copy of the current state of the union as a compact sketch.
|
82
|
+
* @param ordered optional flag to specify if an ordered sketch should be produced
|
79
83
|
* @return the result of the union
|
80
84
|
*/
|
81
85
|
CompactSketch get_result(bool ordered = true) const;
|
@@ -92,16 +96,20 @@ protected:
|
|
92
96
|
tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
93
97
|
};
|
94
98
|
|
99
|
+
/// Tuple union builder
|
95
100
|
template<typename S, typename P, typename A>
|
96
101
|
class tuple_union<S, P, A>::builder: public tuple_base_builder<builder, P, A> {
|
97
102
|
public:
|
98
103
|
/**
|
104
|
+
* Constructor.
|
99
105
|
* Creates an instance of the builder with default parameters.
|
106
|
+
* @param policy user-defined way of merging an incoming Summary into the retained one
|
107
|
+
* @param allocator instance of an Allocator to pass to created unions
|
100
108
|
*/
|
101
109
|
builder(const P& policy = P(), const A& allocator = A());
|
102
110
|
|
103
111
|
/**
|
104
|
-
*
|
112
|
+
* Create an instance of the union with predefined parameters.
|
105
113
|
* @return an instance of the union
|
106
114
|
*/
|
107
115
|
tuple_union build() const;
|
@@ -20,7 +20,6 @@ add_executable(tuple_test)
|
|
20
20
|
target_link_libraries(tuple_test tuple common_test_lib)
|
21
21
|
|
22
22
|
set_target_properties(tuple_test PROPERTIES
|
23
|
-
CXX_STANDARD 11
|
24
23
|
CXX_STANDARD_REQUIRED YES
|
25
24
|
)
|
26
25
|
|
@@ -47,3 +46,19 @@ target_sources(tuple_test
|
|
47
46
|
array_of_doubles_sketch_test.cpp
|
48
47
|
engagement_test.cpp
|
49
48
|
)
|
49
|
+
|
50
|
+
if (SERDE_COMPAT)
|
51
|
+
target_sources(tuple_test
|
52
|
+
PRIVATE
|
53
|
+
aod_sketch_deserialize_from_java_test.cpp
|
54
|
+
tuple_sketch_deserialize_from_java_test.cpp
|
55
|
+
)
|
56
|
+
endif()
|
57
|
+
|
58
|
+
if (GENERATE)
|
59
|
+
target_sources(tuple_test
|
60
|
+
PRIVATE
|
61
|
+
aod_sketch_serialize_for_java.cpp
|
62
|
+
tuple_sketch_serialize_for_java.cpp
|
63
|
+
)
|
64
|
+
endif()
|
@@ -0,0 +1,76 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#include <catch2/catch.hpp>
|
21
|
+
#include <fstream>
|
22
|
+
|
23
|
+
#include "array_of_doubles_sketch.hpp"
|
24
|
+
|
25
|
+
namespace datasketches {
|
26
|
+
|
27
|
+
// assume the binary sketches for this test have been generated by datasketches-java code
|
28
|
+
// in the subdirectory called "java" in the root directory of this project
|
29
|
+
static std::string testBinaryInputPath = std::string(TEST_BINARY_INPUT_PATH) + "../../java/";
|
30
|
+
|
31
|
+
TEST_CASE("aod sketch one value", "[serde_compat]") {
|
32
|
+
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
|
33
|
+
for (const unsigned n: n_arr) {
|
34
|
+
std::ifstream is;
|
35
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
36
|
+
is.open(testBinaryInputPath + "aod_1_n" + std::to_string(n) + "_java.sk", std::ios::binary);
|
37
|
+
const auto sketch = compact_array_of_doubles_sketch::deserialize(is);
|
38
|
+
REQUIRE(sketch.is_empty() == (n == 0));
|
39
|
+
REQUIRE(sketch.is_estimation_mode() == (n > 1000));
|
40
|
+
REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
|
41
|
+
REQUIRE(sketch.get_num_values() == 1);
|
42
|
+
for (const auto& entry: sketch) {
|
43
|
+
REQUIRE(entry.first < sketch.get_theta64());
|
44
|
+
}
|
45
|
+
}
|
46
|
+
}
|
47
|
+
|
48
|
+
TEST_CASE("aod sketch three values", "[serde_compat]") {
|
49
|
+
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
|
50
|
+
for (const unsigned n: n_arr) {
|
51
|
+
std::ifstream is;
|
52
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
53
|
+
is.open(testBinaryInputPath + "aod_3_n" + std::to_string(n) + "_java.sk", std::ios::binary);
|
54
|
+
const auto sketch = compact_array_of_doubles_sketch::deserialize(is);
|
55
|
+
REQUIRE(sketch.is_empty() == (n == 0));
|
56
|
+
REQUIRE(sketch.is_estimation_mode() == (n > 1000));
|
57
|
+
REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
|
58
|
+
REQUIRE(sketch.get_num_values() == 3);
|
59
|
+
for (const auto& entry: sketch) {
|
60
|
+
REQUIRE(entry.first < sketch.get_theta64());
|
61
|
+
REQUIRE(entry.second[0] == entry.second[1]);
|
62
|
+
REQUIRE(entry.second[0] == entry.second[2]);
|
63
|
+
}
|
64
|
+
}
|
65
|
+
}
|
66
|
+
|
67
|
+
TEST_CASE("aod sketch non-empty no entries", "[serde_compat]") {
|
68
|
+
std::ifstream is;
|
69
|
+
is.exceptions(std::ios::failbit | std::ios::badbit);
|
70
|
+
is.open(testBinaryInputPath + "aod_1_non_empty_no_entries_java.sk", std::ios::binary);
|
71
|
+
const auto sketch = compact_array_of_doubles_sketch::deserialize(is);
|
72
|
+
REQUIRE_FALSE(sketch.is_empty());
|
73
|
+
REQUIRE(sketch.get_num_retained() == 0);
|
74
|
+
}
|
75
|
+
|
76
|
+
} /* namespace datasketches */
|
@@ -0,0 +1,62 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#include <catch2/catch.hpp>
|
21
|
+
#include <fstream>
|
22
|
+
|
23
|
+
#include "array_of_doubles_sketch.hpp"
|
24
|
+
|
25
|
+
namespace datasketches {
|
26
|
+
|
27
|
+
TEST_CASE("aod sketch generate one value", "[serialize_for_java]") {
|
28
|
+
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
|
29
|
+
for (const unsigned n: n_arr) {
|
30
|
+
auto sketch = update_array_of_doubles_sketch::builder().build();
|
31
|
+
for (unsigned i = 0; i < n; ++i) sketch.update(i, std::vector<double>(1, i));
|
32
|
+
REQUIRE(sketch.is_empty() == (n == 0));
|
33
|
+
REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
|
34
|
+
std::ofstream os("aod_1_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
|
35
|
+
sketch.compact().serialize(os);
|
36
|
+
}
|
37
|
+
}
|
38
|
+
|
39
|
+
TEST_CASE("aod sketch generate three values", "[serialize_for_java]") {
|
40
|
+
const unsigned n_arr[] = {0, 1, 10, 100, 1000, 10000, 100000, 1000000};
|
41
|
+
for (const unsigned n: n_arr) {
|
42
|
+
auto sketch = update_array_of_doubles_sketch::builder(3).build();
|
43
|
+
for (unsigned i = 0; i < n; ++i) sketch.update(i, std::vector<double>(3, i));
|
44
|
+
REQUIRE(sketch.is_empty() == (n == 0));
|
45
|
+
REQUIRE(sketch.get_estimate() == Approx(n).margin(n * 0.03));
|
46
|
+
std::ofstream os("aod_3_n" + std::to_string(n) + "_cpp.sk", std::ios::binary);
|
47
|
+
sketch.compact().serialize(os);
|
48
|
+
}
|
49
|
+
}
|
50
|
+
|
51
|
+
TEST_CASE("aod sketch generate non-empty no entries", "[serialize_for_java]") {
|
52
|
+
auto sketch = update_array_of_doubles_sketch::builder().set_p(0.01).build();
|
53
|
+
// here we rely on the fact that hash of 1 happens to be greater than 0.01 (when normalized)
|
54
|
+
// and therefore gets rejected
|
55
|
+
sketch.update(1, std::vector<double>({1}));
|
56
|
+
REQUIRE_FALSE(sketch.is_empty());
|
57
|
+
REQUIRE(sketch.get_num_retained() == 0);
|
58
|
+
std::ofstream os("aod_1_non_empty_no_entries_cpp.sk", std::ios::binary);
|
59
|
+
sketch.compact().serialize(os);
|
60
|
+
}
|
61
|
+
|
62
|
+
} /* namespace datasketches */
|