datasketches 0.2.2 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/README.md +8 -8
- data/ext/datasketches/kll_wrapper.cpp +5 -1
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +27 -5
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +18 -13
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +16 -0
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +10 -6
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +30 -12
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +5 -19
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +103 -44
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +110 -130
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +156 -23
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +17 -6
- data/vendor/datasketches-cpp/python/README.md +57 -50
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +49 -14
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +12 -5
- data/vendor/datasketches-cpp/python/tests/kll_test.py +12 -6
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
- data/vendor/datasketches-cpp/req/CMakeLists.txt +6 -21
- data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +66 -61
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +54 -12
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +45 -34
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +33 -15
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
- data/vendor/datasketches-cpp/setup.py +10 -7
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +92 -23
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +3 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +150 -93
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -2
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +9 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +39 -10
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +446 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +429 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -11
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +29 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +34 -14
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +8 -0
- metadata +34 -12
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
|
@@ -23,6 +23,7 @@
|
|
|
23
23
|
#include <cstring>
|
|
24
24
|
#include <limits>
|
|
25
25
|
#include <sstream>
|
|
26
|
+
#include <stdexcept>
|
|
26
27
|
|
|
27
28
|
#include "memory_operations.hpp"
|
|
28
29
|
|
|
@@ -160,7 +161,8 @@ frequent_items_sketch<T, W, H, E, S, A>::get_frequent_items(frequent_items_error
|
|
|
160
161
|
}
|
|
161
162
|
|
|
162
163
|
template<typename T, typename W, typename H, typename E, typename S, typename A>
|
|
163
|
-
|
|
164
|
+
template<typename SerDe>
|
|
165
|
+
void frequent_items_sketch<T, W, H, E, S, A>::serialize(std::ostream& os, const SerDe& sd) const {
|
|
164
166
|
const uint8_t preamble_longs = is_empty() ? PREAMBLE_LONGS_EMPTY : PREAMBLE_LONGS_NONEMPTY;
|
|
165
167
|
write(os, preamble_longs);
|
|
166
168
|
const uint8_t serial_version = SERIAL_VERSION;
|
|
@@ -198,23 +200,25 @@ void frequent_items_sketch<T, W, H, E, S, A>::serialize(std::ostream& os) const
|
|
|
198
200
|
}
|
|
199
201
|
write(os, weights, sizeof(W) * num_items);
|
|
200
202
|
aw.deallocate(weights, num_items);
|
|
201
|
-
|
|
203
|
+
sd.serialize(os, items, num_items);
|
|
202
204
|
for (i = 0; i < num_items; i++) items[i].~T();
|
|
203
205
|
alloc.deallocate(items, num_items);
|
|
204
206
|
}
|
|
205
207
|
}
|
|
206
208
|
|
|
207
209
|
template<typename T, typename W, typename H, typename E, typename S, typename A>
|
|
208
|
-
|
|
210
|
+
template<typename SerDe>
|
|
211
|
+
size_t frequent_items_sketch<T, W, H, E, S, A>::get_serialized_size_bytes(const SerDe& sd) const {
|
|
209
212
|
if (is_empty()) return PREAMBLE_LONGS_EMPTY * sizeof(uint64_t);
|
|
210
213
|
size_t size = PREAMBLE_LONGS_NONEMPTY * sizeof(uint64_t) + map.get_num_active() * sizeof(W);
|
|
211
|
-
for (auto it: map) size +=
|
|
214
|
+
for (auto it: map) size += sd.size_of_item(it.first);
|
|
212
215
|
return size;
|
|
213
216
|
}
|
|
214
217
|
|
|
215
218
|
template<typename T, typename W, typename H, typename E, typename S, typename A>
|
|
216
|
-
|
|
217
|
-
|
|
219
|
+
template<typename SerDe>
|
|
220
|
+
auto frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const -> vector_bytes {
|
|
221
|
+
const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
|
|
218
222
|
vector_bytes bytes(size, 0, map.get_allocator());
|
|
219
223
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
|
220
224
|
uint8_t* end_ptr = ptr + size;
|
|
@@ -255,7 +259,7 @@ auto frequent_items_sketch<T, W, H, E, S, A>::serialize(unsigned header_size_byt
|
|
|
255
259
|
ptr += copy_to_mem(weights, ptr, sizeof(W) * num_items);
|
|
256
260
|
aw.deallocate(weights, num_items);
|
|
257
261
|
const size_t bytes_remaining = end_ptr - ptr;
|
|
258
|
-
ptr +=
|
|
262
|
+
ptr += sd.serialize(ptr, bytes_remaining, items, num_items);
|
|
259
263
|
for (i = 0; i < num_items; i++) items[i].~T();
|
|
260
264
|
alloc.deallocate(items, num_items);
|
|
261
265
|
}
|
|
@@ -284,6 +288,12 @@ private:
|
|
|
284
288
|
|
|
285
289
|
template<typename T, typename W, typename H, typename E, typename S, typename A>
|
|
286
290
|
frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is, const A& allocator) {
|
|
291
|
+
return deserialize(is, S(), allocator);
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
template<typename T, typename W, typename H, typename E, typename S, typename A>
|
|
295
|
+
template<typename SerDe>
|
|
296
|
+
frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
|
|
287
297
|
const auto preamble_longs = read<uint8_t>(is);
|
|
288
298
|
const auto serial_version = read<uint8_t>(is);
|
|
289
299
|
const auto family_id = read<uint8_t>(is);
|
|
@@ -312,7 +322,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
|
|
|
312
322
|
read(is, weights.data(), sizeof(W) * num_items);
|
|
313
323
|
A alloc(allocator);
|
|
314
324
|
std::unique_ptr<T, items_deleter> items(alloc.allocate(num_items), items_deleter(num_items, false, alloc));
|
|
315
|
-
|
|
325
|
+
sd.deserialize(is, items.get(), num_items);
|
|
316
326
|
items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
|
|
317
327
|
for (uint32_t i = 0; i < num_items; i++) {
|
|
318
328
|
sketch.update(std::move(items.get()[i]), weights[i]);
|
|
@@ -327,6 +337,12 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
|
|
|
327
337
|
|
|
328
338
|
template<typename T, typename W, typename H, typename E, typename S, typename A>
|
|
329
339
|
frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size, const A& allocator) {
|
|
340
|
+
return deserialize(bytes, size, S(), allocator);
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
template<typename T, typename W, typename H, typename E, typename S, typename A>
|
|
344
|
+
template<typename SerDe>
|
|
345
|
+
frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
|
|
330
346
|
ensure_minimum_memory(size, 8);
|
|
331
347
|
const char* ptr = static_cast<const char*>(bytes);
|
|
332
348
|
const char* base = static_cast<const char*>(bytes);
|
|
@@ -350,7 +366,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
|
|
|
350
366
|
check_serial_version(serial_version);
|
|
351
367
|
check_family_id(family_id);
|
|
352
368
|
check_size(lg_cur_size, lg_max_size);
|
|
353
|
-
ensure_minimum_memory(size,
|
|
369
|
+
ensure_minimum_memory(size, preamble_longs * sizeof(uint64_t));
|
|
354
370
|
|
|
355
371
|
frequent_items_sketch<T, W, H, E, S, A> sketch(lg_max_size, lg_cur_size, allocator);
|
|
356
372
|
if (!is_empty) {
|
|
@@ -370,7 +386,7 @@ frequent_items_sketch<T, W, H, E, S, A> frequent_items_sketch<T, W, H, E, S, A>:
|
|
|
370
386
|
A alloc(allocator);
|
|
371
387
|
std::unique_ptr<T, items_deleter> items(alloc.allocate(num_items), items_deleter(num_items, false, alloc));
|
|
372
388
|
const size_t bytes_remaining = size - (ptr - base);
|
|
373
|
-
ptr +=
|
|
389
|
+
ptr += sd.deserialize(ptr, bytes_remaining, items.get(), num_items);
|
|
374
390
|
items.get_deleter().set_destroy(true); // serde did not throw, so the items must be constructed
|
|
375
391
|
for (uint32_t i = 0; i < num_items; i++) {
|
|
376
392
|
sketch.update(std::move(items.get()[i]), weights[i]);
|
|
@@ -421,7 +437,9 @@ void frequent_items_sketch<T, W, H, E, S, A>::check_size(uint8_t lg_cur_size, ui
|
|
|
421
437
|
|
|
422
438
|
template<typename T, typename W, typename H, typename E, typename S, typename A>
|
|
423
439
|
string<A> frequent_items_sketch<T, W, H, E, S, A>::to_string(bool print_items) const {
|
|
424
|
-
|
|
440
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
|
441
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
|
442
|
+
std::ostringstream os;
|
|
425
443
|
os << "### Frequent items sketch summary:" << std::endl;
|
|
426
444
|
os << " lg cur map size : " << (int) map.get_lg_cur_size() << std::endl;
|
|
427
445
|
os << " lg max map size : " << (int) map.get_lg_max_size() << std::endl;
|
|
@@ -444,7 +462,7 @@ string<A> frequent_items_sketch<T, W, H, E, S, A>::to_string(bool print_items) c
|
|
|
444
462
|
}
|
|
445
463
|
os << "### End items" << std::endl;
|
|
446
464
|
}
|
|
447
|
-
return os.str();
|
|
465
|
+
return string<A>(os.str().c_str(), map.get_allocator());
|
|
448
466
|
}
|
|
449
467
|
|
|
450
468
|
// version for integral signed type
|
|
@@ -19,6 +19,7 @@
|
|
|
19
19
|
|
|
20
20
|
#include <catch.hpp>
|
|
21
21
|
#include <sstream>
|
|
22
|
+
#include <stdexcept>
|
|
22
23
|
|
|
23
24
|
#include "frequent_items_sketch.hpp"
|
|
24
25
|
#include "test_type.hpp"
|
|
@@ -59,7 +60,7 @@ TEST_CASE("frequent items: custom type", "[frequent_items_sketch]") {
|
|
|
59
60
|
REQUIRE(sketch.get_maximum_error() == sketch2.get_maximum_error());
|
|
60
61
|
|
|
61
62
|
auto bytes = sketch.serialize();
|
|
62
|
-
auto sketch3 = frequent_test_type_sketch::deserialize(bytes.data(), bytes.size(), 0);
|
|
63
|
+
auto sketch3 = frequent_test_type_sketch::deserialize(bytes.data(), bytes.size(), alloc(0));
|
|
63
64
|
REQUIRE_FALSE(sketch3.is_empty());
|
|
64
65
|
REQUIRE(sketch3.get_total_weight() == 17);
|
|
65
66
|
REQUIRE(sketch3.get_estimate(1) == 10);
|
|
@@ -32,64 +32,41 @@ target_include_directories(hll
|
|
|
32
32
|
target_link_libraries(hll INTERFACE common)
|
|
33
33
|
target_compile_features(hll INTERFACE cxx_std_11)
|
|
34
34
|
|
|
35
|
-
# TODO: would be useful if this didn't need to be reproduced in target_sources(), too
|
|
36
|
-
set(hll_HEADERS "")
|
|
37
|
-
list(APPEND hll_HEADERS "include/hll.hpp;include/AuxHashMap.hpp;include/CompositeInterpolationXTable.hpp")
|
|
38
|
-
list(APPEND hll_HEADERS "include/hll.private.hpp;include/HllSketchImplFactory.hpp")
|
|
39
|
-
list(APPEND hll_HEADERS "include/CouponHashSet.hpp;include/CouponList.hpp")
|
|
40
|
-
list(APPEND hll_HEADERS "include/CubicInterpolation.hpp;include/HarmonicNumbers.hpp;include/Hll4Array.hpp")
|
|
41
|
-
list(APPEND hll_HEADERS "include/Hll6Array.hpp;include/Hll8Array.hpp;include/HllArray.hpp")
|
|
42
|
-
list(APPEND hll_HEADERS "include/HllSketchImpl.hpp")
|
|
43
|
-
list(APPEND hll_HEADERS "include/HllUtil.hpp;include/coupon_iterator.hpp")
|
|
44
|
-
list(APPEND hll_HEADERS "include/RelativeErrorTables.hpp;include/AuxHashMap-internal.hpp")
|
|
45
|
-
list(APPEND hll_HEADERS "include/CompositeInterpolationXTable-internal.hpp")
|
|
46
|
-
list(APPEND hll_HEADERS "include/CouponHashSet-internal.hpp;include/CouponList-internal.hpp")
|
|
47
|
-
list(APPEND hll_HEADERS "include/CubicInterpolation-internal.hpp;include/HarmonicNumbers-internal.hpp")
|
|
48
|
-
list(APPEND hll_HEADERS "include/Hll4Array-internal.hpp;include/Hll6Array-internal.hpp")
|
|
49
|
-
list(APPEND hll_HEADERS "include/Hll8Array-internal.hpp;include/HllArray-internal.hpp")
|
|
50
|
-
list(APPEND hll_HEADERS "include/HllSketch-internal.hpp")
|
|
51
|
-
list(APPEND hll_HEADERS "include/HllSketchImpl-internal.hpp;include/HllUnion-internal.hpp")
|
|
52
|
-
list(APPEND hll_HEADERS "include/coupon_iterator-internal.hpp;include/RelativeErrorTables-internal.hpp")
|
|
53
|
-
|
|
54
35
|
install(TARGETS hll
|
|
55
36
|
EXPORT ${PROJECT_NAME}
|
|
56
37
|
)
|
|
57
38
|
|
|
58
|
-
install(FILES
|
|
39
|
+
install(FILES
|
|
40
|
+
include/hll.hpp
|
|
41
|
+
include/AuxHashMap.hpp
|
|
42
|
+
include/CompositeInterpolationXTable.hpp
|
|
43
|
+
include/hll.private.hpp
|
|
44
|
+
include/HllSketchImplFactory.hpp
|
|
45
|
+
include/CouponHashSet.hpp
|
|
46
|
+
include/CouponList.hpp
|
|
47
|
+
include/CubicInterpolation.hpp
|
|
48
|
+
include/HarmonicNumbers.hpp
|
|
49
|
+
include/Hll4Array.hpp
|
|
50
|
+
include/Hll6Array.hpp
|
|
51
|
+
include/Hll8Array.hpp
|
|
52
|
+
include/HllArray.hpp
|
|
53
|
+
include/HllSketchImpl.hpp
|
|
54
|
+
include/HllUtil.hpp
|
|
55
|
+
include/coupon_iterator.hpp
|
|
56
|
+
include/RelativeErrorTables.hpp
|
|
57
|
+
include/AuxHashMap-internal.hpp
|
|
58
|
+
include/CompositeInterpolationXTable-internal.hpp
|
|
59
|
+
include/CouponHashSet-internal.hpp
|
|
60
|
+
include/CouponList-internal.hpp
|
|
61
|
+
include/CubicInterpolation-internal.hpp
|
|
62
|
+
include/HarmonicNumbers-internal.hpp
|
|
63
|
+
include/Hll4Array-internal.hpp
|
|
64
|
+
include/Hll6Array-internal.hpp
|
|
65
|
+
include/Hll8Array-internal.hpp
|
|
66
|
+
include/HllArray-internal.hpp
|
|
67
|
+
include/HllSketch-internal.hpp
|
|
68
|
+
include/HllSketchImpl-internal.hpp
|
|
69
|
+
include/HllUnion-internal.hpp
|
|
70
|
+
include/coupon_iterator-internal.hpp
|
|
71
|
+
include/RelativeErrorTables-internal.hpp
|
|
59
72
|
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|
|
60
|
-
|
|
61
|
-
target_sources(hll
|
|
62
|
-
INTERFACE
|
|
63
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/hll.hpp
|
|
64
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/hll.private.hpp
|
|
65
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/AuxHashMap.hpp
|
|
66
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CompositeInterpolationXTable.hpp
|
|
67
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CouponHashSet.hpp
|
|
68
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CouponList.hpp
|
|
69
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CubicInterpolation.hpp
|
|
70
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HarmonicNumbers.hpp
|
|
71
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll4Array.hpp
|
|
72
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll6Array.hpp
|
|
73
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll8Array.hpp
|
|
74
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllArray.hpp
|
|
75
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImpl.hpp
|
|
76
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImplFactory.hpp
|
|
77
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllUtil.hpp
|
|
78
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/RelativeErrorTables.hpp
|
|
79
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/coupon_iterator.hpp
|
|
80
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/AuxHashMap-internal.hpp
|
|
81
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CompositeInterpolationXTable-internal.hpp
|
|
82
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CouponHashSet-internal.hpp
|
|
83
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CouponList-internal.hpp
|
|
84
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/CubicInterpolation-internal.hpp
|
|
85
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HarmonicNumbers-internal.hpp
|
|
86
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll4Array-internal.hpp
|
|
87
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll6Array-internal.hpp
|
|
88
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/Hll8Array-internal.hpp
|
|
89
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllArray-internal.hpp
|
|
90
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketch-internal.hpp
|
|
91
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllSketchImpl-internal.hpp
|
|
92
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/HllUnion-internal.hpp
|
|
93
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/RelativeErrorTables-internal.hpp
|
|
94
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/coupon_iterator-internal.hpp
|
|
95
|
-
)
|
|
@@ -24,6 +24,7 @@
|
|
|
24
24
|
|
|
25
25
|
#include <cstring>
|
|
26
26
|
#include <exception>
|
|
27
|
+
#include <stdexcept>
|
|
27
28
|
|
|
28
29
|
namespace datasketches {
|
|
29
30
|
|
|
@@ -113,10 +114,9 @@ CouponHashSet<A>* CouponHashSet<A>::newSet(const void* bytes, size_t len, const
|
|
|
113
114
|
} else {
|
|
114
115
|
sketch->coupons_.resize(1ULL << lgArrInts);
|
|
115
116
|
sketch->couponCount_ = couponCount;
|
|
116
|
-
// only need to read valid coupons, unlike in stream case
|
|
117
117
|
std::memcpy(sketch->coupons_.data(),
|
|
118
118
|
data + hll_constants::HASH_SET_INT_ARR_START,
|
|
119
|
-
|
|
119
|
+
couponsInArray * sizeof(uint32_t));
|
|
120
120
|
}
|
|
121
121
|
|
|
122
122
|
return sketch;
|
|
@@ -246,10 +246,12 @@ string<A> hll_sketch_alloc<A>::to_string(const bool summary,
|
|
|
246
246
|
const bool detail,
|
|
247
247
|
const bool aux_detail,
|
|
248
248
|
const bool all) const {
|
|
249
|
-
|
|
249
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
|
250
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
|
251
|
+
std::stringstream os;
|
|
250
252
|
if (summary) {
|
|
251
253
|
os << "### HLL sketch summary:" << std::endl
|
|
252
|
-
<< " Log Config K : " << get_lg_config_k() << std::endl
|
|
254
|
+
<< " Log Config K : " << std::to_string(get_lg_config_k()) << std::endl
|
|
253
255
|
<< " Hll Target : " << type_as_string() << std::endl
|
|
254
256
|
<< " Current Mode : " << mode_as_string() << std::endl
|
|
255
257
|
<< " LB : " << get_lower_bound(1) << std::endl
|
|
@@ -258,7 +260,7 @@ string<A> hll_sketch_alloc<A>::to_string(const bool summary,
|
|
|
258
260
|
<< " OutOfOrder flag: " << (is_out_of_order_flag() ? "true" : "false") << std::endl;
|
|
259
261
|
if (get_current_mode() == HLL) {
|
|
260
262
|
HllArray<A>* hllArray = (HllArray<A>*) sketch_impl;
|
|
261
|
-
os << " CurMin : " << hllArray->getCurMin() << std::endl
|
|
263
|
+
os << " CurMin : " << std::to_string(hllArray->getCurMin()) << std::endl
|
|
262
264
|
<< " NumAtCurMin : " << hllArray->getNumAtCurMin() << std::endl
|
|
263
265
|
<< " HipAccum : " << hllArray->getHipAccum() << std::endl
|
|
264
266
|
<< " KxQ0 : " << hllArray->getKxQ0() << std::endl
|
|
@@ -338,7 +340,7 @@ string<A> hll_sketch_alloc<A>::to_string(const bool summary,
|
|
|
338
340
|
}
|
|
339
341
|
}
|
|
340
342
|
|
|
341
|
-
return os.str();
|
|
343
|
+
return string<A>(os.str().c_str(), sketch_impl->getAllocator());
|
|
342
344
|
}
|
|
343
345
|
|
|
344
346
|
template<typename A>
|
|
@@ -23,6 +23,7 @@
|
|
|
23
23
|
#include <cmath>
|
|
24
24
|
#include <string>
|
|
25
25
|
#include <exception>
|
|
26
|
+
#include <stdexcept>
|
|
26
27
|
|
|
27
28
|
#include "hll.hpp"
|
|
28
29
|
#include "CouponList.hpp"
|
|
@@ -31,6 +32,7 @@
|
|
|
31
32
|
namespace datasketches {
|
|
32
33
|
|
|
33
34
|
void println_string(std::string str) {
|
|
35
|
+
unused(str);
|
|
34
36
|
//std::cout << str << std::endl;
|
|
35
37
|
}
|
|
36
38
|
|
|
@@ -17,6 +17,8 @@
|
|
|
17
17
|
* under the License.
|
|
18
18
|
*/
|
|
19
19
|
|
|
20
|
+
#include <stdexcept>
|
|
21
|
+
|
|
20
22
|
#include "hll.hpp"
|
|
21
23
|
|
|
22
24
|
#include <catch.hpp>
|
|
@@ -382,4 +384,61 @@ TEST_CASE("hll sketch: deserialize HLL mode buffer overrun", "[hll_sketch]") {
|
|
|
382
384
|
REQUIRE(test_allocator_total_bytes == 0);
|
|
383
385
|
}
|
|
384
386
|
|
|
387
|
+
TEST_CASE("hll sketch: bytes serialize-deserialize-serialize list mode") {
|
|
388
|
+
test_allocator_total_bytes = 0;
|
|
389
|
+
{
|
|
390
|
+
hll_sketch_test_alloc s1(10, target_hll_type::HLL_4, false, 0);
|
|
391
|
+
s1.update(1);
|
|
392
|
+
s1.update(2);
|
|
393
|
+
s1.update(3);
|
|
394
|
+
std::cout << s1.to_string();
|
|
395
|
+
auto bytes1 = s1.serialize_compact();
|
|
396
|
+
auto s2 = hll_sketch_test_alloc::deserialize(bytes1.data(), bytes1.size(), 0);
|
|
397
|
+
auto bytes2 = s2.serialize_compact();
|
|
398
|
+
REQUIRE(bytes1 == bytes2);
|
|
399
|
+
}
|
|
400
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
TEST_CASE("hll sketch: updatable bytes serialize-deserialize-serialize set mode") {
|
|
404
|
+
test_allocator_total_bytes = 0;
|
|
405
|
+
{
|
|
406
|
+
hll_sketch_test_alloc s1(10, target_hll_type::HLL_4, false, 0);
|
|
407
|
+
for (int i = 0; i < 10; ++i) s1.update(i);
|
|
408
|
+
std::cout << s1.to_string();
|
|
409
|
+
auto bytes1 = s1.serialize_updatable();
|
|
410
|
+
auto s2 = hll_sketch_test_alloc::deserialize(bytes1.data(), bytes1.size(), 0);
|
|
411
|
+
|
|
412
|
+
auto bytes2 = s2.serialize_updatable();
|
|
413
|
+
REQUIRE(bytes1 == bytes2);
|
|
414
|
+
}
|
|
415
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
TEST_CASE("hll sketch: compact bytes serialize-deserialize-serialize set mode") {
|
|
419
|
+
test_allocator_total_bytes = 0;
|
|
420
|
+
{
|
|
421
|
+
hll_sketch_test_alloc s1(10, target_hll_type::HLL_4, false, 0);
|
|
422
|
+
for (int i = 0; i < 10; ++i) s1.update(i);
|
|
423
|
+
std::cout << s1.to_string();
|
|
424
|
+
auto bytes1 = s1.serialize_compact();
|
|
425
|
+
auto s2 = hll_sketch_test_alloc::deserialize(bytes1.data(), bytes1.size(), 0);
|
|
426
|
+
|
|
427
|
+
// cannot just compare bytes here
|
|
428
|
+
// hash set does not preserve the order after reconstruction in compact mode
|
|
429
|
+
// add more to push them to HLL mode
|
|
430
|
+
for (int i = 10; i < 100; ++i) {
|
|
431
|
+
s1.update(i);
|
|
432
|
+
s2.update(i);
|
|
433
|
+
}
|
|
434
|
+
std::cout << s1.to_string();
|
|
435
|
+
std::cout << s2.to_string();
|
|
436
|
+
|
|
437
|
+
auto bytes2 = s1.serialize_compact();
|
|
438
|
+
auto bytes3 = s2.serialize_compact();
|
|
439
|
+
REQUIRE(bytes2 == bytes3);
|
|
440
|
+
}
|
|
441
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
|
442
|
+
}
|
|
443
|
+
|
|
385
444
|
} /* namespace datasketches */
|
|
@@ -32,27 +32,13 @@ target_include_directories(kll
|
|
|
32
32
|
target_link_libraries(kll INTERFACE common)
|
|
33
33
|
target_compile_features(kll INTERFACE cxx_std_11)
|
|
34
34
|
|
|
35
|
-
set(kll_HEADERS "")
|
|
36
|
-
list(APPEND kll_HEADERS "include/kll_sketch.hpp")
|
|
37
|
-
list(APPEND kll_HEADERS "include/kll_sketch_impl.hpp")
|
|
38
|
-
list(APPEND kll_HEADERS "include/kll_helper.hpp")
|
|
39
|
-
list(APPEND kll_HEADERS "include/kll_helper_impl.hpp")
|
|
40
|
-
list(APPEND kll_HEADERS "include/kll_quantile_calculator.hpp")
|
|
41
|
-
list(APPEND kll_HEADERS "include/kll_quantile_calculator_impl.hpp")
|
|
42
|
-
|
|
43
35
|
install(TARGETS kll
|
|
44
36
|
EXPORT ${PROJECT_NAME}
|
|
45
37
|
)
|
|
46
38
|
|
|
47
|
-
install(FILES
|
|
39
|
+
install(FILES
|
|
40
|
+
include/kll_sketch.hpp
|
|
41
|
+
include/kll_sketch_impl.hpp
|
|
42
|
+
include/kll_helper.hpp
|
|
43
|
+
include/kll_helper_impl.hpp
|
|
48
44
|
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DataSketches")
|
|
49
|
-
|
|
50
|
-
target_sources(kll
|
|
51
|
-
INTERFACE
|
|
52
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/kll_helper.hpp
|
|
53
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/kll_helper_impl.hpp
|
|
54
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/kll_sketch.hpp
|
|
55
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/kll_sketch_impl.hpp
|
|
56
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/kll_quantile_calculator.hpp
|
|
57
|
-
${CMAKE_CURRENT_SOURCE_DIR}/include/kll_quantile_calculator_impl.hpp
|
|
58
|
-
)
|
|
@@ -22,13 +22,9 @@
|
|
|
22
22
|
|
|
23
23
|
#include <random>
|
|
24
24
|
#include <stdexcept>
|
|
25
|
-
#include <chrono>
|
|
26
25
|
|
|
27
26
|
namespace datasketches {
|
|
28
27
|
|
|
29
|
-
static std::independent_bits_engine<std::mt19937, 1, uint32_t>
|
|
30
|
-
random_bit(static_cast<uint32_t>(std::chrono::system_clock::now().time_since_epoch().count()));
|
|
31
|
-
|
|
32
28
|
#ifdef KLL_VALIDATION
|
|
33
29
|
extern uint32_t kll_next_offset;
|
|
34
30
|
#endif
|