datasketches 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/cpc_wrapper.cpp +12 -13
- data/ext/datasketches/ext.cpp +1 -1
- data/ext/datasketches/ext.h +4 -0
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/fi_wrapper.cpp +6 -8
- data/ext/datasketches/hll_wrapper.cpp +13 -14
- data/ext/datasketches/kll_wrapper.cpp +28 -76
- data/ext/datasketches/theta_wrapper.cpp +27 -41
- data/ext/datasketches/vo_wrapper.cpp +4 -6
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/README.md +4 -4
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
- data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
- data/vendor/datasketches-cpp/python/README.md +6 -3
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
- data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
- data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
- data/vendor/datasketches-cpp/setup.py +5 -3
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
- data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
- data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
- metadata +43 -34
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
|
@@ -41,13 +41,13 @@ void cpc_init() {
|
|
|
41
41
|
}
|
|
42
42
|
|
|
43
43
|
template<typename A>
|
|
44
|
-
cpc_sketch_alloc<A>::cpc_sketch_alloc(uint8_t lg_k, uint64_t seed):
|
|
44
|
+
cpc_sketch_alloc<A>::cpc_sketch_alloc(uint8_t lg_k, uint64_t seed, const A& allocator):
|
|
45
45
|
lg_k(lg_k),
|
|
46
46
|
seed(seed),
|
|
47
47
|
was_merged(false),
|
|
48
48
|
num_coupons(0),
|
|
49
|
-
surprising_value_table(2, 6 + lg_k),
|
|
50
|
-
sliding_window(),
|
|
49
|
+
surprising_value_table(2, 6 + lg_k, allocator),
|
|
50
|
+
sliding_window(allocator),
|
|
51
51
|
window_offset(0),
|
|
52
52
|
first_interesting_column(0),
|
|
53
53
|
kxp(1 << lg_k),
|
|
@@ -58,6 +58,11 @@ hip_est_accum(0)
|
|
|
58
58
|
}
|
|
59
59
|
}
|
|
60
60
|
|
|
61
|
+
template<typename A>
|
|
62
|
+
A cpc_sketch_alloc<A>::get_allocator() const {
|
|
63
|
+
return sliding_window.get_allocator();
|
|
64
|
+
}
|
|
65
|
+
|
|
61
66
|
template<typename A>
|
|
62
67
|
uint8_t cpc_sketch_alloc<A>::get_lg_k() const {
|
|
63
68
|
return lg_k;
|
|
@@ -277,7 +282,7 @@ void cpc_sketch_alloc<A>::promote_sparse_to_windowed() {
|
|
|
277
282
|
|
|
278
283
|
sliding_window.resize(k, 0); // zero the memory (because we will be OR'ing into it)
|
|
279
284
|
|
|
280
|
-
u32_table<A> new_table(2, 6 + lg_k);
|
|
285
|
+
u32_table<A> new_table(2, 6 + lg_k, sliding_window.get_allocator());
|
|
281
286
|
|
|
282
287
|
const uint32_t* old_slots = surprising_value_table.get_slots();
|
|
283
288
|
const size_t old_num_slots = 1 << surprising_value_table.get_lg_size();
|
|
@@ -401,7 +406,7 @@ string<A> cpc_sketch_alloc<A>::to_string() const {
|
|
|
401
406
|
|
|
402
407
|
template<typename A>
|
|
403
408
|
void cpc_sketch_alloc<A>::serialize(std::ostream& os) const {
|
|
404
|
-
compressed_state<A> compressed;
|
|
409
|
+
compressed_state<A> compressed(A(sliding_window.get_allocator()));
|
|
405
410
|
compressed.table_data_words = 0;
|
|
406
411
|
compressed.table_num_entries = 0;
|
|
407
412
|
compressed.window_data_words = 0;
|
|
@@ -454,7 +459,7 @@ void cpc_sketch_alloc<A>::serialize(std::ostream& os) const {
|
|
|
454
459
|
|
|
455
460
|
template<typename A>
|
|
456
461
|
vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
|
|
457
|
-
compressed_state<A> compressed;
|
|
462
|
+
compressed_state<A> compressed(sliding_window.get_allocator());
|
|
458
463
|
compressed.table_data_words = 0;
|
|
459
464
|
compressed.table_num_entries = 0;
|
|
460
465
|
compressed.window_data_words = 0;
|
|
@@ -464,7 +469,7 @@ vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
|
|
|
464
469
|
const bool has_window = compressed.window_data.size() > 0;
|
|
465
470
|
const uint8_t preamble_ints = get_preamble_ints(num_coupons, has_hip, has_table, has_window);
|
|
466
471
|
const size_t size = header_size_bytes + (preamble_ints + compressed.table_data_words + compressed.window_data_words) * sizeof(uint32_t);
|
|
467
|
-
vector_u8<A> bytes(size);
|
|
472
|
+
vector_u8<A> bytes(size, 0, sliding_window.get_allocator());
|
|
468
473
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
|
469
474
|
ptr += copy_to_mem(&preamble_ints, ptr, sizeof(preamble_ints));
|
|
470
475
|
const uint8_t serial_version = SERIAL_VERSION;
|
|
@@ -511,7 +516,7 @@ vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
|
|
|
511
516
|
}
|
|
512
517
|
|
|
513
518
|
template<typename A>
|
|
514
|
-
cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
|
|
519
|
+
cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) {
|
|
515
520
|
uint8_t preamble_ints;
|
|
516
521
|
is.read((char*)&preamble_ints, sizeof(preamble_ints));
|
|
517
522
|
uint8_t serial_version;
|
|
@@ -529,7 +534,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t
|
|
|
529
534
|
const bool has_hip = flags_byte & (1 << flags::HAS_HIP);
|
|
530
535
|
const bool has_table = flags_byte & (1 << flags::HAS_TABLE);
|
|
531
536
|
const bool has_window = flags_byte & (1 << flags::HAS_WINDOW);
|
|
532
|
-
compressed_state<A> compressed;
|
|
537
|
+
compressed_state<A> compressed(allocator);
|
|
533
538
|
compressed.table_data_words = 0;
|
|
534
539
|
compressed.table_num_entries = 0;
|
|
535
540
|
compressed.window_data_words = 0;
|
|
@@ -583,7 +588,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t
|
|
|
583
588
|
throw std::invalid_argument("Incompatible seed hashes: " + std::to_string(seed_hash) + ", "
|
|
584
589
|
+ std::to_string(compute_seed_hash(seed)));
|
|
585
590
|
}
|
|
586
|
-
uncompressed_state<A> uncompressed;
|
|
591
|
+
uncompressed_state<A> uncompressed(allocator);
|
|
587
592
|
get_compressor<A>().uncompress(compressed, uncompressed, lg_k, num_coupons);
|
|
588
593
|
if (!is.good())
|
|
589
594
|
throw std::runtime_error("error reading from std::istream");
|
|
@@ -592,7 +597,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t
|
|
|
592
597
|
}
|
|
593
598
|
|
|
594
599
|
template<typename A>
|
|
595
|
-
cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
|
|
600
|
+
cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) {
|
|
596
601
|
ensure_minimum_memory(size, 8);
|
|
597
602
|
const char* ptr = static_cast<const char*>(bytes);
|
|
598
603
|
const char* base = static_cast<const char*>(bytes);
|
|
@@ -614,7 +619,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t s
|
|
|
614
619
|
const bool has_table = flags_byte & (1 << flags::HAS_TABLE);
|
|
615
620
|
const bool has_window = flags_byte & (1 << flags::HAS_WINDOW);
|
|
616
621
|
ensure_minimum_memory(size, preamble_ints << 2);
|
|
617
|
-
compressed_state<A> compressed;
|
|
622
|
+
compressed_state<A> compressed(allocator);
|
|
618
623
|
compressed.table_data_words = 0;
|
|
619
624
|
compressed.table_num_entries = 0;
|
|
620
625
|
compressed.window_data_words = 0;
|
|
@@ -677,7 +682,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t s
|
|
|
677
682
|
throw std::invalid_argument("Incompatible seed hashes: " + std::to_string(seed_hash) + ", "
|
|
678
683
|
+ std::to_string(compute_seed_hash(seed)));
|
|
679
684
|
}
|
|
680
|
-
uncompressed_state<A> uncompressed;
|
|
685
|
+
uncompressed_state<A> uncompressed(allocator);
|
|
681
686
|
get_compressor<A>().uncompress(compressed, uncompressed, lg_k, num_coupons);
|
|
682
687
|
return cpc_sketch_alloc(lg_k, num_coupons, first_interesting_column, std::move(uncompressed.table),
|
|
683
688
|
std::move(uncompressed.window), has_hip, kxp, hip_est_accum, seed);
|
|
@@ -766,7 +771,7 @@ vector_u64<A> cpc_sketch_alloc<A>::build_bit_matrix() const {
|
|
|
766
771
|
// Fill the matrix with default rows in which the "early zone" is filled with ones.
|
|
767
772
|
// This is essential for the routine's O(k) time cost (as opposed to O(C)).
|
|
768
773
|
const uint64_t default_row = (static_cast<uint64_t>(1) << window_offset) - 1;
|
|
769
|
-
vector_u64<A> matrix(k, default_row);
|
|
774
|
+
vector_u64<A> matrix(k, default_row, sliding_window.get_allocator());
|
|
770
775
|
|
|
771
776
|
if (num_coupons == 0) return matrix;
|
|
772
777
|
|
|
@@ -35,7 +35,7 @@ namespace datasketches {
|
|
|
35
35
|
*/
|
|
36
36
|
|
|
37
37
|
// alias with default allocator for convenience
|
|
38
|
-
|
|
38
|
+
using cpc_union = cpc_union_alloc<std::allocator<uint8_t>>;
|
|
39
39
|
|
|
40
40
|
template<typename A>
|
|
41
41
|
class cpc_union_alloc {
|
|
@@ -45,7 +45,7 @@ public:
|
|
|
45
45
|
* @param lg_k base 2 logarithm of the number of bins in the sketch
|
|
46
46
|
* @param seed for hash function
|
|
47
47
|
*/
|
|
48
|
-
explicit cpc_union_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED);
|
|
48
|
+
explicit cpc_union_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
|
|
49
49
|
|
|
50
50
|
cpc_union_alloc(const cpc_union_alloc<A>& other);
|
|
51
51
|
cpc_union_alloc(cpc_union_alloc<A>&& other) noexcept;
|
|
@@ -25,16 +25,16 @@
|
|
|
25
25
|
namespace datasketches {
|
|
26
26
|
|
|
27
27
|
template<typename A>
|
|
28
|
-
cpc_union_alloc<A>::cpc_union_alloc(uint8_t lg_k, uint64_t seed):
|
|
28
|
+
cpc_union_alloc<A>::cpc_union_alloc(uint8_t lg_k, uint64_t seed, const A& allocator):
|
|
29
29
|
lg_k(lg_k),
|
|
30
30
|
seed(seed),
|
|
31
31
|
accumulator(nullptr),
|
|
32
|
-
bit_matrix()
|
|
32
|
+
bit_matrix(allocator)
|
|
33
33
|
{
|
|
34
34
|
if (lg_k < CPC_MIN_LG_K || lg_k > CPC_MAX_LG_K) {
|
|
35
35
|
throw std::invalid_argument("lg_k must be >= " + std::to_string(CPC_MIN_LG_K) + " and <= " + std::to_string(CPC_MAX_LG_K) + ": " + std::to_string(lg_k));
|
|
36
36
|
}
|
|
37
|
-
accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(lg_k, seed);
|
|
37
|
+
accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(lg_k, seed, allocator);
|
|
38
38
|
}
|
|
39
39
|
|
|
40
40
|
template<typename A>
|
|
@@ -200,13 +200,13 @@ cpc_sketch_alloc<A> cpc_union_alloc<A>::get_result_from_bit_matrix() const {
|
|
|
200
200
|
|
|
201
201
|
const uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k, num_coupons);
|
|
202
202
|
|
|
203
|
-
vector_u8<A> sliding_window(k);
|
|
203
|
+
vector_u8<A> sliding_window(k, 0, bit_matrix.get_allocator());
|
|
204
204
|
// don't need to zero the window's memory
|
|
205
205
|
|
|
206
206
|
// dynamically growing caused snowplow effect
|
|
207
207
|
uint8_t table_lg_size = lg_k - 4; // K/16; in some cases this will end up being oversized
|
|
208
208
|
if (table_lg_size < 2) table_lg_size = 2;
|
|
209
|
-
u32_table<A> table(table_lg_size, 6 + lg_k);
|
|
209
|
+
u32_table<A> table(table_lg_size, 6 + lg_k, bit_matrix.get_allocator());
|
|
210
210
|
|
|
211
211
|
// the following should work even when the offset is zero
|
|
212
212
|
const uint64_t mask_for_clearing_window = (static_cast<uint64_t>(0xff) << offset) ^ UINT64_MAX;
|
|
@@ -314,7 +314,7 @@ void cpc_union_alloc<A>::reduce_k(uint8_t new_lg_k) {
|
|
|
314
314
|
vector_u64<A> old_matrix = std::move(bit_matrix);
|
|
315
315
|
const uint8_t old_lg_k = lg_k;
|
|
316
316
|
const size_t new_k = 1 << new_lg_k;
|
|
317
|
-
bit_matrix = vector_u64<A>(new_k, 0);
|
|
317
|
+
bit_matrix = vector_u64<A>(new_k, 0, old_matrix.get_allocator());
|
|
318
318
|
lg_k = new_lg_k;
|
|
319
319
|
or_matrix_into_matrix(old_matrix, old_lg_k);
|
|
320
320
|
return;
|
|
@@ -24,12 +24,6 @@
|
|
|
24
24
|
|
|
25
25
|
namespace datasketches {
|
|
26
26
|
|
|
27
|
-
static inline uint16_t compute_seed_hash(uint64_t seed) {
|
|
28
|
-
HashState hashes;
|
|
29
|
-
MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
|
|
30
|
-
return hashes.h1 & 0xffff;
|
|
31
|
-
}
|
|
32
|
-
|
|
33
27
|
static inline uint64_t divide_longs_rounding_up(uint64_t x, uint64_t y) {
|
|
34
28
|
if (y == 0) throw std::invalid_argument("divide_longs_rounding_up: bad argument");
|
|
35
29
|
const uint64_t quotient = x / y;
|
|
@@ -231,7 +231,7 @@ static const double ICON_POLYNOMIAL_COEFFICIENTS[ICON_TABLE_SIZE] = {
|
|
|
231
231
|
#endif
|
|
232
232
|
};
|
|
233
233
|
|
|
234
|
-
static double evaluate_polynomial(const double* coefficients, int start, int num, double x) {
|
|
234
|
+
static inline double evaluate_polynomial(const double* coefficients, int start, int num, double x) {
|
|
235
235
|
const int final = start + num - 1;
|
|
236
236
|
double total = coefficients[final];
|
|
237
237
|
for (int j = final - 1; j >= start; j--) {
|
|
@@ -241,11 +241,11 @@ static double evaluate_polynomial(const double* coefficients, int start, int num
|
|
|
241
241
|
return total;
|
|
242
242
|
}
|
|
243
243
|
|
|
244
|
-
static double icon_exponential_approximation(double k, double c) {
|
|
244
|
+
static inline double icon_exponential_approximation(double k, double c) {
|
|
245
245
|
return (0.7940236163830469 * k * pow(2.0, c / k));
|
|
246
246
|
}
|
|
247
247
|
|
|
248
|
-
static double compute_icon_estimate(uint8_t lg_k, uint64_t c) {
|
|
248
|
+
static inline double compute_icon_estimate(uint8_t lg_k, uint64_t c) {
|
|
249
249
|
if (lg_k < ICON_MIN_LOG_K || lg_k > ICON_MAX_LOG_K) throw std::out_of_range("lg_k out of range");
|
|
250
250
|
if (c < 2) return ((c == 0) ? 0.0 : 1.0);
|
|
251
251
|
const size_t k = 1 << lg_k;
|
|
@@ -39,8 +39,8 @@ template<typename A>
|
|
|
39
39
|
class u32_table {
|
|
40
40
|
public:
|
|
41
41
|
|
|
42
|
-
u32_table();
|
|
43
|
-
u32_table(uint8_t lg_size, uint8_t num_valid_bits);
|
|
42
|
+
u32_table(const A& allocator);
|
|
43
|
+
u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator);
|
|
44
44
|
|
|
45
45
|
inline size_t get_num_items() const;
|
|
46
46
|
inline const uint32_t* get_slots() const;
|
|
@@ -52,7 +52,7 @@ public:
|
|
|
52
52
|
// returns true iff the item was present and was therefore removed from the table
|
|
53
53
|
inline bool maybe_delete(uint32_t item);
|
|
54
54
|
|
|
55
|
-
static u32_table make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k);
|
|
55
|
+
static u32_table make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k, const A& allocator);
|
|
56
56
|
|
|
57
57
|
vector_u32<A> unwrapping_get_items() const;
|
|
58
58
|
|
|
@@ -29,19 +29,19 @@
|
|
|
29
29
|
namespace datasketches {
|
|
30
30
|
|
|
31
31
|
template<typename A>
|
|
32
|
-
u32_table<A>::u32_table():
|
|
32
|
+
u32_table<A>::u32_table(const A& allocator):
|
|
33
33
|
lg_size(0),
|
|
34
34
|
num_valid_bits(0),
|
|
35
35
|
num_items(0),
|
|
36
|
-
slots()
|
|
36
|
+
slots(allocator)
|
|
37
37
|
{}
|
|
38
38
|
|
|
39
39
|
template<typename A>
|
|
40
|
-
u32_table<A>::u32_table(uint8_t lg_size, uint8_t num_valid_bits):
|
|
40
|
+
u32_table<A>::u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator):
|
|
41
41
|
lg_size(lg_size),
|
|
42
42
|
num_valid_bits(num_valid_bits),
|
|
43
43
|
num_items(0),
|
|
44
|
-
slots(1 << lg_size, UINT32_MAX)
|
|
44
|
+
slots(1 << lg_size, UINT32_MAX, allocator)
|
|
45
45
|
{
|
|
46
46
|
if (lg_size < 2) throw std::invalid_argument("lg_size must be >= 2");
|
|
47
47
|
if (num_valid_bits < 1 || num_valid_bits > 32) throw std::invalid_argument("num_valid_bits must be between 1 and 32");
|
|
@@ -110,10 +110,10 @@ bool u32_table<A>::maybe_delete(uint32_t item) {
|
|
|
110
110
|
|
|
111
111
|
// this one is specifically tailored to be a part of fm85 decompression scheme
|
|
112
112
|
template<typename A>
|
|
113
|
-
u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k) {
|
|
113
|
+
u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k, const A& allocator) {
|
|
114
114
|
uint8_t lg_num_slots = 2;
|
|
115
115
|
while (U32_TABLE_UPSIZE_DENOM * num_pairs > U32_TABLE_UPSIZE_NUMER * (1 << lg_num_slots)) lg_num_slots++;
|
|
116
|
-
u32_table<A> table(lg_num_slots, 6 + lg_k);
|
|
116
|
+
u32_table<A> table(lg_num_slots, 6 + lg_k, allocator);
|
|
117
117
|
// Note: there is a possible "snowplow effect" here because the caller is passing in a sorted pairs array
|
|
118
118
|
// However, we are starting out with the correct final table size, so the problem might not occur
|
|
119
119
|
for (size_t i = 0; i < num_pairs; i++) {
|
|
@@ -152,7 +152,7 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) {
|
|
|
152
152
|
const size_t new_size = 1 << new_lg_size;
|
|
153
153
|
if (new_size <= num_items) throw std::logic_error("new_size <= num_items");
|
|
154
154
|
vector_u32<A> old_slots = std::move(slots);
|
|
155
|
-
slots = vector_u32<A>(new_size, UINT32_MAX);
|
|
155
|
+
slots = vector_u32<A>(new_size, UINT32_MAX, old_slots.get_allocator());
|
|
156
156
|
lg_size = new_lg_size;
|
|
157
157
|
for (size_t i = 0; i < old_size; i++) {
|
|
158
158
|
if (old_slots[i] != UINT32_MAX) {
|
|
@@ -169,9 +169,9 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) {
|
|
|
169
169
|
// The result is nearly sorted, so make sure to use an efficient sort for that case
|
|
170
170
|
template<typename A>
|
|
171
171
|
vector_u32<A> u32_table<A>::unwrapping_get_items() const {
|
|
172
|
-
if (num_items == 0) return vector_u32<A>();
|
|
172
|
+
if (num_items == 0) return vector_u32<A>(slots.get_allocator());
|
|
173
173
|
const size_t table_size = 1 << lg_size;
|
|
174
|
-
vector_u32<A> result(num_items);
|
|
174
|
+
vector_u32<A> result(num_items, 0, slots.get_allocator());
|
|
175
175
|
size_t i = 0;
|
|
176
176
|
size_t l = 0;
|
|
177
177
|
size_t r = num_items - 1;
|
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
|
4
|
+
* distributed with this work for additional information
|
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
|
7
|
+
* "License"); you may not use this file except in compliance
|
|
8
|
+
* with the License. You may obtain a copy of the License at
|
|
9
|
+
*
|
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
11
|
+
*
|
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
|
13
|
+
* software distributed under the License is distributed on an
|
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
15
|
+
* KIND, either express or implied. See the License for the
|
|
16
|
+
* specific language governing permissions and limitations
|
|
17
|
+
* under the License.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
#include <cstring>
|
|
22
|
+
#include <sstream>
|
|
23
|
+
#include <fstream>
|
|
24
|
+
|
|
25
|
+
#include <catch.hpp>
|
|
26
|
+
|
|
27
|
+
#include "cpc_sketch.hpp"
|
|
28
|
+
#include "test_allocator.hpp"
|
|
29
|
+
|
|
30
|
+
namespace datasketches {
|
|
31
|
+
|
|
32
|
+
using cpc_sketch_test_alloc = cpc_sketch_alloc<test_allocator<uint8_t>>;
|
|
33
|
+
using alloc = test_allocator<uint8_t>;
|
|
34
|
+
|
|
35
|
+
TEST_CASE("cpc sketch allocation: serialize deserialize empty", "[cpc_sketch]") {
|
|
36
|
+
test_allocator_total_bytes = 0;
|
|
37
|
+
test_allocator_net_allocations = 0;
|
|
38
|
+
{
|
|
39
|
+
cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
|
|
40
|
+
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
41
|
+
sketch.serialize(s);
|
|
42
|
+
auto deserialized = cpc_sketch_test_alloc::deserialize(s, DEFAULT_SEED, alloc(0));
|
|
43
|
+
REQUIRE(deserialized.is_empty() == sketch.is_empty());
|
|
44
|
+
REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
|
|
45
|
+
REQUIRE(deserialized.validate());
|
|
46
|
+
}
|
|
47
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
|
48
|
+
REQUIRE(test_allocator_net_allocations == 0);
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
TEST_CASE("cpc sketch allocation: serialize deserialize sparse", "[cpc_sketch]") {
|
|
52
|
+
test_allocator_total_bytes = 0;
|
|
53
|
+
test_allocator_net_allocations = 0;
|
|
54
|
+
{
|
|
55
|
+
cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
|
|
56
|
+
const int n(100);
|
|
57
|
+
for (int i = 0; i < n; i++) sketch.update(i);
|
|
58
|
+
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
59
|
+
sketch.serialize(s);
|
|
60
|
+
auto deserialized = cpc_sketch_test_alloc::deserialize(s, DEFAULT_SEED, alloc(0));
|
|
61
|
+
REQUIRE(deserialized.is_empty() == sketch.is_empty());
|
|
62
|
+
REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
|
|
63
|
+
REQUIRE(deserialized.validate());
|
|
64
|
+
}
|
|
65
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
|
66
|
+
REQUIRE(test_allocator_net_allocations == 0);
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
TEST_CASE("cpc sketch allocation: serialize deserialize hybrid", "[cpc_sketch]") {
|
|
70
|
+
test_allocator_total_bytes = 0;
|
|
71
|
+
test_allocator_net_allocations = 0;
|
|
72
|
+
{
|
|
73
|
+
cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
|
|
74
|
+
const int n(200);
|
|
75
|
+
for (int i = 0; i < n; i++) sketch.update(i);
|
|
76
|
+
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
77
|
+
sketch.serialize(s);
|
|
78
|
+
auto deserialized = cpc_sketch_test_alloc::deserialize(s, DEFAULT_SEED, alloc(0));
|
|
79
|
+
REQUIRE(deserialized.is_empty() == sketch.is_empty());
|
|
80
|
+
REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
|
|
81
|
+
REQUIRE(deserialized.validate());
|
|
82
|
+
}
|
|
83
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
|
84
|
+
REQUIRE(test_allocator_net_allocations == 0);
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
TEST_CASE("cpc sketch allocation: serialize deserialize pinned", "[cpc_sketch]") {
|
|
88
|
+
test_allocator_total_bytes = 0;
|
|
89
|
+
test_allocator_net_allocations = 0;
|
|
90
|
+
{
|
|
91
|
+
cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
|
|
92
|
+
const int n(2000);
|
|
93
|
+
for (int i = 0; i < n; i++) sketch.update(i);
|
|
94
|
+
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
95
|
+
sketch.serialize(s);
|
|
96
|
+
auto deserialized = cpc_sketch_test_alloc::deserialize(s, DEFAULT_SEED, alloc(0));
|
|
97
|
+
REQUIRE(deserialized.is_empty() == sketch.is_empty());
|
|
98
|
+
REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
|
|
99
|
+
REQUIRE(deserialized.validate());
|
|
100
|
+
}
|
|
101
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
|
102
|
+
REQUIRE(test_allocator_net_allocations == 0);
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
TEST_CASE("cpc sketch allocation: serialize deserialize sliding", "[cpc_sketch]") {
|
|
106
|
+
test_allocator_total_bytes = 0;
|
|
107
|
+
test_allocator_net_allocations = 0;
|
|
108
|
+
{
|
|
109
|
+
cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
|
|
110
|
+
const int n(20000);
|
|
111
|
+
for (int i = 0; i < n; i++) sketch.update(i);
|
|
112
|
+
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
113
|
+
sketch.serialize(s);
|
|
114
|
+
auto deserialized = cpc_sketch_test_alloc::deserialize(s, DEFAULT_SEED, alloc(0));
|
|
115
|
+
REQUIRE(deserialized.is_empty() == sketch.is_empty());
|
|
116
|
+
REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
|
|
117
|
+
REQUIRE(deserialized.validate());
|
|
118
|
+
}
|
|
119
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
|
120
|
+
REQUIRE(test_allocator_net_allocations == 0);
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
TEST_CASE("cpc sketch allocation: serializing deserialize sliding large", "[cpc_sketch]") {
|
|
124
|
+
test_allocator_total_bytes = 0;
|
|
125
|
+
test_allocator_net_allocations = 0;
|
|
126
|
+
{
|
|
127
|
+
cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
|
|
128
|
+
const int n(3000000);
|
|
129
|
+
for (int i = 0; i < n; i++) sketch.update(i);
|
|
130
|
+
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
131
|
+
sketch.serialize(s);
|
|
132
|
+
auto deserialized = cpc_sketch_test_alloc::deserialize(s, DEFAULT_SEED, alloc(0));
|
|
133
|
+
REQUIRE(deserialized.is_empty() == sketch.is_empty());
|
|
134
|
+
REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
|
|
135
|
+
REQUIRE(deserialized.validate());
|
|
136
|
+
}
|
|
137
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
|
138
|
+
REQUIRE(test_allocator_net_allocations == 0);
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
TEST_CASE("cpc sketch allocation: serialize deserialize empty, bytes", "[cpc_sketch]") {
|
|
142
|
+
test_allocator_total_bytes = 0;
|
|
143
|
+
test_allocator_net_allocations = 0;
|
|
144
|
+
{
|
|
145
|
+
cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
|
|
146
|
+
auto bytes = sketch.serialize();
|
|
147
|
+
auto deserialized = cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED, 0);
|
|
148
|
+
REQUIRE(deserialized.is_empty() == sketch.is_empty());
|
|
149
|
+
REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
|
|
150
|
+
REQUIRE(deserialized.validate());
|
|
151
|
+
REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1, DEFAULT_SEED, 0), std::out_of_range);
|
|
152
|
+
}
|
|
153
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
|
154
|
+
REQUIRE(test_allocator_net_allocations == 0);
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
TEST_CASE("cpc sketch allocation: serialize deserialize sparse, bytes", "[cpc_sketch]") {
|
|
158
|
+
test_allocator_total_bytes = 0;
|
|
159
|
+
test_allocator_net_allocations = 0;
|
|
160
|
+
{
|
|
161
|
+
cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
|
|
162
|
+
const int n(100);
|
|
163
|
+
for (int i = 0; i < n; i++) sketch.update(i);
|
|
164
|
+
auto bytes = sketch.serialize();
|
|
165
|
+
auto deserialized = cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED, 0);
|
|
166
|
+
REQUIRE(deserialized.is_empty() == sketch.is_empty());
|
|
167
|
+
REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
|
|
168
|
+
REQUIRE(deserialized.validate());
|
|
169
|
+
REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), 7, DEFAULT_SEED, 0), std::out_of_range);
|
|
170
|
+
REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), 15, DEFAULT_SEED, 0), std::out_of_range);
|
|
171
|
+
REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1, DEFAULT_SEED, 0), std::out_of_range);
|
|
172
|
+
}
|
|
173
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
|
174
|
+
REQUIRE(test_allocator_net_allocations == 0);
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
TEST_CASE("cpc sketch allocation: serialize deserialize hybrid, bytes", "[cpc_sketch]") {
|
|
178
|
+
test_allocator_total_bytes = 0;
|
|
179
|
+
test_allocator_net_allocations = 0;
|
|
180
|
+
{
|
|
181
|
+
cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
|
|
182
|
+
const int n(200);
|
|
183
|
+
for (int i = 0; i < n; i++) sketch.update(i);
|
|
184
|
+
auto bytes = sketch.serialize();
|
|
185
|
+
auto deserialized = cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED, 0);
|
|
186
|
+
REQUIRE(deserialized.is_empty() == sketch.is_empty());
|
|
187
|
+
REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
|
|
188
|
+
REQUIRE(deserialized.validate());
|
|
189
|
+
REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), 7, DEFAULT_SEED, 0), std::out_of_range);
|
|
190
|
+
REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), 15, DEFAULT_SEED, 0), std::out_of_range);
|
|
191
|
+
REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1, DEFAULT_SEED, 0), std::out_of_range);
|
|
192
|
+
}
|
|
193
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
|
194
|
+
REQUIRE(test_allocator_net_allocations == 0);
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
TEST_CASE("cpc sketch allocation: serialize deserialize pinned, bytes", "[cpc_sketch]") {
|
|
198
|
+
test_allocator_total_bytes = 0;
|
|
199
|
+
test_allocator_net_allocations = 0;
|
|
200
|
+
{
|
|
201
|
+
cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
|
|
202
|
+
const int n(2000);
|
|
203
|
+
for (int i = 0; i < n; i++) sketch.update(i);
|
|
204
|
+
auto bytes = sketch.serialize();
|
|
205
|
+
auto deserialized = cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED, 0);
|
|
206
|
+
REQUIRE(deserialized.is_empty() == sketch.is_empty());
|
|
207
|
+
REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
|
|
208
|
+
REQUIRE(deserialized.validate());
|
|
209
|
+
REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), 7, DEFAULT_SEED, 0), std::out_of_range);
|
|
210
|
+
REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), 15, DEFAULT_SEED, 0), std::out_of_range);
|
|
211
|
+
REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1, DEFAULT_SEED, 0), std::out_of_range);
|
|
212
|
+
}
|
|
213
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
|
214
|
+
REQUIRE(test_allocator_net_allocations == 0);
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
TEST_CASE("cpc sketch allocation: serialize deserialize sliding, bytes", "[cpc_sketch]") {
|
|
218
|
+
test_allocator_total_bytes = 0;
|
|
219
|
+
test_allocator_net_allocations = 0;
|
|
220
|
+
{
|
|
221
|
+
cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
|
|
222
|
+
const int n(20000);
|
|
223
|
+
for (int i = 0; i < n; i++) sketch.update(i);
|
|
224
|
+
auto bytes = sketch.serialize();
|
|
225
|
+
auto deserialized = cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size(), DEFAULT_SEED, 0);
|
|
226
|
+
REQUIRE(deserialized.is_empty() == sketch.is_empty());
|
|
227
|
+
REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
|
|
228
|
+
REQUIRE(deserialized.validate());
|
|
229
|
+
REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), 7, DEFAULT_SEED, 0), std::out_of_range);
|
|
230
|
+
REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), 15, DEFAULT_SEED, 0), std::out_of_range);
|
|
231
|
+
REQUIRE_THROWS_AS(cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1, DEFAULT_SEED, 0), std::out_of_range);
|
|
232
|
+
}
|
|
233
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
|
234
|
+
REQUIRE(test_allocator_net_allocations == 0);
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
} /* namespace datasketches */
|