datasketches 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/cpc_wrapper.cpp +12 -13
- data/ext/datasketches/ext.cpp +1 -1
- data/ext/datasketches/ext.h +4 -0
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/fi_wrapper.cpp +6 -8
- data/ext/datasketches/hll_wrapper.cpp +13 -14
- data/ext/datasketches/kll_wrapper.cpp +28 -76
- data/ext/datasketches/theta_wrapper.cpp +27 -41
- data/ext/datasketches/vo_wrapper.cpp +4 -6
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/README.md +4 -4
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
- data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
- data/vendor/datasketches-cpp/python/README.md +6 -3
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
- data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
- data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
- data/vendor/datasketches-cpp/setup.py +5 -3
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
- data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
- data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
- metadata +43 -34
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -41,13 +41,13 @@ void cpc_init() {
|
|
41
41
|
}
|
42
42
|
|
43
43
|
template<typename A>
|
44
|
-
cpc_sketch_alloc<A>::cpc_sketch_alloc(uint8_t lg_k, uint64_t seed):
|
44
|
+
cpc_sketch_alloc<A>::cpc_sketch_alloc(uint8_t lg_k, uint64_t seed, const A& allocator):
|
45
45
|
lg_k(lg_k),
|
46
46
|
seed(seed),
|
47
47
|
was_merged(false),
|
48
48
|
num_coupons(0),
|
49
|
-
surprising_value_table(2, 6 + lg_k),
|
50
|
-
sliding_window(),
|
49
|
+
surprising_value_table(2, 6 + lg_k, allocator),
|
50
|
+
sliding_window(allocator),
|
51
51
|
window_offset(0),
|
52
52
|
first_interesting_column(0),
|
53
53
|
kxp(1 << lg_k),
|
@@ -58,6 +58,11 @@ hip_est_accum(0)
|
|
58
58
|
}
|
59
59
|
}
|
60
60
|
|
61
|
+
template<typename A>
|
62
|
+
A cpc_sketch_alloc<A>::get_allocator() const {
|
63
|
+
return sliding_window.get_allocator();
|
64
|
+
}
|
65
|
+
|
61
66
|
template<typename A>
|
62
67
|
uint8_t cpc_sketch_alloc<A>::get_lg_k() const {
|
63
68
|
return lg_k;
|
@@ -277,7 +282,7 @@ void cpc_sketch_alloc<A>::promote_sparse_to_windowed() {
|
|
277
282
|
|
278
283
|
sliding_window.resize(k, 0); // zero the memory (because we will be OR'ing into it)
|
279
284
|
|
280
|
-
u32_table<A> new_table(2, 6 + lg_k);
|
285
|
+
u32_table<A> new_table(2, 6 + lg_k, sliding_window.get_allocator());
|
281
286
|
|
282
287
|
const uint32_t* old_slots = surprising_value_table.get_slots();
|
283
288
|
const size_t old_num_slots = 1 << surprising_value_table.get_lg_size();
|
@@ -401,7 +406,7 @@ string<A> cpc_sketch_alloc<A>::to_string() const {
|
|
401
406
|
|
402
407
|
template<typename A>
|
403
408
|
void cpc_sketch_alloc<A>::serialize(std::ostream& os) const {
|
404
|
-
compressed_state<A> compressed;
|
409
|
+
compressed_state<A> compressed(A(sliding_window.get_allocator()));
|
405
410
|
compressed.table_data_words = 0;
|
406
411
|
compressed.table_num_entries = 0;
|
407
412
|
compressed.window_data_words = 0;
|
@@ -454,7 +459,7 @@ void cpc_sketch_alloc<A>::serialize(std::ostream& os) const {
|
|
454
459
|
|
455
460
|
template<typename A>
|
456
461
|
vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
|
457
|
-
compressed_state<A> compressed;
|
462
|
+
compressed_state<A> compressed(sliding_window.get_allocator());
|
458
463
|
compressed.table_data_words = 0;
|
459
464
|
compressed.table_num_entries = 0;
|
460
465
|
compressed.window_data_words = 0;
|
@@ -464,7 +469,7 @@ vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
|
|
464
469
|
const bool has_window = compressed.window_data.size() > 0;
|
465
470
|
const uint8_t preamble_ints = get_preamble_ints(num_coupons, has_hip, has_table, has_window);
|
466
471
|
const size_t size = header_size_bytes + (preamble_ints + compressed.table_data_words + compressed.window_data_words) * sizeof(uint32_t);
|
467
|
-
vector_u8<A> bytes(size);
|
472
|
+
vector_u8<A> bytes(size, 0, sliding_window.get_allocator());
|
468
473
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
469
474
|
ptr += copy_to_mem(&preamble_ints, ptr, sizeof(preamble_ints));
|
470
475
|
const uint8_t serial_version = SERIAL_VERSION;
|
@@ -511,7 +516,7 @@ vector_u8<A> cpc_sketch_alloc<A>::serialize(unsigned header_size_bytes) const {
|
|
511
516
|
}
|
512
517
|
|
513
518
|
template<typename A>
|
514
|
-
cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed) {
|
519
|
+
cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t seed, const A& allocator) {
|
515
520
|
uint8_t preamble_ints;
|
516
521
|
is.read((char*)&preamble_ints, sizeof(preamble_ints));
|
517
522
|
uint8_t serial_version;
|
@@ -529,7 +534,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t
|
|
529
534
|
const bool has_hip = flags_byte & (1 << flags::HAS_HIP);
|
530
535
|
const bool has_table = flags_byte & (1 << flags::HAS_TABLE);
|
531
536
|
const bool has_window = flags_byte & (1 << flags::HAS_WINDOW);
|
532
|
-
compressed_state<A> compressed;
|
537
|
+
compressed_state<A> compressed(allocator);
|
533
538
|
compressed.table_data_words = 0;
|
534
539
|
compressed.table_num_entries = 0;
|
535
540
|
compressed.window_data_words = 0;
|
@@ -583,7 +588,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t
|
|
583
588
|
throw std::invalid_argument("Incompatible seed hashes: " + std::to_string(seed_hash) + ", "
|
584
589
|
+ std::to_string(compute_seed_hash(seed)));
|
585
590
|
}
|
586
|
-
uncompressed_state<A> uncompressed;
|
591
|
+
uncompressed_state<A> uncompressed(allocator);
|
587
592
|
get_compressor<A>().uncompress(compressed, uncompressed, lg_k, num_coupons);
|
588
593
|
if (!is.good())
|
589
594
|
throw std::runtime_error("error reading from std::istream");
|
@@ -592,7 +597,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(std::istream& is, uint64_t
|
|
592
597
|
}
|
593
598
|
|
594
599
|
template<typename A>
|
595
|
-
cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed) {
|
600
|
+
cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t size, uint64_t seed, const A& allocator) {
|
596
601
|
ensure_minimum_memory(size, 8);
|
597
602
|
const char* ptr = static_cast<const char*>(bytes);
|
598
603
|
const char* base = static_cast<const char*>(bytes);
|
@@ -614,7 +619,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t s
|
|
614
619
|
const bool has_table = flags_byte & (1 << flags::HAS_TABLE);
|
615
620
|
const bool has_window = flags_byte & (1 << flags::HAS_WINDOW);
|
616
621
|
ensure_minimum_memory(size, preamble_ints << 2);
|
617
|
-
compressed_state<A> compressed;
|
622
|
+
compressed_state<A> compressed(allocator);
|
618
623
|
compressed.table_data_words = 0;
|
619
624
|
compressed.table_num_entries = 0;
|
620
625
|
compressed.window_data_words = 0;
|
@@ -677,7 +682,7 @@ cpc_sketch_alloc<A> cpc_sketch_alloc<A>::deserialize(const void* bytes, size_t s
|
|
677
682
|
throw std::invalid_argument("Incompatible seed hashes: " + std::to_string(seed_hash) + ", "
|
678
683
|
+ std::to_string(compute_seed_hash(seed)));
|
679
684
|
}
|
680
|
-
uncompressed_state<A> uncompressed;
|
685
|
+
uncompressed_state<A> uncompressed(allocator);
|
681
686
|
get_compressor<A>().uncompress(compressed, uncompressed, lg_k, num_coupons);
|
682
687
|
return cpc_sketch_alloc(lg_k, num_coupons, first_interesting_column, std::move(uncompressed.table),
|
683
688
|
std::move(uncompressed.window), has_hip, kxp, hip_est_accum, seed);
|
@@ -766,7 +771,7 @@ vector_u64<A> cpc_sketch_alloc<A>::build_bit_matrix() const {
|
|
766
771
|
// Fill the matrix with default rows in which the "early zone" is filled with ones.
|
767
772
|
// This is essential for the routine's O(k) time cost (as opposed to O(C)).
|
768
773
|
const uint64_t default_row = (static_cast<uint64_t>(1) << window_offset) - 1;
|
769
|
-
vector_u64<A> matrix(k, default_row);
|
774
|
+
vector_u64<A> matrix(k, default_row, sliding_window.get_allocator());
|
770
775
|
|
771
776
|
if (num_coupons == 0) return matrix;
|
772
777
|
|
@@ -35,7 +35,7 @@ namespace datasketches {
|
|
35
35
|
*/
|
36
36
|
|
37
37
|
// alias with default allocator for convenience
|
38
|
-
|
38
|
+
using cpc_union = cpc_union_alloc<std::allocator<uint8_t>>;
|
39
39
|
|
40
40
|
template<typename A>
|
41
41
|
class cpc_union_alloc {
|
@@ -45,7 +45,7 @@ public:
|
|
45
45
|
* @param lg_k base 2 logarithm of the number of bins in the sketch
|
46
46
|
* @param seed for hash function
|
47
47
|
*/
|
48
|
-
explicit cpc_union_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED);
|
48
|
+
explicit cpc_union_alloc(uint8_t lg_k = CPC_DEFAULT_LG_K, uint64_t seed = DEFAULT_SEED, const A& allocator = A());
|
49
49
|
|
50
50
|
cpc_union_alloc(const cpc_union_alloc<A>& other);
|
51
51
|
cpc_union_alloc(cpc_union_alloc<A>&& other) noexcept;
|
@@ -25,16 +25,16 @@
|
|
25
25
|
namespace datasketches {
|
26
26
|
|
27
27
|
template<typename A>
|
28
|
-
cpc_union_alloc<A>::cpc_union_alloc(uint8_t lg_k, uint64_t seed):
|
28
|
+
cpc_union_alloc<A>::cpc_union_alloc(uint8_t lg_k, uint64_t seed, const A& allocator):
|
29
29
|
lg_k(lg_k),
|
30
30
|
seed(seed),
|
31
31
|
accumulator(nullptr),
|
32
|
-
bit_matrix()
|
32
|
+
bit_matrix(allocator)
|
33
33
|
{
|
34
34
|
if (lg_k < CPC_MIN_LG_K || lg_k > CPC_MAX_LG_K) {
|
35
35
|
throw std::invalid_argument("lg_k must be >= " + std::to_string(CPC_MIN_LG_K) + " and <= " + std::to_string(CPC_MAX_LG_K) + ": " + std::to_string(lg_k));
|
36
36
|
}
|
37
|
-
accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(lg_k, seed);
|
37
|
+
accumulator = new (AllocCpc().allocate(1)) cpc_sketch_alloc<A>(lg_k, seed, allocator);
|
38
38
|
}
|
39
39
|
|
40
40
|
template<typename A>
|
@@ -200,13 +200,13 @@ cpc_sketch_alloc<A> cpc_union_alloc<A>::get_result_from_bit_matrix() const {
|
|
200
200
|
|
201
201
|
const uint8_t offset = cpc_sketch_alloc<A>::determine_correct_offset(lg_k, num_coupons);
|
202
202
|
|
203
|
-
vector_u8<A> sliding_window(k);
|
203
|
+
vector_u8<A> sliding_window(k, 0, bit_matrix.get_allocator());
|
204
204
|
// don't need to zero the window's memory
|
205
205
|
|
206
206
|
// dynamically growing caused snowplow effect
|
207
207
|
uint8_t table_lg_size = lg_k - 4; // K/16; in some cases this will end up being oversized
|
208
208
|
if (table_lg_size < 2) table_lg_size = 2;
|
209
|
-
u32_table<A> table(table_lg_size, 6 + lg_k);
|
209
|
+
u32_table<A> table(table_lg_size, 6 + lg_k, bit_matrix.get_allocator());
|
210
210
|
|
211
211
|
// the following should work even when the offset is zero
|
212
212
|
const uint64_t mask_for_clearing_window = (static_cast<uint64_t>(0xff) << offset) ^ UINT64_MAX;
|
@@ -314,7 +314,7 @@ void cpc_union_alloc<A>::reduce_k(uint8_t new_lg_k) {
|
|
314
314
|
vector_u64<A> old_matrix = std::move(bit_matrix);
|
315
315
|
const uint8_t old_lg_k = lg_k;
|
316
316
|
const size_t new_k = 1 << new_lg_k;
|
317
|
-
bit_matrix = vector_u64<A>(new_k, 0);
|
317
|
+
bit_matrix = vector_u64<A>(new_k, 0, old_matrix.get_allocator());
|
318
318
|
lg_k = new_lg_k;
|
319
319
|
or_matrix_into_matrix(old_matrix, old_lg_k);
|
320
320
|
return;
|
@@ -24,12 +24,6 @@
|
|
24
24
|
|
25
25
|
namespace datasketches {
|
26
26
|
|
27
|
-
static inline uint16_t compute_seed_hash(uint64_t seed) {
|
28
|
-
HashState hashes;
|
29
|
-
MurmurHash3_x64_128(&seed, sizeof(seed), 0, hashes);
|
30
|
-
return hashes.h1 & 0xffff;
|
31
|
-
}
|
32
|
-
|
33
27
|
static inline uint64_t divide_longs_rounding_up(uint64_t x, uint64_t y) {
|
34
28
|
if (y == 0) throw std::invalid_argument("divide_longs_rounding_up: bad argument");
|
35
29
|
const uint64_t quotient = x / y;
|
@@ -231,7 +231,7 @@ static const double ICON_POLYNOMIAL_COEFFICIENTS[ICON_TABLE_SIZE] = {
|
|
231
231
|
#endif
|
232
232
|
};
|
233
233
|
|
234
|
-
static double evaluate_polynomial(const double* coefficients, int start, int num, double x) {
|
234
|
+
static inline double evaluate_polynomial(const double* coefficients, int start, int num, double x) {
|
235
235
|
const int final = start + num - 1;
|
236
236
|
double total = coefficients[final];
|
237
237
|
for (int j = final - 1; j >= start; j--) {
|
@@ -241,11 +241,11 @@ static double evaluate_polynomial(const double* coefficients, int start, int num
|
|
241
241
|
return total;
|
242
242
|
}
|
243
243
|
|
244
|
-
static double icon_exponential_approximation(double k, double c) {
|
244
|
+
static inline double icon_exponential_approximation(double k, double c) {
|
245
245
|
return (0.7940236163830469 * k * pow(2.0, c / k));
|
246
246
|
}
|
247
247
|
|
248
|
-
static double compute_icon_estimate(uint8_t lg_k, uint64_t c) {
|
248
|
+
static inline double compute_icon_estimate(uint8_t lg_k, uint64_t c) {
|
249
249
|
if (lg_k < ICON_MIN_LOG_K || lg_k > ICON_MAX_LOG_K) throw std::out_of_range("lg_k out of range");
|
250
250
|
if (c < 2) return ((c == 0) ? 0.0 : 1.0);
|
251
251
|
const size_t k = 1 << lg_k;
|
@@ -39,8 +39,8 @@ template<typename A>
|
|
39
39
|
class u32_table {
|
40
40
|
public:
|
41
41
|
|
42
|
-
u32_table();
|
43
|
-
u32_table(uint8_t lg_size, uint8_t num_valid_bits);
|
42
|
+
u32_table(const A& allocator);
|
43
|
+
u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator);
|
44
44
|
|
45
45
|
inline size_t get_num_items() const;
|
46
46
|
inline const uint32_t* get_slots() const;
|
@@ -52,7 +52,7 @@ public:
|
|
52
52
|
// returns true iff the item was present and was therefore removed from the table
|
53
53
|
inline bool maybe_delete(uint32_t item);
|
54
54
|
|
55
|
-
static u32_table make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k);
|
55
|
+
static u32_table make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k, const A& allocator);
|
56
56
|
|
57
57
|
vector_u32<A> unwrapping_get_items() const;
|
58
58
|
|
@@ -29,19 +29,19 @@
|
|
29
29
|
namespace datasketches {
|
30
30
|
|
31
31
|
template<typename A>
|
32
|
-
u32_table<A>::u32_table():
|
32
|
+
u32_table<A>::u32_table(const A& allocator):
|
33
33
|
lg_size(0),
|
34
34
|
num_valid_bits(0),
|
35
35
|
num_items(0),
|
36
|
-
slots()
|
36
|
+
slots(allocator)
|
37
37
|
{}
|
38
38
|
|
39
39
|
template<typename A>
|
40
|
-
u32_table<A>::u32_table(uint8_t lg_size, uint8_t num_valid_bits):
|
40
|
+
u32_table<A>::u32_table(uint8_t lg_size, uint8_t num_valid_bits, const A& allocator):
|
41
41
|
lg_size(lg_size),
|
42
42
|
num_valid_bits(num_valid_bits),
|
43
43
|
num_items(0),
|
44
|
-
slots(1 << lg_size, UINT32_MAX)
|
44
|
+
slots(1 << lg_size, UINT32_MAX, allocator)
|
45
45
|
{
|
46
46
|
if (lg_size < 2) throw std::invalid_argument("lg_size must be >= 2");
|
47
47
|
if (num_valid_bits < 1 || num_valid_bits > 32) throw std::invalid_argument("num_valid_bits must be between 1 and 32");
|
@@ -110,10 +110,10 @@ bool u32_table<A>::maybe_delete(uint32_t item) {
|
|
110
110
|
|
111
111
|
// this one is specifically tailored to be a part of fm85 decompression scheme
|
112
112
|
template<typename A>
|
113
|
-
u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k) {
|
113
|
+
u32_table<A> u32_table<A>::make_from_pairs(const uint32_t* pairs, size_t num_pairs, uint8_t lg_k, const A& allocator) {
|
114
114
|
uint8_t lg_num_slots = 2;
|
115
115
|
while (U32_TABLE_UPSIZE_DENOM * num_pairs > U32_TABLE_UPSIZE_NUMER * (1 << lg_num_slots)) lg_num_slots++;
|
116
|
-
u32_table<A> table(lg_num_slots, 6 + lg_k);
|
116
|
+
u32_table<A> table(lg_num_slots, 6 + lg_k, allocator);
|
117
117
|
// Note: there is a possible "snowplow effect" here because the caller is passing in a sorted pairs array
|
118
118
|
// However, we are starting out with the correct final table size, so the problem might not occur
|
119
119
|
for (size_t i = 0; i < num_pairs; i++) {
|
@@ -152,7 +152,7 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) {
|
|
152
152
|
const size_t new_size = 1 << new_lg_size;
|
153
153
|
if (new_size <= num_items) throw std::logic_error("new_size <= num_items");
|
154
154
|
vector_u32<A> old_slots = std::move(slots);
|
155
|
-
slots = vector_u32<A>(new_size, UINT32_MAX);
|
155
|
+
slots = vector_u32<A>(new_size, UINT32_MAX, old_slots.get_allocator());
|
156
156
|
lg_size = new_lg_size;
|
157
157
|
for (size_t i = 0; i < old_size; i++) {
|
158
158
|
if (old_slots[i] != UINT32_MAX) {
|
@@ -169,9 +169,9 @@ void u32_table<A>::rebuild(uint8_t new_lg_size) {
|
|
169
169
|
// The result is nearly sorted, so make sure to use an efficient sort for that case
|
170
170
|
template<typename A>
|
171
171
|
vector_u32<A> u32_table<A>::unwrapping_get_items() const {
|
172
|
-
if (num_items == 0) return vector_u32<A>();
|
172
|
+
if (num_items == 0) return vector_u32<A>(slots.get_allocator());
|
173
173
|
const size_t table_size = 1 << lg_size;
|
174
|
-
vector_u32<A> result(num_items);
|
174
|
+
vector_u32<A> result(num_items, 0, slots.get_allocator());
|
175
175
|
size_t i = 0;
|
176
176
|
size_t l = 0;
|
177
177
|
size_t r = num_items - 1;
|
@@ -0,0 +1,237 @@
|
|
1
|
+
/*
|
2
|
+
* Licensed to the Apache Software Foundation (ASF) under one
|
3
|
+
* or more contributor license agreements. See the NOTICE file
|
4
|
+
* distributed with this work for additional information
|
5
|
+
* regarding copyright ownership. The ASF licenses this file
|
6
|
+
* to you under the Apache License, Version 2.0 (the
|
7
|
+
* "License"); you may not use this file except in compliance
|
8
|
+
* with the License. You may obtain a copy of the License at
|
9
|
+
*
|
10
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
*
|
12
|
+
* Unless required by applicable law or agreed to in writing,
|
13
|
+
* software distributed under the License is distributed on an
|
14
|
+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
15
|
+
* KIND, either express or implied. See the License for the
|
16
|
+
* specific language governing permissions and limitations
|
17
|
+
* under the License.
|
18
|
+
*/
|
19
|
+
|
20
|
+
|
21
|
+
#include <cstring>
|
22
|
+
#include <sstream>
|
23
|
+
#include <fstream>
|
24
|
+
|
25
|
+
#include <catch.hpp>
|
26
|
+
|
27
|
+
#include "cpc_sketch.hpp"
|
28
|
+
#include "test_allocator.hpp"
|
29
|
+
|
30
|
+
namespace datasketches {
|
31
|
+
|
32
|
+
using cpc_sketch_test_alloc = cpc_sketch_alloc<test_allocator<uint8_t>>;
|
33
|
+
using alloc = test_allocator<uint8_t>;
|
34
|
+
|
35
|
+
TEST_CASE("cpc sketch allocation: serialize deserialize empty", "[cpc_sketch]") {
|
36
|
+
test_allocator_total_bytes = 0;
|
37
|
+
test_allocator_net_allocations = 0;
|
38
|
+
{
|
39
|
+
cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
|
40
|
+
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
41
|
+
sketch.serialize(s);
|
42
|
+
auto deserialized = cpc_sketch_test_alloc::deserialize(s, DEFAULT_SEED, alloc(0));
|
43
|
+
REQUIRE(deserialized.is_empty() == sketch.is_empty());
|
44
|
+
REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
|
45
|
+
REQUIRE(deserialized.validate());
|
46
|
+
}
|
47
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
48
|
+
REQUIRE(test_allocator_net_allocations == 0);
|
49
|
+
}
|
50
|
+
|
51
|
+
TEST_CASE("cpc sketch allocation: serialize deserialize sparse", "[cpc_sketch]") {
|
52
|
+
test_allocator_total_bytes = 0;
|
53
|
+
test_allocator_net_allocations = 0;
|
54
|
+
{
|
55
|
+
cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
|
56
|
+
const int n(100);
|
57
|
+
for (int i = 0; i < n; i++) sketch.update(i);
|
58
|
+
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
59
|
+
sketch.serialize(s);
|
60
|
+
auto deserialized = cpc_sketch_test_alloc::deserialize(s, DEFAULT_SEED, alloc(0));
|
61
|
+
REQUIRE(deserialized.is_empty() == sketch.is_empty());
|
62
|
+
REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
|
63
|
+
REQUIRE(deserialized.validate());
|
64
|
+
}
|
65
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
66
|
+
REQUIRE(test_allocator_net_allocations == 0);
|
67
|
+
}
|
68
|
+
|
69
|
+
TEST_CASE("cpc sketch allocation: serialize deserialize hybrid", "[cpc_sketch]") {
|
70
|
+
test_allocator_total_bytes = 0;
|
71
|
+
test_allocator_net_allocations = 0;
|
72
|
+
{
|
73
|
+
cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
|
74
|
+
const int n(200);
|
75
|
+
for (int i = 0; i < n; i++) sketch.update(i);
|
76
|
+
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
77
|
+
sketch.serialize(s);
|
78
|
+
auto deserialized = cpc_sketch_test_alloc::deserialize(s, DEFAULT_SEED, alloc(0));
|
79
|
+
REQUIRE(deserialized.is_empty() == sketch.is_empty());
|
80
|
+
REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
|
81
|
+
REQUIRE(deserialized.validate());
|
82
|
+
}
|
83
|
+
REQUIRE(test_allocator_total_bytes == 0);
|
84
|
+
REQUIRE(test_allocator_net_allocations == 0);
|
85
|
+
}
|
86
|
+
|
87
|
+
// Stream round trip for a sketch fed 2000 distinct integers. The copy must
// agree with the original, and both allocator counters must read zero once
// every object created by the test has been destroyed.
// NOTE(review): the test name implies 2000 updates are meant to reach the
// "pinned" flavor at lg_k = 11; the flavor itself is not asserted here.
TEST_CASE("cpc sketch allocation: serialize deserialize pinned", "[cpc_sketch]") {
  // Start from clean counters so the final checks only reflect this test.
  test_allocator_net_allocations = 0;
  test_allocator_total_bytes = 0;
  {
    const int num_items = 2000;
    cpc_sketch_test_alloc original(11, DEFAULT_SEED, 0);
    for (int value = 0; value < num_items; ++value) {
      original.update(value);
    }

    // Binary in/out stream: written by serialize(), read back by deserialize().
    std::stringstream stream(std::ios::in | std::ios::out | std::ios::binary);
    original.serialize(stream);
    auto restored = cpc_sketch_test_alloc::deserialize(stream, DEFAULT_SEED, alloc(0));

    REQUIRE(restored.is_empty() == original.is_empty());
    REQUIRE(restored.get_estimate() == original.get_estimate());
    REQUIRE(restored.validate());
  } // sketches and stream go out of scope here, before the counters are checked
  REQUIRE(test_allocator_total_bytes == 0);
  REQUIRE(test_allocator_net_allocations == 0);
}
|
104
|
+
|
105
|
+
// Stream round trip for a sketch fed 20000 distinct integers. The copy must
// agree with the original, and both allocator counters must read zero once
// every object created by the test has been destroyed.
// NOTE(review): the test name implies 20000 updates are meant to reach the
// "sliding" flavor at lg_k = 11; the flavor itself is not asserted here.
TEST_CASE("cpc sketch allocation: serialize deserialize sliding", "[cpc_sketch]") {
  // Start from clean counters so the final checks only reflect this test.
  test_allocator_net_allocations = 0;
  test_allocator_total_bytes = 0;
  {
    const int num_items = 20000;
    cpc_sketch_test_alloc original(11, DEFAULT_SEED, 0);
    for (int value = 0; value < num_items; ++value) {
      original.update(value);
    }

    // Binary in/out stream: written by serialize(), read back by deserialize().
    std::stringstream stream(std::ios::in | std::ios::out | std::ios::binary);
    original.serialize(stream);
    auto restored = cpc_sketch_test_alloc::deserialize(stream, DEFAULT_SEED, alloc(0));

    REQUIRE(restored.is_empty() == original.is_empty());
    REQUIRE(restored.get_estimate() == original.get_estimate());
    REQUIRE(restored.validate());
  } // sketches and stream go out of scope here, before the counters are checked
  REQUIRE(test_allocator_total_bytes == 0);
  REQUIRE(test_allocator_net_allocations == 0);
}
|
122
|
+
|
123
|
+
// Stream round trip for a sketch fed 3,000,000 distinct integers. The copy
// must agree with the original, and both allocator counters must read zero
// once every object created by the test has been destroyed.
// The test name uses "serialize deserialize" like its sibling test cases
// (it previously read "serializing"), so name-based filters match them all.
// NOTE(review): the name implies a much larger input than the plain "sliding"
// test while staying in the sliding flavor; the flavor is not asserted here.
TEST_CASE("cpc sketch allocation: serialize deserialize sliding large", "[cpc_sketch]") {
  // Start from clean counters so the final checks only reflect this test.
  test_allocator_total_bytes = 0;
  test_allocator_net_allocations = 0;
  {
    cpc_sketch_test_alloc sketch(11, DEFAULT_SEED, 0);
    const int n = 3000000;
    for (int i = 0; i < n; i++) {
      sketch.update(i);
    }

    // Binary in/out stream: written by serialize(), read back by deserialize().
    std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
    sketch.serialize(s);
    auto deserialized = cpc_sketch_test_alloc::deserialize(s, DEFAULT_SEED, alloc(0));

    REQUIRE(deserialized.is_empty() == sketch.is_empty());
    REQUIRE(deserialized.get_estimate() == sketch.get_estimate());
    REQUIRE(deserialized.validate());
  } // sketches and stream go out of scope here, before the counters are checked
  REQUIRE(test_allocator_total_bytes == 0);
  REQUIRE(test_allocator_net_allocations == 0);
}
|
140
|
+
|
141
|
+
// Byte-buffer round trip for a sketch that received no updates. The copy must
// agree with the original, a buffer shortened by one byte must be rejected
// with std::out_of_range, and both allocator counters must read zero once
// every object created by the test has been destroyed.
TEST_CASE("cpc sketch allocation: serialize deserialize empty, bytes", "[cpc_sketch]") {
  // Start from clean counters so the final checks only reflect this test.
  test_allocator_net_allocations = 0;
  test_allocator_total_bytes = 0;
  {
    cpc_sketch_test_alloc original(11, DEFAULT_SEED, 0);
    auto bytes = original.serialize();

    auto restored = cpc_sketch_test_alloc::deserialize(
        bytes.data(), bytes.size(), DEFAULT_SEED, 0);
    REQUIRE(restored.is_empty() == original.is_empty());
    REQUIRE(restored.get_estimate() == original.get_estimate());
    REQUIRE(restored.validate());

    // Truncated input: one byte short of the full image.
    REQUIRE_THROWS_AS(
        cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1, DEFAULT_SEED, 0),
        std::out_of_range);
  } // sketches and byte buffer go out of scope here, before the counters are checked
  REQUIRE(test_allocator_total_bytes == 0);
  REQUIRE(test_allocator_net_allocations == 0);
}
|
156
|
+
|
157
|
+
// Byte-buffer round trip for a sketch fed 100 distinct integers. The copy must
// agree with the original, truncated buffers (7 bytes, 15 bytes, and one byte
// short of the full image) must be rejected with std::out_of_range, and both
// allocator counters must read zero once every object is destroyed.
// NOTE(review): the test name implies 100 updates keep the sketch in the
// "sparse" flavor at lg_k = 11; the flavor itself is not asserted here.
TEST_CASE("cpc sketch allocation: serialize deserialize sparse, bytes", "[cpc_sketch]") {
  // Start from clean counters so the final checks only reflect this test.
  test_allocator_net_allocations = 0;
  test_allocator_total_bytes = 0;
  {
    const int num_items = 100;
    cpc_sketch_test_alloc original(11, DEFAULT_SEED, 0);
    for (int value = 0; value < num_items; ++value) {
      original.update(value);
    }
    auto bytes = original.serialize();

    auto restored = cpc_sketch_test_alloc::deserialize(
        bytes.data(), bytes.size(), DEFAULT_SEED, 0);
    REQUIRE(restored.is_empty() == original.is_empty());
    REQUIRE(restored.get_estimate() == original.get_estimate());
    REQUIRE(restored.validate());

    // Truncated inputs must be refused rather than read past the given size.
    REQUIRE_THROWS_AS(
        cpc_sketch_test_alloc::deserialize(bytes.data(), 7, DEFAULT_SEED, 0),
        std::out_of_range);
    REQUIRE_THROWS_AS(
        cpc_sketch_test_alloc::deserialize(bytes.data(), 15, DEFAULT_SEED, 0),
        std::out_of_range);
    REQUIRE_THROWS_AS(
        cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1, DEFAULT_SEED, 0),
        std::out_of_range);
  } // sketches and byte buffer go out of scope here, before the counters are checked
  REQUIRE(test_allocator_total_bytes == 0);
  REQUIRE(test_allocator_net_allocations == 0);
}
|
176
|
+
|
177
|
+
// Byte-buffer round trip for a sketch fed 200 distinct integers. The copy must
// agree with the original, truncated buffers (7 bytes, 15 bytes, and one byte
// short of the full image) must be rejected with std::out_of_range, and both
// allocator counters must read zero once every object is destroyed.
// NOTE(review): the test name implies 200 updates are meant to reach the
// "hybrid" flavor at lg_k = 11; the flavor itself is not asserted here.
TEST_CASE("cpc sketch allocation: serialize deserialize hybrid, bytes", "[cpc_sketch]") {
  // Start from clean counters so the final checks only reflect this test.
  test_allocator_net_allocations = 0;
  test_allocator_total_bytes = 0;
  {
    const int num_items = 200;
    cpc_sketch_test_alloc original(11, DEFAULT_SEED, 0);
    for (int value = 0; value < num_items; ++value) {
      original.update(value);
    }
    auto bytes = original.serialize();

    auto restored = cpc_sketch_test_alloc::deserialize(
        bytes.data(), bytes.size(), DEFAULT_SEED, 0);
    REQUIRE(restored.is_empty() == original.is_empty());
    REQUIRE(restored.get_estimate() == original.get_estimate());
    REQUIRE(restored.validate());

    // Truncated inputs must be refused rather than read past the given size.
    REQUIRE_THROWS_AS(
        cpc_sketch_test_alloc::deserialize(bytes.data(), 7, DEFAULT_SEED, 0),
        std::out_of_range);
    REQUIRE_THROWS_AS(
        cpc_sketch_test_alloc::deserialize(bytes.data(), 15, DEFAULT_SEED, 0),
        std::out_of_range);
    REQUIRE_THROWS_AS(
        cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1, DEFAULT_SEED, 0),
        std::out_of_range);
  } // sketches and byte buffer go out of scope here, before the counters are checked
  REQUIRE(test_allocator_total_bytes == 0);
  REQUIRE(test_allocator_net_allocations == 0);
}
|
196
|
+
|
197
|
+
// Byte-buffer round trip for a sketch fed 2000 distinct integers. The copy
// must agree with the original, truncated buffers (7 bytes, 15 bytes, and one
// byte short of the full image) must be rejected with std::out_of_range, and
// both allocator counters must read zero once every object is destroyed.
// NOTE(review): the test name implies 2000 updates are meant to reach the
// "pinned" flavor at lg_k = 11; the flavor itself is not asserted here.
TEST_CASE("cpc sketch allocation: serialize deserialize pinned, bytes", "[cpc_sketch]") {
  // Start from clean counters so the final checks only reflect this test.
  test_allocator_net_allocations = 0;
  test_allocator_total_bytes = 0;
  {
    const int num_items = 2000;
    cpc_sketch_test_alloc original(11, DEFAULT_SEED, 0);
    for (int value = 0; value < num_items; ++value) {
      original.update(value);
    }
    auto bytes = original.serialize();

    auto restored = cpc_sketch_test_alloc::deserialize(
        bytes.data(), bytes.size(), DEFAULT_SEED, 0);
    REQUIRE(restored.is_empty() == original.is_empty());
    REQUIRE(restored.get_estimate() == original.get_estimate());
    REQUIRE(restored.validate());

    // Truncated inputs must be refused rather than read past the given size.
    REQUIRE_THROWS_AS(
        cpc_sketch_test_alloc::deserialize(bytes.data(), 7, DEFAULT_SEED, 0),
        std::out_of_range);
    REQUIRE_THROWS_AS(
        cpc_sketch_test_alloc::deserialize(bytes.data(), 15, DEFAULT_SEED, 0),
        std::out_of_range);
    REQUIRE_THROWS_AS(
        cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1, DEFAULT_SEED, 0),
        std::out_of_range);
  } // sketches and byte buffer go out of scope here, before the counters are checked
  REQUIRE(test_allocator_total_bytes == 0);
  REQUIRE(test_allocator_net_allocations == 0);
}
|
216
|
+
|
217
|
+
// Byte-buffer round trip for a sketch fed 20000 distinct integers. The copy
// must agree with the original, truncated buffers (7 bytes, 15 bytes, and one
// byte short of the full image) must be rejected with std::out_of_range, and
// both allocator counters must read zero once every object is destroyed.
// NOTE(review): the test name implies 20000 updates are meant to reach the
// "sliding" flavor at lg_k = 11; the flavor itself is not asserted here.
TEST_CASE("cpc sketch allocation: serialize deserialize sliding, bytes", "[cpc_sketch]") {
  // Start from clean counters so the final checks only reflect this test.
  test_allocator_net_allocations = 0;
  test_allocator_total_bytes = 0;
  {
    const int num_items = 20000;
    cpc_sketch_test_alloc original(11, DEFAULT_SEED, 0);
    for (int value = 0; value < num_items; ++value) {
      original.update(value);
    }
    auto bytes = original.serialize();

    auto restored = cpc_sketch_test_alloc::deserialize(
        bytes.data(), bytes.size(), DEFAULT_SEED, 0);
    REQUIRE(restored.is_empty() == original.is_empty());
    REQUIRE(restored.get_estimate() == original.get_estimate());
    REQUIRE(restored.validate());

    // Truncated inputs must be refused rather than read past the given size.
    REQUIRE_THROWS_AS(
        cpc_sketch_test_alloc::deserialize(bytes.data(), 7, DEFAULT_SEED, 0),
        std::out_of_range);
    REQUIRE_THROWS_AS(
        cpc_sketch_test_alloc::deserialize(bytes.data(), 15, DEFAULT_SEED, 0),
        std::out_of_range);
    REQUIRE_THROWS_AS(
        cpc_sketch_test_alloc::deserialize(bytes.data(), bytes.size() - 1, DEFAULT_SEED, 0),
        std::out_of_range);
  } // sketches and byte buffer go out of scope here, before the counters are checked
  REQUIRE(test_allocator_total_bytes == 0);
  REQUIRE(test_allocator_net_allocations == 0);
}
|
236
|
+
|
237
|
+
} /* namespace datasketches */
|