datasketches 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +7 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +24 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +14 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +121 -87
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +65 -80
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +28 -28
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +34 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +72 -62
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +68 -45
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +6 -6
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +9 -9
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +47 -56
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +34 -42
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +42 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +107 -58
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +4 -4
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +2 -0
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -28
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +51 -64
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +12 -12
- metadata +8 -3
@@ -25,30 +25,30 @@
|
|
25
25
|
namespace datasketches {
|
26
26
|
|
27
27
|
template<typename A>
|
28
|
-
coupon_iterator<A>::coupon_iterator(const
|
29
|
-
|
30
|
-
while (
|
31
|
-
if (
|
32
|
-
|
28
|
+
coupon_iterator<A>::coupon_iterator(const uint32_t* array, size_t array_size, size_t index, bool all):
|
29
|
+
array_(array), array_size_(array_size), index_(index), all_(all) {
|
30
|
+
while (index_ < array_size_) {
|
31
|
+
if (all_ || array_[index_] != hll_constants::EMPTY) break;
|
32
|
+
++index_;
|
33
33
|
}
|
34
34
|
}
|
35
35
|
|
36
36
|
template<typename A>
|
37
37
|
coupon_iterator<A>& coupon_iterator<A>::operator++() {
|
38
|
-
while (++
|
39
|
-
if (
|
38
|
+
while (++index_ < array_size_) {
|
39
|
+
if (all_ || array_[index_] != hll_constants::EMPTY) break;
|
40
40
|
}
|
41
41
|
return *this;
|
42
42
|
}
|
43
43
|
|
44
44
|
template<typename A>
|
45
45
|
bool coupon_iterator<A>::operator!=(const coupon_iterator& other) const {
|
46
|
-
return
|
46
|
+
return index_ != other.index_;
|
47
47
|
}
|
48
48
|
|
49
49
|
template<typename A>
|
50
50
|
uint32_t coupon_iterator<A>::operator*() const {
|
51
|
-
return
|
51
|
+
return array_[index_];
|
52
52
|
}
|
53
53
|
|
54
54
|
}
|
@@ -25,15 +25,15 @@ namespace datasketches {
|
|
25
25
|
template<typename A>
|
26
26
|
class coupon_iterator: public std::iterator<std::input_iterator_tag, uint32_t> {
|
27
27
|
public:
|
28
|
-
coupon_iterator(const
|
28
|
+
coupon_iterator(const uint32_t* array, size_t array_size, size_t index, bool all);
|
29
29
|
coupon_iterator& operator++();
|
30
30
|
bool operator!=(const coupon_iterator& other) const;
|
31
31
|
uint32_t operator*() const;
|
32
32
|
private:
|
33
|
-
const
|
34
|
-
size_t
|
35
|
-
size_t
|
36
|
-
bool
|
33
|
+
const uint32_t* array_;
|
34
|
+
size_t array_size_;
|
35
|
+
size_t index_;
|
36
|
+
bool all_;
|
37
37
|
};
|
38
38
|
|
39
39
|
}
|
@@ -119,7 +119,7 @@ class hll_sketch_alloc final {
|
|
119
119
|
* keeping memory use constant (if HLL_6 or HLL_8) at the cost of
|
120
120
|
* starting out using much more memory
|
121
121
|
*/
|
122
|
-
explicit hll_sketch_alloc(
|
122
|
+
explicit hll_sketch_alloc(uint8_t lg_config_k, target_hll_type tgt_type = HLL_4, bool start_full_size = false, const A& allocator = A());
|
123
123
|
|
124
124
|
/**
|
125
125
|
* Copy constructor
|
@@ -306,7 +306,7 @@ class hll_sketch_alloc final {
|
|
306
306
|
* @param num_std_dev Number of standard deviations, an integer from the set {1, 2, 3}.
|
307
307
|
* @return The approximate lower bound.
|
308
308
|
*/
|
309
|
-
double get_lower_bound(
|
309
|
+
double get_lower_bound(uint8_t num_std_dev) const;
|
310
310
|
|
311
311
|
/**
|
312
312
|
* Returns the approximate upper error bound given the specified
|
@@ -314,13 +314,13 @@ class hll_sketch_alloc final {
|
|
314
314
|
* @param num_std_dev Number of standard deviations, an integer from the set {1, 2, 3}.
|
315
315
|
* @return The approximate upper bound.
|
316
316
|
*/
|
317
|
-
double get_upper_bound(
|
317
|
+
double get_upper_bound(uint8_t num_std_dev) const;
|
318
318
|
|
319
319
|
/**
|
320
320
|
* Returns sketch's configured lg_k value.
|
321
321
|
* @return Configured lg_k value.
|
322
322
|
*/
|
323
|
-
|
323
|
+
uint8_t get_lg_config_k() const;
|
324
324
|
|
325
325
|
/**
|
326
326
|
* Returns the sketch's target HLL mode (from #target_hll_type).
|
@@ -344,13 +344,13 @@ class hll_sketch_alloc final {
|
|
344
344
|
* Returns the size of the sketch serialized in compact form.
|
345
345
|
* @return Size of the sketch serialized in compact form, in bytes.
|
346
346
|
*/
|
347
|
-
|
347
|
+
uint32_t get_compact_serialization_bytes() const;
|
348
348
|
|
349
349
|
/**
|
350
350
|
* Returns the size of the sketch serialized without compaction.
|
351
351
|
* @return Size of the sketch serialized without compaction, in bytes.
|
352
352
|
*/
|
353
|
-
|
353
|
+
uint32_t get_updatable_serialization_bytes() const;
|
354
354
|
|
355
355
|
/**
|
356
356
|
* Returns the maximum size in bytes that this sketch can grow to
|
@@ -363,7 +363,7 @@ class hll_sketch_alloc final {
|
|
363
363
|
* @param tgt_type the desired Hll type
|
364
364
|
* @return the maximum size in bytes that this sketch can grow to.
|
365
365
|
*/
|
366
|
-
static
|
366
|
+
static uint32_t get_max_updatable_serialization_bytes(uint8_t lg_k, target_hll_type tgt_type);
|
367
367
|
|
368
368
|
/**
|
369
369
|
* Gets the current (approximate) Relative Error (RE) asymptotic values given several
|
@@ -376,18 +376,18 @@ class hll_sketch_alloc final {
|
|
376
376
|
* @return the current (approximate) RelativeError
|
377
377
|
*/
|
378
378
|
static double get_rel_err(bool upper_bound, bool unioned,
|
379
|
-
|
379
|
+
uint8_t lg_config_k, uint8_t num_std_dev);
|
380
380
|
|
381
381
|
private:
|
382
382
|
explicit hll_sketch_alloc(HllSketchImpl<A>* that);
|
383
383
|
|
384
|
-
void coupon_update(
|
384
|
+
void coupon_update(uint32_t coupon);
|
385
385
|
|
386
386
|
std::string type_as_string() const;
|
387
387
|
std::string mode_as_string() const;
|
388
388
|
|
389
389
|
hll_mode get_current_mode() const;
|
390
|
-
|
390
|
+
uint8_t get_serialization_version() const;
|
391
391
|
bool is_out_of_order_flag() const;
|
392
392
|
bool is_estimation_mode() const;
|
393
393
|
|
@@ -431,7 +431,7 @@ class hll_union_alloc {
|
|
431
431
|
* @param lg_max_k The maximum size, in log2, of k. The value must
|
432
432
|
* be between 7 and 21, inclusive.
|
433
433
|
*/
|
434
|
-
explicit hll_union_alloc(
|
434
|
+
explicit hll_union_alloc(uint8_t lg_max_k, const A& allocator = A());
|
435
435
|
|
436
436
|
/**
|
437
437
|
* Returns the current cardinality estimate
|
@@ -458,7 +458,7 @@ class hll_union_alloc {
|
|
458
458
|
* @param num_std_dev Number of standard deviations, an integer from the set {1, 2, 3}.
|
459
459
|
* @return The approximate lower bound.
|
460
460
|
*/
|
461
|
-
double get_lower_bound(
|
461
|
+
double get_lower_bound(uint8_t num_std_dev) const;
|
462
462
|
|
463
463
|
/**
|
464
464
|
* Returns the approximate upper error bound given the specified
|
@@ -466,13 +466,13 @@ class hll_union_alloc {
|
|
466
466
|
* @param num_std_dev Number of standard deviations, an integer from the set {1, 2, 3}.
|
467
467
|
* @return The approximate upper bound.
|
468
468
|
*/
|
469
|
-
double get_upper_bound(
|
469
|
+
double get_upper_bound(uint8_t num_std_dev) const;
|
470
470
|
|
471
471
|
/**
|
472
472
|
* Returns union's configured lg_k value.
|
473
473
|
* @return Configured lg_k value.
|
474
474
|
*/
|
475
|
-
|
475
|
+
uint8_t get_lg_config_k() const;
|
476
476
|
|
477
477
|
/**
|
478
478
|
* Returns the union's target HLL mode (from #target_hll_type).
|
@@ -598,7 +598,7 @@ class hll_union_alloc {
|
|
598
598
|
* @return the current (approximate) RelativeError
|
599
599
|
*/
|
600
600
|
static double get_rel_err(bool upper_bound, bool unioned,
|
601
|
-
|
601
|
+
uint8_t lg_config_k, uint8_t num_std_dev);
|
602
602
|
|
603
603
|
private:
|
604
604
|
|
@@ -611,21 +611,21 @@ class hll_union_alloc {
|
|
611
611
|
* @param incoming_impl the given incoming sketch, which may not be modified.
|
612
612
|
* @param lg_max_k the maximum value of log2 K for this union.
|
613
613
|
*/
|
614
|
-
inline void union_impl(const hll_sketch_alloc<A>& sketch,
|
614
|
+
inline void union_impl(const hll_sketch_alloc<A>& sketch, uint8_t lg_max_k);
|
615
615
|
|
616
|
-
static HllSketchImpl<A>* copy_or_downsample(const HllSketchImpl<A>* src_impl,
|
616
|
+
static HllSketchImpl<A>* copy_or_downsample(const HllSketchImpl<A>* src_impl, uint8_t tgt_lg_k);
|
617
617
|
|
618
|
-
void coupon_update(
|
618
|
+
void coupon_update(uint32_t coupon);
|
619
619
|
|
620
620
|
hll_mode get_current_mode() const;
|
621
621
|
bool is_out_of_order_flag() const;
|
622
622
|
bool is_estimation_mode() const;
|
623
623
|
|
624
624
|
// calls couponUpdate on sketch, freeing the old sketch upon changes in hll_mode
|
625
|
-
static HllSketchImpl<A>* leak_free_coupon_update(HllSketchImpl<A>* impl,
|
625
|
+
static HllSketchImpl<A>* leak_free_coupon_update(HllSketchImpl<A>* impl, uint32_t coupon);
|
626
626
|
|
627
|
-
|
628
|
-
hll_sketch_alloc<A>
|
627
|
+
uint8_t lg_max_k_;
|
628
|
+
hll_sketch_alloc<A> gadget_;
|
629
629
|
};
|
630
630
|
|
631
631
|
/// convenience alias for hll_sketch with default allocator
|
@@ -45,7 +45,7 @@ TEST_CASE("aux hash map: check grow space", "[aux_hash_map]") {
|
|
45
45
|
AuxHashMap<std::allocator<uint8_t>>::make_deleter()
|
46
46
|
);
|
47
47
|
REQUIRE(map->getLgAuxArrInts() == 3);
|
48
|
-
for (
|
48
|
+
for (uint8_t i = 1; i <= 7; ++i) {
|
49
49
|
map->mustAdd(i, i);
|
50
50
|
}
|
51
51
|
REQUIRE(map->getLgAuxArrInts() == 4);
|
@@ -30,7 +30,7 @@
|
|
30
30
|
namespace datasketches {
|
31
31
|
|
32
32
|
TEST_CASE("coupon hash set: check corrupt bytearray", "[coupon_hash_set]") {
|
33
|
-
|
33
|
+
uint8_t lgK = 8;
|
34
34
|
hll_sketch sk1(lgK);
|
35
35
|
for (int i = 0; i < 24; ++i) {
|
36
36
|
sk1.update(i);
|
@@ -39,42 +39,42 @@ TEST_CASE("coupon hash set: check corrupt bytearray", "[coupon_hash_set]") {
|
|
39
39
|
uint8_t* bytes = sketchBytes.data();
|
40
40
|
const size_t size = sketchBytes.size();
|
41
41
|
|
42
|
-
bytes[
|
42
|
+
bytes[hll_constants::PREAMBLE_INTS_BYTE] = 0;
|
43
43
|
// fail in HllSketchImpl
|
44
44
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
|
45
45
|
// fail in CouponHashSet
|
46
46
|
REQUIRE_THROWS_AS(CouponHashSet<std::allocator<uint8_t>>::newSet(bytes, size, std::allocator<uint8_t>()), std::invalid_argument);
|
47
|
-
bytes[
|
47
|
+
bytes[hll_constants::PREAMBLE_INTS_BYTE] = hll_constants::HASH_SET_PREINTS;
|
48
48
|
|
49
|
-
bytes[
|
49
|
+
bytes[hll_constants::SER_VER_BYTE] = 0;
|
50
50
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
|
51
|
-
bytes[
|
51
|
+
bytes[hll_constants::SER_VER_BYTE] = hll_constants::SER_VER;
|
52
52
|
|
53
|
-
bytes[
|
53
|
+
bytes[hll_constants::FAMILY_BYTE] = 0;
|
54
54
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
|
55
|
-
bytes[
|
55
|
+
bytes[hll_constants::FAMILY_BYTE] = hll_constants::FAMILY_ID;
|
56
56
|
|
57
|
-
bytes[
|
57
|
+
bytes[hll_constants::LG_K_BYTE] = 6;
|
58
58
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
|
59
|
-
bytes[
|
59
|
+
bytes[hll_constants::LG_K_BYTE] = lgK;
|
60
60
|
|
61
|
-
uint8_t tmp = bytes[
|
62
|
-
bytes[
|
61
|
+
uint8_t tmp = bytes[hll_constants::MODE_BYTE];
|
62
|
+
bytes[hll_constants::MODE_BYTE] = 0x10; // HLL_6, LIST
|
63
63
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
|
64
|
-
bytes[
|
64
|
+
bytes[hll_constants::MODE_BYTE] = tmp;
|
65
65
|
|
66
|
-
tmp = bytes[
|
67
|
-
bytes[
|
66
|
+
tmp = bytes[hll_constants::LG_ARR_BYTE];
|
67
|
+
bytes[hll_constants::LG_ARR_BYTE] = 0;
|
68
68
|
hll_sketch::deserialize(bytes, size);
|
69
69
|
// should work fine despite the corruption
|
70
|
-
bytes[
|
70
|
+
bytes[hll_constants::LG_ARR_BYTE] = tmp;
|
71
71
|
|
72
72
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size - 1), std::out_of_range);
|
73
73
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, 3), std::out_of_range);
|
74
74
|
}
|
75
75
|
|
76
76
|
TEST_CASE("coupon hash set: check corrupt stream", "[coupon_hash_set]") {
|
77
|
-
|
77
|
+
uint8_t lgK = 9;
|
78
78
|
hll_sketch sk1(lgK);
|
79
79
|
for (int i = 0; i < 24; ++i) {
|
80
80
|
sk1.update(i);
|
@@ -82,48 +82,48 @@ TEST_CASE("coupon hash set: check corrupt stream", "[coupon_hash_set]") {
|
|
82
82
|
std::stringstream ss;
|
83
83
|
sk1.serialize_compact(ss);
|
84
84
|
|
85
|
-
ss.seekp(
|
85
|
+
ss.seekp(hll_constants::PREAMBLE_INTS_BYTE);
|
86
86
|
ss.put(0);
|
87
87
|
ss.seekg(0);
|
88
88
|
// fail in HllSketchImpl
|
89
89
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
|
90
90
|
// fail in CouponHashSet
|
91
91
|
REQUIRE_THROWS_AS(CouponHashSet<std::allocator<uint8_t>>::newSet(ss, std::allocator<uint8_t>()), std::invalid_argument);
|
92
|
-
ss.seekp(
|
93
|
-
ss.put(
|
92
|
+
ss.seekp(hll_constants::PREAMBLE_INTS_BYTE);
|
93
|
+
ss.put(hll_constants::HASH_SET_PREINTS);
|
94
94
|
|
95
|
-
ss.seekp(
|
95
|
+
ss.seekp(hll_constants::SER_VER_BYTE);
|
96
96
|
ss.put(0);
|
97
97
|
ss.seekg(0);
|
98
98
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
|
99
|
-
ss.seekp(
|
100
|
-
ss.put(
|
99
|
+
ss.seekp(hll_constants::SER_VER_BYTE);
|
100
|
+
ss.put(hll_constants::SER_VER);
|
101
101
|
|
102
|
-
ss.seekp(
|
102
|
+
ss.seekp(hll_constants::FAMILY_BYTE);
|
103
103
|
ss.put(0);
|
104
104
|
ss.seekg(0);
|
105
105
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
|
106
|
-
ss.seekp(
|
107
|
-
ss.put(
|
106
|
+
ss.seekp(hll_constants::FAMILY_BYTE);
|
107
|
+
ss.put(hll_constants::FAMILY_ID);
|
108
108
|
|
109
|
-
ss.seekg(
|
110
|
-
|
111
|
-
ss.seekp(
|
109
|
+
ss.seekg(hll_constants::MODE_BYTE);
|
110
|
+
auto tmp = ss.get();
|
111
|
+
ss.seekp(hll_constants::MODE_BYTE);
|
112
112
|
ss.put(0x22); // HLL_8, HLL
|
113
113
|
ss.seekg(0);
|
114
114
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
|
115
|
-
ss.seekp(
|
116
|
-
ss.put(tmp);
|
115
|
+
ss.seekp(hll_constants::MODE_BYTE);
|
116
|
+
ss.put((char)tmp);
|
117
117
|
|
118
|
-
ss.seekg(
|
118
|
+
ss.seekg(hll_constants::LG_ARR_BYTE);
|
119
119
|
tmp = ss.get();
|
120
|
-
ss.seekp(
|
120
|
+
ss.seekp(hll_constants::LG_ARR_BYTE);
|
121
121
|
ss.put(0);
|
122
122
|
ss.seekg(0);
|
123
123
|
hll_sketch::deserialize(ss);
|
124
124
|
// should work fine despite the corruption
|
125
|
-
ss.seekp(
|
126
|
-
ss.put(tmp);
|
125
|
+
ss.seekp(hll_constants::LG_ARR_BYTE);
|
126
|
+
ss.put((char)tmp);
|
127
127
|
}
|
128
128
|
|
129
129
|
|
@@ -35,9 +35,9 @@ void println_string(std::string str) {
|
|
35
35
|
}
|
36
36
|
|
37
37
|
TEST_CASE("coupon list: check iterator", "[coupon_list]") {
|
38
|
-
|
38
|
+
uint8_t lgConfigK = 8;
|
39
39
|
CouponList<std::allocator<uint8_t>> cl(lgConfigK, HLL_4, LIST, std::allocator<uint8_t>());
|
40
|
-
for (
|
40
|
+
for (uint8_t i = 1; i <= 7; ++i) { cl.couponUpdate(HllUtil<>::pair(i, i)); } // not hashes but distinct values
|
41
41
|
const int mask = (1 << lgConfigK) - 1;
|
42
42
|
int idx = 0;
|
43
43
|
auto itr = cl.begin(false);
|
@@ -56,7 +56,7 @@ TEST_CASE("coupon list: check iterator", "[coupon_list]") {
|
|
56
56
|
}
|
57
57
|
|
58
58
|
TEST_CASE("coupon list: check duplicates and misc", "[coupon_list]") {
|
59
|
-
|
59
|
+
uint8_t lgConfigK = 8;
|
60
60
|
hll_sketch sk(lgConfigK);
|
61
61
|
|
62
62
|
for (int i = 1; i <= 7; ++i) {
|
@@ -79,7 +79,7 @@ TEST_CASE("coupon list: check duplicates and misc", "[coupon_list]") {
|
|
79
79
|
REQUIRE(relErr < 0.0);
|
80
80
|
}
|
81
81
|
|
82
|
-
static void serializeDeserialize(
|
82
|
+
static void serializeDeserialize(uint8_t lgK) {
|
83
83
|
hll_sketch sk1(lgK);
|
84
84
|
|
85
85
|
int u = (lgK < 8) ? 7 : (((1 << (lgK - 3))/ 4) * 3);
|
@@ -110,7 +110,7 @@ TEST_CASE("coupon list: check serialize deserialize", "[coupon_list]") {
|
|
110
110
|
}
|
111
111
|
|
112
112
|
TEST_CASE("coupon list: check corrupt bytearray data", "[coupon_list]") {
|
113
|
-
|
113
|
+
uint8_t lgK = 6;
|
114
114
|
hll_sketch sk1(lgK);
|
115
115
|
sk1.update(1);
|
116
116
|
sk1.update(2);
|
@@ -118,24 +118,24 @@ TEST_CASE("coupon list: check corrupt bytearray data", "[coupon_list]") {
|
|
118
118
|
uint8_t* bytes = sketchBytes.data();
|
119
119
|
const size_t size = sketchBytes.size();
|
120
120
|
|
121
|
-
bytes[
|
121
|
+
bytes[hll_constants::PREAMBLE_INTS_BYTE] = 0;
|
122
122
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
|
123
123
|
REQUIRE_THROWS_AS(CouponList<std::allocator<uint8_t>>::newList(bytes, size, std::allocator<uint8_t>()), std::invalid_argument);
|
124
124
|
|
125
|
-
bytes[
|
125
|
+
bytes[hll_constants::PREAMBLE_INTS_BYTE] = hll_constants::LIST_PREINTS;
|
126
126
|
|
127
|
-
bytes[
|
127
|
+
bytes[hll_constants::SER_VER_BYTE] = 0;
|
128
128
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
|
129
|
-
bytes[
|
129
|
+
bytes[hll_constants::SER_VER_BYTE] = hll_constants::SER_VER;
|
130
130
|
|
131
|
-
bytes[
|
131
|
+
bytes[hll_constants::FAMILY_BYTE] = 0;
|
132
132
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
|
133
|
-
bytes[
|
133
|
+
bytes[hll_constants::FAMILY_BYTE] = hll_constants::FAMILY_ID;
|
134
134
|
|
135
|
-
uint8_t tmp = bytes[
|
136
|
-
bytes[
|
135
|
+
uint8_t tmp = bytes[hll_constants::MODE_BYTE];
|
136
|
+
bytes[hll_constants::MODE_BYTE] = 0x01; // HLL_4, SET
|
137
137
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
|
138
|
-
bytes[
|
138
|
+
bytes[hll_constants::MODE_BYTE] = tmp;
|
139
139
|
|
140
140
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size - 1), std::out_of_range);
|
141
141
|
|
@@ -143,36 +143,36 @@ TEST_CASE("coupon list: check corrupt bytearray data", "[coupon_list]") {
|
|
143
143
|
}
|
144
144
|
|
145
145
|
TEST_CASE("coupon list: check corrupt stream data", "[coupon_list]") {
|
146
|
-
|
146
|
+
uint8_t lgK = 6;
|
147
147
|
hll_sketch sk1(lgK);
|
148
148
|
sk1.update(1);
|
149
149
|
sk1.update(2);
|
150
150
|
std::stringstream ss;
|
151
151
|
sk1.serialize_compact(ss);
|
152
152
|
|
153
|
-
ss.seekp(
|
153
|
+
ss.seekp(hll_constants::PREAMBLE_INTS_BYTE);
|
154
154
|
ss.put(0);
|
155
155
|
ss.seekg(0);
|
156
156
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
|
157
157
|
REQUIRE_THROWS_AS(CouponList<std::allocator<uint8_t>>::newList(ss, std::allocator<uint8_t>()), std::invalid_argument);
|
158
|
-
ss.seekp(
|
159
|
-
ss.put(
|
158
|
+
ss.seekp(hll_constants::PREAMBLE_INTS_BYTE);
|
159
|
+
ss.put(hll_constants::LIST_PREINTS);
|
160
160
|
|
161
|
-
ss.seekp(
|
161
|
+
ss.seekp(hll_constants::SER_VER_BYTE);
|
162
162
|
ss.put(0);
|
163
163
|
ss.seekg(0);
|
164
164
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
|
165
|
-
ss.seekp(
|
166
|
-
ss.put(
|
165
|
+
ss.seekp(hll_constants::SER_VER_BYTE);
|
166
|
+
ss.put(hll_constants::SER_VER);
|
167
167
|
|
168
|
-
ss.seekp(
|
168
|
+
ss.seekp(hll_constants::FAMILY_BYTE);
|
169
169
|
ss.put(0);
|
170
170
|
ss.seekg(0);
|
171
171
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
|
172
|
-
ss.seekp(
|
173
|
-
ss.put(
|
172
|
+
ss.seekp(hll_constants::FAMILY_BYTE);
|
173
|
+
ss.put(hll_constants::FAMILY_ID);
|
174
174
|
|
175
|
-
ss.seekp(
|
175
|
+
ss.seekp(hll_constants::MODE_BYTE);
|
176
176
|
ss.put(0x22); // HLL_8, HLL
|
177
177
|
ss.seekg(0);
|
178
178
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
|