datasketches 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +7 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +24 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +14 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +121 -87
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +65 -80
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +28 -28
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +34 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +72 -62
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +68 -45
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +6 -6
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +9 -9
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +47 -56
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +34 -42
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +42 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +107 -58
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +4 -4
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +2 -0
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -28
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +51 -64
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +12 -12
- metadata +8 -3
|
@@ -25,30 +25,30 @@
|
|
|
25
25
|
namespace datasketches {
|
|
26
26
|
|
|
27
27
|
template<typename A>
|
|
28
|
-
coupon_iterator<A>::coupon_iterator(const
|
|
29
|
-
|
|
30
|
-
while (
|
|
31
|
-
if (
|
|
32
|
-
|
|
28
|
+
coupon_iterator<A>::coupon_iterator(const uint32_t* array, size_t array_size, size_t index, bool all):
|
|
29
|
+
array_(array), array_size_(array_size), index_(index), all_(all) {
|
|
30
|
+
while (index_ < array_size_) {
|
|
31
|
+
if (all_ || array_[index_] != hll_constants::EMPTY) break;
|
|
32
|
+
++index_;
|
|
33
33
|
}
|
|
34
34
|
}
|
|
35
35
|
|
|
36
36
|
template<typename A>
|
|
37
37
|
coupon_iterator<A>& coupon_iterator<A>::operator++() {
|
|
38
|
-
while (++
|
|
39
|
-
if (
|
|
38
|
+
while (++index_ < array_size_) {
|
|
39
|
+
if (all_ || array_[index_] != hll_constants::EMPTY) break;
|
|
40
40
|
}
|
|
41
41
|
return *this;
|
|
42
42
|
}
|
|
43
43
|
|
|
44
44
|
template<typename A>
|
|
45
45
|
bool coupon_iterator<A>::operator!=(const coupon_iterator& other) const {
|
|
46
|
-
return
|
|
46
|
+
return index_ != other.index_;
|
|
47
47
|
}
|
|
48
48
|
|
|
49
49
|
template<typename A>
|
|
50
50
|
uint32_t coupon_iterator<A>::operator*() const {
|
|
51
|
-
return
|
|
51
|
+
return array_[index_];
|
|
52
52
|
}
|
|
53
53
|
|
|
54
54
|
}
|
|
@@ -25,15 +25,15 @@ namespace datasketches {
|
|
|
25
25
|
template<typename A>
|
|
26
26
|
class coupon_iterator: public std::iterator<std::input_iterator_tag, uint32_t> {
|
|
27
27
|
public:
|
|
28
|
-
coupon_iterator(const
|
|
28
|
+
coupon_iterator(const uint32_t* array, size_t array_size, size_t index, bool all);
|
|
29
29
|
coupon_iterator& operator++();
|
|
30
30
|
bool operator!=(const coupon_iterator& other) const;
|
|
31
31
|
uint32_t operator*() const;
|
|
32
32
|
private:
|
|
33
|
-
const
|
|
34
|
-
size_t
|
|
35
|
-
size_t
|
|
36
|
-
bool
|
|
33
|
+
const uint32_t* array_;
|
|
34
|
+
size_t array_size_;
|
|
35
|
+
size_t index_;
|
|
36
|
+
bool all_;
|
|
37
37
|
};
|
|
38
38
|
|
|
39
39
|
}
|
|
@@ -119,7 +119,7 @@ class hll_sketch_alloc final {
|
|
|
119
119
|
* keeping memory use constant (if HLL_6 or HLL_8) at the cost of
|
|
120
120
|
* starting out using much more memory
|
|
121
121
|
*/
|
|
122
|
-
explicit hll_sketch_alloc(
|
|
122
|
+
explicit hll_sketch_alloc(uint8_t lg_config_k, target_hll_type tgt_type = HLL_4, bool start_full_size = false, const A& allocator = A());
|
|
123
123
|
|
|
124
124
|
/**
|
|
125
125
|
* Copy constructor
|
|
@@ -306,7 +306,7 @@ class hll_sketch_alloc final {
|
|
|
306
306
|
* @param num_std_dev Number of standard deviations, an integer from the set {1, 2, 3}.
|
|
307
307
|
* @return The approximate lower bound.
|
|
308
308
|
*/
|
|
309
|
-
double get_lower_bound(
|
|
309
|
+
double get_lower_bound(uint8_t num_std_dev) const;
|
|
310
310
|
|
|
311
311
|
/**
|
|
312
312
|
* Returns the approximate upper error bound given the specified
|
|
@@ -314,13 +314,13 @@ class hll_sketch_alloc final {
|
|
|
314
314
|
* @param num_std_dev Number of standard deviations, an integer from the set {1, 2, 3}.
|
|
315
315
|
* @return The approximate upper bound.
|
|
316
316
|
*/
|
|
317
|
-
double get_upper_bound(
|
|
317
|
+
double get_upper_bound(uint8_t num_std_dev) const;
|
|
318
318
|
|
|
319
319
|
/**
|
|
320
320
|
* Returns sketch's configured lg_k value.
|
|
321
321
|
* @return Configured lg_k value.
|
|
322
322
|
*/
|
|
323
|
-
|
|
323
|
+
uint8_t get_lg_config_k() const;
|
|
324
324
|
|
|
325
325
|
/**
|
|
326
326
|
* Returns the sketch's target HLL mode (from #target_hll_type).
|
|
@@ -344,13 +344,13 @@ class hll_sketch_alloc final {
|
|
|
344
344
|
* Returns the size of the sketch serialized in compact form.
|
|
345
345
|
* @return Size of the sketch serialized in compact form, in bytes.
|
|
346
346
|
*/
|
|
347
|
-
|
|
347
|
+
uint32_t get_compact_serialization_bytes() const;
|
|
348
348
|
|
|
349
349
|
/**
|
|
350
350
|
* Returns the size of the sketch serialized without compaction.
|
|
351
351
|
* @return Size of the sketch serialized without compaction, in bytes.
|
|
352
352
|
*/
|
|
353
|
-
|
|
353
|
+
uint32_t get_updatable_serialization_bytes() const;
|
|
354
354
|
|
|
355
355
|
/**
|
|
356
356
|
* Returns the maximum size in bytes that this sketch can grow to
|
|
@@ -363,7 +363,7 @@ class hll_sketch_alloc final {
|
|
|
363
363
|
* @param tgt_type the desired Hll type
|
|
364
364
|
* @return the maximum size in bytes that this sketch can grow to.
|
|
365
365
|
*/
|
|
366
|
-
static
|
|
366
|
+
static uint32_t get_max_updatable_serialization_bytes(uint8_t lg_k, target_hll_type tgt_type);
|
|
367
367
|
|
|
368
368
|
/**
|
|
369
369
|
* Gets the current (approximate) Relative Error (RE) asymptotic values given several
|
|
@@ -376,18 +376,18 @@ class hll_sketch_alloc final {
|
|
|
376
376
|
* @return the current (approximate) RelativeError
|
|
377
377
|
*/
|
|
378
378
|
static double get_rel_err(bool upper_bound, bool unioned,
|
|
379
|
-
|
|
379
|
+
uint8_t lg_config_k, uint8_t num_std_dev);
|
|
380
380
|
|
|
381
381
|
private:
|
|
382
382
|
explicit hll_sketch_alloc(HllSketchImpl<A>* that);
|
|
383
383
|
|
|
384
|
-
void coupon_update(
|
|
384
|
+
void coupon_update(uint32_t coupon);
|
|
385
385
|
|
|
386
386
|
std::string type_as_string() const;
|
|
387
387
|
std::string mode_as_string() const;
|
|
388
388
|
|
|
389
389
|
hll_mode get_current_mode() const;
|
|
390
|
-
|
|
390
|
+
uint8_t get_serialization_version() const;
|
|
391
391
|
bool is_out_of_order_flag() const;
|
|
392
392
|
bool is_estimation_mode() const;
|
|
393
393
|
|
|
@@ -431,7 +431,7 @@ class hll_union_alloc {
|
|
|
431
431
|
* @param lg_max_k The maximum size, in log2, of k. The value must
|
|
432
432
|
* be between 7 and 21, inclusive.
|
|
433
433
|
*/
|
|
434
|
-
explicit hll_union_alloc(
|
|
434
|
+
explicit hll_union_alloc(uint8_t lg_max_k, const A& allocator = A());
|
|
435
435
|
|
|
436
436
|
/**
|
|
437
437
|
* Returns the current cardinality estimate
|
|
@@ -458,7 +458,7 @@ class hll_union_alloc {
|
|
|
458
458
|
* @param num_std_dev Number of standard deviations, an integer from the set {1, 2, 3}.
|
|
459
459
|
* @return The approximate lower bound.
|
|
460
460
|
*/
|
|
461
|
-
double get_lower_bound(
|
|
461
|
+
double get_lower_bound(uint8_t num_std_dev) const;
|
|
462
462
|
|
|
463
463
|
/**
|
|
464
464
|
* Returns the approximate upper error bound given the specified
|
|
@@ -466,13 +466,13 @@ class hll_union_alloc {
|
|
|
466
466
|
* @param num_std_dev Number of standard deviations, an integer from the set {1, 2, 3}.
|
|
467
467
|
* @return The approximate upper bound.
|
|
468
468
|
*/
|
|
469
|
-
double get_upper_bound(
|
|
469
|
+
double get_upper_bound(uint8_t num_std_dev) const;
|
|
470
470
|
|
|
471
471
|
/**
|
|
472
472
|
* Returns union's configured lg_k value.
|
|
473
473
|
* @return Configured lg_k value.
|
|
474
474
|
*/
|
|
475
|
-
|
|
475
|
+
uint8_t get_lg_config_k() const;
|
|
476
476
|
|
|
477
477
|
/**
|
|
478
478
|
* Returns the union's target HLL mode (from #target_hll_type).
|
|
@@ -598,7 +598,7 @@ class hll_union_alloc {
|
|
|
598
598
|
* @return the current (approximate) RelativeError
|
|
599
599
|
*/
|
|
600
600
|
static double get_rel_err(bool upper_bound, bool unioned,
|
|
601
|
-
|
|
601
|
+
uint8_t lg_config_k, uint8_t num_std_dev);
|
|
602
602
|
|
|
603
603
|
private:
|
|
604
604
|
|
|
@@ -611,21 +611,21 @@ class hll_union_alloc {
|
|
|
611
611
|
* @param incoming_impl the given incoming sketch, which may not be modified.
|
|
612
612
|
* @param lg_max_k the maximum value of log2 K for this union.
|
|
613
613
|
*/
|
|
614
|
-
inline void union_impl(const hll_sketch_alloc<A>& sketch,
|
|
614
|
+
inline void union_impl(const hll_sketch_alloc<A>& sketch, uint8_t lg_max_k);
|
|
615
615
|
|
|
616
|
-
static HllSketchImpl<A>* copy_or_downsample(const HllSketchImpl<A>* src_impl,
|
|
616
|
+
static HllSketchImpl<A>* copy_or_downsample(const HllSketchImpl<A>* src_impl, uint8_t tgt_lg_k);
|
|
617
617
|
|
|
618
|
-
void coupon_update(
|
|
618
|
+
void coupon_update(uint32_t coupon);
|
|
619
619
|
|
|
620
620
|
hll_mode get_current_mode() const;
|
|
621
621
|
bool is_out_of_order_flag() const;
|
|
622
622
|
bool is_estimation_mode() const;
|
|
623
623
|
|
|
624
624
|
// calls couponUpdate on sketch, freeing the old sketch upon changes in hll_mode
|
|
625
|
-
static HllSketchImpl<A>* leak_free_coupon_update(HllSketchImpl<A>* impl,
|
|
625
|
+
static HllSketchImpl<A>* leak_free_coupon_update(HllSketchImpl<A>* impl, uint32_t coupon);
|
|
626
626
|
|
|
627
|
-
|
|
628
|
-
hll_sketch_alloc<A>
|
|
627
|
+
uint8_t lg_max_k_;
|
|
628
|
+
hll_sketch_alloc<A> gadget_;
|
|
629
629
|
};
|
|
630
630
|
|
|
631
631
|
/// convenience alias for hll_sketch with default allocator
|
|
@@ -45,7 +45,7 @@ TEST_CASE("aux hash map: check grow space", "[aux_hash_map]") {
|
|
|
45
45
|
AuxHashMap<std::allocator<uint8_t>>::make_deleter()
|
|
46
46
|
);
|
|
47
47
|
REQUIRE(map->getLgAuxArrInts() == 3);
|
|
48
|
-
for (
|
|
48
|
+
for (uint8_t i = 1; i <= 7; ++i) {
|
|
49
49
|
map->mustAdd(i, i);
|
|
50
50
|
}
|
|
51
51
|
REQUIRE(map->getLgAuxArrInts() == 4);
|
|
@@ -30,7 +30,7 @@
|
|
|
30
30
|
namespace datasketches {
|
|
31
31
|
|
|
32
32
|
TEST_CASE("coupon hash set: check corrupt bytearray", "[coupon_hash_set]") {
|
|
33
|
-
|
|
33
|
+
uint8_t lgK = 8;
|
|
34
34
|
hll_sketch sk1(lgK);
|
|
35
35
|
for (int i = 0; i < 24; ++i) {
|
|
36
36
|
sk1.update(i);
|
|
@@ -39,42 +39,42 @@ TEST_CASE("coupon hash set: check corrupt bytearray", "[coupon_hash_set]") {
|
|
|
39
39
|
uint8_t* bytes = sketchBytes.data();
|
|
40
40
|
const size_t size = sketchBytes.size();
|
|
41
41
|
|
|
42
|
-
bytes[
|
|
42
|
+
bytes[hll_constants::PREAMBLE_INTS_BYTE] = 0;
|
|
43
43
|
// fail in HllSketchImpl
|
|
44
44
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
|
|
45
45
|
// fail in CouponHashSet
|
|
46
46
|
REQUIRE_THROWS_AS(CouponHashSet<std::allocator<uint8_t>>::newSet(bytes, size, std::allocator<uint8_t>()), std::invalid_argument);
|
|
47
|
-
bytes[
|
|
47
|
+
bytes[hll_constants::PREAMBLE_INTS_BYTE] = hll_constants::HASH_SET_PREINTS;
|
|
48
48
|
|
|
49
|
-
bytes[
|
|
49
|
+
bytes[hll_constants::SER_VER_BYTE] = 0;
|
|
50
50
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
|
|
51
|
-
bytes[
|
|
51
|
+
bytes[hll_constants::SER_VER_BYTE] = hll_constants::SER_VER;
|
|
52
52
|
|
|
53
|
-
bytes[
|
|
53
|
+
bytes[hll_constants::FAMILY_BYTE] = 0;
|
|
54
54
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
|
|
55
|
-
bytes[
|
|
55
|
+
bytes[hll_constants::FAMILY_BYTE] = hll_constants::FAMILY_ID;
|
|
56
56
|
|
|
57
|
-
bytes[
|
|
57
|
+
bytes[hll_constants::LG_K_BYTE] = 6;
|
|
58
58
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
|
|
59
|
-
bytes[
|
|
59
|
+
bytes[hll_constants::LG_K_BYTE] = lgK;
|
|
60
60
|
|
|
61
|
-
uint8_t tmp = bytes[
|
|
62
|
-
bytes[
|
|
61
|
+
uint8_t tmp = bytes[hll_constants::MODE_BYTE];
|
|
62
|
+
bytes[hll_constants::MODE_BYTE] = 0x10; // HLL_6, LIST
|
|
63
63
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
|
|
64
|
-
bytes[
|
|
64
|
+
bytes[hll_constants::MODE_BYTE] = tmp;
|
|
65
65
|
|
|
66
|
-
tmp = bytes[
|
|
67
|
-
bytes[
|
|
66
|
+
tmp = bytes[hll_constants::LG_ARR_BYTE];
|
|
67
|
+
bytes[hll_constants::LG_ARR_BYTE] = 0;
|
|
68
68
|
hll_sketch::deserialize(bytes, size);
|
|
69
69
|
// should work fine despite the corruption
|
|
70
|
-
bytes[
|
|
70
|
+
bytes[hll_constants::LG_ARR_BYTE] = tmp;
|
|
71
71
|
|
|
72
72
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size - 1), std::out_of_range);
|
|
73
73
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, 3), std::out_of_range);
|
|
74
74
|
}
|
|
75
75
|
|
|
76
76
|
TEST_CASE("coupon hash set: check corrupt stream", "[coupon_hash_set]") {
|
|
77
|
-
|
|
77
|
+
uint8_t lgK = 9;
|
|
78
78
|
hll_sketch sk1(lgK);
|
|
79
79
|
for (int i = 0; i < 24; ++i) {
|
|
80
80
|
sk1.update(i);
|
|
@@ -82,48 +82,48 @@ TEST_CASE("coupon hash set: check corrupt stream", "[coupon_hash_set]") {
|
|
|
82
82
|
std::stringstream ss;
|
|
83
83
|
sk1.serialize_compact(ss);
|
|
84
84
|
|
|
85
|
-
ss.seekp(
|
|
85
|
+
ss.seekp(hll_constants::PREAMBLE_INTS_BYTE);
|
|
86
86
|
ss.put(0);
|
|
87
87
|
ss.seekg(0);
|
|
88
88
|
// fail in HllSketchImpl
|
|
89
89
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
|
|
90
90
|
// fail in CouponHashSet
|
|
91
91
|
REQUIRE_THROWS_AS(CouponHashSet<std::allocator<uint8_t>>::newSet(ss, std::allocator<uint8_t>()), std::invalid_argument);
|
|
92
|
-
ss.seekp(
|
|
93
|
-
ss.put(
|
|
92
|
+
ss.seekp(hll_constants::PREAMBLE_INTS_BYTE);
|
|
93
|
+
ss.put(hll_constants::HASH_SET_PREINTS);
|
|
94
94
|
|
|
95
|
-
ss.seekp(
|
|
95
|
+
ss.seekp(hll_constants::SER_VER_BYTE);
|
|
96
96
|
ss.put(0);
|
|
97
97
|
ss.seekg(0);
|
|
98
98
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
|
|
99
|
-
ss.seekp(
|
|
100
|
-
ss.put(
|
|
99
|
+
ss.seekp(hll_constants::SER_VER_BYTE);
|
|
100
|
+
ss.put(hll_constants::SER_VER);
|
|
101
101
|
|
|
102
|
-
ss.seekp(
|
|
102
|
+
ss.seekp(hll_constants::FAMILY_BYTE);
|
|
103
103
|
ss.put(0);
|
|
104
104
|
ss.seekg(0);
|
|
105
105
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
|
|
106
|
-
ss.seekp(
|
|
107
|
-
ss.put(
|
|
106
|
+
ss.seekp(hll_constants::FAMILY_BYTE);
|
|
107
|
+
ss.put(hll_constants::FAMILY_ID);
|
|
108
108
|
|
|
109
|
-
ss.seekg(
|
|
110
|
-
|
|
111
|
-
ss.seekp(
|
|
109
|
+
ss.seekg(hll_constants::MODE_BYTE);
|
|
110
|
+
auto tmp = ss.get();
|
|
111
|
+
ss.seekp(hll_constants::MODE_BYTE);
|
|
112
112
|
ss.put(0x22); // HLL_8, HLL
|
|
113
113
|
ss.seekg(0);
|
|
114
114
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
|
|
115
|
-
ss.seekp(
|
|
116
|
-
ss.put(tmp);
|
|
115
|
+
ss.seekp(hll_constants::MODE_BYTE);
|
|
116
|
+
ss.put((char)tmp);
|
|
117
117
|
|
|
118
|
-
ss.seekg(
|
|
118
|
+
ss.seekg(hll_constants::LG_ARR_BYTE);
|
|
119
119
|
tmp = ss.get();
|
|
120
|
-
ss.seekp(
|
|
120
|
+
ss.seekp(hll_constants::LG_ARR_BYTE);
|
|
121
121
|
ss.put(0);
|
|
122
122
|
ss.seekg(0);
|
|
123
123
|
hll_sketch::deserialize(ss);
|
|
124
124
|
// should work fine despite the corruption
|
|
125
|
-
ss.seekp(
|
|
126
|
-
ss.put(tmp);
|
|
125
|
+
ss.seekp(hll_constants::LG_ARR_BYTE);
|
|
126
|
+
ss.put((char)tmp);
|
|
127
127
|
}
|
|
128
128
|
|
|
129
129
|
|
|
@@ -35,9 +35,9 @@ void println_string(std::string str) {
|
|
|
35
35
|
}
|
|
36
36
|
|
|
37
37
|
TEST_CASE("coupon list: check iterator", "[coupon_list]") {
|
|
38
|
-
|
|
38
|
+
uint8_t lgConfigK = 8;
|
|
39
39
|
CouponList<std::allocator<uint8_t>> cl(lgConfigK, HLL_4, LIST, std::allocator<uint8_t>());
|
|
40
|
-
for (
|
|
40
|
+
for (uint8_t i = 1; i <= 7; ++i) { cl.couponUpdate(HllUtil<>::pair(i, i)); } // not hashes but distinct values
|
|
41
41
|
const int mask = (1 << lgConfigK) - 1;
|
|
42
42
|
int idx = 0;
|
|
43
43
|
auto itr = cl.begin(false);
|
|
@@ -56,7 +56,7 @@ TEST_CASE("coupon list: check iterator", "[coupon_list]") {
|
|
|
56
56
|
}
|
|
57
57
|
|
|
58
58
|
TEST_CASE("coupon list: check duplicates and misc", "[coupon_list]") {
|
|
59
|
-
|
|
59
|
+
uint8_t lgConfigK = 8;
|
|
60
60
|
hll_sketch sk(lgConfigK);
|
|
61
61
|
|
|
62
62
|
for (int i = 1; i <= 7; ++i) {
|
|
@@ -79,7 +79,7 @@ TEST_CASE("coupon list: check duplicates and misc", "[coupon_list]") {
|
|
|
79
79
|
REQUIRE(relErr < 0.0);
|
|
80
80
|
}
|
|
81
81
|
|
|
82
|
-
static void serializeDeserialize(
|
|
82
|
+
static void serializeDeserialize(uint8_t lgK) {
|
|
83
83
|
hll_sketch sk1(lgK);
|
|
84
84
|
|
|
85
85
|
int u = (lgK < 8) ? 7 : (((1 << (lgK - 3))/ 4) * 3);
|
|
@@ -110,7 +110,7 @@ TEST_CASE("coupon list: check serialize deserialize", "[coupon_list]") {
|
|
|
110
110
|
}
|
|
111
111
|
|
|
112
112
|
TEST_CASE("coupon list: check corrupt bytearray data", "[coupon_list]") {
|
|
113
|
-
|
|
113
|
+
uint8_t lgK = 6;
|
|
114
114
|
hll_sketch sk1(lgK);
|
|
115
115
|
sk1.update(1);
|
|
116
116
|
sk1.update(2);
|
|
@@ -118,24 +118,24 @@ TEST_CASE("coupon list: check corrupt bytearray data", "[coupon_list]") {
|
|
|
118
118
|
uint8_t* bytes = sketchBytes.data();
|
|
119
119
|
const size_t size = sketchBytes.size();
|
|
120
120
|
|
|
121
|
-
bytes[
|
|
121
|
+
bytes[hll_constants::PREAMBLE_INTS_BYTE] = 0;
|
|
122
122
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
|
|
123
123
|
REQUIRE_THROWS_AS(CouponList<std::allocator<uint8_t>>::newList(bytes, size, std::allocator<uint8_t>()), std::invalid_argument);
|
|
124
124
|
|
|
125
|
-
bytes[
|
|
125
|
+
bytes[hll_constants::PREAMBLE_INTS_BYTE] = hll_constants::LIST_PREINTS;
|
|
126
126
|
|
|
127
|
-
bytes[
|
|
127
|
+
bytes[hll_constants::SER_VER_BYTE] = 0;
|
|
128
128
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
|
|
129
|
-
bytes[
|
|
129
|
+
bytes[hll_constants::SER_VER_BYTE] = hll_constants::SER_VER;
|
|
130
130
|
|
|
131
|
-
bytes[
|
|
131
|
+
bytes[hll_constants::FAMILY_BYTE] = 0;
|
|
132
132
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
|
|
133
|
-
bytes[
|
|
133
|
+
bytes[hll_constants::FAMILY_BYTE] = hll_constants::FAMILY_ID;
|
|
134
134
|
|
|
135
|
-
uint8_t tmp = bytes[
|
|
136
|
-
bytes[
|
|
135
|
+
uint8_t tmp = bytes[hll_constants::MODE_BYTE];
|
|
136
|
+
bytes[hll_constants::MODE_BYTE] = 0x01; // HLL_4, SET
|
|
137
137
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size), std::invalid_argument);
|
|
138
|
-
bytes[
|
|
138
|
+
bytes[hll_constants::MODE_BYTE] = tmp;
|
|
139
139
|
|
|
140
140
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(bytes, size - 1), std::out_of_range);
|
|
141
141
|
|
|
@@ -143,36 +143,36 @@ TEST_CASE("coupon list: check corrupt bytearray data", "[coupon_list]") {
|
|
|
143
143
|
}
|
|
144
144
|
|
|
145
145
|
TEST_CASE("coupon list: check corrupt stream data", "[coupon_list]") {
|
|
146
|
-
|
|
146
|
+
uint8_t lgK = 6;
|
|
147
147
|
hll_sketch sk1(lgK);
|
|
148
148
|
sk1.update(1);
|
|
149
149
|
sk1.update(2);
|
|
150
150
|
std::stringstream ss;
|
|
151
151
|
sk1.serialize_compact(ss);
|
|
152
152
|
|
|
153
|
-
ss.seekp(
|
|
153
|
+
ss.seekp(hll_constants::PREAMBLE_INTS_BYTE);
|
|
154
154
|
ss.put(0);
|
|
155
155
|
ss.seekg(0);
|
|
156
156
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
|
|
157
157
|
REQUIRE_THROWS_AS(CouponList<std::allocator<uint8_t>>::newList(ss, std::allocator<uint8_t>()), std::invalid_argument);
|
|
158
|
-
ss.seekp(
|
|
159
|
-
ss.put(
|
|
158
|
+
ss.seekp(hll_constants::PREAMBLE_INTS_BYTE);
|
|
159
|
+
ss.put(hll_constants::LIST_PREINTS);
|
|
160
160
|
|
|
161
|
-
ss.seekp(
|
|
161
|
+
ss.seekp(hll_constants::SER_VER_BYTE);
|
|
162
162
|
ss.put(0);
|
|
163
163
|
ss.seekg(0);
|
|
164
164
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
|
|
165
|
-
ss.seekp(
|
|
166
|
-
ss.put(
|
|
165
|
+
ss.seekp(hll_constants::SER_VER_BYTE);
|
|
166
|
+
ss.put(hll_constants::SER_VER);
|
|
167
167
|
|
|
168
|
-
ss.seekp(
|
|
168
|
+
ss.seekp(hll_constants::FAMILY_BYTE);
|
|
169
169
|
ss.put(0);
|
|
170
170
|
ss.seekg(0);
|
|
171
171
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
|
|
172
|
-
ss.seekp(
|
|
173
|
-
ss.put(
|
|
172
|
+
ss.seekp(hll_constants::FAMILY_BYTE);
|
|
173
|
+
ss.put(hll_constants::FAMILY_ID);
|
|
174
174
|
|
|
175
|
-
ss.seekp(
|
|
175
|
+
ss.seekp(hll_constants::MODE_BYTE);
|
|
176
176
|
ss.put(0x22); // HLL_8, HLL
|
|
177
177
|
ss.seekg(0);
|
|
178
178
|
REQUIRE_THROWS_AS(hll_sketch::deserialize(ss), std::invalid_argument);
|