datasketches 0.2.1 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +7 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +24 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +14 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +121 -87
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +65 -80
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +28 -28
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +34 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +72 -62
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +68 -45
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +6 -6
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +9 -9
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +47 -56
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +34 -42
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +42 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +107 -58
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +4 -4
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +2 -0
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -28
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +51 -64
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +12 -12
- metadata +8 -3
@@ -315,7 +315,7 @@ uint64_t compact_tuple_sketch<S, A>::get_theta64() const {
|
|
315
315
|
|
316
316
|
template<typename S, typename A>
|
317
317
|
uint32_t compact_tuple_sketch<S, A>::get_num_retained() const {
|
318
|
-
return entries_.size();
|
318
|
+
return static_cast<uint32_t>(entries_.size());
|
319
319
|
}
|
320
320
|
|
321
321
|
template<typename S, typename A>
|
@@ -347,36 +347,36 @@ template<typename SerDe>
|
|
347
347
|
void compact_tuple_sketch<S, A>::serialize(std::ostream& os, const SerDe& sd) const {
|
348
348
|
const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
|
349
349
|
const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
|
350
|
-
|
350
|
+
write(os, preamble_longs);
|
351
351
|
const uint8_t serial_version = SERIAL_VERSION;
|
352
|
-
|
352
|
+
write(os, serial_version);
|
353
353
|
const uint8_t family = SKETCH_FAMILY;
|
354
|
-
|
354
|
+
write(os, family);
|
355
355
|
const uint8_t type = SKETCH_TYPE;
|
356
|
-
|
356
|
+
write(os, type);
|
357
357
|
const uint8_t unused8 = 0;
|
358
|
-
|
358
|
+
write(os, unused8);
|
359
359
|
const uint8_t flags_byte(
|
360
360
|
(1 << flags::IS_COMPACT) |
|
361
361
|
(1 << flags::IS_READ_ONLY) |
|
362
362
|
(this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
|
363
363
|
(this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
|
364
364
|
);
|
365
|
-
|
365
|
+
write(os, flags_byte);
|
366
366
|
const uint16_t seed_hash = get_seed_hash();
|
367
|
-
|
367
|
+
write(os, seed_hash);
|
368
368
|
if (!this->is_empty()) {
|
369
369
|
if (!is_single_item) {
|
370
|
-
const uint32_t num_entries = entries_.size();
|
371
|
-
|
370
|
+
const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
|
371
|
+
write(os, num_entries);
|
372
372
|
const uint32_t unused32 = 0;
|
373
|
-
|
373
|
+
write(os, unused32);
|
374
374
|
if (this->is_estimation_mode()) {
|
375
|
-
|
375
|
+
write(os, this->theta_);
|
376
376
|
}
|
377
377
|
}
|
378
378
|
for (const auto& it: entries_) {
|
379
|
-
|
379
|
+
write(os, it.first);
|
380
380
|
sd.serialize(os, &it.second, 1);
|
381
381
|
}
|
382
382
|
}
|
@@ -393,36 +393,34 @@ auto compact_tuple_sketch<S, A>::serialize(unsigned header_size_bytes, const Ser
|
|
393
393
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
394
394
|
const uint8_t* end_ptr = ptr + size;
|
395
395
|
|
396
|
-
ptr += copy_to_mem(
|
396
|
+
ptr += copy_to_mem(preamble_longs, ptr);
|
397
397
|
const uint8_t serial_version = SERIAL_VERSION;
|
398
|
-
ptr += copy_to_mem(
|
398
|
+
ptr += copy_to_mem(serial_version, ptr);
|
399
399
|
const uint8_t family = SKETCH_FAMILY;
|
400
|
-
ptr += copy_to_mem(
|
400
|
+
ptr += copy_to_mem(family, ptr);
|
401
401
|
const uint8_t type = SKETCH_TYPE;
|
402
|
-
ptr += copy_to_mem(
|
403
|
-
|
404
|
-
ptr += copy_to_mem(&unused8, ptr, sizeof(unused8));
|
402
|
+
ptr += copy_to_mem(type, ptr);
|
403
|
+
ptr += sizeof(uint8_t); // unused
|
405
404
|
const uint8_t flags_byte(
|
406
405
|
(1 << flags::IS_COMPACT) |
|
407
406
|
(1 << flags::IS_READ_ONLY) |
|
408
407
|
(this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
|
409
408
|
(this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
|
410
409
|
);
|
411
|
-
ptr += copy_to_mem(
|
410
|
+
ptr += copy_to_mem(flags_byte, ptr);
|
412
411
|
const uint16_t seed_hash = get_seed_hash();
|
413
|
-
ptr += copy_to_mem(
|
412
|
+
ptr += copy_to_mem(seed_hash, ptr);
|
414
413
|
if (!this->is_empty()) {
|
415
414
|
if (!is_single_item) {
|
416
|
-
const uint32_t num_entries = entries_.size();
|
417
|
-
ptr += copy_to_mem(
|
418
|
-
|
419
|
-
ptr += copy_to_mem(&unused32, ptr, sizeof(unused32));
|
415
|
+
const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
|
416
|
+
ptr += copy_to_mem(num_entries, ptr);
|
417
|
+
ptr += sizeof(uint32_t); // unused
|
420
418
|
if (this->is_estimation_mode()) {
|
421
|
-
ptr += copy_to_mem(
|
419
|
+
ptr += copy_to_mem(theta_, ptr);
|
422
420
|
}
|
423
421
|
}
|
424
422
|
for (const auto& it: entries_) {
|
425
|
-
ptr += copy_to_mem(
|
423
|
+
ptr += copy_to_mem(it.first, ptr);
|
426
424
|
ptr += sd.serialize(ptr, end_ptr - ptr, &it.second, 1);
|
427
425
|
}
|
428
426
|
}
|
@@ -432,20 +430,13 @@ auto compact_tuple_sketch<S, A>::serialize(unsigned header_size_bytes, const Ser
|
|
432
430
|
template<typename S, typename A>
|
433
431
|
template<typename SerDe>
|
434
432
|
compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream& is, uint64_t seed, const SerDe& sd, const A& allocator) {
|
435
|
-
|
436
|
-
|
437
|
-
uint8_t
|
438
|
-
|
439
|
-
uint8_t
|
440
|
-
|
441
|
-
|
442
|
-
is.read(reinterpret_cast<char*>(&type), sizeof(type));
|
443
|
-
uint8_t unused8;
|
444
|
-
is.read(reinterpret_cast<char*>(&unused8), sizeof(unused8));
|
445
|
-
uint8_t flags_byte;
|
446
|
-
is.read(reinterpret_cast<char*>(&flags_byte), sizeof(flags_byte));
|
447
|
-
uint16_t seed_hash;
|
448
|
-
is.read(reinterpret_cast<char*>(&seed_hash), sizeof(seed_hash));
|
433
|
+
const auto preamble_longs = read<uint8_t>(is);
|
434
|
+
const auto serial_version = read<uint8_t>(is);
|
435
|
+
const auto family = read<uint8_t>(is);
|
436
|
+
const auto type = read<uint8_t>(is);
|
437
|
+
read<uint8_t>(is); // unused
|
438
|
+
const auto flags_byte = read<uint8_t>(is);
|
439
|
+
const auto seed_hash = read<uint16_t>(is);
|
449
440
|
checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
|
450
441
|
checker<true>::check_sketch_family(family, SKETCH_FAMILY);
|
451
442
|
checker<true>::check_sketch_type(type, SKETCH_TYPE);
|
@@ -458,11 +449,10 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream&
|
|
458
449
|
if (preamble_longs == 1) {
|
459
450
|
num_entries = 1;
|
460
451
|
} else {
|
461
|
-
|
462
|
-
uint32_t
|
463
|
-
is.read(reinterpret_cast<char*>(&unused32), sizeof(unused32));
|
452
|
+
num_entries = read<uint32_t>(is);
|
453
|
+
read<uint32_t>(is); // unused
|
464
454
|
if (preamble_longs > 2) {
|
465
|
-
|
455
|
+
theta = read<uint64_t>(is);
|
466
456
|
}
|
467
457
|
}
|
468
458
|
}
|
@@ -472,8 +462,7 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream&
|
|
472
462
|
entries.reserve(num_entries);
|
473
463
|
std::unique_ptr<S, deleter_of_summaries> summary(alloc.allocate(1), deleter_of_summaries(1, false, allocator));
|
474
464
|
for (size_t i = 0; i < num_entries; ++i) {
|
475
|
-
|
476
|
-
is.read(reinterpret_cast<char*>(&key), sizeof(uint64_t));
|
465
|
+
const auto key = read<uint64_t>(is);
|
477
466
|
sd.deserialize(is, summary.get(), 1);
|
478
467
|
entries.push_back(Entry(key, std::move(*summary)));
|
479
468
|
(*summary).~S();
|
@@ -491,19 +480,18 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
|
|
491
480
|
const char* ptr = static_cast<const char*>(bytes);
|
492
481
|
const char* base = ptr;
|
493
482
|
uint8_t preamble_longs;
|
494
|
-
ptr += copy_from_mem(ptr,
|
483
|
+
ptr += copy_from_mem(ptr, preamble_longs);
|
495
484
|
uint8_t serial_version;
|
496
|
-
ptr += copy_from_mem(ptr,
|
485
|
+
ptr += copy_from_mem(ptr, serial_version);
|
497
486
|
uint8_t family;
|
498
|
-
ptr += copy_from_mem(ptr,
|
487
|
+
ptr += copy_from_mem(ptr, family);
|
499
488
|
uint8_t type;
|
500
|
-
ptr += copy_from_mem(ptr,
|
501
|
-
uint8_t
|
502
|
-
ptr += copy_from_mem(ptr, &unused8, sizeof(unused8));
|
489
|
+
ptr += copy_from_mem(ptr, type);
|
490
|
+
ptr += sizeof(uint8_t); // unused
|
503
491
|
uint8_t flags_byte;
|
504
|
-
ptr += copy_from_mem(ptr,
|
492
|
+
ptr += copy_from_mem(ptr, flags_byte);
|
505
493
|
uint16_t seed_hash;
|
506
|
-
ptr += copy_from_mem(ptr,
|
494
|
+
ptr += copy_from_mem(ptr, seed_hash);
|
507
495
|
checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
|
508
496
|
checker<true>::check_sketch_family(family, SKETCH_FAMILY);
|
509
497
|
checker<true>::check_sketch_type(type, SKETCH_TYPE);
|
@@ -518,12 +506,11 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
|
|
518
506
|
num_entries = 1;
|
519
507
|
} else {
|
520
508
|
ensure_minimum_memory(size, 8); // read the first prelong before this method
|
521
|
-
ptr += copy_from_mem(ptr,
|
522
|
-
uint32_t
|
523
|
-
ptr += copy_from_mem(ptr, &unused32, sizeof(unused32));
|
509
|
+
ptr += copy_from_mem(ptr, num_entries);
|
510
|
+
ptr += sizeof(uint32_t); // unused
|
524
511
|
if (preamble_longs > 2) {
|
525
512
|
ensure_minimum_memory(size, (preamble_longs - 1) << 3);
|
526
|
-
ptr += copy_from_mem(ptr,
|
513
|
+
ptr += copy_from_mem(ptr, theta);
|
527
514
|
}
|
528
515
|
}
|
529
516
|
}
|
@@ -536,7 +523,7 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
|
|
536
523
|
std::unique_ptr<S, deleter_of_summaries> summary(alloc.allocate(1), deleter_of_summaries(1, false, allocator));
|
537
524
|
for (size_t i = 0; i < num_entries; ++i) {
|
538
525
|
uint64_t key;
|
539
|
-
ptr += copy_from_mem(ptr,
|
526
|
+
ptr += copy_from_mem(ptr, key);
|
540
527
|
ptr += sd.deserialize(ptr, base + size - ptr, summary.get(), 1);
|
541
528
|
entries.push_back(Entry(key, std::move(*summary)));
|
542
529
|
(*summary).~S();
|
@@ -548,22 +535,22 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
|
|
548
535
|
|
549
536
|
template<typename S, typename A>
|
550
537
|
auto compact_tuple_sketch<S, A>::begin() -> iterator {
|
551
|
-
return iterator(entries_.data(), entries_.size(), 0);
|
538
|
+
return iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
|
552
539
|
}
|
553
540
|
|
554
541
|
template<typename S, typename A>
|
555
542
|
auto compact_tuple_sketch<S, A>::end() -> iterator {
|
556
|
-
return iterator(nullptr, 0, entries_.size());
|
543
|
+
return iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
|
557
544
|
}
|
558
545
|
|
559
546
|
template<typename S, typename A>
|
560
547
|
auto compact_tuple_sketch<S, A>::begin() const -> const_iterator {
|
561
|
-
return const_iterator(entries_.data(), entries_.size(), 0);
|
548
|
+
return const_iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
|
562
549
|
}
|
563
550
|
|
564
551
|
template<typename S, typename A>
|
565
552
|
auto compact_tuple_sketch<S, A>::end() const -> const_iterator {
|
566
|
-
return const_iterator(nullptr, 0, entries_.size());
|
553
|
+
return const_iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
|
567
554
|
}
|
568
555
|
|
569
556
|
template<typename S, typename A>
|
@@ -75,7 +75,7 @@ TEST_CASE("aod sketch: serialization compatibility with java - empty configured
|
|
75
75
|
}
|
76
76
|
|
77
77
|
TEST_CASE("aod sketch: serialization compatibility with java - non-empty no entries", "[tuple_sketch]") {
|
78
|
-
auto update_sketch = update_array_of_doubles_sketch::builder().set_p(0.01).build();
|
78
|
+
auto update_sketch = update_array_of_doubles_sketch::builder().set_p(0.01f).build();
|
79
79
|
std::vector<double> a = {1};
|
80
80
|
update_sketch.update(1, a);
|
81
81
|
REQUIRE_FALSE(update_sketch.is_empty());
|
@@ -38,8 +38,8 @@ TEST_CASE("tuple a-not-b: empty", "[tuple_a_not_b]") {
|
|
38
38
|
|
39
39
|
TEST_CASE("tuple a-not-b: non empty no retained keys", "[tuple_a_not_b]") {
|
40
40
|
auto a = update_tuple_sketch<float>::builder().build();
|
41
|
-
a.update(1, 1);
|
42
|
-
auto b = update_tuple_sketch<float>::builder().set_p(0.001).build();
|
41
|
+
a.update(1, 1.0f);
|
42
|
+
auto b = update_tuple_sketch<float>::builder().set_p(0.001f).build();
|
43
43
|
tuple_a_not_b<float> a_not_b;
|
44
44
|
|
45
45
|
// B is still empty
|
@@ -51,7 +51,7 @@ TEST_CASE("tuple a-not-b: non empty no retained keys", "[tuple_a_not_b]") {
|
|
51
51
|
REQUIRE(result.get_estimate() == 1.0);
|
52
52
|
|
53
53
|
// B is not empty in estimation mode and no entries
|
54
|
-
b.update(1, 1);
|
54
|
+
b.update(1, 1.0f);
|
55
55
|
REQUIRE(b.get_num_retained() == 0);
|
56
56
|
|
57
57
|
result = a_not_b.compute(a, b);
|
@@ -65,11 +65,11 @@ TEST_CASE("tuple a-not-b: non empty no retained keys", "[tuple_a_not_b]") {
|
|
65
65
|
TEST_CASE("tuple a-not-b: exact mode half overlap", "[tuple_a_not_b]") {
|
66
66
|
auto a = update_tuple_sketch<float>::builder().build();
|
67
67
|
int value = 0;
|
68
|
-
for (int i = 0; i < 1000; i++) a.update(value++, 1);
|
68
|
+
for (int i = 0; i < 1000; i++) a.update(value++, 1.0f);
|
69
69
|
|
70
70
|
auto b = update_tuple_sketch<float>::builder().build();
|
71
71
|
value = 500;
|
72
|
-
for (int i = 0; i < 1000; i++) b.update(value++, 1);
|
72
|
+
for (int i = 0; i < 1000; i++) b.update(value++, 1.0f);
|
73
73
|
|
74
74
|
tuple_a_not_b<float> a_not_b;
|
75
75
|
|
@@ -105,7 +105,7 @@ TEST_CASE("tuple a-not-b: exact mode half overlap", "[tuple_a_not_b]") {
|
|
105
105
|
TEST_CASE("mixed a-not-b: exact mode half overlap", "[tuple_a_not_b]") {
|
106
106
|
auto a = update_tuple_sketch<float>::builder().build();
|
107
107
|
int value = 0;
|
108
|
-
for (int i = 0; i < 1000; i++) a.update(value++, 1);
|
108
|
+
for (int i = 0; i < 1000; i++) a.update(value++, 1.0f);
|
109
109
|
|
110
110
|
auto b = update_theta_sketch::builder().build();
|
111
111
|
value = 500;
|
@@ -145,10 +145,10 @@ TEST_CASE("mixed a-not-b: exact mode half overlap", "[tuple_a_not_b]") {
|
|
145
145
|
TEST_CASE("tuple a-not-b: exact mode disjoint", "[tuple_a_not_b]") {
|
146
146
|
auto a = update_tuple_sketch<float>::builder().build();
|
147
147
|
int value = 0;
|
148
|
-
for (int i = 0; i < 1000; i++) a.update(value++, 1);
|
148
|
+
for (int i = 0; i < 1000; i++) a.update(value++, 1.0f);
|
149
149
|
|
150
150
|
auto b = update_tuple_sketch<float>::builder().build();
|
151
|
-
for (int i = 0; i < 1000; i++) b.update(value++, 1);
|
151
|
+
for (int i = 0; i < 1000; i++) b.update(value++, 1.0f);
|
152
152
|
|
153
153
|
tuple_a_not_b<float> a_not_b;
|
154
154
|
|
@@ -168,7 +168,7 @@ TEST_CASE("tuple a-not-b: exact mode disjoint", "[tuple_a_not_b]") {
|
|
168
168
|
TEST_CASE("tuple a-not-b: exact mode full overlap", "[tuple_a_not_b]") {
|
169
169
|
auto sketch = update_tuple_sketch<float>::builder().build();
|
170
170
|
int value = 0;
|
171
|
-
for (int i = 0; i < 1000; i++) sketch.update(value++, 1);
|
171
|
+
for (int i = 0; i < 1000; i++) sketch.update(value++, 1.0f);
|
172
172
|
|
173
173
|
tuple_a_not_b<float> a_not_b;
|
174
174
|
|
@@ -188,11 +188,11 @@ TEST_CASE("tuple a-not-b: exact mode full overlap", "[tuple_a_not_b]") {
|
|
188
188
|
TEST_CASE("tuple a-not-b: estimation mode half overlap", "[tuple_a_not_b]") {
|
189
189
|
auto a = update_tuple_sketch<float>::builder().build();
|
190
190
|
int value = 0;
|
191
|
-
for (int i = 0; i < 10000; i++) a.update(value++, 1);
|
191
|
+
for (int i = 0; i < 10000; i++) a.update(value++, 1.0f);
|
192
192
|
|
193
193
|
auto b = update_tuple_sketch<float>::builder().build();
|
194
194
|
value = 5000;
|
195
|
-
for (int i = 0; i < 10000; i++) b.update(value++, 1);
|
195
|
+
for (int i = 0; i < 10000; i++) b.update(value++, 1.0f);
|
196
196
|
|
197
197
|
tuple_a_not_b<float> a_not_b;
|
198
198
|
|
@@ -212,10 +212,10 @@ TEST_CASE("tuple a-not-b: estimation mode half overlap", "[tuple_a_not_b]") {
|
|
212
212
|
TEST_CASE("tuple a-not-b: estimation mode disjoint", "[tuple_a_not_b]") {
|
213
213
|
auto a = update_tuple_sketch<float>::builder().build();
|
214
214
|
int value = 0;
|
215
|
-
for (int i = 0; i < 10000; i++) a.update(value++, 1);
|
215
|
+
for (int i = 0; i < 10000; i++) a.update(value++, 1.0f);
|
216
216
|
|
217
217
|
auto b = update_tuple_sketch<float>::builder().build();
|
218
|
-
for (int i = 0; i < 10000; i++) b.update(value++, 1);
|
218
|
+
for (int i = 0; i < 10000; i++) b.update(value++, 1.0f);
|
219
219
|
|
220
220
|
tuple_a_not_b<float> a_not_b;
|
221
221
|
|
@@ -235,7 +235,7 @@ TEST_CASE("tuple a-not-b: estimation mode disjoint", "[tuple_a_not_b]") {
|
|
235
235
|
TEST_CASE("tuple a-not-b: estimation mode full overlap", "[tuple_a_not_b]") {
|
236
236
|
auto sketch = update_tuple_sketch<float>::builder().build();
|
237
237
|
int value = 0;
|
238
|
-
for (int i = 0; i < 10000; i++) sketch.update(value++, 1);
|
238
|
+
for (int i = 0; i < 10000; i++) sketch.update(value++, 1.0f);
|
239
239
|
|
240
240
|
tuple_a_not_b<float> a_not_b;
|
241
241
|
|
@@ -254,7 +254,7 @@ TEST_CASE("tuple a-not-b: estimation mode full overlap", "[tuple_a_not_b]") {
|
|
254
254
|
|
255
255
|
TEST_CASE("tuple a-not-b: seed mismatch", "[tuple_a_not_b]") {
|
256
256
|
auto sketch = update_tuple_sketch<float>::builder().build();
|
257
|
-
sketch.update(1, 1); // non-empty should not be ignored
|
257
|
+
sketch.update(1, 1.0f); // non-empty should not be ignored
|
258
258
|
tuple_a_not_b<float> a_not_b(123);
|
259
259
|
REQUIRE_THROWS_AS(a_not_b.compute(sketch, sketch), std::invalid_argument);
|
260
260
|
}
|
@@ -262,11 +262,11 @@ TEST_CASE("tuple a-not-b: seed mismatch", "[tuple_a_not_b]") {
|
|
262
262
|
TEST_CASE("tuple a-not-b: issue #152", "[tuple_a_not_b]") {
|
263
263
|
auto a = update_tuple_sketch<float>::builder().build();
|
264
264
|
int value = 0;
|
265
|
-
for (int i = 0; i < 10000; i++) a.update(value++, 1);
|
265
|
+
for (int i = 0; i < 10000; i++) a.update(value++, 1.0f);
|
266
266
|
|
267
267
|
auto b = update_tuple_sketch<float>::builder().build();
|
268
268
|
value = 5000;
|
269
|
-
for (int i = 0; i < 25000; i++) b.update(value++, 1);
|
269
|
+
for (int i = 0; i < 25000; i++) b.update(value++, 1.0f);
|
270
270
|
|
271
271
|
tuple_a_not_b<float> a_not_b;
|
272
272
|
|
@@ -59,8 +59,8 @@ TEST_CASE("tuple intersection: empty", "[tuple_intersection]") {
|
|
59
59
|
}
|
60
60
|
|
61
61
|
TEST_CASE("tuple intersection: non empty no retained keys", "[tuple_intersection]") {
|
62
|
-
auto sketch = update_tuple_sketch<float>::builder().set_p(0.001).build();
|
63
|
-
sketch.update(1, 1);
|
62
|
+
auto sketch = update_tuple_sketch<float>::builder().set_p(0.001f).build();
|
63
|
+
sketch.update(1, 1.0f);
|
64
64
|
tuple_intersection_float intersection;
|
65
65
|
intersection.update(sketch);
|
66
66
|
auto result = intersection.get_result();
|
@@ -82,11 +82,11 @@ TEST_CASE("tuple intersection: non empty no retained keys", "[tuple_intersection
|
|
82
82
|
TEST_CASE("tuple intersection: exact mode half overlap", "[tuple_intersection]") {
|
83
83
|
auto sketch1 = update_tuple_sketch<float>::builder().build();
|
84
84
|
int value = 0;
|
85
|
-
for (int i = 0; i < 1000; i++) sketch1.update(value++, 1);
|
85
|
+
for (int i = 0; i < 1000; i++) sketch1.update(value++, 1.0f);
|
86
86
|
|
87
87
|
auto sketch2 = update_tuple_sketch<float>::builder().build();
|
88
88
|
value = 500;
|
89
|
-
for (int i = 0; i < 1000; i++) sketch2.update(value++, 1);
|
89
|
+
for (int i = 0; i < 1000; i++) sketch2.update(value++, 1.0f);
|
90
90
|
|
91
91
|
{ // unordered
|
92
92
|
tuple_intersection_float intersection;
|
@@ -111,10 +111,10 @@ TEST_CASE("tuple intersection: exact mode half overlap", "[tuple_intersection]")
|
|
111
111
|
TEST_CASE("tuple intersection: exact mode disjoint", "[tuple_intersection]") {
|
112
112
|
auto sketch1 = update_tuple_sketch<float>::builder().build();
|
113
113
|
int value = 0;
|
114
|
-
for (int i = 0; i < 1000; i++) sketch1.update(value++, 1);
|
114
|
+
for (int i = 0; i < 1000; i++) sketch1.update(value++, 1.0f);
|
115
115
|
|
116
116
|
auto sketch2 = update_tuple_sketch<float>::builder().build();
|
117
|
-
for (int i = 0; i < 1000; i++) sketch2.update(value++, 1);
|
117
|
+
for (int i = 0; i < 1000; i++) sketch2.update(value++, 1.0f);
|
118
118
|
|
119
119
|
{ // unordered
|
120
120
|
tuple_intersection_float intersection;
|
@@ -139,7 +139,7 @@ TEST_CASE("tuple intersection: exact mode disjoint", "[tuple_intersection]") {
|
|
139
139
|
TEST_CASE("mixed intersection: exact mode half overlap", "[tuple_intersection]") {
|
140
140
|
auto sketch1 = update_tuple_sketch<float>::builder().build();
|
141
141
|
int value = 0;
|
142
|
-
for (int i = 0; i < 1000; i++) sketch1.update(value++, 1);
|
142
|
+
for (int i = 0; i < 1000; i++) sketch1.update(value++, 1.0f);
|
143
143
|
|
144
144
|
auto sketch2 = update_theta_sketch::builder().build();
|
145
145
|
value = 500;
|
@@ -168,11 +168,11 @@ TEST_CASE("mixed intersection: exact mode half overlap", "[tuple_intersection]")
|
|
168
168
|
TEST_CASE("tuple intersection: estimation mode half overlap", "[tuple_intersection]") {
|
169
169
|
auto sketch1 = update_tuple_sketch<float>::builder().build();
|
170
170
|
int value = 0;
|
171
|
-
for (int i = 0; i < 10000; i++) sketch1.update(value++, 1);
|
171
|
+
for (int i = 0; i < 10000; i++) sketch1.update(value++, 1.0f);
|
172
172
|
|
173
173
|
auto sketch2 = update_tuple_sketch<float>::builder().build();
|
174
174
|
value = 5000;
|
175
|
-
for (int i = 0; i < 10000; i++) sketch2.update(value++, 1);
|
175
|
+
for (int i = 0; i < 10000; i++) sketch2.update(value++, 1.0f);
|
176
176
|
|
177
177
|
{ // unordered
|
178
178
|
tuple_intersection_float intersection;
|
@@ -197,10 +197,10 @@ TEST_CASE("tuple intersection: estimation mode half overlap", "[tuple_intersecti
|
|
197
197
|
TEST_CASE("tuple intersection: estimation mode disjoint", "[tuple_intersection]") {
|
198
198
|
auto sketch1 = update_tuple_sketch<float>::builder().build();
|
199
199
|
int value = 0;
|
200
|
-
for (int i = 0; i < 10000; i++) sketch1.update(value++, 1);
|
200
|
+
for (int i = 0; i < 10000; i++) sketch1.update(value++, 1.0f);
|
201
201
|
|
202
202
|
auto sketch2 = update_tuple_sketch<float>::builder().build();
|
203
|
-
for (int i = 0; i < 10000; i++) sketch2.update(value++, 1);
|
203
|
+
for (int i = 0; i < 10000; i++) sketch2.update(value++, 1.0f);
|
204
204
|
|
205
205
|
{ // unordered
|
206
206
|
tuple_intersection_float intersection;
|
@@ -224,7 +224,7 @@ TEST_CASE("tuple intersection: estimation mode disjoint", "[tuple_intersection]"
|
|
224
224
|
|
225
225
|
TEST_CASE("tuple intersection: seed mismatch", "[tuple_intersection]") {
|
226
226
|
auto sketch = update_tuple_sketch<float>::builder().build();
|
227
|
-
sketch.update(1, 1); // non-empty should not be ignored
|
227
|
+
sketch.update(1, 1.0f); // non-empty should not be ignored
|
228
228
|
tuple_intersection_float intersection(123);
|
229
229
|
REQUIRE_THROWS_AS(intersection.update(sketch), std::invalid_argument);
|
230
230
|
}
|
@@ -44,7 +44,7 @@ TEST_CASE("tuple jaccard: empty", "[tuple_sketch]") {
|
|
44
44
|
|
45
45
|
TEST_CASE("tuple jaccard: same sketch exact mode", "[tuple_sketch]") {
|
46
46
|
auto sk = update_tuple_sketch<float>::builder().build();
|
47
|
-
for (int i = 0; i < 1000; ++i) sk.update(i, 1);
|
47
|
+
for (int i = 0; i < 1000; ++i) sk.update(i, 1.0f);
|
48
48
|
|
49
49
|
// update sketch
|
50
50
|
auto jc = tuple_jaccard_similarity_float::jaccard(sk, sk);
|
@@ -61,8 +61,8 @@ TEST_CASE("tuple jaccard: full overlap exact mode", "[tuple_sketch]") {
|
|
61
61
|
auto sk_a = update_tuple_sketch<float>::builder().build();
|
62
62
|
auto sk_b = update_tuple_sketch<float>::builder().build();
|
63
63
|
for (int i = 0; i < 1000; ++i) {
|
64
|
-
sk_a.update(i, 1);
|
65
|
-
sk_b.update(i, 1);
|
64
|
+
sk_a.update(i, 1.0f);
|
65
|
+
sk_b.update(i, 1.0f);
|
66
66
|
}
|
67
67
|
|
68
68
|
// update sketches
|
@@ -83,8 +83,8 @@ TEST_CASE("tuple jaccard: disjoint exact mode", "[tuple_sketch]") {
|
|
83
83
|
auto sk_a = update_tuple_sketch<float>::builder().build();
|
84
84
|
auto sk_b = update_tuple_sketch<float>::builder().build();
|
85
85
|
for (int i = 0; i < 1000; ++i) {
|
86
|
-
sk_a.update(i, 1);
|
87
|
-
sk_b.update(i + 1000, 1);
|
86
|
+
sk_a.update(i, 1.0f);
|
87
|
+
sk_b.update(i + 1000, 1.0f);
|
88
88
|
}
|
89
89
|
|
90
90
|
// update sketches
|