datasketches 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +7 -0
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +11 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +24 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +19 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +91 -89
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +14 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +121 -87
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +4 -4
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +8 -8
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +14 -14
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +65 -80
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +10 -10
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +2 -2
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +60 -63
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +19 -19
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +3 -3
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +74 -76
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +110 -113
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +80 -76
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +26 -26
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +33 -33
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +6 -6
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +205 -209
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +36 -36
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +28 -28
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +61 -61
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +120 -127
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +21 -21
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -1
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +34 -34
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +25 -25
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +35 -35
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +15 -15
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +39 -24
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +34 -2
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +72 -62
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +68 -45
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -4
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +6 -6
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/include/req_common.hpp +2 -1
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +4 -4
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +26 -39
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +9 -9
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +52 -52
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +47 -56
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +34 -42
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -13
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_jaccard_similarity_base.hpp +18 -14
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +42 -1
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +107 -58
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +4 -4
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +2 -0
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -28
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp +58 -2
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +51 -64
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +17 -17
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +12 -12
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +5 -5
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +12 -12
- metadata +8 -3
|
@@ -315,7 +315,7 @@ uint64_t compact_tuple_sketch<S, A>::get_theta64() const {
|
|
|
315
315
|
|
|
316
316
|
template<typename S, typename A>
|
|
317
317
|
uint32_t compact_tuple_sketch<S, A>::get_num_retained() const {
|
|
318
|
-
return entries_.size();
|
|
318
|
+
return static_cast<uint32_t>(entries_.size());
|
|
319
319
|
}
|
|
320
320
|
|
|
321
321
|
template<typename S, typename A>
|
|
@@ -347,36 +347,36 @@ template<typename SerDe>
|
|
|
347
347
|
void compact_tuple_sketch<S, A>::serialize(std::ostream& os, const SerDe& sd) const {
|
|
348
348
|
const bool is_single_item = entries_.size() == 1 && !this->is_estimation_mode();
|
|
349
349
|
const uint8_t preamble_longs = this->is_empty() || is_single_item ? 1 : this->is_estimation_mode() ? 3 : 2;
|
|
350
|
-
|
|
350
|
+
write(os, preamble_longs);
|
|
351
351
|
const uint8_t serial_version = SERIAL_VERSION;
|
|
352
|
-
|
|
352
|
+
write(os, serial_version);
|
|
353
353
|
const uint8_t family = SKETCH_FAMILY;
|
|
354
|
-
|
|
354
|
+
write(os, family);
|
|
355
355
|
const uint8_t type = SKETCH_TYPE;
|
|
356
|
-
|
|
356
|
+
write(os, type);
|
|
357
357
|
const uint8_t unused8 = 0;
|
|
358
|
-
|
|
358
|
+
write(os, unused8);
|
|
359
359
|
const uint8_t flags_byte(
|
|
360
360
|
(1 << flags::IS_COMPACT) |
|
|
361
361
|
(1 << flags::IS_READ_ONLY) |
|
|
362
362
|
(this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
|
|
363
363
|
(this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
|
|
364
364
|
);
|
|
365
|
-
|
|
365
|
+
write(os, flags_byte);
|
|
366
366
|
const uint16_t seed_hash = get_seed_hash();
|
|
367
|
-
|
|
367
|
+
write(os, seed_hash);
|
|
368
368
|
if (!this->is_empty()) {
|
|
369
369
|
if (!is_single_item) {
|
|
370
|
-
const uint32_t num_entries = entries_.size();
|
|
371
|
-
|
|
370
|
+
const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
|
|
371
|
+
write(os, num_entries);
|
|
372
372
|
const uint32_t unused32 = 0;
|
|
373
|
-
|
|
373
|
+
write(os, unused32);
|
|
374
374
|
if (this->is_estimation_mode()) {
|
|
375
|
-
|
|
375
|
+
write(os, this->theta_);
|
|
376
376
|
}
|
|
377
377
|
}
|
|
378
378
|
for (const auto& it: entries_) {
|
|
379
|
-
|
|
379
|
+
write(os, it.first);
|
|
380
380
|
sd.serialize(os, &it.second, 1);
|
|
381
381
|
}
|
|
382
382
|
}
|
|
@@ -393,36 +393,34 @@ auto compact_tuple_sketch<S, A>::serialize(unsigned header_size_bytes, const Ser
|
|
|
393
393
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
|
394
394
|
const uint8_t* end_ptr = ptr + size;
|
|
395
395
|
|
|
396
|
-
ptr += copy_to_mem(
|
|
396
|
+
ptr += copy_to_mem(preamble_longs, ptr);
|
|
397
397
|
const uint8_t serial_version = SERIAL_VERSION;
|
|
398
|
-
ptr += copy_to_mem(
|
|
398
|
+
ptr += copy_to_mem(serial_version, ptr);
|
|
399
399
|
const uint8_t family = SKETCH_FAMILY;
|
|
400
|
-
ptr += copy_to_mem(
|
|
400
|
+
ptr += copy_to_mem(family, ptr);
|
|
401
401
|
const uint8_t type = SKETCH_TYPE;
|
|
402
|
-
ptr += copy_to_mem(
|
|
403
|
-
|
|
404
|
-
ptr += copy_to_mem(&unused8, ptr, sizeof(unused8));
|
|
402
|
+
ptr += copy_to_mem(type, ptr);
|
|
403
|
+
ptr += sizeof(uint8_t); // unused
|
|
405
404
|
const uint8_t flags_byte(
|
|
406
405
|
(1 << flags::IS_COMPACT) |
|
|
407
406
|
(1 << flags::IS_READ_ONLY) |
|
|
408
407
|
(this->is_empty() ? 1 << flags::IS_EMPTY : 0) |
|
|
409
408
|
(this->is_ordered() ? 1 << flags::IS_ORDERED : 0)
|
|
410
409
|
);
|
|
411
|
-
ptr += copy_to_mem(
|
|
410
|
+
ptr += copy_to_mem(flags_byte, ptr);
|
|
412
411
|
const uint16_t seed_hash = get_seed_hash();
|
|
413
|
-
ptr += copy_to_mem(
|
|
412
|
+
ptr += copy_to_mem(seed_hash, ptr);
|
|
414
413
|
if (!this->is_empty()) {
|
|
415
414
|
if (!is_single_item) {
|
|
416
|
-
const uint32_t num_entries = entries_.size();
|
|
417
|
-
ptr += copy_to_mem(
|
|
418
|
-
|
|
419
|
-
ptr += copy_to_mem(&unused32, ptr, sizeof(unused32));
|
|
415
|
+
const uint32_t num_entries = static_cast<uint32_t>(entries_.size());
|
|
416
|
+
ptr += copy_to_mem(num_entries, ptr);
|
|
417
|
+
ptr += sizeof(uint32_t); // unused
|
|
420
418
|
if (this->is_estimation_mode()) {
|
|
421
|
-
ptr += copy_to_mem(
|
|
419
|
+
ptr += copy_to_mem(theta_, ptr);
|
|
422
420
|
}
|
|
423
421
|
}
|
|
424
422
|
for (const auto& it: entries_) {
|
|
425
|
-
ptr += copy_to_mem(
|
|
423
|
+
ptr += copy_to_mem(it.first, ptr);
|
|
426
424
|
ptr += sd.serialize(ptr, end_ptr - ptr, &it.second, 1);
|
|
427
425
|
}
|
|
428
426
|
}
|
|
@@ -432,20 +430,13 @@ auto compact_tuple_sketch<S, A>::serialize(unsigned header_size_bytes, const Ser
|
|
|
432
430
|
template<typename S, typename A>
|
|
433
431
|
template<typename SerDe>
|
|
434
432
|
compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream& is, uint64_t seed, const SerDe& sd, const A& allocator) {
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
uint8_t
|
|
438
|
-
|
|
439
|
-
uint8_t
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
is.read(reinterpret_cast<char*>(&type), sizeof(type));
|
|
443
|
-
uint8_t unused8;
|
|
444
|
-
is.read(reinterpret_cast<char*>(&unused8), sizeof(unused8));
|
|
445
|
-
uint8_t flags_byte;
|
|
446
|
-
is.read(reinterpret_cast<char*>(&flags_byte), sizeof(flags_byte));
|
|
447
|
-
uint16_t seed_hash;
|
|
448
|
-
is.read(reinterpret_cast<char*>(&seed_hash), sizeof(seed_hash));
|
|
433
|
+
const auto preamble_longs = read<uint8_t>(is);
|
|
434
|
+
const auto serial_version = read<uint8_t>(is);
|
|
435
|
+
const auto family = read<uint8_t>(is);
|
|
436
|
+
const auto type = read<uint8_t>(is);
|
|
437
|
+
read<uint8_t>(is); // unused
|
|
438
|
+
const auto flags_byte = read<uint8_t>(is);
|
|
439
|
+
const auto seed_hash = read<uint16_t>(is);
|
|
449
440
|
checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
|
|
450
441
|
checker<true>::check_sketch_family(family, SKETCH_FAMILY);
|
|
451
442
|
checker<true>::check_sketch_type(type, SKETCH_TYPE);
|
|
@@ -458,11 +449,10 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream&
|
|
|
458
449
|
if (preamble_longs == 1) {
|
|
459
450
|
num_entries = 1;
|
|
460
451
|
} else {
|
|
461
|
-
|
|
462
|
-
uint32_t
|
|
463
|
-
is.read(reinterpret_cast<char*>(&unused32), sizeof(unused32));
|
|
452
|
+
num_entries = read<uint32_t>(is);
|
|
453
|
+
read<uint32_t>(is); // unused
|
|
464
454
|
if (preamble_longs > 2) {
|
|
465
|
-
|
|
455
|
+
theta = read<uint64_t>(is);
|
|
466
456
|
}
|
|
467
457
|
}
|
|
468
458
|
}
|
|
@@ -472,8 +462,7 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream&
|
|
|
472
462
|
entries.reserve(num_entries);
|
|
473
463
|
std::unique_ptr<S, deleter_of_summaries> summary(alloc.allocate(1), deleter_of_summaries(1, false, allocator));
|
|
474
464
|
for (size_t i = 0; i < num_entries; ++i) {
|
|
475
|
-
|
|
476
|
-
is.read(reinterpret_cast<char*>(&key), sizeof(uint64_t));
|
|
465
|
+
const auto key = read<uint64_t>(is);
|
|
477
466
|
sd.deserialize(is, summary.get(), 1);
|
|
478
467
|
entries.push_back(Entry(key, std::move(*summary)));
|
|
479
468
|
(*summary).~S();
|
|
@@ -491,19 +480,18 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
|
|
|
491
480
|
const char* ptr = static_cast<const char*>(bytes);
|
|
492
481
|
const char* base = ptr;
|
|
493
482
|
uint8_t preamble_longs;
|
|
494
|
-
ptr += copy_from_mem(ptr,
|
|
483
|
+
ptr += copy_from_mem(ptr, preamble_longs);
|
|
495
484
|
uint8_t serial_version;
|
|
496
|
-
ptr += copy_from_mem(ptr,
|
|
485
|
+
ptr += copy_from_mem(ptr, serial_version);
|
|
497
486
|
uint8_t family;
|
|
498
|
-
ptr += copy_from_mem(ptr,
|
|
487
|
+
ptr += copy_from_mem(ptr, family);
|
|
499
488
|
uint8_t type;
|
|
500
|
-
ptr += copy_from_mem(ptr,
|
|
501
|
-
uint8_t
|
|
502
|
-
ptr += copy_from_mem(ptr, &unused8, sizeof(unused8));
|
|
489
|
+
ptr += copy_from_mem(ptr, type);
|
|
490
|
+
ptr += sizeof(uint8_t); // unused
|
|
503
491
|
uint8_t flags_byte;
|
|
504
|
-
ptr += copy_from_mem(ptr,
|
|
492
|
+
ptr += copy_from_mem(ptr, flags_byte);
|
|
505
493
|
uint16_t seed_hash;
|
|
506
|
-
ptr += copy_from_mem(ptr,
|
|
494
|
+
ptr += copy_from_mem(ptr, seed_hash);
|
|
507
495
|
checker<true>::check_serial_version(serial_version, SERIAL_VERSION);
|
|
508
496
|
checker<true>::check_sketch_family(family, SKETCH_FAMILY);
|
|
509
497
|
checker<true>::check_sketch_type(type, SKETCH_TYPE);
|
|
@@ -518,12 +506,11 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
|
|
|
518
506
|
num_entries = 1;
|
|
519
507
|
} else {
|
|
520
508
|
ensure_minimum_memory(size, 8); // read the first prelong before this method
|
|
521
|
-
ptr += copy_from_mem(ptr,
|
|
522
|
-
uint32_t
|
|
523
|
-
ptr += copy_from_mem(ptr, &unused32, sizeof(unused32));
|
|
509
|
+
ptr += copy_from_mem(ptr, num_entries);
|
|
510
|
+
ptr += sizeof(uint32_t); // unused
|
|
524
511
|
if (preamble_longs > 2) {
|
|
525
512
|
ensure_minimum_memory(size, (preamble_longs - 1) << 3);
|
|
526
|
-
ptr += copy_from_mem(ptr,
|
|
513
|
+
ptr += copy_from_mem(ptr, theta);
|
|
527
514
|
}
|
|
528
515
|
}
|
|
529
516
|
}
|
|
@@ -536,7 +523,7 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
|
|
|
536
523
|
std::unique_ptr<S, deleter_of_summaries> summary(alloc.allocate(1), deleter_of_summaries(1, false, allocator));
|
|
537
524
|
for (size_t i = 0; i < num_entries; ++i) {
|
|
538
525
|
uint64_t key;
|
|
539
|
-
ptr += copy_from_mem(ptr,
|
|
526
|
+
ptr += copy_from_mem(ptr, key);
|
|
540
527
|
ptr += sd.deserialize(ptr, base + size - ptr, summary.get(), 1);
|
|
541
528
|
entries.push_back(Entry(key, std::move(*summary)));
|
|
542
529
|
(*summary).~S();
|
|
@@ -548,22 +535,22 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
|
|
|
548
535
|
|
|
549
536
|
template<typename S, typename A>
|
|
550
537
|
auto compact_tuple_sketch<S, A>::begin() -> iterator {
|
|
551
|
-
return iterator(entries_.data(), entries_.size(), 0);
|
|
538
|
+
return iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
|
|
552
539
|
}
|
|
553
540
|
|
|
554
541
|
template<typename S, typename A>
|
|
555
542
|
auto compact_tuple_sketch<S, A>::end() -> iterator {
|
|
556
|
-
return iterator(nullptr, 0, entries_.size());
|
|
543
|
+
return iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
|
|
557
544
|
}
|
|
558
545
|
|
|
559
546
|
template<typename S, typename A>
|
|
560
547
|
auto compact_tuple_sketch<S, A>::begin() const -> const_iterator {
|
|
561
|
-
return const_iterator(entries_.data(), entries_.size(), 0);
|
|
548
|
+
return const_iterator(entries_.data(), static_cast<uint32_t>(entries_.size()), 0);
|
|
562
549
|
}
|
|
563
550
|
|
|
564
551
|
template<typename S, typename A>
|
|
565
552
|
auto compact_tuple_sketch<S, A>::end() const -> const_iterator {
|
|
566
|
-
return const_iterator(nullptr, 0, entries_.size());
|
|
553
|
+
return const_iterator(nullptr, 0, static_cast<uint32_t>(entries_.size()));
|
|
567
554
|
}
|
|
568
555
|
|
|
569
556
|
template<typename S, typename A>
|
|
@@ -75,7 +75,7 @@ TEST_CASE("aod sketch: serialization compatibility with java - empty configured
|
|
|
75
75
|
}
|
|
76
76
|
|
|
77
77
|
TEST_CASE("aod sketch: serialization compatibility with java - non-empty no entries", "[tuple_sketch]") {
|
|
78
|
-
auto update_sketch = update_array_of_doubles_sketch::builder().set_p(0.
|
|
78
|
+
auto update_sketch = update_array_of_doubles_sketch::builder().set_p(0.01f).build();
|
|
79
79
|
std::vector<double> a = {1};
|
|
80
80
|
update_sketch.update(1, a);
|
|
81
81
|
REQUIRE_FALSE(update_sketch.is_empty());
|
|
@@ -38,8 +38,8 @@ TEST_CASE("tuple a-not-b: empty", "[tuple_a_not_b]") {
|
|
|
38
38
|
|
|
39
39
|
TEST_CASE("tuple a-not-b: non empty no retained keys", "[tuple_a_not_b]") {
|
|
40
40
|
auto a = update_tuple_sketch<float>::builder().build();
|
|
41
|
-
a.update(1, 1);
|
|
42
|
-
auto b = update_tuple_sketch<float>::builder().set_p(0.
|
|
41
|
+
a.update(1, 1.0f);
|
|
42
|
+
auto b = update_tuple_sketch<float>::builder().set_p(0.001f).build();
|
|
43
43
|
tuple_a_not_b<float> a_not_b;
|
|
44
44
|
|
|
45
45
|
// B is still empty
|
|
@@ -51,7 +51,7 @@ TEST_CASE("tuple a-not-b: non empty no retained keys", "[tuple_a_not_b]") {
|
|
|
51
51
|
REQUIRE(result.get_estimate() == 1.0);
|
|
52
52
|
|
|
53
53
|
// B is not empty in estimation mode and no entries
|
|
54
|
-
b.update(1, 1);
|
|
54
|
+
b.update(1, 1.0f);
|
|
55
55
|
REQUIRE(b.get_num_retained() == 0);
|
|
56
56
|
|
|
57
57
|
result = a_not_b.compute(a, b);
|
|
@@ -65,11 +65,11 @@ TEST_CASE("tuple a-not-b: non empty no retained keys", "[tuple_a_not_b]") {
|
|
|
65
65
|
TEST_CASE("tuple a-not-b: exact mode half overlap", "[tuple_a_not_b]") {
|
|
66
66
|
auto a = update_tuple_sketch<float>::builder().build();
|
|
67
67
|
int value = 0;
|
|
68
|
-
for (int i = 0; i < 1000; i++) a.update(value++, 1);
|
|
68
|
+
for (int i = 0; i < 1000; i++) a.update(value++, 1.0f);
|
|
69
69
|
|
|
70
70
|
auto b = update_tuple_sketch<float>::builder().build();
|
|
71
71
|
value = 500;
|
|
72
|
-
for (int i = 0; i < 1000; i++) b.update(value++, 1);
|
|
72
|
+
for (int i = 0; i < 1000; i++) b.update(value++, 1.0f);
|
|
73
73
|
|
|
74
74
|
tuple_a_not_b<float> a_not_b;
|
|
75
75
|
|
|
@@ -105,7 +105,7 @@ TEST_CASE("tuple a-not-b: exact mode half overlap", "[tuple_a_not_b]") {
|
|
|
105
105
|
TEST_CASE("mixed a-not-b: exact mode half overlap", "[tuple_a_not_b]") {
|
|
106
106
|
auto a = update_tuple_sketch<float>::builder().build();
|
|
107
107
|
int value = 0;
|
|
108
|
-
for (int i = 0; i < 1000; i++) a.update(value++, 1);
|
|
108
|
+
for (int i = 0; i < 1000; i++) a.update(value++, 1.0f);
|
|
109
109
|
|
|
110
110
|
auto b = update_theta_sketch::builder().build();
|
|
111
111
|
value = 500;
|
|
@@ -145,10 +145,10 @@ TEST_CASE("mixed a-not-b: exact mode half overlap", "[tuple_a_not_b]") {
|
|
|
145
145
|
TEST_CASE("tuple a-not-b: exact mode disjoint", "[tuple_a_not_b]") {
|
|
146
146
|
auto a = update_tuple_sketch<float>::builder().build();
|
|
147
147
|
int value = 0;
|
|
148
|
-
for (int i = 0; i < 1000; i++) a.update(value++, 1);
|
|
148
|
+
for (int i = 0; i < 1000; i++) a.update(value++, 1.0f);
|
|
149
149
|
|
|
150
150
|
auto b = update_tuple_sketch<float>::builder().build();
|
|
151
|
-
for (int i = 0; i < 1000; i++) b.update(value++, 1);
|
|
151
|
+
for (int i = 0; i < 1000; i++) b.update(value++, 1.0f);
|
|
152
152
|
|
|
153
153
|
tuple_a_not_b<float> a_not_b;
|
|
154
154
|
|
|
@@ -168,7 +168,7 @@ TEST_CASE("tuple a-not-b: exact mode disjoint", "[tuple_a_not_b]") {
|
|
|
168
168
|
TEST_CASE("tuple a-not-b: exact mode full overlap", "[tuple_a_not_b]") {
|
|
169
169
|
auto sketch = update_tuple_sketch<float>::builder().build();
|
|
170
170
|
int value = 0;
|
|
171
|
-
for (int i = 0; i < 1000; i++) sketch.update(value++, 1);
|
|
171
|
+
for (int i = 0; i < 1000; i++) sketch.update(value++, 1.0f);
|
|
172
172
|
|
|
173
173
|
tuple_a_not_b<float> a_not_b;
|
|
174
174
|
|
|
@@ -188,11 +188,11 @@ TEST_CASE("tuple a-not-b: exact mode full overlap", "[tuple_a_not_b]") {
|
|
|
188
188
|
TEST_CASE("tuple a-not-b: estimation mode half overlap", "[tuple_a_not_b]") {
|
|
189
189
|
auto a = update_tuple_sketch<float>::builder().build();
|
|
190
190
|
int value = 0;
|
|
191
|
-
for (int i = 0; i < 10000; i++) a.update(value++, 1);
|
|
191
|
+
for (int i = 0; i < 10000; i++) a.update(value++, 1.0f);
|
|
192
192
|
|
|
193
193
|
auto b = update_tuple_sketch<float>::builder().build();
|
|
194
194
|
value = 5000;
|
|
195
|
-
for (int i = 0; i < 10000; i++) b.update(value++, 1);
|
|
195
|
+
for (int i = 0; i < 10000; i++) b.update(value++, 1.0f);
|
|
196
196
|
|
|
197
197
|
tuple_a_not_b<float> a_not_b;
|
|
198
198
|
|
|
@@ -212,10 +212,10 @@ TEST_CASE("tuple a-not-b: estimation mode half overlap", "[tuple_a_not_b]") {
|
|
|
212
212
|
TEST_CASE("tuple a-not-b: estimation mode disjoint", "[tuple_a_not_b]") {
|
|
213
213
|
auto a = update_tuple_sketch<float>::builder().build();
|
|
214
214
|
int value = 0;
|
|
215
|
-
for (int i = 0; i < 10000; i++) a.update(value++, 1);
|
|
215
|
+
for (int i = 0; i < 10000; i++) a.update(value++, 1.0f);
|
|
216
216
|
|
|
217
217
|
auto b = update_tuple_sketch<float>::builder().build();
|
|
218
|
-
for (int i = 0; i < 10000; i++) b.update(value++, 1);
|
|
218
|
+
for (int i = 0; i < 10000; i++) b.update(value++, 1.0f);
|
|
219
219
|
|
|
220
220
|
tuple_a_not_b<float> a_not_b;
|
|
221
221
|
|
|
@@ -235,7 +235,7 @@ TEST_CASE("tuple a-not-b: estimation mode disjoint", "[tuple_a_not_b]") {
|
|
|
235
235
|
TEST_CASE("tuple a-not-b: estimation mode full overlap", "[tuple_a_not_b]") {
|
|
236
236
|
auto sketch = update_tuple_sketch<float>::builder().build();
|
|
237
237
|
int value = 0;
|
|
238
|
-
for (int i = 0; i < 10000; i++) sketch.update(value++, 1);
|
|
238
|
+
for (int i = 0; i < 10000; i++) sketch.update(value++, 1.0f);
|
|
239
239
|
|
|
240
240
|
tuple_a_not_b<float> a_not_b;
|
|
241
241
|
|
|
@@ -254,7 +254,7 @@ TEST_CASE("tuple a-not-b: estimation mode full overlap", "[tuple_a_not_b]") {
|
|
|
254
254
|
|
|
255
255
|
TEST_CASE("tuple a-not-b: seed mismatch", "[tuple_a_not_b]") {
|
|
256
256
|
auto sketch = update_tuple_sketch<float>::builder().build();
|
|
257
|
-
sketch.update(1, 1); // non-empty should not be ignored
|
|
257
|
+
sketch.update(1, 1.0f); // non-empty should not be ignored
|
|
258
258
|
tuple_a_not_b<float> a_not_b(123);
|
|
259
259
|
REQUIRE_THROWS_AS(a_not_b.compute(sketch, sketch), std::invalid_argument);
|
|
260
260
|
}
|
|
@@ -262,11 +262,11 @@ TEST_CASE("tuple a-not-b: seed mismatch", "[tuple_a_not_b]") {
|
|
|
262
262
|
TEST_CASE("tuple a-not-b: issue #152", "[tuple_a_not_b]") {
|
|
263
263
|
auto a = update_tuple_sketch<float>::builder().build();
|
|
264
264
|
int value = 0;
|
|
265
|
-
for (int i = 0; i < 10000; i++) a.update(value++, 1);
|
|
265
|
+
for (int i = 0; i < 10000; i++) a.update(value++, 1.0f);
|
|
266
266
|
|
|
267
267
|
auto b = update_tuple_sketch<float>::builder().build();
|
|
268
268
|
value = 5000;
|
|
269
|
-
for (int i = 0; i < 25000; i++) b.update(value++, 1);
|
|
269
|
+
for (int i = 0; i < 25000; i++) b.update(value++, 1.0f);
|
|
270
270
|
|
|
271
271
|
tuple_a_not_b<float> a_not_b;
|
|
272
272
|
|
|
@@ -59,8 +59,8 @@ TEST_CASE("tuple intersection: empty", "[tuple_intersection]") {
|
|
|
59
59
|
}
|
|
60
60
|
|
|
61
61
|
TEST_CASE("tuple intersection: non empty no retained keys", "[tuple_intersection]") {
|
|
62
|
-
auto sketch = update_tuple_sketch<float>::builder().set_p(0.
|
|
63
|
-
sketch.update(1, 1);
|
|
62
|
+
auto sketch = update_tuple_sketch<float>::builder().set_p(0.001f).build();
|
|
63
|
+
sketch.update(1, 1.0f);
|
|
64
64
|
tuple_intersection_float intersection;
|
|
65
65
|
intersection.update(sketch);
|
|
66
66
|
auto result = intersection.get_result();
|
|
@@ -82,11 +82,11 @@ TEST_CASE("tuple intersection: non empty no retained keys", "[tuple_intersection
|
|
|
82
82
|
TEST_CASE("tuple intersection: exact mode half overlap", "[tuple_intersection]") {
|
|
83
83
|
auto sketch1 = update_tuple_sketch<float>::builder().build();
|
|
84
84
|
int value = 0;
|
|
85
|
-
for (int i = 0; i < 1000; i++) sketch1.update(value++, 1);
|
|
85
|
+
for (int i = 0; i < 1000; i++) sketch1.update(value++, 1.0f);
|
|
86
86
|
|
|
87
87
|
auto sketch2 = update_tuple_sketch<float>::builder().build();
|
|
88
88
|
value = 500;
|
|
89
|
-
for (int i = 0; i < 1000; i++) sketch2.update(value++, 1);
|
|
89
|
+
for (int i = 0; i < 1000; i++) sketch2.update(value++, 1.0f);
|
|
90
90
|
|
|
91
91
|
{ // unordered
|
|
92
92
|
tuple_intersection_float intersection;
|
|
@@ -111,10 +111,10 @@ TEST_CASE("tuple intersection: exact mode half overlap", "[tuple_intersection]")
|
|
|
111
111
|
TEST_CASE("tuple intersection: exact mode disjoint", "[tuple_intersection]") {
|
|
112
112
|
auto sketch1 = update_tuple_sketch<float>::builder().build();
|
|
113
113
|
int value = 0;
|
|
114
|
-
for (int i = 0; i < 1000; i++) sketch1.update(value++, 1);
|
|
114
|
+
for (int i = 0; i < 1000; i++) sketch1.update(value++, 1.0f);
|
|
115
115
|
|
|
116
116
|
auto sketch2 = update_tuple_sketch<float>::builder().build();
|
|
117
|
-
for (int i = 0; i < 1000; i++) sketch2.update(value++, 1);
|
|
117
|
+
for (int i = 0; i < 1000; i++) sketch2.update(value++, 1.0f);
|
|
118
118
|
|
|
119
119
|
{ // unordered
|
|
120
120
|
tuple_intersection_float intersection;
|
|
@@ -139,7 +139,7 @@ TEST_CASE("tuple intersection: exact mode disjoint", "[tuple_intersection]") {
|
|
|
139
139
|
TEST_CASE("mixed intersection: exact mode half overlap", "[tuple_intersection]") {
|
|
140
140
|
auto sketch1 = update_tuple_sketch<float>::builder().build();
|
|
141
141
|
int value = 0;
|
|
142
|
-
for (int i = 0; i < 1000; i++) sketch1.update(value++, 1);
|
|
142
|
+
for (int i = 0; i < 1000; i++) sketch1.update(value++, 1.0f);
|
|
143
143
|
|
|
144
144
|
auto sketch2 = update_theta_sketch::builder().build();
|
|
145
145
|
value = 500;
|
|
@@ -168,11 +168,11 @@ TEST_CASE("mixed intersection: exact mode half overlap", "[tuple_intersection]")
|
|
|
168
168
|
TEST_CASE("tuple intersection: estimation mode half overlap", "[tuple_intersection]") {
|
|
169
169
|
auto sketch1 = update_tuple_sketch<float>::builder().build();
|
|
170
170
|
int value = 0;
|
|
171
|
-
for (int i = 0; i < 10000; i++) sketch1.update(value++, 1);
|
|
171
|
+
for (int i = 0; i < 10000; i++) sketch1.update(value++, 1.0f);
|
|
172
172
|
|
|
173
173
|
auto sketch2 = update_tuple_sketch<float>::builder().build();
|
|
174
174
|
value = 5000;
|
|
175
|
-
for (int i = 0; i < 10000; i++) sketch2.update(value++, 1);
|
|
175
|
+
for (int i = 0; i < 10000; i++) sketch2.update(value++, 1.0f);
|
|
176
176
|
|
|
177
177
|
{ // unordered
|
|
178
178
|
tuple_intersection_float intersection;
|
|
@@ -197,10 +197,10 @@ TEST_CASE("tuple intersection: estimation mode half overlap", "[tuple_intersecti
|
|
|
197
197
|
TEST_CASE("tuple intersection: estimation mode disjoint", "[tuple_intersection]") {
|
|
198
198
|
auto sketch1 = update_tuple_sketch<float>::builder().build();
|
|
199
199
|
int value = 0;
|
|
200
|
-
for (int i = 0; i < 10000; i++) sketch1.update(value++, 1);
|
|
200
|
+
for (int i = 0; i < 10000; i++) sketch1.update(value++, 1.0f);
|
|
201
201
|
|
|
202
202
|
auto sketch2 = update_tuple_sketch<float>::builder().build();
|
|
203
|
-
for (int i = 0; i < 10000; i++) sketch2.update(value++, 1);
|
|
203
|
+
for (int i = 0; i < 10000; i++) sketch2.update(value++, 1.0f);
|
|
204
204
|
|
|
205
205
|
{ // unordered
|
|
206
206
|
tuple_intersection_float intersection;
|
|
@@ -224,7 +224,7 @@ TEST_CASE("tuple intersection: estimation mode disjoint", "[tuple_intersection]"
|
|
|
224
224
|
|
|
225
225
|
TEST_CASE("tuple intersection: seed mismatch", "[tuple_intersection]") {
|
|
226
226
|
auto sketch = update_tuple_sketch<float>::builder().build();
|
|
227
|
-
sketch.update(1, 1); // non-empty should not be ignored
|
|
227
|
+
sketch.update(1, 1.0f); // non-empty should not be ignored
|
|
228
228
|
tuple_intersection_float intersection(123);
|
|
229
229
|
REQUIRE_THROWS_AS(intersection.update(sketch), std::invalid_argument);
|
|
230
230
|
}
|
|
@@ -44,7 +44,7 @@ TEST_CASE("tuple jaccard: empty", "[tuple_sketch]") {
|
|
|
44
44
|
|
|
45
45
|
TEST_CASE("tuple jaccard: same sketch exact mode", "[tuple_sketch]") {
|
|
46
46
|
auto sk = update_tuple_sketch<float>::builder().build();
|
|
47
|
-
for (int i = 0; i < 1000; ++i) sk.update(i, 1);
|
|
47
|
+
for (int i = 0; i < 1000; ++i) sk.update(i, 1.0f);
|
|
48
48
|
|
|
49
49
|
// update sketch
|
|
50
50
|
auto jc = tuple_jaccard_similarity_float::jaccard(sk, sk);
|
|
@@ -61,8 +61,8 @@ TEST_CASE("tuple jaccard: full overlap exact mode", "[tuple_sketch]") {
|
|
|
61
61
|
auto sk_a = update_tuple_sketch<float>::builder().build();
|
|
62
62
|
auto sk_b = update_tuple_sketch<float>::builder().build();
|
|
63
63
|
for (int i = 0; i < 1000; ++i) {
|
|
64
|
-
sk_a.update(i, 1);
|
|
65
|
-
sk_b.update(i, 1);
|
|
64
|
+
sk_a.update(i, 1.0f);
|
|
65
|
+
sk_b.update(i, 1.0f);
|
|
66
66
|
}
|
|
67
67
|
|
|
68
68
|
// update sketches
|
|
@@ -83,8 +83,8 @@ TEST_CASE("tuple jaccard: disjoint exact mode", "[tuple_sketch]") {
|
|
|
83
83
|
auto sk_a = update_tuple_sketch<float>::builder().build();
|
|
84
84
|
auto sk_b = update_tuple_sketch<float>::builder().build();
|
|
85
85
|
for (int i = 0; i < 1000; ++i) {
|
|
86
|
-
sk_a.update(i, 1);
|
|
87
|
-
sk_b.update(i + 1000, 1);
|
|
86
|
+
sk_a.update(i, 1.0f);
|
|
87
|
+
sk_b.update(i + 1000, 1.0f);
|
|
88
88
|
}
|
|
89
89
|
|
|
90
90
|
// update sketches
|