datasketches 0.2.3 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +7 -7
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +22 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +14 -13
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +8 -6
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +4 -2
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +9 -19
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +4 -2
- data/vendor/datasketches-cpp/req/CMakeLists.txt +8 -21
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +4 -2
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +13 -7
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +8 -6
- data/vendor/datasketches-cpp/setup.py +1 -1
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +89 -22
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +6 -6
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +2 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +146 -51
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +8 -2
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +5 -4
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +33 -9
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +445 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +400 -0
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +23 -11
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +13 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +33 -14
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +7 -0
- metadata +11 -6
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
@@ -20,8 +20,8 @@
|
|
20
20
|
namespace datasketches {
|
21
21
|
|
22
22
|
template<typename A>
|
23
|
-
array_of_doubles_union_alloc<A>::array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const A& allocator):
|
24
|
-
Base(lg_cur_size, lg_nom_size, rf, theta, seed, policy, allocator)
|
23
|
+
array_of_doubles_union_alloc<A>::array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const A& allocator):
|
24
|
+
Base(lg_cur_size, lg_nom_size, rf, p, theta, seed, policy, allocator)
|
25
25
|
{}
|
26
26
|
|
27
27
|
template<typename A>
|
@@ -37,7 +37,7 @@ tuple_base_builder<builder, Policy, A>(policy, allocator) {}
|
|
37
37
|
|
38
38
|
template<typename A>
|
39
39
|
array_of_doubles_union_alloc<A> array_of_doubles_union_alloc<A>::builder::build() const {
|
40
|
-
return array_of_doubles_union_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
40
|
+
return array_of_doubles_union_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
41
41
|
}
|
42
42
|
|
43
43
|
} /* namespace datasketches */
|
@@ -153,8 +153,7 @@ public:
|
|
153
153
|
virtual const_iterator end() const = 0;
|
154
154
|
|
155
155
|
protected:
|
156
|
-
|
157
|
-
virtual void print_specifics(ostrstream& os) const = 0;
|
156
|
+
virtual void print_specifics(std::ostringstream& os) const = 0;
|
158
157
|
|
159
158
|
static uint16_t get_seed_hash(uint64_t seed);
|
160
159
|
|
@@ -325,6 +324,11 @@ public:
|
|
325
324
|
*/
|
326
325
|
void trim();
|
327
326
|
|
327
|
+
/**
|
328
|
+
* Reset the sketch to the initial empty state
|
329
|
+
*/
|
330
|
+
void reset();
|
331
|
+
|
328
332
|
/**
|
329
333
|
* Converts this sketch to a compact sketch (ordered or unordered).
|
330
334
|
* @param ordered optional flag to specify if ordered sketch should be produced
|
@@ -342,10 +346,9 @@ protected:
|
|
342
346
|
tuple_map map_;
|
343
347
|
|
344
348
|
// for builder
|
345
|
-
update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
349
|
+
update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
346
350
|
|
347
|
-
|
348
|
-
virtual void print_specifics(ostrstream& os) const;
|
351
|
+
virtual void print_specifics(std::ostringstream& os) const;
|
349
352
|
};
|
350
353
|
|
351
354
|
// compact sketch
|
@@ -367,9 +370,11 @@ public:
|
|
367
370
|
using vector_bytes = std::vector<uint8_t, AllocBytes>;
|
368
371
|
using comparator = compare_by_key<ExtractKey>;
|
369
372
|
|
370
|
-
static const uint8_t SERIAL_VERSION = 1;
|
373
|
+
static const uint8_t SERIAL_VERSION_LEGACY = 1;
|
374
|
+
static const uint8_t SERIAL_VERSION = 3;
|
371
375
|
static const uint8_t SKETCH_FAMILY = 9;
|
372
|
-
static const uint8_t SKETCH_TYPE = 5;
|
376
|
+
static const uint8_t SKETCH_TYPE = 1;
|
377
|
+
static const uint8_t SKETCH_TYPE_LEGACY = 5;
|
373
378
|
enum flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED };
|
374
379
|
|
375
380
|
// Instances of this type can be obtained:
|
@@ -473,8 +478,7 @@ protected:
|
|
473
478
|
bool destroy_;
|
474
479
|
};
|
475
480
|
|
476
|
-
|
477
|
-
virtual void print_specifics(ostrstream& os) const;
|
481
|
+
virtual void print_specifics(std::ostringstream& os) const;
|
478
482
|
|
479
483
|
};
|
480
484
|
|
@@ -53,7 +53,9 @@ double tuple_sketch<S, A>::get_upper_bound(uint8_t num_std_devs) const {
|
|
53
53
|
|
54
54
|
template<typename S, typename A>
|
55
55
|
string<A> tuple_sketch<S, A>::to_string(bool detail) const {
|
56
|
-
|
56
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
57
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
58
|
+
std::ostringstream os;
|
57
59
|
os << "### Tuple sketch summary:" << std::endl;
|
58
60
|
os << " num retained entries : " << get_num_retained() << std::endl;
|
59
61
|
os << " seed hash : " << get_seed_hash() << std::endl;
|
@@ -74,15 +76,15 @@ string<A> tuple_sketch<S, A>::to_string(bool detail) const {
|
|
74
76
|
}
|
75
77
|
os << "### End retained entries" << std::endl;
|
76
78
|
}
|
77
|
-
return os.str();
|
79
|
+
return string<A>(os.str().c_str(), get_allocator());
|
78
80
|
}
|
79
81
|
|
80
82
|
// update sketch
|
81
83
|
|
82
84
|
template<typename S, typename U, typename P, typename A>
|
83
|
-
update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
|
85
|
+
update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
|
84
86
|
policy_(policy),
|
85
|
-
map_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator)
|
87
|
+
map_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator)
|
86
88
|
{}
|
87
89
|
|
88
90
|
template<typename S, typename U, typename P, typename A>
|
@@ -97,12 +99,12 @@ bool update_tuple_sketch<S, U, P, A>::is_empty() const {
|
|
97
99
|
|
98
100
|
template<typename S, typename U, typename P, typename A>
|
99
101
|
bool update_tuple_sketch<S, U, P, A>::is_ordered() const {
|
100
|
-
return false;
|
102
|
+
return map_.num_entries_ > 1 ? false : true;;
|
101
103
|
}
|
102
104
|
|
103
105
|
template<typename S, typename U, typename P, typename A>
|
104
106
|
uint64_t update_tuple_sketch<S, U, P, A>::get_theta64() const {
|
105
|
-
return map_.theta_;
|
107
|
+
return is_empty() ? theta_constants::MAX_THETA : map_.theta_;
|
106
108
|
}
|
107
109
|
|
108
110
|
template<typename S, typename U, typename P, typename A>
|
@@ -212,6 +214,11 @@ void update_tuple_sketch<S, U, P, A>::trim() {
|
|
212
214
|
map_.trim();
|
213
215
|
}
|
214
216
|
|
217
|
+
template<typename S, typename U, typename P, typename A>
|
218
|
+
void update_tuple_sketch<S, U, P, A>::reset() {
|
219
|
+
map_.reset();
|
220
|
+
}
|
221
|
+
|
215
222
|
template<typename S, typename U, typename P, typename A>
|
216
223
|
auto update_tuple_sketch<S, U, P, A>::begin() -> iterator {
|
217
224
|
return iterator(map_.entries_, 1 << map_.lg_cur_size_, 0);
|
@@ -238,7 +245,7 @@ compact_tuple_sketch<S, A> update_tuple_sketch<S, U, P, A>::compact(bool ordered
|
|
238
245
|
}
|
239
246
|
|
240
247
|
template<typename S, typename U, typename P, typename A>
|
241
|
-
void update_tuple_sketch<S, U, P, A>::print_specifics(ostrstream& os) const {
|
248
|
+
void update_tuple_sketch<S, U, P, A>::print_specifics(std::ostringstream& os) const {
|
242
249
|
os << " lg nominal size : " << (int) map_.lg_nom_size_ << std::endl;
|
243
250
|
os << " lg current size : " << (int) map_.lg_cur_size_ << std::endl;
|
244
251
|
os << " resize factor : " << (1 << map_.rf_) << std::endl;
|
@@ -250,7 +257,7 @@ template<typename S, typename A>
|
|
250
257
|
compact_tuple_sketch<S, A>::compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
|
251
258
|
std::vector<Entry, AllocEntry>&& entries):
|
252
259
|
is_empty_(is_empty),
|
253
|
-
is_ordered_(is_ordered),
|
260
|
+
is_ordered_(is_ordered || (entries.size() <= 1ULL)),
|
254
261
|
seed_hash_(seed_hash),
|
255
262
|
theta_(theta),
|
256
263
|
entries_(std::move(entries))
|
@@ -437,9 +444,15 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream&
|
|
437
444
|
read<uint8_t>(is); // unused
|
438
445
|
const auto flags_byte = read<uint8_t>(is);
|
439
446
|
const auto seed_hash = read<uint16_t>(is);
|
440
|
-
|
447
|
+
if (serial_version != SERIAL_VERSION && serial_version != SERIAL_VERSION_LEGACY) {
|
448
|
+
throw std::invalid_argument("serial version mismatch: expected " + std::to_string(SERIAL_VERSION) + " or "
|
449
|
+
+ std::to_string(SERIAL_VERSION_LEGACY) + ", actual " + std::to_string(serial_version));
|
450
|
+
}
|
441
451
|
checker<true>::check_sketch_family(family, SKETCH_FAMILY);
|
442
|
-
|
452
|
+
if (type != SKETCH_TYPE && type != SKETCH_TYPE_LEGACY) {
|
453
|
+
throw std::invalid_argument("sketch type mismatch: expected " + std::to_string(SKETCH_TYPE) + " or "
|
454
|
+
+ std::to_string(SKETCH_TYPE_LEGACY) + ", actual " + std::to_string(type));
|
455
|
+
}
|
443
456
|
const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
|
444
457
|
if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
|
445
458
|
|
@@ -492,9 +505,15 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
|
|
492
505
|
ptr += copy_from_mem(ptr, flags_byte);
|
493
506
|
uint16_t seed_hash;
|
494
507
|
ptr += copy_from_mem(ptr, seed_hash);
|
495
|
-
|
508
|
+
if (serial_version != SERIAL_VERSION && serial_version != SERIAL_VERSION_LEGACY) {
|
509
|
+
throw std::invalid_argument("serial version mismatch: expected " + std::to_string(SERIAL_VERSION) + " or "
|
510
|
+
+ std::to_string(SERIAL_VERSION_LEGACY) + ", actual " + std::to_string(serial_version));
|
511
|
+
}
|
496
512
|
checker<true>::check_sketch_family(family, SKETCH_FAMILY);
|
497
|
-
|
513
|
+
if (type != SKETCH_TYPE && type != SKETCH_TYPE_LEGACY) {
|
514
|
+
throw std::invalid_argument("sketch type mismatch: expected " + std::to_string(SKETCH_TYPE) + " or "
|
515
|
+
+ std::to_string(SKETCH_TYPE_LEGACY) + ", actual " + std::to_string(type));
|
516
|
+
}
|
498
517
|
const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
|
499
518
|
if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
|
500
519
|
|
@@ -554,7 +573,7 @@ auto compact_tuple_sketch<S, A>::end() const -> const_iterator {
|
|
554
573
|
}
|
555
574
|
|
556
575
|
template<typename S, typename A>
|
557
|
-
void compact_tuple_sketch<S, A>::print_specifics(ostrstream&) const {}
|
576
|
+
void compact_tuple_sketch<S, A>::print_specifics(std::ostringstream&) const {}
|
558
577
|
|
559
578
|
// builder
|
560
579
|
|
@@ -568,7 +587,7 @@ tuple_base_builder<builder, P, A>(policy, allocator) {}
|
|
568
587
|
|
569
588
|
template<typename S, typename U, typename P, typename A>
|
570
589
|
auto update_tuple_sketch<S, U, P, A>::builder::build() const -> update_tuple_sketch {
|
571
|
-
return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
590
|
+
return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
572
591
|
}
|
573
592
|
|
574
593
|
} /* namespace datasketches */
|
@@ -80,11 +80,16 @@ public:
|
|
80
80
|
*/
|
81
81
|
CompactSketch get_result(bool ordered = true) const;
|
82
82
|
|
83
|
+
/**
|
84
|
+
* Reset the union to the initial empty state
|
85
|
+
*/
|
86
|
+
void reset();
|
87
|
+
|
83
88
|
protected:
|
84
89
|
State state_;
|
85
90
|
|
86
91
|
// for builder
|
87
|
-
tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
92
|
+
tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
88
93
|
};
|
89
94
|
|
90
95
|
template<typename S, typename P, typename A>
|
@@ -20,8 +20,8 @@
|
|
20
20
|
namespace datasketches {
|
21
21
|
|
22
22
|
template<typename S, typename P, typename A>
|
23
|
-
tuple_union<S, P, A>::tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
|
24
|
-
state_(lg_cur_size, lg_nom_size, rf, theta, seed, internal_policy(policy), allocator)
|
23
|
+
tuple_union<S, P, A>::tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
|
24
|
+
state_(lg_cur_size, lg_nom_size, rf, p, theta, seed, internal_policy(policy), allocator)
|
25
25
|
{}
|
26
26
|
|
27
27
|
template<typename S, typename P, typename A>
|
@@ -35,13 +35,18 @@ auto tuple_union<S, P, A>::get_result(bool ordered) const -> CompactSketch {
|
|
35
35
|
return state_.get_result(ordered);
|
36
36
|
}
|
37
37
|
|
38
|
+
template<typename S, typename P, typename A>
|
39
|
+
void tuple_union<S, P, A>::reset() {
|
40
|
+
return state_.reset();
|
41
|
+
}
|
42
|
+
|
38
43
|
template<typename S, typename P, typename A>
|
39
44
|
tuple_union<S, P, A>::builder::builder(const P& policy, const A& allocator):
|
40
45
|
tuple_base_builder<builder, P, A>(policy, allocator) {}
|
41
46
|
|
42
47
|
template<typename S, typename P, typename A>
|
43
48
|
auto tuple_union<S, P, A>::builder::build() const -> tuple_union {
|
44
|
-
return tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
49
|
+
return tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
45
50
|
}
|
46
51
|
|
47
52
|
} /* namespace datasketches */
|
@@ -36,6 +36,17 @@ const std::string inputPath = TEST_BINARY_INPUT_PATH;
|
|
36
36
|
const std::string inputPath = "test/";
|
37
37
|
#endif
|
38
38
|
|
39
|
+
TEST_CASE("aod sketch: reset", "[tuple_sketch]") {
|
40
|
+
auto update_sketch = update_array_of_doubles_sketch::builder().build();
|
41
|
+
std::vector<double> a = {1};
|
42
|
+
update_sketch.update(1, a);
|
43
|
+
REQUIRE(!update_sketch.is_empty());
|
44
|
+
REQUIRE(update_sketch.get_num_retained() == 1);
|
45
|
+
update_sketch.reset();
|
46
|
+
REQUIRE(update_sketch.is_empty());
|
47
|
+
REQUIRE(update_sketch.get_num_retained() == 0);
|
48
|
+
}
|
49
|
+
|
39
50
|
TEST_CASE("aod sketch: serialization compatibility with java - empty", "[tuple_sketch]") {
|
40
51
|
auto update_sketch = update_array_of_doubles_sketch::builder().build();
|
41
52
|
REQUIRE(update_sketch.is_empty());
|
@@ -263,6 +274,11 @@ TEST_CASE("aod union: half overlap", "[tuple_sketch]") {
|
|
263
274
|
u.update(update_sketch2);
|
264
275
|
auto result = u.get_result();
|
265
276
|
REQUIRE(result.get_estimate() == Approx(1500).margin(0.01));
|
277
|
+
|
278
|
+
u.reset();
|
279
|
+
result = u.get_result();
|
280
|
+
REQUIRE(result.is_empty());
|
281
|
+
REQUIRE(result.get_num_retained() == 0);
|
266
282
|
}
|
267
283
|
|
268
284
|
TEST_CASE("aod intersection: half overlap", "[tuple_sketch]") {
|
@@ -34,7 +34,6 @@ std::ostream& operator<<(std::ostream& os, const three_doubles& tuple) {
|
|
34
34
|
|
35
35
|
#include <catch.hpp>
|
36
36
|
#include <tuple_sketch.hpp>
|
37
|
-
//#include <test_type.hpp>
|
38
37
|
|
39
38
|
namespace datasketches {
|
40
39
|
|
@@ -43,9 +42,11 @@ TEST_CASE("tuple sketch float: builder", "[tuple_sketch]") {
|
|
43
42
|
builder.set_lg_k(10).set_p(0.5f).set_resize_factor(theta_constants::resize_factor::X2).set_seed(123);
|
44
43
|
auto sketch = builder.build();
|
45
44
|
REQUIRE(sketch.get_lg_k() == 10);
|
46
|
-
REQUIRE(sketch.get_theta() == 0.5);
|
45
|
+
REQUIRE(sketch.get_theta() == 1.0); // empty sketch should have theta 1.0
|
47
46
|
REQUIRE(sketch.get_rf() == theta_constants::resize_factor::X2);
|
48
47
|
REQUIRE(sketch.get_seed_hash() == compute_seed_hash(123));
|
48
|
+
sketch.update(1, 0);
|
49
|
+
REQUIRE(sketch.get_theta() == 0.5); // theta = p
|
49
50
|
}
|
50
51
|
|
51
52
|
TEST_CASE("tuple sketch float: empty", "[tuple_sketch]") {
|
@@ -58,7 +59,7 @@ TEST_CASE("tuple sketch float: empty", "[tuple_sketch]") {
|
|
58
59
|
REQUIRE(update_sketch.get_upper_bound(1) == 0);
|
59
60
|
REQUIRE(update_sketch.get_theta() == 1);
|
60
61
|
REQUIRE(update_sketch.get_num_retained() == 0);
|
61
|
-
REQUIRE(
|
62
|
+
REQUIRE(update_sketch.is_ordered());
|
62
63
|
|
63
64
|
auto compact_sketch = update_sketch.compact();
|
64
65
|
std::cout << "sizeof(compact_tuple_sketch<float>)=" << sizeof(compact_sketch) << std::endl;
|
@@ -70,6 +71,33 @@ TEST_CASE("tuple sketch float: empty", "[tuple_sketch]") {
|
|
70
71
|
REQUIRE(compact_sketch.get_theta() == 1);
|
71
72
|
REQUIRE(compact_sketch.get_num_retained() == 0);
|
72
73
|
REQUIRE(compact_sketch.is_ordered());
|
74
|
+
|
75
|
+
// empty is forced to be ordered
|
76
|
+
REQUIRE(update_sketch.compact(false).is_ordered());
|
77
|
+
}
|
78
|
+
|
79
|
+
TEST_CASE("tuple sketch: single item", "[theta_sketch]") {
|
80
|
+
auto update_sketch = update_tuple_sketch<float>::builder().build();
|
81
|
+
update_sketch.update(1, 1.0f);
|
82
|
+
REQUIRE_FALSE(update_sketch.is_empty());
|
83
|
+
REQUIRE_FALSE(update_sketch.is_estimation_mode());
|
84
|
+
REQUIRE(update_sketch.get_theta() == 1.0);
|
85
|
+
REQUIRE(update_sketch.get_estimate() == 1.0);
|
86
|
+
REQUIRE(update_sketch.get_lower_bound(1) == 1.0);
|
87
|
+
REQUIRE(update_sketch.get_upper_bound(1) == 1.0);
|
88
|
+
REQUIRE(update_sketch.is_ordered()); // one item is ordered
|
89
|
+
|
90
|
+
auto compact_sketch = update_sketch.compact();
|
91
|
+
REQUIRE_FALSE(compact_sketch.is_empty());
|
92
|
+
REQUIRE_FALSE(compact_sketch.is_estimation_mode());
|
93
|
+
REQUIRE(compact_sketch.get_theta() == 1.0);
|
94
|
+
REQUIRE(compact_sketch.get_estimate() == 1.0);
|
95
|
+
REQUIRE(compact_sketch.get_lower_bound(1) == 1.0);
|
96
|
+
REQUIRE(compact_sketch.get_upper_bound(1) == 1.0);
|
97
|
+
REQUIRE(compact_sketch.is_ordered());
|
98
|
+
|
99
|
+
// single item is forced to be ordered
|
100
|
+
REQUIRE(update_sketch.compact(false).is_ordered());
|
73
101
|
}
|
74
102
|
|
75
103
|
TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
|
@@ -78,14 +106,14 @@ TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
|
|
78
106
|
update_sketch.update(2, 2.0f);
|
79
107
|
update_sketch.update(1, 1.0f);
|
80
108
|
// std::cout << update_sketch.to_string(true);
|
81
|
-
|
82
|
-
|
109
|
+
REQUIRE_FALSE(update_sketch.is_empty());
|
110
|
+
REQUIRE_FALSE(update_sketch.is_estimation_mode());
|
83
111
|
REQUIRE(update_sketch.get_estimate() == 2);
|
84
112
|
REQUIRE(update_sketch.get_lower_bound(1) == 2);
|
85
113
|
REQUIRE(update_sketch.get_upper_bound(1) == 2);
|
86
114
|
REQUIRE(update_sketch.get_theta() == 1);
|
87
115
|
REQUIRE(update_sketch.get_num_retained() == 2);
|
88
|
-
|
116
|
+
REQUIRE_FALSE(update_sketch.is_ordered());
|
89
117
|
int count = 0;
|
90
118
|
for (const auto& entry: update_sketch) {
|
91
119
|
REQUIRE(entry.second == 2);
|
@@ -95,8 +123,8 @@ TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
|
|
95
123
|
|
96
124
|
auto compact_sketch = update_sketch.compact();
|
97
125
|
// std::cout << compact_sketch.to_string(true);
|
98
|
-
|
99
|
-
|
126
|
+
REQUIRE_FALSE(compact_sketch.is_empty());
|
127
|
+
REQUIRE_FALSE(compact_sketch.is_estimation_mode());
|
100
128
|
REQUIRE(compact_sketch.get_estimate() == 2);
|
101
129
|
REQUIRE(compact_sketch.get_lower_bound(1) == 2);
|
102
130
|
REQUIRE(compact_sketch.get_upper_bound(1) == 2);
|
@@ -151,6 +179,16 @@ TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
|
|
151
179
|
++it;
|
152
180
|
}
|
153
181
|
}
|
182
|
+
|
183
|
+
update_sketch.reset();
|
184
|
+
REQUIRE(update_sketch.is_empty());
|
185
|
+
REQUIRE_FALSE(update_sketch.is_estimation_mode());
|
186
|
+
REQUIRE(update_sketch.get_estimate() == 0);
|
187
|
+
REQUIRE(update_sketch.get_lower_bound(1) == 0);
|
188
|
+
REQUIRE(update_sketch.get_upper_bound(1) == 0);
|
189
|
+
REQUIRE(update_sketch.get_theta() == 1);
|
190
|
+
REQUIRE(update_sketch.get_num_retained() == 0);
|
191
|
+
REQUIRE(update_sketch.is_ordered());
|
154
192
|
}
|
155
193
|
|
156
194
|
template<typename T>
|
@@ -81,6 +81,13 @@ TEST_CASE("tuple_union float: simple case", "[tuple union]") {
|
|
81
81
|
u.update(update_sketch2);
|
82
82
|
auto result = u.get_result();
|
83
83
|
REQUIRE(result.get_num_retained() == 3);
|
84
|
+
|
85
|
+
u.reset();
|
86
|
+
result = u.get_result();
|
87
|
+
REQUIRE(result.is_empty());
|
88
|
+
REQUIRE(result.get_num_retained() == 0);
|
89
|
+
REQUIRE(!result.is_estimation_mode());
|
90
|
+
REQUIRE(result.get_estimate() == 0);
|
84
91
|
}
|
85
92
|
|
86
93
|
TEST_CASE("tuple_union float: exact mode half overlap", "[tuple union]") {
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: datasketches
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.3
|
4
|
+
version: 0.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-12-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rice
|
@@ -51,6 +51,7 @@ files:
|
|
51
51
|
- vendor/datasketches-cpp/MANIFEST.in
|
52
52
|
- vendor/datasketches-cpp/NOTICE
|
53
53
|
- vendor/datasketches-cpp/README.md
|
54
|
+
- vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in
|
54
55
|
- vendor/datasketches-cpp/common/CMakeLists.txt
|
55
56
|
- vendor/datasketches-cpp/common/include/MurmurHash3.h
|
56
57
|
- vendor/datasketches-cpp/common/include/binomial_bounds.hpp
|
@@ -256,14 +257,18 @@ files:
|
|
256
257
|
- vendor/datasketches-cpp/theta/test/CMakeLists.txt
|
257
258
|
- vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp
|
258
259
|
- vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk
|
260
|
+
- vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk
|
261
|
+
- vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk
|
259
262
|
- vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk
|
263
|
+
- vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk
|
264
|
+
- vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk
|
265
|
+
- vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk
|
260
266
|
- vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk
|
261
267
|
- vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp
|
262
268
|
- vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp
|
269
|
+
- vendor/datasketches-cpp/theta/test/theta_setop_test.cpp
|
263
270
|
- vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp
|
264
271
|
- vendor/datasketches-cpp/theta/test/theta_union_test.cpp
|
265
|
-
- vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk
|
266
|
-
- vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk
|
267
272
|
- vendor/datasketches-cpp/tuple/CMakeLists.txt
|
268
273
|
- vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp
|
269
274
|
- vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp
|
@@ -295,7 +300,7 @@ files:
|
|
295
300
|
- vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp
|
296
301
|
- vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp
|
297
302
|
- vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp
|
298
|
-
homepage: https://github.com/ankane/datasketches
|
303
|
+
homepage: https://github.com/ankane/datasketches-ruby
|
299
304
|
licenses:
|
300
305
|
- Apache-2.0
|
301
306
|
metadata: {}
|
@@ -314,7 +319,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
314
319
|
- !ruby/object:Gem::Version
|
315
320
|
version: '0'
|
316
321
|
requirements: []
|
317
|
-
rubygems_version: 3.
|
322
|
+
rubygems_version: 3.3.3
|
318
323
|
signing_key:
|
319
324
|
specification_version: 4
|
320
325
|
summary: Sketch data structures for Ruby
|
Binary file
|
Binary file
|