datasketches 0.2.2 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/README.md +8 -8
- data/ext/datasketches/kll_wrapper.cpp +5 -1
- data/ext/datasketches/theta_wrapper.cpp +20 -4
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +27 -5
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/MANIFEST.in +3 -0
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +76 -9
- data/vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in +10 -0
- data/vendor/datasketches-cpp/common/CMakeLists.txt +18 -13
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +16 -0
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov.hpp +5 -3
- data/vendor/datasketches-cpp/{kll → common}/include/kolmogorov_smirnov_impl.hpp +13 -16
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp +121 -0
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +91 -0
- data/vendor/datasketches-cpp/common/test/test_type.hpp +2 -0
- data/vendor/datasketches-cpp/cpc/CMakeLists.txt +15 -35
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +10 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_confidence.hpp +1 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +5 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +10 -6
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +17 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +2 -0
- data/vendor/datasketches-cpp/fi/CMakeLists.txt +5 -15
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +37 -5
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +30 -12
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +2 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +33 -56
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +1 -0
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +6 -4
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +2 -0
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +2 -0
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +1 -0
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +59 -0
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +2 -0
- data/vendor/datasketches-cpp/hll/test/TablesTest.cpp +1 -0
- data/vendor/datasketches-cpp/kll/CMakeLists.txt +5 -19
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +3 -0
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +103 -44
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +110 -130
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +156 -23
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +1 -1
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +17 -6
- data/vendor/datasketches-cpp/python/README.md +57 -50
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +4 -0
- data/vendor/datasketches-cpp/python/src/fi_wrapper.cpp +6 -1
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +49 -14
- data/vendor/datasketches-cpp/python/src/ks_wrapper.cpp +68 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +240 -0
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +9 -2
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +12 -5
- data/vendor/datasketches-cpp/python/tests/kll_test.py +12 -6
- data/vendor/datasketches-cpp/python/tests/quantiles_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +2 -2
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/quantiles/CMakeLists.txt +42 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +641 -0
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +1309 -0
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +44 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk +0 -0
- data/vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp +110 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +129 -0
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +912 -0
- data/vendor/datasketches-cpp/req/CMakeLists.txt +6 -21
- data/vendor/datasketches-cpp/req/include/req_common.hpp +0 -5
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +3 -2
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +62 -23
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +66 -61
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +5 -0
- data/vendor/datasketches-cpp/sampling/CMakeLists.txt +5 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +54 -12
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +45 -34
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +41 -6
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +33 -15
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +1 -0
- data/vendor/datasketches-cpp/setup.py +10 -7
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +26 -45
- data/vendor/datasketches-cpp/theta/include/bounds_on_ratios_in_sampled_sets.hpp +1 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +92 -23
- data/vendor/datasketches-cpp/theta/include/theta_constants.hpp +9 -4
- data/vendor/datasketches-cpp/theta/include/theta_helpers.hpp +15 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_base_impl.hpp +7 -6
- data/vendor/datasketches-cpp/theta/include/theta_set_difference_base_impl.hpp +3 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +32 -15
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +150 -93
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +6 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base.hpp +3 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_base_impl.hpp +9 -2
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +8 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +9 -5
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +39 -10
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +2 -0
- data/vendor/datasketches-cpp/theta/test/theta_setop_test.cpp +446 -0
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +429 -1
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +25 -11
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +18 -33
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union.hpp +1 -1
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_union_impl.hpp +3 -3
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +29 -9
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +34 -14
- data/vendor/datasketches-cpp/tuple/include/tuple_union.hpp +6 -1
- data/vendor/datasketches-cpp/tuple/include/tuple_union_impl.hpp +8 -3
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +16 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +46 -8
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +8 -0
- metadata +34 -12
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +0 -75
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +0 -184
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +0 -69
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +0 -60
- data/vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk +0 -0
|
@@ -122,7 +122,7 @@ public:
|
|
|
122
122
|
|
|
123
123
|
private:
|
|
124
124
|
// for builder
|
|
125
|
-
update_array_of_doubles_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta,
|
|
125
|
+
update_array_of_doubles_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta,
|
|
126
126
|
uint64_t seed, const array_of_doubles_update_policy<A>& policy, const A& allocator);
|
|
127
127
|
};
|
|
128
128
|
|
|
@@ -21,8 +21,8 @@ namespace datasketches {
|
|
|
21
21
|
|
|
22
22
|
template<typename A>
|
|
23
23
|
update_array_of_doubles_sketch_alloc<A>::update_array_of_doubles_sketch_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf,
|
|
24
|
-
uint64_t theta, uint64_t seed, const array_of_doubles_update_policy<A>& policy, const A& allocator):
|
|
25
|
-
Base(lg_cur_size, lg_nom_size, rf, theta, seed, policy, allocator) {}
|
|
24
|
+
float p, uint64_t theta, uint64_t seed, const array_of_doubles_update_policy<A>& policy, const A& allocator):
|
|
25
|
+
Base(lg_cur_size, lg_nom_size, rf, p, theta, seed, policy, allocator) {}
|
|
26
26
|
|
|
27
27
|
|
|
28
28
|
template<typename A>
|
|
@@ -43,7 +43,7 @@ tuple_base_builder<builder, array_of_doubles_update_policy<A>, A>(policy, alloca
|
|
|
43
43
|
|
|
44
44
|
template<typename A>
|
|
45
45
|
update_array_of_doubles_sketch_alloc<A> update_array_of_doubles_sketch_alloc<A>::builder::build() const {
|
|
46
|
-
return update_array_of_doubles_sketch_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
|
46
|
+
return update_array_of_doubles_sketch_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
|
47
47
|
}
|
|
48
48
|
|
|
49
49
|
// compact sketch
|
|
@@ -61,7 +61,7 @@ public:
|
|
|
61
61
|
|
|
62
62
|
private:
|
|
63
63
|
// for builder
|
|
64
|
-
array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
|
64
|
+
array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
|
65
65
|
};
|
|
66
66
|
|
|
67
67
|
template<typename Allocator>
|
|
@@ -20,8 +20,8 @@
|
|
|
20
20
|
namespace datasketches {
|
|
21
21
|
|
|
22
22
|
template<typename A>
|
|
23
|
-
array_of_doubles_union_alloc<A>::array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const A& allocator):
|
|
24
|
-
Base(lg_cur_size, lg_nom_size, rf, theta, seed, policy, allocator)
|
|
23
|
+
array_of_doubles_union_alloc<A>::array_of_doubles_union_alloc(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const A& allocator):
|
|
24
|
+
Base(lg_cur_size, lg_nom_size, rf, p, theta, seed, policy, allocator)
|
|
25
25
|
{}
|
|
26
26
|
|
|
27
27
|
template<typename A>
|
|
@@ -37,7 +37,7 @@ tuple_base_builder<builder, Policy, A>(policy, allocator) {}
|
|
|
37
37
|
|
|
38
38
|
template<typename A>
|
|
39
39
|
array_of_doubles_union_alloc<A> array_of_doubles_union_alloc<A>::builder::build() const {
|
|
40
|
-
return array_of_doubles_union_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
|
40
|
+
return array_of_doubles_union_alloc<A>(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
|
41
41
|
}
|
|
42
42
|
|
|
43
43
|
} /* namespace datasketches */
|
|
@@ -153,8 +153,7 @@ public:
|
|
|
153
153
|
virtual const_iterator end() const = 0;
|
|
154
154
|
|
|
155
155
|
protected:
|
|
156
|
-
|
|
157
|
-
virtual void print_specifics(ostrstream& os) const = 0;
|
|
156
|
+
virtual void print_specifics(std::ostringstream& os) const = 0;
|
|
158
157
|
|
|
159
158
|
static uint16_t get_seed_hash(uint64_t seed);
|
|
160
159
|
|
|
@@ -325,6 +324,11 @@ public:
|
|
|
325
324
|
*/
|
|
326
325
|
void trim();
|
|
327
326
|
|
|
327
|
+
/**
|
|
328
|
+
* Reset the sketch to the initial empty state
|
|
329
|
+
*/
|
|
330
|
+
void reset();
|
|
331
|
+
|
|
328
332
|
/**
|
|
329
333
|
* Converts this sketch to a compact sketch (ordered or unordered).
|
|
330
334
|
* @param ordered optional flag to specify if ordered sketch should be produced
|
|
@@ -342,10 +346,9 @@ protected:
|
|
|
342
346
|
tuple_map map_;
|
|
343
347
|
|
|
344
348
|
// for builder
|
|
345
|
-
update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
|
349
|
+
update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
|
346
350
|
|
|
347
|
-
|
|
348
|
-
virtual void print_specifics(ostrstream& os) const;
|
|
351
|
+
virtual void print_specifics(std::ostringstream& os) const;
|
|
349
352
|
};
|
|
350
353
|
|
|
351
354
|
// compact sketch
|
|
@@ -367,9 +370,11 @@ public:
|
|
|
367
370
|
using vector_bytes = std::vector<uint8_t, AllocBytes>;
|
|
368
371
|
using comparator = compare_by_key<ExtractKey>;
|
|
369
372
|
|
|
370
|
-
static const uint8_t SERIAL_VERSION = 1;
|
|
373
|
+
static const uint8_t SERIAL_VERSION_LEGACY = 1;
|
|
374
|
+
static const uint8_t SERIAL_VERSION = 3;
|
|
371
375
|
static const uint8_t SKETCH_FAMILY = 9;
|
|
372
|
-
static const uint8_t SKETCH_TYPE = 5;
|
|
376
|
+
static const uint8_t SKETCH_TYPE = 1;
|
|
377
|
+
static const uint8_t SKETCH_TYPE_LEGACY = 5;
|
|
373
378
|
enum flags { IS_BIG_ENDIAN, IS_READ_ONLY, IS_EMPTY, IS_COMPACT, IS_ORDERED };
|
|
374
379
|
|
|
375
380
|
// Instances of this type can be obtained:
|
|
@@ -393,9 +398,23 @@ public:
|
|
|
393
398
|
virtual uint32_t get_num_retained() const;
|
|
394
399
|
virtual uint16_t get_seed_hash() const;
|
|
395
400
|
|
|
401
|
+
/**
|
|
402
|
+
* This method serializes the sketch into a given stream in a binary form
|
|
403
|
+
* @param os output stream
|
|
404
|
+
* @param instance of a SerDe
|
|
405
|
+
*/
|
|
396
406
|
template<typename SerDe = serde<Summary>>
|
|
397
407
|
void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
|
|
398
408
|
|
|
409
|
+
/**
|
|
410
|
+
* This method serializes the sketch as a vector of bytes.
|
|
411
|
+
* An optional header can be reserved in front of the sketch.
|
|
412
|
+
* It is a blank space of a given size.
|
|
413
|
+
* This header is used in Datasketches PostgreSQL extension.
|
|
414
|
+
* @param header_size_bytes space to reserve in front of the sketch
|
|
415
|
+
* @param instance of a SerDe
|
|
416
|
+
* @return serialized sketch as a vector of bytes
|
|
417
|
+
*/
|
|
399
418
|
template<typename SerDe = serde<Summary>>
|
|
400
419
|
vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
|
|
401
420
|
|
|
@@ -409,6 +428,7 @@ public:
|
|
|
409
428
|
* @param is input stream
|
|
410
429
|
* @param seed the seed for the hash function that was used to create the sketch
|
|
411
430
|
* @param instance of a SerDe
|
|
431
|
+
* @param instance of an Allocator
|
|
412
432
|
* @return an instance of a sketch
|
|
413
433
|
*/
|
|
414
434
|
template<typename SerDe = serde<Summary>>
|
|
@@ -421,6 +441,7 @@ public:
|
|
|
421
441
|
* @param size the size of the array
|
|
422
442
|
* @param seed the seed for the hash function that was used to create the sketch
|
|
423
443
|
* @param instance of a SerDe
|
|
444
|
+
* @param instance of an Allocator
|
|
424
445
|
* @return an instance of the sketch
|
|
425
446
|
*/
|
|
426
447
|
template<typename SerDe = serde<Summary>>
|
|
@@ -473,8 +494,7 @@ protected:
|
|
|
473
494
|
bool destroy_;
|
|
474
495
|
};
|
|
475
496
|
|
|
476
|
-
|
|
477
|
-
virtual void print_specifics(ostrstream& os) const;
|
|
497
|
+
virtual void print_specifics(std::ostringstream& os) const;
|
|
478
498
|
|
|
479
499
|
};
|
|
480
500
|
|
|
@@ -18,6 +18,7 @@
|
|
|
18
18
|
*/
|
|
19
19
|
|
|
20
20
|
#include <sstream>
|
|
21
|
+
#include <stdexcept>
|
|
21
22
|
|
|
22
23
|
#include "binomial_bounds.hpp"
|
|
23
24
|
#include "theta_helpers.hpp"
|
|
@@ -53,7 +54,9 @@ double tuple_sketch<S, A>::get_upper_bound(uint8_t num_std_devs) const {
|
|
|
53
54
|
|
|
54
55
|
template<typename S, typename A>
|
|
55
56
|
string<A> tuple_sketch<S, A>::to_string(bool detail) const {
|
|
56
|
-
|
|
57
|
+
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
|
58
|
+
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
|
59
|
+
std::ostringstream os;
|
|
57
60
|
os << "### Tuple sketch summary:" << std::endl;
|
|
58
61
|
os << " num retained entries : " << get_num_retained() << std::endl;
|
|
59
62
|
os << " seed hash : " << get_seed_hash() << std::endl;
|
|
@@ -74,15 +77,15 @@ string<A> tuple_sketch<S, A>::to_string(bool detail) const {
|
|
|
74
77
|
}
|
|
75
78
|
os << "### End retained entries" << std::endl;
|
|
76
79
|
}
|
|
77
|
-
return os.str();
|
|
80
|
+
return string<A>(os.str().c_str(), get_allocator());
|
|
78
81
|
}
|
|
79
82
|
|
|
80
83
|
// update sketch
|
|
81
84
|
|
|
82
85
|
template<typename S, typename U, typename P, typename A>
|
|
83
|
-
update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
|
|
86
|
+
update_tuple_sketch<S, U, P, A>::update_tuple_sketch(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
|
|
84
87
|
policy_(policy),
|
|
85
|
-
map_(lg_cur_size, lg_nom_size, rf, theta, seed, allocator)
|
|
88
|
+
map_(lg_cur_size, lg_nom_size, rf, p, theta, seed, allocator)
|
|
86
89
|
{}
|
|
87
90
|
|
|
88
91
|
template<typename S, typename U, typename P, typename A>
|
|
@@ -97,12 +100,12 @@ bool update_tuple_sketch<S, U, P, A>::is_empty() const {
|
|
|
97
100
|
|
|
98
101
|
template<typename S, typename U, typename P, typename A>
|
|
99
102
|
bool update_tuple_sketch<S, U, P, A>::is_ordered() const {
|
|
100
|
-
return false;
|
|
103
|
+
return map_.num_entries_ > 1 ? false : true;;
|
|
101
104
|
}
|
|
102
105
|
|
|
103
106
|
template<typename S, typename U, typename P, typename A>
|
|
104
107
|
uint64_t update_tuple_sketch<S, U, P, A>::get_theta64() const {
|
|
105
|
-
return map_.theta_;
|
|
108
|
+
return is_empty() ? theta_constants::MAX_THETA : map_.theta_;
|
|
106
109
|
}
|
|
107
110
|
|
|
108
111
|
template<typename S, typename U, typename P, typename A>
|
|
@@ -212,6 +215,11 @@ void update_tuple_sketch<S, U, P, A>::trim() {
|
|
|
212
215
|
map_.trim();
|
|
213
216
|
}
|
|
214
217
|
|
|
218
|
+
template<typename S, typename U, typename P, typename A>
|
|
219
|
+
void update_tuple_sketch<S, U, P, A>::reset() {
|
|
220
|
+
map_.reset();
|
|
221
|
+
}
|
|
222
|
+
|
|
215
223
|
template<typename S, typename U, typename P, typename A>
|
|
216
224
|
auto update_tuple_sketch<S, U, P, A>::begin() -> iterator {
|
|
217
225
|
return iterator(map_.entries_, 1 << map_.lg_cur_size_, 0);
|
|
@@ -238,7 +246,7 @@ compact_tuple_sketch<S, A> update_tuple_sketch<S, U, P, A>::compact(bool ordered
|
|
|
238
246
|
}
|
|
239
247
|
|
|
240
248
|
template<typename S, typename U, typename P, typename A>
|
|
241
|
-
void update_tuple_sketch<S, U, P, A>::print_specifics(ostrstream& os) const {
|
|
249
|
+
void update_tuple_sketch<S, U, P, A>::print_specifics(std::ostringstream& os) const {
|
|
242
250
|
os << " lg nominal size : " << (int) map_.lg_nom_size_ << std::endl;
|
|
243
251
|
os << " lg current size : " << (int) map_.lg_cur_size_ << std::endl;
|
|
244
252
|
os << " resize factor : " << (1 << map_.rf_) << std::endl;
|
|
@@ -250,7 +258,7 @@ template<typename S, typename A>
|
|
|
250
258
|
compact_tuple_sketch<S, A>::compact_tuple_sketch(bool is_empty, bool is_ordered, uint16_t seed_hash, uint64_t theta,
|
|
251
259
|
std::vector<Entry, AllocEntry>&& entries):
|
|
252
260
|
is_empty_(is_empty),
|
|
253
|
-
is_ordered_(is_ordered),
|
|
261
|
+
is_ordered_(is_ordered || (entries.size() <= 1ULL)),
|
|
254
262
|
seed_hash_(seed_hash),
|
|
255
263
|
theta_(theta),
|
|
256
264
|
entries_(std::move(entries))
|
|
@@ -437,9 +445,15 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(std::istream&
|
|
|
437
445
|
read<uint8_t>(is); // unused
|
|
438
446
|
const auto flags_byte = read<uint8_t>(is);
|
|
439
447
|
const auto seed_hash = read<uint16_t>(is);
|
|
440
|
-
|
|
448
|
+
if (serial_version != SERIAL_VERSION && serial_version != SERIAL_VERSION_LEGACY) {
|
|
449
|
+
throw std::invalid_argument("serial version mismatch: expected " + std::to_string(SERIAL_VERSION) + " or "
|
|
450
|
+
+ std::to_string(SERIAL_VERSION_LEGACY) + ", actual " + std::to_string(serial_version));
|
|
451
|
+
}
|
|
441
452
|
checker<true>::check_sketch_family(family, SKETCH_FAMILY);
|
|
442
|
-
|
|
453
|
+
if (type != SKETCH_TYPE && type != SKETCH_TYPE_LEGACY) {
|
|
454
|
+
throw std::invalid_argument("sketch type mismatch: expected " + std::to_string(SKETCH_TYPE) + " or "
|
|
455
|
+
+ std::to_string(SKETCH_TYPE_LEGACY) + ", actual " + std::to_string(type));
|
|
456
|
+
}
|
|
443
457
|
const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
|
|
444
458
|
if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
|
|
445
459
|
|
|
@@ -492,9 +506,15 @@ compact_tuple_sketch<S, A> compact_tuple_sketch<S, A>::deserialize(const void* b
|
|
|
492
506
|
ptr += copy_from_mem(ptr, flags_byte);
|
|
493
507
|
uint16_t seed_hash;
|
|
494
508
|
ptr += copy_from_mem(ptr, seed_hash);
|
|
495
|
-
|
|
509
|
+
if (serial_version != SERIAL_VERSION && serial_version != SERIAL_VERSION_LEGACY) {
|
|
510
|
+
throw std::invalid_argument("serial version mismatch: expected " + std::to_string(SERIAL_VERSION) + " or "
|
|
511
|
+
+ std::to_string(SERIAL_VERSION_LEGACY) + ", actual " + std::to_string(serial_version));
|
|
512
|
+
}
|
|
496
513
|
checker<true>::check_sketch_family(family, SKETCH_FAMILY);
|
|
497
|
-
|
|
514
|
+
if (type != SKETCH_TYPE && type != SKETCH_TYPE_LEGACY) {
|
|
515
|
+
throw std::invalid_argument("sketch type mismatch: expected " + std::to_string(SKETCH_TYPE) + " or "
|
|
516
|
+
+ std::to_string(SKETCH_TYPE_LEGACY) + ", actual " + std::to_string(type));
|
|
517
|
+
}
|
|
498
518
|
const bool is_empty = flags_byte & (1 << flags::IS_EMPTY);
|
|
499
519
|
if (!is_empty) checker<true>::check_seed_hash(seed_hash, compute_seed_hash(seed));
|
|
500
520
|
|
|
@@ -554,7 +574,7 @@ auto compact_tuple_sketch<S, A>::end() const -> const_iterator {
|
|
|
554
574
|
}
|
|
555
575
|
|
|
556
576
|
template<typename S, typename A>
|
|
557
|
-
void compact_tuple_sketch<S, A>::print_specifics(ostrstream&) const {}
|
|
577
|
+
void compact_tuple_sketch<S, A>::print_specifics(std::ostringstream&) const {}
|
|
558
578
|
|
|
559
579
|
// builder
|
|
560
580
|
|
|
@@ -568,7 +588,7 @@ tuple_base_builder<builder, P, A>(policy, allocator) {}
|
|
|
568
588
|
|
|
569
589
|
template<typename S, typename U, typename P, typename A>
|
|
570
590
|
auto update_tuple_sketch<S, U, P, A>::builder::build() const -> update_tuple_sketch {
|
|
571
|
-
return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
|
591
|
+
return update_tuple_sketch(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
|
572
592
|
}
|
|
573
593
|
|
|
574
594
|
} /* namespace datasketches */
|
|
@@ -80,11 +80,16 @@ public:
|
|
|
80
80
|
*/
|
|
81
81
|
CompactSketch get_result(bool ordered = true) const;
|
|
82
82
|
|
|
83
|
+
/**
|
|
84
|
+
* Reset the union to the initial empty state
|
|
85
|
+
*/
|
|
86
|
+
void reset();
|
|
87
|
+
|
|
83
88
|
protected:
|
|
84
89
|
State state_;
|
|
85
90
|
|
|
86
91
|
// for builder
|
|
87
|
-
tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
|
92
|
+
tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const Policy& policy, const Allocator& allocator);
|
|
88
93
|
};
|
|
89
94
|
|
|
90
95
|
template<typename S, typename P, typename A>
|
|
@@ -20,8 +20,8 @@
|
|
|
20
20
|
namespace datasketches {
|
|
21
21
|
|
|
22
22
|
template<typename S, typename P, typename A>
|
|
23
|
-
tuple_union<S, P, A>::tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
|
|
24
|
-
state_(lg_cur_size, lg_nom_size, rf, theta, seed, internal_policy(policy), allocator)
|
|
23
|
+
tuple_union<S, P, A>::tuple_union(uint8_t lg_cur_size, uint8_t lg_nom_size, resize_factor rf, float p, uint64_t theta, uint64_t seed, const P& policy, const A& allocator):
|
|
24
|
+
state_(lg_cur_size, lg_nom_size, rf, p, theta, seed, internal_policy(policy), allocator)
|
|
25
25
|
{}
|
|
26
26
|
|
|
27
27
|
template<typename S, typename P, typename A>
|
|
@@ -35,13 +35,18 @@ auto tuple_union<S, P, A>::get_result(bool ordered) const -> CompactSketch {
|
|
|
35
35
|
return state_.get_result(ordered);
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
+
template<typename S, typename P, typename A>
|
|
39
|
+
void tuple_union<S, P, A>::reset() {
|
|
40
|
+
return state_.reset();
|
|
41
|
+
}
|
|
42
|
+
|
|
38
43
|
template<typename S, typename P, typename A>
|
|
39
44
|
tuple_union<S, P, A>::builder::builder(const P& policy, const A& allocator):
|
|
40
45
|
tuple_base_builder<builder, P, A>(policy, allocator) {}
|
|
41
46
|
|
|
42
47
|
template<typename S, typename P, typename A>
|
|
43
48
|
auto tuple_union<S, P, A>::builder::build() const -> tuple_union {
|
|
44
|
-
return tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
|
49
|
+
return tuple_union(this->starting_lg_size(), this->lg_k_, this->rf_, this->p_, this->starting_theta(), this->seed_, this->policy_, this->allocator_);
|
|
45
50
|
}
|
|
46
51
|
|
|
47
52
|
} /* namespace datasketches */
|
|
@@ -36,6 +36,17 @@ const std::string inputPath = TEST_BINARY_INPUT_PATH;
|
|
|
36
36
|
const std::string inputPath = "test/";
|
|
37
37
|
#endif
|
|
38
38
|
|
|
39
|
+
TEST_CASE("aod sketch: reset", "[tuple_sketch]") {
|
|
40
|
+
auto update_sketch = update_array_of_doubles_sketch::builder().build();
|
|
41
|
+
std::vector<double> a = {1};
|
|
42
|
+
update_sketch.update(1, a);
|
|
43
|
+
REQUIRE(!update_sketch.is_empty());
|
|
44
|
+
REQUIRE(update_sketch.get_num_retained() == 1);
|
|
45
|
+
update_sketch.reset();
|
|
46
|
+
REQUIRE(update_sketch.is_empty());
|
|
47
|
+
REQUIRE(update_sketch.get_num_retained() == 0);
|
|
48
|
+
}
|
|
49
|
+
|
|
39
50
|
TEST_CASE("aod sketch: serialization compatibility with java - empty", "[tuple_sketch]") {
|
|
40
51
|
auto update_sketch = update_array_of_doubles_sketch::builder().build();
|
|
41
52
|
REQUIRE(update_sketch.is_empty());
|
|
@@ -263,6 +274,11 @@ TEST_CASE("aod union: half overlap", "[tuple_sketch]") {
|
|
|
263
274
|
u.update(update_sketch2);
|
|
264
275
|
auto result = u.get_result();
|
|
265
276
|
REQUIRE(result.get_estimate() == Approx(1500).margin(0.01));
|
|
277
|
+
|
|
278
|
+
u.reset();
|
|
279
|
+
result = u.get_result();
|
|
280
|
+
REQUIRE(result.is_empty());
|
|
281
|
+
REQUIRE(result.get_num_retained() == 0);
|
|
266
282
|
}
|
|
267
283
|
|
|
268
284
|
TEST_CASE("aod intersection: half overlap", "[tuple_sketch]") {
|
|
@@ -34,7 +34,6 @@ std::ostream& operator<<(std::ostream& os, const three_doubles& tuple) {
|
|
|
34
34
|
|
|
35
35
|
#include <catch.hpp>
|
|
36
36
|
#include <tuple_sketch.hpp>
|
|
37
|
-
//#include <test_type.hpp>
|
|
38
37
|
|
|
39
38
|
namespace datasketches {
|
|
40
39
|
|
|
@@ -43,9 +42,11 @@ TEST_CASE("tuple sketch float: builder", "[tuple_sketch]") {
|
|
|
43
42
|
builder.set_lg_k(10).set_p(0.5f).set_resize_factor(theta_constants::resize_factor::X2).set_seed(123);
|
|
44
43
|
auto sketch = builder.build();
|
|
45
44
|
REQUIRE(sketch.get_lg_k() == 10);
|
|
46
|
-
REQUIRE(sketch.get_theta() == 0.5);
|
|
45
|
+
REQUIRE(sketch.get_theta() == 1.0); // empty sketch should have theta 1.0
|
|
47
46
|
REQUIRE(sketch.get_rf() == theta_constants::resize_factor::X2);
|
|
48
47
|
REQUIRE(sketch.get_seed_hash() == compute_seed_hash(123));
|
|
48
|
+
sketch.update(1, 0);
|
|
49
|
+
REQUIRE(sketch.get_theta() == 0.5); // theta = p
|
|
49
50
|
}
|
|
50
51
|
|
|
51
52
|
TEST_CASE("tuple sketch float: empty", "[tuple_sketch]") {
|
|
@@ -58,7 +59,7 @@ TEST_CASE("tuple sketch float: empty", "[tuple_sketch]") {
|
|
|
58
59
|
REQUIRE(update_sketch.get_upper_bound(1) == 0);
|
|
59
60
|
REQUIRE(update_sketch.get_theta() == 1);
|
|
60
61
|
REQUIRE(update_sketch.get_num_retained() == 0);
|
|
61
|
-
REQUIRE(
|
|
62
|
+
REQUIRE(update_sketch.is_ordered());
|
|
62
63
|
|
|
63
64
|
auto compact_sketch = update_sketch.compact();
|
|
64
65
|
std::cout << "sizeof(compact_tuple_sketch<float>)=" << sizeof(compact_sketch) << std::endl;
|
|
@@ -70,6 +71,33 @@ TEST_CASE("tuple sketch float: empty", "[tuple_sketch]") {
|
|
|
70
71
|
REQUIRE(compact_sketch.get_theta() == 1);
|
|
71
72
|
REQUIRE(compact_sketch.get_num_retained() == 0);
|
|
72
73
|
REQUIRE(compact_sketch.is_ordered());
|
|
74
|
+
|
|
75
|
+
// empty is forced to be ordered
|
|
76
|
+
REQUIRE(update_sketch.compact(false).is_ordered());
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
TEST_CASE("tuple sketch: single item", "[theta_sketch]") {
|
|
80
|
+
auto update_sketch = update_tuple_sketch<float>::builder().build();
|
|
81
|
+
update_sketch.update(1, 1.0f);
|
|
82
|
+
REQUIRE_FALSE(update_sketch.is_empty());
|
|
83
|
+
REQUIRE_FALSE(update_sketch.is_estimation_mode());
|
|
84
|
+
REQUIRE(update_sketch.get_theta() == 1.0);
|
|
85
|
+
REQUIRE(update_sketch.get_estimate() == 1.0);
|
|
86
|
+
REQUIRE(update_sketch.get_lower_bound(1) == 1.0);
|
|
87
|
+
REQUIRE(update_sketch.get_upper_bound(1) == 1.0);
|
|
88
|
+
REQUIRE(update_sketch.is_ordered()); // one item is ordered
|
|
89
|
+
|
|
90
|
+
auto compact_sketch = update_sketch.compact();
|
|
91
|
+
REQUIRE_FALSE(compact_sketch.is_empty());
|
|
92
|
+
REQUIRE_FALSE(compact_sketch.is_estimation_mode());
|
|
93
|
+
REQUIRE(compact_sketch.get_theta() == 1.0);
|
|
94
|
+
REQUIRE(compact_sketch.get_estimate() == 1.0);
|
|
95
|
+
REQUIRE(compact_sketch.get_lower_bound(1) == 1.0);
|
|
96
|
+
REQUIRE(compact_sketch.get_upper_bound(1) == 1.0);
|
|
97
|
+
REQUIRE(compact_sketch.is_ordered());
|
|
98
|
+
|
|
99
|
+
// single item is forced to be ordered
|
|
100
|
+
REQUIRE(update_sketch.compact(false).is_ordered());
|
|
73
101
|
}
|
|
74
102
|
|
|
75
103
|
TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
|
|
@@ -78,14 +106,14 @@ TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
|
|
|
78
106
|
update_sketch.update(2, 2.0f);
|
|
79
107
|
update_sketch.update(1, 1.0f);
|
|
80
108
|
// std::cout << update_sketch.to_string(true);
|
|
81
|
-
|
|
82
|
-
|
|
109
|
+
REQUIRE_FALSE(update_sketch.is_empty());
|
|
110
|
+
REQUIRE_FALSE(update_sketch.is_estimation_mode());
|
|
83
111
|
REQUIRE(update_sketch.get_estimate() == 2);
|
|
84
112
|
REQUIRE(update_sketch.get_lower_bound(1) == 2);
|
|
85
113
|
REQUIRE(update_sketch.get_upper_bound(1) == 2);
|
|
86
114
|
REQUIRE(update_sketch.get_theta() == 1);
|
|
87
115
|
REQUIRE(update_sketch.get_num_retained() == 2);
|
|
88
|
-
|
|
116
|
+
REQUIRE_FALSE(update_sketch.is_ordered());
|
|
89
117
|
int count = 0;
|
|
90
118
|
for (const auto& entry: update_sketch) {
|
|
91
119
|
REQUIRE(entry.second == 2);
|
|
@@ -95,8 +123,8 @@ TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
|
|
|
95
123
|
|
|
96
124
|
auto compact_sketch = update_sketch.compact();
|
|
97
125
|
// std::cout << compact_sketch.to_string(true);
|
|
98
|
-
|
|
99
|
-
|
|
126
|
+
REQUIRE_FALSE(compact_sketch.is_empty());
|
|
127
|
+
REQUIRE_FALSE(compact_sketch.is_estimation_mode());
|
|
100
128
|
REQUIRE(compact_sketch.get_estimate() == 2);
|
|
101
129
|
REQUIRE(compact_sketch.get_lower_bound(1) == 2);
|
|
102
130
|
REQUIRE(compact_sketch.get_upper_bound(1) == 2);
|
|
@@ -151,6 +179,16 @@ TEST_CASE("tuple sketch float: exact mode", "[tuple_sketch]") {
|
|
|
151
179
|
++it;
|
|
152
180
|
}
|
|
153
181
|
}
|
|
182
|
+
|
|
183
|
+
update_sketch.reset();
|
|
184
|
+
REQUIRE(update_sketch.is_empty());
|
|
185
|
+
REQUIRE_FALSE(update_sketch.is_estimation_mode());
|
|
186
|
+
REQUIRE(update_sketch.get_estimate() == 0);
|
|
187
|
+
REQUIRE(update_sketch.get_lower_bound(1) == 0);
|
|
188
|
+
REQUIRE(update_sketch.get_upper_bound(1) == 0);
|
|
189
|
+
REQUIRE(update_sketch.get_theta() == 1);
|
|
190
|
+
REQUIRE(update_sketch.get_num_retained() == 0);
|
|
191
|
+
REQUIRE(update_sketch.is_ordered());
|
|
154
192
|
}
|
|
155
193
|
|
|
156
194
|
template<typename T>
|
|
@@ -18,6 +18,7 @@
|
|
|
18
18
|
*/
|
|
19
19
|
|
|
20
20
|
#include <iostream>
|
|
21
|
+
#include <stdexcept>
|
|
21
22
|
|
|
22
23
|
#include <catch.hpp>
|
|
23
24
|
#include <tuple_union.hpp>
|
|
@@ -81,6 +82,13 @@ TEST_CASE("tuple_union float: simple case", "[tuple union]") {
|
|
|
81
82
|
u.update(update_sketch2);
|
|
82
83
|
auto result = u.get_result();
|
|
83
84
|
REQUIRE(result.get_num_retained() == 3);
|
|
85
|
+
|
|
86
|
+
u.reset();
|
|
87
|
+
result = u.get_result();
|
|
88
|
+
REQUIRE(result.is_empty());
|
|
89
|
+
REQUIRE(result.get_num_retained() == 0);
|
|
90
|
+
REQUIRE(!result.is_estimation_mode());
|
|
91
|
+
REQUIRE(result.get_estimate() == 0);
|
|
84
92
|
}
|
|
85
93
|
|
|
86
94
|
TEST_CASE("tuple_union float: exact mode half overlap", "[tuple union]") {
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: datasketches
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.
|
|
4
|
+
version: 0.2.5
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2022-05-21 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rice
|
|
@@ -51,6 +51,7 @@ files:
|
|
|
51
51
|
- vendor/datasketches-cpp/MANIFEST.in
|
|
52
52
|
- vendor/datasketches-cpp/NOTICE
|
|
53
53
|
- vendor/datasketches-cpp/README.md
|
|
54
|
+
- vendor/datasketches-cpp/cmake/DataSketchesConfig.cmake.in
|
|
54
55
|
- vendor/datasketches-cpp/common/CMakeLists.txt
|
|
55
56
|
- vendor/datasketches-cpp/common/include/MurmurHash3.h
|
|
56
57
|
- vendor/datasketches-cpp/common/include/binomial_bounds.hpp
|
|
@@ -61,7 +62,11 @@ files:
|
|
|
61
62
|
- vendor/datasketches-cpp/common/include/conditional_forward.hpp
|
|
62
63
|
- vendor/datasketches-cpp/common/include/count_zeros.hpp
|
|
63
64
|
- vendor/datasketches-cpp/common/include/inv_pow2_table.hpp
|
|
65
|
+
- vendor/datasketches-cpp/common/include/kolmogorov_smirnov.hpp
|
|
66
|
+
- vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp
|
|
64
67
|
- vendor/datasketches-cpp/common/include/memory_operations.hpp
|
|
68
|
+
- vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view.hpp
|
|
69
|
+
- vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp
|
|
65
70
|
- vendor/datasketches-cpp/common/include/serde.hpp
|
|
66
71
|
- vendor/datasketches-cpp/common/test/CMakeLists.txt
|
|
67
72
|
- vendor/datasketches-cpp/common/test/catch.hpp
|
|
@@ -156,12 +161,8 @@ files:
|
|
|
156
161
|
- vendor/datasketches-cpp/kll/CMakeLists.txt
|
|
157
162
|
- vendor/datasketches-cpp/kll/include/kll_helper.hpp
|
|
158
163
|
- vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp
|
|
159
|
-
- vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp
|
|
160
|
-
- vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp
|
|
161
164
|
- vendor/datasketches-cpp/kll/include/kll_sketch.hpp
|
|
162
165
|
- vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp
|
|
163
|
-
- vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp
|
|
164
|
-
- vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp
|
|
165
166
|
- vendor/datasketches-cpp/kll/test/CMakeLists.txt
|
|
166
167
|
- vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp
|
|
167
168
|
- vendor/datasketches-cpp/kll/test/kll_sketch_float_one_item_v1.sk
|
|
@@ -177,12 +178,15 @@ files:
|
|
|
177
178
|
- vendor/datasketches-cpp/python/jupyter/HLLSketch.ipynb
|
|
178
179
|
- vendor/datasketches-cpp/python/jupyter/KLLSketch.ipynb
|
|
179
180
|
- vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb
|
|
181
|
+
- vendor/datasketches-cpp/python/pybind11Path.cmd
|
|
180
182
|
- vendor/datasketches-cpp/python/src/__init__.py
|
|
181
183
|
- vendor/datasketches-cpp/python/src/cpc_wrapper.cpp
|
|
182
184
|
- vendor/datasketches-cpp/python/src/datasketches.cpp
|
|
183
185
|
- vendor/datasketches-cpp/python/src/fi_wrapper.cpp
|
|
184
186
|
- vendor/datasketches-cpp/python/src/hll_wrapper.cpp
|
|
185
187
|
- vendor/datasketches-cpp/python/src/kll_wrapper.cpp
|
|
188
|
+
- vendor/datasketches-cpp/python/src/ks_wrapper.cpp
|
|
189
|
+
- vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp
|
|
186
190
|
- vendor/datasketches-cpp/python/src/req_wrapper.cpp
|
|
187
191
|
- vendor/datasketches-cpp/python/src/theta_wrapper.cpp
|
|
188
192
|
- vendor/datasketches-cpp/python/src/vector_of_kll.cpp
|
|
@@ -192,16 +196,30 @@ files:
|
|
|
192
196
|
- vendor/datasketches-cpp/python/tests/fi_test.py
|
|
193
197
|
- vendor/datasketches-cpp/python/tests/hll_test.py
|
|
194
198
|
- vendor/datasketches-cpp/python/tests/kll_test.py
|
|
199
|
+
- vendor/datasketches-cpp/python/tests/quantiles_test.py
|
|
195
200
|
- vendor/datasketches-cpp/python/tests/req_test.py
|
|
196
201
|
- vendor/datasketches-cpp/python/tests/theta_test.py
|
|
197
202
|
- vendor/datasketches-cpp/python/tests/vector_of_kll_test.py
|
|
198
203
|
- vendor/datasketches-cpp/python/tests/vo_test.py
|
|
204
|
+
- vendor/datasketches-cpp/quantiles/CMakeLists.txt
|
|
205
|
+
- vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp
|
|
206
|
+
- vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp
|
|
207
|
+
- vendor/datasketches-cpp/quantiles/test/CMakeLists.txt
|
|
208
|
+
- vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.3.0.sk
|
|
209
|
+
- vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.6.0.sk
|
|
210
|
+
- vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.0.sk
|
|
211
|
+
- vendor/datasketches-cpp/quantiles/test/Qk128_n1000_v0.8.3.sk
|
|
212
|
+
- vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.3.0.sk
|
|
213
|
+
- vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.6.0.sk
|
|
214
|
+
- vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.0.sk
|
|
215
|
+
- vendor/datasketches-cpp/quantiles/test/Qk128_n50_v0.8.3.sk
|
|
216
|
+
- vendor/datasketches-cpp/quantiles/test/kolmogorov_smirnov_test.cpp
|
|
217
|
+
- vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp
|
|
218
|
+
- vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp
|
|
199
219
|
- vendor/datasketches-cpp/req/CMakeLists.txt
|
|
200
220
|
- vendor/datasketches-cpp/req/include/req_common.hpp
|
|
201
221
|
- vendor/datasketches-cpp/req/include/req_compactor.hpp
|
|
202
222
|
- vendor/datasketches-cpp/req/include/req_compactor_impl.hpp
|
|
203
|
-
- vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp
|
|
204
|
-
- vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp
|
|
205
223
|
- vendor/datasketches-cpp/req/include/req_sketch.hpp
|
|
206
224
|
- vendor/datasketches-cpp/req/include/req_sketch_impl.hpp
|
|
207
225
|
- vendor/datasketches-cpp/req/test/CMakeLists.txt
|
|
@@ -255,14 +273,18 @@ files:
|
|
|
255
273
|
- vendor/datasketches-cpp/theta/test/CMakeLists.txt
|
|
256
274
|
- vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp
|
|
257
275
|
- vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java.sk
|
|
276
|
+
- vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v1.sk
|
|
277
|
+
- vendor/datasketches-cpp/theta/test/theta_compact_empty_from_java_v2.sk
|
|
258
278
|
- vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java.sk
|
|
279
|
+
- vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v1.sk
|
|
280
|
+
- vendor/datasketches-cpp/theta/test/theta_compact_estimation_from_java_v2.sk
|
|
281
|
+
- vendor/datasketches-cpp/theta/test/theta_compact_exact_from_java.sk
|
|
259
282
|
- vendor/datasketches-cpp/theta/test/theta_compact_single_item_from_java.sk
|
|
260
283
|
- vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp
|
|
261
284
|
- vendor/datasketches-cpp/theta/test/theta_jaccard_similarity_test.cpp
|
|
285
|
+
- vendor/datasketches-cpp/theta/test/theta_setop_test.cpp
|
|
262
286
|
- vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp
|
|
263
287
|
- vendor/datasketches-cpp/theta/test/theta_union_test.cpp
|
|
264
|
-
- vendor/datasketches-cpp/theta/test/theta_update_empty_from_java.sk
|
|
265
|
-
- vendor/datasketches-cpp/theta/test/theta_update_estimation_from_java.sk
|
|
266
288
|
- vendor/datasketches-cpp/tuple/CMakeLists.txt
|
|
267
289
|
- vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b.hpp
|
|
268
290
|
- vendor/datasketches-cpp/tuple/include/array_of_doubles_a_not_b_impl.hpp
|
|
@@ -294,7 +316,7 @@ files:
|
|
|
294
316
|
- vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp
|
|
295
317
|
- vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp
|
|
296
318
|
- vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp
|
|
297
|
-
homepage: https://github.com/ankane/datasketches
|
|
319
|
+
homepage: https://github.com/ankane/datasketches-ruby
|
|
298
320
|
licenses:
|
|
299
321
|
- Apache-2.0
|
|
300
322
|
metadata: {}
|
|
@@ -313,7 +335,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
313
335
|
- !ruby/object:Gem::Version
|
|
314
336
|
version: '0'
|
|
315
337
|
requirements: []
|
|
316
|
-
rubygems_version: 3.
|
|
338
|
+
rubygems_version: 3.3.7
|
|
317
339
|
signing_key:
|
|
318
340
|
specification_version: 4
|
|
319
341
|
summary: Sketch data structures for Ruby
|