datasketches 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/kll_wrapper.cpp +20 -20
- data/ext/datasketches/theta_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
- data/vendor/datasketches-cpp/MANIFEST.in +21 -2
- data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
- data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
- data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
- data/vendor/datasketches-cpp/pyproject.toml +17 -12
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
- data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/setup.py +14 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
- data/vendor/datasketches-cpp/tox.ini +26 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
- data/vendor/datasketches-cpp/version.cfg.in +1 -0
- metadata +14 -5
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
|
|
27
27
|
namespace datasketches {
|
|
28
28
|
|
|
29
|
-
using req_test_type_sketch = req_sketch<test_type, test_type_less,
|
|
29
|
+
using req_test_type_sketch = req_sketch<test_type, test_type_less, test_allocator<test_type>>;
|
|
30
30
|
using alloc = test_allocator<test_type>;
|
|
31
31
|
|
|
32
32
|
TEST_CASE("req sketch custom type", "[req_sketch]") {
|
|
@@ -35,26 +35,26 @@ TEST_CASE("req sketch custom type", "[req_sketch]") {
|
|
|
35
35
|
test_allocator_total_bytes = 0;
|
|
36
36
|
|
|
37
37
|
SECTION("compact level zero") {
|
|
38
|
-
req_test_type_sketch sketch(4, true, 0);
|
|
38
|
+
req_test_type_sketch sketch(4, true, test_type_less(), 0);
|
|
39
39
|
REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
|
|
40
|
-
REQUIRE_THROWS_AS(sketch.
|
|
41
|
-
REQUIRE_THROWS_AS(sketch.
|
|
42
|
-
REQUIRE(sketch.get_serialized_size_bytes() == 8);
|
|
40
|
+
REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
|
|
41
|
+
REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
|
|
42
|
+
REQUIRE(sketch.get_serialized_size_bytes(test_type_serde()) == 8);
|
|
43
43
|
|
|
44
44
|
for (int i = 0; i < 24; ++i) sketch.update(i);
|
|
45
45
|
//std::cout << sketch.to_string(true);
|
|
46
46
|
|
|
47
47
|
REQUIRE(sketch.is_estimation_mode());
|
|
48
48
|
REQUIRE(sketch.get_n() > sketch.get_num_retained());
|
|
49
|
-
REQUIRE(sketch.
|
|
50
|
-
REQUIRE(sketch.
|
|
49
|
+
REQUIRE(sketch.get_min_item().get_value() == 0);
|
|
50
|
+
REQUIRE(sketch.get_max_item().get_value() == 23);
|
|
51
51
|
}
|
|
52
52
|
|
|
53
53
|
SECTION("merge small") {
|
|
54
|
-
req_test_type_sketch sketch1(4, true, 0);
|
|
54
|
+
req_test_type_sketch sketch1(4, true, test_type_less(), 0);
|
|
55
55
|
sketch1.update(1);
|
|
56
56
|
|
|
57
|
-
req_test_type_sketch sketch2(4, true, 0);
|
|
57
|
+
req_test_type_sketch sketch2(4, true, test_type_less(), 0);
|
|
58
58
|
sketch2.update(2);
|
|
59
59
|
|
|
60
60
|
sketch2.merge(sketch1);
|
|
@@ -63,15 +63,15 @@ TEST_CASE("req sketch custom type", "[req_sketch]") {
|
|
|
63
63
|
|
|
64
64
|
REQUIRE_FALSE(sketch2.is_estimation_mode());
|
|
65
65
|
REQUIRE(sketch2.get_num_retained() == sketch2.get_n());
|
|
66
|
-
REQUIRE(sketch2.
|
|
67
|
-
REQUIRE(sketch2.
|
|
66
|
+
REQUIRE(sketch2.get_min_item().get_value() == 1);
|
|
67
|
+
REQUIRE(sketch2.get_max_item().get_value() == 2);
|
|
68
68
|
}
|
|
69
69
|
|
|
70
70
|
SECTION("merge higher levels") {
|
|
71
|
-
req_test_type_sketch sketch1(4, true, 0);
|
|
71
|
+
req_test_type_sketch sketch1(4, true, test_type_less(), 0);
|
|
72
72
|
for (int i = 0; i < 24; ++i) sketch1.update(i);
|
|
73
73
|
|
|
74
|
-
req_test_type_sketch sketch2(4, true, 0);
|
|
74
|
+
req_test_type_sketch sketch2(4, true, test_type_less(), 0);
|
|
75
75
|
for (int i = 0; i < 24; ++i) sketch2.update(i);
|
|
76
76
|
|
|
77
77
|
sketch2.merge(sketch1);
|
|
@@ -80,28 +80,28 @@ TEST_CASE("req sketch custom type", "[req_sketch]") {
|
|
|
80
80
|
|
|
81
81
|
REQUIRE(sketch2.is_estimation_mode());
|
|
82
82
|
REQUIRE(sketch2.get_n() > sketch2.get_num_retained());
|
|
83
|
-
REQUIRE(sketch2.
|
|
84
|
-
REQUIRE(sketch2.
|
|
83
|
+
REQUIRE(sketch2.get_min_item().get_value() == 0);
|
|
84
|
+
REQUIRE(sketch2.get_max_item().get_value() == 23);
|
|
85
85
|
}
|
|
86
86
|
|
|
87
87
|
SECTION("serialize deserialize") {
|
|
88
|
-
req_test_type_sketch sketch1(12, true, 0);
|
|
88
|
+
req_test_type_sketch sketch1(12, true, test_type_less(), 0);
|
|
89
89
|
|
|
90
90
|
const int n = 1000;
|
|
91
91
|
for (int i = 0; i < n; i++) sketch1.update(i);
|
|
92
92
|
|
|
93
93
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
94
|
-
sketch1.serialize(s);
|
|
95
|
-
REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes());
|
|
96
|
-
auto sketch2 = req_test_type_sketch::deserialize(s,
|
|
97
|
-
REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes());
|
|
94
|
+
sketch1.serialize(s, test_type_serde());
|
|
95
|
+
REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes(test_type_serde()));
|
|
96
|
+
auto sketch2 = req_test_type_sketch::deserialize(s, test_type_serde(), test_type_less(), 0);
|
|
97
|
+
REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes(test_type_serde()));
|
|
98
98
|
REQUIRE(s.tellg() == s.tellp());
|
|
99
99
|
REQUIRE(sketch2.is_empty() == sketch1.is_empty());
|
|
100
100
|
REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
|
|
101
101
|
REQUIRE(sketch2.get_n() == sketch1.get_n());
|
|
102
102
|
REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
|
|
103
|
-
REQUIRE(sketch2.
|
|
104
|
-
REQUIRE(sketch2.
|
|
103
|
+
REQUIRE(sketch2.get_min_item().get_value() == sketch1.get_min_item().get_value());
|
|
104
|
+
REQUIRE(sketch2.get_max_item().get_value() == sketch1.get_max_item().get_value());
|
|
105
105
|
REQUIRE(sketch2.get_quantile(0.5).get_value() == sketch1.get_quantile(0.5).get_value());
|
|
106
106
|
REQUIRE(sketch2.get_rank(0) == sketch1.get_rank(0));
|
|
107
107
|
REQUIRE(sketch2.get_rank(n) == sketch1.get_rank(n));
|
|
@@ -109,13 +109,13 @@ TEST_CASE("req sketch custom type", "[req_sketch]") {
|
|
|
109
109
|
}
|
|
110
110
|
|
|
111
111
|
SECTION("moving merge") {
|
|
112
|
-
req_test_type_sketch sketch1(4, true, 0);
|
|
112
|
+
req_test_type_sketch sketch1(4, true, test_type_less(), 0);
|
|
113
113
|
for (int i = 0; i < 10; i++) sketch1.update(i);
|
|
114
|
-
req_test_type_sketch sketch2(4, true, 0);
|
|
114
|
+
req_test_type_sketch sketch2(4, true, test_type_less(), 0);
|
|
115
115
|
sketch2.update(10);
|
|
116
116
|
sketch2.merge(std::move(sketch1));
|
|
117
|
-
REQUIRE(sketch2.
|
|
118
|
-
REQUIRE(sketch2.
|
|
117
|
+
REQUIRE(sketch2.get_min_item().get_value() == 0);
|
|
118
|
+
REQUIRE(sketch2.get_max_item().get_value() == 10);
|
|
119
119
|
REQUIRE(sketch2.get_n() == 11);
|
|
120
120
|
}
|
|
121
121
|
|
|
@@ -43,19 +43,16 @@ TEST_CASE("req sketch: empty", "[req_sketch]") {
|
|
|
43
43
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
44
44
|
REQUIRE(sketch.get_n() == 0);
|
|
45
45
|
REQUIRE(sketch.get_num_retained() == 0);
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
REQUIRE(std::isnan(sketch.get_quantile(0)));
|
|
51
|
-
REQUIRE(std::isnan(sketch.get_quantile(0.5)));
|
|
52
|
-
REQUIRE(std::isnan(sketch.get_quantile(1)));
|
|
46
|
+
REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
|
|
47
|
+
REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
|
|
48
|
+
REQUIRE_THROWS_AS(sketch.get_rank(0), std::runtime_error);
|
|
49
|
+
REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
|
|
53
50
|
const double ranks[3] {0, 0.5, 1};
|
|
54
|
-
|
|
51
|
+
REQUIRE_THROWS_AS(sketch.get_quantiles(ranks, 3), std::runtime_error);
|
|
55
52
|
|
|
56
53
|
const float split_points[1] {0};
|
|
57
|
-
|
|
58
|
-
|
|
54
|
+
REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::runtime_error);
|
|
55
|
+
REQUIRE_THROWS_AS(sketch.get_PMF(split_points, 1), std::runtime_error);
|
|
59
56
|
}
|
|
60
57
|
|
|
61
58
|
TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
|
|
@@ -66,13 +63,13 @@ TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
|
|
|
66
63
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
67
64
|
REQUIRE(sketch.get_n() == 1);
|
|
68
65
|
REQUIRE(sketch.get_num_retained() == 1);
|
|
69
|
-
REQUIRE(sketch.get_rank(1.0f) == 0);
|
|
70
|
-
REQUIRE(sketch.get_rank
|
|
71
|
-
REQUIRE(sketch.get_rank(1.1f) == 1);
|
|
66
|
+
REQUIRE(sketch.get_rank(1.0f, false) == 0);
|
|
67
|
+
REQUIRE(sketch.get_rank(1.0f) == 1);
|
|
68
|
+
REQUIRE(sketch.get_rank(1.1f, false) == 1);
|
|
72
69
|
REQUIRE(sketch.get_rank(std::numeric_limits<float>::infinity()) == 1);
|
|
73
|
-
REQUIRE(sketch.get_quantile(0) == 1);
|
|
74
|
-
REQUIRE(sketch.get_quantile(0.5) == 1);
|
|
75
|
-
REQUIRE(sketch.get_quantile(1) == 1);
|
|
70
|
+
REQUIRE(sketch.get_quantile(0, false) == 1);
|
|
71
|
+
REQUIRE(sketch.get_quantile(0.5, false) == 1);
|
|
72
|
+
REQUIRE(sketch.get_quantile(1, false) == 1);
|
|
76
73
|
|
|
77
74
|
const double ranks[3] {0, 0.5, 1};
|
|
78
75
|
auto quantiles = sketch.get_quantiles(ranks, 3);
|
|
@@ -82,11 +79,16 @@ TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
|
|
|
82
79
|
REQUIRE(quantiles[2] == 1);
|
|
83
80
|
|
|
84
81
|
unsigned count = 0;
|
|
85
|
-
for (auto
|
|
86
|
-
REQUIRE(
|
|
82
|
+
for (auto pair: sketch) {
|
|
83
|
+
REQUIRE(pair.second == 1);
|
|
87
84
|
++count;
|
|
88
85
|
}
|
|
89
86
|
REQUIRE(count == 1);
|
|
87
|
+
|
|
88
|
+
// iterator dereferencing
|
|
89
|
+
auto it = sketch.begin();
|
|
90
|
+
REQUIRE(it->first == 1.0f);
|
|
91
|
+
REQUIRE((*it).first == 1.0f);
|
|
90
92
|
}
|
|
91
93
|
|
|
92
94
|
TEST_CASE("req sketch: repeated values", "[req_sketch]") {
|
|
@@ -101,10 +103,10 @@ TEST_CASE("req sketch: repeated values", "[req_sketch]") {
|
|
|
101
103
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
102
104
|
REQUIRE(sketch.get_n() == 6);
|
|
103
105
|
REQUIRE(sketch.get_num_retained() == 6);
|
|
104
|
-
REQUIRE(sketch.get_rank(1.0f) == 0);
|
|
105
|
-
REQUIRE(sketch.get_rank
|
|
106
|
-
REQUIRE(sketch.get_rank(2.0f) == 0.5);
|
|
107
|
-
REQUIRE(sketch.get_rank
|
|
106
|
+
REQUIRE(sketch.get_rank(1.0f, false) == 0);
|
|
107
|
+
REQUIRE(sketch.get_rank(1.0f) == 0.5);
|
|
108
|
+
REQUIRE(sketch.get_rank(2.0f, false) == 0.5);
|
|
109
|
+
REQUIRE(sketch.get_rank(2.0f) == 1);
|
|
108
110
|
}
|
|
109
111
|
|
|
110
112
|
TEST_CASE("req sketch: exact mode", "[req_sketch]") {
|
|
@@ -115,48 +117,48 @@ TEST_CASE("req sketch: exact mode", "[req_sketch]") {
|
|
|
115
117
|
REQUIRE(sketch.get_n() == 10);
|
|
116
118
|
REQUIRE(sketch.get_num_retained() == 10);
|
|
117
119
|
|
|
118
|
-
//
|
|
119
|
-
REQUIRE(sketch.get_rank(1.0f) == 0);
|
|
120
|
-
REQUIRE(sketch.get_rank(2.0f) == 0.1);
|
|
121
|
-
REQUIRE(sketch.get_rank(6.0f) == 0.5);
|
|
122
|
-
REQUIRE(sketch.get_rank(9.0f) == 0.8);
|
|
123
|
-
REQUIRE(sketch.get_rank(10.0f) == 0.9);
|
|
120
|
+
// exclusive
|
|
121
|
+
REQUIRE(sketch.get_rank(1.0f, false) == 0);
|
|
122
|
+
REQUIRE(sketch.get_rank(2.0f, false) == 0.1);
|
|
123
|
+
REQUIRE(sketch.get_rank(6.0f, false) == 0.5);
|
|
124
|
+
REQUIRE(sketch.get_rank(9.0f, false) == 0.8);
|
|
125
|
+
REQUIRE(sketch.get_rank(10.0f, false) == 0.9);
|
|
124
126
|
|
|
125
127
|
// inclusive
|
|
126
|
-
REQUIRE(sketch.get_rank
|
|
127
|
-
REQUIRE(sketch.get_rank
|
|
128
|
-
REQUIRE(sketch.get_rank
|
|
129
|
-
REQUIRE(sketch.get_rank
|
|
130
|
-
REQUIRE(sketch.get_rank
|
|
128
|
+
REQUIRE(sketch.get_rank(1.0f) == 0.1);
|
|
129
|
+
REQUIRE(sketch.get_rank(2.0f) == 0.2);
|
|
130
|
+
REQUIRE(sketch.get_rank(5.0f) == 0.5);
|
|
131
|
+
REQUIRE(sketch.get_rank(9.0f) == 0.9);
|
|
132
|
+
REQUIRE(sketch.get_rank(10.0f) == 1);
|
|
133
|
+
|
|
134
|
+
// exclusive
|
|
135
|
+
REQUIRE(sketch.get_quantile(0, false) == 1);
|
|
136
|
+
REQUIRE(sketch.get_quantile(0.1, false) == 2);
|
|
137
|
+
REQUIRE(sketch.get_quantile(0.5, false) == 6);
|
|
138
|
+
REQUIRE(sketch.get_quantile(0.9, false) == 10);
|
|
139
|
+
REQUIRE(sketch.get_quantile(1, false) == 10);
|
|
131
140
|
|
|
132
|
-
//
|
|
141
|
+
// inclusive
|
|
133
142
|
REQUIRE(sketch.get_quantile(0) == 1);
|
|
134
|
-
REQUIRE(sketch.get_quantile(0.1) ==
|
|
135
|
-
REQUIRE(sketch.get_quantile(0.5) ==
|
|
136
|
-
REQUIRE(sketch.get_quantile(0.9) ==
|
|
143
|
+
REQUIRE(sketch.get_quantile(0.1) == 1);
|
|
144
|
+
REQUIRE(sketch.get_quantile(0.5) == 5);
|
|
145
|
+
REQUIRE(sketch.get_quantile(0.9) == 9);
|
|
137
146
|
REQUIRE(sketch.get_quantile(1) == 10);
|
|
138
147
|
|
|
139
|
-
// inclusive
|
|
140
|
-
REQUIRE(sketch.get_quantile<true>(0) == 1);
|
|
141
|
-
REQUIRE(sketch.get_quantile<true>(0.1) == 1);
|
|
142
|
-
REQUIRE(sketch.get_quantile<true>(0.5) == 5);
|
|
143
|
-
REQUIRE(sketch.get_quantile<true>(0.9) == 9);
|
|
144
|
-
REQUIRE(sketch.get_quantile<true>(1) == 10);
|
|
145
|
-
|
|
146
148
|
const double ranks[3] {0, 0.5, 1};
|
|
147
149
|
auto quantiles = sketch.get_quantiles(ranks, 3);
|
|
148
150
|
REQUIRE(quantiles.size() == 3);
|
|
149
151
|
REQUIRE(quantiles[0] == 1);
|
|
150
|
-
REQUIRE(quantiles[1] ==
|
|
152
|
+
REQUIRE(quantiles[1] == 5);
|
|
151
153
|
REQUIRE(quantiles[2] == 10);
|
|
152
154
|
|
|
153
155
|
const float splits[3] {2, 6, 9};
|
|
154
|
-
auto cdf = sketch.get_CDF(splits, 3);
|
|
156
|
+
auto cdf = sketch.get_CDF(splits, 3, false);
|
|
155
157
|
REQUIRE(cdf[0] == 0.1);
|
|
156
158
|
REQUIRE(cdf[1] == 0.5);
|
|
157
159
|
REQUIRE(cdf[2] == 0.8);
|
|
158
160
|
REQUIRE(cdf[3] == 1);
|
|
159
|
-
auto pmf = sketch.get_PMF(splits, 3);
|
|
161
|
+
auto pmf = sketch.get_PMF(splits, 3, false);
|
|
160
162
|
REQUIRE(pmf[0] == Approx(0.1).margin(1e-8));
|
|
161
163
|
REQUIRE(pmf[1] == Approx(0.4).margin(1e-8));
|
|
162
164
|
REQUIRE(pmf[2] == Approx(0.3).margin(1e-8));
|
|
@@ -175,18 +177,18 @@ TEST_CASE("req sketch: estimation mode", "[req_sketch]") {
|
|
|
175
177
|
REQUIRE(sketch.get_n() == n);
|
|
176
178
|
// std::cout << sketch.to_string(true);
|
|
177
179
|
REQUIRE(sketch.get_num_retained() < n);
|
|
178
|
-
REQUIRE(sketch.get_rank(0) == 0);
|
|
179
|
-
REQUIRE(sketch.get_rank(static_cast<float>(n)) == 1);
|
|
180
|
-
REQUIRE(sketch.get_rank(n / 2.0f) == Approx(0.5).margin(0.01));
|
|
181
|
-
REQUIRE(sketch.get_rank(n - 1.0f) == Approx(1).margin(0.01));
|
|
182
|
-
REQUIRE(sketch.
|
|
183
|
-
REQUIRE(sketch.
|
|
180
|
+
REQUIRE(sketch.get_rank(0, false) == 0);
|
|
181
|
+
REQUIRE(sketch.get_rank(static_cast<float>(n), false) == 1);
|
|
182
|
+
REQUIRE(sketch.get_rank(n / 2.0f, false) == Approx(0.5).margin(0.01));
|
|
183
|
+
REQUIRE(sketch.get_rank(n - 1.0f, false) == Approx(1).margin(0.01));
|
|
184
|
+
REQUIRE(sketch.get_min_item() == 0);
|
|
185
|
+
REQUIRE(sketch.get_max_item() == n - 1);
|
|
184
186
|
REQUIRE(sketch.get_rank_lower_bound(0.5, 1) < 0.5);
|
|
185
187
|
REQUIRE(sketch.get_rank_upper_bound(0.5, 1) > 0.5);
|
|
186
188
|
|
|
187
189
|
unsigned count = 0;
|
|
188
|
-
for (auto
|
|
189
|
-
REQUIRE(
|
|
190
|
+
for (auto pair: sketch) {
|
|
191
|
+
REQUIRE(pair.second >= 1);
|
|
190
192
|
++count;
|
|
191
193
|
}
|
|
192
194
|
REQUIRE(count == sketch.get_num_retained());
|
|
@@ -203,8 +205,8 @@ TEST_CASE("req sketch: stream serialize-deserialize empty", "[req_sketch]") {
|
|
|
203
205
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
204
206
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
205
207
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
206
|
-
|
|
207
|
-
|
|
208
|
+
REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
|
|
209
|
+
REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
|
|
208
210
|
}
|
|
209
211
|
|
|
210
212
|
TEST_CASE("req sketch: byte serialize-deserialize empty", "[req_sketch]") {
|
|
@@ -218,8 +220,8 @@ TEST_CASE("req sketch: byte serialize-deserialize empty", "[req_sketch]") {
|
|
|
218
220
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
219
221
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
220
222
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
221
|
-
|
|
222
|
-
|
|
223
|
+
REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
|
|
224
|
+
REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
|
|
223
225
|
}
|
|
224
226
|
|
|
225
227
|
TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]") {
|
|
@@ -234,8 +236,8 @@ TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]"
|
|
|
234
236
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
235
237
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
236
238
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
237
|
-
REQUIRE(sketch2.
|
|
238
|
-
REQUIRE(sketch2.
|
|
239
|
+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
|
|
240
|
+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
|
|
239
241
|
}
|
|
240
242
|
|
|
241
243
|
TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]") {
|
|
@@ -251,8 +253,8 @@ TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]")
|
|
|
251
253
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
252
254
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
253
255
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
254
|
-
REQUIRE(sketch2.
|
|
255
|
-
REQUIRE(sketch2.
|
|
256
|
+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
|
|
257
|
+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
|
|
256
258
|
}
|
|
257
259
|
|
|
258
260
|
TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]") {
|
|
@@ -269,8 +271,8 @@ TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]")
|
|
|
269
271
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
270
272
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
271
273
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
272
|
-
REQUIRE(sketch2.
|
|
273
|
-
REQUIRE(sketch2.
|
|
274
|
+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
|
|
275
|
+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
|
|
274
276
|
}
|
|
275
277
|
|
|
276
278
|
TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
|
|
@@ -288,8 +290,8 @@ TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
|
|
|
288
290
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
289
291
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
290
292
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
291
|
-
REQUIRE(sketch2.
|
|
292
|
-
REQUIRE(sketch2.
|
|
293
|
+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
|
|
294
|
+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
|
|
293
295
|
}
|
|
294
296
|
|
|
295
297
|
TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sketch]") {
|
|
@@ -306,8 +308,8 @@ TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sket
|
|
|
306
308
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
307
309
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
308
310
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
309
|
-
REQUIRE(sketch2.
|
|
310
|
-
REQUIRE(sketch2.
|
|
311
|
+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
|
|
312
|
+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
|
|
311
313
|
}
|
|
312
314
|
|
|
313
315
|
TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch]") {
|
|
@@ -324,8 +326,8 @@ TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch
|
|
|
324
326
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
325
327
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
326
328
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
327
|
-
REQUIRE(sketch2.
|
|
328
|
-
REQUIRE(sketch2.
|
|
329
|
+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
|
|
330
|
+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
|
|
329
331
|
}
|
|
330
332
|
|
|
331
333
|
TEST_CASE("req sketch: serialize deserialize stream and bytes equivalence", "[req_sketch]") {
|
|
@@ -350,8 +352,8 @@ TEST_CASE("req sketch: serialize deserialize stream and bytes equivalence", "[re
|
|
|
350
352
|
REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
|
|
351
353
|
REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
|
|
352
354
|
REQUIRE(sketch2.get_n() == sketch.get_n());
|
|
353
|
-
REQUIRE(sketch2.
|
|
354
|
-
REQUIRE(sketch2.
|
|
355
|
+
REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
|
|
356
|
+
REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
|
|
355
357
|
}
|
|
356
358
|
|
|
357
359
|
TEST_CASE("req sketch: stream deserialize from Java - empty", "[req_sketch]") {
|
|
@@ -363,8 +365,8 @@ TEST_CASE("req sketch: stream deserialize from Java - empty", "[req_sketch]") {
|
|
|
363
365
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
364
366
|
REQUIRE(sketch.get_n() == 0);
|
|
365
367
|
REQUIRE(sketch.get_num_retained() == 0);
|
|
366
|
-
|
|
367
|
-
|
|
368
|
+
REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
|
|
369
|
+
REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
|
|
368
370
|
}
|
|
369
371
|
|
|
370
372
|
TEST_CASE("req sketch: stream deserialize from Java - single item", "[req_sketch]") {
|
|
@@ -376,10 +378,10 @@ TEST_CASE("req sketch: stream deserialize from Java - single item", "[req_sketch
|
|
|
376
378
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
377
379
|
REQUIRE(sketch.get_n() == 1);
|
|
378
380
|
REQUIRE(sketch.get_num_retained() == 1);
|
|
379
|
-
REQUIRE(sketch.
|
|
380
|
-
REQUIRE(sketch.
|
|
381
|
-
REQUIRE(sketch.get_rank(1.0f) == 0);
|
|
382
|
-
REQUIRE(sketch.get_rank
|
|
381
|
+
REQUIRE(sketch.get_min_item() == 1);
|
|
382
|
+
REQUIRE(sketch.get_max_item() == 1);
|
|
383
|
+
REQUIRE(sketch.get_rank(1.0f, false) == 0);
|
|
384
|
+
REQUIRE(sketch.get_rank(1.0f) == 1);
|
|
383
385
|
}
|
|
384
386
|
|
|
385
387
|
TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]") {
|
|
@@ -391,9 +393,9 @@ TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]"
|
|
|
391
393
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
392
394
|
REQUIRE(sketch.get_n() == 4);
|
|
393
395
|
REQUIRE(sketch.get_num_retained() == 4);
|
|
394
|
-
REQUIRE(sketch.
|
|
395
|
-
REQUIRE(sketch.
|
|
396
|
-
REQUIRE(sketch.get_rank(2.0f) == 0.5);
|
|
396
|
+
REQUIRE(sketch.get_min_item() == 0);
|
|
397
|
+
REQUIRE(sketch.get_max_item() == 3);
|
|
398
|
+
REQUIRE(sketch.get_rank(2.0f, false) == 0.5);
|
|
397
399
|
}
|
|
398
400
|
|
|
399
401
|
TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]") {
|
|
@@ -405,9 +407,9 @@ TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]
|
|
|
405
407
|
REQUIRE_FALSE(sketch.is_estimation_mode());
|
|
406
408
|
REQUIRE(sketch.get_n() == 100);
|
|
407
409
|
REQUIRE(sketch.get_num_retained() == 100);
|
|
408
|
-
REQUIRE(sketch.
|
|
409
|
-
REQUIRE(sketch.
|
|
410
|
-
REQUIRE(sketch.get_rank(50.0f) == 0.5);
|
|
410
|
+
REQUIRE(sketch.get_min_item() == 0);
|
|
411
|
+
REQUIRE(sketch.get_max_item() == 99);
|
|
412
|
+
REQUIRE(sketch.get_rank(50.0f, false) == 0.5);
|
|
411
413
|
}
|
|
412
414
|
|
|
413
415
|
TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sketch]") {
|
|
@@ -419,9 +421,9 @@ TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sk
|
|
|
419
421
|
REQUIRE(sketch.is_estimation_mode());
|
|
420
422
|
REQUIRE(sketch.get_n() == 10000);
|
|
421
423
|
REQUIRE(sketch.get_num_retained() == 2942);
|
|
422
|
-
REQUIRE(sketch.
|
|
423
|
-
REQUIRE(sketch.
|
|
424
|
-
REQUIRE(sketch.get_rank(5000.0f) == 0.5);
|
|
424
|
+
REQUIRE(sketch.get_min_item() == 0);
|
|
425
|
+
REQUIRE(sketch.get_max_item() == 9999);
|
|
426
|
+
REQUIRE(sketch.get_rank(5000.0f, false) == 0.5);
|
|
425
427
|
}
|
|
426
428
|
|
|
427
429
|
TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
|
|
@@ -431,11 +433,11 @@ TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
|
|
|
431
433
|
for (size_t i = 0; i < 1000; ++i) sketch2.update(static_cast<float>(i));
|
|
432
434
|
|
|
433
435
|
sketch1.merge(sketch2);
|
|
434
|
-
REQUIRE(sketch1.
|
|
435
|
-
REQUIRE(sketch1.
|
|
436
|
-
REQUIRE(sketch1.get_quantile(0.25) == Approx(250).
|
|
437
|
-
REQUIRE(sketch1.get_quantile(0.5) == Approx(500).
|
|
438
|
-
REQUIRE(sketch1.get_quantile(0.75) == Approx(750).
|
|
436
|
+
REQUIRE(sketch1.get_min_item() == 0);
|
|
437
|
+
REQUIRE(sketch1.get_max_item() == 999);
|
|
438
|
+
REQUIRE(sketch1.get_quantile(0.25) == Approx(250).epsilon(0.01));
|
|
439
|
+
REQUIRE(sketch1.get_quantile(0.5) == Approx(500).epsilon(0.01));
|
|
440
|
+
REQUIRE(sketch1.get_quantile(0.75) == Approx(750).epsilon(0.01));
|
|
439
441
|
REQUIRE(sketch1.get_rank(500.0f) == Approx(0.5).margin(0.01));
|
|
440
442
|
}
|
|
441
443
|
|
|
@@ -447,11 +449,11 @@ TEST_CASE("req sketch: merge", "[req_sketch]") {
|
|
|
447
449
|
for (size_t i = 1000; i < 2000; ++i) sketch2.update(static_cast<float>(i));
|
|
448
450
|
|
|
449
451
|
sketch1.merge(sketch2);
|
|
450
|
-
REQUIRE(sketch1.
|
|
451
|
-
REQUIRE(sketch1.
|
|
452
|
-
REQUIRE(sketch1.get_quantile(0.25) == Approx(500).
|
|
453
|
-
REQUIRE(sketch1.get_quantile(0.5) == Approx(1000).
|
|
454
|
-
REQUIRE(sketch1.get_quantile(0.75) == Approx(1500).
|
|
452
|
+
REQUIRE(sketch1.get_min_item() == 0);
|
|
453
|
+
REQUIRE(sketch1.get_max_item() == 1999);
|
|
454
|
+
REQUIRE(sketch1.get_quantile(0.25) == Approx(500).epsilon(0.01));
|
|
455
|
+
REQUIRE(sketch1.get_quantile(0.5) == Approx(1000).epsilon(0.01));
|
|
456
|
+
REQUIRE(sketch1.get_quantile(0.75) == Approx(1500).epsilon(0.01));
|
|
455
457
|
REQUIRE(sketch1.get_rank(1000.0f) == Approx(0.5).margin(0.01));
|
|
456
458
|
}
|
|
457
459
|
|
|
@@ -469,9 +471,9 @@ TEST_CASE("req sketch: merge multiple", "[req_sketch]") {
|
|
|
469
471
|
sketch.merge(sketch1);
|
|
470
472
|
sketch.merge(sketch2);
|
|
471
473
|
sketch.merge(sketch3);
|
|
472
|
-
REQUIRE(sketch.
|
|
473
|
-
REQUIRE(sketch.
|
|
474
|
-
REQUIRE(sketch.get_quantile(0.5) == Approx(60).
|
|
474
|
+
REQUIRE(sketch.get_min_item() == 0);
|
|
475
|
+
REQUIRE(sketch.get_max_item() == 119);
|
|
476
|
+
REQUIRE(sketch.get_quantile(0.5) == Approx(60).epsilon(0.02));
|
|
475
477
|
REQUIRE(sketch.get_rank(60.0f) == Approx(0.5).margin(0.01));
|
|
476
478
|
}
|
|
477
479
|
|
|
@@ -503,8 +505,8 @@ TEST_CASE("req sketch: type conversion - several levels", "[req_sketch]") {
|
|
|
503
505
|
REQUIRE(req_float.get_n() == req_double.get_n());
|
|
504
506
|
REQUIRE(req_float.get_num_retained() == req_double.get_num_retained());
|
|
505
507
|
|
|
506
|
-
auto sv_float = req_float.get_sorted_view(
|
|
507
|
-
auto sv_double = req_double.get_sorted_view(
|
|
508
|
+
auto sv_float = req_float.get_sorted_view();
|
|
509
|
+
auto sv_double = req_double.get_sorted_view();
|
|
508
510
|
auto sv_float_it = sv_float.begin();
|
|
509
511
|
auto sv_double_it = sv_double.begin();
|
|
510
512
|
while (sv_float_it != sv_float.end()) {
|
|
@@ -551,6 +553,17 @@ TEST_CASE("req sketch: type conversion - custom types") {
|
|
|
551
553
|
REQUIRE(sb.get_n() == 3);
|
|
552
554
|
}
|
|
553
555
|
|
|
556
|
+
TEST_CASE("get_rank equivalence") {
|
|
557
|
+
req_sketch<int> sketch(12);
|
|
558
|
+
const size_t n = 1000;
|
|
559
|
+
for (size_t i = 0; i < n; ++i) sketch.update(i);
|
|
560
|
+
REQUIRE(sketch.get_n() == n);
|
|
561
|
+
auto view = sketch.get_sorted_view();
|
|
562
|
+
for (size_t i = 0; i < n; ++i) {
|
|
563
|
+
REQUIRE(sketch.get_rank(i) == view.get_rank(i));
|
|
564
|
+
}
|
|
565
|
+
}
|
|
566
|
+
|
|
554
567
|
//TEST_CASE("for manual comparison with Java") {
|
|
555
568
|
// req_sketch<float> sketch(12, false);
|
|
556
569
|
// for (size_t i = 0; i < 100000; ++i) sketch.update(i);
|