datasketches 0.2.7 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/ext/datasketches/kll_wrapper.cpp +20 -20
- data/ext/datasketches/theta_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
- data/vendor/datasketches-cpp/MANIFEST.in +21 -2
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
- data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
- data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +1 -1
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
- data/vendor/datasketches-cpp/pyproject.toml +17 -12
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +63 -68
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
- data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/setup.py +14 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
- data/vendor/datasketches-cpp/tox.ini +26 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
- data/vendor/datasketches-cpp/version.cfg.in +1 -0
- metadata +15 -6
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -26,7 +26,7 @@
|
|
26
26
|
|
27
27
|
namespace datasketches {
|
28
28
|
|
29
|
-
using kll_test_type_sketch = kll_sketch<test_type, test_type_less,
|
29
|
+
using kll_test_type_sketch = kll_sketch<test_type, test_type_less, test_allocator<test_type>>;
|
30
30
|
using alloc = test_allocator<test_type>;
|
31
31
|
|
32
32
|
TEST_CASE("kll sketch custom type", "[kll_sketch]") {
|
@@ -35,11 +35,11 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
|
|
35
35
|
test_allocator_total_bytes = 0;
|
36
36
|
|
37
37
|
SECTION("compact level zero") {
|
38
|
-
kll_test_type_sketch sketch(8, 0);
|
38
|
+
kll_test_type_sketch sketch(8, test_type_less(), 0);
|
39
39
|
REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
|
40
|
-
REQUIRE_THROWS_AS(sketch.
|
41
|
-
REQUIRE_THROWS_AS(sketch.
|
42
|
-
REQUIRE(sketch.get_serialized_size_bytes() == 8);
|
40
|
+
REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
|
41
|
+
REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
|
42
|
+
REQUIRE(sketch.get_serialized_size_bytes(test_type_serde()) == 8);
|
43
43
|
|
44
44
|
sketch.update(1);
|
45
45
|
sketch.update(2);
|
@@ -55,15 +55,15 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
|
|
55
55
|
|
56
56
|
REQUIRE(sketch.is_estimation_mode());
|
57
57
|
REQUIRE(sketch.get_n() > sketch.get_num_retained());
|
58
|
-
REQUIRE(sketch.
|
59
|
-
REQUIRE(sketch.
|
58
|
+
REQUIRE(sketch.get_min_item().get_value() == 1);
|
59
|
+
REQUIRE(sketch.get_max_item().get_value() == 9);
|
60
60
|
}
|
61
61
|
|
62
62
|
SECTION("merge small") {
|
63
|
-
kll_test_type_sketch sketch1(8, 0);
|
63
|
+
kll_test_type_sketch sketch1(8, test_type_less(), 0);
|
64
64
|
sketch1.update(1);
|
65
65
|
|
66
|
-
kll_test_type_sketch sketch2(8, 0);
|
66
|
+
kll_test_type_sketch sketch2(8, test_type_less(), 0);
|
67
67
|
sketch2.update(2);
|
68
68
|
|
69
69
|
sketch2.merge(sketch1);
|
@@ -72,12 +72,12 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
|
|
72
72
|
|
73
73
|
REQUIRE_FALSE(sketch2.is_estimation_mode());
|
74
74
|
REQUIRE(sketch2.get_num_retained() == sketch2.get_n());
|
75
|
-
REQUIRE(sketch2.
|
76
|
-
REQUIRE(sketch2.
|
75
|
+
REQUIRE(sketch2.get_min_item().get_value() == 1);
|
76
|
+
REQUIRE(sketch2.get_max_item().get_value() == 2);
|
77
77
|
}
|
78
78
|
|
79
79
|
SECTION("merge higher levels") {
|
80
|
-
kll_test_type_sketch sketch1(8, 0);
|
80
|
+
kll_test_type_sketch sketch1(8, test_type_less(), 0);
|
81
81
|
sketch1.update(1);
|
82
82
|
sketch1.update(2);
|
83
83
|
sketch1.update(3);
|
@@ -88,7 +88,7 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
|
|
88
88
|
sketch1.update(8);
|
89
89
|
sketch1.update(9);
|
90
90
|
|
91
|
-
kll_test_type_sketch sketch2(8, 0);
|
91
|
+
kll_test_type_sketch sketch2(8, test_type_less(), 0);
|
92
92
|
sketch2.update(10);
|
93
93
|
sketch2.update(11);
|
94
94
|
sketch2.update(12);
|
@@ -105,28 +105,28 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
|
|
105
105
|
|
106
106
|
REQUIRE(sketch2.is_estimation_mode());
|
107
107
|
REQUIRE(sketch2.get_n() > sketch2.get_num_retained());
|
108
|
-
REQUIRE(sketch2.
|
109
|
-
REQUIRE(sketch2.
|
108
|
+
REQUIRE(sketch2.get_min_item().get_value() == 1);
|
109
|
+
REQUIRE(sketch2.get_max_item().get_value() == 18);
|
110
110
|
}
|
111
111
|
|
112
112
|
SECTION("serialize deserialize") {
|
113
|
-
kll_test_type_sketch sketch1(200, 0);
|
113
|
+
kll_test_type_sketch sketch1(200, test_type_less(), 0);
|
114
114
|
|
115
115
|
const int n = 1000;
|
116
116
|
for (int i = 0; i < n; i++) sketch1.update(i);
|
117
117
|
|
118
118
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
119
|
-
sketch1.serialize(s);
|
120
|
-
REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes());
|
121
|
-
auto sketch2 = kll_test_type_sketch::deserialize(s,
|
122
|
-
REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes());
|
119
|
+
sketch1.serialize(s, test_type_serde());
|
120
|
+
REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes(test_type_serde()));
|
121
|
+
auto sketch2 = kll_test_type_sketch::deserialize(s, test_type_serde(), test_type_less(), 0);
|
122
|
+
REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes(test_type_serde()));
|
123
123
|
REQUIRE(s.tellg() == s.tellp());
|
124
124
|
REQUIRE(sketch2.is_empty() == sketch1.is_empty());
|
125
125
|
REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
|
126
126
|
REQUIRE(sketch2.get_n() == sketch1.get_n());
|
127
127
|
REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
|
128
|
-
REQUIRE(sketch2.
|
129
|
-
REQUIRE(sketch2.
|
128
|
+
REQUIRE(sketch2.get_min_item().get_value() == sketch1.get_min_item().get_value());
|
129
|
+
REQUIRE(sketch2.get_max_item().get_value() == sketch1.get_max_item().get_value());
|
130
130
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch1.get_normalized_rank_error(false));
|
131
131
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch1.get_normalized_rank_error(true));
|
132
132
|
REQUIRE(sketch2.get_quantile(0.5).get_value() == sketch1.get_quantile(0.5).get_value());
|
@@ -136,13 +136,13 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
|
|
136
136
|
}
|
137
137
|
|
138
138
|
SECTION("moving merge") {
|
139
|
-
kll_test_type_sketch sketch1(8, 0);
|
139
|
+
kll_test_type_sketch sketch1(8, test_type_less(), 0);
|
140
140
|
for (int i = 0; i < 10; i++) sketch1.update(i);
|
141
|
-
kll_test_type_sketch sketch2(8, 0);
|
141
|
+
kll_test_type_sketch sketch2(8, test_type_less(), 0);
|
142
142
|
sketch2.update(10);
|
143
143
|
sketch2.merge(std::move(sketch1));
|
144
|
-
REQUIRE(sketch2.
|
145
|
-
REQUIRE(sketch2.
|
144
|
+
REQUIRE(sketch2.get_min_item().get_value() == 0);
|
145
|
+
REQUIRE(sketch2.get_max_item().get_value() == 10);
|
146
146
|
REQUIRE(sketch2.get_n() == 11);
|
147
147
|
}
|
148
148
|
|