datasketches 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/kll_wrapper.cpp +20 -20
- data/ext/datasketches/theta_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
- data/vendor/datasketches-cpp/MANIFEST.in +21 -2
- data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
- data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
- data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
- data/vendor/datasketches-cpp/pyproject.toml +17 -12
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
- data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/setup.py +14 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
- data/vendor/datasketches-cpp/tox.ini +26 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
- data/vendor/datasketches-cpp/version.cfg.in +1 -0
- metadata +14 -5
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
|
|
27
27
|
namespace datasketches {
|
|
28
28
|
|
|
29
|
-
using kll_test_type_sketch = kll_sketch<test_type, test_type_less,
|
|
29
|
+
using kll_test_type_sketch = kll_sketch<test_type, test_type_less, test_allocator<test_type>>;
|
|
30
30
|
using alloc = test_allocator<test_type>;
|
|
31
31
|
|
|
32
32
|
TEST_CASE("kll sketch custom type", "[kll_sketch]") {
|
|
@@ -35,11 +35,11 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
|
|
|
35
35
|
test_allocator_total_bytes = 0;
|
|
36
36
|
|
|
37
37
|
SECTION("compact level zero") {
|
|
38
|
-
kll_test_type_sketch sketch(8, 0);
|
|
38
|
+
kll_test_type_sketch sketch(8, test_type_less(), 0);
|
|
39
39
|
REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
|
|
40
|
-
REQUIRE_THROWS_AS(sketch.
|
|
41
|
-
REQUIRE_THROWS_AS(sketch.
|
|
42
|
-
REQUIRE(sketch.get_serialized_size_bytes() == 8);
|
|
40
|
+
REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
|
|
41
|
+
REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
|
|
42
|
+
REQUIRE(sketch.get_serialized_size_bytes(test_type_serde()) == 8);
|
|
43
43
|
|
|
44
44
|
sketch.update(1);
|
|
45
45
|
sketch.update(2);
|
|
@@ -55,15 +55,15 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
|
|
|
55
55
|
|
|
56
56
|
REQUIRE(sketch.is_estimation_mode());
|
|
57
57
|
REQUIRE(sketch.get_n() > sketch.get_num_retained());
|
|
58
|
-
REQUIRE(sketch.
|
|
59
|
-
REQUIRE(sketch.
|
|
58
|
+
REQUIRE(sketch.get_min_item().get_value() == 1);
|
|
59
|
+
REQUIRE(sketch.get_max_item().get_value() == 9);
|
|
60
60
|
}
|
|
61
61
|
|
|
62
62
|
SECTION("merge small") {
|
|
63
|
-
kll_test_type_sketch sketch1(8, 0);
|
|
63
|
+
kll_test_type_sketch sketch1(8, test_type_less(), 0);
|
|
64
64
|
sketch1.update(1);
|
|
65
65
|
|
|
66
|
-
kll_test_type_sketch sketch2(8, 0);
|
|
66
|
+
kll_test_type_sketch sketch2(8, test_type_less(), 0);
|
|
67
67
|
sketch2.update(2);
|
|
68
68
|
|
|
69
69
|
sketch2.merge(sketch1);
|
|
@@ -72,12 +72,12 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
|
|
|
72
72
|
|
|
73
73
|
REQUIRE_FALSE(sketch2.is_estimation_mode());
|
|
74
74
|
REQUIRE(sketch2.get_num_retained() == sketch2.get_n());
|
|
75
|
-
REQUIRE(sketch2.
|
|
76
|
-
REQUIRE(sketch2.
|
|
75
|
+
REQUIRE(sketch2.get_min_item().get_value() == 1);
|
|
76
|
+
REQUIRE(sketch2.get_max_item().get_value() == 2);
|
|
77
77
|
}
|
|
78
78
|
|
|
79
79
|
SECTION("merge higher levels") {
|
|
80
|
-
kll_test_type_sketch sketch1(8, 0);
|
|
80
|
+
kll_test_type_sketch sketch1(8, test_type_less(), 0);
|
|
81
81
|
sketch1.update(1);
|
|
82
82
|
sketch1.update(2);
|
|
83
83
|
sketch1.update(3);
|
|
@@ -88,7 +88,7 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
|
|
|
88
88
|
sketch1.update(8);
|
|
89
89
|
sketch1.update(9);
|
|
90
90
|
|
|
91
|
-
kll_test_type_sketch sketch2(8, 0);
|
|
91
|
+
kll_test_type_sketch sketch2(8, test_type_less(), 0);
|
|
92
92
|
sketch2.update(10);
|
|
93
93
|
sketch2.update(11);
|
|
94
94
|
sketch2.update(12);
|
|
@@ -105,28 +105,28 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
|
|
|
105
105
|
|
|
106
106
|
REQUIRE(sketch2.is_estimation_mode());
|
|
107
107
|
REQUIRE(sketch2.get_n() > sketch2.get_num_retained());
|
|
108
|
-
REQUIRE(sketch2.
|
|
109
|
-
REQUIRE(sketch2.
|
|
108
|
+
REQUIRE(sketch2.get_min_item().get_value() == 1);
|
|
109
|
+
REQUIRE(sketch2.get_max_item().get_value() == 18);
|
|
110
110
|
}
|
|
111
111
|
|
|
112
112
|
SECTION("serialize deserialize") {
|
|
113
|
-
kll_test_type_sketch sketch1(200, 0);
|
|
113
|
+
kll_test_type_sketch sketch1(200, test_type_less(), 0);
|
|
114
114
|
|
|
115
115
|
const int n = 1000;
|
|
116
116
|
for (int i = 0; i < n; i++) sketch1.update(i);
|
|
117
117
|
|
|
118
118
|
std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
|
|
119
|
-
sketch1.serialize(s);
|
|
120
|
-
REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes());
|
|
121
|
-
auto sketch2 = kll_test_type_sketch::deserialize(s,
|
|
122
|
-
REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes());
|
|
119
|
+
sketch1.serialize(s, test_type_serde());
|
|
120
|
+
REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes(test_type_serde()));
|
|
121
|
+
auto sketch2 = kll_test_type_sketch::deserialize(s, test_type_serde(), test_type_less(), 0);
|
|
122
|
+
REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes(test_type_serde()));
|
|
123
123
|
REQUIRE(s.tellg() == s.tellp());
|
|
124
124
|
REQUIRE(sketch2.is_empty() == sketch1.is_empty());
|
|
125
125
|
REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
|
|
126
126
|
REQUIRE(sketch2.get_n() == sketch1.get_n());
|
|
127
127
|
REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
|
|
128
|
-
REQUIRE(sketch2.
|
|
129
|
-
REQUIRE(sketch2.
|
|
128
|
+
REQUIRE(sketch2.get_min_item().get_value() == sketch1.get_min_item().get_value());
|
|
129
|
+
REQUIRE(sketch2.get_max_item().get_value() == sketch1.get_max_item().get_value());
|
|
130
130
|
REQUIRE(sketch2.get_normalized_rank_error(false) == sketch1.get_normalized_rank_error(false));
|
|
131
131
|
REQUIRE(sketch2.get_normalized_rank_error(true) == sketch1.get_normalized_rank_error(true));
|
|
132
132
|
REQUIRE(sketch2.get_quantile(0.5).get_value() == sketch1.get_quantile(0.5).get_value());
|
|
@@ -136,13 +136,13 @@ TEST_CASE("kll sketch custom type", "[kll_sketch]") {
|
|
|
136
136
|
}
|
|
137
137
|
|
|
138
138
|
SECTION("moving merge") {
|
|
139
|
-
kll_test_type_sketch sketch1(8, 0);
|
|
139
|
+
kll_test_type_sketch sketch1(8, test_type_less(), 0);
|
|
140
140
|
for (int i = 0; i < 10; i++) sketch1.update(i);
|
|
141
|
-
kll_test_type_sketch sketch2(8, 0);
|
|
141
|
+
kll_test_type_sketch sketch2(8, test_type_less(), 0);
|
|
142
142
|
sketch2.update(10);
|
|
143
143
|
sketch2.merge(std::move(sketch1));
|
|
144
|
-
REQUIRE(sketch2.
|
|
145
|
-
REQUIRE(sketch2.
|
|
144
|
+
REQUIRE(sketch2.get_min_item().get_value() == 0);
|
|
145
|
+
REQUIRE(sketch2.get_max_item().get_value() == 10);
|
|
146
146
|
REQUIRE(sketch2.get_n() == 11);
|
|
147
147
|
}
|
|
148
148
|
|