datasketches 0.2.7 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -17,7 +17,7 @@
17
17
 
18
18
  add_executable(req_test)
19
19
 
20
- target_link_libraries(req_test req common_test)
20
+ target_link_libraries(req_test req common_test_lib)
21
21
 
22
22
  set_target_properties(req_test PROPERTIES
23
23
  CXX_STANDARD 11
@@ -26,7 +26,7 @@
26
26
 
27
27
  namespace datasketches {
28
28
 
29
- using req_test_type_sketch = req_sketch<test_type, test_type_less, test_type_serde, test_allocator<test_type>>;
29
+ using req_test_type_sketch = req_sketch<test_type, test_type_less, test_allocator<test_type>>;
30
30
  using alloc = test_allocator<test_type>;
31
31
 
32
32
  TEST_CASE("req sketch custom type", "[req_sketch]") {
@@ -35,26 +35,26 @@ TEST_CASE("req sketch custom type", "[req_sketch]") {
35
35
  test_allocator_total_bytes = 0;
36
36
 
37
37
  SECTION("compact level zero") {
38
- req_test_type_sketch sketch(4, true, 0);
38
+ req_test_type_sketch sketch(4, true, test_type_less(), 0);
39
39
  REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
40
- REQUIRE_THROWS_AS(sketch.get_min_value(), std::runtime_error);
41
- REQUIRE_THROWS_AS(sketch.get_max_value(), std::runtime_error);
42
- REQUIRE(sketch.get_serialized_size_bytes() == 8);
40
+ REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
41
+ REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
42
+ REQUIRE(sketch.get_serialized_size_bytes(test_type_serde()) == 8);
43
43
 
44
44
  for (int i = 0; i < 24; ++i) sketch.update(i);
45
45
  //std::cout << sketch.to_string(true);
46
46
 
47
47
  REQUIRE(sketch.is_estimation_mode());
48
48
  REQUIRE(sketch.get_n() > sketch.get_num_retained());
49
- REQUIRE(sketch.get_min_value().get_value() == 0);
50
- REQUIRE(sketch.get_max_value().get_value() == 23);
49
+ REQUIRE(sketch.get_min_item().get_value() == 0);
50
+ REQUIRE(sketch.get_max_item().get_value() == 23);
51
51
  }
52
52
 
53
53
  SECTION("merge small") {
54
- req_test_type_sketch sketch1(4, true, 0);
54
+ req_test_type_sketch sketch1(4, true, test_type_less(), 0);
55
55
  sketch1.update(1);
56
56
 
57
- req_test_type_sketch sketch2(4, true, 0);
57
+ req_test_type_sketch sketch2(4, true, test_type_less(), 0);
58
58
  sketch2.update(2);
59
59
 
60
60
  sketch2.merge(sketch1);
@@ -63,15 +63,15 @@ TEST_CASE("req sketch custom type", "[req_sketch]") {
63
63
 
64
64
  REQUIRE_FALSE(sketch2.is_estimation_mode());
65
65
  REQUIRE(sketch2.get_num_retained() == sketch2.get_n());
66
- REQUIRE(sketch2.get_min_value().get_value() == 1);
67
- REQUIRE(sketch2.get_max_value().get_value() == 2);
66
+ REQUIRE(sketch2.get_min_item().get_value() == 1);
67
+ REQUIRE(sketch2.get_max_item().get_value() == 2);
68
68
  }
69
69
 
70
70
  SECTION("merge higher levels") {
71
- req_test_type_sketch sketch1(4, true, 0);
71
+ req_test_type_sketch sketch1(4, true, test_type_less(), 0);
72
72
  for (int i = 0; i < 24; ++i) sketch1.update(i);
73
73
 
74
- req_test_type_sketch sketch2(4, true, 0);
74
+ req_test_type_sketch sketch2(4, true, test_type_less(), 0);
75
75
  for (int i = 0; i < 24; ++i) sketch2.update(i);
76
76
 
77
77
  sketch2.merge(sketch1);
@@ -80,28 +80,28 @@ TEST_CASE("req sketch custom type", "[req_sketch]") {
80
80
 
81
81
  REQUIRE(sketch2.is_estimation_mode());
82
82
  REQUIRE(sketch2.get_n() > sketch2.get_num_retained());
83
- REQUIRE(sketch2.get_min_value().get_value() == 0);
84
- REQUIRE(sketch2.get_max_value().get_value() == 23);
83
+ REQUIRE(sketch2.get_min_item().get_value() == 0);
84
+ REQUIRE(sketch2.get_max_item().get_value() == 23);
85
85
  }
86
86
 
87
87
  SECTION("serialize deserialize") {
88
- req_test_type_sketch sketch1(12, true, 0);
88
+ req_test_type_sketch sketch1(12, true, test_type_less(), 0);
89
89
 
90
90
  const int n = 1000;
91
91
  for (int i = 0; i < n; i++) sketch1.update(i);
92
92
 
93
93
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
94
- sketch1.serialize(s);
95
- REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes());
96
- auto sketch2 = req_test_type_sketch::deserialize(s, alloc(0));
97
- REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes());
94
+ sketch1.serialize(s, test_type_serde());
95
+ REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes(test_type_serde()));
96
+ auto sketch2 = req_test_type_sketch::deserialize(s, test_type_serde(), test_type_less(), 0);
97
+ REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes(test_type_serde()));
98
98
  REQUIRE(s.tellg() == s.tellp());
99
99
  REQUIRE(sketch2.is_empty() == sketch1.is_empty());
100
100
  REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
101
101
  REQUIRE(sketch2.get_n() == sketch1.get_n());
102
102
  REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
103
- REQUIRE(sketch2.get_min_value().get_value() == sketch1.get_min_value().get_value());
104
- REQUIRE(sketch2.get_max_value().get_value() == sketch1.get_max_value().get_value());
103
+ REQUIRE(sketch2.get_min_item().get_value() == sketch1.get_min_item().get_value());
104
+ REQUIRE(sketch2.get_max_item().get_value() == sketch1.get_max_item().get_value());
105
105
  REQUIRE(sketch2.get_quantile(0.5).get_value() == sketch1.get_quantile(0.5).get_value());
106
106
  REQUIRE(sketch2.get_rank(0) == sketch1.get_rank(0));
107
107
  REQUIRE(sketch2.get_rank(n) == sketch1.get_rank(n));
@@ -109,13 +109,13 @@ TEST_CASE("req sketch custom type", "[req_sketch]") {
109
109
  }
110
110
 
111
111
  SECTION("moving merge") {
112
- req_test_type_sketch sketch1(4, true, 0);
112
+ req_test_type_sketch sketch1(4, true, test_type_less(), 0);
113
113
  for (int i = 0; i < 10; i++) sketch1.update(i);
114
- req_test_type_sketch sketch2(4, true, 0);
114
+ req_test_type_sketch sketch2(4, true, test_type_less(), 0);
115
115
  sketch2.update(10);
116
116
  sketch2.merge(std::move(sketch1));
117
- REQUIRE(sketch2.get_min_value().get_value() == 0);
118
- REQUIRE(sketch2.get_max_value().get_value() == 10);
117
+ REQUIRE(sketch2.get_min_item().get_value() == 0);
118
+ REQUIRE(sketch2.get_max_item().get_value() == 10);
119
119
  REQUIRE(sketch2.get_n() == 11);
120
120
  }
121
121
 
@@ -43,19 +43,16 @@ TEST_CASE("req sketch: empty", "[req_sketch]") {
43
43
  REQUIRE_FALSE(sketch.is_estimation_mode());
44
44
  REQUIRE(sketch.get_n() == 0);
45
45
  REQUIRE(sketch.get_num_retained() == 0);
46
- REQUIRE(std::isnan(sketch.get_rank(0)));
47
- REQUIRE(std::isnan(sketch.get_rank(std::numeric_limits<float>::infinity())));
48
- REQUIRE(std::isnan(sketch.get_min_value()));
49
- REQUIRE(std::isnan(sketch.get_max_value()));
50
- REQUIRE(std::isnan(sketch.get_quantile(0)));
51
- REQUIRE(std::isnan(sketch.get_quantile(0.5)));
52
- REQUIRE(std::isnan(sketch.get_quantile(1)));
46
+ REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
47
+ REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
48
+ REQUIRE_THROWS_AS(sketch.get_rank(0), std::runtime_error);
49
+ REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
53
50
  const double ranks[3] {0, 0.5, 1};
54
- REQUIRE(sketch.get_quantiles(ranks, 3).size() == 0);
51
+ REQUIRE_THROWS_AS(sketch.get_quantiles(ranks, 3), std::runtime_error);
55
52
 
56
53
  const float split_points[1] {0};
57
- REQUIRE(sketch.get_CDF(split_points, 1).empty());
58
- REQUIRE(sketch.get_PMF(split_points, 1).empty());
54
+ REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::runtime_error);
55
+ REQUIRE_THROWS_AS(sketch.get_PMF(split_points, 1), std::runtime_error);
59
56
  }
60
57
 
61
58
  TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
@@ -66,13 +63,13 @@ TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
66
63
  REQUIRE_FALSE(sketch.is_estimation_mode());
67
64
  REQUIRE(sketch.get_n() == 1);
68
65
  REQUIRE(sketch.get_num_retained() == 1);
69
- REQUIRE(sketch.get_rank(1.0f) == 0);
70
- REQUIRE(sketch.get_rank<true>(1.0f) == 1);
71
- REQUIRE(sketch.get_rank(1.1f) == 1);
66
+ REQUIRE(sketch.get_rank(1.0f, false) == 0);
67
+ REQUIRE(sketch.get_rank(1.0f) == 1);
68
+ REQUIRE(sketch.get_rank(1.1f, false) == 1);
72
69
  REQUIRE(sketch.get_rank(std::numeric_limits<float>::infinity()) == 1);
73
- REQUIRE(sketch.get_quantile(0) == 1);
74
- REQUIRE(sketch.get_quantile(0.5) == 1);
75
- REQUIRE(sketch.get_quantile(1) == 1);
70
+ REQUIRE(sketch.get_quantile(0, false) == 1);
71
+ REQUIRE(sketch.get_quantile(0.5, false) == 1);
72
+ REQUIRE(sketch.get_quantile(1, false) == 1);
76
73
 
77
74
  const double ranks[3] {0, 0.5, 1};
78
75
  auto quantiles = sketch.get_quantiles(ranks, 3);
@@ -82,11 +79,16 @@ TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
82
79
  REQUIRE(quantiles[2] == 1);
83
80
 
84
81
  unsigned count = 0;
85
- for (auto it: sketch) {
86
- REQUIRE(it.second == 1);
82
+ for (auto pair: sketch) {
83
+ REQUIRE(pair.second == 1);
87
84
  ++count;
88
85
  }
89
86
  REQUIRE(count == 1);
87
+
88
+ // iterator dereferencing
89
+ auto it = sketch.begin();
90
+ REQUIRE(it->first == 1.0f);
91
+ REQUIRE((*it).first == 1.0f);
90
92
  }
91
93
 
92
94
  TEST_CASE("req sketch: repeated values", "[req_sketch]") {
@@ -101,10 +103,10 @@ TEST_CASE("req sketch: repeated values", "[req_sketch]") {
101
103
  REQUIRE_FALSE(sketch.is_estimation_mode());
102
104
  REQUIRE(sketch.get_n() == 6);
103
105
  REQUIRE(sketch.get_num_retained() == 6);
104
- REQUIRE(sketch.get_rank(1.0f) == 0);
105
- REQUIRE(sketch.get_rank<true>(1.0f) == 0.5);
106
- REQUIRE(sketch.get_rank(2.0f) == 0.5);
107
- REQUIRE(sketch.get_rank<true>(2.0f) == 1);
106
+ REQUIRE(sketch.get_rank(1.0f, false) == 0);
107
+ REQUIRE(sketch.get_rank(1.0f) == 0.5);
108
+ REQUIRE(sketch.get_rank(2.0f, false) == 0.5);
109
+ REQUIRE(sketch.get_rank(2.0f) == 1);
108
110
  }
109
111
 
110
112
  TEST_CASE("req sketch: exact mode", "[req_sketch]") {
@@ -115,48 +117,48 @@ TEST_CASE("req sketch: exact mode", "[req_sketch]") {
115
117
  REQUIRE(sketch.get_n() == 10);
116
118
  REQUIRE(sketch.get_num_retained() == 10);
117
119
 
118
- // like KLL
119
- REQUIRE(sketch.get_rank(1.0f) == 0);
120
- REQUIRE(sketch.get_rank(2.0f) == 0.1);
121
- REQUIRE(sketch.get_rank(6.0f) == 0.5);
122
- REQUIRE(sketch.get_rank(9.0f) == 0.8);
123
- REQUIRE(sketch.get_rank(10.0f) == 0.9);
120
+ // exclusive
121
+ REQUIRE(sketch.get_rank(1.0f, false) == 0);
122
+ REQUIRE(sketch.get_rank(2.0f, false) == 0.1);
123
+ REQUIRE(sketch.get_rank(6.0f, false) == 0.5);
124
+ REQUIRE(sketch.get_rank(9.0f, false) == 0.8);
125
+ REQUIRE(sketch.get_rank(10.0f, false) == 0.9);
124
126
 
125
127
  // inclusive
126
- REQUIRE(sketch.get_rank<true>(1.0f) == 0.1);
127
- REQUIRE(sketch.get_rank<true>(2.0f) == 0.2);
128
- REQUIRE(sketch.get_rank<true>(5.0f) == 0.5);
129
- REQUIRE(sketch.get_rank<true>(9.0f) == 0.9);
130
- REQUIRE(sketch.get_rank<true>(10.0f) == 1);
128
+ REQUIRE(sketch.get_rank(1.0f) == 0.1);
129
+ REQUIRE(sketch.get_rank(2.0f) == 0.2);
130
+ REQUIRE(sketch.get_rank(5.0f) == 0.5);
131
+ REQUIRE(sketch.get_rank(9.0f) == 0.9);
132
+ REQUIRE(sketch.get_rank(10.0f) == 1);
133
+
134
+ // exclusive
135
+ REQUIRE(sketch.get_quantile(0, false) == 1);
136
+ REQUIRE(sketch.get_quantile(0.1, false) == 2);
137
+ REQUIRE(sketch.get_quantile(0.5, false) == 6);
138
+ REQUIRE(sketch.get_quantile(0.9, false) == 10);
139
+ REQUIRE(sketch.get_quantile(1, false) == 10);
131
140
 
132
- // like KLL
141
+ // inclusive
133
142
  REQUIRE(sketch.get_quantile(0) == 1);
134
- REQUIRE(sketch.get_quantile(0.1) == 2);
135
- REQUIRE(sketch.get_quantile(0.5) == 6);
136
- REQUIRE(sketch.get_quantile(0.9) == 10);
143
+ REQUIRE(sketch.get_quantile(0.1) == 1);
144
+ REQUIRE(sketch.get_quantile(0.5) == 5);
145
+ REQUIRE(sketch.get_quantile(0.9) == 9);
137
146
  REQUIRE(sketch.get_quantile(1) == 10);
138
147
 
139
- // inclusive
140
- REQUIRE(sketch.get_quantile<true>(0) == 1);
141
- REQUIRE(sketch.get_quantile<true>(0.1) == 1);
142
- REQUIRE(sketch.get_quantile<true>(0.5) == 5);
143
- REQUIRE(sketch.get_quantile<true>(0.9) == 9);
144
- REQUIRE(sketch.get_quantile<true>(1) == 10);
145
-
146
148
  const double ranks[3] {0, 0.5, 1};
147
149
  auto quantiles = sketch.get_quantiles(ranks, 3);
148
150
  REQUIRE(quantiles.size() == 3);
149
151
  REQUIRE(quantiles[0] == 1);
150
- REQUIRE(quantiles[1] == 6);
152
+ REQUIRE(quantiles[1] == 5);
151
153
  REQUIRE(quantiles[2] == 10);
152
154
 
153
155
  const float splits[3] {2, 6, 9};
154
- auto cdf = sketch.get_CDF(splits, 3);
156
+ auto cdf = sketch.get_CDF(splits, 3, false);
155
157
  REQUIRE(cdf[0] == 0.1);
156
158
  REQUIRE(cdf[1] == 0.5);
157
159
  REQUIRE(cdf[2] == 0.8);
158
160
  REQUIRE(cdf[3] == 1);
159
- auto pmf = sketch.get_PMF(splits, 3);
161
+ auto pmf = sketch.get_PMF(splits, 3, false);
160
162
  REQUIRE(pmf[0] == Approx(0.1).margin(1e-8));
161
163
  REQUIRE(pmf[1] == Approx(0.4).margin(1e-8));
162
164
  REQUIRE(pmf[2] == Approx(0.3).margin(1e-8));
@@ -175,18 +177,18 @@ TEST_CASE("req sketch: estimation mode", "[req_sketch]") {
175
177
  REQUIRE(sketch.get_n() == n);
176
178
  // std::cout << sketch.to_string(true);
177
179
  REQUIRE(sketch.get_num_retained() < n);
178
- REQUIRE(sketch.get_rank(0) == 0);
179
- REQUIRE(sketch.get_rank(static_cast<float>(n)) == 1);
180
- REQUIRE(sketch.get_rank(n / 2.0f) == Approx(0.5).margin(0.01));
181
- REQUIRE(sketch.get_rank(n - 1.0f) == Approx(1).margin(0.01));
182
- REQUIRE(sketch.get_min_value() == 0);
183
- REQUIRE(sketch.get_max_value() == n - 1);
180
+ REQUIRE(sketch.get_rank(0, false) == 0);
181
+ REQUIRE(sketch.get_rank(static_cast<float>(n), false) == 1);
182
+ REQUIRE(sketch.get_rank(n / 2.0f, false) == Approx(0.5).margin(0.01));
183
+ REQUIRE(sketch.get_rank(n - 1.0f, false) == Approx(1).margin(0.01));
184
+ REQUIRE(sketch.get_min_item() == 0);
185
+ REQUIRE(sketch.get_max_item() == n - 1);
184
186
  REQUIRE(sketch.get_rank_lower_bound(0.5, 1) < 0.5);
185
187
  REQUIRE(sketch.get_rank_upper_bound(0.5, 1) > 0.5);
186
188
 
187
189
  unsigned count = 0;
188
- for (auto it: sketch) {
189
- REQUIRE(it.second >= 1);
190
+ for (auto pair: sketch) {
191
+ REQUIRE(pair.second >= 1);
190
192
  ++count;
191
193
  }
192
194
  REQUIRE(count == sketch.get_num_retained());
@@ -203,8 +205,8 @@ TEST_CASE("req sketch: stream serialize-deserialize empty", "[req_sketch]") {
203
205
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
204
206
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
205
207
  REQUIRE(sketch2.get_n() == sketch.get_n());
206
- REQUIRE(std::isnan(sketch2.get_min_value()));
207
- REQUIRE(std::isnan(sketch2.get_max_value()));
208
+ REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
209
+ REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
208
210
  }
209
211
 
210
212
  TEST_CASE("req sketch: byte serialize-deserialize empty", "[req_sketch]") {
@@ -218,8 +220,8 @@ TEST_CASE("req sketch: byte serialize-deserialize empty", "[req_sketch]") {
218
220
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
219
221
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
220
222
  REQUIRE(sketch2.get_n() == sketch.get_n());
221
- REQUIRE(std::isnan(sketch2.get_min_value()));
222
- REQUIRE(std::isnan(sketch2.get_max_value()));
223
+ REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
224
+ REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
223
225
  }
224
226
 
225
227
  TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]") {
@@ -234,8 +236,8 @@ TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]"
234
236
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
235
237
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
236
238
  REQUIRE(sketch2.get_n() == sketch.get_n());
237
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
238
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
239
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
240
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
239
241
  }
240
242
 
241
243
  TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]") {
@@ -251,8 +253,8 @@ TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]")
251
253
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
252
254
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
253
255
  REQUIRE(sketch2.get_n() == sketch.get_n());
254
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
255
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
256
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
257
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
256
258
  }
257
259
 
258
260
  TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]") {
@@ -269,8 +271,8 @@ TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]")
269
271
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
270
272
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
271
273
  REQUIRE(sketch2.get_n() == sketch.get_n());
272
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
273
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
274
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
275
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
274
276
  }
275
277
 
276
278
  TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
@@ -288,8 +290,8 @@ TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
288
290
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
289
291
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
290
292
  REQUIRE(sketch2.get_n() == sketch.get_n());
291
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
292
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
293
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
294
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
293
295
  }
294
296
 
295
297
  TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sketch]") {
@@ -306,8 +308,8 @@ TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sket
306
308
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
307
309
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
308
310
  REQUIRE(sketch2.get_n() == sketch.get_n());
309
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
310
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
311
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
312
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
311
313
  }
312
314
 
313
315
  TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch]") {
@@ -324,8 +326,8 @@ TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch
324
326
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
325
327
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
326
328
  REQUIRE(sketch2.get_n() == sketch.get_n());
327
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
328
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
329
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
330
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
329
331
  }
330
332
 
331
333
  TEST_CASE("req sketch: serialize deserialize stream and bytes equivalence", "[req_sketch]") {
@@ -350,8 +352,8 @@ TEST_CASE("req sketch: serialize deserialize stream and bytes equivalence", "[re
350
352
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
351
353
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
352
354
  REQUIRE(sketch2.get_n() == sketch.get_n());
353
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
354
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
355
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
356
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
355
357
  }
356
358
 
357
359
  TEST_CASE("req sketch: stream deserialize from Java - empty", "[req_sketch]") {
@@ -363,8 +365,8 @@ TEST_CASE("req sketch: stream deserialize from Java - empty", "[req_sketch]") {
363
365
  REQUIRE_FALSE(sketch.is_estimation_mode());
364
366
  REQUIRE(sketch.get_n() == 0);
365
367
  REQUIRE(sketch.get_num_retained() == 0);
366
- REQUIRE(std::isnan(sketch.get_min_value()));
367
- REQUIRE(std::isnan(sketch.get_max_value()));
368
+ REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
369
+ REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
368
370
  }
369
371
 
370
372
  TEST_CASE("req sketch: stream deserialize from Java - single item", "[req_sketch]") {
@@ -376,10 +378,10 @@ TEST_CASE("req sketch: stream deserialize from Java - single item", "[req_sketch
376
378
  REQUIRE_FALSE(sketch.is_estimation_mode());
377
379
  REQUIRE(sketch.get_n() == 1);
378
380
  REQUIRE(sketch.get_num_retained() == 1);
379
- REQUIRE(sketch.get_min_value() == 1);
380
- REQUIRE(sketch.get_max_value() == 1);
381
- REQUIRE(sketch.get_rank(1.0f) == 0);
382
- REQUIRE(sketch.get_rank<true>(1.0f) == 1);
381
+ REQUIRE(sketch.get_min_item() == 1);
382
+ REQUIRE(sketch.get_max_item() == 1);
383
+ REQUIRE(sketch.get_rank(1.0f, false) == 0);
384
+ REQUIRE(sketch.get_rank(1.0f) == 1);
383
385
  }
384
386
 
385
387
  TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]") {
@@ -391,9 +393,9 @@ TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]"
391
393
  REQUIRE_FALSE(sketch.is_estimation_mode());
392
394
  REQUIRE(sketch.get_n() == 4);
393
395
  REQUIRE(sketch.get_num_retained() == 4);
394
- REQUIRE(sketch.get_min_value() == 0);
395
- REQUIRE(sketch.get_max_value() == 3);
396
- REQUIRE(sketch.get_rank(2.0f) == 0.5);
396
+ REQUIRE(sketch.get_min_item() == 0);
397
+ REQUIRE(sketch.get_max_item() == 3);
398
+ REQUIRE(sketch.get_rank(2.0f, false) == 0.5);
397
399
  }
398
400
 
399
401
  TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]") {
@@ -405,9 +407,9 @@ TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]
405
407
  REQUIRE_FALSE(sketch.is_estimation_mode());
406
408
  REQUIRE(sketch.get_n() == 100);
407
409
  REQUIRE(sketch.get_num_retained() == 100);
408
- REQUIRE(sketch.get_min_value() == 0);
409
- REQUIRE(sketch.get_max_value() == 99);
410
- REQUIRE(sketch.get_rank(50.0f) == 0.5);
410
+ REQUIRE(sketch.get_min_item() == 0);
411
+ REQUIRE(sketch.get_max_item() == 99);
412
+ REQUIRE(sketch.get_rank(50.0f, false) == 0.5);
411
413
  }
412
414
 
413
415
  TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sketch]") {
@@ -419,9 +421,9 @@ TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sk
419
421
  REQUIRE(sketch.is_estimation_mode());
420
422
  REQUIRE(sketch.get_n() == 10000);
421
423
  REQUIRE(sketch.get_num_retained() == 2942);
422
- REQUIRE(sketch.get_min_value() == 0);
423
- REQUIRE(sketch.get_max_value() == 9999);
424
- REQUIRE(sketch.get_rank(5000.0f) == 0.5);
424
+ REQUIRE(sketch.get_min_item() == 0);
425
+ REQUIRE(sketch.get_max_item() == 9999);
426
+ REQUIRE(sketch.get_rank(5000.0f, false) == 0.5);
425
427
  }
426
428
 
427
429
  TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
@@ -431,11 +433,11 @@ TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
431
433
  for (size_t i = 0; i < 1000; ++i) sketch2.update(static_cast<float>(i));
432
434
 
433
435
  sketch1.merge(sketch2);
434
- REQUIRE(sketch1.get_min_value() == 0);
435
- REQUIRE(sketch1.get_max_value() == 999);
436
- REQUIRE(sketch1.get_quantile(0.25) == Approx(250).margin(3));
437
- REQUIRE(sketch1.get_quantile(0.5) == Approx(500).margin(3));
438
- REQUIRE(sketch1.get_quantile(0.75) == Approx(750).margin(3));
436
+ REQUIRE(sketch1.get_min_item() == 0);
437
+ REQUIRE(sketch1.get_max_item() == 999);
438
+ REQUIRE(sketch1.get_quantile(0.25) == Approx(250).epsilon(0.01));
439
+ REQUIRE(sketch1.get_quantile(0.5) == Approx(500).epsilon(0.01));
440
+ REQUIRE(sketch1.get_quantile(0.75) == Approx(750).epsilon(0.01));
439
441
  REQUIRE(sketch1.get_rank(500.0f) == Approx(0.5).margin(0.01));
440
442
  }
441
443
 
@@ -447,11 +449,11 @@ TEST_CASE("req sketch: merge", "[req_sketch]") {
447
449
  for (size_t i = 1000; i < 2000; ++i) sketch2.update(static_cast<float>(i));
448
450
 
449
451
  sketch1.merge(sketch2);
450
- REQUIRE(sketch1.get_min_value() == 0);
451
- REQUIRE(sketch1.get_max_value() == 1999);
452
- REQUIRE(sketch1.get_quantile(0.25) == Approx(500).margin(3));
453
- REQUIRE(sketch1.get_quantile(0.5) == Approx(1000).margin(1));
454
- REQUIRE(sketch1.get_quantile(0.75) == Approx(1500).margin(1));
452
+ REQUIRE(sketch1.get_min_item() == 0);
453
+ REQUIRE(sketch1.get_max_item() == 1999);
454
+ REQUIRE(sketch1.get_quantile(0.25) == Approx(500).epsilon(0.01));
455
+ REQUIRE(sketch1.get_quantile(0.5) == Approx(1000).epsilon(0.01));
456
+ REQUIRE(sketch1.get_quantile(0.75) == Approx(1500).epsilon(0.01));
455
457
  REQUIRE(sketch1.get_rank(1000.0f) == Approx(0.5).margin(0.01));
456
458
  }
457
459
 
@@ -469,9 +471,9 @@ TEST_CASE("req sketch: merge multiple", "[req_sketch]") {
469
471
  sketch.merge(sketch1);
470
472
  sketch.merge(sketch2);
471
473
  sketch.merge(sketch3);
472
- REQUIRE(sketch.get_min_value() == 0);
473
- REQUIRE(sketch.get_max_value() == 119);
474
- REQUIRE(sketch.get_quantile(0.5) == Approx(60).margin(3));
474
+ REQUIRE(sketch.get_min_item() == 0);
475
+ REQUIRE(sketch.get_max_item() == 119);
476
+ REQUIRE(sketch.get_quantile(0.5) == Approx(60).epsilon(0.02));
475
477
  REQUIRE(sketch.get_rank(60.0f) == Approx(0.5).margin(0.01));
476
478
  }
477
479
 
@@ -503,8 +505,8 @@ TEST_CASE("req sketch: type conversion - several levels", "[req_sketch]") {
503
505
  REQUIRE(req_float.get_n() == req_double.get_n());
504
506
  REQUIRE(req_float.get_num_retained() == req_double.get_num_retained());
505
507
 
506
- auto sv_float = req_float.get_sorted_view(false);
507
- auto sv_double = req_double.get_sorted_view(false);
508
+ auto sv_float = req_float.get_sorted_view();
509
+ auto sv_double = req_double.get_sorted_view();
508
510
  auto sv_float_it = sv_float.begin();
509
511
  auto sv_double_it = sv_double.begin();
510
512
  while (sv_float_it != sv_float.end()) {
@@ -551,6 +553,17 @@ TEST_CASE("req sketch: type conversion - custom types") {
551
553
  REQUIRE(sb.get_n() == 3);
552
554
  }
553
555
 
556
+ TEST_CASE("get_rank equivalence") {
557
+ req_sketch<int> sketch(12);
558
+ const size_t n = 1000;
559
+ for (size_t i = 0; i < n; ++i) sketch.update(i);
560
+ REQUIRE(sketch.get_n() == n);
561
+ auto view = sketch.get_sorted_view();
562
+ for (size_t i = 0; i < n; ++i) {
563
+ REQUIRE(sketch.get_rank(i) == view.get_rank(i));
564
+ }
565
+ }
566
+
554
567
  //TEST_CASE("for manual comparison with Java") {
555
568
  // req_sketch<float> sketch(12, false);
556
569
  // for (size_t i = 0; i < 100000; ++i) sketch.update(i);