datasketches 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -17,7 +17,7 @@
17
17
 
18
18
  add_executable(req_test)
19
19
 
20
- target_link_libraries(req_test req common_test)
20
+ target_link_libraries(req_test req common_test_lib)
21
21
 
22
22
  set_target_properties(req_test PROPERTIES
23
23
  CXX_STANDARD 11
@@ -26,7 +26,7 @@
26
26
 
27
27
  namespace datasketches {
28
28
 
29
- using req_test_type_sketch = req_sketch<test_type, test_type_less, test_type_serde, test_allocator<test_type>>;
29
+ using req_test_type_sketch = req_sketch<test_type, test_type_less, test_allocator<test_type>>;
30
30
  using alloc = test_allocator<test_type>;
31
31
 
32
32
  TEST_CASE("req sketch custom type", "[req_sketch]") {
@@ -35,26 +35,26 @@ TEST_CASE("req sketch custom type", "[req_sketch]") {
35
35
  test_allocator_total_bytes = 0;
36
36
 
37
37
  SECTION("compact level zero") {
38
- req_test_type_sketch sketch(4, true, 0);
38
+ req_test_type_sketch sketch(4, true, test_type_less(), 0);
39
39
  REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
40
- REQUIRE_THROWS_AS(sketch.get_min_value(), std::runtime_error);
41
- REQUIRE_THROWS_AS(sketch.get_max_value(), std::runtime_error);
42
- REQUIRE(sketch.get_serialized_size_bytes() == 8);
40
+ REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
41
+ REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
42
+ REQUIRE(sketch.get_serialized_size_bytes(test_type_serde()) == 8);
43
43
 
44
44
  for (int i = 0; i < 24; ++i) sketch.update(i);
45
45
  //std::cout << sketch.to_string(true);
46
46
 
47
47
  REQUIRE(sketch.is_estimation_mode());
48
48
  REQUIRE(sketch.get_n() > sketch.get_num_retained());
49
- REQUIRE(sketch.get_min_value().get_value() == 0);
50
- REQUIRE(sketch.get_max_value().get_value() == 23);
49
+ REQUIRE(sketch.get_min_item().get_value() == 0);
50
+ REQUIRE(sketch.get_max_item().get_value() == 23);
51
51
  }
52
52
 
53
53
  SECTION("merge small") {
54
- req_test_type_sketch sketch1(4, true, 0);
54
+ req_test_type_sketch sketch1(4, true, test_type_less(), 0);
55
55
  sketch1.update(1);
56
56
 
57
- req_test_type_sketch sketch2(4, true, 0);
57
+ req_test_type_sketch sketch2(4, true, test_type_less(), 0);
58
58
  sketch2.update(2);
59
59
 
60
60
  sketch2.merge(sketch1);
@@ -63,15 +63,15 @@ TEST_CASE("req sketch custom type", "[req_sketch]") {
63
63
 
64
64
  REQUIRE_FALSE(sketch2.is_estimation_mode());
65
65
  REQUIRE(sketch2.get_num_retained() == sketch2.get_n());
66
- REQUIRE(sketch2.get_min_value().get_value() == 1);
67
- REQUIRE(sketch2.get_max_value().get_value() == 2);
66
+ REQUIRE(sketch2.get_min_item().get_value() == 1);
67
+ REQUIRE(sketch2.get_max_item().get_value() == 2);
68
68
  }
69
69
 
70
70
  SECTION("merge higher levels") {
71
- req_test_type_sketch sketch1(4, true, 0);
71
+ req_test_type_sketch sketch1(4, true, test_type_less(), 0);
72
72
  for (int i = 0; i < 24; ++i) sketch1.update(i);
73
73
 
74
- req_test_type_sketch sketch2(4, true, 0);
74
+ req_test_type_sketch sketch2(4, true, test_type_less(), 0);
75
75
  for (int i = 0; i < 24; ++i) sketch2.update(i);
76
76
 
77
77
  sketch2.merge(sketch1);
@@ -80,28 +80,28 @@ TEST_CASE("req sketch custom type", "[req_sketch]") {
80
80
 
81
81
  REQUIRE(sketch2.is_estimation_mode());
82
82
  REQUIRE(sketch2.get_n() > sketch2.get_num_retained());
83
- REQUIRE(sketch2.get_min_value().get_value() == 0);
84
- REQUIRE(sketch2.get_max_value().get_value() == 23);
83
+ REQUIRE(sketch2.get_min_item().get_value() == 0);
84
+ REQUIRE(sketch2.get_max_item().get_value() == 23);
85
85
  }
86
86
 
87
87
  SECTION("serialize deserialize") {
88
- req_test_type_sketch sketch1(12, true, 0);
88
+ req_test_type_sketch sketch1(12, true, test_type_less(), 0);
89
89
 
90
90
  const int n = 1000;
91
91
  for (int i = 0; i < n; i++) sketch1.update(i);
92
92
 
93
93
  std::stringstream s(std::ios::in | std::ios::out | std::ios::binary);
94
- sketch1.serialize(s);
95
- REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes());
96
- auto sketch2 = req_test_type_sketch::deserialize(s, alloc(0));
97
- REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes());
94
+ sketch1.serialize(s, test_type_serde());
95
+ REQUIRE((size_t) s.tellp() == sketch1.get_serialized_size_bytes(test_type_serde()));
96
+ auto sketch2 = req_test_type_sketch::deserialize(s, test_type_serde(), test_type_less(), 0);
97
+ REQUIRE((size_t) s.tellg() == sketch2.get_serialized_size_bytes(test_type_serde()));
98
98
  REQUIRE(s.tellg() == s.tellp());
99
99
  REQUIRE(sketch2.is_empty() == sketch1.is_empty());
100
100
  REQUIRE(sketch2.is_estimation_mode() == sketch1.is_estimation_mode());
101
101
  REQUIRE(sketch2.get_n() == sketch1.get_n());
102
102
  REQUIRE(sketch2.get_num_retained() == sketch1.get_num_retained());
103
- REQUIRE(sketch2.get_min_value().get_value() == sketch1.get_min_value().get_value());
104
- REQUIRE(sketch2.get_max_value().get_value() == sketch1.get_max_value().get_value());
103
+ REQUIRE(sketch2.get_min_item().get_value() == sketch1.get_min_item().get_value());
104
+ REQUIRE(sketch2.get_max_item().get_value() == sketch1.get_max_item().get_value());
105
105
  REQUIRE(sketch2.get_quantile(0.5).get_value() == sketch1.get_quantile(0.5).get_value());
106
106
  REQUIRE(sketch2.get_rank(0) == sketch1.get_rank(0));
107
107
  REQUIRE(sketch2.get_rank(n) == sketch1.get_rank(n));
@@ -109,13 +109,13 @@ TEST_CASE("req sketch custom type", "[req_sketch]") {
109
109
  }
110
110
 
111
111
  SECTION("moving merge") {
112
- req_test_type_sketch sketch1(4, true, 0);
112
+ req_test_type_sketch sketch1(4, true, test_type_less(), 0);
113
113
  for (int i = 0; i < 10; i++) sketch1.update(i);
114
- req_test_type_sketch sketch2(4, true, 0);
114
+ req_test_type_sketch sketch2(4, true, test_type_less(), 0);
115
115
  sketch2.update(10);
116
116
  sketch2.merge(std::move(sketch1));
117
- REQUIRE(sketch2.get_min_value().get_value() == 0);
118
- REQUIRE(sketch2.get_max_value().get_value() == 10);
117
+ REQUIRE(sketch2.get_min_item().get_value() == 0);
118
+ REQUIRE(sketch2.get_max_item().get_value() == 10);
119
119
  REQUIRE(sketch2.get_n() == 11);
120
120
  }
121
121
 
@@ -43,19 +43,16 @@ TEST_CASE("req sketch: empty", "[req_sketch]") {
43
43
  REQUIRE_FALSE(sketch.is_estimation_mode());
44
44
  REQUIRE(sketch.get_n() == 0);
45
45
  REQUIRE(sketch.get_num_retained() == 0);
46
- REQUIRE(std::isnan(sketch.get_rank(0)));
47
- REQUIRE(std::isnan(sketch.get_rank(std::numeric_limits<float>::infinity())));
48
- REQUIRE(std::isnan(sketch.get_min_value()));
49
- REQUIRE(std::isnan(sketch.get_max_value()));
50
- REQUIRE(std::isnan(sketch.get_quantile(0)));
51
- REQUIRE(std::isnan(sketch.get_quantile(0.5)));
52
- REQUIRE(std::isnan(sketch.get_quantile(1)));
46
+ REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
47
+ REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
48
+ REQUIRE_THROWS_AS(sketch.get_rank(0), std::runtime_error);
49
+ REQUIRE_THROWS_AS(sketch.get_quantile(0), std::runtime_error);
53
50
  const double ranks[3] {0, 0.5, 1};
54
- REQUIRE(sketch.get_quantiles(ranks, 3).size() == 0);
51
+ REQUIRE_THROWS_AS(sketch.get_quantiles(ranks, 3), std::runtime_error);
55
52
 
56
53
  const float split_points[1] {0};
57
- REQUIRE(sketch.get_CDF(split_points, 1).empty());
58
- REQUIRE(sketch.get_PMF(split_points, 1).empty());
54
+ REQUIRE_THROWS_AS(sketch.get_CDF(split_points, 1), std::runtime_error);
55
+ REQUIRE_THROWS_AS(sketch.get_PMF(split_points, 1), std::runtime_error);
59
56
  }
60
57
 
61
58
  TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
@@ -66,13 +63,13 @@ TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
66
63
  REQUIRE_FALSE(sketch.is_estimation_mode());
67
64
  REQUIRE(sketch.get_n() == 1);
68
65
  REQUIRE(sketch.get_num_retained() == 1);
69
- REQUIRE(sketch.get_rank(1.0f) == 0);
70
- REQUIRE(sketch.get_rank<true>(1.0f) == 1);
71
- REQUIRE(sketch.get_rank(1.1f) == 1);
66
+ REQUIRE(sketch.get_rank(1.0f, false) == 0);
67
+ REQUIRE(sketch.get_rank(1.0f) == 1);
68
+ REQUIRE(sketch.get_rank(1.1f, false) == 1);
72
69
  REQUIRE(sketch.get_rank(std::numeric_limits<float>::infinity()) == 1);
73
- REQUIRE(sketch.get_quantile(0) == 1);
74
- REQUIRE(sketch.get_quantile(0.5) == 1);
75
- REQUIRE(sketch.get_quantile(1) == 1);
70
+ REQUIRE(sketch.get_quantile(0, false) == 1);
71
+ REQUIRE(sketch.get_quantile(0.5, false) == 1);
72
+ REQUIRE(sketch.get_quantile(1, false) == 1);
76
73
 
77
74
  const double ranks[3] {0, 0.5, 1};
78
75
  auto quantiles = sketch.get_quantiles(ranks, 3);
@@ -82,11 +79,16 @@ TEST_CASE("req sketch: single value, lra", "[req_sketch]") {
82
79
  REQUIRE(quantiles[2] == 1);
83
80
 
84
81
  unsigned count = 0;
85
- for (auto it: sketch) {
86
- REQUIRE(it.second == 1);
82
+ for (auto pair: sketch) {
83
+ REQUIRE(pair.second == 1);
87
84
  ++count;
88
85
  }
89
86
  REQUIRE(count == 1);
87
+
88
+ // iterator dereferencing
89
+ auto it = sketch.begin();
90
+ REQUIRE(it->first == 1.0f);
91
+ REQUIRE((*it).first == 1.0f);
90
92
  }
91
93
 
92
94
  TEST_CASE("req sketch: repeated values", "[req_sketch]") {
@@ -101,10 +103,10 @@ TEST_CASE("req sketch: repeated values", "[req_sketch]") {
101
103
  REQUIRE_FALSE(sketch.is_estimation_mode());
102
104
  REQUIRE(sketch.get_n() == 6);
103
105
  REQUIRE(sketch.get_num_retained() == 6);
104
- REQUIRE(sketch.get_rank(1.0f) == 0);
105
- REQUIRE(sketch.get_rank<true>(1.0f) == 0.5);
106
- REQUIRE(sketch.get_rank(2.0f) == 0.5);
107
- REQUIRE(sketch.get_rank<true>(2.0f) == 1);
106
+ REQUIRE(sketch.get_rank(1.0f, false) == 0);
107
+ REQUIRE(sketch.get_rank(1.0f) == 0.5);
108
+ REQUIRE(sketch.get_rank(2.0f, false) == 0.5);
109
+ REQUIRE(sketch.get_rank(2.0f) == 1);
108
110
  }
109
111
 
110
112
  TEST_CASE("req sketch: exact mode", "[req_sketch]") {
@@ -115,48 +117,48 @@ TEST_CASE("req sketch: exact mode", "[req_sketch]") {
115
117
  REQUIRE(sketch.get_n() == 10);
116
118
  REQUIRE(sketch.get_num_retained() == 10);
117
119
 
118
- // like KLL
119
- REQUIRE(sketch.get_rank(1.0f) == 0);
120
- REQUIRE(sketch.get_rank(2.0f) == 0.1);
121
- REQUIRE(sketch.get_rank(6.0f) == 0.5);
122
- REQUIRE(sketch.get_rank(9.0f) == 0.8);
123
- REQUIRE(sketch.get_rank(10.0f) == 0.9);
120
+ // exclusive
121
+ REQUIRE(sketch.get_rank(1.0f, false) == 0);
122
+ REQUIRE(sketch.get_rank(2.0f, false) == 0.1);
123
+ REQUIRE(sketch.get_rank(6.0f, false) == 0.5);
124
+ REQUIRE(sketch.get_rank(9.0f, false) == 0.8);
125
+ REQUIRE(sketch.get_rank(10.0f, false) == 0.9);
124
126
 
125
127
  // inclusive
126
- REQUIRE(sketch.get_rank<true>(1.0f) == 0.1);
127
- REQUIRE(sketch.get_rank<true>(2.0f) == 0.2);
128
- REQUIRE(sketch.get_rank<true>(5.0f) == 0.5);
129
- REQUIRE(sketch.get_rank<true>(9.0f) == 0.9);
130
- REQUIRE(sketch.get_rank<true>(10.0f) == 1);
128
+ REQUIRE(sketch.get_rank(1.0f) == 0.1);
129
+ REQUIRE(sketch.get_rank(2.0f) == 0.2);
130
+ REQUIRE(sketch.get_rank(5.0f) == 0.5);
131
+ REQUIRE(sketch.get_rank(9.0f) == 0.9);
132
+ REQUIRE(sketch.get_rank(10.0f) == 1);
133
+
134
+ // exclusive
135
+ REQUIRE(sketch.get_quantile(0, false) == 1);
136
+ REQUIRE(sketch.get_quantile(0.1, false) == 2);
137
+ REQUIRE(sketch.get_quantile(0.5, false) == 6);
138
+ REQUIRE(sketch.get_quantile(0.9, false) == 10);
139
+ REQUIRE(sketch.get_quantile(1, false) == 10);
131
140
 
132
- // like KLL
141
+ // inclusive
133
142
  REQUIRE(sketch.get_quantile(0) == 1);
134
- REQUIRE(sketch.get_quantile(0.1) == 2);
135
- REQUIRE(sketch.get_quantile(0.5) == 6);
136
- REQUIRE(sketch.get_quantile(0.9) == 10);
143
+ REQUIRE(sketch.get_quantile(0.1) == 1);
144
+ REQUIRE(sketch.get_quantile(0.5) == 5);
145
+ REQUIRE(sketch.get_quantile(0.9) == 9);
137
146
  REQUIRE(sketch.get_quantile(1) == 10);
138
147
 
139
- // inclusive
140
- REQUIRE(sketch.get_quantile<true>(0) == 1);
141
- REQUIRE(sketch.get_quantile<true>(0.1) == 1);
142
- REQUIRE(sketch.get_quantile<true>(0.5) == 5);
143
- REQUIRE(sketch.get_quantile<true>(0.9) == 9);
144
- REQUIRE(sketch.get_quantile<true>(1) == 10);
145
-
146
148
  const double ranks[3] {0, 0.5, 1};
147
149
  auto quantiles = sketch.get_quantiles(ranks, 3);
148
150
  REQUIRE(quantiles.size() == 3);
149
151
  REQUIRE(quantiles[0] == 1);
150
- REQUIRE(quantiles[1] == 6);
152
+ REQUIRE(quantiles[1] == 5);
151
153
  REQUIRE(quantiles[2] == 10);
152
154
 
153
155
  const float splits[3] {2, 6, 9};
154
- auto cdf = sketch.get_CDF(splits, 3);
156
+ auto cdf = sketch.get_CDF(splits, 3, false);
155
157
  REQUIRE(cdf[0] == 0.1);
156
158
  REQUIRE(cdf[1] == 0.5);
157
159
  REQUIRE(cdf[2] == 0.8);
158
160
  REQUIRE(cdf[3] == 1);
159
- auto pmf = sketch.get_PMF(splits, 3);
161
+ auto pmf = sketch.get_PMF(splits, 3, false);
160
162
  REQUIRE(pmf[0] == Approx(0.1).margin(1e-8));
161
163
  REQUIRE(pmf[1] == Approx(0.4).margin(1e-8));
162
164
  REQUIRE(pmf[2] == Approx(0.3).margin(1e-8));
@@ -175,18 +177,18 @@ TEST_CASE("req sketch: estimation mode", "[req_sketch]") {
175
177
  REQUIRE(sketch.get_n() == n);
176
178
  // std::cout << sketch.to_string(true);
177
179
  REQUIRE(sketch.get_num_retained() < n);
178
- REQUIRE(sketch.get_rank(0) == 0);
179
- REQUIRE(sketch.get_rank(static_cast<float>(n)) == 1);
180
- REQUIRE(sketch.get_rank(n / 2.0f) == Approx(0.5).margin(0.01));
181
- REQUIRE(sketch.get_rank(n - 1.0f) == Approx(1).margin(0.01));
182
- REQUIRE(sketch.get_min_value() == 0);
183
- REQUIRE(sketch.get_max_value() == n - 1);
180
+ REQUIRE(sketch.get_rank(0, false) == 0);
181
+ REQUIRE(sketch.get_rank(static_cast<float>(n), false) == 1);
182
+ REQUIRE(sketch.get_rank(n / 2.0f, false) == Approx(0.5).margin(0.01));
183
+ REQUIRE(sketch.get_rank(n - 1.0f, false) == Approx(1).margin(0.01));
184
+ REQUIRE(sketch.get_min_item() == 0);
185
+ REQUIRE(sketch.get_max_item() == n - 1);
184
186
  REQUIRE(sketch.get_rank_lower_bound(0.5, 1) < 0.5);
185
187
  REQUIRE(sketch.get_rank_upper_bound(0.5, 1) > 0.5);
186
188
 
187
189
  unsigned count = 0;
188
- for (auto it: sketch) {
189
- REQUIRE(it.second >= 1);
190
+ for (auto pair: sketch) {
191
+ REQUIRE(pair.second >= 1);
190
192
  ++count;
191
193
  }
192
194
  REQUIRE(count == sketch.get_num_retained());
@@ -203,8 +205,8 @@ TEST_CASE("req sketch: stream serialize-deserialize empty", "[req_sketch]") {
203
205
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
204
206
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
205
207
  REQUIRE(sketch2.get_n() == sketch.get_n());
206
- REQUIRE(std::isnan(sketch2.get_min_value()));
207
- REQUIRE(std::isnan(sketch2.get_max_value()));
208
+ REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
209
+ REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
208
210
  }
209
211
 
210
212
  TEST_CASE("req sketch: byte serialize-deserialize empty", "[req_sketch]") {
@@ -218,8 +220,8 @@ TEST_CASE("req sketch: byte serialize-deserialize empty", "[req_sketch]") {
218
220
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
219
221
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
220
222
  REQUIRE(sketch2.get_n() == sketch.get_n());
221
- REQUIRE(std::isnan(sketch2.get_min_value()));
222
- REQUIRE(std::isnan(sketch2.get_max_value()));
223
+ REQUIRE_THROWS_AS(sketch2.get_min_item(), std::runtime_error);
224
+ REQUIRE_THROWS_AS(sketch2.get_max_item(), std::runtime_error);
223
225
  }
224
226
 
225
227
  TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]") {
@@ -234,8 +236,8 @@ TEST_CASE("req sketch: stream serialize-deserialize single item", "[req_sketch]"
234
236
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
235
237
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
236
238
  REQUIRE(sketch2.get_n() == sketch.get_n());
237
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
238
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
239
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
240
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
239
241
  }
240
242
 
241
243
  TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]") {
@@ -251,8 +253,8 @@ TEST_CASE("req sketch: byte serialize-deserialize single item", "[req_sketch]")
251
253
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
252
254
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
253
255
  REQUIRE(sketch2.get_n() == sketch.get_n());
254
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
255
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
256
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
257
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
256
258
  }
257
259
 
258
260
  TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]") {
@@ -269,8 +271,8 @@ TEST_CASE("req sketch: stream serialize-deserialize exact mode", "[req_sketch]")
269
271
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
270
272
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
271
273
  REQUIRE(sketch2.get_n() == sketch.get_n());
272
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
273
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
274
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
275
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
274
276
  }
275
277
 
276
278
  TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
@@ -288,8 +290,8 @@ TEST_CASE("req sketch: byte serialize-deserialize exact mode", "[req_sketch]") {
288
290
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
289
291
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
290
292
  REQUIRE(sketch2.get_n() == sketch.get_n());
291
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
292
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
293
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
294
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
293
295
  }
294
296
 
295
297
  TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sketch]") {
@@ -306,8 +308,8 @@ TEST_CASE("req sketch: stream serialize-deserialize estimation mode", "[req_sket
306
308
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
307
309
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
308
310
  REQUIRE(sketch2.get_n() == sketch.get_n());
309
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
310
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
311
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
312
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
311
313
  }
312
314
 
313
315
  TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch]") {
@@ -324,8 +326,8 @@ TEST_CASE("req sketch: byte serialize-deserialize estimation mode", "[req_sketch
324
326
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
325
327
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
326
328
  REQUIRE(sketch2.get_n() == sketch.get_n());
327
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
328
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
329
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
330
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
329
331
  }
330
332
 
331
333
  TEST_CASE("req sketch: serialize deserialize stream and bytes equivalence", "[req_sketch]") {
@@ -350,8 +352,8 @@ TEST_CASE("req sketch: serialize deserialize stream and bytes equivalence", "[re
350
352
  REQUIRE(sketch2.is_estimation_mode() == sketch.is_estimation_mode());
351
353
  REQUIRE(sketch2.get_num_retained() == sketch.get_num_retained());
352
354
  REQUIRE(sketch2.get_n() == sketch.get_n());
353
- REQUIRE(sketch2.get_min_value() == sketch.get_min_value());
354
- REQUIRE(sketch2.get_max_value() == sketch.get_max_value());
355
+ REQUIRE(sketch2.get_min_item() == sketch.get_min_item());
356
+ REQUIRE(sketch2.get_max_item() == sketch.get_max_item());
355
357
  }
356
358
 
357
359
  TEST_CASE("req sketch: stream deserialize from Java - empty", "[req_sketch]") {
@@ -363,8 +365,8 @@ TEST_CASE("req sketch: stream deserialize from Java - empty", "[req_sketch]") {
363
365
  REQUIRE_FALSE(sketch.is_estimation_mode());
364
366
  REQUIRE(sketch.get_n() == 0);
365
367
  REQUIRE(sketch.get_num_retained() == 0);
366
- REQUIRE(std::isnan(sketch.get_min_value()));
367
- REQUIRE(std::isnan(sketch.get_max_value()));
368
+ REQUIRE_THROWS_AS(sketch.get_min_item(), std::runtime_error);
369
+ REQUIRE_THROWS_AS(sketch.get_max_item(), std::runtime_error);
368
370
  }
369
371
 
370
372
  TEST_CASE("req sketch: stream deserialize from Java - single item", "[req_sketch]") {
@@ -376,10 +378,10 @@ TEST_CASE("req sketch: stream deserialize from Java - single item", "[req_sketch
376
378
  REQUIRE_FALSE(sketch.is_estimation_mode());
377
379
  REQUIRE(sketch.get_n() == 1);
378
380
  REQUIRE(sketch.get_num_retained() == 1);
379
- REQUIRE(sketch.get_min_value() == 1);
380
- REQUIRE(sketch.get_max_value() == 1);
381
- REQUIRE(sketch.get_rank(1.0f) == 0);
382
- REQUIRE(sketch.get_rank<true>(1.0f) == 1);
381
+ REQUIRE(sketch.get_min_item() == 1);
382
+ REQUIRE(sketch.get_max_item() == 1);
383
+ REQUIRE(sketch.get_rank(1.0f, false) == 0);
384
+ REQUIRE(sketch.get_rank(1.0f) == 1);
383
385
  }
384
386
 
385
387
  TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]") {
@@ -391,9 +393,9 @@ TEST_CASE("req sketch: stream deserialize from Java - raw items", "[req_sketch]"
391
393
  REQUIRE_FALSE(sketch.is_estimation_mode());
392
394
  REQUIRE(sketch.get_n() == 4);
393
395
  REQUIRE(sketch.get_num_retained() == 4);
394
- REQUIRE(sketch.get_min_value() == 0);
395
- REQUIRE(sketch.get_max_value() == 3);
396
- REQUIRE(sketch.get_rank(2.0f) == 0.5);
396
+ REQUIRE(sketch.get_min_item() == 0);
397
+ REQUIRE(sketch.get_max_item() == 3);
398
+ REQUIRE(sketch.get_rank(2.0f, false) == 0.5);
397
399
  }
398
400
 
399
401
  TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]") {
@@ -405,9 +407,9 @@ TEST_CASE("req sketch: stream deserialize from Java - exact mode", "[req_sketch]
405
407
  REQUIRE_FALSE(sketch.is_estimation_mode());
406
408
  REQUIRE(sketch.get_n() == 100);
407
409
  REQUIRE(sketch.get_num_retained() == 100);
408
- REQUIRE(sketch.get_min_value() == 0);
409
- REQUIRE(sketch.get_max_value() == 99);
410
- REQUIRE(sketch.get_rank(50.0f) == 0.5);
410
+ REQUIRE(sketch.get_min_item() == 0);
411
+ REQUIRE(sketch.get_max_item() == 99);
412
+ REQUIRE(sketch.get_rank(50.0f, false) == 0.5);
411
413
  }
412
414
 
413
415
  TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sketch]") {
@@ -419,9 +421,9 @@ TEST_CASE("req sketch: stream deserialize from Java - estimation mode", "[req_sk
419
421
  REQUIRE(sketch.is_estimation_mode());
420
422
  REQUIRE(sketch.get_n() == 10000);
421
423
  REQUIRE(sketch.get_num_retained() == 2942);
422
- REQUIRE(sketch.get_min_value() == 0);
423
- REQUIRE(sketch.get_max_value() == 9999);
424
- REQUIRE(sketch.get_rank(5000.0f) == 0.5);
424
+ REQUIRE(sketch.get_min_item() == 0);
425
+ REQUIRE(sketch.get_max_item() == 9999);
426
+ REQUIRE(sketch.get_rank(5000.0f, false) == 0.5);
425
427
  }
426
428
 
427
429
  TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
@@ -431,11 +433,11 @@ TEST_CASE("req sketch: merge into empty", "[req_sketch]") {
431
433
  for (size_t i = 0; i < 1000; ++i) sketch2.update(static_cast<float>(i));
432
434
 
433
435
  sketch1.merge(sketch2);
434
- REQUIRE(sketch1.get_min_value() == 0);
435
- REQUIRE(sketch1.get_max_value() == 999);
436
- REQUIRE(sketch1.get_quantile(0.25) == Approx(250).margin(3));
437
- REQUIRE(sketch1.get_quantile(0.5) == Approx(500).margin(3));
438
- REQUIRE(sketch1.get_quantile(0.75) == Approx(750).margin(3));
436
+ REQUIRE(sketch1.get_min_item() == 0);
437
+ REQUIRE(sketch1.get_max_item() == 999);
438
+ REQUIRE(sketch1.get_quantile(0.25) == Approx(250).epsilon(0.01));
439
+ REQUIRE(sketch1.get_quantile(0.5) == Approx(500).epsilon(0.01));
440
+ REQUIRE(sketch1.get_quantile(0.75) == Approx(750).epsilon(0.01));
439
441
  REQUIRE(sketch1.get_rank(500.0f) == Approx(0.5).margin(0.01));
440
442
  }
441
443
 
@@ -447,11 +449,11 @@ TEST_CASE("req sketch: merge", "[req_sketch]") {
447
449
  for (size_t i = 1000; i < 2000; ++i) sketch2.update(static_cast<float>(i));
448
450
 
449
451
  sketch1.merge(sketch2);
450
- REQUIRE(sketch1.get_min_value() == 0);
451
- REQUIRE(sketch1.get_max_value() == 1999);
452
- REQUIRE(sketch1.get_quantile(0.25) == Approx(500).margin(3));
453
- REQUIRE(sketch1.get_quantile(0.5) == Approx(1000).margin(1));
454
- REQUIRE(sketch1.get_quantile(0.75) == Approx(1500).margin(1));
452
+ REQUIRE(sketch1.get_min_item() == 0);
453
+ REQUIRE(sketch1.get_max_item() == 1999);
454
+ REQUIRE(sketch1.get_quantile(0.25) == Approx(500).epsilon(0.01));
455
+ REQUIRE(sketch1.get_quantile(0.5) == Approx(1000).epsilon(0.01));
456
+ REQUIRE(sketch1.get_quantile(0.75) == Approx(1500).epsilon(0.01));
455
457
  REQUIRE(sketch1.get_rank(1000.0f) == Approx(0.5).margin(0.01));
456
458
  }
457
459
 
@@ -469,9 +471,9 @@ TEST_CASE("req sketch: merge multiple", "[req_sketch]") {
469
471
  sketch.merge(sketch1);
470
472
  sketch.merge(sketch2);
471
473
  sketch.merge(sketch3);
472
- REQUIRE(sketch.get_min_value() == 0);
473
- REQUIRE(sketch.get_max_value() == 119);
474
- REQUIRE(sketch.get_quantile(0.5) == Approx(60).margin(3));
474
+ REQUIRE(sketch.get_min_item() == 0);
475
+ REQUIRE(sketch.get_max_item() == 119);
476
+ REQUIRE(sketch.get_quantile(0.5) == Approx(60).epsilon(0.02));
475
477
  REQUIRE(sketch.get_rank(60.0f) == Approx(0.5).margin(0.01));
476
478
  }
477
479
 
@@ -503,8 +505,8 @@ TEST_CASE("req sketch: type conversion - several levels", "[req_sketch]") {
503
505
  REQUIRE(req_float.get_n() == req_double.get_n());
504
506
  REQUIRE(req_float.get_num_retained() == req_double.get_num_retained());
505
507
 
506
- auto sv_float = req_float.get_sorted_view(false);
507
- auto sv_double = req_double.get_sorted_view(false);
508
+ auto sv_float = req_float.get_sorted_view();
509
+ auto sv_double = req_double.get_sorted_view();
508
510
  auto sv_float_it = sv_float.begin();
509
511
  auto sv_double_it = sv_double.begin();
510
512
  while (sv_float_it != sv_float.end()) {
@@ -551,6 +553,17 @@ TEST_CASE("req sketch: type conversion - custom types") {
551
553
  REQUIRE(sb.get_n() == 3);
552
554
  }
553
555
 
556
+ TEST_CASE("get_rank equivalence") {
557
+ req_sketch<int> sketch(12);
558
+ const size_t n = 1000;
559
+ for (size_t i = 0; i < n; ++i) sketch.update(i);
560
+ REQUIRE(sketch.get_n() == n);
561
+ auto view = sketch.get_sorted_view();
562
+ for (size_t i = 0; i < n; ++i) {
563
+ REQUIRE(sketch.get_rank(i) == view.get_rank(i));
564
+ }
565
+ }
566
+
554
567
  //TEST_CASE("for manual comparison with Java") {
555
568
  // req_sketch<float> sketch(12, false);
556
569
  // for (size_t i = 0; i < 100000; ++i) sketch.update(i);