datasketches 0.2.7 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -0,0 +1,459 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch2/catch.hpp>
21
+
22
+ #include <vector>
23
+ #include <utility>
24
+
25
+ #include <iostream>
26
+
27
+ #include "quantiles_sorted_view.hpp"
28
+
29
+ namespace datasketches {
30
+
31
+ TEST_CASE("empty", "sorted view") {
32
+ auto view = quantiles_sorted_view<float, std::less<float>, std::allocator<float>>(1, std::less<float>(), std::allocator<float>());
33
+ REQUIRE_THROWS_AS(view.get_rank(0), std::runtime_error);
34
+ REQUIRE_THROWS_AS(view.get_quantile(0), std::runtime_error);
35
+ const float split_points[1] {0};
36
+ REQUIRE_THROWS_AS(view.get_CDF(split_points, 1), std::runtime_error);
37
+ REQUIRE_THROWS_AS(view.get_PMF(split_points, 1), std::runtime_error);
38
+ }
39
+
40
+ TEST_CASE("set 0", "sorted view") {
41
+ auto view = quantiles_sorted_view<float, std::less<float>, std::allocator<float>>(1, std::less<float>(), std::allocator<float>());
42
+ std::vector<float> l0 {10};
43
+ view.add(l0.begin(), l0.end(), 1);
44
+ view.convert_to_cummulative();
45
+ REQUIRE(view.size() == 1);
46
+
47
+ auto it = view.begin();
48
+ // using operator->
49
+ REQUIRE(it->first == 10);
50
+ REQUIRE(it->second == 1);
51
+ // using operator*
52
+ REQUIRE((*it).first == 10);
53
+ REQUIRE((*it).second == 1);
54
+ REQUIRE(it.get_weight() == 1);
55
+ REQUIRE(it.get_cumulative_weight() == 1);
56
+ REQUIRE(it.get_cumulative_weight(false) == 0);
57
+ ++it;
58
+ REQUIRE(it == view.end());
59
+
60
+ REQUIRE(view.get_rank(5, true) == 0);
61
+ REQUIRE(view.get_rank(10, true) == 1);
62
+ REQUIRE(view.get_rank(15, true) == 1);
63
+
64
+ REQUIRE(view.get_rank(5, false) == 0);
65
+ REQUIRE(view.get_rank(10, false) == 0);
66
+ REQUIRE(view.get_rank(15, false) == 1);
67
+
68
+ REQUIRE(view.get_quantile(0, true) == 10);
69
+ REQUIRE(view.get_quantile(0.5, true) == 10);
70
+ REQUIRE(view.get_quantile(1, true) == 10);
71
+
72
+ REQUIRE(view.get_quantile(0, false) == 10);
73
+ REQUIRE(view.get_quantile(0.5, false) == 10);
74
+ REQUIRE(view.get_quantile(1, false) == 10);
75
+ }
76
+
77
+ TEST_CASE("set 1", "sorted view") {
78
+ auto view = quantiles_sorted_view<float, std::less<float>, std::allocator<float>>(1, std::less<float>(), std::allocator<float>());
79
+ std::vector<float> l0 {10, 10};
80
+ view.add(l0.begin(), l0.end(), 1);
81
+ view.convert_to_cummulative();
82
+ REQUIRE(view.size() == 2);
83
+
84
+ auto it = view.begin();
85
+ REQUIRE(it->first == 10);
86
+ REQUIRE(it->second == 1);
87
+ REQUIRE(it.get_weight() == 1);
88
+ REQUIRE(it.get_cumulative_weight() == 1);
89
+ REQUIRE(it.get_cumulative_weight(false) == 0);
90
+ ++it;
91
+ REQUIRE(it->first == 10);
92
+ REQUIRE(it->second == 2);
93
+ REQUIRE(it.get_weight() == 1);
94
+ REQUIRE(it.get_cumulative_weight() == 2);
95
+ REQUIRE(it.get_cumulative_weight(false) == 1);
96
+ ++it;
97
+ REQUIRE(it == view.end());
98
+
99
+ REQUIRE(view.get_rank(5, true) == 0);
100
+ REQUIRE(view.get_rank(10, true) == 1);
101
+ REQUIRE(view.get_rank(15, true) == 1);
102
+
103
+ REQUIRE(view.get_rank(5, false) == 0);
104
+ REQUIRE(view.get_rank(10, false) == 0);
105
+ REQUIRE(view.get_rank(15, false) == 1);
106
+
107
+ REQUIRE(view.get_quantile(0, true) == 10);
108
+ REQUIRE(view.get_quantile(0.25, true) == 10);
109
+ REQUIRE(view.get_quantile(0.5, true) == 10);
110
+ REQUIRE(view.get_quantile(0.75, true) == 10);
111
+ REQUIRE(view.get_quantile(1, true) == 10);
112
+
113
+ REQUIRE(view.get_quantile(0, false) == 10);
114
+ REQUIRE(view.get_quantile(0.25, false) == 10);
115
+ REQUIRE(view.get_quantile(0.5, false) == 10);
116
+ REQUIRE(view.get_quantile(0.75, false) == 10);
117
+ REQUIRE(view.get_quantile(1, false) == 10);
118
+ }
119
+
120
+ TEST_CASE("set 2", "sorted view") {
121
+ auto view = quantiles_sorted_view<float, std::less<float>, std::allocator<float>>(1, std::less<float>(), std::allocator<float>());
122
+ std::vector<float> l1 {10, 20, 30, 40};
123
+ view.add(l1.begin(), l1.end(), 2);
124
+ view.convert_to_cummulative();
125
+ REQUIRE(view.size() == 4);
126
+
127
+ auto it = view.begin();
128
+ REQUIRE(it->first == 10);
129
+ REQUIRE(it->second == 2);
130
+ REQUIRE(it.get_weight() == 2);
131
+ REQUIRE(it.get_cumulative_weight() == 2);
132
+ REQUIRE(it.get_cumulative_weight(false) == 0);
133
+ ++it;
134
+ REQUIRE(it->first == 20);
135
+ REQUIRE(it->second == 4);
136
+ REQUIRE(it.get_weight() == 2);
137
+ REQUIRE(it.get_cumulative_weight() == 4);
138
+ REQUIRE(it.get_cumulative_weight(false) == 2);
139
+ ++it;
140
+ REQUIRE(it->first == 30);
141
+ REQUIRE(it->second == 6);
142
+ REQUIRE(it.get_weight() == 2);
143
+ REQUIRE(it.get_cumulative_weight() == 6);
144
+ REQUIRE(it.get_cumulative_weight(false) == 4);
145
+ ++it;
146
+ REQUIRE(it->first == 40);
147
+ REQUIRE(it->second == 8);
148
+ REQUIRE(it.get_weight() == 2);
149
+ REQUIRE(it.get_cumulative_weight() == 8);
150
+ REQUIRE(it.get_cumulative_weight(false) == 6);
151
+ ++it;
152
+ REQUIRE(it == view.end());
153
+
154
+ REQUIRE(view.get_rank(5, true) == 0);
155
+ REQUIRE(view.get_rank(10, true) == 0.25);
156
+ REQUIRE(view.get_rank(15, true) == 0.25);
157
+ REQUIRE(view.get_rank(20, true) == 0.5);
158
+ REQUIRE(view.get_rank(25, true) == 0.5);
159
+ REQUIRE(view.get_rank(30, true) == 0.75);
160
+ REQUIRE(view.get_rank(35, true) == 0.75);
161
+ REQUIRE(view.get_rank(40, true) == 1);
162
+ REQUIRE(view.get_rank(45, true) == 1);
163
+
164
+ REQUIRE(view.get_rank(5, false) == 0);
165
+ REQUIRE(view.get_rank(10, false) == 0);
166
+ REQUIRE(view.get_rank(15, false) == 0.25);
167
+ REQUIRE(view.get_rank(20, false) == 0.25);
168
+ REQUIRE(view.get_rank(25, false) == 0.5);
169
+ REQUIRE(view.get_rank(30, false) == 0.5);
170
+ REQUIRE(view.get_rank(35, false) == 0.75);
171
+ REQUIRE(view.get_rank(40, false) == 0.75);
172
+ REQUIRE(view.get_rank(45, false) == 1);
173
+
174
+ REQUIRE(view.get_quantile(0, true) == 10);
175
+ REQUIRE(view.get_quantile(0.0625, true) == 10);
176
+ REQUIRE(view.get_quantile(0.125, true) == 10);
177
+ REQUIRE(view.get_quantile(0.1875, true) == 10);
178
+ REQUIRE(view.get_quantile(0.25, true) == 10);
179
+ REQUIRE(view.get_quantile(0.3125, true) == 20);
180
+ REQUIRE(view.get_quantile(0.375, true) == 20);
181
+ REQUIRE(view.get_quantile(0.4375, true) == 20);
182
+ REQUIRE(view.get_quantile(0.5, true) == 20);
183
+ REQUIRE(view.get_quantile(0.5625, true) == 30);
184
+ REQUIRE(view.get_quantile(0.625, true) == 30);
185
+ REQUIRE(view.get_quantile(0.6875, true) == 30);
186
+ REQUIRE(view.get_quantile(0.75, true) == 30);
187
+ REQUIRE(view.get_quantile(0.8125, true) == 40);
188
+ REQUIRE(view.get_quantile(0.875, true) == 40);
189
+ REQUIRE(view.get_quantile(0.9375, true) == 40);
190
+ REQUIRE(view.get_quantile(1, true) == 40);
191
+
192
+ REQUIRE(view.get_quantile(0, false) == 10);
193
+ REQUIRE(view.get_quantile(0.0625, false) == 10);
194
+ REQUIRE(view.get_quantile(0.125, false) == 10);
195
+ REQUIRE(view.get_quantile(0.1875, false) == 10);
196
+ REQUIRE(view.get_quantile(0.25, false) == 20);
197
+ REQUIRE(view.get_quantile(0.3125, false) == 20);
198
+ REQUIRE(view.get_quantile(0.375, false) == 20);
199
+ REQUIRE(view.get_quantile(0.4375, false) == 20);
200
+ REQUIRE(view.get_quantile(0.5, false) == 30);
201
+ REQUIRE(view.get_quantile(0.5625, false) == 30);
202
+ REQUIRE(view.get_quantile(0.625, false) == 30);
203
+ REQUIRE(view.get_quantile(0.6875, false) == 30);
204
+ REQUIRE(view.get_quantile(0.75, false) == 40);
205
+ REQUIRE(view.get_quantile(0.8125, false) == 40);
206
+ REQUIRE(view.get_quantile(0.875, false) == 40);
207
+ REQUIRE(view.get_quantile(0.9375, false) == 40);
208
+ REQUIRE(view.get_quantile(1, false) == 40);
209
+ }
210
+
211
+ TEST_CASE("set 3", "sorted view") {
212
+ auto view = quantiles_sorted_view<float, std::less<float>, std::allocator<float>>(8, std::less<float>(), std::allocator<float>());
213
+ std::vector<float> l1 {10, 20, 20, 30, 30, 30, 40, 50};
214
+ view.add(l1.begin(), l1.end(), 2);
215
+ view.convert_to_cummulative();
216
+ REQUIRE(view.size() == 8);
217
+
218
+ auto it = view.begin();
219
+ REQUIRE(it->first == 10);
220
+ REQUIRE(it->second == 2);
221
+ REQUIRE(it.get_weight() == 2);
222
+ ++it;
223
+ REQUIRE(it->first == 20);
224
+ REQUIRE(it->second == 4);
225
+ REQUIRE(it.get_weight() == 2);
226
+ ++it;
227
+ REQUIRE(it->first == 20);
228
+ REQUIRE(it->second == 6);
229
+ REQUIRE(it.get_weight() == 2);
230
+ ++it;
231
+ REQUIRE(it->first == 30);
232
+ REQUIRE(it->second == 8);
233
+ REQUIRE(it.get_weight() == 2);
234
+ ++it;
235
+ REQUIRE(it->first == 30);
236
+ REQUIRE(it->second == 10);
237
+ REQUIRE(it.get_weight() == 2);
238
+ ++it;
239
+ REQUIRE(it->first == 30);
240
+ REQUIRE(it->second == 12);
241
+ REQUIRE(it.get_weight() == 2);
242
+ ++it;
243
+ REQUIRE(it->first == 40);
244
+ REQUIRE(it->second == 14);
245
+ REQUIRE(it.get_weight() == 2);
246
+ ++it;
247
+ REQUIRE(it->first == 50);
248
+ REQUIRE(it->second == 16);
249
+ REQUIRE(it.get_weight() == 2);
250
+
251
+ REQUIRE(view.get_rank(5, true) == 0);
252
+ REQUIRE(view.get_rank(10, true) == 0.125);
253
+ REQUIRE(view.get_rank(15, true) == 0.125);
254
+ REQUIRE(view.get_rank(20, true) == 0.375);
255
+ REQUIRE(view.get_rank(25, true) == 0.375);
256
+ REQUIRE(view.get_rank(30, true) == 0.75);
257
+ REQUIRE(view.get_rank(35, true) == 0.75);
258
+ REQUIRE(view.get_rank(40, true) == 0.875);
259
+ REQUIRE(view.get_rank(45, true) == 0.875);
260
+ REQUIRE(view.get_rank(50, true) == 1);
261
+ REQUIRE(view.get_rank(55, true) == 1);
262
+
263
+ REQUIRE(view.get_rank(5, false) == 0);
264
+ REQUIRE(view.get_rank(10, false) == 0);
265
+ REQUIRE(view.get_rank(15, false) == 0.125);
266
+ REQUIRE(view.get_rank(20, false) == 0.125);
267
+ REQUIRE(view.get_rank(25, false) == 0.375);
268
+ REQUIRE(view.get_rank(30, false) == 0.375);
269
+ REQUIRE(view.get_rank(35, false) == 0.75);
270
+ REQUIRE(view.get_rank(40, false) == 0.75);
271
+ REQUIRE(view.get_rank(45, false) == 0.875);
272
+ REQUIRE(view.get_rank(50, false) == 0.875);
273
+ REQUIRE(view.get_rank(55, false) == 1);
274
+
275
+ REQUIRE(view.get_quantile(0, true) == 10);
276
+ REQUIRE(view.get_quantile(0.03125, true) == 10);
277
+ REQUIRE(view.get_quantile(0.0625, true) == 10);
278
+ REQUIRE(view.get_quantile(0.09375, true) == 10);
279
+ REQUIRE(view.get_quantile(0.125, true) == 10);
280
+ REQUIRE(view.get_quantile(0.15625, true) == 20);
281
+ REQUIRE(view.get_quantile(0.1875, true) == 20);
282
+ REQUIRE(view.get_quantile(0.21875, true) == 20);
283
+ REQUIRE(view.get_quantile(0.25, true) == 20);
284
+ REQUIRE(view.get_quantile(0.28125, true) == 20);
285
+ REQUIRE(view.get_quantile(0.3125, true) == 20);
286
+ REQUIRE(view.get_quantile(0.34375, true) == 20);
287
+ REQUIRE(view.get_quantile(0.375, true) == 20);
288
+ REQUIRE(view.get_quantile(0.40625, true) == 30);
289
+ REQUIRE(view.get_quantile(0.4375, true) == 30);
290
+ REQUIRE(view.get_quantile(0.46875, true) == 30);
291
+ REQUIRE(view.get_quantile(0.5, true) == 30);
292
+ REQUIRE(view.get_quantile(0.53125, true) == 30);
293
+ REQUIRE(view.get_quantile(0.5625, true) == 30);
294
+ REQUIRE(view.get_quantile(0.59375, true) == 30);
295
+ REQUIRE(view.get_quantile(0.625, true) == 30);
296
+ REQUIRE(view.get_quantile(0.65625, true) == 30);
297
+ REQUIRE(view.get_quantile(0.6875, true) == 30);
298
+ REQUIRE(view.get_quantile(0.71875, true) == 30);
299
+ REQUIRE(view.get_quantile(0.75, true) == 30);
300
+ REQUIRE(view.get_quantile(0.78125, true) == 40);
301
+ REQUIRE(view.get_quantile(0.8125, true) == 40);
302
+ REQUIRE(view.get_quantile(0.84375, true) == 40);
303
+ REQUIRE(view.get_quantile(0.875, true) == 40);
304
+ REQUIRE(view.get_quantile(0.90625, true) == 50);
305
+ REQUIRE(view.get_quantile(0.9375, true) == 50);
306
+ REQUIRE(view.get_quantile(0.96875, true) == 50);
307
+ REQUIRE(view.get_quantile(1, true) == 50);
308
+
309
+ REQUIRE(view.get_quantile(0, false) == 10);
310
+ REQUIRE(view.get_quantile(0.03125, false) == 10);
311
+ REQUIRE(view.get_quantile(0.0625, false) == 10);
312
+ REQUIRE(view.get_quantile(0.09375, false) == 10);
313
+ REQUIRE(view.get_quantile(0.125, false) == 20);
314
+ REQUIRE(view.get_quantile(0.15625, false) == 20);
315
+ REQUIRE(view.get_quantile(0.1875, false) == 20);
316
+ REQUIRE(view.get_quantile(0.21875, false) == 20);
317
+ REQUIRE(view.get_quantile(0.25, false) == 20);
318
+ REQUIRE(view.get_quantile(0.28125, false) == 20);
319
+ REQUIRE(view.get_quantile(0.3125, false) == 20);
320
+ REQUIRE(view.get_quantile(0.34375, false) == 20);
321
+ REQUIRE(view.get_quantile(0.375, false) == 30);
322
+ REQUIRE(view.get_quantile(0.40625, false) == 30);
323
+ REQUIRE(view.get_quantile(0.4375, false) == 30);
324
+ REQUIRE(view.get_quantile(0.46875, false) == 30);
325
+ REQUIRE(view.get_quantile(0.5, false) == 30);
326
+ REQUIRE(view.get_quantile(0.53125, false) == 30);
327
+ REQUIRE(view.get_quantile(0.5625, false) == 30);
328
+ REQUIRE(view.get_quantile(0.59375, false) == 30);
329
+ REQUIRE(view.get_quantile(0.625, false) == 30);
330
+ REQUIRE(view.get_quantile(0.65625, false) == 30);
331
+ REQUIRE(view.get_quantile(0.6875, false) == 30);
332
+ REQUIRE(view.get_quantile(0.71875, false) == 30);
333
+ REQUIRE(view.get_quantile(0.75, false) == 40);
334
+ REQUIRE(view.get_quantile(0.78125, false) == 40);
335
+ REQUIRE(view.get_quantile(0.8125, false) == 40);
336
+ REQUIRE(view.get_quantile(0.84375, false) == 40);
337
+ REQUIRE(view.get_quantile(0.875, false) == 50);
338
+ REQUIRE(view.get_quantile(0.90625, false) == 50);
339
+ REQUIRE(view.get_quantile(0.9375, false) == 50);
340
+ REQUIRE(view.get_quantile(0.96875, false) == 50);
341
+ REQUIRE(view.get_quantile(1, false) == 50);
342
+ }
343
+
344
+ TEST_CASE("set 4", "sorted view") {
345
+ auto view = quantiles_sorted_view<float, std::less<float>, std::allocator<float>>(8, std::less<float>(), std::allocator<float>());
346
+ std::vector<float> l1 {10, 20, 30, 40};
347
+ view.add(l1.begin(), l1.end(), 2);
348
+ std::vector<float> l0 {10, 20, 30, 40};
349
+ view.add(l0.begin(), l0.end(), 1);
350
+ view.convert_to_cummulative();
351
+ REQUIRE(view.size() == 8);
352
+
353
+ auto it = view.begin();
354
+ REQUIRE(it->first == 10);
355
+ REQUIRE(it->second == 2);
356
+ REQUIRE(it.get_weight() == 2);
357
+ ++it;
358
+ REQUIRE(it->first == 10);
359
+ REQUIRE(it->second == 3);
360
+ REQUIRE(it.get_weight() == 1);
361
+ ++it;
362
+ REQUIRE(it->first == 20);
363
+ REQUIRE(it->second == 5);
364
+ REQUIRE(it.get_weight() == 2);
365
+ ++it;
366
+ REQUIRE(it->first == 20);
367
+ REQUIRE(it->second == 6);
368
+ REQUIRE(it.get_weight() == 1);
369
+ ++it;
370
+ REQUIRE(it->first == 30);
371
+ REQUIRE(it->second == 8);
372
+ REQUIRE(it.get_weight() == 2);
373
+ ++it;
374
+ REQUIRE(it->first == 30);
375
+ REQUIRE(it->second == 9);
376
+ REQUIRE(it.get_weight() == 1);
377
+ ++it;
378
+ REQUIRE(it->first == 40);
379
+ REQUIRE(it->second == 11);
380
+ REQUIRE(it.get_weight() == 2);
381
+ ++it;
382
+ REQUIRE(it->first == 40);
383
+ REQUIRE(it->second == 12);
384
+ REQUIRE(it.get_weight() == 1);
385
+
386
+ REQUIRE(view.get_rank(5, true) == 0);
387
+ REQUIRE(view.get_rank(10, true) == 0.25);
388
+ REQUIRE(view.get_rank(15, true) == 0.25);
389
+ REQUIRE(view.get_rank(20, true) == 0.5);
390
+ REQUIRE(view.get_rank(25, true) == 0.5);
391
+ REQUIRE(view.get_rank(30, true) == 0.75);
392
+ REQUIRE(view.get_rank(35, true) == 0.75);
393
+ REQUIRE(view.get_rank(40, true) == 1);
394
+ REQUIRE(view.get_rank(45, true) == 1);
395
+
396
+ REQUIRE(view.get_rank(5, false) == 0);
397
+ REQUIRE(view.get_rank(10, false) == 0);
398
+ REQUIRE(view.get_rank(15, false) == 0.25);
399
+ REQUIRE(view.get_rank(20, false) == 0.25);
400
+ REQUIRE(view.get_rank(25, false) == 0.5);
401
+ REQUIRE(view.get_rank(30, false) == 0.5);
402
+ REQUIRE(view.get_rank(35, false) == 0.75);
403
+ REQUIRE(view.get_rank(40, false) == 0.75);
404
+ REQUIRE(view.get_rank(45, false) == 1);
405
+
406
+ REQUIRE(view.get_quantile(0, true) == 10);
407
+ REQUIRE(view.get_quantile(0.0417, true) == 10);
408
+ REQUIRE(view.get_quantile(0.0833, true) == 10);
409
+ REQUIRE(view.get_quantile(0.125, true) == 10);
410
+ REQUIRE(view.get_quantile(0.1667, true) == 10);
411
+ REQUIRE(view.get_quantile(0.2083, true) == 10);
412
+ REQUIRE(view.get_quantile(0.25, true) == 10);
413
+ REQUIRE(view.get_quantile(0.2917, true) == 20);
414
+ REQUIRE(view.get_quantile(0.3333, true) == 20);
415
+ REQUIRE(view.get_quantile(0.375, true) == 20);
416
+ REQUIRE(view.get_quantile(0.4167, true) == 20);
417
+ REQUIRE(view.get_quantile(0.4583, true) == 20);
418
+ REQUIRE(view.get_quantile(0.5, true) == 20);
419
+ REQUIRE(view.get_quantile(0.5417, true) == 30);
420
+ REQUIRE(view.get_quantile(0.5833, true) == 30);
421
+ REQUIRE(view.get_quantile(0.625, true) == 30);
422
+ REQUIRE(view.get_quantile(0.6667, true) == 30);
423
+ REQUIRE(view.get_quantile(0.7083, true) == 30);
424
+ REQUIRE(view.get_quantile(0.75, true) == 30);
425
+ REQUIRE(view.get_quantile(0.7917, true) == 40);
426
+ REQUIRE(view.get_quantile(0.8333, true) == 40);
427
+ REQUIRE(view.get_quantile(0.875, true) == 40);
428
+ REQUIRE(view.get_quantile(0.9167, true) == 40);
429
+ REQUIRE(view.get_quantile(0.9583, true) == 40);
430
+ REQUIRE(view.get_quantile(1, true) == 40);
431
+
432
+ REQUIRE(view.get_quantile(0, false) == 10);
433
+ REQUIRE(view.get_quantile(0.0417, false) == 10);
434
+ REQUIRE(view.get_quantile(0.0833, false) == 10);
435
+ REQUIRE(view.get_quantile(0.125, false) == 10);
436
+ REQUIRE(view.get_quantile(0.1667, false) == 10);
437
+ REQUIRE(view.get_quantile(0.2083, false) == 10);
438
+ REQUIRE(view.get_quantile(0.25, false) == 20);
439
+ REQUIRE(view.get_quantile(0.2917, false) == 20);
440
+ REQUIRE(view.get_quantile(0.3333, false) == 20);
441
+ REQUIRE(view.get_quantile(0.375, false) == 20);
442
+ REQUIRE(view.get_quantile(0.4167, false) == 20);
443
+ REQUIRE(view.get_quantile(0.4583, false) == 20);
444
+ REQUIRE(view.get_quantile(0.5, false) == 30);
445
+ REQUIRE(view.get_quantile(0.5417, false) == 30);
446
+ REQUIRE(view.get_quantile(0.5833, false) == 30);
447
+ REQUIRE(view.get_quantile(0.625, false) == 30);
448
+ REQUIRE(view.get_quantile(0.6667, false) == 30);
449
+ REQUIRE(view.get_quantile(0.7083, false) == 30);
450
+ REQUIRE(view.get_quantile(0.75, false) == 40);
451
+ REQUIRE(view.get_quantile(0.7917, false) == 40);
452
+ REQUIRE(view.get_quantile(0.8333, false) == 40);
453
+ REQUIRE(view.get_quantile(0.875, false) == 40);
454
+ REQUIRE(view.get_quantile(0.9167, false) == 40);
455
+ REQUIRE(view.get_quantile(0.9583, false) == 40);
456
+ REQUIRE(view.get_quantile(1, false) == 40);
457
+ }
458
+
459
+ } /* namespace datasketches */
@@ -17,7 +17,7 @@
17
17
 
18
18
  add_executable(cpc_test)
19
19
 
20
- target_link_libraries(cpc_test cpc common_test)
20
+ target_link_libraries(cpc_test cpc common_test_lib)
21
21
 
22
22
  set_target_properties(cpc_test PROPERTIES
23
23
  CXX_STANDARD 11
@@ -34,7 +34,7 @@ namespace datasketches {
34
34
 
35
35
  /*
36
36
  * Based on Java implementation here:
37
- * https://github.com/DataSketches/sketches-core/blob/master/src/main/java/com/yahoo/sketches/frequencies/ItemsSketch.java
37
+ * https://github.com/apache/datasketches-java/blob/master/src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java
38
38
  * author Alexander Saydakov
39
39
  */
40
40
 
@@ -46,7 +46,6 @@ template<
46
46
  typename W = uint64_t,
47
47
  typename H = std::hash<T>,
48
48
  typename E = std::equal_to<T>,
49
- typename S = serde<T>, // deprecated, to be removed in the next major version
50
49
  typename A = std::allocator<T>
51
50
  >
52
51
  class frequent_items_sketch {
@@ -60,11 +59,13 @@ public:
60
59
  * @param lg_max_map_size Log2 of the physical size of the internal hash map managed by this
61
60
  * sketch. The maximum capacity of this internal hash map is 0.75 times 2^lg_max_map_size.
62
61
  * Both the ultimate accuracy and size of this sketch are functions of lg_max_map_size.
63
- *
64
62
  * @param lg_start_map_size Log2 of the starting physical size of the internal hash
65
63
  * map managed by this sketch.
64
+ * @param equal instance of Equality operator
65
+ * @param allocator instance of an Allocator
66
66
  */
67
- explicit frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size = LG_MIN_MAP_SIZE, const A& allocator = A());
67
+ explicit frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size = LG_MIN_MAP_SIZE,
68
+ const E& equal = E(), const A& allocator = A());
68
69
 
69
70
  /**
70
71
  * Update this sketch with an item and a positive weight (frequency count).
@@ -158,7 +159,7 @@ public:
158
159
  /**
159
160
  * Returns epsilon used to compute <i>a priori</i> error.
160
161
  * This is just the value <i>3.5 / maxMapSize</i>.
161
- * @param maxMapSize the planned map size to be used when constructing this sketch.
162
+ * @param lg_max_map_size the planned map size to be used when constructing this sketch.
162
163
  * @return epsilon used to compute <i>a priori</i> error.
163
164
  */
164
165
  static double get_epsilon(uint8_t lg_max_map_size);
@@ -167,13 +168,13 @@ public:
167
168
  * Returns the estimated <i>a priori</i> error given the max_map_size for the sketch and the
168
169
  * estimated_total_stream_weight.
169
170
  * @param lg_max_map_size the planned map size to be used when constructing this sketch.
170
- * @param estimated_total_stream_weight the estimated total stream weight.
171
+ * @param estimated_total_weight the estimated total stream weight.
171
172
  * @return the estimated <i>a priori</i> error.
172
173
  */
173
174
  static double get_apriori_error(uint8_t lg_max_map_size, W estimated_total_weight);
174
175
 
175
176
  class row;
176
- typedef typename std::vector<row, typename std::allocator_traits<A>::template rebind_alloc<row>> vector_row; // alias for users
177
+ using vector_row = typename std::vector<row, typename std::allocator_traits<A>::template rebind_alloc<row>>;
177
178
 
178
179
  /**
179
180
  * Returns an array of rows that include frequent items, estimates, upper and lower bounds
@@ -225,18 +226,18 @@ public:
225
226
  /**
226
227
  * Computes size needed to serialize the current state of the sketch.
227
228
  * This can be expensive since every item needs to be looked at.
228
- * @param instance of a SerDe
229
+ * @param sd instance of a SerDe
229
230
  * @return size in bytes needed to serialize this sketch
230
231
  */
231
- template<typename SerDe = S>
232
+ template<typename SerDe = serde<T>>
232
233
  size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
233
234
 
234
235
  /**
235
236
  * This method serializes the sketch into a given stream in a binary form
236
237
  * @param os output stream
237
- * @param instance of a SerDe
238
+ * @param sd instance of a SerDe
238
239
  */
239
- template<typename SerDe = S>
240
+ template<typename SerDe = serde<T>>
240
241
  void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
241
242
 
242
243
  // This is a convenience alias for users
@@ -249,53 +250,36 @@ public:
249
250
  * It is a blank space of a given size.
250
251
  * This header is used in Datasketches PostgreSQL extension.
251
252
  * @param header_size_bytes space to reserve in front of the sketch
252
- * @param instance of a SerDe
253
+ * @param sd instance of a SerDe
253
254
  * @return serialized sketch as a vector of bytes
254
255
  */
255
- template<typename SerDe = S>
256
+ template<typename SerDe = serde<T>>
256
257
  vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
257
258
 
258
259
  /**
259
260
  * This method deserializes a sketch from a given stream.
260
261
  * @param is input stream
261
- * @param instance of an Allocator
262
- * @return an instance of the sketch
263
- *
264
- * Deprecated, to be removed in the next major version
265
- */
266
- static frequent_items_sketch deserialize(std::istream& is, const A& allocator = A());
267
-
268
- /**
269
- * This method deserializes a sketch from a given stream.
270
- * @param is input stream
271
- * @param instance of a SerDe
272
- * @param instance of an Allocator
262
+ * @param sd instance of a SerDe
263
+ * @param equal instance of Equality operator
264
+ * @param allocator instance of an Allocator
273
265
  * @return an instance of the sketch
274
266
  */
275
- template<typename SerDe = S>
276
- static frequent_items_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
277
-
278
- /**
279
- * This method deserializes a sketch from a given array of bytes.
280
- * @param bytes pointer to the array of bytes
281
- * @param size the size of the array
282
- * @param instance of an Allocator
283
- * @return an instance of the sketch
284
- *
285
- * Deprecated, to be removed in the next major version
286
- */
287
- static frequent_items_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
267
+ template<typename SerDe = serde<T>>
268
+ static frequent_items_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(),
269
+ const E& equal = E(), const A& allocator = A());
288
270
 
289
271
  /**
290
272
  * This method deserializes a sketch from a given array of bytes.
291
273
  * @param bytes pointer to the array of bytes
292
274
  * @param size the size of the array
293
- * @param instance of a SerDe
294
- * @param instance of an Allocator
275
+ * @param sd instance of a SerDe
276
+ * @param equal instance of Equality operator
277
+ * @param allocator instance of an Allocator
295
278
  * @return an instance of the sketch
296
279
  */
297
- template<typename SerDe = S>
298
- static frequent_items_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
280
+ template<typename SerDe = serde<T>>
281
+ static frequent_items_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(),
282
+ const E& equal = E(), const A& allocator = A());
299
283
 
300
284
  /**
301
285
  * Returns a human readable summary of this sketch
@@ -334,8 +318,8 @@ private:
334
318
  class items_deleter;
335
319
  };
336
320
 
337
- template<typename T, typename W, typename H, typename E, typename S, typename A>
338
- class frequent_items_sketch<T, W, H, E, S, A>::row {
321
+ template<typename T, typename W, typename H, typename E, typename A>
322
+ class frequent_items_sketch<T, W, H, E, A>::row {
339
323
  public:
340
324
  row(const T* item, W weight, W offset):
341
325
  item(item), weight(weight), offset(offset) {}