datasketches 0.2.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +5 -0
  3. data/ext/datasketches/kll_wrapper.cpp +20 -20
  4. data/ext/datasketches/theta_wrapper.cpp +2 -2
  5. data/lib/datasketches/version.rb +1 -1
  6. data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
  7. data/vendor/datasketches-cpp/MANIFEST.in +21 -2
  8. data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
  9. data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
  10. data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
  11. data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
  12. data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
  13. data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
  14. data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
  15. data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
  16. data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
  17. data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
  18. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
  19. data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
  20. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
  21. data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
  22. data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
  23. data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
  24. data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
  25. data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
  26. data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
  27. data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
  28. data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
  29. data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
  30. data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
  31. data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
  32. data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
  33. data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
  34. data/vendor/datasketches-cpp/pyproject.toml +17 -12
  35. data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
  36. data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
  37. data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
  38. data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
  39. data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
  40. data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
  41. data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
  42. data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
  43. data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
  44. data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
  45. data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
  46. data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
  47. data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
  48. data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
  49. data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
  50. data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
  51. data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
  52. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
  53. data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
  54. data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
  55. data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
  56. data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
  57. data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
  58. data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
  59. data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
  60. data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
  61. data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
  62. data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
  63. data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
  64. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
  65. data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
  66. data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
  67. data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
  68. data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
  69. data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
  70. data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
  71. data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
  72. data/vendor/datasketches-cpp/setup.py +14 -2
  73. data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
  74. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
  75. data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
  76. data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
  77. data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
  78. data/vendor/datasketches-cpp/tox.ini +26 -0
  79. data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
  80. data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
  81. data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
  82. data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
  83. data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
  84. data/vendor/datasketches-cpp/version.cfg.in +1 -0
  85. metadata +14 -5
  86. data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -0,0 +1,459 @@
1
+ /*
2
+ * Licensed to the Apache Software Foundation (ASF) under one
3
+ * or more contributor license agreements. See the NOTICE file
4
+ * distributed with this work for additional information
5
+ * regarding copyright ownership. The ASF licenses this file
6
+ * to you under the Apache License, Version 2.0 (the
7
+ * "License"); you may not use this file except in compliance
8
+ * with the License. You may obtain a copy of the License at
9
+ *
10
+ * http://www.apache.org/licenses/LICENSE-2.0
11
+ *
12
+ * Unless required by applicable law or agreed to in writing,
13
+ * software distributed under the License is distributed on an
14
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ * KIND, either express or implied. See the License for the
16
+ * specific language governing permissions and limitations
17
+ * under the License.
18
+ */
19
+
20
+ #include <catch2/catch.hpp>
21
+
22
+ #include <vector>
23
+ #include <utility>
24
+
25
+ #include <iostream>
26
+
27
+ #include "quantiles_sorted_view.hpp"
28
+
29
+ namespace datasketches {
30
+
31
+ TEST_CASE("empty", "sorted view") {
32
+ auto view = quantiles_sorted_view<float, std::less<float>, std::allocator<float>>(1, std::less<float>(), std::allocator<float>());
33
+ REQUIRE_THROWS_AS(view.get_rank(0), std::runtime_error);
34
+ REQUIRE_THROWS_AS(view.get_quantile(0), std::runtime_error);
35
+ const float split_points[1] {0};
36
+ REQUIRE_THROWS_AS(view.get_CDF(split_points, 1), std::runtime_error);
37
+ REQUIRE_THROWS_AS(view.get_PMF(split_points, 1), std::runtime_error);
38
+ }
39
+
40
+ TEST_CASE("set 0", "sorted view") {
41
+ auto view = quantiles_sorted_view<float, std::less<float>, std::allocator<float>>(1, std::less<float>(), std::allocator<float>());
42
+ std::vector<float> l0 {10};
43
+ view.add(l0.begin(), l0.end(), 1);
44
+ view.convert_to_cummulative();
45
+ REQUIRE(view.size() == 1);
46
+
47
+ auto it = view.begin();
48
+ // using operator->
49
+ REQUIRE(it->first == 10);
50
+ REQUIRE(it->second == 1);
51
+ // using operator*
52
+ REQUIRE((*it).first == 10);
53
+ REQUIRE((*it).second == 1);
54
+ REQUIRE(it.get_weight() == 1);
55
+ REQUIRE(it.get_cumulative_weight() == 1);
56
+ REQUIRE(it.get_cumulative_weight(false) == 0);
57
+ ++it;
58
+ REQUIRE(it == view.end());
59
+
60
+ REQUIRE(view.get_rank(5, true) == 0);
61
+ REQUIRE(view.get_rank(10, true) == 1);
62
+ REQUIRE(view.get_rank(15, true) == 1);
63
+
64
+ REQUIRE(view.get_rank(5, false) == 0);
65
+ REQUIRE(view.get_rank(10, false) == 0);
66
+ REQUIRE(view.get_rank(15, false) == 1);
67
+
68
+ REQUIRE(view.get_quantile(0, true) == 10);
69
+ REQUIRE(view.get_quantile(0.5, true) == 10);
70
+ REQUIRE(view.get_quantile(1, true) == 10);
71
+
72
+ REQUIRE(view.get_quantile(0, false) == 10);
73
+ REQUIRE(view.get_quantile(0.5, false) == 10);
74
+ REQUIRE(view.get_quantile(1, false) == 10);
75
+ }
76
+
77
+ TEST_CASE("set 1", "sorted view") {
78
+ auto view = quantiles_sorted_view<float, std::less<float>, std::allocator<float>>(1, std::less<float>(), std::allocator<float>());
79
+ std::vector<float> l0 {10, 10};
80
+ view.add(l0.begin(), l0.end(), 1);
81
+ view.convert_to_cummulative();
82
+ REQUIRE(view.size() == 2);
83
+
84
+ auto it = view.begin();
85
+ REQUIRE(it->first == 10);
86
+ REQUIRE(it->second == 1);
87
+ REQUIRE(it.get_weight() == 1);
88
+ REQUIRE(it.get_cumulative_weight() == 1);
89
+ REQUIRE(it.get_cumulative_weight(false) == 0);
90
+ ++it;
91
+ REQUIRE(it->first == 10);
92
+ REQUIRE(it->second == 2);
93
+ REQUIRE(it.get_weight() == 1);
94
+ REQUIRE(it.get_cumulative_weight() == 2);
95
+ REQUIRE(it.get_cumulative_weight(false) == 1);
96
+ ++it;
97
+ REQUIRE(it == view.end());
98
+
99
+ REQUIRE(view.get_rank(5, true) == 0);
100
+ REQUIRE(view.get_rank(10, true) == 1);
101
+ REQUIRE(view.get_rank(15, true) == 1);
102
+
103
+ REQUIRE(view.get_rank(5, false) == 0);
104
+ REQUIRE(view.get_rank(10, false) == 0);
105
+ REQUIRE(view.get_rank(15, false) == 1);
106
+
107
+ REQUIRE(view.get_quantile(0, true) == 10);
108
+ REQUIRE(view.get_quantile(0.25, true) == 10);
109
+ REQUIRE(view.get_quantile(0.5, true) == 10);
110
+ REQUIRE(view.get_quantile(0.75, true) == 10);
111
+ REQUIRE(view.get_quantile(1, true) == 10);
112
+
113
+ REQUIRE(view.get_quantile(0, false) == 10);
114
+ REQUIRE(view.get_quantile(0.25, false) == 10);
115
+ REQUIRE(view.get_quantile(0.5, false) == 10);
116
+ REQUIRE(view.get_quantile(0.75, false) == 10);
117
+ REQUIRE(view.get_quantile(1, false) == 10);
118
+ }
119
+
120
+ TEST_CASE("set 2", "sorted view") {
121
+ auto view = quantiles_sorted_view<float, std::less<float>, std::allocator<float>>(1, std::less<float>(), std::allocator<float>());
122
+ std::vector<float> l1 {10, 20, 30, 40};
123
+ view.add(l1.begin(), l1.end(), 2);
124
+ view.convert_to_cummulative();
125
+ REQUIRE(view.size() == 4);
126
+
127
+ auto it = view.begin();
128
+ REQUIRE(it->first == 10);
129
+ REQUIRE(it->second == 2);
130
+ REQUIRE(it.get_weight() == 2);
131
+ REQUIRE(it.get_cumulative_weight() == 2);
132
+ REQUIRE(it.get_cumulative_weight(false) == 0);
133
+ ++it;
134
+ REQUIRE(it->first == 20);
135
+ REQUIRE(it->second == 4);
136
+ REQUIRE(it.get_weight() == 2);
137
+ REQUIRE(it.get_cumulative_weight() == 4);
138
+ REQUIRE(it.get_cumulative_weight(false) == 2);
139
+ ++it;
140
+ REQUIRE(it->first == 30);
141
+ REQUIRE(it->second == 6);
142
+ REQUIRE(it.get_weight() == 2);
143
+ REQUIRE(it.get_cumulative_weight() == 6);
144
+ REQUIRE(it.get_cumulative_weight(false) == 4);
145
+ ++it;
146
+ REQUIRE(it->first == 40);
147
+ REQUIRE(it->second == 8);
148
+ REQUIRE(it.get_weight() == 2);
149
+ REQUIRE(it.get_cumulative_weight() == 8);
150
+ REQUIRE(it.get_cumulative_weight(false) == 6);
151
+ ++it;
152
+ REQUIRE(it == view.end());
153
+
154
+ REQUIRE(view.get_rank(5, true) == 0);
155
+ REQUIRE(view.get_rank(10, true) == 0.25);
156
+ REQUIRE(view.get_rank(15, true) == 0.25);
157
+ REQUIRE(view.get_rank(20, true) == 0.5);
158
+ REQUIRE(view.get_rank(25, true) == 0.5);
159
+ REQUIRE(view.get_rank(30, true) == 0.75);
160
+ REQUIRE(view.get_rank(35, true) == 0.75);
161
+ REQUIRE(view.get_rank(40, true) == 1);
162
+ REQUIRE(view.get_rank(45, true) == 1);
163
+
164
+ REQUIRE(view.get_rank(5, false) == 0);
165
+ REQUIRE(view.get_rank(10, false) == 0);
166
+ REQUIRE(view.get_rank(15, false) == 0.25);
167
+ REQUIRE(view.get_rank(20, false) == 0.25);
168
+ REQUIRE(view.get_rank(25, false) == 0.5);
169
+ REQUIRE(view.get_rank(30, false) == 0.5);
170
+ REQUIRE(view.get_rank(35, false) == 0.75);
171
+ REQUIRE(view.get_rank(40, false) == 0.75);
172
+ REQUIRE(view.get_rank(45, false) == 1);
173
+
174
+ REQUIRE(view.get_quantile(0, true) == 10);
175
+ REQUIRE(view.get_quantile(0.0625, true) == 10);
176
+ REQUIRE(view.get_quantile(0.125, true) == 10);
177
+ REQUIRE(view.get_quantile(0.1875, true) == 10);
178
+ REQUIRE(view.get_quantile(0.25, true) == 10);
179
+ REQUIRE(view.get_quantile(0.3125, true) == 20);
180
+ REQUIRE(view.get_quantile(0.375, true) == 20);
181
+ REQUIRE(view.get_quantile(0.4375, true) == 20);
182
+ REQUIRE(view.get_quantile(0.5, true) == 20);
183
+ REQUIRE(view.get_quantile(0.5625, true) == 30);
184
+ REQUIRE(view.get_quantile(0.625, true) == 30);
185
+ REQUIRE(view.get_quantile(0.6875, true) == 30);
186
+ REQUIRE(view.get_quantile(0.75, true) == 30);
187
+ REQUIRE(view.get_quantile(0.8125, true) == 40);
188
+ REQUIRE(view.get_quantile(0.875, true) == 40);
189
+ REQUIRE(view.get_quantile(0.9375, true) == 40);
190
+ REQUIRE(view.get_quantile(1, true) == 40);
191
+
192
+ REQUIRE(view.get_quantile(0, false) == 10);
193
+ REQUIRE(view.get_quantile(0.0625, false) == 10);
194
+ REQUIRE(view.get_quantile(0.125, false) == 10);
195
+ REQUIRE(view.get_quantile(0.1875, false) == 10);
196
+ REQUIRE(view.get_quantile(0.25, false) == 20);
197
+ REQUIRE(view.get_quantile(0.3125, false) == 20);
198
+ REQUIRE(view.get_quantile(0.375, false) == 20);
199
+ REQUIRE(view.get_quantile(0.4375, false) == 20);
200
+ REQUIRE(view.get_quantile(0.5, false) == 30);
201
+ REQUIRE(view.get_quantile(0.5625, false) == 30);
202
+ REQUIRE(view.get_quantile(0.625, false) == 30);
203
+ REQUIRE(view.get_quantile(0.6875, false) == 30);
204
+ REQUIRE(view.get_quantile(0.75, false) == 40);
205
+ REQUIRE(view.get_quantile(0.8125, false) == 40);
206
+ REQUIRE(view.get_quantile(0.875, false) == 40);
207
+ REQUIRE(view.get_quantile(0.9375, false) == 40);
208
+ REQUIRE(view.get_quantile(1, false) == 40);
209
+ }
210
+
211
+ TEST_CASE("set 3", "sorted view") {
212
+ auto view = quantiles_sorted_view<float, std::less<float>, std::allocator<float>>(8, std::less<float>(), std::allocator<float>());
213
+ std::vector<float> l1 {10, 20, 20, 30, 30, 30, 40, 50};
214
+ view.add(l1.begin(), l1.end(), 2);
215
+ view.convert_to_cummulative();
216
+ REQUIRE(view.size() == 8);
217
+
218
+ auto it = view.begin();
219
+ REQUIRE(it->first == 10);
220
+ REQUIRE(it->second == 2);
221
+ REQUIRE(it.get_weight() == 2);
222
+ ++it;
223
+ REQUIRE(it->first == 20);
224
+ REQUIRE(it->second == 4);
225
+ REQUIRE(it.get_weight() == 2);
226
+ ++it;
227
+ REQUIRE(it->first == 20);
228
+ REQUIRE(it->second == 6);
229
+ REQUIRE(it.get_weight() == 2);
230
+ ++it;
231
+ REQUIRE(it->first == 30);
232
+ REQUIRE(it->second == 8);
233
+ REQUIRE(it.get_weight() == 2);
234
+ ++it;
235
+ REQUIRE(it->first == 30);
236
+ REQUIRE(it->second == 10);
237
+ REQUIRE(it.get_weight() == 2);
238
+ ++it;
239
+ REQUIRE(it->first == 30);
240
+ REQUIRE(it->second == 12);
241
+ REQUIRE(it.get_weight() == 2);
242
+ ++it;
243
+ REQUIRE(it->first == 40);
244
+ REQUIRE(it->second == 14);
245
+ REQUIRE(it.get_weight() == 2);
246
+ ++it;
247
+ REQUIRE(it->first == 50);
248
+ REQUIRE(it->second == 16);
249
+ REQUIRE(it.get_weight() == 2);
250
+
251
+ REQUIRE(view.get_rank(5, true) == 0);
252
+ REQUIRE(view.get_rank(10, true) == 0.125);
253
+ REQUIRE(view.get_rank(15, true) == 0.125);
254
+ REQUIRE(view.get_rank(20, true) == 0.375);
255
+ REQUIRE(view.get_rank(25, true) == 0.375);
256
+ REQUIRE(view.get_rank(30, true) == 0.75);
257
+ REQUIRE(view.get_rank(35, true) == 0.75);
258
+ REQUIRE(view.get_rank(40, true) == 0.875);
259
+ REQUIRE(view.get_rank(45, true) == 0.875);
260
+ REQUIRE(view.get_rank(50, true) == 1);
261
+ REQUIRE(view.get_rank(55, true) == 1);
262
+
263
+ REQUIRE(view.get_rank(5, false) == 0);
264
+ REQUIRE(view.get_rank(10, false) == 0);
265
+ REQUIRE(view.get_rank(15, false) == 0.125);
266
+ REQUIRE(view.get_rank(20, false) == 0.125);
267
+ REQUIRE(view.get_rank(25, false) == 0.375);
268
+ REQUIRE(view.get_rank(30, false) == 0.375);
269
+ REQUIRE(view.get_rank(35, false) == 0.75);
270
+ REQUIRE(view.get_rank(40, false) == 0.75);
271
+ REQUIRE(view.get_rank(45, false) == 0.875);
272
+ REQUIRE(view.get_rank(50, false) == 0.875);
273
+ REQUIRE(view.get_rank(55, false) == 1);
274
+
275
+ REQUIRE(view.get_quantile(0, true) == 10);
276
+ REQUIRE(view.get_quantile(0.03125, true) == 10);
277
+ REQUIRE(view.get_quantile(0.0625, true) == 10);
278
+ REQUIRE(view.get_quantile(0.09375, true) == 10);
279
+ REQUIRE(view.get_quantile(0.125, true) == 10);
280
+ REQUIRE(view.get_quantile(0.15625, true) == 20);
281
+ REQUIRE(view.get_quantile(0.1875, true) == 20);
282
+ REQUIRE(view.get_quantile(0.21875, true) == 20);
283
+ REQUIRE(view.get_quantile(0.25, true) == 20);
284
+ REQUIRE(view.get_quantile(0.28125, true) == 20);
285
+ REQUIRE(view.get_quantile(0.3125, true) == 20);
286
+ REQUIRE(view.get_quantile(0.34375, true) == 20);
287
+ REQUIRE(view.get_quantile(0.375, true) == 20);
288
+ REQUIRE(view.get_quantile(0.40625, true) == 30);
289
+ REQUIRE(view.get_quantile(0.4375, true) == 30);
290
+ REQUIRE(view.get_quantile(0.46875, true) == 30);
291
+ REQUIRE(view.get_quantile(0.5, true) == 30);
292
+ REQUIRE(view.get_quantile(0.53125, true) == 30);
293
+ REQUIRE(view.get_quantile(0.5625, true) == 30);
294
+ REQUIRE(view.get_quantile(0.59375, true) == 30);
295
+ REQUIRE(view.get_quantile(0.625, true) == 30);
296
+ REQUIRE(view.get_quantile(0.65625, true) == 30);
297
+ REQUIRE(view.get_quantile(0.6875, true) == 30);
298
+ REQUIRE(view.get_quantile(0.71875, true) == 30);
299
+ REQUIRE(view.get_quantile(0.75, true) == 30);
300
+ REQUIRE(view.get_quantile(0.78125, true) == 40);
301
+ REQUIRE(view.get_quantile(0.8125, true) == 40);
302
+ REQUIRE(view.get_quantile(0.84375, true) == 40);
303
+ REQUIRE(view.get_quantile(0.875, true) == 40);
304
+ REQUIRE(view.get_quantile(0.90625, true) == 50);
305
+ REQUIRE(view.get_quantile(0.9375, true) == 50);
306
+ REQUIRE(view.get_quantile(0.96875, true) == 50);
307
+ REQUIRE(view.get_quantile(1, true) == 50);
308
+
309
+ REQUIRE(view.get_quantile(0, false) == 10);
310
+ REQUIRE(view.get_quantile(0.03125, false) == 10);
311
+ REQUIRE(view.get_quantile(0.0625, false) == 10);
312
+ REQUIRE(view.get_quantile(0.09375, false) == 10);
313
+ REQUIRE(view.get_quantile(0.125, false) == 20);
314
+ REQUIRE(view.get_quantile(0.15625, false) == 20);
315
+ REQUIRE(view.get_quantile(0.1875, false) == 20);
316
+ REQUIRE(view.get_quantile(0.21875, false) == 20);
317
+ REQUIRE(view.get_quantile(0.25, false) == 20);
318
+ REQUIRE(view.get_quantile(0.28125, false) == 20);
319
+ REQUIRE(view.get_quantile(0.3125, false) == 20);
320
+ REQUIRE(view.get_quantile(0.34375, false) == 20);
321
+ REQUIRE(view.get_quantile(0.375, false) == 30);
322
+ REQUIRE(view.get_quantile(0.40625, false) == 30);
323
+ REQUIRE(view.get_quantile(0.4375, false) == 30);
324
+ REQUIRE(view.get_quantile(0.46875, false) == 30);
325
+ REQUIRE(view.get_quantile(0.5, false) == 30);
326
+ REQUIRE(view.get_quantile(0.53125, false) == 30);
327
+ REQUIRE(view.get_quantile(0.5625, false) == 30);
328
+ REQUIRE(view.get_quantile(0.59375, false) == 30);
329
+ REQUIRE(view.get_quantile(0.625, false) == 30);
330
+ REQUIRE(view.get_quantile(0.65625, false) == 30);
331
+ REQUIRE(view.get_quantile(0.6875, false) == 30);
332
+ REQUIRE(view.get_quantile(0.71875, false) == 30);
333
+ REQUIRE(view.get_quantile(0.75, false) == 40);
334
+ REQUIRE(view.get_quantile(0.78125, false) == 40);
335
+ REQUIRE(view.get_quantile(0.8125, false) == 40);
336
+ REQUIRE(view.get_quantile(0.84375, false) == 40);
337
+ REQUIRE(view.get_quantile(0.875, false) == 50);
338
+ REQUIRE(view.get_quantile(0.90625, false) == 50);
339
+ REQUIRE(view.get_quantile(0.9375, false) == 50);
340
+ REQUIRE(view.get_quantile(0.96875, false) == 50);
341
+ REQUIRE(view.get_quantile(1, false) == 50);
342
+ }
343
+
344
+ TEST_CASE("set 4", "sorted view") {
345
+ auto view = quantiles_sorted_view<float, std::less<float>, std::allocator<float>>(8, std::less<float>(), std::allocator<float>());
346
+ std::vector<float> l1 {10, 20, 30, 40};
347
+ view.add(l1.begin(), l1.end(), 2);
348
+ std::vector<float> l0 {10, 20, 30, 40};
349
+ view.add(l0.begin(), l0.end(), 1);
350
+ view.convert_to_cummulative();
351
+ REQUIRE(view.size() == 8);
352
+
353
+ auto it = view.begin();
354
+ REQUIRE(it->first == 10);
355
+ REQUIRE(it->second == 2);
356
+ REQUIRE(it.get_weight() == 2);
357
+ ++it;
358
+ REQUIRE(it->first == 10);
359
+ REQUIRE(it->second == 3);
360
+ REQUIRE(it.get_weight() == 1);
361
+ ++it;
362
+ REQUIRE(it->first == 20);
363
+ REQUIRE(it->second == 5);
364
+ REQUIRE(it.get_weight() == 2);
365
+ ++it;
366
+ REQUIRE(it->first == 20);
367
+ REQUIRE(it->second == 6);
368
+ REQUIRE(it.get_weight() == 1);
369
+ ++it;
370
+ REQUIRE(it->first == 30);
371
+ REQUIRE(it->second == 8);
372
+ REQUIRE(it.get_weight() == 2);
373
+ ++it;
374
+ REQUIRE(it->first == 30);
375
+ REQUIRE(it->second == 9);
376
+ REQUIRE(it.get_weight() == 1);
377
+ ++it;
378
+ REQUIRE(it->first == 40);
379
+ REQUIRE(it->second == 11);
380
+ REQUIRE(it.get_weight() == 2);
381
+ ++it;
382
+ REQUIRE(it->first == 40);
383
+ REQUIRE(it->second == 12);
384
+ REQUIRE(it.get_weight() == 1);
385
+
386
+ REQUIRE(view.get_rank(5, true) == 0);
387
+ REQUIRE(view.get_rank(10, true) == 0.25);
388
+ REQUIRE(view.get_rank(15, true) == 0.25);
389
+ REQUIRE(view.get_rank(20, true) == 0.5);
390
+ REQUIRE(view.get_rank(25, true) == 0.5);
391
+ REQUIRE(view.get_rank(30, true) == 0.75);
392
+ REQUIRE(view.get_rank(35, true) == 0.75);
393
+ REQUIRE(view.get_rank(40, true) == 1);
394
+ REQUIRE(view.get_rank(45, true) == 1);
395
+
396
+ REQUIRE(view.get_rank(5, false) == 0);
397
+ REQUIRE(view.get_rank(10, false) == 0);
398
+ REQUIRE(view.get_rank(15, false) == 0.25);
399
+ REQUIRE(view.get_rank(20, false) == 0.25);
400
+ REQUIRE(view.get_rank(25, false) == 0.5);
401
+ REQUIRE(view.get_rank(30, false) == 0.5);
402
+ REQUIRE(view.get_rank(35, false) == 0.75);
403
+ REQUIRE(view.get_rank(40, false) == 0.75);
404
+ REQUIRE(view.get_rank(45, false) == 1);
405
+
406
+ REQUIRE(view.get_quantile(0, true) == 10);
407
+ REQUIRE(view.get_quantile(0.0417, true) == 10);
408
+ REQUIRE(view.get_quantile(0.0833, true) == 10);
409
+ REQUIRE(view.get_quantile(0.125, true) == 10);
410
+ REQUIRE(view.get_quantile(0.1667, true) == 10);
411
+ REQUIRE(view.get_quantile(0.2083, true) == 10);
412
+ REQUIRE(view.get_quantile(0.25, true) == 10);
413
+ REQUIRE(view.get_quantile(0.2917, true) == 20);
414
+ REQUIRE(view.get_quantile(0.3333, true) == 20);
415
+ REQUIRE(view.get_quantile(0.375, true) == 20);
416
+ REQUIRE(view.get_quantile(0.4167, true) == 20);
417
+ REQUIRE(view.get_quantile(0.4583, true) == 20);
418
+ REQUIRE(view.get_quantile(0.5, true) == 20);
419
+ REQUIRE(view.get_quantile(0.5417, true) == 30);
420
+ REQUIRE(view.get_quantile(0.5833, true) == 30);
421
+ REQUIRE(view.get_quantile(0.625, true) == 30);
422
+ REQUIRE(view.get_quantile(0.6667, true) == 30);
423
+ REQUIRE(view.get_quantile(0.7083, true) == 30);
424
+ REQUIRE(view.get_quantile(0.75, true) == 30);
425
+ REQUIRE(view.get_quantile(0.7917, true) == 40);
426
+ REQUIRE(view.get_quantile(0.8333, true) == 40);
427
+ REQUIRE(view.get_quantile(0.875, true) == 40);
428
+ REQUIRE(view.get_quantile(0.9167, true) == 40);
429
+ REQUIRE(view.get_quantile(0.9583, true) == 40);
430
+ REQUIRE(view.get_quantile(1, true) == 40);
431
+
432
+ REQUIRE(view.get_quantile(0, false) == 10);
433
+ REQUIRE(view.get_quantile(0.0417, false) == 10);
434
+ REQUIRE(view.get_quantile(0.0833, false) == 10);
435
+ REQUIRE(view.get_quantile(0.125, false) == 10);
436
+ REQUIRE(view.get_quantile(0.1667, false) == 10);
437
+ REQUIRE(view.get_quantile(0.2083, false) == 10);
438
+ REQUIRE(view.get_quantile(0.25, false) == 20);
439
+ REQUIRE(view.get_quantile(0.2917, false) == 20);
440
+ REQUIRE(view.get_quantile(0.3333, false) == 20);
441
+ REQUIRE(view.get_quantile(0.375, false) == 20);
442
+ REQUIRE(view.get_quantile(0.4167, false) == 20);
443
+ REQUIRE(view.get_quantile(0.4583, false) == 20);
444
+ REQUIRE(view.get_quantile(0.5, false) == 30);
445
+ REQUIRE(view.get_quantile(0.5417, false) == 30);
446
+ REQUIRE(view.get_quantile(0.5833, false) == 30);
447
+ REQUIRE(view.get_quantile(0.625, false) == 30);
448
+ REQUIRE(view.get_quantile(0.6667, false) == 30);
449
+ REQUIRE(view.get_quantile(0.7083, false) == 30);
450
+ REQUIRE(view.get_quantile(0.75, false) == 40);
451
+ REQUIRE(view.get_quantile(0.7917, false) == 40);
452
+ REQUIRE(view.get_quantile(0.8333, false) == 40);
453
+ REQUIRE(view.get_quantile(0.875, false) == 40);
454
+ REQUIRE(view.get_quantile(0.9167, false) == 40);
455
+ REQUIRE(view.get_quantile(0.9583, false) == 40);
456
+ REQUIRE(view.get_quantile(1, false) == 40);
457
+ }
458
+
459
+ } /* namespace datasketches */
@@ -17,7 +17,7 @@
17
17
 
18
18
  add_executable(cpc_test)
19
19
 
20
- target_link_libraries(cpc_test cpc common_test)
20
+ target_link_libraries(cpc_test cpc common_test_lib)
21
21
 
22
22
  set_target_properties(cpc_test PROPERTIES
23
23
  CXX_STANDARD 11
@@ -34,7 +34,7 @@ namespace datasketches {
34
34
 
35
35
  /*
36
36
  * Based on Java implementation here:
37
- * https://github.com/DataSketches/sketches-core/blob/master/src/main/java/com/yahoo/sketches/frequencies/ItemsSketch.java
37
+ * https://github.com/apache/datasketches-java/blob/master/src/main/java/org/apache/datasketches/frequencies/ItemsSketch.java
38
38
  * author Alexander Saydakov
39
39
  */
40
40
 
@@ -46,7 +46,6 @@ template<
46
46
  typename W = uint64_t,
47
47
  typename H = std::hash<T>,
48
48
  typename E = std::equal_to<T>,
49
- typename S = serde<T>, // deprecated, to be removed in the next major version
50
49
  typename A = std::allocator<T>
51
50
  >
52
51
  class frequent_items_sketch {
@@ -60,11 +59,13 @@ public:
60
59
  * @param lg_max_map_size Log2 of the physical size of the internal hash map managed by this
61
60
  * sketch. The maximum capacity of this internal hash map is 0.75 times 2^lg_max_map_size.
62
61
  * Both the ultimate accuracy and size of this sketch are functions of lg_max_map_size.
63
- *
64
62
  * @param lg_start_map_size Log2 of the starting physical size of the internal hash
65
63
  * map managed by this sketch.
64
+ * @param equal instance of Equality operator
65
+ * @param allocator instance of an Allocator
66
66
  */
67
- explicit frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size = LG_MIN_MAP_SIZE, const A& allocator = A());
67
+ explicit frequent_items_sketch(uint8_t lg_max_map_size, uint8_t lg_start_map_size = LG_MIN_MAP_SIZE,
68
+ const E& equal = E(), const A& allocator = A());
68
69
 
69
70
  /**
70
71
  * Update this sketch with an item and a positive weight (frequency count).
@@ -158,7 +159,7 @@ public:
158
159
  /**
159
160
  * Returns epsilon used to compute <i>a priori</i> error.
160
161
  * This is just the value <i>3.5 / maxMapSize</i>.
161
- * @param maxMapSize the planned map size to be used when constructing this sketch.
162
+ * @param lg_max_map_size the planned map size to be used when constructing this sketch.
162
163
  * @return epsilon used to compute <i>a priori</i> error.
163
164
  */
164
165
  static double get_epsilon(uint8_t lg_max_map_size);
@@ -167,13 +168,13 @@ public:
167
168
  * Returns the estimated <i>a priori</i> error given the max_map_size for the sketch and the
168
169
  * estimated_total_stream_weight.
169
170
  * @param lg_max_map_size the planned map size to be used when constructing this sketch.
170
- * @param estimated_total_stream_weight the estimated total stream weight.
171
+ * @param estimated_total_weight the estimated total stream weight.
171
172
  * @return the estimated <i>a priori</i> error.
172
173
  */
173
174
  static double get_apriori_error(uint8_t lg_max_map_size, W estimated_total_weight);
174
175
 
175
176
  class row;
176
- typedef typename std::vector<row, typename std::allocator_traits<A>::template rebind_alloc<row>> vector_row; // alias for users
177
+ using vector_row = typename std::vector<row, typename std::allocator_traits<A>::template rebind_alloc<row>>;
177
178
 
178
179
  /**
179
180
  * Returns an array of rows that include frequent items, estimates, upper and lower bounds
@@ -225,18 +226,18 @@ public:
225
226
  /**
226
227
  * Computes size needed to serialize the current state of the sketch.
227
228
  * This can be expensive since every item needs to be looked at.
228
- * @param instance of a SerDe
229
+ * @param sd instance of a SerDe
229
230
  * @return size in bytes needed to serialize this sketch
230
231
  */
231
- template<typename SerDe = S>
232
+ template<typename SerDe = serde<T>>
232
233
  size_t get_serialized_size_bytes(const SerDe& sd = SerDe()) const;
233
234
 
234
235
  /**
235
236
  * This method serializes the sketch into a given stream in a binary form
236
237
  * @param os output stream
237
- * @param instance of a SerDe
238
+ * @param sd instance of a SerDe
238
239
  */
239
- template<typename SerDe = S>
240
+ template<typename SerDe = serde<T>>
240
241
  void serialize(std::ostream& os, const SerDe& sd = SerDe()) const;
241
242
 
242
243
  // This is a convenience alias for users
@@ -249,53 +250,36 @@ public:
249
250
  * It is a blank space of a given size.
250
251
  * This header is used in Datasketches PostgreSQL extension.
251
252
  * @param header_size_bytes space to reserve in front of the sketch
252
- * @param instance of a SerDe
253
+ * @param sd instance of a SerDe
253
254
  * @return serialized sketch as a vector of bytes
254
255
  */
255
- template<typename SerDe = S>
256
+ template<typename SerDe = serde<T>>
256
257
  vector_bytes serialize(unsigned header_size_bytes = 0, const SerDe& sd = SerDe()) const;
257
258
 
258
259
  /**
259
260
  * This method deserializes a sketch from a given stream.
260
261
  * @param is input stream
261
- * @param instance of an Allocator
262
- * @return an instance of the sketch
263
- *
264
- * Deprecated, to be removed in the next major version
265
- */
266
- static frequent_items_sketch deserialize(std::istream& is, const A& allocator = A());
267
-
268
- /**
269
- * This method deserializes a sketch from a given stream.
270
- * @param is input stream
271
- * @param instance of a SerDe
272
- * @param instance of an Allocator
262
+ * @param sd instance of a SerDe
263
+ * @param equal instance of Equality operator
264
+ * @param allocator instance of an Allocator
273
265
  * @return an instance of the sketch
274
266
  */
275
- template<typename SerDe = S>
276
- static frequent_items_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(), const A& allocator = A());
277
-
278
- /**
279
- * This method deserializes a sketch from a given array of bytes.
280
- * @param bytes pointer to the array of bytes
281
- * @param size the size of the array
282
- * @param instance of an Allocator
283
- * @return an instance of the sketch
284
- *
285
- * Deprecated, to be removed in the next major version
286
- */
287
- static frequent_items_sketch deserialize(const void* bytes, size_t size, const A& allocator = A());
267
+ template<typename SerDe = serde<T>>
268
+ static frequent_items_sketch deserialize(std::istream& is, const SerDe& sd = SerDe(),
269
+ const E& equal = E(), const A& allocator = A());
288
270
 
289
271
  /**
290
272
  * This method deserializes a sketch from a given array of bytes.
291
273
  * @param bytes pointer to the array of bytes
292
274
  * @param size the size of the array
293
- * @param instance of a SerDe
294
- * @param instance of an Allocator
275
+ * @param sd instance of a SerDe
276
+ * @param equal instance of Equality operator
277
+ * @param allocator instance of an Allocator
295
278
  * @return an instance of the sketch
296
279
  */
297
- template<typename SerDe = S>
298
- static frequent_items_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(), const A& allocator = A());
280
+ template<typename SerDe = serde<T>>
281
+ static frequent_items_sketch deserialize(const void* bytes, size_t size, const SerDe& sd = SerDe(),
282
+ const E& equal = E(), const A& allocator = A());
299
283
 
300
284
  /**
301
285
  * Returns a human readable summary of this sketch
@@ -334,8 +318,8 @@ private:
334
318
  class items_deleter;
335
319
  };
336
320
 
337
- template<typename T, typename W, typename H, typename E, typename S, typename A>
338
- class frequent_items_sketch<T, W, H, E, S, A>::row {
321
+ template<typename T, typename W, typename H, typename E, typename A>
322
+ class frequent_items_sketch<T, W, H, E, A>::row {
339
323
  public:
340
324
  row(const T* item, W weight, W offset):
341
325
  item(item), weight(weight), offset(offset) {}