datasketches 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/kll_wrapper.cpp +20 -20
- data/ext/datasketches/theta_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
- data/vendor/datasketches-cpp/MANIFEST.in +21 -2
- data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
- data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
- data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
- data/vendor/datasketches-cpp/pyproject.toml +17 -12
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
- data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/setup.py +14 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
- data/vendor/datasketches-cpp/tox.ini +26 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
- data/vendor/datasketches-cpp/version.cfg.in +1 -0
- metadata +14 -5
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
|
@@ -42,12 +42,12 @@ namespace datasketches {
|
|
|
42
42
|
* author Kevin Lang
|
|
43
43
|
* author Jon Malkin
|
|
44
44
|
*/
|
|
45
|
-
template<typename T, typename S, typename A>
|
|
46
|
-
var_opt_sketch<T,
|
|
47
|
-
var_opt_sketch
|
|
45
|
+
template<typename T, typename A>
|
|
46
|
+
var_opt_sketch<T, A>::var_opt_sketch(uint32_t k, resize_factor rf, const A& allocator) :
|
|
47
|
+
var_opt_sketch(k, rf, false, allocator) {}
|
|
48
48
|
|
|
49
|
-
template<typename T, typename S, typename A>
|
|
50
|
-
var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other) :
|
|
49
|
+
template<typename T, typename A>
|
|
50
|
+
var_opt_sketch<T, A>::var_opt_sketch(const var_opt_sketch& other) :
|
|
51
51
|
k_(other.k_),
|
|
52
52
|
h_(other.h_),
|
|
53
53
|
m_(other.m_),
|
|
@@ -83,8 +83,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other) :
|
|
|
83
83
|
}
|
|
84
84
|
}
|
|
85
85
|
|
|
86
|
-
template<typename T, typename
|
|
87
|
-
var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other, bool as_sketch, uint64_t adjusted_n) :
|
|
86
|
+
template<typename T, typename A>
|
|
87
|
+
var_opt_sketch<T, A>::var_opt_sketch(const var_opt_sketch& other, bool as_sketch, uint64_t adjusted_n) :
|
|
88
88
|
k_(other.k_),
|
|
89
89
|
h_(other.h_),
|
|
90
90
|
m_(other.m_),
|
|
@@ -120,27 +120,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other, bool as_sketc
|
|
|
120
120
|
}
|
|
121
121
|
}
|
|
122
122
|
|
|
123
|
-
template<typename T, typename
|
|
124
|
-
var_opt_sketch<T,
|
|
125
|
-
uint32_t k, uint64_t n, uint32_t h_count, uint32_t r_count, double total_wt_r, const A& allocator) :
|
|
126
|
-
k_(k),
|
|
127
|
-
h_(h_count),
|
|
128
|
-
m_(0),
|
|
129
|
-
r_(r_count),
|
|
130
|
-
n_(n),
|
|
131
|
-
total_wt_r_(total_wt_r),
|
|
132
|
-
rf_(var_opt_constants::DEFAULT_RESIZE_FACTOR),
|
|
133
|
-
curr_items_alloc_(len),
|
|
134
|
-
filled_data_(n > k),
|
|
135
|
-
allocator_(allocator),
|
|
136
|
-
data_(data),
|
|
137
|
-
weights_(weights),
|
|
138
|
-
num_marks_in_h_(0),
|
|
139
|
-
marks_(nullptr)
|
|
140
|
-
{}
|
|
141
|
-
|
|
142
|
-
template<typename T, typename S, typename A>
|
|
143
|
-
var_opt_sketch<T,S,A>::var_opt_sketch(var_opt_sketch&& other) noexcept :
|
|
123
|
+
template<typename T, typename A>
|
|
124
|
+
var_opt_sketch<T, A>::var_opt_sketch(var_opt_sketch&& other) noexcept :
|
|
144
125
|
k_(other.k_),
|
|
145
126
|
h_(other.h_),
|
|
146
127
|
m_(other.m_),
|
|
@@ -161,8 +142,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(var_opt_sketch&& other) noexcept :
|
|
|
161
142
|
other.marks_ = nullptr;
|
|
162
143
|
}
|
|
163
144
|
|
|
164
|
-
template<typename T, typename
|
|
165
|
-
var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadget, const A& allocator) :
|
|
145
|
+
template<typename T, typename A>
|
|
146
|
+
var_opt_sketch<T, A>::var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadget, const A& allocator) :
|
|
166
147
|
k_(k), h_(0), m_(0), r_(0), n_(0), total_wt_r_(0.0), rf_(rf), allocator_(allocator) {
|
|
167
148
|
if (k == 0 || k_ > MAX_K) {
|
|
168
149
|
throw std::invalid_argument("k must be at least 1 and less than 2^31 - 1");
|
|
@@ -179,8 +160,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadg
|
|
|
179
160
|
num_marks_in_h_ = 0;
|
|
180
161
|
}
|
|
181
162
|
|
|
182
|
-
template<typename T, typename
|
|
183
|
-
var_opt_sketch<T,
|
|
163
|
+
template<typename T, typename A>
|
|
164
|
+
var_opt_sketch<T, A>::var_opt_sketch(uint32_t k, uint32_t h, uint32_t m, uint32_t r, uint64_t n, double total_wt_r, resize_factor rf,
|
|
184
165
|
uint32_t curr_items_alloc, bool filled_data, std::unique_ptr<T, items_deleter> items,
|
|
185
166
|
std::unique_ptr<double, weights_deleter> weights, uint32_t num_marks_in_h,
|
|
186
167
|
std::unique_ptr<bool, marks_deleter> marks, const A& allocator) :
|
|
@@ -201,8 +182,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, uint32_t h, uint32_t m, uint32
|
|
|
201
182
|
{}
|
|
202
183
|
|
|
203
184
|
|
|
204
|
-
template<typename T, typename
|
|
205
|
-
var_opt_sketch<T,S,A>::~var_opt_sketch() {
|
|
185
|
+
template<typename T, typename A>
|
|
186
|
+
var_opt_sketch<T, A>::~var_opt_sketch() {
|
|
206
187
|
if (data_ != nullptr) {
|
|
207
188
|
if (filled_data_) {
|
|
208
189
|
// destroy everything
|
|
@@ -232,9 +213,9 @@ var_opt_sketch<T,S,A>::~var_opt_sketch() {
|
|
|
232
213
|
}
|
|
233
214
|
}
|
|
234
215
|
|
|
235
|
-
template<typename T, typename
|
|
236
|
-
var_opt_sketch<T,
|
|
237
|
-
var_opt_sketch
|
|
216
|
+
template<typename T, typename A>
|
|
217
|
+
var_opt_sketch<T, A>& var_opt_sketch<T, A>::operator=(const var_opt_sketch& other) {
|
|
218
|
+
var_opt_sketch sk_copy(other);
|
|
238
219
|
std::swap(k_, sk_copy.k_);
|
|
239
220
|
std::swap(h_, sk_copy.h_);
|
|
240
221
|
std::swap(m_, sk_copy.m_);
|
|
@@ -252,8 +233,8 @@ var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(const var_opt_sketch& ot
|
|
|
252
233
|
return *this;
|
|
253
234
|
}
|
|
254
235
|
|
|
255
|
-
template<typename T, typename
|
|
256
|
-
var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(var_opt_sketch&& other) {
|
|
236
|
+
template<typename T, typename A>
|
|
237
|
+
var_opt_sketch<T, A>& var_opt_sketch<T, A>::operator=(var_opt_sketch&& other) {
|
|
257
238
|
std::swap(k_, other.k_);
|
|
258
239
|
std::swap(h_, other.h_);
|
|
259
240
|
std::swap(m_, other.m_);
|
|
@@ -311,9 +292,9 @@ var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(var_opt_sketch&& other)
|
|
|
311
292
|
*/
|
|
312
293
|
|
|
313
294
|
// implementation for fixed-size arithmetic types (integral and floating point)
|
|
314
|
-
template<typename T, typename
|
|
295
|
+
template<typename T, typename A>
|
|
315
296
|
template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
|
|
316
|
-
size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes(const SerDe&) const {
|
|
297
|
+
size_t var_opt_sketch<T, A>::get_serialized_size_bytes(const SerDe&) const {
|
|
317
298
|
if (is_empty()) { return PREAMBLE_LONGS_EMPTY << 3; }
|
|
318
299
|
size_t num_bytes = (r_ == 0 ? PREAMBLE_LONGS_WARMUP : PREAMBLE_LONGS_FULL) << 3;
|
|
319
300
|
num_bytes += h_ * sizeof(double); // weights
|
|
@@ -325,9 +306,9 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes(const SerDe&) const {
|
|
|
325
306
|
}
|
|
326
307
|
|
|
327
308
|
// implementation for all other types
|
|
328
|
-
template<typename T, typename
|
|
309
|
+
template<typename T, typename A>
|
|
329
310
|
template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
|
|
330
|
-
size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes(const SerDe& sd) const {
|
|
311
|
+
size_t var_opt_sketch<T, A>::get_serialized_size_bytes(const SerDe& sd) const {
|
|
331
312
|
if (is_empty()) { return PREAMBLE_LONGS_EMPTY << 3; }
|
|
332
313
|
size_t num_bytes = (r_ == 0 ? PREAMBLE_LONGS_WARMUP : PREAMBLE_LONGS_FULL) << 3;
|
|
333
314
|
num_bytes += h_ * sizeof(double); // weights
|
|
@@ -340,9 +321,9 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes(const SerDe& sd) const {
|
|
|
340
321
|
return num_bytes;
|
|
341
322
|
}
|
|
342
323
|
|
|
343
|
-
template<typename T, typename
|
|
324
|
+
template<typename T, typename A>
|
|
344
325
|
template<typename SerDe>
|
|
345
|
-
std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
|
|
326
|
+
std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
|
|
346
327
|
const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
|
|
347
328
|
std::vector<uint8_t, AllocU8<A>> bytes(size, 0, allocator_);
|
|
348
329
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
|
@@ -414,9 +395,9 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
|
|
|
414
395
|
return bytes;
|
|
415
396
|
}
|
|
416
397
|
|
|
417
|
-
template<typename T, typename
|
|
398
|
+
template<typename T, typename A>
|
|
418
399
|
template<typename SerDe>
|
|
419
|
-
void var_opt_sketch<T,S,A>::serialize(std::ostream& os, const SerDe& sd) const {
|
|
400
|
+
void var_opt_sketch<T, A>::serialize(std::ostream& os, const SerDe& sd) const {
|
|
420
401
|
const bool empty = (h_ == 0) && (r_ == 0);
|
|
421
402
|
|
|
422
403
|
const uint8_t preLongs = (empty ? PREAMBLE_LONGS_EMPTY
|
|
@@ -477,14 +458,9 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os, const SerDe& sd) const {
|
|
|
477
458
|
}
|
|
478
459
|
}
|
|
479
460
|
|
|
480
|
-
template<typename T, typename
|
|
481
|
-
var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size_t size, const A& allocator) {
|
|
482
|
-
return deserialize(bytes, size, S(), allocator);
|
|
483
|
-
}
|
|
484
|
-
|
|
485
|
-
template<typename T, typename S, typename A>
|
|
461
|
+
template<typename T, typename A>
|
|
486
462
|
template<typename SerDe>
|
|
487
|
-
var_opt_sketch<T,
|
|
463
|
+
var_opt_sketch<T, A> var_opt_sketch<T, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
|
|
488
464
|
ensure_minimum_memory(size, 8);
|
|
489
465
|
const char* ptr = static_cast<const char*>(bytes);
|
|
490
466
|
const char* base = ptr;
|
|
@@ -510,7 +486,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
|
|
|
510
486
|
const bool is_gadget = flags & GADGET_FLAG_MASK;
|
|
511
487
|
|
|
512
488
|
if (is_empty) {
|
|
513
|
-
return var_opt_sketch
|
|
489
|
+
return var_opt_sketch(k, rf, is_gadget, allocator);
|
|
514
490
|
}
|
|
515
491
|
|
|
516
492
|
// second and third prelongs
|
|
@@ -578,14 +554,9 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
|
|
|
578
554
|
std::move(items), std::move(weights), num_marks_in_h, std::move(marks), allocator);
|
|
579
555
|
}
|
|
580
556
|
|
|
581
|
-
template<typename T, typename
|
|
582
|
-
var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const A& allocator) {
|
|
583
|
-
return deserialize(is, S(), allocator);
|
|
584
|
-
}
|
|
585
|
-
|
|
586
|
-
template<typename T, typename S, typename A>
|
|
557
|
+
template<typename T, typename A>
|
|
587
558
|
template<typename SerDe>
|
|
588
|
-
var_opt_sketch<T,
|
|
559
|
+
var_opt_sketch<T, A> var_opt_sketch<T, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
|
|
589
560
|
const auto first_byte = read<uint8_t>(is);
|
|
590
561
|
uint8_t preamble_longs = first_byte & 0x3f;
|
|
591
562
|
const resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
|
|
@@ -604,7 +575,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const
|
|
|
604
575
|
if (!is.good())
|
|
605
576
|
throw std::runtime_error("error reading from std::istream");
|
|
606
577
|
else
|
|
607
|
-
return var_opt_sketch
|
|
578
|
+
return var_opt_sketch(k, rf, is_gadget, allocator);
|
|
608
579
|
}
|
|
609
580
|
|
|
610
581
|
// second and third prelongs
|
|
@@ -668,13 +639,13 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const
|
|
|
668
639
|
std::move(items), std::move(weights), num_marks_in_h, std::move(marks), allocator);
|
|
669
640
|
}
|
|
670
641
|
|
|
671
|
-
template<typename T, typename
|
|
672
|
-
bool var_opt_sketch<T,
|
|
642
|
+
template<typename T, typename A>
|
|
643
|
+
bool var_opt_sketch<T, A>::is_empty() const {
|
|
673
644
|
return (h_ == 0 && r_ == 0);
|
|
674
645
|
}
|
|
675
646
|
|
|
676
|
-
template<typename T, typename
|
|
677
|
-
void var_opt_sketch<T,S,A>::reset() {
|
|
647
|
+
template<typename T, typename A>
|
|
648
|
+
void var_opt_sketch<T, A>::reset() {
|
|
678
649
|
const uint32_t prev_alloc = curr_items_alloc_;
|
|
679
650
|
const uint32_t ceiling_lg_k = to_log_2(ceiling_power_of_2(k_));
|
|
680
651
|
const uint32_t initial_lg_size = starting_sub_multiple(ceiling_lg_k, rf_, MIN_LG_ARR_ITEMS);
|
|
@@ -718,34 +689,34 @@ void var_opt_sketch<T,S,A>::reset() {
|
|
|
718
689
|
filled_data_ = false;
|
|
719
690
|
}
|
|
720
691
|
|
|
721
|
-
template<typename T, typename
|
|
722
|
-
uint64_t var_opt_sketch<T,
|
|
692
|
+
template<typename T, typename A>
|
|
693
|
+
uint64_t var_opt_sketch<T, A>::get_n() const {
|
|
723
694
|
return n_;
|
|
724
695
|
}
|
|
725
696
|
|
|
726
|
-
template<typename T, typename
|
|
727
|
-
uint32_t var_opt_sketch<T,
|
|
697
|
+
template<typename T, typename A>
|
|
698
|
+
uint32_t var_opt_sketch<T, A>::get_k() const {
|
|
728
699
|
return k_;
|
|
729
700
|
}
|
|
730
701
|
|
|
731
|
-
template<typename T, typename
|
|
732
|
-
uint32_t var_opt_sketch<T,
|
|
702
|
+
template<typename T, typename A>
|
|
703
|
+
uint32_t var_opt_sketch<T, A>::get_num_samples() const {
|
|
733
704
|
const uint32_t num_in_sketch = h_ + r_;
|
|
734
705
|
return (num_in_sketch < k_ ? num_in_sketch : k_);
|
|
735
706
|
}
|
|
736
707
|
|
|
737
|
-
template<typename T, typename
|
|
738
|
-
void var_opt_sketch<T,
|
|
708
|
+
template<typename T, typename A>
|
|
709
|
+
void var_opt_sketch<T, A>::update(const T& item, double weight) {
|
|
739
710
|
update(item, weight, false);
|
|
740
711
|
}
|
|
741
712
|
|
|
742
|
-
template<typename T, typename
|
|
743
|
-
void var_opt_sketch<T,
|
|
713
|
+
template<typename T, typename A>
|
|
714
|
+
void var_opt_sketch<T, A>::update(T&& item, double weight) {
|
|
744
715
|
update(std::move(item), weight, false);
|
|
745
716
|
}
|
|
746
717
|
|
|
747
|
-
template<typename T, typename
|
|
748
|
-
string<A> var_opt_sketch<T,S,A>::to_string() const {
|
|
718
|
+
template<typename T, typename A>
|
|
719
|
+
string<A> var_opt_sketch<T, A>::to_string() const {
|
|
749
720
|
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
|
750
721
|
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
|
751
722
|
std::ostringstream os;
|
|
@@ -760,8 +731,8 @@ string<A> var_opt_sketch<T,S,A>::to_string() const {
|
|
|
760
731
|
return string<A>(os.str().c_str(), allocator_);
|
|
761
732
|
}
|
|
762
733
|
|
|
763
|
-
template<typename T, typename
|
|
764
|
-
string<A> var_opt_sketch<T,S,A>::items_to_string() const {
|
|
734
|
+
template<typename T, typename A>
|
|
735
|
+
string<A> var_opt_sketch<T, A>::items_to_string() const {
|
|
765
736
|
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
|
766
737
|
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
|
767
738
|
std::ostringstream os;
|
|
@@ -774,8 +745,8 @@ string<A> var_opt_sketch<T,S,A>::items_to_string() const {
|
|
|
774
745
|
return string<A>(os.str().c_str(), allocator_);
|
|
775
746
|
}
|
|
776
747
|
|
|
777
|
-
template<typename T, typename
|
|
778
|
-
string<A> var_opt_sketch<T,
|
|
748
|
+
template<typename T, typename A>
|
|
749
|
+
string<A> var_opt_sketch<T, A>::items_to_string(bool print_gap) const {
|
|
779
750
|
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
|
780
751
|
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
|
781
752
|
std::ostringstream os;
|
|
@@ -798,9 +769,9 @@ string<A> var_opt_sketch<T,S,A>::items_to_string(bool print_gap) const {
|
|
|
798
769
|
return string<A>(os.str().c_str(), allocator_);
|
|
799
770
|
}
|
|
800
771
|
|
|
801
|
-
template<typename T, typename
|
|
772
|
+
template<typename T, typename A>
|
|
802
773
|
template<typename O>
|
|
803
|
-
void var_opt_sketch<T,
|
|
774
|
+
void var_opt_sketch<T, A>::update(O&& item, double weight, bool mark) {
|
|
804
775
|
if (weight < 0.0 || std::isnan(weight) || std::isinf(weight)) {
|
|
805
776
|
throw std::invalid_argument("Item weights must be nonnegative and finite. Found: "
|
|
806
777
|
+ std::to_string(weight));
|
|
@@ -838,9 +809,9 @@ void var_opt_sketch<T,S,A>::update(O&& item, double weight, bool mark) {
|
|
|
838
809
|
}
|
|
839
810
|
}
|
|
840
811
|
|
|
841
|
-
template<typename T, typename
|
|
812
|
+
template<typename T, typename A>
|
|
842
813
|
template<typename O>
|
|
843
|
-
void var_opt_sketch<T,
|
|
814
|
+
void var_opt_sketch<T, A>::update_warmup_phase(O&& item, double weight, bool mark) {
|
|
844
815
|
// seems overly cautious
|
|
845
816
|
if (r_ > 0 || m_ != 0 || h_ > k_) throw std::logic_error("invalid sketch state during warmup");
|
|
846
817
|
|
|
@@ -868,14 +839,15 @@ void var_opt_sketch<T,S,A>::update_warmup_phase(O&& item, double weight, bool ma
|
|
|
868
839
|
would appear to the right of the R items in a hypothetical reverse-sorted
|
|
869
840
|
list. It is easy to prove that it is light enough to be part of this
|
|
870
841
|
round's downsampling */
|
|
871
|
-
template<typename T, typename
|
|
842
|
+
template<typename T, typename A>
|
|
872
843
|
template<typename O>
|
|
873
|
-
void var_opt_sketch<T,
|
|
844
|
+
void var_opt_sketch<T, A>::update_light(O&& item, double weight, bool mark) {
|
|
874
845
|
if (r_ == 0 || (r_ + h_) != k_) throw std::logic_error("invalid sketch state during light warmup");
|
|
875
846
|
|
|
876
847
|
const uint32_t m_slot = h_; // index of the gap, which becomes the M region
|
|
877
848
|
if (filled_data_) {
|
|
878
|
-
data_[m_slot]
|
|
849
|
+
if (&data_[m_slot] != &item)
|
|
850
|
+
data_[m_slot] = std::forward<O>(item);
|
|
879
851
|
} else {
|
|
880
852
|
new (&data_[m_slot]) T(std::forward<O>(item));
|
|
881
853
|
filled_data_ = true;
|
|
@@ -895,9 +867,9 @@ void var_opt_sketch<T,S,A>::update_light(O&& item, double weight, bool mark) {
|
|
|
895
867
|
In other words, it might go into the heap and then come right back out,
|
|
896
868
|
but that should be okay because pseudo_heavy items cannot predominate
|
|
897
869
|
in long streams unless (max wt) / (min wt) > o(exp(N)) */
|
|
898
|
-
template<typename T, typename
|
|
870
|
+
template<typename T, typename A>
|
|
899
871
|
template<typename O>
|
|
900
|
-
void var_opt_sketch<T,
|
|
872
|
+
void var_opt_sketch<T, A>::update_heavy_general(O&& item, double weight, bool mark) {
|
|
901
873
|
if (r_ < 2 || m_ != 0 || (r_ + h_) != k_) throw std::logic_error("invalid sketch state during heavy general update");
|
|
902
874
|
|
|
903
875
|
// put into H, although may come back out momentarily
|
|
@@ -909,9 +881,9 @@ void var_opt_sketch<T,S,A>::update_heavy_general(O&& item, double weight, bool m
|
|
|
909
881
|
/* The analysis of this case is similar to that of the general heavy case.
|
|
910
882
|
The one small technical difference is that since R < 2, we must grab an M item
|
|
911
883
|
to have a valid starting point for continue_by_growing_candidate_set () */
|
|
912
|
-
template<typename T, typename
|
|
884
|
+
template<typename T, typename A>
|
|
913
885
|
template<typename O>
|
|
914
|
-
void var_opt_sketch<T,
|
|
886
|
+
void var_opt_sketch<T, A>::update_heavy_r_eq1(O&& item, double weight, bool mark) {
|
|
915
887
|
if (r_ != 1 || m_ != 0 || (r_ + h_) != k_) throw std::logic_error("invalid sketch state during heavy r=1 update");
|
|
916
888
|
|
|
917
889
|
push(std::forward<O>(item), weight, mark); // new item into H
|
|
@@ -929,8 +901,8 @@ void var_opt_sketch<T,S,A>::update_heavy_r_eq1(O&& item, double weight, bool mar
|
|
|
929
901
|
* <p>Subject to certain pre-conditions, decreasing k causes tau to increase. This fact is used by
|
|
930
902
|
* the unioning algorithm to force "marked" items out of H and into the reservoir region.</p>
|
|
931
903
|
*/
|
|
932
|
-
template<typename T, typename
|
|
933
|
-
void var_opt_sketch<T,S,A>::decrease_k_by_1() {
|
|
904
|
+
template<typename T, typename A>
|
|
905
|
+
void var_opt_sketch<T, A>::decrease_k_by_1() {
|
|
934
906
|
if (k_ <= 1) {
|
|
935
907
|
throw std::logic_error("Cannot decrease k below 1 in union");
|
|
936
908
|
}
|
|
@@ -952,9 +924,10 @@ void var_opt_sketch<T,S,A>::decrease_k_by_1() {
|
|
|
952
924
|
// first, slide the R zone to the left by 1, temporarily filling the gap
|
|
953
925
|
const uint32_t old_gap_idx = h_;
|
|
954
926
|
const uint32_t old_final_r_idx = (h_ + 1 + r_) - 1;
|
|
955
|
-
|
|
927
|
+
if (old_final_r_idx != k_) throw std::logic_error("gadget in invalid state");
|
|
956
928
|
|
|
957
929
|
swap_values(old_final_r_idx, old_gap_idx);
|
|
930
|
+
filled_data_ = true; // we just filled the gap, and no need to check previous state
|
|
958
931
|
|
|
959
932
|
// now we pull an item out of H; any item is ok, but if we grab the rightmost and then
|
|
960
933
|
// reduce h_, the heap invariant will be preserved (and the gap will be restored), plus
|
|
@@ -987,8 +960,8 @@ void var_opt_sketch<T,S,A>::decrease_k_by_1() {
|
|
|
987
960
|
}
|
|
988
961
|
}
|
|
989
962
|
|
|
990
|
-
template<typename T, typename
|
|
991
|
-
void var_opt_sketch<T,
|
|
963
|
+
template<typename T, typename A>
|
|
964
|
+
void var_opt_sketch<T, A>::allocate_data_arrays(uint32_t tgt_size, bool use_marks) {
|
|
992
965
|
filled_data_ = false;
|
|
993
966
|
|
|
994
967
|
data_ = allocator_.allocate(tgt_size);
|
|
@@ -1001,8 +974,8 @@ void var_opt_sketch<T,S,A>::allocate_data_arrays(uint32_t tgt_size, bool use_mar
|
|
|
1001
974
|
}
|
|
1002
975
|
}
|
|
1003
976
|
|
|
1004
|
-
template<typename T, typename
|
|
1005
|
-
void var_opt_sketch<T,
|
|
977
|
+
template<typename T, typename A>
|
|
978
|
+
void var_opt_sketch<T, A>::grow_data_arrays() {
|
|
1006
979
|
const uint32_t prev_size = curr_items_alloc_;
|
|
1007
980
|
curr_items_alloc_ = get_adjusted_size(k_, curr_items_alloc_ << rf_);
|
|
1008
981
|
if (curr_items_alloc_ == k_) {
|
|
@@ -1038,8 +1011,8 @@ void var_opt_sketch<T,S,A>::grow_data_arrays() {
|
|
|
1038
1011
|
}
|
|
1039
1012
|
}
|
|
1040
1013
|
|
|
1041
|
-
template<typename T, typename
|
|
1042
|
-
void var_opt_sketch<T,S,A>::transition_from_warmup() {
|
|
1014
|
+
template<typename T, typename A>
|
|
1015
|
+
void var_opt_sketch<T, A>::transition_from_warmup() {
|
|
1043
1016
|
// Move the 2 lightest items from H to M
|
|
1044
1017
|
// But the lighter really belongs in R, so update counts to reflect that
|
|
1045
1018
|
convert_to_heap();
|
|
@@ -1061,8 +1034,8 @@ void var_opt_sketch<T,S,A>::transition_from_warmup() {
|
|
|
1061
1034
|
grow_candidate_set(weights_[k_ - 1] + total_wt_r_, 2);
|
|
1062
1035
|
}
|
|
1063
1036
|
|
|
1064
|
-
template<typename T, typename
|
|
1065
|
-
void var_opt_sketch<T,S,A>::convert_to_heap() {
|
|
1037
|
+
template<typename T, typename A>
|
|
1038
|
+
void var_opt_sketch<T, A>::convert_to_heap() {
|
|
1066
1039
|
if (h_ < 2) {
|
|
1067
1040
|
return; // nothing to do
|
|
1068
1041
|
}
|
|
@@ -1081,8 +1054,8 @@ void var_opt_sketch<T,S,A>::convert_to_heap() {
|
|
|
1081
1054
|
//}
|
|
1082
1055
|
}
|
|
1083
1056
|
|
|
1084
|
-
template<typename T, typename
|
|
1085
|
-
void var_opt_sketch<T,
|
|
1057
|
+
template<typename T, typename A>
|
|
1058
|
+
void var_opt_sketch<T, A>::restore_towards_leaves(uint32_t slot_in) {
|
|
1086
1059
|
const uint32_t last_slot = h_ - 1;
|
|
1087
1060
|
if (h_ == 0 || slot_in > last_slot) throw std::logic_error("invalid heap state");
|
|
1088
1061
|
|
|
@@ -1109,8 +1082,8 @@ void var_opt_sketch<T,S,A>::restore_towards_leaves(uint32_t slot_in) {
|
|
|
1109
1082
|
}
|
|
1110
1083
|
}
|
|
1111
1084
|
|
|
1112
|
-
template<typename T, typename
|
|
1113
|
-
void var_opt_sketch<T,
|
|
1085
|
+
template<typename T, typename A>
|
|
1086
|
+
void var_opt_sketch<T, A>::restore_towards_root(uint32_t slot_in) {
|
|
1114
1087
|
uint32_t slot = slot_in;
|
|
1115
1088
|
uint32_t p = (((slot + 1) / 2) - 1); // valid if slot >= 1
|
|
1116
1089
|
while ((slot > 0) && (weights_[slot] < weights_[p])) {
|
|
@@ -1120,11 +1093,12 @@ void var_opt_sketch<T,S,A>::restore_towards_root(uint32_t slot_in) {
|
|
|
1120
1093
|
}
|
|
1121
1094
|
}
|
|
1122
1095
|
|
|
1123
|
-
template<typename T, typename
|
|
1096
|
+
template<typename T, typename A>
|
|
1124
1097
|
template<typename O>
|
|
1125
|
-
void var_opt_sketch<T,
|
|
1098
|
+
void var_opt_sketch<T, A>::push(O&& item, double wt, bool mark) {
|
|
1126
1099
|
if (filled_data_) {
|
|
1127
|
-
data_[h_]
|
|
1100
|
+
if (&data_[h_] != &item)
|
|
1101
|
+
data_[h_] = std::forward<O>(item);
|
|
1128
1102
|
} else {
|
|
1129
1103
|
new (&data_[h_]) T(std::forward<O>(item));
|
|
1130
1104
|
filled_data_ = true;
|
|
@@ -1139,8 +1113,8 @@ void var_opt_sketch<T,S,A>::push(O&& item, double wt, bool mark) {
|
|
|
1139
1113
|
restore_towards_root(h_ - 1); // need use old h_, but want accurate h_
|
|
1140
1114
|
}
|
|
1141
1115
|
|
|
1142
|
-
template<typename T, typename
|
|
1143
|
-
void var_opt_sketch<T,
|
|
1116
|
+
template<typename T, typename A>
|
|
1117
|
+
void var_opt_sketch<T, A>::pop_min_to_m_region() {
|
|
1144
1118
|
if (h_ == 0 || (h_ + m_ + r_ != k_ + 1))
|
|
1145
1119
|
throw std::logic_error("invalid heap state popping min to M region");
|
|
1146
1120
|
|
|
@@ -1164,8 +1138,8 @@ void var_opt_sketch<T,S,A>::pop_min_to_m_region() {
|
|
|
1164
1138
|
}
|
|
1165
1139
|
|
|
1166
1140
|
|
|
1167
|
-
template<typename T, typename
|
|
1168
|
-
void var_opt_sketch<T,S,A>::swap_values(uint32_t src, uint32_t dst) {
|
|
1141
|
+
template<typename T, typename A>
|
|
1142
|
+
void var_opt_sketch<T, A>::swap_values(uint32_t src, uint32_t dst) {
|
|
1169
1143
|
std::swap(data_[src], data_[dst]);
|
|
1170
1144
|
std::swap(weights_[src], weights_[dst]);
|
|
1171
1145
|
|
|
@@ -1182,8 +1156,8 @@ void var_opt_sketch<T,S,A>::swap_values(uint32_t src, uint32_t dst) {
|
|
|
1182
1156
|
of cands is at least 2. We will now grow the candidate set as much as possible
|
|
1183
1157
|
by pulling sufficiently light items from h to m.
|
|
1184
1158
|
*/
|
|
1185
|
-
template<typename T, typename
|
|
1186
|
-
void var_opt_sketch<T,
|
|
1159
|
+
template<typename T, typename A>
|
|
1160
|
+
void var_opt_sketch<T, A>::grow_candidate_set(double wt_cands, uint32_t num_cands) {
|
|
1187
1161
|
if ((h_ + m_ + r_ != k_ + 1) || (num_cands < 1) || (num_cands != m_ + r_) || (m_ >= 2))
|
|
1188
1162
|
throw std::logic_error("invariant violated when growing candidate set");
|
|
1189
1163
|
|
|
@@ -1206,8 +1180,8 @@ void var_opt_sketch<T,S,A>::grow_candidate_set(double wt_cands, uint32_t num_can
|
|
|
1206
1180
|
downsample_candidate_set(wt_cands, num_cands);
|
|
1207
1181
|
}
|
|
1208
1182
|
|
|
1209
|
-
template<typename T, typename
|
|
1210
|
-
void var_opt_sketch<T,
|
|
1183
|
+
template<typename T, typename A>
|
|
1184
|
+
void var_opt_sketch<T, A>::downsample_candidate_set(double wt_cands, uint32_t num_cands) {
|
|
1211
1185
|
if (num_cands < 2 || h_ + num_cands != k_ + 1)
|
|
1212
1186
|
throw std::logic_error("invalid num_cands when downsampling");
|
|
1213
1187
|
|
|
@@ -1225,17 +1199,16 @@ void var_opt_sketch<T,S,A>::downsample_candidate_set(double wt_cands, uint32_t n
|
|
|
1225
1199
|
weights_[j] = -1.0;
|
|
1226
1200
|
}
|
|
1227
1201
|
|
|
1228
|
-
// The next
|
|
1202
|
+
// The next line works even when delete_slot == leftmost_cand_slot
|
|
1229
1203
|
data_[delete_slot] = std::move(data_[leftmost_cand_slot]);
|
|
1230
|
-
// cannot set data_[leftmost_cand_slot] to null since not uisng T*
|
|
1231
1204
|
|
|
1232
1205
|
m_ = 0;
|
|
1233
1206
|
r_ = num_cands - 1;
|
|
1234
1207
|
total_wt_r_ = wt_cands;
|
|
1235
1208
|
}
|
|
1236
1209
|
|
|
1237
|
-
template<typename T, typename
|
|
1238
|
-
uint32_t var_opt_sketch<T,
|
|
1210
|
+
template<typename T, typename A>
|
|
1211
|
+
uint32_t var_opt_sketch<T, A>::choose_delete_slot(double wt_cands, uint32_t num_cands) const {
|
|
1239
1212
|
if (r_ == 0) throw std::logic_error("choosing delete slot while in exact mode");
|
|
1240
1213
|
|
|
1241
1214
|
if (m_ == 0) {
|
|
@@ -1262,8 +1235,8 @@ uint32_t var_opt_sketch<T,S,A>::choose_delete_slot(double wt_cands, uint32_t num
|
|
|
1262
1235
|
}
|
|
1263
1236
|
}
|
|
1264
1237
|
|
|
1265
|
-
template<typename T, typename
|
|
1266
|
-
uint32_t var_opt_sketch<T,
|
|
1238
|
+
template<typename T, typename A>
|
|
1239
|
+
uint32_t var_opt_sketch<T, A>::choose_weighted_delete_slot(double wt_cands, uint32_t num_cands) const {
|
|
1267
1240
|
if (m_ < 1) throw std::logic_error("must have weighted delete slot");
|
|
1268
1241
|
|
|
1269
1242
|
const uint32_t offset = h_;
|
|
@@ -1286,8 +1259,8 @@ uint32_t var_opt_sketch<T,S,A>::choose_weighted_delete_slot(double wt_cands, uin
|
|
|
1286
1259
|
return final_m + 1;
|
|
1287
1260
|
}
|
|
1288
1261
|
|
|
1289
|
-
template<typename T, typename
|
|
1290
|
-
uint32_t var_opt_sketch<T,
|
|
1262
|
+
template<typename T, typename A>
|
|
1263
|
+
uint32_t var_opt_sketch<T, A>::pick_random_slot_in_r() const {
|
|
1291
1264
|
if (r_ == 0) throw std::logic_error("r_ = 0 when picking slot in R region");
|
|
1292
1265
|
const uint32_t offset = h_ + m_;
|
|
1293
1266
|
if (r_ == 1) {
|
|
@@ -1297,32 +1270,32 @@ uint32_t var_opt_sketch<T,S,A>::pick_random_slot_in_r() const {
|
|
|
1297
1270
|
}
|
|
1298
1271
|
}
|
|
1299
1272
|
|
|
1300
|
-
template<typename T, typename
|
|
1301
|
-
double var_opt_sketch<T,
|
|
1273
|
+
template<typename T, typename A>
|
|
1274
|
+
double var_opt_sketch<T, A>::peek_min() const {
|
|
1302
1275
|
if (h_ == 0) throw std::logic_error("h_ = 0 when checking min in H region");
|
|
1303
1276
|
return weights_[0];
|
|
1304
1277
|
}
|
|
1305
1278
|
|
|
1306
|
-
template<typename T, typename
|
|
1307
|
-
inline bool var_opt_sketch<T,
|
|
1279
|
+
template<typename T, typename A>
|
|
1280
|
+
inline bool var_opt_sketch<T, A>::is_marked(uint32_t idx) const {
|
|
1308
1281
|
return marks_ == nullptr ? false : marks_[idx];
|
|
1309
1282
|
}
|
|
1310
1283
|
|
|
1311
|
-
template<typename T, typename
|
|
1312
|
-
double var_opt_sketch<T,
|
|
1284
|
+
template<typename T, typename A>
|
|
1285
|
+
double var_opt_sketch<T, A>::get_tau() const {
|
|
1313
1286
|
return r_ == 0 ? std::nan("1") : (total_wt_r_ / r_);
|
|
1314
1287
|
}
|
|
1315
1288
|
|
|
1316
|
-
template<typename T, typename
|
|
1317
|
-
void var_opt_sketch<T,
|
|
1289
|
+
template<typename T, typename A>
|
|
1290
|
+
void var_opt_sketch<T, A>::strip_marks() {
|
|
1318
1291
|
if (marks_ == nullptr) throw std::logic_error("request to strip marks from non-gadget");
|
|
1319
1292
|
num_marks_in_h_ = 0;
|
|
1320
1293
|
AllocBool(allocator_).deallocate(marks_, curr_items_alloc_);
|
|
1321
1294
|
marks_ = nullptr;
|
|
1322
1295
|
}
|
|
1323
1296
|
|
|
1324
|
-
template<typename T, typename
|
|
1325
|
-
void var_opt_sketch<T,
|
|
1297
|
+
template<typename T, typename A>
|
|
1298
|
+
void var_opt_sketch<T, A>::check_preamble_longs(uint8_t preamble_longs, uint8_t flags) {
|
|
1326
1299
|
const bool is_empty(flags & EMPTY_FLAG_MASK);
|
|
1327
1300
|
|
|
1328
1301
|
if (is_empty) {
|
|
@@ -1342,8 +1315,8 @@ void var_opt_sketch<T,S,A>::check_preamble_longs(uint8_t preamble_longs, uint8_t
|
|
|
1342
1315
|
}
|
|
1343
1316
|
}
|
|
1344
1317
|
|
|
1345
|
-
template<typename T, typename
|
|
1346
|
-
void var_opt_sketch<T,
|
|
1318
|
+
template<typename T, typename A>
|
|
1319
|
+
void var_opt_sketch<T, A>::check_family_and_serialization_version(uint8_t family_id, uint8_t ser_ver) {
|
|
1347
1320
|
if (family_id == FAMILY_ID) {
|
|
1348
1321
|
if (ser_ver != SER_VER) {
|
|
1349
1322
|
throw std::invalid_argument("Possible corruption: VarOpt serialization version must be "
|
|
@@ -1357,8 +1330,8 @@ void var_opt_sketch<T,S,A>::check_family_and_serialization_version(uint8_t famil
|
|
|
1357
1330
|
+ std::to_string(FAMILY_ID) + ". Found: " + std::to_string(family_id));
|
|
1358
1331
|
}
|
|
1359
1332
|
|
|
1360
|
-
template<typename T, typename
|
|
1361
|
-
uint32_t var_opt_sketch<T,
|
|
1333
|
+
template<typename T, typename A>
|
|
1334
|
+
uint32_t var_opt_sketch<T, A>::validate_and_get_target_size(uint32_t preamble_longs, uint32_t k, uint64_t n,
|
|
1362
1335
|
uint32_t h, uint32_t r, resize_factor rf) {
|
|
1363
1336
|
if (k == 0 || k > MAX_K) {
|
|
1364
1337
|
throw std::invalid_argument("k must be at least 1 and less than 2^31 - 1");
|
|
@@ -1403,9 +1376,9 @@ uint32_t var_opt_sketch<T, S, A>::validate_and_get_target_size(uint32_t preamble
|
|
|
1403
1376
|
return array_size;
|
|
1404
1377
|
}
|
|
1405
1378
|
|
|
1406
|
-
template<typename T, typename
|
|
1379
|
+
template<typename T, typename A>
|
|
1407
1380
|
template<typename P>
|
|
1408
|
-
subset_summary var_opt_sketch<T,
|
|
1381
|
+
subset_summary var_opt_sketch<T, A>::estimate_subset_sum(P predicate) const {
|
|
1409
1382
|
if (n_ == 0) {
|
|
1410
1383
|
return {0.0, 0.0, 0.0, 0.0};
|
|
1411
1384
|
}
|
|
@@ -1451,8 +1424,8 @@ subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(P predicate) const {
|
|
|
1451
1424
|
};
|
|
1452
1425
|
}
|
|
1453
1426
|
|
|
1454
|
-
template<typename T, typename
|
|
1455
|
-
class var_opt_sketch<T,
|
|
1427
|
+
template<typename T, typename A>
|
|
1428
|
+
class var_opt_sketch<T, A>::items_deleter {
|
|
1456
1429
|
public:
|
|
1457
1430
|
items_deleter(uint32_t num, const A& allocator) : num(num), h_count(0), r_count(0), allocator(allocator) {}
|
|
1458
1431
|
void set_h(uint32_t h) { h_count = h; }
|
|
@@ -1480,8 +1453,8 @@ class var_opt_sketch<T, S, A>::items_deleter {
|
|
|
1480
1453
|
A allocator;
|
|
1481
1454
|
};
|
|
1482
1455
|
|
|
1483
|
-
template<typename T, typename
|
|
1484
|
-
class var_opt_sketch<T,
|
|
1456
|
+
template<typename T, typename A>
|
|
1457
|
+
class var_opt_sketch<T, A>::weights_deleter {
|
|
1485
1458
|
public:
|
|
1486
1459
|
weights_deleter(uint32_t num, const A& allocator) : num(num), allocator(allocator) {}
|
|
1487
1460
|
void operator() (double* ptr) {
|
|
@@ -1494,8 +1467,8 @@ class var_opt_sketch<T, S, A>::weights_deleter {
|
|
|
1494
1467
|
AllocDouble allocator;
|
|
1495
1468
|
};
|
|
1496
1469
|
|
|
1497
|
-
template<typename T, typename
|
|
1498
|
-
class var_opt_sketch<T,
|
|
1470
|
+
template<typename T, typename A>
|
|
1471
|
+
class var_opt_sketch<T, A>::marks_deleter {
|
|
1499
1472
|
public:
|
|
1500
1473
|
marks_deleter(uint32_t num, const A& allocator) : num(num), allocator(allocator) {}
|
|
1501
1474
|
void operator() (bool* ptr) {
|
|
@@ -1509,20 +1482,20 @@ class var_opt_sketch<T, S, A>::marks_deleter {
|
|
|
1509
1482
|
};
|
|
1510
1483
|
|
|
1511
1484
|
|
|
1512
|
-
template<typename T, typename
|
|
1513
|
-
typename var_opt_sketch<T,
|
|
1514
|
-
return
|
|
1485
|
+
template<typename T, typename A>
|
|
1486
|
+
typename var_opt_sketch<T, A>::const_iterator var_opt_sketch<T, A>::begin() const {
|
|
1487
|
+
return const_iterator(*this, false);
|
|
1515
1488
|
}
|
|
1516
1489
|
|
|
1517
|
-
template<typename T, typename
|
|
1518
|
-
typename var_opt_sketch<T,
|
|
1519
|
-
return
|
|
1490
|
+
template<typename T, typename A>
|
|
1491
|
+
typename var_opt_sketch<T, A>::const_iterator var_opt_sketch<T, A>::end() const {
|
|
1492
|
+
return const_iterator(*this, true);
|
|
1520
1493
|
}
|
|
1521
1494
|
|
|
1522
1495
|
// -------- var_opt_sketch::const_iterator implementation ---------
|
|
1523
1496
|
|
|
1524
|
-
template<typename T, typename
|
|
1525
|
-
var_opt_sketch<T,
|
|
1497
|
+
template<typename T, typename A>
|
|
1498
|
+
var_opt_sketch<T, A>::const_iterator::const_iterator(const var_opt_sketch& sk, bool is_end) :
|
|
1526
1499
|
sk_(&sk),
|
|
1527
1500
|
cum_r_weight_(0.0),
|
|
1528
1501
|
r_item_wt_(sk.get_tau()),
|
|
@@ -1540,8 +1513,8 @@ var_opt_sketch<T,S,A>::const_iterator::const_iterator(const var_opt_sketch<T,S,A
|
|
|
1540
1513
|
if (idx_ == final_idx_) { sk_ = nullptr; }
|
|
1541
1514
|
}
|
|
1542
1515
|
|
|
1543
|
-
template<typename T, typename
|
|
1544
|
-
var_opt_sketch<T,
|
|
1516
|
+
template<typename T, typename A>
|
|
1517
|
+
var_opt_sketch<T, A>::const_iterator::const_iterator(const var_opt_sketch& sk, bool is_end, bool use_r_region) :
|
|
1545
1518
|
sk_(&sk),
|
|
1546
1519
|
cum_r_weight_(0.0),
|
|
1547
1520
|
r_item_wt_(sk.get_tau()),
|
|
@@ -1559,8 +1532,8 @@ var_opt_sketch<T,S,A>::const_iterator::const_iterator(const var_opt_sketch<T,S,A
|
|
|
1559
1532
|
}
|
|
1560
1533
|
|
|
1561
1534
|
|
|
1562
|
-
template<typename T,
|
|
1563
|
-
var_opt_sketch<T,
|
|
1535
|
+
template<typename T, typename A>
|
|
1536
|
+
var_opt_sketch<T, A>::const_iterator::const_iterator(const const_iterator& other) :
|
|
1564
1537
|
sk_(other.sk_),
|
|
1565
1538
|
cum_r_weight_(other.cum_r_weight_),
|
|
1566
1539
|
r_item_wt_(other.r_item_wt_),
|
|
@@ -1568,8 +1541,8 @@ var_opt_sketch<T, S, A>::const_iterator::const_iterator(const const_iterator& ot
|
|
|
1568
1541
|
final_idx_(other.final_idx_)
|
|
1569
1542
|
{}
|
|
1570
1543
|
|
|
1571
|
-
template<typename T,
|
|
1572
|
-
typename var_opt_sketch<T,
|
|
1544
|
+
template<typename T, typename A>
|
|
1545
|
+
typename var_opt_sketch<T, A>::const_iterator& var_opt_sketch<T, A>::const_iterator::operator++() {
|
|
1573
1546
|
++idx_;
|
|
1574
1547
|
|
|
1575
1548
|
if (idx_ == final_idx_) {
|
|
@@ -1582,27 +1555,27 @@ typename var_opt_sketch<T, S, A>::const_iterator& var_opt_sketch<T, S, A>::const
|
|
|
1582
1555
|
return *this;
|
|
1583
1556
|
}
|
|
1584
1557
|
|
|
1585
|
-
template<typename T,
|
|
1586
|
-
typename var_opt_sketch<T,
|
|
1558
|
+
template<typename T, typename A>
|
|
1559
|
+
typename var_opt_sketch<T, A>::const_iterator& var_opt_sketch<T, A>::const_iterator::operator++(int) {
|
|
1587
1560
|
const_iterator tmp(*this);
|
|
1588
1561
|
operator++();
|
|
1589
1562
|
return tmp;
|
|
1590
1563
|
}
|
|
1591
1564
|
|
|
1592
|
-
template<typename T, typename
|
|
1593
|
-
bool var_opt_sketch<T,
|
|
1565
|
+
template<typename T, typename A>
|
|
1566
|
+
bool var_opt_sketch<T, A>::const_iterator::operator==(const const_iterator& other) const {
|
|
1594
1567
|
if (sk_ != other.sk_) return false;
|
|
1595
1568
|
if (sk_ == nullptr) return true; // end (and we know other.sk_ is also null)
|
|
1596
1569
|
return idx_ == other.idx_;
|
|
1597
1570
|
}
|
|
1598
1571
|
|
|
1599
|
-
template<typename T, typename
|
|
1600
|
-
bool var_opt_sketch<T,
|
|
1572
|
+
template<typename T, typename A>
|
|
1573
|
+
bool var_opt_sketch<T, A>::const_iterator::operator!=(const const_iterator& other) const {
|
|
1601
1574
|
return !operator==(other);
|
|
1602
1575
|
}
|
|
1603
1576
|
|
|
1604
|
-
template<typename T, typename
|
|
1605
|
-
const std::pair<const T&, const double> var_opt_sketch<T,
|
|
1577
|
+
template<typename T, typename A>
|
|
1578
|
+
const std::pair<const T&, const double> var_opt_sketch<T, A>::const_iterator::operator*() const {
|
|
1606
1579
|
double wt;
|
|
1607
1580
|
if (idx_ < sk_->h_) {
|
|
1608
1581
|
wt = sk_->weights_[idx_];
|
|
@@ -1612,16 +1585,16 @@ const std::pair<const T&, const double> var_opt_sketch<T, S, A>::const_iterator:
|
|
|
1612
1585
|
return std::pair<const T&, const double>(sk_->data_[idx_], wt);
|
|
1613
1586
|
}
|
|
1614
1587
|
|
|
1615
|
-
template<typename T, typename
|
|
1616
|
-
bool var_opt_sketch<T,
|
|
1588
|
+
template<typename T, typename A>
|
|
1589
|
+
bool var_opt_sketch<T, A>::const_iterator::get_mark() const {
|
|
1617
1590
|
return sk_->marks_ == nullptr ? false : sk_->marks_[idx_];
|
|
1618
1591
|
}
|
|
1619
1592
|
|
|
1620
1593
|
|
|
1621
1594
|
// -------- var_opt_sketch::iterator implementation ---------
|
|
1622
1595
|
|
|
1623
|
-
template<typename T, typename
|
|
1624
|
-
var_opt_sketch<T,
|
|
1596
|
+
template<typename T, typename A>
|
|
1597
|
+
var_opt_sketch<T, A>::iterator::iterator(const var_opt_sketch& sk, bool is_end, bool use_r_region) :
|
|
1625
1598
|
sk_(&sk),
|
|
1626
1599
|
cum_r_weight_(0.0),
|
|
1627
1600
|
r_item_wt_(sk.get_tau()),
|
|
@@ -1638,8 +1611,8 @@ var_opt_sketch<T,S,A>::iterator::iterator(const var_opt_sketch<T,S,A>& sk, bool
|
|
|
1638
1611
|
if (idx_ == final_idx_) { sk_ = nullptr; }
|
|
1639
1612
|
}
|
|
1640
1613
|
|
|
1641
|
-
template<typename T,
|
|
1642
|
-
var_opt_sketch<T,
|
|
1614
|
+
template<typename T, typename A>
|
|
1615
|
+
var_opt_sketch<T, A>::iterator::iterator(const iterator& other) :
|
|
1643
1616
|
sk_(other.sk_),
|
|
1644
1617
|
cum_r_weight_(other.cum_r_weight_),
|
|
1645
1618
|
r_item_wt_(other.r_item_wt_),
|
|
@@ -1647,8 +1620,8 @@ var_opt_sketch<T, S, A>::iterator::iterator(const iterator& other) :
|
|
|
1647
1620
|
final_idx_(other.final_idx_)
|
|
1648
1621
|
{}
|
|
1649
1622
|
|
|
1650
|
-
template<typename T,
|
|
1651
|
-
typename var_opt_sketch<T,
|
|
1623
|
+
template<typename T, typename A>
|
|
1624
|
+
typename var_opt_sketch<T, A>::iterator& var_opt_sketch<T, A>::iterator::operator++() {
|
|
1652
1625
|
++idx_;
|
|
1653
1626
|
|
|
1654
1627
|
if (idx_ == final_idx_) {
|
|
@@ -1661,27 +1634,27 @@ typename var_opt_sketch<T, S, A>::iterator& var_opt_sketch<T, S, A>::iterator::o
|
|
|
1661
1634
|
return *this;
|
|
1662
1635
|
}
|
|
1663
1636
|
|
|
1664
|
-
template<typename T,
|
|
1665
|
-
typename var_opt_sketch<T,
|
|
1637
|
+
template<typename T, typename A>
|
|
1638
|
+
typename var_opt_sketch<T, A>::iterator& var_opt_sketch<T, A>::iterator::operator++(int) {
|
|
1666
1639
|
const_iterator tmp(*this);
|
|
1667
1640
|
operator++();
|
|
1668
1641
|
return tmp;
|
|
1669
1642
|
}
|
|
1670
1643
|
|
|
1671
|
-
template<typename T, typename
|
|
1672
|
-
bool var_opt_sketch<T,
|
|
1644
|
+
template<typename T, typename A>
|
|
1645
|
+
bool var_opt_sketch<T, A>::iterator::operator==(const iterator& other) const {
|
|
1673
1646
|
if (sk_ != other.sk_) return false;
|
|
1674
1647
|
if (sk_ == nullptr) return true; // end (and we know other.sk_ is also null)
|
|
1675
1648
|
return idx_ == other.idx_;
|
|
1676
1649
|
}
|
|
1677
1650
|
|
|
1678
|
-
template<typename T, typename
|
|
1679
|
-
bool var_opt_sketch<T,
|
|
1651
|
+
template<typename T, typename A>
|
|
1652
|
+
bool var_opt_sketch<T, A>::iterator::operator!=(const iterator& other) const {
|
|
1680
1653
|
return !operator==(other);
|
|
1681
1654
|
}
|
|
1682
1655
|
|
|
1683
|
-
template<typename T, typename
|
|
1684
|
-
std::pair<T&, double> var_opt_sketch<T,
|
|
1656
|
+
template<typename T, typename A>
|
|
1657
|
+
std::pair<T&, double> var_opt_sketch<T, A>::iterator::operator*() {
|
|
1685
1658
|
double wt;
|
|
1686
1659
|
if (idx_ < sk_->h_) {
|
|
1687
1660
|
wt = sk_->weights_[idx_];
|
|
@@ -1693,8 +1666,8 @@ std::pair<T&, double> var_opt_sketch<T, S, A>::iterator::operator*() {
|
|
|
1693
1666
|
return std::pair<T&, double>(sk_->data_[idx_], wt);
|
|
1694
1667
|
}
|
|
1695
1668
|
|
|
1696
|
-
template<typename T, typename
|
|
1697
|
-
bool var_opt_sketch<T,
|
|
1669
|
+
template<typename T, typename A>
|
|
1670
|
+
bool var_opt_sketch<T, A>::iterator::get_mark() const {
|
|
1698
1671
|
return sk_->marks_ == nullptr ? false : sk_->marks_[idx_];
|
|
1699
1672
|
}
|
|
1700
1673
|
|
|
@@ -1702,40 +1675,40 @@ bool var_opt_sketch<T, S, A>::iterator::get_mark() const {
|
|
|
1702
1675
|
* Checks if target sampling allocation is more than 50% of max sampling size.
|
|
1703
1676
|
* If so, returns max sampling size, otherwise passes through target size.
|
|
1704
1677
|
*/
|
|
1705
|
-
template<typename T, typename
|
|
1706
|
-
uint32_t var_opt_sketch<T,
|
|
1707
|
-
if (max_size
|
|
1678
|
+
template<typename T, typename A>
|
|
1679
|
+
uint32_t var_opt_sketch<T, A>::get_adjusted_size(uint32_t max_size, uint32_t resize_target) {
|
|
1680
|
+
if (max_size < (resize_target << 1)) {
|
|
1708
1681
|
return max_size;
|
|
1709
1682
|
}
|
|
1710
1683
|
return resize_target;
|
|
1711
1684
|
}
|
|
1712
1685
|
|
|
1713
|
-
template<typename T, typename
|
|
1714
|
-
uint32_t var_opt_sketch<T,
|
|
1686
|
+
template<typename T, typename A>
|
|
1687
|
+
uint32_t var_opt_sketch<T, A>::starting_sub_multiple(uint32_t lg_target, uint32_t lg_rf, uint32_t lg_min) {
|
|
1715
1688
|
return (lg_target <= lg_min)
|
|
1716
1689
|
? lg_min : (lg_rf == 0) ? lg_target
|
|
1717
1690
|
: (lg_target - lg_min) % lg_rf + lg_min;
|
|
1718
1691
|
}
|
|
1719
1692
|
|
|
1720
|
-
template<typename T, typename
|
|
1721
|
-
double var_opt_sketch<T,
|
|
1693
|
+
template<typename T, typename A>
|
|
1694
|
+
double var_opt_sketch<T, A>::pseudo_hypergeometric_ub_on_p(uint64_t n, uint32_t k, double sampling_rate) {
|
|
1722
1695
|
const double adjusted_kappa = DEFAULT_KAPPA * sqrt(1 - sampling_rate);
|
|
1723
1696
|
return bounds_binomial_proportions::approximate_upper_bound_on_p(n, k, adjusted_kappa);
|
|
1724
1697
|
}
|
|
1725
1698
|
|
|
1726
|
-
template<typename T, typename
|
|
1727
|
-
double var_opt_sketch<T,
|
|
1699
|
+
template<typename T, typename A>
|
|
1700
|
+
double var_opt_sketch<T, A>::pseudo_hypergeometric_lb_on_p(uint64_t n, uint32_t k, double sampling_rate) {
|
|
1728
1701
|
const double adjusted_kappa = DEFAULT_KAPPA * sqrt(1 - sampling_rate);
|
|
1729
1702
|
return bounds_binomial_proportions::approximate_lower_bound_on_p(n, k, adjusted_kappa);
|
|
1730
1703
|
}
|
|
1731
1704
|
|
|
1732
|
-
template<typename T, typename
|
|
1733
|
-
bool var_opt_sketch<T,
|
|
1705
|
+
template<typename T, typename A>
|
|
1706
|
+
bool var_opt_sketch<T, A>::is_power_of_2(uint32_t v) {
|
|
1734
1707
|
return v && !(v & (v - 1));
|
|
1735
1708
|
}
|
|
1736
1709
|
|
|
1737
|
-
template<typename T, typename
|
|
1738
|
-
uint32_t var_opt_sketch<T,
|
|
1710
|
+
template<typename T, typename A>
|
|
1711
|
+
uint32_t var_opt_sketch<T, A>::to_log_2(uint32_t v) {
|
|
1739
1712
|
if (is_power_of_2(v)) {
|
|
1740
1713
|
return count_trailing_zeros_in_u32(v);
|
|
1741
1714
|
} else {
|
|
@@ -1744,14 +1717,14 @@ uint32_t var_opt_sketch<T,S,A>::to_log_2(uint32_t v) {
|
|
|
1744
1717
|
}
|
|
1745
1718
|
|
|
1746
1719
|
// Returns an integer in the range [0, max_value) -- excludes max_value
|
|
1747
|
-
template<typename T, typename
|
|
1748
|
-
uint32_t var_opt_sketch<T,
|
|
1720
|
+
template<typename T, typename A>
|
|
1721
|
+
uint32_t var_opt_sketch<T, A>::next_int(uint32_t max_value) {
|
|
1749
1722
|
std::uniform_int_distribution<uint32_t> dist(0, max_value - 1);
|
|
1750
1723
|
return dist(random_utils::rand);
|
|
1751
1724
|
}
|
|
1752
1725
|
|
|
1753
|
-
template<typename T, typename
|
|
1754
|
-
double var_opt_sketch<T,
|
|
1726
|
+
template<typename T, typename A>
|
|
1727
|
+
double var_opt_sketch<T, A>::next_double_exclude_zero() {
|
|
1755
1728
|
double r = random_utils::next_double(random_utils::rand);
|
|
1756
1729
|
while (r == 0.0) {
|
|
1757
1730
|
r = random_utils::next_double(random_utils::rand);
|