datasketches 0.2.7 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/kll_wrapper.cpp +20 -20
- data/ext/datasketches/theta_wrapper.cpp +2 -2
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +9 -1
- data/vendor/datasketches-cpp/MANIFEST.in +21 -2
- data/vendor/datasketches-cpp/common/CMakeLists.txt +5 -2
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +10 -0
- data/vendor/datasketches-cpp/common/include/kolmogorov_smirnov_impl.hpp +6 -6
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +1 -0
- data/vendor/datasketches-cpp/common/include/{quantile_sketch_sorted_view.hpp → quantiles_sorted_view.hpp} +60 -25
- data/vendor/datasketches-cpp/common/include/quantiles_sorted_view_impl.hpp +125 -0
- data/vendor/datasketches-cpp/common/include/version.hpp.in +36 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +25 -6
- data/vendor/datasketches-cpp/common/test/quantiles_sorted_view_test.cpp +459 -0
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +28 -44
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +70 -78
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +11 -4
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +16 -9
- data/vendor/datasketches-cpp/fi/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +54 -41
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +0 -32
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +176 -233
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +337 -395
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +196 -232
- data/vendor/datasketches-cpp/kll/test/kll_sketch_validation.cpp +41 -31
- data/vendor/datasketches-cpp/pyproject.toml +17 -12
- data/vendor/datasketches-cpp/python/CMakeLists.txt +8 -1
- data/vendor/datasketches-cpp/python/datasketches/PySerDe.py +104 -0
- data/vendor/datasketches-cpp/python/datasketches/__init__.py +22 -0
- data/vendor/datasketches-cpp/python/include/py_serde.hpp +113 -0
- data/vendor/datasketches-cpp/python/jupyter/ThetaSketchNotebook.ipynb +31 -24
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +18 -0
- data/vendor/datasketches-cpp/python/src/__init__.py +17 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +9 -3
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +18 -54
- data/vendor/datasketches-cpp/python/src/py_serde.cpp +111 -0
- data/vendor/datasketches-cpp/python/src/quantiles_wrapper.cpp +17 -53
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +17 -55
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +62 -67
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +47 -14
- data/vendor/datasketches-cpp/python/tests/__init__.py +16 -0
- data/vendor/datasketches-cpp/python/tests/req_test.py +1 -1
- data/vendor/datasketches-cpp/python/tests/vo_test.py +25 -1
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch.hpp +135 -180
- data/vendor/datasketches-cpp/quantiles/include/quantiles_sketch_impl.hpp +205 -210
- data/vendor/datasketches-cpp/quantiles/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/quantiles/test/quantiles_compatibility_test.cpp +19 -18
- data/vendor/datasketches-cpp/quantiles/test/quantiles_sketch_test.cpp +240 -232
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +15 -9
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +35 -19
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +126 -147
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +265 -245
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +26 -26
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +116 -103
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +22 -46
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +180 -207
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +18 -39
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +75 -85
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +2 -2
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +4 -4
- data/vendor/datasketches-cpp/setup.py +14 -2
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +15 -25
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base.hpp +0 -9
- data/vendor/datasketches-cpp/theta/include/theta_update_sketch_base_impl.hpp +5 -5
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -1
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +2 -1
- data/vendor/datasketches-cpp/tox.ini +26 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +36 -12
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +16 -4
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +2 -1
- data/vendor/datasketches-cpp/tuple/test/engagement_test.cpp +299 -0
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +26 -0
- data/vendor/datasketches-cpp/version.cfg.in +1 -0
- metadata +14 -5
- data/vendor/datasketches-cpp/common/include/quantile_sketch_sorted_view_impl.hpp +0 -91
@@ -42,12 +42,12 @@ namespace datasketches {
|
|
42
42
|
* author Kevin Lang
|
43
43
|
* author Jon Malkin
|
44
44
|
*/
|
45
|
-
template<typename T, typename
|
46
|
-
var_opt_sketch<T,
|
47
|
-
var_opt_sketch
|
45
|
+
template<typename T, typename A>
|
46
|
+
var_opt_sketch<T, A>::var_opt_sketch(uint32_t k, resize_factor rf, const A& allocator) :
|
47
|
+
var_opt_sketch(k, rf, false, allocator) {}
|
48
48
|
|
49
|
-
template<typename T, typename
|
50
|
-
var_opt_sketch<T,
|
49
|
+
template<typename T, typename A>
|
50
|
+
var_opt_sketch<T, A>::var_opt_sketch(const var_opt_sketch& other) :
|
51
51
|
k_(other.k_),
|
52
52
|
h_(other.h_),
|
53
53
|
m_(other.m_),
|
@@ -83,8 +83,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other) :
|
|
83
83
|
}
|
84
84
|
}
|
85
85
|
|
86
|
-
template<typename T, typename
|
87
|
-
var_opt_sketch<T,
|
86
|
+
template<typename T, typename A>
|
87
|
+
var_opt_sketch<T, A>::var_opt_sketch(const var_opt_sketch& other, bool as_sketch, uint64_t adjusted_n) :
|
88
88
|
k_(other.k_),
|
89
89
|
h_(other.h_),
|
90
90
|
m_(other.m_),
|
@@ -120,27 +120,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(const var_opt_sketch& other, bool as_sketc
|
|
120
120
|
}
|
121
121
|
}
|
122
122
|
|
123
|
-
template<typename T, typename
|
124
|
-
var_opt_sketch<T,
|
125
|
-
uint32_t k, uint64_t n, uint32_t h_count, uint32_t r_count, double total_wt_r, const A& allocator) :
|
126
|
-
k_(k),
|
127
|
-
h_(h_count),
|
128
|
-
m_(0),
|
129
|
-
r_(r_count),
|
130
|
-
n_(n),
|
131
|
-
total_wt_r_(total_wt_r),
|
132
|
-
rf_(var_opt_constants::DEFAULT_RESIZE_FACTOR),
|
133
|
-
curr_items_alloc_(len),
|
134
|
-
filled_data_(n > k),
|
135
|
-
allocator_(allocator),
|
136
|
-
data_(data),
|
137
|
-
weights_(weights),
|
138
|
-
num_marks_in_h_(0),
|
139
|
-
marks_(nullptr)
|
140
|
-
{}
|
141
|
-
|
142
|
-
template<typename T, typename S, typename A>
|
143
|
-
var_opt_sketch<T,S,A>::var_opt_sketch(var_opt_sketch&& other) noexcept :
|
123
|
+
template<typename T, typename A>
|
124
|
+
var_opt_sketch<T, A>::var_opt_sketch(var_opt_sketch&& other) noexcept :
|
144
125
|
k_(other.k_),
|
145
126
|
h_(other.h_),
|
146
127
|
m_(other.m_),
|
@@ -161,8 +142,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(var_opt_sketch&& other) noexcept :
|
|
161
142
|
other.marks_ = nullptr;
|
162
143
|
}
|
163
144
|
|
164
|
-
template<typename T, typename
|
165
|
-
var_opt_sketch<T,
|
145
|
+
template<typename T, typename A>
|
146
|
+
var_opt_sketch<T, A>::var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadget, const A& allocator) :
|
166
147
|
k_(k), h_(0), m_(0), r_(0), n_(0), total_wt_r_(0.0), rf_(rf), allocator_(allocator) {
|
167
148
|
if (k == 0 || k_ > MAX_K) {
|
168
149
|
throw std::invalid_argument("k must be at least 1 and less than 2^31 - 1");
|
@@ -179,8 +160,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, resize_factor rf, bool is_gadg
|
|
179
160
|
num_marks_in_h_ = 0;
|
180
161
|
}
|
181
162
|
|
182
|
-
template<typename T, typename
|
183
|
-
var_opt_sketch<T,
|
163
|
+
template<typename T, typename A>
|
164
|
+
var_opt_sketch<T, A>::var_opt_sketch(uint32_t k, uint32_t h, uint32_t m, uint32_t r, uint64_t n, double total_wt_r, resize_factor rf,
|
184
165
|
uint32_t curr_items_alloc, bool filled_data, std::unique_ptr<T, items_deleter> items,
|
185
166
|
std::unique_ptr<double, weights_deleter> weights, uint32_t num_marks_in_h,
|
186
167
|
std::unique_ptr<bool, marks_deleter> marks, const A& allocator) :
|
@@ -201,8 +182,8 @@ var_opt_sketch<T,S,A>::var_opt_sketch(uint32_t k, uint32_t h, uint32_t m, uint32
|
|
201
182
|
{}
|
202
183
|
|
203
184
|
|
204
|
-
template<typename T, typename
|
205
|
-
var_opt_sketch<T,
|
185
|
+
template<typename T, typename A>
|
186
|
+
var_opt_sketch<T, A>::~var_opt_sketch() {
|
206
187
|
if (data_ != nullptr) {
|
207
188
|
if (filled_data_) {
|
208
189
|
// destroy everything
|
@@ -232,9 +213,9 @@ var_opt_sketch<T,S,A>::~var_opt_sketch() {
|
|
232
213
|
}
|
233
214
|
}
|
234
215
|
|
235
|
-
template<typename T, typename
|
236
|
-
var_opt_sketch<T,
|
237
|
-
var_opt_sketch
|
216
|
+
template<typename T, typename A>
|
217
|
+
var_opt_sketch<T, A>& var_opt_sketch<T, A>::operator=(const var_opt_sketch& other) {
|
218
|
+
var_opt_sketch sk_copy(other);
|
238
219
|
std::swap(k_, sk_copy.k_);
|
239
220
|
std::swap(h_, sk_copy.h_);
|
240
221
|
std::swap(m_, sk_copy.m_);
|
@@ -252,8 +233,8 @@ var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(const var_opt_sketch& ot
|
|
252
233
|
return *this;
|
253
234
|
}
|
254
235
|
|
255
|
-
template<typename T, typename
|
256
|
-
var_opt_sketch<T,
|
236
|
+
template<typename T, typename A>
|
237
|
+
var_opt_sketch<T, A>& var_opt_sketch<T, A>::operator=(var_opt_sketch&& other) {
|
257
238
|
std::swap(k_, other.k_);
|
258
239
|
std::swap(h_, other.h_);
|
259
240
|
std::swap(m_, other.m_);
|
@@ -311,9 +292,9 @@ var_opt_sketch<T,S,A>& var_opt_sketch<T,S,A>::operator=(var_opt_sketch&& other)
|
|
311
292
|
*/
|
312
293
|
|
313
294
|
// implementation for fixed-size arithmetic types (integral and floating point)
|
314
|
-
template<typename T, typename
|
295
|
+
template<typename T, typename A>
|
315
296
|
template<typename TT, typename SerDe, typename std::enable_if<std::is_arithmetic<TT>::value, int>::type>
|
316
|
-
size_t var_opt_sketch<T,
|
297
|
+
size_t var_opt_sketch<T, A>::get_serialized_size_bytes(const SerDe&) const {
|
317
298
|
if (is_empty()) { return PREAMBLE_LONGS_EMPTY << 3; }
|
318
299
|
size_t num_bytes = (r_ == 0 ? PREAMBLE_LONGS_WARMUP : PREAMBLE_LONGS_FULL) << 3;
|
319
300
|
num_bytes += h_ * sizeof(double); // weights
|
@@ -325,9 +306,9 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes(const SerDe&) const {
|
|
325
306
|
}
|
326
307
|
|
327
308
|
// implementation for all other types
|
328
|
-
template<typename T, typename
|
309
|
+
template<typename T, typename A>
|
329
310
|
template<typename TT, typename SerDe, typename std::enable_if<!std::is_arithmetic<TT>::value, int>::type>
|
330
|
-
size_t var_opt_sketch<T,
|
311
|
+
size_t var_opt_sketch<T, A>::get_serialized_size_bytes(const SerDe& sd) const {
|
331
312
|
if (is_empty()) { return PREAMBLE_LONGS_EMPTY << 3; }
|
332
313
|
size_t num_bytes = (r_ == 0 ? PREAMBLE_LONGS_WARMUP : PREAMBLE_LONGS_FULL) << 3;
|
333
314
|
num_bytes += h_ * sizeof(double); // weights
|
@@ -340,9 +321,9 @@ size_t var_opt_sketch<T,S,A>::get_serialized_size_bytes(const SerDe& sd) const {
|
|
340
321
|
return num_bytes;
|
341
322
|
}
|
342
323
|
|
343
|
-
template<typename T, typename
|
324
|
+
template<typename T, typename A>
|
344
325
|
template<typename SerDe>
|
345
|
-
std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,
|
326
|
+
std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T, A>::serialize(unsigned header_size_bytes, const SerDe& sd) const {
|
346
327
|
const size_t size = header_size_bytes + get_serialized_size_bytes(sd);
|
347
328
|
std::vector<uint8_t, AllocU8<A>> bytes(size, 0, allocator_);
|
348
329
|
uint8_t* ptr = bytes.data() + header_size_bytes;
|
@@ -414,9 +395,9 @@ std::vector<uint8_t, AllocU8<A>> var_opt_sketch<T,S,A>::serialize(unsigned heade
|
|
414
395
|
return bytes;
|
415
396
|
}
|
416
397
|
|
417
|
-
template<typename T, typename
|
398
|
+
template<typename T, typename A>
|
418
399
|
template<typename SerDe>
|
419
|
-
void var_opt_sketch<T,
|
400
|
+
void var_opt_sketch<T, A>::serialize(std::ostream& os, const SerDe& sd) const {
|
420
401
|
const bool empty = (h_ == 0) && (r_ == 0);
|
421
402
|
|
422
403
|
const uint8_t preLongs = (empty ? PREAMBLE_LONGS_EMPTY
|
@@ -477,14 +458,9 @@ void var_opt_sketch<T,S,A>::serialize(std::ostream& os, const SerDe& sd) const {
|
|
477
458
|
}
|
478
459
|
}
|
479
460
|
|
480
|
-
template<typename T, typename
|
481
|
-
var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size_t size, const A& allocator) {
|
482
|
-
return deserialize(bytes, size, S(), allocator);
|
483
|
-
}
|
484
|
-
|
485
|
-
template<typename T, typename S, typename A>
|
461
|
+
template<typename T, typename A>
|
486
462
|
template<typename SerDe>
|
487
|
-
var_opt_sketch<T,
|
463
|
+
var_opt_sketch<T, A> var_opt_sketch<T, A>::deserialize(const void* bytes, size_t size, const SerDe& sd, const A& allocator) {
|
488
464
|
ensure_minimum_memory(size, 8);
|
489
465
|
const char* ptr = static_cast<const char*>(bytes);
|
490
466
|
const char* base = ptr;
|
@@ -510,7 +486,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
|
|
510
486
|
const bool is_gadget = flags & GADGET_FLAG_MASK;
|
511
487
|
|
512
488
|
if (is_empty) {
|
513
|
-
return var_opt_sketch
|
489
|
+
return var_opt_sketch(k, rf, is_gadget, allocator);
|
514
490
|
}
|
515
491
|
|
516
492
|
// second and third prelongs
|
@@ -578,14 +554,9 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(const void* bytes, size
|
|
578
554
|
std::move(items), std::move(weights), num_marks_in_h, std::move(marks), allocator);
|
579
555
|
}
|
580
556
|
|
581
|
-
template<typename T, typename
|
582
|
-
var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const A& allocator) {
|
583
|
-
return deserialize(is, S(), allocator);
|
584
|
-
}
|
585
|
-
|
586
|
-
template<typename T, typename S, typename A>
|
557
|
+
template<typename T, typename A>
|
587
558
|
template<typename SerDe>
|
588
|
-
var_opt_sketch<T,
|
559
|
+
var_opt_sketch<T, A> var_opt_sketch<T, A>::deserialize(std::istream& is, const SerDe& sd, const A& allocator) {
|
589
560
|
const auto first_byte = read<uint8_t>(is);
|
590
561
|
uint8_t preamble_longs = first_byte & 0x3f;
|
591
562
|
const resize_factor rf = static_cast<resize_factor>((first_byte >> 6) & 0x03);
|
@@ -604,7 +575,7 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const
|
|
604
575
|
if (!is.good())
|
605
576
|
throw std::runtime_error("error reading from std::istream");
|
606
577
|
else
|
607
|
-
return var_opt_sketch
|
578
|
+
return var_opt_sketch(k, rf, is_gadget, allocator);
|
608
579
|
}
|
609
580
|
|
610
581
|
// second and third prelongs
|
@@ -668,13 +639,13 @@ var_opt_sketch<T,S,A> var_opt_sketch<T,S,A>::deserialize(std::istream& is, const
|
|
668
639
|
std::move(items), std::move(weights), num_marks_in_h, std::move(marks), allocator);
|
669
640
|
}
|
670
641
|
|
671
|
-
template<typename T, typename
|
672
|
-
bool var_opt_sketch<T,
|
642
|
+
template<typename T, typename A>
|
643
|
+
bool var_opt_sketch<T, A>::is_empty() const {
|
673
644
|
return (h_ == 0 && r_ == 0);
|
674
645
|
}
|
675
646
|
|
676
|
-
template<typename T, typename
|
677
|
-
void var_opt_sketch<T,
|
647
|
+
template<typename T, typename A>
|
648
|
+
void var_opt_sketch<T, A>::reset() {
|
678
649
|
const uint32_t prev_alloc = curr_items_alloc_;
|
679
650
|
const uint32_t ceiling_lg_k = to_log_2(ceiling_power_of_2(k_));
|
680
651
|
const uint32_t initial_lg_size = starting_sub_multiple(ceiling_lg_k, rf_, MIN_LG_ARR_ITEMS);
|
@@ -718,34 +689,34 @@ void var_opt_sketch<T,S,A>::reset() {
|
|
718
689
|
filled_data_ = false;
|
719
690
|
}
|
720
691
|
|
721
|
-
template<typename T, typename
|
722
|
-
uint64_t var_opt_sketch<T,
|
692
|
+
template<typename T, typename A>
|
693
|
+
uint64_t var_opt_sketch<T, A>::get_n() const {
|
723
694
|
return n_;
|
724
695
|
}
|
725
696
|
|
726
|
-
template<typename T, typename
|
727
|
-
uint32_t var_opt_sketch<T,
|
697
|
+
template<typename T, typename A>
|
698
|
+
uint32_t var_opt_sketch<T, A>::get_k() const {
|
728
699
|
return k_;
|
729
700
|
}
|
730
701
|
|
731
|
-
template<typename T, typename
|
732
|
-
uint32_t var_opt_sketch<T,
|
702
|
+
template<typename T, typename A>
|
703
|
+
uint32_t var_opt_sketch<T, A>::get_num_samples() const {
|
733
704
|
const uint32_t num_in_sketch = h_ + r_;
|
734
705
|
return (num_in_sketch < k_ ? num_in_sketch : k_);
|
735
706
|
}
|
736
707
|
|
737
|
-
template<typename T, typename
|
738
|
-
void var_opt_sketch<T,
|
708
|
+
template<typename T, typename A>
|
709
|
+
void var_opt_sketch<T, A>::update(const T& item, double weight) {
|
739
710
|
update(item, weight, false);
|
740
711
|
}
|
741
712
|
|
742
|
-
template<typename T, typename
|
743
|
-
void var_opt_sketch<T,
|
713
|
+
template<typename T, typename A>
|
714
|
+
void var_opt_sketch<T, A>::update(T&& item, double weight) {
|
744
715
|
update(std::move(item), weight, false);
|
745
716
|
}
|
746
717
|
|
747
|
-
template<typename T, typename
|
748
|
-
string<A> var_opt_sketch<T,
|
718
|
+
template<typename T, typename A>
|
719
|
+
string<A> var_opt_sketch<T, A>::to_string() const {
|
749
720
|
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
750
721
|
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
751
722
|
std::ostringstream os;
|
@@ -760,8 +731,8 @@ string<A> var_opt_sketch<T,S,A>::to_string() const {
|
|
760
731
|
return string<A>(os.str().c_str(), allocator_);
|
761
732
|
}
|
762
733
|
|
763
|
-
template<typename T, typename
|
764
|
-
string<A> var_opt_sketch<T,
|
734
|
+
template<typename T, typename A>
|
735
|
+
string<A> var_opt_sketch<T, A>::items_to_string() const {
|
765
736
|
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
766
737
|
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
767
738
|
std::ostringstream os;
|
@@ -774,8 +745,8 @@ string<A> var_opt_sketch<T,S,A>::items_to_string() const {
|
|
774
745
|
return string<A>(os.str().c_str(), allocator_);
|
775
746
|
}
|
776
747
|
|
777
|
-
template<typename T, typename
|
778
|
-
string<A> var_opt_sketch<T,
|
748
|
+
template<typename T, typename A>
|
749
|
+
string<A> var_opt_sketch<T, A>::items_to_string(bool print_gap) const {
|
779
750
|
// Using a temporary stream for implementation here does not comply with AllocatorAwareContainer requirements.
|
780
751
|
// The stream does not support passing an allocator instance, and alternatives are complicated.
|
781
752
|
std::ostringstream os;
|
@@ -798,9 +769,9 @@ string<A> var_opt_sketch<T,S,A>::items_to_string(bool print_gap) const {
|
|
798
769
|
return string<A>(os.str().c_str(), allocator_);
|
799
770
|
}
|
800
771
|
|
801
|
-
template<typename T, typename
|
772
|
+
template<typename T, typename A>
|
802
773
|
template<typename O>
|
803
|
-
void var_opt_sketch<T,
|
774
|
+
void var_opt_sketch<T, A>::update(O&& item, double weight, bool mark) {
|
804
775
|
if (weight < 0.0 || std::isnan(weight) || std::isinf(weight)) {
|
805
776
|
throw std::invalid_argument("Item weights must be nonnegative and finite. Found: "
|
806
777
|
+ std::to_string(weight));
|
@@ -838,9 +809,9 @@ void var_opt_sketch<T,S,A>::update(O&& item, double weight, bool mark) {
|
|
838
809
|
}
|
839
810
|
}
|
840
811
|
|
841
|
-
template<typename T, typename
|
812
|
+
template<typename T, typename A>
|
842
813
|
template<typename O>
|
843
|
-
void var_opt_sketch<T,
|
814
|
+
void var_opt_sketch<T, A>::update_warmup_phase(O&& item, double weight, bool mark) {
|
844
815
|
// seems overly cautious
|
845
816
|
if (r_ > 0 || m_ != 0 || h_ > k_) throw std::logic_error("invalid sketch state during warmup");
|
846
817
|
|
@@ -868,14 +839,15 @@ void var_opt_sketch<T,S,A>::update_warmup_phase(O&& item, double weight, bool ma
|
|
868
839
|
would appear to the right of the R items in a hypothetical reverse-sorted
|
869
840
|
list. It is easy to prove that it is light enough to be part of this
|
870
841
|
round's downsampling */
|
871
|
-
template<typename T, typename
|
842
|
+
template<typename T, typename A>
|
872
843
|
template<typename O>
|
873
|
-
void var_opt_sketch<T,
|
844
|
+
void var_opt_sketch<T, A>::update_light(O&& item, double weight, bool mark) {
|
874
845
|
if (r_ == 0 || (r_ + h_) != k_) throw std::logic_error("invalid sketch state during light warmup");
|
875
846
|
|
876
847
|
const uint32_t m_slot = h_; // index of the gap, which becomes the M region
|
877
848
|
if (filled_data_) {
|
878
|
-
data_[m_slot]
|
849
|
+
if (&data_[m_slot] != &item)
|
850
|
+
data_[m_slot] = std::forward<O>(item);
|
879
851
|
} else {
|
880
852
|
new (&data_[m_slot]) T(std::forward<O>(item));
|
881
853
|
filled_data_ = true;
|
@@ -895,9 +867,9 @@ void var_opt_sketch<T,S,A>::update_light(O&& item, double weight, bool mark) {
|
|
895
867
|
In other words, it might go into the heap and then come right back out,
|
896
868
|
but that should be okay because pseudo_heavy items cannot predominate
|
897
869
|
in long streams unless (max wt) / (min wt) > o(exp(N)) */
|
898
|
-
template<typename T, typename
|
870
|
+
template<typename T, typename A>
|
899
871
|
template<typename O>
|
900
|
-
void var_opt_sketch<T,
|
872
|
+
void var_opt_sketch<T, A>::update_heavy_general(O&& item, double weight, bool mark) {
|
901
873
|
if (r_ < 2 || m_ != 0 || (r_ + h_) != k_) throw std::logic_error("invalid sketch state during heavy general update");
|
902
874
|
|
903
875
|
// put into H, although may come back out momentarily
|
@@ -909,9 +881,9 @@ void var_opt_sketch<T,S,A>::update_heavy_general(O&& item, double weight, bool m
|
|
909
881
|
/* The analysis of this case is similar to that of the general heavy case.
|
910
882
|
The one small technical difference is that since R < 2, we must grab an M item
|
911
883
|
to have a valid starting point for continue_by_growing_candidate_set () */
|
912
|
-
template<typename T, typename
|
884
|
+
template<typename T, typename A>
|
913
885
|
template<typename O>
|
914
|
-
void var_opt_sketch<T,
|
886
|
+
void var_opt_sketch<T, A>::update_heavy_r_eq1(O&& item, double weight, bool mark) {
|
915
887
|
if (r_ != 1 || m_ != 0 || (r_ + h_) != k_) throw std::logic_error("invalid sketch state during heavy r=1 update");
|
916
888
|
|
917
889
|
push(std::forward<O>(item), weight, mark); // new item into H
|
@@ -929,8 +901,8 @@ void var_opt_sketch<T,S,A>::update_heavy_r_eq1(O&& item, double weight, bool mar
|
|
929
901
|
* <p>Subject to certain pre-conditions, decreasing k causes tau to increase. This fact is used by
|
930
902
|
* the unioning algorithm to force "marked" items out of H and into the reservoir region.</p>
|
931
903
|
*/
|
932
|
-
template<typename T, typename
|
933
|
-
void var_opt_sketch<T,
|
904
|
+
template<typename T, typename A>
|
905
|
+
void var_opt_sketch<T, A>::decrease_k_by_1() {
|
934
906
|
if (k_ <= 1) {
|
935
907
|
throw std::logic_error("Cannot decrease k below 1 in union");
|
936
908
|
}
|
@@ -952,9 +924,10 @@ void var_opt_sketch<T,S,A>::decrease_k_by_1() {
|
|
952
924
|
// first, slide the R zone to the left by 1, temporarily filling the gap
|
953
925
|
const uint32_t old_gap_idx = h_;
|
954
926
|
const uint32_t old_final_r_idx = (h_ + 1 + r_) - 1;
|
955
|
-
|
927
|
+
if (old_final_r_idx != k_) throw std::logic_error("gadget in invalid state");
|
956
928
|
|
957
929
|
swap_values(old_final_r_idx, old_gap_idx);
|
930
|
+
filled_data_ = true; // we just filled the gap, and no need to check previous state
|
958
931
|
|
959
932
|
// now we pull an item out of H; any item is ok, but if we grab the rightmost and then
|
960
933
|
// reduce h_, the heap invariant will be preserved (and the gap will be restored), plus
|
@@ -987,8 +960,8 @@ void var_opt_sketch<T,S,A>::decrease_k_by_1() {
|
|
987
960
|
}
|
988
961
|
}
|
989
962
|
|
990
|
-
template<typename T, typename
|
991
|
-
void var_opt_sketch<T,
|
963
|
+
template<typename T, typename A>
|
964
|
+
void var_opt_sketch<T, A>::allocate_data_arrays(uint32_t tgt_size, bool use_marks) {
|
992
965
|
filled_data_ = false;
|
993
966
|
|
994
967
|
data_ = allocator_.allocate(tgt_size);
|
@@ -1001,8 +974,8 @@ void var_opt_sketch<T,S,A>::allocate_data_arrays(uint32_t tgt_size, bool use_mar
|
|
1001
974
|
}
|
1002
975
|
}
|
1003
976
|
|
1004
|
-
template<typename T, typename
|
1005
|
-
void var_opt_sketch<T,
|
977
|
+
template<typename T, typename A>
|
978
|
+
void var_opt_sketch<T, A>::grow_data_arrays() {
|
1006
979
|
const uint32_t prev_size = curr_items_alloc_;
|
1007
980
|
curr_items_alloc_ = get_adjusted_size(k_, curr_items_alloc_ << rf_);
|
1008
981
|
if (curr_items_alloc_ == k_) {
|
@@ -1038,8 +1011,8 @@ void var_opt_sketch<T,S,A>::grow_data_arrays() {
|
|
1038
1011
|
}
|
1039
1012
|
}
|
1040
1013
|
|
1041
|
-
template<typename T, typename
|
1042
|
-
void var_opt_sketch<T,
|
1014
|
+
template<typename T, typename A>
|
1015
|
+
void var_opt_sketch<T, A>::transition_from_warmup() {
|
1043
1016
|
// Move the 2 lightest items from H to M
|
1044
1017
|
// But the lighter really belongs in R, so update counts to reflect that
|
1045
1018
|
convert_to_heap();
|
@@ -1061,8 +1034,8 @@ void var_opt_sketch<T,S,A>::transition_from_warmup() {
|
|
1061
1034
|
grow_candidate_set(weights_[k_ - 1] + total_wt_r_, 2);
|
1062
1035
|
}
|
1063
1036
|
|
1064
|
-
template<typename T, typename
|
1065
|
-
void var_opt_sketch<T,
|
1037
|
+
template<typename T, typename A>
|
1038
|
+
void var_opt_sketch<T, A>::convert_to_heap() {
|
1066
1039
|
if (h_ < 2) {
|
1067
1040
|
return; // nothing to do
|
1068
1041
|
}
|
@@ -1081,8 +1054,8 @@ void var_opt_sketch<T,S,A>::convert_to_heap() {
|
|
1081
1054
|
//}
|
1082
1055
|
}
|
1083
1056
|
|
1084
|
-
template<typename T, typename
|
1085
|
-
void var_opt_sketch<T,
|
1057
|
+
template<typename T, typename A>
|
1058
|
+
void var_opt_sketch<T, A>::restore_towards_leaves(uint32_t slot_in) {
|
1086
1059
|
const uint32_t last_slot = h_ - 1;
|
1087
1060
|
if (h_ == 0 || slot_in > last_slot) throw std::logic_error("invalid heap state");
|
1088
1061
|
|
@@ -1109,8 +1082,8 @@ void var_opt_sketch<T,S,A>::restore_towards_leaves(uint32_t slot_in) {
|
|
1109
1082
|
}
|
1110
1083
|
}
|
1111
1084
|
|
1112
|
-
template<typename T, typename
|
1113
|
-
void var_opt_sketch<T,
|
1085
|
+
template<typename T, typename A>
|
1086
|
+
void var_opt_sketch<T, A>::restore_towards_root(uint32_t slot_in) {
|
1114
1087
|
uint32_t slot = slot_in;
|
1115
1088
|
uint32_t p = (((slot + 1) / 2) - 1); // valid if slot >= 1
|
1116
1089
|
while ((slot > 0) && (weights_[slot] < weights_[p])) {
|
@@ -1120,11 +1093,12 @@ void var_opt_sketch<T,S,A>::restore_towards_root(uint32_t slot_in) {
|
|
1120
1093
|
}
|
1121
1094
|
}
|
1122
1095
|
|
1123
|
-
template<typename T, typename
|
1096
|
+
template<typename T, typename A>
|
1124
1097
|
template<typename O>
|
1125
|
-
void var_opt_sketch<T,
|
1098
|
+
void var_opt_sketch<T, A>::push(O&& item, double wt, bool mark) {
|
1126
1099
|
if (filled_data_) {
|
1127
|
-
data_[h_]
|
1100
|
+
if (&data_[h_] != &item)
|
1101
|
+
data_[h_] = std::forward<O>(item);
|
1128
1102
|
} else {
|
1129
1103
|
new (&data_[h_]) T(std::forward<O>(item));
|
1130
1104
|
filled_data_ = true;
|
@@ -1139,8 +1113,8 @@ void var_opt_sketch<T,S,A>::push(O&& item, double wt, bool mark) {
|
|
1139
1113
|
restore_towards_root(h_ - 1); // need use old h_, but want accurate h_
|
1140
1114
|
}
|
1141
1115
|
|
1142
|
-
template<typename T, typename
|
1143
|
-
void var_opt_sketch<T,
|
1116
|
+
template<typename T, typename A>
|
1117
|
+
void var_opt_sketch<T, A>::pop_min_to_m_region() {
|
1144
1118
|
if (h_ == 0 || (h_ + m_ + r_ != k_ + 1))
|
1145
1119
|
throw std::logic_error("invalid heap state popping min to M region");
|
1146
1120
|
|
@@ -1164,8 +1138,8 @@ void var_opt_sketch<T,S,A>::pop_min_to_m_region() {
|
|
1164
1138
|
}
|
1165
1139
|
|
1166
1140
|
|
1167
|
-
template<typename T, typename
|
1168
|
-
void var_opt_sketch<T,
|
1141
|
+
template<typename T, typename A>
|
1142
|
+
void var_opt_sketch<T, A>::swap_values(uint32_t src, uint32_t dst) {
|
1169
1143
|
std::swap(data_[src], data_[dst]);
|
1170
1144
|
std::swap(weights_[src], weights_[dst]);
|
1171
1145
|
|
@@ -1182,8 +1156,8 @@ void var_opt_sketch<T,S,A>::swap_values(uint32_t src, uint32_t dst) {
|
|
1182
1156
|
of cands is at least 2. We will now grow the candidate set as much as possible
|
1183
1157
|
by pulling sufficiently light items from h to m.
|
1184
1158
|
*/
|
1185
|
-
template<typename T, typename
|
1186
|
-
void var_opt_sketch<T,
|
1159
|
+
template<typename T, typename A>
|
1160
|
+
void var_opt_sketch<T, A>::grow_candidate_set(double wt_cands, uint32_t num_cands) {
|
1187
1161
|
if ((h_ + m_ + r_ != k_ + 1) || (num_cands < 1) || (num_cands != m_ + r_) || (m_ >= 2))
|
1188
1162
|
throw std::logic_error("invariant violated when growing candidate set");
|
1189
1163
|
|
@@ -1206,8 +1180,8 @@ void var_opt_sketch<T,S,A>::grow_candidate_set(double wt_cands, uint32_t num_can
|
|
1206
1180
|
downsample_candidate_set(wt_cands, num_cands);
|
1207
1181
|
}
|
1208
1182
|
|
1209
|
-
template<typename T, typename
|
1210
|
-
void var_opt_sketch<T,
|
1183
|
+
template<typename T, typename A>
|
1184
|
+
void var_opt_sketch<T, A>::downsample_candidate_set(double wt_cands, uint32_t num_cands) {
|
1211
1185
|
if (num_cands < 2 || h_ + num_cands != k_ + 1)
|
1212
1186
|
throw std::logic_error("invalid num_cands when downsampling");
|
1213
1187
|
|
@@ -1225,17 +1199,16 @@ void var_opt_sketch<T,S,A>::downsample_candidate_set(double wt_cands, uint32_t n
|
|
1225
1199
|
weights_[j] = -1.0;
|
1226
1200
|
}
|
1227
1201
|
|
1228
|
-
// The next
|
1202
|
+
// The next line works even when delete_slot == leftmost_cand_slot
|
1229
1203
|
data_[delete_slot] = std::move(data_[leftmost_cand_slot]);
|
1230
|
-
// cannot set data_[leftmost_cand_slot] to null since not uisng T*
|
1231
1204
|
|
1232
1205
|
m_ = 0;
|
1233
1206
|
r_ = num_cands - 1;
|
1234
1207
|
total_wt_r_ = wt_cands;
|
1235
1208
|
}
|
1236
1209
|
|
1237
|
-
template<typename T, typename
|
1238
|
-
uint32_t var_opt_sketch<T,
|
1210
|
+
template<typename T, typename A>
|
1211
|
+
uint32_t var_opt_sketch<T, A>::choose_delete_slot(double wt_cands, uint32_t num_cands) const {
|
1239
1212
|
if (r_ == 0) throw std::logic_error("choosing delete slot while in exact mode");
|
1240
1213
|
|
1241
1214
|
if (m_ == 0) {
|
@@ -1262,8 +1235,8 @@ uint32_t var_opt_sketch<T,S,A>::choose_delete_slot(double wt_cands, uint32_t num
|
|
1262
1235
|
}
|
1263
1236
|
}
|
1264
1237
|
|
1265
|
-
template<typename T, typename
|
1266
|
-
uint32_t var_opt_sketch<T,
|
1238
|
+
template<typename T, typename A>
|
1239
|
+
uint32_t var_opt_sketch<T, A>::choose_weighted_delete_slot(double wt_cands, uint32_t num_cands) const {
|
1267
1240
|
if (m_ < 1) throw std::logic_error("must have weighted delete slot");
|
1268
1241
|
|
1269
1242
|
const uint32_t offset = h_;
|
@@ -1286,8 +1259,8 @@ uint32_t var_opt_sketch<T,S,A>::choose_weighted_delete_slot(double wt_cands, uin
|
|
1286
1259
|
return final_m + 1;
|
1287
1260
|
}
|
1288
1261
|
|
1289
|
-
template<typename T, typename
|
1290
|
-
uint32_t var_opt_sketch<T,
|
1262
|
+
template<typename T, typename A>
|
1263
|
+
uint32_t var_opt_sketch<T, A>::pick_random_slot_in_r() const {
|
1291
1264
|
if (r_ == 0) throw std::logic_error("r_ = 0 when picking slot in R region");
|
1292
1265
|
const uint32_t offset = h_ + m_;
|
1293
1266
|
if (r_ == 1) {
|
@@ -1297,32 +1270,32 @@ uint32_t var_opt_sketch<T,S,A>::pick_random_slot_in_r() const {
|
|
1297
1270
|
}
|
1298
1271
|
}
|
1299
1272
|
|
1300
|
-
template<typename T, typename
|
1301
|
-
double var_opt_sketch<T,
|
1273
|
+
template<typename T, typename A>
|
1274
|
+
double var_opt_sketch<T, A>::peek_min() const {
|
1302
1275
|
if (h_ == 0) throw std::logic_error("h_ = 0 when checking min in H region");
|
1303
1276
|
return weights_[0];
|
1304
1277
|
}
|
1305
1278
|
|
1306
|
-
template<typename T, typename
|
1307
|
-
inline bool var_opt_sketch<T,
|
1279
|
+
template<typename T, typename A>
|
1280
|
+
inline bool var_opt_sketch<T, A>::is_marked(uint32_t idx) const {
|
1308
1281
|
return marks_ == nullptr ? false : marks_[idx];
|
1309
1282
|
}
|
1310
1283
|
|
1311
|
-
template<typename T, typename
|
1312
|
-
double var_opt_sketch<T,
|
1284
|
+
template<typename T, typename A>
|
1285
|
+
double var_opt_sketch<T, A>::get_tau() const {
|
1313
1286
|
return r_ == 0 ? std::nan("1") : (total_wt_r_ / r_);
|
1314
1287
|
}
|
1315
1288
|
|
1316
|
-
template<typename T, typename
|
1317
|
-
void var_opt_sketch<T,
|
1289
|
+
template<typename T, typename A>
|
1290
|
+
void var_opt_sketch<T, A>::strip_marks() {
|
1318
1291
|
if (marks_ == nullptr) throw std::logic_error("request to strip marks from non-gadget");
|
1319
1292
|
num_marks_in_h_ = 0;
|
1320
1293
|
AllocBool(allocator_).deallocate(marks_, curr_items_alloc_);
|
1321
1294
|
marks_ = nullptr;
|
1322
1295
|
}
|
1323
1296
|
|
1324
|
-
template<typename T, typename
|
1325
|
-
void var_opt_sketch<T,
|
1297
|
+
template<typename T, typename A>
|
1298
|
+
void var_opt_sketch<T, A>::check_preamble_longs(uint8_t preamble_longs, uint8_t flags) {
|
1326
1299
|
const bool is_empty(flags & EMPTY_FLAG_MASK);
|
1327
1300
|
|
1328
1301
|
if (is_empty) {
|
@@ -1342,8 +1315,8 @@ void var_opt_sketch<T,S,A>::check_preamble_longs(uint8_t preamble_longs, uint8_t
|
|
1342
1315
|
}
|
1343
1316
|
}
|
1344
1317
|
|
1345
|
-
template<typename T, typename
|
1346
|
-
void var_opt_sketch<T,
|
1318
|
+
template<typename T, typename A>
|
1319
|
+
void var_opt_sketch<T, A>::check_family_and_serialization_version(uint8_t family_id, uint8_t ser_ver) {
|
1347
1320
|
if (family_id == FAMILY_ID) {
|
1348
1321
|
if (ser_ver != SER_VER) {
|
1349
1322
|
throw std::invalid_argument("Possible corruption: VarOpt serialization version must be "
|
@@ -1357,8 +1330,8 @@ void var_opt_sketch<T,S,A>::check_family_and_serialization_version(uint8_t famil
|
|
1357
1330
|
+ std::to_string(FAMILY_ID) + ". Found: " + std::to_string(family_id));
|
1358
1331
|
}
|
1359
1332
|
|
1360
|
-
template<typename T, typename
|
1361
|
-
uint32_t var_opt_sketch<T,
|
1333
|
+
template<typename T, typename A>
|
1334
|
+
uint32_t var_opt_sketch<T, A>::validate_and_get_target_size(uint32_t preamble_longs, uint32_t k, uint64_t n,
|
1362
1335
|
uint32_t h, uint32_t r, resize_factor rf) {
|
1363
1336
|
if (k == 0 || k > MAX_K) {
|
1364
1337
|
throw std::invalid_argument("k must be at least 1 and less than 2^31 - 1");
|
@@ -1403,9 +1376,9 @@ uint32_t var_opt_sketch<T, S, A>::validate_and_get_target_size(uint32_t preamble
|
|
1403
1376
|
return array_size;
|
1404
1377
|
}
|
1405
1378
|
|
1406
|
-
template<typename T, typename
|
1379
|
+
template<typename T, typename A>
|
1407
1380
|
template<typename P>
|
1408
|
-
subset_summary var_opt_sketch<T,
|
1381
|
+
subset_summary var_opt_sketch<T, A>::estimate_subset_sum(P predicate) const {
|
1409
1382
|
if (n_ == 0) {
|
1410
1383
|
return {0.0, 0.0, 0.0, 0.0};
|
1411
1384
|
}
|
@@ -1451,8 +1424,8 @@ subset_summary var_opt_sketch<T, S, A>::estimate_subset_sum(P predicate) const {
|
|
1451
1424
|
};
|
1452
1425
|
}
|
1453
1426
|
|
1454
|
-
template<typename T, typename
|
1455
|
-
class var_opt_sketch<T,
|
1427
|
+
template<typename T, typename A>
|
1428
|
+
class var_opt_sketch<T, A>::items_deleter {
|
1456
1429
|
public:
|
1457
1430
|
items_deleter(uint32_t num, const A& allocator) : num(num), h_count(0), r_count(0), allocator(allocator) {}
|
1458
1431
|
void set_h(uint32_t h) { h_count = h; }
|
@@ -1480,8 +1453,8 @@ class var_opt_sketch<T, S, A>::items_deleter {
|
|
1480
1453
|
A allocator;
|
1481
1454
|
};
|
1482
1455
|
|
1483
|
-
template<typename T, typename
|
1484
|
-
class var_opt_sketch<T,
|
1456
|
+
template<typename T, typename A>
|
1457
|
+
class var_opt_sketch<T, A>::weights_deleter {
|
1485
1458
|
public:
|
1486
1459
|
weights_deleter(uint32_t num, const A& allocator) : num(num), allocator(allocator) {}
|
1487
1460
|
void operator() (double* ptr) {
|
@@ -1494,8 +1467,8 @@ class var_opt_sketch<T, S, A>::weights_deleter {
|
|
1494
1467
|
AllocDouble allocator;
|
1495
1468
|
};
|
1496
1469
|
|
1497
|
-
template<typename T, typename
|
1498
|
-
class var_opt_sketch<T,
|
1470
|
+
template<typename T, typename A>
|
1471
|
+
class var_opt_sketch<T, A>::marks_deleter {
|
1499
1472
|
public:
|
1500
1473
|
marks_deleter(uint32_t num, const A& allocator) : num(num), allocator(allocator) {}
|
1501
1474
|
void operator() (bool* ptr) {
|
@@ -1509,20 +1482,20 @@ class var_opt_sketch<T, S, A>::marks_deleter {
|
|
1509
1482
|
};
|
1510
1483
|
|
1511
1484
|
|
1512
|
-
template<typename T, typename
|
1513
|
-
typename var_opt_sketch<T,
|
1514
|
-
return
|
1485
|
+
template<typename T, typename A>
|
1486
|
+
typename var_opt_sketch<T, A>::const_iterator var_opt_sketch<T, A>::begin() const {
|
1487
|
+
return const_iterator(*this, false);
|
1515
1488
|
}
|
1516
1489
|
|
1517
|
-
template<typename T, typename
|
1518
|
-
typename var_opt_sketch<T,
|
1519
|
-
return
|
1490
|
+
template<typename T, typename A>
|
1491
|
+
typename var_opt_sketch<T, A>::const_iterator var_opt_sketch<T, A>::end() const {
|
1492
|
+
return const_iterator(*this, true);
|
1520
1493
|
}
|
1521
1494
|
|
1522
1495
|
// -------- var_opt_sketch::const_iterator implementation ---------
|
1523
1496
|
|
1524
|
-
template<typename T, typename
|
1525
|
-
var_opt_sketch<T,
|
1497
|
+
template<typename T, typename A>
|
1498
|
+
var_opt_sketch<T, A>::const_iterator::const_iterator(const var_opt_sketch& sk, bool is_end) :
|
1526
1499
|
sk_(&sk),
|
1527
1500
|
cum_r_weight_(0.0),
|
1528
1501
|
r_item_wt_(sk.get_tau()),
|
@@ -1540,8 +1513,8 @@ var_opt_sketch<T,S,A>::const_iterator::const_iterator(const var_opt_sketch<T,S,A
|
|
1540
1513
|
if (idx_ == final_idx_) { sk_ = nullptr; }
|
1541
1514
|
}
|
1542
1515
|
|
1543
|
-
template<typename T, typename
|
1544
|
-
var_opt_sketch<T,
|
1516
|
+
template<typename T, typename A>
|
1517
|
+
var_opt_sketch<T, A>::const_iterator::const_iterator(const var_opt_sketch& sk, bool is_end, bool use_r_region) :
|
1545
1518
|
sk_(&sk),
|
1546
1519
|
cum_r_weight_(0.0),
|
1547
1520
|
r_item_wt_(sk.get_tau()),
|
@@ -1559,8 +1532,8 @@ var_opt_sketch<T,S,A>::const_iterator::const_iterator(const var_opt_sketch<T,S,A
|
|
1559
1532
|
}
|
1560
1533
|
|
1561
1534
|
|
1562
|
-
template<typename T,
|
1563
|
-
var_opt_sketch<T,
|
1535
|
+
template<typename T, typename A>
|
1536
|
+
var_opt_sketch<T, A>::const_iterator::const_iterator(const const_iterator& other) :
|
1564
1537
|
sk_(other.sk_),
|
1565
1538
|
cum_r_weight_(other.cum_r_weight_),
|
1566
1539
|
r_item_wt_(other.r_item_wt_),
|
@@ -1568,8 +1541,8 @@ var_opt_sketch<T, S, A>::const_iterator::const_iterator(const const_iterator& ot
|
|
1568
1541
|
final_idx_(other.final_idx_)
|
1569
1542
|
{}
|
1570
1543
|
|
1571
|
-
template<typename T,
|
1572
|
-
typename var_opt_sketch<T,
|
1544
|
+
template<typename T, typename A>
|
1545
|
+
typename var_opt_sketch<T, A>::const_iterator& var_opt_sketch<T, A>::const_iterator::operator++() {
|
1573
1546
|
++idx_;
|
1574
1547
|
|
1575
1548
|
if (idx_ == final_idx_) {
|
@@ -1582,27 +1555,27 @@ typename var_opt_sketch<T, S, A>::const_iterator& var_opt_sketch<T, S, A>::const
|
|
1582
1555
|
return *this;
|
1583
1556
|
}
|
1584
1557
|
|
1585
|
-
template<typename T,
|
1586
|
-
typename var_opt_sketch<T,
|
1558
|
+
template<typename T, typename A>
|
1559
|
+
typename var_opt_sketch<T, A>::const_iterator& var_opt_sketch<T, A>::const_iterator::operator++(int) {
|
1587
1560
|
const_iterator tmp(*this);
|
1588
1561
|
operator++();
|
1589
1562
|
return tmp;
|
1590
1563
|
}
|
1591
1564
|
|
1592
|
-
template<typename T, typename
|
1593
|
-
bool var_opt_sketch<T,
|
1565
|
+
template<typename T, typename A>
|
1566
|
+
bool var_opt_sketch<T, A>::const_iterator::operator==(const const_iterator& other) const {
|
1594
1567
|
if (sk_ != other.sk_) return false;
|
1595
1568
|
if (sk_ == nullptr) return true; // end (and we know other.sk_ is also null)
|
1596
1569
|
return idx_ == other.idx_;
|
1597
1570
|
}
|
1598
1571
|
|
1599
|
-
template<typename T, typename
|
1600
|
-
bool var_opt_sketch<T,
|
1572
|
+
template<typename T, typename A>
|
1573
|
+
bool var_opt_sketch<T, A>::const_iterator::operator!=(const const_iterator& other) const {
|
1601
1574
|
return !operator==(other);
|
1602
1575
|
}
|
1603
1576
|
|
1604
|
-
template<typename T, typename
|
1605
|
-
const std::pair<const T&, const double> var_opt_sketch<T,
|
1577
|
+
template<typename T, typename A>
|
1578
|
+
const std::pair<const T&, const double> var_opt_sketch<T, A>::const_iterator::operator*() const {
|
1606
1579
|
double wt;
|
1607
1580
|
if (idx_ < sk_->h_) {
|
1608
1581
|
wt = sk_->weights_[idx_];
|
@@ -1612,16 +1585,16 @@ const std::pair<const T&, const double> var_opt_sketch<T, S, A>::const_iterator:
|
|
1612
1585
|
return std::pair<const T&, const double>(sk_->data_[idx_], wt);
|
1613
1586
|
}
|
1614
1587
|
|
1615
|
-
template<typename T, typename
|
1616
|
-
bool var_opt_sketch<T,
|
1588
|
+
template<typename T, typename A>
|
1589
|
+
bool var_opt_sketch<T, A>::const_iterator::get_mark() const {
|
1617
1590
|
return sk_->marks_ == nullptr ? false : sk_->marks_[idx_];
|
1618
1591
|
}
|
1619
1592
|
|
1620
1593
|
|
1621
1594
|
// -------- var_opt_sketch::iterator implementation ---------
|
1622
1595
|
|
1623
|
-
template<typename T, typename
|
1624
|
-
var_opt_sketch<T,
|
1596
|
+
template<typename T, typename A>
|
1597
|
+
var_opt_sketch<T, A>::iterator::iterator(const var_opt_sketch& sk, bool is_end, bool use_r_region) :
|
1625
1598
|
sk_(&sk),
|
1626
1599
|
cum_r_weight_(0.0),
|
1627
1600
|
r_item_wt_(sk.get_tau()),
|
@@ -1638,8 +1611,8 @@ var_opt_sketch<T,S,A>::iterator::iterator(const var_opt_sketch<T,S,A>& sk, bool
|
|
1638
1611
|
if (idx_ == final_idx_) { sk_ = nullptr; }
|
1639
1612
|
}
|
1640
1613
|
|
1641
|
-
template<typename T,
|
1642
|
-
var_opt_sketch<T,
|
1614
|
+
template<typename T, typename A>
|
1615
|
+
var_opt_sketch<T, A>::iterator::iterator(const iterator& other) :
|
1643
1616
|
sk_(other.sk_),
|
1644
1617
|
cum_r_weight_(other.cum_r_weight_),
|
1645
1618
|
r_item_wt_(other.r_item_wt_),
|
@@ -1647,8 +1620,8 @@ var_opt_sketch<T, S, A>::iterator::iterator(const iterator& other) :
|
|
1647
1620
|
final_idx_(other.final_idx_)
|
1648
1621
|
{}
|
1649
1622
|
|
1650
|
-
template<typename T,
|
1651
|
-
typename var_opt_sketch<T,
|
1623
|
+
template<typename T, typename A>
|
1624
|
+
typename var_opt_sketch<T, A>::iterator& var_opt_sketch<T, A>::iterator::operator++() {
|
1652
1625
|
++idx_;
|
1653
1626
|
|
1654
1627
|
if (idx_ == final_idx_) {
|
@@ -1661,27 +1634,27 @@ typename var_opt_sketch<T, S, A>::iterator& var_opt_sketch<T, S, A>::iterator::o
|
|
1661
1634
|
return *this;
|
1662
1635
|
}
|
1663
1636
|
|
1664
|
-
template<typename T,
|
1665
|
-
typename var_opt_sketch<T,
|
1637
|
+
template<typename T, typename A>
|
1638
|
+
typename var_opt_sketch<T, A>::iterator& var_opt_sketch<T, A>::iterator::operator++(int) {
|
1666
1639
|
const_iterator tmp(*this);
|
1667
1640
|
operator++();
|
1668
1641
|
return tmp;
|
1669
1642
|
}
|
1670
1643
|
|
1671
|
-
template<typename T, typename
|
1672
|
-
bool var_opt_sketch<T,
|
1644
|
+
template<typename T, typename A>
|
1645
|
+
bool var_opt_sketch<T, A>::iterator::operator==(const iterator& other) const {
|
1673
1646
|
if (sk_ != other.sk_) return false;
|
1674
1647
|
if (sk_ == nullptr) return true; // end (and we know other.sk_ is also null)
|
1675
1648
|
return idx_ == other.idx_;
|
1676
1649
|
}
|
1677
1650
|
|
1678
|
-
template<typename T, typename
|
1679
|
-
bool var_opt_sketch<T,
|
1651
|
+
template<typename T, typename A>
|
1652
|
+
bool var_opt_sketch<T, A>::iterator::operator!=(const iterator& other) const {
|
1680
1653
|
return !operator==(other);
|
1681
1654
|
}
|
1682
1655
|
|
1683
|
-
template<typename T, typename
|
1684
|
-
std::pair<T&, double> var_opt_sketch<T,
|
1656
|
+
template<typename T, typename A>
|
1657
|
+
std::pair<T&, double> var_opt_sketch<T, A>::iterator::operator*() {
|
1685
1658
|
double wt;
|
1686
1659
|
if (idx_ < sk_->h_) {
|
1687
1660
|
wt = sk_->weights_[idx_];
|
@@ -1693,8 +1666,8 @@ std::pair<T&, double> var_opt_sketch<T, S, A>::iterator::operator*() {
|
|
1693
1666
|
return std::pair<T&, double>(sk_->data_[idx_], wt);
|
1694
1667
|
}
|
1695
1668
|
|
1696
|
-
template<typename T, typename
|
1697
|
-
bool var_opt_sketch<T,
|
1669
|
+
template<typename T, typename A>
|
1670
|
+
bool var_opt_sketch<T, A>::iterator::get_mark() const {
|
1698
1671
|
return sk_->marks_ == nullptr ? false : sk_->marks_[idx_];
|
1699
1672
|
}
|
1700
1673
|
|
@@ -1702,40 +1675,40 @@ bool var_opt_sketch<T, S, A>::iterator::get_mark() const {
|
|
1702
1675
|
* Checks if target sampling allocation is more than 50% of max sampling size.
|
1703
1676
|
* If so, returns max sampling size, otherwise passes through target size.
|
1704
1677
|
*/
|
1705
|
-
template<typename T, typename
|
1706
|
-
uint32_t var_opt_sketch<T,
|
1707
|
-
if (max_size
|
1678
|
+
template<typename T, typename A>
|
1679
|
+
uint32_t var_opt_sketch<T, A>::get_adjusted_size(uint32_t max_size, uint32_t resize_target) {
|
1680
|
+
if (max_size < (resize_target << 1)) {
|
1708
1681
|
return max_size;
|
1709
1682
|
}
|
1710
1683
|
return resize_target;
|
1711
1684
|
}
|
1712
1685
|
|
1713
|
-
template<typename T, typename
|
1714
|
-
uint32_t var_opt_sketch<T,
|
1686
|
+
template<typename T, typename A>
|
1687
|
+
uint32_t var_opt_sketch<T, A>::starting_sub_multiple(uint32_t lg_target, uint32_t lg_rf, uint32_t lg_min) {
|
1715
1688
|
return (lg_target <= lg_min)
|
1716
1689
|
? lg_min : (lg_rf == 0) ? lg_target
|
1717
1690
|
: (lg_target - lg_min) % lg_rf + lg_min;
|
1718
1691
|
}
|
1719
1692
|
|
1720
|
-
template<typename T, typename
|
1721
|
-
double var_opt_sketch<T,
|
1693
|
+
template<typename T, typename A>
|
1694
|
+
double var_opt_sketch<T, A>::pseudo_hypergeometric_ub_on_p(uint64_t n, uint32_t k, double sampling_rate) {
|
1722
1695
|
const double adjusted_kappa = DEFAULT_KAPPA * sqrt(1 - sampling_rate);
|
1723
1696
|
return bounds_binomial_proportions::approximate_upper_bound_on_p(n, k, adjusted_kappa);
|
1724
1697
|
}
|
1725
1698
|
|
1726
|
-
template<typename T, typename
|
1727
|
-
double var_opt_sketch<T,
|
1699
|
+
template<typename T, typename A>
|
1700
|
+
double var_opt_sketch<T, A>::pseudo_hypergeometric_lb_on_p(uint64_t n, uint32_t k, double sampling_rate) {
|
1728
1701
|
const double adjusted_kappa = DEFAULT_KAPPA * sqrt(1 - sampling_rate);
|
1729
1702
|
return bounds_binomial_proportions::approximate_lower_bound_on_p(n, k, adjusted_kappa);
|
1730
1703
|
}
|
1731
1704
|
|
1732
|
-
template<typename T, typename
|
1733
|
-
bool var_opt_sketch<T,
|
1705
|
+
template<typename T, typename A>
|
1706
|
+
bool var_opt_sketch<T, A>::is_power_of_2(uint32_t v) {
|
1734
1707
|
return v && !(v & (v - 1));
|
1735
1708
|
}
|
1736
1709
|
|
1737
|
-
template<typename T, typename
|
1738
|
-
uint32_t var_opt_sketch<T,
|
1710
|
+
template<typename T, typename A>
|
1711
|
+
uint32_t var_opt_sketch<T, A>::to_log_2(uint32_t v) {
|
1739
1712
|
if (is_power_of_2(v)) {
|
1740
1713
|
return count_trailing_zeros_in_u32(v);
|
1741
1714
|
} else {
|
@@ -1744,14 +1717,14 @@ uint32_t var_opt_sketch<T,S,A>::to_log_2(uint32_t v) {
|
|
1744
1717
|
}
|
1745
1718
|
|
1746
1719
|
// Returns an integer in the range [0, max_value) -- excludes max_value
|
1747
|
-
template<typename T, typename
|
1748
|
-
uint32_t var_opt_sketch<T,
|
1720
|
+
template<typename T, typename A>
|
1721
|
+
uint32_t var_opt_sketch<T, A>::next_int(uint32_t max_value) {
|
1749
1722
|
std::uniform_int_distribution<uint32_t> dist(0, max_value - 1);
|
1750
1723
|
return dist(random_utils::rand);
|
1751
1724
|
}
|
1752
1725
|
|
1753
|
-
template<typename T, typename
|
1754
|
-
double var_opt_sketch<T,
|
1726
|
+
template<typename T, typename A>
|
1727
|
+
double var_opt_sketch<T, A>::next_double_exclude_zero() {
|
1755
1728
|
double r = random_utils::next_double(random_utils::rand);
|
1756
1729
|
while (r == 0.0) {
|
1757
1730
|
r = random_utils::next_double(random_utils::rand);
|