datasketches 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/cpc_wrapper.cpp +12 -13
- data/ext/datasketches/ext.cpp +1 -1
- data/ext/datasketches/ext.h +4 -0
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/fi_wrapper.cpp +6 -8
- data/ext/datasketches/hll_wrapper.cpp +13 -14
- data/ext/datasketches/kll_wrapper.cpp +28 -76
- data/ext/datasketches/theta_wrapper.cpp +27 -41
- data/ext/datasketches/vo_wrapper.cpp +4 -6
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/README.md +4 -4
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
- data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
- data/vendor/datasketches-cpp/python/README.md +6 -3
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
- data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
- data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
- data/vendor/datasketches-cpp/setup.py +5 -3
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
- data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
- data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
- metadata +43 -34
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -24,20 +24,20 @@
|
|
24
24
|
|
25
25
|
namespace datasketches {
|
26
26
|
|
27
|
-
template<typename A
|
27
|
+
template<typename A>
|
28
28
|
class CouponHashSet : public CouponList<A> {
|
29
29
|
public:
|
30
|
-
static CouponHashSet* newSet(const void* bytes, size_t len);
|
31
|
-
static CouponHashSet* newSet(std::istream& is);
|
32
|
-
|
33
|
-
|
34
|
-
explicit CouponHashSet(const CouponHashSet& that);
|
30
|
+
static CouponHashSet* newSet(const void* bytes, size_t len, const A& allocator);
|
31
|
+
static CouponHashSet* newSet(std::istream& is, const A& allocator);
|
32
|
+
CouponHashSet(int lgConfigK, target_hll_type tgtHllType, const A& allocator);
|
33
|
+
CouponHashSet(const CouponHashSet& that, target_hll_type tgtHllType);
|
35
34
|
|
36
|
-
virtual ~CouponHashSet();
|
35
|
+
virtual ~CouponHashSet() = default;
|
37
36
|
virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
|
38
37
|
|
39
38
|
protected:
|
40
|
-
|
39
|
+
using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
|
40
|
+
|
41
41
|
virtual CouponHashSet* copy() const;
|
42
42
|
virtual CouponHashSet* copyAs(target_hll_type tgtHllType) const;
|
43
43
|
|
@@ -49,9 +49,9 @@ class CouponHashSet : public CouponList<A> {
|
|
49
49
|
friend class HllSketchImplFactory<A>;
|
50
50
|
|
51
51
|
private:
|
52
|
-
|
52
|
+
using ChsAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>>;
|
53
53
|
bool checkGrowOrPromote();
|
54
|
-
void growHashSet(int
|
54
|
+
void growHashSet(int tgtLgCoupArrSize);
|
55
55
|
};
|
56
56
|
|
57
57
|
}
|
@@ -23,6 +23,7 @@
|
|
23
23
|
#include "CouponList.hpp"
|
24
24
|
#include "CubicInterpolation.hpp"
|
25
25
|
#include "HllUtil.hpp"
|
26
|
+
#include "count_zeros.hpp"
|
26
27
|
|
27
28
|
#include <algorithm>
|
28
29
|
#include <cmath>
|
@@ -30,74 +31,45 @@
|
|
30
31
|
namespace datasketches {
|
31
32
|
|
32
33
|
template<typename A>
|
33
|
-
CouponList<A>::CouponList(const int lgConfigK, const target_hll_type tgtHllType, const hll_mode mode)
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
}
|
40
|
-
oooFlag = false;
|
41
|
-
const int arrayLen = 1 << lgCouponArrInts;
|
42
|
-
typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
|
43
|
-
couponIntArr = intAlloc().allocate(arrayLen);
|
44
|
-
std::fill(couponIntArr, couponIntArr + arrayLen, 0);
|
45
|
-
couponCount = 0;
|
46
|
-
}
|
47
|
-
|
48
|
-
template<typename A>
|
49
|
-
CouponList<A>::CouponList(const CouponList& that)
|
50
|
-
: HllSketchImpl<A>(that.lgConfigK, that.tgtHllType, that.mode, false),
|
51
|
-
lgCouponArrInts(that.lgCouponArrInts),
|
52
|
-
couponCount(that.couponCount),
|
53
|
-
oooFlag(that.oooFlag) {
|
54
|
-
|
55
|
-
const int numItems = 1 << lgCouponArrInts;
|
56
|
-
typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
|
57
|
-
couponIntArr = intAlloc().allocate(numItems);
|
58
|
-
std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr);
|
59
|
-
}
|
60
|
-
|
61
|
-
template<typename A>
|
62
|
-
CouponList<A>::CouponList(const CouponList& that, const target_hll_type tgtHllType)
|
63
|
-
: HllSketchImpl<A>(that.lgConfigK, tgtHllType, that.mode, false),
|
64
|
-
lgCouponArrInts(that.lgCouponArrInts),
|
65
|
-
couponCount(that.couponCount),
|
66
|
-
oooFlag(that.oooFlag) {
|
67
|
-
|
68
|
-
const int numItems = 1 << lgCouponArrInts;
|
69
|
-
typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
|
70
|
-
couponIntArr = intAlloc().allocate(numItems);
|
71
|
-
std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr);
|
72
|
-
}
|
34
|
+
CouponList<A>::CouponList(const int lgConfigK, const target_hll_type tgtHllType, const hll_mode mode, const A& allocator):
|
35
|
+
HllSketchImpl<A>(lgConfigK, tgtHllType, mode, false),
|
36
|
+
couponCount(0),
|
37
|
+
oooFlag(false),
|
38
|
+
coupons(1 << (mode == hll_mode::LIST ? HllUtil<A>::LG_INIT_LIST_SIZE : HllUtil<A>::LG_INIT_SET_SIZE), 0, allocator)
|
39
|
+
{}
|
73
40
|
|
74
41
|
template<typename A>
|
75
|
-
CouponList<A
|
76
|
-
|
77
|
-
|
78
|
-
|
42
|
+
CouponList<A>::CouponList(const CouponList& that, const target_hll_type tgtHllType):
|
43
|
+
HllSketchImpl<A>(that.lgConfigK, tgtHllType, that.mode, false),
|
44
|
+
couponCount(that.couponCount),
|
45
|
+
oooFlag(that.oooFlag),
|
46
|
+
coupons(that.coupons)
|
47
|
+
{}
|
79
48
|
|
80
49
|
template<typename A>
|
81
50
|
std::function<void(HllSketchImpl<A>*)> CouponList<A>::get_deleter() const {
|
82
51
|
return [](HllSketchImpl<A>* ptr) {
|
83
52
|
CouponList<A>* cl = static_cast<CouponList<A>*>(ptr);
|
53
|
+
ClAlloc cla(cl->getAllocator());
|
84
54
|
cl->~CouponList();
|
85
|
-
|
55
|
+
cla.deallocate(cl, 1);
|
86
56
|
};
|
87
57
|
}
|
88
58
|
|
89
59
|
template<typename A>
|
90
60
|
CouponList<A>* CouponList<A>::copy() const {
|
91
|
-
|
61
|
+
ClAlloc cla(coupons.get_allocator());
|
62
|
+
return new (cla.allocate(1)) CouponList<A>(*this);
|
92
63
|
}
|
93
64
|
|
94
65
|
template<typename A>
|
95
66
|
CouponList<A>* CouponList<A>::copyAs(target_hll_type tgtHllType) const {
|
96
|
-
|
67
|
+
ClAlloc cla(coupons.get_allocator());
|
68
|
+
return new (cla.allocate(1)) CouponList<A>(*this, tgtHllType);
|
97
69
|
}
|
98
70
|
|
99
71
|
template<typename A>
|
100
|
-
CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) {
|
72
|
+
CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len, const A& allocator) {
|
101
73
|
if (len < HllUtil<A>::LIST_INT_ARR_START) {
|
102
74
|
throw std::out_of_range("Input data length insufficient to hold CouponHashSet");
|
103
75
|
}
|
@@ -115,7 +87,7 @@ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) {
|
|
115
87
|
|
116
88
|
hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
|
117
89
|
if (mode != LIST) {
|
118
|
-
throw std::invalid_argument("Calling
|
90
|
+
throw std::invalid_argument("Calling list constructor with non-list mode data");
|
119
91
|
}
|
120
92
|
|
121
93
|
target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
|
@@ -133,20 +105,21 @@ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) {
|
|
133
105
|
+ ", found: " + std::to_string(len));
|
134
106
|
}
|
135
107
|
|
136
|
-
|
108
|
+
ClAlloc cla(allocator);
|
109
|
+
CouponList<A>* sketch = new (cla.allocate(1)) CouponList<A>(lgK, tgtHllType, mode, allocator);
|
137
110
|
sketch->couponCount = couponCount;
|
138
111
|
sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST
|
139
112
|
|
140
113
|
if (!emptyFlag) {
|
141
114
|
// only need to read valid coupons, unlike in stream case
|
142
|
-
std::memcpy(sketch->
|
115
|
+
std::memcpy(sketch->coupons.data(), data + HllUtil<A>::LIST_INT_ARR_START, couponCount * sizeof(int));
|
143
116
|
}
|
144
117
|
|
145
118
|
return sketch;
|
146
119
|
}
|
147
120
|
|
148
121
|
template<typename A>
|
149
|
-
CouponList<A>* CouponList<A>::newList(std::istream& is) {
|
122
|
+
CouponList<A>* CouponList<A>::newList(std::istream& is, const A& allocator) {
|
150
123
|
uint8_t listHeader[8];
|
151
124
|
is.read((char*)listHeader, 8 * sizeof(uint8_t));
|
152
125
|
|
@@ -162,7 +135,7 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
|
|
162
135
|
|
163
136
|
hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
|
164
137
|
if (mode != LIST) {
|
165
|
-
throw std::invalid_argument("Calling list
|
138
|
+
throw std::invalid_argument("Calling list constructor with non-list mode data");
|
166
139
|
}
|
167
140
|
|
168
141
|
const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
|
@@ -172,8 +145,9 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
|
|
172
145
|
const bool oooFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
|
173
146
|
const bool emptyFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::EMPTY_FLAG_MASK) ? true : false);
|
174
147
|
|
175
|
-
|
176
|
-
|
148
|
+
ClAlloc cla(allocator);
|
149
|
+
CouponList<A>* sketch = new (cla.allocate(1)) CouponList<A>(lgK, tgtHllType, mode, allocator);
|
150
|
+
using coupon_list_ptr = std::unique_ptr<CouponList<A>, std::function<void(HllSketchImpl<A>*)>>;
|
177
151
|
coupon_list_ptr ptr(sketch, sketch->get_deleter());
|
178
152
|
const int couponCount = listHeader[HllUtil<A>::LIST_COUNT_BYTE];
|
179
153
|
sketch->couponCount = couponCount;
|
@@ -183,8 +157,8 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
|
|
183
157
|
// For stream processing, need to read entire number written to stream so read
|
184
158
|
// pointer ends up set correctly.
|
185
159
|
// If not compact, still need to read empty items even though in order.
|
186
|
-
const int numToRead = (compact ? couponCount :
|
187
|
-
is.read((char*)sketch->
|
160
|
+
const int numToRead = (compact ? couponCount : sketch->coupons.size());
|
161
|
+
is.read((char*)sketch->coupons.data(), numToRead * sizeof(int));
|
188
162
|
}
|
189
163
|
|
190
164
|
if (!is.good())
|
@@ -196,14 +170,14 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
|
|
196
170
|
template<typename A>
|
197
171
|
vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes) const {
|
198
172
|
const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes;
|
199
|
-
vector_u8<A> byteArr(sketchSizeBytes);
|
173
|
+
vector_u8<A> byteArr(sketchSizeBytes, 0, getAllocator());
|
200
174
|
uint8_t* bytes = byteArr.data() + header_size_bytes;
|
201
175
|
|
202
176
|
bytes[HllUtil<A>::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
|
203
177
|
bytes[HllUtil<A>::SER_VER_BYTE] = static_cast<uint8_t>(HllUtil<A>::SER_VER);
|
204
178
|
bytes[HllUtil<A>::FAMILY_BYTE] = static_cast<uint8_t>(HllUtil<A>::FAMILY_ID);
|
205
179
|
bytes[HllUtil<A>::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK);
|
206
|
-
bytes[HllUtil<A>::LG_ARR_BYTE] =
|
180
|
+
bytes[HllUtil<A>::LG_ARR_BYTE] = count_trailing_zeros_in_u32(coupons.size());
|
207
181
|
bytes[HllUtil<A>::FLAGS_BYTE] = this->makeFlagsByte(compact);
|
208
182
|
bytes[HllUtil<A>::LIST_COUNT_BYTE] = static_cast<uint8_t>(this->mode == LIST ? couponCount : 0);
|
209
183
|
bytes[HllUtil<A>::MODE_BYTE] = this->makeModeByte();
|
@@ -217,7 +191,7 @@ vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes)
|
|
217
191
|
const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
|
218
192
|
switch (sw) {
|
219
193
|
case 0: { // src updatable, dst updatable
|
220
|
-
std::memcpy(bytes + getMemDataStart(),
|
194
|
+
std::memcpy(bytes + getMemDataStart(), coupons.data(), coupons.size() * sizeof(int));
|
221
195
|
break;
|
222
196
|
}
|
223
197
|
case 1: { // src updatable, dst compact
|
@@ -247,7 +221,7 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
|
|
247
221
|
os.write((char*)&familyId, sizeof(familyId));
|
248
222
|
const uint8_t lgKByte((uint8_t) this->lgConfigK);
|
249
223
|
os.write((char*)&lgKByte, sizeof(lgKByte));
|
250
|
-
const uint8_t lgArrIntsByte((
|
224
|
+
const uint8_t lgArrIntsByte(count_trailing_zeros_in_u32(coupons.size()));
|
251
225
|
os.write((char*)&lgArrIntsByte, sizeof(lgArrIntsByte));
|
252
226
|
const uint8_t flagsByte(this->makeFlagsByte(compact));
|
253
227
|
os.write((char*)&flagsByte, sizeof(flagsByte));
|
@@ -273,7 +247,7 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
|
|
273
247
|
const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
|
274
248
|
switch (sw) {
|
275
249
|
case 0: { // src updatable, dst updatable
|
276
|
-
os.write((char*)
|
250
|
+
os.write((char*)coupons.data(), coupons.size() * sizeof(int));
|
277
251
|
break;
|
278
252
|
}
|
279
253
|
case 1: { // src updatable, dst compact
|
@@ -292,13 +266,12 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
|
|
292
266
|
|
293
267
|
template<typename A>
|
294
268
|
HllSketchImpl<A>* CouponList<A>::couponUpdate(int coupon) {
|
295
|
-
|
296
|
-
|
297
|
-
const int couponAtIdx = couponIntArr[i];
|
269
|
+
for (size_t i = 0; i < coupons.size(); ++i) { // search for empty slot
|
270
|
+
const int couponAtIdx = coupons[i];
|
298
271
|
if (couponAtIdx == HllUtil<A>::EMPTY) {
|
299
|
-
|
272
|
+
coupons[i] = coupon; // the actual update
|
300
273
|
++couponCount;
|
301
|
-
if (couponCount
|
274
|
+
if (couponCount == static_cast<int>(coupons.size())) { // array full
|
302
275
|
if (this->lgConfigK < 8) {
|
303
276
|
return promoteHeapListOrSetToHll(*this);
|
304
277
|
}
|
@@ -348,7 +321,7 @@ bool CouponList<A>::isEmpty() const { return getCouponCount() == 0; }
|
|
348
321
|
|
349
322
|
template<typename A>
|
350
323
|
int CouponList<A>::getUpdatableSerializationBytes() const {
|
351
|
-
return getMemDataStart() + (
|
324
|
+
return getMemDataStart() + coupons.size() * sizeof(int);
|
352
325
|
}
|
353
326
|
|
354
327
|
template<typename A>
|
@@ -383,13 +356,8 @@ void CouponList<A>::putOutOfOrderFlag(bool oooFlag) {
|
|
383
356
|
}
|
384
357
|
|
385
358
|
template<typename A>
|
386
|
-
|
387
|
-
return
|
388
|
-
}
|
389
|
-
|
390
|
-
template<typename A>
|
391
|
-
int* CouponList<A>::getCouponIntArr() const {
|
392
|
-
return couponIntArr;
|
359
|
+
A CouponList<A>::getAllocator() const {
|
360
|
+
return coupons.get_allocator();
|
393
361
|
}
|
394
362
|
|
395
363
|
template<typename A>
|
@@ -404,12 +372,12 @@ HllSketchImpl<A>* CouponList<A>::promoteHeapListOrSetToHll(CouponList& src) {
|
|
404
372
|
|
405
373
|
template<typename A>
|
406
374
|
coupon_iterator<A> CouponList<A>::begin(bool all) const {
|
407
|
-
return coupon_iterator<A>(
|
375
|
+
return coupon_iterator<A>(coupons.data(), coupons.size(), 0, all);
|
408
376
|
}
|
409
377
|
|
410
378
|
template<typename A>
|
411
379
|
coupon_iterator<A> CouponList<A>::end() const {
|
412
|
-
return coupon_iterator<A>(
|
380
|
+
return coupon_iterator<A>(coupons.data(), coupons.size(), coupons.size(), false);
|
413
381
|
}
|
414
382
|
|
415
383
|
}
|
@@ -30,19 +30,18 @@ namespace datasketches {
|
|
30
30
|
template<typename A>
|
31
31
|
class HllSketchImplFactory;
|
32
32
|
|
33
|
-
template<typename A
|
33
|
+
template<typename A>
|
34
34
|
class CouponList : public HllSketchImpl<A> {
|
35
35
|
public:
|
36
|
-
|
37
|
-
|
38
|
-
explicit CouponList(const CouponList& that, target_hll_type tgtHllType);
|
36
|
+
CouponList(int lgConfigK, target_hll_type tgtHllType, hll_mode mode, const A& allocator);
|
37
|
+
CouponList(const CouponList& that, target_hll_type tgtHllType);
|
39
38
|
|
40
|
-
static CouponList* newList(const void* bytes, size_t len);
|
41
|
-
static CouponList* newList(std::istream& is);
|
39
|
+
static CouponList* newList(const void* bytes, size_t len, const A& allocator);
|
40
|
+
static CouponList* newList(std::istream& is, const A& allocator);
|
42
41
|
virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const;
|
43
42
|
virtual void serialize(std::ostream& os, bool compact) const;
|
44
43
|
|
45
|
-
virtual ~CouponList();
|
44
|
+
virtual ~CouponList() = default;
|
46
45
|
virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
|
47
46
|
|
48
47
|
virtual CouponList* copy() const;
|
@@ -62,7 +61,9 @@ class CouponList : public HllSketchImpl<A> {
|
|
62
61
|
coupon_iterator<A> end() const;
|
63
62
|
|
64
63
|
protected:
|
65
|
-
|
64
|
+
using ClAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>>;
|
65
|
+
|
66
|
+
using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
|
66
67
|
|
67
68
|
HllSketchImpl<A>* promoteHeapListToSet(CouponList& list);
|
68
69
|
HllSketchImpl<A>* promoteHeapListOrSetToHll(CouponList& src);
|
@@ -75,13 +76,11 @@ class CouponList : public HllSketchImpl<A> {
|
|
75
76
|
virtual bool isOutOfOrderFlag() const;
|
76
77
|
virtual void putOutOfOrderFlag(bool oooFlag);
|
77
78
|
|
78
|
-
virtual
|
79
|
-
virtual int* getCouponIntArr() const;
|
79
|
+
virtual A getAllocator() const;
|
80
80
|
|
81
|
-
int lgCouponArrInts;
|
82
81
|
int couponCount;
|
83
82
|
bool oooFlag;
|
84
|
-
|
83
|
+
vector_int coupons;
|
85
84
|
|
86
85
|
friend class HllSketchImplFactory<A>;
|
87
86
|
};
|
@@ -24,7 +24,7 @@
|
|
24
24
|
|
25
25
|
namespace datasketches {
|
26
26
|
|
27
|
-
template<typename A = std::allocator<
|
27
|
+
template<typename A = std::allocator<uint8_t>>
|
28
28
|
class CubicInterpolation {
|
29
29
|
public:
|
30
30
|
static double usingXAndYTables(const double xArr[], const double yArr[],
|
@@ -40,4 +40,4 @@ class CubicInterpolation {
|
|
40
40
|
|
41
41
|
#include "CubicInterpolation-internal.hpp"
|
42
42
|
|
43
|
-
#endif /* _CUBICINTERPOLATION_HPP_ */
|
43
|
+
#endif /* _CUBICINTERPOLATION_HPP_ */
|
@@ -25,7 +25,7 @@
|
|
25
25
|
|
26
26
|
namespace datasketches {
|
27
27
|
|
28
|
-
template<typename A = std::allocator<
|
28
|
+
template<typename A = std::allocator<uint8_t>>
|
29
29
|
class HarmonicNumbers {
|
30
30
|
public:
|
31
31
|
/**
|
@@ -45,4 +45,4 @@ class HarmonicNumbers {
|
|
45
45
|
|
46
46
|
#include "HarmonicNumbers-internal.hpp"
|
47
47
|
|
48
|
-
#endif /* _HARMONICNUMBERS_HPP_ */
|
48
|
+
#endif /* _HARMONICNUMBERS_HPP_ */
|
@@ -30,13 +30,12 @@
|
|
30
30
|
namespace datasketches {
|
31
31
|
|
32
32
|
template<typename A>
|
33
|
-
Hll4Array<A>::Hll4Array(const int lgConfigK, const bool startFullSize)
|
34
|
-
|
33
|
+
Hll4Array<A>::Hll4Array(const int lgConfigK, const bool startFullSize, const A& allocator):
|
34
|
+
HllArray<A>(lgConfigK, target_hll_type::HLL_4, startFullSize, allocator),
|
35
|
+
auxHashMap(nullptr)
|
36
|
+
{
|
35
37
|
const int numBytes = this->hll4ArrBytes(lgConfigK);
|
36
|
-
|
37
|
-
this->hllByteArr = uint8Alloc().allocate(numBytes);
|
38
|
-
std::fill(this->hllByteArr, this->hllByteArr + numBytes, 0);
|
39
|
-
auxHashMap = nullptr;
|
38
|
+
this->hllByteArr.resize(numBytes, 0);
|
40
39
|
}
|
41
40
|
|
42
41
|
template<typename A>
|
@@ -63,17 +62,19 @@ Hll4Array<A>::~Hll4Array() {
|
|
63
62
|
template<typename A>
|
64
63
|
std::function<void(HllSketchImpl<A>*)> Hll4Array<A>::get_deleter() const {
|
65
64
|
return [](HllSketchImpl<A>* ptr) {
|
66
|
-
typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc;
|
67
65
|
Hll4Array<A>* hll = static_cast<Hll4Array<A>*>(ptr);
|
66
|
+
using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
|
67
|
+
Hll4Alloc hll4Alloc(hll->getAllocator());
|
68
68
|
hll->~Hll4Array();
|
69
|
-
hll4Alloc
|
69
|
+
hll4Alloc.deallocate(hll, 1);
|
70
70
|
};
|
71
71
|
}
|
72
72
|
|
73
73
|
template<typename A>
|
74
74
|
Hll4Array<A>* Hll4Array<A>::copy() const {
|
75
|
-
|
76
|
-
|
75
|
+
using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
|
76
|
+
Hll4Alloc hll4Alloc(this->getAllocator());
|
77
|
+
return new (hll4Alloc.allocate(1)) Hll4Array<A>(*this);
|
77
78
|
}
|
78
79
|
|
79
80
|
template<typename A>
|
@@ -195,7 +196,7 @@ void Hll4Array<A>::internalHll4Update(const int slotNo, const int newVal) {
|
|
195
196
|
// added to the exception table
|
196
197
|
putSlot(slotNo, HllUtil<A>::AUX_TOKEN);
|
197
198
|
if (auxHashMap == nullptr) {
|
198
|
-
auxHashMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK);
|
199
|
+
auxHashMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK, this->getAllocator());
|
199
200
|
}
|
200
201
|
auxHashMap->mustAdd(slotNo, newVal);
|
201
202
|
}
|
@@ -285,7 +286,7 @@ void Hll4Array<A>::shiftToBiggerCurMin() {
|
|
285
286
|
} else { //newShiftedVal >= AUX_TOKEN
|
286
287
|
// the former exception remains an exception, so must be added to the newAuxMap
|
287
288
|
if (newAuxMap == nullptr) {
|
288
|
-
newAuxMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK);
|
289
|
+
newAuxMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK, this->getAllocator());
|
289
290
|
}
|
290
291
|
newAuxMap->mustAdd(slotNum, oldActualVal);
|
291
292
|
}
|
@@ -315,12 +316,12 @@ void Hll4Array<A>::shiftToBiggerCurMin() {
|
|
315
316
|
|
316
317
|
template<typename A>
|
317
318
|
typename HllArray<A>::const_iterator Hll4Array<A>::begin(bool all) const {
|
318
|
-
return typename HllArray<A>::const_iterator(this->hllByteArr, 1 << this->lgConfigK, 0, this->tgtHllType, auxHashMap, this->curMin, all);
|
319
|
+
return typename HllArray<A>::const_iterator(this->hllByteArr.data(), 1 << this->lgConfigK, 0, this->tgtHllType, auxHashMap, this->curMin, all);
|
319
320
|
}
|
320
321
|
|
321
322
|
template<typename A>
|
322
323
|
typename HllArray<A>::const_iterator Hll4Array<A>::end() const {
|
323
|
-
return typename HllArray<A>::const_iterator(this->hllByteArr, 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, auxHashMap, this->curMin, false);
|
324
|
+
return typename HllArray<A>::const_iterator(this->hllByteArr.data(), 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, auxHashMap, this->curMin, false);
|
324
325
|
}
|
325
326
|
|
326
327
|
template<typename A>
|