datasketches 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/ext/datasketches/cpc_wrapper.cpp +12 -13
- data/ext/datasketches/ext.cpp +1 -1
- data/ext/datasketches/ext.h +4 -0
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/fi_wrapper.cpp +6 -8
- data/ext/datasketches/hll_wrapper.cpp +13 -14
- data/ext/datasketches/kll_wrapper.cpp +28 -76
- data/ext/datasketches/theta_wrapper.cpp +27 -41
- data/ext/datasketches/vo_wrapper.cpp +4 -6
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/README.md +4 -4
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +7 -0
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +3 -0
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +28 -19
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +8 -5
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +19 -14
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +6 -6
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +0 -6
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +3 -3
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +9 -9
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +40 -28
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +140 -124
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +3 -3
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +32 -57
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +9 -8
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +34 -48
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +10 -10
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +45 -77
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +11 -12
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +15 -14
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +10 -21
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +2 -3
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +10 -21
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +2 -3
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +28 -55
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +8 -8
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +9 -11
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +2 -1
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +34 -31
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +3 -28
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/hll.hpp +6 -34
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +7 -7
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +46 -50
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +1 -1
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +3 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +10 -3
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +93 -75
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +45 -42
- data/vendor/datasketches-cpp/python/CMakeLists.txt +2 -0
- data/vendor/datasketches-cpp/python/README.md +6 -3
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +0 -2
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +3 -1
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +36 -26
- data/vendor/datasketches-cpp/python/tests/hll_test.py +0 -1
- data/vendor/datasketches-cpp/python/tests/kll_test.py +3 -3
- data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
- data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +17 -8
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +501 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +10 -9
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +82 -70
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +7 -7
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +0 -31
- data/vendor/datasketches-cpp/setup.py +5 -3
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +2 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +2 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +22 -29
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
- data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
- data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +6 -22
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +132 -266
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +200 -650
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +3 -19
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +6 -1
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +2 -3
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +37 -234
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +6 -6
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +1 -4
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +1 -4
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +2 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +2 -2
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +1 -4
- metadata +43 -34
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
|
@@ -24,20 +24,20 @@
|
|
|
24
24
|
|
|
25
25
|
namespace datasketches {
|
|
26
26
|
|
|
27
|
-
template<typename A
|
|
27
|
+
template<typename A>
|
|
28
28
|
class CouponHashSet : public CouponList<A> {
|
|
29
29
|
public:
|
|
30
|
-
static CouponHashSet* newSet(const void* bytes, size_t len);
|
|
31
|
-
static CouponHashSet* newSet(std::istream& is);
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
explicit CouponHashSet(const CouponHashSet& that);
|
|
30
|
+
static CouponHashSet* newSet(const void* bytes, size_t len, const A& allocator);
|
|
31
|
+
static CouponHashSet* newSet(std::istream& is, const A& allocator);
|
|
32
|
+
CouponHashSet(int lgConfigK, target_hll_type tgtHllType, const A& allocator);
|
|
33
|
+
CouponHashSet(const CouponHashSet& that, target_hll_type tgtHllType);
|
|
35
34
|
|
|
36
|
-
virtual ~CouponHashSet();
|
|
35
|
+
virtual ~CouponHashSet() = default;
|
|
37
36
|
virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
|
|
38
37
|
|
|
39
38
|
protected:
|
|
40
|
-
|
|
39
|
+
using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
|
|
40
|
+
|
|
41
41
|
virtual CouponHashSet* copy() const;
|
|
42
42
|
virtual CouponHashSet* copyAs(target_hll_type tgtHllType) const;
|
|
43
43
|
|
|
@@ -49,9 +49,9 @@ class CouponHashSet : public CouponList<A> {
|
|
|
49
49
|
friend class HllSketchImplFactory<A>;
|
|
50
50
|
|
|
51
51
|
private:
|
|
52
|
-
|
|
52
|
+
using ChsAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponHashSet<A>>;
|
|
53
53
|
bool checkGrowOrPromote();
|
|
54
|
-
void growHashSet(int
|
|
54
|
+
void growHashSet(int tgtLgCoupArrSize);
|
|
55
55
|
};
|
|
56
56
|
|
|
57
57
|
}
|
|
@@ -23,6 +23,7 @@
|
|
|
23
23
|
#include "CouponList.hpp"
|
|
24
24
|
#include "CubicInterpolation.hpp"
|
|
25
25
|
#include "HllUtil.hpp"
|
|
26
|
+
#include "count_zeros.hpp"
|
|
26
27
|
|
|
27
28
|
#include <algorithm>
|
|
28
29
|
#include <cmath>
|
|
@@ -30,74 +31,45 @@
|
|
|
30
31
|
namespace datasketches {
|
|
31
32
|
|
|
32
33
|
template<typename A>
|
|
33
|
-
CouponList<A>::CouponList(const int lgConfigK, const target_hll_type tgtHllType, const hll_mode mode)
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
}
|
|
40
|
-
oooFlag = false;
|
|
41
|
-
const int arrayLen = 1 << lgCouponArrInts;
|
|
42
|
-
typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
|
|
43
|
-
couponIntArr = intAlloc().allocate(arrayLen);
|
|
44
|
-
std::fill(couponIntArr, couponIntArr + arrayLen, 0);
|
|
45
|
-
couponCount = 0;
|
|
46
|
-
}
|
|
47
|
-
|
|
48
|
-
template<typename A>
|
|
49
|
-
CouponList<A>::CouponList(const CouponList& that)
|
|
50
|
-
: HllSketchImpl<A>(that.lgConfigK, that.tgtHllType, that.mode, false),
|
|
51
|
-
lgCouponArrInts(that.lgCouponArrInts),
|
|
52
|
-
couponCount(that.couponCount),
|
|
53
|
-
oooFlag(that.oooFlag) {
|
|
54
|
-
|
|
55
|
-
const int numItems = 1 << lgCouponArrInts;
|
|
56
|
-
typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
|
|
57
|
-
couponIntArr = intAlloc().allocate(numItems);
|
|
58
|
-
std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr);
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
template<typename A>
|
|
62
|
-
CouponList<A>::CouponList(const CouponList& that, const target_hll_type tgtHllType)
|
|
63
|
-
: HllSketchImpl<A>(that.lgConfigK, tgtHllType, that.mode, false),
|
|
64
|
-
lgCouponArrInts(that.lgCouponArrInts),
|
|
65
|
-
couponCount(that.couponCount),
|
|
66
|
-
oooFlag(that.oooFlag) {
|
|
67
|
-
|
|
68
|
-
const int numItems = 1 << lgCouponArrInts;
|
|
69
|
-
typedef typename std::allocator_traits<A>::template rebind_alloc<int> intAlloc;
|
|
70
|
-
couponIntArr = intAlloc().allocate(numItems);
|
|
71
|
-
std::copy(that.couponIntArr, that.couponIntArr + numItems, couponIntArr);
|
|
72
|
-
}
|
|
34
|
+
CouponList<A>::CouponList(const int lgConfigK, const target_hll_type tgtHllType, const hll_mode mode, const A& allocator):
|
|
35
|
+
HllSketchImpl<A>(lgConfigK, tgtHllType, mode, false),
|
|
36
|
+
couponCount(0),
|
|
37
|
+
oooFlag(false),
|
|
38
|
+
coupons(1 << (mode == hll_mode::LIST ? HllUtil<A>::LG_INIT_LIST_SIZE : HllUtil<A>::LG_INIT_SET_SIZE), 0, allocator)
|
|
39
|
+
{}
|
|
73
40
|
|
|
74
41
|
template<typename A>
|
|
75
|
-
CouponList<A
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
42
|
+
CouponList<A>::CouponList(const CouponList& that, const target_hll_type tgtHllType):
|
|
43
|
+
HllSketchImpl<A>(that.lgConfigK, tgtHllType, that.mode, false),
|
|
44
|
+
couponCount(that.couponCount),
|
|
45
|
+
oooFlag(that.oooFlag),
|
|
46
|
+
coupons(that.coupons)
|
|
47
|
+
{}
|
|
79
48
|
|
|
80
49
|
template<typename A>
|
|
81
50
|
std::function<void(HllSketchImpl<A>*)> CouponList<A>::get_deleter() const {
|
|
82
51
|
return [](HllSketchImpl<A>* ptr) {
|
|
83
52
|
CouponList<A>* cl = static_cast<CouponList<A>*>(ptr);
|
|
53
|
+
ClAlloc cla(cl->getAllocator());
|
|
84
54
|
cl->~CouponList();
|
|
85
|
-
|
|
55
|
+
cla.deallocate(cl, 1);
|
|
86
56
|
};
|
|
87
57
|
}
|
|
88
58
|
|
|
89
59
|
template<typename A>
|
|
90
60
|
CouponList<A>* CouponList<A>::copy() const {
|
|
91
|
-
|
|
61
|
+
ClAlloc cla(coupons.get_allocator());
|
|
62
|
+
return new (cla.allocate(1)) CouponList<A>(*this);
|
|
92
63
|
}
|
|
93
64
|
|
|
94
65
|
template<typename A>
|
|
95
66
|
CouponList<A>* CouponList<A>::copyAs(target_hll_type tgtHllType) const {
|
|
96
|
-
|
|
67
|
+
ClAlloc cla(coupons.get_allocator());
|
|
68
|
+
return new (cla.allocate(1)) CouponList<A>(*this, tgtHllType);
|
|
97
69
|
}
|
|
98
70
|
|
|
99
71
|
template<typename A>
|
|
100
|
-
CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) {
|
|
72
|
+
CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len, const A& allocator) {
|
|
101
73
|
if (len < HllUtil<A>::LIST_INT_ARR_START) {
|
|
102
74
|
throw std::out_of_range("Input data length insufficient to hold CouponHashSet");
|
|
103
75
|
}
|
|
@@ -115,7 +87,7 @@ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) {
|
|
|
115
87
|
|
|
116
88
|
hll_mode mode = HllSketchImpl<A>::extractCurMode(data[HllUtil<A>::MODE_BYTE]);
|
|
117
89
|
if (mode != LIST) {
|
|
118
|
-
throw std::invalid_argument("Calling
|
|
90
|
+
throw std::invalid_argument("Calling list constructor with non-list mode data");
|
|
119
91
|
}
|
|
120
92
|
|
|
121
93
|
target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(data[HllUtil<A>::MODE_BYTE]);
|
|
@@ -133,20 +105,21 @@ CouponList<A>* CouponList<A>::newList(const void* bytes, size_t len) {
|
|
|
133
105
|
+ ", found: " + std::to_string(len));
|
|
134
106
|
}
|
|
135
107
|
|
|
136
|
-
|
|
108
|
+
ClAlloc cla(allocator);
|
|
109
|
+
CouponList<A>* sketch = new (cla.allocate(1)) CouponList<A>(lgK, tgtHllType, mode, allocator);
|
|
137
110
|
sketch->couponCount = couponCount;
|
|
138
111
|
sketch->putOutOfOrderFlag(oooFlag); // should always be false for LIST
|
|
139
112
|
|
|
140
113
|
if (!emptyFlag) {
|
|
141
114
|
// only need to read valid coupons, unlike in stream case
|
|
142
|
-
std::memcpy(sketch->
|
|
115
|
+
std::memcpy(sketch->coupons.data(), data + HllUtil<A>::LIST_INT_ARR_START, couponCount * sizeof(int));
|
|
143
116
|
}
|
|
144
117
|
|
|
145
118
|
return sketch;
|
|
146
119
|
}
|
|
147
120
|
|
|
148
121
|
template<typename A>
|
|
149
|
-
CouponList<A>* CouponList<A>::newList(std::istream& is) {
|
|
122
|
+
CouponList<A>* CouponList<A>::newList(std::istream& is, const A& allocator) {
|
|
150
123
|
uint8_t listHeader[8];
|
|
151
124
|
is.read((char*)listHeader, 8 * sizeof(uint8_t));
|
|
152
125
|
|
|
@@ -162,7 +135,7 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
|
|
|
162
135
|
|
|
163
136
|
hll_mode mode = HllSketchImpl<A>::extractCurMode(listHeader[HllUtil<A>::MODE_BYTE]);
|
|
164
137
|
if (mode != LIST) {
|
|
165
|
-
throw std::invalid_argument("Calling list
|
|
138
|
+
throw std::invalid_argument("Calling list constructor with non-list mode data");
|
|
166
139
|
}
|
|
167
140
|
|
|
168
141
|
const target_hll_type tgtHllType = HllSketchImpl<A>::extractTgtHllType(listHeader[HllUtil<A>::MODE_BYTE]);
|
|
@@ -172,8 +145,9 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
|
|
|
172
145
|
const bool oooFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::OUT_OF_ORDER_FLAG_MASK) ? true : false);
|
|
173
146
|
const bool emptyFlag = ((listHeader[HllUtil<A>::FLAGS_BYTE] & HllUtil<A>::EMPTY_FLAG_MASK) ? true : false);
|
|
174
147
|
|
|
175
|
-
|
|
176
|
-
|
|
148
|
+
ClAlloc cla(allocator);
|
|
149
|
+
CouponList<A>* sketch = new (cla.allocate(1)) CouponList<A>(lgK, tgtHllType, mode, allocator);
|
|
150
|
+
using coupon_list_ptr = std::unique_ptr<CouponList<A>, std::function<void(HllSketchImpl<A>*)>>;
|
|
177
151
|
coupon_list_ptr ptr(sketch, sketch->get_deleter());
|
|
178
152
|
const int couponCount = listHeader[HllUtil<A>::LIST_COUNT_BYTE];
|
|
179
153
|
sketch->couponCount = couponCount;
|
|
@@ -183,8 +157,8 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
|
|
|
183
157
|
// For stream processing, need to read entire number written to stream so read
|
|
184
158
|
// pointer ends up set correctly.
|
|
185
159
|
// If not compact, still need to read empty items even though in order.
|
|
186
|
-
const int numToRead = (compact ? couponCount :
|
|
187
|
-
is.read((char*)sketch->
|
|
160
|
+
const int numToRead = (compact ? couponCount : sketch->coupons.size());
|
|
161
|
+
is.read((char*)sketch->coupons.data(), numToRead * sizeof(int));
|
|
188
162
|
}
|
|
189
163
|
|
|
190
164
|
if (!is.good())
|
|
@@ -196,14 +170,14 @@ CouponList<A>* CouponList<A>::newList(std::istream& is) {
|
|
|
196
170
|
template<typename A>
|
|
197
171
|
vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes) const {
|
|
198
172
|
const size_t sketchSizeBytes = (compact ? getCompactSerializationBytes() : getUpdatableSerializationBytes()) + header_size_bytes;
|
|
199
|
-
vector_u8<A> byteArr(sketchSizeBytes);
|
|
173
|
+
vector_u8<A> byteArr(sketchSizeBytes, 0, getAllocator());
|
|
200
174
|
uint8_t* bytes = byteArr.data() + header_size_bytes;
|
|
201
175
|
|
|
202
176
|
bytes[HllUtil<A>::PREAMBLE_INTS_BYTE] = static_cast<uint8_t>(getPreInts());
|
|
203
177
|
bytes[HllUtil<A>::SER_VER_BYTE] = static_cast<uint8_t>(HllUtil<A>::SER_VER);
|
|
204
178
|
bytes[HllUtil<A>::FAMILY_BYTE] = static_cast<uint8_t>(HllUtil<A>::FAMILY_ID);
|
|
205
179
|
bytes[HllUtil<A>::LG_K_BYTE] = static_cast<uint8_t>(this->lgConfigK);
|
|
206
|
-
bytes[HllUtil<A>::LG_ARR_BYTE] =
|
|
180
|
+
bytes[HllUtil<A>::LG_ARR_BYTE] = count_trailing_zeros_in_u32(coupons.size());
|
|
207
181
|
bytes[HllUtil<A>::FLAGS_BYTE] = this->makeFlagsByte(compact);
|
|
208
182
|
bytes[HllUtil<A>::LIST_COUNT_BYTE] = static_cast<uint8_t>(this->mode == LIST ? couponCount : 0);
|
|
209
183
|
bytes[HllUtil<A>::MODE_BYTE] = this->makeModeByte();
|
|
@@ -217,7 +191,7 @@ vector_u8<A> CouponList<A>::serialize(bool compact, unsigned header_size_bytes)
|
|
|
217
191
|
const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
|
|
218
192
|
switch (sw) {
|
|
219
193
|
case 0: { // src updatable, dst updatable
|
|
220
|
-
std::memcpy(bytes + getMemDataStart(),
|
|
194
|
+
std::memcpy(bytes + getMemDataStart(), coupons.data(), coupons.size() * sizeof(int));
|
|
221
195
|
break;
|
|
222
196
|
}
|
|
223
197
|
case 1: { // src updatable, dst compact
|
|
@@ -247,7 +221,7 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
|
|
|
247
221
|
os.write((char*)&familyId, sizeof(familyId));
|
|
248
222
|
const uint8_t lgKByte((uint8_t) this->lgConfigK);
|
|
249
223
|
os.write((char*)&lgKByte, sizeof(lgKByte));
|
|
250
|
-
const uint8_t lgArrIntsByte((
|
|
224
|
+
const uint8_t lgArrIntsByte(count_trailing_zeros_in_u32(coupons.size()));
|
|
251
225
|
os.write((char*)&lgArrIntsByte, sizeof(lgArrIntsByte));
|
|
252
226
|
const uint8_t flagsByte(this->makeFlagsByte(compact));
|
|
253
227
|
os.write((char*)&flagsByte, sizeof(flagsByte));
|
|
@@ -273,7 +247,7 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
|
|
|
273
247
|
const int sw = (isCompact() ? 2 : 0) | (compact ? 1 : 0);
|
|
274
248
|
switch (sw) {
|
|
275
249
|
case 0: { // src updatable, dst updatable
|
|
276
|
-
os.write((char*)
|
|
250
|
+
os.write((char*)coupons.data(), coupons.size() * sizeof(int));
|
|
277
251
|
break;
|
|
278
252
|
}
|
|
279
253
|
case 1: { // src updatable, dst compact
|
|
@@ -292,13 +266,12 @@ void CouponList<A>::serialize(std::ostream& os, const bool compact) const {
|
|
|
292
266
|
|
|
293
267
|
template<typename A>
|
|
294
268
|
HllSketchImpl<A>* CouponList<A>::couponUpdate(int coupon) {
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
const int couponAtIdx = couponIntArr[i];
|
|
269
|
+
for (size_t i = 0; i < coupons.size(); ++i) { // search for empty slot
|
|
270
|
+
const int couponAtIdx = coupons[i];
|
|
298
271
|
if (couponAtIdx == HllUtil<A>::EMPTY) {
|
|
299
|
-
|
|
272
|
+
coupons[i] = coupon; // the actual update
|
|
300
273
|
++couponCount;
|
|
301
|
-
if (couponCount
|
|
274
|
+
if (couponCount == static_cast<int>(coupons.size())) { // array full
|
|
302
275
|
if (this->lgConfigK < 8) {
|
|
303
276
|
return promoteHeapListOrSetToHll(*this);
|
|
304
277
|
}
|
|
@@ -348,7 +321,7 @@ bool CouponList<A>::isEmpty() const { return getCouponCount() == 0; }
|
|
|
348
321
|
|
|
349
322
|
template<typename A>
|
|
350
323
|
int CouponList<A>::getUpdatableSerializationBytes() const {
|
|
351
|
-
return getMemDataStart() + (
|
|
324
|
+
return getMemDataStart() + coupons.size() * sizeof(int);
|
|
352
325
|
}
|
|
353
326
|
|
|
354
327
|
template<typename A>
|
|
@@ -383,13 +356,8 @@ void CouponList<A>::putOutOfOrderFlag(bool oooFlag) {
|
|
|
383
356
|
}
|
|
384
357
|
|
|
385
358
|
template<typename A>
|
|
386
|
-
|
|
387
|
-
return
|
|
388
|
-
}
|
|
389
|
-
|
|
390
|
-
template<typename A>
|
|
391
|
-
int* CouponList<A>::getCouponIntArr() const {
|
|
392
|
-
return couponIntArr;
|
|
359
|
+
A CouponList<A>::getAllocator() const {
|
|
360
|
+
return coupons.get_allocator();
|
|
393
361
|
}
|
|
394
362
|
|
|
395
363
|
template<typename A>
|
|
@@ -404,12 +372,12 @@ HllSketchImpl<A>* CouponList<A>::promoteHeapListOrSetToHll(CouponList& src) {
|
|
|
404
372
|
|
|
405
373
|
template<typename A>
|
|
406
374
|
coupon_iterator<A> CouponList<A>::begin(bool all) const {
|
|
407
|
-
return coupon_iterator<A>(
|
|
375
|
+
return coupon_iterator<A>(coupons.data(), coupons.size(), 0, all);
|
|
408
376
|
}
|
|
409
377
|
|
|
410
378
|
template<typename A>
|
|
411
379
|
coupon_iterator<A> CouponList<A>::end() const {
|
|
412
|
-
return coupon_iterator<A>(
|
|
380
|
+
return coupon_iterator<A>(coupons.data(), coupons.size(), coupons.size(), false);
|
|
413
381
|
}
|
|
414
382
|
|
|
415
383
|
}
|
|
@@ -30,19 +30,18 @@ namespace datasketches {
|
|
|
30
30
|
template<typename A>
|
|
31
31
|
class HllSketchImplFactory;
|
|
32
32
|
|
|
33
|
-
template<typename A
|
|
33
|
+
template<typename A>
|
|
34
34
|
class CouponList : public HllSketchImpl<A> {
|
|
35
35
|
public:
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
explicit CouponList(const CouponList& that, target_hll_type tgtHllType);
|
|
36
|
+
CouponList(int lgConfigK, target_hll_type tgtHllType, hll_mode mode, const A& allocator);
|
|
37
|
+
CouponList(const CouponList& that, target_hll_type tgtHllType);
|
|
39
38
|
|
|
40
|
-
static CouponList* newList(const void* bytes, size_t len);
|
|
41
|
-
static CouponList* newList(std::istream& is);
|
|
39
|
+
static CouponList* newList(const void* bytes, size_t len, const A& allocator);
|
|
40
|
+
static CouponList* newList(std::istream& is, const A& allocator);
|
|
42
41
|
virtual vector_u8<A> serialize(bool compact, unsigned header_size_bytes) const;
|
|
43
42
|
virtual void serialize(std::ostream& os, bool compact) const;
|
|
44
43
|
|
|
45
|
-
virtual ~CouponList();
|
|
44
|
+
virtual ~CouponList() = default;
|
|
46
45
|
virtual std::function<void(HllSketchImpl<A>*)> get_deleter() const;
|
|
47
46
|
|
|
48
47
|
virtual CouponList* copy() const;
|
|
@@ -62,7 +61,9 @@ class CouponList : public HllSketchImpl<A> {
|
|
|
62
61
|
coupon_iterator<A> end() const;
|
|
63
62
|
|
|
64
63
|
protected:
|
|
65
|
-
|
|
64
|
+
using ClAlloc = typename std::allocator_traits<A>::template rebind_alloc<CouponList<A>>;
|
|
65
|
+
|
|
66
|
+
using vector_int = std::vector<int, typename std::allocator_traits<A>::template rebind_alloc<int>>;
|
|
66
67
|
|
|
67
68
|
HllSketchImpl<A>* promoteHeapListToSet(CouponList& list);
|
|
68
69
|
HllSketchImpl<A>* promoteHeapListOrSetToHll(CouponList& src);
|
|
@@ -75,13 +76,11 @@ class CouponList : public HllSketchImpl<A> {
|
|
|
75
76
|
virtual bool isOutOfOrderFlag() const;
|
|
76
77
|
virtual void putOutOfOrderFlag(bool oooFlag);
|
|
77
78
|
|
|
78
|
-
virtual
|
|
79
|
-
virtual int* getCouponIntArr() const;
|
|
79
|
+
virtual A getAllocator() const;
|
|
80
80
|
|
|
81
|
-
int lgCouponArrInts;
|
|
82
81
|
int couponCount;
|
|
83
82
|
bool oooFlag;
|
|
84
|
-
|
|
83
|
+
vector_int coupons;
|
|
85
84
|
|
|
86
85
|
friend class HllSketchImplFactory<A>;
|
|
87
86
|
};
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
|
|
25
25
|
namespace datasketches {
|
|
26
26
|
|
|
27
|
-
template<typename A = std::allocator<
|
|
27
|
+
template<typename A = std::allocator<uint8_t>>
|
|
28
28
|
class CubicInterpolation {
|
|
29
29
|
public:
|
|
30
30
|
static double usingXAndYTables(const double xArr[], const double yArr[],
|
|
@@ -40,4 +40,4 @@ class CubicInterpolation {
|
|
|
40
40
|
|
|
41
41
|
#include "CubicInterpolation-internal.hpp"
|
|
42
42
|
|
|
43
|
-
#endif /* _CUBICINTERPOLATION_HPP_ */
|
|
43
|
+
#endif /* _CUBICINTERPOLATION_HPP_ */
|
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
|
|
26
26
|
namespace datasketches {
|
|
27
27
|
|
|
28
|
-
template<typename A = std::allocator<
|
|
28
|
+
template<typename A = std::allocator<uint8_t>>
|
|
29
29
|
class HarmonicNumbers {
|
|
30
30
|
public:
|
|
31
31
|
/**
|
|
@@ -45,4 +45,4 @@ class HarmonicNumbers {
|
|
|
45
45
|
|
|
46
46
|
#include "HarmonicNumbers-internal.hpp"
|
|
47
47
|
|
|
48
|
-
#endif /* _HARMONICNUMBERS_HPP_ */
|
|
48
|
+
#endif /* _HARMONICNUMBERS_HPP_ */
|
|
@@ -30,13 +30,12 @@
|
|
|
30
30
|
namespace datasketches {
|
|
31
31
|
|
|
32
32
|
template<typename A>
|
|
33
|
-
Hll4Array<A>::Hll4Array(const int lgConfigK, const bool startFullSize)
|
|
34
|
-
|
|
33
|
+
Hll4Array<A>::Hll4Array(const int lgConfigK, const bool startFullSize, const A& allocator):
|
|
34
|
+
HllArray<A>(lgConfigK, target_hll_type::HLL_4, startFullSize, allocator),
|
|
35
|
+
auxHashMap(nullptr)
|
|
36
|
+
{
|
|
35
37
|
const int numBytes = this->hll4ArrBytes(lgConfigK);
|
|
36
|
-
|
|
37
|
-
this->hllByteArr = uint8Alloc().allocate(numBytes);
|
|
38
|
-
std::fill(this->hllByteArr, this->hllByteArr + numBytes, 0);
|
|
39
|
-
auxHashMap = nullptr;
|
|
38
|
+
this->hllByteArr.resize(numBytes, 0);
|
|
40
39
|
}
|
|
41
40
|
|
|
42
41
|
template<typename A>
|
|
@@ -63,17 +62,19 @@ Hll4Array<A>::~Hll4Array() {
|
|
|
63
62
|
template<typename A>
|
|
64
63
|
std::function<void(HllSketchImpl<A>*)> Hll4Array<A>::get_deleter() const {
|
|
65
64
|
return [](HllSketchImpl<A>* ptr) {
|
|
66
|
-
typedef typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>> hll4Alloc;
|
|
67
65
|
Hll4Array<A>* hll = static_cast<Hll4Array<A>*>(ptr);
|
|
66
|
+
using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
|
|
67
|
+
Hll4Alloc hll4Alloc(hll->getAllocator());
|
|
68
68
|
hll->~Hll4Array();
|
|
69
|
-
hll4Alloc
|
|
69
|
+
hll4Alloc.deallocate(hll, 1);
|
|
70
70
|
};
|
|
71
71
|
}
|
|
72
72
|
|
|
73
73
|
template<typename A>
|
|
74
74
|
Hll4Array<A>* Hll4Array<A>::copy() const {
|
|
75
|
-
|
|
76
|
-
|
|
75
|
+
using Hll4Alloc = typename std::allocator_traits<A>::template rebind_alloc<Hll4Array<A>>;
|
|
76
|
+
Hll4Alloc hll4Alloc(this->getAllocator());
|
|
77
|
+
return new (hll4Alloc.allocate(1)) Hll4Array<A>(*this);
|
|
77
78
|
}
|
|
78
79
|
|
|
79
80
|
template<typename A>
|
|
@@ -195,7 +196,7 @@ void Hll4Array<A>::internalHll4Update(const int slotNo, const int newVal) {
|
|
|
195
196
|
// added to the exception table
|
|
196
197
|
putSlot(slotNo, HllUtil<A>::AUX_TOKEN);
|
|
197
198
|
if (auxHashMap == nullptr) {
|
|
198
|
-
auxHashMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK);
|
|
199
|
+
auxHashMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK, this->getAllocator());
|
|
199
200
|
}
|
|
200
201
|
auxHashMap->mustAdd(slotNo, newVal);
|
|
201
202
|
}
|
|
@@ -285,7 +286,7 @@ void Hll4Array<A>::shiftToBiggerCurMin() {
|
|
|
285
286
|
} else { //newShiftedVal >= AUX_TOKEN
|
|
286
287
|
// the former exception remains an exception, so must be added to the newAuxMap
|
|
287
288
|
if (newAuxMap == nullptr) {
|
|
288
|
-
newAuxMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK);
|
|
289
|
+
newAuxMap = AuxHashMap<A>::newAuxHashMap(HllUtil<A>::LG_AUX_ARR_INTS[this->lgConfigK], this->lgConfigK, this->getAllocator());
|
|
289
290
|
}
|
|
290
291
|
newAuxMap->mustAdd(slotNum, oldActualVal);
|
|
291
292
|
}
|
|
@@ -315,12 +316,12 @@ void Hll4Array<A>::shiftToBiggerCurMin() {
|
|
|
315
316
|
|
|
316
317
|
template<typename A>
|
|
317
318
|
typename HllArray<A>::const_iterator Hll4Array<A>::begin(bool all) const {
|
|
318
|
-
return typename HllArray<A>::const_iterator(this->hllByteArr, 1 << this->lgConfigK, 0, this->tgtHllType, auxHashMap, this->curMin, all);
|
|
319
|
+
return typename HllArray<A>::const_iterator(this->hllByteArr.data(), 1 << this->lgConfigK, 0, this->tgtHllType, auxHashMap, this->curMin, all);
|
|
319
320
|
}
|
|
320
321
|
|
|
321
322
|
template<typename A>
|
|
322
323
|
typename HllArray<A>::const_iterator Hll4Array<A>::end() const {
|
|
323
|
-
return typename HllArray<A>::const_iterator(this->hllByteArr, 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, auxHashMap, this->curMin, false);
|
|
324
|
+
return typename HllArray<A>::const_iterator(this->hllByteArr.data(), 1 << this->lgConfigK, 1 << this->lgConfigK, this->tgtHllType, auxHashMap, this->curMin, false);
|
|
324
325
|
}
|
|
325
326
|
|
|
326
327
|
template<typename A>
|