datasketches 0.1.2 → 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/LICENSE +40 -3
- data/NOTICE +1 -1
- data/ext/datasketches/cpc_wrapper.cpp +12 -13
- data/ext/datasketches/ext.cpp +1 -1
- data/ext/datasketches/ext.h +4 -0
- data/ext/datasketches/extconf.rb +1 -1
- data/ext/datasketches/fi_wrapper.cpp +6 -8
- data/ext/datasketches/hll_wrapper.cpp +13 -14
- data/ext/datasketches/kll_wrapper.cpp +28 -76
- data/ext/datasketches/theta_wrapper.cpp +27 -41
- data/ext/datasketches/vo_wrapper.cpp +4 -6
- data/lib/datasketches/version.rb +1 -1
- data/vendor/datasketches-cpp/CMakeLists.txt +10 -0
- data/vendor/datasketches-cpp/LICENSE +40 -3
- data/vendor/datasketches-cpp/NOTICE +1 -1
- data/vendor/datasketches-cpp/README.md +4 -4
- data/vendor/datasketches-cpp/common/include/MurmurHash3.h +18 -7
- data/vendor/datasketches-cpp/common/include/binomial_bounds.hpp +8 -8
- data/vendor/datasketches-cpp/common/include/bounds_binomial_proportions.hpp +12 -15
- data/vendor/datasketches-cpp/common/include/common_defs.hpp +26 -0
- data/vendor/datasketches-cpp/common/include/conditional_forward.hpp +20 -8
- data/vendor/datasketches-cpp/common/include/count_zeros.hpp +2 -2
- data/vendor/datasketches-cpp/common/include/memory_operations.hpp +12 -0
- data/vendor/datasketches-cpp/common/include/serde.hpp +7 -7
- data/vendor/datasketches-cpp/common/test/CMakeLists.txt +24 -0
- data/vendor/datasketches-cpp/common/test/integration_test.cpp +77 -0
- data/vendor/datasketches-cpp/common/test/test_allocator.hpp +9 -1
- data/vendor/datasketches-cpp/cpc/include/cpc_common.hpp +13 -3
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor.hpp +20 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_compressor_impl.hpp +116 -105
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch.hpp +22 -6
- data/vendor/datasketches-cpp/cpc/include/cpc_sketch_impl.hpp +140 -101
- data/vendor/datasketches-cpp/cpc/include/cpc_union.hpp +2 -2
- data/vendor/datasketches-cpp/cpc/include/cpc_union_impl.hpp +20 -20
- data/vendor/datasketches-cpp/cpc/include/cpc_util.hpp +10 -16
- data/vendor/datasketches-cpp/cpc/include/icon_estimator.hpp +6 -6
- data/vendor/datasketches-cpp/cpc/include/u32_table.hpp +10 -10
- data/vendor/datasketches-cpp/cpc/include/u32_table_impl.hpp +21 -21
- data/vendor/datasketches-cpp/cpc/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/cpc/test/compression_test.cpp +10 -10
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_allocation_test.cpp +237 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_sketch_test.cpp +25 -0
- data/vendor/datasketches-cpp/cpc/test/cpc_union_test.cpp +1 -1
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch.hpp +15 -10
- data/vendor/datasketches-cpp/fi/include/frequent_items_sketch_impl.hpp +102 -105
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map.hpp +19 -13
- data/vendor/datasketches-cpp/fi/include/reverse_purge_hash_map_impl.hpp +141 -125
- data/vendor/datasketches-cpp/fi/test/frequent_items_sketch_custom_type_test.cpp +15 -12
- data/vendor/datasketches-cpp/fi/test/reverse_purge_hash_map_test.cpp +5 -5
- data/vendor/datasketches-cpp/hll/CMakeLists.txt +3 -0
- data/vendor/datasketches-cpp/hll/include/AuxHashMap-internal.hpp +81 -109
- data/vendor/datasketches-cpp/hll/include/AuxHashMap.hpp +25 -24
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable-internal.hpp +15 -15
- data/vendor/datasketches-cpp/hll/include/CompositeInterpolationXTable.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/CouponHashSet-internal.hpp +89 -105
- data/vendor/datasketches-cpp/hll/include/CouponHashSet.hpp +13 -13
- data/vendor/datasketches-cpp/hll/include/CouponList-internal.hpp +130 -165
- data/vendor/datasketches-cpp/hll/include/CouponList.hpp +21 -22
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation-internal.hpp +2 -4
- data/vendor/datasketches-cpp/hll/include/CubicInterpolation.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers-internal.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/HarmonicNumbers.hpp +2 -2
- data/vendor/datasketches-cpp/hll/include/Hll4Array-internal.hpp +88 -83
- data/vendor/datasketches-cpp/hll/include/Hll4Array.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/Hll6Array-internal.hpp +34 -45
- data/vendor/datasketches-cpp/hll/include/Hll6Array.hpp +7 -8
- data/vendor/datasketches-cpp/hll/include/Hll8Array-internal.hpp +41 -52
- data/vendor/datasketches-cpp/hll/include/Hll8Array.hpp +7 -8
- data/vendor/datasketches-cpp/hll/include/HllArray-internal.hpp +220 -251
- data/vendor/datasketches-cpp/hll/include/HllArray.hpp +42 -42
- data/vendor/datasketches-cpp/hll/include/HllSketch-internal.hpp +36 -38
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl-internal.hpp +22 -22
- data/vendor/datasketches-cpp/hll/include/HllSketchImpl.hpp +15 -14
- data/vendor/datasketches-cpp/hll/include/HllSketchImplFactory.hpp +47 -44
- data/vendor/datasketches-cpp/hll/include/HllUnion-internal.hpp +62 -87
- data/vendor/datasketches-cpp/hll/include/HllUtil.hpp +121 -128
- data/vendor/datasketches-cpp/hll/include/RelativeErrorTables.hpp +1 -1
- data/vendor/datasketches-cpp/hll/include/coupon_iterator-internal.hpp +9 -9
- data/vendor/datasketches-cpp/hll/include/coupon_iterator.hpp +5 -5
- data/vendor/datasketches-cpp/hll/include/hll.hpp +25 -53
- data/vendor/datasketches-cpp/hll/test/AuxHashMapTest.cpp +8 -8
- data/vendor/datasketches-cpp/hll/test/CouponHashSetTest.cpp +36 -36
- data/vendor/datasketches-cpp/hll/test/CouponListTest.cpp +28 -28
- data/vendor/datasketches-cpp/hll/test/CrossCountingTest.cpp +2 -2
- data/vendor/datasketches-cpp/hll/test/HllArrayTest.cpp +37 -37
- data/vendor/datasketches-cpp/hll/test/HllSketchTest.cpp +57 -61
- data/vendor/datasketches-cpp/hll/test/HllUnionTest.cpp +10 -14
- data/vendor/datasketches-cpp/hll/test/IsomorphicTest.cpp +3 -3
- data/vendor/datasketches-cpp/hll/test/ToFromByteArrayTest.cpp +4 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper.hpp +5 -4
- data/vendor/datasketches-cpp/kll/include/kll_helper_impl.hpp +6 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator.hpp +14 -6
- data/vendor/datasketches-cpp/kll/include/kll_quantile_calculator_impl.hpp +40 -25
- data/vendor/datasketches-cpp/kll/include/kll_sketch.hpp +50 -6
- data/vendor/datasketches-cpp/kll/include/kll_sketch_impl.hpp +164 -136
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov.hpp +67 -0
- data/vendor/datasketches-cpp/kll/include/kolmogorov_smirnov_impl.hpp +78 -0
- data/vendor/datasketches-cpp/kll/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/kll/test/kll_sketch_custom_type_test.cpp +11 -10
- data/vendor/datasketches-cpp/kll/test/kll_sketch_test.cpp +178 -88
- data/vendor/datasketches-cpp/kll/test/kolmogorov_smirnov_test.cpp +111 -0
- data/vendor/datasketches-cpp/pyproject.toml +4 -2
- data/vendor/datasketches-cpp/python/CMakeLists.txt +12 -6
- data/vendor/datasketches-cpp/python/README.md +52 -49
- data/vendor/datasketches-cpp/python/pybind11Path.cmd +3 -0
- data/vendor/datasketches-cpp/python/src/cpc_wrapper.cpp +1 -1
- data/vendor/datasketches-cpp/python/src/datasketches.cpp +2 -0
- data/vendor/datasketches-cpp/python/src/hll_wrapper.cpp +4 -6
- data/vendor/datasketches-cpp/python/src/kll_wrapper.cpp +4 -2
- data/vendor/datasketches-cpp/python/src/req_wrapper.cpp +246 -0
- data/vendor/datasketches-cpp/python/src/theta_wrapper.cpp +38 -28
- data/vendor/datasketches-cpp/python/src/vector_of_kll.cpp +11 -5
- data/vendor/datasketches-cpp/python/src/vo_wrapper.cpp +2 -2
- data/vendor/datasketches-cpp/python/tests/hll_test.py +1 -2
- data/vendor/datasketches-cpp/python/tests/kll_test.py +5 -5
- data/vendor/datasketches-cpp/python/tests/req_test.py +126 -0
- data/vendor/datasketches-cpp/python/tests/theta_test.py +28 -3
- data/vendor/datasketches-cpp/python/tests/vector_of_kll_test.py +4 -4
- data/vendor/datasketches-cpp/python/tests/vo_test.py +3 -3
- data/vendor/datasketches-cpp/req/CMakeLists.txt +60 -0
- data/vendor/datasketches-cpp/{tuple/include/theta_a_not_b_experimental_impl.hpp → req/include/req_common.hpp} +18 -8
- data/vendor/datasketches-cpp/req/include/req_compactor.hpp +137 -0
- data/vendor/datasketches-cpp/req/include/req_compactor_impl.hpp +488 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator.hpp +69 -0
- data/vendor/datasketches-cpp/req/include/req_quantile_calculator_impl.hpp +60 -0
- data/vendor/datasketches-cpp/req/include/req_sketch.hpp +395 -0
- data/vendor/datasketches-cpp/req/include/req_sketch_impl.hpp +810 -0
- data/vendor/datasketches-cpp/req/test/CMakeLists.txt +43 -0
- data/vendor/datasketches-cpp/req/test/req_float_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_exact_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_raw_items_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_float_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_custom_type_test.cpp +128 -0
- data/vendor/datasketches-cpp/req/test/req_sketch_test.cpp +494 -0
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch.hpp +19 -13
- data/vendor/datasketches-cpp/sampling/include/var_opt_sketch_impl.hpp +130 -127
- data/vendor/datasketches-cpp/sampling/include/var_opt_union.hpp +5 -5
- data/vendor/datasketches-cpp/sampling/include/var_opt_union_impl.hpp +41 -49
- data/vendor/datasketches-cpp/sampling/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_allocation_test.cpp +96 -0
- data/vendor/datasketches-cpp/sampling/test/var_opt_sketch_test.cpp +6 -6
- data/vendor/datasketches-cpp/sampling/test/var_opt_union_test.cpp +13 -44
- data/vendor/datasketches-cpp/setup.py +11 -6
- data/vendor/datasketches-cpp/theta/CMakeLists.txt +30 -3
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_sampled_sets.hpp +3 -2
- data/vendor/datasketches-cpp/{tuple → theta}/include/bounds_on_ratios_in_theta_sketched_sets.hpp +1 -1
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser.hpp +67 -0
- data/vendor/datasketches-cpp/theta/include/compact_theta_sketch_parser_impl.hpp +70 -0
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b.hpp +12 -29
- data/vendor/datasketches-cpp/theta/include/theta_a_not_b_impl.hpp +5 -46
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_comparators.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_constants.hpp +11 -4
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_helpers.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection.hpp +26 -28
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_intersection_base_impl.hpp +0 -0
- data/vendor/datasketches-cpp/theta/include/theta_intersection_impl.hpp +8 -90
- data/vendor/datasketches-cpp/{tuple/test/theta_union_experimental_test.cpp → theta/include/theta_jaccard_similarity.hpp} +11 -18
- data/vendor/datasketches-cpp/{tuple/include/jaccard_similarity.hpp → theta/include/theta_jaccard_similarity_base.hpp} +24 -36
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base.hpp +0 -0
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_set_difference_base_impl.hpp +5 -0
- data/vendor/datasketches-cpp/theta/include/theta_sketch.hpp +163 -256
- data/vendor/datasketches-cpp/theta/include/theta_sketch_impl.hpp +250 -651
- data/vendor/datasketches-cpp/theta/include/theta_union.hpp +27 -60
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base.hpp +1 -1
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_union_base_impl.hpp +6 -1
- data/vendor/datasketches-cpp/theta/include/theta_union_impl.hpp +13 -69
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base.hpp +10 -21
- data/vendor/datasketches-cpp/{tuple → theta}/include/theta_update_sketch_base_impl.hpp +44 -30
- data/vendor/datasketches-cpp/theta/test/CMakeLists.txt +1 -0
- data/vendor/datasketches-cpp/theta/test/theta_a_not_b_test.cpp +23 -1
- data/vendor/datasketches-cpp/theta/test/theta_intersection_test.cpp +21 -1
- data/vendor/datasketches-cpp/{tuple → theta}/test/theta_jaccard_similarity_test.cpp +60 -5
- data/vendor/datasketches-cpp/theta/test/theta_sketch_test.cpp +74 -235
- data/vendor/datasketches-cpp/theta/test/theta_union_test.cpp +22 -2
- data/vendor/datasketches-cpp/tuple/CMakeLists.txt +3 -35
- data/vendor/datasketches-cpp/tuple/include/array_of_doubles_sketch_impl.hpp +47 -60
- data/vendor/datasketches-cpp/tuple/include/tuple_jaccard_similarity.hpp +38 -0
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch.hpp +28 -13
- data/vendor/datasketches-cpp/tuple/include/tuple_sketch_impl.hpp +57 -70
- data/vendor/datasketches-cpp/tuple/test/CMakeLists.txt +1 -6
- data/vendor/datasketches-cpp/tuple/test/array_of_doubles_sketch_test.cpp +1 -1
- data/vendor/datasketches-cpp/tuple/test/tuple_a_not_b_test.cpp +18 -21
- data/vendor/datasketches-cpp/tuple/test/tuple_intersection_test.cpp +13 -16
- data/vendor/datasketches-cpp/tuple/test/tuple_jaccard_similarity_test.cpp +7 -6
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_allocation_test.cpp +3 -3
- data/vendor/datasketches-cpp/tuple/test/tuple_sketch_test.cpp +20 -20
- data/vendor/datasketches-cpp/tuple/test/tuple_union_test.cpp +13 -16
- metadata +51 -36
- data/vendor/datasketches-cpp/tuple/include/theta_a_not_b_experimental.hpp +0 -53
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental.hpp +0 -78
- data/vendor/datasketches-cpp/tuple/include/theta_intersection_experimental_impl.hpp +0 -43
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental.hpp +0 -393
- data/vendor/datasketches-cpp/tuple/include/theta_sketch_experimental_impl.hpp +0 -481
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental.hpp +0 -88
- data/vendor/datasketches-cpp/tuple/include/theta_union_experimental_impl.hpp +0 -47
- data/vendor/datasketches-cpp/tuple/test/theta_a_not_b_experimental_test.cpp +0 -250
- data/vendor/datasketches-cpp/tuple/test/theta_compact_empty_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_estimation_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_compact_single_item_from_java.sk +0 -0
- data/vendor/datasketches-cpp/tuple/test/theta_intersection_experimental_test.cpp +0 -224
- data/vendor/datasketches-cpp/tuple/test/theta_sketch_experimental_test.cpp +0 -247
@@ -34,140 +34,133 @@ namespace datasketches {
|
|
34
34
|
|
35
35
|
enum hll_mode { LIST = 0, SET, HLL };
|
36
36
|
|
37
|
+
namespace hll_constants {
|
38
|
+
|
39
|
+
// preamble stuff
|
40
|
+
static const uint8_t SER_VER = 1;
|
41
|
+
static const uint8_t FAMILY_ID = 7;
|
42
|
+
|
43
|
+
static const uint8_t EMPTY_FLAG_MASK = 4;
|
44
|
+
static const uint8_t COMPACT_FLAG_MASK = 8;
|
45
|
+
static const uint8_t OUT_OF_ORDER_FLAG_MASK = 16;
|
46
|
+
static const uint8_t FULL_SIZE_FLAG_MASK = 32;
|
47
|
+
|
48
|
+
static const uint32_t PREAMBLE_INTS_BYTE = 0;
|
49
|
+
static const uint32_t SER_VER_BYTE = 1;
|
50
|
+
static const uint32_t FAMILY_BYTE = 2;
|
51
|
+
static const uint32_t LG_K_BYTE = 3;
|
52
|
+
static const uint32_t LG_ARR_BYTE = 4;
|
53
|
+
static const uint32_t FLAGS_BYTE = 5;
|
54
|
+
static const uint32_t LIST_COUNT_BYTE = 6;
|
55
|
+
static const uint32_t HLL_CUR_MIN_BYTE = 6;
|
56
|
+
static const uint32_t MODE_BYTE = 7; // lo2bits = curMode, next 2 bits = tgtHllMode
|
57
|
+
|
58
|
+
// Coupon List
|
59
|
+
static const uint32_t LIST_INT_ARR_START = 8;
|
60
|
+
static const uint8_t LIST_PREINTS = 2;
|
61
|
+
// Coupon Hash Set
|
62
|
+
static const uint32_t HASH_SET_COUNT_INT = 8;
|
63
|
+
static const uint32_t HASH_SET_INT_ARR_START = 12;
|
64
|
+
static const uint8_t HASH_SET_PREINTS = 3;
|
65
|
+
// HLL
|
66
|
+
static const uint8_t HLL_PREINTS = 10;
|
67
|
+
static const uint32_t HLL_BYTE_ARR_START = 40;
|
68
|
+
static const uint32_t HIP_ACCUM_DOUBLE = 8;
|
69
|
+
static const uint32_t KXQ0_DOUBLE = 16;
|
70
|
+
static const uint32_t KXQ1_DOUBLE = 24;
|
71
|
+
static const uint32_t CUR_MIN_COUNT_INT = 32;
|
72
|
+
static const uint32_t AUX_COUNT_INT = 36;
|
73
|
+
|
74
|
+
static const uint32_t EMPTY_SKETCH_SIZE_BYTES = 8;
|
75
|
+
|
76
|
+
// other HllUtil stuff
|
77
|
+
static const uint8_t KEY_BITS_26 = 26;
|
78
|
+
static const uint8_t VAL_BITS_6 = 6;
|
79
|
+
static const uint32_t KEY_MASK_26 = (1 << KEY_BITS_26) - 1;
|
80
|
+
static const uint32_t VAL_MASK_6 = (1 << VAL_BITS_6) - 1;
|
81
|
+
static const uint32_t EMPTY = 0;
|
82
|
+
static const uint8_t MIN_LOG_K = 4;
|
83
|
+
static const uint8_t MAX_LOG_K = 21;
|
84
|
+
|
85
|
+
static const double HLL_HIP_RSE_FACTOR = 0.8325546; // sqrt(ln(2))
|
86
|
+
static const double HLL_NON_HIP_RSE_FACTOR = 1.03896; // sqrt((3 * ln(2)) - 1)
|
87
|
+
static const double COUPON_RSE_FACTOR = 0.409; // at transition point not the asymptote
|
88
|
+
static const double COUPON_RSE = COUPON_RSE_FACTOR / (1 << 13);
|
89
|
+
|
90
|
+
static const uint8_t LG_INIT_LIST_SIZE = 3;
|
91
|
+
static const uint8_t LG_INIT_SET_SIZE = 5;
|
92
|
+
static const uint32_t RESIZE_NUMER = 3;
|
93
|
+
static const uint32_t RESIZE_DENOM = 4;
|
94
|
+
|
95
|
+
static const uint8_t loNibbleMask = 0x0f;
|
96
|
+
static const uint8_t hiNibbleMask = 0xf0;
|
97
|
+
static const uint8_t AUX_TOKEN = 0xf;
|
98
|
+
|
99
|
+
/**
|
100
|
+
* Log2 table sizes for exceptions based on lgK from 0 to 26.
|
101
|
+
* However, only lgK from 4 to 21 are used.
|
102
|
+
*/
|
103
|
+
static const uint8_t LG_AUX_ARR_INTS[] = {
|
104
|
+
0, 2, 2, 2, 2, 2, 2, 3, 3, 3, // 0 - 9
|
105
|
+
4, 4, 5, 5, 6, 7, 8, 9, 10, 11, // 10-19
|
106
|
+
12, 13, 14, 15, 16, 17, 18 // 20-26
|
107
|
+
};
|
108
|
+
|
109
|
+
} // namespace hll_constants
|
110
|
+
|
111
|
+
|
37
112
|
// template provides internal consistency and allows static float values
|
38
113
|
// but we don't use the template parameter anywhere
|
39
|
-
template<typename A = std::allocator<
|
114
|
+
template<typename A = std::allocator<uint8_t> >
|
40
115
|
class HllUtil final {
|
41
116
|
public:
|
42
|
-
|
43
|
-
static const
|
44
|
-
static const
|
45
|
-
|
46
|
-
static
|
47
|
-
static const int COMPACT_FLAG_MASK = 8;
|
48
|
-
static const int OUT_OF_ORDER_FLAG_MASK = 16;
|
49
|
-
static const int FULL_SIZE_FLAG_MASK = 32;
|
50
|
-
|
51
|
-
static const int PREAMBLE_INTS_BYTE = 0;
|
52
|
-
static const int SER_VER_BYTE = 1;
|
53
|
-
static const int FAMILY_BYTE = 2;
|
54
|
-
static const int LG_K_BYTE = 3;
|
55
|
-
static const int LG_ARR_BYTE = 4;
|
56
|
-
static const int FLAGS_BYTE = 5;
|
57
|
-
static const int LIST_COUNT_BYTE = 6;
|
58
|
-
static const int HLL_CUR_MIN_BYTE = 6;
|
59
|
-
static const int MODE_BYTE = 7; // lo2bits = curMode, next 2 bits = tgtHllMode
|
60
|
-
|
61
|
-
// Coupon List
|
62
|
-
static const int LIST_INT_ARR_START = 8;
|
63
|
-
static const int LIST_PREINTS = 2;
|
64
|
-
// Coupon Hash Set
|
65
|
-
static const int HASH_SET_COUNT_INT = 8;
|
66
|
-
static const int HASH_SET_INT_ARR_START = 12;
|
67
|
-
static const int HASH_SET_PREINTS = 3;
|
68
|
-
// HLL
|
69
|
-
static const int HLL_PREINTS = 10;
|
70
|
-
static const int HLL_BYTE_ARR_START = 40;
|
71
|
-
static const int HIP_ACCUM_DOUBLE = 8;
|
72
|
-
static const int KXQ0_DOUBLE = 16;
|
73
|
-
static const int KXQ1_DOUBLE = 24;
|
74
|
-
static const int CUR_MIN_COUNT_INT = 32;
|
75
|
-
static const int AUX_COUNT_INT = 36;
|
76
|
-
|
77
|
-
static const int EMPTY_SKETCH_SIZE_BYTES = 8;
|
78
|
-
|
79
|
-
// other HllUtil stuff
|
80
|
-
static const int KEY_BITS_26 = 26;
|
81
|
-
static const int VAL_BITS_6 = 6;
|
82
|
-
static const int KEY_MASK_26 = (1 << KEY_BITS_26) - 1;
|
83
|
-
static const int VAL_MASK_6 = (1 << VAL_BITS_6) - 1;
|
84
|
-
static const int EMPTY = 0;
|
85
|
-
static const int MIN_LOG_K = 4;
|
86
|
-
static const int MAX_LOG_K = 21;
|
87
|
-
|
88
|
-
static const double HLL_HIP_RSE_FACTOR; // sqrt(log(2.0)) = 0.8325546
|
89
|
-
static const double HLL_NON_HIP_RSE_FACTOR; // sqrt((3.0 * log(2.0)) - 1.0) = 1.03896
|
90
|
-
static const double COUPON_RSE_FACTOR; // 0.409 at transition point not the asymptote
|
91
|
-
static const double COUPON_RSE; // COUPON_RSE_FACTOR / (1 << 13);
|
92
|
-
|
93
|
-
static const int LG_INIT_LIST_SIZE = 3;
|
94
|
-
static const int LG_INIT_SET_SIZE = 5;
|
95
|
-
static const int RESIZE_NUMER = 3;
|
96
|
-
static const int RESIZE_DENOM = 4;
|
97
|
-
|
98
|
-
static const int loNibbleMask = 0x0f;
|
99
|
-
static const int hiNibbleMask = 0xf0;
|
100
|
-
static const int AUX_TOKEN = 0xf;
|
101
|
-
|
102
|
-
/**
|
103
|
-
* Log2 table sizes for exceptions based on lgK from 0 to 26.
|
104
|
-
* However, only lgK from 4 to 21 are used.
|
105
|
-
*/
|
106
|
-
static const int LG_AUX_ARR_INTS[];
|
107
|
-
|
108
|
-
static int coupon(const uint64_t hash[]);
|
109
|
-
static int coupon(const HashState& hashState);
|
110
|
-
static void hash(const void* key, int keyLen, uint64_t seed, HashState& result);
|
111
|
-
static int checkLgK(int lgK);
|
117
|
+
|
118
|
+
static uint32_t coupon(const uint64_t hash[]);
|
119
|
+
static uint32_t coupon(const HashState& hashState);
|
120
|
+
static void hash(const void* key, size_t keyLen, uint64_t seed, HashState& result);
|
121
|
+
static uint8_t checkLgK(uint8_t lgK);
|
112
122
|
static void checkMemSize(uint64_t minBytes, uint64_t capBytes);
|
113
|
-
static inline void checkNumStdDev(
|
114
|
-
static
|
115
|
-
static
|
116
|
-
static
|
117
|
-
static double invPow2(
|
118
|
-
static
|
119
|
-
static
|
120
|
-
static
|
121
|
-
static double getRelErr(bool upperBound, bool unioned,
|
122
|
-
int lgConfigK, int numStdDev);
|
123
|
+
static inline void checkNumStdDev(uint8_t numStdDev);
|
124
|
+
static uint32_t pair(uint32_t slotNo, uint8_t value);
|
125
|
+
static uint32_t getLow26(uint32_t coupon);
|
126
|
+
static uint8_t getValue(uint32_t coupon);
|
127
|
+
static double invPow2(uint8_t e);
|
128
|
+
static uint8_t ceilingPowerOf2(uint32_t n);
|
129
|
+
static uint8_t simpleIntLog2(uint32_t n); // n must be power of 2
|
130
|
+
static uint8_t computeLgArrInts(hll_mode mode, uint32_t count, uint8_t lgConfigK);
|
131
|
+
static double getRelErr(bool upperBound, bool unioned, uint8_t lgConfigK, uint8_t numStdDev);
|
123
132
|
};
|
124
133
|
|
125
134
|
template<typename A>
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
template<typename A>
|
132
|
-
const double HllUtil<A>::COUPON_RSE = COUPON_RSE_FACTOR / (1 << 13);
|
133
|
-
|
134
|
-
template<typename A>
|
135
|
-
const int HllUtil<A>::LG_AUX_ARR_INTS[] = {
|
136
|
-
0, 2, 2, 2, 2, 2, 2, 3, 3, 3, // 0 - 9
|
137
|
-
4, 4, 5, 5, 6, 7, 8, 9, 10, 11, // 10-19
|
138
|
-
12, 13, 14, 15, 16, 17, 18 // 20-26
|
139
|
-
};
|
140
|
-
|
141
|
-
template<typename A>
|
142
|
-
inline int HllUtil<A>::coupon(const uint64_t hash[]) {
|
143
|
-
int addr26 = (int) (hash[0] & KEY_MASK_26);
|
144
|
-
int lz = count_leading_zeros_in_u64(hash[1]);
|
145
|
-
int value = ((lz > 62 ? 62 : lz) + 1);
|
146
|
-
return (value << KEY_BITS_26) | addr26;
|
135
|
+
inline uint32_t HllUtil<A>::coupon(const uint64_t hash[]) {
|
136
|
+
uint32_t addr26 = hash[0] & hll_constants::KEY_MASK_26;
|
137
|
+
uint8_t lz = count_leading_zeros_in_u64(hash[1]);
|
138
|
+
uint8_t value = ((lz > 62 ? 62 : lz) + 1);
|
139
|
+
return (value << hll_constants::KEY_BITS_26) | addr26;
|
147
140
|
}
|
148
141
|
|
149
142
|
template<typename A>
|
150
|
-
inline
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
return (value << KEY_BITS_26) | addr26;
|
143
|
+
inline uint32_t HllUtil<A>::coupon(const HashState& hashState) {
|
144
|
+
uint32_t addr26 = (int) (hashState.h1 & hll_constants::KEY_MASK_26);
|
145
|
+
uint8_t lz = count_leading_zeros_in_u64(hashState.h2);
|
146
|
+
uint8_t value = ((lz > 62 ? 62 : lz) + 1);
|
147
|
+
return (value << hll_constants::KEY_BITS_26) | addr26;
|
155
148
|
}
|
156
149
|
|
157
150
|
template<typename A>
|
158
|
-
inline void HllUtil<A>::hash(const void* key,
|
151
|
+
inline void HllUtil<A>::hash(const void* key, size_t keyLen, uint64_t seed, HashState& result) {
|
159
152
|
MurmurHash3_x64_128(key, keyLen, seed, result);
|
160
153
|
}
|
161
154
|
|
162
155
|
template<typename A>
|
163
|
-
inline double HllUtil<A>::getRelErr(
|
164
|
-
|
156
|
+
inline double HllUtil<A>::getRelErr(bool upperBound, bool unioned,
|
157
|
+
uint8_t lgConfigK, uint8_t numStdDev) {
|
165
158
|
return RelativeErrorTables<A>::getRelErr(upperBound, unioned, lgConfigK, numStdDev);
|
166
159
|
}
|
167
160
|
|
168
161
|
template<typename A>
|
169
|
-
inline
|
170
|
-
if ((lgK >=
|
162
|
+
inline uint8_t HllUtil<A>::checkLgK(uint8_t lgK) {
|
163
|
+
if ((lgK >= hll_constants::MIN_LOG_K) && (lgK <= hll_constants::MAX_LOG_K)) {
|
171
164
|
return lgK;
|
172
165
|
} else {
|
173
166
|
throw std::invalid_argument("Invalid value of k: " + std::to_string(lgK));
|
@@ -175,36 +168,36 @@ inline int HllUtil<A>::checkLgK(const int lgK) {
|
|
175
168
|
}
|
176
169
|
|
177
170
|
template<typename A>
|
178
|
-
inline void HllUtil<A>::checkMemSize(
|
171
|
+
inline void HllUtil<A>::checkMemSize(uint64_t minBytes, uint64_t capBytes) {
|
179
172
|
if (capBytes < minBytes) {
|
180
173
|
throw std::invalid_argument("Given destination array is not large enough: " + std::to_string(capBytes));
|
181
174
|
}
|
182
175
|
}
|
183
176
|
|
184
177
|
template<typename A>
|
185
|
-
inline void HllUtil<A>::checkNumStdDev(
|
178
|
+
inline void HllUtil<A>::checkNumStdDev(uint8_t numStdDev) {
|
186
179
|
if ((numStdDev < 1) || (numStdDev > 3)) {
|
187
180
|
throw std::invalid_argument("NumStdDev may not be less than 1 or greater than 3.");
|
188
181
|
}
|
189
182
|
}
|
190
183
|
|
191
184
|
template<typename A>
|
192
|
-
inline
|
193
|
-
return (value <<
|
185
|
+
inline uint32_t HllUtil<A>::pair(uint32_t slotNo, uint8_t value) {
|
186
|
+
return (value << hll_constants::KEY_BITS_26) | (slotNo & hll_constants::KEY_MASK_26);
|
194
187
|
}
|
195
188
|
|
196
189
|
template<typename A>
|
197
|
-
inline
|
198
|
-
return coupon &
|
190
|
+
inline uint32_t HllUtil<A>::getLow26(uint32_t coupon) {
|
191
|
+
return coupon & hll_constants::KEY_MASK_26;
|
199
192
|
}
|
200
193
|
|
201
194
|
template<typename A>
|
202
|
-
inline
|
203
|
-
return coupon >>
|
195
|
+
inline uint8_t HllUtil<A>::getValue(uint32_t coupon) {
|
196
|
+
return coupon >> hll_constants::KEY_BITS_26;
|
204
197
|
}
|
205
198
|
|
206
199
|
template<typename A>
|
207
|
-
inline double HllUtil<A>::invPow2(
|
200
|
+
inline double HllUtil<A>::invPow2(uint8_t e) {
|
208
201
|
union {
|
209
202
|
long long longVal;
|
210
203
|
double doubleVal;
|
@@ -214,7 +207,7 @@ inline double HllUtil<A>::invPow2(const int e) {
|
|
214
207
|
}
|
215
208
|
|
216
209
|
template<typename A>
|
217
|
-
inline
|
210
|
+
inline uint8_t HllUtil<A>::simpleIntLog2(uint32_t n) {
|
218
211
|
if (n == 0) {
|
219
212
|
throw std::logic_error("cannot take log of 0");
|
220
213
|
}
|
@@ -222,16 +215,16 @@ inline uint32_t HllUtil<A>::simpleIntLog2(uint32_t n) {
|
|
222
215
|
}
|
223
216
|
|
224
217
|
template<typename A>
|
225
|
-
inline
|
218
|
+
inline uint8_t HllUtil<A>::computeLgArrInts(hll_mode mode, uint32_t count, uint8_t lgConfigK) {
|
226
219
|
// assume value missing and recompute
|
227
|
-
if (mode == LIST) { return
|
228
|
-
|
229
|
-
if ((
|
220
|
+
if (mode == LIST) { return hll_constants::LG_INIT_LIST_SIZE; }
|
221
|
+
uint32_t ceilPwr2 = ceiling_power_of_2(count);
|
222
|
+
if ((hll_constants::RESIZE_DENOM * count) > (hll_constants::RESIZE_NUMER * ceilPwr2)) { ceilPwr2 <<= 1;}
|
230
223
|
if (mode == SET) {
|
231
|
-
return
|
224
|
+
return std::max(hll_constants::LG_INIT_SET_SIZE, HllUtil<A>::simpleIntLog2(ceilPwr2));
|
232
225
|
}
|
233
226
|
//only used for HLL4
|
234
|
-
return
|
227
|
+
return std::max(hll_constants::LG_AUX_ARR_INTS[lgConfigK], HllUtil<A>::simpleIntLog2(ceilPwr2));
|
235
228
|
}
|
236
229
|
|
237
230
|
}
|
@@ -25,30 +25,30 @@
|
|
25
25
|
namespace datasketches {
|
26
26
|
|
27
27
|
template<typename A>
|
28
|
-
coupon_iterator<A>::coupon_iterator(const
|
29
|
-
|
30
|
-
while (
|
31
|
-
if (
|
32
|
-
|
28
|
+
coupon_iterator<A>::coupon_iterator(const uint32_t* array, size_t array_size, size_t index, bool all):
|
29
|
+
array_(array), array_size_(array_size), index_(index), all_(all) {
|
30
|
+
while (index_ < array_size_) {
|
31
|
+
if (all_ || array_[index_] != hll_constants::EMPTY) break;
|
32
|
+
++index_;
|
33
33
|
}
|
34
34
|
}
|
35
35
|
|
36
36
|
template<typename A>
|
37
37
|
coupon_iterator<A>& coupon_iterator<A>::operator++() {
|
38
|
-
while (++
|
39
|
-
if (
|
38
|
+
while (++index_ < array_size_) {
|
39
|
+
if (all_ || array_[index_] != hll_constants::EMPTY) break;
|
40
40
|
}
|
41
41
|
return *this;
|
42
42
|
}
|
43
43
|
|
44
44
|
template<typename A>
|
45
45
|
bool coupon_iterator<A>::operator!=(const coupon_iterator& other) const {
|
46
|
-
return
|
46
|
+
return index_ != other.index_;
|
47
47
|
}
|
48
48
|
|
49
49
|
template<typename A>
|
50
50
|
uint32_t coupon_iterator<A>::operator*() const {
|
51
|
-
return
|
51
|
+
return array_[index_];
|
52
52
|
}
|
53
53
|
|
54
54
|
}
|
@@ -25,15 +25,15 @@ namespace datasketches {
|
|
25
25
|
template<typename A>
|
26
26
|
class coupon_iterator: public std::iterator<std::input_iterator_tag, uint32_t> {
|
27
27
|
public:
|
28
|
-
coupon_iterator(const
|
28
|
+
coupon_iterator(const uint32_t* array, size_t array_slze, size_t index, bool all);
|
29
29
|
coupon_iterator& operator++();
|
30
30
|
bool operator!=(const coupon_iterator& other) const;
|
31
31
|
uint32_t operator*() const;
|
32
32
|
private:
|
33
|
-
const
|
34
|
-
size_t
|
35
|
-
size_t
|
36
|
-
bool
|
33
|
+
const uint32_t* array_;
|
34
|
+
size_t array_size_;
|
35
|
+
size_t index_;
|
36
|
+
bool all_;
|
37
37
|
};
|
38
38
|
|
39
39
|
}
|
@@ -108,7 +108,7 @@ class hll_union_alloc;
|
|
108
108
|
template<typename A> using AllocU8 = typename std::allocator_traits<A>::template rebind_alloc<uint8_t>;
|
109
109
|
template<typename A> using vector_u8 = std::vector<uint8_t, AllocU8<A>>;
|
110
110
|
|
111
|
-
template<typename A = std::allocator<
|
111
|
+
template<typename A = std::allocator<uint8_t> >
|
112
112
|
class hll_sketch_alloc final {
|
113
113
|
public:
|
114
114
|
/**
|
@@ -119,7 +119,7 @@ class hll_sketch_alloc final {
|
|
119
119
|
* keeping memory use constant (if HLL_6 or HLL_8) at the cost of
|
120
120
|
* starting out using much more memory
|
121
121
|
*/
|
122
|
-
explicit hll_sketch_alloc(
|
122
|
+
explicit hll_sketch_alloc(uint8_t lg_config_k, target_hll_type tgt_type = HLL_4, bool start_full_size = false, const A& allocator = A());
|
123
123
|
|
124
124
|
/**
|
125
125
|
* Copy constructor
|
@@ -140,14 +140,14 @@ class hll_sketch_alloc final {
|
|
140
140
|
* Reconstructs a sketch from a serialized image on a stream.
|
141
141
|
* @param is An input stream with a binary image of a sketch
|
142
142
|
*/
|
143
|
-
static hll_sketch_alloc deserialize(std::istream& is);
|
143
|
+
static hll_sketch_alloc deserialize(std::istream& is, const A& allocator = A());
|
144
144
|
|
145
145
|
/**
|
146
146
|
* Reconstructs a sketch from a serialized image in a byte array.
|
147
147
|
* @param bytes An input array with a binary image of a sketch
|
148
148
|
* @param len Length of the input array, in bytes
|
149
149
|
*/
|
150
|
-
static hll_sketch_alloc deserialize(const void* bytes, size_t len);
|
150
|
+
static hll_sketch_alloc deserialize(const void* bytes, size_t len, const A& allocator = A());
|
151
151
|
|
152
152
|
//! Class destructor
|
153
153
|
virtual ~hll_sketch_alloc();
|
@@ -306,7 +306,7 @@ class hll_sketch_alloc final {
|
|
306
306
|
* @param num_std_dev Number of standard deviations, an integer from the set {1, 2, 3}.
|
307
307
|
* @return The approximate lower bound.
|
308
308
|
*/
|
309
|
-
double get_lower_bound(
|
309
|
+
double get_lower_bound(uint8_t num_std_dev) const;
|
310
310
|
|
311
311
|
/**
|
312
312
|
* Returns the approximate upper error bound given the specified
|
@@ -314,13 +314,13 @@ class hll_sketch_alloc final {
|
|
314
314
|
* @param num_std_dev Number of standard deviations, an integer from the set {1, 2, 3}.
|
315
315
|
* @return The approximate upper bound.
|
316
316
|
*/
|
317
|
-
double get_upper_bound(
|
317
|
+
double get_upper_bound(uint8_t num_std_dev) const;
|
318
318
|
|
319
319
|
/**
|
320
320
|
* Returns sketch's configured lg_k value.
|
321
321
|
* @return Configured lg_k value.
|
322
322
|
*/
|
323
|
-
|
323
|
+
uint8_t get_lg_config_k() const;
|
324
324
|
|
325
325
|
/**
|
326
326
|
* Returns the sketch's target HLL mode (from #target_hll_type).
|
@@ -344,13 +344,13 @@ class hll_sketch_alloc final {
|
|
344
344
|
* Returns the size of the sketch serialized in compact form.
|
345
345
|
* @return Size of the sketch serialized in compact form, in bytes.
|
346
346
|
*/
|
347
|
-
|
347
|
+
uint32_t get_compact_serialization_bytes() const;
|
348
348
|
|
349
349
|
/**
|
350
350
|
* Returns the size of the sketch serialized without compaction.
|
351
351
|
* @return Size of the sketch serialized without compaction, in bytes.
|
352
352
|
*/
|
353
|
-
|
353
|
+
uint32_t get_updatable_serialization_bytes() const;
|
354
354
|
|
355
355
|
/**
|
356
356
|
* Returns the maximum size in bytes that this sketch can grow to
|
@@ -363,7 +363,7 @@ class hll_sketch_alloc final {
|
|
363
363
|
* @param tgt_type the desired Hll type
|
364
364
|
* @return the maximum size in bytes that this sketch can grow to.
|
365
365
|
*/
|
366
|
-
static
|
366
|
+
static uint32_t get_max_updatable_serialization_bytes(uint8_t lg_k, target_hll_type tgt_type);
|
367
367
|
|
368
368
|
/**
|
369
369
|
* Gets the current (approximate) Relative Error (RE) asymptotic values given several
|
@@ -376,18 +376,18 @@ class hll_sketch_alloc final {
|
|
376
376
|
* @return the current (approximate) RelativeError
|
377
377
|
*/
|
378
378
|
static double get_rel_err(bool upper_bound, bool unioned,
|
379
|
-
|
379
|
+
uint8_t lg_config_k, uint8_t num_std_dev);
|
380
380
|
|
381
381
|
private:
|
382
382
|
explicit hll_sketch_alloc(HllSketchImpl<A>* that);
|
383
383
|
|
384
|
-
void coupon_update(
|
384
|
+
void coupon_update(uint32_t coupon);
|
385
385
|
|
386
386
|
std::string type_as_string() const;
|
387
387
|
std::string mode_as_string() const;
|
388
388
|
|
389
389
|
hll_mode get_current_mode() const;
|
390
|
-
|
390
|
+
uint8_t get_serialization_version() const;
|
391
391
|
bool is_out_of_order_flag() const;
|
392
392
|
bool is_estimation_mode() const;
|
393
393
|
|
@@ -423,7 +423,7 @@ class hll_sketch_alloc final {
|
|
423
423
|
* author Kevin Lang
|
424
424
|
*/
|
425
425
|
|
426
|
-
template<typename A = std::allocator<
|
426
|
+
template<typename A = std::allocator<uint8_t> >
|
427
427
|
class hll_union_alloc {
|
428
428
|
public:
|
429
429
|
/**
|
@@ -431,7 +431,7 @@ class hll_union_alloc {
|
|
431
431
|
* @param lg_max_k The maximum size, in log2, of k. The value must
|
432
432
|
* be between 7 and 21, inclusive.
|
433
433
|
*/
|
434
|
-
explicit hll_union_alloc(
|
434
|
+
explicit hll_union_alloc(uint8_t lg_max_k, const A& allocator = A());
|
435
435
|
|
436
436
|
/**
|
437
437
|
* Returns the current cardinality estimate
|
@@ -458,7 +458,7 @@ class hll_union_alloc {
|
|
458
458
|
* @param num_std_dev Number of standard deviations, an integer from the set {1, 2, 3}.
|
459
459
|
* @return The approximate lower bound.
|
460
460
|
*/
|
461
|
-
double get_lower_bound(
|
461
|
+
double get_lower_bound(uint8_t num_std_dev) const;
|
462
462
|
|
463
463
|
/**
|
464
464
|
* Returns the approximate upper error bound given the specified
|
@@ -466,25 +466,13 @@ class hll_union_alloc {
|
|
466
466
|
* @param num_std_dev Number of standard deviations, an integer from the set {1, 2, 3}.
|
467
467
|
* @return The approximate upper bound.
|
468
468
|
*/
|
469
|
-
double get_upper_bound(
|
470
|
-
|
471
|
-
/**
|
472
|
-
* Returns the size of the union serialized in compact form.
|
473
|
-
* @return Size of the union serialized in compact form, in bytes.
|
474
|
-
*/
|
475
|
-
int get_compact_serialization_bytes() const;
|
476
|
-
|
477
|
-
/**
|
478
|
-
* Returns the size of the union serialized without compaction.
|
479
|
-
* @return Size of the union serialized without compaction, in bytes.
|
480
|
-
*/
|
481
|
-
int get_updatable_serialization_bytes() const;
|
469
|
+
double get_upper_bound(uint8_t num_std_dev) const;
|
482
470
|
|
483
471
|
/**
|
484
472
|
* Returns union's configured lg_k value.
|
485
473
|
* @return Configured lg_k value.
|
486
474
|
*/
|
487
|
-
|
475
|
+
uint8_t get_lg_config_k() const;
|
488
476
|
|
489
477
|
/**
|
490
478
|
* Returns the union's target HLL mode (from #target_hll_type).
|
@@ -492,12 +480,6 @@ class hll_union_alloc {
|
|
492
480
|
*/
|
493
481
|
target_hll_type get_target_type() const;
|
494
482
|
|
495
|
-
/**
|
496
|
-
* Indicates if the union is currently stored compacted.
|
497
|
-
* @return True if the union is stored in compact form.
|
498
|
-
*/
|
499
|
-
bool is_compact() const;
|
500
|
-
|
501
483
|
/**
|
502
484
|
* Indicates if the union is currently empty.
|
503
485
|
* @return True if the union is empty.
|
@@ -605,15 +587,6 @@ class hll_union_alloc {
|
|
605
587
|
*/
|
606
588
|
void update(const void* data, size_t length_bytes);
|
607
589
|
|
608
|
-
/**
|
609
|
-
* Returns the maximum size in bytes that this union operator can grow to given a lg_k.
|
610
|
-
*
|
611
|
-
* @param lg_k The maximum Log2 of k for this union operator. This value must be
|
612
|
-
* between 4 and 21 inclusively.
|
613
|
-
* @return the maximum size in bytes that this union operator can grow to.
|
614
|
-
*/
|
615
|
-
static int get_max_serialization_bytes(int lg_k);
|
616
|
-
|
617
590
|
/**
|
618
591
|
* Gets the current (approximate) Relative Error (RE) asymptotic values given several
|
619
592
|
* parameters. This is used primarily for testing.
|
@@ -625,7 +598,7 @@ class hll_union_alloc {
|
|
625
598
|
* @return the current (approximate) RelativeError
|
626
599
|
*/
|
627
600
|
static double get_rel_err(bool upper_bound, bool unioned,
|
628
|
-
|
601
|
+
uint8_t lg_config_k, uint8_t num_std_dev);
|
629
602
|
|
630
603
|
private:
|
631
604
|
|
@@ -638,22 +611,21 @@ class hll_union_alloc {
|
|
638
611
|
* @param incoming_impl the given incoming sketch, which may not be modified.
|
639
612
|
* @param lg_max_k the maximum value of log2 K for this union.
|
640
613
|
*/
|
641
|
-
inline void union_impl(const hll_sketch_alloc<A>& sketch,
|
614
|
+
inline void union_impl(const hll_sketch_alloc<A>& sketch, uint8_t lg_max_k);
|
642
615
|
|
643
|
-
static HllSketchImpl<A>* copy_or_downsample(const HllSketchImpl<A>* src_impl,
|
616
|
+
static HllSketchImpl<A>* copy_or_downsample(const HllSketchImpl<A>* src_impl, uint8_t tgt_lg_k);
|
644
617
|
|
645
|
-
void coupon_update(
|
618
|
+
void coupon_update(uint32_t coupon);
|
646
619
|
|
647
620
|
hll_mode get_current_mode() const;
|
648
|
-
int get_serialization_version() const;
|
649
621
|
bool is_out_of_order_flag() const;
|
650
622
|
bool is_estimation_mode() const;
|
651
623
|
|
652
624
|
// calls couponUpdate on sketch, freeing the old sketch upon changes in hll_mode
|
653
|
-
static HllSketchImpl<A>* leak_free_coupon_update(HllSketchImpl<A>* impl,
|
625
|
+
static HllSketchImpl<A>* leak_free_coupon_update(HllSketchImpl<A>* impl, uint32_t coupon);
|
654
626
|
|
655
|
-
|
656
|
-
hll_sketch_alloc<A>
|
627
|
+
uint8_t lg_max_k_;
|
628
|
+
hll_sketch_alloc<A> gadget_;
|
657
629
|
};
|
658
630
|
|
659
631
|
/// convenience alias for hll_sketch with default allocator
|
@@ -25,7 +25,7 @@
|
|
25
25
|
namespace datasketches {
|
26
26
|
|
27
27
|
TEST_CASE("aux hash map: check must replace", "[aux_hash_map]") {
|
28
|
-
AuxHashMap
|
28
|
+
AuxHashMap<std::allocator<uint8_t>>* map = new AuxHashMap<std::allocator<uint8_t>>(3, 7, std::allocator<uint8_t>());
|
29
29
|
map->mustAdd(100, 5);
|
30
30
|
int val = map->mustFindValueFor(100);
|
31
31
|
REQUIRE(val == 5);
|
@@ -40,12 +40,12 @@ TEST_CASE("aux hash map: check must replace", "[aux_hash_map]") {
|
|
40
40
|
}
|
41
41
|
|
42
42
|
TEST_CASE("aux hash map: check grow space", "[aux_hash_map]") {
|
43
|
-
auto map = std::unique_ptr<AuxHashMap
|
44
|
-
AuxHashMap
|
45
|
-
AuxHashMap
|
43
|
+
auto map = std::unique_ptr<AuxHashMap<std::allocator<uint8_t>>, std::function<void(AuxHashMap<std::allocator<uint8_t>>*)>>(
|
44
|
+
AuxHashMap<std::allocator<uint8_t>>::newAuxHashMap(3, 7, std::allocator<uint8_t>()),
|
45
|
+
AuxHashMap<std::allocator<uint8_t>>::make_deleter()
|
46
46
|
);
|
47
47
|
REQUIRE(map->getLgAuxArrInts() == 3);
|
48
|
-
for (
|
48
|
+
for (uint8_t i = 1; i <= 7; ++i) {
|
49
49
|
map->mustAdd(i, i);
|
50
50
|
}
|
51
51
|
REQUIRE(map->getLgAuxArrInts() == 4);
|
@@ -63,17 +63,17 @@ TEST_CASE("aux hash map: check grow space", "[aux_hash_map]") {
|
|
63
63
|
}
|
64
64
|
|
65
65
|
TEST_CASE("aux hash map: check exception must find value for", "[aux_hash_map]") {
|
66
|
-
AuxHashMap
|
66
|
+
AuxHashMap<std::allocator<uint8_t>> map(3, 7, std::allocator<uint8_t>());
|
67
67
|
map.mustAdd(100, 5);
|
68
68
|
REQUIRE_THROWS_AS(map.mustFindValueFor(101), std::invalid_argument);
|
69
69
|
}
|
70
70
|
|
71
71
|
TEST_CASE("aux hash map: check exception must add", "[aux_hash_map]") {
|
72
|
-
AuxHashMap
|
72
|
+
AuxHashMap<std::allocator<uint8_t>>* map = AuxHashMap<std::allocator<uint8_t>>::newAuxHashMap(3, 7, std::allocator<uint8_t>());
|
73
73
|
map->mustAdd(100, 5);
|
74
74
|
REQUIRE_THROWS_AS(map->mustAdd(100, 6), std::invalid_argument);
|
75
75
|
|
76
|
-
AuxHashMap
|
76
|
+
AuxHashMap<std::allocator<uint8_t>>::make_deleter()(map);
|
77
77
|
}
|
78
78
|
|
79
79
|
} /* namespace datasketches */
|