ddsketch 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.rspec +2 -0
- data/.rubocop.yml +4 -0
- data/.simplecov +1 -0
- data/.yardopts +4 -0
- data/CHANGELOG.md +8 -0
- data/CONTRIBUTING.md +6 -0
- data/Gemfile +41 -0
- data/LICENSE +201 -0
- data/LICENSE-3rdparty.csv +2 -0
- data/NOTICE +4 -0
- data/README.md +122 -0
- data/Rakefile +77 -0
- data/ddsketch-ruby.gemspec +29 -0
- data/lib/ddsketch/base_sketch.rb +196 -0
- data/lib/ddsketch/errors.rb +10 -0
- data/lib/ddsketch/log_collapsing_highest_dense_sketch.rb +21 -0
- data/lib/ddsketch/log_collapsing_lowest_dense_sketch.rb +21 -0
- data/lib/ddsketch/mapping/cubically_interpolated_key_mapping.rb +70 -0
- data/lib/ddsketch/mapping/key_mapping.rb +102 -0
- data/lib/ddsketch/mapping/linear_interpolated_key_mapping.rb +52 -0
- data/lib/ddsketch/mapping/logarithmic_key_mapping.rb +26 -0
- data/lib/ddsketch/proto/ddsketch.proto +66 -0
- data/lib/ddsketch/proto/ddsketch_pb.rb +36 -0
- data/lib/ddsketch/proto.rb +46 -0
- data/lib/ddsketch/sketch.rb +18 -0
- data/lib/ddsketch/store/collapsing_highest_dense_store.rb +143 -0
- data/lib/ddsketch/store/collapsing_lowest_dense_store.rb +145 -0
- data/lib/ddsketch/store/dense_store.rb +210 -0
- data/lib/ddsketch/version.rb +14 -0
- data/lib/ddsketch.rb +25 -0
- metadata +78 -0
@@ -0,0 +1,196 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DDSketch
  # A quantile sketch with relative-error guarantees. This sketch computes
  # quantile values with an approximation error that is relative to the actual
  # quantile value. It works on both negative and non-negative input values.
  #
  # For instance, using DDSketch with a relative accuracy guarantee set to 1%, if
  # the expected quantile value is 100, the computed quantile value is guaranteed to
  # be between 99 and 101. If the expected quantile value is 1000, the computed
  # quantile value is guaranteed to be between 990 and 1010.
  #
  # DDSketch works by mapping floating-point input values to bins and counting the
  # number of values for each bin. The underlying structure that keeps track of bin
  # counts is the store.
  #
  # The memory size of the sketch depends on the range that is covered by the input
  # values: the larger that range, the more bins are needed to keep track of the
  # input values. As a rough estimate, if working on durations with a relative
  # accuracy of 2%, about 2kB (275 bins) are needed to cover values between 1
  # millisecond and 1 minute, and about 6kB (802 bins) to cover values between 1
  # nanosecond and 1 day.
  #
  # The size of the sketch can have a fail-safe upper bound by using collapsing
  # stores. As shown in
  # <a href="http://www.vldb.org/pvldb/vol12/p2195-masson.pdf">the DDSketch paper</a>
  # the likelihood of a store collapsing when using the default bound is vanishingly
  # small for most data.
  #
  # @abstract Subclass and override to implement a custom Sketch class.
  class BaseSketch
    # @return [Float] the default relative accuracy for key mapping instantiation
    DEFAULT_REL_ACC = 0.01

    # @return [Integer] the default bin limit for collapsing dense store instantiation
    DEFAULT_BIN_LIMIT = 2048

    # @return [Mapping::KeyMapping] Mapping between values and integer indices that imposes relative accuracy guarantees.
    attr_reader :mapping

    # @return [Store::DenseStore] store maps integers to counters
    attr_reader :store

    # @return [Store::DenseStore] store maps negative integers to counters
    attr_reader :negative_store

    # @return [Float] the count of zeros in the sketch
    attr_reader :zero_count

    # @return [Float] the maximum value in the sketch
    attr_reader :max

    # @return [Float] the minimum value in the sketch
    attr_reader :min

    # @return [Float] the sum of values in the sketch
    attr_reader :sum

    # @return [Float] the count of values in the sketch
    attr_reader :count

    # @param [Mapping::KeyMapping] mapping
    #   mapping between values and integer indices that imposes relative accuracy guarantees.
    # @param [Store::DenseStore] store
    #   store maps integers to counters
    # @param [Store::DenseStore] negative_store
    #   store maps negative integers to counters
    # @param [Float] zero_count
    #   the count of zeros in the sketch
    def initialize(mapping:, store:, negative_store:, zero_count: 0.0)
      @mapping = mapping
      @store = store
      @negative_store = negative_store
      @zero_count = zero_count

      @relative_accuracy = mapping.relative_accuracy
      # The total count is derived from the supplied stores; min, max and sum
      # start from their neutral values regardless of what the stores contain.
      @count = @negative_store.count + @zero_count + @store.count
      @min = Float::INFINITY
      @max = -Float::INFINITY
      @sum = 0.0
    end

    # Average of the sketch
    #
    # @return [Float] sum divided by count
    def avg
      sum / count
    end

    # Add a value to the sketch.
    #
    # Values above `mapping.min_possible` go to the positive store, values below
    # `-mapping.min_possible` go to the negative store (keyed by their absolute
    # value), and everything in between is counted as zero.
    #
    # @param [Float] val The value to be added.
    # @param [Float] weight Must be positive.
    #
    # @raise [ArgumentError] if weight is not positive
    #
    # @return [nil]
    def add(val, weight = 1.0)
      raise ArgumentError, "weight must be positive" if weight <= 0.0

      if val > @mapping.min_possible
        @store.add(@mapping.key(val), weight)
      elsif val < -@mapping.min_possible
        @negative_store.add(@mapping.key(-val), weight)
      else
        @zero_count += weight
      end

      # Keep track of summary stats
      @count += weight
      @sum += val * weight
      @min = val if val < @min
      @max = val if val > @max

      nil
    end

    # Return the approximate value at the specified quantile.
    #
    # @param [Float] quantile Must be between 0 ~ 1
    #
    # @return [Float, nil] the approximate value, or nil when the quantile is
    #   outside 0..1 or the sketch is empty
    def get_quantile_value(quantile)
      return nil if quantile < 0 || quantile > 1 || @count == 0

      rank = quantile * (@count - 1)
      if rank < @negative_store.count
        # The negative store is keyed by absolute value, so the rank is reversed
        # before the lookup and the mapped value is negated.
        reversed_rank = @negative_store.count - rank - 1
        key = @negative_store.key_at_rank(reversed_rank, false)
        -@mapping.value(key)
      elsif rank < @zero_count + @negative_store.count
        # Return a Float so every successful path yields the documented type.
        0.0
      else
        key = @store.key_at_rank(
          rank - @zero_count - @negative_store.count
        )
        @mapping.value(key)
      end
    end

    # Merge the given sketch into the current one. After this operation, this sketch
    # encodes the values that were added to both this and the input sketch.
    #
    # @param [BaseSketch] sketch The sketch to be merged.
    #
    # @raise [InvalidSketchMergeError] if the two sketches' mappings have different gammas
    #
    # @return [nil]
    def merge(sketch)
      unless mergeable?(sketch)
        raise InvalidSketchMergeError, "Cannot merge two sketches with different relative accuracy"
      end

      return if sketch.count == 0

      if @count == 0
        copy(sketch)
        return
      end

      # Merge the stores
      @store.merge(sketch.store)
      @negative_store.merge(sketch.negative_store)
      @zero_count += sketch.zero_count

      # Merge summary stats
      @count += sketch.count
      @sum += sketch.sum
      @min = sketch.min if sketch.min < @min
      @max = sketch.max if sketch.max > @max

      nil
    end

    # @return [Float] the count of values in the sketch
    def num_values
      @count
    end

    private

    # Two sketches can be merged only if their gammas are equal.
    def mergeable?(other)
      @mapping.gamma == other.mapping.gamma
    end

    # Copy the input sketch's stores and summary stats into this one
    def copy(sketch)
      @store.copy(sketch.store)
      @negative_store.copy(sketch.negative_store)
      @zero_count = sketch.zero_count
      @min = sketch.min
      @max = sketch.max
      @count = sketch.count
      @sum = sketch.sum
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DDSketch
  # BaseSketch variant that trades ingestion speed for bounded memory by
  # limiting the number of bins. Once the maximum number of bins is reached,
  # bins with the highest indices are collapsed, so relative accuracy is lost
  # on the highest quantiles. With the default bin limit, collapsing is
  # unlikely to occur unless the data is distributed with tails heavier than
  # any subexponential.
  class LogCollapsingHighestDenseSketch < BaseSketch
    # @param relative_accuracy (see Sketch#initialize)
    # @param [Integer] bin_limit the maximum number of bins
    def initialize(relative_accuracy: DEFAULT_REL_ACC, bin_limit: DEFAULT_BIN_LIMIT)
      key_mapping = Mapping::LogarithmicKeyMapping.new(relative_accuracy: relative_accuracy)
      # Positive and negative values each get their own store, both capped at bin_limit.
      positive_bins = Store::CollapsingHighestDenseStore.new(bin_limit: bin_limit)
      negative_bins = Store::CollapsingHighestDenseStore.new(bin_limit: bin_limit)

      super(mapping: key_mapping, store: positive_bins, negative_store: negative_bins)
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DDSketch
  # BaseSketch variant that trades ingestion speed for bounded memory by
  # limiting the number of bins. Once the maximum number of bins is reached,
  # bins with the lowest indices are collapsed, so relative accuracy is lost
  # on the lowest quantiles. With the default bin limit, collapsing is
  # unlikely to occur unless the data is distributed with tails heavier than
  # any subexponential.
  class LogCollapsingLowestDenseSketch < BaseSketch
    # @param relative_accuracy (see Sketch#initialize)
    # @param [Integer] bin_limit the maximum number of bins
    def initialize(relative_accuracy: DEFAULT_REL_ACC, bin_limit: DEFAULT_BIN_LIMIT)
      key_mapping = Mapping::LogarithmicKeyMapping.new(relative_accuracy: relative_accuracy)
      # Positive and negative values each get their own store, both capped at bin_limit.
      positive_bins = Store::CollapsingLowestDenseStore.new(bin_limit: bin_limit)
      negative_bins = Store::CollapsingLowestDenseStore.new(bin_limit: bin_limit)

      super(mapping: key_mapping, store: positive_bins, negative_store: negative_bins)
    end
  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DDSketch
  module Mapping
    # A fast KeyMapping that approximates the memory-optimal LogarithmicMapping.
    # The floor of the base-2 logarithm is read from the binary representation
    # of the floating-point value (via frexp), and the logarithm in-between is
    # interpolated with a cubic polynomial.
    class CubicallyInterpolatedKeyMapping < KeyMapping
      # Coefficients of the interpolating cubic, evaluated as
      # ((A * s + B) * s + C) * s.
      A = 6.0 / 35.0
      B = -3.0 / 5.0
      C = 10.0 / 7.0

      #
      # Indicates cubically interpolating algorithm
      #
      # @return [Symbol]
      #
      def self.interpolation
        :cubic
      end

      # (see KeyMapping#initialize)
      def initialize(relative_accuracy:, offset: 0.0)
        super

        # Rescale the multiplier set by KeyMapping by the constant C.
        @multiplier = @multiplier / C
      end

      protected

      # Approximate logarithm of value, scaled by the multiplier.
      def log_gamma(value)
        approx_log2 = _cubic_log2_approx(value)
        approx_log2 * @multiplier
      end

      # Inverse of log_gamma: undo the scaling, then invert the cubic approximation.
      def pow_gamma(value)
        scaled = value / @multiplier
        _cubic_exp2_approx(scaled)
      end

      # Approximates log2 using a cubic polynomial
      def _cubic_log2_approx(value)
        fraction, binary_exponent = Math.frexp(value)
        # frexp yields a fraction in [0.5, 1); shift it to a significand in [0, 1).
        s = 2 * fraction - 1
        polynomial = (A * s + B) * s + C
        polynomial * s + (binary_exponent - 1)
      end

      # Inverts _cubic_log2_approx by solving the cubic for the significand.
      def _cubic_exp2_approx(value)
        whole = value.floor
        remainder = value - whole

        # Derived from Cardano's formula
        delta_0 = B * B - 3 * A * C
        delta_1 = (2.0 * B * B * B) - (9.0 * A * B * C) - (27.0 * A * A * remainder)
        discriminant = delta_1 * delta_1 - 4 * delta_0 * delta_0 * delta_0
        cardano = Math.cbrt((delta_1 - (discriminant**0.5)) / 2.0)

        significand_plus_one = -(B + cardano + delta_0 / cardano) / (3.0 * A) + 1.0

        # JRuby has inconsistent result with `Math.ldexp`
        # https://github.com/jruby/jruby/issues/7234
        Math.ldexp(significand_plus_one / 2, whole + 1)
      end
    end
  end
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DDSketch
  module Mapping
    # A mapping between values and integer indices that imposes relative accuracy
    # guarantees. Specifically, for any value `minIndexableValue() < value <
    # maxIndexableValue` implementations of `KeyMapping` must be such that
    # `value(key(v))` is close to `v` with a relative error that is less than
    # `relative_accuracy`.
    #
    # In implementations of KeyMapping, there is generally a trade-off between the
    # cost of computing the key and the number of keys that are required to cover a
    # given range of values (memory optimality). The most memory-optimal mapping is
    # the LogarithmicMapping, but it requires the costly evaluation of the logarithm
    # when computing the index. Other mappings can approximate the logarithmic
    # mapping, while being less computationally costly.
    #
    # @abstract Subclass and override {#log_gamma} and {#pow_gamma} to implement
    #   a custom KeyMapping class.
    class KeyMapping
      # @return [Float] the base for the exponential buckets. gamma = (1 + alpha) / (1 - alpha)
      attr_reader :gamma

      # @return [Float] the relative accuracy guaranteed, must be between 0 and 1 (exclusive)
      attr_reader :relative_accuracy

      # @return [Float] the smallest value the sketch can distinguish from 0
      attr_reader :min_possible

      # @return [Float] the largest value the sketch can handle
      attr_reader :max_possible

      # @return [Float] value used to shift all bin keys
      attr_reader :offset

      #
      # Indicates interpolating algorithm
      #
      # @return [Symbol, nil]
      #
      def self.interpolation
        nil
      end

      # @param [Float] relative_accuracy the relative accuracy guaranteed, must be between 0 and 1 (exclusive)
      # @param [Float] offset value used to shift all bin keys
      #
      # @raise [ArgumentError] if relative_accuracy is not strictly between 0 and 1
      def initialize(relative_accuracy:, offset: 0.0)
        # Written as a positive range test so that NaN (for which every
        # comparison is false) is rejected as well.
        unless relative_accuracy > 0 && relative_accuracy < 1
          raise ArgumentError, "Relative accuracy must be between 0 and 1."
        end

        @relative_accuracy = relative_accuracy
        @offset = offset

        gamma_mantissa = 2 * relative_accuracy / (1 - relative_accuracy)

        @gamma = 1 + gamma_mantissa
        # 1 / ln(gamma); subclasses in this library rescale it for their own log base.
        @multiplier = 1 / Math.log(gamma_mantissa + 1)
        @min_possible = Float::MIN * @gamma
        @max_possible = Float::MAX / @gamma
      end

      #
      # Returns the key specifying the bucket for value
      #
      # @param [Float] value
      #
      # @return [Integer]
      #
      def key(value)
        Integer(log_gamma(value).ceil + @offset)
      end

      #
      # Returns the value represented by the bucket specified by the key
      #
      # @param [Integer] key
      #
      # @return [Float]
      #
      def value(key)
        pow_gamma(key - @offset) * (2.0 / (1 + @gamma))
      end

      #
      # Indicates interpolating algorithm
      #
      # @return [Symbol, nil]
      #
      def interpolation
        self.class.interpolation
      end

      protected

      # Hook used by {#key}; subclasses must override it.
      #
      # @raise [NotImplementedError] always, in the abstract base class
      def log_gamma(_value)
        raise NotImplementedError, "#{self.class} must implement log_gamma"
      end

      # Hook used by {#value}; subclasses must override it.
      #
      # @raise [NotImplementedError] always, in the abstract base class
      def pow_gamma(_value)
        raise NotImplementedError, "#{self.class} must implement pow_gamma"
      end
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DDSketch
  module Mapping
    # A fast KeyMapping that approximates the memory-optimal
    # LogarithmicMapping. The floor of the base-2 logarithm is read from the
    # binary representation of the floating-point value (via frexp), and the
    # logarithm in-between is interpolated linearly.
    class LinearlyInterpolatedKeyMapping < KeyMapping
      #
      # Indicates linear interpolating algorithm
      #
      # @return [Symbol]
      #
      def self.interpolation
        :linear
      end

      protected

      # Approximate logarithm of value, scaled by the multiplier.
      def log_gamma(value)
        approx_log2 = _log2_approx(value)
        approx_log2 * @multiplier
      end

      # Inverse of log_gamma: undo the scaling, then invert the linear approximation.
      def pow_gamma(value)
        scaled = value / @multiplier
        _exp2_approx(scaled)
      end

      # Approximates log2 by s + f
      # where v = (s+1) * 2 ** f for s in [0, 1)
      #
      # frexp(v) returns m and e s.t.
      # v = m * 2 ** e ; (m in [0.5, 1) or 0.0)
      # so we adjust m and e accordingly
      def _log2_approx(value)
        fraction, binary_exponent = Math.frexp(value)
        shifted = 2 * fraction - 1

        shifted + (binary_exponent - 1)
      end

      # Inverts _log2_approx: rebuilds the frexp-style fraction and exponent
      # from the approximate logarithm and recombines them with ldexp.
      def _exp2_approx(value)
        binary_exponent = value.floor + 1
        fraction = (value - binary_exponent + 2) / 2.0

        # JRuby has inconsistent result with `Math.ldexp`
        # https://github.com/jruby/jruby/issues/7234
        Math.ldexp(fraction, binary_exponent)
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DDSketch
  module Mapping
    # A memory-optimal KeyMapping: for a targeted relative accuracy it needs
    # the least number of keys to cover a given range of values. It does so by
    # logarithmically mapping floating-point values to integers.
    class LogarithmicKeyMapping < KeyMapping
      # (see KeyMapping#initialize)
      def initialize(relative_accuracy:, offset: 0.0)
        super

        # log_gamma and pow_gamma work in base 2, so rescale the multiplier
        # set by KeyMapping by ln(2).
        @multiplier = @multiplier * Math.log(2)
      end

      protected

      # Base-2 logarithm of value, scaled by the multiplier.
      def log_gamma(value)
        base2_log = Math.log(value, 2)
        base2_log * @multiplier
      end

      # Inverse of log_gamma: 2 raised to the unscaled value.
      def pow_gamma(value)
        exponent = value / @multiplier
        2**exponent
      end
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
/* Unless explicitly stated otherwise all files in this repository are licensed under the Apache License 2.0.
|
2
|
+
* This product includes software developed at Datadog (https://www.datadoghq.com/).
|
3
|
+
* Copyright 2020 Datadog, Inc.
|
4
|
+
*/
|
5
|
+
|
6
|
+
syntax = "proto3";
|
7
|
+
|
8
|
+
option ruby_package = "DDSketch::Proto";
|
9
|
+
|
10
|
+
// A DDSketch is essentially a histogram that partitions the range of positive values into an infinite number of
|
11
|
+
// indexed bins whose size grows exponentially. It keeps track of the number of values (or possibly floating-point
|
12
|
+
// weights) added to each bin. Negative values are partitioned like positive values, symmetrically to zero.
|
13
|
+
// The value zero as well as its close neighborhood that would be mapped to extreme bin indexes is mapped to a specific
|
14
|
+
// counter.
|
15
|
+
message DDSketch {
|
16
|
+
// The mapping between positive values and the bin indexes they belong to.
|
17
|
+
IndexMapping mapping = 1;
|
18
|
+
|
19
|
+
// The store for keeping track of positive values.
|
20
|
+
Store positiveValues = 2;
|
21
|
+
|
22
|
+
// The store for keeping track of negative values. A negative value v is mapped using its positive opposite -v.
|
23
|
+
Store negativeValues = 3;
|
24
|
+
|
25
|
+
// The count for the value zero and its close neighborhood (whose width depends on the mapping).
|
26
|
+
double zeroCount = 4;
|
27
|
+
}
|
28
|
+
|
29
|
+
// How to map positive values to the bins they belong to.
|
30
|
+
message IndexMapping {
|
31
|
+
// The gamma parameter of the mapping, such that bin index that a value v belongs to is roughly equal to
|
32
|
+
// log(v)/log(gamma).
|
33
|
+
double gamma = 1;
|
34
|
+
|
35
|
+
// An offset that can be used to shift all bin indexes.
|
36
|
+
double indexOffset = 2;
|
37
|
+
|
38
|
+
// To speed up the computation of the index a value belongs to, the computation of the log may be approximated using
|
39
|
+
// the fact that the log to the base 2 of powers of 2 can be computed at a low cost from the binary representation of
|
40
|
+
// the input value. Other values can be approximated by interpolating between successive powers of 2 (linearly,
|
41
|
+
// quadratically or cubically).
|
42
|
+
// NONE means that the log is to be computed exactly (no interpolation).
|
43
|
+
Interpolation interpolation = 3;
|
44
|
+
enum Interpolation {
|
45
|
+
NONE = 0;
|
46
|
+
LINEAR = 1;
|
47
|
+
QUADRATIC = 2;
|
48
|
+
CUBIC = 3;
|
49
|
+
}
|
50
|
+
}
|
51
|
+
|
52
|
+
// A Store maps bin indexes to their respective counts.
|
53
|
+
// Counts can be encoded sparsely using binCounts, but also in a contiguous way using contiguousBinCounts and
|
54
|
+
// contiguousBinIndexOffset. Given that non-empty bins are in practice usually contiguous or close to one another, the
|
55
|
+
// latter contiguous encoding method is usually more efficient than the sparse one.
|
56
|
+
// Both encoding methods can be used conjointly. If a bin appears in both the sparse and the contiguous encodings, its
|
57
|
+
// count value is the sum of the counts in each encoding.
|
58
|
+
message Store {
|
59
|
+
// The bin counts, encoded sparsely.
|
60
|
+
map<sint32, double> binCounts = 1;
|
61
|
+
|
62
|
+
// The bin counts, encoded contiguously. The values of contiguousBinCounts are the counts for the bins of indexes
|
63
|
+
// o, o+1, o+2, etc., where o is contiguousBinIndexOffset.
|
64
|
+
repeated double contiguousBinCounts = 2 [packed = true];
|
65
|
+
sint32 contiguousBinIndexOffset = 3;
|
66
|
+
}
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# Generated by the protocol buffer compiler. DO NOT EDIT!
|
2
|
+
# source: ddsketch.proto
|
3
|
+
|
4
|
+
require "google/protobuf"
|
5
|
+
|
6
|
+
Google::Protobuf::DescriptorPool.generated_pool.build do
|
7
|
+
add_message "DDSketch" do
|
8
|
+
optional :mapping, :message, 1, "IndexMapping"
|
9
|
+
optional :positiveValues, :message, 2, "Store"
|
10
|
+
optional :negativeValues, :message, 3, "Store"
|
11
|
+
optional :zeroCount, :double, 4
|
12
|
+
end
|
13
|
+
add_message "IndexMapping" do
|
14
|
+
optional :gamma, :double, 1
|
15
|
+
optional :indexOffset, :double, 2
|
16
|
+
optional :interpolation, :enum, 3, "IndexMapping.Interpolation"
|
17
|
+
end
|
18
|
+
add_enum "IndexMapping.Interpolation" do
|
19
|
+
value :NONE, 0
|
20
|
+
value :LINEAR, 1
|
21
|
+
value :QUADRATIC, 2
|
22
|
+
value :CUBIC, 3
|
23
|
+
end
|
24
|
+
add_message "Store" do
|
25
|
+
map :binCounts, :sint32, :double, 1
|
26
|
+
repeated :contiguousBinCounts, :double, 2
|
27
|
+
optional :contiguousBinIndexOffset, :sint32, 3
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
module DDSketch::Proto
|
32
|
+
DDSketch = Google::Protobuf::DescriptorPool.generated_pool.lookup("DDSketch").msgclass
|
33
|
+
IndexMapping = Google::Protobuf::DescriptorPool.generated_pool.lookup("IndexMapping").msgclass
|
34
|
+
IndexMapping::Interpolation = Google::Protobuf::DescriptorPool.generated_pool.lookup("IndexMapping.Interpolation").enummodule
|
35
|
+
Store = Google::Protobuf::DescriptorPool.generated_pool.lookup("Store").msgclass
|
36
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "ddsketch/proto/ddsketch_pb"
|
4
|
+
|
5
|
+
module DDSketch
|
6
|
+
# Namespace for protobuf object generated by `google-protobuf`
|
7
|
+
# @!visibility private
|
8
|
+
module Proto
|
9
|
+
INTERPOLATION_MAPPING = {
|
10
|
+
linear: IndexMapping::Interpolation::LINEAR,
|
11
|
+
cubic: IndexMapping::Interpolation::CUBIC
|
12
|
+
}.freeze
|
13
|
+
|
14
|
+
private_constant :INTERPOLATION_MAPPING
|
15
|
+
|
16
|
+
module_function
|
17
|
+
|
18
|
+
# Builds the protobuf DDSketch message for a sketch from its serialized
# mapping, positive store, negative store and zero count.
#
# @param sketch the sketch to serialize; must respond to `mapping`, `store`,
#   `negative_store` and `zero_count`
# @return [DDSketch] the populated protobuf message
def serialize_sketch(sketch)
  index_mapping = serialize_key_mapping(sketch.mapping)
  positive_values = serialize_store(sketch.store)
  negative_values = serialize_store(sketch.negative_store)

  DDSketch.new(
    mapping: index_mapping,
    positiveValues: positive_values,
    negativeValues: negative_values,
    zeroCount: sketch.zero_count
  )
end
|
26
|
+
|
27
|
+
# Builds the protobuf Store message for a store using the contiguous
# encoding: the store's bins as counts and its offset as the index offset.
#
# @param store the store to serialize; must respond to `bins` and `offset`
# @return [Store] the populated protobuf message
def serialize_store(store)
  bin_counts = store.bins
  first_index = store.offset

  Store.new(contiguousBinCounts: bin_counts, contiguousBinIndexOffset: first_index)
end
|
33
|
+
|
34
|
+
# Builds the protobuf IndexMapping message for a key mapping.
#
# The protobuf `gamma` field carries the mapping's gamma (the base of the
# exponential buckets), not its relative accuracy.
#
# @param mapping the key mapping to serialize; must respond to `gamma`,
#   `offset` and `interpolation`
# @return [IndexMapping] the populated protobuf message
def serialize_key_mapping(mapping)
  IndexMapping.new(
    gamma: mapping.gamma,
    indexOffset: mapping.offset,
    interpolation: serialize_interpolation(mapping)
  )
end
|
41
|
+
|
42
|
+
# Translates a mapping's interpolation symbol into the protobuf enum value,
# falling back to NONE when the symbol is not in INTERPOLATION_MAPPING.
#
# @param mapping the key mapping; must respond to `interpolation`
# @return the matching IndexMapping::Interpolation value
def serialize_interpolation(mapping)
  kind = mapping.interpolation
  INTERPOLATION_MAPPING.fetch(kind) { IndexMapping::Interpolation::NONE }
end
|
45
|
+
end
|
46
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DDSketch
  # The default implementation of DDSketch. It favours memory usage over
  # ingestion speed and uses an unlimited number of bins. The number of bins
  # will not exceed a reasonable number unless the data is distributed with
  # tails heavier than any subexponential.
  class Sketch < BaseSketch
    # @param [Float] relative_accuracy The guaranteed relative accuracy for sketch
    def initialize(relative_accuracy: DEFAULT_REL_ACC)
      key_mapping = Mapping::LogarithmicKeyMapping.new(relative_accuracy: relative_accuracy)
      # Positive and negative values each get their own dense store.
      positive_bins = Store::DenseStore.new
      negative_bins = Store::DenseStore.new

      super(mapping: key_mapping, store: positive_bins, negative_store: negative_bins)
    end
  end
end
|