ddsketch 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,196 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  # A quantile sketch with relative-error guarantees. This sketch computes
  # quantile values with an approximation error that is relative to the actual
  # quantile value. It works on both negative and non-negative input values.
  #
  # For instance, using DDSketch with a relative accuracy guarantee set to 1%, if
  # the expected quantile value is 100, the computed quantile value is guaranteed to
  # be between 99 and 101. If the expected quantile value is 1000, the computed
  # quantile value is guaranteed to be between 990 and 1010.
  #
  # DDSketch works by mapping floating-point input values to bins and counting the
  # number of values for each bin. The underlying structure that keeps track of bin
  # counts is the store.
  #
  # The memory size of the sketch depends on the range that is covered by the input
  # values: the larger that range, the more bins are needed to keep track of the
  # input values. As a rough estimate, if working on durations with a relative
  # accuracy of 2%, about 2kB (275 bins) are needed to cover values between 1
  # millisecond and 1 minute, and about 6kB (802 bins) to cover values between 1
  # nanosecond and 1 day.
  #
  # The size of the sketch can be given a fail-safe upper bound by using collapsing
  # stores. As shown in
  # <a href="http://www.vldb.org/pvldb/vol12/p2195-masson.pdf">the DDSketch paper</a>,
  # the likelihood of a store collapsing when using the default bound is vanishingly
  # small for most data.
  #
  # @abstract Subclass and override to implement a custom Sketch class.
  class BaseSketch
    # @return [Float] the default relative accuracy for key mapping instantiation
    DEFAULT_REL_ACC = 0.01

    # @return [Integer] the default bin limit for collapsing dense store instantiation
    DEFAULT_BIN_LIMIT = 2048

    # @return [Mapping::KeyMapping] Mapping between values and integer indices that imposes relative accuracy guarantees.
    attr_reader :mapping

    # @return [Store::DenseStore] store maps integers to counters
    attr_reader :store

    # @return [Store::DenseStore] store maps negative integers to counters
    attr_reader :negative_store

    # @return [Float] the count of zeros in the sketch
    attr_reader :zero_count

    # @return [Float] the maximum value in the sketch
    attr_reader :max

    # @return [Float] the minimum value in the sketch
    attr_reader :min

    # @return [Float] the sum of values in the sketch
    attr_reader :sum

    # @return [Float] the count of values in the sketch
    attr_reader :count

    # @param [Mapping::KeyMapping] mapping
    #   mapping between values and integer indices that imposes relative accuracy guarantees.
    # @param [Store::DenseStore] store
    #   store maps integers to counters
    # @param [Store::DenseStore] negative_store
    #   store maps negative integers to counters
    # @param [Float] zero_count
    #   the count of zeros in the sketch
    def initialize(mapping:, store:, negative_store:, zero_count: 0.0)
      @mapping = mapping
      @store = store
      @negative_store = negative_store
      @zero_count = zero_count

      @relative_accuracy = mapping.relative_accuracy
      # The total count starts from whatever the given stores already hold;
      # min, max and sum always start from their neutral values.
      @count = @negative_store.count + @zero_count + @store.count
      @min = Float::INFINITY
      @max = -Float::INFINITY
      @sum = 0.0
    end

    # Average of the sketch
    #
    # @return [Float]
    def avg
      sum / count
    end

    # Add a value to the sketch.
    #
    # @param [Float] val The value to be added.
    # @param [Float] weight Must be positive.
    #
    # @raise [ArgumentError] if weight is zero or negative
    #
    # @return [nil]
    def add(val, weight = 1.0)
      raise ArgumentError, "weight must be positive" if weight <= 0.0

      if val > @mapping.min_possible
        @store.add(@mapping.key(val), weight)
      elsif val < -@mapping.min_possible
        # Negative values are bucketed by the key of their opposite.
        @negative_store.add(@mapping.key(-val), weight)
      else
        # Anything within [-min_possible, min_possible] is counted as zero.
        @zero_count += weight
      end

      # Keep track of summary stats
      @count += weight
      @sum += val * weight
      @min = val if val < @min
      @max = val if val > @max

      nil
    end

    # Return the approximate value at the specified quantile.
    #
    # @param [Float] quantile Must be between 0 ~ 1
    #
    # @return [Float, nil] the approximate value, or nil when the quantile is
    #   outside 0 ~ 1 or the sketch is empty
    def get_quantile_value(quantile)
      return nil if quantile < 0 || quantile > 1 || @count == 0

      rank = quantile * (@count - 1)
      negative_count = @negative_store.count

      if rank < negative_count
        # The negative store is keyed by the opposite of each value, so the
        # rank is reversed before looking it up.
        reversed_rank = negative_count - rank - 1
        key = @negative_store.key_at_rank(reversed_rank, false)
        -@mapping.value(key)
      elsif rank < @zero_count + negative_count
        # Float zero, consistent with the other branches and the documented type.
        0.0
      else
        key = @store.key_at_rank(rank - @zero_count - negative_count)
        @mapping.value(key)
      end
    end

    # Merge the given sketch into the current one. After this operation, this sketch
    # encodes the values that were added to both this and the input sketch.
    #
    # @param [BaseSketch] sketch The sketch to be merged.
    #
    # @raise [InvalidSketchMergeError] if the two mappings have different gammas
    #
    # @return [nil]
    def merge(sketch)
      unless mergeable?(sketch)
        raise InvalidSketchMergeError, "Cannot merge two sketches with different relative accuracy"
      end

      return if sketch.count == 0

      if @count == 0
        copy(sketch)
        return
      end

      # Merge the stores
      @store.merge(sketch.store)
      @negative_store.merge(sketch.negative_store)
      @zero_count += sketch.zero_count

      # Merge summary stats
      @count += sketch.count
      @sum += sketch.sum
      @min = sketch.min if sketch.min < @min
      @max = sketch.max if sketch.max > @max

      nil
    end

    # @return [Float] the count of values in the sketch
    def num_values
      @count
    end

    private

    # Two sketches can be merged only if their gammas are equal.
    def mergeable?(other)
      @mapping.gamma == other.mapping.gamma
    end

    # Copy the input sketch into this one
    def copy(sketch)
      @store.copy(sketch.store)
      @negative_store.copy(sketch.negative_store)
      @zero_count = sketch.zero_count
      @min = sketch.min
      @max = sketch.max
      @count = sketch.count
      @sum = sketch.sum
    end
  end
end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  # Common ancestor of every error raised by this library.
  class BaseError < ::StandardError; end

  # Error when merging two incompatible sketches
  class InvalidSketchMergeError < BaseError; end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  # BaseSketch variant that bounds memory by capping the number of bins, at
  # the cost of lower ingestion speed. Once the cap is reached, the bins with
  # the highest indices are collapsed, so relative accuracy is lost on the
  # highest quantiles. With the default bin limit, collapsing is unlikely to
  # occur unless the data is distributed with tails heavier than any
  # subexponential.
  class LogCollapsingHighestDenseSketch < BaseSketch
    # @param relative_accuracy (see Sketch#initialize)
    # @param [Integer] bin_limit the maximum number of bins
    def initialize(relative_accuracy: DEFAULT_REL_ACC, bin_limit: DEFAULT_BIN_LIMIT)
      key_mapping = Mapping::LogarithmicKeyMapping.new(relative_accuracy: relative_accuracy)
      positive_bins = Store::CollapsingHighestDenseStore.new(bin_limit: bin_limit)
      negative_bins = Store::CollapsingHighestDenseStore.new(bin_limit: bin_limit)

      super(mapping: key_mapping, store: positive_bins, negative_store: negative_bins)
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  # BaseSketch variant that bounds memory by capping the number of bins, at
  # the cost of lower ingestion speed. Once the cap is reached, the bins with
  # the lowest indices are collapsed, so relative accuracy is lost on the
  # lowest quantiles. With the default bin limit, collapsing is unlikely to
  # occur unless the data is distributed with tails heavier than any
  # subexponential.
  class LogCollapsingLowestDenseSketch < BaseSketch
    # @param relative_accuracy (see Sketch#initialize)
    # @param [Integer] bin_limit the maximum number of bins
    def initialize(relative_accuracy: DEFAULT_REL_ACC, bin_limit: DEFAULT_BIN_LIMIT)
      key_mapping = Mapping::LogarithmicKeyMapping.new(relative_accuracy: relative_accuracy)
      positive_bins = Store::CollapsingLowestDenseStore.new(bin_limit: bin_limit)
      negative_bins = Store::CollapsingLowestDenseStore.new(bin_limit: bin_limit)

      super(mapping: key_mapping, store: positive_bins, negative_store: negative_bins)
    end
  end
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  module Mapping
    # A fast KeyMapping that approximates the memory-optimal LogarithmicMapping:
    # the floor of the base-2 logarithm is read from the binary representation
    # of the floating-point value, and the logarithm in between is interpolated
    # with a cubic polynomial.
    class CubicallyInterpolatedKeyMapping < KeyMapping
      # Coefficients of the interpolating cubic, evaluated as
      # ((A * s + B) * s + C) * s.
      A = 6.0 / 35.0
      B = -3.0 / 5.0
      C = 10.0 / 7.0

      #
      # Indicates cubically interpolating algorithm
      #
      # @return [Symbol]
      #
      def self.interpolation
        :cubic
      end

      # (see KeyMapping#initialize)
      def initialize(relative_accuracy:, offset: 0.0)
        super

        # Rescale the multiplier set by the base class by the coefficient C.
        @multiplier /= C
      end

      protected

      def log_gamma(value)
        _cubic_log2_approx(value) * @multiplier
      end

      def pow_gamma(value)
        _cubic_exp2_approx(value / @multiplier)
      end

      # Approximates log2 using a cubic polynomial
      def _cubic_log2_approx(value)
        fraction, power = Math.frexp(value)
        s = 2 * fraction - 1
        polynomial = ((A * s + B) * s + C) * s

        polynomial + (power - 1)
      end

      # Inverse of the cubic log2 approximation
      def _cubic_exp2_approx(value)
        whole = Integer(value.floor)
        remainder = value - whole
        delta_0 = B * B - 3 * A * C

        # Derived from Cardano's formula
        delta_1 = (2.0 * B * B * B) - (9.0 * A * B * C) - (27.0 * A * A * remainder)
        discriminant = delta_1 * delta_1 - 4 * delta_0 * delta_0 * delta_0
        cardano = Math.cbrt((delta_1 - (discriminant**0.5)) / 2.0)

        significand_plus_one = -(B + cardano + delta_0 / cardano) / (3.0 * A) + 1.0
        half_significand = significand_plus_one / 2

        # JRuby has inconsistent result with `Math.ldexp`
        # https://github.com/jruby/jruby/issues/7234
        Math.ldexp(half_significand, whole + 1)
      end
    end
  end
end
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  module Mapping
    # A mapping between values and integer indices that imposes relative accuracy
    # guarantees. Specifically, for any value `min_possible < value <
    # max_possible`, implementations of `KeyMapping` must be such that
    # `value(key(v))` is close to `v` with a relative error that is less than
    # `relative_accuracy`.
    #
    # In implementations of KeyMapping, there is generally a trade-off between the
    # cost of computing the key and the number of keys that are required to cover a
    # given range of values (memory optimality). The most memory-optimal mapping is
    # the LogarithmicMapping, but it requires the costly evaluation of the logarithm
    # when computing the index. Other mappings can approximate the logarithmic
    # mapping, while being less computationally costly.
    #
    # @abstract Subclass and override {#log_gamma} and {#pow_gamma} to implement
    #   a custom KeyMapping class.
    class KeyMapping
      # @return [Float] the base for the exponential buckets. gamma = (1 + alpha) / (1 - alpha)
      attr_reader :gamma

      # @return [Float] the relative accuracy guaranteed, must be between 0 ~ 1
      attr_reader :relative_accuracy

      # @return [Float] the smallest value the sketch can distinguish from 0
      attr_reader :min_possible

      # @return [Float] the largest value the sketch can handle
      attr_reader :max_possible

      # @return [Float] value used to shift all bin keys
      attr_reader :offset

      #
      # Indicates interpolating algorithm
      #
      # @return [Symbol, nil]
      #
      def self.interpolation
        nil
      end

      # @param [Float] relative_accuracy the relative accuracy guaranteed, must be between 0 ~ 1 (exclusive)
      # @param [Float] offset value used to shift all bin keys
      #
      # @raise [ArgumentError] if relative_accuracy is not strictly between 0 and 1
      def initialize(relative_accuracy:, offset: 0.0)
        if (relative_accuracy <= 0) || (relative_accuracy >= 1)
          raise ArgumentError, "Relative accuracy must be between 0 and 1."
        end

        @relative_accuracy = relative_accuracy
        @offset = offset

        # gamma - 1, i.e. 2 * alpha / (1 - alpha)
        gamma_mantissa = 2 * relative_accuracy / (1 - relative_accuracy)

        @gamma = 1 + gamma_mantissa
        # 1 / ln(gamma); subclasses rescale this to match their own log base.
        @multiplier = 1 / Math.log(gamma_mantissa + 1)
        @min_possible = Float::MIN * @gamma
        @max_possible = Float::MAX / @gamma
      end

      #
      # Returns the key specifying the bucket for value
      #
      # @param [Float] value
      #
      # @return [Integer]
      #
      def key(value)
        Integer(log_gamma(value).ceil + @offset)
      end

      #
      # Returns the value represented by the bucket specified by the key
      #
      # @param [Integer] key
      #
      # @return [Float]
      #
      def value(key)
        pow_gamma(key - @offset) * (2.0 / (1 + @gamma))
      end

      #
      # Indicates interpolating algorithm
      #
      # @return [Symbol, nil]
      #
      def interpolation
        self.class.interpolation
      end

      protected

      # Logarithm hook used by {#key}; must be provided by subclasses.
      #
      # @param [Float] value
      # @return [Float]
      # @raise [NotImplementedError] when the subclass does not override it
      def log_gamma(value)
        raise NotImplementedError, "#{self.class} must implement #log_gamma"
      end

      # Inverse of {#log_gamma}, used by {#value}; must be provided by subclasses.
      #
      # @param [Float] value
      # @return [Float]
      # @raise [NotImplementedError] when the subclass does not override it
      def pow_gamma(value)
        raise NotImplementedError, "#{self.class} must implement #pow_gamma"
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  module Mapping
    # A fast KeyMapping that approximates the memory-optimal
    # LogarithmicMapping: the floor of the base-2 logarithm is read from the
    # binary representation of the floating-point value, and the logarithm
    # in between is interpolated linearly.
    class LinearlyInterpolatedKeyMapping < KeyMapping
      #
      # Indicates linear interpolating algorithm
      #
      # @return [Symbol]
      #
      def self.interpolation
        :linear
      end

      protected

      def log_gamma(value)
        _log2_approx(value) * @multiplier
      end

      def pow_gamma(value)
        _exp2_approx(value / @multiplier)
      end

      # Approximates log2 by s + f
      # where v = (s+1) * 2 ** f for s in [0, 1)
      #
      # frexp(v) returns m and e s.t.
      # v = m * 2 ** e ; (m in [0.5, 1) or 0.0)
      # so we adjust m and e accordingly
      def _log2_approx(value)
        fraction, power = Math.frexp(value)
        s = 2 * fraction - 1

        s + (power - 1)
      end

      # Inverse of the linear log2 approximation
      def _exp2_approx(value)
        power = Integer(value.floor + 1)
        fraction = (value - power + 2) / 2.0

        # JRuby has inconsistent result with `Math.ldexp`
        # https://github.com/jruby/jruby/issues/7234
        Math.ldexp(fraction, power)
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  module Mapping
    # A memory-optimal KeyMapping: for a targeted relative accuracy it needs
    # the fewest keys to cover a given range of values, which it achieves by
    # mapping floating-point values to integers logarithmically.
    class LogarithmicKeyMapping < KeyMapping
      # (see KeyMapping#initialize)
      def initialize(relative_accuracy:, offset: 0.0)
        super

        # The hooks below work in base 2, so rescale the base-class multiplier.
        @multiplier *= Math.log(2)
      end

      protected

      def log_gamma(value)
        base2_log = Math.log(value, 2)
        base2_log * @multiplier
      end

      def pow_gamma(value)
        scaled = value / @multiplier
        2**scaled
      end
    end
  end
end
@@ -0,0 +1,66 @@
1
+ /* Unless explicitly stated otherwise all files in this repository are licensed under the Apache License 2.0.
2
+ * This product includes software developed at Datadog (https://www.datadoghq.com/).
3
+ * Copyright 2020 Datadog, Inc.
4
+ */
5
+
6
+ syntax = "proto3";
7
+
8
+ option ruby_package = "DDSketch::Proto";
9
+
10
+ // A DDSketch is essentially a histogram that partitions the range of positive values into an infinite number of
11
+ // indexed bins whose size grows exponentially. It keeps track of the number of values (or possibly floating-point
12
+ // weights) added to each bin. Negative values are partitioned like positive values, symmetrically to zero.
13
+ // The value zero as well as its close neighborhood that would be mapped to extreme bin indexes is mapped to a specific
14
+ // counter.
15
// Serialized form of a whole sketch: the index mapping, one store per sign, and the zero counter.
+ message DDSketch {
16
+ // The mapping between positive values and the bin indexes they belong to.
17
+ IndexMapping mapping = 1;
18
+
19
+ // The store for keeping track of positive values.
20
+ Store positiveValues = 2;
21
+
22
+ // The store for keeping track of negative values. A negative value v is mapped using its positive opposite -v.
23
+ Store negativeValues = 3;
24
+
25
+ // The count for the value zero and its close neighborhood (whose width depends on the mapping).
26
+ double zeroCount = 4;
27
+ }
28
+
29
+ // How to map positive values to the bins they belong to.
30
// Parameters needed to rebuild the value-to-bin-index mapping: gamma, the index offset and the interpolation kind.
+ message IndexMapping {
31
+ // The gamma parameter of the mapping, such that the bin index that a value v belongs to is roughly equal to
32
+ // log(v)/log(gamma).
33
+ double gamma = 1;
34
+
35
+ // An offset that can be used to shift all bin indexes.
36
+ double indexOffset = 2;
37
+
38
+ // To speed up the computation of the index a value belongs to, the computation of the log may be approximated using
39
+ // the fact that the log to the base 2 of powers of 2 can be computed at a low cost from the binary representation of
40
+ // the input value. Other values can be approximated by interpolating between successive powers of 2 (linearly,
41
+ // quadratically or cubically).
42
+ // NONE means that the log is to be computed exactly (no interpolation).
43
+ Interpolation interpolation = 3;
44
+ enum Interpolation {
45
+ NONE = 0;
46
+ LINEAR = 1;
47
+ QUADRATIC = 2;
48
+ CUBIC = 3;
49
+ }
50
+ }
51
+
52
+ // A Store maps bin indexes to their respective counts.
53
+ // Counts can be encoded sparsely using binCounts, but also in a contiguous way using contiguousBinCounts and
54
+ // contiguousBinIndexOffset. Given that non-empty bins are in practice usually contiguous or close to one another, the
55
+ // latter contiguous encoding method is usually more efficient than the sparse one.
56
+ // Both encoding methods can be used conjointly. If a bin appears in both the sparse and the contiguous encodings, its
57
+ // count value is the sum of the counts in each encoding.
58
// Bin counts for one store, as a sparse map (field 1) and/or a contiguous run starting at an offset (fields 2 and 3).
+ message Store {
59
+ // The bin counts, encoded sparsely.
60
+ map<sint32, double> binCounts = 1;
61
+
62
+ // The bin counts, encoded contiguously. The values of contiguousBinCounts are the counts for the bins of indexes
63
+ // o, o+1, o+2, etc., where o is contiguousBinIndexOffset.
64
+ repeated double contiguousBinCounts = 2 [packed = true];
65
+ sint32 contiguousBinIndexOffset = 3;
66
+ }
@@ -0,0 +1,36 @@
1
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
2
+ # source: ddsketch.proto
3
+
4
+ require "google/protobuf"
5
+
6
# Registers the DDSketch, IndexMapping, IndexMapping.Interpolation and Store
# descriptors in the generated descriptor pool. The field names, types and
# numbers below mirror ddsketch.proto; regenerate this file rather than
# editing it by hand.
+ Google::Protobuf::DescriptorPool.generated_pool.build do
7
+ add_message "DDSketch" do
8
+ optional :mapping, :message, 1, "IndexMapping"
9
+ optional :positiveValues, :message, 2, "Store"
10
+ optional :negativeValues, :message, 3, "Store"
11
+ optional :zeroCount, :double, 4
12
+ end
13
+ add_message "IndexMapping" do
14
+ optional :gamma, :double, 1
15
+ optional :indexOffset, :double, 2
16
+ optional :interpolation, :enum, 3, "IndexMapping.Interpolation"
17
+ end
18
+ add_enum "IndexMapping.Interpolation" do
19
+ value :NONE, 0
20
+ value :LINEAR, 1
21
+ value :QUADRATIC, 2
22
+ value :CUBIC, 3
23
+ end
24
+ add_message "Store" do
25
+ map :binCounts, :sint32, :double, 1
26
+ repeated :contiguousBinCounts, :double, 2
27
+ optional :contiguousBinIndexOffset, :sint32, 3
28
+ end
29
+ end
30
+
# Looks the registered descriptors up again and exposes the resulting message
# classes and enum module as constants under DDSketch::Proto.
31
+ module DDSketch::Proto
32
+ DDSketch = Google::Protobuf::DescriptorPool.generated_pool.lookup("DDSketch").msgclass
33
+ IndexMapping = Google::Protobuf::DescriptorPool.generated_pool.lookup("IndexMapping").msgclass
34
+ IndexMapping::Interpolation = Google::Protobuf::DescriptorPool.generated_pool.lookup("IndexMapping.Interpolation").enummodule
35
+ Store = Google::Protobuf::DescriptorPool.generated_pool.lookup("Store").msgclass
36
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ddsketch/proto/ddsketch_pb"
4
+
5
module DDSketch
  # Namespace for protobuf object generated by `google-protobuf`
  # @!visibility private
  module Proto
    # Maps the symbol returned by a key mapping's `interpolation` to the
    # protobuf enum value. Anything not listed here serializes as NONE.
    INTERPOLATION_MAPPING = {
      linear: IndexMapping::Interpolation::LINEAR,
      cubic: IndexMapping::Interpolation::CUBIC
    }.freeze

    private_constant :INTERPOLATION_MAPPING

    module_function

    # Builds the protobuf message for a whole sketch.
    #
    # @param [BaseSketch] sketch the sketch to serialize
    #
    # @return [DDSketch::Proto::DDSketch]
    def serialize_sketch(sketch)
      DDSketch.new(
        mapping: serialize_key_mapping(sketch.mapping),
        positiveValues: serialize_store(sketch.store),
        negativeValues: serialize_store(sketch.negative_store),
        zeroCount: sketch.zero_count
      )
    end

    # Builds the protobuf message for a store, using only the contiguous
    # encoding (`bins` plus `offset`); the sparse `binCounts` map is left empty.
    #
    # @param [Store::DenseStore] store
    #
    # @return [DDSketch::Proto::Store]
    def serialize_store(store)
      Store.new(
        contiguousBinCounts: store.bins,
        contiguousBinIndexOffset: store.offset
      )
    end

    # Builds the protobuf message for a key mapping.
    #
    # @param [Mapping::KeyMapping] mapping
    #
    # @return [DDSketch::Proto::IndexMapping]
    def serialize_key_mapping(mapping)
      IndexMapping.new(
        # The schema's `gamma` field is the base of the exponential buckets,
        # not the relative accuracy.
        gamma: mapping.gamma,
        indexOffset: mapping.offset,
        interpolation: serialize_interpolation(mapping)
      )
    end

    # Translates the mapping's interpolation symbol into the protobuf enum,
    # falling back to NONE when the mapping reports no known interpolation.
    #
    # @param [Mapping::KeyMapping] mapping
    #
    # @return [Integer, Symbol] a value of IndexMapping::Interpolation
    def serialize_interpolation(mapping)
      INTERPOLATION_MAPPING.fetch(mapping.interpolation, IndexMapping::Interpolation::NONE)
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  # The default implementation of DDSketch. It optimizes memory usage at the
  # cost of lower ingestion speed and uses an unlimited number of bins. The
  # number of bins will not exceed a reasonable number unless the data is
  # distributed with tails heavier than any subexponential.
  class Sketch < BaseSketch
    # @param [Float] relative_accuracy The guaranteed relative accuracy for sketch
    def initialize(relative_accuracy: DEFAULT_REL_ACC)
      key_mapping = Mapping::LogarithmicKeyMapping.new(relative_accuracy: relative_accuracy)
      positive_bins = Store::DenseStore.new
      negative_bins = Store::DenseStore.new

      super(mapping: key_mapping, store: positive_bins, negative_store: negative_bins)
    end
  end
end