ddsketch 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,196 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  # A quantile sketch with relative-error guarantees. This sketch computes
  # quantile values with an approximation error that is relative to the actual
  # quantile value. It works on both negative and non-negative input values.
  #
  # For instance, using DDSketch with a relative accuracy guarantee set to 1%, if
  # the expected quantile value is 100, the computed quantile value is guaranteed to
  # be between 99 and 101. If the expected quantile value is 1000, the computed
  # quantile value is guaranteed to be between 990 and 1010.
  #
  # DDSketch works by mapping floating-point input values to bins and counting the
  # number of values for each bin. The underlying structure that keeps track of bin
  # counts is the store.
  #
  # The memory size of the sketch depends on the range that is covered by the input
  # values: the larger that range, the more bins are needed to keep track of the
  # input values. As a rough estimate, if working on durations with a relative
  # accuracy of 2%, about 2kB (275 bins) are needed to cover values between 1
  # millisecond and 1 minute, and about 6kB (802 bins) to cover values between 1
  # nanosecond and 1 day.
  #
  # The size of the sketch can be given a fail-safe upper bound by using collapsing
  # stores. As shown in
  # <a href="http://www.vldb.org/pvldb/vol12/p2195-masson.pdf">the DDSketch paper</a>
  # the likelihood of a store collapsing when using the default bound is vanishingly
  # small for most data.
  #
  # @abstract Subclass and override to implement a custom Sketch class.
  class BaseSketch
    # @return [Float] the default relative accuracy for key mapping instantiation
    DEFAULT_REL_ACC = 0.01

    # @return [Integer] the default bin limit for collapsing dense store instantiation
    DEFAULT_BIN_LIMIT = 2048

    # @return [Mapping::KeyMapping] Mapping between values and integer indices that imposes relative accuracy guarantees.
    attr_reader :mapping

    # @return [Store::DenseStore] store maps integers to counters
    attr_reader :store

    # @return [Store::DenseStore] store maps negative integers to counters
    attr_reader :negative_store

    # @return [Float] the count of zeros in the sketch
    attr_reader :zero_count

    # @return [Float] the maximum value in the sketch
    attr_reader :max

    # @return [Float] the minimum value in the sketch
    attr_reader :min

    # @return [Float] the sum of values in the sketch
    attr_reader :sum

    # @return [Float] the count of values in the sketch
    attr_reader :count

    # @param [Mapping::KeyMapping] mapping
    #   mapping between values and integer indices that imposes relative accuracy guarantees.
    # @param [Store::DenseStore] store
    #   store maps integers to counters
    # @param [Store::DenseStore] negative_store
    #   store maps negative integers to counters
    # @param [Float] zero_count
    #   the count of zeros in the sketch
    def initialize(mapping:, store:, negative_store:, zero_count: 0.0)
      @mapping = mapping
      @store = store
      @negative_store = negative_store
      @zero_count = zero_count

      @relative_accuracy = mapping.relative_accuracy
      # The total count is derived from whatever the supplied stores already hold.
      @count = @negative_store.count + @zero_count + @store.count
      # Sentinels: the first added value always replaces both bounds.
      @min = Float::INFINITY
      @max = -Float::INFINITY
      @sum = 0.0
    end

    # Average of the sketch, computed as `sum / count`.
    #
    # @return [Float]
    def avg
      sum / count
    end

    # Add a value to the sketch.
    #
    # @param [Float] val The value to be added.
    # @param [Float] weight Must be positive.
    #
    # @raise [ArgumentError] if weight is zero or negative
    #
    # @return [nil]
    def add(val, weight = 1.0)
      raise ArgumentError, "weight must be positive" if weight <= 0.0

      if val > @mapping.min_possible
        @store.add(@mapping.key(val), weight)
      elsif val < -@mapping.min_possible
        # Negative values are stored under the key of their absolute value.
        @negative_store.add(@mapping.key(-val), weight)
      else
        # Anything within [-min_possible, min_possible] is counted as zero.
        @zero_count += weight
      end

      # Keep track of summary stats
      @count += weight
      @sum += val * weight
      @min = val if val < @min
      @max = val if val > @max

      nil
    end

    # Return the approximate value at the specified quantile.
    #
    # @param [Float] quantile Must be between 0 ~ 1
    #
    # @return [Float, nil] the approximate value, or nil when the quantile is
    #   outside 0..1 or the sketch is empty
    def get_quantile_value(quantile)
      return nil if quantile < 0 || quantile > 1 || @count == 0

      rank = quantile * (@count - 1)
      negative_count = @negative_store.count

      if rank < negative_count
        # Ranks run from the most negative value upwards, while the negative
        # store is keyed by absolute value, hence the reversed rank.
        reversed_rank = negative_count - rank - 1
        # NOTE(review): the meaning of the second argument is defined by the
        # store's key_at_rank, which is not visible here - confirm.
        key = @negative_store.key_at_rank(reversed_rank, false)
        -@mapping.value(key)
      elsif rank < @zero_count + negative_count
        # Float, to agree with the documented return type and the other branches.
        0.0
      else
        key = @store.key_at_rank(rank - @zero_count - negative_count)
        @mapping.value(key)
      end
    end

    # Merge the given sketch into the current one. After this operation, this sketch
    # encodes the values that were added to both this and the input sketch.
    #
    # @param [BaseSketch] sketch The sketch to be merged.
    #
    # @raise [InvalidSketchMergeError] if the two sketches use different gammas
    #
    # @return [nil]
    def merge(sketch)
      unless mergeable?(sketch)
        raise InvalidSketchMergeError, "Cannot merge two sketches with different relative accuracy"
      end

      return if sketch.count == 0

      if @count == 0
        # Nothing to combine with: take over the other sketch's state wholesale.
        copy(sketch)
        return
      end

      # Merge the stores
      @store.merge(sketch.store)
      @negative_store.merge(sketch.negative_store)
      @zero_count += sketch.zero_count

      # Merge summary stats
      @count += sketch.count
      @sum += sketch.sum
      @min = sketch.min if sketch.min < @min
      @max = sketch.max if sketch.max > @max

      nil
    end

    # @return [Float] the count of values in the sketch
    def num_values
      @count
    end

    private

    # Two sketches can be merged only if their gammas are equal.
    def mergeable?(other)
      @mapping.gamma == other.mapping.gamma
    end

    # Copy the input sketch into this one
    def copy(sketch)
      @store.copy(sketch.store)
      @negative_store.copy(sketch.negative_store)
      @zero_count = sketch.zero_count
      @min = sketch.min
      @max = sketch.max
      @count = sketch.count
      @sum = sketch.sum
    end
  end
end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  # Common ancestor of the errors defined by this library.
  class BaseError < ::StandardError; end

  # Error when merging two incompatible sketches
  class InvalidSketchMergeError < BaseError; end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  # Implementation of BaseSketch with optimized memory usage at the cost of
  # lower ingestion speed, using a limited number of bins. When the maximum
  # number of bins is reached, bins with highest indices are collapsed, which
  # causes the relative accuracy to be lost on the highest quantiles. For the
  # default bin limit, collapsing is unlikely to occur unless the data is
  # distributed with tails heavier than any subexponential.
  class LogCollapsingHighestDenseSketch < BaseSketch
    # Wires a logarithmic key mapping to two highest-collapsing dense stores,
    # one for positive and one for negative values.
    #
    # @param [Float] relative_accuracy the relative accuracy handed to the key mapping
    # @param [Integer] bin_limit the maximum number of bins
    def initialize(relative_accuracy: DEFAULT_REL_ACC, bin_limit: DEFAULT_BIN_LIMIT)
      key_mapping = Mapping::LogarithmicKeyMapping.new(relative_accuracy: relative_accuracy)
      positive_bins = Store::CollapsingHighestDenseStore.new(bin_limit: bin_limit)
      negative_bins = Store::CollapsingHighestDenseStore.new(bin_limit: bin_limit)

      super(mapping: key_mapping, store: positive_bins, negative_store: negative_bins)
    end
  end
end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  # Implementation of BaseSketch with optimized memory usage at the cost of
  # lower ingestion speed, using a limited number of bins. When the maximum
  # number of bins is reached, bins with lowest indices are collapsed, which
  # causes the relative accuracy to be lost on the lowest quantiles. For the
  # default bin limit, collapsing is unlikely to occur unless the data is
  # distributed with tails heavier than any subexponential.
  class LogCollapsingLowestDenseSketch < BaseSketch
    # Wires a logarithmic key mapping to two lowest-collapsing dense stores,
    # one for positive and one for negative values.
    #
    # @param [Float] relative_accuracy the relative accuracy handed to the key mapping
    # @param [Integer] bin_limit the maximum number of bins
    def initialize(relative_accuracy: DEFAULT_REL_ACC, bin_limit: DEFAULT_BIN_LIMIT)
      key_mapping = Mapping::LogarithmicKeyMapping.new(relative_accuracy: relative_accuracy)
      positive_bins = Store::CollapsingLowestDenseStore.new(bin_limit: bin_limit)
      negative_bins = Store::CollapsingLowestDenseStore.new(bin_limit: bin_limit)

      super(mapping: key_mapping, store: positive_bins, negative_store: negative_bins)
    end
  end
end
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  module Mapping
    # A fast KeyMapping that approximates the memory-optimal LogarithmicMapping by
    # extracting the floor value of the logarithm to the base 2 from the binary
    # representations of floating-point values and cubically interpolating the
    # logarithm in-between.
    class CubicallyInterpolatedKeyMapping < KeyMapping
      # Coefficients of the interpolating cubic
      # `((A * s + B) * s + C) * s` evaluated on the significand `s`.
      A = 6.0 / 35.0
      B = -3.0 / 5.0
      C = 10.0 / 7.0

      class << self
        # Indicates cubically interpolating algorithm
        #
        # @return [Symbol]
        def interpolation
          :cubic
        end
      end

      # (see KeyMapping#initialize)
      def initialize(relative_accuracy:, offset: 0.0)
        # Bare super forwards both keyword arguments unchanged.
        super

        @multiplier = @multiplier / C
      end

      protected

      # Approximate logarithm used to derive bucket keys.
      def log_gamma(value)
        _cubic_log2_approx(value) * @multiplier
      end

      # Inverse of log_gamma, built on the inverse of the cubic approximation.
      def pow_gamma(value)
        _cubic_exp2_approx(value / @multiplier)
      end

      # Approximates log2 using a cubic polynomial
      def _cubic_log2_approx(value)
        fraction, power = Math.frexp(value)
        s = 2 * fraction - 1
        polynomial = ((A * s + B) * s + C) * s

        polynomial + (power - 1)
      end

      # Inverts _cubic_log2_approx by solving the cubic for the significand.
      def _cubic_exp2_approx(value)
        whole = Integer(value.floor)
        remainder = value - whole

        # Derived from Cardano's formula
        delta_0 = B * B - 3 * A * C
        delta_1 = (2.0 * B * B * B) - (9.0 * A * B * C) - (27.0 * A * A * remainder)
        discriminant = delta_1 * delta_1 - 4 * delta_0 * delta_0 * delta_0
        cardano = Math.cbrt((delta_1 - (discriminant**0.5)) / 2.0)

        root = -(B + cardano + delta_0 / cardano) / (3.0 * A)
        fraction = (root + 1.0) / 2

        # JRuby has inconsistent result with `Math.ldexp`
        # https://github.com/jruby/jruby/issues/7234
        Math.ldexp(fraction, whole + 1)
      end
    end
  end
end
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  module Mapping
    # A mapping between values and integer indices that imposes relative accuracy
    # guarantees. Specifically, for any value within the indexable range,
    # implementations of `KeyMapping` must be such that `value(key(v))` is close
    # to `v` with a relative error that is less than `relative_accuracy`.
    #
    # In implementations of KeyMapping, there is generally a trade-off between the
    # cost of computing the key and the number of keys that are required to cover a
    # given range of values (memory optimality). The most memory-optimal mapping is
    # the LogarithmicMapping, but it requires the costly evaluation of the logarithm
    # when computing the index. Other mappings can approximate the logarithmic
    # mapping, while being less computationally costly.
    #
    # @abstract Subclass and override {#log_gamma} and {#pow_gamma} to implement
    #   a custom KeyMapping class.
    class KeyMapping
      # @return [Float] the base for the exponential buckets. gamma = (1 + alpha) / (1 - alpha)
      attr_reader :gamma

      # @return [Float] the relative accuracy guaranteed, strictly between 0 and 1
      attr_reader :relative_accuracy

      # @return [Float] the smallest value the sketch can distinguish from 0
      attr_reader :min_possible

      # @return [Float] the largest value the sketch can handle
      attr_reader :max_possible

      # @return [Float] value used to shift all bin keys
      attr_reader :offset

      #
      # Indicates interpolating algorithm
      #
      # @return [Symbol, nil]
      #
      def self.interpolation
        nil
      end

      # @param [Float] relative_accuracy the relative accuracy guaranteed, strictly between 0 and 1
      # @param [Float] offset value used to shift all bin keys
      #
      # @raise [ArgumentError] if relative_accuracy is not strictly between 0 and 1
      def initialize(relative_accuracy:, offset: 0.0)
        # Phrased positively so that NaN, for which every comparison is false,
        # is rejected as well.
        unless relative_accuracy > 0 && relative_accuracy < 1
          raise ArgumentError, "Relative accuracy must be between 0 and 1."
        end

        @relative_accuracy = relative_accuracy
        @offset = offset

        gamma_mantissa = 2 * relative_accuracy / (1 - relative_accuracy)

        @gamma = 1 + gamma_mantissa
        # 1 / ln(gamma); subclasses rescale this for the logarithm they use.
        @multiplier = 1 / Math.log(gamma_mantissa + 1)
        @min_possible = Float::MIN * @gamma
        @max_possible = Float::MAX / @gamma
      end

      #
      # Returns the key specifying the bucket for value
      #
      # @param [Float] value
      #
      # @return [Integer]
      #
      def key(value)
        Integer(log_gamma(value).ceil + @offset)
      end

      #
      # Returns the value represented by the bucket specified by the key
      #
      # @param [Integer] key
      #
      # @return [Float]
      #
      def value(key)
        pow_gamma(key - @offset) * (2.0 / (1 + @gamma))
      end

      #
      # Indicates interpolating algorithm
      #
      # @return [Symbol, nil]
      #
      def interpolation
        self.class.interpolation
      end

      protected

      # Logarithm hook used by {#key}; must be provided by subclasses.
      #
      # @param [Float] value
      # @raise [NotImplementedError] always, in this abstract base class
      def log_gamma(value)
        raise NotImplementedError, "#{self.class} must implement #log_gamma"
      end

      # Exponentiation hook used by {#value}; must be provided by subclasses.
      #
      # @param [Float] value
      # @raise [NotImplementedError] always, in this abstract base class
      def pow_gamma(value)
        raise NotImplementedError, "#{self.class} must implement #pow_gamma"
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  module Mapping
    # A fast KeyMapping that approximates the memory-optimal
    # LogarithmicMapping by extracting the floor value of the logarithm to the
    # base 2 from the binary representations of floating-point values and
    # linearly interpolating the logarithm in-between.
    class LinearlyInterpolatedKeyMapping < KeyMapping
      class << self
        # Indicates linear interpolating algorithm
        #
        # @return [Symbol]
        def interpolation
          :linear
        end
      end

      protected

      # Approximate logarithm used to derive bucket keys.
      def log_gamma(value)
        _log2_approx(value) * @multiplier
      end

      # Inverse of log_gamma, built on the inverse of the linear approximation.
      def pow_gamma(value)
        _exp2_approx(value / @multiplier)
      end

      # Approximates log2 by s + f
      # where v = (s+1) * 2 ** f for s in [0, 1)
      #
      # frexp(v) returns m and e s.t.
      # v = m * 2 ** e ; (m in [0.5, 1) or 0.0)
      # so we adjust m and e accordingly
      def _log2_approx(value)
        fraction, power = Math.frexp(value)
        s = 2 * fraction - 1

        s + (power - 1)
      end

      # Inverts _log2_approx: rebuilds the frexp-style fraction and exponent
      # and recombines them with ldexp.
      def _exp2_approx(value)
        power = Integer(value.floor + 1)
        fraction = (value - power + 2) / 2.0

        # JRuby has inconsistent result with `Math.ldexp`
        # https://github.com/jruby/jruby/issues/7234
        Math.ldexp(fraction, power)
      end
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  module Mapping
    # A memory-optimal KeyMapping, i.e., given a targeted relative accuracy, it
    # requires the least number of keys to cover a given range of values. This is
    # done by logarithmically mapping floating-point values to integers.
    class LogarithmicKeyMapping < KeyMapping
      # (see KeyMapping#initialize)
      def initialize(relative_accuracy:, offset: 0.0)
        # Bare super forwards both keyword arguments unchanged.
        super
        # Rescale the multiplier because the hooks below work in base 2.
        @multiplier = @multiplier * Math.log(2)
      end

      protected

      # Base-2 logarithm of the value, scaled by the multiplier.
      def log_gamma(value)
        base2_log = Math.log(value, 2)
        base2_log * @multiplier
      end

      # Inverse of log_gamma: two raised to the unscaled argument.
      def pow_gamma(value)
        exponent = value / @multiplier
        2**exponent
      end
    end
  end
end
@@ -0,0 +1,66 @@
1
+ /* Unless explicitly stated otherwise all files in this repository are licensed under the Apache License 2.0.
2
+ * This product includes software developed at Datadog (https://www.datadoghq.com/).
3
+ * Copyright 2020 Datadog, Inc.
4
+ */
5
+
6
+ syntax = "proto3";
7
+
8
+ option ruby_package = "DDSketch::Proto";
9
+
10
// A DDSketch is a histogram over the positive values, split into an unbounded
// series of indexed bins of exponentially growing size. Each bin records how
// many values (or how much floating-point weight) fell into it. Negative values
// are binned the same way, mirrored around zero. Zero itself, together with the
// small neighborhood around it that would otherwise land in extreme bin
// indexes, is tracked by a dedicated counter.
message DDSketch {
  // Mapping from positive values to the index of the bin they belong to.
  IndexMapping mapping = 1;

  // Store tracking the positive values.
  Store positiveValues = 2;

  // Store tracking the negative values; a negative value v is mapped through
  // its positive opposite -v.
  Store negativeValues = 3;

  // Count for zero and its close neighborhood (the width of that neighborhood
  // depends on the mapping).
  double zeroCount = 4;
}
28
+
29
// Describes how positive values are assigned to bins.
message IndexMapping {
  // Ways of approximating the logarithm when computing a bin index.
  // NONE means that the log is to be computed exactly (no interpolation).
  enum Interpolation {
    NONE = 0;
    LINEAR = 1;
    QUADRATIC = 2;
    CUBIC = 3;
  }

  // The gamma parameter of the mapping: the index of the bin a value v belongs
  // to is roughly log(v)/log(gamma).
  double gamma = 1;

  // Offset that can be used to shift all bin indexes.
  double indexOffset = 2;

  // Computing the index can be sped up by approximating the log: the log to the
  // base 2 of a power of 2 is cheap to read from the binary representation of
  // the input, and values in between successive powers of 2 can be
  // interpolated (linearly, quadratically or cubically).
  Interpolation interpolation = 3;
}
51
+
52
// A Store maps bin indexes to their respective counts.
// Two encodings are available: a sparse one (binCounts) and a contiguous one
// (contiguousBinCounts together with contiguousBinIndexOffset). Because
// non-empty bins are in practice usually contiguous or close to one another,
// the contiguous encoding is usually the more efficient of the two.
// Both encodings may be used together; a bin present in both has a count equal
// to the sum of its counts in each encoding.
message Store {
  // Sparse encoding of the bin counts.
  map<sint32, double> binCounts = 1;

  // Contiguous encoding of the bin counts: the entries are the counts of the
  // bins of indexes o, o+1, o+2, etc., where o is contiguousBinIndexOffset.
  repeated double contiguousBinCounts = 2 [packed = true];
  sint32 contiguousBinIndexOffset = 3;
}
@@ -0,0 +1,36 @@
1
+ # Generated by the protocol buffer compiler. DO NOT EDIT!
2
+ # source: ddsketch.proto
3
+
4
+ require "google/protobuf"
5
+
6
# Registers the descriptors for the messages and the enum declared in
# ddsketch.proto with the generated descriptor pool.
# NOTE: this file is marked as protoc output; prefer regenerating it from the
# schema over editing it by hand.
Google::Protobuf::DescriptorPool.generated_pool.build do
  add_message("DDSketch") do
    optional(:mapping, :message, 1, "IndexMapping")
    optional(:positiveValues, :message, 2, "Store")
    optional(:negativeValues, :message, 3, "Store")
    optional(:zeroCount, :double, 4)
  end

  add_message("IndexMapping") do
    optional(:gamma, :double, 1)
    optional(:indexOffset, :double, 2)
    optional(:interpolation, :enum, 3, "IndexMapping.Interpolation")
  end

  add_enum("IndexMapping.Interpolation") do
    value(:NONE, 0)
    value(:LINEAR, 1)
    value(:QUADRATIC, 2)
    value(:CUBIC, 3)
  end

  add_message("Store") do
    map(:binCounts, :sint32, :double, 1)
    repeated(:contiguousBinCounts, :double, 2)
    optional(:contiguousBinIndexOffset, :sint32, 3)
  end
end
30
+
31
# Exposes the classes and enum module looked up from the generated descriptor
# pool as constants under DDSketch::Proto.
module DDSketch::Proto
  descriptor_pool = Google::Protobuf::DescriptorPool.generated_pool

  DDSketch = descriptor_pool.lookup("DDSketch").msgclass
  IndexMapping = descriptor_pool.lookup("IndexMapping").msgclass
  IndexMapping::Interpolation = descriptor_pool.lookup("IndexMapping.Interpolation").enummodule
  Store = descriptor_pool.lookup("Store").msgclass
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ddsketch/proto/ddsketch_pb"
4
+
5
module DDSketch
  # Namespace for protobuf object generated by `google-protobuf`
  # @!visibility private
  module Proto
    # Translates a key mapping's interpolation symbol into the protobuf enum
    # value. Symbols not listed here (including nil) fall back to NONE in
    # serialize_interpolation.
    INTERPOLATION_MAPPING = {
      linear: IndexMapping::Interpolation::LINEAR,
      cubic: IndexMapping::Interpolation::CUBIC
    }.freeze

    private_constant :INTERPOLATION_MAPPING

    module_function

    # Builds the protobuf message for a whole sketch from its mapping, its
    # positive and negative stores and its zero count.
    #
    # @param [BaseSketch] sketch
    # @return [Proto::DDSketch]
    def serialize_sketch(sketch)
      DDSketch.new(
        mapping: serialize_key_mapping(sketch.mapping),
        positiveValues: serialize_store(sketch.store),
        negativeValues: serialize_store(sketch.negative_store),
        zeroCount: sketch.zero_count
      )
    end

    # Builds the protobuf message for a store using the contiguous encoding,
    # from the store's `bins` and `offset`.
    #
    # @param [Store::DenseStore] store
    # @return [Proto::Store]
    def serialize_store(store)
      Store.new(
        contiguousBinCounts: store.bins,
        contiguousBinIndexOffset: store.offset
      )
    end

    # Builds the protobuf message for a key mapping.
    #
    # @param [Mapping::KeyMapping] mapping
    # @return [Proto::IndexMapping]
    def serialize_key_mapping(mapping)
      IndexMapping.new(
        # The protobuf field carries the bucket base (gamma), not the relative
        # accuracy it was derived from.
        gamma: mapping.gamma,
        indexOffset: mapping.offset,
        interpolation: serialize_interpolation(mapping)
      )
    end

    # Resolves the protobuf interpolation enum value for a key mapping.
    #
    # @param [Mapping::KeyMapping] mapping
    # @return the matching IndexMapping::Interpolation value, NONE by default
    def serialize_interpolation(mapping)
      INTERPOLATION_MAPPING.fetch(mapping.interpolation, IndexMapping::Interpolation::NONE)
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  # The default implementation of DDSketch, with optimized memory usage at
  # the cost of lower ingestion speed, using an unlimited number of bins. The
  # number of bins will not exceed a reasonable number unless the data is
  # distributed with tails heavier than any subexponential.
  class Sketch < BaseSketch
    # Wires a logarithmic key mapping to two dense stores, one for positive
    # and one for negative values.
    #
    # @param [Float] relative_accuracy The guaranteed relative accuracy for sketch
    def initialize(relative_accuracy: DEFAULT_REL_ACC)
      key_mapping = Mapping::LogarithmicKeyMapping.new(relative_accuracy: relative_accuracy)
      positive_bins = Store::DenseStore.new
      negative_bins = Store::DenseStore.new

      super(mapping: key_mapping, store: positive_bins, negative_store: negative_bins)
    end
  end
end