ddsketch 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,143 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  module Store
    # A dense store that keeps every bin between the bin for min_key and the bin
    # for max_key, and folds the right-most (highest) bins together once the
    # number of bins would exceed bin_limit.
    class CollapsingHighestDenseStore < DenseStore
      # @return [Integer] the maximum number of bins
      attr_reader :bin_limit

      # @return [Boolean] whether the store has been collapsed
      attr_reader :is_collapsed

      # @param [Integer] bin_limit the maximum number of bins
      # @param [Integer] chunk_size the number of bins to grow by
      def initialize(bin_limit:, chunk_size: CHUNK_SIZE)
        super(chunk_size: chunk_size)

        @bin_limit = bin_limit
        @is_collapsed = false
      end

      # Copies the input store into the current store, including its bin_limit
      # and collapsed flag.
      #
      # @param [Store::CollapsingHighestDenseStore] store the store to be copied
      #
      # @return [nil]
      def copy(store)
        super(store)

        self.bin_limit = store.bin_limit
        self.is_collapsed = store.is_collapsed

        nil
      end

      # Merges another store into the current store, collapsing the right-most
      # bins if the number of bins exceeds the bin_limit.
      #
      # @param [Store::CollapsingHighestDenseStore] store
      #   the store to be merged
      #
      # @return [nil]
      def merge(store)
        return if store.count == 0

        if count == 0
          copy(store)
          return
        end

        extend_range(store.min_key, store.max_key) if store.min_key < min_key || store.max_key > max_key

        # Indices (into store.bins) of the keys that lie above our max_key; their
        # counts are all added to our last bin.
        collapse_end_idx = store.max_key - store.offset + 1
        collapse_start_idx = [max_key + 1, store.min_key].max - store.offset
        if collapse_end_idx > collapse_start_idx
          collapse_count = store.bins[collapse_start_idx...collapse_end_idx].inject(:+) || 0
          bins[-1] += collapse_count
        else
          # Nothing to collapse: every key of the other store is merged bin by bin.
          collapse_start_idx = collapse_end_idx
        end

        store.min_key.upto(collapse_start_idx + store.offset - 1) do |key|
          bins[key - offset] += store.bins[key - store.offset]
        end

        self.count += store.count

        nil
      end

      protected

      attr_writer :bin_limit, :is_collapsed

      private

      # Number of bins needed to hold the given key range: the range width rounded
      # up to a multiple of chunk_size, capped at bin_limit.
      def get_new_length(new_min_key, new_max_key)
        desired_length = new_max_key - new_min_key + 1

        [
          chunk_size * (desired_length.to_f / chunk_size).ceil,
          bin_limit
        ].min
      end

      # Calculate the bin index for the key, extending the range if necessary.
      # Once the store is collapsed, every key above max_key maps to the last bin.
      def get_index(key)
        if key > max_key
          return length - 1 if is_collapsed

          extend_range(key)
          # extend_range may have collapsed the store; the key then belongs to the last bin.
          return length - 1 if is_collapsed
        elsif key < min_key
          extend_range(key)
        end

        key - offset
      end

      # Override. Adjust the bins, the offset, the min_key, and max_key, without
      # resizing the bins, in order to try making it fit the specified
      # range. Collapse to the right (highest keys) if necessary.
      def adjust(new_min_key, new_max_key)
        if new_max_key - new_min_key + 1 > length
          # The range of keys is too wide, the highest bins need to be collapsed.
          new_max_key = new_min_key + length - 1

          if new_max_key <= min_key
            # put everything in the last bin
            self.offset = new_min_key
            self.max_key = new_max_key
            self.bins = [0.0] * length
            bins[-1] = count
          else
            shift = offset - new_min_key

            # Only fold bins when existing keys really lie above the new max_key.
            # Without the second condition `max_key - new_max_key` can be negative
            # (range extended on both sides at once, e.g. by merge), and
            # `[0.0] * negative` raises ArgumentError in Ruby.
            if shift > 0 && max_key > new_max_key
              collapse_start_index = new_max_key - offset + 1
              collapse_end_index = max_key - offset + 1
              collapsed_count = bins[collapse_start_index...collapse_end_index].inject(:+) || 0

              bins[collapse_start_index...collapse_end_index] = [0.0] * (max_key - new_max_key)
              bins[collapse_start_index - 1] += collapsed_count
            end

            self.max_key = new_max_key
            # shift the buckets so that index 0 lines up with new_min_key
            shift_bins(shift)
          end

          self.min_key = new_min_key
          self.is_collapsed = true
        else
          center_bins(new_min_key, new_max_key)
          self.min_key = new_min_key
          self.max_key = new_max_key
        end
      end
    end
  end
end
@@ -0,0 +1,145 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  module Store
    # A dense store that keeps every bin between the bin for min_key and the bin
    # for max_key, and folds the left-most (lowest) bins together once the number
    # of bins would exceed bin_limit.
    class CollapsingLowestDenseStore < DenseStore
      # @return [Integer] the maximum number of bins
      attr_reader :bin_limit

      # @return [Boolean] whether the store has been collapsed
      attr_reader :is_collapsed

      # @param [Integer] bin_limit the maximum number of bins
      # @param [Integer] chunk_size the number of bins to grow by
      def initialize(bin_limit:, chunk_size: CHUNK_SIZE)
        super(chunk_size: chunk_size)

        @bin_limit = bin_limit
        @is_collapsed = false
      end

      # Copies the input store into the current store, including its bin_limit
      # and collapsed flag.
      #
      # @param [Store::CollapsingLowestDenseStore] store the store to be copied
      #
      # @return [nil]
      def copy(store)
        super(store)

        self.bin_limit = store.bin_limit
        self.is_collapsed = store.is_collapsed

        nil
      end

      # Merges another store into the current store, collapsing the left-most
      # bins if the number of bins exceeds the bin_limit.
      #
      # @param [Store::CollapsingLowestDenseStore] store
      #   the store to be merged
      #
      # @return [nil]
      def merge(store)
        return if store.count == 0

        if count == 0
          copy(store)
          return
        end

        extend_range(store.min_key, store.max_key) if store.min_key < min_key || store.max_key > max_key

        # Indices (into store.bins) of the keys that lie below our min_key; their
        # counts are all added to our first bin.
        collapse_start_idx = store.min_key - store.offset
        collapse_end_idx = [min_key, store.max_key + 1].min - store.offset

        if collapse_end_idx > collapse_start_idx
          collapse_count = store.bins[collapse_start_idx...collapse_end_idx].inject(:+) || 0
          bins[0] += collapse_count
        else
          # Nothing to collapse: every key of the other store is merged bin by bin.
          collapse_end_idx = collapse_start_idx
        end

        (collapse_end_idx + store.offset).upto(store.max_key) do |key|
          bins[key - offset] += store.bins[key - store.offset]
        end

        self.count += store.count

        nil
      end

      protected

      attr_writer :bin_limit, :is_collapsed

      private

      # Number of bins needed to hold the given key range: the range width rounded
      # up to a multiple of chunk_size, capped at bin_limit.
      def get_new_length(new_min_key, new_max_key)
        desired_length = new_max_key - new_min_key + 1

        [
          chunk_size * (desired_length.to_f / chunk_size).ceil,
          bin_limit
        ].min
      end

      # Calculate the bin index for the key, extending the range if necessary.
      # Once the store is collapsed, every key below min_key maps to the first bin.
      def get_index(key)
        if key < min_key
          return 0 if is_collapsed

          extend_range(key)
          # extend_range may have collapsed the store; the key then belongs to the first bin.
          return 0 if is_collapsed
        elsif key > max_key
          extend_range(key)
        end

        key - offset
      end

      # Override. Adjust the bins, the offset, the min_key, and max_key, without
      # resizing the bins, in order to try making it fit the specified
      # range. Collapse to the left if necessary.
      def adjust(new_min_key, new_max_key)
        if new_max_key - new_min_key + 1 > length
          # The range of keys is too wide, the lowest bins need to be collapsed.
          new_min_key = new_max_key - length + 1

          if new_min_key >= max_key
            # put everything in the first bin
            self.offset = new_min_key
            self.min_key = new_min_key
            self.bins = [0.0] * length
            bins[0] = count
          else
            shift = offset - new_min_key

            # Only fold bins when existing keys really lie below the new min_key.
            # Without the second condition `new_min_key - min_key` can be negative
            # (range extended on both sides at once, e.g. by merge), and
            # `[0.0] * negative` raises ArgumentError in Ruby.
            if shift < 0 && new_min_key > min_key
              collapse_start_index = min_key - offset
              collapse_end_index = new_min_key - offset
              collapsed_count = bins[collapse_start_index...collapse_end_index].inject(:+) || 0

              bins[collapse_start_index...collapse_end_index] = [0.0] * (new_min_key - min_key)
              bins[collapse_end_index] += collapsed_count
            end

            self.min_key = new_min_key
            # shift the buckets so that index 0 lines up with new_min_key
            shift_bins(shift)
          end

          self.max_key = new_max_key
          self.is_collapsed = true
        else
          center_bins(new_min_key, new_max_key)
          self.min_key = new_min_key
          self.max_key = new_max_key
        end
      end
    end
  end
end
@@ -0,0 +1,210 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  module Store
    #
    # Stores map integer keys to counters. They can be seen as a collection of bins.
    # Bins are allocated in chunks of CHUNK_SIZE (128) unless another chunk_size
    # is specified.
    #
    # A dense store keeps all the bins between the bin for the min_key and the
    # bin for the max_key.
    #
    class DenseStore
      # Default number of bins added each time the store has to grow.
      CHUNK_SIZE = 128

      # @return [Numeric] the sum of the counts for the bins
      attr_reader :count

      # @return [Integer, Float] the minimum key bin (Float::INFINITY while the store is empty)
      attr_reader :min_key

      # @return [Integer, Float] the maximum key bin (-Float::INFINITY while the store is empty)
      attr_reader :max_key

      # @return [Integer] the number of bins to grow by
      attr_reader :chunk_size

      # @return [Integer] the difference between the keys and the index in which they are stored
      attr_reader :offset

      # @return [Array<Float>] the bins
      attr_reader :bins

      # @param [Integer] chunk_size the number of bins to grow by
      def initialize(chunk_size: CHUNK_SIZE)
        @count = 0
        @min_key = Float::INFINITY
        @max_key = -Float::INFINITY

        @chunk_size = chunk_size
        @offset = 0
        @bins = []
      end

      #
      # Copies the input store into the current store. The bins array is
      # duplicated, so later changes to one store do not affect the other.
      #
      # @param [Store::DenseStore] store the store to be copied
      #
      # @return [nil]
      #
      def copy(store)
        self.bins = store.bins.dup
        self.count = store.count
        self.min_key = store.min_key
        self.max_key = store.max_key
        self.offset = store.offset

        nil
      end

      #
      # Merge another store into the current store. This should be equivalent to running the
      # add operations that have been run on the other store on this one.
      #
      # @param [Store::DenseStore] store
      #   the store to be merged
      #
      # @return [nil]
      #
      def merge(store)
        return if store.count == 0

        if count == 0
          copy(store)
          return
        end

        extend_range(store.min_key, store.max_key) if store.min_key < min_key || store.max_key > max_key

        store.min_key.upto(store.max_key) do |key|
          bins[key - offset] += store.bins[key - store.offset]
        end

        self.count += store.count

        nil
      end

      #
      # Return the number of bins
      #
      # @return [Integer] the number of bins
      #
      def length
        bins.length
      end

      #
      # Updates the counter at the specified index key, growing the number of bins if necessary.
      #
      # @param [Integer] key
      # @param [Float] weight
      #
      # @return [nil]
      #
      def add(key, weight = 1.0)
        idx = get_index(key)
        bins[idx] += weight
        self.count += weight

        nil
      end

      #
      # Return the key for the value at a given rank
      #
      # @param [Float] rank
      # @param [Boolean] lower
      #   when true, return the first key whose cumulative count is greater than
      #   rank; when false, the first key whose cumulative count is at least rank + 1
      #
      # @return [Integer]
      #
      def key_at_rank(rank, lower = true)
        running_ct = 0.0

        bins.each_with_index do |bin_ct, i|
          running_ct += bin_ct

          return i + offset if (lower && running_ct > rank) || (!lower && running_ct >= rank + 1)
        end

        # No bin reached the requested rank (rank is not below the total of the
        # bins): fall back to the largest key.
        max_key
      end

      protected

      attr_writer :count, :min_key, :max_key, :chunk_size, :offset, :bins

      private

      # Calculate the bin index for the key, extending the range if necessary.
      def get_index(key)
        extend_range(key) if key < min_key || key > max_key

        key - offset
      end

      # Number of bins needed to hold the given key range: the range width rounded
      # up to a multiple of chunk_size.
      def get_new_length(new_min_key, new_max_key)
        desired_length = new_max_key - new_min_key + 1

        chunk_size * (desired_length.to_f / chunk_size).ceil
      end

      # Grow the bins as necessary and call adjust. When second_key is omitted the
      # range is extended to cover key only.
      def extend_range(key, second_key = nil)
        second_key ||= key
        new_min_key = [key, second_key, min_key].min
        new_max_key = [key, second_key, max_key].max

        if length == 0
          # initialize bins
          self.bins = [0.0] * get_new_length(new_min_key, new_max_key)
          self.offset = new_min_key
          adjust(new_min_key, new_max_key)

        elsif new_min_key >= min_key && (new_max_key < (offset + length))
          # no need to change the range; just update min/max keys
          self.min_key = new_min_key
          self.max_key = new_max_key

        else
          # grow the bins
          new_length = get_new_length(new_min_key, new_max_key)
          bins.push(*([0.0] * (new_length - length))) if new_length > length
          adjust(new_min_key, new_max_key)
        end
      end

      # Adjust the bins, the offset, the min_key, and max_key, without resizing the
      # bins, in order to try making it fit the specified range.
      def adjust(new_min_key, new_max_key)
        center_bins(new_min_key, new_max_key)

        self.min_key = new_min_key
        self.max_key = new_max_key
      end

      # Shift the bins; this changes the offset. A positive shift moves the
      # counters towards higher indices, a negative shift towards lower indices.
      # Vacated positions are filled with 0.0 and the number of bins never changes.
      #
      # The number of positions actually moved is clamped to the current length:
      # slicing past the end of an Array returns nil in Ruby, which would
      # otherwise break the following push.
      def shift_bins(shift)
        moved = [shift.abs, length].min
        padding = [0.0] * moved

        self.bins =
          if shift > 0
            padding + bins.first(length - moved)
          else
            bins.drop(moved) + padding
          end
        self.offset -= shift
      end

      # Center the bins; this changes the offset.
      def center_bins(new_min_key, new_max_key)
        middle_key = new_min_key + (new_max_key - new_min_key + 1).div(2)

        shift_bins(offset + length.div(2) - middle_key)
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
module DDSketch
  # Version components of the ddsketch gem.
  module Version
    MAJOR = 0
    MINOR = 1
    PATCH = 0
    PRE = nil

    # Builds the dotted version string; components that are nil are left out.
    #
    # @return [String]
    def self.to_s
      segments = [MAJOR, MINOR, PATCH, PRE].reject(&:nil?)
      segments.join(".")
    end
  end
end
data/lib/ddsketch.rb ADDED
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ddsketch/version"
4
+ require "ddsketch/errors"
5
+
6
+ # sketches
7
+ require "ddsketch/base_sketch"
8
+ require "ddsketch/sketch"
9
+ require "ddsketch/log_collapsing_lowest_dense_sketch"
10
+ require "ddsketch/log_collapsing_highest_dense_sketch"
11
+
12
+ # key mappings
13
+ require "ddsketch/mapping/key_mapping"
14
+ require "ddsketch/mapping/logarithmic_key_mapping"
15
+ require "ddsketch/mapping/linear_interpolated_key_mapping"
16
+ require "ddsketch/mapping/cubically_interpolated_key_mapping"
17
+
18
+ # dense stores
19
+ require "ddsketch/store/dense_store"
20
+ require "ddsketch/store/collapsing_lowest_dense_store"
21
+ require "ddsketch/store/collapsing_highest_dense_store"
22
+
23
+ # Top-level namespace for the DDSketch library; this file only declares it and loads the files required above.
24
+ module DDSketch
25
+ end
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ddsketch
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Datadog, Inc.
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-07-07 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: DDSketch is a fast-to-insert, fully mergeable, space-efficient quantile
14
+ sketch with relative error guarantees.
15
+ email:
16
+ - dev@datadoghq.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - ".rspec"
22
+ - ".rubocop.yml"
23
+ - ".simplecov"
24
+ - ".yardopts"
25
+ - CHANGELOG.md
26
+ - CONTRIBUTING.md
27
+ - Gemfile
28
+ - LICENSE
29
+ - LICENSE-3rdparty.csv
30
+ - NOTICE
31
+ - README.md
32
+ - Rakefile
33
+ - ddsketch-ruby.gemspec
34
+ - lib/ddsketch.rb
35
+ - lib/ddsketch/base_sketch.rb
36
+ - lib/ddsketch/errors.rb
37
+ - lib/ddsketch/log_collapsing_highest_dense_sketch.rb
38
+ - lib/ddsketch/log_collapsing_lowest_dense_sketch.rb
39
+ - lib/ddsketch/mapping/cubically_interpolated_key_mapping.rb
40
+ - lib/ddsketch/mapping/key_mapping.rb
41
+ - lib/ddsketch/mapping/linear_interpolated_key_mapping.rb
42
+ - lib/ddsketch/mapping/logarithmic_key_mapping.rb
43
+ - lib/ddsketch/proto.rb
44
+ - lib/ddsketch/proto/ddsketch.proto
45
+ - lib/ddsketch/proto/ddsketch_pb.rb
46
+ - lib/ddsketch/sketch.rb
47
+ - lib/ddsketch/store/collapsing_highest_dense_store.rb
48
+ - lib/ddsketch/store/collapsing_lowest_dense_store.rb
49
+ - lib/ddsketch/store/dense_store.rb
50
+ - lib/ddsketch/version.rb
51
+ homepage: https://github.com/DataDog/ddsketch-ruby
52
+ licenses:
53
+ - Apache-2.0
54
+ metadata:
55
+ allowed_push_host: https://rubygems.org
56
+ homepage_uri: https://github.com/DataDog/ddsketch-ruby
57
+ source_code_uri: https://github.com/DataDog/ddsketch-ruby
58
+ changelog_uri: https://github.com/DataDog/ddsketch-ruby/blob/main/CHANGELOG.md
59
+ post_install_message:
60
+ rdoc_options: []
61
+ require_paths:
62
+ - lib
63
+ required_ruby_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: 2.1.0
68
+ required_rubygems_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ requirements: []
74
+ rubygems_version: 3.3.15
75
+ signing_key:
76
+ specification_version: 4
77
+ summary: Ruby implementations of the distributed quantile sketch algorithm DDSketch.
78
+ test_files: []