ddsketch 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,143 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DDSketch
4
+ module Store
5
+ # A dense store that keeps all the bins between the bin for the min_key and the
6
+ # bin for the max_key, but collapsing the right-most bins if the number of bins
7
+ # exceeds the bin_limit
8
class CollapsingHighestDenseStore < DenseStore
  # @return [Integer] the maximum number of bins
  attr_reader :bin_limit

  # @return [Boolean] whether the store has been collapsed
  attr_reader :is_collapsed

  # @param [Integer] bin_limit the maximum number of bins
  # @param [Integer] chunk_size the number of bins to grow by
  def initialize(bin_limit:, chunk_size: CHUNK_SIZE)
    super(chunk_size: chunk_size)

    @bin_limit = bin_limit
    @is_collapsed = false
  end

  # Copies the input store into the current store, including its
  # bin_limit and collapsed flag.
  #
  # @param [Store::CollapsingHighestDenseStore] store the store to be copied
  #
  # @return [nil]
  def copy(store)
    super(store)

    self.bin_limit = store.bin_limit
    self.is_collapsed = store.is_collapsed

    nil
  end

  # Merge another store into the current store,
  # collapsing the right-most bins if the number of bins
  # exceeds the bin_limit
  #
  # @param [Store::CollapsingHighestDenseStore] store
  #   the store to be merged
  #
  # @return [nil]
  def merge(store)
    return if store.count == 0

    if count == 0
      copy(store)
      return
    end

    extend_range(store.min_key, store.max_key) if store.min_key < min_key || store.max_key > max_key

    # Bins of the other store whose keys are above our max_key cannot be
    # stored individually; their counts are summed into our last bin.
    collapse_end_idx = store.max_key - store.offset + 1
    collapse_start_idx = [max_key + 1, store.min_key].max - store.offset
    if collapse_end_idx > collapse_start_idx
      collapse_count = store.bins[collapse_start_idx...collapse_end_idx].inject(:+) || 0
      bins[-1] += collapse_count
    else
      # Nothing to collapse: every key of the other store is copied bin by bin.
      collapse_start_idx = collapse_end_idx
    end

    store.min_key.upto(collapse_start_idx + store.offset - 1) do |key|
      bins[key - offset] += store.bins[key - store.offset]
    end

    self.count += store.count

    nil
  end

  protected

  attr_writer :bin_limit, :is_collapsed

  private

  # Number of bins needed to cover the key range, rounded up to a multiple
  # of chunk_size and capped at bin_limit.
  def get_new_length(new_min_key, new_max_key)
    desired_length = new_max_key - new_min_key + 1

    [
      chunk_size * (desired_length.to_f / chunk_size).ceil,
      bin_limit
    ].min
  end

  # Calculate the bin index for the key, extending the range if necessary.
  # Keys above max_key map to the last bin once the store is collapsed.
  def get_index(key)
    if key > max_key
      return length - 1 if is_collapsed

      extend_range(key)
      # extend_range may have collapsed the store while making room for key
      return length - 1 if is_collapsed
    elsif key < min_key
      extend_range(key)
    end

    key - offset
  end

  # Override. Adjust the bins, the offset, the min_key, and max_key, without
  # resizing the bins, in order to try making it fit the specified
  # range. Collapse to the right if necessary.
  def adjust(new_min_key, new_max_key)
    if new_max_key - new_min_key + 1 > length
      # The range of keys is too wide, the highest bins need to be collapsed.
      new_max_key = new_min_key + length - 1

      if new_max_key <= min_key
        # put everything in the last bin
        self.offset = new_min_key
        self.max_key = new_max_key
        self.bins = [0.0] * length
        bins[-1] = count
      else
        shift = offset - new_min_key

        # Collapse only when populated bins really lie above the new max key.
        # During a merge the clamped new_max_key can be greater than the
        # current max_key; the replacement array size would then be negative
        # and Array#* raises ArgumentError.
        if shift > 0 && max_key > new_max_key
          collapse_start_index = new_max_key - offset + 1
          collapse_end_index = max_key - offset + 1
          collapsed_count = bins[collapse_start_index...collapse_end_index].inject(:+) || 0

          bins[collapse_start_index...collapse_end_index] = [0.0] * (max_key - new_max_key)
          bins[collapse_start_index - 1] += collapsed_count
        end

        self.max_key = new_max_key
        # shift the buckets to make room for new_min_key
        shift_bins(shift)
      end

      self.min_key = new_min_key
      self.is_collapsed = true
    else
      center_bins(new_min_key, new_max_key)
      self.min_key = new_min_key
      self.max_key = new_max_key
    end
  end
end
142
+ end
143
+ end
@@ -0,0 +1,145 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DDSketch
4
+ module Store
5
+ # A dense store that keeps all the bins between the bin for the min_key and the
6
+ # bin for the max_key, but collapsing the left-most bins if the number of bins
7
+ # exceeds the bin_limit
8
class CollapsingLowestDenseStore < DenseStore
  # @return [Integer] the maximum number of bins
  attr_reader :bin_limit

  # @return [Boolean] whether the store has been collapsed
  attr_reader :is_collapsed

  # @param [Integer] bin_limit the maximum number of bins
  # @param [Integer] chunk_size the number of bins to grow by
  def initialize(bin_limit:, chunk_size: CHUNK_SIZE)
    super(chunk_size: chunk_size)

    @bin_limit = bin_limit
    @is_collapsed = false
  end

  # Copies the input store into the current store, including its
  # bin_limit and collapsed flag.
  #
  # @param [Store::CollapsingLowestDenseStore] store the store to be copied
  #
  # @return [nil]
  def copy(store)
    super(store)

    self.bin_limit = store.bin_limit
    self.is_collapsed = store.is_collapsed

    nil
  end

  # Merge another store into the current store,
  # collapsing the left-most bins if the number of bins
  # exceeds the bin_limit
  #
  # @param [Store::CollapsingLowestDenseStore] store
  #   the store to be merged
  #
  # @return [nil]
  def merge(store)
    return if store.count == 0

    if count == 0
      copy(store)
      return
    end

    extend_range(store.min_key, store.max_key) if store.min_key < min_key || store.max_key > max_key

    # Bins of the other store whose keys are below our min_key cannot be
    # stored individually; their counts are summed into our first bin.
    collapse_start_idx = store.min_key - store.offset
    collapse_end_idx = [min_key, store.max_key + 1].min - store.offset

    if collapse_end_idx > collapse_start_idx
      collapse_count = store.bins[collapse_start_idx...collapse_end_idx].inject(:+) || 0
      bins[0] += collapse_count
    else
      # Nothing to collapse: every key of the other store is copied bin by bin.
      collapse_end_idx = collapse_start_idx
    end

    (collapse_end_idx + store.offset).upto(store.max_key) do |key|
      bins[key - offset] += store.bins[key - store.offset]
    end

    self.count += store.count

    nil
  end

  protected

  attr_writer :bin_limit, :is_collapsed

  private

  # Number of bins needed to cover the key range, rounded up to a multiple
  # of chunk_size and capped at bin_limit.
  def get_new_length(new_min_key, new_max_key)
    desired_length = new_max_key - new_min_key + 1

    [
      chunk_size * (desired_length.to_f / chunk_size).ceil,
      bin_limit
    ].min
  end

  # Calculate the bin index for the key, extending the range if necessary.
  # Keys below min_key map to the first bin once the store is collapsed.
  def get_index(key)
    if key < min_key
      return 0 if is_collapsed

      extend_range(key)
      # extend_range may have collapsed the store while making room for key
      return 0 if is_collapsed
    elsif key > max_key
      extend_range(key)
    end

    key - offset
  end

  # Override. Adjust the bins, the offset, the min_key, and max_key, without
  # resizing the bins, in order to try making it fit the specified
  # range. Collapse to the left if necessary.
  def adjust(new_min_key, new_max_key)
    if new_max_key - new_min_key + 1 > length
      # The range of keys is too wide, the lowest bins need to be collapsed.
      new_min_key = new_max_key - length + 1

      if new_min_key >= max_key
        # put everything in the first bin
        self.offset = new_min_key
        self.min_key = new_min_key
        self.bins = [0.0] * length
        bins[0] = count
      else
        shift = offset - new_min_key

        # Collapse only when populated bins really lie below the new min key.
        # During a merge the clamped new_min_key can be smaller than the
        # current min_key; the replacement array size would then be negative
        # and Array#* raises ArgumentError.
        if shift < 0 && new_min_key > min_key
          collapse_start_index = min_key - offset
          collapse_end_index = new_min_key - offset
          collapsed_count = bins[collapse_start_index...collapse_end_index].inject(:+) || 0

          bins[collapse_start_index...collapse_end_index] = [0.0] * (new_min_key - min_key)
          bins[collapse_end_index] += collapsed_count
        end

        self.min_key = new_min_key
        # shift the buckets to make room for new_min_key
        shift_bins(shift)
      end

      self.max_key = new_max_key
      self.is_collapsed = true
    else
      center_bins(new_min_key, new_max_key)
      self.min_key = new_min_key
      self.max_key = new_max_key
    end
  end
end
144
+ end
145
+ end
@@ -0,0 +1,210 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DDSketch
4
+ module Store
5
+ #
6
+ # Stores map integers to counters. They can be seen as a collection of bins.
7
+ # We start with 128 bins and grow the store in chunks of 128 unless specified
8
+ # otherwise.
9
+ #
10
+ # A dense store that keeps all the bins between the bin for the min_key and the
11
+ # bin for the max_key.
12
+ #
13
class DenseStore
  CHUNK_SIZE = 128

  # @return [Integer] the sum of the counts for the bins
  attr_reader :count

  # @return [Integer] the minimum key bin
  attr_reader :min_key

  # @return [Integer] the maximum key bin
  attr_reader :max_key

  # @return [Integer] the number of bins to grow by
  attr_reader :chunk_size

  # @return [Integer] the difference btw the keys and the index in which they are stored
  attr_reader :offset

  # @return [Array<Float>] the bins
  attr_reader :bins

  # @param [Integer] chunk_size the number of bins to grow by
  def initialize(chunk_size: CHUNK_SIZE)
    @count = 0
    # Infinite sentinels so that the first key added is both below min_key
    # and above max_key.
    @min_key = Float::INFINITY
    @max_key = -Float::INFINITY

    @chunk_size = chunk_size
    @offset = 0
    @bins = []
  end

  #
  # Copies the input store into the current store. The bins array is
  # duplicated, so later updates to either store do not affect the other.
  #
  # @param [Store::DenseStore] store the store to be copied
  #
  # @return [nil]
  #
  def copy(store)
    self.bins = store.bins.dup
    self.count = store.count
    self.min_key = store.min_key
    self.max_key = store.max_key
    self.offset = store.offset

    nil
  end

  #
  # Merge another store into the current store. This should be equivalent as running the
  # add operations that have been run on the other store on this one.
  #
  # @param [Store::DenseStore] store
  #   the store to be merged
  #
  # @return [nil]
  #
  def merge(store)
    return if store.count == 0

    if count == 0
      copy(store)
      return
    end

    extend_range(store.min_key, store.max_key) if store.min_key < min_key || store.max_key > max_key

    store.min_key.upto(store.max_key) do |key|
      bins[key - offset] += store.bins[key - store.offset]
    end

    self.count += store.count

    nil
  end

  #
  # Return the number of bins
  #
  # @return [Integer] the number of bins
  #
  def length
    bins.length
  end

  #
  # Updates the counter at the specified index key, growing the number of bins if necessary.
  #
  # @param [Float] key
  # @param [Float] weight
  #
  # @return [nil]
  #
  def add(key, weight = 1.0)
    idx = get_index(key)
    bins[idx] += weight
    self.count += weight

    nil
  end

  #
  # Return the key for the value at a given rank
  #
  # @param [Float] rank
  # @param [Boolean] lower
  #   when true, the first key whose cumulative count is strictly greater
  #   than rank is returned; when false, the first key whose cumulative
  #   count is at least rank + 1
  #
  # @return [Integer]
  #
  def key_at_rank(rank, lower = true)
    running_ct = 0.0

    bins.each_with_index do |bin_ct, i|
      running_ct += bin_ct

      return i + offset if (lower && running_ct > rank) || (!lower && running_ct >= rank + 1)
    end

    # No bin satisfied the condition (the cumulative count never got past
    # the requested rank): fall back to the highest key.
    max_key
  end

  protected

  attr_writer :count, :min_key, :max_key, :chunk_size, :offset, :bins

  private

  # Calculate the bin index for the key, extending the range if necessary.
  def get_index(key)
    extend_range(key) if key < min_key || key > max_key

    key - offset
  end

  # Number of bins needed to cover the key range, rounded up to a multiple
  # of chunk_size.
  def get_new_length(new_min_key, new_max_key)
    desired_length = new_max_key - new_min_key + 1

    chunk_size * (desired_length.to_f / chunk_size).ceil
  end

  # Grow the bins as necessary and call adjust.
  # second_key defaults to key, so a single key or a key pair can be given.
  def extend_range(key, second_key = nil)
    second_key ||= key
    new_min_key = [key, second_key, min_key].min
    new_max_key = [key, second_key, max_key].max

    if bins.empty?
      # initialize bins
      self.bins = [0.0] * get_new_length(new_min_key, new_max_key)
      self.offset = new_min_key
      adjust(new_min_key, new_max_key)
    elsif new_min_key >= min_key && new_max_key < offset + length
      # no need to change the range; just update min/max keys
      self.min_key = new_min_key
      self.max_key = new_max_key
    else
      # grow the bins; concat is used instead of push(*array) so that a wide
      # key range does not turn into a huge argument list
      new_length = get_new_length(new_min_key, new_max_key)
      bins.concat([0.0] * (new_length - length)) if new_length > length
      adjust(new_min_key, new_max_key)
    end
  end

  # Adjust the bins, the offset, the min_key, and max_key, without resizing the
  # bins, in order to try making it fit the specified range.
  def adjust(new_min_key, new_max_key)
    center_bins(new_min_key, new_max_key)

    self.min_key = new_min_key
    self.max_key = new_max_key
  end

  # Shift the bins; this changes the offset.
  # A positive shift drops bins from the end and pads zeros at the front;
  # otherwise bins are dropped from the front and zeros padded at the end.
  # The padding is built as an array rather than splatted into
  # unshift/push, because shift can be as large as the number of bins.
  def shift_bins(shift)
    if shift > 0
      kept = bins[0...-shift] || []
      self.bins = Array.new(shift, 0.0).concat(kept)
    else
      dropped = shift.abs
      self.bins = bins.drop(dropped).concat(Array.new(dropped, 0.0))
    end

    self.offset -= shift
  end

  # Center the bins; this changes the offset.
  def center_bins(new_min_key, new_max_key)
    middle_key = new_min_key + (new_max_key - new_min_key + 1).div(2)

    shift_bins(offset + length.div(2) - middle_key)
  end
end
209
+ end
210
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DDSketch
4
# Version components of the gem and their dotted string form.
module Version
  MAJOR = 0
  MINOR = 1
  PATCH = 0
  PRE = nil

  class << self
    # Builds the dotted version string; the pre-release segment is left out
    # when PRE is nil.
    #
    # @return [String]
    def to_s
      segments = [MAJOR, MINOR, PATCH]
      segments << PRE unless PRE.nil?
      segments.join(".")
    end
  end
end
14
+ end
data/lib/ddsketch.rb ADDED
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "ddsketch/version"
4
+ require "ddsketch/errors"
5
+
6
+ # sketches
7
+ require "ddsketch/base_sketch"
8
+ require "ddsketch/sketch"
9
+ require "ddsketch/log_collapsing_lowest_dense_sketch"
10
+ require "ddsketch/log_collapsing_highest_dense_sketch"
11
+
12
+ # key mappings
13
+ require "ddsketch/mapping/key_mapping"
14
+ require "ddsketch/mapping/logarithmic_key_mapping"
15
+ require "ddsketch/mapping/linear_interpolated_key_mapping"
16
+ require "ddsketch/mapping/cubically_interpolated_key_mapping"
17
+
18
+ # dense stores
19
+ require "ddsketch/store/dense_store"
20
+ require "ddsketch/store/collapsing_lowest_dense_store"
21
+ require "ddsketch/store/collapsing_highest_dense_store"
22
+
23
+ # Namespace for DDSketch library
24
+ module DDSketch
25
+ end
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ddsketch
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Datadog, Inc.
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-07-07 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: DDSketch is a fast-to-insert, fully mergeable, space-efficient quantile
14
+ sketch with relative error guarantees.
15
+ email:
16
+ - dev@datadoghq.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - ".rspec"
22
+ - ".rubocop.yml"
23
+ - ".simplecov"
24
+ - ".yardopts"
25
+ - CHANGELOG.md
26
+ - CONTRIBUTING.md
27
+ - Gemfile
28
+ - LICENSE
29
+ - LICENSE-3rdparty.csv
30
+ - NOTICE
31
+ - README.md
32
+ - Rakefile
33
+ - ddsketch-ruby.gemspec
34
+ - lib/ddsketch.rb
35
+ - lib/ddsketch/base_sketch.rb
36
+ - lib/ddsketch/errors.rb
37
+ - lib/ddsketch/log_collapsing_highest_dense_sketch.rb
38
+ - lib/ddsketch/log_collapsing_lowest_dense_sketch.rb
39
+ - lib/ddsketch/mapping/cubically_interpolated_key_mapping.rb
40
+ - lib/ddsketch/mapping/key_mapping.rb
41
+ - lib/ddsketch/mapping/linear_interpolated_key_mapping.rb
42
+ - lib/ddsketch/mapping/logarithmic_key_mapping.rb
43
+ - lib/ddsketch/proto.rb
44
+ - lib/ddsketch/proto/ddsketch.proto
45
+ - lib/ddsketch/proto/ddsketch_pb.rb
46
+ - lib/ddsketch/sketch.rb
47
+ - lib/ddsketch/store/collapsing_highest_dense_store.rb
48
+ - lib/ddsketch/store/collapsing_lowest_dense_store.rb
49
+ - lib/ddsketch/store/dense_store.rb
50
+ - lib/ddsketch/version.rb
51
+ homepage: https://github.com/DataDog/ddsketch-ruby
52
+ licenses:
53
+ - Apache-2.0
54
+ metadata:
55
+ allowed_push_host: https://rubygems.org
56
+ homepage_uri: https://github.com/DataDog/ddsketch-ruby
57
+ source_code_uri: https://github.com/DataDog/ddsketch-ruby
58
+ changelog_uri: https://github.com/DataDog/ddsketch-ruby/blob/main/CHANGELOG.md
59
+ post_install_message:
60
+ rdoc_options: []
61
+ require_paths:
62
+ - lib
63
+ required_ruby_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: 2.1.0
68
+ required_rubygems_version: !ruby/object:Gem::Requirement
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ version: '0'
73
+ requirements: []
74
+ rubygems_version: 3.3.15
75
+ signing_key:
76
+ specification_version: 4
77
+ summary: Ruby implementations of the distributed quantile sketch algorithm DDSketch.
78
+ test_files: []