airbrake-ruby 3.1.0 → 3.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/airbrake-ruby.rb +197 -43
- data/lib/airbrake-ruby/config.rb +43 -11
- data/lib/airbrake-ruby/deploy_notifier.rb +47 -0
- data/lib/airbrake-ruby/filter_chain.rb +32 -50
- data/lib/airbrake-ruby/filters/git_repository_filter.rb +9 -1
- data/lib/airbrake-ruby/filters/sql_filter.rb +104 -0
- data/lib/airbrake-ruby/hash_keyable.rb +37 -0
- data/lib/airbrake-ruby/ignorable.rb +44 -0
- data/lib/airbrake-ruby/notice.rb +2 -22
- data/lib/airbrake-ruby/{notifier.rb → notice_notifier.rb} +66 -46
- data/lib/airbrake-ruby/performance_notifier.rb +161 -0
- data/lib/airbrake-ruby/stat.rb +56 -0
- data/lib/airbrake-ruby/tdigest.rb +393 -0
- data/lib/airbrake-ruby/time_truncate.rb +17 -0
- data/lib/airbrake-ruby/version.rb +1 -1
- data/spec/airbrake_spec.rb +57 -13
- data/spec/async_sender_spec.rb +0 -2
- data/spec/backtrace_spec.rb +0 -2
- data/spec/code_hunk_spec.rb +0 -2
- data/spec/config/validator_spec.rb +0 -2
- data/spec/config_spec.rb +16 -4
- data/spec/deploy_notifier_spec.rb +41 -0
- data/spec/file_cache.rb +0 -2
- data/spec/filter_chain_spec.rb +1 -7
- data/spec/filters/context_filter_spec.rb +0 -2
- data/spec/filters/dependency_filter_spec.rb +0 -2
- data/spec/filters/exception_attributes_filter_spec.rb +0 -2
- data/spec/filters/gem_root_filter_spec.rb +0 -2
- data/spec/filters/git_last_checkout_filter_spec.rb +0 -2
- data/spec/filters/git_repository_filter.rb +0 -2
- data/spec/filters/git_revision_filter_spec.rb +0 -2
- data/spec/filters/keys_blacklist_spec.rb +0 -2
- data/spec/filters/keys_whitelist_spec.rb +0 -2
- data/spec/filters/root_directory_filter_spec.rb +0 -2
- data/spec/filters/sql_filter_spec.rb +219 -0
- data/spec/filters/system_exit_filter_spec.rb +0 -2
- data/spec/filters/thread_filter_spec.rb +0 -2
- data/spec/ignorable_spec.rb +14 -0
- data/spec/nested_exception_spec.rb +0 -2
- data/spec/{notifier_spec.rb → notice_notifier_spec.rb} +24 -114
- data/spec/{notifier_spec → notice_notifier_spec}/options_spec.rb +40 -39
- data/spec/notice_spec.rb +2 -4
- data/spec/performance_notifier_spec.rb +287 -0
- data/spec/promise_spec.rb +0 -2
- data/spec/response_spec.rb +0 -2
- data/spec/stat_spec.rb +35 -0
- data/spec/sync_sender_spec.rb +0 -2
- data/spec/tdigest_spec.rb +230 -0
- data/spec/time_truncate_spec.rb +13 -0
- data/spec/truncator_spec.rb +0 -2
- metadata +34 -15
- data/lib/airbrake-ruby/route_sender.rb +0 -175
- data/spec/route_sender_spec.rb +0 -130
@@ -0,0 +1,161 @@
|
|
1
|
+
module Airbrake
|
2
|
+
# PerformanceNotifier aggregates performance data (route and SQL query stats) and periodically sends
|
3
|
+
# collected data to Airbrake.
|
4
|
+
#
|
5
|
+
# @api public
|
6
|
+
# @since v3.2.0
|
7
|
+
class PerformanceNotifier
|
8
|
+
# @param [Airbrake::Config] config
|
9
|
+
def initialize(config)
|
10
|
+
@config =
|
11
|
+
if config.is_a?(Config)
|
12
|
+
config
|
13
|
+
else
|
14
|
+
loc = caller_locations(1..1).first
|
15
|
+
signature = "#{self.class.name}##{__method__}"
|
16
|
+
warn(
|
17
|
+
"#{loc.path}:#{loc.lineno}: warning: passing a Hash to #{signature} " \
|
18
|
+
'is deprecated. Pass `Airbrake::Config` instead'
|
19
|
+
)
|
20
|
+
Config.new(config)
|
21
|
+
end
|
22
|
+
|
23
|
+
@flush_period = @config.performance_stats_flush_period
|
24
|
+
@sender = SyncSender.new(@config, :put)
|
25
|
+
@payload = {}
|
26
|
+
@schedule_flush = nil
|
27
|
+
@mutex = Mutex.new
|
28
|
+
@filter_chain = FilterChain.new
|
29
|
+
end
|
30
|
+
|
31
|
+
# @param [Hash] resource
|
32
|
+
# @param [Airbrake::Promise] promise
|
33
|
+
# @see Airbrake.notify_query
|
34
|
+
# @see Airbrake.notify_request
|
35
|
+
def notify(resource, promise = Airbrake::Promise.new)
  # Every exit path hands the promise back to the caller, so the return value
  # can always be chained on.
  if @config.ignored_environment?
    return promise.reject("The '#{@config.environment}' environment is ignored")
  end

  unless @config.performance_stats
    return promise.reject("The Performance Stats feature is disabled")
  end

  # Performance filters get a chance to modify or ignore the resource before
  # it is aggregated.
  @filter_chain.refine(resource)
  if resource.ignored?
    # Previously this branch returned nil, unlike the other early exits.
    # Reject the promise so callers get a consistent return value and a reason.
    return promise.reject("#{resource.class} was ignored by a filter")
  end

  @mutex.synchronize do
    # Identical resources share a single Stat, which accumulates the
    # durations of every occurrence.
    @payload[resource] ||= Airbrake::Stat.new
    @payload[resource].increment(resource.start_time, resource.end_time)

    if @flush_period > 0
      # Deferred delivery: a background thread sends the whole payload after
      # the flush period elapses.
      schedule_flush(promise)
    else
      # No flush period configured: send right away.
      # NOTE(review): @payload is not cleared on this path — confirm that
      # re-sending previously accumulated stats is intended.
      send(@payload, promise)
    end
  end

  promise
end
|
60
|
+
|
61
|
+
# @see Airbrake.add_performance_filter
|
62
|
+
def add_filter(filter = nil, &block)
|
63
|
+
@filter_chain.add_filter(block_given? ? block : filter)
|
64
|
+
end
|
65
|
+
|
66
|
+
# @see Airbrake.delete_performance_filter
|
67
|
+
def delete_filter(filter_class)
|
68
|
+
@filter_chain.delete_filter(filter_class)
|
69
|
+
end
|
70
|
+
|
71
|
+
private
|
72
|
+
|
73
|
+
def schedule_flush(promise)
|
74
|
+
@schedule_flush ||= Thread.new do
|
75
|
+
sleep(@flush_period)
|
76
|
+
|
77
|
+
payload = nil
|
78
|
+
@mutex.synchronize do
|
79
|
+
payload = @payload
|
80
|
+
@payload = {}
|
81
|
+
@schedule_flush = nil
|
82
|
+
end
|
83
|
+
|
84
|
+
send(payload, promise)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
def send(payload, promise)
|
89
|
+
signature = "#{self.class.name}##{__method__}"
|
90
|
+
raise "#{signature}: payload (#{payload}) cannot be empty. Race?" if payload.none?
|
91
|
+
|
92
|
+
@config.logger.debug("#{LOG_LABEL} #{signature}: #{payload}")
|
93
|
+
|
94
|
+
payload.group_by { |k, _v| k.name }.each do |resource_name, data|
|
95
|
+
@sender.send(
|
96
|
+
{ resource_name => data.map { |k, v| k.to_h.merge!(v.to_h) } },
|
97
|
+
promise,
|
98
|
+
URI.join(
|
99
|
+
@config.host,
|
100
|
+
"api/v5/projects/#{@config.project_id}/#{resource_name}-stats"
|
101
|
+
)
|
102
|
+
)
|
103
|
+
end
|
104
|
+
end
|
105
|
+
end
|
106
|
+
|
107
|
+
# Request holds request data that powers route stats.
|
108
|
+
#
|
109
|
+
# @see Airbrake.notify_request
|
110
|
+
# @api public
|
111
|
+
# @since v3.2.0
|
112
|
+
Request = Struct.new(:method, :route, :status_code, :start_time, :end_time) do
|
113
|
+
include HashKeyable
|
114
|
+
include Ignorable
|
115
|
+
|
116
|
+
def initialize(method:, route:, status_code:, start_time:, end_time: Time.now)
|
117
|
+
@ignored = false
|
118
|
+
super(method, route, status_code, start_time, end_time)
|
119
|
+
end
|
120
|
+
|
121
|
+
def name
|
122
|
+
'routes'
|
123
|
+
end
|
124
|
+
|
125
|
+
def to_h
|
126
|
+
{
|
127
|
+
'method' => method,
|
128
|
+
'route' => route,
|
129
|
+
'statusCode' => status_code,
|
130
|
+
'time' => TimeTruncate.utc_truncate_minutes(start_time)
|
131
|
+
}
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
# Query holds SQL query data that powers SQL query collection.
|
136
|
+
#
|
137
|
+
# @see Airbrake.notify_query
|
138
|
+
# @api public
|
139
|
+
# @since v3.2.0
|
140
|
+
Query = Struct.new(:method, :route, :query, :start_time, :end_time) do
  include HashKeyable
  include Ignorable

  # @param [String] method HTTP method of the request that ran the query
  # @param [String] route Route of the request that ran the query
  # @param [String] query The SQL query
  # @param [Time] start_time When the query started
  # @param [Time] end_time When the query finished (defaults to now)
  def initialize(method:, route:, query:, start_time:, end_time: Time.now)
    # Start out explicitly "not ignored", the same way Request#initialize
    # does, so the instance variable is never read uninitialized.
    # NOTE(review): presumably Ignorable reads @ignored — confirm.
    @ignored = false
    super(method, route, query, start_time, end_time)
  end

  # @return [String] the resource name; PerformanceNotifier groups payload
  #   entries by it and uses it to build the "<name>-stats" endpoint
  def name
    'queries'
  end

  # @return [Hash{String=>Object}] the query attributes, with the start time
  #   passed through TimeTruncate.utc_truncate_minutes
  def to_h
    {
      'method' => method,
      'route' => route,
      'query' => query,
      'time' => TimeTruncate.utc_truncate_minutes(start_time)
    }
  end
end
|
161
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'base64'
|
2
|
+
|
3
|
+
module Airbrake
|
4
|
+
# Stat is a data structure that allows accumulating performance data (route
|
5
|
+
# performance, SQL query performance and such). It's powered by TDigests.
|
6
|
+
#
|
7
|
+
# Usually, one Stat corresponds to one resource (route or query,
|
8
|
+
# etc.). Incrementing a stat means pushing new performance statistics.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# stat = Airbrake::Stat.new
|
12
|
+
# stat.increment(Time.now - 200)
|
13
|
+
# stat.to_h # Pack and serialize data so it can be transmitted.
|
14
|
+
#
|
15
|
+
# @since v3.2.0
|
16
|
+
Stat = Struct.new(:count, :sum, :sumsq, :tdigest) do
|
17
|
+
# @param [Integer] count How many times this stat was incremented
|
18
|
+
# @param [Float] sum The sum of duration in milliseconds
|
19
|
+
# @param [Float] sumsq The sum of squared durations in milliseconds
|
20
|
+
# @param [Airbrake::TDigest] tdigest Packed durations. By default,
|
21
|
+
# compression is 20
|
22
|
+
def initialize(count: 0, sum: 0.0, sumsq: 0.0, tdigest: TDigest.new(0.05))
|
23
|
+
super(count, sum, sumsq, tdigest)
|
24
|
+
end
|
25
|
+
|
26
|
+
# @return [Hash{String=>Object}] stats as a hash with compressed TDigest
|
27
|
+
# (serialized as base64)
|
28
|
+
def to_h
|
29
|
+
tdigest.compress!
|
30
|
+
{
|
31
|
+
'count' => count,
|
32
|
+
'sum' => sum,
|
33
|
+
'sumsq' => sumsq,
|
34
|
+
'tdigest' => Base64.strict_encode64(tdigest.as_small_bytes)
|
35
|
+
}
|
36
|
+
end
|
37
|
+
|
38
|
+
# Increments count and updates performance with the difference of +end_time+
|
39
|
+
# and +start_time+.
|
40
|
+
#
|
41
|
+
# @param [Time] start_time
|
42
|
+
# @param [Time] end_time
|
43
|
+
# @return [void]
|
44
|
+
def increment(start_time, end_time = nil)
|
45
|
+
end_time ||= Time.new
|
46
|
+
|
47
|
+
self.count += 1
|
48
|
+
|
49
|
+
ms = (end_time - start_time) * 1000
|
50
|
+
self.sum += ms
|
51
|
+
self.sumsq += ms * ms
|
52
|
+
|
53
|
+
tdigest.push(ms)
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,393 @@
|
|
1
|
+
require 'rbtree'
|
2
|
+
|
3
|
+
module Airbrake
|
4
|
+
# Ruby implementation of Ted Dunning's t-digest data structure.
|
5
|
+
#
|
6
|
+
# This implementation is imported from https://github.com/castle/tdigest with
|
7
|
+
# custom modifications. Huge thanks to Castle for the implementation :beer:
|
8
|
+
#
|
9
|
+
# The difference is that we pack with Big Endian (unlike Native Endian in
|
10
|
+
# Castle's version). Our backend does not permit little endian.
|
11
|
+
#
|
12
|
+
# @see https://github.com/tdunning/t-digest
|
13
|
+
# @see https://github.com/castle/tdigest
|
14
|
+
# @api private
|
15
|
+
# @since v3.2.0
|
16
|
+
#
|
17
|
+
# rubocop:disable Metrics/ClassLength
|
18
|
+
class TDigest
|
19
|
+
VERBOSE_ENCODING = 1
|
20
|
+
SMALL_ENCODING = 2
|
21
|
+
|
22
|
+
# Centroid represents a number of data points.
|
23
|
+
# @api private
|
24
|
+
# @since v3.2.0
|
25
|
+
class Centroid
|
26
|
+
attr_accessor :mean, :n, :cumn, :mean_cumn
|
27
|
+
def initialize(mean, n, cumn, mean_cumn = nil)
|
28
|
+
@mean = mean
|
29
|
+
@n = n
|
30
|
+
@cumn = cumn
|
31
|
+
@mean_cumn = mean_cumn
|
32
|
+
end
|
33
|
+
|
34
|
+
def as_json(_ = nil)
|
35
|
+
{ m: mean, n: n }
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
attr_accessor :centroids
|
40
|
+
def initialize(delta = 0.01, k = 25, cx = 1.1)
|
41
|
+
@delta = delta
|
42
|
+
@k = k
|
43
|
+
@cx = cx
|
44
|
+
@centroids = RBTree.new
|
45
|
+
@nreset = 0
|
46
|
+
@n = 0
|
47
|
+
reset!
|
48
|
+
end
|
49
|
+
|
50
|
+
def +(other)
|
51
|
+
# Uses delta, k and cx from the caller
|
52
|
+
t = self.class.new(@delta, @k, @cx)
|
53
|
+
data = centroids.values + other.centroids.values
|
54
|
+
t.push_centroid(data.delete_at(rand(data.length))) while data.any?
|
55
|
+
t
|
56
|
+
end
|
57
|
+
|
58
|
+
def as_bytes
|
59
|
+
# compression as defined by Java implementation
|
60
|
+
size = @centroids.size
|
61
|
+
output = [VERBOSE_ENCODING, compression, size]
|
62
|
+
output += @centroids.map { |_, c| c.mean }
|
63
|
+
output += @centroids.map { |_, c| c.n }
|
64
|
+
output.pack("NGNG#{size}N#{size}")
|
65
|
+
end
|
66
|
+
|
67
|
+
# rubocop:disable Metrics/AbcSize
|
68
|
+
def as_small_bytes
  # Serializes the digest in the compact ("small") big-endian format:
  #   header: encoding id (uint32), compression (double), centroid count (uint32)
  #   body:   delta-encoded means (one 4-byte float per centroid), followed by
  #           the centroid counts as variable-length integers
  #
  # @return [String] the packed binary string
  # @raise [RuntimeError] if a centroid count needs more than 6 continuation
  #   bytes
  size = @centroids.size
  output = [self.class::SMALL_ENCODING, compression, size]
  x = 0
  # delta encoding allows saving 4-bytes floats
  mean_arr = @centroids.map do |_, c|
    val = c.mean - x
    x = c.mean
    val
  end
  output += mean_arr
  # Variable length encoding of numbers: 7 bits of payload per byte, the high
  # bit marks "more bytes follow".
  c_arr = @centroids.each_with_object([]) do |(_, c), arr|
    k = 0
    n = c.n
    while n < 0 || n > 0x7f
      b = 0x80 | (0x7f & n)
      arr << b
      n = n >> 7
      k += 1
      raise 'Unreasonable large number' if k > 6
    end
    arr << n
  end
  output += c_arr
  # A count above 0x7f takes more than one byte, so the number of count bytes
  # can exceed the number of centroids. Pack exactly as many bytes as were
  # produced; a "C#{size}" template would silently drop the overflow.
  output.pack("NGNg#{size}C#{c_arr.size}")
end
|
95
|
+
# rubocop:enable Metrics/AbcSize
|
96
|
+
|
97
|
+
def as_json(_ = nil)
|
98
|
+
@centroids.map { |_, c| c.as_json }
|
99
|
+
end
|
100
|
+
|
101
|
+
def bound_mean(x)
|
102
|
+
upper = @centroids.upper_bound(x)
|
103
|
+
lower = @centroids.lower_bound(x)
|
104
|
+
[lower[1], upper[1]]
|
105
|
+
end
|
106
|
+
|
107
|
+
def bound_mean_cumn(cumn)
|
108
|
+
last_c = nil
|
109
|
+
bounds = []
|
110
|
+
@centroids.each_value do |v|
|
111
|
+
if v.mean_cumn == cumn
|
112
|
+
bounds << v
|
113
|
+
break
|
114
|
+
elsif v.mean_cumn > cumn
|
115
|
+
bounds << last_c
|
116
|
+
bounds << v
|
117
|
+
break
|
118
|
+
else
|
119
|
+
last_c = v
|
120
|
+
end
|
121
|
+
end
|
122
|
+
# If still no results, pick lagging value if any
|
123
|
+
bounds << last_c if bounds.empty? && !last_c.nil?
|
124
|
+
|
125
|
+
bounds
|
126
|
+
end
|
127
|
+
|
128
|
+
def compress!
|
129
|
+
points = to_a
|
130
|
+
reset!
|
131
|
+
push_centroid(points.shuffle)
|
132
|
+
_cumulate(true, true)
|
133
|
+
nil
|
134
|
+
end
|
135
|
+
|
136
|
+
def compression
|
137
|
+
1 / @delta
|
138
|
+
end
|
139
|
+
|
140
|
+
def find_nearest(x)
|
141
|
+
return nil if size == 0
|
142
|
+
|
143
|
+
ceil = @centroids.upper_bound(x)
|
144
|
+
floor = @centroids.lower_bound(x)
|
145
|
+
|
146
|
+
return floor[1] if ceil.nil?
|
147
|
+
return ceil[1] if floor.nil?
|
148
|
+
|
149
|
+
ceil_key = ceil[0]
|
150
|
+
floor_key = floor[0]
|
151
|
+
|
152
|
+
if (floor_key - x).abs < (ceil_key - x).abs
|
153
|
+
floor[1]
|
154
|
+
else
|
155
|
+
ceil[1]
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
def merge!(other)
|
160
|
+
push_centroid(other.centroids.values.shuffle)
|
161
|
+
self
|
162
|
+
end
|
163
|
+
|
164
|
+
# rubocop:disable Metrics/PerceivedComplexity, Metrics/AbcSize
|
165
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
166
|
+
def p_rank(x)
  # Estimates the cumulative rank (between 0.0 and 1.0) of a value or of each
  # value in an array.
  #
  # @param [Numeric, Array<Numeric>] x a single value or a list of values
  # @return [Float, nil, Array<Float, nil>] a single result for a scalar
  #   argument, an array of results for an array argument; nil entries when
  #   the digest is empty
  is_array = x.is_a? Array
  x = [x] unless is_array

  min = @centroids.first
  max = @centroids.last

  # Build a new array (map, not map!) so that an array passed in by the
  # caller is left untouched.
  ranks = x.map do |item|
    if size == 0
      nil
    elsif item < min[1].mean
      0.0
    elsif item > max[1].mean
      1.0
    else
      _cumulate(true)
      bound = bound_mean(item)
      lower, upper = bound
      mean_cumn = lower.mean_cumn
      if lower != upper
        # Linear interpolation between the two neighbouring centroids.
        mean_cumn += (item - lower.mean) * (upper.mean_cumn - lower.mean_cumn) \
          / (upper.mean - lower.mean)
      end
      mean_cumn / @n
    end
  end
  is_array ? ranks : ranks.first
end
|
194
|
+
# rubocop:enable Metrics/PerceivedComplexity, Metrics/AbcSize
|
195
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
196
|
+
|
197
|
+
# rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
198
|
+
# rubocop:disable Metrics/AbcSize
|
199
|
+
def percentile(p)
  # Estimates the value found at the given percentile(s).
  #
  # @param [Numeric, Array<Numeric>] p a percentile in [0, 1] or a list of them
  # @return [Numeric, nil, Array<Numeric, nil>] a single result for a scalar
  #   argument, an array of results for an array argument; nil entries when
  #   the digest is empty
  # @raise [ArgumentError] if any percentile is outside of [0, 1]
  is_array = p.is_a? Array
  p = [p] unless is_array
  # Build a new array (map, not map!) so that an array passed in by the
  # caller is left untouched.
  values = p.map do |item|
    unless (0..1).cover?(item)
      raise ArgumentError, "p should be in [0,1], got #{item}"
    end
    if size == 0
      nil
    else
      _cumulate(true)
      # Target cumulative count for this percentile.
      h = @n * item
      lower, upper = bound_mean_cumn(h)
      if lower.nil? && upper.nil?
        nil
      elsif upper == lower || lower.nil? || upper.nil?
        (lower || upper).mean
      elsif h == lower.mean_cumn
        lower.mean
      else
        upper.mean
      end
    end
  end
  is_array ? values : values.first
end
|
225
|
+
# rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
226
|
+
# rubocop:enable Metrics/AbcSize
|
227
|
+
|
228
|
+
def push(x, n = 1)
|
229
|
+
x = [x] unless x.is_a? Array
|
230
|
+
x.each { |value| _digest(value, n) }
|
231
|
+
end
|
232
|
+
|
233
|
+
def push_centroid(c)
|
234
|
+
c = [c] unless c.is_a? Array
|
235
|
+
c.each { |centroid| _digest(centroid.mean, centroid.n) }
|
236
|
+
end
|
237
|
+
|
238
|
+
def reset!
|
239
|
+
@centroids.clear
|
240
|
+
@n = 0
|
241
|
+
@nreset += 1
|
242
|
+
@last_cumulate = 0
|
243
|
+
end
|
244
|
+
|
245
|
+
def size
|
246
|
+
@n || 0
|
247
|
+
end
|
248
|
+
|
249
|
+
def to_a
|
250
|
+
@centroids.map { |_, c| c }
|
251
|
+
end
|
252
|
+
|
253
|
+
# rubocop:disable Metrics/PerceivedComplexity, Metrics/MethodLength
|
254
|
+
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/AbcSize
|
255
|
+
def self.from_bytes(bytes)
|
256
|
+
format, compression, size = bytes.unpack('NGN')
|
257
|
+
tdigest = new(1 / compression)
|
258
|
+
|
259
|
+
start_idx = 16 # after header
|
260
|
+
case format
|
261
|
+
when VERBOSE_ENCODING
|
262
|
+
array = bytes[start_idx..-1].unpack("G#{size}N#{size}")
|
263
|
+
means, counts = array.each_slice(size).to_a if array.any?
|
264
|
+
when SMALL_ENCODING
|
265
|
+
means = bytes[start_idx..(start_idx + 4 * size)].unpack("g#{size}")
|
266
|
+
# Decode delta encoding of means
|
267
|
+
x = 0
|
268
|
+
means.map! do |m|
|
269
|
+
m += x
|
270
|
+
x = m
|
271
|
+
m
|
272
|
+
end
|
273
|
+
counts_bytes = bytes[(start_idx + 4 * size)..-1].unpack('C*')
|
274
|
+
counts = []
|
275
|
+
# Decode variable length integer bytes
|
276
|
+
size.times do
|
277
|
+
v = counts_bytes.shift
|
278
|
+
z = 0x7f & v
|
279
|
+
shift = 7
|
280
|
+
while (v & 0x80) != 0
|
281
|
+
raise 'Shift too large in decode' if shift > 28
|
282
|
+
v = counts_bytes.shift || 0
|
283
|
+
z += (v & 0x7f) << shift
|
284
|
+
shift += 7
|
285
|
+
end
|
286
|
+
counts << z
|
287
|
+
end
|
288
|
+
# This shouldn't happen
|
289
|
+
raise 'Mismatch' unless counts.size == means.size
|
290
|
+
else
|
291
|
+
raise 'Unknown compression format'
|
292
|
+
end
|
293
|
+
|
294
|
+
means.zip(counts).each { |val| tdigest.push(val[0], val[1]) } if means && counts
|
295
|
+
|
296
|
+
tdigest
|
297
|
+
end
|
298
|
+
# rubocop:enable Metrics/PerceivedComplexity, Metrics/MethodLength
|
299
|
+
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/AbcSize
|
300
|
+
|
301
|
+
def self.from_json(array)
|
302
|
+
tdigest = new
|
303
|
+
# Handle both string and symbol keys
|
304
|
+
array.each { |a| tdigest.push(a['m'] || a[:m], a['n'] || a[:n]) }
|
305
|
+
tdigest
|
306
|
+
end
|
307
|
+
|
308
|
+
private
|
309
|
+
|
310
|
+
def _add_weight(nearest, x, n)
|
311
|
+
nearest.mean += n * (x - nearest.mean) / (nearest.n + n) unless x == nearest.mean
|
312
|
+
|
313
|
+
_cumulate(false, true) if nearest.mean_cumn.nil?
|
314
|
+
|
315
|
+
nearest.cumn += n
|
316
|
+
nearest.mean_cumn += n / 2.0
|
317
|
+
nearest.n += n
|
318
|
+
|
319
|
+
nil
|
320
|
+
end
|
321
|
+
|
322
|
+
# rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
323
|
+
def _cumulate(exact = false, force = false)
|
324
|
+
unless force
|
325
|
+
factor = if @last_cumulate == 0
|
326
|
+
Float::INFINITY
|
327
|
+
else
|
328
|
+
(@n.to_f / @last_cumulate)
|
329
|
+
end
|
330
|
+
return if @n == @last_cumulate || (!exact && @cx && @cx > factor)
|
331
|
+
end
|
332
|
+
|
333
|
+
cumn = 0
|
334
|
+
@centroids.each do |_, c|
|
335
|
+
c.mean_cumn = cumn + c.n / 2.0
|
336
|
+
cumn = c.cumn = cumn + c.n
|
337
|
+
end
|
338
|
+
@n = @last_cumulate = cumn
|
339
|
+
nil
|
340
|
+
end
|
341
|
+
# rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
342
|
+
|
343
|
+
# rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
344
|
+
# rubocop:disable Metrics/AbcSize
|
345
|
+
def _digest(x, n)
|
346
|
+
# Use 'first' and 'last' instead of min/max because of performance reasons
|
347
|
+
# This works because RBTree is sorted
|
348
|
+
min = @centroids.first
|
349
|
+
max = @centroids.last
|
350
|
+
|
351
|
+
min = min.nil? ? nil : min[1]
|
352
|
+
max = max.nil? ? nil : max[1]
|
353
|
+
nearest = find_nearest(x)
|
354
|
+
|
355
|
+
@n += n
|
356
|
+
|
357
|
+
if nearest && nearest.mean == x
|
358
|
+
_add_weight(nearest, x, n)
|
359
|
+
elsif nearest == min
|
360
|
+
_new_centroid(x, n, 0)
|
361
|
+
elsif nearest == max
|
362
|
+
_new_centroid(x, n, @n)
|
363
|
+
else
|
364
|
+
p = nearest.mean_cumn.to_f / @n
|
365
|
+
max_n = (4 * @n * @delta * p * (1 - p)).floor
|
366
|
+
if max_n - nearest.n >= n
|
367
|
+
_add_weight(nearest, x, n)
|
368
|
+
else
|
369
|
+
_new_centroid(x, n, nearest.cumn)
|
370
|
+
end
|
371
|
+
end
|
372
|
+
|
373
|
+
_cumulate(false)
|
374
|
+
|
375
|
+
# If the number of centroids has grown to a very large size,
|
376
|
+
# it may be due to values being inserted in sorted order.
|
377
|
+
# We combat that by replaying the centroids in random order,
|
378
|
+
# which is what compress! does
|
379
|
+
compress! if @centroids.size > (@k / @delta)
|
380
|
+
|
381
|
+
nil
|
382
|
+
end
|
383
|
+
# rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
384
|
+
# rubocop:enable Metrics/AbcSize
|
385
|
+
|
386
|
+
def _new_centroid(x, n, cumn)
|
387
|
+
c = Centroid.new(x, n, cumn)
|
388
|
+
@centroids[x] = c
|
389
|
+
c
|
390
|
+
end
|
391
|
+
end
|
392
|
+
# rubocop:enable Metrics/ClassLength
|
393
|
+
end
|