airbrake-ruby 3.1.0 → 3.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/airbrake-ruby.rb +197 -43
- data/lib/airbrake-ruby/config.rb +43 -11
- data/lib/airbrake-ruby/deploy_notifier.rb +47 -0
- data/lib/airbrake-ruby/filter_chain.rb +32 -50
- data/lib/airbrake-ruby/filters/git_repository_filter.rb +9 -1
- data/lib/airbrake-ruby/filters/sql_filter.rb +104 -0
- data/lib/airbrake-ruby/hash_keyable.rb +37 -0
- data/lib/airbrake-ruby/ignorable.rb +44 -0
- data/lib/airbrake-ruby/notice.rb +2 -22
- data/lib/airbrake-ruby/{notifier.rb → notice_notifier.rb} +66 -46
- data/lib/airbrake-ruby/performance_notifier.rb +161 -0
- data/lib/airbrake-ruby/stat.rb +56 -0
- data/lib/airbrake-ruby/tdigest.rb +393 -0
- data/lib/airbrake-ruby/time_truncate.rb +17 -0
- data/lib/airbrake-ruby/version.rb +1 -1
- data/spec/airbrake_spec.rb +57 -13
- data/spec/async_sender_spec.rb +0 -2
- data/spec/backtrace_spec.rb +0 -2
- data/spec/code_hunk_spec.rb +0 -2
- data/spec/config/validator_spec.rb +0 -2
- data/spec/config_spec.rb +16 -4
- data/spec/deploy_notifier_spec.rb +41 -0
- data/spec/file_cache.rb +0 -2
- data/spec/filter_chain_spec.rb +1 -7
- data/spec/filters/context_filter_spec.rb +0 -2
- data/spec/filters/dependency_filter_spec.rb +0 -2
- data/spec/filters/exception_attributes_filter_spec.rb +0 -2
- data/spec/filters/gem_root_filter_spec.rb +0 -2
- data/spec/filters/git_last_checkout_filter_spec.rb +0 -2
- data/spec/filters/git_repository_filter.rb +0 -2
- data/spec/filters/git_revision_filter_spec.rb +0 -2
- data/spec/filters/keys_blacklist_spec.rb +0 -2
- data/spec/filters/keys_whitelist_spec.rb +0 -2
- data/spec/filters/root_directory_filter_spec.rb +0 -2
- data/spec/filters/sql_filter_spec.rb +219 -0
- data/spec/filters/system_exit_filter_spec.rb +0 -2
- data/spec/filters/thread_filter_spec.rb +0 -2
- data/spec/ignorable_spec.rb +14 -0
- data/spec/nested_exception_spec.rb +0 -2
- data/spec/{notifier_spec.rb → notice_notifier_spec.rb} +24 -114
- data/spec/{notifier_spec → notice_notifier_spec}/options_spec.rb +40 -39
- data/spec/notice_spec.rb +2 -4
- data/spec/performance_notifier_spec.rb +287 -0
- data/spec/promise_spec.rb +0 -2
- data/spec/response_spec.rb +0 -2
- data/spec/stat_spec.rb +35 -0
- data/spec/sync_sender_spec.rb +0 -2
- data/spec/tdigest_spec.rb +230 -0
- data/spec/time_truncate_spec.rb +13 -0
- data/spec/truncator_spec.rb +0 -2
- metadata +34 -15
- data/lib/airbrake-ruby/route_sender.rb +0 -175
- data/spec/route_sender_spec.rb +0 -130
data/lib/airbrake-ruby/performance_notifier.rb
@@ -0,0 +1,161 @@
+module Airbrake
+  # QueryNotifier aggregates information about SQL queries and periodically sends
+  # collected data to Airbrake.
+  #
+  # @api public
+  # @since v3.2.0
+  class PerformanceNotifier
+    # @param [Airbrake::Config] config
+    def initialize(config)
+      @config =
+        if config.is_a?(Config)
+          config
+        else
+          loc = caller_locations(1..1).first
+          signature = "#{self.class.name}##{__method__}"
+          warn(
+            "#{loc.path}:#{loc.lineno}: warning: passing a Hash to #{signature} " \
+            'is deprecated. Pass `Airbrake::Config` instead'
+          )
+          Config.new(config)
+        end
+
+      @flush_period = @config.performance_stats_flush_period
+      @sender = SyncSender.new(@config, :put)
+      @payload = {}
+      @schedule_flush = nil
+      @mutex = Mutex.new
+      @filter_chain = FilterChain.new
+    end
+
+    # @param [Hash] resource
+    # @param [Airbrake::Promise] promise
+    # @see Airbrake.notify_query
+    # @see Airbrake.notify_request
+    def notify(resource, promise = Airbrake::Promise.new)
+      if @config.ignored_environment?
+        return promise.reject("The '#{@config.environment}' environment is ignored")
+      end
+
+      unless @config.performance_stats
+        return promise.reject("The Performance Stats feature is disabled")
+      end
+
+      @filter_chain.refine(resource)
+      return if resource.ignored?
+
+      @mutex.synchronize do
+        @payload[resource] ||= Airbrake::Stat.new
+        @payload[resource].increment(resource.start_time, resource.end_time)
+
+        if @flush_period > 0
+          schedule_flush(promise)
+        else
+          send(@payload, promise)
+        end
+      end
+
+      promise
+    end
+
+    # @see Airbrake.add_performance_filter
+    def add_filter(filter = nil, &block)
+      @filter_chain.add_filter(block_given? ? block : filter)
+    end
+
+    # @see Airbrake.delete_performance_filter
+    def delete_filter(filter_class)
+      @filter_chain.delete_filter(filter_class)
+    end
+
+    private
+
+    def schedule_flush(promise)
+      @schedule_flush ||= Thread.new do
+        sleep(@flush_period)
+
+        payload = nil
+        @mutex.synchronize do
+          payload = @payload
+          @payload = {}
+          @schedule_flush = nil
+        end
+
+        send(payload, promise)
+      end
+    end
+
+    def send(payload, promise)
+      signature = "#{self.class.name}##{__method__}"
+      raise "#{signature}: payload (#{payload}) cannot be empty. Race?" if payload.none?
+
+      @config.logger.debug("#{LOG_LABEL} #{signature}: #{payload}")
+
+      payload.group_by { |k, _v| k.name }.each do |resource_name, data|
+        @sender.send(
+          { resource_name => data.map { |k, v| k.to_h.merge!(v.to_h) } },
+          promise,
+          URI.join(
+            @config.host,
+            "api/v5/projects/#{@config.project_id}/#{resource_name}-stats"
+          )
+        )
+      end
+    end
+  end
+
+  # Request holds request data that powers route stats.
+  #
+  # @see Airbrake.notify_request
+  # @api public
+  # @since v3.2.0
+  Request = Struct.new(:method, :route, :status_code, :start_time, :end_time) do
+    include HashKeyable
+    include Ignorable
+
+    def initialize(method:, route:, status_code:, start_time:, end_time: Time.now)
+      @ignored = false
+      super(method, route, status_code, start_time, end_time)
+    end
+
+    def name
+      'routes'
+    end
+
+    def to_h
+      {
+        'method' => method,
+        'route' => route,
+        'statusCode' => status_code,
+        'time' => TimeTruncate.utc_truncate_minutes(start_time)
+      }
+    end
+  end
+
+  # Query holds SQL query data that powers SQL query collection.
+  #
+  # @see Airbrake.notify_query
+  # @api public
+  # @since v3.2.0
+  Query = Struct.new(:method, :route, :query, :start_time, :end_time) do
+    include HashKeyable
+    include Ignorable
+
+    def initialize(method:, route:, query:, start_time:, end_time: Time.now)
+      super(method, route, query, start_time, end_time)
+    end
+
+    def name
+      'queries'
+    end
+
+    def to_h
+      {
+        'method' => method,
+        'route' => route,
+        'query' => query,
+        'time' => TimeTruncate.utc_truncate_minutes(start_time)
+      }
+    end
+  end
+end
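
The notifier above backs the new Airbrake.notify_request and Airbrake.notify_query entry points added in lib/airbrake-ruby.rb (the +197 hunk in the file list, not reproduced here). A minimal usage sketch, assuming those wrappers accept the same keywords as the Request and Query structs defined in this hunk:

    # Sketch: reporting one route and one SQL query through the performance API.
    # Assumes Airbrake.configure has already run with a valid project_id/project_key
    # and that performance_stats is enabled (see the config.rb changes in this release).
    Airbrake.notify_request(
      method: 'GET',
      route: '/users/:id',
      status_code: 200,
      start_time: Time.now - 0.2, # request began 200ms ago
      end_time: Time.now
    )

    Airbrake.notify_query(
      method: 'GET',
      route: '/users/:id',
      query: 'SELECT * FROM users WHERE id = 1',
      start_time: Time.now - 0.005,
      end_time: Time.now
    )

Each call builds the corresponding resource struct and hands it to PerformanceNotifier#notify, which aggregates timings per resource and flushes them on the performance_stats_flush_period schedule.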
data/lib/airbrake-ruby/stat.rb
@@ -0,0 +1,56 @@
+require 'base64'
+
+module Airbrake
+  # Stat is a data structure that allows accumulating performance data (route
+  # performance, SQL query performance and such). It's powered by TDigests.
+  #
+  # Usually, one Stat corresponds to one resource (route or query,
+  # etc.). Incrementing a stat means pushing new performance statistics.
+  #
+  # @example
+  #   stat = Airbrake::Stat.new
+  #   stat.increment(Time.now - 200)
+  #   stat.to_h # Pack and serialize data so it can be transmitted.
+  #
+  # @since v3.2.0
+  Stat = Struct.new(:count, :sum, :sumsq, :tdigest) do
+    # @param [Integer] count How many times this stat was incremented
+    # @param [Float] sum The sum of duration in milliseconds
+    # @param [Float] sumsq The squared sum of duration in milliseconds
+    # @param [TDigest::TDigest] tdigest Packed durations. By default,
+    #   compression is 20
+    def initialize(count: 0, sum: 0.0, sumsq: 0.0, tdigest: TDigest.new(0.05))
+      super(count, sum, sumsq, tdigest)
+    end
+
+    # @return [Hash{String=>Object}] stats as a hash with compressed TDigest
+    #   (serialized as base64)
+    def to_h
+      tdigest.compress!
+      {
+        'count' => count,
+        'sum' => sum,
+        'sumsq' => sumsq,
+        'tdigest' => Base64.strict_encode64(tdigest.as_small_bytes)
+      }
+    end
+
+    # Increments count and updates performance with the difference of +end_time+
+    # and +start_time+.
+    #
+    # @param [Date] start_time
+    # @param [Date] end_time
+    # @return [void]
+    def increment(start_time, end_time = nil)
+      end_time ||= Time.new
+
+      self.count += 1
+
+      ms = (end_time - start_time) * 1000
+      self.sum += ms
+      self.sumsq += ms * ms
+
+      tdigest.push(ms)
+    end
+  end
+end
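
For reference, this is how a single Stat accumulates one timing and packs itself for transmission; the values below follow directly from the increment and to_h definitions in the hunk above:

    # Sketch: one 250ms sample pushed into a Stat and serialized.
    started  = Time.now - 0.25        # work began 250ms ago
    finished = Time.now

    stat = Airbrake::Stat.new
    stat.increment(started, finished) # count += 1; sum/sumsq in ms; tdigest.push(ms)

    stat.count # => 1
    stat.sum   # => ~250.0 (milliseconds)
    stat.to_h  # => { 'count' => 1, 'sum' => ..., 'sumsq' => ..., 'tdigest' => '<base64>' }

PerformanceNotifier keeps one such Stat per Request/Query resource and merges the resulting hashes into the payload it PUTs to the routes-stats and queries-stats endpoints.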
data/lib/airbrake-ruby/tdigest.rb
@@ -0,0 +1,393 @@
+require 'rbtree'
+
+module Airbrake
+  # Ruby implementation of Ted Dunning's t-digest data structure.
+  #
+  # This implementation is imported from https://github.com/castle/tdigest with
+  # custom modifications. Huge thanks to Castle for the implementation :beer:
+  #
+  # The difference is that we pack with Big Endian (unlike Native Endian in
+  # Castle's version). Our backend does not permit little endian.
+  #
+  # @see https://github.com/tdunning/t-digest
+  # @see https://github.com/castle/tdigest
+  # @api private
+  # @since v3.2.0
+  #
+  # rubocop:disable Metrics/ClassLength
+  class TDigest
+    VERBOSE_ENCODING = 1
+    SMALL_ENCODING = 2
+
+    # Centroid represents a number of data points.
+    # @api private
+    # @since v3.2.0
+    class Centroid
+      attr_accessor :mean, :n, :cumn, :mean_cumn
+      def initialize(mean, n, cumn, mean_cumn = nil)
+        @mean = mean
+        @n = n
+        @cumn = cumn
+        @mean_cumn = mean_cumn
+      end
+
+      def as_json(_ = nil)
+        { m: mean, n: n }
+      end
+    end
+
+    attr_accessor :centroids
+    def initialize(delta = 0.01, k = 25, cx = 1.1)
+      @delta = delta
+      @k = k
+      @cx = cx
+      @centroids = RBTree.new
+      @nreset = 0
+      @n = 0
+      reset!
+    end
+
+    def +(other)
+      # Uses delta, k and cx from the caller
+      t = self.class.new(@delta, @k, @cx)
+      data = centroids.values + other.centroids.values
+      t.push_centroid(data.delete_at(rand(data.length))) while data.any?
+      t
+    end
+
+    def as_bytes
+      # compression as defined by Java implementation
+      size = @centroids.size
+      output = [VERBOSE_ENCODING, compression, size]
+      output += @centroids.map { |_, c| c.mean }
+      output += @centroids.map { |_, c| c.n }
+      output.pack("NGNG#{size}N#{size}")
+    end
+
+    # rubocop:disable Metrics/AbcSize
+    def as_small_bytes
+      size = @centroids.size
+      output = [self.class::SMALL_ENCODING, compression, size]
+      x = 0
+      # delta encoding allows saving 4-bytes floats
+      mean_arr = @centroids.map do |_, c|
+        val = c.mean - x
+        x = c.mean
+        val
+      end
+      output += mean_arr
+      # Variable length encoding of numbers
+      c_arr = @centroids.each_with_object([]) do |(_, c), arr|
+        k = 0
+        n = c.n
+        while n < 0 || n > 0x7f
+          b = 0x80 | (0x7f & n)
+          arr << b
+          n = n >> 7
+          k += 1
+          raise 'Unreasonable large number' if k > 6
+        end
+        arr << n
+      end
+      output += c_arr
+      output.pack("NGNg#{size}C#{size}")
+    end
+    # rubocop:enable Metrics/AbcSize
+
+    def as_json(_ = nil)
+      @centroids.map { |_, c| c.as_json }
+    end
+
+    def bound_mean(x)
+      upper = @centroids.upper_bound(x)
+      lower = @centroids.lower_bound(x)
+      [lower[1], upper[1]]
+    end
+
+    def bound_mean_cumn(cumn)
+      last_c = nil
+      bounds = []
+      @centroids.each_value do |v|
+        if v.mean_cumn == cumn
+          bounds << v
+          break
+        elsif v.mean_cumn > cumn
+          bounds << last_c
+          bounds << v
+          break
+        else
+          last_c = v
+        end
+      end
+      # If still no results, pick lagging value if any
+      bounds << last_c if bounds.empty? && !last_c.nil?
+
+      bounds
+    end
+
+    def compress!
+      points = to_a
+      reset!
+      push_centroid(points.shuffle)
+      _cumulate(true, true)
+      nil
+    end
+
+    def compression
+      1 / @delta
+    end
+
+    def find_nearest(x)
+      return nil if size == 0
+
+      ceil = @centroids.upper_bound(x)
+      floor = @centroids.lower_bound(x)
+
+      return floor[1] if ceil.nil?
+      return ceil[1] if floor.nil?
+
+      ceil_key = ceil[0]
+      floor_key = floor[0]
+
+      if (floor_key - x).abs < (ceil_key - x).abs
+        floor[1]
+      else
+        ceil[1]
+      end
+    end
+
+    def merge!(other)
+      push_centroid(other.centroids.values.shuffle)
+      self
+    end
+
+    # rubocop:disable Metrics/PerceivedComplexity, Metrics/AbcSize
+    # rubocop:disable Metrics/CyclomaticComplexity
+    def p_rank(x)
+      is_array = x.is_a? Array
+      x = [x] unless is_array
+
+      min = @centroids.first
+      max = @centroids.last
+
+      x.map! do |item|
+        if size == 0
+          nil
+        elsif item < min[1].mean
+          0.0
+        elsif item > max[1].mean
+          1.0
+        else
+          _cumulate(true)
+          bound = bound_mean(item)
+          lower, upper = bound
+          mean_cumn = lower.mean_cumn
+          if lower != upper
+            mean_cumn += (item - lower.mean) * (upper.mean_cumn - lower.mean_cumn) \
+              / (upper.mean - lower.mean)
+          end
+          mean_cumn / @n
+        end
+      end
+      is_array ? x : x.first
+    end
+    # rubocop:enable Metrics/PerceivedComplexity, Metrics/AbcSize
+    # rubocop:enable Metrics/CyclomaticComplexity
+
+    # rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
+    # rubocop:disable Metrics/AbcSize
+    def percentile(p)
+      is_array = p.is_a? Array
+      p = [p] unless is_array
+      p.map! do |item|
+        unless (0..1).cover?(item)
+          raise ArgumentError, "p should be in [0,1], got #{item}"
+        end
+        if size == 0
+          nil
+        else
+          _cumulate(true)
+          h = @n * item
+          lower, upper = bound_mean_cumn(h)
+          if lower.nil? && upper.nil?
+            nil
+          elsif upper == lower || lower.nil? || upper.nil?
+            (lower || upper).mean
+          elsif h == lower.mean_cumn
+            lower.mean
+          else
+            upper.mean
+          end
+        end
+      end
+      is_array ? p : p.first
+    end
+    # rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
+    # rubocop:enable Metrics/AbcSize
+
+    def push(x, n = 1)
+      x = [x] unless x.is_a? Array
+      x.each { |value| _digest(value, n) }
+    end
+
+    def push_centroid(c)
+      c = [c] unless c.is_a? Array
+      c.each { |centroid| _digest(centroid.mean, centroid.n) }
+    end
+
+    def reset!
+      @centroids.clear
+      @n = 0
+      @nreset += 1
+      @last_cumulate = 0
+    end
+
+    def size
+      @n || 0
+    end
+
+    def to_a
+      @centroids.map { |_, c| c }
+    end
+
+    # rubocop:disable Metrics/PerceivedComplexity, Metrics/MethodLength
+    # rubocop:disable Metrics/CyclomaticComplexity, Metrics/AbcSize
+    def self.from_bytes(bytes)
+      format, compression, size = bytes.unpack('NGN')
+      tdigest = new(1 / compression)
+
+      start_idx = 16 # after header
+      case format
+      when VERBOSE_ENCODING
+        array = bytes[start_idx..-1].unpack("G#{size}N#{size}")
+        means, counts = array.each_slice(size).to_a if array.any?
+      when SMALL_ENCODING
+        means = bytes[start_idx..(start_idx + 4 * size)].unpack("g#{size}")
+        # Decode delta encoding of means
+        x = 0
+        means.map! do |m|
+          m += x
+          x = m
+          m
+        end
+        counts_bytes = bytes[(start_idx + 4 * size)..-1].unpack('C*')
+        counts = []
+        # Decode variable length integer bytes
+        size.times do
+          v = counts_bytes.shift
+          z = 0x7f & v
+          shift = 7
+          while (v & 0x80) != 0
+            raise 'Shift too large in decode' if shift > 28
+            v = counts_bytes.shift || 0
+            z += (v & 0x7f) << shift
+            shift += 7
+          end
+          counts << z
+        end
+        # This shouldn't happen
+        raise 'Mismatch' unless counts.size == means.size
+      else
+        raise 'Unknown compression format'
+      end
+
+      means.zip(counts).each { |val| tdigest.push(val[0], val[1]) } if means && counts
+
+      tdigest
+    end
+    # rubocop:enable Metrics/PerceivedComplexity, Metrics/MethodLength
+    # rubocop:enable Metrics/CyclomaticComplexity, Metrics/AbcSize
+
+    def self.from_json(array)
+      tdigest = new
+      # Handle both string and symbol keys
+      array.each { |a| tdigest.push(a['m'] || a[:m], a['n'] || a[:n]) }
+      tdigest
+    end
+
+    private
+
+    def _add_weight(nearest, x, n)
+      nearest.mean += n * (x - nearest.mean) / (nearest.n + n) unless x == nearest.mean
+
+      _cumulate(false, true) if nearest.mean_cumn.nil?
+
+      nearest.cumn += n
+      nearest.mean_cumn += n / 2.0
+      nearest.n += n
+
+      nil
+    end
+
+    # rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
+    def _cumulate(exact = false, force = false)
+      unless force
+        factor = if @last_cumulate == 0
+                   Float::INFINITY
+                 else
+                   (@n.to_f / @last_cumulate)
+                 end
+        return if @n == @last_cumulate || (!exact && @cx && @cx > factor)
+      end
+
+      cumn = 0
+      @centroids.each do |_, c|
+        c.mean_cumn = cumn + c.n / 2.0
+        cumn = c.cumn = cumn + c.n
+      end
+      @n = @last_cumulate = cumn
+      nil
+    end
+    # rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
+
+    # rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
+    # rubocop:disable Metrics/AbcSize
+    def _digest(x, n)
+      # Use 'first' and 'last' instead of min/max because of performance reasons
+      # This works because RBTree is sorted
+      min = @centroids.first
+      max = @centroids.last
+
+      min = min.nil? ? nil : min[1]
+      max = max.nil? ? nil : max[1]
+      nearest = find_nearest(x)
+
+      @n += n
+
+      if nearest && nearest.mean == x
+        _add_weight(nearest, x, n)
+      elsif nearest == min
+        _new_centroid(x, n, 0)
+      elsif nearest == max
+        _new_centroid(x, n, @n)
+      else
+        p = nearest.mean_cumn.to_f / @n
+        max_n = (4 * @n * @delta * p * (1 - p)).floor
+        if max_n - nearest.n >= n
+          _add_weight(nearest, x, n)
+        else
+          _new_centroid(x, n, nearest.cumn)
+        end
+      end
+
+      _cumulate(false)
+
+      # If the number of centroids has grown to a very large size,
+      # it may be due to values being inserted in sorted order.
+      # We combat that by replaying the centroids in random order,
+      # which is what compress! does
+      compress! if @centroids.size > (@k / @delta)
+
+      nil
+    end
+    # rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity,
+    # rubocop:enable Metrics/AbcSize
+
+    def _new_centroid(x, n, cumn)
+      c = Centroid.new(x, n, cumn)
+      @centroids[x] = c
+      c
+    end
+  end
+  # rubocop:enable Metrics/ClassLength
+end
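
A rough sketch of the digest's API as defined in this hunk: pushing samples, querying a quantile, and round-tripping through the verbose binary encoding (Stat#to_h instead uses the compact as_small_bytes form and base64-encodes it):

    # Sketch: building a digest, estimating a percentile, and re-reading the bytes.
    digest = Airbrake::TDigest.new(0.05)     # delta of 0.05 => compression of 20
    100.times { digest.push(rand * 100.0) }  # push individual millisecond samples

    digest.percentile(0.95)                  # approximate 95th percentile
    digest.p_rank(50.0)                      # approximate rank of the value 50.0

    bytes    = digest.as_bytes               # big-endian VERBOSE_ENCODING
    restored = Airbrake::TDigest.from_bytes(bytes)
    restored.percentile(0.5)                 # close to the original median

The big-endian packing noted in the class comment is what distinguishes this copy from the upstream castle/tdigest gem, which is why the implementation is vendored here instead of being added as a dependency.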