airbrake-ruby 4.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/airbrake-ruby.rb +513 -0
- data/lib/airbrake-ruby/async_sender.rb +142 -0
- data/lib/airbrake-ruby/backtrace.rb +196 -0
- data/lib/airbrake-ruby/benchmark.rb +39 -0
- data/lib/airbrake-ruby/code_hunk.rb +51 -0
- data/lib/airbrake-ruby/config.rb +229 -0
- data/lib/airbrake-ruby/config/validator.rb +91 -0
- data/lib/airbrake-ruby/deploy_notifier.rb +36 -0
- data/lib/airbrake-ruby/file_cache.rb +48 -0
- data/lib/airbrake-ruby/filter_chain.rb +95 -0
- data/lib/airbrake-ruby/filters/context_filter.rb +29 -0
- data/lib/airbrake-ruby/filters/dependency_filter.rb +31 -0
- data/lib/airbrake-ruby/filters/exception_attributes_filter.rb +46 -0
- data/lib/airbrake-ruby/filters/gem_root_filter.rb +33 -0
- data/lib/airbrake-ruby/filters/git_last_checkout_filter.rb +92 -0
- data/lib/airbrake-ruby/filters/git_repository_filter.rb +64 -0
- data/lib/airbrake-ruby/filters/git_revision_filter.rb +66 -0
- data/lib/airbrake-ruby/filters/keys_blacklist.rb +49 -0
- data/lib/airbrake-ruby/filters/keys_filter.rb +140 -0
- data/lib/airbrake-ruby/filters/keys_whitelist.rb +48 -0
- data/lib/airbrake-ruby/filters/root_directory_filter.rb +28 -0
- data/lib/airbrake-ruby/filters/sql_filter.rb +104 -0
- data/lib/airbrake-ruby/filters/system_exit_filter.rb +23 -0
- data/lib/airbrake-ruby/filters/thread_filter.rb +92 -0
- data/lib/airbrake-ruby/hash_keyable.rb +37 -0
- data/lib/airbrake-ruby/ignorable.rb +44 -0
- data/lib/airbrake-ruby/inspectable.rb +39 -0
- data/lib/airbrake-ruby/loggable.rb +34 -0
- data/lib/airbrake-ruby/monotonic_time.rb +43 -0
- data/lib/airbrake-ruby/nested_exception.rb +38 -0
- data/lib/airbrake-ruby/notice.rb +162 -0
- data/lib/airbrake-ruby/notice_notifier.rb +134 -0
- data/lib/airbrake-ruby/performance_breakdown.rb +45 -0
- data/lib/airbrake-ruby/performance_notifier.rb +125 -0
- data/lib/airbrake-ruby/promise.rb +109 -0
- data/lib/airbrake-ruby/query.rb +53 -0
- data/lib/airbrake-ruby/request.rb +45 -0
- data/lib/airbrake-ruby/response.rb +74 -0
- data/lib/airbrake-ruby/stashable.rb +15 -0
- data/lib/airbrake-ruby/stat.rb +73 -0
- data/lib/airbrake-ruby/sync_sender.rb +113 -0
- data/lib/airbrake-ruby/tdigest.rb +393 -0
- data/lib/airbrake-ruby/time_truncate.rb +17 -0
- data/lib/airbrake-ruby/timed_trace.rb +58 -0
- data/lib/airbrake-ruby/truncator.rb +115 -0
- data/lib/airbrake-ruby/version.rb +6 -0
- data/spec/airbrake_spec.rb +324 -0
- data/spec/async_sender_spec.rb +155 -0
- data/spec/backtrace_spec.rb +427 -0
- data/spec/benchmark_spec.rb +33 -0
- data/spec/code_hunk_spec.rb +115 -0
- data/spec/config/validator_spec.rb +184 -0
- data/spec/config_spec.rb +154 -0
- data/spec/deploy_notifier_spec.rb +48 -0
- data/spec/file_cache.rb +36 -0
- data/spec/filter_chain_spec.rb +92 -0
- data/spec/filters/context_filter_spec.rb +23 -0
- data/spec/filters/dependency_filter_spec.rb +12 -0
- data/spec/filters/exception_attributes_filter_spec.rb +50 -0
- data/spec/filters/gem_root_filter_spec.rb +41 -0
- data/spec/filters/git_last_checkout_filter_spec.rb +46 -0
- data/spec/filters/git_repository_filter.rb +61 -0
- data/spec/filters/git_revision_filter_spec.rb +126 -0
- data/spec/filters/keys_blacklist_spec.rb +225 -0
- data/spec/filters/keys_whitelist_spec.rb +194 -0
- data/spec/filters/root_directory_filter_spec.rb +39 -0
- data/spec/filters/sql_filter_spec.rb +219 -0
- data/spec/filters/system_exit_filter_spec.rb +14 -0
- data/spec/filters/thread_filter_spec.rb +277 -0
- data/spec/fixtures/notroot.txt +7 -0
- data/spec/fixtures/project_root/code.rb +221 -0
- data/spec/fixtures/project_root/empty_file.rb +0 -0
- data/spec/fixtures/project_root/long_line.txt +1 -0
- data/spec/fixtures/project_root/short_file.rb +3 -0
- data/spec/fixtures/project_root/vendor/bundle/ignored_file.rb +5 -0
- data/spec/helpers.rb +9 -0
- data/spec/ignorable_spec.rb +14 -0
- data/spec/inspectable_spec.rb +45 -0
- data/spec/monotonic_time_spec.rb +12 -0
- data/spec/nested_exception_spec.rb +73 -0
- data/spec/notice_notifier_spec.rb +356 -0
- data/spec/notice_notifier_spec/options_spec.rb +259 -0
- data/spec/notice_spec.rb +296 -0
- data/spec/performance_breakdown_spec.rb +12 -0
- data/spec/performance_notifier_spec.rb +435 -0
- data/spec/promise_spec.rb +197 -0
- data/spec/query_spec.rb +11 -0
- data/spec/request_spec.rb +11 -0
- data/spec/response_spec.rb +88 -0
- data/spec/spec_helper.rb +100 -0
- data/spec/stashable_spec.rb +23 -0
- data/spec/stat_spec.rb +47 -0
- data/spec/sync_sender_spec.rb +133 -0
- data/spec/tdigest_spec.rb +230 -0
- data/spec/time_truncate_spec.rb +13 -0
- data/spec/timed_trace_spec.rb +125 -0
- data/spec/truncator_spec.rb +238 -0
- metadata +213 -0
@@ -0,0 +1,45 @@
|
|
1
|
+
module Airbrake
  # Request is a value object describing one handled request. Its data is
  # what route stats are built from.
  #
  # @see Airbrake.notify_request
  # @api public
  # @since v3.2.0
  # rubocop:disable Metrics/BlockLength
  Request = Struct.new(:method, :route, :status_code, :start_time, :end_time) do
    include HashKeyable
    include Ignorable
    include Stashable

    # All members are passed as keywords. Only +end_time+ is optional; when
    # omitted it is set to the moment the object is created.
    def initialize(
      method:,
      route:,
      status_code:,
      start_time:,
      end_time: Time.now
    )
      super(method, route, status_code, start_time, end_time)
    end

    # @return [String] the fixed destination name for this kind of payload
    def destination
      'routes-stats'
    end

    # @return [String] the fixed cargo name for this kind of payload
    def cargo
      'routes'
    end

    # @return [Hash] always empty; requests carry no groups
    def groups
      {}
    end

    # Serializes the request, leaving out every entry whose value is nil.
    # The 'time' entry is +start_time+ passed through
    # TimeTruncate.utc_truncate_minutes.
    #
    # @return [Hash{String=>Object}]
    def to_h
      payload = {
        'method' => method,
        'route' => route,
        'statusCode' => status_code,
        'time' => TimeTruncate.utc_truncate_minutes(start_time)
      }
      payload.reject { |_key, val| val.nil? }
    end
  end
  # rubocop:enable Metrics/BlockLength
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module Airbrake
  # Parses responses coming from the Airbrake API. Handles HTTP errors by
  # logging them.
  #
  # @api private
  # @since v1.0.0
  module Response
    # @return [Integer] the maximum number of characters of a response body
    #   that is kept when the body is reported in a log or error message
    TRUNCATE_LIMIT = 100

    # @return [Integer] HTTP code returned when an IP sends over 10k/min notices
    TOO_MANY_REQUESTS = 429

    class << self
      include Loggable
    end

    # Parses HTTP responses from the Airbrake API.
    #
    # The shape of the result depends on the status code:
    # * 200, 204: +{ response.msg => response.body }+
    # * 201, 400, 401, 403, 420: the body parsed as JSON
    # * 429: a hash with 'error' (String) and 'rate_limit_reset' (Time)
    # * anything else: +{ 'error' => truncated body }+
    #
    # Any StandardError raised while handling the response (for example a
    # body that is not valid JSON) is logged and reported as
    # +{ 'error' => exception.inspect }+ instead of being propagated.
    #
    # @param [Net::HTTPResponse] response
    # @return [Hash{String=>Object}] parsed response
    # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
    def self.parse(response)
      code = response.code.to_i
      body = response.body

      begin
        case code
        when 200, 204
          logger.debug("#{LOG_LABEL} #{name} (#{code}): #{body}")
          { response.msg => response.body }
        when 201
          parsed_body = JSON.parse(body)
          logger.debug("#{LOG_LABEL} #{name} (#{code}): #{parsed_body}")
          parsed_body
        when 400, 401, 403, 420
          parsed_body = JSON.parse(body)
          logger.error("#{LOG_LABEL} #{parsed_body['message']}")
          parsed_body
        when TOO_MANY_REQUESTS
          parsed_body = JSON.parse(body)
          msg = "#{LOG_LABEL} #{parsed_body['message']}"
          logger.error(msg)
          { 'error' => msg, 'rate_limit_reset' => rate_limit_reset(response) }
        else
          body_msg = truncated_body(body)
          logger.error("#{LOG_LABEL} unexpected code (#{code}). Body: #{body_msg}")
          { 'error' => body_msg }
        end
      rescue StandardError => ex
        body_msg = truncated_body(body)
        logger.error("#{LOG_LABEL} error while parsing body (#{ex}). Body: #{body_msg}")
        { 'error' => ex.inspect }
      end
    end
    # rubocop:enable Metrics/MethodLength, Metrics/AbcSize

    # Shortens +body+ for logging purposes.
    #
    # @param [String, nil] body
    # @return [String] a placeholder for a nil body, the first TRUNCATE_LIMIT
    #   characters followed by an ellipsis for a long body, or the body itself
    def self.truncated_body(body)
      if body.nil?
        '[EMPTY_BODY]'.freeze
      elsif body.length > TRUNCATE_LIMIT
        # Start/length slicing keeps exactly TRUNCATE_LIMIT characters (an
        # inclusive 0..TRUNCATE_LIMIT range would keep one more).
        body[0, TRUNCATE_LIMIT] + '...'
      else
        body
      end
    end
    private_class_method :truncated_body

    # @param [Net::HTTPResponse] response
    # @return [Time] the current time advanced by the number of seconds given
    #   in the X-RateLimit-Delay header (0 when the header is absent, because
    #   nil.to_i is 0)
    def self.rate_limit_reset(response)
      Time.now + response['X-RateLimit-Delay'].to_i
    end
    private_class_method :rate_limit_reset
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Airbrake
  # Stashable gives the including class a per-object hash for stashing
  # arbitrary objects. It is mainly used by data objects that users can access
  # through filters.
  #
  # @since v4.4.0
  # @api private
  module Stashable
    # Lazily creates the stash on first access and returns the same hash on
    # every later call.
    #
    # @return [Hash{Symbol=>Object}] the hash with arbitrary objects to be used
    #   in filters
    def stash
      @stash || (@stash = {})
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'base64'
|
2
|
+
|
3
|
+
# rubocop:disable Metrics/BlockLength
|
4
|
+
module Airbrake
  # Stat is a data structure that allows accumulating performance data (route
  # performance, SQL query performance and such). It's powered by TDigests.
  #
  # Usually, one Stat corresponds to one resource (route or query,
  # etc.). Incrementing a stat means pushing new performance statistics.
  #
  # @example
  #   stat = Airbrake::Stat.new
  #   stat.increment(Time.now - 200)
  #   stat.to_h # Pack and serialize data so it can be transmitted.
  #
  # @since v3.2.0
  Stat = Struct.new(:count, :sum, :sumsq, :tdigest) do
    # @param [Integer] count How many times this stat was incremented
    # @param [Float] sum The sum of duration in milliseconds
    # @param [Float] sumsq The squared sum of duration in milliseconds
    # @param [TDigest::TDigest] tdigest Packed durations. By default, the
    #   digest is created with delta 0.05, i.e. compression is 20
    def initialize(count: 0, sum: 0.0, sumsq: 0.0, tdigest: TDigest.new(0.05))
      super(count, sum, sumsq, tdigest)
    end

    # Compresses the digest in place, then serializes all members.
    #
    # @return [Hash{String=>Object}] stats as a hash with compressed TDigest
    #   (serialized as base64)
    def to_h
      tdigest.compress!
      encoded_digest = Base64.strict_encode64(tdigest.as_small_bytes)

      {
        'count' => count,
        'sum' => sum,
        'sumsq' => sumsq,
        'tdigest' => encoded_digest
      }
    end

    # Increments count and updates performance with the difference of +end_time+
    # and +start_time+, converted to milliseconds.
    #
    # @param [Time] start_time
    # @param [Time, nil] end_time defaults to the current time
    # @return [void]
    def increment(start_time, end_time = nil)
      finished_at = end_time || Time.new
      elapsed_ms = (finished_at - start_time) * 1000
      increment_ms(elapsed_ms)
    end

    # Increments count and updates performance with given +ms+ value.
    #
    # @param [Float] ms
    # @return [void]
    def increment_ms(ms)
      self.count += 1
      self.sum += ms
      self.sumsq += ms * ms
      tdigest.push(ms)
    end

    # Custom inspect that leaves the TDigest out: it is uninformative and
    # very slow to dump when an Airbrake::Stat gets logged.
    #
    # @return [String]
    def inspect
      "#<struct Airbrake::Stat count=#{count}, sum=#{sum}, sumsq=#{sumsq}>"
    end
    alias_method :pretty_print, :inspect
  end
end
|
73
|
+
# rubocop:enable Metrics/BlockLength
|
@@ -0,0 +1,113 @@
|
|
1
|
+
module Airbrake
  # Responsible for sending data to Airbrake synchronously via PUT or POST
  # methods. Supports proxies.
  #
  # @see AsyncSender
  # @api private
  # @since v1.0.0
  class SyncSender
    # @return [String] value of the Content-Type header set on every request
    CONTENT_TYPE = 'application/json'.freeze

    include Loggable

    # @param [Symbol] method HTTP method to use to send payload. +:put+
    #   selects PUT; any other value results in POST
    def initialize(method = :post)
      @config = Airbrake::Config.instance
      @method = method
      @rate_limit_reset = Time.now
    end

    # Sends a POST or PUT request to the given +endpoint+ with the +data+
    # payload and settles +promise+ according to the outcome.
    #
    # The promise is rejected when the sender is still rate limited, when the
    # request has no body, when the HTTP call raises, or when the parsed
    # response contains an 'error' key. Otherwise it is resolved with the
    # parsed response.
    #
    # @param [#to_json] data
    # @param [#resolve, #reject] promise the object to settle
    # @param [URI::HTTPS] endpoint
    # @return [Object] +promise+ itself on the early exits, otherwise whatever
    #   +promise.reject+ / +promise.resolve+ returns
    def send(data, promise, endpoint = @config.endpoint)
      return promise if rate_limited_ip?(promise)

      req = build_request(endpoint, data)
      return promise if missing_body?(req, promise)

      https = build_https(endpoint)

      response =
        begin
          https.request(req)
        rescue StandardError => ex
          reason = "#{LOG_LABEL} HTTP error: #{ex}"
          logger.error(reason)
          return promise.reject(reason)
        end

      parsed_resp = Response.parse(response)

      # Remember until when we must hold back, if the response says so.
      reset_key = 'rate_limit_reset'
      @rate_limit_reset = parsed_resp[reset_key] if parsed_resp.key?(reset_key)

      if parsed_resp.key?('error')
        promise.reject(parsed_resp['error'])
      else
        promise.resolve(parsed_resp)
      end
    end

    private

    # Builds the connection object for +uri+: proxy settings (if any), SSL
    # only for HTTPS URIs, and the configured timeout for both opening the
    # connection and reading from it.
    def build_https(uri)
      https = Net::HTTP.new(uri.host, uri.port, *proxy_params)
      https.use_ssl = uri.is_a?(URI::HTTPS)

      if @config.timeout
        https.open_timeout = @config.timeout
        https.read_timeout = @config.timeout
      end

      https
    end

    # Creates a PUT or POST request for +uri+ and fills in body and headers.
    def build_request(uri, data)
      request_class = @method == :put ? Net::HTTP::Put : Net::HTTP::Post
      build_request_body(request_class.new(uri.request_uri), data)
    end

    # Sets the JSON body plus the Authorization, Content-Type and User-Agent
    # headers on +req+.
    #
    # @return [Net::HTTPRequest] the same +req+
    def build_request_body(req, data)
      req.body = data.to_json

      req['Authorization'] = "Bearer #{@config.project_key}"
      req['Content-Type'] = CONTENT_TYPE
      req['User-Agent'] =
        "#{Airbrake::Notice::NOTIFIER[:name]}/#{Airbrake::AIRBRAKE_RUBY_VERSION}" \
        " Ruby/#{RUBY_VERSION}"

      req
    end

    # @return [Array, nil] host, port, user and password of the configured
    #   proxy, or nil when no proxy host is configured (splatting nil adds no
    #   arguments)
    def proxy_params
      proxy = @config.proxy
      return unless proxy.key?(:host)

      proxy.values_at(:host, :port, :user, :password)
    end

    # Rejects +promise+ when the rate limit window has not passed yet.
    #
    # @return [Boolean] whether the sender is currently rate limited
    def rate_limited_ip?(promise)
      return false unless Time.now < @rate_limit_reset

      promise.reject("#{LOG_LABEL} IP is rate limited")
      true
    end

    # Logs and rejects +promise+ when +req+ has no body.
    #
    # @return [Boolean] whether the body is missing
    def missing_body?(req, promise)
      return false unless req.body.nil?

      reason = "#{LOG_LABEL} data was not sent because of missing body"
      logger.error(reason)
      promise.reject(reason)

      true
    end
  end
end
|
@@ -0,0 +1,393 @@
|
|
1
|
+
require 'rbtree'
|
2
|
+
|
3
|
+
module Airbrake
  # Ruby implementation of Ted Dunning's t-digest data structure.
  #
  # This implementation is imported from https://github.com/castle/tdigest with
  # custom modifications. Huge thanks to Castle for the implementation :beer:
  #
  # The difference is that we pack with Big Endian (unlike Native Endian in
  # Castle's version). Our backend does not permit little endian.
  #
  # @see https://github.com/tdunning/t-digest
  # @see https://github.com/castle/tdigest
  # @api private
  # @since v3.2.0
  #
  # rubocop:disable Metrics/ClassLength
  class TDigest
    # Format tag written by {#as_bytes}.
    VERBOSE_ENCODING = 1
    # Format tag written by {#as_small_bytes}.
    SMALL_ENCODING = 2

    # Centroid represents a number of data points.
    # @api private
    # @since v3.2.0
    class Centroid
      # mean:: mean of the points merged into this centroid
      # n:: weight (number of points) of this centroid
      # cumn:: cumulative weight up to and including this centroid
      # mean_cumn:: cumulative weight up to the middle of this centroid
      attr_accessor :mean, :n, :cumn, :mean_cumn

      def initialize(mean, n, cumn, mean_cumn = nil)
        @mean = mean
        @n = n
        @cumn = cumn
        @mean_cumn = mean_cumn
      end

      # @return [Hash{Symbol=>Numeric}] mean and weight only
      def as_json(_ = nil)
        { m: mean, n: n }
      end
    end

    # @return [RBTree] centroids keyed (and therefore sorted) by their mean
    attr_accessor :centroids

    # @param [Float] delta inverse of the compression (see {#compression})
    # @param [Integer] k once the number of centroids exceeds k / delta the
    #   digest is recompressed
    # @param [Float] cx growth factor of the total weight that has to be
    #   reached before cumulative weights are lazily recomputed
    def initialize(delta = 0.01, k = 25, cx = 1.1)
      @delta = delta
      @k = k
      @cx = cx
      @centroids = RBTree.new
      @nreset = 0
      @n = 0
      reset!
    end

    # Builds a new digest out of the centroids of both operands, pushed in
    # random order. Neither operand is modified.
    #
    # @param [TDigest] other
    # @return [TDigest]
    def +(other)
      # Uses delta, k and cx from the caller
      t = self.class.new(@delta, @k, @cx)
      t.push_centroid((centroids.values + other.centroids.values).shuffle)
      t
    end

    # Serializes the digest in the verbose format: a 16-byte header (format,
    # compression, number of centroids) followed by all means as 8-byte
    # floats and all weights as 32-bit integers, everything big endian.
    #
    # @return [String] binary string
    def as_bytes
      # compression as defined by Java implementation
      size = @centroids.size
      output = [VERBOSE_ENCODING, compression, size]
      output += @centroids.map { |_, c| c.mean }
      output += @centroids.map { |_, c| c.n }
      output.pack("NGNG#{size}N#{size}")
    end

    # Serializes the digest in the compact format: the same 16-byte header as
    # {#as_bytes}, then delta-encoded means as 4-byte floats, then the weights
    # as variable-length integers (7 payload bits per byte, least significant
    # group first, high bit set on every byte but the last).
    #
    # @return [String] binary string
    # @raise [RuntimeError] if a weight does not fit into the encoding
    # rubocop:disable Metrics/AbcSize
    def as_small_bytes
      size = @centroids.size
      output = [self.class::SMALL_ENCODING, compression, size]
      x = 0
      # delta encoding allows saving 4-bytes floats
      mean_arr = @centroids.map do |_, c|
        val = c.mean - x
        x = c.mean
        val
      end
      output += mean_arr
      # Variable length encoding of numbers
      c_arr = @centroids.each_with_object([]) do |(_, c), arr|
        k = 0
        n = c.n
        while n < 0 || n > 0x7f
          b = 0x80 | (0x7f & n)
          arr << b
          n = n >> 7
          k += 1
          raise 'Unreasonable large number' if k > 6
        end
        arr << n
      end
      output += c_arr
      # A weight above 0x7f occupies several bytes, so the number of count
      # bytes can exceed the number of centroids. 'C*' packs all of them; a
      # fixed count of +size+ would silently drop the trailing bytes.
      output.pack("NGNg#{size}C*")
    end
    # rubocop:enable Metrics/AbcSize

    # @return [Array<Hash>] the JSON form of every centroid, ordered by mean
    def as_json(_ = nil)
      @centroids.map { |_, c| c.as_json }
    end

    # Finds the centroids surrounding +x+.
    #
    # @param [Numeric] x
    # @return [Array<Centroid>] two elements: the centroid with the smallest
    #   mean >= x, then the centroid with the greatest mean <= x
    def bound_mean(x)
      upper = @centroids.upper_bound(x)
      lower = @centroids.lower_bound(x)
      [lower[1], upper[1]]
    end

    # Finds the centroids surrounding the cumulative weight +cumn+.
    #
    # @param [Numeric] cumn
    # @return [Array<Centroid, nil>] one centroid on an exact +mean_cumn+
    #   match or when every centroid lies below +cumn+; otherwise the pair
    #   [previous centroid or nil, first centroid above]; empty when the
    #   digest has no centroids
    def bound_mean_cumn(cumn)
      last_c = nil
      bounds = []
      @centroids.each_value do |v|
        if v.mean_cumn == cumn
          bounds << v
          break
        elsif v.mean_cumn > cumn
          bounds << last_c
          bounds << v
          break
        else
          last_c = v
        end
      end
      # If still no results, pick lagging value if any
      bounds << last_c if bounds.empty? && !last_c.nil?

      bounds
    end

    # Rebuilds the digest by replaying all current centroids in random order
    # and recomputes the cumulative weights.
    #
    # @return [nil]
    def compress!
      points = to_a
      reset!
      push_centroid(points.shuffle)
      _cumulate(true, true)
      nil
    end

    # @return [Numeric] 1 / delta
    def compression
      1 / @delta
    end

    # Finds the centroid whose mean is closest to +x+. On a tie the centroid
    # with the smaller mean wins.
    #
    # @param [Numeric] x
    # @return [Centroid, nil] nil when the digest is empty
    def find_nearest(x)
      return nil if size == 0

      # RBTree#upper_bound yields the greatest key <= x, RBTree#lower_bound
      # the smallest key >= x.
      below = @centroids.upper_bound(x)
      above = @centroids.lower_bound(x)

      return above[1] if below.nil?
      return below[1] if above.nil?

      if (above[0] - x).abs < (below[0] - x).abs
        above[1]
      else
        below[1]
      end
    end

    # Pushes the centroids of +other+ into this digest in random order.
    #
    # @param [TDigest] other
    # @return [self]
    def merge!(other)
      push_centroid(other.centroids.values.shuffle)
      self
    end

    # Estimates the rank (fraction of points at or below) of the given
    # value(s). The argument is never modified.
    #
    # @param [Numeric, Array<Numeric>] x
    # @return [Float, nil, Array<Float, nil>] a single rank for a scalar, an
    #   array of ranks for an array; nil entries when the digest is empty
    # rubocop:disable Metrics/PerceivedComplexity, Metrics/AbcSize
    # rubocop:disable Metrics/CyclomaticComplexity
    def p_rank(x)
      is_array = x.is_a? Array
      items = is_array ? x : [x]

      min = @centroids.first
      max = @centroids.last

      # Non-destructive map: the caller's array must stay intact.
      ranks = items.map do |item|
        if size == 0
          nil
        elsif item < min[1].mean
          0.0
        elsif item > max[1].mean
          1.0
        else
          _cumulate(true)
          bound = bound_mean(item)
          lower, upper = bound
          mean_cumn = lower.mean_cumn
          if lower != upper
            # Linear interpolation between the two surrounding centroids.
            mean_cumn += (item - lower.mean) * (upper.mean_cumn - lower.mean_cumn) \
              / (upper.mean - lower.mean)
          end
          mean_cumn / @n
        end
      end
      is_array ? ranks : ranks.first
    end
    # rubocop:enable Metrics/PerceivedComplexity, Metrics/AbcSize
    # rubocop:enable Metrics/CyclomaticComplexity

    # Estimates the value(s) at the given percentile(s). The argument is
    # never modified.
    #
    # @param [Numeric, Array<Numeric>] p percentile(s) within [0, 1]
    # @return [Numeric, nil, Array<Numeric, nil>] a single value for a scalar,
    #   an array for an array; nil entries when the digest is empty
    # @raise [ArgumentError] if a percentile lies outside [0, 1]
    # rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
    # rubocop:disable Metrics/AbcSize
    def percentile(p)
      is_array = p.is_a? Array
      items = is_array ? p : [p]

      # Non-destructive map: the caller's array must stay intact.
      values = items.map do |item|
        unless (0..1).cover?(item)
          raise ArgumentError, "p should be in [0,1], got #{item}"
        end
        if size == 0
          nil
        else
          _cumulate(true)
          h = @n * item
          lower, upper = bound_mean_cumn(h)
          if lower.nil? && upper.nil?
            nil
          elsif upper == lower || lower.nil? || upper.nil?
            (lower || upper).mean
          elsif h == lower.mean_cumn
            lower.mean
          else
            upper.mean
          end
        end
      end
      is_array ? values : values.first
    end
    # rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
    # rubocop:enable Metrics/AbcSize

    # Adds one value or an array of values, each with weight +n+.
    #
    # @param [Numeric, Array<Numeric>] x
    # @param [Integer] n
    def push(x, n = 1)
      x = [x] unless x.is_a? Array
      x.each { |value| _digest(value, n) }
    end

    # Adds one centroid or an array of centroids, using the mean and weight
    # of each.
    #
    # @param [Centroid, Array<Centroid>] c
    def push_centroid(c)
      c = [c] unless c.is_a? Array
      c.each { |centroid| _digest(centroid.mean, centroid.n) }
    end

    # Drops all centroids and zeroes the counters.
    def reset!
      @centroids.clear
      @n = 0
      @nreset += 1
      @last_cumulate = 0
    end

    # @return [Integer] total weight of all pushed points
    def size
      @n || 0
    end

    # @return [Array<Centroid>] centroids ordered by mean
    def to_a
      @centroids.map { |_, c| c }
    end

    # Restores a digest from the output of {#as_bytes} or {#as_small_bytes}.
    #
    # @param [String] bytes
    # @return [TDigest]
    # @raise [RuntimeError] on an unknown format tag or malformed counts
    # rubocop:disable Metrics/PerceivedComplexity, Metrics/MethodLength
    # rubocop:disable Metrics/CyclomaticComplexity, Metrics/AbcSize
    def self.from_bytes(bytes)
      format, compression, size = bytes.unpack('NGN')
      tdigest = new(1 / compression)

      start_idx = 16 # after header
      case format
      when VERBOSE_ENCODING
        array = bytes[start_idx..-1].unpack("G#{size}N#{size}")
        means, counts = array.each_slice(size).to_a if array.any?
      when SMALL_ENCODING
        means = bytes[start_idx, 4 * size].unpack("g#{size}")
        # Decode delta encoding of means
        x = 0
        means.map! do |m|
          m += x
          x = m
          m
        end
        counts_bytes = bytes[(start_idx + 4 * size)..-1].unpack('C*')
        counts = []
        # Decode variable length integer bytes
        size.times do
          v = counts_bytes.shift
          z = 0x7f & v
          shift = 7
          while (v & 0x80) != 0
            raise 'Shift too large in decode' if shift > 28
            v = counts_bytes.shift || 0
            z += (v & 0x7f) << shift
            shift += 7
          end
          counts << z
        end
        # This shouldn't happen
        raise 'Mismatch' unless counts.size == means.size
      else
        raise 'Unknown compression format'
      end

      means.zip(counts).each { |val| tdigest.push(val[0], val[1]) } if means && counts

      tdigest
    end
    # rubocop:enable Metrics/PerceivedComplexity, Metrics/MethodLength
    # rubocop:enable Metrics/CyclomaticComplexity, Metrics/AbcSize

    # Restores a digest (with default parameters) from the output of
    # {#as_json}.
    #
    # @param [Array<Hash>] array hashes with 'm'/'n' or :m/:n keys
    # @return [TDigest]
    def self.from_json(array)
      tdigest = new
      # Handle both string and symbol keys
      array.each { |a| tdigest.push(a['m'] || a[:m], a['n'] || a[:n]) }
      tdigest
    end

    private

    # Merges a point of weight +n+ at +x+ into the +nearest+ centroid,
    # shifting its mean and growing its weights.
    def _add_weight(nearest, x, n)
      nearest.mean += n * (x - nearest.mean) / (nearest.n + n) unless x == nearest.mean

      # A freshly created centroid has no mean_cumn yet; compute it first.
      _cumulate(false, true) if nearest.mean_cumn.nil?

      nearest.cumn += n
      nearest.mean_cumn += n / 2.0
      nearest.n += n

      nil
    end

    # Recomputes cumn and mean_cumn of every centroid. Unless +force+ is set,
    # the work is skipped when the total weight has not changed since the last
    # run, or (for non-exact calls) has grown by less than the factor @cx.
    # rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
    def _cumulate(exact = false, force = false)
      unless force
        factor = if @last_cumulate == 0
                   Float::INFINITY
                 else
                   (@n.to_f / @last_cumulate)
                 end
        return if @n == @last_cumulate || (!exact && @cx && @cx > factor)
      end

      cumn = 0
      @centroids.each do |_, c|
        c.mean_cumn = cumn + c.n / 2.0
        cumn = c.cumn = cumn + c.n
      end
      @n = @last_cumulate = cumn
      nil
    end
    # rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity

    # Adds a point of weight +n+ at +x+: either merges it into the nearest
    # centroid or creates a new centroid for it.
    # rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
    # rubocop:disable Metrics/AbcSize
    def _digest(x, n)
      # Use 'first' and 'last' instead of min/max because of performance reasons
      # This works because RBTree is sorted
      min = @centroids.first
      max = @centroids.last

      min = min.nil? ? nil : min[1]
      max = max.nil? ? nil : max[1]
      nearest = find_nearest(x)

      @n += n

      if nearest && nearest.mean == x
        _add_weight(nearest, x, n)
      elsif nearest == min
        # Also taken for an empty digest, where nearest and min are both nil.
        _new_centroid(x, n, 0)
      elsif nearest == max
        _new_centroid(x, n, @n)
      else
        p = nearest.mean_cumn.to_f / @n
        max_n = (4 * @n * @delta * p * (1 - p)).floor
        if max_n - nearest.n >= n
          _add_weight(nearest, x, n)
        else
          _new_centroid(x, n, nearest.cumn)
        end
      end

      _cumulate(false)

      # If the number of centroids has grown to a very large size,
      # it may be due to values being inserted in sorted order.
      # We combat that by replaying the centroids in random order,
      # which is what compress! does
      compress! if @centroids.size > (@k / @delta)

      nil
    end
    # rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
    # rubocop:enable Metrics/AbcSize

    # Creates a centroid at +x+ and stores it in the tree under that key.
    #
    # @return [Centroid]
    def _new_centroid(x, n, cumn)
      c = Centroid.new(x, n, cumn)
      @centroids[x] = c
      c
    end
  end
  # rubocop:enable Metrics/ClassLength
end
|