airbrake-ruby 4.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/airbrake-ruby.rb +515 -0
- data/lib/airbrake-ruby/async_sender.rb +80 -0
- data/lib/airbrake-ruby/backtrace.rb +196 -0
- data/lib/airbrake-ruby/benchmark.rb +39 -0
- data/lib/airbrake-ruby/code_hunk.rb +51 -0
- data/lib/airbrake-ruby/config.rb +229 -0
- data/lib/airbrake-ruby/config/validator.rb +91 -0
- data/lib/airbrake-ruby/deploy_notifier.rb +36 -0
- data/lib/airbrake-ruby/file_cache.rb +54 -0
- data/lib/airbrake-ruby/filter_chain.rb +95 -0
- data/lib/airbrake-ruby/filters/context_filter.rb +29 -0
- data/lib/airbrake-ruby/filters/dependency_filter.rb +31 -0
- data/lib/airbrake-ruby/filters/exception_attributes_filter.rb +46 -0
- data/lib/airbrake-ruby/filters/gem_root_filter.rb +33 -0
- data/lib/airbrake-ruby/filters/git_last_checkout_filter.rb +92 -0
- data/lib/airbrake-ruby/filters/git_repository_filter.rb +64 -0
- data/lib/airbrake-ruby/filters/git_revision_filter.rb +66 -0
- data/lib/airbrake-ruby/filters/keys_blacklist.rb +49 -0
- data/lib/airbrake-ruby/filters/keys_filter.rb +140 -0
- data/lib/airbrake-ruby/filters/keys_whitelist.rb +48 -0
- data/lib/airbrake-ruby/filters/root_directory_filter.rb +28 -0
- data/lib/airbrake-ruby/filters/sql_filter.rb +128 -0
- data/lib/airbrake-ruby/filters/system_exit_filter.rb +23 -0
- data/lib/airbrake-ruby/filters/thread_filter.rb +92 -0
- data/lib/airbrake-ruby/hash_keyable.rb +37 -0
- data/lib/airbrake-ruby/ignorable.rb +44 -0
- data/lib/airbrake-ruby/inspectable.rb +39 -0
- data/lib/airbrake-ruby/loggable.rb +34 -0
- data/lib/airbrake-ruby/monotonic_time.rb +43 -0
- data/lib/airbrake-ruby/nested_exception.rb +38 -0
- data/lib/airbrake-ruby/notice.rb +162 -0
- data/lib/airbrake-ruby/notice_notifier.rb +134 -0
- data/lib/airbrake-ruby/performance_breakdown.rb +46 -0
- data/lib/airbrake-ruby/performance_notifier.rb +155 -0
- data/lib/airbrake-ruby/promise.rb +109 -0
- data/lib/airbrake-ruby/query.rb +54 -0
- data/lib/airbrake-ruby/request.rb +46 -0
- data/lib/airbrake-ruby/response.rb +74 -0
- data/lib/airbrake-ruby/stashable.rb +15 -0
- data/lib/airbrake-ruby/stat.rb +73 -0
- data/lib/airbrake-ruby/sync_sender.rb +113 -0
- data/lib/airbrake-ruby/tdigest.rb +393 -0
- data/lib/airbrake-ruby/thread_pool.rb +128 -0
- data/lib/airbrake-ruby/time_truncate.rb +17 -0
- data/lib/airbrake-ruby/timed_trace.rb +58 -0
- data/lib/airbrake-ruby/truncator.rb +115 -0
- data/lib/airbrake-ruby/version.rb +6 -0
- data/spec/airbrake_spec.rb +324 -0
- data/spec/async_sender_spec.rb +72 -0
- data/spec/backtrace_spec.rb +427 -0
- data/spec/benchmark_spec.rb +33 -0
- data/spec/code_hunk_spec.rb +115 -0
- data/spec/config/validator_spec.rb +184 -0
- data/spec/config_spec.rb +154 -0
- data/spec/deploy_notifier_spec.rb +48 -0
- data/spec/file_cache_spec.rb +34 -0
- data/spec/filter_chain_spec.rb +92 -0
- data/spec/filters/context_filter_spec.rb +23 -0
- data/spec/filters/dependency_filter_spec.rb +12 -0
- data/spec/filters/exception_attributes_filter_spec.rb +50 -0
- data/spec/filters/gem_root_filter_spec.rb +41 -0
- data/spec/filters/git_last_checkout_filter_spec.rb +46 -0
- data/spec/filters/git_repository_filter.rb +61 -0
- data/spec/filters/git_revision_filter_spec.rb +126 -0
- data/spec/filters/keys_blacklist_spec.rb +225 -0
- data/spec/filters/keys_whitelist_spec.rb +194 -0
- data/spec/filters/root_directory_filter_spec.rb +39 -0
- data/spec/filters/sql_filter_spec.rb +276 -0
- data/spec/filters/system_exit_filter_spec.rb +14 -0
- data/spec/filters/thread_filter_spec.rb +277 -0
- data/spec/fixtures/notroot.txt +7 -0
- data/spec/fixtures/project_root/code.rb +221 -0
- data/spec/fixtures/project_root/empty_file.rb +0 -0
- data/spec/fixtures/project_root/long_line.txt +1 -0
- data/spec/fixtures/project_root/short_file.rb +3 -0
- data/spec/fixtures/project_root/vendor/bundle/ignored_file.rb +5 -0
- data/spec/helpers.rb +9 -0
- data/spec/ignorable_spec.rb +14 -0
- data/spec/inspectable_spec.rb +45 -0
- data/spec/monotonic_time_spec.rb +12 -0
- data/spec/nested_exception_spec.rb +73 -0
- data/spec/notice_notifier/options_spec.rb +259 -0
- data/spec/notice_notifier_spec.rb +356 -0
- data/spec/notice_spec.rb +296 -0
- data/spec/performance_breakdown_spec.rb +12 -0
- data/spec/performance_notifier_spec.rb +491 -0
- data/spec/promise_spec.rb +197 -0
- data/spec/query_spec.rb +11 -0
- data/spec/request_spec.rb +11 -0
- data/spec/response_spec.rb +88 -0
- data/spec/spec_helper.rb +100 -0
- data/spec/stashable_spec.rb +23 -0
- data/spec/stat_spec.rb +47 -0
- data/spec/sync_sender_spec.rb +133 -0
- data/spec/tdigest_spec.rb +230 -0
- data/spec/thread_pool_spec.rb +158 -0
- data/spec/time_truncate_spec.rb +13 -0
- data/spec/timed_trace_spec.rb +125 -0
- data/spec/truncator_spec.rb +238 -0
- metadata +216 -0
@@ -0,0 +1,46 @@
|
|
1
|
+
module Airbrake
|
2
|
+
# Request holds request data that powers route stats.
|
3
|
+
#
|
4
|
+
# @see Airbrake.notify_request
|
5
|
+
# @api public
|
6
|
+
# @since v3.2.0
|
7
|
+
# rubocop:disable Metrics/BlockLength
|
8
|
+
Request = Struct.new(:method, :route, :status_code, :start_time, :end_time) do
  include HashKeyable
  include Ignorable
  include Stashable

  # Builds a request record. All members are passed as keywords; +end_time+
  # defaults to the moment of construction. The start time is additionally
  # kept in a truncated form (see TimeTruncate.utc_truncate_minutes), which is
  # the value reported by {#to_h}.
  def initialize(method:, route:, status_code:, start_time:, end_time: Time.now)
    @start_time_utc = TimeTruncate.utc_truncate_minutes(start_time)
    super(method, route, status_code, start_time, end_time)
  end

  # @return [String] the fixed destination name for this kind of payload
  def destination
    'routes-stats'
  end

  # @return [String] the fixed cargo name for this kind of payload
  def cargo
    'routes'
  end

  # @return [Hash] always an empty hash for requests
  def groups
    {}
  end

  # @return [Hash{String=>Object}] the request attributes keyed by their
  #   serialized names; entries whose value is nil are left out
  def to_h
    payload = {
      'method' => method,
      'route' => route,
      'statusCode' => status_code,
      'time' => @start_time_utc
    }
    payload.reject { |_name, value| value.nil? }
  end
end
|
45
|
+
# rubocop:enable Metrics/BlockLength
|
46
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
module Airbrake
|
2
|
+
# Parses responses coming from the Airbrake API. Handles HTTP errors by
|
3
|
+
# logging them.
|
4
|
+
#
|
5
|
+
# @api private
|
6
|
+
# @since v1.0.0
|
7
|
+
module Response
|
8
|
+
# @return [Integer] the limit of the response body
|
9
|
+
TRUNCATE_LIMIT = 100
|
10
|
+
|
11
|
+
# @return [Integer] HTTP code returned when an IP sends over 10k/min notices
|
12
|
+
TOO_MANY_REQUESTS = 429
|
13
|
+
|
14
|
+
class << self
|
15
|
+
include Loggable
|
16
|
+
end
|
17
|
+
|
18
|
+
# Turns an HTTP response from the Airbrake API into a hash and logs the
# outcome. The result depends on the status code:
#
# * 200, 204: +{ response.msg => response.body }+
# * 201 and 400, 401, 403, 420: the JSON-decoded body
# * 429: an 'error' message plus the 'rate_limit_reset' time
# * anything else: an 'error' holding the (truncated) body
#
# Any StandardError raised while handling the body is logged and reported
# back as +{ 'error' => exception.inspect }+ instead of being raised.
#
# @param [Net::HTTPResponse] response
# @return [Hash{String=>Object}] parsed response
# rubocop:disable Metrics/MethodLength, Metrics/AbcSize
def self.parse(response)
  status = response.code.to_i
  raw = response.body

  begin
    case status
    when 200, 204
      logger.debug("#{LOG_LABEL} #{name} (#{status}): #{raw}")
      { response.msg => response.body }
    when 201
      payload = JSON.parse(raw)
      logger.debug("#{LOG_LABEL} #{name} (#{status}): #{payload}")
      payload
    when 400, 401, 403, 420
      payload = JSON.parse(raw)
      logger.error("#{LOG_LABEL} #{payload['message']}")
      payload
    when TOO_MANY_REQUESTS
      reason = "#{LOG_LABEL} #{JSON.parse(raw)['message']}"
      logger.error(reason)
      { 'error' => reason, 'rate_limit_reset' => rate_limit_reset(response) }
    else
      excerpt = truncated_body(raw)
      logger.error("#{LOG_LABEL} unexpected code (#{status}). Body: #{excerpt}")
      { 'error' => excerpt }
    end
  rescue StandardError => ex
    excerpt = truncated_body(raw)
    logger.error("#{LOG_LABEL} error while parsing body (#{ex}). Body: #{excerpt}")
    { 'error' => ex.inspect }
  end
end
|
56
|
+
# rubocop:enable Metrics/MethodLength, Metrics/AbcSize
|
57
|
+
|
58
|
+
# Shortens a response body so it can be embedded in a log line.
#
# @param [String, nil] body
# @return [String] '[EMPTY_BODY]' when +body+ is nil; +body+ itself when it
#   has at most TRUNCATE_LIMIT characters; otherwise a new string made of
#   the first TRUNCATE_LIMIT characters followed by '...'
def self.truncated_body(body)
  return '[EMPTY_BODY]'.freeze if body.nil?
  return body unless body.length > TRUNCATE_LIMIT

  # Exclusive range: an inclusive 0..TRUNCATE_LIMIT would keep one character
  # more than the limit. The slice is a new string, so appending to it does
  # not modify the caller's body.
  body[0...TRUNCATE_LIMIT] << '...'
end
|
67
|
+
private_class_method :truncated_body
|
68
|
+
|
69
|
+
# Computes the time until which requests should be held back.
#
# @param [#[]] response object exposing the 'X-RateLimit-Delay' header
# @return [Time] the current time plus the header value converted with
#   +to_i+ (a missing header therefore adds 0)
def self.rate_limit_reset(response)
  delay = response['X-RateLimit-Delay'].to_i
  Time.now + delay
end
|
72
|
+
private_class_method :rate_limit_reset
|
73
|
+
end
|
74
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Airbrake
|
2
|
+
# Stashable should be included in any class that wants the ability to stash
|
3
|
+
# arbitrary objects. It is mainly used by data objects that users can access
|
4
|
+
# through filters.
|
5
|
+
#
|
6
|
+
# @since v4.4.0
|
7
|
+
# @api private
|
8
|
+
module Stashable
  # Returns the per-object stash, creating it on first access. Later calls
  # hand back the same hash, so anything stored in it is kept.
  #
  # @return [Hash{Symbol=>Object}] the hash with arbitrary objects to be used
  #   in filters
  def stash
    return @stash if @stash

    @stash = {}
  end
end
|
15
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'base64'
|
2
|
+
|
3
|
+
# rubocop:disable Metrics/BlockLength
|
4
|
+
module Airbrake
|
5
|
+
# Stat is a data structure that allows accumulating performance data (route
|
6
|
+
# performance, SQL query performance and such). It's powered by TDigests.
|
7
|
+
#
|
8
|
+
# Usually, one Stat corresponds to one resource (route or query,
|
9
|
+
# etc.). Incrementing a stat means pushing new performance statistics.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# stat = Airbrake::Stat.new
|
13
|
+
# stat.increment(Time.now - 200)
|
14
|
+
# stat.to_h # Pack and serialize data so it can be transmitted.
|
15
|
+
#
|
16
|
+
# @since v3.2.0
|
17
|
+
Stat = Struct.new(:count, :sum, :sumsq, :tdigest) do
  # @param [Integer] count How many times this stat was incremented
  # @param [Float] sum The sum of durations in milliseconds
  # @param [Float] sumsq The sum of squared durations in milliseconds
  # @param [TDigest] tdigest Packed durations. By default, a digest with
  #   delta 0.05 is used (compression is 1 / delta, i.e. 20)
  def initialize(count: 0, sum: 0.0, sumsq: 0.0, tdigest: TDigest.new(0.05))
    super(count, sum, sumsq, tdigest)
  end

  # Compresses the digest in place, then serializes the stat.
  #
  # @return [Hash{String=>Object}] stats as a hash with compressed TDigest
  #   (serialized as base64)
  def to_h
    tdigest.compress!
    {
      'count' => count,
      'sum' => sum,
      'sumsq' => sumsq,
      'tdigest' => Base64.strict_encode64(tdigest.as_small_bytes)
    }
  end

  # Increments count and updates performance with the difference of +end_time+
  # and +start_time+, converted to milliseconds.
  #
  # @param [Time] start_time
  # @param [Time, nil] end_time defaults to the current time when omitted
  # @return [void]
  def increment(start_time, end_time = nil)
    end_time ||= Time.new
    increment_ms((end_time - start_time) * 1000)
  end

  # Increments count and updates performance with given +ms+ value.
  #
  # @param [Float] ms
  # @return [void]
  def increment_ms(ms)
    self.count += 1

    self.sum += ms
    self.sumsq += ms * ms

    tdigest.push(ms)
  end

  # We define custom inspect so that we weed out uninformative TDigest, which
  # is also very slow to dump when we log Airbrake::Stat.
  #
  # @return [String]
  def inspect
    "#<struct Airbrake::Stat count=#{count}, sum=#{sum}, sumsq=#{sumsq}>"
  end

  # Pretty-printing hook. The pp library calls +pretty_print(q)+ with a
  # printer object, so a plain alias of the zero-argument #inspect raised
  # ArgumentError under +pp+. With a printer the compact #inspect text is
  # written to it; without one the text is returned, as the alias used to do.
  #
  # @param [#text, nil] q the printer supplied by pp
  # @return [Object] the #inspect string when +q+ is nil, otherwise whatever
  #   +q.text+ returns
  def pretty_print(q = nil)
    return inspect if q.nil?

    q.text(inspect)
  end
end
|
72
|
+
end
|
73
|
+
# rubocop:enable Metrics/BlockLength
|
@@ -0,0 +1,113 @@
|
|
1
|
+
module Airbrake
|
2
|
+
# Responsible for sending data to Airbrake synchronously via PUT or POST
|
3
|
+
# methods. Supports proxies.
|
4
|
+
#
|
5
|
+
# @see AsyncSender
|
6
|
+
# @api private
|
7
|
+
# @since v1.0.0
|
8
|
+
class SyncSender
  # @return [String] value of the Content-Type header set on every request
  CONTENT_TYPE = 'application/json'.freeze

  include Loggable

  # @param [Symbol] method HTTP method to use to send payload; +:put+ selects
  #   PUT, any other value results in POST
  def initialize(method = :post)
    @config = Airbrake::Config.instance
    @method = method
    # Initialised to "now", so the first call to #send is not rate limited.
    @rate_limit_reset = Time.now
  end

  # Sends a POST or PUT request to the given +endpoint+ with the +data+ payload.
  #
  # The outcome is reported through +promise+: it is rejected when the sender
  # is currently rate limited, when the request has no body, when the HTTP
  # call raises, or when the parsed response carries an 'error' key; otherwise
  # it is resolved with the parsed response.
  #
  # @param [#to_json] data
  # @param [#reject, #resolve] promise
  # @param [URI::HTTPS] endpoint
  # @return [Object] +promise+ itself on the early exits, otherwise whatever
  #   +promise.reject+ / +promise.resolve+ returns
  def send(data, promise, endpoint = @config.endpoint)
    return promise if rate_limited_ip?(promise)

    req = build_request(endpoint, data)
    return promise if missing_body?(req, promise)

    https = build_https(endpoint)

    begin
      response = https.request(req)
    rescue StandardError => ex
      reason = "#{LOG_LABEL} HTTP error: #{ex}"
      logger.error(reason)
      return promise.reject(reason)
    end

    parsed = Response.parse(response)
    # Remember the back-off deadline announced by the API, if any.
    @rate_limit_reset = parsed['rate_limit_reset'] if parsed.key?('rate_limit_reset')

    if parsed.key?('error')
      promise.reject(parsed['error'])
    else
      promise.resolve(parsed)
    end
  end

  private

  # Creates the HTTP client for +uri+, going through the configured proxy (if
  # any) and applying the configured timeout to both connect and read.
  def build_https(uri)
    https = Net::HTTP.new(uri.host, uri.port, *proxy_params)
    https.use_ssl = uri.is_a?(URI::HTTPS)

    if @config.timeout
      https.open_timeout = @config.timeout
      https.read_timeout = @config.timeout
    end

    https
  end

  # Creates a PUT or POST request object for +uri+ and fills in body and
  # headers.
  def build_request(uri, data)
    request_class = @method == :put ? Net::HTTP::Put : Net::HTTP::Post
    build_request_body(request_class.new(uri.request_uri), data)
  end

  # Serializes +data+ into the request body and sets the Authorization,
  # Content-Type and User-Agent headers.
  #
  # @return [Net::HTTPRequest] the same +req+ object
  def build_request_body(req, data)
    req.body = data.to_json

    user_agent =
      "#{Airbrake::Notice::NOTIFIER[:name]}/#{Airbrake::AIRBRAKE_RUBY_VERSION}" \
      " Ruby/#{RUBY_VERSION}"

    req['Authorization'] = "Bearer #{@config.project_key}"
    req['Content-Type'] = CONTENT_TYPE
    req['User-Agent'] = user_agent

    req
  end

  # @return [Array, nil] host, port, user and password of the configured
  #   proxy, or nil when no proxy host is configured (splatting nil adds no
  #   arguments to Net::HTTP.new)
  def proxy_params
    proxy = @config.proxy
    return unless proxy.key?(:host)

    proxy.values_at(:host, :port, :user, :password)
  end

  # Rejects +promise+ when the rate limit deadline has not passed yet.
  #
  # @return [Boolean] whether the sender is currently rate limited
  def rate_limited_ip?(promise)
    return false unless Time.now < @rate_limit_reset

    promise.reject("#{LOG_LABEL} IP is rate limited")
    true
  end

  # Logs and rejects +promise+ when the request has no body.
  #
  # @return [Boolean] whether the body is missing
  def missing_body?(req, promise)
    return false unless req.body.nil?

    reason = "#{LOG_LABEL} data was not sent because of missing body"
    logger.error(reason)
    promise.reject(reason)

    true
  end
end
|
113
|
+
end
|
@@ -0,0 +1,393 @@
|
|
1
|
+
require 'rbtree'
|
2
|
+
|
3
|
+
module Airbrake
|
4
|
+
# Ruby implementation of Ted Dunning's t-digest data structure.
|
5
|
+
#
|
6
|
+
# This implementation is imported from https://github.com/castle/tdigest with
|
7
|
+
# custom modifications. Huge thanks to Castle for the implementation :beer:
|
8
|
+
#
|
9
|
+
# The difference is that we pack with Big Endian (unlike Native Endian in
|
10
|
+
# Castle's version). Our backend does not permit little endian.
|
11
|
+
#
|
12
|
+
# @see https://github.com/tdunning/t-digest
|
13
|
+
# @see https://github.com/castle/tdigest
|
14
|
+
# @api private
|
15
|
+
# @since v3.2.0
|
16
|
+
#
|
17
|
+
# rubocop:disable Metrics/ClassLength
|
18
|
+
class TDigest
|
19
|
+
VERBOSE_ENCODING = 1
|
20
|
+
SMALL_ENCODING = 2
|
21
|
+
|
22
|
+
# Centroid represents a number of data points.
|
23
|
+
# @api private
|
24
|
+
# @since v3.2.0
|
25
|
+
class Centroid
  # mean and n are the centroid's mean and weight; cumn and mean_cumn hold
  # the cumulative figures maintained by the owning digest.
  attr_accessor :mean, :n, :cumn, :mean_cumn

  # @param mean the centroid's mean
  # @param n the centroid's weight
  # @param cumn the cumulative weight
  # @param mean_cumn optional; stays nil until the digest computes it
  def initialize(mean, n, cumn, mean_cumn = nil)
    @mean = mean
    @n = n
    @cumn = cumn
    @mean_cumn = mean_cumn
  end

  # @return [Hash{Symbol=>Object}] the mean under :m and the weight under :n
  def as_json(_ = nil)
    { m: @mean, n: @n }
  end
end
|
38
|
+
|
39
|
+
attr_accessor :centroids
|
40
|
+
# @param delta reciprocal of the compression factor (see #compression)
# @param k together with delta sets the centroid count (k / delta) above
#   which the digest compresses itself while digesting values
# @param cx growth factor below which a non-exact, non-forced cumulation is
#   skipped
def initialize(delta = 0.01, k = 25, cx = 1.1)
  @delta = delta
  @k = k
  @cx = cx

  # The tree and the reset counter must exist before reset! touches them.
  @centroids = RBTree.new
  @nreset = 0
  @n = 0
  reset!
end
|
49
|
+
|
50
|
+
# Builds a new digest holding the centroids of both operands. Neither
# operand is modified.
#
# The centroids are replayed in random order. A single shuffle gives a
# uniformly random order in linear time (the same approach #merge! and
# #compress! use), instead of repeatedly deleting a random element, which is
# quadratic in the number of centroids.
#
# @param [TDigest] other
# @return [TDigest] a new digest
def +(other)
  # Uses delta, k and cx from the caller
  merged = self.class.new(@delta, @k, @cx)
  merged.push_centroid((centroids.values + other.centroids.values).shuffle)
  merged
end
|
57
|
+
|
58
|
+
# Serializes the digest in the verbose layout: encoding marker, compression
# and centroid count, followed by every mean (8-byte big-endian floats) and
# then every weight (32-bit big-endian unsigned integers).
#
# @return [String] packed binary string
def as_bytes
  # compression as defined by Java implementation
  count = @centroids.size
  means = @centroids.map { |_, centroid| centroid.mean }
  weights = @centroids.map { |_, centroid| centroid.n }

  [VERBOSE_ENCODING, compression, count, *means, *weights]
    .pack("NGNG#{count}N#{count}")
end
|
66
|
+
|
67
|
+
# rubocop:disable Metrics/AbcSize
|
68
|
+
# Serializes the digest in the compact layout: encoding marker, compression
# and centroid count, followed by the means as delta-encoded 4-byte
# big-endian floats and the weights as variable-length integers (7 payload
# bits per byte, high bit set on every byte except the last of a number).
#
# @return [String] packed binary string
# @raise [RuntimeError] when a weight needs more than 7 bytes (this includes
#   negative weights, which never terminate the encoding loop on their own)
def as_small_bytes
  size = @centroids.size
  output = [self.class::SMALL_ENCODING, compression, size]
  x = 0
  # delta encoding allows saving 4-bytes floats
  mean_arr = @centroids.map do |_, c|
    val = c.mean - x
    x = c.mean
    val
  end
  output += mean_arr
  # Variable length encoding of numbers
  c_arr = @centroids.each_with_object([]) do |(_, c), arr|
    k = 0
    n = c.n
    while n < 0 || n > 0x7f
      b = 0x80 | (0x7f & n)
      arr << b
      n = n >> 7
      k += 1
      raise 'Unreasonable large number' if k > 6
    end
    arr << n
  end
  output += c_arr
  # A weight above 0x7f takes several bytes, so the byte count can exceed the
  # centroid count. Pack exactly as many bytes as were produced; using the
  # centroid count here silently dropped the trailing bytes.
  output.pack("NGNg#{size}C#{c_arr.size}")
end
|
95
|
+
# rubocop:enable Metrics/AbcSize
|
96
|
+
|
97
|
+
# @return [Array] the JSON-ready form of every centroid, in tree order
def as_json(_ = nil)
  @centroids.values.map(&:as_json)
end
|
100
|
+
|
101
|
+
# Looks up the centroids the tree reports as bounds for +x+.
#
# NOTE(review): per the rbtree documentation lower_bound yields the smallest
# key >= x and upper_bound the greatest key <= x — confirm when relying on
# which element is the larger one.
#
# @return [Array] the value found by lower_bound first, then the value found
#   by upper_bound
def bound_mean(x)
  from_upper = @centroids.upper_bound(x)
  from_lower = @centroids.lower_bound(x)
  [from_lower, from_upper].map { |pair| pair[1] }
end
|
106
|
+
|
107
|
+
# Walks the centroids in tree order looking for the ones around +cumn+,
# compared against each centroid's mean_cumn.
#
# @return [Array] one centroid on an exact match; the preceding centroid
#   (nil if there is none) and the first centroid above +cumn+ otherwise;
#   the last centroid alone when all lie below +cumn+; empty when the tree
#   is empty
def bound_mean_cumn(cumn)
  previous = nil

  @centroids.each_value do |centroid|
    return [centroid] if centroid.mean_cumn == cumn
    return [previous, centroid] if centroid.mean_cumn > cumn

    previous = centroid
  end

  # Nothing reached cumn: fall back to the lagging value, if any
  previous.nil? ? [] : [previous]
end
|
127
|
+
|
128
|
+
# Rebuilds the digest from its own centroids: takes a snapshot, resets the
# digest, replays the snapshot in random order and forces an exact
# recomputation of the cumulative values.
#
# @return [nil]
def compress!
  snapshot = to_a
  reset!
  push_centroid(snapshot.shuffle)
  _cumulate(true, true)
  nil
end
|
135
|
+
|
136
|
+
# @return [Numeric] the reciprocal of delta, computed with plain +/+ (so an
#   Integer delta gives an Integer result)
def compression
  1 / @delta
end
|
139
|
+
|
140
|
+
# Finds the centroid whose key is closest to +x+.
#
# Both tree bounds of +x+ are looked up. When only one exists it wins;
# otherwise the one with the smaller absolute key distance is chosen, and a
# tie goes to the upper_bound result.
#
# @return [Object, nil] the nearest centroid, or nil when the digest is empty
def find_nearest(x)
  return nil if size == 0

  upper_hit = @centroids.upper_bound(x)
  lower_hit = @centroids.lower_bound(x)

  return lower_hit[1] if upper_hit.nil?
  return upper_hit[1] if lower_hit.nil?

  lower_distance = (lower_hit[0] - x).abs
  upper_distance = (upper_hit[0] - x).abs
  closest = lower_distance < upper_distance ? lower_hit : upper_hit
  closest[1]
end
|
158
|
+
|
159
|
+
# Feeds the centroids of +other+ into this digest in random order.
#
# @param [#centroids] other
# @return [self]
def merge!(other)
  incoming = other.centroids.values
  push_centroid(incoming.shuffle)
  self
end
|
163
|
+
|
164
|
+
# rubocop:disable Metrics/PerceivedComplexity, Metrics/AbcSize
|
165
|
+
# rubocop:disable Metrics/CyclomaticComplexity
|
166
|
+
# Estimates the rank (a fraction between 0.0 and 1.0) of one value or of
# each value in an array.
#
# For each value: nil when the digest is empty, 0.0 below the smallest
# centroid mean, 1.0 above the largest, otherwise a linear interpolation of
# mean_cumn between the two bounding centroids, divided by the total weight.
#
# The argument is never modified; an array argument yields a new array.
#
# @param [Numeric, Array<Numeric>] x
# @return [Float, nil, Array<Float, nil>] one rank for a scalar argument, an
#   array of ranks for an array argument
def p_rank(x)
  is_array = x.is_a? Array
  items = is_array ? x : [x]

  min = @centroids.first
  max = @centroids.last

  # map (not map!) so the caller's array is left untouched
  ranks = items.map do |item|
    if size == 0
      nil
    elsif item < min[1].mean
      0.0
    elsif item > max[1].mean
      1.0
    else
      _cumulate(true)
      lower, upper = bound_mean(item)
      mean_cumn = lower.mean_cumn
      if lower != upper
        mean_cumn += (item - lower.mean) * (upper.mean_cumn - lower.mean_cumn) /
                     (upper.mean - lower.mean)
      end
      mean_cumn / @n
    end
  end

  is_array ? ranks : ranks.first
end
|
194
|
+
# rubocop:enable Metrics/PerceivedComplexity, Metrics/AbcSize
|
195
|
+
# rubocop:enable Metrics/CyclomaticComplexity
|
196
|
+
|
197
|
+
# rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
198
|
+
# rubocop:disable Metrics/AbcSize
|
199
|
+
# Estimates the value at one quantile or at each quantile in an array.
#
# For each quantile: nil when the digest is empty or no bounding centroid is
# found; otherwise the mean of one of the centroids bounding the cumulative
# position +@n * quantile+ (see #bound_mean_cumn).
#
# The argument is never modified; an array argument yields a new array, and
# a failed validation leaves the caller's array as it was.
#
# @param [Numeric, Array<Numeric>] p quantile(s), each within 0..1
# @return [Numeric, nil, Array] one value for a scalar argument, an array of
#   values for an array argument
# @raise [ArgumentError] when a quantile lies outside 0..1
def percentile(p)
  is_array = p.is_a? Array
  quantiles = is_array ? p : [p]

  # map (not map!) so the caller's array is left untouched
  values = quantiles.map do |item|
    unless (0..1).cover?(item)
      raise ArgumentError, "p should be in [0,1], got #{item}"
    end

    if size == 0
      nil
    else
      _cumulate(true)
      h = @n * item
      lower, upper = bound_mean_cumn(h)
      if lower.nil? && upper.nil?
        nil
      elsif upper == lower || lower.nil? || upper.nil?
        (lower || upper).mean
      elsif h == lower.mean_cumn
        lower.mean
      else
        upper.mean
      end
    end
  end

  is_array ? values : values.first
end
|
225
|
+
# rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
226
|
+
# rubocop:enable Metrics/AbcSize
|
227
|
+
|
228
|
+
# Adds one value, or every value of an array, to the digest with weight +n+.
#
# @param [Object, Array] x
# @param n weight applied to each value
# @return [Array] the values that were digested
def push(x, n = 1)
  values = x.is_a?(Array) ? x : [x]
  values.each { |value| _digest(value, n) }
end
|
232
|
+
|
233
|
+
# Adds one centroid, or every centroid of an array, to the digest using each
# centroid's mean and weight.
#
# @param [Object, Array] c
# @return [Array] the centroids that were digested
def push_centroid(c)
  centroids = c.is_a?(Array) ? c : [c]
  centroids.each { |centroid| _digest(centroid.mean, centroid.n) }
end
|
237
|
+
|
238
|
+
# Empties the digest: clears the centroid tree, zeroes the total weight and
# the last-cumulate marker, and counts the reset.
#
# @return [Integer] 0
def reset!
  @nreset += 1
  @centroids.clear
  @n = @last_cumulate = 0
end
|
244
|
+
|
245
|
+
# @return [Numeric] the total weight held by the digest (0 when unset)
def size
  return 0 unless @n

  @n
end
|
248
|
+
|
249
|
+
# @return [Array] the centroids, in tree order
def to_a
  @centroids.values
end
|
252
|
+
|
253
|
+
# rubocop:disable Metrics/PerceivedComplexity, Metrics/MethodLength
|
254
|
+
# rubocop:disable Metrics/CyclomaticComplexity, Metrics/AbcSize
|
255
|
+
# Rebuilds a digest from a binary string in either the verbose or the
# compact layout (see #as_bytes and #as_small_bytes). The header is read
# first: encoding marker, compression and centroid count.
#
# @param [String] bytes
# @return [TDigest] a new digest created with delta = 1 / compression and
#   fed with the decoded (mean, weight) pairs
# @raise [RuntimeError] on an unknown encoding marker, on a variable-length
#   weight that is too long, or on a means/weights count mismatch
def self.from_bytes(bytes)
  format, compression, size = bytes.unpack('NGN')
  tdigest = new(1 / compression)

  header_length = 16 # after header
  case format
  when VERBOSE_ENCODING
    decoded = bytes[header_length..-1].unpack("G#{size}N#{size}")
    means, counts = decoded.each_slice(size).to_a if decoded.any?
  when SMALL_ENCODING
    means_end = header_length + 4 * size
    # Decode delta encoding of means
    running = 0
    means = bytes[header_length..means_end].unpack("g#{size}")
    means.map! { |delta| running = delta + running }

    raw_counts = bytes[means_end..-1].unpack('C*')
    # Decode variable length integer bytes
    counts = Array.new(size) do
      byte = raw_counts.shift
      value = 0x7f & byte
      shift = 7
      while (byte & 0x80) != 0
        raise 'Shift too large in decode' if shift > 28

        byte = raw_counts.shift || 0
        value += (byte & 0x7f) << shift
        shift += 7
      end
      value
    end
    # This shouldn't happen
    raise 'Mismatch' unless counts.size == means.size
  else
    raise 'Unknown compression format'
  end

  if means && counts
    means.zip(counts).each { |mean, count| tdigest.push(mean, count) }
  end

  tdigest
end
|
298
|
+
# rubocop:enable Metrics/PerceivedComplexity, Metrics/MethodLength
|
299
|
+
# rubocop:enable Metrics/CyclomaticComplexity, Metrics/AbcSize
|
300
|
+
|
301
|
+
# Rebuilds a digest from a list of hashes holding a mean under 'm' / :m and
# a weight under 'n' / :n (the shape produced by #as_json).
#
# @param [Array<Hash>] array
# @return [TDigest] a new digest created with default parameters
def self.from_json(array)
  tdigest = new

  array.each do |entry|
    # Handle both string and symbol keys
    mean = entry['m'] || entry[:m]
    weight = entry['n'] || entry[:n]
    tdigest.push(mean, weight)
  end

  tdigest
end
|
307
|
+
|
308
|
+
private
|
309
|
+
|
310
|
+
# Folds a value +x+ of weight +n+ into the existing centroid +nearest+: the
# mean moves towards +x+ in proportion to the added weight, and the
# centroid's weight and cumulative figures grow by +n+ (mean_cumn by half of
# it).
#
# @return [nil]
def _add_weight(nearest, x, n)
  if x != nearest.mean
    # Uses the weight from before this addition, so nearest.n is bumped last.
    nearest.mean += n * (x - nearest.mean) / (nearest.n + n)
  end

  # mean_cumn is nil until a cumulation has run; force one before adding to it
  _cumulate(false, true) if nearest.mean_cumn.nil?

  nearest.cumn += n
  nearest.mean_cumn += n / 2.0
  nearest.n += n

  nil
end
|
321
|
+
|
322
|
+
# rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
323
|
+
# Recomputes cumn and mean_cumn of every centroid in tree order and sets the
# total weight (and the last-cumulate marker) to the resulting sum.
#
# Unless +force+ is set, the work is skipped when the total weight has not
# changed since the last run, or — for non-exact requests — when it has
# grown by less than the factor cx.
#
# @param [Boolean] exact ignore the cx growth threshold
# @param [Boolean] force always recompute
# @return [nil]
def _cumulate(exact = false, force = false)
  unless force
    growth = @last_cumulate == 0 ? Float::INFINITY : @n.to_f / @last_cumulate
    return if @n == @last_cumulate
    return if !exact && @cx && @cx > growth
  end

  running = 0
  @centroids.each do |_, centroid|
    centroid.mean_cumn = running + centroid.n / 2.0
    running += centroid.n
    centroid.cumn = running
  end
  @n = @last_cumulate = running
  nil
end
|
341
|
+
# rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
342
|
+
|
343
|
+
# rubocop:disable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
344
|
+
# rubocop:disable Metrics/AbcSize
|
345
|
+
# Adds a value +x+ of weight +n+ to the digest.
#
# The nearest centroid decides what happens: an exact mean match absorbs the
# weight; a nearest centroid that is the first or the last one (or no
# centroid at all) leads to a new centroid; an interior centroid absorbs the
# weight only while it stays within its size limit, otherwise a new centroid
# is created next to it. Afterwards a non-exact cumulation is requested and
# the digest compresses itself when it holds more than k / delta centroids.
#
# @return [nil]
def _digest(x, n)
  # Use 'first' and 'last' instead of min/max because of performance reasons
  # This works because RBTree is sorted
  first_pair = @centroids.first
  last_pair = @centroids.last

  min = first_pair[1] unless first_pair.nil?
  max = last_pair[1] unless last_pair.nil?
  nearest = find_nearest(x)

  @n += n

  if nearest && nearest.mean == x
    _add_weight(nearest, x, n)
  elsif nearest == min
    _new_centroid(x, n, 0)
  elsif nearest == max
    _new_centroid(x, n, @n)
  else
    quantile = nearest.mean_cumn.to_f / @n
    capacity = (4 * @n * @delta * quantile * (1 - quantile)).floor
    if capacity - nearest.n >= n
      _add_weight(nearest, x, n)
    else
      _new_centroid(x, n, nearest.cumn)
    end
  end

  _cumulate(false)

  # If the number of centroids has grown to a very large size,
  # it may be due to values being inserted in sorted order.
  # We combat that by replaying the centroids in random order,
  # which is what compress! does
  compress! if @centroids.size > (@k / @delta)

  nil
end
|
383
|
+
# rubocop:enable Metrics/PerceivedComplexity, Metrics/CyclomaticComplexity
|
384
|
+
# rubocop:enable Metrics/AbcSize
|
385
|
+
|
386
|
+
# Creates a centroid for value +x+ with weight +n+ and cumulative weight
# +cumn+ and stores it in the tree under the key +x+.
#
# @return [Centroid] the stored centroid
def _new_centroid(x, n, cumn)
  @centroids[x] = Centroid.new(x, n, cumn)
end
|
391
|
+
end
|
392
|
+
# rubocop:enable Metrics/ClassLength
|
393
|
+
end
|