skylight 0.3.21 → 0.4.0.alpha1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +0 -4
- data/ext/extconf.rb +92 -47
- data/ext/libskylight.yml +4 -4
- data/ext/skylight_native.c +248 -286
- data/lib/skylight.rb +19 -114
- data/lib/skylight/api.rb +1 -1
- data/lib/skylight/config.rb +176 -146
- data/lib/skylight/data/cacert.pem +717 -719
- data/lib/skylight/formatters/http.rb +1 -1
- data/lib/skylight/instrumenter.rb +28 -35
- data/lib/skylight/native.rb +58 -72
- data/lib/skylight/normalizers.rb +0 -1
- data/lib/skylight/normalizers/active_record/sql.rb +0 -4
- data/lib/skylight/probes/excon/middleware.rb +3 -1
- data/lib/skylight/probes/net_http.rb +3 -1
- data/lib/skylight/subscriber.rb +0 -4
- data/lib/skylight/trace.rb +189 -0
- data/lib/skylight/util.rb +10 -12
- data/lib/skylight/util/hostname.rb +17 -0
- data/lib/skylight/util/http.rb +33 -36
- data/lib/skylight/util/logging.rb +20 -1
- data/lib/skylight/util/multi_io.rb +21 -0
- data/lib/skylight/util/native_ext_fetcher.rb +83 -69
- data/lib/skylight/util/platform.rb +67 -0
- data/lib/skylight/util/ssl.rb +50 -0
- data/lib/skylight/version.rb +1 -1
- metadata +9 -34
- data/ext/rust_support/ruby.h +0 -93
- data/ext/skylight.h +0 -85
- data/ext/skylight.map +0 -4
- data/ext/test/extconf.rb +0 -18
- data/ext/test/skylight_native_test.c +0 -82
- data/ext/test/skylight_test.h +0 -20
- data/lib/skylight/formatters.rb +0 -6
- data/lib/skylight/messages.rb +0 -21
- data/lib/skylight/messages/error.rb +0 -15
- data/lib/skylight/messages/hello.rb +0 -13
- data/lib/skylight/messages/trace.rb +0 -179
- data/lib/skylight/messages/trace_envelope.rb +0 -19
- data/lib/skylight/metrics.rb +0 -9
- data/lib/skylight/metrics/ewma.rb +0 -69
- data/lib/skylight/metrics/meter.rb +0 -58
- data/lib/skylight/metrics/process_cpu_gauge.rb +0 -65
- data/lib/skylight/metrics/process_mem_gauge.rb +0 -34
- data/lib/skylight/util/conversions.rb +0 -9
- data/lib/skylight/util/queue.rb +0 -96
- data/lib/skylight/util/task.rb +0 -172
- data/lib/skylight/util/uniform_sample.rb +0 -63
- data/lib/skylight/worker.rb +0 -19
- data/lib/skylight/worker/builder.rb +0 -73
- data/lib/skylight/worker/collector.rb +0 -274
- data/lib/skylight/worker/connection.rb +0 -87
- data/lib/skylight/worker/connection_set.rb +0 -56
- data/lib/skylight/worker/embedded.rb +0 -24
- data/lib/skylight/worker/metrics_reporter.rb +0 -104
- data/lib/skylight/worker/server.rb +0 -336
- data/lib/skylight/worker/standalone.rb +0 -421
@@ -1,274 +0,0 @@
|
|
1
|
-
require 'uri'
|
2
|
-
|
3
|
-
module Skylight
  module Worker
    # Task that gathers trace envelopes into interval-aligned batches and posts
    # each batch to the report endpoint. It also fetches/refreshes the report
    # session token and forwards error and exception reports through the
    # :accounts HTTP client.
    class Collector < Util::Task
      # URI::Escape no longer exists on Ruby 3.0+. Mix it in only where it is
      # still defined so that loading this class does not raise NameError; the
      # collector itself escapes through #escape_query_value.
      include URI::Escape if defined?(URI::Escape)

      ENDPOINT     = '/report'.freeze
      CONTENT_TYPE = 'content-type'.freeze
      SKYLIGHT_V2  = 'application/x-skylight-report-v2'.freeze

      include Util::Logging

      attr_reader :config, :metrics_reporter

      # @param config            configuration object; read via #[] for
      #   :'agent.sample', :'agent.interval' and :'hostname'
      # @param metrics_reporter  optional reporter. When given, the two report
      #   meters are registered with it; when nil, no metrics are registered.
      def initialize(config, metrics_reporter = nil)
        # NOTE(review): the meaning of (1000, 0.25) is defined by Util::Task,
        # which is not visible here -- confirm before changing.
        super(1000, 0.25)

        @config               = config
        @size                 = config[:'agent.sample']
        @batch                = nil
        @interval             = config[:'agent.interval']
        @refresh_at           = 0
        @http_auth            = Util::HTTP.new(config, :accounts)
        @http_report          = nil
        @report_meter         = Metrics::Meter.new
        @report_success_meter = Metrics::Meter.new
        @metrics_reporter     = metrics_reporter

        # The reporter is optional (see #prepare and #finish), so guard here too.
        if @metrics_reporter
          @metrics_reporter.register("collector.report-rate", @report_meter)
          @metrics_reporter.register("collector.report-success-rate", @report_success_meter)
        end

        t { fmt "starting collector; interval=%d; size=%d", @interval, @size }
      end

      # Builds a collector wired to a fresh MetricsReporter for the same config.
      def self.build(config)
        new(config, MetricsReporter.new(config))
      end

      # Registers the queue-depth metric and starts the metrics reporter, if
      # one was supplied.
      def prepare
        if @metrics_reporter
          @metrics_reporter.register("worker.collector.queue-depth", queue_depth_metric)
          @metrics_reporter.spawn
        end
      end

      # Processes one message (or a nil tick).
      #
      # Refreshes the report token when due, flushes the current batch when it
      # is due, then dispatches on the message class. Always returns true.
      #
      # @param msg  a Messages::TraceEnvelope, an Error, or nil
      # @param now  current time as returned by Util::Clock.absolute_secs
      def handle(msg, now = Util::Clock.absolute_secs)
        @batch ||= new_batch(now)

        refresh_report_token(now) if should_refresh_token?(now)

        if @batch.should_flush?(now)
          if has_report_token?(now)
            flush(@batch)
          else
            # Without a usable token the incoming message is dropped and the
            # current batch is kept as is.
            warn "do not have valid session token -- dropping"
            return true
          end

          @batch = new_batch(now)
        end

        return true unless msg

        case msg
        when Messages::TraceEnvelope
          t { fmt "collector received trace" }
          @batch.push(msg)
        when Error
          send_error(msg)
        else
          debug "Received unknown message; class=%s", msg.class.to_s
        end

        true
      end

      # Reports the exception carried by a failed HTTP response, together with
      # the host, port, path and method of the request.
      def send_http_exception(http, response)
        send_exception(response.exception, additional_info: {
          host:   http.host,
          port:   http.port,
          path:   response.request.path,
          method: response.request.method
        })
      end

      # Posts an exception report. `data` is merged over the default fields;
      # message and backtrace are added only for real Exception instances.
      # `agent_info` is nil when no metrics reporter is configured.
      def send_exception(exception, data={})
        data = { class_name: exception.class.name,
                 agent_info: @metrics_reporter && @metrics_reporter.build_report }.merge(data)

        if Exception === exception
          data.merge!(message: exception.message, backtrace: exception.backtrace)
        end

        # notify = false: a failure to post this report must not trigger
        # another exception report.
        post_data(:exception, data, false)
      end

      private

      # Escapes a value for use inside a URL query string. Replaces the
      # obsolete URI.escape, which left reserved characters such as "&" and
      # "=" untouched.
      def escape_query_value(value)
        URI.encode_www_form_component(value)
      end

      # Posts `data` to "/agent/<type>" with the configured hostname as a
      # query parameter.
      #
      # @param notify  when true, an exception on the response is itself
      #   reported via #send_http_exception
      # @return [Boolean] true when the response carries no exception, false
      #   otherwise (including when posting raised)
      def post_data(type, data, notify = true)
        t { "posting data (#{type}): #{data.inspect}" }

        res = @http_auth.post("/agent/#{type}?hostname=#{escape_query_value(config[:'hostname'])}", data)

        unless res.success?
          warn "#{type} wasn't sent successfully; status=%s", res.status
        end

        if res.exception
          send_http_exception(@http_auth, res) if notify
          false
        else
          true
        end
      rescue StandardError => e
        # StandardError rather than Exception, so SystemExit and Interrupt
        # still propagate.
        error "exception; msg=%s; class=%s", e.message, e.class
        t { e.backtrace.join("\n") }
        false
      end

      # Forwards an error message; `details`, when present, is parsed as JSON.
      def send_error(msg)
        details = msg.details ? JSON.parse(msg.details) : nil
        post_data(:error, type: msg.type, description: msg.description, details: details)
      end

      # Flushes whatever is batched (if a token is available) and shuts the
      # metrics reporter down, even if flushing raises.
      def finish
        t { fmt "collector finishing up" }

        now = Util::Clock.absolute_secs

        refresh_report_token(now) if should_refresh_token?(now)

        if @batch && has_report_token?(now)
          flush(@batch)
        end

        @batch = nil
      ensure
        if @metrics_reporter
          @metrics_reporter.shutdown
        end
      end

      # Posts a non-empty batch to ENDPOINT and marks the report meters.
      # Always returns nil.
      def flush(batch)
        return if batch.empty?

        debug "flushing batch; size=%d", batch.sample.count

        @report_meter.mark

        res = @http_report.post(ENDPOINT, batch.encode, CONTENT_TYPE => SKYLIGHT_V2)

        if res.exception
          send_http_exception(@http_report, res)
        else
          @report_success_meter.mark
        end

        nil
      end

      # Requests a report session token and, on success, installs a new
      # :report HTTP client authenticated with it and schedules the next
      # refresh.
      def refresh_report_token(now)
        res = @http_auth.get("/agent/authenticate?hostname=#{escape_query_value(config[:'hostname'])}")

        if res.exception
          send_http_exception(@http_auth, res)
          return
        end

        unless res.success?
          if (400..499).include? res.status
            warn "token request rejected; status=%s", res.status
            # A 4xx rejection discards the current report client.
            @http_report = nil
          end

          warn "could not fetch report session token; status=%s", res.status
          return
        end

        session = res.body['session']
        tok, expires_at = session['token'], session['expires_at'] if session

        if tok && expires_at
          if expires_at <= now
            error "token is expired: token=%s; expires_at=%s", tok, expires_at
            return
          end

          # 30 minute buffer or split the difference
          @refresh_at = expires_at - now > 3600 ?
            now + ((expires_at - now) / 2) :
            expires_at - 1800

          @http_report = Util::HTTP.new(config, :report)
          @http_report.authentication = tok
        else
          # With an existing report client, try again in 60; without one,
          # @refresh_at is left unchanged.
          if @http_report
            @refresh_at = now + 60
          end
          warn "server did not return a session token"
        end
      rescue StandardError => e
        # StandardError rather than Exception, so SystemExit and Interrupt
        # still propagate.
        error "exception; msg=%s; class=%s", e.message, e.class
        t { e.backtrace.join("\n") }
      end

      def should_refresh_token?(now)
        now >= @refresh_at
      end

      # True when tokens are ignored by config, or when a report client exists
      # and `now` is still inside the window after @refresh_at.
      # NOTE(review): the origin of the (3600 * 3 - 660) window is not visible
      # in this file -- confirm against the server's token lifetime.
      def has_report_token?(now)
        return true if config.ignore_token?
        return unless @http_report
        now < @refresh_at + (3600 * 3 - 660)
      end

      def new_batch(now)
        Batch.new(config, @size, round(now), @interval)
      end

      # Rounds `time` down to a multiple of @interval.
      def round(time)
        (time.to_i / @interval) * @interval
      end

      # A sample of traces plus per-endpoint counts, covering the window
      # [from, from + interval).
      class Batch
        include Util::Logging

        attr_reader :config, :from, :counts, :sample, :flush_at

        def initialize(config, size, from, interval)
          @config   = config
          @from     = from
          @flush_at = from + interval
          @sample   = Util::UniformSample.new(size)
          @counts   = Hash.new(0)
        end

        # Due once `now` reaches flush_at, or always when the config asks for
        # constant flushing.
        def should_flush?(now)
          return true if @config.constant_flush?
          now >= @flush_at
        end

        def empty?
          @sample.empty?
        end

        def push(trace)
          # Count it
          @counts[trace.endpoint_name] += 1
          # Push the trace into the sample
          @sample << trace
        end

        # Serializes the sampled traces and endpoint counts through the native
        # Skylight::Batch.
        def encode
          batch = Skylight::Batch.native_new(from, config[:hostname])

          sample.each do |trace|
            batch.native_move_in(trace.data)
          end

          @counts.each do |endpoint_name, count|
            batch.native_set_endpoint_count(endpoint_name, count)
          end

          batch.native_serialize
        end
      end

    end
  end
end
|
@@ -1,87 +0,0 @@
|
|
1
|
-
module Skylight
  module Worker
    # Represents the IPC client connection.
    #
    # Incoming bytes are buffered and split into frames. Each frame starts
    # with an 8 byte header: bytes 0-3 hold the message id and bytes 4-7 the
    # payload length, both unpacked with "L".
    class Connection
      FRAME_HDR_LEN = 8

      attr_reader :sock, :throughput

      # @param sock  object responding to #read_nonblock
      def initialize(sock)
        @sock = sock
        @len  = nil
        @buf  = empty_buffer

        # Metrics
        @throughput = Metrics::Meter.new
      end

      # Returns the next complete message, reading from the socket at most
      # once if none is buffered yet. Returns nil when no complete message is
      # available.
      #
      # @raise [IpcProtoError] on an unknown message id or a decoding failure
      def read
        if msg = maybe_read_message
          return msg
        end

        if chunk = read_sock
          @buf << binary(chunk)

          if !@len && @buf.bytesize >= FRAME_HDR_LEN
            @len = read_len
          end

          maybe_read_message
        end
      end

      def cleanup
        # Any cleanup code here
      end

      private

      # Fresh, unfrozen, binary-encoded buffer.
      def empty_buffer
        String.new.force_encoding(Encoding::BINARY)
      end

      # Returns `str` as a binary-encoded string (copying only when needed) so
      # that appending to the buffer can never hit an encoding mismatch.
      def binary(str)
        return str if str.encoding == Encoding::BINARY
        str.dup.force_encoding(Encoding::BINARY)
      end

      # Payload length from header bytes 4-7. Uses byteslice because the
      # surrounding size checks are in bytes, not characters.
      def read_len
        if len = @buf.byteslice(4, 4)
          len.unpack("L")[0]
        end
      end

      # Message id from header bytes 0-3.
      def read_message_id
        if win = @buf.byteslice(0, 4)
          win.unpack("L")[0]
        end
      end

      # Pops one complete frame off the buffer and deserializes it, or returns
      # nil when the buffer does not yet hold a complete frame.
      def maybe_read_message
        if @len && @buf.bytesize >= @len + FRAME_HDR_LEN
          mid   = read_message_id
          klass = Messages::ID_TO_KLASS.fetch(mid) do
            raise IpcProtoError, "unknown message `#{mid}`"
          end

          consumed = FRAME_HDR_LEN + @len
          data     = @buf.byteslice(FRAME_HDR_LEN, @len)
          @buf     = @buf.byteslice(consumed, @buf.bytesize - consumed) || empty_buffer

          # Pre-parse the next header if it has already arrived.
          if @buf.bytesize >= FRAME_HDR_LEN
            @len = read_len
          else
            @len = nil
          end

          begin
            return klass.deserialize(data)
          rescue StandardError => e
            # reraise protobuf decoding exceptions
            # (StandardError only: SystemExit/Interrupt must not be turned
            # into protocol errors)
            raise IpcProtoError, e.message
          end
        end
      end

      # Non-blocking read of up to CHUNK_SIZE bytes; returns nil when the
      # socket has nothing to read right now.
      def read_sock
        ret = @sock.read_nonblock(CHUNK_SIZE)
        # Track the throughput
        @throughput.mark(ret.bytesize) if ret
        ret
      rescue Errno::EAGAIN, Errno::EWOULDBLOCK
      end

    end
  end
end
|
@@ -1,56 +0,0 @@
|
|
1
|
-
require 'thread'
|
2
|
-
|
3
|
-
module Skylight
  module Worker
    # Mutex-guarded registry mapping sockets to their Connection objects.
    # Also exposes two callable metrics: the number of open connections and
    # the per-connection throughput rates.
    class ConnectionSet
      attr_reader :open_connections, :throughput

      def initialize
        @lock        = Mutex.new
        @connections = {}

        # Metrics
        @open_connections = build_open_connections_metric
        @throughput       = build_throughput_metric
      end

      # Wraps `sock` in a Connection, registers it, and returns the Connection.
      def add(sock)
        Connection.new(sock).tap do |connection|
          @lock.synchronize { @connections[sock] = connection }
        end
      end

      # Snapshot of the currently registered sockets.
      def socks
        @lock.synchronize { @connections.keys }
      end

      # Connection registered for `sock`, or nil.
      def [](sock)
        @lock.synchronize { @connections[sock] }
      end

      # Unregisters `sock`; if it was registered, runs the connection's
      # cleanup and closes the socket, ignoring errors raised by close.
      def cleanup(sock)
        removed = @lock.synchronize { @connections.delete(sock) }
        return unless removed

        removed.cleanup

        begin
          sock.close
        rescue StandardError
          nil
        end
      end

      private

      # Callable returning the current number of registered connections.
      def build_open_connections_metric
        -> { @lock.synchronize { @connections.length } }
      end

      # Callable returning each connection's throughput rate as an integer.
      def build_throughput_metric
        lambda do
          snapshot = @lock.synchronize { @connections.values }
          snapshot.map { |connection| connection.throughput.rate.to_i }
        end
      end
    end
  end
end
|
@@ -1,24 +0,0 @@
|
|
1
|
-
module Skylight
  module Worker
    # Worker front-end that delegates to a collector object it is given.
    class Embedded
      # @param collector  object responding to #spawn, #shutdown and #submit
      def initialize(collector)
        @collector = collector
      end

      # Starts the wrapped collector.
      def spawn
        @collector.spawn
      end

      # Stops the wrapped collector.
      def shutdown
        @collector.shutdown
      end

      # Serializes `msg`, deserializes it again with the class registered for
      # its message id, and submits the resulting copy to the collector.
      # NOTE(review): presumably this mirrors what a message goes through
      # over IPC -- confirm.
      def submit(msg)
        message_id = Messages::KLASS_TO_ID.fetch(msg.class)
        decoder    = Messages::ID_TO_KLASS.fetch(message_id)
        decoded    = decoder.deserialize(msg.serialize)

        @collector.submit(decoded)
      end
    end
  end
end
|
@@ -1,104 +0,0 @@
|
|
1
|
-
require 'thread'
|
2
|
-
require 'rbconfig'
|
3
|
-
|
4
|
-
module Skylight
  module Worker
    # Task that posts a report of the registered metrics, together with some
    # host and runtime details, to "/agent/metrics" once per configured
    # interval.
    class MetricsReporter < Util::Task

      include Util::Logging

      attr_reader :config

      # @param config  configuration object; read via #[] for
      #   :'metrics.report_interval' and :'hostname'
      def initialize(config)
        super(1000, 0.25)

        @metrics        = {}
        @config         = config
        @interval       = config[:'metrics.report_interval']
        @lock           = Mutex.new
        @next_report_at = nil
        @http_auth      = Util::HTTP.new(config, :accounts)
      end

      # A metric responds to #call and returns metric info
      def register(name, metric)
        @lock.synchronize { @metrics[name] = metric }
      end

      # Removes the metric registered under `name`, returning it (or nil).
      def unregister(name)
        @lock.synchronize { @metrics.delete(name) }
      end

      # msg is always nil, but we can use the Task abstraction anyway
      #
      # The first call only schedules the first report. Later calls post a
      # report once `now` has reached the scheduled time. Always returns true.
      def handle(msg, now = Util::Clock.absolute_secs)
        if @next_report_at
          unless now < @next_report_at
            update_next_report_at(now)
            post_report
          end
        else
          # Initially set the next report at
          update_next_report_at(now)
        end

        true
      end

      # Builds the report hash: fixed host/runtime entries first, then one
      # entry per registered metric holding the result of calling it.
      def build_report
        base = {
          "hostname"         => config[:'hostname'],
          "host.info"        => RbConfig::CONFIG['arch'],
          "ruby.version"     => "#{RUBY_VERSION}-p#{RUBY_PATCHLEVEL}",
          "ruby.engine"      => RUBY_ENGINE,
          "rails.version"    => defined?(Rails) ? Rails.version : nil,
          "skylight.version" => Skylight::VERSION
        }

        metric_names.each_with_object(base) do |name, report|
          # Since we are operating in a concurrent environment, it is possible
          # that the metric for the current name is unregistered before we
          # access it here.
          current = metric(name)
          report[name] = current.call if current
        end
      end

      # Posts the current report and logs a warning when the response is not
      # successful.
      def post_report
        payload = build_report

        # Send the report
        t { fmt "reporting internal metrics; payload=%s", payload.inspect }

        response = @http_auth.post("/agent/metrics", report: payload)

        warn "internal metrics report failed; status=%s", response.status unless response.success?
      end

      private

      # Snapshot of the registered metric names.
      def metric_names
        @lock.synchronize { @metrics.keys }
      end

      # Metric registered under `name`, or nil.
      def metric(name)
        @lock.synchronize { @metrics[name] }
      end

      # Schedules the next report one interval after `now`.
      def update_next_report_at(now)
        @next_report_at = now + @interval
      end

    end
  end
end
|