skylight 0.3.21 → 0.4.0.alpha1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +0 -4
- data/ext/extconf.rb +92 -47
- data/ext/libskylight.yml +4 -4
- data/ext/skylight_native.c +248 -286
- data/lib/skylight.rb +19 -114
- data/lib/skylight/api.rb +1 -1
- data/lib/skylight/config.rb +176 -146
- data/lib/skylight/data/cacert.pem +717 -719
- data/lib/skylight/formatters/http.rb +1 -1
- data/lib/skylight/instrumenter.rb +28 -35
- data/lib/skylight/native.rb +58 -72
- data/lib/skylight/normalizers.rb +0 -1
- data/lib/skylight/normalizers/active_record/sql.rb +0 -4
- data/lib/skylight/probes/excon/middleware.rb +3 -1
- data/lib/skylight/probes/net_http.rb +3 -1
- data/lib/skylight/subscriber.rb +0 -4
- data/lib/skylight/trace.rb +189 -0
- data/lib/skylight/util.rb +10 -12
- data/lib/skylight/util/hostname.rb +17 -0
- data/lib/skylight/util/http.rb +33 -36
- data/lib/skylight/util/logging.rb +20 -1
- data/lib/skylight/util/multi_io.rb +21 -0
- data/lib/skylight/util/native_ext_fetcher.rb +83 -69
- data/lib/skylight/util/platform.rb +67 -0
- data/lib/skylight/util/ssl.rb +50 -0
- data/lib/skylight/version.rb +1 -1
- metadata +9 -34
- data/ext/rust_support/ruby.h +0 -93
- data/ext/skylight.h +0 -85
- data/ext/skylight.map +0 -4
- data/ext/test/extconf.rb +0 -18
- data/ext/test/skylight_native_test.c +0 -82
- data/ext/test/skylight_test.h +0 -20
- data/lib/skylight/formatters.rb +0 -6
- data/lib/skylight/messages.rb +0 -21
- data/lib/skylight/messages/error.rb +0 -15
- data/lib/skylight/messages/hello.rb +0 -13
- data/lib/skylight/messages/trace.rb +0 -179
- data/lib/skylight/messages/trace_envelope.rb +0 -19
- data/lib/skylight/metrics.rb +0 -9
- data/lib/skylight/metrics/ewma.rb +0 -69
- data/lib/skylight/metrics/meter.rb +0 -58
- data/lib/skylight/metrics/process_cpu_gauge.rb +0 -65
- data/lib/skylight/metrics/process_mem_gauge.rb +0 -34
- data/lib/skylight/util/conversions.rb +0 -9
- data/lib/skylight/util/queue.rb +0 -96
- data/lib/skylight/util/task.rb +0 -172
- data/lib/skylight/util/uniform_sample.rb +0 -63
- data/lib/skylight/worker.rb +0 -19
- data/lib/skylight/worker/builder.rb +0 -73
- data/lib/skylight/worker/collector.rb +0 -274
- data/lib/skylight/worker/connection.rb +0 -87
- data/lib/skylight/worker/connection_set.rb +0 -56
- data/lib/skylight/worker/embedded.rb +0 -24
- data/lib/skylight/worker/metrics_reporter.rb +0 -104
- data/lib/skylight/worker/server.rb +0 -336
- data/lib/skylight/worker/standalone.rb +0 -421
@@ -1,274 +0,0 @@
|
|
1
|
-
require 'uri'
|
2
|
-
|
3
|
-
module Skylight
  module Worker
    # Task that gathers trace messages into time-bucketed batches and posts
    # each batch to the report endpoint, obtaining/refreshing the report
    # session token from the accounts endpoint as needed. Agent errors and
    # HTTP failures are forwarded to the accounts endpoint as well.
    #
    # Logging helpers (t, fmt, warn, debug, error) are presumably provided by
    # Util::Logging; spawn/shutdown/queue_depth_metric presumably by Util::Task.
    #
    # NOTE(review): URI::Escape was removed from the standard library in
    # Ruby 3.0; this class only loads on older Rubies.
    class Collector < Util::Task
      include URI::Escape

      ENDPOINT     = '/report'.freeze
      CONTENT_TYPE = 'content-type'.freeze
      SKYLIGHT_V2  = 'application/x-skylight-report-v2'.freeze

      include Util::Logging

      attr_reader :config, :metrics_reporter

      # config           - configuration object; read via config[:'agent.sample'],
      #                    config[:'agent.interval'] and config[:'hostname'].
      # metrics_reporter - optional object responding to #register,
      #                    #build_report, #spawn and #shutdown. May be nil, in
      #                    which case no internal metrics are registered.
      def initialize(config, metrics_reporter = nil)
        # NOTE(review): the meaning of (1000, 0.25) is defined by Util::Task,
        # which is not visible here -- confirm before changing.
        super(1000, 0.25)

        @config      = config
        @size        = config[:'agent.sample']
        @batch       = nil
        @interval    = config[:'agent.interval']
        @refresh_at  = 0 # forces a token refresh on the first #handle call
        @http_auth   = Util::HTTP.new(config, :accounts)
        @http_report = nil # created once a report token has been obtained
        @report_meter         = Metrics::Meter.new
        @report_success_meter = Metrics::Meter.new
        @metrics_reporter     = metrics_reporter

        # The reporter is optional (see #prepare / #finish), so only register
        # the meters when one was actually supplied.
        if @metrics_reporter
          @metrics_reporter.register("collector.report-rate", @report_meter)
          @metrics_reporter.register("collector.report-success-rate", @report_success_meter)
        end

        t { fmt "starting collector; interval=%d; size=%d", @interval, @size }
      end

      # Builds a collector wired to a fresh MetricsReporter for +config+.
      def self.build(config)
        new(config, MetricsReporter.new(config))
      end

      # Registers the queue-depth metric and starts the metrics reporter, if
      # one was supplied.
      def prepare
        if @metrics_reporter
          @metrics_reporter.register("worker.collector.queue-depth", queue_depth_metric)
          @metrics_reporter.spawn
        end
      end

      # Processes one message (or a nil tick).
      #
      # msg - a Messages::TraceEnvelope, an Error, nil, or anything else
      #       (unknown messages are only logged).
      # now - current time in seconds; defaults to Util::Clock.absolute_secs.
      #
      # Always returns true, including when a due batch is dropped because no
      # valid report token is available (in that case +msg+ is dropped too).
      def handle(msg, now = Util::Clock.absolute_secs)
        @batch ||= new_batch(now)

        if should_refresh_token?(now)
          refresh_report_token(now)
        end

        if @batch.should_flush?(now)
          if has_report_token?(now)
            flush(@batch)
          else
            warn "do not have valid session token -- dropping"
            return true
          end

          @batch = new_batch(now)
        end

        return true unless msg

        case msg
        when Messages::TraceEnvelope
          t { fmt "collector received trace" }
          @batch.push(msg)
        when Error
          send_error(msg)
        else
          debug "Received unknown message; class=%s", msg.class.to_s
        end

        true
      end

      # Reports the exception carried by a failed HTTP response, together with
      # the host/port of +http+ and the path/method of the originating request.
      def send_http_exception(http, response)
        send_exception(response.exception, additional_info: {
          host: http.host,
          port: http.port,
          path: response.request.path,
          method: response.request.method
        })
      end

      # Posts an exception report to the accounts endpoint.
      #
      # exception - the object to report; message and backtrace are included
      #             only when it is an actual Exception.
      # data      - extra fields merged over the generated ones.
      #
      # Returns the result of #post_data (called with notify = false so that a
      # failure to report does not trigger another report).
      def send_exception(exception, data={})
        data = { class_name: exception.class.name,
                 # agent_info is nil when no metrics reporter was supplied.
                 agent_info: @metrics_reporter && @metrics_reporter.build_report }.merge(data)

        if exception.is_a?(Exception)
          data.merge!(message: exception.message, backtrace: exception.backtrace)
        end

        post_data(:exception, data, false)
      end

      private

      # Posts +data+ to "/agent/<type>" on the accounts endpoint.
      #
      # notify - when true, an exception attached to the response is itself
      #          reported via #send_http_exception.
      #
      # Returns true when the response carries no exception, false otherwise
      # (including when posting raised an error, which is logged).
      def post_data(type, data, notify = true)
        t { "posting data (#{type}): #{data.inspect}" }

        res = @http_auth.post("/agent/#{type}?hostname=#{escape(config[:'hostname'])}", data)

        unless res.success?
          warn "#{type} wasn't sent successfully; status=%s", res.status
        end

        if res.exception
          send_http_exception(@http_auth, res) if notify
          false
        else
          true
        end
      rescue StandardError => e
        # Best effort: never let a reporting failure escape, but do not
        # swallow Interrupt/SystemExit and friends.
        error "exception; msg=%s; class=%s", e.message, e.class
        t { e.backtrace.join("\n") }
        false
      end

      # Forwards an error message to the accounts endpoint. +msg.details+, when
      # present, is parsed as JSON first.
      def send_error(msg)
        details = msg.details ? JSON.parse(msg.details) : nil
        post_data(:error, type: msg.type, description: msg.description, details: details)
      end

      # Final flush: refreshes the token if due, flushes any pending batch when
      # a token is available, and always shuts the metrics reporter down.
      def finish
        t { fmt "collector finishing up" }

        now = Util::Clock.absolute_secs

        if should_refresh_token?(now)
          refresh_report_token(now)
        end

        if @batch && has_report_token?(now)
          flush(@batch)
        end

        @batch = nil
      ensure
        if @metrics_reporter
          @metrics_reporter.shutdown
        end
      end

      # Serializes and posts +batch+ to the report endpoint, marking the report
      # meters. Empty batches are skipped. Always returns nil.
      #
      # NOTE(review): #has_report_token? returns true whenever
      # config.ignore_token? is set, so this can be reached while @http_report
      # is still nil -- confirm that configuration always obtains a token first.
      def flush(batch)
        return if batch.empty?

        debug "flushing batch; size=%d", batch.sample.count

        @report_meter.mark

        res = @http_report.post(ENDPOINT, batch.encode, CONTENT_TYPE => SKYLIGHT_V2)

        if res.exception
          send_http_exception(@http_report, res)
        else
          @report_success_meter.mark
        end

        nil
      end

      # Requests a report session token from the accounts endpoint and, on
      # success, creates an authenticated @http_report client and schedules the
      # next refresh. Failures are logged; a 4xx response also discards the
      # current @http_report.
      def refresh_report_token(now)
        res = @http_auth.get("/agent/authenticate?hostname=#{escape(config[:'hostname'])}")

        if res.exception
          send_http_exception(@http_auth, res)
          return
        end

        unless res.success?
          if (400..499).include? res.status
            warn "token request rejected; status=%s", res.status
            @http_report = nil
          end

          warn "could not fetch report session token; status=%s", res.status
          return
        end

        session = res.body['session']
        tok, expires_at = session['token'], session['expires_at'] if session

        if tok && expires_at
          if expires_at <= now
            error "token is expired: token=%s; expires_at=%s", tok, expires_at
            return
          end

          # 30 minute buffer or split the difference
          @refresh_at = expires_at - now > 3600 ?
            now + ((expires_at - now) / 2) :
            expires_at - 1800

          @http_report = Util::HTTP.new(config, :report)
          @http_report.authentication = tok
        else
          # Keep using the existing client, but retry in a minute.
          if @http_report
            @refresh_at = now + 60
          end
          warn "server did not return a session token"
        end
      rescue StandardError => e
        # Best effort: log and carry on, without swallowing Interrupt/SystemExit.
        error "exception; msg=%s; class=%s", e.message, e.class
        t { e.backtrace.join("\n") }
      end

      # True once +now+ has reached the scheduled refresh time.
      def should_refresh_token?(now)
        now >= @refresh_at
      end

      # Truthy when a batch may be sent: always when config.ignore_token? is
      # set; otherwise only when a report client exists and +now+ is within a
      # grace window after @refresh_at.
      def has_report_token?(now)
        return true if config.ignore_token?
        return unless @http_report
        # NOTE(review): the window (3h minus 11min) is a magic constant whose
        # rationale is not visible here -- confirm before changing.
        now < @refresh_at + (3600 * 3 - 660)
      end

      # Creates an empty batch whose start time is +now+ rounded down to the
      # reporting interval.
      def new_batch(now)
        Batch.new(config, @size, round(now), @interval)
      end

      # Rounds +time+ down to a multiple of @interval (integer arithmetic).
      def round(time)
        (time.to_i / @interval) * @interval
      end

      # One reporting window: a sample of traces plus per-endpoint counts.
      class Batch
        include Util::Logging

        attr_reader :config, :from, :counts, :sample, :flush_at

        # config   - configuration object (read for hostname / constant_flush?).
        # size     - size passed to Util::UniformSample.new.
        # from     - start of the window.
        # interval - window length; the batch is due at from + interval.
        def initialize(config, size, from, interval)
          @config   = config
          @from     = from
          @flush_at = from + interval
          @sample   = Util::UniformSample.new(size)
          @counts   = Hash.new(0)
        end

        # True when the batch is due, or always when config.constant_flush?.
        def should_flush?(now)
          return true if @config.constant_flush?
          now >= @flush_at
        end

        # True when no trace has been sampled.
        def empty?
          @sample.empty?
        end

        def push(trace)
          # Count it
          @counts[trace.endpoint_name] += 1
          # Push the trace into the sample
          @sample << trace
        end

        # Builds a native Skylight::Batch from the sampled traces and endpoint
        # counts and returns its serialized form.
        def encode
          batch = Skylight::Batch.native_new(from, config[:hostname])

          sample.each do |trace|
            batch.native_move_in(trace.data)
          end

          @counts.each do |endpoint_name,count|
            batch.native_set_endpoint_count(endpoint_name, count)
          end

          batch.native_serialize
        end
      end

    end
  end
end
|
@@ -1,87 +0,0 @@
|
|
1
|
-
module Skylight
  module Worker
    # Represents the IPC client connection
    #
    # Incoming bytes are buffered and split into frames. Each frame starts
    # with an 8-byte header: a 4-byte message id followed by a 4-byte payload
    # length, both unpacked with the "L" directive (native-endian 32-bit
    # unsigned).
    #
    # CHUNK_SIZE and IpcProtoError are not defined in this file; they are
    # presumably provided by the enclosing Skylight::Worker namespace.
    class Connection
      FRAME_HDR_LEN = 8

      attr_reader :sock, :throughput

      # sock - IO-like object responding to #read_nonblock.
      def initialize(sock)
        @sock = sock
        @len  = nil # payload length of the frame at the head of @buf, if known
        @buf  = ""

        # Metrics
        @throughput = Metrics::Meter.new
      end

      # Returns the next decoded message, or nil when no complete frame is
      # available yet. At most one socket read is attempted per call.
      #
      # Raises IpcProtoError for unknown message ids or payloads that fail to
      # decode. Errors from the socket other than EAGAIN/EWOULDBLOCK propagate.
      def read
        if msg = maybe_read_message
          return msg
        end

        if chunk = read_sock

          @buf << chunk

          if !@len && @buf.bytesize >= FRAME_HDR_LEN
            @len = read_len
          end

          maybe_read_message
        end
      end

      def cleanup
        # Any cleanup code here
      end

      private

      # Payload length from bytes 4..7 of the buffered header.
      def read_len
        if len = @buf[4, 4]
          len.unpack("L")[0]
        end
      end

      # Message id from bytes 0..3 of the buffered header.
      def read_message_id
        if win = @buf[0, 4]
          win.unpack("L")[0]
        end
      end

      # Pops one complete frame off the buffer and decodes it. Returns nil
      # when the buffer does not yet hold a complete frame.
      def maybe_read_message
        if @len && @buf.bytesize >= @len + FRAME_HDR_LEN
          mid   = read_message_id
          klass = Messages::ID_TO_KLASS.fetch(mid) do
            raise IpcProtoError, "unknown message `#{mid}`"
          end
          data = @buf[FRAME_HDR_LEN, @len]
          @buf = @buf[(FRAME_HDR_LEN + @len)..-1] || ""

          # Prime @len for the next frame if its header is already buffered.
          if @buf.bytesize >= FRAME_HDR_LEN
            @len = read_len
          else
            @len = nil
          end

          begin
            return klass.deserialize(data)
          rescue StandardError => e
            # reraise protobuf decoding exceptions
            # (StandardError only: Interrupt, SystemExit etc. must not be
            # disguised as protocol errors.)
            raise IpcProtoError, e.message
          end
        end
      end

      # Reads up to CHUNK_SIZE bytes without blocking. Returns nil when the
      # read would block.
      def read_sock
        ret = @sock.read_nonblock(CHUNK_SIZE)
        # Track the throughput
        @throughput.mark(ret.bytesize) if ret
        ret
      rescue Errno::EAGAIN, Errno::EWOULDBLOCK
      end

    end
  end
end
|
@@ -1,56 +0,0 @@
|
|
1
|
-
require 'thread'
|
2
|
-
|
3
|
-
module Skylight
  module Worker
    # Mutex-guarded registry mapping sockets to their Connection objects.
    # Also exposes two callable metrics derived from the registry.
    class ConnectionSet
      # open_connections - callable returning the number of tracked connections.
      # throughput       - callable returning an Array with the integer
      #                    throughput rate of each tracked connection.
      attr_reader :open_connections, :throughput

      def initialize
        @connections = {}
        @lock = Mutex.new

        # Metrics
        @open_connections = build_open_connections_metric
        @throughput = build_throughput_metric
      end

      # Wraps +sock+ in a Connection, tracks it, and returns the Connection.
      def add(sock)
        conn = Connection.new(sock)
        @lock.synchronize { @connections[sock] = conn }
        conn
      end

      # Snapshot of the currently tracked sockets.
      def socks
        @lock.synchronize { @connections.keys }
      end

      # Connection tracked for +sock+, or nil.
      def [](sock)
        @lock.synchronize do
          @connections[sock]
        end
      end

      # Stops tracking +sock+, runs the connection's cleanup hook and closes
      # the socket. Does nothing for untracked sockets. Returns nil.
      #
      # The socket is closed even when the cleanup hook raises (the error then
      # propagates); errors raised by the close itself are ignored.
      def cleanup(sock)
        conn = @lock.synchronize { @connections.delete(sock) }
        return unless conn

        begin
          conn.cleanup
        ensure
          close_quietly(sock)
        end

        nil
      end

      private

      # Best-effort close: a socket that is already closed or broken must not
      # abort the cleanup.
      def close_quietly(sock)
        sock.close
      rescue StandardError
        nil
      end

      def build_open_connections_metric
        lambda do
          @lock.synchronize { @connections.length }
        end
      end

      def build_throughput_metric
        lambda do
          # Take the snapshot under the lock, compute rates outside of it.
          conns = @lock.synchronize { @connections.values }
          conns.map { |c| c.throughput.rate.to_i }
        end
      end
    end
  end
end
|
@@ -1,24 +0,0 @@
|
|
1
|
-
module Skylight
  module Worker
    # Worker front-end that hands messages straight to a collector object it
    # was constructed with, delegating lifecycle calls to that collector.
    class Embedded
      # collector - object responding to #spawn, #shutdown and #submit.
      def initialize(collector)
        @collector = collector
      end

      # Starts the wrapped collector; returns whatever it returns.
      def spawn
        @collector.spawn
      end

      # Stops the wrapped collector; returns whatever it returns.
      def shutdown
        @collector.shutdown
      end

      # Round-trips +msg+ through serialize/deserialize, using the class
      # registered for its message id, and submits the resulting copy.
      #
      # Raises KeyError when the message class (or its id) is not registered
      # in Messages::KLASS_TO_ID / Messages::ID_TO_KLASS.
      def submit(msg)
        registry   = Messages::ID_TO_KLASS
        message_id = Messages::KLASS_TO_ID.fetch(msg.class)
        klass      = registry.fetch(message_id)
        copy       = klass.deserialize(msg.serialize)

        @collector.submit(copy)
      end
    end
  end
end
|
@@ -1,104 +0,0 @@
|
|
1
|
-
require 'thread'
|
2
|
-
require 'rbconfig'
|
3
|
-
|
4
|
-
module Skylight
  module Worker
    # Task that periodically posts a report of registered internal metrics,
    # plus basic host/runtime information, to "/agent/metrics" on the
    # accounts endpoint.
    #
    # Logging helpers (t, fmt, warn) are presumably provided by Util::Logging.
    class MetricsReporter < Util::Task

      include Util::Logging

      attr_reader :config

      # config - configuration object; read via
      #          config[:'metrics.report_interval'] and config[:'hostname'].
      def initialize(config)
        # NOTE(review): the meaning of (1000, 0.25) is defined by Util::Task,
        # which is not visible here -- confirm before changing.
        super(1000, 0.25)

        @metrics = {}
        @config = config
        @interval = config[:'metrics.report_interval']
        @lock = Mutex.new
        @next_report_at = nil
        @http_auth = Util::HTTP.new(config, :accounts)
      end

      # A metric responds to #call and returns metric info
      def register(name, metric)
        @lock.synchronize { @metrics[name] = metric }
      end

      # Removes the metric registered under +name+; returns it (or nil).
      def unregister(name)
        @lock.synchronize { @metrics.delete(name) }
      end

      # msg is always nil, but we can use the Task abstraction anyway
      #
      # Posts a report once +now+ has reached the scheduled report time and
      # schedules the next one. Always returns true.
      def handle(msg, now = Util::Clock.absolute_secs)
        # Initially set the next report at
        unless @next_report_at
          update_next_report_at(now)
          return true
        end

        if now < @next_report_at
          # Nothing to do
          return true
        end

        update_next_report_at(now)
        post_report

        true
      end

      # Returns a Hash with host/runtime information and the current value of
      # every registered metric (keyed by the metric's registered name).
      def build_report
        report = {
          "hostname" => config[:'hostname'],
          "host.info" => RbConfig::CONFIG['arch'],
          "ruby.version" => "#{RUBY_VERSION}-p#{RUBY_PATCHLEVEL}",
          "ruby.engine" => RUBY_ENGINE,
          # A bare `Rails` namespace can be defined without Rails itself being
          # loaded, in which case Rails.version does not exist.
          "rails.version" => defined?(Rails) && Rails.respond_to?(:version) ? Rails.version : nil,
          "skylight.version" => Skylight::VERSION
        }

        metric_names.each do |name|
          # Since we are operating in a concurrent environment, it is possible
          # that the metric for the current name is unregistered before we
          # access it here.
          unless m = metric(name)
            next
          end

          report[name] = m.call
        end

        report
      end

      # Builds the report and posts it; a non-successful response is logged.
      def post_report
        report = build_report

        # Send the report
        t { fmt "reporting internal metrics; payload=%s", report.inspect }

        res = @http_auth.post("/agent/metrics", report: report)

        unless res.success?
          warn "internal metrics report failed; status=%s", res.status
        end
      end

      private

      # Snapshot of the registered metric names, taken under the lock.
      def metric_names
        @lock.synchronize { @metrics.keys }
      end

      # Metric registered under +name+, or nil.
      def metric(name)
        @lock.synchronize { @metrics[name] }
      end

      def update_next_report_at(now)
        @next_report_at = now + @interval
      end

    end
  end
end
|