skylight 0.3.7 → 0.3.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/ext/libskylight.yml +3 -3
- data/ext/skylight_native.c +0 -41
- data/lib/skylight.rb +8 -0
- data/lib/skylight/config.rb +44 -41
- data/lib/skylight/metrics/ewma.rb +69 -0
- data/lib/skylight/metrics/meter.rb +58 -0
- data/lib/skylight/metrics/process_cpu_gauge.rb +65 -0
- data/lib/skylight/metrics/process_mem_gauge.rb +34 -0
- data/lib/skylight/util/clock.rb +13 -7
- data/lib/skylight/util/conversions.rb +9 -0
- data/lib/skylight/util/http.rb +6 -4
- data/lib/skylight/util/native_ext_fetcher.rb +1 -1
- data/lib/skylight/util/task.rb +21 -5
- data/lib/skylight/vendor/cli/highline.rb +42 -20
- data/lib/skylight/vendor/cli/highline/menu.rb +6 -23
- data/lib/skylight/vendor/cli/highline/question.rb +20 -14
- data/lib/skylight/vendor/cli/highline/string_extensions.rb +19 -39
- data/lib/skylight/vendor/cli/highline/system_extensions.rb +28 -4
- data/lib/skylight/version.rb +1 -1
- data/lib/skylight/worker.rb +8 -6
- data/lib/skylight/worker/builder.rb +1 -1
- data/lib/skylight/worker/collector.rb +57 -16
- data/lib/skylight/worker/connection.rb +14 -2
- data/lib/skylight/worker/connection_set.rb +56 -0
- data/lib/skylight/worker/metrics_reporter.rb +103 -0
- data/lib/skylight/worker/server.rb +35 -36
- metadata +23 -15
data/lib/skylight/version.rb
CHANGED
data/lib/skylight/worker.rb
CHANGED
@@ -5,12 +5,14 @@ module Skylight
|
|
5
5
|
CHUNK_SIZE = 16 * 1024
|
6
6
|
|
7
7
|
# === Modules
|
8
|
-
autoload :Builder,
|
9
|
-
autoload :Collector,
|
10
|
-
autoload :Connection,
|
11
|
-
autoload :
|
12
|
-
autoload :
|
13
|
-
autoload :
|
8
|
+
autoload :Builder, 'skylight/worker/builder'
|
9
|
+
autoload :Collector, 'skylight/worker/collector'
|
10
|
+
autoload :Connection, 'skylight/worker/connection'
|
11
|
+
autoload :ConnectionSet, 'skylight/worker/connection_set'
|
12
|
+
autoload :Embedded, 'skylight/worker/embedded'
|
13
|
+
autoload :MetricsReporter, 'skylight/worker/metrics_reporter'
|
14
|
+
autoload :Server, 'skylight/worker/server'
|
15
|
+
autoload :Standalone, 'skylight/worker/standalone'
|
14
16
|
|
15
17
|
end
|
16
18
|
end
|
data/lib/skylight/worker/collector.rb
CHANGED
@@ -11,23 +11,39 @@ module Skylight
|
|
11
11
|
|
12
12
|
include Util::Logging
|
13
13
|
|
14
|
-
attr_reader :config
|
14
|
+
attr_reader :config, :metrics_reporter
|
15
15
|
|
16
|
-
def initialize(config)
|
16
|
+
def initialize(config, metrics_reporter = nil)
|
17
17
|
super(1000, 0.25)
|
18
18
|
|
19
|
-
@config
|
20
|
-
@size
|
21
|
-
@batch
|
22
|
-
@interval
|
23
|
-
@refresh_at
|
24
|
-
@http_auth
|
19
|
+
@config = config
|
20
|
+
@size = config[:'agent.sample']
|
21
|
+
@batch = nil
|
22
|
+
@interval = config[:'agent.interval']
|
23
|
+
@refresh_at = 0
|
24
|
+
@http_auth = Util::HTTP.new(config, :accounts)
|
25
25
|
@http_report = nil
|
26
|
-
|
26
|
+
@report_meter = Metrics::Meter.new
|
27
|
+
@report_success_meter = Metrics::Meter.new
|
28
|
+
@metrics_reporter = metrics_reporter
|
29
|
+
|
30
|
+
@metrics_reporter.register("collector.report-rate", @report_meter)
|
31
|
+
@metrics_reporter.register("collector.report-success-rate", @report_success_meter)
|
27
32
|
|
28
33
|
t { fmt "starting collector; interval=%d; size=%d", @interval, @size }
|
29
34
|
end
|
30
35
|
|
36
|
+
def self.build(config)
|
37
|
+
new(config, MetricsReporter.new(config))
|
38
|
+
end
|
39
|
+
|
40
|
+
def prepare
|
41
|
+
if @metrics_reporter
|
42
|
+
@metrics_reporter.register("worker.collector.queue-depth", queue_depth_metric)
|
43
|
+
@metrics_reporter.spawn
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
31
47
|
def handle(msg, now = Util::Clock.absolute_secs)
|
32
48
|
@batch ||= new_batch(now)
|
33
49
|
|
@@ -61,15 +77,23 @@ module Skylight
|
|
61
77
|
true
|
62
78
|
end
|
63
79
|
|
64
|
-
def
|
65
|
-
|
80
|
+
def send_http_exception(http, response)
|
81
|
+
send_exception(response.exception, additional_info: {
|
82
|
+
host: http.host,
|
83
|
+
port: http.port,
|
84
|
+
path: response.request.path,
|
85
|
+
method: response.request.method
|
86
|
+
})
|
66
87
|
end
|
67
88
|
|
68
|
-
def send_exception(exception)
|
69
|
-
data = {class_name: exception.class.name
|
89
|
+
def send_exception(exception, data={})
|
90
|
+
data = { class_name: exception.class.name,
|
91
|
+
agent_info: @metrics_reporter.build_report }.merge(data)
|
92
|
+
|
70
93
|
if Exception === exception
|
71
94
|
data.merge!(message: exception.message, backtrace: exception.backtrace)
|
72
95
|
end
|
96
|
+
|
73
97
|
post_data(:exception, data, false)
|
74
98
|
end
|
75
99
|
|
@@ -84,7 +108,12 @@ module Skylight
|
|
84
108
|
warn "#{type} wasn't sent successfully; status=%s", res.status
|
85
109
|
end
|
86
110
|
|
87
|
-
|
111
|
+
if res.exception
|
112
|
+
send_http_exception(@http_auth, res) if notify
|
113
|
+
false
|
114
|
+
else
|
115
|
+
true
|
116
|
+
end
|
88
117
|
rescue Exception => e
|
89
118
|
error "exception; msg=%s; class=%s", e.message, e.class
|
90
119
|
t { e.backtrace.join("\n") }
|
@@ -109,6 +138,10 @@ module Skylight
|
|
109
138
|
end
|
110
139
|
|
111
140
|
@batch = nil
|
141
|
+
ensure
|
142
|
+
if @metrics_reporter
|
143
|
+
@metrics_reporter.shutdown
|
144
|
+
end
|
112
145
|
end
|
113
146
|
|
114
147
|
def flush(batch)
|
@@ -116,8 +149,16 @@ module Skylight
|
|
116
149
|
|
117
150
|
debug "flushing batch; size=%d", batch.sample.count
|
118
151
|
|
152
|
+
@report_meter.mark
|
153
|
+
|
119
154
|
res = @http_report.post(ENDPOINT, batch.encode, CONTENT_TYPE => SKYLIGHT_V2)
|
120
|
-
|
155
|
+
|
156
|
+
if res.exception
|
157
|
+
send_http_exception(@http_report, res)
|
158
|
+
else
|
159
|
+
@report_success_meter.mark
|
160
|
+
end
|
161
|
+
|
121
162
|
nil
|
122
163
|
end
|
123
164
|
|
@@ -125,7 +166,7 @@ module Skylight
|
|
125
166
|
res = @http_auth.get("/agent/authenticate?hostname=#{escape(config[:'hostname'])}")
|
126
167
|
|
127
168
|
if res.exception
|
128
|
-
|
169
|
+
send_http_exception(@http_auth, res)
|
129
170
|
return
|
130
171
|
end
|
131
172
|
|
data/lib/skylight/worker/connection.rb
CHANGED
@@ -1,14 +1,18 @@
|
|
1
1
|
module Skylight
|
2
2
|
module Worker
|
3
|
+
# Represents the IPC client connection
|
3
4
|
class Connection
|
4
5
|
FRAME_HDR_LEN = 8
|
5
6
|
|
6
|
-
attr_reader :sock
|
7
|
+
attr_reader :sock, :throughput
|
7
8
|
|
8
9
|
def initialize(sock)
|
9
10
|
@sock = sock
|
10
11
|
@len = nil
|
11
12
|
@buf = ""
|
13
|
+
|
14
|
+
# Metrics
|
15
|
+
@throughput = Metrics::Meter.new
|
12
16
|
end
|
13
17
|
|
14
18
|
def read
|
@@ -17,6 +21,7 @@ module Skylight
|
|
17
21
|
end
|
18
22
|
|
19
23
|
if chunk = read_sock
|
24
|
+
|
20
25
|
@buf << chunk
|
21
26
|
|
22
27
|
if !@len && @buf.bytesize >= FRAME_HDR_LEN
|
@@ -27,6 +32,10 @@ module Skylight
|
|
27
32
|
end
|
28
33
|
end
|
29
34
|
|
35
|
+
def cleanup
|
36
|
+
# Any cleanup code here
|
37
|
+
end
|
38
|
+
|
30
39
|
private
|
31
40
|
|
32
41
|
def read_len
|
@@ -66,7 +75,10 @@ module Skylight
|
|
66
75
|
end
|
67
76
|
|
68
77
|
def read_sock
|
69
|
-
@sock.read_nonblock(CHUNK_SIZE)
|
78
|
+
ret = @sock.read_nonblock(CHUNK_SIZE)
|
79
|
+
# Track the throughput
|
80
|
+
@throughput.mark(ret.bytesize) if ret
|
81
|
+
ret
|
70
82
|
rescue Errno::EAGAIN, Errno::EWOULDBLOCK
|
71
83
|
end
|
72
84
|
|
data/lib/skylight/worker/connection_set.rb
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'thread'
|
2
|
+
|
3
|
+
module Skylight
|
4
|
+
module Worker
|
5
|
+
class ConnectionSet
|
6
|
+
attr_reader :open_connections, :throughput
|
7
|
+
|
8
|
+
def initialize
|
9
|
+
@connections = {}
|
10
|
+
@lock = Mutex.new
|
11
|
+
|
12
|
+
# Metrics
|
13
|
+
@open_connections = build_open_connections_metric
|
14
|
+
@throughput = build_throughput_metric
|
15
|
+
end
|
16
|
+
|
17
|
+
def add(sock)
|
18
|
+
conn = Connection.new(sock)
|
19
|
+
@lock.synchronize { @connections[sock] = conn }
|
20
|
+
conn
|
21
|
+
end
|
22
|
+
|
23
|
+
def socks
|
24
|
+
@lock.synchronize { @connections.keys }
|
25
|
+
end
|
26
|
+
|
27
|
+
def [](sock)
|
28
|
+
@lock.synchronize do
|
29
|
+
@connections[sock]
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def cleanup(sock)
|
34
|
+
if conn = @lock.synchronize { @connections.delete(sock) }
|
35
|
+
conn.cleanup
|
36
|
+
sock.close rescue nil
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
def build_open_connections_metric
|
43
|
+
lambda do
|
44
|
+
@lock.synchronize { @connections.length }
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def build_throughput_metric
|
49
|
+
lambda do
|
50
|
+
conns = @lock.synchronize { @connections.values }
|
51
|
+
conns.map { |c| c.throughput.rate.to_i }
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
data/lib/skylight/worker/metrics_reporter.rb
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'thread'
|
2
|
+
require 'rbconfig'
|
3
|
+
|
4
|
+
module Skylight
|
5
|
+
module Worker
|
6
|
+
class MetricsReporter < Util::Task
|
7
|
+
|
8
|
+
include Util::Logging
|
9
|
+
|
10
|
+
attr_reader :config
|
11
|
+
|
12
|
+
def initialize(config)
|
13
|
+
super(1000, 0.25)
|
14
|
+
|
15
|
+
@metrics = {}
|
16
|
+
@config = config
|
17
|
+
@interval = config[:'metrics.report_interval']
|
18
|
+
@lock = Mutex.new
|
19
|
+
@next_report_at = nil
|
20
|
+
@http_auth = Util::HTTP.new(config, :accounts)
|
21
|
+
end
|
22
|
+
|
23
|
+
# A metric responds to #call and returns metric info
|
24
|
+
def register(name, metric)
|
25
|
+
@lock.synchronize { @metrics[name] = metric }
|
26
|
+
end
|
27
|
+
|
28
|
+
def unregister(name)
|
29
|
+
@lock.synchronize { @metrics.delete(name) }
|
30
|
+
end
|
31
|
+
|
32
|
+
# msg is always nil, but we can use the Task abstraction anyway
|
33
|
+
def handle(msg, now = Util::Clock.absolute_secs)
|
34
|
+
# Initially set the next report at
|
35
|
+
unless @next_report_at
|
36
|
+
update_next_report_at(now)
|
37
|
+
return true
|
38
|
+
end
|
39
|
+
|
40
|
+
if now < @next_report_at
|
41
|
+
# Nothing to do
|
42
|
+
return true
|
43
|
+
end
|
44
|
+
|
45
|
+
update_next_report_at(now)
|
46
|
+
post_report
|
47
|
+
|
48
|
+
true
|
49
|
+
end
|
50
|
+
|
51
|
+
def build_report
|
52
|
+
report = {
|
53
|
+
"hostname" => config[:'hostname'],
|
54
|
+
"host.info" => RbConfig::CONFIG['arch'],
|
55
|
+
"ruby.version" => RUBY_VERSION,
|
56
|
+
"ruby.engine" => RUBY_ENGINE,
|
57
|
+
"skylight.version" => Skylight::VERSION
|
58
|
+
}
|
59
|
+
|
60
|
+
metric_names.each do |name|
|
61
|
+
# Since we are operating in a concurrent environment, it is possible
|
62
|
+
# that the metric for the current name is unregistered before we
|
63
|
+
# access it here.
|
64
|
+
unless m = metric(name)
|
65
|
+
next
|
66
|
+
end
|
67
|
+
|
68
|
+
report[name] = m.call
|
69
|
+
end
|
70
|
+
|
71
|
+
report
|
72
|
+
end
|
73
|
+
|
74
|
+
def post_report
|
75
|
+
report = build_report
|
76
|
+
|
77
|
+
# Send the report
|
78
|
+
t { fmt "reporting internal metrics; payload=%s", report.inspect }
|
79
|
+
|
80
|
+
res = @http_auth.post("/agent/metrics", report: report)
|
81
|
+
|
82
|
+
unless res.success?
|
83
|
+
warn "internal metrics report failed; status=%s", res.status
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
private
|
88
|
+
|
89
|
+
def metric_names
|
90
|
+
@lock.synchronize { @metrics.keys }
|
91
|
+
end
|
92
|
+
|
93
|
+
def metric(name)
|
94
|
+
@lock.synchronize { @metrics[name] }
|
95
|
+
end
|
96
|
+
|
97
|
+
def update_next_report_at(now)
|
98
|
+
@next_report_at = now + @interval
|
99
|
+
end
|
100
|
+
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
data/lib/skylight/worker/server.rb
CHANGED
@@ -17,7 +17,6 @@ module Skylight
|
|
17
17
|
:keepalive,
|
18
18
|
:lockfile_path,
|
19
19
|
:sockfile_path,
|
20
|
-
:status_interval,
|
21
20
|
:last_status_update,
|
22
21
|
:max_memory
|
23
22
|
|
@@ -26,20 +25,23 @@ module Skylight
|
|
26
25
|
raise ArgumentError, "lockfile and unix domain server socket are required"
|
27
26
|
end
|
28
27
|
|
29
|
-
@pid
|
30
|
-
@run
|
31
|
-
@tick
|
32
|
-
@socks
|
33
|
-
@config
|
34
|
-
@server
|
35
|
-
@lockfile
|
36
|
-
@collector
|
37
|
-
@
|
38
|
-
@
|
28
|
+
@pid = Process.pid
|
29
|
+
@run = true
|
30
|
+
@tick = 1
|
31
|
+
@socks = []
|
32
|
+
@config = config
|
33
|
+
@server = srv
|
34
|
+
@lockfile = lockfile
|
35
|
+
@collector = Collector.build(config)
|
36
|
+
@metrics_reporter = @collector.metrics_reporter
|
37
|
+
@keepalive = @config[:'agent.keepalive']
|
38
|
+
@connections = ConnectionSet.new
|
39
39
|
@lockfile_path = lockfile_path
|
40
40
|
@sockfile_path = @config[:'agent.sockfile_path']
|
41
|
-
@
|
42
|
-
@
|
41
|
+
@process_mem_gauge = Metrics::ProcessMemGauge.new
|
42
|
+
@process_cpu_gauge = Metrics::ProcessCpuGauge.new
|
43
|
+
@max_memory = @config[:'agent.max_memory']
|
44
|
+
@booted_at = Util::Clock.absolute_secs
|
43
45
|
end
|
44
46
|
|
45
47
|
# Called from skylight.rb on require
|
@@ -122,9 +124,21 @@ module Skylight
|
|
122
124
|
private
|
123
125
|
|
124
126
|
def init
|
127
|
+
# TODO: Not super ideal to always iterate here even if debug mode isn't
|
128
|
+
# enabled, but it's not super perf critical. We will fix when we revamp
|
129
|
+
# logging
|
130
|
+
debug "initializing server; config=%s", config.to_env
|
131
|
+
|
125
132
|
trap('TERM') { @run = false }
|
126
133
|
trap('INT') { @run = false }
|
127
134
|
|
135
|
+
# Register metrics
|
136
|
+
@metrics_reporter.register("worker.memory", @process_mem_gauge)
|
137
|
+
@metrics_reporter.register("worker.cpu", @process_cpu_gauge)
|
138
|
+
@metrics_reporter.register("worker.uptime", lambda { Util::Clock.absolute_secs - @booted_at })
|
139
|
+
@metrics_reporter.register("worker.ipc.open-connections", @connections.open_connections)
|
140
|
+
@metrics_reporter.register("worker.ipc.throughput", @connections.throughput)
|
141
|
+
|
128
142
|
info "starting skylight daemon"
|
129
143
|
@collector.spawn
|
130
144
|
end
|
@@ -190,9 +204,8 @@ module Skylight
|
|
190
204
|
sanity_check
|
191
205
|
end
|
192
206
|
|
193
|
-
if
|
194
|
-
|
195
|
-
status_check
|
207
|
+
if @process_mem_gauge.call > max_memory
|
208
|
+
raise WorkerStateError, "Memory limit exceeded: #{memory_usage} (max: #{max_memory})"
|
196
209
|
end
|
197
210
|
end
|
198
211
|
|
@@ -213,6 +226,9 @@ module Skylight
|
|
213
226
|
end while @run
|
214
227
|
|
215
228
|
true # Successful return
|
229
|
+
ensure
|
230
|
+
# Send a final metrics report
|
231
|
+
@metrics_reporter.post_report
|
216
232
|
end
|
217
233
|
|
218
234
|
# Handles an incoming message. Will be instances from
|
@@ -254,7 +270,7 @@ module Skylight
|
|
254
270
|
def connect(sock)
|
255
271
|
trace "client accepted"
|
256
272
|
@socks << sock
|
257
|
-
@connections
|
273
|
+
@connections.add(sock)
|
258
274
|
end
|
259
275
|
|
260
276
|
def cleanup
|
@@ -272,16 +288,15 @@ module Skylight
|
|
272
288
|
end
|
273
289
|
|
274
290
|
def clients_close
|
275
|
-
@connections.
|
291
|
+
@connections.socks.each do |sock|
|
276
292
|
client_close(sock)
|
277
293
|
end
|
278
294
|
end
|
279
295
|
|
280
296
|
def client_close(sock)
|
281
297
|
trace "closing client connection; fd=%d", sock.fileno
|
282
|
-
@connections.
|
298
|
+
@connections.cleanup(sock)
|
283
299
|
@socks.delete(sock)
|
284
|
-
sock.close rescue nil
|
285
300
|
end
|
286
301
|
|
287
302
|
def sockfile
|
@@ -315,22 +330,6 @@ module Skylight
|
|
315
330
|
raise WorkerStateError, "sockfile gone"
|
316
331
|
end
|
317
332
|
end
|
318
|
-
|
319
|
-
def status_check
|
320
|
-
memory_usage = get_memory_usage
|
321
|
-
|
322
|
-
@collector.send_status(memory: memory_usage, max_memory: max_memory)
|
323
|
-
|
324
|
-
if memory_usage > max_memory
|
325
|
-
raise WorkerStateError, "Memory limit exceeded: #{memory_usage} (max: #{max_memory})"
|
326
|
-
end
|
327
|
-
end
|
328
|
-
|
329
|
-
def get_memory_usage
|
330
|
-
`ps -o rss= -p #{Process.pid}`.to_i / 1024
|
331
|
-
rescue Errno::ENOENT, Errno::EINTR
|
332
|
-
0
|
333
|
-
end
|
334
333
|
end
|
335
334
|
end
|
336
335
|
end
|