skylight 0.3.7 → 0.3.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/ext/libskylight.yml +3 -3
- data/ext/skylight_native.c +0 -41
- data/lib/skylight.rb +8 -0
- data/lib/skylight/config.rb +44 -41
- data/lib/skylight/metrics/ewma.rb +69 -0
- data/lib/skylight/metrics/meter.rb +58 -0
- data/lib/skylight/metrics/process_cpu_gauge.rb +65 -0
- data/lib/skylight/metrics/process_mem_gauge.rb +34 -0
- data/lib/skylight/util/clock.rb +13 -7
- data/lib/skylight/util/conversions.rb +9 -0
- data/lib/skylight/util/http.rb +6 -4
- data/lib/skylight/util/native_ext_fetcher.rb +1 -1
- data/lib/skylight/util/task.rb +21 -5
- data/lib/skylight/vendor/cli/highline.rb +42 -20
- data/lib/skylight/vendor/cli/highline/menu.rb +6 -23
- data/lib/skylight/vendor/cli/highline/question.rb +20 -14
- data/lib/skylight/vendor/cli/highline/string_extensions.rb +19 -39
- data/lib/skylight/vendor/cli/highline/system_extensions.rb +28 -4
- data/lib/skylight/version.rb +1 -1
- data/lib/skylight/worker.rb +8 -6
- data/lib/skylight/worker/builder.rb +1 -1
- data/lib/skylight/worker/collector.rb +57 -16
- data/lib/skylight/worker/connection.rb +14 -2
- data/lib/skylight/worker/connection_set.rb +56 -0
- data/lib/skylight/worker/metrics_reporter.rb +103 -0
- data/lib/skylight/worker/server.rb +35 -36
- metadata +23 -15
data/lib/skylight/version.rb
CHANGED
data/lib/skylight/worker.rb
CHANGED
@@ -5,12 +5,14 @@ module Skylight
|
|
5
5
|
CHUNK_SIZE = 16 * 1024
|
6
6
|
|
7
7
|
# === Modules
|
8
|
-
autoload :Builder,
|
9
|
-
autoload :Collector,
|
10
|
-
autoload :Connection,
|
11
|
-
autoload :
|
12
|
-
autoload :
|
13
|
-
autoload :
|
8
|
+
autoload :Builder, 'skylight/worker/builder'
|
9
|
+
autoload :Collector, 'skylight/worker/collector'
|
10
|
+
autoload :Connection, 'skylight/worker/connection'
|
11
|
+
autoload :ConnectionSet, 'skylight/worker/connection_set'
|
12
|
+
autoload :Embedded, 'skylight/worker/embedded'
|
13
|
+
autoload :MetricsReporter, 'skylight/worker/metrics_reporter'
|
14
|
+
autoload :Server, 'skylight/worker/server'
|
15
|
+
autoload :Standalone, 'skylight/worker/standalone'
|
14
16
|
|
15
17
|
end
|
16
18
|
end
|
@@ -11,23 +11,39 @@ module Skylight
|
|
11
11
|
|
12
12
|
include Util::Logging
|
13
13
|
|
14
|
-
attr_reader :config
|
14
|
+
attr_reader :config, :metrics_reporter
|
15
15
|
|
16
|
-
def initialize(config)
|
16
|
+
# Sets up the collector's sampling state, HTTP client and report meters.
#
# config           - configuration object; read here for :'agent.sample'
#                    and :'agent.interval' and handed to Util::HTTP.
# metrics_reporter - optional reporter that the collector's meters are
#                    registered with. May be nil (the default), in which
#                    case no metrics are registered.
def initialize(config, metrics_reporter = nil)
  # NOTE(review): the meaning of these two arguments is defined by the
  # parent class, which is not visible here.
  super(1000, 0.25)

  @config      = config
  @size        = config[:'agent.sample']
  @batch       = nil
  @interval    = config[:'agent.interval']
  @refresh_at  = 0
  @http_auth   = Util::HTTP.new(config, :accounts)
  @http_report = nil
  @report_meter = Metrics::Meter.new
  @report_success_meter = Metrics::Meter.new
  @metrics_reporter = metrics_reporter

  # The reporter is optional (see the default above and the guard in
  # #prepare), so only register the meters when one was supplied.
  if @metrics_reporter
    @metrics_reporter.register("collector.report-rate", @report_meter)
    @metrics_reporter.register("collector.report-success-rate", @report_success_meter)
  end

  t { fmt "starting collector; interval=%d; size=%d", @interval, @size }
end
|
30
35
|
|
36
|
+
# Convenience constructor: creates a MetricsReporter from the same config
# and returns a new instance wired to it.
def self.build(config)
  reporter = MetricsReporter.new(config)
  new(config, reporter)
end
|
39
|
+
|
40
|
+
# Registers the queue-depth metric with the metrics reporter and spawns the
# reporter. Does nothing (and returns nil) when no reporter is configured.
def prepare
  return unless @metrics_reporter

  @metrics_reporter.register("worker.collector.queue-depth", queue_depth_metric)
  @metrics_reporter.spawn
end
|
46
|
+
|
31
47
|
def handle(msg, now = Util::Clock.absolute_secs)
|
32
48
|
@batch ||= new_batch(now)
|
33
49
|
|
@@ -61,15 +77,23 @@ module Skylight
|
|
61
77
|
true
|
62
78
|
end
|
63
79
|
|
64
|
-
def
|
65
|
-
|
80
|
+
# Reports the exception carried by a failed HTTP response, attaching the
# target host/port and the request's path/method as additional info.
#
# http     - the HTTP client the request was made with (read for host, port)
# response - the response object (read for exception and request)
def send_http_exception(http, response)
  error   = response.exception
  details = {
    host:   http.host,
    port:   http.port,
    path:   response.request.path,
    method: response.request.method
  }

  send_exception(error, additional_info: details)
end
|
67
88
|
|
68
|
-
def send_exception(exception)
|
69
|
-
data = {class_name: exception.class.name
|
89
|
+
# Posts an exception report.
#
# exception - the object to report. Its class name is always included; the
#             message and backtrace are added only for Exception instances.
# data      - optional Hash merged over the base payload (so callers may
#             add keys such as :additional_info).
#
# The payload's :agent_info is the metrics reporter's current report, or
# nil when no metrics reporter is configured (the reporter is optional, so
# reporting an error must not itself fail because it is missing).
#
# Returns whatever post_data returns.
def send_exception(exception, data={})
  agent_info = @metrics_reporter && @metrics_reporter.build_report

  data = { class_name: exception.class.name,
           agent_info: agent_info }.merge(data)

  if Exception === exception
    data.merge!(message: exception.message, backtrace: exception.backtrace)
  end

  # Third argument is false so a failure to post this report does not
  # trigger another exception notification (see post_data's `notify`).
  post_data(:exception, data, false)
end
|
75
99
|
|
@@ -84,7 +108,12 @@ module Skylight
|
|
84
108
|
warn "#{type} wasn't sent successfully; status=%s", res.status
|
85
109
|
end
|
86
110
|
|
87
|
-
|
111
|
+
if res.exception
|
112
|
+
send_http_exception(@http_auth, res) if notify
|
113
|
+
false
|
114
|
+
else
|
115
|
+
true
|
116
|
+
end
|
88
117
|
rescue Exception => e
|
89
118
|
error "exception; msg=%s; class=%s", e.message, e.class
|
90
119
|
t { e.backtrace.join("\n") }
|
@@ -109,6 +138,10 @@ module Skylight
|
|
109
138
|
end
|
110
139
|
|
111
140
|
@batch = nil
|
141
|
+
ensure
|
142
|
+
if @metrics_reporter
|
143
|
+
@metrics_reporter.shutdown
|
144
|
+
end
|
112
145
|
end
|
113
146
|
|
114
147
|
def flush(batch)
|
@@ -116,8 +149,16 @@ module Skylight
|
|
116
149
|
|
117
150
|
debug "flushing batch; size=%d", batch.sample.count
|
118
151
|
|
152
|
+
@report_meter.mark
|
153
|
+
|
119
154
|
res = @http_report.post(ENDPOINT, batch.encode, CONTENT_TYPE => SKYLIGHT_V2)
|
120
|
-
|
155
|
+
|
156
|
+
if res.exception
|
157
|
+
send_http_exception(@http_report, res)
|
158
|
+
else
|
159
|
+
@report_success_meter.mark
|
160
|
+
end
|
161
|
+
|
121
162
|
nil
|
122
163
|
end
|
123
164
|
|
@@ -125,7 +166,7 @@ module Skylight
|
|
125
166
|
res = @http_auth.get("/agent/authenticate?hostname=#{escape(config[:'hostname'])}")
|
126
167
|
|
127
168
|
if res.exception
|
128
|
-
|
169
|
+
send_http_exception(@http_auth, res)
|
129
170
|
return
|
130
171
|
end
|
131
172
|
|
@@ -1,14 +1,18 @@
|
|
1
1
|
module Skylight
|
2
2
|
module Worker
|
3
|
+
# Represents the IPC client connection
|
3
4
|
class Connection
|
4
5
|
FRAME_HDR_LEN = 8
|
5
6
|
|
6
|
-
attr_reader :sock
|
7
|
+
attr_reader :sock, :throughput
|
7
8
|
|
8
9
|
# Wraps +sock+ with an empty read buffer, no pending frame length, and a
# fresh meter that tracks how many bytes are read from the socket.
def initialize(sock)
  @sock, @len, @buf = sock, nil, ""

  # Metrics
  @throughput = Metrics::Meter.new
end
|
13
17
|
|
14
18
|
def read
|
@@ -17,6 +21,7 @@ module Skylight
|
|
17
21
|
end
|
18
22
|
|
19
23
|
if chunk = read_sock
|
24
|
+
|
20
25
|
@buf << chunk
|
21
26
|
|
22
27
|
if !@len && @buf.bytesize >= FRAME_HDR_LEN
|
@@ -27,6 +32,10 @@ module Skylight
|
|
27
32
|
end
|
28
33
|
end
|
29
34
|
|
35
|
+
# Hook called when the connection is being discarded. There is currently
# nothing to release, so this is a no-op returning nil.
def cleanup
  # Any cleanup code here
  nil
end
|
38
|
+
|
30
39
|
private
|
31
40
|
|
32
41
|
def read_len
|
@@ -66,7 +75,10 @@ module Skylight
|
|
66
75
|
end
|
67
76
|
|
68
77
|
# Performs one non-blocking read of up to CHUNK_SIZE bytes and records the
# number of bytes read on the throughput meter.
#
# Returns the chunk read, or nil when the read would block.
def read_sock
  @sock.read_nonblock(CHUNK_SIZE).tap do |chunk|
    # Track the throughput
    @throughput.mark(chunk.bytesize) if chunk
  end
rescue Errno::EAGAIN, Errno::EWOULDBLOCK
  nil
end
|
72
84
|
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'thread'
|
2
|
+
|
3
|
+
module Skylight
  module Worker
    # Registry of client connections keyed by their socket. Every access to
    # the underlying hash goes through a mutex.
    #
    # Exposes two metrics, each an object responding to #call:
    #   open_connections - number of registered connections
    #   throughput       - array with one integer rate per connection
    class ConnectionSet
      attr_reader :open_connections, :throughput

      def initialize
        @connections = {}
        @lock = Mutex.new

        # Metrics
        @open_connections = build_open_connections_metric
        @throughput = build_throughput_metric
      end

      # Wraps +sock+ in a Connection, registers it and returns the Connection.
      def add(sock)
        conn = Connection.new(sock)
        @lock.synchronize { @connections[sock] = conn }
        conn
      end

      # Returns a snapshot array of the registered sockets.
      def socks
        @lock.synchronize { @connections.keys }
      end

      # Returns the Connection registered for +sock+, or nil.
      def [](sock)
        @lock.synchronize do
          @connections[sock]
        end
      end

      # Unregisters +sock+, runs the connection's cleanup hook and closes the
      # socket. Does nothing when +sock+ is not registered.
      #
      # The socket is closed even if the connection's cleanup hook raises, so
      # a failing hook cannot leak the descriptor; the hook's exception still
      # propagates to the caller. Errors from closing the socket itself are
      # ignored (best effort).
      #
      # Returns nil.
      def cleanup(sock)
        conn = @lock.synchronize { @connections.delete(sock) }
        return unless conn

        begin
          conn.cleanup
        ensure
          sock.close rescue nil
        end

        nil
      end

    private

      # Metric reporting how many connections are currently registered.
      def build_open_connections_metric
        lambda do
          @lock.synchronize { @connections.length }
        end
      end

      # Metric reporting each connection's throughput rate as an integer.
      # The connection list is snapshotted under the lock; the rates are read
      # outside of it.
      def build_throughput_metric
        lambda do
          conns = @lock.synchronize { @connections.values }
          conns.map { |c| c.throughput.rate.to_i }
        end
      end
    end
  end
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'thread'
|
2
|
+
require 'rbconfig'
|
3
|
+
|
4
|
+
module Skylight
  module Worker
    # Task that collects the registered metrics and periodically posts them
    # to "/agent/metrics" using the accounts HTTP client.
    class MetricsReporter < Util::Task

      include Util::Logging

      attr_reader :config

      def initialize(config)
        super(1000, 0.25)

        @config         = config
        @interval       = config[:'metrics.report_interval']
        @http_auth      = Util::HTTP.new(config, :accounts)
        @metrics        = {}
        @lock           = Mutex.new
        @next_report_at = nil
      end

      # Registers +metric+ under +name+.
      # A metric responds to #call and returns metric info
      def register(name, metric)
        @lock.synchronize { @metrics[name] = metric }
      end

      # Removes the metric registered under +name+, if any.
      def unregister(name)
        @lock.synchronize { @metrics.delete(name) }
      end

      # Posts a report once the report interval has elapsed. The very first
      # call only schedules the next report. Always returns true.
      #
      # msg is always nil, but we can use the Task abstraction anyway
      def handle(msg, now = Util::Clock.absolute_secs)
        if !@next_report_at
          # Initially set the next report at
          update_next_report_at(now)
        elsif !(now < @next_report_at)
          update_next_report_at(now)
          post_report
        end

        true
      end

      # Returns a Hash with host/runtime identification plus the current
      # value of every registered metric, keyed by metric name.
      def build_report
        base = {
          "hostname"         => config[:'hostname'],
          "host.info"        => RbConfig::CONFIG['arch'],
          "ruby.version"     => RUBY_VERSION,
          "ruby.engine"      => RUBY_ENGINE,
          "skylight.version" => Skylight::VERSION
        }

        metric_names.each_with_object(base) do |name, report|
          # Since we are operating in a concurrent environment, it is possible
          # that the metric for the current name is unregistered before we
          # access it here.
          m = metric(name)
          report[name] = m.call if m
        end
      end

      # Builds the report and posts it; logs a warning when the response is
      # not successful.
      def post_report
        payload = build_report

        # Send the report
        t { fmt "reporting internal metrics; payload=%s", payload.inspect }

        res = @http_auth.post("/agent/metrics", report: payload)

        warn "internal metrics report failed; status=%s", res.status unless res.success?
      end

    private

      # Snapshot of the registered metric names, taken under the lock.
      def metric_names
        @lock.synchronize { @metrics.keys }
      end

      # The metric registered under +name+ (nil if it has been removed).
      def metric(name)
        @lock.synchronize { @metrics[name] }
      end

      # Schedules the next report one interval after +now+.
      def update_next_report_at(now)
        @next_report_at = now + @interval
      end

    end
  end
end
|
@@ -17,7 +17,6 @@ module Skylight
|
|
17
17
|
:keepalive,
|
18
18
|
:lockfile_path,
|
19
19
|
:sockfile_path,
|
20
|
-
:status_interval,
|
21
20
|
:last_status_update,
|
22
21
|
:max_memory
|
23
22
|
|
@@ -26,20 +25,23 @@ module Skylight
|
|
26
25
|
raise ArgumentError, "lockfile and unix domain server socket are required"
|
27
26
|
end
|
28
27
|
|
29
|
-
@pid
|
30
|
-
@run
|
31
|
-
@tick
|
32
|
-
@socks
|
33
|
-
@config
|
34
|
-
@server
|
35
|
-
@lockfile
|
36
|
-
@collector
|
37
|
-
@
|
38
|
-
@
|
28
|
+
@pid = Process.pid
|
29
|
+
@run = true
|
30
|
+
@tick = 1
|
31
|
+
@socks = []
|
32
|
+
@config = config
|
33
|
+
@server = srv
|
34
|
+
@lockfile = lockfile
|
35
|
+
@collector = Collector.build(config)
|
36
|
+
@metrics_reporter = @collector.metrics_reporter
|
37
|
+
@keepalive = @config[:'agent.keepalive']
|
38
|
+
@connections = ConnectionSet.new
|
39
39
|
@lockfile_path = lockfile_path
|
40
40
|
@sockfile_path = @config[:'agent.sockfile_path']
|
41
|
-
@
|
42
|
-
@
|
41
|
+
@process_mem_gauge = Metrics::ProcessMemGauge.new
|
42
|
+
@process_cpu_gauge = Metrics::ProcessCpuGauge.new
|
43
|
+
@max_memory = @config[:'agent.max_memory']
|
44
|
+
@booted_at = Util::Clock.absolute_secs
|
43
45
|
end
|
44
46
|
|
45
47
|
# Called from skylight.rb on require
|
@@ -122,9 +124,21 @@ module Skylight
|
|
122
124
|
private
|
123
125
|
|
124
126
|
# Prepares the server process: logs the configuration, installs TERM/INT
# handlers that clear the run flag, registers the worker metrics with the
# metrics reporter, and spawns the collector.
def init
  # TODO: Not super ideal to always iterate here even if debug mode isn't
  # enabled, but it's not super perf critical. We will fix when we revamp
  # logging
  debug "initializing server; config=%s", config.to_env

  %w(TERM INT).each do |signal|
    trap(signal) { @run = false }
  end

  # Register metrics
  uptime = lambda { Util::Clock.absolute_secs - @booted_at }

  [
    ["worker.memory",               @process_mem_gauge],
    ["worker.cpu",                  @process_cpu_gauge],
    ["worker.uptime",               uptime],
    ["worker.ipc.open-connections", @connections.open_connections],
    ["worker.ipc.throughput",       @connections.throughput]
  ].each do |name, metric|
    @metrics_reporter.register(name, metric)
  end

  info "starting skylight daemon"
  @collector.spawn
end
|
@@ -190,9 +204,8 @@ module Skylight
|
|
190
204
|
sanity_check
|
191
205
|
end
|
192
206
|
|
193
|
-
if
|
194
|
-
|
195
|
-
status_check
|
207
|
+
if @process_mem_gauge.call > max_memory
|
208
|
+
raise WorkerStateError, "Memory limit exceeded: #{memory_usage} (max: #{max_memory})"
|
196
209
|
end
|
197
210
|
end
|
198
211
|
|
@@ -213,6 +226,9 @@ module Skylight
|
|
213
226
|
end while @run
|
214
227
|
|
215
228
|
true # Successful return
|
229
|
+
ensure
|
230
|
+
# Send a final metrics report
|
231
|
+
@metrics_reporter.post_report
|
216
232
|
end
|
217
233
|
|
218
234
|
# Handles an incoming message. Will be instances from
|
@@ -254,7 +270,7 @@ module Skylight
|
|
254
270
|
# Records a newly accepted client socket: appends it to the socket list and
# registers it with the connection set. Returns the value of
# @connections.add.
def connect(sock)
  trace "client accepted"
  @socks.push(sock)
  @connections.add(sock)
end
|
259
275
|
|
260
276
|
def cleanup
|
@@ -272,16 +288,15 @@ module Skylight
|
|
272
288
|
end
|
273
289
|
|
274
290
|
# Closes every client socket currently registered in the connection set.
def clients_close
  @connections.socks.each { |sock| client_close(sock) }
end
|
279
295
|
|
280
296
|
# Tears down a single client: hands the socket to the connection set for
# cleanup and removes it from the socket list.
def client_close(sock)
  fd = sock.fileno
  trace "closing client connection; fd=%d", fd

  @connections.cleanup(sock)
  @socks.delete(sock)
end
|
286
301
|
|
287
302
|
def sockfile
|
@@ -315,22 +330,6 @@ module Skylight
|
|
315
330
|
raise WorkerStateError, "sockfile gone"
|
316
331
|
end
|
317
332
|
end
|
318
|
-
|
319
|
-
def status_check
|
320
|
-
memory_usage = get_memory_usage
|
321
|
-
|
322
|
-
@collector.send_status(memory: memory_usage, max_memory: max_memory)
|
323
|
-
|
324
|
-
if memory_usage > max_memory
|
325
|
-
raise WorkerStateError, "Memory limit exceeded: #{memory_usage} (max: #{max_memory})"
|
326
|
-
end
|
327
|
-
end
|
328
|
-
|
329
|
-
def get_memory_usage
|
330
|
-
`ps -o rss= -p #{Process.pid}`.to_i / 1024
|
331
|
-
rescue Errno::ENOENT, Errno::EINTR
|
332
|
-
0
|
333
|
-
end
|
334
333
|
end
|
335
334
|
end
|
336
335
|
end
|