skylight 0.3.21 → 0.4.0.alpha1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +0 -4
- data/ext/extconf.rb +92 -47
- data/ext/libskylight.yml +4 -4
- data/ext/skylight_native.c +248 -286
- data/lib/skylight.rb +19 -114
- data/lib/skylight/api.rb +1 -1
- data/lib/skylight/config.rb +176 -146
- data/lib/skylight/data/cacert.pem +717 -719
- data/lib/skylight/formatters/http.rb +1 -1
- data/lib/skylight/instrumenter.rb +28 -35
- data/lib/skylight/native.rb +58 -72
- data/lib/skylight/normalizers.rb +0 -1
- data/lib/skylight/normalizers/active_record/sql.rb +0 -4
- data/lib/skylight/probes/excon/middleware.rb +3 -1
- data/lib/skylight/probes/net_http.rb +3 -1
- data/lib/skylight/subscriber.rb +0 -4
- data/lib/skylight/trace.rb +189 -0
- data/lib/skylight/util.rb +10 -12
- data/lib/skylight/util/hostname.rb +17 -0
- data/lib/skylight/util/http.rb +33 -36
- data/lib/skylight/util/logging.rb +20 -1
- data/lib/skylight/util/multi_io.rb +21 -0
- data/lib/skylight/util/native_ext_fetcher.rb +83 -69
- data/lib/skylight/util/platform.rb +67 -0
- data/lib/skylight/util/ssl.rb +50 -0
- data/lib/skylight/version.rb +1 -1
- metadata +9 -34
- data/ext/rust_support/ruby.h +0 -93
- data/ext/skylight.h +0 -85
- data/ext/skylight.map +0 -4
- data/ext/test/extconf.rb +0 -18
- data/ext/test/skylight_native_test.c +0 -82
- data/ext/test/skylight_test.h +0 -20
- data/lib/skylight/formatters.rb +0 -6
- data/lib/skylight/messages.rb +0 -21
- data/lib/skylight/messages/error.rb +0 -15
- data/lib/skylight/messages/hello.rb +0 -13
- data/lib/skylight/messages/trace.rb +0 -179
- data/lib/skylight/messages/trace_envelope.rb +0 -19
- data/lib/skylight/metrics.rb +0 -9
- data/lib/skylight/metrics/ewma.rb +0 -69
- data/lib/skylight/metrics/meter.rb +0 -58
- data/lib/skylight/metrics/process_cpu_gauge.rb +0 -65
- data/lib/skylight/metrics/process_mem_gauge.rb +0 -34
- data/lib/skylight/util/conversions.rb +0 -9
- data/lib/skylight/util/queue.rb +0 -96
- data/lib/skylight/util/task.rb +0 -172
- data/lib/skylight/util/uniform_sample.rb +0 -63
- data/lib/skylight/worker.rb +0 -19
- data/lib/skylight/worker/builder.rb +0 -73
- data/lib/skylight/worker/collector.rb +0 -274
- data/lib/skylight/worker/connection.rb +0 -87
- data/lib/skylight/worker/connection_set.rb +0 -56
- data/lib/skylight/worker/embedded.rb +0 -24
- data/lib/skylight/worker/metrics_reporter.rb +0 -104
- data/lib/skylight/worker/server.rb +0 -336
- data/lib/skylight/worker/standalone.rb +0 -421
@@ -1,336 +0,0 @@
|
|
1
|
-
require 'socket'
|
2
|
-
|
3
|
-
module Skylight
  module Worker
    # Agent daemon process. It is handed an open lockfile and a unix domain
    # server socket, accepts client connections on that socket, reads messages
    # from them and submits traces/errors to the collector. It shuts itself
    # down when no client has been connected for `keepalive` seconds, when the
    # periodic sanity check fails, or when the memory gauge exceeds
    # `max_memory`.
    class Server
      # Environment variable names. .exec writes the lockfile path, lockfile FD
      # and server socket FD under these keys; .boot reads them back.
      LOCKFILE_PATH    = 'SKYLIGHT_LOCKFILE_PATH'.freeze
      LOCKFILE_ENV_KEY = 'SKYLIGHT_LOCKFILE_FD'.freeze
      UDS_SRV_FD_KEY   = 'SKYLIGHT_UDS_FD'.freeze
      KEEPALIVE_KEY    = 'SKYLIGHT_KEEPALIVE'.freeze

      include Util::Logging

      attr_reader :pid,
                  :tick,
                  :config,
                  :keepalive,
                  :lockfile_path,
                  :sockfile_path,
                  :last_status_update,
                  :max_memory

      # config        - configuration object; read with `config[:'agent.*']`
      # lockfile      - open IO for the lockfile (required)
      # srv           - listening unix domain server socket (required)
      # lockfile_path - filesystem path of the lockfile
      #
      # Raises ArgumentError when lockfile or srv is missing.
      def initialize(config, lockfile, srv, lockfile_path)
        unless lockfile && srv
          raise ArgumentError, "lockfile and unix domain server socket are required"
        end

        @pid      = Process.pid
        @run      = true
        # Seconds; used both as the IO.select timeout and the sanity check interval.
        @tick     = 1
        @socks    = []
        @config   = config
        @server   = srv
        @lockfile = lockfile
        @collector = Collector.build(config)
        @metrics_reporter = @collector.metrics_reporter
        @keepalive = @config[:'agent.keepalive']
        @connections = ConnectionSet.new
        @lockfile_path = lockfile_path
        @sockfile_path = @config[:'agent.sockfile_path']
        @process_mem_gauge = Metrics::ProcessMemGauge.new
        @process_cpu_gauge = Metrics::ProcessCpuGauge.new
        @max_memory = @config[:'agent.max_memory']
        @booted_at = Util::Clock.absolute_secs
      end

      # Called from skylight.rb on require.
      #
      # Rebuilds the server from the environment prepared by .exec and runs it.
      # Any missing or invalid piece of state prints a message to STDERR and
      # exits the process with status 1.
      def self.boot
        config = Config.load_from_env

        fd = ENV[LOCKFILE_ENV_KEY]
        abort "missing lockfile FD" unless fd
        # \A/\z anchor the whole value; ^/$ would accept multi-line strings.
        abort "invalid lockfile FD" unless fd =~ /\A\d+\z/

        begin
          lockfile = IO.open(fd.to_i)
        rescue StandardError => e
          abort "invalid lockfile FD: #{e.message}"
        end

        lockfile_path = ENV[LOCKFILE_PATH]
        abort "missing lockfile path" unless lockfile_path
        abort "missing sockfile path" unless config[:'agent.sockfile_path']

        srv_fd = ENV[UDS_SRV_FD_KEY]
        srv    = srv_fd ? UNIXServer.for_fd(srv_fd.to_i) : nil

        new(config, lockfile, srv, lockfile_path).run
      end

      # Replaces the current process with `cmd`, passing the configuration, the
      # lockfile path, the lockfile FD and (when given) the server socket FD
      # through the environment.
      #
      # cmd           - Array of command-line words
      # config        - configuration object responding to #to_env
      # lockfile      - open IO for the lockfile
      # srv           - server socket IO, or nil
      # lockfile_path - filesystem path of the lockfile
      def self.exec(cmd, config, lockfile, srv, lockfile_path)
        env = config.to_env
        env.merge!(
          STANDALONE_ENV_KEY => STANDALONE_ENV_VAL,
          LOCKFILE_PATH      => lockfile_path,
          LOCKFILE_ENV_KEY   => lockfile.fileno.to_s)

        env[UDS_SRV_FD_KEY] = srv.fileno.to_s if srv

        # On 1.9+ each descriptor is mapped to itself in the exec options so
        # the new program image keeps it open.
        opts = {}
        unless RUBY_VERSION < '1.9'
          [lockfile, srv].each do |io|
            next unless io
            fd = io.fileno.to_i
            opts[fd] = fd
          end
        end

        Kernel.exec(*([env] + cmd + [opts]))
      end

      # Initializes the server, runs the IO loop until shutdown, and always
      # cleans up afterwards. Returns the result of the IO loop.
      def run
        init
        work
      ensure
        cleanup
      end

      private

      # Installs signal handlers, registers the worker metrics and starts the
      # collector.
      def init
        # TODO: Not super ideal to always iterate here even if debug mode isn't
        # enabled, but it's not super perf critical. We will fix when we revamp
        # logging
        debug "initializing server; config=%s", config.to_env

        trap('TERM') { @run = false }
        trap('INT')  { @run = false }

        # Register metrics
        @metrics_reporter.register("worker.memory", @process_mem_gauge)
        @metrics_reporter.register("worker.cpu", @process_cpu_gauge)
        @metrics_reporter.register("worker.uptime", lambda { Util::Clock.absolute_secs - @booted_at })
        @metrics_reporter.register("worker.ipc.open-connections", @connections.open_connections)
        @metrics_reporter.register("worker.ipc.throughput", @connections.throughput)

        info "starting skylight daemon"
        @collector.spawn
      end

      # The IO loop. Returns true on an orderly shutdown and false when an
      # unexpected exception ended the loop. A final metrics report is posted
      # either way.
      def work
        t { "server working" }
        @socks << @server

        now = Time.now.to_i
        next_sanity_check_at = now + tick
        had_client_at = now

        trace "starting IO loop"
        begin
          # Wait for something to do
          ready, = IO.select(@socks, [], [], tick)
          ready.each { |sock| service_socket(sock) } if ready

          now = Time.now.to_i

          # @socks always contains the server socket; anything beyond that is
          # a connected client.
          had_client_at = now if @socks.length > 1

          if keepalive < now - had_client_at
            info "no clients for #{keepalive} sec - shutting down"
            @run = false
          else
            if next_sanity_check_at <= now
              next_sanity_check_at = now + tick
              sanity_check
            end

            memory_usage = @process_mem_gauge.call
            if memory_usage > max_memory
              raise WorkerStateError, "Memory limit exceeded: #{memory_usage} (max: #{max_memory})"
            end
          end

        rescue SignalException => e
          error "Did not handle: #{e.class}"
          @run = false
        rescue WorkerStateError => e
          info "#{e.message} - shutting down"
          @run = false
        rescue Exception => e
          # Last-resort handler for the daemon loop: report and stop.
          error "Loop exception: %s (%s)\n%s", e.message, e.class, e.backtrace.join("\n")
          @collector.send_exception(e)
          return false
        end while @run

        true # Successful return
      ensure
        # Send a final metrics report
        @metrics_reporter.post_report
      end

      # Services one socket reported readable by IO.select: accepts a pending
      # client on the server socket, or drains all available messages from a
      # client connection. A client whose read fails is closed.
      def service_socket(sock)
        if sock == @server
          # If the server socket, accept the incoming connection
          client = accept
          connect(client) if client
          return
        end

        # Client socket, lookup the associated connection state machine.
        conn = @connections[sock]
        unless conn
          # No associated connection, weird.. bail
          client_close(sock)
          return
        end

        begin
          # Pop em while we got em
          while (msg = conn.read)
            handle(msg)
          end
        rescue SystemCallError, EOFError
          client_close(sock)
        rescue IpcProtoError => e
          error "Server#work - IPC protocol exception: %s", e.message
          client_close(sock)
        end
      end

      # Handles an incoming message. Will be instances from
      # the Messages namespace
      def handle(msg)
        case msg
        when nil
          return
        when Hello
          if msg.newer?
            info "newer version of agent deployed - restarting; curr=%s; new=%s", VERSION, msg.version
            reload(msg)
          end
        when Messages::TraceEnvelope, Error
          t { "received message" }
          @collector.submit(msg)
        when :unknown
          debug "received unknown message"
        else
          debug "recieved: %s", msg
        end
      end

      # Drops every client and re-execs the process using the command carried
      # by the hello message, handing over the lockfile and server socket.
      def reload(hello)
        # Close all client connections
        trace "closing all client connections"
        clients_close

        # Re-exec the process
        trace "re-exec"
        Server.exec(hello.cmd, @config, @lockfile, @server, lockfile_path)
      end

      # Accepts a pending connection without blocking. Returns the client
      # socket, or nil when nothing could be accepted.
      def accept
        @server.accept_nonblock
      rescue Errno::EWOULDBLOCK, Errno::EAGAIN, Errno::ECONNABORTED
        nil
      end

      # Starts tracking a freshly accepted client socket.
      def connect(sock)
        trace "client accepted"
        @socks << sock
        @connections.add(sock)
      end

      # Removes this process's sockfile and closes all sockets. The lockfile
      # handle is closed even when one of those steps raises.
      def cleanup
        t { "server cleaning up" }
        # The lockfile is not deleted. There is no way to atomically ensure
        # that we are deleting the lockfile for the current process.
        cleanup_curr_sockfile
        close
      ensure
        @lockfile.close
      end

      # Closes the server socket and every client connection.
      def close
        @server.close if @server
        clients_close
      end

      # Closes every tracked client connection.
      # NOTE(review): client_close mutates @connections while its #socks is
      # being iterated -- confirm ConnectionSet#socks returns a copy.
      def clients_close
        @connections.socks.each do |sock|
          client_close(sock)
        end
      end

      # Stops tracking a client socket and lets the connection set clean it up.
      def client_close(sock)
        trace "closing client connection; fd=%d", sock.fileno
        @connections.cleanup(sock)
        @socks.delete(sock)
      end

      # Path of the socket file belonging to this process.
      def sockfile
        "#{sockfile_path}/skylight-#{pid}.sock"
      end

      def sockfile?
        File.exist?(sockfile)
      end

      # Best-effort removal of this process's socket file.
      def cleanup_curr_sockfile
        File.unlink(sockfile)
      rescue SystemCallError
        nil
      end

      # Verifies that this process still owns the lockfile and that its socket
      # file still exists. Raises WorkerStateError otherwise.
      def sanity_check
        raise WorkerStateError, "lockfile gone" unless File.exist?(lockfile_path)

        owner =
          begin
            File.read(lockfile_path)
          rescue StandardError
            nil
          end

        raise WorkerStateError, "could not read lockfile" unless owner

        unless owner == Process.pid.to_s
          raise WorkerStateError, "lockfile points to different process"
        end

        raise WorkerStateError, "sockfile gone" unless sockfile?
      end
    end
  end
end
|
@@ -1,421 +0,0 @@
|
|
1
|
-
require 'socket'
|
2
|
-
require 'thread'
|
3
|
-
require 'fileutils'
|
4
|
-
require 'rbconfig'
|
5
|
-
|
6
|
-
# TODO: Handle cool-off
|
7
|
-
module Skylight
  module Worker
    # Handle to the agent subprocess. Manages creation, communication, and
    # shutdown. Lazily spawns a thread that handles writing messages to the
    # unix domain socket
    #
    class Standalone
      include Util::Logging

      # Locates skylight_native so that it can be included in the standalone agent startup command
      #
      # Returns the first entry of $LOADED_FEATURES naming the native
      # extension, or nil when it has not been loaded.
      def self.locate_skylight_native
        # Build the pattern once instead of re-interpolating it per feature.
        pattern = /skylight_native\.#{Regexp.escape(RbConfig::CONFIG['DLEXT'].to_s)}/
        $LOADED_FEATURES.find { |feature| feature =~ pattern }
      end

      # Builds the command line (Array of Strings) that starts the agent:
      # the ruby binary, one -I flag per load path, then the startup script.
      def self.build_subprocess_cmd
        # Native extension location
        native_path = locate_skylight_native
        native_dir  = native_path ? File.dirname(native_path) : nil

        paths = [
          File.expand_path('../../..', __FILE__), # Ruby code root
          native_dir
        ].uniq.compact

        ret = [ RUBYBIN ]
        paths.each { |path| ret << "-I" << path }
        ret << File.expand_path('../../../skylight.rb', __FILE__) # The agent startup script
        ret
      end

      # Used to start the standalone agent as well as included in the hello message
      SUBPROCESS_CMD = build_subprocess_cmd

      # Used to handle starting the thread
      LOCK = Mutex.new

      # Value passed to setsockopt for SO_SNDTIMEO: two native longs,
      # 0 seconds and 10_000 microseconds.
      SOCK_TIMEOUT_VAL = [ 0, 0.01 * 1_000_000 ].pack("l_2")

      attr_reader :pid,
                  :config,
                  :lockfile,
                  :keepalive,
                  :max_spawns,
                  :spawn_window,
                  :sockfile_path

      # config   - configuration object; read with `config[:'agent.*']`
      # lockfile - path of the lockfile
      # server   - object whose #exec starts the agent (called in the forked child)
      #
      # Raises ArgumentError when any argument is missing.
      def initialize(config, lockfile, server)
        @pid  = nil
        @sock = nil

        unless config && lockfile && server
          raise ArgumentError, "all arguments are required"
        end

        @me       = Process.pid
        @config   = config
        @spawns   = []
        @server   = server
        @lockfile = lockfile
        @keepalive = config[:'agent.keepalive']
        @sockfile_path = config[:'agent.sockfile_path']

        # Should be configurable
        @max_spawns   = 3
        @spawn_window = 5 * 60

        # Writer background processor will accept messages and write them to
        # the IPC socket
        @writer = build_queue
      end

      # Spawns (or connects to) the agent and starts the writer. Returns true
      # on success and nil when already spawned or when spawning failed.
      def spawn(*args)
        return if @pid
        return unless __spawn(*args)

        @writer.spawn
        true
      end

      # Queues a message for delivery to the agent. The message is dropped
      # when no agent is known.
      #
      # Raises ArgumentError when msg responds to neither #encode nor
      # #native_serialize.
      def submit(msg)
        unless msg.respond_to?(:encode) || msg.respond_to?(:native_serialize)
          raise ArgumentError, "message not encodable"
        end

        unless @pid
          t { "no pid, can't submit: #{msg.inspect}" }
          return
        end

        handle_fork if @me != Process.pid

        @writer.submit(msg, @me)
      end

      # Shutdown any side task threads. Let the agent process die on it's own.
      def shutdown
        # TODO: implement
        @writer.submit(:SHUTDOWN)
        @writer.shutdown
      end

      # Shutdown any side task threads as well as the agent process
      def shutdown_all
        # TODO: implement
        shutdown
      end

      private

      # Forks the agent and waits up to `timeout` seconds for it to publish its
      # pid in the lockfile and open its socket, then connects and sends the
      # hello message.
      #
      # Returns true once connected; false when the spawn limit was hit or the
      # agent did not come up in time. Raises ArgumentError for timeout < 2.
      def __spawn(timeout = 10)
        raise ArgumentError, "at least 2 seconds required" if timeout < 2

        start = Time.now

        if @spawns.length >= @max_spawns
          if @spawn_window >= (start - @spawns.first)
            trace "too many spawns in window"
            return false
          end

          # Drop the oldest spawn time so the history stays a sliding window
          # of the last @max_spawns attempts.
          @spawns.shift
        end

        @spawns << start

        check_permissions

        lockf = File.open lockfile, File::RDWR | File::CREAT

        spawn_worker(lockf)

        while timeout >= (Time.now - start)
          if (pid = read_lockfile) && sockfile?(pid) && (sock = connect(pid))
            trace "connected to unix socket; pid=%s", pid
            write_msg(sock, build_hello)
            @sock = sock
            @pid  = pid
            return true
          end

          sleep 0.1
        end

        trace "failed to spawn worker"
        false
      ensure
        close_quietly(lockf)
      end

      # Re-establishes the connection to the agent, respawning it if needed.
      # Returns true when a connection is available again; on failure clears
      # the known pid and socket and returns false.
      def repair
        close_quietly(@sock)

        t { "repairing socket" }

        # Attempt to reconnect to the currently known agent PID. If the agent
        # is still healthy but is simply reloading itself, this should work
        # just fine.
        if (sock = connect(@pid))
          t { "reconnected to worker" }
          @sock = sock
          # TODO: Should HELLO be sent again?
          return true
        end

        debug "failed to reconnect -- attempting worker respawn"

        # Attempt to respawn the agent process
        unless __spawn
          debug "could not respawn -- shutting down"

          @pid  = nil
          @sock = nil
          return false
        end

        true
      end

      # Callback run by the writer task for each item. Returns false to signal
      # the shutdown and failure paths, true otherwise.
      #
      # msg - :SHUTDOWN, a message to send, or a falsy value (in which case
      #       the socket is only probed)
      def writer_tick(msg)
        if :SHUTDOWN == msg
          trace "shuting down agent connection"
          @sock.close if @sock
          @pid = nil

          return false
        end

        return handle(msg) if msg

        begin
          # Probe the socket with a 1-byte non-blocking read; any failure
          # other than "nothing to read" triggers a repair.
          @sock.read_nonblock(1)
        rescue Errno::EWOULDBLOCK, Errno::EAGAIN, Errno::EINTR
          # Nothing to read right now.
        rescue StandardError => e
          trace "bad socket: #{e}"
          unless repair
            raise WorkerStateError, "could not repair connection to agent"
          end
        end

        true
      rescue WorkerStateError => e
        error "skylight shutting down: %s", e.message
        false
      end

      # Writes msg to the agent, repairing the connection and retrying once
      # when the write fails. Returns true when the message was written.
      def handle(msg)
        2.times do
          unless (sock = @sock)
            return false unless repair
            sock = @sock
          end

          return true if write_msg(sock, msg)

          @sock = nil
          close_quietly(sock)

          return false unless repair
        end

        debug "could not handle message; msg=%s", msg.class

        false
      end

      # Frames and writes one message: a header of two native unsigned longs
      # (message type id, payload byte size) followed by the serialized
      # payload. Returns true when both parts were written.
      def write_msg(sock, msg)
        t { "writing a #{msg.class} on the wire" }
        id  = Messages::KLASS_TO_ID.fetch(msg.class)
        buf = msg.serialize

        frame = [ id, buf.bytesize ].pack("LL")

        write(sock, frame) && write(sock, buf)
      end

      # TODO: Handle configuring the socket with proper timeouts
      #
      # Connects to the socket file of the agent with the given pid. Returns
      # the socket, or nil when the connection could not be opened.
      def connect(pid)
        sock =
          begin
            UNIXSocket.new(sockfile(pid))
          rescue StandardError
            nil
          end
        return unless sock

        sock.setsockopt Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, SOCK_TIMEOUT_VAL
        sock
      end

      # Writes all of msg to sock without blocking indefinitely.
      #
      # timeout - seconds to wait for the socket to become writable again
      #           after EAGAIN/EWOULDBLOCK
      #
      # Returns true when everything was written; false after 10 consecutive
      # zero-byte writes, a writability timeout, or a system call error.
      # Errno::EINTR is re-raised.
      def write(sock, msg, timeout = 5)
        msg = msg.to_s
        cnt = 10

        begin
          loop do
            res = sock.write_nonblock(msg)

            if res == msg.bytesize
              return true
            elsif res > 0
              # Partial write: keep the unwritten tail, reset the attempt budget.
              msg = msg.byteslice(res..-1)
              cnt = 10
            elsif (cnt -= 1) <= 0
              t { "write failed -- max attempts" }
              return false
            end
          end
        rescue Errno::EAGAIN, Errno::EWOULDBLOCK
          _, socks, = IO.select([], [sock], [], timeout)
          unless socks == [sock]
            t { "write timed out" }
            return false
          end
          retry
        rescue Errno::EINTR
          raise
        rescue SystemCallError => e
          t { fmt "write failed; err=%s", e.class }
          return false
        end
      end

      # Spawn the worker process.
      #
      # f - open File for the lockfile; the worker locks it and writes its pid
      def spawn_worker(f)
        pid = fork do
          # Note: By default, Ruby will finalize C objects inside the fork. Because those C objects
          # are shared with the parent, this can cause database connections to disconnect in the
          # parent process. We need to double-fork for proper semantics, so we disable the GC and
          # exit! to avoid finalizing shared handles.
          #
          # We should continue to look for alternate solutions, and to determine whether there is
          # still a possible race between the fork and the GC disabling.
          ::GC.disable
          ::Process.setsid
          exit! if fork

          # Acquire exclusive file lock, exit otherwise
          exit! 1 unless f.flock(File::LOCK_EX | File::LOCK_NB)

          f.truncate(0)

          # Lock acquired, cleanup old sock files
          Dir["#{sockfile_path}/skylight-*.sock"].each { |stale| unlink_quietly(stale) }

          worker_pid = Process.pid.to_s

          # Write the pid
          f.write(worker_pid)
          f.flush

          sf = sockfile(worker_pid)
          unlink_quietly(sf)

          t { fmt "opening a new socket; %s", sf }
          srv = UNIXServer.new(sf)

          unless ENV[TRACE_ENV_KEY]
            null = File.open "/dev/null", File::RDWR
            STDIN.reopen null
            STDOUT.reopen null
            STDERR.reopen null
          end

          # Cleanup the ENV
          ENV['RUBYOPT'] = nil

          @server.exec(SUBPROCESS_CMD, @config, f, srv, lockfile)
        end

        Process.detach(pid)
      end

      # If the process was forked, create a new queue and restart the worker
      def handle_fork
        LOCK.synchronize do
          # Re-check under the lock: another thread may have recovered already.
          if @me != Process.pid
            trace "process forked; recovering"
            # Update the current process ID
            @me = Process.pid

            # Deal w/ the inherited socket
            close_quietly(@sock)
            @sock = nil

            @writer = build_queue
            @writer.spawn
          end
        end
      end

      # Creates the lockfile and sockfile directories if needed and raises
      # WorkerStateError when the lockfile (or, if it does not exist yet, its
      # directory) or the sockfile directory is not writable.
      def check_permissions
        lockfile_root = File.dirname(lockfile)

        FileUtils.mkdir_p lockfile_root
        FileUtils.mkdir_p sockfile_path

        lock_target = File.exist?(lockfile) ? lockfile : lockfile_root
        unless FileTest.writable?(lock_target)
          raise WorkerStateError, "`#{lock_target}` not writable. Please set agent.lockfile or agent.sockfile_path in your config to a writable path."
        end

        unless FileTest.writable?(sockfile_path)
          raise WorkerStateError, "`#{sockfile_path}` not writable. Please set agent.sockfile_path in your config to a writable path."
        end
      end

      def build_hello
        Messages::Hello.build(VERSION, SUBPROCESS_CMD)
      end

      def build_queue
        Util::Task.new(100, 1) { |m| writer_tick(m) }
      end

      # Returns the pid stored in the lockfile as an Integer, or nil when the
      # file is unreadable or does not consist solely of digits.
      def read_lockfile
        pid =
          begin
            File.read(lockfile)
          rescue StandardError
            nil
          end

        pid.to_i if pid =~ /\A\d+\z/
      end

      # Path of the socket file belonging to the agent with the given pid.
      def sockfile(pid)
        "#{sockfile_path}/skylight-#{pid}.sock"
      end

      def sockfile?(pid)
        File.exist?(sockfile(pid))
      end

      # Closes io (when given), ignoring any StandardError raised by the close.
      def close_quietly(io)
        io.close if io
      rescue StandardError
        nil
      end

      # Best-effort file removal; any StandardError is ignored.
      def unlink_quietly(path)
        File.unlink(path)
      rescue StandardError
        nil
      end

    end
  end
end
|