skylight 0.3.7 → 0.3.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,4 @@
1
1
  module Skylight
2
- VERSION = '0.3.7'
2
+ VERSION = '0.3.8'
3
3
  end
4
4
 
@@ -5,12 +5,14 @@ module Skylight
5
5
  CHUNK_SIZE = 16 * 1024
6
6
 
7
7
  # === Modules
8
- autoload :Builder, 'skylight/worker/builder'
9
- autoload :Collector, 'skylight/worker/collector'
10
- autoload :Connection, 'skylight/worker/connection'
11
- autoload :Embedded, 'skylight/worker/embedded'
12
- autoload :Server, 'skylight/worker/server'
13
- autoload :Standalone, 'skylight/worker/standalone'
8
+ autoload :Builder, 'skylight/worker/builder'
9
+ autoload :Collector, 'skylight/worker/collector'
10
+ autoload :Connection, 'skylight/worker/connection'
11
+ autoload :ConnectionSet, 'skylight/worker/connection_set'
12
+ autoload :Embedded, 'skylight/worker/embedded'
13
+ autoload :MetricsReporter, 'skylight/worker/metrics_reporter'
14
+ autoload :Server, 'skylight/worker/server'
15
+ autoload :Standalone, 'skylight/worker/standalone'
14
16
 
15
17
  end
16
18
  end
@@ -19,7 +19,7 @@ module Skylight
19
19
  case s
20
20
  when 'embedded'
21
21
  trace "building embedded worker"
22
- Embedded.new(Collector.new(config))
22
+ Embedded.new(Collector.build(config))
23
23
  when 'standalone'
24
24
  trace "building standalone worker"
25
25
 
@@ -11,23 +11,39 @@ module Skylight
11
11
 
12
12
  include Util::Logging
13
13
 
14
- attr_reader :config
14
+ attr_reader :config, :metrics_reporter
15
15
 
16
- def initialize(config)
16
# Sets up the collector state and, when a metrics reporter is supplied,
# registers the collector's report meters with it.
#
# config           - configuration object; read via config[:'agent.sample']
#                    and config[:'agent.interval'].
# metrics_reporter - optional object responding to #register(name, metric).
#                    May be nil, in which case no metrics are registered.
def initialize(config, metrics_reporter = nil)
  # NOTE(review): the meaning of (1000, 0.25) is defined by the superclass,
  # which is not visible here -- confirm before changing.
  super(1000, 0.25)

  @config      = config
  @size        = config[:'agent.sample']
  @batch       = nil
  @interval    = config[:'agent.interval']
  @refresh_at  = 0
  @http_auth   = Util::HTTP.new(config, :accounts)
  @http_report = nil

  @report_meter         = Metrics::Meter.new
  @report_success_meter = Metrics::Meter.new
  @metrics_reporter     = metrics_reporter

  # The reporter is optional (default nil); guard the registration the same
  # way the rest of the class guards its use of @metrics_reporter.
  if @metrics_reporter
    @metrics_reporter.register("collector.report-rate", @report_meter)
    @metrics_reporter.register("collector.report-success-rate", @report_success_meter)
  end

  t { fmt "starting collector; interval=%d; size=%d", @interval, @size }
end
30
35
 
36
# Convenience constructor: creates a MetricsReporter from the same config
# and hands both to .new.
def self.build(config)
  reporter = MetricsReporter.new(config)
  new(config, reporter)
end
39
+
40
# Registers the queue-depth metric with the metrics reporter and spawns the
# reporter. Does nothing (and returns nil) when no reporter is configured.
def prepare
  return unless @metrics_reporter

  @metrics_reporter.register("worker.collector.queue-depth", queue_depth_metric)
  @metrics_reporter.spawn
end
46
+
31
47
  def handle(msg, now = Util::Clock.absolute_secs)
32
48
  @batch ||= new_batch(now)
33
49
 
@@ -61,15 +77,23 @@ module Skylight
61
77
  true
62
78
  end
63
79
 
64
- def send_status(status)
65
- post_data(:status, status)
80
# Reports the exception carried by an HTTP response, attaching the host and
# port of the client plus the path and method of the failed request under
# the :additional_info key.
def send_http_exception(http, response)
  request = response.request

  details = {
    host: http.host,
    port: http.port,
    path: request.path,
    method: request.method
  }

  send_exception(response.exception, additional_info: details)
end
67
88
 
68
- def send_exception(exception)
69
- data = {class_name: exception.class.name}
89
# Posts an exception report.
#
# exception - the object being reported; when it is an Exception its message
#             and backtrace are included.
# data      - extra key/value pairs merged into the report (optional). Keys
#             given here override :class_name and :agent_info.
#
# The :agent_info section is only added when a metrics reporter is present;
# the reporter is optional, and reporting an exception must not itself fail
# with NoMethodError when it is nil.
def send_exception(exception, data={})
  payload = { class_name: exception.class.name }
  payload[:agent_info] = @metrics_reporter.build_report if @metrics_reporter
  payload.merge!(data)

  if Exception === exception
    payload.merge!(message: exception.message, backtrace: exception.backtrace)
  end

  # Third argument false: do not notify again if this post itself fails.
  post_data(:exception, payload, false)
end
75
99
 
@@ -84,7 +108,12 @@ module Skylight
84
108
  warn "#{type} wasn't sent successfully; status=%s", res.status
85
109
  end
86
110
 
87
- send_exception(res.exception) if notify && res.exception
111
+ if res.exception
112
+ send_http_exception(@http_auth, res) if notify
113
+ false
114
+ else
115
+ true
116
+ end
88
117
  rescue Exception => e
89
118
  error "exception; msg=%s; class=%s", e.message, e.class
90
119
  t { e.backtrace.join("\n") }
@@ -109,6 +138,10 @@ module Skylight
109
138
  end
110
139
 
111
140
  @batch = nil
141
+ ensure
142
+ if @metrics_reporter
143
+ @metrics_reporter.shutdown
144
+ end
112
145
  end
113
146
 
114
147
  def flush(batch)
@@ -116,8 +149,16 @@ module Skylight
116
149
 
117
150
  debug "flushing batch; size=%d", batch.sample.count
118
151
 
152
+ @report_meter.mark
153
+
119
154
  res = @http_report.post(ENDPOINT, batch.encode, CONTENT_TYPE => SKYLIGHT_V2)
120
- send_exception(res.exception) if res.exception
155
+
156
+ if res.exception
157
+ send_http_exception(@http_report, res)
158
+ else
159
+ @report_success_meter.mark
160
+ end
161
+
121
162
  nil
122
163
  end
123
164
 
@@ -125,7 +166,7 @@ module Skylight
125
166
  res = @http_auth.get("/agent/authenticate?hostname=#{escape(config[:'hostname'])}")
126
167
 
127
168
  if res.exception
128
- send_exception(res.exception)
169
+ send_http_exception(@http_auth, res)
129
170
  return
130
171
  end
131
172
 
@@ -1,14 +1,18 @@
1
1
  module Skylight
2
2
  module Worker
3
+ # Represents the IPC client connection
3
4
  class Connection
4
5
  FRAME_HDR_LEN = 8
5
6
 
6
- attr_reader :sock
7
+ attr_reader :sock, :throughput
7
8
 
8
9
  def initialize(sock)
9
10
  @sock = sock
10
11
  @len = nil
11
12
  @buf = ""
13
+
14
+ # Metrics
15
+ @throughput = Metrics::Meter.new
12
16
  end
13
17
 
14
18
  def read
@@ -17,6 +21,7 @@ module Skylight
17
21
  end
18
22
 
19
23
  if chunk = read_sock
24
+
20
25
  @buf << chunk
21
26
 
22
27
  if !@len && @buf.bytesize >= FRAME_HDR_LEN
@@ -27,6 +32,10 @@ module Skylight
27
32
  end
28
33
  end
29
34
 
35
# Hook called by ConnectionSet#cleanup once the connection has been removed
# from the set. Currently there is nothing to release, so it returns nil.
def cleanup
  nil
end
38
+
30
39
  private
31
40
 
32
41
  def read_len
@@ -66,7 +75,10 @@ module Skylight
66
75
  end
67
76
 
68
77
  def read_sock
69
- @sock.read_nonblock(CHUNK_SIZE)
78
+ ret = @sock.read_nonblock(CHUNK_SIZE)
79
+ # Track the throughput
80
+ @throughput.mark(ret.bytesize) if ret
81
+ ret
70
82
  rescue Errno::EAGAIN, Errno::EWOULDBLOCK
71
83
  end
72
84
 
@@ -0,0 +1,56 @@
1
+ require 'thread'
2
+
3
module Skylight
  module Worker
    # Registry of the open IPC connections, keyed by socket. Every access to
    # the underlying hash goes through a Mutex.
    class ConnectionSet
      # open_connections - callable returning the number of tracked connections.
      # throughput       - callable returning one integer rate per connection.
      attr_reader :open_connections, :throughput

      def initialize
        @connections = {}
        @lock = Mutex.new

        # Metrics
        @open_connections = build_open_connections_metric
        @throughput = build_throughput_metric
      end

      # Wraps +sock+ in a Connection, tracks it, and returns the Connection.
      def add(sock)
        conn = Connection.new(sock)
        @lock.synchronize { @connections[sock] = conn }
        conn
      end

      # Returns an array of the currently tracked sockets.
      def socks
        @lock.synchronize { @connections.keys }
      end

      # Returns the Connection registered for +sock+, or nil.
      def [](sock)
        @lock.synchronize { @connections[sock] }
      end

      # Stops tracking +sock+, runs the connection's cleanup hook and closes
      # the socket. Unknown sockets are ignored. Returns nil.
      #
      # The socket is closed in an ensure clause so a failing cleanup hook
      # cannot leak the descriptor; the hook's exception still propagates.
      def cleanup(sock)
        conn = @lock.synchronize { @connections.delete(sock) }
        return unless conn

        begin
          conn.cleanup
        ensure
          close_quietly(sock)
        end

        nil
      end

      private

      # Best-effort close: errors while closing are deliberately ignored.
      def close_quietly(sock)
        sock.close
      rescue StandardError
        nil
      end

      def build_open_connections_metric
        lambda do
          @lock.synchronize { @connections.length }
        end
      end

      def build_throughput_metric
        lambda do
          # Snapshot under the lock; compute the rates outside of it.
          conns = @lock.synchronize { @connections.values }
          conns.map { |c| c.throughput.rate.to_i }
        end
      end
    end
  end
end
@@ -0,0 +1,103 @@
1
+ require 'thread'
2
+ require 'rbconfig'
3
+
4
module Skylight
  module Worker
    # Periodically collects the registered metrics and posts them, together
    # with some host and runtime information, to "/agent/metrics".
    class MetricsReporter < Util::Task

      include Util::Logging

      attr_reader :config

      # config - configuration object; config[:'metrics.report_interval'] is
      #          added to the current time to schedule the next report.
      def initialize(config)
        super(1000, 0.25)

        @config         = config
        @interval       = config[:'metrics.report_interval']
        @metrics        = {}
        @lock           = Mutex.new
        @next_report_at = nil
        @http_auth      = Util::HTTP.new(config, :accounts)
      end

      # Registers +metric+ under +name+. A metric responds to #call and
      # returns the metric info.
      def register(name, metric)
        @lock.synchronize { @metrics[name] = metric }
      end

      # Removes the metric registered under +name+, if any.
      def unregister(name)
        @lock.synchronize { @metrics.delete(name) }
      end

      # Task callback. msg is always nil, but the Task abstraction is reused
      # anyway. The first call only schedules the first report; later calls
      # post a report once the scheduled time has been reached. Always
      # returns true.
      def handle(msg, now = Util::Clock.absolute_secs)
        if @next_report_at.nil?
          update_next_report_at(now)
        elsif now >= @next_report_at
          update_next_report_at(now)
          post_report
        end

        true
      end

      # Builds the report hash: static host/runtime entries followed by the
      # current value of every registered metric.
      def build_report
        report = {
          "hostname"         => config[:'hostname'],
          "host.info"        => RbConfig::CONFIG['arch'],
          "ruby.version"     => RUBY_VERSION,
          "ruby.engine"      => RUBY_ENGINE,
          "skylight.version" => Skylight::VERSION
        }

        metric_names.each do |name|
          # A metric may have been unregistered between listing the names
          # and looking it up; skip it in that case.
          current = metric(name)
          report[name] = current.call if current
        end

        report
      end

      # Builds a report and posts it; logs a warning when the response is
      # not successful.
      def post_report
        payload = build_report

        t { fmt "reporting internal metrics; payload=%s", payload.inspect }

        response = @http_auth.post("/agent/metrics", report: payload)

        warn "internal metrics report failed; status=%s", response.status unless response.success?
      end

    private

      def metric_names
        @lock.synchronize { @metrics.keys }
      end

      def metric(name)
        @lock.synchronize { @metrics[name] }
      end

      def update_next_report_at(now)
        @next_report_at = now + @interval
      end

    end
  end
end
@@ -17,7 +17,6 @@ module Skylight
17
17
  :keepalive,
18
18
  :lockfile_path,
19
19
  :sockfile_path,
20
- :status_interval,
21
20
  :last_status_update,
22
21
  :max_memory
23
22
 
@@ -26,20 +25,23 @@ module Skylight
26
25
  raise ArgumentError, "lockfile and unix domain server socket are required"
27
26
  end
28
27
 
29
- @pid = Process.pid
30
- @run = true
31
- @tick = 1
32
- @socks = []
33
- @config = config
34
- @server = srv
35
- @lockfile = lockfile
36
- @collector = Collector.new(config)
37
- @keepalive = @config[:'agent.keepalive']
38
- @connections = {}
28
+ @pid = Process.pid
29
+ @run = true
30
+ @tick = 1
31
+ @socks = []
32
+ @config = config
33
+ @server = srv
34
+ @lockfile = lockfile
35
+ @collector = Collector.build(config)
36
+ @metrics_reporter = @collector.metrics_reporter
37
+ @keepalive = @config[:'agent.keepalive']
38
+ @connections = ConnectionSet.new
39
39
  @lockfile_path = lockfile_path
40
40
  @sockfile_path = @config[:'agent.sockfile_path']
41
- @status_interval = 60
42
- @max_memory = @config[:'agent.max_memory']
41
+ @process_mem_gauge = Metrics::ProcessMemGauge.new
42
+ @process_cpu_gauge = Metrics::ProcessCpuGauge.new
43
+ @max_memory = @config[:'agent.max_memory']
44
+ @booted_at = Util::Clock.absolute_secs
43
45
  end
44
46
 
45
47
  # Called from skylight.rb on require
@@ -122,9 +124,21 @@ module Skylight
122
124
  private
123
125
 
124
126
  def init
127
+ # TODO: Not super ideal to always iterate here even if debug mode isn't
128
+ # enabled, but it's not super perf critical. We will fix when we revamp
129
+ # logging
130
+ debug "initializing server; config=%s", config.to_env
131
+
125
132
  trap('TERM') { @run = false }
126
133
  trap('INT') { @run = false }
127
134
 
135
+ # Register metrics
136
+ @metrics_reporter.register("worker.memory", @process_mem_gauge)
137
+ @metrics_reporter.register("worker.cpu", @process_cpu_gauge)
138
+ @metrics_reporter.register("worker.uptime", lambda { Util::Clock.absolute_secs - @booted_at })
139
+ @metrics_reporter.register("worker.ipc.open-connections", @connections.open_connections)
140
+ @metrics_reporter.register("worker.ipc.throughput", @connections.throughput)
141
+
128
142
  info "starting skylight daemon"
129
143
  @collector.spawn
130
144
  end
@@ -190,9 +204,8 @@ module Skylight
190
204
  sanity_check
191
205
  end
192
206
 
193
- if status_interval < now - last_status_update
194
- last_status_update = now
195
- status_check
207
+ if @process_mem_gauge.call > max_memory
208
+ raise WorkerStateError, "Memory limit exceeded: #{memory_usage} (max: #{max_memory})"
196
209
  end
197
210
  end
198
211
 
@@ -213,6 +226,9 @@ module Skylight
213
226
  end while @run
214
227
 
215
228
  true # Successful return
229
+ ensure
230
+ # Send a final metrics report
231
+ @metrics_reporter.post_report
216
232
  end
217
233
 
218
234
  # Handles an incoming message. Will be instances from
@@ -254,7 +270,7 @@ module Skylight
254
270
  def connect(sock)
255
271
  trace "client accepted"
256
272
  @socks << sock
257
- @connections[sock] = Connection.new(sock)
273
+ @connections.add(sock)
258
274
  end
259
275
 
260
276
  def cleanup
@@ -272,16 +288,15 @@ module Skylight
272
288
  end
273
289
 
274
290
  def clients_close
275
- @connections.keys.each do |sock|
291
+ @connections.socks.each do |sock|
276
292
  client_close(sock)
277
293
  end
278
294
  end
279
295
 
280
296
  def client_close(sock)
281
297
  trace "closing client connection; fd=%d", sock.fileno
282
- @connections.delete(sock)
298
+ @connections.cleanup(sock)
283
299
  @socks.delete(sock)
284
- sock.close rescue nil
285
300
  end
286
301
 
287
302
  def sockfile
@@ -315,22 +330,6 @@ module Skylight
315
330
  raise WorkerStateError, "sockfile gone"
316
331
  end
317
332
  end
318
-
319
- def status_check
320
- memory_usage = get_memory_usage
321
-
322
- @collector.send_status(memory: memory_usage, max_memory: max_memory)
323
-
324
- if memory_usage > max_memory
325
- raise WorkerStateError, "Memory limit exceeded: #{memory_usage} (max: #{max_memory})"
326
- end
327
- end
328
-
329
- def get_memory_usage
330
- `ps -o rss= -p #{Process.pid}`.to_i / 1024
331
- rescue Errno::ENOENT, Errno::EINTR
332
- 0
333
- end
334
333
  end
335
334
  end
336
335
  end