skylight 0.3.21 → 0.4.0.alpha1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +0 -4
  3. data/ext/extconf.rb +92 -47
  4. data/ext/libskylight.yml +4 -4
  5. data/ext/skylight_native.c +248 -286
  6. data/lib/skylight.rb +19 -114
  7. data/lib/skylight/api.rb +1 -1
  8. data/lib/skylight/config.rb +176 -146
  9. data/lib/skylight/data/cacert.pem +717 -719
  10. data/lib/skylight/formatters/http.rb +1 -1
  11. data/lib/skylight/instrumenter.rb +28 -35
  12. data/lib/skylight/native.rb +58 -72
  13. data/lib/skylight/normalizers.rb +0 -1
  14. data/lib/skylight/normalizers/active_record/sql.rb +0 -4
  15. data/lib/skylight/probes/excon/middleware.rb +3 -1
  16. data/lib/skylight/probes/net_http.rb +3 -1
  17. data/lib/skylight/subscriber.rb +0 -4
  18. data/lib/skylight/trace.rb +189 -0
  19. data/lib/skylight/util.rb +10 -12
  20. data/lib/skylight/util/hostname.rb +17 -0
  21. data/lib/skylight/util/http.rb +33 -36
  22. data/lib/skylight/util/logging.rb +20 -1
  23. data/lib/skylight/util/multi_io.rb +21 -0
  24. data/lib/skylight/util/native_ext_fetcher.rb +83 -69
  25. data/lib/skylight/util/platform.rb +67 -0
  26. data/lib/skylight/util/ssl.rb +50 -0
  27. data/lib/skylight/version.rb +1 -1
  28. metadata +9 -34
  29. data/ext/rust_support/ruby.h +0 -93
  30. data/ext/skylight.h +0 -85
  31. data/ext/skylight.map +0 -4
  32. data/ext/test/extconf.rb +0 -18
  33. data/ext/test/skylight_native_test.c +0 -82
  34. data/ext/test/skylight_test.h +0 -20
  35. data/lib/skylight/formatters.rb +0 -6
  36. data/lib/skylight/messages.rb +0 -21
  37. data/lib/skylight/messages/error.rb +0 -15
  38. data/lib/skylight/messages/hello.rb +0 -13
  39. data/lib/skylight/messages/trace.rb +0 -179
  40. data/lib/skylight/messages/trace_envelope.rb +0 -19
  41. data/lib/skylight/metrics.rb +0 -9
  42. data/lib/skylight/metrics/ewma.rb +0 -69
  43. data/lib/skylight/metrics/meter.rb +0 -58
  44. data/lib/skylight/metrics/process_cpu_gauge.rb +0 -65
  45. data/lib/skylight/metrics/process_mem_gauge.rb +0 -34
  46. data/lib/skylight/util/conversions.rb +0 -9
  47. data/lib/skylight/util/queue.rb +0 -96
  48. data/lib/skylight/util/task.rb +0 -172
  49. data/lib/skylight/util/uniform_sample.rb +0 -63
  50. data/lib/skylight/worker.rb +0 -19
  51. data/lib/skylight/worker/builder.rb +0 -73
  52. data/lib/skylight/worker/collector.rb +0 -274
  53. data/lib/skylight/worker/connection.rb +0 -87
  54. data/lib/skylight/worker/connection_set.rb +0 -56
  55. data/lib/skylight/worker/embedded.rb +0 -24
  56. data/lib/skylight/worker/metrics_reporter.rb +0 -104
  57. data/lib/skylight/worker/server.rb +0 -336
  58. data/lib/skylight/worker/standalone.rb +0 -421
@@ -1,274 +0,0 @@
1
- require 'uri'
2
-
3
- module Skylight
4
- module Worker
5
- class Collector < Util::Task
6
- include URI::Escape
7
-
8
- ENDPOINT = '/report'.freeze
9
- CONTENT_TYPE = 'content-type'.freeze
10
- SKYLIGHT_V2 = 'application/x-skylight-report-v2'.freeze
11
-
12
- include Util::Logging
13
-
14
- attr_reader :config, :metrics_reporter
15
-
16
- def initialize(config, metrics_reporter = nil)
17
- super(1000, 0.25)
18
-
19
- @config = config
20
- @size = config[:'agent.sample']
21
- @batch = nil
22
- @interval = config[:'agent.interval']
23
- @refresh_at = 0
24
- @http_auth = Util::HTTP.new(config, :accounts)
25
- @http_report = nil
26
- @report_meter = Metrics::Meter.new
27
- @report_success_meter = Metrics::Meter.new
28
- @metrics_reporter = metrics_reporter
29
-
30
- @metrics_reporter.register("collector.report-rate", @report_meter)
31
- @metrics_reporter.register("collector.report-success-rate", @report_success_meter)
32
-
33
- t { fmt "starting collector; interval=%d; size=%d", @interval, @size }
34
- end
35
-
36
- def self.build(config)
37
- new(config, MetricsReporter.new(config))
38
- end
39
-
40
- def prepare
41
- if @metrics_reporter
42
- @metrics_reporter.register("worker.collector.queue-depth", queue_depth_metric)
43
- @metrics_reporter.spawn
44
- end
45
- end
46
-
47
- def handle(msg, now = Util::Clock.absolute_secs)
48
- @batch ||= new_batch(now)
49
-
50
- if should_refresh_token?(now)
51
- refresh_report_token(now)
52
- end
53
-
54
- if @batch.should_flush?(now)
55
- if has_report_token?(now)
56
- flush(@batch)
57
- else
58
- warn "do not have valid session token -- dropping"
59
- return true
60
- end
61
-
62
- @batch = new_batch(now)
63
- end
64
-
65
- return true unless msg
66
-
67
- case msg
68
- when Messages::TraceEnvelope
69
- t { fmt "collector received trace" }
70
- @batch.push(msg)
71
- when Error
72
- send_error(msg)
73
- else
74
- debug "Received unknown message; class=%s", msg.class.to_s
75
- end
76
-
77
- true
78
- end
79
-
80
- def send_http_exception(http, response)
81
- send_exception(response.exception, additional_info: {
82
- host: http.host,
83
- port: http.port,
84
- path: response.request.path,
85
- method: response.request.method
86
- })
87
- end
88
-
89
- def send_exception(exception, data={})
90
- data = { class_name: exception.class.name,
91
- agent_info: @metrics_reporter.build_report }.merge(data)
92
-
93
- if Exception === exception
94
- data.merge!(message: exception.message, backtrace: exception.backtrace)
95
- end
96
-
97
- post_data(:exception, data, false)
98
- end
99
-
100
- private
101
-
102
- def post_data(type, data, notify = true)
103
- t { "posting data (#{type}): #{data.inspect}" }
104
-
105
- res = @http_auth.post("/agent/#{type}?hostname=#{escape(config[:'hostname'])}", data)
106
-
107
- unless res.success?
108
- warn "#{type} wasn't sent successfully; status=%s", res.status
109
- end
110
-
111
- if res.exception
112
- send_http_exception(@http_auth, res) if notify
113
- false
114
- else
115
- true
116
- end
117
- rescue Exception => e
118
- error "exception; msg=%s; class=%s", e.message, e.class
119
- t { e.backtrace.join("\n") }
120
- end
121
-
122
- def send_error(msg)
123
- details = msg.details ? JSON.parse(msg.details) : nil
124
- post_data(:error, type: msg.type, description: msg.description, details: details)
125
- end
126
-
127
- def finish
128
- t { fmt "collector finishing up" }
129
-
130
- now = Util::Clock.absolute_secs
131
-
132
- if should_refresh_token?(now)
133
- refresh_report_token(now)
134
- end
135
-
136
- if @batch && has_report_token?(now)
137
- flush(@batch)
138
- end
139
-
140
- @batch = nil
141
- ensure
142
- if @metrics_reporter
143
- @metrics_reporter.shutdown
144
- end
145
- end
146
-
147
- def flush(batch)
148
- return if batch.empty?
149
-
150
- debug "flushing batch; size=%d", batch.sample.count
151
-
152
- @report_meter.mark
153
-
154
- res = @http_report.post(ENDPOINT, batch.encode, CONTENT_TYPE => SKYLIGHT_V2)
155
-
156
- if res.exception
157
- send_http_exception(@http_report, res)
158
- else
159
- @report_success_meter.mark
160
- end
161
-
162
- nil
163
- end
164
-
165
- def refresh_report_token(now)
166
- res = @http_auth.get("/agent/authenticate?hostname=#{escape(config[:'hostname'])}")
167
-
168
- if res.exception
169
- send_http_exception(@http_auth, res)
170
- return
171
- end
172
-
173
- unless res.success?
174
- if (400..499).include? res.status
175
- warn "token request rejected; status=%s", res.status
176
- @http_report = nil
177
- end
178
-
179
- warn "could not fetch report session token; status=%s", res.status
180
- return
181
- end
182
-
183
- session = res.body['session']
184
- tok, expires_at = session['token'], session['expires_at'] if session
185
-
186
- if tok && expires_at
187
- if expires_at <= now
188
- error "token is expired: token=%s; expires_at=%s", tok, expires_at
189
- return
190
- end
191
-
192
- # 30 minute buffer or split the difference
193
- @refresh_at = expires_at - now > 3600 ?
194
- now + ((expires_at - now) / 2) :
195
- expires_at - 1800
196
-
197
- @http_report = Util::HTTP.new(config, :report)
198
- @http_report.authentication = tok
199
- else
200
- if @http_report
201
- @refresh_at = now + 60
202
- end
203
- warn "server did not return a session token"
204
- end
205
- rescue Exception => e
206
- error "exception; msg=%s; class=%s", e.message, e.class
207
- t { e.backtrace.join("\n") }
208
- end
209
-
210
- def should_refresh_token?(now)
211
- now >= @refresh_at
212
- end
213
-
214
- def has_report_token?(now)
215
- return true if config.ignore_token?
216
- return unless @http_report
217
- now < @refresh_at + (3600 * 3 - 660)
218
- end
219
-
220
- def new_batch(now)
221
- Batch.new(config, @size, round(now), @interval)
222
- end
223
-
224
- def round(time)
225
- (time.to_i / @interval) * @interval
226
- end
227
-
228
- class Batch
229
- include Util::Logging
230
-
231
- attr_reader :config, :from, :counts, :sample, :flush_at
232
-
233
- def initialize(config, size, from, interval)
234
- @config = config
235
- @from = from
236
- @flush_at = from + interval
237
- @sample = Util::UniformSample.new(size)
238
- @counts = Hash.new(0)
239
- end
240
-
241
- def should_flush?(now)
242
- return true if @config.constant_flush?
243
- now >= @flush_at
244
- end
245
-
246
- def empty?
247
- @sample.empty?
248
- end
249
-
250
- def push(trace)
251
- # Count it
252
- @counts[trace.endpoint_name] += 1
253
- # Push the trace into the sample
254
- @sample << trace
255
- end
256
-
257
- def encode
258
- batch = Skylight::Batch.native_new(from, config[:hostname])
259
-
260
- sample.each do |trace|
261
- batch.native_move_in(trace.data)
262
- end
263
-
264
- @counts.each do |endpoint_name,count|
265
- batch.native_set_endpoint_count(endpoint_name, count)
266
- end
267
-
268
- batch.native_serialize
269
- end
270
- end
271
-
272
- end
273
- end
274
- end
@@ -1,87 +0,0 @@
1
- module Skylight
2
- module Worker
3
- # Represents the IPC client connection
4
- class Connection
5
- FRAME_HDR_LEN = 8
6
-
7
- attr_reader :sock, :throughput
8
-
9
- def initialize(sock)
10
- @sock = sock
11
- @len = nil
12
- @buf = ""
13
-
14
- # Metrics
15
- @throughput = Metrics::Meter.new
16
- end
17
-
18
- def read
19
- if msg = maybe_read_message
20
- return msg
21
- end
22
-
23
- if chunk = read_sock
24
-
25
- @buf << chunk
26
-
27
- if !@len && @buf.bytesize >= FRAME_HDR_LEN
28
- @len = read_len
29
- end
30
-
31
- maybe_read_message
32
- end
33
- end
34
-
35
- def cleanup
36
- # Any cleanup code here
37
- end
38
-
39
- private
40
-
41
- def read_len
42
- if len = @buf[4, 4]
43
- len.unpack("L")[0]
44
- end
45
- end
46
-
47
- def read_message_id
48
- if win = @buf[0, 4]
49
- win.unpack("L")[0]
50
- end
51
- end
52
-
53
- def maybe_read_message
54
- if @len && @buf.bytesize >= @len + FRAME_HDR_LEN
55
- mid = read_message_id
56
- klass = Messages::ID_TO_KLASS.fetch(mid) do
57
- raise IpcProtoError, "unknown message `#{mid}`"
58
- end
59
- data = @buf[FRAME_HDR_LEN, @len]
60
- @buf = @buf[(FRAME_HDR_LEN + @len)..-1] || ""
61
-
62
- if @buf.bytesize >= FRAME_HDR_LEN
63
- @len = read_len
64
- else
65
- @len = nil
66
- end
67
-
68
- begin
69
- return klass.deserialize(data)
70
- rescue Exception => e
71
- # reraise protobuf decoding exceptions
72
- raise IpcProtoError, e.message
73
- end
74
- end
75
- end
76
-
77
- def read_sock
78
- ret = @sock.read_nonblock(CHUNK_SIZE)
79
- # Track the throughput
80
- @throughput.mark(ret.bytesize) if ret
81
- ret
82
- rescue Errno::EAGAIN, Errno::EWOULDBLOCK
83
- end
84
-
85
- end
86
- end
87
- end
@@ -1,56 +0,0 @@
1
- require 'thread'
2
-
3
- module Skylight
4
- module Worker
5
- class ConnectionSet
6
- attr_reader :open_connections, :throughput
7
-
8
- def initialize
9
- @connections = {}
10
- @lock = Mutex.new
11
-
12
- # Metrics
13
- @open_connections = build_open_connections_metric
14
- @throughput = build_throughput_metric
15
- end
16
-
17
- def add(sock)
18
- conn = Connection.new(sock)
19
- @lock.synchronize { @connections[sock] = conn }
20
- conn
21
- end
22
-
23
- def socks
24
- @lock.synchronize { @connections.keys }
25
- end
26
-
27
- def [](sock)
28
- @lock.synchronize do
29
- @connections[sock]
30
- end
31
- end
32
-
33
- def cleanup(sock)
34
- if conn = @lock.synchronize { @connections.delete(sock) }
35
- conn.cleanup
36
- sock.close rescue nil
37
- end
38
- end
39
-
40
- private
41
-
42
- def build_open_connections_metric
43
- lambda do
44
- @lock.synchronize { @connections.length }
45
- end
46
- end
47
-
48
- def build_throughput_metric
49
- lambda do
50
- conns = @lock.synchronize { @connections.values }
51
- conns.map { |c| c.throughput.rate.to_i }
52
- end
53
- end
54
- end
55
- end
56
- end
@@ -1,24 +0,0 @@
1
- module Skylight
2
- module Worker
3
- class Embedded
4
- def initialize(collector)
5
- @collector = collector
6
- end
7
-
8
- def spawn
9
- @collector.spawn
10
- end
11
-
12
- def shutdown
13
- @collector.shutdown
14
- end
15
-
16
- def submit(msg)
17
- decoder = Messages::ID_TO_KLASS.fetch(Messages::KLASS_TO_ID.fetch(msg.class))
18
- msg = decoder.deserialize(msg.serialize)
19
-
20
- @collector.submit(msg)
21
- end
22
- end
23
- end
24
- end
@@ -1,104 +0,0 @@
1
- require 'thread'
2
- require 'rbconfig'
3
-
4
- module Skylight
5
- module Worker
6
- class MetricsReporter < Util::Task
7
-
8
- include Util::Logging
9
-
10
- attr_reader :config
11
-
12
- def initialize(config)
13
- super(1000, 0.25)
14
-
15
- @metrics = {}
16
- @config = config
17
- @interval = config[:'metrics.report_interval']
18
- @lock = Mutex.new
19
- @next_report_at = nil
20
- @http_auth = Util::HTTP.new(config, :accounts)
21
- end
22
-
23
- # A metric responds to #call and returns metric info
24
- def register(name, metric)
25
- @lock.synchronize { @metrics[name] = metric }
26
- end
27
-
28
- def unregister(name)
29
- @lock.synchronize { @metrics.delete(name) }
30
- end
31
-
32
- # msg is always nil, but we can use the Task abstraction anyway
33
- def handle(msg, now = Util::Clock.absolute_secs)
34
- # Initially set the next report at
35
- unless @next_report_at
36
- update_next_report_at(now)
37
- return true
38
- end
39
-
40
- if now < @next_report_at
41
- # Nothing to do
42
- return true
43
- end
44
-
45
- update_next_report_at(now)
46
- post_report
47
-
48
- true
49
- end
50
-
51
- def build_report
52
- report = {
53
- "hostname" => config[:'hostname'],
54
- "host.info" => RbConfig::CONFIG['arch'],
55
- "ruby.version" => "#{RUBY_VERSION}-p#{RUBY_PATCHLEVEL}",
56
- "ruby.engine" => RUBY_ENGINE,
57
- "rails.version" => defined?(Rails) ? Rails.version : nil,
58
- "skylight.version" => Skylight::VERSION
59
- }
60
-
61
- metric_names.each do |name|
62
- # Since we are operating in a concurrent environment, it is possible
63
- # that the metric for the current name is unregistered before we
64
- # access it here.
65
- unless m = metric(name)
66
- next
67
- end
68
-
69
- report[name] = m.call
70
- end
71
-
72
- report
73
- end
74
-
75
- def post_report
76
- report = build_report
77
-
78
- # Send the report
79
- t { fmt "reporting internal metrics; payload=%s", report.inspect }
80
-
81
- res = @http_auth.post("/agent/metrics", report: report)
82
-
83
- unless res.success?
84
- warn "internal metrics report failed; status=%s", res.status
85
- end
86
- end
87
-
88
- private
89
-
90
- def metric_names
91
- @lock.synchronize { @metrics.keys }
92
- end
93
-
94
- def metric(name)
95
- @lock.synchronize { @metrics[name] }
96
- end
97
-
98
- def update_next_report_at(now)
99
- @next_report_at = now + @interval
100
- end
101
-
102
- end
103
- end
104
- end