skylight 0.3.21 → 0.4.0.alpha1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +0 -4
  3. data/ext/extconf.rb +92 -47
  4. data/ext/libskylight.yml +4 -4
  5. data/ext/skylight_native.c +248 -286
  6. data/lib/skylight.rb +19 -114
  7. data/lib/skylight/api.rb +1 -1
  8. data/lib/skylight/config.rb +176 -146
  9. data/lib/skylight/data/cacert.pem +717 -719
  10. data/lib/skylight/formatters/http.rb +1 -1
  11. data/lib/skylight/instrumenter.rb +28 -35
  12. data/lib/skylight/native.rb +58 -72
  13. data/lib/skylight/normalizers.rb +0 -1
  14. data/lib/skylight/normalizers/active_record/sql.rb +0 -4
  15. data/lib/skylight/probes/excon/middleware.rb +3 -1
  16. data/lib/skylight/probes/net_http.rb +3 -1
  17. data/lib/skylight/subscriber.rb +0 -4
  18. data/lib/skylight/trace.rb +189 -0
  19. data/lib/skylight/util.rb +10 -12
  20. data/lib/skylight/util/hostname.rb +17 -0
  21. data/lib/skylight/util/http.rb +33 -36
  22. data/lib/skylight/util/logging.rb +20 -1
  23. data/lib/skylight/util/multi_io.rb +21 -0
  24. data/lib/skylight/util/native_ext_fetcher.rb +83 -69
  25. data/lib/skylight/util/platform.rb +67 -0
  26. data/lib/skylight/util/ssl.rb +50 -0
  27. data/lib/skylight/version.rb +1 -1
  28. metadata +9 -34
  29. data/ext/rust_support/ruby.h +0 -93
  30. data/ext/skylight.h +0 -85
  31. data/ext/skylight.map +0 -4
  32. data/ext/test/extconf.rb +0 -18
  33. data/ext/test/skylight_native_test.c +0 -82
  34. data/ext/test/skylight_test.h +0 -20
  35. data/lib/skylight/formatters.rb +0 -6
  36. data/lib/skylight/messages.rb +0 -21
  37. data/lib/skylight/messages/error.rb +0 -15
  38. data/lib/skylight/messages/hello.rb +0 -13
  39. data/lib/skylight/messages/trace.rb +0 -179
  40. data/lib/skylight/messages/trace_envelope.rb +0 -19
  41. data/lib/skylight/metrics.rb +0 -9
  42. data/lib/skylight/metrics/ewma.rb +0 -69
  43. data/lib/skylight/metrics/meter.rb +0 -58
  44. data/lib/skylight/metrics/process_cpu_gauge.rb +0 -65
  45. data/lib/skylight/metrics/process_mem_gauge.rb +0 -34
  46. data/lib/skylight/util/conversions.rb +0 -9
  47. data/lib/skylight/util/queue.rb +0 -96
  48. data/lib/skylight/util/task.rb +0 -172
  49. data/lib/skylight/util/uniform_sample.rb +0 -63
  50. data/lib/skylight/worker.rb +0 -19
  51. data/lib/skylight/worker/builder.rb +0 -73
  52. data/lib/skylight/worker/collector.rb +0 -274
  53. data/lib/skylight/worker/connection.rb +0 -87
  54. data/lib/skylight/worker/connection_set.rb +0 -56
  55. data/lib/skylight/worker/embedded.rb +0 -24
  56. data/lib/skylight/worker/metrics_reporter.rb +0 -104
  57. data/lib/skylight/worker/server.rb +0 -336
  58. data/lib/skylight/worker/standalone.rb +0 -421
@@ -1,274 +0,0 @@
1
- require 'uri'
2
-
3
- module Skylight
4
- module Worker
5
- class Collector < Util::Task
6
- include URI::Escape
7
-
8
- ENDPOINT = '/report'.freeze
9
- CONTENT_TYPE = 'content-type'.freeze
10
- SKYLIGHT_V2 = 'application/x-skylight-report-v2'.freeze
11
-
12
- include Util::Logging
13
-
14
- attr_reader :config, :metrics_reporter
15
-
16
- def initialize(config, metrics_reporter = nil)
17
- super(1000, 0.25)
18
-
19
- @config = config
20
- @size = config[:'agent.sample']
21
- @batch = nil
22
- @interval = config[:'agent.interval']
23
- @refresh_at = 0
24
- @http_auth = Util::HTTP.new(config, :accounts)
25
- @http_report = nil
26
- @report_meter = Metrics::Meter.new
27
- @report_success_meter = Metrics::Meter.new
28
- @metrics_reporter = metrics_reporter
29
-
30
- @metrics_reporter.register("collector.report-rate", @report_meter)
31
- @metrics_reporter.register("collector.report-success-rate", @report_success_meter)
32
-
33
- t { fmt "starting collector; interval=%d; size=%d", @interval, @size }
34
- end
35
-
36
- def self.build(config)
37
- new(config, MetricsReporter.new(config))
38
- end
39
-
40
- def prepare
41
- if @metrics_reporter
42
- @metrics_reporter.register("worker.collector.queue-depth", queue_depth_metric)
43
- @metrics_reporter.spawn
44
- end
45
- end
46
-
47
- def handle(msg, now = Util::Clock.absolute_secs)
48
- @batch ||= new_batch(now)
49
-
50
- if should_refresh_token?(now)
51
- refresh_report_token(now)
52
- end
53
-
54
- if @batch.should_flush?(now)
55
- if has_report_token?(now)
56
- flush(@batch)
57
- else
58
- warn "do not have valid session token -- dropping"
59
- return true
60
- end
61
-
62
- @batch = new_batch(now)
63
- end
64
-
65
- return true unless msg
66
-
67
- case msg
68
- when Messages::TraceEnvelope
69
- t { fmt "collector received trace" }
70
- @batch.push(msg)
71
- when Error
72
- send_error(msg)
73
- else
74
- debug "Received unknown message; class=%s", msg.class.to_s
75
- end
76
-
77
- true
78
- end
79
-
80
- def send_http_exception(http, response)
81
- send_exception(response.exception, additional_info: {
82
- host: http.host,
83
- port: http.port,
84
- path: response.request.path,
85
- method: response.request.method
86
- })
87
- end
88
-
89
- def send_exception(exception, data={})
90
- data = { class_name: exception.class.name,
91
- agent_info: @metrics_reporter.build_report }.merge(data)
92
-
93
- if Exception === exception
94
- data.merge!(message: exception.message, backtrace: exception.backtrace)
95
- end
96
-
97
- post_data(:exception, data, false)
98
- end
99
-
100
- private
101
-
102
- def post_data(type, data, notify = true)
103
- t { "posting data (#{type}): #{data.inspect}" }
104
-
105
- res = @http_auth.post("/agent/#{type}?hostname=#{escape(config[:'hostname'])}", data)
106
-
107
- unless res.success?
108
- warn "#{type} wasn't sent successfully; status=%s", res.status
109
- end
110
-
111
- if res.exception
112
- send_http_exception(@http_auth, res) if notify
113
- false
114
- else
115
- true
116
- end
117
- rescue Exception => e
118
- error "exception; msg=%s; class=%s", e.message, e.class
119
- t { e.backtrace.join("\n") }
120
- end
121
-
122
- def send_error(msg)
123
- details = msg.details ? JSON.parse(msg.details) : nil
124
- post_data(:error, type: msg.type, description: msg.description, details: details)
125
- end
126
-
127
- def finish
128
- t { fmt "collector finishing up" }
129
-
130
- now = Util::Clock.absolute_secs
131
-
132
- if should_refresh_token?(now)
133
- refresh_report_token(now)
134
- end
135
-
136
- if @batch && has_report_token?(now)
137
- flush(@batch)
138
- end
139
-
140
- @batch = nil
141
- ensure
142
- if @metrics_reporter
143
- @metrics_reporter.shutdown
144
- end
145
- end
146
-
147
- def flush(batch)
148
- return if batch.empty?
149
-
150
- debug "flushing batch; size=%d", batch.sample.count
151
-
152
- @report_meter.mark
153
-
154
- res = @http_report.post(ENDPOINT, batch.encode, CONTENT_TYPE => SKYLIGHT_V2)
155
-
156
- if res.exception
157
- send_http_exception(@http_report, res)
158
- else
159
- @report_success_meter.mark
160
- end
161
-
162
- nil
163
- end
164
-
165
- def refresh_report_token(now)
166
- res = @http_auth.get("/agent/authenticate?hostname=#{escape(config[:'hostname'])}")
167
-
168
- if res.exception
169
- send_http_exception(@http_auth, res)
170
- return
171
- end
172
-
173
- unless res.success?
174
- if (400..499).include? res.status
175
- warn "token request rejected; status=%s", res.status
176
- @http_report = nil
177
- end
178
-
179
- warn "could not fetch report session token; status=%s", res.status
180
- return
181
- end
182
-
183
- session = res.body['session']
184
- tok, expires_at = session['token'], session['expires_at'] if session
185
-
186
- if tok && expires_at
187
- if expires_at <= now
188
- error "token is expired: token=%s; expires_at=%s", tok, expires_at
189
- return
190
- end
191
-
192
- # 30 minute buffer or split the difference
193
- @refresh_at = expires_at - now > 3600 ?
194
- now + ((expires_at - now) / 2) :
195
- expires_at - 1800
196
-
197
- @http_report = Util::HTTP.new(config, :report)
198
- @http_report.authentication = tok
199
- else
200
- if @http_report
201
- @refresh_at = now + 60
202
- end
203
- warn "server did not return a session token"
204
- end
205
- rescue Exception => e
206
- error "exception; msg=%s; class=%s", e.message, e.class
207
- t { e.backtrace.join("\n") }
208
- end
209
-
210
- def should_refresh_token?(now)
211
- now >= @refresh_at
212
- end
213
-
214
- def has_report_token?(now)
215
- return true if config.ignore_token?
216
- return unless @http_report
217
- now < @refresh_at + (3600 * 3 - 660)
218
- end
219
-
220
- def new_batch(now)
221
- Batch.new(config, @size, round(now), @interval)
222
- end
223
-
224
- def round(time)
225
- (time.to_i / @interval) * @interval
226
- end
227
-
228
- class Batch
229
- include Util::Logging
230
-
231
- attr_reader :config, :from, :counts, :sample, :flush_at
232
-
233
- def initialize(config, size, from, interval)
234
- @config = config
235
- @from = from
236
- @flush_at = from + interval
237
- @sample = Util::UniformSample.new(size)
238
- @counts = Hash.new(0)
239
- end
240
-
241
- def should_flush?(now)
242
- return true if @config.constant_flush?
243
- now >= @flush_at
244
- end
245
-
246
- def empty?
247
- @sample.empty?
248
- end
249
-
250
- def push(trace)
251
- # Count it
252
- @counts[trace.endpoint_name] += 1
253
- # Push the trace into the sample
254
- @sample << trace
255
- end
256
-
257
- def encode
258
- batch = Skylight::Batch.native_new(from, config[:hostname])
259
-
260
- sample.each do |trace|
261
- batch.native_move_in(trace.data)
262
- end
263
-
264
- @counts.each do |endpoint_name,count|
265
- batch.native_set_endpoint_count(endpoint_name, count)
266
- end
267
-
268
- batch.native_serialize
269
- end
270
- end
271
-
272
- end
273
- end
274
- end
@@ -1,87 +0,0 @@
1
- module Skylight
2
- module Worker
3
- # Represents the IPC client connection
4
- class Connection
5
- FRAME_HDR_LEN = 8
6
-
7
- attr_reader :sock, :throughput
8
-
9
- def initialize(sock)
10
- @sock = sock
11
- @len = nil
12
- @buf = ""
13
-
14
- # Metrics
15
- @throughput = Metrics::Meter.new
16
- end
17
-
18
- def read
19
- if msg = maybe_read_message
20
- return msg
21
- end
22
-
23
- if chunk = read_sock
24
-
25
- @buf << chunk
26
-
27
- if !@len && @buf.bytesize >= FRAME_HDR_LEN
28
- @len = read_len
29
- end
30
-
31
- maybe_read_message
32
- end
33
- end
34
-
35
- def cleanup
36
- # Any cleanup code here
37
- end
38
-
39
- private
40
-
41
- def read_len
42
- if len = @buf[4, 4]
43
- len.unpack("L")[0]
44
- end
45
- end
46
-
47
- def read_message_id
48
- if win = @buf[0, 4]
49
- win.unpack("L")[0]
50
- end
51
- end
52
-
53
- def maybe_read_message
54
- if @len && @buf.bytesize >= @len + FRAME_HDR_LEN
55
- mid = read_message_id
56
- klass = Messages::ID_TO_KLASS.fetch(mid) do
57
- raise IpcProtoError, "unknown message `#{mid}`"
58
- end
59
- data = @buf[FRAME_HDR_LEN, @len]
60
- @buf = @buf[(FRAME_HDR_LEN + @len)..-1] || ""
61
-
62
- if @buf.bytesize >= FRAME_HDR_LEN
63
- @len = read_len
64
- else
65
- @len = nil
66
- end
67
-
68
- begin
69
- return klass.deserialize(data)
70
- rescue Exception => e
71
- # reraise protobuf decoding exceptions
72
- raise IpcProtoError, e.message
73
- end
74
- end
75
- end
76
-
77
- def read_sock
78
- ret = @sock.read_nonblock(CHUNK_SIZE)
79
- # Track the throughput
80
- @throughput.mark(ret.bytesize) if ret
81
- ret
82
- rescue Errno::EAGAIN, Errno::EWOULDBLOCK
83
- end
84
-
85
- end
86
- end
87
- end
@@ -1,56 +0,0 @@
1
- require 'thread'
2
-
3
- module Skylight
4
- module Worker
5
- class ConnectionSet
6
- attr_reader :open_connections, :throughput
7
-
8
- def initialize
9
- @connections = {}
10
- @lock = Mutex.new
11
-
12
- # Metrics
13
- @open_connections = build_open_connections_metric
14
- @throughput = build_throughput_metric
15
- end
16
-
17
- def add(sock)
18
- conn = Connection.new(sock)
19
- @lock.synchronize { @connections[sock] = conn }
20
- conn
21
- end
22
-
23
- def socks
24
- @lock.synchronize { @connections.keys }
25
- end
26
-
27
- def [](sock)
28
- @lock.synchronize do
29
- @connections[sock]
30
- end
31
- end
32
-
33
- def cleanup(sock)
34
- if conn = @lock.synchronize { @connections.delete(sock) }
35
- conn.cleanup
36
- sock.close rescue nil
37
- end
38
- end
39
-
40
- private
41
-
42
- def build_open_connections_metric
43
- lambda do
44
- @lock.synchronize { @connections.length }
45
- end
46
- end
47
-
48
- def build_throughput_metric
49
- lambda do
50
- conns = @lock.synchronize { @connections.values }
51
- conns.map { |c| c.throughput.rate.to_i }
52
- end
53
- end
54
- end
55
- end
56
- end
@@ -1,24 +0,0 @@
1
- module Skylight
2
- module Worker
3
- class Embedded
4
- def initialize(collector)
5
- @collector = collector
6
- end
7
-
8
- def spawn
9
- @collector.spawn
10
- end
11
-
12
- def shutdown
13
- @collector.shutdown
14
- end
15
-
16
- def submit(msg)
17
- decoder = Messages::ID_TO_KLASS.fetch(Messages::KLASS_TO_ID.fetch(msg.class))
18
- msg = decoder.deserialize(msg.serialize)
19
-
20
- @collector.submit(msg)
21
- end
22
- end
23
- end
24
- end
@@ -1,104 +0,0 @@
1
- require 'thread'
2
- require 'rbconfig'
3
-
4
- module Skylight
5
- module Worker
6
- class MetricsReporter < Util::Task
7
-
8
- include Util::Logging
9
-
10
- attr_reader :config
11
-
12
- def initialize(config)
13
- super(1000, 0.25)
14
-
15
- @metrics = {}
16
- @config = config
17
- @interval = config[:'metrics.report_interval']
18
- @lock = Mutex.new
19
- @next_report_at = nil
20
- @http_auth = Util::HTTP.new(config, :accounts)
21
- end
22
-
23
- # A metric responds to #call and returns metric info
24
- def register(name, metric)
25
- @lock.synchronize { @metrics[name] = metric }
26
- end
27
-
28
- def unregister(name)
29
- @lock.synchronize { @metrics.delete(name) }
30
- end
31
-
32
- # msg is always nil, but we can use the Task abstraction anyway
33
- def handle(msg, now = Util::Clock.absolute_secs)
34
- # Initially set the next report at
35
- unless @next_report_at
36
- update_next_report_at(now)
37
- return true
38
- end
39
-
40
- if now < @next_report_at
41
- # Nothing to do
42
- return true
43
- end
44
-
45
- update_next_report_at(now)
46
- post_report
47
-
48
- true
49
- end
50
-
51
- def build_report
52
- report = {
53
- "hostname" => config[:'hostname'],
54
- "host.info" => RbConfig::CONFIG['arch'],
55
- "ruby.version" => "#{RUBY_VERSION}-p#{RUBY_PATCHLEVEL}",
56
- "ruby.engine" => RUBY_ENGINE,
57
- "rails.version" => defined?(Rails) ? Rails.version : nil,
58
- "skylight.version" => Skylight::VERSION
59
- }
60
-
61
- metric_names.each do |name|
62
- # Since we are operating in a concurrent environment, it is possible
63
- # that the metric for the current name is unregistered before we
64
- # access it here.
65
- unless m = metric(name)
66
- next
67
- end
68
-
69
- report[name] = m.call
70
- end
71
-
72
- report
73
- end
74
-
75
- def post_report
76
- report = build_report
77
-
78
- # Send the report
79
- t { fmt "reporting internal metrics; payload=%s", report.inspect }
80
-
81
- res = @http_auth.post("/agent/metrics", report: report)
82
-
83
- unless res.success?
84
- warn "internal metrics report failed; status=%s", res.status
85
- end
86
- end
87
-
88
- private
89
-
90
- def metric_names
91
- @lock.synchronize { @metrics.keys }
92
- end
93
-
94
- def metric(name)
95
- @lock.synchronize { @metrics[name] }
96
- end
97
-
98
- def update_next_report_at(now)
99
- @next_report_at = now + @interval
100
- end
101
-
102
- end
103
- end
104
- end