flare 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +2 -17
- data/lib/flare/configuration.rb +14 -0
- data/lib/flare/engine.rb +4 -2
- data/lib/flare/filtering_span_processor.rb +279 -0
- data/lib/flare/http_transport.rb +62 -0
- data/lib/flare/marker.rb +106 -0
- data/lib/flare/metric_counter.rb +6 -0
- data/lib/flare/metric_flusher.rb +18 -5
- data/lib/flare/metric_storage.rb +5 -0
- data/lib/flare/rule_manager.rb +140 -0
- data/lib/flare/sampler.rb +130 -0
- data/lib/flare/trace_blob.rb +116 -0
- data/lib/flare/trace_exporter.rb +143 -0
- data/lib/flare/trace_health_reporter.rb +74 -0
- data/lib/flare/upload_url_pool.rb +108 -0
- data/lib/flare/version.rb +1 -1
- data/lib/flare/web_marker_subscriber.rb +76 -0
- data/lib/flare.rb +127 -7
- metadata +11 -1
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "time"

require "concurrent/atomic/atomic_fixnum"
require "concurrent/atomic/atomic_reference"
|
|
4
|
+
|
|
5
|
+
module Flare
|
|
6
|
+
# Thread-safe pool of presigned R2 PUT URLs the RuleManager fills from
# the /api/rules response. TraceExporter checks one out before each
# upload; if the pool is empty (no active rules, no fresh URLs) checkout
# returns nil and the exporter gives up on that batch -- caller decides
# what to do.
#
# Each entry is a Hash: { upload_id:, key:, put_url:, expires_at: }.
# expires_at is a Time or nil (nil never expires); entries past their
# expiry are skipped on checkout.
#
# The entry list is an immutable (frozen) Array held in an
# AtomicReference. Read-modify-write operations (checkout, sweep) publish
# their result with compare_and_set and retry when another thread changed
# the list in between.
#
# Fork-safe: after_fork clears the pool so child processes don't reuse
# parent URLs (each child polls its own copy from /api/rules anyway).
class UploadUrlPool
  # Atomic counters: successful checkouts, checkouts that found the pool
  # empty, and entries discarded by checkout because they had expired.
  attr_reader :checkouts, :empty_count, :expired_count

  def initialize
    @entries_ref = Concurrent::AtomicReference.new([].freeze)
    @checkouts = Concurrent::AtomicFixnum.new(0)
    @empty_count = Concurrent::AtomicFixnum.new(0)
    @expired_count = Concurrent::AtomicFixnum.new(0)
  end

  # Replace the whole pool with `entries` (Array of Hashes with symbol or
  # string keys, or nil). Malformed entries are dropped; see #normalize.
  def replace(entries)
    normalized = (entries || []).filter_map { |raw| normalize(raw) }
    @entries_ref.set(normalized.freeze)
  end

  # Remove and return the first unexpired entry, or nil when the pool is
  # (or becomes) empty. Expired entries encountered on the way are
  # discarded and counted in expired_count.
  def checkout
    now = Time.now
    loop do
      current = @entries_ref.get
      if current.empty?
        @empty_count.increment
        return nil
      end

      candidate, *rest = current
      # Lost the race with another writer: re-read and try again.
      next unless @entries_ref.compare_and_set(current, rest.freeze)

      if expired?(candidate, now)
        @expired_count.increment
        next # try the next one
      end

      @checkouts.increment
      return candidate
    end
  end

  def size
    @entries_ref.get.length
  end

  def empty?
    size.zero?
  end

  def clear
    @entries_ref.set([].freeze)
  end

  # Drop URLs that have already passed their expires_at and return how
  # many were removed. Cheap; safe to call from RuleManager's scheduler in
  # between polls.
  #
  # The filtered list is published with compare_and_set rather than a
  # plain set: a plain set would overwrite a concurrent #replace (losing
  # fresh URLs) or a concurrent #checkout (putting an already handed-out
  # URL back into the pool).
  def sweep
    now = Time.now
    loop do
      current = @entries_ref.get
      live = current.reject { |entry| expired?(entry, now) }
      dropped = current.length - live.length
      return 0 if dropped.zero?
      return dropped if @entries_ref.compare_and_set(current, live.freeze)
    end
  end

  # Call from Flare.after_fork. Parent's URLs aren't usable from the
  # child's point of view (each child should get its own from a fresh
  # /api/rules poll), so just drop them.
  def after_fork
    clear
  end

  private

  # Convert one raw entry into the canonical symbol-keyed Hash, or return
  # nil when it is unusable: not a Hash, missing upload_id/key/put_url, an
  # unparseable expires_at String, or an expires_at that is neither nil,
  # a String, nor a Time (such a value could not be compared against
  # Time.now in #expired? and would raise during checkout/sweep).
  def normalize(raw)
    return nil unless raw.is_a?(Hash)

    upload_id = raw[:upload_id] || raw["upload_id"]
    key = raw[:key] || raw["key"]
    put_url = raw[:put_url] || raw["put_url"]
    expires_at = raw[:expires_at] || raw["expires_at"]
    return nil if upload_id.nil? || key.nil? || put_url.nil?

    expires_at = Time.iso8601(expires_at) if expires_at.is_a?(String)
    return nil unless expires_at.nil? || expires_at.is_a?(Time)

    { upload_id: upload_id, key: key, put_url: put_url, expires_at: expires_at }
  rescue StandardError
    # Best effort: one bad entry must not prevent the rest of the
    # response from being loaded into the pool.
    nil
  end

  # An entry without expires_at never expires.
  def expired?(entry, now)
    entry[:expires_at] && entry[:expires_at] <= now
  end
end
|
|
108
|
+
end
|
data/lib/flare/version.rb
CHANGED
data/lib/flare/web_marker_subscriber.rb
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "opentelemetry/sdk"
|
|
4
|
+
|
|
5
|
+
module Flare
|
|
6
|
+
# Path 2: ActiveSupport::Notifications subscriber that fires on
# `start_processing.action_controller`, after Rails has routed to a
# controller#action. At that point the rack server span's start
# attributes don't yet carry code.namespace/code.function -- only the
# ActionPack instrumentation adds them, and Flare::Sampler's start-time
# decision (RECORD_ONLY) was already locked in.
#
# The subscriber consults the same sampler's rule set, finds any whose
# match_attributes match the now-known controller/action, applies the
# deterministic trace_id_ratio gate (CAF-1: no rate bypass on Path 2),
# and on pass calls marker.mark(trace_id, owner_span_id:, rule_id:).
# FilteringSpanProcessor then forwards every span in the trace to the
# exporter and unmarks when the owner (this rack span) finishes.
class WebMarkerSubscriber
  NOTIFICATION = "start_processing.action_controller"

  # sampler: must respond to #rules and #trace_id_ratio(trace_id).
  # marker:  must respond to #mark(trace_id, owner_span_id:, rule_id:).
  def initialize(sampler:, marker:)
    @sampler = sampler
    @marker = marker
    @subscriber = nil
  end

  # Subscribe to the notification. Idempotent: calling start while already
  # subscribed keeps the existing subscription instead of registering a
  # second one (which would run #handle twice per request and leave a
  # subscription that #stop could no longer remove). Returns self.
  def start
    return self if @subscriber

    @subscriber = ActiveSupport::Notifications.subscribe(NOTIFICATION) do |*, payload|
      handle(payload)
    end
    self
  end

  # Unsubscribe if subscribed. Safe to call repeatedly. Returns self.
  def stop
    ActiveSupport::Notifications.unsubscribe(@subscriber) if @subscriber
    @subscriber = nil
    self
  end

  # Public for tests so they don't have to drive ActiveSupport::Notifications.
  # current_span lets tests inject a context; in production it's the
  # rack server span on the current thread.
  #
  # Marks the trace for the first rule that both matches the payload's
  # controller/action and passes its rate gate; later rules are ignored.
  def handle(payload, current_span: OpenTelemetry::Trace.current_span)
    return unless current_span

    ctx = current_span.context
    return unless ctx&.valid?

    attrs = candidate_attributes(payload)
    return if attrs.empty?

    @sampler.rules.each do |rule|
      next unless matches?(rule, attrs)
      next unless @sampler.trace_id_ratio(ctx.trace_id) < rule.rate

      if current_span.respond_to?(:set_attribute)
        current_span.set_attribute(Flare::Sampler::RULE_ID_ATTRIBUTE, rule.id)
      end
      @marker.mark(ctx.trace_id, owner_span_id: ctx.span_id, rule_id: rule.id)
      break
    end
  end

  private

  # Build the attribute Hash rules are matched against from the
  # notification payload (symbol or string keys); absent values are omitted.
  def candidate_attributes(payload)
    {
      "code.namespace" => payload[:controller] || payload["controller"],
      "code.function" => payload[:action] || payload["action"]
    }.compact
  end

  # True when every key/value in the rule's match_attributes equals the
  # corresponding candidate attribute.
  def matches?(rule, attrs)
    rule.match_attributes.all? { |name, expected| attrs[name] == expected }
  end
end
|
|
76
|
+
end
|
data/lib/flare.rb
CHANGED
|
@@ -13,6 +13,15 @@ require_relative "flare/metric_flusher"
|
|
|
13
13
|
require_relative "flare/backoff_policy"
|
|
14
14
|
require_relative "flare/metric_submitter"
|
|
15
15
|
|
|
16
|
+
require_relative "flare/sampler"
|
|
17
|
+
require_relative "flare/marker"
|
|
18
|
+
require_relative "flare/web_marker_subscriber"
|
|
19
|
+
require_relative "flare/filtering_span_processor"
|
|
20
|
+
require_relative "flare/upload_url_pool"
|
|
21
|
+
require_relative "flare/trace_exporter"
|
|
22
|
+
require_relative "flare/rule_manager"
|
|
23
|
+
require_relative "flare/trace_health_reporter"
|
|
24
|
+
|
|
16
25
|
module Flare
|
|
17
26
|
class Error < StandardError; end
|
|
18
27
|
|
|
@@ -114,15 +123,43 @@ module Flare
|
|
|
114
123
|
@metric_flusher = flusher
|
|
115
124
|
end
|
|
116
125
|
|
|
126
|
+
# Readers for the trace-sampling components. Exposed so tests can inspect
# them and so after_fork wiring can be done manually. Each returns nil
# until the corresponding component has been assigned.
def sampler
  @sampler
end

def marker
  @marker
end

def upload_url_pool
  @upload_url_pool
end

def rule_manager
  @rule_manager
end

def trace_span_processor
  @trace_span_processor
end

def trace_health_reporter
  @trace_health_reporter
end
|
|
133
|
+
|
|
117
134
|
# Force an immediate metrics flush (handy in tests or when a flush must
# not wait for the interval). Returns whatever the flusher's flush_now
# returns, or 0 when there is no flusher or flush_now returns nil/false.
def flush_metrics
  flusher = @metric_flusher
  return 0 if flusher.nil?

  flushed = flusher.flush_now
  flushed || 0
end
|
|
121
138
|
|
|
122
|
-
#
|
|
139
|
+
# Default project key, derived from the host Rails app's module name.
# Customers can override by configuring something else once we expose
# configuration.project; for v0.3 this matches MetricSubmitter's behavior.
#
# Returns:
# - "app" when Rails is not loaded or has no application yet;
# - the underscored parent module name of the application class;
# - "rails_app" when the application class has no parent module name or
#   deriving the name raises.
def service_name_for_app
  return "app" unless defined?(Rails) && Rails.respond_to?(:application) && Rails.application

  begin
    parent_name = Rails.application.class.module_parent_name
    parent_name ? parent_name.underscore : "rails_app"
  rescue StandardError
    # Best effort: naming the project must never break startup.
    "rails_app"
  end
end
|
|
149
|
+
|
|
150
|
+
# Name of the runtime environment: Rails.env (as a String) when Rails is
# loaded and responds to env, otherwise the RACK_ENV environment variable,
# falling back to "development".
def rails_env_name
  return Rails.env.to_s if defined?(Rails) && Rails.respond_to?(:env)

  ENV.fetch("RACK_ENV", "development")
end
|
|
157
|
+
|
|
158
|
+
# Re-initialize background threads after fork.
# Call this from Puma/Unicorn after_fork hooks.
#
# Forwards after_fork to each component that has been created; components
# that were never set up (nil) are skipped. The upload URL pool is told
# before the rule manager so that URLs inherited from the parent process
# are gone by the time the rule manager is told about the fork.
# NOTE(review): RuleManager#after_fork may already clear the pool it was
# given -- confirm; the extra clear is harmless either way.
def after_fork
  @metric_flusher&.after_fork
  @upload_url_pool&.after_fork
  @rule_manager&.after_fork
end
|
|
127
164
|
|
|
128
165
|
# Configure OpenTelemetry SDK and instrumentations. Must run before the
|
|
@@ -135,11 +172,7 @@ module Flare
|
|
|
135
172
|
# Suppress noisy OTel INFO logs
|
|
136
173
|
OpenTelemetry.logger = Logger.new(STDOUT, level: Logger::WARN)
|
|
137
174
|
|
|
138
|
-
service_name = if defined?(Rails) && Rails.respond_to?(:application) && Rails.application
|
|
139
|
-
Rails.application.class.module_parent_name.underscore rescue "rails_app"
|
|
140
|
-
else
|
|
141
|
-
"app"
|
|
142
|
-
end
|
|
175
|
+
service_name = service_name_for_app
|
|
143
176
|
|
|
144
177
|
# Require flare's bundled instrumentations
|
|
145
178
|
require "opentelemetry-instrumentation-rack"
|
|
@@ -201,12 +234,98 @@ module Flare
|
|
|
201
234
|
span_processor.shutdown
|
|
202
235
|
log "Span processor flushed and stopped"
|
|
203
236
|
end
|
|
237
|
+
if @trace_span_processor
|
|
238
|
+
@trace_span_processor.force_flush
|
|
239
|
+
@trace_span_processor.shutdown
|
|
240
|
+
log "Trace span processor flushed and stopped"
|
|
241
|
+
end
|
|
204
242
|
log "Shutdown complete"
|
|
205
243
|
end
|
|
206
244
|
|
|
207
245
|
@otel_configured = true
|
|
208
246
|
end
|
|
209
247
|
|
|
248
|
+
# Start the trace-rules poller. Polls GET /api/rules every
# tracing_poll_interval (default 30s) so the in-process sampler + URL
# pool stay current. Called from config.after_initialize -- after the
# user's configure block has run -- so configuration.url / .key /
# .tracing_enabled are settled.
#
# Idempotent: once a rule manager exists, further calls return without
# creating a second poller or registering another at_exit hook. Also a
# no-op when trace submission is not configured or the tracing components
# could not be set up.
def start_rule_manager
  return if @rule_manager
  return unless configuration.tracing_submission_configured?

  setup_tracing_components
  return unless @sampler && @marker && @upload_url_pool

  @rule_manager = RuleManager.new(
    sampler: @sampler,
    marker: @marker,
    pool: @upload_url_pool,
    base_url: configuration.url,
    api_key: configuration.key,
    project: service_name_for_app,
    environment: rails_env_name,
    interval: configuration.tracing_poll_interval
  )
  @rule_manager.start
  log "Rule manager started (poll=#{configuration.tracing_poll_interval}s)"

  # Stop the poller on process exit; reads the ivar at exit time so it
  # acts on whatever manager is current then.
  at_exit { @rule_manager&.stop }
end
|
|
274
|
+
|
|
275
|
+
# Build and wire the trace-sampling pipeline once: sampler, marker, upload
# URL pool, exporter, filtering span processor, health reporter and (when
# ActiveSupport::Notifications is loaded) the web marker subscriber.
# Subsequent calls return immediately once the span processor exists.
def setup_tracing_components
  return if @trace_span_processor

  @sampler = Sampler.new
  @marker = Marker.new
  @upload_url_pool = UploadUrlPool.new

  # Trace sampling is server-controlled and per-route. The sampler decides
  # at span start; routes it cannot decide there (Rails web spans only get
  # their controller#action attributes after routing) are handled by the
  # marker + WebMarkerSubscriber. Every parent-based delegate is
  # ALWAYS_RECORD_ONLY so children of unsampled local and remote parents
  # keep recording and processors still see web requests that arrive with
  # an unsampled traceparent header.
  #
  # The sampler is assigned on the tracer_provider AFTER SDK.configure --
  # the SDK's Configurator block has no `sampler=`; the provider does.
  record_only = ALWAYS_RECORD_ONLY
  parent_based_sampler = OpenTelemetry::SDK::Trace::Samplers.parent_based(
    root: @sampler,
    remote_parent_sampled: record_only,
    remote_parent_not_sampled: record_only,
    local_parent_not_sampled: record_only
  )
  OpenTelemetry.tracer_provider.sampler = parent_based_sampler

  traces_endpoint = "#{configuration.url.to_s.chomp('/')}/api/traces"
  @trace_exporter = TraceExporter.new(
    pool: @upload_url_pool,
    notify_url: traces_endpoint,
    api_key: configuration.key,
    project: service_name_for_app,
    environment: rails_env_name
  )

  @trace_span_processor = FilteringSpanProcessor.new(
    exporter: @trace_exporter,
    marker: @marker,
    max_queue: configuration.tracing_max_queue
  )
  OpenTelemetry.tracer_provider.add_span_processor(@trace_span_processor)

  @trace_health_reporter = TraceHealthReporter.new(
    processor: @trace_span_processor,
    pool: @upload_url_pool,
    exporter: @trace_exporter
  )

  # Path 2 trace marking. Rails-only -- outside Rails the subscriber would
  # never fire, but creating it is harmless.
  if defined?(ActiveSupport::Notifications)
    subscriber = WebMarkerSubscriber.new(sampler: @sampler, marker: @marker)
    @web_marker_subscriber = subscriber.start
  end

  log "Tracing enabled (poll=#{configuration.tracing_poll_interval}s)"
end
|
|
328
|
+
|
|
210
329
|
# Start the metrics flusher. Called from config.after_initialize so
|
|
211
330
|
# user configuration (metrics_enabled, flush_interval, etc.) is applied.
|
|
212
331
|
def start_metrics_flusher
|
|
@@ -229,7 +348,8 @@ module Flare
|
|
|
229
348
|
@metric_flusher = MetricFlusher.new(
|
|
230
349
|
storage: @metric_storage,
|
|
231
350
|
submitter: submitter,
|
|
232
|
-
interval: configuration.metrics_flush_interval
|
|
351
|
+
interval: configuration.metrics_flush_interval,
|
|
352
|
+
health_reporters: @trace_health_reporter ? [@trace_health_reporter] : []
|
|
233
353
|
)
|
|
234
354
|
@metric_flusher.start
|
|
235
355
|
log "Metrics flusher started (interval=#{configuration.metrics_flush_interval}s)"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: flare
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- John Nunemaker
|
|
@@ -198,18 +198,28 @@ files:
|
|
|
198
198
|
- lib/flare/cli/status_command.rb
|
|
199
199
|
- lib/flare/configuration.rb
|
|
200
200
|
- lib/flare/engine.rb
|
|
201
|
+
- lib/flare/filtering_span_processor.rb
|
|
201
202
|
- lib/flare/http_metrics_config.rb
|
|
203
|
+
- lib/flare/http_transport.rb
|
|
204
|
+
- lib/flare/marker.rb
|
|
202
205
|
- lib/flare/metric_counter.rb
|
|
203
206
|
- lib/flare/metric_flusher.rb
|
|
204
207
|
- lib/flare/metric_key.rb
|
|
205
208
|
- lib/flare/metric_span_processor.rb
|
|
206
209
|
- lib/flare/metric_storage.rb
|
|
207
210
|
- lib/flare/metric_submitter.rb
|
|
211
|
+
- lib/flare/rule_manager.rb
|
|
212
|
+
- lib/flare/sampler.rb
|
|
208
213
|
- lib/flare/source_location.rb
|
|
209
214
|
- lib/flare/sqlite_exporter.rb
|
|
210
215
|
- lib/flare/storage.rb
|
|
211
216
|
- lib/flare/storage/sqlite.rb
|
|
217
|
+
- lib/flare/trace_blob.rb
|
|
218
|
+
- lib/flare/trace_exporter.rb
|
|
219
|
+
- lib/flare/trace_health_reporter.rb
|
|
220
|
+
- lib/flare/upload_url_pool.rb
|
|
212
221
|
- lib/flare/version.rb
|
|
222
|
+
- lib/flare/web_marker_subscriber.rb
|
|
213
223
|
- public/flare-assets/flare.css
|
|
214
224
|
- public/flare-assets/images/flipper.png
|
|
215
225
|
homepage: https://github.com/jnunemaker/flare
|