flare 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +2 -4
- data/app/controllers/flare/application_controller.rb +8 -0
- data/app/helpers/flare/application_helper.rb +4 -4
- data/app/views/flare/jobs/show.html.erb +2 -2
- data/app/views/flare/requests/show.html.erb +8 -8
- data/lib/flare/configuration.rb +14 -0
- data/lib/flare/engine.rb +4 -2
- data/lib/flare/filtering_span_processor.rb +279 -0
- data/lib/flare/http_transport.rb +62 -0
- data/lib/flare/marker.rb +106 -0
- data/lib/flare/metric_counter.rb +6 -0
- data/lib/flare/metric_flusher.rb +18 -5
- data/lib/flare/metric_span_processor.rb +1 -1
- data/lib/flare/metric_storage.rb +5 -0
- data/lib/flare/rule_manager.rb +140 -0
- data/lib/flare/sampler.rb +130 -0
- data/lib/flare/storage/sqlite.rb +27 -22
- data/lib/flare/trace_blob.rb +116 -0
- data/lib/flare/trace_exporter.rb +143 -0
- data/lib/flare/trace_health_reporter.rb +74 -0
- data/lib/flare/upload_url_pool.rb +108 -0
- data/lib/flare/version.rb +1 -1
- data/lib/flare/web_marker_subscriber.rb +76 -0
- data/lib/flare.rb +146 -20
- metadata +11 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: dcfa9283973a11f13fd17397781c76cc03d3e39c9bd08a92025417397eb74eb9
|
|
4
|
+
data.tar.gz: 30e2f84a92f9689aabdd64b7c6839598dc0c3c2bb1dd32a31e2ab9b3a6f5f85a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 581e6e9ed512b82ac086ab76644ff0d618408649a110ef3b623a3bf4a1648e3695dfcedcefad01bd54b7a11687a1c5ed154ed01e74f2cf14c27fa400bf97f73a
|
|
7
|
+
data.tar.gz: 4a407b09a229f512d6cb1f35ab04dfb8de7ecaa982837fd356c41435c7758d3c4e05ad33812670b2e570830bef5a21e9bdffe297234cec9e33f9ab5250c42af1
|
data/CHANGELOG.md
CHANGED
|
@@ -6,10 +6,18 @@ module Flare
|
|
|
6
6
|
|
|
7
7
|
layout "flare/application"
|
|
8
8
|
|
|
9
|
+
before_action :require_storage
|
|
10
|
+
|
|
9
11
|
helper_method :show_redis_tab?
|
|
10
12
|
|
|
11
13
|
private
|
|
12
14
|
|
|
15
|
+
def require_storage
|
|
16
|
+
return if Flare.storage
|
|
17
|
+
|
|
18
|
+
render plain: "Flare dashboard requires the sqlite3 gem. Add `gem 'sqlite3'` to your Gemfile.", status: :service_unavailable
|
|
19
|
+
end
|
|
20
|
+
|
|
13
21
|
# Only show the Redis tab if:
|
|
14
22
|
# 1. The Redis client library is loaded
|
|
15
23
|
# 2. There are Redis spans in the database
|
|
@@ -53,8 +53,8 @@ module Flare
|
|
|
53
53
|
{ primary: primary, secondary: nil }
|
|
54
54
|
when "http"
|
|
55
55
|
full_url = props["http.url"] || ""
|
|
56
|
-
target = props["http.target"] || ""
|
|
57
|
-
host = props["http.host"] || props["net.peer.name"] || props["peer.service"]
|
|
56
|
+
target = props["url.path"] || props["http.target"] || ""
|
|
57
|
+
host = props["server.address"] || props["http.host"] || props["net.peer.name"] || props["peer.service"]
|
|
58
58
|
uri = URI.parse(full_url) rescue nil
|
|
59
59
|
if uri && uri.host
|
|
60
60
|
domain = uri.host
|
|
@@ -64,8 +64,8 @@ module Flare
|
|
|
64
64
|
domain = host
|
|
65
65
|
path = target.presence || full_url
|
|
66
66
|
end
|
|
67
|
-
method = props["http.method"]
|
|
68
|
-
status = props["http.status_code"]
|
|
67
|
+
method = props["http.request.method"] || props["http.method"]
|
|
68
|
+
status = props["http.response.status_code"] || props["http.status_code"]
|
|
69
69
|
{ primary: path.to_s.truncate(100), secondary: domain, http_method: method, http_status: status }
|
|
70
70
|
when "mail"
|
|
71
71
|
mailer = props["mailer"]
|
|
@@ -220,8 +220,8 @@
|
|
|
220
220
|
span[:name]
|
|
221
221
|
end
|
|
222
222
|
elsif category == "http"
|
|
223
|
-
url = props["http.url"] || props["
|
|
224
|
-
method = props["http.method"]
|
|
223
|
+
url = props["http.url"] || props["url.path"]
|
|
224
|
+
method = props["http.request.method"]
|
|
225
225
|
"#{method} #{url}".strip.presence || span[:name]
|
|
226
226
|
elsif category == "controller"
|
|
227
227
|
ns = props["code.namespace"]
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
<%
|
|
2
2
|
# Get root span properties
|
|
3
3
|
root_props = @root_span ? @root_span[:properties] : {}
|
|
4
|
-
http_method = root_props["http.method"]
|
|
5
|
-
http_status = root_props["http.status_code"]
|
|
6
|
-
http_target = root_props["
|
|
4
|
+
http_method = root_props["http.request.method"]
|
|
5
|
+
http_status = root_props["http.response.status_code"]
|
|
6
|
+
http_target = root_props["url.path"]
|
|
7
7
|
controller = root_props["code.namespace"]
|
|
8
8
|
action = root_props["code.function"]
|
|
9
9
|
|
|
@@ -51,8 +51,8 @@
|
|
|
51
51
|
|
|
52
52
|
# Properties to exclude from "other" section (already shown elsewhere)
|
|
53
53
|
shown_keys = %w[
|
|
54
|
-
http.method http.status_code
|
|
55
|
-
code.namespace code.function http.route
|
|
54
|
+
http.request.method http.response.status_code url.path url.scheme server.address
|
|
55
|
+
code.namespace code.function http.route user_agent.original
|
|
56
56
|
]
|
|
57
57
|
other_props = root_props.reject { |k, v| shown_keys.include?(k) || v.is_a?(Hash) || v.is_a?(Array) }
|
|
58
58
|
%>
|
|
@@ -260,8 +260,8 @@
|
|
|
260
260
|
span[:name]
|
|
261
261
|
end
|
|
262
262
|
elsif category == "http"
|
|
263
|
-
url = props["http.url"] || props["
|
|
264
|
-
method = props["http.method"]
|
|
263
|
+
url = props["http.url"] || props["url.path"]
|
|
264
|
+
method = props["http.request.method"]
|
|
265
265
|
"#{method} #{url}".strip.presence || span[:name]
|
|
266
266
|
elsif category == "controller"
|
|
267
267
|
ns = props["code.namespace"]
|
|
@@ -387,7 +387,7 @@
|
|
|
387
387
|
extra = props['identifier'].replace(/.*\/app\/views\//, '');
|
|
388
388
|
} else if (category === 'http') {
|
|
389
389
|
name = 'HTTP';
|
|
390
|
-
extra = ((props['http.method'] || '') + ' ' + (props['http.url'] || props['
|
|
390
|
+
extra = ((props['http.request.method'] || '') + ' ' + (props['http.url'] || props['url.path'] || '')).trim();
|
|
391
391
|
}
|
|
392
392
|
|
|
393
393
|
let line = offset.padStart(8) + ' | ' + duration.padStart(8) + ' | ' + name;
|
data/lib/flare/configuration.rb
CHANGED
|
@@ -12,9 +12,15 @@ module Flare
|
|
|
12
12
|
|
|
13
13
|
# Spans: detailed trace data stored in SQLite (default: development only)
|
|
14
14
|
# Metrics: aggregated counters in memory, flushed periodically (default: production only)
|
|
15
|
+
# Tracing: server-controlled per-route trace sampling. Polls /api/rules
|
|
16
|
+
# for which routes/jobs to capture, ships matched traces direct to R2
|
|
17
|
+
# via presigned URLs, self-notifies POST /api/traces.
|
|
15
18
|
attr_accessor :spans_enabled
|
|
16
19
|
attr_accessor :metrics_enabled
|
|
17
20
|
attr_accessor :metrics_flush_interval # seconds between flushes (default: 60)
|
|
21
|
+
attr_accessor :tracing_enabled
|
|
22
|
+
attr_accessor :tracing_poll_interval # seconds between /api/rules polls (default: 30)
|
|
23
|
+
attr_accessor :tracing_max_queue # max traced spans buffered per process
|
|
18
24
|
|
|
19
25
|
# Metrics HTTP submission settings
|
|
20
26
|
attr_accessor :url # URL of the Flare metrics service
|
|
@@ -53,6 +59,9 @@ module Flare
|
|
|
53
59
|
@spans_enabled = rails_development?
|
|
54
60
|
@metrics_enabled = !rails_test?
|
|
55
61
|
@metrics_flush_interval = 60 # seconds
|
|
62
|
+
@tracing_enabled = !rails_test?
|
|
63
|
+
@tracing_poll_interval = 30 # seconds
|
|
64
|
+
@tracing_max_queue = 5_000
|
|
56
65
|
|
|
57
66
|
# Metrics HTTP submission defaults
|
|
58
67
|
@url = ENV.fetch("FLARE_URL", credentials_url || "https://flare.am")
|
|
@@ -67,6 +76,11 @@ module Flare
|
|
|
67
76
|
!@key.nil? && !@key.empty?
|
|
68
77
|
end
|
|
69
78
|
|
|
79
|
+
# Tracing reuses the same endpoint + key as metrics.
|
|
80
|
+
def tracing_submission_configured?
|
|
81
|
+
@tracing_enabled && metrics_submission_configured?
|
|
82
|
+
end
|
|
83
|
+
|
|
70
84
|
def database_path
|
|
71
85
|
@database_path || default_database_path
|
|
72
86
|
end
|
data/lib/flare/engine.rb
CHANGED
|
@@ -25,9 +25,11 @@ module Flare
|
|
|
25
25
|
Flare.configure_opentelemetry
|
|
26
26
|
end
|
|
27
27
|
|
|
28
|
-
# Phase 2: Start
|
|
29
|
-
# so user config (metrics_enabled, flush_interval,
|
|
28
|
+
# Phase 2: Start background threads after all initializers have run
|
|
29
|
+
# so user config (metrics_enabled, tracing_enabled, flush_interval,
|
|
30
|
+
# tracing_poll_interval, etc.) is applied.
|
|
30
31
|
config.after_initialize do
|
|
32
|
+
Flare.start_rule_manager
|
|
31
33
|
Flare.start_metrics_flusher
|
|
32
34
|
end
|
|
33
35
|
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "concurrent/atomic/atomic_fixnum"
|
|
4
|
+
require "logger"
|
|
5
|
+
require "opentelemetry/sdk"
|
|
6
|
+
|
|
7
|
+
module Flare
|
|
8
|
+
# BSP-shaped span processor whose filter is `sampled OR marked` instead
|
|
9
|
+
# of BSP's `sampled` (BSP early-returns on RECORD_ONLY spans -- our
|
|
10
|
+
# Path 2 spans have sampled=false so they'd be dropped). Forwards
|
|
11
|
+
# matching spans to a trace exporter on a background worker thread so
|
|
12
|
+
# the exporter never runs on the request/job thread (CAF-3).
|
|
13
|
+
#
|
|
14
|
+
# On every on_finish we also check marker.owner?(trace_id, span_id) and
|
|
15
|
+
# unmark when the owning rack span finishes (CAF-2). Cleanup runs even
|
|
16
|
+
# for spans we don't export.
|
|
17
|
+
class FilteringSpanProcessor
|
|
18
|
+
SUCCESS = OpenTelemetry::SDK::Trace::Export::SUCCESS
|
|
19
|
+
FAILURE = OpenTelemetry::SDK::Trace::Export::FAILURE
|
|
20
|
+
|
|
21
|
+
DEFAULT_MAX_QUEUE = 5_000
|
|
22
|
+
DEFAULT_FLUSH_INTERVAL = 5 # seconds
|
|
23
|
+
DEFAULT_EXPORT_TIMEOUT = 30 # seconds
|
|
24
|
+
DEFAULT_MARKED_TRACE_GRACE_PERIOD = 1.0 # seconds
|
|
25
|
+
|
|
26
|
+
attr_reader :dropped_count, :failed_export_count, :exception_count, :buffer_high_watermark, :max_queue
|
|
27
|
+
|
|
28
|
+
def initialize(exporter:, marker:,
|
|
29
|
+
max_queue: DEFAULT_MAX_QUEUE,
|
|
30
|
+
flush_interval: DEFAULT_FLUSH_INTERVAL,
|
|
31
|
+
export_timeout: DEFAULT_EXPORT_TIMEOUT,
|
|
32
|
+
marked_trace_grace_period: DEFAULT_MARKED_TRACE_GRACE_PERIOD,
|
|
33
|
+
logger: nil)
|
|
34
|
+
@exporter = exporter
|
|
35
|
+
@marker = marker
|
|
36
|
+
@max_queue = max_queue
|
|
37
|
+
@flush_interval = flush_interval
|
|
38
|
+
@export_timeout = export_timeout
|
|
39
|
+
@marked_trace_grace_period = marked_trace_grace_period.to_f
|
|
40
|
+
@logger = logger || Logger.new($stderr, level: Logger::WARN)
|
|
41
|
+
|
|
42
|
+
@pending_by_trace = {}
|
|
43
|
+
@trace_order = []
|
|
44
|
+
@pending_count = 0
|
|
45
|
+
@ready_queue = []
|
|
46
|
+
@delayed_ready_by_trace = {}
|
|
47
|
+
@mutex = Mutex.new
|
|
48
|
+
@cond = ConditionVariable.new
|
|
49
|
+
@stopped = false
|
|
50
|
+
@pid = $$
|
|
51
|
+
|
|
52
|
+
@dropped_count = Concurrent::AtomicFixnum.new(0)
|
|
53
|
+
@failed_export_count = Concurrent::AtomicFixnum.new(0)
|
|
54
|
+
@exception_count = Concurrent::AtomicFixnum.new(0)
|
|
55
|
+
@buffer_high_watermark = Concurrent::AtomicFixnum.new(0)
|
|
56
|
+
|
|
57
|
+
start_worker
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
def on_start(_span, _parent_context); end
|
|
61
|
+
|
|
62
|
+
def on_finish(span)
|
|
63
|
+
detect_forking
|
|
64
|
+
|
|
65
|
+
ctx = span.context
|
|
66
|
+
sampled = ctx&.trace_flags&.sampled?
|
|
67
|
+
marked = ctx && @marker.marked?(ctx.trace_id)
|
|
68
|
+
owner_finished = marked && @marker.owner?(ctx.trace_id, ctx.span_id)
|
|
69
|
+
|
|
70
|
+
return unless sampled || marked
|
|
71
|
+
|
|
72
|
+
span_data = span.respond_to?(:to_span_data) ? span.to_span_data : span
|
|
73
|
+
enqueue(
|
|
74
|
+
span_data,
|
|
75
|
+
complete: owner_finished || sampled_completion_span?(span_data),
|
|
76
|
+
delay: owner_finished ? @marked_trace_grace_period : 0
|
|
77
|
+
)
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
def force_flush(timeout: nil)
|
|
81
|
+
drain_and_export(include_pending: true)
|
|
82
|
+
SUCCESS
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def shutdown(timeout: nil)
|
|
86
|
+
@mutex.synchronize do
|
|
87
|
+
@stopped = true
|
|
88
|
+
@cond.broadcast
|
|
89
|
+
end
|
|
90
|
+
@worker.join(timeout || 5)
|
|
91
|
+
drain_and_export(include_pending: true)
|
|
92
|
+
@exporter.shutdown(timeout: timeout) if @exporter.respond_to?(:shutdown)
|
|
93
|
+
SUCCESS
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
def buffer_size
|
|
97
|
+
@mutex.synchronize { queued_span_count }
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
def reset_buffer_high_watermark
|
|
101
|
+
@buffer_high_watermark.value = buffer_size
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
private
|
|
105
|
+
|
|
106
|
+
def enqueue(span_data, complete:, delay: 0)
|
|
107
|
+
@mutex.synchronize do
|
|
108
|
+
trace_id = span_data.trace_id
|
|
109
|
+
@trace_order << trace_id unless @pending_by_trace.key?(trace_id)
|
|
110
|
+
@pending_by_trace[trace_id] ||= []
|
|
111
|
+
@pending_by_trace[trace_id] << span_data
|
|
112
|
+
@pending_count += 1
|
|
113
|
+
evict_oldest_spans
|
|
114
|
+
|
|
115
|
+
if complete
|
|
116
|
+
delay.positive? ? delay_trace_ready(trace_id, delay) : mark_trace_ready(trace_id)
|
|
117
|
+
end
|
|
118
|
+
evict_oldest_spans
|
|
119
|
+
update_buffer_high_watermark
|
|
120
|
+
end
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
def worker_loop
|
|
124
|
+
until stopped?
|
|
125
|
+
@mutex.synchronize do
|
|
126
|
+
timeout = next_wait_timeout
|
|
127
|
+
@cond.wait(@mutex, timeout) if @ready_queue.empty? && !@stopped
|
|
128
|
+
end
|
|
129
|
+
drain_and_export
|
|
130
|
+
end
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
def stopped?
|
|
134
|
+
@mutex.synchronize { @stopped }
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
def drain_and_export(include_pending: false)
|
|
138
|
+
batch = nil
|
|
139
|
+
@mutex.synchronize do
|
|
140
|
+
promote_due_delayed_traces
|
|
141
|
+
|
|
142
|
+
if include_pending
|
|
143
|
+
@ready_queue.concat(@pending_by_trace.values.flatten)
|
|
144
|
+
@pending_by_trace.clear
|
|
145
|
+
@trace_order.clear
|
|
146
|
+
@pending_count = 0
|
|
147
|
+
unmark_delayed_traces
|
|
148
|
+
@delayed_ready_by_trace.clear
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
return if @ready_queue.empty?
|
|
152
|
+
batch = @ready_queue
|
|
153
|
+
@ready_queue = []
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
result = @exporter.export(batch, timeout: @export_timeout)
|
|
157
|
+
@failed_export_count.increment if result != SUCCESS
|
|
158
|
+
rescue StandardError => e
|
|
159
|
+
@exception_count.increment
|
|
160
|
+
@logger.warn("[Flare::FilteringSpanProcessor] export failed: #{e.class}: #{e.message}")
|
|
161
|
+
end
|
|
162
|
+
|
|
163
|
+
def mark_trace_ready(trace_id)
|
|
164
|
+
batch = @pending_by_trace.delete(trace_id)
|
|
165
|
+
return unless batch
|
|
166
|
+
|
|
167
|
+
@trace_order.delete(trace_id)
|
|
168
|
+
@delayed_ready_by_trace.delete(trace_id)
|
|
169
|
+
@pending_count -= batch.length
|
|
170
|
+
@ready_queue.concat(batch)
|
|
171
|
+
@cond.signal
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
def delay_trace_ready(trace_id, delay)
|
|
175
|
+
@delayed_ready_by_trace[trace_id] = monotonic_now + delay
|
|
176
|
+
@cond.signal
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
def promote_due_delayed_traces
|
|
180
|
+
now = monotonic_now
|
|
181
|
+
ready_trace_ids = @delayed_ready_by_trace.select { |_, ready_at| ready_at <= now }.keys
|
|
182
|
+
ready_trace_ids.each do |trace_id|
|
|
183
|
+
mark_trace_ready(trace_id)
|
|
184
|
+
@marker.unmark(trace_id)
|
|
185
|
+
end
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
def unmark_delayed_traces
|
|
189
|
+
@delayed_ready_by_trace.each_key { |trace_id| @marker.unmark(trace_id) }
|
|
190
|
+
end
|
|
191
|
+
|
|
192
|
+
def next_wait_timeout
|
|
193
|
+
next_ready_at = @delayed_ready_by_trace.values.min
|
|
194
|
+
return @flush_interval unless next_ready_at
|
|
195
|
+
|
|
196
|
+
[next_ready_at - monotonic_now, 0].max
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
def evict_oldest_spans
|
|
200
|
+
while queued_span_count > @max_queue
|
|
201
|
+
trace_id = @trace_order.first
|
|
202
|
+
unless trace_id
|
|
203
|
+
@ready_queue.shift
|
|
204
|
+
@dropped_count.increment
|
|
205
|
+
next
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
spans = @pending_by_trace[trace_id]
|
|
209
|
+
if spans.nil? || spans.empty?
|
|
210
|
+
@trace_order.shift
|
|
211
|
+
next
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
spans.shift
|
|
215
|
+
@pending_count -= 1
|
|
216
|
+
@dropped_count.increment
|
|
217
|
+
|
|
218
|
+
if spans.empty?
|
|
219
|
+
@pending_by_trace.delete(trace_id)
|
|
220
|
+
@trace_order.shift
|
|
221
|
+
end
|
|
222
|
+
end
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
def queued_span_count
|
|
226
|
+
@pending_count + @ready_queue.length
|
|
227
|
+
end
|
|
228
|
+
|
|
229
|
+
def update_buffer_high_watermark
|
|
230
|
+
current = queued_span_count
|
|
231
|
+
@buffer_high_watermark.update { |previous| current > previous ? current : previous }
|
|
232
|
+
end
|
|
233
|
+
|
|
234
|
+
def monotonic_now
|
|
235
|
+
Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
236
|
+
end
|
|
237
|
+
|
|
238
|
+
def sampled_completion_span?(span_data)
|
|
239
|
+
root_span?(span_data) || entry_span?(span_data)
|
|
240
|
+
end
|
|
241
|
+
|
|
242
|
+
def root_span?(span_data)
|
|
243
|
+
parent_id = span_data.parent_span_id if span_data.respond_to?(:parent_span_id)
|
|
244
|
+
parent_id.nil? ||
|
|
245
|
+
(parent_id.respond_to?(:empty?) && parent_id.empty?) ||
|
|
246
|
+
parent_id == OpenTelemetry::Trace::INVALID_SPAN_ID
|
|
247
|
+
end
|
|
248
|
+
|
|
249
|
+
def entry_span?(span_data)
|
|
250
|
+
return false unless span_data.respond_to?(:kind)
|
|
251
|
+
|
|
252
|
+
span_data.kind == :server || span_data.kind == :consumer
|
|
253
|
+
end
|
|
254
|
+
|
|
255
|
+
def detect_forking
|
|
256
|
+
return if @pid == $$
|
|
257
|
+
|
|
258
|
+
@mutex.synchronize do
|
|
259
|
+
return if @pid == $$
|
|
260
|
+
|
|
261
|
+
@pid = $$
|
|
262
|
+
@pending_by_trace.clear
|
|
263
|
+
@trace_order.clear
|
|
264
|
+
@ready_queue.clear
|
|
265
|
+
@delayed_ready_by_trace.clear
|
|
266
|
+
@pending_count = 0
|
|
267
|
+
@stopped = false
|
|
268
|
+
start_worker
|
|
269
|
+
end
|
|
270
|
+
end
|
|
271
|
+
|
|
272
|
+
def start_worker
|
|
273
|
+
return if @worker&.alive?
|
|
274
|
+
|
|
275
|
+
@worker = Thread.new { worker_loop }
|
|
276
|
+
@worker.name = "flare-filtering-span-processor"
|
|
277
|
+
end
|
|
278
|
+
end
|
|
279
|
+
end
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "net/http"
|
|
4
|
+
require "uri"
|
|
5
|
+
|
|
6
|
+
module Flare
|
|
7
|
+
# Tiny HTTP wrapper used by TraceExporter (and anything else that wants
|
|
8
|
+
# to PUT/POST without pulling in a heavy client). Designed for injection
|
|
9
|
+
# at the boundary so tests can swap in a recording fake; no other moving
|
|
10
|
+
# parts.
|
|
11
|
+
class HttpTransport
|
|
12
|
+
DEFAULT_OPEN_TIMEOUT = 2
|
|
13
|
+
DEFAULT_READ_TIMEOUT = 5
|
|
14
|
+
DEFAULT_WRITE_TIMEOUT = 5
|
|
15
|
+
|
|
16
|
+
Response = Struct.new(:code, :body, :headers, keyword_init: true) do
|
|
17
|
+
def header(name)
|
|
18
|
+
return nil unless headers
|
|
19
|
+
headers[name] || headers[name.downcase] || headers[name.upcase]
|
|
20
|
+
end
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def initialize(open_timeout: DEFAULT_OPEN_TIMEOUT,
|
|
24
|
+
read_timeout: DEFAULT_READ_TIMEOUT,
|
|
25
|
+
write_timeout: DEFAULT_WRITE_TIMEOUT)
|
|
26
|
+
@open_timeout = open_timeout
|
|
27
|
+
@read_timeout = read_timeout
|
|
28
|
+
@write_timeout = write_timeout
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def get(url, headers = {})
|
|
32
|
+
request(url, nil, headers, Net::HTTP::Get)
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def put(url, body, headers = {})
|
|
36
|
+
request(url, body, headers, Net::HTTP::Put)
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def post(url, body, headers = {})
|
|
40
|
+
request(url, body, headers, Net::HTTP::Post)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
private
|
|
44
|
+
|
|
45
|
+
def request(url, body, headers, klass)
|
|
46
|
+
uri = URI(url)
|
|
47
|
+
http = Net::HTTP.new(uri.host, uri.port)
|
|
48
|
+
http.use_ssl = uri.scheme == "https"
|
|
49
|
+
http.open_timeout = @open_timeout
|
|
50
|
+
http.read_timeout = @read_timeout
|
|
51
|
+
http.write_timeout = @write_timeout if http.respond_to?(:write_timeout=)
|
|
52
|
+
|
|
53
|
+
req = klass.new(uri.request_uri == "" ? "/" : uri.request_uri)
|
|
54
|
+
headers.each { |k, v| req[k] = v }
|
|
55
|
+
req.body = body if body
|
|
56
|
+
|
|
57
|
+
response = http.request(req)
|
|
58
|
+
hash = response.each_header.to_h
|
|
59
|
+
Response.new(code: response.code.to_s, body: response.body, headers: hash)
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
end
|
data/lib/flare/marker.rb
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "concurrent/map"
|
|
4
|
+
require "concurrent/atomic/atomic_fixnum"
|
|
5
|
+
|
|
6
|
+
module Flare
|
|
7
|
+
# Thread-safe registry of trace_ids that Path 2 (the WebMarkerSubscriber)
|
|
8
|
+
# has marked for export. FilteringSpanProcessor checks marked? on every
|
|
9
|
+
# on_finish; matching spans get forwarded to the trace exporter, the rest
|
|
10
|
+
# are dropped.
|
|
11
|
+
#
|
|
12
|
+
# Each entry records the OWNER span_id (the local rack server span the
|
|
13
|
+
# subscriber was inside when it marked the trace). Cleanup is keyed on
|
|
14
|
+
# the owner finishing, not the trace root finishing -- remote-parented
|
|
15
|
+
# rack spans aren't trace roots, and child spans can outlive their parent
|
|
16
|
+
# in OTel, so root-driven cleanup would leak on the dominant production
|
|
17
|
+
# case (web app behind a load balancer or service mesh).
|
|
18
|
+
#
|
|
19
|
+
# Bounded by:
|
|
20
|
+
# - sweep(): drops entries older than max_age (default 5 min) so a rack
|
|
21
|
+
# span that never finishes (process killed mid-request, exception path
|
|
22
|
+
# that skips ensure) doesn't leak forever.
|
|
23
|
+
# - hard ceiling at max_entries (default 10k): on overflow, drop oldest
|
|
24
|
+
# 10% by marked_at.
|
|
25
|
+
class Marker
|
|
26
|
+
Entry = Struct.new(:owner_span_id, :rule_id, :marked_at, keyword_init: true)
|
|
27
|
+
|
|
28
|
+
DEFAULT_MAX_ENTRIES = 10_000
|
|
29
|
+
DEFAULT_MAX_AGE = 5 * 60 # seconds
|
|
30
|
+
|
|
31
|
+
attr_reader :evicted_count
|
|
32
|
+
|
|
33
|
+
def initialize(max_entries: DEFAULT_MAX_ENTRIES, max_age: DEFAULT_MAX_AGE)
|
|
34
|
+
@entries = Concurrent::Map.new
|
|
35
|
+
@max_entries = max_entries
|
|
36
|
+
@max_age = max_age
|
|
37
|
+
@evicted_count = Concurrent::AtomicFixnum.new(0)
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def mark(trace_id, owner_span_id:, rule_id:)
|
|
41
|
+
@entries[trace_id] = Entry.new(
|
|
42
|
+
owner_span_id: owner_span_id,
|
|
43
|
+
rule_id: rule_id,
|
|
44
|
+
marked_at: monotonic_now
|
|
45
|
+
)
|
|
46
|
+
maybe_evict_oldest
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def marked?(trace_id)
|
|
50
|
+
@entries.key?(trace_id)
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# True only when span_id matches the marker's owner -- the rack span
|
|
54
|
+
# that originally marked this trace. Used by FilteringSpanProcessor to
|
|
55
|
+
# decide when to unmark (only when that exact span finishes, not on
|
|
56
|
+
# every span that happens to have this trace_id).
|
|
57
|
+
def owner?(trace_id, span_id)
|
|
58
|
+
entry = @entries[trace_id]
|
|
59
|
+
!entry.nil? && entry.owner_span_id == span_id
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def rule_id(trace_id)
|
|
63
|
+
entry = @entries[trace_id]
|
|
64
|
+
entry&.rule_id
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def unmark(trace_id)
|
|
68
|
+
@entries.delete(trace_id)
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
def size
|
|
72
|
+
@entries.size
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
# Drop entries older than max_age. Call periodically (the RuleManager's
|
|
76
|
+
# scheduler is the natural place) to handle the rack-span-never-finishes
|
|
77
|
+
# leak case (CAF-7).
|
|
78
|
+
def sweep
|
|
79
|
+
threshold = monotonic_now - @max_age
|
|
80
|
+
evicted = 0
|
|
81
|
+
@entries.each_pair do |trace_id, entry|
|
|
82
|
+
if entry.marked_at < threshold
|
|
83
|
+
@entries.delete(trace_id)
|
|
84
|
+
evicted += 1
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
@evicted_count.increment(evicted) if evicted.positive?
|
|
88
|
+
evicted
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
private
|
|
92
|
+
|
|
93
|
+
def maybe_evict_oldest
|
|
94
|
+
return if @entries.size <= @max_entries
|
|
95
|
+
|
|
96
|
+
to_drop = (@max_entries * 0.1).ceil
|
|
97
|
+
sorted = @entries.each_pair.to_a.sort_by { |_, entry| entry.marked_at }
|
|
98
|
+
sorted.first(to_drop).each { |trace_id, _| @entries.delete(trace_id) }
|
|
99
|
+
@evicted_count.increment(to_drop)
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
def monotonic_now
|
|
103
|
+
Process.clock_gettime(Process::CLOCK_MONOTONIC)
|
|
104
|
+
end
|
|
105
|
+
end
|
|
106
|
+
end
|
data/lib/flare/metric_counter.rb
CHANGED
|
@@ -22,6 +22,12 @@ module Flare
|
|
|
22
22
|
@error_count.increment if error
|
|
23
23
|
end
|
|
24
24
|
|
|
25
|
+
def add(count:, sum_ms:, error_count: 0)
|
|
26
|
+
@count.increment(count.to_i)
|
|
27
|
+
@sum_ms.increment(sum_ms.to_i)
|
|
28
|
+
@error_count.increment(error_count.to_i)
|
|
29
|
+
end
|
|
30
|
+
|
|
25
31
|
def count
|
|
26
32
|
@count.value
|
|
27
33
|
end
|