flare 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +2 -4
- data/app/controllers/flare/application_controller.rb +8 -0
- data/app/helpers/flare/application_helper.rb +4 -4
- data/app/views/flare/jobs/show.html.erb +2 -2
- data/app/views/flare/requests/show.html.erb +8 -8
- data/lib/flare/configuration.rb +14 -0
- data/lib/flare/engine.rb +4 -2
- data/lib/flare/filtering_span_processor.rb +279 -0
- data/lib/flare/http_transport.rb +62 -0
- data/lib/flare/marker.rb +106 -0
- data/lib/flare/metric_counter.rb +6 -0
- data/lib/flare/metric_flusher.rb +18 -5
- data/lib/flare/metric_span_processor.rb +1 -1
- data/lib/flare/metric_storage.rb +5 -0
- data/lib/flare/rule_manager.rb +140 -0
- data/lib/flare/sampler.rb +130 -0
- data/lib/flare/storage/sqlite.rb +27 -22
- data/lib/flare/trace_blob.rb +116 -0
- data/lib/flare/trace_exporter.rb +143 -0
- data/lib/flare/trace_health_reporter.rb +74 -0
- data/lib/flare/upload_url_pool.rb +108 -0
- data/lib/flare/version.rb +1 -1
- data/lib/flare/web_marker_subscriber.rb +76 -0
- data/lib/flare.rb +146 -20
- metadata +11 -1
data/lib/flare/metric_flusher.rb
CHANGED
|
@@ -15,11 +15,12 @@ module Flare
|
|
|
15
15
|
|
|
16
16
|
attr_reader :interval, :shutdown_timeout
|
|
17
17
|
|
|
18
|
-
def initialize(storage:, submitter:, interval: DEFAULT_INTERVAL, shutdown_timeout: DEFAULT_SHUTDOWN_TIMEOUT)
|
|
18
|
+
def initialize(storage:, submitter:, interval: DEFAULT_INTERVAL, shutdown_timeout: DEFAULT_SHUTDOWN_TIMEOUT, health_reporters: [])
|
|
19
19
|
@storage = storage
|
|
20
20
|
@submitter = submitter
|
|
21
21
|
@interval = interval
|
|
22
22
|
@shutdown_timeout = shutdown_timeout
|
|
23
|
+
@health_reporters = Array(health_reporters)
|
|
23
24
|
@pid = $$
|
|
24
25
|
@stopped = false
|
|
25
26
|
end
|
|
@@ -44,7 +45,7 @@ module Flare
|
|
|
44
45
|
|
|
45
46
|
@stopped = true
|
|
46
47
|
|
|
47
|
-
|
|
48
|
+
log "Shutting down metrics flusher, draining remaining metrics..."
|
|
48
49
|
|
|
49
50
|
if @timer
|
|
50
51
|
@timer.shutdown
|
|
@@ -59,7 +60,7 @@ module Flare
|
|
|
59
60
|
@pool.kill unless pool_terminated
|
|
60
61
|
end
|
|
61
62
|
|
|
62
|
-
|
|
63
|
+
log "Metrics flusher stopped"
|
|
63
64
|
end
|
|
64
65
|
|
|
65
66
|
def restart
|
|
@@ -72,6 +73,7 @@ module Flare
|
|
|
72
73
|
def flush_now
|
|
73
74
|
return 0 unless @storage && @submitter
|
|
74
75
|
|
|
76
|
+
record_health_metrics
|
|
75
77
|
drained = @storage.drain
|
|
76
78
|
return 0 if drained.empty?
|
|
77
79
|
|
|
@@ -100,13 +102,14 @@ module Flare
|
|
|
100
102
|
private
|
|
101
103
|
|
|
102
104
|
def post_to_pool
|
|
105
|
+
record_health_metrics
|
|
103
106
|
drained = @storage.drain
|
|
104
107
|
if drained.empty?
|
|
105
|
-
|
|
108
|
+
log "No metrics to flush"
|
|
106
109
|
return
|
|
107
110
|
end
|
|
108
111
|
|
|
109
|
-
|
|
112
|
+
log "Drained #{drained.size} metric keys for submission"
|
|
110
113
|
@pool.post { submit_to_cloud(drained) }
|
|
111
114
|
rescue => e
|
|
112
115
|
warn "[Flare] Metric drain error: #{e.message}"
|
|
@@ -120,5 +123,15 @@ module Flare
|
|
|
120
123
|
rescue => e
|
|
121
124
|
warn "[Flare] Metric submission error: #{e.message}"
|
|
122
125
|
end
|
|
126
|
+
|
|
127
|
+
# Gives every configured health reporter a chance to write its metrics into
# @storage before the storage is drained.
#
# Failures are isolated per reporter: one reporter raising must not prevent
# the remaining reporters from recording. The outer rescue keeps the original
# guarantee that this method never raises into the flush path (for example
# when @health_reporters itself is unusable).
def record_health_metrics
  @health_reporters.each do |reporter|
    reporter.record(@storage)
  rescue => e
    warn "[Flare] Health metric recording error: #{e.message}"
  end
rescue => e
  warn "[Flare] Health metric recording error: #{e.message}"
end
|
|
132
|
+
|
|
133
|
+
# Forwards a message to Flare.log, but only when the Flare module actually
# exposes a `log` method; otherwise does nothing and returns nil.
def log(message)
  return unless Flare.respond_to?(:log)

  Flare.log(message)
end
|
|
123
136
|
end
|
|
124
137
|
end
|
|
@@ -156,7 +156,7 @@ module Flare
|
|
|
156
156
|
bucket: bucket_time(span),
|
|
157
157
|
namespace: "job",
|
|
158
158
|
service: extract_job_system(span),
|
|
159
|
-
target: transaction_name || span.attributes["code.namespace"] || span.attributes["messaging.destination"] || "unknown",
|
|
159
|
+
target: transaction_name || span.attributes["code.namespace"] || span.attributes["messaging.sidekiq.job_class"] || span.attributes["messaging.destination"] || "unknown",
|
|
160
160
|
operation: transaction_name ? "perform" : (span.attributes["code.function"] || span.name)
|
|
161
161
|
)
|
|
162
162
|
|
data/lib/flare/metric_storage.rb
CHANGED
|
@@ -16,6 +16,11 @@ module Flare
|
|
|
16
16
|
counter.increment(duration_ms: duration_ms, error: error)
|
|
17
17
|
end
|
|
18
18
|
|
|
19
|
+
# Merges pre-aggregated values into the counter stored under `key`, creating
# a fresh MetricCounter first when the key has none yet.
#
# @param key the metric key to aggregate under
# @param count passed through to MetricCounter#add
# @param sum_ms passed through to MetricCounter#add
# @param error_count passed through to MetricCounter#add (defaults to 0)
# @return whatever MetricCounter#add returns
def add(key, count:, sum_ms:, error_count: 0)
  @storage
    .compute_if_absent(key) { MetricCounter.new }
    .add(count: count, sum_ms: sum_ms, error_count: error_count)
end
|
|
23
|
+
|
|
19
24
|
# Atomically retrieves and clears all metrics.
|
|
20
25
|
# Returns a frozen hash of MetricKey => counter data.
|
|
21
26
|
def drain
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "logger"
|
|
5
|
+
require "concurrent/timer_task"
|
|
6
|
+
require "concurrent/atomic/atomic_fixnum"
|
|
7
|
+
|
|
8
|
+
require_relative "http_transport"
|
|
9
|
+
|
|
10
|
+
module Flare
|
|
11
|
+
# The SDK's only poll. Every interval seconds (default 30) it does a
|
|
12
|
+
# GET /api/rules; the 200 response carries the active TraceRules (with
|
|
13
|
+
# server-computed sample_rate) plus a bag of presigned R2 PUT URLs.
|
|
14
|
+
# We hand the rules to Sampler#update_rules and the URLs to
|
|
15
|
+
# UploadUrlPool#replace, and sweep the Marker so stuck rack-span
|
|
16
|
+
# entries don't linger.
|
|
17
|
+
#
|
|
18
|
+
# ETag-guarded: subsequent polls send If-None-Match. A 304 still gets
|
|
19
|
+
# us a Marker.sweep but doesn't touch sampler or pool. 401/403 stops
|
|
20
|
+
# the poller (misconfigured token shouldn't beat down the server).
|
|
21
|
+
# 5xx and exceptions are logged + counted; the timer just tries again
|
|
22
|
+
# on the next tick.
|
|
23
|
+
#
|
|
24
|
+
# Fork-safe: after_fork clears the pool and restarts the timer in the
|
|
25
|
+
# child process so each child polls independently.
|
|
26
|
+
class RuleManager
  DEFAULT_INTERVAL = 30

  # poll_count:          counter of HTTP polls that returned a response.
  # etag:                ETag of the last successfully applied 200 response.
  # stopped_due_to_auth: true once a 401/403 has permanently stopped polling.
  # last_error_count:    counter of poll exceptions and unexpected statuses.
  attr_reader :poll_count, :etag, :stopped_due_to_auth, :last_error_count

  # @param sampler [#update_rules] receives the "trace_rules" array
  # @param marker [#sweep] swept after every 200/304 poll
  # @param pool [#replace, #after_fork] receives the flattened upload URLs
  # @param base_url [#to_s] server base URL; a trailing "/" is tolerated
  # @param api_key [String] sent as a Bearer token
  # @param project [String] sent as the Flare-Project header
  # @param environment [String] sent as the Flare-Environment header
  # @param interval [Numeric] seconds between polls
  # @param transport [#get, nil] HTTP client; defaults to HttpTransport.new
  # @param logger [Logger, nil] defaults to a WARN-level logger on $stderr
  def initialize(sampler:, marker:, pool:, base_url:, api_key:, project:, environment:,
                 interval: DEFAULT_INTERVAL, transport: nil, logger: nil)
    @sampler = sampler
    @marker = marker
    @pool = pool
    @rules_url = "#{base_url.to_s.chomp('/')}/api/rules"
    @api_key = api_key
    @project = project
    @environment = environment
    @interval = interval
    @transport = transport || HttpTransport.new
    @logger = logger || Logger.new($stderr, level: Logger::WARN)

    @etag = nil
    @poll_count = Concurrent::AtomicFixnum.new(0)
    @last_error_count = Concurrent::AtomicFixnum.new(0)
    @stopped_due_to_auth = false
    @pid = $$
  end

  # Starts the background timer (first poll runs immediately). No-op when a
  # timer already exists or polling was stopped by an auth failure.
  #
  # @return [RuleManager] self
  def start
    return self if @timer || @stopped_due_to_auth

    @timer = Concurrent::TimerTask.execute(
      execution_interval: @interval,
      run_now: true,
      name: "flare-rule-manager-timer"
    ) { poll_safely }
    self
  end

  # Shuts the timer down, waiting up to one second before killing it.
  #
  # @return [RuleManager] self
  def stop
    if @timer
      @timer.shutdown
      @timer.wait_for_termination(1)
      @timer.kill unless @timer.shutdown?
      @timer = nil
    end
    self
  end

  # @return [Boolean] whether a timer exists and reports itself running
  def running?
    @timer ? @timer.running? : false
  end

  # Re-initialises per-process state in a forked child: records the new pid,
  # lets the pool reset itself, and replaces the inherited timer.
  def after_fork
    @pid = $$
    @pool.after_fork
    stop
    start
  end

  # Public so callers can force a poll (tests + integration tests).
  def poll_now
    poll_safely
  end

  private

  # Runs one poll and converts any StandardError into a counted, logged
  # warning so the caller never sees an exception.
  def poll_safely
    poll
  rescue StandardError => e
    @last_error_count.increment
    @logger.warn("[Flare::RuleManager] poll exception: #{e.class}: #{e.message}")
  end

  # Performs one GET of the rules endpoint and dispatches on the status code.
  def poll
    return if @stopped_due_to_auth

    response = @transport.get(@rules_url, request_headers)
    @poll_count.increment

    case response.code
    when "304"
      @marker.sweep
    when "200"
      apply(JSON.parse(response.body))
      # Commit the ETag only after the payload parsed and applied cleanly.
      # Storing it first would make a malformed 200 body "stick": later
      # polls would send If-None-Match, get 304, and never apply the rules.
      @etag = response.header("ETag")
      @marker.sweep
    when "401", "403"
      @stopped_due_to_auth = true
      @logger.warn("[Flare::RuleManager] auth failed (#{response.code}); stopping poll")
      stop
    else
      @last_error_count.increment
      @logger.warn("[Flare::RuleManager] unexpected #{response.code}")
    end
  end

  # Builds the request headers; If-None-Match is added only once an ETag is known.
  def request_headers
    headers = {
      "Authorization" => "Bearer #{@api_key}",
      "Flare-Project" => @project,
      "Flare-Environment" => @environment
    }
    headers["If-None-Match"] = @etag if @etag
    headers
  end

  # Server payload shape (see tirana-v2 Api::RulesController):
  #   { "trace_rules": [{ "id", "match_attributes", "rate", ..., "urls": [...] }] }
  #
  # Hands the rules to the sampler and the flattened per-rule "urls" to the pool.
  def apply(payload)
    rules = payload["trace_rules"] || []
    @sampler.update_rules(rules)

    url_entries = rules.flat_map { |r| Array(r["urls"]) }
    @pool.replace(url_entries)
  end
end
|
|
140
|
+
end
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "concurrent/atomic/atomic_reference"
|
|
4
|
+
require "opentelemetry/sdk"
|
|
5
|
+
|
|
6
|
+
module Flare
|
|
7
|
+
# Path 1 trace sampler. At span start, iterates active rules; returns
|
|
8
|
+
# RECORD_AND_SAMPLE when one matches and the deterministic trace_id_ratio
|
|
9
|
+
# falls under the rule's rate. Otherwise RECORD_ONLY -- the span still
|
|
10
|
+
# records so MetricSpanProcessor sees it; the trace export decision for
|
|
11
|
+
# web spans is deferred to Path 2 via Flare::Marker.
|
|
12
|
+
#
|
|
13
|
+
# Used as the `root` sampler inside an OTel ParentBased sampler so root
|
|
14
|
+
# spans go through this logic but child spans inherit upstream decisions.
|
|
15
|
+
# The `local_parent_not_sampled` slot of the ParentBased should point at
|
|
16
|
+
# Flare::ALWAYS_RECORD_ONLY -- the default ALWAYS_OFF would drop children
|
|
17
|
+
# of an unsampled local parent, making them NoOp spans the processors
|
|
18
|
+
# never see.
|
|
19
|
+
#
|
|
20
|
+
# Rules are pushed in via update_rules from Flare::RuleManager; the swap
|
|
21
|
+
# is atomic, and malformed rule entries are dropped with a counter so a
|
|
22
|
+
# bad server payload can't crash the tracing path.
|
|
23
|
+
class Sampler
|
|
24
|
+
Decision = OpenTelemetry::SDK::Trace::Samplers::Decision
|
|
25
|
+
Result = OpenTelemetry::SDK::Trace::Samplers::Result
|
|
26
|
+
|
|
27
|
+
RULE_ID_ATTRIBUTE = "flare.rule_id"
|
|
28
|
+
|
|
29
|
+
Rule = Struct.new(:id, :match_attributes, :rate, keyword_init: true)
|
|
30
|
+
|
|
31
|
+
attr_reader :dropped_rule_count
|
|
32
|
+
|
|
33
|
+
# Starts with a zeroed dropped-rule counter and an empty, frozen rule list.
def initialize
  @dropped_rule_count = Concurrent::AtomicFixnum.new(0)
  @rules_ref = Concurrent::AtomicReference.new([].freeze)
end
|
|
37
|
+
|
|
38
|
+
# new_rules: an array of rule hashes from GET /api/rules, e.g.
|
|
39
|
+
# [{ "id" => 1, "match_attributes" => {...}, "rate" => 0.5 }, ...]
|
|
40
|
+
# Entries that don't validate are skipped (counted in dropped_rule_count).
|
|
41
|
+
# Replaces the active rule set in one atomic swap.
#
# Each raw entry goes through #validate; entries that come back nil are left
# out and their number is added to dropped_rule_count. A nil argument is
# treated as an empty list.
def update_rules(new_rules)
  incoming = new_rules || []
  accepted = incoming.filter_map { |raw| validate(raw) }
  @dropped_rule_count.increment(incoming.length - accepted.length)
  @rules_ref.set(accepted.freeze)
end
|
|
46
|
+
|
|
47
|
+
# Returns the rule list currently held by the atomic reference (the frozen
# array installed by the constructor or the latest update_rules call).
def rules
  snapshot = @rules_ref.get
  snapshot
end
|
|
50
|
+
|
|
51
|
+
# Sampler entry point, called with the OTel sampler keyword arguments.
#
# Walks the active rules in order. The first rule whose match_attributes all
# equal the span's attributes AND whose rate admits this trace id yields
# RECORD_AND_SAMPLE, with the rule id merged into the span attributes under
# RULE_ID_ATTRIBUTE. If no rule admits the span the result is RECORD_ONLY.
# The parent's tracestate is passed through in both cases.
#
# @return [Result]
def should_sample?(trace_id:, parent_context:, links:, name:, kind:, attributes:)
  tracestate = tracestate_from(parent_context)

  rules.each do |rule|
    next unless matches?(rule, attributes)
    # A 100% rule must always sample. trace_id_ratio divides in Float, so
    # trace ids whose low 64 bits are near 2**64 round to exactly 1.0 and
    # would fail the strict `<` comparison against a rate of 1.0.
    next unless rule.rate >= 1.0 || trace_id_ratio(trace_id) < rule.rate

    merged = (attributes || {}).merge(RULE_ID_ATTRIBUTE => rule.id)
    return Result.new(decision: Decision::RECORD_AND_SAMPLE, attributes: merged, tracestate: tracestate)
  end

  Result.new(decision: Decision::RECORD_ONLY, tracestate: tracestate)
end
|
|
64
|
+
|
|
65
|
+
# Fixed, human-readable name identifying this sampler.
def description
  "Flare::Sampler"
end
|
|
68
|
+
|
|
69
|
+
# Cross-language formula: last 8 bytes of the 16-byte raw trace_id as
|
|
70
|
+
# uint64-big-endian, divided by 2^64. Same in every Flare SDK so the
|
|
71
|
+
# server can reproduce the decision if it ever needs to.
|
|
72
|
+
# Maps a trace id onto a Float: the last (up to) 8 bytes are read as a
# big-endian unsigned integer and divided by 2**64. Accepts either a String
# (its bytes are used) or anything Array() can turn into a list of byte values.
def trace_id_ratio(trace_id)
  octets = trace_id.is_a?(String) ? trace_id.bytes : Array(trace_id)
  low_bits = octets.last(8).reduce(0) { |acc, byte| (acc << 8) | byte }
  low_bits.to_f / (1 << 64)
end
|
|
79
|
+
|
|
80
|
+
private
|
|
81
|
+
|
|
82
|
+
# Looks up the tracestate of the span found in parent_context, falling back
# to the default tracestate when that span context has none.
def tracestate_from(parent_context)
  parent_span_context = OpenTelemetry::Trace.current_span(parent_context).context
  parent_span_context.tracestate || OpenTelemetry::Trace::Tracestate::DEFAULT
end
|
|
86
|
+
|
|
87
|
+
# True when every key/value pair in the rule's match_attributes equals the
# value stored under the same key in the span attributes. Nil attributes
# never match.
def matches?(rule, attributes)
  return false if attributes.nil?

  rule.match_attributes.none? { |key, expected| attributes[key] != expected }
end
|
|
91
|
+
|
|
92
|
+
# Turns one raw rule hash into a Rule, or returns nil when it is unusable.
#
# Fields may be keyed by String or Symbol. A rule is accepted only when:
# - raw is a Hash with a non-nil id,
# - match_attributes is a non-empty Hash of String keys to non-empty String values,
# - rate is Numeric with 0 < rate <= 1.
# Any StandardError raised while checking also yields nil.
def validate(raw)
  return nil unless raw.is_a?(Hash)

  id, match, rate = %w[id match_attributes rate].map { |field| raw[field] || raw[field.to_sym] }

  return nil if id.nil?
  return nil unless match.is_a?(Hash) && match.any?

  well_formed = match.all? { |key, value| key.is_a?(String) && value.is_a?(String) && !value.empty? }
  return nil unless well_formed
  return nil unless rate.is_a?(Numeric) && rate > 0.0 && rate <= 1.0

  Rule.new(id: id, match_attributes: match, rate: rate.to_f)
rescue StandardError
  nil
end
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
# Tiny sampler whose should_sample? returns RECORD_ONLY for every span.
|
|
111
|
+
# Slot this into the ParentBased local_parent_not_sampled position so
|
|
112
|
+
# children of an unsampled local parent stay recording (the default
|
|
113
|
+
# ALWAYS_OFF turns them into NoOp spans no processor ever sees).
|
|
114
|
+
class AlwaysRecordOnly
|
|
115
|
+
Decision = OpenTelemetry::SDK::Trace::Samplers::Decision
|
|
116
|
+
Result = OpenTelemetry::SDK::Trace::Samplers::Result
|
|
117
|
+
|
|
118
|
+
# Always answers RECORD_ONLY, carrying over the tracestate of the span found
# in parent_context (or the default tracestate when it has none). All other
# sampler keyword arguments are accepted and ignored.
def should_sample?(parent_context: nil, **)
  parent_span = OpenTelemetry::Trace.current_span(parent_context)
  inherited = parent_span.context.tracestate || OpenTelemetry::Trace::Tracestate::DEFAULT
  Result.new(decision: Decision::RECORD_ONLY, tracestate: inherited)
end
|
|
123
|
+
|
|
124
|
+
# Fixed, human-readable name identifying this sampler.
def description
  "Flare::AlwaysRecordOnly"
end
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
ALWAYS_RECORD_ONLY = AlwaysRecordOnly.new
|
|
130
|
+
end
|
data/lib/flare/storage/sqlite.rb
CHANGED
|
@@ -34,25 +34,25 @@ module Flare
|
|
|
34
34
|
if status
|
|
35
35
|
case status
|
|
36
36
|
when "2xx"
|
|
37
|
-
conditions << "status_prop.value LIKE ?"
|
|
37
|
+
conditions << "COALESCE(status_prop.value, status_prop_old.value) LIKE ?"
|
|
38
38
|
values << "2%"
|
|
39
39
|
when "3xx"
|
|
40
|
-
conditions << "status_prop.value LIKE ?"
|
|
40
|
+
conditions << "COALESCE(status_prop.value, status_prop_old.value) LIKE ?"
|
|
41
41
|
values << "3%"
|
|
42
42
|
when "4xx"
|
|
43
|
-
conditions << "status_prop.value LIKE ?"
|
|
43
|
+
conditions << "COALESCE(status_prop.value, status_prop_old.value) LIKE ?"
|
|
44
44
|
values << "4%"
|
|
45
45
|
when "5xx"
|
|
46
|
-
conditions << "status_prop.value LIKE ?"
|
|
46
|
+
conditions << "COALESCE(status_prop.value, status_prop_old.value) LIKE ?"
|
|
47
47
|
values << "5%"
|
|
48
48
|
else
|
|
49
|
-
conditions << "status_prop.value = ?"
|
|
49
|
+
conditions << "COALESCE(status_prop.value, status_prop_old.value) = ?"
|
|
50
50
|
values << status.to_s
|
|
51
51
|
end
|
|
52
52
|
end
|
|
53
53
|
|
|
54
54
|
if method
|
|
55
|
-
conditions << "method_prop.value = ?"
|
|
55
|
+
conditions << "COALESCE(method_prop.value, method_prop_old.value) = ?"
|
|
56
56
|
values << "\"#{method}\""
|
|
57
57
|
end
|
|
58
58
|
|
|
@@ -79,19 +79,22 @@ module Flare
|
|
|
79
79
|
|
|
80
80
|
rows = query_all(<<~SQL, values)
|
|
81
81
|
SELECT s.*,
|
|
82
|
-
method_prop.value as http_method,
|
|
83
|
-
status_prop.value as http_status,
|
|
84
|
-
target_prop.value as http_target,
|
|
82
|
+
COALESCE(method_prop.value, method_prop_old.value) as http_method,
|
|
83
|
+
COALESCE(status_prop.value, status_prop_old.value) as http_status,
|
|
84
|
+
COALESCE(target_prop.value, target_prop_old.value) as http_target,
|
|
85
85
|
controller_prop.value as controller,
|
|
86
86
|
action_prop.value as action
|
|
87
87
|
FROM flare_spans s
|
|
88
|
-
LEFT JOIN flare_properties method_prop ON method_prop.owner_type = 'Flare::Span' AND method_prop.owner_id = s.id AND method_prop.key = 'http.method'
|
|
89
|
-
LEFT JOIN flare_properties
|
|
90
|
-
LEFT JOIN flare_properties
|
|
88
|
+
LEFT JOIN flare_properties method_prop ON method_prop.owner_type = 'Flare::Span' AND method_prop.owner_id = s.id AND method_prop.key = 'http.request.method'
|
|
89
|
+
LEFT JOIN flare_properties method_prop_old ON method_prop_old.owner_type = 'Flare::Span' AND method_prop_old.owner_id = s.id AND method_prop_old.key = 'http.method'
|
|
90
|
+
LEFT JOIN flare_properties status_prop ON status_prop.owner_type = 'Flare::Span' AND status_prop.owner_id = s.id AND status_prop.key = 'http.response.status_code'
|
|
91
|
+
LEFT JOIN flare_properties status_prop_old ON status_prop_old.owner_type = 'Flare::Span' AND status_prop_old.owner_id = s.id AND status_prop_old.key = 'http.status_code'
|
|
92
|
+
LEFT JOIN flare_properties target_prop ON target_prop.owner_type = 'Flare::Span' AND target_prop.owner_id = s.id AND target_prop.key = 'url.path'
|
|
93
|
+
LEFT JOIN flare_properties target_prop_old ON target_prop_old.owner_type = 'Flare::Span' AND target_prop_old.owner_id = s.id AND target_prop_old.key = 'http.target'
|
|
91
94
|
LEFT JOIN flare_properties controller_prop ON controller_prop.owner_type = 'Flare::Span' AND controller_prop.owner_id = s.id AND controller_prop.key = 'code.namespace'
|
|
92
95
|
LEFT JOIN flare_properties action_prop ON action_prop.owner_type = 'Flare::Span' AND action_prop.owner_id = s.id AND action_prop.key = 'code.function'
|
|
93
96
|
#{where_clause}
|
|
94
|
-
AND method_prop.value IS NOT NULL
|
|
97
|
+
AND COALESCE(method_prop.value, method_prop_old.value) IS NOT NULL
|
|
95
98
|
ORDER BY s.created_at DESC
|
|
96
99
|
LIMIT ? OFFSET ?
|
|
97
100
|
SQL
|
|
@@ -352,25 +355,25 @@ module Flare
|
|
|
352
355
|
if status
|
|
353
356
|
case status
|
|
354
357
|
when "2xx"
|
|
355
|
-
conditions << "status_prop.value LIKE ?"
|
|
358
|
+
conditions << "COALESCE(status_prop.value, status_prop_old.value) LIKE ?"
|
|
356
359
|
values << "2%"
|
|
357
360
|
when "3xx"
|
|
358
|
-
conditions << "status_prop.value LIKE ?"
|
|
361
|
+
conditions << "COALESCE(status_prop.value, status_prop_old.value) LIKE ?"
|
|
359
362
|
values << "3%"
|
|
360
363
|
when "4xx"
|
|
361
|
-
conditions << "status_prop.value LIKE ?"
|
|
364
|
+
conditions << "COALESCE(status_prop.value, status_prop_old.value) LIKE ?"
|
|
362
365
|
values << "4%"
|
|
363
366
|
when "5xx"
|
|
364
|
-
conditions << "status_prop.value LIKE ?"
|
|
367
|
+
conditions << "COALESCE(status_prop.value, status_prop_old.value) LIKE ?"
|
|
365
368
|
values << "5%"
|
|
366
369
|
else
|
|
367
|
-
conditions << "status_prop.value = ?"
|
|
370
|
+
conditions << "COALESCE(status_prop.value, status_prop_old.value) = ?"
|
|
368
371
|
values << status.to_s
|
|
369
372
|
end
|
|
370
373
|
end
|
|
371
374
|
|
|
372
375
|
if method
|
|
373
|
-
conditions << "method_prop.value = ?"
|
|
376
|
+
conditions << "COALESCE(method_prop.value, method_prop_old.value) = ?"
|
|
374
377
|
values << "\"#{method}\""
|
|
375
378
|
end
|
|
376
379
|
|
|
@@ -396,11 +399,13 @@ module Flare
|
|
|
396
399
|
row = query_one(<<~SQL, values)
|
|
397
400
|
SELECT COUNT(*) as count
|
|
398
401
|
FROM flare_spans s
|
|
399
|
-
LEFT JOIN flare_properties method_prop ON method_prop.owner_type = 'Flare::Span' AND method_prop.owner_id = s.id AND method_prop.key = 'http.method'
|
|
400
|
-
LEFT JOIN flare_properties
|
|
402
|
+
LEFT JOIN flare_properties method_prop ON method_prop.owner_type = 'Flare::Span' AND method_prop.owner_id = s.id AND method_prop.key = 'http.request.method'
|
|
403
|
+
LEFT JOIN flare_properties method_prop_old ON method_prop_old.owner_type = 'Flare::Span' AND method_prop_old.owner_id = s.id AND method_prop_old.key = 'http.method'
|
|
404
|
+
LEFT JOIN flare_properties status_prop ON status_prop.owner_type = 'Flare::Span' AND status_prop.owner_id = s.id AND status_prop.key = 'http.response.status_code'
|
|
405
|
+
LEFT JOIN flare_properties status_prop_old ON status_prop_old.owner_type = 'Flare::Span' AND status_prop_old.owner_id = s.id AND status_prop_old.key = 'http.status_code'
|
|
401
406
|
LEFT JOIN flare_properties controller_prop ON controller_prop.owner_type = 'Flare::Span' AND controller_prop.owner_id = s.id AND controller_prop.key = 'code.namespace'
|
|
402
407
|
#{where_clause}
|
|
403
|
-
AND method_prop.value IS NOT NULL
|
|
408
|
+
AND COALESCE(method_prop.value, method_prop_old.value) IS NOT NULL
|
|
404
409
|
SQL
|
|
405
410
|
|
|
406
411
|
row ? row["count"] : 0
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "time"
|
|
4
|
+
|
|
5
|
+
module Flare
|
|
6
|
+
# Value object that turns a group of OTel span_data for a single trace
|
|
7
|
+
# into the Flare-JSON wire format the server expects:
|
|
8
|
+
#
|
|
9
|
+
# {
|
|
10
|
+
# "trace_id": "<hex>",
|
|
11
|
+
# "trace_rule_id": <int|nil>,
|
|
12
|
+
# "root_name": "<string>",
|
|
13
|
+
# "started_at": "<iso8601>",
|
|
14
|
+
# "duration_ms": <int>,
|
|
15
|
+
# "spans": [
|
|
16
|
+
# { "id", "parent_id", "name", "started_at",
|
|
17
|
+
# "duration_ms", "attributes" }
|
|
18
|
+
# ]
|
|
19
|
+
# }
|
|
20
|
+
#
|
|
21
|
+
# The trace_rule_id is read from any span carrying the
|
|
22
|
+
# `flare.rule_id` attribute (Path 1 sets it on the sampled root, Path 2
|
|
23
|
+
# sets it on the rack owner span via WebMarkerSubscriber).
|
|
24
|
+
class TraceBlob
  # Parent span id that marks "no parent" (eight zero bytes).
  ZERO_SPAN_ID = ("\x00".b * 8).freeze
  # Maximum number of characters kept from the root span name.
  ROOT_NAME_LIMIT = 255

  # Builds a blob for one trace, or returns nil when there are no spans.
  #
  # @param trace_id [String] raw (binary) trace id
  # @param spans [Array, nil] span data objects belonging to the trace
  # @return [TraceBlob, nil]
  def self.build(trace_id:, spans:)
    return nil if spans.nil? || spans.empty?
    new(trace_id: trace_id, spans: spans)
  end

  def initialize(trace_id:, spans:)
    @trace_id = trace_id
    @spans = spans
  end

  # Serialises the trace into a Hash with string keys. The trace-level
  # name, start time and duration are taken from the span chosen by find_root.
  #
  # @return [Hash]
  def to_h
    root = find_root
    {
      "trace_id" => hexify(@trace_id),
      "trace_rule_id" => rule_id_from_spans,
      "root_name" => root_name(root),
      "started_at" => iso(root&.start_timestamp),
      "duration_ms" => duration_ms(root),
      "spans" => @spans.map { |s| span_to_h(s) }
    }
  end

  private

  # Picks the span that represents the trace, in order of preference:
  # an entry span carrying a rule id, a parentless span, any entry span,
  # any span carrying a rule id, and finally the first span.
  def find_root
    @spans.find { |s| entry_span?(s) && rule_id_attribute(s) } ||
      @spans.find { |s| root?(s) } ||
      @spans.find { |s| entry_span?(s) } ||
      @spans.find { |s| rule_id_attribute(s) } ||
      @spans.first
  end

  # A span is parentless when its parent id is nil, empty, or all zero bytes.
  def root?(span)
    pid = span.parent_span_id
    pid.nil? || pid.empty? || pid == ZERO_SPAN_ID
  end

  # Entry spans are those of kind :server or :consumer.
  def entry_span?(span)
    return false unless span.respond_to?(:kind)

    span.kind == :server || span.kind == :consumer
  end

  # First rule id found on any span, or nil.
  def rule_id_from_spans
    @spans.each do |s|
      value = rule_id_attribute(s)
      return value if value
    end
    nil
  end

  # Reads the rule id attribute, tolerating String or Symbol keys.
  def rule_id_attribute(span)
    attrs = span.attributes
    return nil unless attrs

    attrs[Sampler::RULE_ID_ATTRIBUTE] || attrs[Sampler::RULE_ID_ATTRIBUTE.to_sym]
  end

  def root_name(root)
    root&.name&.to_s&.slice(0, ROOT_NAME_LIMIT)
  end

  def span_to_h(span)
    {
      "id" => hexify(span.span_id),
      "parent_id" => root?(span) ? nil : hexify(span.parent_span_id),
      "name" => span.name,
      "started_at" => iso(span.start_timestamp),
      "duration_ms" => duration_ms(span),
      "attributes" => span.attributes || {}
    }
  end

  # Lower-case hex encoding of a raw byte string; nil stays nil.
  def hexify(bytes)
    return nil if bytes.nil?
    bytes.unpack1("H*")
  end

  # Formats a nanosecond epoch timestamp as UTC ISO 8601 with microseconds.
  #
  # The split into whole seconds and leftover nanoseconds is done with
  # divmod rather than Float division: a Float cannot hold present-day epoch
  # seconds to better than roughly a quarter of a microsecond, which can
  # produce the wrong microsecond digit.
  def iso(nanos)
    return nil if nanos.nil?

    seconds, remainder = nanos.divmod(1_000_000_000)
    Time.at(seconds, remainder, :nsec).utc.iso8601(6)
  end

  # Whole milliseconds between start and end; nil when the span or either
  # timestamp is missing.
  def duration_ms(span)
    return nil unless span && span.start_timestamp && span.end_timestamp
    ((span.end_timestamp - span.start_timestamp) / 1_000_000).to_i
  end
end
|
|
116
|
+
end
|