solid_events 0.1.0 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +53 -0
- data/LICENSE.txt +21 -0
- data/README.md +406 -10
- data/Rakefile +9 -5
- data/app/controllers/solid_events/api_controller.rb +782 -0
- data/app/controllers/solid_events/application_controller.rb +4 -0
- data/app/controllers/solid_events/incidents_controller.rb +38 -0
- data/app/controllers/solid_events/saved_views_controller.rb +43 -0
- data/app/controllers/solid_events/traces_controller.rb +731 -0
- data/app/helpers/solid_events/traces_helper.rb +79 -0
- data/app/jobs/solid_events/evaluate_incidents_job.rb +11 -0
- data/app/jobs/solid_events/prune_job.rb +26 -0
- data/app/models/solid_events/causal_edge.rb +9 -0
- data/app/models/solid_events/error_link.rb +11 -0
- data/app/models/solid_events/event.rb +11 -0
- data/app/models/solid_events/incident.rb +68 -0
- data/app/models/solid_events/incident_event.rb +13 -0
- data/app/models/solid_events/journey.rb +62 -0
- data/app/models/solid_events/record.rb +11 -0
- data/app/models/solid_events/record_link.rb +11 -0
- data/app/models/solid_events/saved_view.rb +11 -0
- data/app/models/solid_events/summary.rb +11 -0
- data/app/models/solid_events/trace.rb +85 -0
- data/app/views/layouts/solid_events/_style.html.erb +39 -0
- data/app/views/layouts/solid_events/application.html.erb +21 -14
- data/app/views/solid_events/incidents/events.html.erb +60 -0
- data/app/views/solid_events/traces/hot_path.html.erb +63 -0
- data/app/views/solid_events/traces/index.html.erb +532 -0
- data/app/views/solid_events/traces/show.html.erb +216 -0
- data/app/views/solid_events/traces/timeline.html.erb +54 -0
- data/config/locales/en.yml +4 -0
- data/config/routes.rb +35 -0
- data/db/migrate/20260216010000_create_solid_events_tables.rb +51 -0
- data/db/migrate/20260216020000_create_solid_events_summaries.rb +33 -0
- data/db/migrate/20260216030000_add_dimensions_to_solid_events_summaries.rb +20 -0
- data/db/migrate/20260216040000_add_request_id_to_solid_events_summaries.rb +8 -0
- data/db/migrate/20260216050000_add_sql_metrics_to_solid_events_summaries.rb +8 -0
- data/db/migrate/20260216060000_add_deploy_dimensions_to_solid_events_summaries.rb +17 -0
- data/db/migrate/20260216070000_create_solid_events_incidents.rb +30 -0
- data/db/migrate/20260216080000_add_schema_version_to_solid_events_summaries.rb +7 -0
- data/db/migrate/20260216090000_add_assignment_and_mute_to_solid_events_incidents.rb +12 -0
- data/db/migrate/20260216100000_add_resolution_metadata_to_solid_events_incidents.rb +11 -0
- data/db/migrate/20260216110000_add_assignment_audit_to_solid_events_incidents.rb +10 -0
- data/db/migrate/20260216120000_create_solid_events_saved_views.rb +17 -0
- data/db/migrate/20260216130000_create_solid_events_incident_events.rb +19 -0
- data/db/migrate/20260216140000_add_incident_event_lookup_indexes.rb +8 -0
- data/db/migrate/20260216150000_add_causal_links_to_solid_events.rb +15 -0
- data/db/migrate/20260216160000_create_solid_events_journeys_and_causal_edges.rb +45 -0
- data/lib/generators/solid_events/install/USAGE +8 -0
- data/lib/generators/solid_events/install/install_generator.rb +26 -0
- data/lib/generators/solid_events/install/templates/config/initializers/solid_events.rb +84 -0
- data/lib/generators/solid_events/install/templates/db/events_schema.rb +206 -0
- data/lib/solid_events/benchmark.rb +43 -0
- data/lib/solid_events/configuration.rb +167 -0
- data/lib/solid_events/context_scraper.rb +23 -0
- data/lib/solid_events/controller_tracing.rb +94 -0
- data/lib/solid_events/current.rb +15 -0
- data/lib/solid_events/engine.rb +93 -0
- data/lib/solid_events/incident_evaluator.rb +327 -0
- data/lib/solid_events/labeler.rb +21 -0
- data/lib/solid_events/notifiers/slack_webhook_notifier.rb +36 -0
- data/lib/solid_events/subscribers/action_cable_subscriber.rb +48 -0
- data/lib/solid_events/subscribers/controller_subscriber.rb +39 -0
- data/lib/solid_events/subscribers/enqueue_subscriber.rb +30 -0
- data/lib/solid_events/subscribers/error_subscriber.rb +107 -0
- data/lib/solid_events/subscribers/external_http_subscriber.rb +54 -0
- data/lib/solid_events/subscribers/job_subscriber.rb +45 -0
- data/lib/solid_events/subscribers/mailer_subscriber.rb +49 -0
- data/lib/solid_events/subscribers/sql_subscriber.rb +46 -0
- data/lib/solid_events/tracer.rb +672 -0
- data/lib/solid_events/version.rb +3 -1
- data/lib/solid_events.rb +210 -3
- data/lib/tasks/solid_events_tasks.rake +30 -4
- metadata +141 -28
- data/MIT-LICENSE +0 -20
- data/app/assets/config/solid_events_manifest.js +0 -1
- data/app/assets/stylesheets/solid_events/application.css +0 -15
- data/app/helpers/solid_events/application_helper.rb +0 -4
- data/app/jobs/solid_events/application_job.rb +0 -4
- data/app/mailers/solid_events/application_mailer.rb +0 -6
- data/app/models/solid_events/application_record.rb +0 -5
|
@@ -0,0 +1,672 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
require "digest"
|
|
3
|
+
require "json"
|
|
4
|
+
require "time"
|
|
5
|
+
|
|
6
|
+
module SolidEvents
|
|
7
|
+
module Tracer
|
|
8
|
+
module_function
|
|
9
|
+
|
|
10
|
+
# Opens a new trace row and binds it to the current execution context.
#
# The supplied context is normalized to string keys, redacted and size-guarded
# before it is persisted. Per-trace metric counters are reset, and a causal
# edge is recorded when a parent trace/event id is given.
#
# @param name [String] trace name
# @param trace_type [String] trace category, stored as given
# @param source [String] originating source, stored as given
# @param context [Hash, #to_h] initial context payload
# @param caused_by_trace_id [Integer, nil] id of the parent trace
# @param caused_by_event_id [Integer, nil] id of the parent event
# @return [SolidEvents::Trace, nil] the created trace, or nil when storage is unavailable
def start_trace!(name:, trace_type:, source:, context: {}, caused_by_trace_id: nil, caused_by_event_id: nil)
  return unless storage_available?

  safe_context = guarded_payload(
    redact_hash(normalize_context(context)),
    max_bytes: SolidEvents.max_context_payload_bytes
  )
  attributes = {
    name: name,
    trace_type: trace_type,
    source: source,
    caused_by_trace_id: caused_by_trace_id,
    caused_by_event_id: caused_by_event_id,
    context: safe_context,
    started_at: Time.current
  }

  SolidEvents::Trace.create!(attributes).tap do |created|
    SolidEvents::Current.trace = created
    SolidEvents::Current.trace_metrics = default_trace_metrics
    create_causal_edge_for_trace!(created)
  end
end
|
|
31
|
+
|
|
32
|
+
# Returns whatever trace is currently stored on SolidEvents::Current.
#
# @return [Object, nil] the value of SolidEvents::Current.trace
def current_trace
  SolidEvents::Current.trace
end
|
|
35
|
+
|
|
36
|
+
# Closes the current trace: merges the extra context, stamps the finish time
# and status, then either keeps the trace (summary + canonical log line) or
# destroys it when the sampling rules in keep_trace? reject it.
#
# The current trace and its metric counters are cleared in both outcomes.
#
# @param status [String] final trace status
# @param context [Hash, #to_h] context merged over the stored context
# @return [SolidEvents::Trace, nil] the finished trace, or nil when nothing
#   was finished or the trace was sampled out
def finish_trace!(status: "ok", context: {})
  return unless storage_available?

  trace = current_trace
  return unless trace

  merged = normalize_context(trace.context.to_h).merge(normalize_context(context))
  final_context = guarded_payload(
    redact_hash(merged),
    max_bytes: SolidEvents.max_context_payload_bytes
  )
  trace.update!(status: status, finished_at: Time.current, context: final_context)

  retained = keep_trace?(trace, context: final_context)
  if retained
    upsert_summary!(trace)
    emit_canonical_log_line!(trace)
  else
    trace.destroy!
  end

  SolidEvents::Current.trace = nil
  SolidEvents::Current.trace_metrics = {}
  retained ? trace : nil
end
|
|
63
|
+
|
|
64
|
+
# Records one event against the current trace and refreshes its summary.
#
# In-memory counters are always updated. An event row is only persisted when
# wide-event mode is off or sub-event persistence is enabled.
#
# The per-type counter is keyed by the string form of +event_type+, so that
# :sql and "sql" accumulate in the same bucket (the sql check below already
# compares on the string form).
#
# @param event_type [String, Symbol] event category
# @param name [String] event name
# @param payload [Hash, #to_h] event payload; redacted and size-guarded before storage
# @param duration_ms [Numeric, nil] event duration in milliseconds
# @return [Object, nil] the created event record, or nil when no row was persisted
def record_event!(event_type:, name:, payload: {}, duration_ms: nil)
  return unless storage_available?

  trace = current_trace
  return unless trace

  metrics = SolidEvents::Current.trace_metrics
  metrics = default_trace_metrics if metrics.blank?

  type_key = event_type.to_s
  metrics["event_count"] += 1
  metrics["event_counts"][type_key] = metrics["event_counts"].fetch(type_key, 0) + 1
  if type_key == "sql"
    metrics["sql_count"] += 1
    metrics["sql_duration_ms"] += duration_ms.to_f
  end
  SolidEvents::Current.trace_metrics = metrics

  created_event = nil
  if !SolidEvents.wide_event_primary? || SolidEvents.persist_sub_events?
    payload_for_event = guarded_payload(
      redact_hash(normalize_context(payload)),
      max_bytes: SolidEvents.max_event_payload_bytes
    )
    created_event = trace.events.create!(
      event_type: event_type,
      name: name,
      payload: payload_for_event,
      duration_ms: duration_ms,
      occurred_at: Time.current
    )
  end
  upsert_summary!(trace)
  created_event
end
|
|
99
|
+
|
|
100
|
+
# Merges extra context into the current trace and refreshes its summary.
#
# The merged context is redacted and size-guarded before being saved.
#
# @param context [Hash, #to_h] context merged over the stored context
# @return [SolidEvents::Trace, nil] the updated trace, or nil when there is
#   no storage or no current trace
def annotate!(context = {})
  return unless storage_available?

  trace = current_trace
  return unless trace

  merged = normalize_context(trace.context.to_h).merge(normalize_context(context))
  safe_context = guarded_payload(
    redact_hash(merged),
    max_bytes: SolidEvents.max_context_payload_bytes
  )
  trace.update!(context: safe_context)
  upsert_summary!(trace)
  trace
end
|
|
116
|
+
|
|
117
|
+
# Links a record to the current trace (once per record type/id) and refreshes
# the trace summary.
#
# Nothing is linked for SolidEvents' own records, for class names listed in
# SolidEvents.ignore_models, or for class names starting with one of
# SolidEvents.ignore_model_prefixes.
#
# @param record [Object] object responding to +id+
# @return [Object, nil] result of upsert_summary!, or nil when skipped
def link_record!(record)
  return unless storage_available?

  trace = current_trace
  return unless trace
  return if record.is_a?(SolidEvents::Record)

  model_name = record.class.name
  return if SolidEvents.ignore_models.include?(model_name)
  return if SolidEvents.ignore_model_prefixes.any? { |prefix| model_name.start_with?(prefix.to_s) }

  trace.record_links.find_or_create_by!(record_type: model_name, record_id: record.id)
  upsert_summary!(trace)
end
|
|
129
|
+
|
|
130
|
+
# Records a "state_diff" event describing which fields of a record changed.
#
# Skipped when there is no current trace, when the record is ignored for
# linking/state diffs, or when no tracked field differs.
#
# @param record [Object] the changed record (responds to +id+)
# @param action [String, Symbol] label for the change, used in the event name
# @param before_state [Hash, #to_h] attributes before the change
# @param after_state [Hash, #to_h] attributes after the change
# @return [Object, nil] result of record_event!, or nil when skipped
def record_state_diff!(record:, action:, before_state:, after_state:)
  return unless storage_available?
  return unless current_trace
  return if ignored_record_for_linking?(record)

  before_slice, after_slice, changed = filtered_state_diff(
    before_state: before_state,
    after_state: after_state
  )
  return if changed.empty?

  diff_payload = {
    record_type: record.class.name,
    record_id: record.id,
    action: action,
    changed_fields: changed,
    before: before_slice,
    after: after_slice
  }
  record_event!(
    event_type: "state_diff",
    name: "#{record.class.name}##{action}",
    payload: diff_payload
  )
end
|
|
156
|
+
|
|
157
|
+
# Attaches a SolidErrors error id to the current trace.
#
# @param solid_error_id [Integer, nil] id of the error to link
# @return [Object, nil] result of attach_error_link!, or nil when there is no
#   storage or no current trace
def link_error!(solid_error_id)
  return unless storage_available?

  active = current_trace
  attach_error_link!(active, solid_error_id) if active
end
|
|
165
|
+
|
|
166
|
+
# Remembers which trace an exception object belongs to, keyed by the
# exception's object_id, so it can be looked up again later via
# consume_bound_trace_for_exception.
#
# @param exception [Exception, nil] exception to bind
# @param trace [SolidEvents::Trace, nil] trace to bind to (defaults to the current trace)
# @return [Hash, nil] the updated bindings, or nil when nothing was bound
def bind_exception_to_trace!(exception, trace: current_trace)
  return unless storage_available?
  return if !exception || !trace

  registry = SolidEvents::Current.error_trace_bindings
  registry[exception.object_id] = trace.id
  SolidEvents::Current.error_trace_bindings = registry
end
|
|
174
|
+
|
|
175
|
+
# Looks up, and removes, the trace binding previously stored for an exception.
#
# @param exception [Exception, nil] exception whose binding should be consumed
# @return [SolidEvents::Trace, nil] the bound trace, or nil when no binding
#   exists or the trace row can no longer be found
def consume_bound_trace_for_exception(exception)
  return unless storage_available? && exception

  registry = SolidEvents::Current.error_trace_bindings
  bound_id = registry.delete(exception.object_id)
  SolidEvents::Current.error_trace_bindings = registry

  SolidEvents::Trace.find_by(id: bound_id) if bound_id
end
|
|
186
|
+
|
|
187
|
+
# Stores the causal parent (trace/event ids) of an enqueued job so the job's
# own trace can pick it up later.
#
# Uses Rails.cache with a 6-hour expiry when available; otherwise falls back
# to an in-process hash. Any error is swallowed and reported as nil.
#
# @param job_id [String, Integer] id of the enqueued job
# @param caused_by_trace_id [Integer, String] id of the enqueuing trace
# @param caused_by_event_id [Integer, String, nil] id of the enqueuing event
# @return [Hash, nil] the stored payload, or nil when skipped or on error
def register_async_causal_link!(job_id:, caused_by_trace_id:, caused_by_event_id:)
  return if job_id.blank? || caused_by_trace_id.blank?

  link = {
    "trace_id" => caused_by_trace_id.to_i,
    "event_id" => caused_by_event_id&.to_i,
    "recorded_at" => Time.current.to_i
  }

  cache = defined?(Rails) && Rails.cache
  if cache
    cache.write(async_causal_key(job_id), link, expires_in: 6.hours)
  else
    (@async_causal_memory ||= {})[job_id.to_s] = link
  end

  link
rescue StandardError
  nil
end
|
|
205
|
+
|
|
206
|
+
# Fetches and removes the causal-parent payload stored for a job by
# register_async_causal_link!.
#
# @param job_id [String, Integer] id of the job being performed
# @return [Hash] the payload with symbolized keys; empty when nothing was
#   stored, the job id is blank, or an error occurred
def consume_async_causal_link(job_id:)
  return {} if job_id.blank?

  cache = defined?(Rails) && Rails.cache
  stored =
    if cache
      cache_key = async_causal_key(job_id)
      # Read first, then delete, handing back what was read.
      cache.read(cache_key).tap { cache.delete(cache_key) }
    else
      (@async_causal_memory ||= {}).delete(job_id.to_s)
    end

  stored.to_h.symbolize_keys
rescue StandardError
  {}
end
|
|
222
|
+
|
|
223
|
+
# Tries to link a trace to the matching SolidErrors::Error row.
#
# Lookup order:
# 1. the error fingerprint stored in the trace context;
# 2. when no class/message candidates are available, the closest
#    SolidErrors occurrence in time;
# 3. otherwise, up to +attempts+ class/message lookups with a growing pause
#    between them.
#
# The pause is only taken when another attempt follows; sleeping after the
# final attempt would delay the caller without any chance of a better result.
# Any error is swallowed and reported as nil.
#
# @param trace [SolidEvents::Trace, nil] trace to reconcile
# @param attempts [Integer] maximum number of class/message lookups
# @param exception [Exception, nil] the raised exception, when available
# @return [Object, nil] the linked SolidErrors::Error, or nil when none was found
def reconcile_error_link_for_trace!(trace, attempts: 6, exception: nil)
  return unless storage_available?
  return unless trace
  return unless defined?(SolidErrors::Error)
  return if trace.error_links.exists?

  fingerprint = trace.context.to_h["error_fingerprint"]
  if fingerprint.present?
    by_fingerprint = SolidErrors::Error.find_by(fingerprint: fingerprint)
    if by_fingerprint
      attach_error_link!(trace, by_fingerprint.id)
      return by_fingerprint
    end
  end

  candidates = error_candidates_from(exception: exception, trace: trace)
  if candidates.empty?
    by_occurrence = find_solid_error_by_occurrence(trace)
    if by_occurrence
      attach_error_link!(trace, by_occurrence.id)
      return by_occurrence
    end
    return
  end

  last_attempt = attempts - 1
  attempts.times do |attempt|
    solid_error = find_matching_solid_error(candidates: candidates, trace: trace)

    if solid_error
      attach_error_link!(trace, solid_error.id)
      return solid_error
    end

    # Linear backoff (30ms, 60ms, ...) between attempts only.
    sleep(0.03 * (attempt + 1)) if attempt < last_attempt
  end

  nil
rescue StandardError
  nil
end
|
|
263
|
+
|
|
264
|
+
# Re-runs error-link reconciliation (one attempt each) for the most recently
# finished error traces of the last hour that still have no error link.
#
# @param limit [Integer] maximum number of traces to process
# @return [Object, nil] the traces that were iterated, or nil when storage or
#   SolidErrors is unavailable
def reconcile_recent_error_links!(limit: 25)
  return unless storage_available?
  return unless defined?(SolidErrors::Error)

  unlinked_errors = SolidEvents::Trace
    .where(status: "error")
    .where("finished_at >= ?", 1.hour.ago)
    .left_joins(:error_links)
    .where(solid_events_error_links: {id: nil})
    .order(finished_at: :desc)
    .limit(limit)

  unlinked_errors.each { |trace| reconcile_error_link_for_trace!(trace, attempts: 1) }
end
|
|
279
|
+
|
|
280
|
+
# Converts a context-like value into a Hash with string keys (top level only).
#
# Values that cannot be turned into a Hash yield an empty Hash. That covers
# objects without +to_h+ and objects whose +to_h+ rejects their contents,
# such as an Array that is not a list of key/value pairs: Array#to_h raises
# TypeError or ArgumentError there, and tracing should not raise over an
# odd context value.
#
# @param context [Hash, #to_h, Object] value to normalize
# @return [Hash{String => Object}] normalized copy, or {} when not convertible
def normalize_context(context)
  return {} unless context.respond_to?(:to_h)

  context.to_h.transform_keys(&:to_s)
rescue TypeError, ArgumentError
  {}
end
|
|
285
|
+
|
|
286
|
+
# Ensures the trace has a link row for the given error id, then refreshes the
# trace summary.
#
# @param trace [SolidEvents::Trace, nil] trace to link
# @param solid_error_id [Integer, nil] id of the SolidErrors error
# @return [Object, nil] result of upsert_summary!, or nil when either argument is missing
def attach_error_link!(trace, solid_error_id)
  return if !trace || !solid_error_id

  trace.error_links.find_or_create_by!(solid_error_id: solid_error_id)
  upsert_summary!(trace)
end
|
|
292
|
+
|
|
293
|
+
# Returns the exception message as a String, passed through
# SolidErrors::Sanitizer when that constant is defined.
#
# Falls back to the plain string form if sanitizing raises.
#
# @param message [Object] message to sanitize (converted with +to_s+)
# @return [String] sanitized (or plain) message
def sanitize_exception_message(message)
  text = message.to_s
  defined?(SolidErrors::Sanitizer) ? SolidErrors::Sanitizer.sanitize(text) : text
rescue StandardError
  message.to_s
end
|
|
300
|
+
|
|
301
|
+
# Computes a SHA-256 hex digest over the root cause's class name, its
# sanitized message, the severity and the source, concatenated without a
# separator.
#
# @param exception [Exception] exception to fingerprint
# @param severity [Object] severity component of the fingerprint
# @param source [Object] source component of the fingerprint
# @return [String] hex digest
def error_fingerprint_for(exception, severity:, source:)
  origin = root_cause(exception)
  components = [
    origin.class.name,
    sanitize_exception_message(origin.message),
    severity,
    source
  ]
  Digest::SHA256.hexdigest(components.join)
end
|
|
306
|
+
|
|
307
|
+
# Reports whether the trace, event, record-link and error-link tables all
# exist.
#
# The answer (true or false) is memoized until
# reset_storage_availability_cache! is called. Any error while checking
# counts as "not available".
#
# @return [Boolean]
def storage_available?
  cached = @storage_available
  return cached unless cached.nil?

  @storage_available =
    begin
      db = SolidEvents::Trace.connection
      required_models = [
        SolidEvents::Trace,
        SolidEvents::Event,
        SolidEvents::RecordLink,
        SolidEvents::ErrorLink
      ]
      required_models.all? { |model| db.data_source_exists?(model.table_name) }
    rescue StandardError
      false
    end
end
|
|
320
|
+
|
|
321
|
+
# Clears both memoized storage-availability answers so that the next call to
# storage_available? / summary_storage_available? checks the database again.
#
# @return [nil]
def reset_storage_availability_cache!
  @storage_available = @summary_storage_available = nil
end
|
|
325
|
+
|
|
326
|
+
# Reports whether the summaries table exists.
#
# Only a positive answer is memoized, so a missing table is re-checked on
# every call. Any error while checking counts as "not available".
#
# @return [Boolean]
def summary_storage_available?
  return true if @summary_storage_available

  table_present =
    begin
      SolidEvents::Summary.connection.data_source_exists?(SolidEvents::Summary.table_name)
    rescue StandardError
      false
    end

  table_present.tap { |found| @summary_storage_available = true if found }
end
|
|
338
|
+
|
|
339
|
+
# Creates or updates the summary row for a trace from the trace itself, its
# context, its links and its aggregated metrics, then materializes the
# journey for that summary.
#
# The error-link ids are loaded once and reused for both +error_count+ and
# the payload, rather than issuing a COUNT and a separate pluck on every call.
# Any error is swallowed and reported as nil.
#
# @param trace [SolidEvents::Trace, nil] trace to summarize
# @return [SolidEvents::Summary, nil] the saved summary, or nil when skipped or on error
def upsert_summary!(trace)
  return unless trace
  return unless summary_storage_available?

  context = trace.context.to_h
  entity = extract_primary_entity(trace)
  http_status = context["status"].presence&.to_i

  summary = SolidEvents::Summary.find_or_initialize_by(trace_id: trace.id)
  metrics = aggregate_metrics_for(trace)
  feature_slices = extract_feature_slices(context)
  error_link_ids = trace.error_links.pluck(:solid_error_id)
  summary.assign_attributes(
    name: trace.name,
    trace_type: trace.trace_type,
    source: trace.source,
    status: trace.status,
    caused_by_trace_id: trace.caused_by_trace_id,
    caused_by_event_id: trace.caused_by_event_id,
    outcome: trace.status == "error" ? "failure" : "success",
    entity_type: entity[:type],
    entity_id: entity[:id],
    http_status: http_status,
    request_method: context["method"],
    request_id: context["request_id"],
    path: context["path"],
    job_class: trace.trace_type == "job" ? trace.source : nil,
    queue_name: context["queue"],
    schema_version: SolidEvents.canonical_schema_version,
    service_name: context["service_name"],
    environment_name: context["environment_name"],
    service_version: context["service_version"],
    deployment_id: context["deployment_id"],
    region: context["region"],
    started_at: trace.started_at,
    finished_at: trace.finished_at,
    duration_ms: trace.finished_at && trace.started_at ? ((trace.finished_at - trace.started_at) * 1000.0).round(2) : nil,
    event_count: metrics[:event_count],
    sql_count: metrics[:sql_count],
    sql_duration_ms: metrics[:sql_duration_ms],
    record_link_count: trace.record_links.count,
    error_count: error_link_ids.size,
    user_id: context["user_id"],
    account_id: context["account_id"],
    error_fingerprint: context["error_fingerprint"],
    payload: {
      event_counts: metrics[:event_counts],
      error_link_ids: error_link_ids,
      context: context,
      feature_slices: feature_slices
    }
  )
  summary.save!
  materialize_journey!(summary)
  summary
rescue StandardError
  nil
end
|
|
396
|
+
|
|
397
|
+
# Tail-sampling decision: should a finished trace be kept?
#
# A trace is always kept when it errored, its context status is >= 500, it
# ran at least SolidEvents.tail_sample_slow_ms, one of the always-sample
# context keys holds a present value, or the always_sample_when hook returns
# truthy. Otherwise it is kept with probability SolidEvents.sample_rate.
# If the decision itself raises, the trace is kept.
#
# @param trace [SolidEvents::Trace] finished trace
# @param context [Hash] final trace context (string keys)
# @return [Boolean]
def keep_trace?(trace, context:)
  elapsed_ms = nil
  if trace.finished_at && trace.started_at
    elapsed_ms = ((trace.finished_at - trace.started_at) * 1000.0).round(2)
  end
  http_status = context.key?("status") ? context["status"].to_i : nil

  forced = trace.status == "error" ||
    (http_status && http_status >= 500) ||
    (elapsed_ms && elapsed_ms >= SolidEvents.tail_sample_slow_ms) ||
    SolidEvents.always_sample_context_keys.any? do |key|
      flagged = context[key]
      flagged.present? && flagged != false
    end
  return true if forced

  hook = SolidEvents.always_sample_when
  if hook.respond_to?(:call) && hook.call(trace: trace, context: context, duration_ms: elapsed_ms)
    return true
  end

  rate = SolidEvents.sample_rate.clamp(0.0, 1.0)
  return true if rate >= 1.0
  return false if rate <= 0.0

  rand < rate
rescue StandardError
  true
end
|
|
425
|
+
|
|
426
|
+
# Picks the configured feature-slice keys out of a context hash, keeping only
# present values and converting them to strings.
#
# @param context [Hash] trace context
# @return [Hash] slice key => stringified value; {} on error
def extract_feature_slices(context)
  slices = SolidEvents.feature_slice_keys.filter_map do |key|
    found = context[key]
    [key, found.to_s] if found.present?
  end
  slices.to_h
rescue StandardError
  {}
end
|
|
434
|
+
|
|
435
|
+
# Writes the trace's canonical event as one JSON line to Rails.logger at info
# level, stamped with the emission time.
#
# Does nothing when the feature is disabled or there is no Rails logger.
# Any error is swallowed.
#
# @param trace [SolidEvents::Trace] trace responding to +canonical_event+
# @return [Object, nil] the logger's return value, or nil when skipped or on error
def emit_canonical_log_line!(trace)
  return unless SolidEvents.emit_canonical_log_line?

  logger = defined?(Rails) && Rails.logger
  return unless logger

  line = trace.canonical_event
  line[:emitted_at] = Time.current.iso8601
  logger.info(line.to_json)
rescue StandardError
  nil
end
|
|
445
|
+
|
|
446
|
+
# Returns the type and id of the trace's earliest record link (ordered by
# created_at, then id).
#
# @param trace [SolidEvents::Trace]
# @return [Hash] {type:, id:}; both nil when there is no link or on error
def extract_primary_entity(trace)
  earliest = trace.record_links.order(:created_at, :id).first
  {type: earliest&.record_type, id: earliest&.record_id}
rescue StandardError
  {type: nil, id: nil}
end
|
|
454
|
+
|
|
455
|
+
# Hands the summary to SolidEvents::Journey.materialize_from_summary! when the
# Journey model is defined and its table exists. Any error is swallowed.
#
# @param summary [SolidEvents::Summary]
# @return [Object, nil] result of the materialization, or nil when skipped or on error
def materialize_journey!(summary)
  return unless defined?(SolidEvents::Journey)

  journey_model = SolidEvents::Journey
  return unless journey_model.connection.data_source_exists?(journey_model.table_name)

  journey_model.materialize_from_summary!(summary)
rescue StandardError
  nil
end
|
|
463
|
+
|
|
464
|
+
# Records a "caused_by" edge from the trace's parent trace/event to the trace
# itself, unless an identical edge already exists.
#
# Skipped when the trace has no parent ids, or when the CausalEdge model or
# its table is missing. Any error is swallowed.
#
# @param trace [SolidEvents::Trace]
# @return [Object, nil] the edge record, or nil when skipped or on error
def create_causal_edge_for_trace!(trace)
  return if trace.caused_by_trace_id.blank? && trace.caused_by_event_id.blank?
  return unless defined?(SolidEvents::CausalEdge)

  edge_model = SolidEvents::CausalEdge
  return unless edge_model.connection.data_source_exists?(edge_model.table_name)

  lookup = {
    from_trace_id: trace.caused_by_trace_id,
    from_event_id: trace.caused_by_event_id,
    to_trace_id: trace.id,
    edge_type: "caused_by"
  }
  # The block only runs for a newly created edge.
  edge_model.find_or_create_by!(lookup) do |edge|
    edge.to_event_id = nil
    edge.occurred_at = trace.started_at || Time.current
    edge.payload = {trace_type: trace.trace_type}
  end
rescue StandardError
  nil
end
|
|
482
|
+
|
|
483
|
+
# Builds the unique [class name, sanitized message] pairs used to look up a
# matching SolidErrors error.
#
# With an exception, the pairs come from its cause chain. Without one, they
# come from the exception / root-exception fields of the trace context, and
# pairs with a blank class or message are dropped.
#
# @param exception [Exception, nil]
# @param trace [SolidEvents::Trace]
# @return [Array<Array(String, String)>]
def error_candidates_from(exception:, trace:)
  if exception
    return exception_chain(exception)
      .map { |ex| [ex.class.name, sanitize_exception_message(ex.message)] }
      .uniq
  end

  stored = trace.context.to_h
  raised = [stored["exception_class"], sanitize_exception_message(stored["exception_message"])]
  root = [stored["root_exception_class"], sanitize_exception_message(stored["root_exception_message"])]
  [raised, root].reject { |klass, text| klass.blank? || text.blank? }.uniq
end
|
|
495
|
+
|
|
496
|
+
# Finds the SolidErrors::Error that best matches the candidate pairs.
#
# First tries an exact class + message match per candidate (most recently
# updated first). If none matches, falls back to a message-only match,
# limited to errors updated within five minutes of the trace when the trace
# has a finish time.
#
# The time window is anchored on +finished_at+ when +started_at+ is missing,
# so a trace without a start time is narrowed rather than raising.
#
# @param candidates [Array<Array(String, String)>] [class name, sanitized message] pairs
# @param trace [SolidEvents::Trace]
# @return [Object, nil] the matching SolidErrors::Error, or nil
def find_matching_solid_error(candidates:, trace:)
  candidates.each do |exception_class, sanitized_message|
    exact = SolidErrors::Error.where(
      exception_class: exception_class,
      message: sanitized_message
    ).order(updated_at: :desc).first
    return exact if exact
  end

  # Fallback for wrapper exceptions (e.g. ActionView::Template::Error wrapping NameError)
  # where SolidErrors stores the root cause class but message is still exact.
  message_only = candidates.filter_map { |(_, msg)| msg.presence }.uniq
  return nil if message_only.empty?

  scope = SolidErrors::Error.where(message: message_only)
  if trace.finished_at
    window_start = trace.started_at || trace.finished_at
    scope = scope.where(updated_at: (window_start - 5.minutes)..(trace.finished_at + 5.minutes))
  end
  scope.order(updated_at: :desc).first
end
|
|
516
|
+
|
|
517
|
+
# Guesses the error for a trace from SolidErrors occurrences recorded around
# the trace's lifetime (3 seconds either side, newest 10).
#
# Occurrences without an error are ignored, as are those whose "controller"
# context does not contain the trace's controller name when both are known.
# Of the rest, the one created closest to the trace's finish time wins.
# Any error is swallowed and reported as nil.
#
# @param trace [SolidEvents::Trace]
# @return [Object, nil] the error of the closest occurrence, or nil
def find_solid_error_by_occurrence(trace)
  return unless defined?(SolidErrors::Occurrence)
  return unless trace.started_at && trace.finished_at

  window = (trace.started_at - 3.seconds)..(trace.finished_at + 3.seconds)
  nearby = SolidErrors::Occurrence
    .includes(:error)
    .where(created_at: window)
    .order(created_at: :desc)
    .limit(10)

  # Source is split on "#"; the part before it is compared to the occurrence's controller.
  controller_name = trace.source.to_s.split("#").first
  eligible = nearby.to_a.select do |occurrence|
    next false unless occurrence.error

    seen_controller = occurrence.context.to_h["controller"].to_s
    controller_name.blank? || seen_controller.blank? || seen_controller.include?(controller_name)
  end

  closest = eligible.min_by do |occurrence|
    (occurrence.created_at.to_f - trace.finished_at.to_f).abs
  end
  closest&.error
rescue StandardError
  nil
end
|
|
542
|
+
|
|
543
|
+
# Walks an exception's +cause+ links and returns them outermost first.
#
# The walk stops at the first object that has no +cause+ method, at a nil
# cause, at an object already seen (guards against hand-rolled cyclic
# causes), or after +max_depth+ entries. Stopping at an object without
# +cause+ keeps it from being appended repeatedly.
#
# @param exception [Exception, Object, nil] starting point
# @param max_depth [Integer] maximum number of entries returned
# @return [Array] the chain; empty when +exception+ is nil
def exception_chain(exception, max_depth: 8)
  chain = []
  current = exception
  while current && chain.size < max_depth
    break if chain.any? { |seen| seen.equal?(current) }

    chain << current
    break unless current.respond_to?(:cause)

    current = current.cause
  end
  chain
end
|
|
554
|
+
|
|
555
|
+
# Returns the last entry of the exception's cause chain, or the exception
# itself when the chain is empty.
#
# @param exception [Exception, nil]
# @return [Exception, nil]
def root_cause(exception)
  deepest = exception_chain(exception).last
  deepest || exception
end
|
|
558
|
+
|
|
559
|
+
# Decides whether a record must be skipped for state-diff tracking.
#
# A record is ignored when it is one of SolidEvents' own records, its class
# name is listed in SolidEvents.ignore_models, its class name starts with an
# ignored prefix, or state-diff tracking is not enabled for its type.
#
# @param record [Object]
# @return [Boolean]
def ignored_record_for_linking?(record)
  record.is_a?(SolidEvents::Record) ||
    SolidEvents.ignore_models.include?(record.class.name) ||
    SolidEvents.ignore_model_prefixes.any? { |prefix| record.class.name.start_with?(prefix.to_s) } ||
    !track_state_diff_for_record?(record)
end
|
|
567
|
+
|
|
568
|
+
# Decides whether state diffs are tracked for the record's class.
#
# Blocklisted class names are never tracked. Otherwise everything is tracked
# when the allowlist is empty, and only allowlisted class names when it is not.
#
# @param record [Object]
# @return [Boolean]
def track_state_diff_for_record?(record)
  model_name = record.class.name.to_s
  return false if SolidEvents.state_diff_blocklist.include?(model_name)

  permitted = SolidEvents.state_diff_allowlist
  permitted.empty? || permitted.include?(model_name)
end
|
|
577
|
+
|
|
578
|
+
# Reduces a before/after attribute pair to the fields that actually changed.
#
# created_at and updated_at are never reported, and at most
# SolidEvents.state_diff_max_changed_fields fields are kept.
#
# @param before_state [Hash, #to_h] attributes before the change
# @param after_state [Hash, #to_h] attributes after the change
# @return [Array(Hash, Hash, Array<String>)] before slice, after slice, changed field names
def filtered_state_diff(before_state:, after_state:)
  before = normalize_context(before_state)
  after = normalize_context(after_state)

  comparable = (before.keys | after.keys) - %w[created_at updated_at]
  changed = comparable
    .select { |field| before[field] != after[field] }
    .first(SolidEvents.state_diff_max_changed_fields)

  [before.slice(*changed), after.slice(*changed), changed]
end
|
|
589
|
+
|
|
590
|
+
# Builds a fresh, zeroed set of per-trace counters.
#
# @return [Hash{String => Object}] a new hash on every call
def default_trace_metrics
  zeroed = %w[event_count sql_count].to_h { |counter| [counter, 0] }
  zeroed.merge("sql_duration_ms" => 0.0, "event_counts" => {})
end
|
|
598
|
+
|
|
599
|
+
# Returns event/SQL counters for a trace.
#
# When the trace is the current one and in-memory counters are present,
# those are used; otherwise the figures are computed from the trace's
# stored events.
#
# @param trace [SolidEvents::Trace]
# @return [Hash] with :event_count, :sql_count, :sql_duration_ms and :event_counts
def aggregate_metrics_for(trace)
  live_trace = SolidEvents::Current.trace
  counters = SolidEvents::Current.trace_metrics
  use_counters = live_trace && live_trace.id == trace.id && counters.present?

  if use_counters
    {
      event_count: counters["event_count"].to_i,
      sql_count: counters["sql_count"].to_i,
      sql_duration_ms: counters["sql_duration_ms"].to_f.round(2),
      event_counts: counters["event_counts"].to_h
    }
  else
    sql_events = trace.events.where(event_type: "sql")
    {
      event_count: trace.events.count,
      sql_count: sql_events.count,
      sql_duration_ms: sql_events.sum(:duration_ms).to_f.round(2),
      event_counts: trace.events.group(:event_type).count
    }
  end
end
|
|
619
|
+
|
|
620
|
+
# Recursively copies a Hash/Array structure, masking sensitive entries.
#
# For each hash entry, a replacement configured for its dotted path wins;
# otherwise a sensitive key name gets the redaction placeholder; otherwise
# the value is processed recursively. Hash keys are stringified. Array
# elements contribute their index to the path. Other values are returned
# as they are.
#
# @param value [Object] structure to redact
# @param path [Array<String>] path segments leading to +value+
# @return [Object] redacted copy (or the value itself for scalars)
def redact_hash(value, path: [])
  if value.is_a?(Hash)
    value.each_with_object({}) do |(key, nested), redacted|
      name = key.to_s
      here = path + [name]
      redacted[name] =
        redaction_replacement_for_path(here) ||
        (sensitive_key?(name) ? SolidEvents.redaction_placeholder : redact_hash(nested, path: here))
    end
  elsif value.is_a?(Array)
    value.each_with_index.map { |entry, position| redact_hash(entry, path: path + [position.to_s]) }
  else
    value
  end
end
|
|
641
|
+
|
|
642
|
+
# Looks up the replacement configured in SolidEvents.redaction_paths for a
# dotted path.
#
# @param path_segments [Array<String>] path segments, joined with "."
# @return [String, nil] nil when nothing is configured, the redaction
#   placeholder when the entry is +true+, otherwise the entry's string form
def redaction_replacement_for_path(path_segments)
  configured = SolidEvents.redaction_paths[path_segments.join(".")]
  case configured
  when nil then nil
  when true then SolidEvents.redaction_placeholder
  else configured.to_s
  end
end
|
|
649
|
+
|
|
650
|
+
# Enforces a byte limit on a payload's JSON form.
#
# A payload within the limit is returned untouched. A larger one is replaced
# by a small marker hash holding the original size, the limit and the
# truncation placeholder. If measuring fails, the payload is returned as is.
#
# @param value [Object] payload to check
# @param max_bytes [Integer] maximum allowed size of the JSON form, in bytes
# @return [Object] +value+, or the truncation marker hash
def guarded_payload(value, max_bytes:)
  json_bytes = JSON.generate(value).bytesize
  if json_bytes <= max_bytes
    value
  else
    {
      "_truncated" => true,
      "_original_bytes" => json_bytes,
      "_max_bytes" => max_bytes,
      "_value" => SolidEvents.payload_truncation_placeholder
    }
  end
rescue StandardError
  value
end
|
|
663
|
+
|
|
664
|
+
# Builds the cache key under which a job's causal link is stored.
#
# @param job_id [String, Integer] job id
# @return [String]
def async_causal_key(job_id)
  format("solid_events:causal:job:%s", job_id)
end
|
|
667
|
+
|
|
668
|
+
# Tells whether a key name contains any of SolidEvents.sensitive_keys,
# ignoring case.
#
# Configured entries are converted to strings first, so Symbol entries work
# (String#include? raises TypeError for a Symbol argument). Empty entries
# are skipped, because every string includes "" and a single empty entry
# would otherwise mark every key as sensitive.
#
# @param key [String, Symbol] key name to check
# @return [Boolean]
def sensitive_key?(key)
  haystack = key.to_s.downcase
  SolidEvents.sensitive_keys.any? do |sensitive|
    fragment = sensitive.to_s.downcase
    !fragment.empty? && haystack.include?(fragment)
  end
end
|
|
671
|
+
end
|
|
672
|
+
end
|
data/lib/solid_events/version.rb
CHANGED