opentrace 0.3.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +246 -3
- data/lib/opentrace/client.rb +5 -0
- data/lib/opentrace/config.rb +14 -1
- data/lib/opentrace/http_tracker.rb +54 -0
- data/lib/opentrace/middleware.rb +41 -0
- data/lib/opentrace/pool_monitor.rb +59 -0
- data/lib/opentrace/queue_monitor.rb +110 -0
- data/lib/opentrace/rails.rb +197 -4
- data/lib/opentrace/request_collector.rb +141 -0
- data/lib/opentrace/version.rb +1 -1
- data/lib/opentrace.rb +12 -0
- metadata +5 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 6e068caf2607eb830f1a48fbe159ac7f62d45111e26ebcb0928b9ff38bee5a3a
|
|
4
|
+
data.tar.gz: d2756da1df81af0b50af32dd769934fc88f57ea1647a8324f0511b9ba8586121
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a051a516258c16868c88429cef1ded592f7662f67090c8865695ae63c5a71777d83178a9660440d11991c19139c13f9ee8d039cc792291077866fc6c1fdc21cd
|
|
7
|
+
data.tar.gz: 148e7476274f8f68a881d605f5f151febf6b3e1e86f7bac2c39a5f6d441e1d9eb3dada4ae497762c91040a88b7f29bd6ce366f882059f00513479a72632918e2
|
data/README.md
CHANGED
|
@@ -19,7 +19,7 @@ A thin, safe Ruby client that forwards structured application logs to an [OpenTr
|
|
|
19
19
|
- **Works with any server** -- Puma (threads), Unicorn (forks), Passenger, and Falcon (fibers)
|
|
20
20
|
- **Fork safe** -- detects forked worker processes and re-initializes cleanly
|
|
21
21
|
- **Fiber safe** -- uses `Fiber[]` storage for correct request isolation in fiber-based servers
|
|
22
|
-
- **Rails integration** -- auto-instruments controllers, SQL queries,
|
|
22
|
+
- **Rails integration** -- auto-instruments controllers, SQL queries, ActiveJob, views, cache, and more
|
|
23
23
|
- **Rack middleware** -- propagates `request_id` via fiber-local storage
|
|
24
24
|
- **Logger wrapper** -- drop-in replacement that forwards to OpenTrace while keeping your original logger
|
|
25
25
|
- **Rails 7.1+ BroadcastLogger** -- native support via `broadcast_to`
|
|
@@ -27,9 +27,17 @@ A thin, safe Ruby client that forwards structured application logs to an [OpenTr
|
|
|
27
27
|
- **Context support** -- attach global metadata to every log via Hash or Proc
|
|
28
28
|
- **Level filtering** -- `min_level` config to control which severities are forwarded
|
|
29
29
|
- **Auto-enrichment** -- every log includes `hostname`, `pid`, and `git_sha` automatically
|
|
30
|
-
- **Exception helper** -- `OpenTrace.error` captures class, message, and
|
|
30
|
+
- **Exception helper** -- `OpenTrace.error` captures class, message, cleaned backtrace, and error fingerprint
|
|
31
31
|
- **Runtime controls** -- enable/disable logging at runtime without restarting
|
|
32
32
|
- **Graceful shutdown** -- pending logs are flushed automatically on process exit
|
|
33
|
+
- **N+1 query detection** -- warns when a request exceeds 20 SQL queries
|
|
34
|
+
- **Per-request summary** -- one rich log per request with SQL, view, cache breakdown and timeline
|
|
35
|
+
- **Error fingerprinting** -- stable fingerprint for grouping identical errors across requests
|
|
36
|
+
- **Deprecation tracking** -- captures Rails deprecation warnings with callsite
|
|
37
|
+
- **DB pool monitoring** -- background thread reports connection pool saturation (opt-in)
|
|
38
|
+
- **Job queue depth** -- monitors Sidekiq, GoodJob, or SolidQueue queue sizes (opt-in)
|
|
39
|
+
- **Memory delta tracking** -- snapshots process RSS before/after each request (opt-in)
|
|
40
|
+
- **External HTTP tracking** -- captures outbound Net::HTTP calls with timing (opt-in)
|
|
33
41
|
|
|
34
42
|
## Installation
|
|
35
43
|
|
|
@@ -95,6 +103,24 @@ OpenTrace.configure do |c|
|
|
|
95
103
|
# SQL logging (Rails only)
|
|
96
104
|
c.sql_logging = true # default: true
|
|
97
105
|
c.sql_duration_threshold_ms = 100.0 # only log queries slower than this (default: 0.0 = all)
|
|
106
|
+
|
|
107
|
+
# Path filtering
|
|
108
|
+
c.ignore_paths = ["/health", %r{\A/assets/}] # skip noisy paths (default: [])
|
|
109
|
+
|
|
110
|
+
# Per-request summary (Rails only)
|
|
111
|
+
c.request_summary = true # accumulate events into one rich log (default: true)
|
|
112
|
+
c.timeline = true # include event timeline in summary (default: true)
|
|
113
|
+
c.timeline_max_events = 200 # cap timeline entries (default: 200)
|
|
114
|
+
|
|
115
|
+
# Background monitors (opt-in)
|
|
116
|
+
c.pool_monitoring = false # DB connection pool stats (default: false)
|
|
117
|
+
c.pool_monitoring_interval = 30 # seconds between checks (default: 30)
|
|
118
|
+
c.queue_monitoring = false # job queue depth monitoring (default: false)
|
|
119
|
+
c.queue_monitoring_interval = 60 # seconds between checks (default: 60)
|
|
120
|
+
|
|
121
|
+
# Advanced opt-in features
|
|
122
|
+
c.memory_tracking = false # RSS delta per request (default: false)
|
|
123
|
+
c.http_tracking = false # external HTTP call tracking (default: false)
|
|
98
124
|
end
|
|
99
125
|
```
|
|
100
126
|
|
|
@@ -134,7 +160,7 @@ Pass `trace_id` inside metadata and it will be promoted to a top-level field aut
|
|
|
134
160
|
|
|
135
161
|
### Exception Logging
|
|
136
162
|
|
|
137
|
-
Use `OpenTrace.error` to log exceptions with automatic class, message, and
|
|
163
|
+
Use `OpenTrace.error` to log exceptions with automatic class, message, backtrace, and fingerprint extraction:
|
|
138
164
|
|
|
139
165
|
```ruby
|
|
140
166
|
begin
|
|
@@ -148,6 +174,7 @@ This captures:
|
|
|
148
174
|
- `exception_class` -- the exception class name
|
|
149
175
|
- `exception_message` -- truncated to 500 characters
|
|
150
176
|
- `backtrace` -- cleaned (Rails backtrace cleaner or gem-filtered), limited to 15 frames
|
|
177
|
+
- `error_fingerprint` -- 12-char hash for grouping identical errors (stable across line number changes)
|
|
151
178
|
|
|
152
179
|
### Logger Wrapper
|
|
153
180
|
|
|
@@ -220,6 +247,63 @@ Request IDs are stored using `Fiber[]` (fiber-local storage), which works correc
|
|
|
220
247
|
|
|
221
248
|
All your existing `Rails.logger.info(...)` calls automatically get forwarded to OpenTrace.
|
|
222
249
|
|
|
250
|
+
### Per-Request Summary
|
|
251
|
+
|
|
252
|
+
When `request_summary` is enabled (the default), the gem accumulates all events during a request -- SQL queries, view renders, cache operations, HTTP calls -- into a single rich log entry emitted at request end. This avoids flooding the queue with hundreds of individual events.
|
|
253
|
+
|
|
254
|
+
Example payload:
|
|
255
|
+
|
|
256
|
+
```json
|
|
257
|
+
{
|
|
258
|
+
"level": "INFO",
|
|
259
|
+
"message": "GET /dashboard 200 2847ms",
|
|
260
|
+
"metadata": {
|
|
261
|
+
"request_id": "req-abc123",
|
|
262
|
+
"controller": "DashboardController",
|
|
263
|
+
"action": "index",
|
|
264
|
+
"method": "GET",
|
|
265
|
+
"path": "/dashboard",
|
|
266
|
+
"status": 200,
|
|
267
|
+
"duration_ms": 2847.3,
|
|
268
|
+
|
|
269
|
+
"request_user_agent": "Mozilla/5.0...",
|
|
270
|
+
"request_accept": "text/html",
|
|
271
|
+
|
|
272
|
+
"sql_query_count": 34,
|
|
273
|
+
"sql_total_ms": 423.1,
|
|
274
|
+
"sql_slowest_ms": 312.0,
|
|
275
|
+
"sql_slowest_name": "Order Count",
|
|
276
|
+
"n_plus_one_warning": true,
|
|
277
|
+
|
|
278
|
+
"view_render_count": 48,
|
|
279
|
+
"view_total_ms": 890.2,
|
|
280
|
+
"view_slowest_ms": 245.0,
|
|
281
|
+
"view_slowest_template": "dashboard/_activity_feed.html.erb",
|
|
282
|
+
|
|
283
|
+
"cache_reads": 8,
|
|
284
|
+
"cache_hits": 5,
|
|
285
|
+
"cache_writes": 3,
|
|
286
|
+
"cache_hit_ratio": 0.63,
|
|
287
|
+
|
|
288
|
+
"time_breakdown": {
|
|
289
|
+
"sql_pct": 14.9,
|
|
290
|
+
"view_pct": 31.3,
|
|
291
|
+
"http_pct": 0.0,
|
|
292
|
+
"other_pct": 53.8
|
|
293
|
+
},
|
|
294
|
+
|
|
295
|
+
"timeline": [
|
|
296
|
+
{ "t": "sql", "n": "User Load", "ms": 1.2, "at": 0.0 },
|
|
297
|
+
{ "t": "cache", "a": "read", "hit": true, "ms": 0.1, "at": 6.0 },
|
|
298
|
+
{ "t": "sql", "n": "Order Count", "ms": 312.0, "at": 10.0 },
|
|
299
|
+
{ "t": "view", "n": "dashboard/index.html.erb", "ms": 890.2, "at": 350.0 }
|
|
300
|
+
]
|
|
301
|
+
}
|
|
302
|
+
}
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
The timeline shows a waterfall of events in chronological order. Timeline keys are kept short to minimize payload size: `t` = type, `n` = name, `ms` = duration, `at` = offset from request start, `s` = status, `a` = action, `hit` = whether a cache read was a hit, `err` = error type of a failed HTTP call.
|
|
306
|
+
|
|
223
307
|
### Controller Subscriber
|
|
224
308
|
|
|
225
309
|
Subscribes to `process_action.action_controller` and captures:
|
|
@@ -238,12 +322,26 @@ Subscribes to `process_action.action_controller` and captures:
|
|
|
238
322
|
| `exception_class` | Exception class (if raised) |
|
|
239
323
|
| `exception_message` | Exception message (if raised) |
|
|
240
324
|
| `backtrace` | Cleaned backtrace (if exception raised) |
|
|
325
|
+
| `error_fingerprint` | 12-char fingerprint for error grouping |
|
|
326
|
+
| `request_content_type` | Request Content-Type header |
|
|
327
|
+
| `request_accept` | Request Accept header |
|
|
328
|
+
| `request_user_agent` | Request User-Agent (truncated to 200 chars) |
|
|
329
|
+
| `request_referer` | Request Referer header |
|
|
330
|
+
| `sql_query_count` | Total SQL queries in this request |
|
|
331
|
+
| `sql_total_ms` | Total SQL time in this request |
|
|
332
|
+
| `n_plus_one_warning` | `true` when query count exceeds 20 |
|
|
333
|
+
|
|
334
|
+
When request summary is enabled, the log also includes view render stats, cache stats, time breakdown, and timeline (see above).
|
|
241
335
|
|
|
242
336
|
Log levels are set automatically:
|
|
243
337
|
- **ERROR** -- exceptions or 5xx status
|
|
244
338
|
- **WARN** -- 4xx status
|
|
245
339
|
- **INFO** -- everything else
|
|
246
340
|
|
|
341
|
+
### N+1 Query Detection
|
|
342
|
+
|
|
343
|
+
Every request tracks the number of SQL queries via a Fiber-local counter. When a request exceeds 20 queries, the log entry includes `n_plus_one_warning: true`. This makes it easy to query OpenTrace for requests with potential N+1 issues.
|
|
344
|
+
|
|
247
345
|
### SQL Query Subscriber
|
|
248
346
|
|
|
249
347
|
Subscribes to `sql.active_record` and logs every query with:
|
|
@@ -280,12 +378,54 @@ Subscribes to `perform.active_job` and logs every job execution with:
|
|
|
280
378
|
| `executions` | Attempt number |
|
|
281
379
|
| `duration_ms` | Execution duration |
|
|
282
380
|
| `job_arguments` | Serialized arguments (truncated to 512 bytes) |
|
|
381
|
+
| `queue_latency_ms` | Time spent waiting in queue before execution |
|
|
382
|
+
| `enqueued_at` | When the job was enqueued |
|
|
283
383
|
| `exception_class` | Exception class (if failed) |
|
|
284
384
|
| `exception_message` | Exception message (if failed) |
|
|
285
385
|
| `backtrace` | Cleaned backtrace (if failed) |
|
|
386
|
+
| `error_fingerprint` | Fingerprint for error grouping (if failed) |
|
|
286
387
|
|
|
287
388
|
Failed jobs are logged as `ERROR`, successful jobs as `INFO`.
|
|
288
389
|
|
|
390
|
+
### Deprecation Warning Subscriber
|
|
391
|
+
|
|
392
|
+
Subscribes to `deprecation.rails` and logs all Rails deprecation warnings as `WARN`:
|
|
393
|
+
|
|
394
|
+
| Field | Description |
|
|
395
|
+
|---|---|
|
|
396
|
+
| `deprecation_message` | The deprecation message (truncated to 500 chars) |
|
|
397
|
+
| `deprecation_callsite` | File and line where the deprecated API was called |
|
|
398
|
+
| `request_id` | Current request ID (if in web context) |
|
|
399
|
+
|
|
400
|
+
### View Render Tracking
|
|
401
|
+
|
|
402
|
+
When request summary is enabled, subscribes to `render_template.action_view` and `render_partial.action_view`. View render events are accumulated in the RequestCollector and included in the per-request summary -- **no individual log entries are emitted** for views.
|
|
403
|
+
|
|
404
|
+
The summary includes:
|
|
405
|
+
- `view_render_count` -- total number of templates/partials rendered
|
|
406
|
+
- `view_total_ms` -- total rendering time
|
|
407
|
+
- `view_slowest_ms` / `view_slowest_template` -- the bottleneck template
|
|
408
|
+
|
|
409
|
+
Template paths are automatically shortened (e.g., `/Users/deploy/app/views/orders/show.html.erb` becomes `orders/show.html.erb`).
|
|
410
|
+
|
|
411
|
+
### Cache Operation Tracking
|
|
412
|
+
|
|
413
|
+
When request summary is enabled, subscribes to `cache_read.active_support`, `cache_write.active_support`, and `cache_delete.active_support`. Like views, cache events are accumulated -- no individual logs.
|
|
414
|
+
|
|
415
|
+
The summary includes:
|
|
416
|
+
- `cache_reads` / `cache_hits` / `cache_writes`
|
|
417
|
+
- `cache_hit_ratio` -- hit rate (0.0 to 1.0)
|
|
418
|
+
|
|
419
|
+
### Error Fingerprinting
|
|
420
|
+
|
|
421
|
+
Every error (in controller requests, job failures, and `OpenTrace.error` calls) includes an `error_fingerprint` -- a 12-character hash derived from the exception class and the first application frame in the backtrace. The fingerprint is:
|
|
422
|
+
|
|
423
|
+
- **Stable across deploys** -- line number changes don't affect it
|
|
424
|
+
- **Same error, same fingerprint** -- different error messages at the same location produce the same fingerprint
|
|
425
|
+
- **Different error, different fingerprint** -- different exception classes or different code locations produce different fingerprints
|
|
426
|
+
|
|
427
|
+
Use it to group and count errors in OpenTrace.
|
|
428
|
+
|
|
289
429
|
### TaggedLogging
|
|
290
430
|
|
|
291
431
|
If your wrapped logger uses `ActiveSupport::TaggedLogging`, tags are preserved and injected into the metadata:
|
|
@@ -297,6 +437,90 @@ Rails.logger.tagged("RequestID-123", "UserID-42") do
|
|
|
297
437
|
end
|
|
298
438
|
```
|
|
299
439
|
|
|
440
|
+
## Background Monitors
|
|
441
|
+
|
|
442
|
+
### DB Connection Pool Monitoring
|
|
443
|
+
|
|
444
|
+
Opt-in background thread that periodically reports ActiveRecord connection pool stats:
|
|
445
|
+
|
|
446
|
+
```ruby
|
|
447
|
+
OpenTrace.configure do |c|
|
|
448
|
+
# ...
|
|
449
|
+
c.pool_monitoring = true
|
|
450
|
+
c.pool_monitoring_interval = 30 # seconds (default: 30)
|
|
451
|
+
end
|
|
452
|
+
```
|
|
453
|
+
|
|
454
|
+
Reports `pool_size`, `connections_busy`, `connections_dead`, `connections_idle`, `threads_waiting`, and `checkout_timeout`. Logs at `WARN` when threads are waiting for a connection, `DEBUG` otherwise.
|
|
455
|
+
|
|
456
|
+
### Job Queue Depth Monitoring
|
|
457
|
+
|
|
458
|
+
Opt-in background thread that reports job queue sizes. Supports Sidekiq, GoodJob, and SolidQueue (auto-detected):
|
|
459
|
+
|
|
460
|
+
```ruby
|
|
461
|
+
OpenTrace.configure do |c|
|
|
462
|
+
# ...
|
|
463
|
+
c.queue_monitoring = true
|
|
464
|
+
c.queue_monitoring_interval = 60 # seconds (default: 60)
|
|
465
|
+
end
|
|
466
|
+
```
|
|
467
|
+
|
|
468
|
+
Reports per-queue sizes and total enqueued count. Logs at `WARN` when the total exceeds 1,000, `INFO` otherwise.
|
|
469
|
+
|
|
470
|
+
## Advanced Opt-In Features
|
|
471
|
+
|
|
472
|
+
These features have measurable overhead or implementation risks. **Disabled by default.** Enable them after testing in staging.
|
|
473
|
+
|
|
474
|
+
### Memory Delta Tracking
|
|
475
|
+
|
|
476
|
+
Snapshots process memory (RSS) before and after each request:
|
|
477
|
+
|
|
478
|
+
```ruby
|
|
479
|
+
OpenTrace.configure do |c|
|
|
480
|
+
# ...
|
|
481
|
+
c.memory_tracking = true
|
|
482
|
+
end
|
|
483
|
+
```
|
|
484
|
+
|
|
485
|
+
Adds to the request summary:
|
|
486
|
+
- `memory_before_mb` -- RSS before request
|
|
487
|
+
- `memory_after_mb` -- RSS after request
|
|
488
|
+
- `memory_delta_mb` -- difference (positive = memory grew)
|
|
489
|
+
|
|
490
|
+
Uses `/proc/self/statm` on Linux (~10us) or `GC.stat` approximation on macOS (~5us). The delta is process-level, so concurrent requests will affect accuracy. Most accurate on single-threaded servers (Unicorn).
|
|
491
|
+
|
|
492
|
+
### External HTTP Tracking
|
|
493
|
+
|
|
494
|
+
Instruments outbound `Net::HTTP` calls to capture third-party API performance:
|
|
495
|
+
|
|
496
|
+
```ruby
|
|
497
|
+
OpenTrace.configure do |c|
|
|
498
|
+
# ...
|
|
499
|
+
c.http_tracking = true
|
|
500
|
+
end
|
|
501
|
+
```
|
|
502
|
+
|
|
503
|
+
Adds to the request summary:
|
|
504
|
+
- `http_external_count` -- number of outbound HTTP calls
|
|
505
|
+
- `http_external_total_ms` -- total time in external calls
|
|
506
|
+
- `http_slowest_ms` / `http_slowest_host` -- the bottleneck
|
|
507
|
+
|
|
508
|
+
Each HTTP call appears in the timeline:
|
|
509
|
+
|
|
510
|
+
```json
|
|
511
|
+
{ "t": "http", "n": "POST api.stripe.com", "ms": 184.0, "s": 200, "at": 55.0 }
|
|
512
|
+
```
|
|
513
|
+
|
|
514
|
+
Failed calls include an error type:
|
|
515
|
+
|
|
516
|
+
```json
|
|
517
|
+
{ "t": "http", "n": "POST api.stripe.com", "ms": 5200.0, "s": 0, "err": "Net::ReadTimeout", "at": 55.0 }
|
|
518
|
+
```
|
|
519
|
+
|
|
520
|
+
A recursion guard prevents OpenTrace's own HTTP calls to the server from being tracked. The `time_breakdown` in the request summary includes `http_pct` alongside `sql_pct` and `view_pct`.
|
|
521
|
+
|
|
522
|
+
**Note**: This works by prepending a module to `Net::HTTP`. Libraries that use `Net::HTTP` internally (Faraday, HTTParty, RestClient) are automatically captured.
|
|
523
|
+
|
|
300
524
|
## Runtime Controls
|
|
301
525
|
|
|
302
526
|
```ruby
|
|
@@ -349,6 +573,25 @@ Your App --log()--> [In-Memory Queue] --background thread--> POST /api/logs -->
|
|
|
349
573
|
- The HTTP timeout defaults to 1 second
|
|
350
574
|
- Pending logs are flushed on process exit via an `at_exit` hook
|
|
351
575
|
|
|
576
|
+
### Request Summary Architecture
|
|
577
|
+
|
|
578
|
+
When `request_summary` is enabled, events within a request are **accumulated** in a Fiber-local `RequestCollector` instead of being pushed to the queue individually:
|
|
579
|
+
|
|
580
|
+
```
|
|
581
|
+
Request Start
|
|
582
|
+
Middleware creates RequestCollector in Fiber[]
|
|
583
|
+
SQL events ──► collector.record_sql() (no queue push)
|
|
584
|
+
View events ──► collector.record_view() (no queue push)
|
|
585
|
+
Cache events ──► collector.record_cache() (no queue push)
|
|
586
|
+
HTTP events ──► collector.record_http() (no queue push)
|
|
587
|
+
Request End
|
|
588
|
+
Controller subscriber merges collector.summary() into one log
|
|
589
|
+
One queue push with everything
|
|
590
|
+
Middleware cleans up RequestCollector
|
|
591
|
+
```
|
|
592
|
+
|
|
593
|
+
This means a request with 30 SQL queries, 50 view renders, and 10 cache operations produces **one log entry** instead of 91.
|
|
594
|
+
|
|
352
595
|
## Log Payload Format
|
|
353
596
|
|
|
354
597
|
Each log is sent as a JSON object to `POST /api/logs`:
|
data/lib/opentrace/client.rb
CHANGED
|
@@ -127,6 +127,9 @@ module OpenTrace
|
|
|
127
127
|
end
|
|
128
128
|
|
|
129
129
|
def send_batch(uri, batch)
|
|
130
|
+
# Disable HTTP tracking for our own calls to prevent infinite recursion
|
|
131
|
+
Fiber[:opentrace_http_tracking_disabled] = true
|
|
132
|
+
|
|
130
133
|
# Apply per-payload truncation
|
|
131
134
|
batch = batch.map { |p| fit_payload(p) }.compact
|
|
132
135
|
return if batch.empty?
|
|
@@ -151,6 +154,8 @@ module OpenTrace
|
|
|
151
154
|
http.request(request)
|
|
152
155
|
rescue StandardError
|
|
153
156
|
# Swallow all network errors silently
|
|
157
|
+
ensure
|
|
158
|
+
Fiber[:opentrace_http_tracking_disabled] = nil
|
|
154
159
|
end
|
|
155
160
|
|
|
156
161
|
def build_http(uri)
|
data/lib/opentrace/config.rb
CHANGED
|
@@ -9,7 +9,11 @@ module OpenTrace
|
|
|
9
9
|
:context, :min_level, :hostname, :pid, :git_sha,
|
|
10
10
|
:batch_size, :flush_interval,
|
|
11
11
|
:sql_logging, :sql_duration_threshold_ms,
|
|
12
|
-
:ignore_paths
|
|
12
|
+
:ignore_paths,
|
|
13
|
+
:pool_monitoring, :pool_monitoring_interval,
|
|
14
|
+
:queue_monitoring, :queue_monitoring_interval,
|
|
15
|
+
:request_summary, :timeline, :timeline_max_events,
|
|
16
|
+
:memory_tracking, :http_tracking
|
|
13
17
|
|
|
14
18
|
def initialize
|
|
15
19
|
@endpoint = nil
|
|
@@ -28,6 +32,15 @@ module OpenTrace
|
|
|
28
32
|
@sql_logging = true
|
|
29
33
|
@sql_duration_threshold_ms = 0.0
|
|
30
34
|
@ignore_paths = []
|
|
35
|
+
@pool_monitoring = false
|
|
36
|
+
@pool_monitoring_interval = 30
|
|
37
|
+
@queue_monitoring = false
|
|
38
|
+
@queue_monitoring_interval = 60
|
|
39
|
+
@request_summary = true
|
|
40
|
+
@timeline = true
|
|
41
|
+
@timeline_max_events = 200
|
|
42
|
+
@memory_tracking = false
|
|
43
|
+
@http_tracking = false
|
|
31
44
|
end
|
|
32
45
|
|
|
33
46
|
def valid?
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "net/http"
|
|
4
|
+
|
|
5
|
+
module OpenTrace
  # Wrapper for Net::HTTP#request, meant to be prepended onto Net::HTTP.
  #
  # Every outbound request made while a collector is present in
  # Fiber[:opentrace_collector] is timed and handed to
  # collector.record_http. Nothing is recorded when no collector is present.
  module HttpTracker
    # Times a single outbound request and records it on the current collector.
    #
    # @param req   the request object; only #method and #path are read here
    # @param body  optional request body, forwarded untouched to Net::HTTP
    # @return the response returned by the wrapped Net::HTTP#request
    # @raise whatever the wrapped call raises; network errors are recorded
    #   with status 0 and then re-raised unchanged
    def request(req, body = nil, &block)
      # Guard 1: skip if disabled
      return super unless OpenTrace.enabled?

      # Guard 2: skip if this IS an OpenTrace dispatch call (prevent infinite recursion)
      return super if Fiber[:opentrace_http_tracking_disabled]

      # Guard 3: on an unstarted connection Net::HTTP#request opens the
      # connection and then calls #request again. That inner call comes back
      # through this wrapper, so recording here as well would count the same
      # HTTP call twice.
      return super unless started?

      collector = Fiber[:opentrace_collector]
      return super unless collector

      start_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)

      begin
        response = super
      rescue IOError, SystemCallError, OpenSSL::SSL::SSLError, Timeout::Error, Net::ProtocolError => e
        # Record the failed HTTP call, then re-raise
        opentrace_record_http(collector, req, start_time, status: 0, error: e.class.name)
        raise # ALWAYS re-raise — never swallow app errors
      end

      opentrace_record_http(collector, req, start_time, status: response.code.to_i)
      response
    end

    private

    # Reports one finished call to the collector. Any error raised while
    # recording is swallowed so instrumentation can never break the host's
    # HTTP call.
    def opentrace_record_http(collector, req, start_time, status:, error: nil)
      duration_ms = (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start_time) * 1000

      details = {
        method: req&.method,
        url: opentrace_http_url(req),
        host: address,
        status: status,
        duration_ms: duration_ms
      }
      details[:error] = error if error

      collector.record_http(**details)
    rescue StandardError
      nil
    end

    # Builds "scheme://host[:port]/path". The port is omitted only when it is
    # the default for the scheme in use, so both the success and the failure
    # path report the same, fully qualified URL.
    def opentrace_http_url(req)
      scheme = use_ssl? ? "https" : "http"
      default_port = use_ssl? ? 443 : 80
      port_str = port == default_port ? "" : ":#{port}"
      "#{scheme}://#{address}#{port_str}#{req&.path}"
    end
  end
end
|
data/lib/opentrace/middleware.rb
CHANGED
|
@@ -9,10 +9,51 @@ module OpenTrace
|
|
|
9
9
|
# Rack entry point: sets up per-request, fiber-local state for OpenTrace,
# runs the downstream app, and always clears that state afterwards.
#
# @param env [Hash] the Rack environment
# @return whatever the wrapped app returns
def call(env)
  request_id = env["action_dispatch.request_id"] || env["HTTP_X_REQUEST_ID"]
  OpenTrace.current_request_id = request_id
  Fiber[:opentrace_sql_count] = 0
  Fiber[:opentrace_sql_total_ms] = 0.0

  # Create RequestCollector for accumulate-and-summarize pattern
  config = OpenTrace.config
  if OpenTrace.enabled? && config.request_summary
    # Lazy-load once; skip the require lookup on every later request.
    require_relative "request_collector" unless defined?(OpenTrace::RequestCollector)

    collector = OpenTrace::RequestCollector.new(max_timeline: config.timeline_max_events)
    Fiber[:opentrace_collector] = collector

    # Memory snapshot before request (opt-in)
    collector.memory_before = current_rss_mb if config.memory_tracking
  end

  @app.call(env)
ensure
  # Memory snapshot after request (opt-in); only taken when the "before"
  # snapshot succeeded.
  # NOTE(review): this runs after @app.call has returned — confirm that the
  # code which reads memory_after for the request summary runs after this point.
  collector = Fiber[:opentrace_collector]
  if collector && OpenTrace.config.memory_tracking && collector.memory_before
    collector.memory_after = current_rss_mb
  end

  # Clear fiber-local state so nothing leaks into the next request served
  # by this fiber/thread.
  Fiber[:opentrace_collector] = nil
  Fiber[:opentrace_sql_count] = nil
  Fiber[:opentrace_sql_total_ms] = nil
  OpenTrace.current_request_id = nil
end
|
|
42
|
+
|
|
43
|
+
private
|
|
44
|
+
|
|
45
|
+
# Current process memory in megabytes, or nil when it cannot be determined.
#
# On Linux this is the resident set size: the second field of
# /proc/self/statm is a page count, multiplied by the kernel's real page
# size. Elsewhere it is a rough estimate derived from GC.stat.
#
# @return [Float, nil]
def current_rss_mb
  if RUBY_PLATFORM.include?("linux")
    # Linux: read from /proc — no fork, ~10μs
    resident_pages = File.read("/proc/self/statm").split[1].to_i
    resident_pages * statm_page_size.to_f / 1024 / 1024
  else
    # macOS/other: use GC.stat as lightweight approximation
    # Avoids forking a `ps` subprocess which costs 2-5ms
    GC.stat[:heap_live_slots].to_f * 40 / 1024 / 1024 # rough estimate: ~40 bytes per slot
  end
rescue StandardError
  nil
end

# Page size in bytes used to scale /proc/self/statm page counts. Asks the OS
# rather than assuming 4 KiB, because some Linux kernels use 16 KiB or
# 64 KiB pages. Falls back to 4096 when the lookup is unavailable.
def statm_page_size
  require "etc" unless defined?(::Etc)
  Etc.sysconf(Etc::SC_PAGESIZE)
rescue StandardError, LoadError
  4096
end
|
|
17
58
|
end
|
|
18
59
|
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module OpenTrace
  # Background thread that periodically logs ActiveRecord connection pool
  # statistics through OpenTrace.log.
  #
  # The thread waits on a condition variable instead of a plain sleep so
  # that #stop can wake it immediately rather than waiting out the interval.
  class PoolMonitor
    DEFAULT_INTERVAL = 30 # seconds

    # @param interval [Numeric] seconds between two reports
    def initialize(interval: DEFAULT_INTERVAL)
      @interval = interval
      @thread = nil
      @running = false
      @lock = Mutex.new
      @wakeup = ConditionVariable.new
    end

    # Starts the reporting thread. Does nothing when a live thread is already
    # running. A thread that no longer exists (for example in a forked child
    # process) is replaced.
    def start
      @lock.synchronize do
        return if @running && @thread&.alive?

        @running = true
        @thread = Thread.new { run_loop }
      end
    end

    # Signals the thread to exit, wakes it if it is waiting, and gives it up
    # to two seconds to finish.
    def stop
      thread = @lock.synchronize do
        @running = false
        @wakeup.broadcast
        @thread
      end
      thread&.join(2)
    end

    private

    def run_loop
      Thread.current.report_on_exception = false
      loop do
        break unless wait_for_next_tick

        report_pool_stats
      rescue Exception # rubocop:disable Lint/RescueException
        # Swallow — never crash the host app
      end
    end

    # Blocks until the interval has elapsed or #stop was called.
    # Returns true when the calling thread should report and keep going.
    def wait_for_next_tick
      @lock.synchronize do
        deadline = monotonic_now + @interval
        # Loop on the deadline: a condition variable may wake up early.
        while current_run? && (remaining = deadline - monotonic_now).positive?
          @wakeup.wait(@lock, remaining)
        end
        current_run?
      end
    end

    # True only for the thread created by the latest #start, so a thread left
    # over from an earlier start/stop cycle exits instead of reporting twice.
    def current_run?
      @running && @thread.equal?(Thread.current)
    end

    def monotonic_now
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end

    # Logs one snapshot of ActiveRecord::Base.connection_pool.stat.
    # WARN when threads are waiting for a connection, DEBUG otherwise.
    def report_pool_stats
      return unless OpenTrace.enabled?
      return unless defined?(::ActiveRecord::Base)

      stat = ActiveRecord::Base.connection_pool.stat

      metadata = {
        metric_type: "db_pool",
        pool_size: stat[:size],
        connections_busy: stat[:busy],
        connections_dead: stat[:dead],
        connections_idle: stat[:idle],
        threads_waiting: stat[:waiting],
        checkout_timeout: stat[:checkout_timeout]
      }

      level = stat[:waiting].to_i > 0 ? "WARN" : "DEBUG"
      message = "DB pool: #{stat[:busy]}/#{stat[:size]} busy, #{stat[:waiting]} waiting"

      OpenTrace.log(level, message, metadata)
    end
  end
end
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module OpenTrace
  # Background thread that periodically logs job queue sizes through
  # OpenTrace.log. The backend is detected from the constants that are
  # loaded: Sidekiq, then GoodJob, then SolidQueue.
  #
  # The thread waits on a condition variable instead of a plain sleep so
  # that #stop can wake it immediately rather than waiting out the interval.
  class QueueMonitor
    DEFAULT_INTERVAL = 60 # seconds

    # @param interval [Numeric] seconds between two reports
    def initialize(interval: DEFAULT_INTERVAL)
      @interval = interval
      @thread = nil
      @running = false
      @lock = Mutex.new
      @wakeup = ConditionVariable.new
    end

    # Starts the reporting thread. Does nothing when a live thread is already
    # running. A thread that no longer exists (for example in a forked child
    # process) is replaced.
    def start
      @lock.synchronize do
        return if @running && @thread&.alive?

        @running = true
        @thread = Thread.new { run_loop }
      end
    end

    # Signals the thread to exit, wakes it if it is waiting, and gives it up
    # to two seconds to finish.
    def stop
      thread = @lock.synchronize do
        @running = false
        @wakeup.broadcast
        @thread
      end
      thread&.join(2)
    end

    private

    def run_loop
      Thread.current.report_on_exception = false
      loop do
        break unless wait_for_next_tick

        report_queue_stats
      rescue Exception # rubocop:disable Lint/RescueException
        # Swallow
      end
    end

    # Blocks until the interval has elapsed or #stop was called.
    # Returns true when the calling thread should report and keep going.
    def wait_for_next_tick
      @lock.synchronize do
        deadline = monotonic_now + @interval
        # Loop on the deadline: a condition variable may wake up early.
        while current_run? && (remaining = deadline - monotonic_now).positive?
          @wakeup.wait(@lock, remaining)
        end
        current_run?
      end
    end

    # True only for the thread created by the latest #start, so a thread left
    # over from an earlier start/stop cycle exits instead of reporting twice.
    def current_run?
      @running && @thread.equal?(Thread.current)
    end

    def monotonic_now
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end

    # Logs one snapshot of all queue sizes. WARN when the total exceeds
    # 1,000 jobs, INFO otherwise. Logs nothing when no backend is detected,
    # no queue exists, or collecting the data failed.
    def report_queue_stats
      return unless OpenTrace.enabled?

      adapter = detect_adapter
      queues = collect_queue_data(adapter)
      return if queues.nil? || queues.empty?

      total_enqueued = queues.values.sum { |q| q[:size] }

      metadata = {
        metric_type: "queue_depth",
        queues: queues,
        total_enqueued: total_enqueued,
        adapter: adapter
      }

      level = total_enqueued > 1000 ? "WARN" : "INFO"
      summary = queues.map { |name, data| "#{name}=#{data[:size]}" }.join(", ")
      message = "Queue stats: #{summary}"

      OpenTrace.log(level, message, metadata)
    end

    # @return [String, nil] "sidekiq", "good_job", "solid_queue", or nil
    def detect_adapter
      if defined?(::Sidekiq::Queue)
        "sidekiq"
      elsif defined?(::GoodJob::Job)
        "good_job"
      elsif defined?(::SolidQueue::ReadyExecution)
        "solid_queue"
      end
    end

    # @return [Hash, nil] queue name => stats hash; nil when the backend is
    #   unknown or raised while being queried
    def collect_queue_data(adapter = detect_adapter)
      case adapter
      when "sidekiq" then sidekiq_stats
      when "good_job" then good_job_stats
      when "solid_queue" then solid_queue_stats
      end
    rescue StandardError
      nil
    end

    def sidekiq_stats
      Sidekiq::Queue.all.each_with_object({}) do |queue, stats|
        stats[queue.name] = {
          size: queue.size,
          latency_ms: (queue.latency * 1000).round(1)
        }
      end
    end

    # Counts unfinished GoodJob jobs per queue.
    def good_job_stats
      queue_sizes(GoodJob::Job) { |jobs| jobs.where(finished_at: nil) }
    end

    # Counts ready SolidQueue executions per queue.
    def solid_queue_stats
      queue_sizes(SolidQueue::ReadyExecution)
    end

    # Runs a grouped count on an ActiveRecord model from the monitor thread.
    # The query runs inside with_connection so the connection goes back to
    # the pool afterwards instead of staying checked out by this long-lived
    # thread between reports.
    def queue_sizes(model)
      counts = model.connection_pool.with_connection do
        scope = block_given? ? yield(model) : model
        scope.group(:queue_name).count
      end
      counts.each_with_object({}) { |(name, count), stats| stats[name] = { size: count } }
    end
  end
end
|
data/lib/opentrace/rails.rb
CHANGED
|
@@ -34,14 +34,59 @@ if defined?(::Rails::Railtie)
|
|
|
34
34
|
# Swallow - never affect the host app
|
|
35
35
|
end
|
|
36
36
|
|
|
37
|
-
# Subscribe to SQL query notifications
|
|
37
|
+
# Subscribe to SQL query notifications (also increments N+1 counter and feeds collector)
|
|
38
38
|
if OpenTrace.config.sql_logging
|
|
39
39
|
ActiveSupport::Notifications.subscribe("sql.active_record") do |*args|
|
|
40
40
|
event = ActiveSupport::Notifications::Event.new(*args)
|
|
41
|
+
|
|
42
|
+
# Increment per-request SQL counter (Fiber-local, zero-cost)
|
|
43
|
+
if Fiber[:opentrace_sql_count]
|
|
44
|
+
Fiber[:opentrace_sql_count] += 1
|
|
45
|
+
Fiber[:opentrace_sql_total_ms] = (Fiber[:opentrace_sql_total_ms] || 0.0) + (event.duration || 0.0)
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
# Feed RequestCollector for timeline & summary
|
|
49
|
+
collector = Fiber[:opentrace_collector]
|
|
50
|
+
if collector
|
|
51
|
+
payload = event.payload
|
|
52
|
+
unless payload[:name] == "SCHEMA"
|
|
53
|
+
table = nil
|
|
54
|
+
if payload[:sql] =~ /\b(?:FROM|INTO|UPDATE|JOIN)\s+[`"]?(\w+)[`"]?/i
|
|
55
|
+
table = $1
|
|
56
|
+
end
|
|
57
|
+
collector.record_sql(name: payload[:name], duration_ms: event.duration || 0.0, table: table)
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
|
|
41
61
|
forward_sql_log(event)
|
|
42
62
|
rescue StandardError
|
|
43
63
|
# Swallow
|
|
44
64
|
end
|
|
65
|
+
else
|
|
66
|
+
# Even when sql_logging is off, still count queries for N+1 detection and feed collector
|
|
67
|
+
ActiveSupport::Notifications.subscribe("sql.active_record") do |*args|
|
|
68
|
+
event = ActiveSupport::Notifications::Event.new(*args)
|
|
69
|
+
|
|
70
|
+
if Fiber[:opentrace_sql_count]
|
|
71
|
+
Fiber[:opentrace_sql_count] += 1
|
|
72
|
+
Fiber[:opentrace_sql_total_ms] = (Fiber[:opentrace_sql_total_ms] || 0.0) + (event.duration || 0.0)
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
# Feed RequestCollector for timeline & summary
|
|
76
|
+
collector = Fiber[:opentrace_collector]
|
|
77
|
+
if collector
|
|
78
|
+
payload = event.payload
|
|
79
|
+
unless payload[:name] == "SCHEMA"
|
|
80
|
+
table = nil
|
|
81
|
+
if payload[:sql] =~ /\b(?:FROM|INTO|UPDATE|JOIN)\s+[`"]?(\w+)[`"]?/i
|
|
82
|
+
table = $1
|
|
83
|
+
end
|
|
84
|
+
collector.record_sql(name: payload[:name], duration_ms: event.duration || 0.0, table: table)
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
rescue StandardError
|
|
88
|
+
# Swallow
|
|
89
|
+
end
|
|
45
90
|
end
|
|
46
91
|
|
|
47
92
|
# Subscribe to ActiveJob notifications
|
|
@@ -51,6 +96,73 @@ if defined?(::Rails::Railtie)
|
|
|
51
96
|
rescue StandardError
|
|
52
97
|
# Swallow
|
|
53
98
|
end
|
|
99
|
+
|
|
100
|
+
# Subscribe to deprecation warnings
|
|
101
|
+
ActiveSupport::Notifications.subscribe("deprecation.rails") do |*args|
|
|
102
|
+
event = ActiveSupport::Notifications::Event.new(*args)
|
|
103
|
+
forward_deprecation_log(event)
|
|
104
|
+
rescue StandardError
|
|
105
|
+
# Swallow
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
# View render tracking (only records when RequestCollector exists)
|
|
109
|
+
%w[render_template.action_view render_partial.action_view].each do |event_name|
|
|
110
|
+
ActiveSupport::Notifications.subscribe(event_name) do |*args|
|
|
111
|
+
collector = Fiber[:opentrace_collector]
|
|
112
|
+
next unless collector
|
|
113
|
+
|
|
114
|
+
event = ActiveSupport::Notifications::Event.new(*args)
|
|
115
|
+
template = event.payload[:identifier]
|
|
116
|
+
# Shorten: /Users/deploy/app/views/orders/show.html.erb → orders/show.html.erb
|
|
117
|
+
template = template.split("views/").last if template&.include?("views/")
|
|
118
|
+
|
|
119
|
+
collector.record_view(template: template, duration_ms: event.duration || 0.0)
|
|
120
|
+
rescue StandardError
|
|
121
|
+
# Swallow
|
|
122
|
+
end
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
# Cache operation tracking (only records when RequestCollector exists)
|
|
126
|
+
%w[cache_read.active_support cache_write.active_support cache_delete.active_support].each do |event_name|
|
|
127
|
+
ActiveSupport::Notifications.subscribe(event_name) do |*args|
|
|
128
|
+
collector = Fiber[:opentrace_collector]
|
|
129
|
+
next unless collector
|
|
130
|
+
|
|
131
|
+
event = ActiveSupport::Notifications::Event.new(*args)
|
|
132
|
+
action = event_name.split(".").first.sub("cache_", "").to_sym # :read, :write, :delete
|
|
133
|
+
|
|
134
|
+
collector.record_cache(
|
|
135
|
+
action: action,
|
|
136
|
+
hit: event.payload[:hit],
|
|
137
|
+
duration_ms: event.duration || 0.0
|
|
138
|
+
)
|
|
139
|
+
rescue StandardError
|
|
140
|
+
# Swallow
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
# External HTTP tracking (opt-in, prepends Net::HTTP)
|
|
145
|
+
if OpenTrace.config.http_tracking
|
|
146
|
+
require_relative "http_tracker"
|
|
147
|
+
Net::HTTP.prepend(OpenTrace::HttpTracker)
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
# Start background monitors (opt-in)
|
|
151
|
+
if OpenTrace.config.pool_monitoring
|
|
152
|
+
require_relative "pool_monitor"
|
|
153
|
+
@pool_monitor = OpenTrace::PoolMonitor.new(
|
|
154
|
+
interval: OpenTrace.config.pool_monitoring_interval
|
|
155
|
+
)
|
|
156
|
+
@pool_monitor.start
|
|
157
|
+
end
|
|
158
|
+
|
|
159
|
+
if OpenTrace.config.queue_monitoring
|
|
160
|
+
require_relative "queue_monitor"
|
|
161
|
+
@queue_monitor = OpenTrace::QueueMonitor.new(
|
|
162
|
+
interval: OpenTrace.config.queue_monitoring_interval
|
|
163
|
+
)
|
|
164
|
+
@queue_monitor.start
|
|
165
|
+
end
|
|
54
166
|
end
|
|
55
167
|
|
|
56
168
|
class << self
|
|
@@ -75,7 +187,7 @@ if defined?(::Rails::Railtie)
|
|
|
75
187
|
user_id = extract_user_id(payload)
|
|
76
188
|
metadata[:user_id] = user_id if user_id
|
|
77
189
|
|
|
78
|
-
# Exception auto-capture
|
|
190
|
+
# Exception auto-capture with fingerprinting
|
|
79
191
|
if payload[:exception]
|
|
80
192
|
metadata[:exception_class] = payload[:exception][0]
|
|
81
193
|
metadata[:exception_message] = truncate(payload[:exception][1], 500)
|
|
@@ -84,11 +196,42 @@ if defined?(::Rails::Railtie)
|
|
|
84
196
|
if payload[:exception_object]&.backtrace
|
|
85
197
|
cleaned = clean_backtrace(payload[:exception_object].backtrace)
|
|
86
198
|
metadata[:backtrace] = cleaned.first(15)
|
|
199
|
+
metadata[:error_fingerprint] = OpenTrace.send(:compute_error_fingerprint,
|
|
200
|
+
payload[:exception][0], cleaned)
|
|
87
201
|
end
|
|
88
202
|
|
|
89
203
|
# Filtered request params
|
|
90
204
|
extract_params(payload, metadata)
|
|
91
205
|
|
|
206
|
+
# Request headers
|
|
207
|
+
extract_request_headers(payload, metadata)
|
|
208
|
+
|
|
209
|
+
# Merge collector summary (Phase 2) or fall back to Fiber-local counters (Phase 1)
|
|
210
|
+
collector = Fiber[:opentrace_collector]
|
|
211
|
+
if collector
|
|
212
|
+
metadata.merge!(collector.summary)
|
|
213
|
+
|
|
214
|
+
# Compute time breakdown
|
|
215
|
+
total = event.duration || 0.0
|
|
216
|
+
if total > 0
|
|
217
|
+
sql_pct = [((collector.sql_total_ms / total) * 100).round(1), 100.0].min
|
|
218
|
+
view_pct = [((collector.view_total_ms / total) * 100).round(1), 100.0].min
|
|
219
|
+
http_pct = collector.http_count > 0 ? [((collector.http_total_ms / total) * 100).round(1), 100.0].min : 0.0
|
|
220
|
+
other_pct = [100 - sql_pct - view_pct - http_pct, 0].max.round(1)
|
|
221
|
+
metadata[:time_breakdown] = {
|
|
222
|
+
sql_pct: sql_pct,
|
|
223
|
+
view_pct: view_pct,
|
|
224
|
+
http_pct: http_pct,
|
|
225
|
+
other_pct: other_pct
|
|
226
|
+
}
|
|
227
|
+
end
|
|
228
|
+
elsif Fiber[:opentrace_sql_count]
|
|
229
|
+
# Fallback: Phase 1 N+1 counter from Fiber-locals
|
|
230
|
+
metadata[:sql_query_count] = Fiber[:opentrace_sql_count]
|
|
231
|
+
metadata[:sql_total_ms] = Fiber[:opentrace_sql_total_ms]&.round(1)
|
|
232
|
+
metadata[:n_plus_one_warning] = true if Fiber[:opentrace_sql_count] > 20
|
|
233
|
+
end
|
|
234
|
+
|
|
92
235
|
level = if payload[:exception]
|
|
93
236
|
"ERROR"
|
|
94
237
|
elsif payload[:status].to_i >= 500
|
|
@@ -119,6 +262,19 @@ if defined?(::Rails::Railtie)
|
|
|
119
262
|
duration_ms: event.duration&.round(1)
|
|
120
263
|
}.compact
|
|
121
264
|
|
|
265
|
+
# Queue latency calculation
|
|
266
|
+
if job.respond_to?(:enqueued_at) && job.enqueued_at
|
|
267
|
+
enqueued_at = case job.enqueued_at
|
|
268
|
+
when Time then job.enqueued_at
|
|
269
|
+
when String then Time.parse(job.enqueued_at)
|
|
270
|
+
end
|
|
271
|
+
if enqueued_at
|
|
272
|
+
queue_latency_s = Time.now.utc - enqueued_at.utc
|
|
273
|
+
metadata[:queue_latency_ms] = (queue_latency_s * 1000).round(1) if queue_latency_s > 0
|
|
274
|
+
metadata[:enqueued_at] = enqueued_at.utc.strftime("%Y-%m-%dT%H:%M:%S.%6NZ")
|
|
275
|
+
end
|
|
276
|
+
end
|
|
277
|
+
|
|
122
278
|
# Capture arguments (truncated)
|
|
123
279
|
if job.respond_to?(:arguments)
|
|
124
280
|
args_json = JSON.generate(job.arguments)
|
|
@@ -129,12 +285,15 @@ if defined?(::Rails::Railtie)
|
|
|
129
285
|
end
|
|
130
286
|
end
|
|
131
287
|
|
|
132
|
-
# Capture exceptions from job failures
|
|
288
|
+
# Capture exceptions from job failures with fingerprinting
|
|
133
289
|
if payload[:exception_object]
|
|
134
290
|
metadata[:exception_class] = payload[:exception_object].class.name
|
|
135
291
|
metadata[:exception_message] = truncate(payload[:exception_object].message, 500)
|
|
136
292
|
if payload[:exception_object].backtrace
|
|
137
|
-
|
|
293
|
+
cleaned = clean_backtrace(payload[:exception_object].backtrace)
|
|
294
|
+
metadata[:backtrace] = cleaned.first(15)
|
|
295
|
+
metadata[:error_fingerprint] = OpenTrace.send(:compute_error_fingerprint,
|
|
296
|
+
payload[:exception_object].class.name, cleaned)
|
|
138
297
|
end
|
|
139
298
|
end
|
|
140
299
|
|
|
@@ -183,6 +342,40 @@ if defined?(::Rails::Railtie)
|
|
|
183
342
|
# Swallow
|
|
184
343
|
end
|
|
185
344
|
|
|
345
|
+
# Forwards a deprecation notification to OpenTrace as a WARN-level log entry.
#
# event - notification event; its payload is read for :message and :callstack.
#
# Does nothing when OpenTrace is disabled. The log metadata carries the
# message (truncated to 500), the first callstack entry as a String when one
# exists, and the current request id when one is set. Any StandardError raised
# along the way is swallowed so instrumentation never breaks the host app.
def forward_deprecation_log(event)
  return unless OpenTrace.enabled?

  details = event.payload
  text = details[:message].to_s
  stack = details[:callstack]
  origin = stack&.first&.to_s

  # compact drops the callsite key when no callstack was supplied.
  meta = { deprecation_message: truncate(text, 500), deprecation_callsite: origin }.compact
  meta[:request_id] = OpenTrace.current_request_id if OpenTrace.current_request_id

  # The headline uses a shorter cut of the message than the metadata copy.
  headline = "DEPRECATION: #{truncate(text, 200)}"
  OpenTrace.log("WARN", headline, meta)
rescue StandardError
  # Swallow
end
|
|
363
|
+
|
|
364
|
+
# Copies a fixed set of request headers from the request env into +metadata+.
#
# payload  - notification payload; ignored unless payload[:headers] responds to #env.
# metadata - Hash mutated in place. Receives :request_content_type,
#            :request_accept, :request_user_agent (passed through truncate
#            with a limit of 200) and :request_referer, for whichever values
#            are non-nil.
#
# Any StandardError is swallowed; in that case metadata is left untouched
# because the merge is the final step.
def extract_request_headers(payload, metadata)
  header_source = payload[:headers]
  return unless header_source.respond_to?(:env)

  rack_env = header_source.env

  picked = {}
  picked[:request_content_type] = rack_env["CONTENT_TYPE"]
  picked[:request_accept] = rack_env["HTTP_ACCEPT"]
  picked[:request_user_agent] = truncate(rack_env["HTTP_USER_AGENT"], 200)
  picked[:request_referer] = rack_env["HTTP_REFERER"]
  picked = picked.compact # keep only headers that were actually sent

  metadata.merge!(picked) unless picked.empty?
rescue StandardError
  # Swallow
end
|
|
378
|
+
|
|
186
379
|
def extract_user_id(payload)
|
|
187
380
|
controller = payload[:controller_instance]
|
|
188
381
|
return unless controller
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module OpenTrace
  # Accumulates per-request instrumentation data (SQL queries, view renders,
  # cache operations, outbound HTTP calls and optional memory readings) and
  # condenses it into a summary Hash plus a size-capped event timeline.
  #
  # The collector performs no locking of its own.
  class RequestCollector
    # Default cap on the number of timeline entries kept per collector.
    MAX_TIMELINE_EVENTS = 200

    # Default SQL query count above which #summary sets :n_plus_one_warning.
    N_PLUS_ONE_THRESHOLD = 20

    attr_reader :sql_count, :sql_total_ms,
                :view_count, :view_total_ms,
                :cache_reads, :cache_hits, :cache_writes, :cache_deletes,
                :http_count, :http_total_ms
    attr_accessor :memory_before, :memory_after

    # max_timeline         - maximum number of timeline entries to retain;
    #                        events past the cap still update the counters.
    # n_plus_one_threshold - SQL query count that must be exceeded before
    #                        #summary reports :n_plus_one_warning.
    def initialize(max_timeline: MAX_TIMELINE_EVENTS, n_plus_one_threshold: N_PLUS_ONE_THRESHOLD)
      @max_timeline = max_timeline
      @n_plus_one_threshold = n_plus_one_threshold

      @sql_count = 0
      @sql_total_ms = 0.0
      @sql_slowest_ms = 0.0
      @sql_slowest_name = nil

      @view_count = 0
      @view_total_ms = 0.0
      @view_slowest_ms = 0.0
      @view_slowest_template = nil

      @cache_reads = 0
      @cache_hits = 0
      @cache_writes = 0
      @cache_deletes = 0

      @http_count = 0
      @http_total_ms = 0.0
      @http_slowest_ms = 0.0
      @http_slowest_host = nil

      @memory_before = nil
      @memory_after = nil

      @timeline = []
      # Monotonic start time; timeline offsets are measured relative to it.
      @request_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end

    # Records one SQL query.
    #
    # name        - query name reported in the timeline and as the slowest query.
    # duration_ms - query duration in milliseconds (Numeric).
    # table       - accepted for caller compatibility; currently not stored.
    def record_sql(name:, duration_ms:, table: nil)
      @sql_count += 1
      @sql_total_ms += duration_ms

      if duration_ms > @sql_slowest_ms
        @sql_slowest_ms = duration_ms
        @sql_slowest_name = name
      end

      append_timeline({ t: :sql, n: name, ms: duration_ms.round(1), at: offset_ms })
    end

    # Records one template or partial render.
    #
    # template    - template identifier reported in the timeline.
    # duration_ms - render duration in milliseconds (Numeric).
    def record_view(template:, duration_ms:)
      @view_count += 1
      @view_total_ms += duration_ms

      if duration_ms > @view_slowest_ms
        @view_slowest_ms = duration_ms
        @view_slowest_template = template
      end

      append_timeline({ t: :view, n: template, ms: duration_ms.round(1), at: offset_ms })
    end

    # Records one cache operation.
    #
    # action      - :read, :write or :delete; other values are only added to
    #               the timeline and do not touch any counter.
    # hit         - truthy when a read was a hit; only counted for :read.
    # duration_ms - operation duration in milliseconds (Numeric).
    def record_cache(action:, hit: nil, duration_ms: 0.0)
      case action
      when :read
        @cache_reads += 1
        @cache_hits += 1 if hit
      when :write
        @cache_writes += 1
      when :delete
        @cache_deletes += 1
      end

      append_timeline({ t: :cache, a: action, hit: hit, ms: duration_ms.round(2), at: offset_ms })
    end

    # Records one outbound HTTP call.
    #
    # method      - HTTP verb, combined with host for the timeline label.
    # url         - accepted for caller compatibility; currently not stored.
    # host        - remote host, also reported as the slowest host.
    # status      - response status stored on the timeline entry.
    # duration_ms - call duration in milliseconds (Numeric).
    # error       - optional error description, added to the entry when given.
    def record_http(method:, url:, host:, status:, duration_ms:, error: nil)
      @http_count += 1
      @http_total_ms += duration_ms

      if duration_ms > @http_slowest_ms
        @http_slowest_ms = duration_ms
        @http_slowest_host = host
      end

      entry = { t: :http, n: "#{method} #{host}", ms: duration_ms.round(1), s: status, at: offset_ms }
      entry[:err] = error if error
      append_timeline(entry)
    end

    # Builds the summary Hash for everything recorded so far.
    #
    # Keys whose value would be nil are omitted. HTTP, memory and cache-delete
    # figures only appear when there is something to report. The :timeline
    # value is a copy of the internal list, so events recorded after this call
    # do not alter a summary that was already returned.
    #
    # Returns a Hash with Symbol keys.
    def summary
      result = {
        sql_query_count: @sql_count,
        sql_total_ms: @sql_total_ms.round(1),
        sql_slowest_ms: @sql_slowest_ms.round(1),
        sql_slowest_name: @sql_slowest_name,
        view_render_count: @view_count,
        view_total_ms: @view_total_ms.round(1),
        view_slowest_ms: @view_slowest_ms.round(1),
        view_slowest_template: @view_slowest_template,
        cache_reads: @cache_reads,
        cache_hits: @cache_hits,
        cache_writes: @cache_writes,
        cache_hit_ratio: @cache_reads > 0 ? (@cache_hits.to_f / @cache_reads).round(2) : nil,
        n_plus_one_warning: @sql_count > @n_plus_one_threshold ? true : nil,
        timeline: @timeline.empty? ? nil : @timeline.dup
      }

      # Deletes were counted but never surfaced; report them only when present
      # so summaries without deletes keep their existing shape.
      result[:cache_deletes] = @cache_deletes if @cache_deletes > 0

      # HTTP stats (only present if calls were made)
      if @http_count > 0
        result[:http_external_count] = @http_count
        result[:http_external_total_ms] = @http_total_ms.round(1)
        result[:http_slowest_ms] = @http_slowest_ms.round(1)
        result[:http_slowest_host] = @http_slowest_host
      end

      # Memory stats (only present when both readings were assigned)
      if @memory_before && @memory_after
        result[:memory_before_mb] = @memory_before
        result[:memory_after_mb] = @memory_after
        result[:memory_delta_mb] = (@memory_after - @memory_before).round(1)
      end

      result.compact
    end

    private

    # Milliseconds elapsed since the collector was created, rounded to 0.1 ms.
    def offset_ms
      ((Process.clock_gettime(Process::CLOCK_MONOTONIC) - @request_start) * 1000).round(1)
    end

    # Adds an entry to the timeline unless the cap has been reached.
    def append_timeline(entry)
      @timeline << entry if @timeline.size < @max_timeline
    end
  end
end
|
data/lib/opentrace/version.rb
CHANGED
data/lib/opentrace.rb
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "socket"
|
|
4
|
+
require "digest"
|
|
4
5
|
require_relative "opentrace/version"
|
|
5
6
|
require_relative "opentrace/config"
|
|
6
7
|
require_relative "opentrace/client"
|
|
@@ -70,6 +71,7 @@ module OpenTrace
|
|
|
70
71
|
exception.backtrace.reject { |l| l.include?("/gems/") }
|
|
71
72
|
end
|
|
72
73
|
meta[:backtrace] = cleaned.first(15)
|
|
74
|
+
meta[:error_fingerprint] = compute_error_fingerprint(exception.class.name, cleaned)
|
|
73
75
|
end
|
|
74
76
|
|
|
75
77
|
log("ERROR", exception.message.to_s, meta)
|
|
@@ -145,6 +147,16 @@ module OpenTrace
|
|
|
145
147
|
{}
|
|
146
148
|
end
|
|
147
149
|
|
|
150
|
+
# Computes a short identifier used to group occurrences of the same error.
#
# exception_class - exception class name; interpolated into the digest input.
# backtrace       - Array of backtrace line Strings. Any other value is
#                   treated as "no backtrace".
#
# The origin frame is the first line containing "app/" or "lib/", falling back
# to the first line. Line numbers are stripped from that frame so the
# fingerprint does not change when code merely moves within a file. When no
# frame is available the literal "unknown" is used instead.
#
# Returns the first 12 hex characters of an MD5 digest, or nil if anything
# raises while computing it.
def compute_error_fingerprint(exception_class, backtrace)
  origin = if backtrace.is_a?(Array)
    backtrace.find { |l| l.include?("app/") || l.include?("lib/") } || backtrace.first
  end
  # ":NN:" covers the usual "file:NN:in ..." frame; the trailing ":NN" strip
  # covers frames that end at the line number, which previously kept it.
  normalized_origin = origin&.gsub(/:\d+:/, ":")&.sub(/:\d+\z/, "") || "unknown"
  Digest::MD5.hexdigest("#{exception_class}||#{normalized_origin}")[0, 12]
rescue StandardError
  nil
end
|
|
159
|
+
|
|
148
160
|
def resolve_context
|
|
149
161
|
ctx = case config.context
|
|
150
162
|
when Proc then config.context.call
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: opentrace
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.0
|
|
4
|
+
version: 0.6.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- OpenTrace
|
|
@@ -36,10 +36,14 @@ files:
|
|
|
36
36
|
- lib/opentrace.rb
|
|
37
37
|
- lib/opentrace/client.rb
|
|
38
38
|
- lib/opentrace/config.rb
|
|
39
|
+
- lib/opentrace/http_tracker.rb
|
|
39
40
|
- lib/opentrace/log_forwarder.rb
|
|
40
41
|
- lib/opentrace/logger.rb
|
|
41
42
|
- lib/opentrace/middleware.rb
|
|
43
|
+
- lib/opentrace/pool_monitor.rb
|
|
44
|
+
- lib/opentrace/queue_monitor.rb
|
|
42
45
|
- lib/opentrace/rails.rb
|
|
46
|
+
- lib/opentrace/request_collector.rb
|
|
43
47
|
- lib/opentrace/version.rb
|
|
44
48
|
homepage: https://github.com/adham90/opentrace-ruby
|
|
45
49
|
licenses:
|