opentrace 0.3.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 415cb9523a3c54703221a0604c9a301eb47b9f71c99737fcc194ced1d28ea178
4
- data.tar.gz: cb6d7f5a295c27bb68ff51a79b196297b18fe3ef568ac9e788c3d010828d2092
3
+ metadata.gz: fbe3de5afdb5f92afcef49d368b8f8b3714ca17fe453b168066f53a5b0e5e1ba
4
+ data.tar.gz: 8f749e943951c939e7daa8f93a454bb0c89646bc295c0550c680064ebf38db16
5
5
  SHA512:
6
- metadata.gz: 5e1a1a6f4d72827d11d4d879c867afd98e96e8fead065232a1e64fd32c74c309c3db52fde64ab60f24b0a40884cac78177e5b368a998cf14b026c0ca85388277
7
- data.tar.gz: 3d3c85e68ac60f2d25824a2ec687fec9f7e31f40ba25c6fb3ca3fa031c9f2abff677ffbde978a93de430538cb7adeea9e1f907ba4f8481ecf32270121dea5196
6
+ metadata.gz: e3ecb7c4b649951f64e1090bb7ea06da69cb603dcd44534c9a5e71d76eb697239fddf2267cd2a5e0cff2aa1bf86dc78be6733be754df8e5c6a435967d90809a8
7
+ data.tar.gz: 3625d9de1cd2dda011f69283b34a17964450cda3c44c0f19431fb450e2a24328f1c260a12ea9929331a54866ac90afd07addd65e16cb2eee9e42a939807f3a69
data/README.md CHANGED
@@ -19,17 +19,28 @@ A thin, safe Ruby client that forwards structured application logs to an [OpenTr
19
19
  - **Works with any server** -- Puma (threads), Unicorn (forks), Passenger, and Falcon (fibers)
20
20
  - **Fork safe** -- detects forked worker processes and re-initializes cleanly
21
21
  - **Fiber safe** -- uses `Fiber[]` storage for correct request isolation in fiber-based servers
22
- - **Rails integration** -- auto-instruments controllers, SQL queries, and ActiveJob via Railtie
22
+ - **Rails integration** -- auto-instruments controllers, SQL queries, ActiveJob, views, cache, and more
23
23
  - **Rack middleware** -- propagates `request_id` via fiber-local storage
24
24
  - **Logger wrapper** -- drop-in replacement that forwards to OpenTrace while keeping your original logger
25
25
  - **Rails 7.1+ BroadcastLogger** -- native support via `broadcast_to`
26
26
  - **TaggedLogging** -- preserves `ActiveSupport::TaggedLogging` tags in metadata
27
27
  - **Context support** -- attach global metadata to every log via Hash or Proc
28
- - **Level filtering** -- `min_level` config to control which severities are forwarded
28
+ - **Business events** -- `OpenTrace.event` sends typed events (e.g. `payment.completed`) that bypass level filtering
29
+ - **Level filtering** -- `min_level` threshold or `allowed_levels` list to control which severities are forwarded
29
30
  - **Auto-enrichment** -- every log includes `hostname`, `pid`, and `git_sha` automatically
30
- - **Exception helper** -- `OpenTrace.error` captures class, message, and cleaned backtrace
31
+ - **Exception helper** -- `OpenTrace.error` captures class, message, cleaned backtrace, and error fingerprint
31
32
  - **Runtime controls** -- enable/disable logging at runtime without restarting
32
33
  - **Graceful shutdown** -- pending logs are flushed automatically on process exit
34
+ - **N+1 query detection** -- warns when a request exceeds 20 SQL queries
35
+ - **Per-request summary** -- one rich log per request with SQL, view, cache breakdown and timeline
36
+ - **Error fingerprinting** -- stable fingerprint for grouping identical errors across requests
37
+ - **Deprecation tracking** -- captures Rails deprecation warnings with callsite
38
+ - **DB pool monitoring** -- background thread reports connection pool saturation (opt-in)
39
+ - **Job queue depth** -- monitors Sidekiq, GoodJob, or SolidQueue queue sizes (opt-in)
40
+ - **Memory delta tracking** -- snapshots process RSS before/after each request (opt-in)
41
+ - **External HTTP tracking** -- captures outbound Net::HTTP calls with timing (opt-in)
42
+ - **Version negotiation** -- startup compatibility check with capability-based feature detection
43
+ - **Distributed tracing** -- W3C Trace Context (`traceparent`) propagation across services with span IDs
33
44
 
34
45
  ## Installation
35
46
 
@@ -79,6 +90,7 @@ OpenTrace.configure do |c|
79
90
  c.timeout = 1.0 # HTTP timeout in seconds (default: 1.0)
80
91
  c.enabled = true # default: true
81
92
  c.min_level = :info # minimum level to forward (default: :debug)
93
+ c.allowed_levels = [:warn, :error] # explicit level list (overrides min_level, default: nil)
82
94
  c.batch_size = 50 # logs per batch (default: 50)
83
95
  c.flush_interval = 5.0 # seconds between flushes (default: 5.0)
84
96
 
@@ -95,6 +107,24 @@ OpenTrace.configure do |c|
95
107
  # SQL logging (Rails only)
96
108
  c.sql_logging = true # default: true
97
109
  c.sql_duration_threshold_ms = 100.0 # only log queries slower than this (default: 0.0 = all)
110
+
111
+ # Path filtering
112
+ c.ignore_paths = ["/health", %r{\A/assets/}] # skip noisy paths (default: [])
113
+
114
+ # Per-request summary (Rails only)
115
+ c.request_summary = true # accumulate events into one rich log (default: true)
116
+ c.timeline = true # include event timeline in summary (default: true)
117
+ c.timeline_max_events = 200 # cap timeline entries (default: 200)
118
+
119
+ # Background monitors (opt-in)
120
+ c.pool_monitoring = false # DB connection pool stats (default: false)
121
+ c.pool_monitoring_interval = 30 # seconds between checks (default: 30)
122
+ c.queue_monitoring = false # job queue depth monitoring (default: false)
123
+ c.queue_monitoring_interval = 60 # seconds between checks (default: 60)
124
+
125
+ # Advanced opt-in features
126
+ c.memory_tracking = false # RSS delta per request (default: false)
127
+ c.http_tracking = false # external HTTP call tracking (default: false)
98
128
  end
99
129
  ```
100
130
 
@@ -102,15 +132,21 @@ If any required field (`endpoint`, `api_key`, `service`) is missing or empty, th
102
132
 
103
133
  ### Level Filtering
104
134
 
105
- Control which log levels are forwarded with `min_level`:
135
+ Control which log levels are forwarded with `min_level` (threshold) or `allowed_levels` (explicit list):
106
136
 
107
137
  ```ruby
108
138
  OpenTrace.configure do |c|
109
139
  # ...
140
+ # Option A: Threshold — forward this level and above
110
141
  c.min_level = :warn # only forward WARN, ERROR, and FATAL
142
+
143
+ # Option B: Explicit list — forward only these levels (overrides min_level)
144
+ c.allowed_levels = [:warn, :error] # only forward WARN and ERROR
111
145
  end
112
146
  ```
113
147
 
148
+ When `allowed_levels` is set, it takes precedence over `min_level`. When `allowed_levels` is `nil` (the default), `min_level` is used.
149
+
114
150
  Available levels: `:debug`, `:info`, `:warn`, `:error`, `:fatal`
115
151
 
116
152
  ## Usage
@@ -134,7 +170,7 @@ Pass `trace_id` inside metadata and it will be promoted to a top-level field aut
134
170
 
135
171
  ### Exception Logging
136
172
 
137
- Use `OpenTrace.error` to log exceptions with automatic class, message, and backtrace extraction:
173
+ Use `OpenTrace.error` to log exceptions with automatic class, message, backtrace, and fingerprint extraction:
138
174
 
139
175
  ```ruby
140
176
  begin
@@ -148,6 +184,19 @@ This captures:
148
184
  - `exception_class` -- the exception class name
149
185
  - `exception_message` -- truncated to 500 characters
150
186
  - `backtrace` -- cleaned (Rails backtrace cleaner or gem-filtered), limited to 15 frames
187
+ - `error_fingerprint` -- 12-char hash for grouping identical errors (stable across line number changes)
188
+
189
+ ### Business Events
190
+
191
+ Use `OpenTrace.event` to send typed business events. Events are always sent at `INFO` level and **bypass level filtering** — they are never suppressed by `min_level` or `allowed_levels`:
192
+
193
+ ```ruby
194
+ OpenTrace.event("payment.completed", "User paid $49.99", { user_id: 42, amount: 49.99 })
195
+ OpenTrace.event("auth.login", "Google OAuth login", { provider: "google", user_id: 7 })
196
+ OpenTrace.event("order.shipped", "Order dispatched", { order_id: "ORD-123" })
197
+ ```
198
+
199
+ Events include an `event_type` field in the payload, making them filterable on the server. They inherit context, `request_id`, and static context just like normal logs.
151
200
 
152
201
  ### Logger Wrapper
153
202
 
@@ -220,6 +269,63 @@ Request IDs are stored using `Fiber[]` (fiber-local storage), which works correc
220
269
 
221
270
  All your existing `Rails.logger.info(...)` calls automatically get forwarded to OpenTrace.
222
271
 
272
+ ### Per-Request Summary
273
+
274
+ When `request_summary` is enabled (the default), the gem accumulates all events during a request -- SQL queries, view renders, cache operations, HTTP calls -- into a single rich log entry emitted at request end. This avoids flooding the queue with hundreds of individual events.
275
+
276
+ Example payload:
277
+
278
+ ```json
279
+ {
280
+ "level": "INFO",
281
+ "message": "GET /dashboard 200 2847ms",
282
+ "metadata": {
283
+ "request_id": "req-abc123",
284
+ "controller": "DashboardController",
285
+ "action": "index",
286
+ "method": "GET",
287
+ "path": "/dashboard",
288
+ "status": 200,
289
+ "duration_ms": 2847.3,
290
+
291
+ "request_user_agent": "Mozilla/5.0...",
292
+ "request_accept": "text/html",
293
+
294
+ "sql_query_count": 34,
295
+ "sql_total_ms": 423.1,
296
+ "sql_slowest_ms": 312.0,
297
+ "sql_slowest_name": "Order Count",
298
+ "n_plus_one_warning": true,
299
+
300
+ "view_render_count": 48,
301
+ "view_total_ms": 890.2,
302
+ "view_slowest_ms": 245.0,
303
+ "view_slowest_template": "dashboard/_activity_feed.html.erb",
304
+
305
+ "cache_reads": 8,
306
+ "cache_hits": 5,
307
+ "cache_writes": 3,
308
+ "cache_hit_ratio": 0.63,
309
+
310
+ "time_breakdown": {
311
+ "sql_pct": 14.9,
312
+ "view_pct": 31.3,
313
+ "http_pct": 0.0,
314
+ "other_pct": 53.8
315
+ },
316
+
317
+ "timeline": [
318
+ { "t": "sql", "n": "User Load", "ms": 1.2, "at": 0.0 },
319
+ { "t": "cache", "a": "read", "hit": true, "ms": 0.1, "at": 6.0 },
320
+ { "t": "sql", "n": "Order Count", "ms": 312.0, "at": 10.0 },
321
+ { "t": "view", "n": "dashboard/index.html.erb", "ms": 890.2, "at": 350.0 }
322
+ ]
323
+ }
324
+ }
325
+ ```
326
+
327
+ The timeline shows a waterfall of events in chronological order. Timeline keys are kept short to minimize payload size: `t` = type, `n` = name, `ms` = duration, `at` = offset from request start, `s` = status, `a` = action, `hit` = cache hit (cache reads), `err` = error class (failed HTTP calls).
328
+
223
329
  ### Controller Subscriber
224
330
 
225
331
  Subscribes to `process_action.action_controller` and captures:
@@ -238,12 +344,26 @@ Subscribes to `process_action.action_controller` and captures:
238
344
  | `exception_class` | Exception class (if raised) |
239
345
  | `exception_message` | Exception message (if raised) |
240
346
  | `backtrace` | Cleaned backtrace (if exception raised) |
347
+ | `error_fingerprint` | 12-char fingerprint for error grouping |
348
+ | `request_content_type` | Request Content-Type header |
349
+ | `request_accept` | Request Accept header |
350
+ | `request_user_agent` | Request User-Agent (truncated to 200 chars) |
351
+ | `request_referer` | Request Referer header |
352
+ | `sql_query_count` | Total SQL queries in this request |
353
+ | `sql_total_ms` | Total SQL time in this request |
354
+ | `n_plus_one_warning` | `true` when query count exceeds 20 |
355
+
356
+ When request summary is enabled, the log also includes view render stats, cache stats, time breakdown, and timeline (see above).
241
357
 
242
358
  Log levels are set automatically:
243
359
  - **ERROR** -- exceptions or 5xx status
244
360
  - **WARN** -- 4xx status
245
361
  - **INFO** -- everything else
246
362
 
363
+ ### N+1 Query Detection
364
+
365
+ Every request tracks the number of SQL queries via a Fiber-local counter. When a request exceeds 20 queries, the log entry includes `n_plus_one_warning: true`. This makes it easy to query OpenTrace for requests with potential N+1 issues.
366
+
247
367
  ### SQL Query Subscriber
248
368
 
249
369
  Subscribes to `sql.active_record` and logs every query with:
@@ -280,12 +400,54 @@ Subscribes to `perform.active_job` and logs every job execution with:
280
400
  | `executions` | Attempt number |
281
401
  | `duration_ms` | Execution duration |
282
402
  | `job_arguments` | Serialized arguments (truncated to 512 bytes) |
403
+ | `queue_latency_ms` | Time spent waiting in queue before execution |
404
+ | `enqueued_at` | When the job was enqueued |
283
405
  | `exception_class` | Exception class (if failed) |
284
406
  | `exception_message` | Exception message (if failed) |
285
407
  | `backtrace` | Cleaned backtrace (if failed) |
408
+ | `error_fingerprint` | Fingerprint for error grouping (if failed) |
286
409
 
287
410
  Failed jobs are logged as `ERROR`, successful jobs as `INFO`.
288
411
 
412
+ ### Deprecation Warning Subscriber
413
+
414
+ Subscribes to `deprecation.rails` and logs all Rails deprecation warnings as `WARN`:
415
+
416
+ | Field | Description |
417
+ |---|---|
418
+ | `deprecation_message` | The deprecation message (truncated to 500 chars) |
419
+ | `deprecation_callsite` | File and line where the deprecated API was called |
420
+ | `request_id` | Current request ID (if in web context) |
421
+
422
+ ### View Render Tracking
423
+
424
+ When request summary is enabled, subscribes to `render_template.action_view` and `render_partial.action_view`. View render events are accumulated in the RequestCollector and included in the per-request summary -- **no individual log entries are emitted** for views.
425
+
426
+ The summary includes:
427
+ - `view_render_count` -- total number of templates/partials rendered
428
+ - `view_total_ms` -- total rendering time
429
+ - `view_slowest_ms` / `view_slowest_template` -- the bottleneck template
430
+
431
+ Template paths are automatically shortened (e.g., `/Users/deploy/app/views/orders/show.html.erb` becomes `orders/show.html.erb`).
432
+
433
+ ### Cache Operation Tracking
434
+
435
+ When request summary is enabled, subscribes to `cache_read.active_support`, `cache_write.active_support`, and `cache_delete.active_support`. Like views, cache events are accumulated -- no individual logs.
436
+
437
+ The summary includes:
438
+ - `cache_reads` / `cache_hits` / `cache_writes`
439
+ - `cache_hit_ratio` -- hit rate (0.0 to 1.0)
440
+
441
+ ### Error Fingerprinting
442
+
443
+ Every error (in controller requests, job failures, and `OpenTrace.error` calls) includes an `error_fingerprint` -- a 12-character hash derived from the exception class and the first application frame in the backtrace. The fingerprint is:
444
+
445
+ - **Stable across deploys** -- line number changes don't affect it
446
+ - **Same error, same fingerprint** -- different error messages at the same location produce the same fingerprint
447
+ - **Different error, different fingerprint** -- different exception classes or different code locations produce different fingerprints
448
+
449
+ Use it to group and count errors in OpenTrace.
450
+
289
451
  ### TaggedLogging
290
452
 
291
453
  If your wrapped logger uses `ActiveSupport::TaggedLogging`, tags are preserved and injected into the metadata:
@@ -297,6 +459,90 @@ Rails.logger.tagged("RequestID-123", "UserID-42") do
297
459
  end
298
460
  ```
299
461
 
462
+ ## Background Monitors
463
+
464
+ ### DB Connection Pool Monitoring
465
+
466
+ Opt-in background thread that periodically reports ActiveRecord connection pool stats:
467
+
468
+ ```ruby
469
+ OpenTrace.configure do |c|
470
+ # ...
471
+ c.pool_monitoring = true
472
+ c.pool_monitoring_interval = 30 # seconds (default: 30)
473
+ end
474
+ ```
475
+
476
+ Reports `pool_size`, `connections_busy`, `connections_idle`, `threads_waiting`, and `checkout_timeout`. Logs at `WARN` when threads are waiting for a connection, `DEBUG` otherwise.
477
+
478
+ ### Job Queue Depth Monitoring
479
+
480
+ Opt-in background thread that reports job queue sizes. Supports Sidekiq, GoodJob, and SolidQueue (auto-detected):
481
+
482
+ ```ruby
483
+ OpenTrace.configure do |c|
484
+ # ...
485
+ c.queue_monitoring = true
486
+ c.queue_monitoring_interval = 60 # seconds (default: 60)
487
+ end
488
+ ```
489
+
490
+ Reports per-queue sizes and total enqueued count. Logs at `WARN` when total exceeds 1,000.
491
+
492
+ ## Advanced Opt-In Features
493
+
494
+ These features have measurable overhead or implementation risks. **Disabled by default.** Enable them after testing in staging.
495
+
496
+ ### Memory Delta Tracking
497
+
498
+ Snapshots process memory (RSS) before and after each request:
499
+
500
+ ```ruby
501
+ OpenTrace.configure do |c|
502
+ # ...
503
+ c.memory_tracking = true
504
+ end
505
+ ```
506
+
507
+ Adds to the request summary:
508
+ - `memory_before_mb` -- RSS before request
509
+ - `memory_after_mb` -- RSS after request
510
+ - `memory_delta_mb` -- difference (positive = memory grew)
511
+
512
+ Uses `/proc/self/statm` on Linux (~10us) or `GC.stat` approximation on macOS (~5us). The delta is process-level, so concurrent requests will affect accuracy. Most accurate on single-threaded servers (Unicorn).
513
+
514
+ ### External HTTP Tracking
515
+
516
+ Instruments outbound `Net::HTTP` calls to capture third-party API performance:
517
+
518
+ ```ruby
519
+ OpenTrace.configure do |c|
520
+ # ...
521
+ c.http_tracking = true
522
+ end
523
+ ```
524
+
525
+ Adds to the request summary:
526
+ - `http_external_count` -- number of outbound HTTP calls
527
+ - `http_external_total_ms` -- total time in external calls
528
+ - `http_slowest_ms` / `http_slowest_host` -- the bottleneck
529
+
530
+ Each HTTP call appears in the timeline:
531
+
532
+ ```json
533
+ { "t": "http", "n": "POST api.stripe.com", "ms": 184.0, "s": 200, "at": 55.0 }
534
+ ```
535
+
536
+ Failed calls include an error type:
537
+
538
+ ```json
539
+ { "t": "http", "n": "POST api.stripe.com", "ms": 5200.0, "s": 0, "err": "Net::ReadTimeout", "at": 55.0 }
540
+ ```
541
+
542
+ A recursion guard prevents OpenTrace's own HTTP calls to the server from being tracked. The `time_breakdown` in the request summary includes `http_pct` alongside `sql_pct` and `view_pct`.
543
+
544
+ **Note**: This works by prepending a module to `Net::HTTP`. Libraries that use `Net::HTTP` internally (Faraday, HTTParty, RestClient) are automatically captured.
545
+
300
546
  ## Runtime Controls
301
547
 
302
548
  ```ruby
@@ -343,12 +589,214 @@ Your App --log()--> [In-Memory Queue] --background thread--> POST /api/logs -->
343
589
  - `enqueue` is non-blocking -- it uses `try_lock` so it never waits on a mutex
344
590
  - The thread is started lazily on the first log call -- no threads are created at boot
345
591
  - If the queue exceeds 1,000 items, new logs are dropped (oldest are preserved)
346
- - Payloads exceeding 32 KB are intelligently truncated (backtrace, params, SQL removed first)
592
+ - Payloads exceeding 256 KB (configurable via `max_payload_bytes`) are intelligently truncated (backtrace, params, SQL removed first)
347
593
  - If still too large after truncation, the payload is split and retried in smaller batches
348
- - All network errors (timeouts, connection refused, DNS failures) are swallowed silently
594
+ - Failed requests are retried with exponential backoff (up to 3 attempts by default)
595
+ - A circuit breaker stops sending when the server is unreachable, resuming after a cooldown
596
+ - Rate-limited responses (429) trigger a backoff delay, respecting the server's `Retry-After` header
597
+ - Authentication failures (401) suspend sending and print a one-time warning to STDERR
349
598
  - The HTTP timeout defaults to 1 second
350
599
  - Pending logs are flushed on process exit via an `at_exit` hook
351
600
 
601
+ ### Retry & Circuit Breaker
602
+
603
+ Failed HTTP requests are retried with exponential backoff and jitter. Only server errors (5xx) and network failures are retried -- client errors (4xx) are not.
604
+
605
+ ```ruby
606
+ OpenTrace.configure do |c|
607
+ # ...
608
+ c.max_retries = 2 # up to 3 total attempts (default: 2)
609
+ c.retry_base_delay = 0.1 # 100ms initial backoff (default: 0.1)
610
+ c.retry_max_delay = 2.0 # cap backoff at 2 seconds (default: 2.0)
611
+ end
612
+ ```
613
+
614
+ A circuit breaker prevents wasting resources when the server is down. After a threshold of consecutive failures, the circuit **opens** and all sends are skipped. After a cooldown, a single **probe** request is sent. If it succeeds, the circuit closes and normal operation resumes.
615
+
616
+ ```ruby
617
+ OpenTrace.configure do |c|
618
+ # ...
619
+ c.circuit_breaker_threshold = 5 # failures before opening (default: 5)
620
+ c.circuit_breaker_timeout = 30 # seconds before probe (default: 30)
621
+ end
622
+ ```
623
+
624
+ ### Backpressure Handling
625
+
626
+ The client responds intelligently to HTTP status codes:
627
+
628
+ | Status | Behavior |
629
+ |---|---|
630
+ | **2xx** | Success -- circuit breaker resets |
631
+ | **429** | Rate limited -- pauses for `Retry-After` seconds (or `rate_limit_backoff`), re-enqueues the batch |
632
+ | **401** | Auth failed -- suspends sending, prints one-time STDERR warning. Resumes after `OpenTrace.configure` |
633
+ | **5xx** | Server error -- retried with backoff, counts toward circuit breaker |
634
+ | **Other 4xx** | Client error -- batch dropped silently |
635
+
636
+ ```ruby
637
+ OpenTrace.configure do |c|
638
+ # ...
639
+ c.rate_limit_backoff = 5.0 # fallback when Retry-After header is missing (default: 5.0)
640
+ end
641
+ ```
642
+
643
+ ### Delivery Observability
644
+
645
+ The client exposes internal delivery statistics so you can monitor the health of the log pipeline:
646
+
647
+ ```ruby
648
+ OpenTrace.stats
649
+ # => {
650
+ # enqueued: 15234,
651
+ # delivered: 15100,
652
+ # dropped_queue_full: 34,
653
+ # dropped_circuit_open: 100,
654
+ # dropped_auth_suspended: 0,
655
+ # dropped_error: 0,
656
+ # retries: 12,
657
+ # rate_limited: 2,
658
+ # auth_failures: 0,
659
+ # payload_splits: 1,
660
+ # batches_sent: 302,
661
+ # bytes_sent: 4812300,
662
+ # queue_size: 23,
663
+ # circuit_state: :closed,
664
+ # auth_suspended: false,
665
+ # uptime_seconds: 3600
666
+ # }
667
+
668
+ OpenTrace.healthy? # true when circuit is closed and auth is not suspended
669
+ OpenTrace.reset_stats! # reset counters (useful after reading/reporting)
670
+ ```
671
+
672
+ #### Drop Callback
673
+
674
+ Register a callback to be notified when logs are dropped. The callback receives the count of dropped items and the reason:
675
+
676
+ ```ruby
677
+ OpenTrace.configure do |c|
678
+ # ...
679
+ c.on_drop = ->(count, reason) {
680
+ StatsD.increment("opentrace.dropped", count, tags: ["reason:#{reason}"])
681
+ }
682
+ end
683
+ ```
684
+
685
+ Reasons: `:queue_full`, `:circuit_open`, `:auth_suspended`, `:error`
686
+
687
+ The callback is called synchronously but **exceptions are always swallowed** -- a broken callback will never affect the client.
688
+
689
+ ### Gzip Compression
690
+
691
+ Outgoing batches are automatically gzip-compressed when they exceed the compression threshold (default: 1KB). This typically achieves 70-85% bandwidth reduction for log payloads with repetitive keys and values.
692
+
693
+ ```ruby
694
+ OpenTrace.configure do |c|
695
+ # ...
696
+ c.compression = true # enable gzip compression (default: true)
697
+ c.compression_threshold = 1024 # only compress payloads > 1KB (default: 1024)
698
+ c.max_payload_bytes = 262_144 # max batch size before splitting (default: 256KB)
699
+ end
700
+ ```
701
+
702
+ Compression uses `Zlib::BEST_SPEED` (level 1) for minimal CPU overhead (~0.14ms per batch). The server must support `Content-Encoding: gzip` on request bodies. OpenTrace server v0.6+ includes transparent decompression middleware.
703
+
704
+ ### Version Negotiation
705
+
706
+ On the first dispatch cycle, the client makes a lightweight `GET /api/version` call to discover the server's API version and capabilities. This runs once per process (or after fork) and never blocks `enqueue`.
707
+
708
+ ```ruby
709
+ # Check server capabilities programmatically
710
+ client = OpenTrace.send(:client)
711
+ client.supports?(:request_summaries) # true if server advertises it
712
+ client.supports?(:gzip_request) # true if server supports gzip
713
+ ```
714
+
715
+ If the server requires a newer client API version, a warning is printed to STDERR:
716
+
717
+ ```
718
+ [OpenTrace] Server requires API version >= 2, but this client supports version 1.
719
+ Please upgrade the opentrace gem. Log forwarding may not work correctly.
720
+ ```
721
+
722
+ Every request includes an `X-API-Version: 1` header so the server can reject incompatible clients with a clear error. Old servers without `/api/version` are handled gracefully — the check silently skips and all features remain enabled.
723
+
724
+ ### Distributed Tracing
725
+
726
+ When `trace_propagation` is enabled (the default), the middleware extracts or generates a W3C-compatible trace context for each request:
727
+
728
+ - **Incoming**: Reads `traceparent` header (W3C standard), falls back to `X-Trace-ID`, then `X-Request-ID`
729
+ - **Outgoing**: When `http_tracking` is enabled, injects `traceparent`, `X-Trace-ID`, and `X-Request-ID` into outbound HTTP requests
730
+
731
+ This enables cross-service correlation — all logs from a distributed request chain share the same `trace_id`.
732
+
733
+ ```ruby
734
+ OpenTrace.configure do |c|
735
+ # ...
736
+ c.trace_propagation = true # extract/propagate trace context (default: true)
737
+ c.http_tracking = true # also inject into outgoing HTTP calls (opt-in)
738
+ end
739
+ ```
740
+
741
+ Each log entry includes `trace_id`, `span_id`, and `parent_span_id` (when available) as top-level fields. The server indexes these for fast trace lookups.
742
+
743
+ ### Request Summary Architecture
744
+
745
+ When `request_summary` is enabled, events within a request are **accumulated** in a Fiber-local `RequestCollector` instead of being pushed to the queue individually:
746
+
747
+ ```
748
+ Request Start
749
+ Middleware creates RequestCollector in Fiber[]
750
+ SQL events ──► collector.record_sql() (no queue push)
751
+ View events ──► collector.record_view() (no queue push)
752
+ Cache events ──► collector.record_cache() (no queue push)
753
+ HTTP events ──► collector.record_http() (no queue push)
754
+ Request End
755
+ Controller subscriber builds request_summary from collector
756
+ One queue push: metadata (user/request context) + request_summary (perf data)
757
+ Middleware cleans up RequestCollector
758
+ ```
759
+
760
+ This means a request with 30 SQL queries, 50 view renders, and 10 cache operations produces **one log entry** instead of 91.
761
+
762
+ ### Structured Request Metrics
763
+
764
+ When a `RequestCollector` is active, performance data is sent as a **separate `request_summary` field** instead of being merged into metadata. This allows the server to store it in a dedicated `request_summaries` table with indexed columns for fast analytical queries.
765
+
766
+ ```ruby
767
+ # Sent automatically by the Rails subscriber — no code changes needed.
768
+ # The payload looks like:
769
+ {
770
+ "metadata": { "request_id": "req-abc", "user_id": 42 },
771
+ "request_summary": {
772
+ "controller": "InvoicesController",
773
+ "action": "index",
774
+ "method": "GET",
775
+ "path": "/invoices",
776
+ "status": 200,
777
+ "duration_ms": 45.2,
778
+ "sql_count": 3,
779
+ "sql_total_ms": 12.1,
780
+ "n_plus_one": false,
781
+ "view_count": 2,
782
+ "view_total_ms": 28.3,
783
+ "cache_reads": 1,
784
+ "cache_hits": 1,
785
+ "cache_hit_ratio": 1.0,
786
+ "timeline": [{"t": "sql", "n": "Invoice Load", "ms": 8.2, "at": 2.0}]
787
+ }
788
+ }
789
+ ```
790
+
791
+ You can also pass `request_summary:` manually:
792
+
793
+ ```ruby
794
+ OpenTrace.log("INFO", "Custom request", { user_id: 42 },
795
+ request_summary: { controller: "Custom", action: "run", sql_count: 5 })
796
+ ```
797
+
798
+ **Backward compatibility**: Old servers ignore the `request_summary` field. When no collector is active (background jobs, non-Rails), data falls back to metadata as before.
799
+
352
800
  ## Log Payload Format
353
801
 
354
802
  Each log is sent as a JSON object to `POST /api/logs`:
@@ -367,6 +815,19 @@ Each log is sent as a JSON object to `POST /api/logs`:
367
815
  "hostname": "web-01",
368
816
  "pid": 12345,
369
817
  "git_sha": "a1b2c3d"
818
+ },
819
+ "request_summary": {
820
+ "controller": "InvoicesController",
821
+ "action": "index",
822
+ "method": "GET",
823
+ "path": "/invoices",
824
+ "status": 200,
825
+ "duration_ms": 45.2,
826
+ "sql_count": 3,
827
+ "sql_total_ms": 12.1,
828
+ "view_count": 2,
829
+ "view_total_ms": 28.3,
830
+ "timeline": [...]
370
831
  }
371
832
  }
372
833
  ```
@@ -379,7 +840,11 @@ Each log is sent as a JSON object to `POST /api/logs`:
379
840
  | `service` | string | no |
380
841
  | `environment` | string | no |
381
842
  | `trace_id` | string | no |
843
+ | `span_id` | string | no |
844
+ | `parent_span_id` | string | no |
845
+ | `event_type` | string | no |
382
846
  | `metadata` | object | no |
847
+ | `request_summary` | object | no |
383
848
 
384
849
  The server accepts a single JSON object or an array of objects.
385
850
 
@@ -0,0 +1,61 @@
1
+ # frozen_string_literal: true
2
+
3
module OpenTrace
  # Three-state circuit breaker (closed -> open -> half-open -> closed).
  #
  # * CLOSED    -- requests are allowed; failures are counted.
  # * OPEN      -- requests are refused until +recovery_timeout+ seconds have
  #                elapsed since the most recent recorded failure.
  # * HALF_OPEN -- exactly one probe request has been let through; further
  #                requests are refused until the probe outcome is reported
  #                via #record_success or #record_failure (or #reset!).
  #
  # All state transitions happen while holding an internal Mutex.
  class CircuitBreaker
    CLOSED = :closed
    OPEN = :open
    HALF_OPEN = :half_open

    # @return [Symbol] current state, one of CLOSED, OPEN or HALF_OPEN
    attr_reader :state

    # @param failure_threshold [Integer] number of failures recorded since
    #   the last success/reset at which the circuit opens
    # @param recovery_timeout [Numeric] seconds to wait after the most recent
    #   failure before a probe request is allowed
    def initialize(failure_threshold:, recovery_timeout:)
      @failure_threshold = failure_threshold
      @recovery_timeout = recovery_timeout
      @state = CLOSED
      @failure_count = 0
      # Monotonic-clock reading (seconds) of the most recent failure.
      @last_failure_at = nil
      @mutex = Mutex.new
    end

    # Decides whether the caller may attempt a request right now.
    #
    # When the circuit is OPEN and the recovery timeout has elapsed, this
    # call moves the breaker to HALF_OPEN and returns true for that single
    # caller (the probe).
    #
    # @return [Boolean] true if the request may proceed
    def allow_request?
      @mutex.synchronize do
        case @state
        when CLOSED
          true
        when OPEN
          # Elapsed time is measured on the monotonic clock so that wall-clock
          # adjustments (NTP steps, manual changes) can neither keep the
          # circuit open indefinitely nor end the cooldown early.
          if monotonic_now - @last_failure_at >= @recovery_timeout
            @state = HALF_OPEN
            true
          else
            false
          end
        when HALF_OPEN
          # A probe is already in flight; the breaker stays here until its
          # outcome is recorded or #reset! is called.
          false
        end
      end
    end

    # Reports a successful request: clears the failure count and closes the
    # circuit.
    #
    # @return [Symbol] the new state (CLOSED)
    def record_success
      @mutex.synchronize do
        @failure_count = 0
        @state = CLOSED
      end
    end

    # Reports a failed request: bumps the failure count, remembers when it
    # happened and opens the circuit once the threshold is reached.
    #
    # @return [Symbol, nil] OPEN when the circuit is (re)opened, nil otherwise
    def record_failure
      @mutex.synchronize do
        @failure_count += 1
        @last_failure_at = monotonic_now
        @state = OPEN if @failure_count >= @failure_threshold
      end
    end

    # Forces the breaker back to its initial CLOSED state and forgets all
    # recorded failures.
    #
    # @return [nil]
    def reset!
      @mutex.synchronize do
        @state = CLOSED
        @failure_count = 0
        @last_failure_at = nil
      end
    end

    private

    # @return [Float] seconds on the monotonic clock (unaffected by wall-clock
    #   changes; only meaningful for computing differences)
    def monotonic_now
      Process.clock_gettime(Process::CLOCK_MONOTONIC)
    end
  end
end