tracelit 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -11
- data/lib/tracelit/error_span_processor.rb +70 -3
- data/lib/tracelit/metrics.rb +98 -0
- data/lib/tracelit/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e7f493aa08c7a1d3595be84019013d49bfa48e2daae3a653cb38b17ad702a8f4
|
|
4
|
+
data.tar.gz: 07e1c738fb2a5eee274b48f30ff6fea1b97c613b14778de081471fd1fa9ed1d0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 4a9c56ef732bc424fbc7a04484c5967bba3d9714d1f520f2b017b07c9fd939b8126b6fc7ff575fe538911be951a0a0da8025dde7e50724c42bdab005703219a8
|
|
7
|
+
data.tar.gz: 728eefa18600408fcb6475ec87fe83669a0f373500bcbe2a010e2f0f0776c8bd8a5120c4f16ffcca886125be307a58ab369e38161345ed1df567464ee7777ec8
|
data/README.md
CHANGED
|
@@ -6,12 +6,6 @@ Official Ruby SDK for [Tracelit](https://tracelit.io) — drop-in OpenTelemetry
|
|
|
6
6
|
|
|
7
7
|
---
|
|
8
8
|
|
|
9
|
-
## Set up with AI
|
|
10
|
-
|
|
11
|
-
Want an AI assistant (Cursor, Claude, ChatGPT, etc.) to integrate Tracelit into your app automatically? Copy the contents of [`llm_prompt.txt`](./llm_prompt.txt) and paste it as your prompt. It covers gem installation, initializer setup, manual spans, custom metrics, and test guard — everything the AI needs in one shot.
|
|
12
|
-
|
|
13
|
-
---
|
|
14
|
-
|
|
15
9
|
## Installation
|
|
16
10
|
|
|
17
11
|
Add to your `Gemfile` and run `bundle install`:
|
|
@@ -273,9 +267,6 @@ TRACELIT_ENABLED=false
|
|
|
273
267
|
|
|
274
268
|
---
|
|
275
269
|
|
|
276
|
-
##
|
|
270
|
+
## Changelog
|
|
277
271
|
|
|
278
|
-
|
|
279
|
-
bundle install
|
|
280
|
-
bundle exec rspec
|
|
281
|
-
```
|
|
272
|
+
See the [release history](https://docs.tracelit.io/changelog) on the Tracelit docs.
|
|
@@ -13,12 +13,27 @@ module Tracelit
|
|
|
13
13
|
# so there is no double-export for sampled error spans
|
|
14
14
|
#
|
|
15
15
|
# NOTE: opentelemetry-sdk 1.x uses on_finish (not on_end) as the hook name.
|
|
16
|
+
#
|
|
17
|
+
# Important: this processor must never block application threads. Exporting an
|
|
18
|
+
# unsampled error span synchronously in on_finish can block request / console
|
|
19
|
+
# threads when the ingest endpoint is slow or retrying. We enqueue span data
|
|
20
|
+
# into a bounded in-memory queue and export on a background worker thread.
|
|
16
21
|
class ErrorSpanProcessor
|
|
22
|
+
QUEUE_CAPACITY = 512
|
|
23
|
+
SHUTDOWN_SENTINEL = Object.new
|
|
24
|
+
|
|
17
25
|
# Wraps +exporter+ with a bounded queue and a background thread that drains it.
#
# All state the worker reads (@exporter, @queue) is assigned before the thread
# is started, so the worker never observes a half-built processor.
#
# NOTE(review): the worker thread is only created here. Threads do not survive
# fork — confirm the processor is rebuilt in forked workers, otherwise the
# queue would fill up and every span would be dropped.
#
# @param exporter [#export, #force_flush] exporter used by the worker thread
def initialize(exporter)
  @exporter = exporter
  @shutdown = false
  @queue = SizedQueue.new(QUEUE_CAPACITY)

  @worker = Thread.new do
    # Tag the thread so it can be told apart from application threads.
    Thread.current[:tracelit_error_export_worker] = true
    worker_loop
  end
  # A failing worker must never take the host process down with it.
  @worker.abort_on_exception = false
end
|
|
20
35
|
|
|
21
|
-
def on_start(
|
|
36
|
+
# Span-start hook. This processor only reacts to finished spans, so the hook
# is intentionally a no-op and both arguments are ignored.
#
# @return [nil]
def on_start(_span, _parent_context); end
|
|
24
39
|
|
|
@@ -30,19 +45,71 @@ module Tracelit
|
|
|
30
45
|
# This prevents double-export of error spans on traces that were sampled.
|
|
31
46
|
return if span.context.trace_flags.sampled?
|
|
32
47
|
|
|
33
|
-
#
|
|
34
|
-
|
|
48
|
+
# Queue for background export; never block the caller.
|
|
49
|
+
enqueue(span.to_span_data)
|
|
35
50
|
rescue StandardError
|
|
36
51
|
# Never let processor errors propagate to the application
|
|
37
52
|
end
|
|
38
53
|
|
|
39
54
|
# Waits for queued error spans to be handed to the exporter, then flushes the
# exporter itself.
#
# +timeout+ is a single budget for the whole call: time spent waiting for the
# queue to drain is subtracted before the remainder is passed to the exporter,
# so the call cannot take roughly twice the requested timeout.
#
# @param timeout [Numeric, nil] overall budget in seconds; nil means no limit
# @return [Object] whatever the exporter's force_flush returns
def force_flush(timeout: nil)
  started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  wait_for_queue_drain(timeout)

  remaining = timeout
  if timeout
    spent = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at
    # Never hand a negative timeout to the exporter.
    remaining = [timeout - spent, 0].max
  end

  @exporter.force_flush(timeout: remaining)
end
|
|
42
58
|
|
|
43
59
|
# Stops the background worker. Safe to call more than once.
#
# Returns one of the SDK's integer export status codes rather than nil or a
# Thread, so the result can be compared with the codes returned by other span
# processors registered on the same tracer provider.
#
# @param timeout [Numeric, nil] seconds to wait for the worker to exit;
#   defaults to 1 second when nil so shutdown can never hang indefinitely
# @return [Integer] Export::SUCCESS when the worker has stopped (or shutdown
#   already ran), Export::TIMEOUT when it was still running after the wait
def shutdown(timeout: nil)
  return OpenTelemetry::SDK::Trace::Export::SUCCESS if @shutdown

  @shutdown = true
  enqueue_shutdown_signal

  # Thread#join returns nil when the wait times out, the thread otherwise.
  stopped = @worker.nil? || @worker.join(timeout || 1)

  # Do not shut down the shared exporter here —
  # the BatchSpanProcessor owns its lifecycle
  if stopped
    OpenTelemetry::SDK::Trace::Export::SUCCESS
  else
    OpenTelemetry::SDK::Trace::Export::TIMEOUT
  end
end
|
|
67
|
+
|
|
68
|
+
private
|
|
69
|
+
|
|
70
|
+
# Body of the background worker: blocks on the queue and exports each item on
# its own until the shutdown sentinel is dequeued.
#
# Export failures are swallowed per item so one bad export does not stop the
# worker; the outer rescue keeps any other failure inside this thread.
def worker_loop
  until (job = @queue.pop).equal?(SHUTDOWN_SENTINEL)
    begin
      @exporter.export([job])
    rescue StandardError
      # Never let exporter failures crash the app worker thread
    end
  end
rescue StandardError
  # Last-ditch guard: processor background failures must stay isolated.
end
|
|
84
|
+
|
|
85
|
+
# Hands +span_data+ to the background worker without ever blocking the caller.
#
# Items are dropped in two situations:
# * the queue is full — protects application latency;
# * the processor has been shut down — the worker has been told to stop, so
#   anything queued now would never be exported and would only pin memory.
#
# @param span_data [Object] span snapshot to export
# @return [Object, nil] the queue when the item was accepted, nil when dropped
def enqueue(span_data)
  return if @shutdown

  # Non-blocking push: raises ThreadError instead of waiting when full.
  @queue.push(span_data, true)
rescue ThreadError
  # Queue full — drop to protect application latency.
  nil
end
|
|
90
|
+
|
|
91
|
+
# Queues the shutdown sentinel for the worker without blocking.
#
# If the queue is full, the oldest pending item is discarded and the push is
# attempted exactly once more. A second failure is ignored — the join timeout
# is the final guard.
def enqueue_shutdown_signal
  retried = false
  begin
    @queue.push(SHUTDOWN_SENTINEL, true)
  rescue ThreadError
    return if retried

    retried = true
    begin
      # Queue is full: drop one oldest item so shutdown can proceed.
      @queue.pop(true)
    rescue ThreadError
      # Queue emptied in the meantime — nothing to discard.
    end
    retry
  end
end
|
|
106
|
+
|
|
107
|
+
# Polls until the queue is empty, the deadline passes, or the worker is gone.
#
# The worker-liveness check matters when +timeout+ is nil: once the worker has
# exited, nothing will ever drain the queue, so waiting would spin forever.
# The deadline uses the monotonic clock so wall-clock adjustments cannot
# stretch or shorten the wait.
#
# NOTE(review): an empty queue means the last item was dequeued, not that its
# export has finished — confirm whether callers need the stronger guarantee.
#
# @param timeout [Numeric, nil] maximum seconds to wait; nil means no deadline
# @return [nil]
def wait_for_queue_drain(timeout)
  deadline = timeout && (Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout)

  until @queue.empty?
    break unless @worker&.alive?
    break if deadline && Process.clock_gettime(Process::CLOCK_MONOTONIC) >= deadline

    sleep(0.01)
  end
end
|
|
47
114
|
end
|
|
48
115
|
end
|
data/lib/tracelit/metrics.rb
CHANGED
|
@@ -48,6 +48,7 @@ module Tracelit
|
|
|
48
48
|
install_sidekiq_middleware if defined?(::Sidekiq)
|
|
49
49
|
install_connection_pool_poller if defined?(::ActiveRecord)
|
|
50
50
|
install_memory_poller
|
|
51
|
+
install_cpu_poller
|
|
51
52
|
rescue StandardError => e
|
|
52
53
|
OpenTelemetry.logger.warn("[Tracelit] failed to set up metrics: #{e.message}")
|
|
53
54
|
end
|
|
@@ -62,8 +63,10 @@ module Tracelit
|
|
|
62
63
|
# Clears the "installed" flags and installs the pollers again. The warning
# text indicates this is meant to run after a fork.
#
# The connection-pool poller is only installed when ActiveRecord is loaded.
# Any failure is logged as a warning and never raised to the caller.
#
# @param config [Object] accepted but not read by this method
def self.restart_pollers(config)
  @connection_pool_poller_installed = @memory_poller_installed = @cpu_poller_installed = false

  install_connection_pool_poller if defined?(::ActiveRecord)
  install_memory_poller
  install_cpu_poller
rescue StandardError => error
  OpenTelemetry.logger.warn("[Tracelit] failed to restart pollers after fork: #{error.message}")
end
|
|
@@ -349,5 +352,100 @@ module Tracelit
|
|
|
349
352
|
rescue StandardError => e
|
|
350
353
|
OpenTelemetry.logger.warn("[Tracelit] failed to install memory poller: #{e.message}")
|
|
351
354
|
end
|
|
355
|
+
|
|
356
|
+
# Polls process CPU utilisation every 30 seconds on a background thread.
# Computes a percentage by tracking the delta in cumulative CPU time
# (user + system) against monotonic wall-clock elapsed time — same approach
# as the Go and Node SDKs.
#
# CPU time comes from read_cpu_time_s, which returns a cumulative counter on
# every platform so the delta below is always meaningful.
#
# Emits: process.runtime.cpu.usage (%), capped to the 0..100 range
# Attributes: process.pid, process.runtime
#
# Installs at most once until @cpu_poller_installed is reset. Setup failures
# are logged as a warning and never raised.
def self.install_cpu_poller
  return if @cpu_poller_installed
  @cpu_poller_installed = true

  cpu_gauge = @meter.create_gauge(
    "process.runtime.cpu.usage",
    description: "Process CPU utilisation percentage",
    unit: "%"
  )

  pid = Process.pid
  linux = File.exist?("/proc/self/stat")
  interval = 30 # seconds

  thread = Thread.new do
    Thread.current[:tracelit_cpu_poller] = true

    last_cpu_time = read_cpu_time_s(pid, linux)
    last_wall_time = Process.clock_gettime(Process::CLOCK_MONOTONIC)

    loop do
      sleep interval
      begin
        now = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        elapsed = now - last_wall_time
        cpu_time = read_cpu_time_s(pid, linux)

        # A failed read keeps the previous baseline, so the next good sample
        # is simply averaged over the longer window.
        next if elapsed <= 0 || cpu_time.nil?

        previous_cpu_time = last_cpu_time
        last_cpu_time = cpu_time
        last_wall_time = now

        # No baseline yet (the initial read failed): this sample only seeds
        # it. Without this the poller would stay silent forever.
        next if previous_cpu_time.nil?

        delta = cpu_time - previous_cpu_time
        next if delta < 0

        pct = (delta / elapsed * 100.0).clamp(0.0, 100.0)

        cpu_gauge.record(pct, attributes: {
          "process.pid" => pid.to_s,
          "process.runtime" => "ruby",
        })
      rescue StandardError
        # Retry next cycle — never crash on a metric poll failure
      end
    end
  end
  thread.abort_on_exception = false
  thread
rescue StandardError => e
  OpenTelemetry.logger.warn("[Tracelit] failed to install CPU poller: #{e.message}")
end

# Returns cumulative CPU time (user + system) for this process in seconds.
#
# With +linux+ true, reads /proc/self/stat (utime + stime, converted assuming
# 100 clock ticks per second). Otherwise uses Process.times, which reports
# the same cumulative counters on any platform. Both are running totals, so
# the caller can subtract consecutive readings.
#
# @param pid [Integer] kept for interface compatibility; both sources always
#   measure the calling process
# @param linux [Boolean] whether to read /proc/self/stat
# @return [Float, nil] CPU seconds, or nil when /proc/self/stat cannot be
#   read or parsed
def self.read_cpu_time_s(pid, linux)
  unless linux
    usage = Process.times
    return usage.utime + usage.stime
  end

  stat = begin
    File.read("/proc/self/stat")
  rescue StandardError
    return nil
  end

  # Format: pid (comm) state ppid ... utime stime ...
  # comm can contain spaces — find last ')' and split from there.
  comm_end = stat.rindex(")")
  return nil unless comm_end

  fields = stat[(comm_end + 1)..].split
  # After ')': state(0) ppid(1) ... utime(11) stime(12)
  # Strict base-10 parsing: a missing or malformed field yields nil, not 0.
  utime, stime = fields.values_at(11, 12).map do |raw|
    raw && Integer(raw, 10, exception: false)
  end
  return nil unless utime && stime

  # Jiffies at 100 Hz → seconds
  (utime + stime) / 100.0
end
|
|
352
450
|
end
|
|
353
451
|
end
|
data/lib/tracelit/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: tracelit
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.9
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Tracelit
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-05-
|
|
11
|
+
date: 2026-05-17 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: opentelemetry-sdk
|