vivarium 0.5.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,277 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "net/http"
5
+ require "uri"
6
+
7
+ module Vivarium
8
+ # Background OTLP/HTTP(JSON) sender. Completed spans are enqueued and flushed
9
+ # in batches by a worker thread to {endpoint}/v1/traces. Send failures are
10
+ # logged and the batch is dropped so a resident observation never stalls.
11
class OtelHttpExporter
  # endpoint       - collector base URL; "/v1/traces" is appended unless the
  #                  URL already ends with it.
  # flush_interval - longest time (seconds) the idle worker waits on the
  #                  condition variable before re-checking the queue.
  # max_batch      - maximum number of spans taken from the queue per POST.
  # max_queue      - queue capacity; a span arriving while the queue is full
  #                  is counted in @dropped and discarded.
  def initialize(endpoint:, flush_interval: 2.0, max_batch: 256, max_queue: 10_000)
    @uri = build_uri(endpoint)
    @flush_interval = flush_interval
    @max_batch = max_batch
    @max_queue = max_queue

    @mutex = Mutex.new
    @cond = ConditionVariable.new
    @queue = []
    @dropped = 0
    @stop = false
    @thread = nil
  end

  # Spawns the worker thread on the first call; later calls do nothing.
  # Always returns self so the call can be chained.
  def start
    @thread ||= Thread.new { worker }
    self
  end

  # Appends a span to the queue and wakes the worker. When the queue already
  # holds max_queue spans the span is not stored and only the drop counter
  # is incremented.
  def enqueue(span)
    @mutex.synchronize do
      if @queue.size < @max_queue
        @queue << span
        @cond.signal
      else
        @dropped += 1
      end
    end
  end

  # Flags the worker to stop, wakes it, and waits for it to finish; the worker
  # keeps sending until the queue is empty. Afterwards reports how many spans
  # were discarded because the queue was full, if any.
  def shutdown
    @mutex.synchronize do
      @stop = true
      @cond.signal
    end
    @thread.join if @thread

    return unless @dropped.positive?

    warn "[vivarium] otel: dropped #{@dropped} span(s) (queue overflow)"
  end

  private

  # Worker thread body: keeps taking and posting batches until a stop has been
  # requested and nothing is left in the queue. An error escaping the loop is
  # reported with `warn` and ends the worker.
  def worker
    until stop_and_drained?
      spans = take_batch
      next if spans.empty?

      post_batch(spans)
    end
  rescue StandardError => e
    warn "[vivarium] otel worker error: #{e.class}: #{e.message}"
  end

  # True once shutdown was requested and the queue is empty.
  def stop_and_drained?
    @mutex.synchronize { @queue.empty? && @stop }
  end

  # Removes and returns up to max_batch queued spans. When there is nothing to
  # send and no stop was requested, first waits for a signal for at most
  # flush_interval seconds; the result may therefore be empty.
  def take_batch
    @mutex.synchronize do
      idle = @queue.empty? && !@stop
      @cond.wait(@mutex, @flush_interval) if idle
      @queue.shift(@max_batch)
    end
  end

  # Serializes the spans into one JSON document and POSTs it to the collector.
  # A non-2xx response or any StandardError is reported with `warn`; the batch
  # is not re-queued in either case.
  def post_batch(spans)
    payload = JSON.generate(Vivarium::OtelExporter.wrap_document(spans))
    request = Net::HTTP::Post.new(@uri).tap do |req|
      req["Content-Type"] = "application/json"
      req.body = payload
    end

    client = Net::HTTP.new(@uri.host, @uri.port)
    client.use_ssl = (@uri.scheme == "https")
    client.open_timeout = 5
    client.read_timeout = 10

    response = client.request(request)
    unless response.code.to_s.start_with?("2")
      warn "[vivarium] otel: collector returned HTTP #{response.code} for #{spans.size} span(s)"
    end
  rescue StandardError => e
    warn "[vivarium] otel: POST failed (#{e.class}: #{e.message}); dropped #{spans.size} span(s)"
  end

  # Normalizes the endpoint (surrounding whitespace and one trailing "/"
  # removed), makes sure it ends with "/v1/traces", and parses it into a URI.
  def build_uri(endpoint)
    target = endpoint.to_s.strip.chomp("/")
    target = "#{target}/v1/traces" unless target.end_with?("/v1/traces")
    URI.parse(target)
  end
end
99
+
100
+ # Reconstructs method-call spans live from the event stream and enqueues each
101
+ # completed span to an OtelHttpExporter. Each observation session becomes one
102
+ # trace with a "vivarium session" root span, per-thread child spans, and method
103
+ # spans nested under the current thread/method span.
104
class OtelSpanStreamer
  # Salts mixed into the synthesized session-root and thread-root ids. The
  # byte values spell ASCII "vivarium" and "thread" (zero padded).
  SESSION_ROOT_SALT = 0x766976617269756d
  THREAD_ROOT_SALT = 0x7468726561640000
  PROCESS_EXIT_EVENT_NAME = "proc_exit"

  # exporter            - object responding to #enqueue(span_hash).
  # session_start_iso   - session start time, converted once through
  #                       Vivarium::OtelExporter.iso_to_unix_ns.
  # session_start_ktime - kernel ktime matching session_start_iso; all event
  #                       ktimes are translated relative to it.
  # observer_pid        - pid recorded on the session span and folded into the
  #                       trace id.
  # main_tid            - tid whose thread span is the root thread, parented
  #                       directly to the session span.
  def initialize(exporter:, session_start_iso:, session_start_ktime:, observer_pid:, main_tid:)
    @exporter = exporter
    start_unix = Vivarium::OtelExporter.iso_to_unix_ns(session_start_iso)
    start_ktime = session_start_ktime.to_i
    # ktime -> string timestamp: converted session start plus the ktime offset.
    @to_unix = ->(k) { (start_unix + (k.to_i - start_ktime)).to_s }
    @session_start_ktime = start_ktime
    @observer_pid = observer_pid
    @main_tid = main_tid
    @trace_hi, @trace_lo = Vivarium.synth_trace_id(observer_pid, main_tid, SESSION_ROOT_SALT, start_ktime)
    @session_span_id = Vivarium.synth_span_id(@trace_hi, @trace_lo, observer_pid, start_ktime ^ SESSION_ROOT_SALT)
    @stacks = Hash.new { |h, k| h[k] = [] } # tid => stack of open method-span records
    @thread_spans = {}                      # tid => thread-span record
    @bpf_thread_span_ids = {}               # event span_id => exported thread span id
  end

  # Feeds one event into the streamer. Events whose comm is flagged by
  # Vivarium::OtelExporter.internal_comm? are ignored; "span_start" and
  # "span_stop" open and close method spans; anything else is attached as a
  # span event or, for "proc_exit", ends the thread.
  def on_event(ev)
    return if Vivarium::OtelExporter.internal_comm?(ev.comm)

    case ev.event_name
    when "span_start" then handle_start(ev)
    when "span_stop" then handle_stop(ev)
    else handle_event(ev)
    end
  end

  # Ends the observation: closes every method span still open with
  # stop_ktime, emits the thread spans not yet emitted, then the session span.
  def finalize(stop_ktime:)
    @stacks.each_value { |stack| close_open_spans(stack, stop_ktime) }
    @thread_spans.each_value { |rec| emit_thread_span(rec, stop_ktime) }
    emit_session_span(stop_ktime)
  end

  private

  # Opens a method span on the event's thread. Its parent is the innermost
  # open method span of that thread, or the thread span when none is open.
  def handle_start(ev)
    thread = touch_thread_span(ev)
    stack = @stacks[ev.tid]
    parent = stack.empty? ? thread[:span_id] : stack.last[:span_id]

    name, file, lineno = Vivarium::OtelExporter.read_span_payload(ev.payload)
    stack.push(
      tid: ev.tid, pid: ev.pid, trace_hi: @trace_hi, trace_lo: @trace_lo,
      span_id: Vivarium.synth_span_id(ev.trace_hi.to_i, ev.trace_lo.to_i, ev.tid, ev.ktime_ns),
      parent: parent, name: (name.nil? || name.empty? ? "<anonymous>" : name),
      file: file, lineno: lineno, start_k: ev.ktime_ns, events: []
    )
  end

  # Closes the innermost open method span of the event's thread, if any.
  def handle_stop(ev)
    touch_thread_span(ev)
    rec = @stacks[ev.tid].pop
    emit_method_span(rec, ev.ktime_ns) if rec
  end

  # Handles every non start/stop event. A process-exit event ends the thread;
  # any other event is recorded on the innermost open method span, or on the
  # thread span when the thread has no open method span.
  def handle_event(ev)
    thread = touch_thread_span(ev)
    if ev.event_name == PROCESS_EXIT_EVENT_NAME
      # Method spans still open on the exiting thread end with the thread.
      # Leaving them for finalize would stamp them with the session stop time,
      # after their own thread span has already ended.
      close_open_spans(@stacks[ev.tid], ev.ktime_ns)
      # The root thread span is only emitted by finalize.
      emit_thread_span(thread, ev.ktime_ns) unless thread[:root]
      return
    end

    stack = @stacks[ev.tid]
    target = stack.empty? ? thread : stack.last
    target[:events] << Vivarium::OtelExporter.build_span_event(ev, @to_unix)
  end

  # Emits all open method spans of one stack, innermost first, ending at end_k.
  def close_open_spans(stack, end_k)
    emit_method_span(stack.pop, end_k) until stack.empty?
  end

  # Returns the thread-span record for the event's tid, creating it on first
  # sight, and refreshes its comm and observed ktime range from the event.
  def touch_thread_span(ev)
    rec = (@thread_spans[ev.tid] ||= new_thread_span(ev))
    remember_bpf_thread_span(ev, rec)
    rec[:comm] = ev.comm.to_s unless ev.comm.to_s.empty?
    rec[:min_k] = ev.ktime_ns if ev.ktime_ns < rec[:min_k]
    rec[:max_k] = ev.ktime_ns if ev.ktime_ns > rec[:max_k]
    rec
  end

  # Builds a fresh thread-span record from the first event seen for a tid.
  # Only the main thread gets a fixed parent (the session span); other
  # threads resolve their parent lazily in thread_parent_span_id.
  def new_thread_span(ev)
    {
      tid: ev.tid, pid: ev.pid, comm: ev.comm.to_s,
      span_id: thread_span_id(ev),
      bpf_parent_span_id: ev.parent_span_id.to_i,
      parent: ev.tid == @main_tid ? @session_span_id : nil,
      min_k: ev.ktime_ns, max_k: ev.ktime_ns,
      root: ev.tid == @main_tid,
      events: [], emitted: false
    }
  end

  # Uses the span id carried by the event when it is non-zero; otherwise
  # synthesizes one from the trace id (salted), tid, and event ktime.
  def thread_span_id(ev)
    span_id = ev.span_id.to_i
    return span_id unless span_id.zero?

    Vivarium.synth_span_id(@trace_hi ^ THREAD_ROOT_SALT, @trace_lo, ev.tid, ev.ktime_ns)
  end

  # Records which exported thread span a non-zero event span_id belongs to,
  # and keeps the record's parent span id up to date when the event has one.
  def remember_bpf_thread_span(ev, rec)
    bpf_span_id = ev.span_id.to_i
    @bpf_thread_span_ids[bpf_span_id] = rec[:span_id] unless bpf_span_id.zero?

    bpf_parent_span_id = ev.parent_span_id.to_i
    rec[:bpf_parent_span_id] = bpf_parent_span_id unless bpf_parent_span_id.zero?
  end

  # Parent for a thread span: the fixed parent when set, else the thread span
  # registered for the record's parent span id, else the session span. A
  # record is never made its own parent.
  def thread_parent_span_id(rec)
    parent = rec[:parent]
    return parent if parent

    mapped = @bpf_thread_span_ids[rec[:bpf_parent_span_id]]
    mapped && mapped != rec[:span_id] ? mapped : @session_span_id
  end

  # Enqueues one finished method span ending at end_k. File and line
  # attributes are added only when present.
  def emit_method_span(rec, end_k)
    attrs = [
      Vivarium::OtelExporter.int_attr("thread.id", rec[:tid]),
      Vivarium::OtelExporter.int_attr("process.pid", rec[:pid])
    ]
    attrs << Vivarium::OtelExporter.str_attr("code.filepath", rec[:file]) if rec[:file] && !rec[:file].empty?
    attrs << Vivarium::OtelExporter.int_attr("code.lineno", rec[:lineno]) if rec[:lineno]&.positive?

    @exporter.enqueue(
      Vivarium::OtelExporter.span_hash(
        trace_hi: rec[:trace_hi], trace_lo: rec[:trace_lo], span_id: rec[:span_id],
        parent: rec[:parent], name: rec[:name], start_k: rec[:start_k], stop_k: end_k,
        to_unix: @to_unix, attributes: attrs, events: rec[:events]
      )
    )
  end

  # Enqueues a thread span once; later calls for the same record do nothing.
  # The root thread spans the whole session; other threads span from their
  # first observed event to the later of their last event and stop_ktime.
  def emit_thread_span(rec, stop_ktime)
    return if rec[:emitted]

    start_k = rec[:root] ? @session_start_ktime : rec[:min_k]
    stop_k = rec[:root] ? stop_ktime : [rec[:max_k], stop_ktime].compact.max
    name = rec[:comm].empty? ? "tid=#{rec[:tid]}" : rec[:comm]
    attrs = [
      Vivarium::OtelExporter.int_attr("thread.id", rec[:tid]),
      Vivarium::OtelExporter.int_attr("process.pid", rec[:pid])
    ]
    attrs << Vivarium::OtelExporter.str_attr("process.command", rec[:comm]) unless rec[:comm].empty?
    @exporter.enqueue(
      Vivarium::OtelExporter.span_hash(
        trace_hi: @trace_hi, trace_lo: @trace_lo, span_id: rec[:span_id], parent: thread_parent_span_id(rec),
        name: name, start_k: start_k, stop_k: stop_k,
        to_unix: @to_unix, attributes: attrs, events: rec[:events]
      )
    )
    rec[:emitted] = true
  end

  # Enqueues the "vivarium session" root span (parent 0) covering the session
  # start ktime up to stop_ktime.
  def emit_session_span(stop_ktime)
    @exporter.enqueue(
      Vivarium::OtelExporter.span_hash(
        trace_hi: @trace_hi, trace_lo: @trace_lo, span_id: @session_span_id, parent: 0,
        name: "vivarium session", start_k: @session_start_ktime, stop_k: stop_ktime,
        to_unix: @to_unix,
        attributes: [
          Vivarium::OtelExporter.int_attr("process.pid", @observer_pid),
          Vivarium::OtelExporter.int_attr("thread.id", @main_tid)
        ],
        events: []
      )
    )
  end
end
277
+ end
@@ -4,7 +4,9 @@ require "json"
4
4
 
5
5
  module Vivarium
6
6
  RawEvent = Struct.new(
7
- :ktime_ns, :pid, :tid, :event_name, :payload, :dropped_since_last,
7
+ :ktime_ns, :pid, :tid, :uid, :gid,
8
+ :trace_hi, :trace_lo, :span_id, :parent_span_id, :comm,
9
+ :event_name, :payload, :dropped_since_last,
8
10
  keyword_init: true
9
11
  )
10
12
 
@@ -16,12 +18,14 @@ module Vivarium
16
18
  class FormatError < StandardError; end
17
19
 
18
20
  FORMAT = "vivarium-raw"
19
- VERSION = 1
20
- PACK_FMT = "Q<L<L<a16a256Q<" # struct event_t (296B)
21
+ VERSION = 2
22
+ PACK_FMT = "Q<L<L<L<L<Q<Q<Q<Q<a16a16a256Q<" # struct event_t (352B)
21
23
 
22
24
  def self.pack_record(ev)
23
25
  [
24
- ev.ktime_ns, ev.pid, ev.tid,
26
+ ev.ktime_ns, ev.pid, ev.tid, ev.uid.to_i, ev.gid.to_i,
27
+ ev.trace_hi.to_i, ev.trace_lo.to_i, ev.span_id.to_i, ev.parent_span_id.to_i,
28
+ ev.comm.to_s.b.ljust(EVENT_COMM_SIZE, "\x00")[0, EVENT_COMM_SIZE],
25
29
  ev.event_name.to_s.b.ljust(EVENT_NAME_SIZE, "\x00")[0, EVENT_NAME_SIZE],
26
30
  ev.payload.to_s.b.ljust(EVENT_PAYLOAD_SIZE, "\x00")[0, EVENT_PAYLOAD_SIZE],
27
31
  ev.dropped_since_last
@@ -33,12 +37,19 @@ module Vivarium
33
37
  bytes = bytes.ljust(EVENT_STRUCT_SIZE, "\x00") if bytes.bytesize < EVENT_STRUCT_SIZE
34
38
 
35
39
  RawEvent.new(
36
- ktime_ns: bytes[EVENT_TS_OFFSET, EVENT_TS_SIZE].unpack1("Q<"),
37
- pid: bytes[EVENT_PID_OFFSET, 4].unpack1("L<"),
38
- tid: bytes[EVENT_TID_OFFSET, 4].unpack1("L<"),
40
+ ktime_ns: bytes[EVENT_TS_OFFSET, EVENT_TS_SIZE].unpack1("Q<"),
41
+ pid: bytes[EVENT_PID_OFFSET, 4].unpack1("L<"),
42
+ tid: bytes[EVENT_TID_OFFSET, 4].unpack1("L<"),
43
+ uid: bytes[EVENT_UID_OFFSET, 4].unpack1("L<"),
44
+ gid: bytes[EVENT_GID_OFFSET, 4].unpack1("L<"),
45
+ trace_hi: bytes[EVENT_TRACE_HI_OFFSET, 8].unpack1("Q<"),
46
+ trace_lo: bytes[EVENT_TRACE_LO_OFFSET, 8].unpack1("Q<"),
47
+ span_id: bytes[EVENT_SPAN_OFFSET, 8].unpack1("Q<"),
48
+ parent_span_id: bytes[EVENT_PARENT_SPAN_OFFSET, 8].unpack1("Q<"),
49
+ comm: Vivarium.c_string(bytes[EVENT_COMM_OFFSET, EVENT_COMM_SIZE]),
39
50
  event_name: Vivarium.c_string(bytes[EVENT_NAME_OFFSET, EVENT_NAME_SIZE]),
40
- payload: bytes[EVENT_PAYLOAD_OFFSET, EVENT_PAYLOAD_SIZE].to_s.b,
41
- dropped_since_last: bytes[EVENT_DROPPED_OFFSET, 8].unpack1("Q<")
51
+ payload: bytes[EVENT_PAYLOAD_OFFSET, EVENT_PAYLOAD_SIZE].to_s.b,
52
+ dropped_since_last: bytes[EVENT_DROPPED_OFFSET, 8].unpack1("Q<")
42
53
  )
43
54
  end
44
55
 
@@ -70,6 +81,13 @@ module Vivarium
70
81
  raise FormatError, "format=#{meta[:format].inspect} (expected #{FORMAT.inspect})"
71
82
  end
72
83
 
84
+ size = meta[:event_struct_size]
85
+ if size && size != EVENT_STRUCT_SIZE
86
+ raise FormatError,
87
+ "event_struct_size=#{size} (expected #{EVENT_STRUCT_SIZE}); " \
88
+ "incompatible capture (likely an older vivarium-raw v1 file)"
89
+ end
90
+
73
91
  events = []
74
92
  while (rec = io.read(EVENT_STRUCT_SIZE))
75
93
  break if rec.bytesize < EVENT_STRUCT_SIZE
@@ -24,6 +24,11 @@ module Vivarium
24
24
  UPROBE_EVENT_NAMES = %w[ssl_write].to_set.freeze
25
25
  DL_EVENT_NAMES = %w[dlopen mmap_exec].to_set.freeze
26
26
 
27
+ # Events whose traced value repeats heavily (same file/lib/env key opened over
28
+ # and over). With dedup_values on, each (event_name, value) pair is rendered
29
+ # only on its first occurrence in the session; later repeats are suppressed.
30
+ DEDUP_EVENT_NAMES = %w[path_open mmap_exec dlopen env_caccess].to_set.freeze
31
+
27
32
  SYNTHETIC_SPAN_NAME = "<no-span>"
28
33
  UNRESOLVED_METHOD_PREFIX = "<method_id="
29
34
 
@@ -67,6 +72,7 @@ module Vivarium
67
72
 
68
73
  @pid_comm = { observer_pid => "ruby" }
69
74
  @pid_parent = {}
75
+ @dedup_seen = Set.new
70
76
  end
71
77
 
72
78
  def render
@@ -366,6 +372,8 @@ module Vivarium
366
372
  next unless event_visible?(ev, span)
367
373
  end
368
374
 
375
+ next if dedup_suppressed?(ev, target_text)
376
+
369
377
  if ev.event_name == FORK_EVENT_NAME
370
378
  child_pid = read_proc_fork_child_pid(ev.payload)
371
379
  child_node = ProcNode.new(
@@ -505,6 +513,17 @@ module Vivarium
505
513
  @display_filter.allow_span_name?(span_display_name(span))
506
514
  end
507
515
 
516
+ # True when dedup_values is on and this (event_name, value) pair was already
517
+ # rendered earlier in the session. Only visible events reach this point, so a
518
+ # suppressed-by-filter event never consumes the "first occurrence" slot.
519
def dedup_suppressed?(ev, target_text)
  # Dedup applies only when the filter enables it and the event is one of the
  # DEDUP_EVENT_NAMES.
  tracked = @display_filter.dedup_values && DEDUP_EVENT_NAMES.include?(ev.event_name)
  return false unless tracked

  # Fall back to rendering the target when the caller did not supply its text.
  key = [ev.event_name, target_text || render_target(ev)]
  # Set#add? returns nil when the key was already present, i.e. a repeat.
  @dedup_seen.add?(key).nil?
end
526
+
508
527
  def event_visible?(ev, span, target_text = nil)
509
528
  @display_filter.allow_event?(
510
529
  event_name: ev.event_name,
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Vivarium
4
- VERSION = "0.5.2"
4
+ VERSION = "0.6.0"
5
5
  end
data/lib/vivarium.rb CHANGED
@@ -6,6 +6,7 @@ require "net/http"
6
6
  require "optparse"
7
7
  require "pathname"
8
8
  require "rbbcc"
9
+ require "securerandom"
9
10
  require "set"
10
11
  require "socket"
11
12
  if defined?(Ruby) && defined?(Ruby::Box) && Ruby::Box.enabled?
@@ -31,15 +32,23 @@ module Vivarium
31
32
  EVENT_NAME_SIZE = 16
32
33
  EVENT_PAYLOAD_SIZE = 256
33
34
  EVENT_TS_SIZE = 8
35
+ EVENT_COMM_SIZE = 16
34
36
  PROC_EXEC_SLOT_SIZE = 64
35
37
  PROC_EXEC_SLOT_COUNT = 4
36
- EVENT_STRUCT_SIZE = 296
38
+ EVENT_STRUCT_SIZE = 352
37
39
  EVENT_TS_OFFSET = 0
38
40
  EVENT_PID_OFFSET = 8
39
41
  EVENT_TID_OFFSET = 12
40
- EVENT_NAME_OFFSET = 16
41
- EVENT_PAYLOAD_OFFSET = 32
42
- EVENT_DROPPED_OFFSET = 288
42
+ EVENT_UID_OFFSET = 16
43
+ EVENT_GID_OFFSET = 20
44
+ EVENT_TRACE_HI_OFFSET = 24
45
+ EVENT_TRACE_LO_OFFSET = 32
46
+ EVENT_SPAN_OFFSET = 40
47
+ EVENT_PARENT_SPAN_OFFSET = 48
48
+ EVENT_COMM_OFFSET = 56
49
+ EVENT_NAME_OFFSET = 72
50
+ EVENT_PAYLOAD_OFFSET = 88
51
+ EVENT_DROPPED_OFFSET = 344
43
52
  EVENTS_RINGBUF_PAGES = 256
44
53
 
45
54
  SPAN_METHOD_SIZE = 128
@@ -236,6 +245,36 @@ module Vivarium
236
245
  EVENT_SEVERITY_HIGH.include?(event_name.to_s) ? "high" : "medium"
237
246
  end
238
247
 
248
# All id arithmetic below is truncated to unsigned 64 bits with this mask.
U64_MASK = 0xFFFFFFFFFFFFFFFF

# Deterministic 64-bit span id: trace_hi is mixed first, then trace_lo, tid,
# and start_ktime are folded in one at a time, each XORed into the running
# value and passed through mix64. A result of zero is replaced by 1, so the
# id is never zero. Identical inputs always give the identical id; the
# original note states that the report --dump-otel view and the OTLP exporter
# share this function so both assign the same method span ids.
def self.synth_span_id(trace_hi, trace_lo, tid, start_ktime)
  words = [trace_lo & U64_MASK, tid.to_i & U64_MASK, start_ktime.to_i & U64_MASK]
  folded = words.reduce(mix64(trace_hi)) { |acc, word| mix64(acc ^ word) }
  folded.zero? ? 1 : folded
end

# One splitmix64 step: add the golden-ratio increment, then apply the two
# xor-shift/multiply rounds and the final xor-shift, all modulo 2**64.
def self.mix64(value)
  z = (value.to_i + 0x9E3779B97F4A7C15) & U64_MASK
  z ^= z >> 30
  z = (z * 0xBF58476D1CE4E5B9) & U64_MASK
  z ^= z >> 27
  z = (z * 0x94D049BB133111EB) & U64_MASK
  (z ^ (z >> 31)) & U64_MASK
end

# Deterministic 128-bit trace id returned as [hi, lo]. Both halves come from
# synth_span_id and are therefore non-zero. The high half folds the seeds as
# given; the low half folds the bit-inverted seed_lo followed by seed_hi, so
# the two halves are derived from differently ordered inputs.
def self.synth_trace_id(seed_hi, seed_lo, tid, start_ktime)
  [
    synth_span_id(seed_hi, seed_lo, tid, start_ktime),
    synth_span_id(seed_lo ^ U64_MASK, seed_hi, tid, start_ktime)
  ]
end
277
+
239
278
  def self.decode_dns_qname(raw_payload)
240
279
  bytes = raw_payload.to_s.b.bytes
241
280
  labels = []
@@ -711,21 +750,47 @@ module Vivarium
711
750
  u16 transport_header;
712
751
  };
713
752
 
753
+ // trace_id is a 128-bit value carried as two u64 halves (hi/lo). They are
754
+ // kept as flat scalar fields (not a nested struct) because rbbcc/Fiddle's
755
+ // CParser cannot decode nested-struct members of a BPF map value type.
714
756
  struct event_t {
715
757
  u64 ktime_ns;
716
758
  u32 pid;
717
759
  u32 tid;
760
+ u32 uid;
761
+ u32 gid;
762
+ u64 trace_id_hi;
763
+ u64 trace_id_lo;
764
+ u64 span_id;
765
+ u64 parent_span_id;
766
+ char comm[#{EVENT_COMM_SIZE}];
718
767
  char event_name[16];
719
768
  char payload[#{EVENT_PAYLOAD_SIZE}];
720
769
  u64 dropped_since_last;
721
770
  };
722
771
 
772
+ // Per-thread OpenTelemetry context. trace_id (hi/lo) is issued by userspace
773
+ // at target registration and inherited by spawned children; span_id is
774
+ // re-issued per tid (root in userspace, children at fork).
775
+ struct otel_ctx_t {
776
+ u64 trace_id_hi;
777
+ u64 trace_id_lo;
778
+ u64 span_id;
779
+ u64 parent_span_id;
780
+ };
781
+
723
782
  BPF_HASH(config_root_targets, u32, u8, 1024);
724
783
  BPF_HASH(config_spawned_targets, u32, u8, 8192);
725
784
  BPF_HASH(dns_connected_tids, u32, u8, 8192);
785
+ BPF_HASH(otel_ctx, u32, struct otel_ctx_t, 8192);
726
786
  BPF_RINGBUF_OUTPUT(events, #{EVENTS_RINGBUF_PAGES});
727
787
  BPF_ARRAY(drop_counter, u64, 1);
728
788
 
789
+ static __always_inline u64 rand_span_id()
790
+ {
791
+ return ((u64)bpf_get_prandom_u32() << 32) | (u64)bpf_get_prandom_u32();
792
+ }
793
+
729
794
  static __always_inline int target_enabled(u32 pid, u32 tid)
730
795
  {
731
796
  u8 *enabled_root = config_root_targets.lookup(&pid);
@@ -783,6 +848,20 @@ module Vivarium
783
848
  ev->tid = (u32)bpf_get_current_pid_tgid();
784
849
  ev->dropped_since_last = 0;
785
850
 
851
+ u64 uid_gid = bpf_get_current_uid_gid();
852
+ ev->uid = (u32)uid_gid;
853
+ ev->gid = (u32)(uid_gid >> 32);
854
+ bpf_get_current_comm(&ev->comm, sizeof(ev->comm));
855
+
856
+ u32 ctid = (u32)bpf_get_current_pid_tgid();
857
+ struct otel_ctx_t *octx = otel_ctx.lookup(&ctid);
858
+ if (octx) {
859
+ ev->trace_id_hi = octx->trace_id_hi;
860
+ ev->trace_id_lo = octx->trace_id_lo;
861
+ ev->span_id = octx->span_id;
862
+ ev->parent_span_id = octx->parent_span_id;
863
+ }
864
+
786
865
  cnt = drop_counter.lookup(&key);
787
866
  if (cnt && *cnt > 0) {
788
867
  ev->dropped_since_last = __sync_lock_test_and_set(cnt, 0);
@@ -895,11 +974,26 @@ module Vivarium
895
974
 
896
975
  if (is_target) {
897
976
  u64 pid_tgid = bpf_get_current_pid_tgid();
977
+
978
+ // Re-issue a fresh span_id for the child, inheriting the parent's
979
+ // trace_id and linking the child's parent_span_id to the parent span.
980
+ u32 parent_tid = (u32)pid_tgid;
981
+ struct otel_ctx_t *pctx = otel_ctx.lookup(&parent_tid);
982
+ struct otel_ctx_t cctx = {};
983
+ u64 child_span = rand_span_id();
984
+ if (pctx) {
985
+ cctx.trace_id_hi = pctx->trace_id_hi;
986
+ cctx.trace_id_lo = pctx->trace_id_lo;
987
+ cctx.parent_span_id = pctx->span_id;
988
+ }
989
+ cctx.span_id = child_span;
990
+ otel_ctx.update(&child, &cctx);
991
+
898
992
  struct event_t ev = {};
899
993
  ev.pid = pid_tgid >> 32;
900
994
  __builtin_memcpy(ev.event_name, "proc_fork", 10);
901
995
  __builtin_memcpy(&ev.payload[0], &child, sizeof(child));
902
- __builtin_memcpy(&ev.payload[4], &child, sizeof(child));
996
+ __builtin_memcpy(&ev.payload[8], &child_span, sizeof(child_span));
903
997
  submit_event(&ev);
904
998
  }
905
999
 
@@ -908,9 +1002,18 @@ module Vivarium
908
1002
 
909
1003
  TRACEPOINT_PROBE(sched, sched_process_exit)
910
1004
  {
911
- u32 tid = (u32)bpf_get_current_pid_tgid();
1005
+ u64 pid_tgid = bpf_get_current_pid_tgid();
1006
+ u32 pid = pid_tgid >> 32;
1007
+ u32 tid = (u32)pid_tgid;
1008
+ if (target_enabled(pid, tid)) {
1009
+ struct event_t ev = {};
1010
+ ev.pid = pid;
1011
+ __builtin_memcpy(ev.event_name, "proc_exit", 10);
1012
+ submit_event(&ev);
1013
+ }
912
1014
  config_spawned_targets.delete(&tid);
913
1015
  dns_connected_tids.delete(&tid);
1016
+ otel_ctx.delete(&tid);
914
1017
  return 0;
915
1018
  }
916
1019
 
@@ -1779,16 +1882,18 @@ module Vivarium
1779
1882
 
1780
1883
  config_root_targets = bpf["config_root_targets"]
1781
1884
  config_spawned_targets = bpf["config_spawned_targets"]
1885
+ otel_ctx = bpf["otel_ctx"]
1782
1886
  events_ringbuf = bpf["events"]
1783
1887
 
1784
1888
  config_spawned_targets.clear
1889
+ otel_ctx.clear
1785
1890
 
1786
1891
  pin_map(config_root_targets, File.join(@pin_dir, "config_root_targets"))
1787
1892
  pin_map(config_spawned_targets, File.join(@pin_dir, "config_spawned_targets"))
1788
1893
  pin_map(events_ringbuf, File.join(@pin_dir, "events"))
1789
1894
 
1790
1895
  event_log = EventLog.new
1791
- registry = Registry.new(config_root_targets, config_spawned_targets)
1896
+ registry = Registry.new(config_root_targets, config_spawned_targets, otel_ctx)
1792
1897
  start_ringbuf_poller(bpf, events_ringbuf, event_log)
1793
1898
 
1794
1899
  @api_server = ApiServer.new(
@@ -2180,15 +2285,19 @@ module Vivarium
2180
2285
  end
2181
2286
  end
2182
2287
 
2183
# Entry point for starting an observation. Every keyword is forwarded
# unchanged: with a block the call is delegated to scoped_observe (block
# included), without one to top_observe. Returns whatever the delegate
# returns.
def self.observe(socket_path: self.socket_path, dest: $stdout, filter: nil, save_raw: nil,
                 otel_out: nil, otel_endpoint: nil, &block)
  options = {
    socket_path: socket_path, dest: dest, filter: filter,
    save_raw: save_raw, otel_out: otel_out, otel_endpoint: otel_endpoint
  }
  return top_observe(**options) unless block_given?

  scoped_observe(**options, &block)
end
2190
2298
 
2191
- def self.top_observe(socket_path: self.socket_path, dest: $stdout, filter: nil, save_raw: nil)
2299
+ def self.top_observe(socket_path: self.socket_path, dest: $stdout, filter: nil, save_raw: nil,
2300
+ otel_out: nil, otel_endpoint: nil)
2192
2301
  client = DaemonClient.new(socket_path: socket_path)
2193
2302
  pid = Process.pid
2194
2303
  main_tid = gettid
@@ -2199,7 +2308,9 @@ module Vivarium
2199
2308
  main_tid: main_tid,
2200
2309
  filter: filter,
2201
2310
  dest: dest,
2202
- save_raw: save_raw
2311
+ save_raw: save_raw,
2312
+ otel_out: otel_out,
2313
+ otel_endpoint: otel_endpoint
2203
2314
  )
2204
2315
  correlator.start
2205
2316
  client.register(pid)
@@ -2214,7 +2325,8 @@ module Vivarium
2214
2325
  session
2215
2326
  end
2216
2327
 
2217
- def self.scoped_observe(socket_path: self.socket_path, dest:, filter: nil, save_raw: nil)
2328
+ def self.scoped_observe(socket_path: self.socket_path, dest:, filter: nil, save_raw: nil,
2329
+ otel_out: nil, otel_endpoint: nil)
2218
2330
  client = DaemonClient.new(socket_path: socket_path)
2219
2331
  pid = Process.pid
2220
2332
  main_tid = gettid
@@ -2225,7 +2337,9 @@ module Vivarium
2225
2337
  main_tid: main_tid,
2226
2338
  filter: filter,
2227
2339
  dest: dest,
2228
- save_raw: save_raw
2340
+ save_raw: save_raw,
2341
+ otel_out: otel_out,
2342
+ otel_endpoint: otel_endpoint
2229
2343
  )
2230
2344
  correlator.start
2231
2345
  client.register(pid)
@@ -2360,6 +2474,8 @@ end
2360
2474
  require_relative "vivarium/daemon_client"
2361
2475
  require_relative "vivarium/api_server"
2362
2476
  require_relative "vivarium/raw_store"
2477
+ require_relative "vivarium/otel_exporter"
2478
+ require_relative "vivarium/otel_stream"
2363
2479
  require_relative "vivarium/correlator"
2364
2480
  require_relative "vivarium/display_filter"
2365
2481
  require_relative "vivarium/tree_renderer"