vivarium 0.1.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/vivarium.rb CHANGED
@@ -7,7 +7,7 @@ require "pathname"
7
7
  require "rbbcc"
8
8
  require "socket"
9
9
  require_relative "vivarium/version"
10
- require_relative "vivarium/logger"
10
+ require_relative "vivarium/cli"
11
11
 
12
12
  module Vivarium
13
13
  class Error < StandardError; end
@@ -16,18 +16,85 @@ module Vivarium
16
16
  CONFIG_ROOT_TARGETS_PIN = File.join(PIN_DIR, "config_root_targets")
17
17
  CONFIG_SPAWNED_TARGETS_PIN = File.join(PIN_DIR, "config_spawned_targets")
18
18
  CONFIG_TARGETS_PIN = CONFIG_ROOT_TARGETS_PIN
19
- EVENT_INVOKED_PIN = File.join(PIN_DIR, "event_invoked")
20
- EVENT_WRITE_POS_PIN = File.join(PIN_DIR, "event_write_pos")
19
+ EVENTS_PIN = File.join(PIN_DIR, "events")
21
20
 
22
21
  EVENT_NAME_SIZE = 16
23
22
  EVENT_PAYLOAD_SIZE = 256
24
23
  EVENT_TS_SIZE = 8
24
+ PROC_EXEC_SLOT_SIZE = 64
25
+ PROC_EXEC_SLOT_COUNT = 4
25
26
  EVENT_STRUCT_SIZE = 288
26
27
  EVENT_TS_OFFSET = 0
27
28
  EVENT_PID_OFFSET = 8
28
- EVENT_NAME_OFFSET = 12
29
- EVENT_PAYLOAD_OFFSET = 28
30
- EVENT_CAPACITY = 1024
29
+ EVENT_TID_OFFSET = 12
30
+ EVENT_NAME_OFFSET = 16
31
+ EVENT_PAYLOAD_OFFSET = 32
32
+ EVENTS_RINGBUF_PAGES = 256
33
# Classes listed for span tracing.
# NOTE(review): the consumer of this list is not visible in this hunk — confirm how it is matched.
# Frozen like every other lookup constant here so it cannot be mutated at runtime.
SPAN_ALLOWCLASSES = [
  Socket,
  BasicSocket,
  IPSocket,
  TCPSocket,
  UDPSocket,
  UNIXSocket,
  File,
  Dir,
  Signal,
  Process,
  Process::UID,
  Process::GID
].freeze

# Individual "Class#method" names listed for span tracing.
# NOTE(review): consumer not visible here — confirm the expected name format.
SPAN_ALLOWLIST = [
  "Kernel#system",
  "Kernel#require",
  "Kernel#require_relative",
  "Kernel#load",
  "Kernel#eval",
  "Object#instance_eval",
  "Object#instance_exec"
].freeze

# Event names that Vivarium.event_severity reports as "high"; all others are "medium".
EVENT_SEVERITY_HIGH = %w[
  capable_check bprm_creds setid_change task_kill
  ptrace_check sb_mount kernel_read_file
].freeze

# Linux capability number => symbolic name (values from linux/capability.h).
# The table is complete for 0..40 so any capability number decodes to its real
# name rather than "UNKNOWN".
CAPABILITY_NAMES = {
  0 => "CAP_CHOWN",
  1 => "CAP_DAC_OVERRIDE",
  2 => "CAP_DAC_READ_SEARCH",
  3 => "CAP_FOWNER",
  4 => "CAP_FSETID",
  5 => "CAP_KILL",
  6 => "CAP_SETGID",
  7 => "CAP_SETUID",
  8 => "CAP_SETPCAP",
  9 => "CAP_LINUX_IMMUTABLE",
  10 => "CAP_NET_BIND_SERVICE",
  11 => "CAP_NET_BROADCAST",
  12 => "CAP_NET_ADMIN",
  13 => "CAP_NET_RAW",
  14 => "CAP_IPC_LOCK",
  15 => "CAP_IPC_OWNER",
  16 => "CAP_SYS_MODULE",
  17 => "CAP_SYS_RAWIO",
  18 => "CAP_SYS_CHROOT",
  19 => "CAP_SYS_PTRACE",
  20 => "CAP_SYS_PACCT",
  21 => "CAP_SYS_ADMIN",
  22 => "CAP_SYS_BOOT",
  23 => "CAP_SYS_NICE",
  24 => "CAP_SYS_RESOURCE",
  25 => "CAP_SYS_TIME",
  26 => "CAP_SYS_TTY_CONFIG",
  27 => "CAP_MKNOD",
  28 => "CAP_LEASE",
  29 => "CAP_AUDIT_WRITE",
  30 => "CAP_AUDIT_CONTROL",
  31 => "CAP_SETFCAP",
  32 => "CAP_MAC_OVERRIDE",
  33 => "CAP_MAC_ADMIN",
  34 => "CAP_SYSLOG",
  35 => "CAP_WAKE_ALARM",
  36 => "CAP_BLOCK_SUSPEND",
  37 => "CAP_AUDIT_READ",
  38 => "CAP_PERFMON",
  39 => "CAP_BPF",
  40 => "CAP_CHECKPOINT_RESTORE"
}.freeze

# Flag value => name for the `flags` word carried by "setid_change" events.
SETID_FLAG_NAMES = {
  0x01 => "LSM_SETID_ID",
  0x02 => "LSM_SETID_RE",
  0x04 => "LSM_SETID_RES",
  0x08 => "LSM_SETID_FS"
}.freeze
31
98
 
32
99
  @bpf_pin_dir = PIN_DIR
33
100
 
@@ -39,35 +106,16 @@ module Vivarium
39
106
  end
40
107
  end
41
108
 
42
- Event = Struct.new(:ktime_ns, :pid, :event_name, :payload, keyword_init: true) do
43
- def empty?
44
- ktime_ns.to_i.zero? && pid.to_i.zero? && event_name.to_s.empty? && payload.to_s.empty?
45
- end
46
-
47
- def self.from_binary(raw)
48
- bytes = raw.to_s.b
49
- bytes = bytes.ljust(EVENT_STRUCT_SIZE, "\x00")
50
-
51
- ktime_ns = bytes[EVENT_TS_OFFSET, EVENT_TS_SIZE].unpack1("Q<")
52
- pid = bytes[EVENT_PID_OFFSET, 4].unpack1("L<")
53
- event_name = c_string(bytes[EVENT_NAME_OFFSET, EVENT_NAME_SIZE])
54
- raw_payload = bytes[EVENT_PAYLOAD_OFFSET, EVENT_PAYLOAD_SIZE]
55
- payload = if %w[dns_req sock_connect odd_socket].include?(event_name)
56
- raw_payload
57
- else
58
- c_string(raw_payload)
59
- end
60
-
61
- new(ktime_ns: ktime_ns, pid: pid, event_name: event_name, payload: payload)
62
- end
109
# Treats +bytes+ as a C string: returns the binary-encoded bytes that precede
# the first NUL, or the whole binary string when it contains no NUL.
# nil is accepted and yields an empty string.
def self.c_string(bytes)
  binary = bytes.to_s.b
  terminator = binary.index("\x00")
  terminator ? binary[0, terminator] : binary
end
68
116
 
69
- str[0, nul]
70
- end
117
# Classifies an event by name: "high" when the name is listed in
# EVENT_SEVERITY_HIGH, "medium" for everything else (including nil).
def self.event_severity(event_name)
  return "high" if EVENT_SEVERITY_HIGH.include?(event_name.to_s)

  "medium"
end
72
120
 
73
121
  def self.decode_dns_qname(raw_payload)
@@ -143,6 +191,182 @@ module Vivarium
143
191
  decode_odd_socket_payload(raw_payload)
144
192
  end
145
193
 
194
# Renders a "file_symlink" payload. The payload holds two 128-byte C-string
# fields: the symlink target at offset 0 and the link name at offset 128.
def self.decode_file_symlink_payload(raw_payload)
  data = raw_payload.to_s.b
  target, link_name = [0, 128].map { |offset| c_string(data[offset, 128]) }
  "target=#{target.inspect} link_name=#{link_name.inspect}"
end
200
+
201
# Renders a "file_hardlink" payload. The payload holds two 128-byte C-string
# fields: the existing entry's name at offset 0 and the new link name at 128.
def self.decode_file_hardlink_payload(raw_payload)
  data = raw_payload.to_s.b
  fields = {
    "old_path" => c_string(data[0, 128]),
    "new_name" => c_string(data[128, 128])
  }
  fields.map { |label, value| "#{label}=#{value.inspect}" }.join(" ")
end
207
+
208
# Renders a "file_rename" payload. The payload holds two 128-byte C-string
# fields: the old entry name at offset 0 and the new entry name at offset 128.
def self.decode_file_rename_payload(raw_payload)
  data = raw_payload.to_s.b
  before = c_string(data[0, 128])
  after = c_string(data[128, 128])
  format("old_name=%s new_name=%s", before.inspect, after.inspect)
end
214
+
215
# Renders a "file_chmod" payload: a little-endian u16 mode followed by a
# C-string path (up to 254 bytes). Returns "" when the payload is too short
# to contain the mode.
def self.decode_file_chmod_payload(raw_payload)
  data = raw_payload.to_s.b
  return "" unless data.bytesize >= 2

  mode, = data.unpack("S<")
  path = c_string(data[2, 254])
  "mode=0o#{mode.to_s(8)} path=#{path.inspect}"
end
223
+
224
# Renders a "file_getdents" payload: two little-endian u32 values, the file
# descriptor and the requested byte count. Returns "" for payloads under 8 bytes.
def self.decode_file_getdents_payload(raw_payload)
  data = raw_payload.to_s.b
  return "" unless data.bytesize >= 8

  fd, count = data.unpack("L<L<")
  "fd=#{fd} count=#{count}"
end
232
+
233
# Renders a "proc_exec" payload.
#
# The payload is PROC_EXEC_SLOT_COUNT fixed-size slots of PROC_EXEC_SLOT_SIZE
# bytes, each a C string: slot 0 is the executable filename and the remaining
# slots are argv entries in order.
#
# Slots are decoded positionally. Empty slots are no longer removed before the
# filename is picked, so a payload whose filename slot is empty does not report
# argv[0] as the filename, and an empty argument in the middle of argv no
# longer shifts the ones after it. Only trailing empty argv slots are dropped,
# because an unused slot and an empty argument look the same on the wire.
#
# Returns "" when every slot is empty.
def self.decode_proc_exec_payload(raw_payload)
  bytes = raw_payload.to_s.b
  slots = Array.new(PROC_EXEC_SLOT_COUNT) do |index|
    c_string(bytes[index * PROC_EXEC_SLOT_SIZE, PROC_EXEC_SLOT_SIZE])
  end
  return "" if slots.all?(&:empty?)

  filename, *argv = slots
  argv.pop while argv.any? && argv.last.empty?
  "filename=#{filename.inspect} argv=[#{argv.map(&:inspect).join(', ')}]"
end
246
+
247
# Renders a "ptrace_check" payload: a little-endian u32 access mode, shown in
# hex. Returns "" for payloads under 4 bytes.
def self.decode_ptrace_check_payload(raw_payload)
  data = raw_payload.to_s.b
  return "" unless data.bytesize >= 4

  format("mode=0x%x", data.unpack1("L<"))
end
254
+
255
# Renders an "sb_mount" payload: a little-endian u64 flags word, then two
# 120-byte C-string fields (device name at offset 8, filesystem type at 128).
# Returns "" when fewer than 248 bytes are available.
def self.decode_sb_mount_payload(raw_payload)
  data = raw_payload.to_s.b
  return "" unless data.bytesize >= 248

  flags = data.unpack1("Q<")
  device = c_string(data[8, 120])
  fstype = c_string(data[128, 120])
  format("flags=0x%x dev_name=%s fs_type=%s", flags, device.inspect, fstype.inspect)
end
264
+
265
# Renders a "kernel_read_file" payload: two little-endian u32 values, the
# read-file id and the contents flag. Returns "" for payloads under 8 bytes.
def self.decode_kernel_read_file_payload(raw_payload)
  data = raw_payload.to_s.b
  return "" unless data.bytesize >= 8

  id, contents = data.unpack("L<L<")
  "id=#{id} contents=#{contents}"
end
273
+
274
# Renders a "task_kill" payload: a little-endian signed 32-bit signal number,
# followed by its symbolic name when Ruby's Signal module knows one.
# Returns "" for payloads under 4 bytes.
def self.decode_task_kill_payload(raw_payload)
  data = raw_payload.to_s.b
  return "" unless data.bytesize >= 4

  sig = data.unpack1("l<")
  label = "sig=#{sig}"

  signame =
    begin
      Signal.signame(sig)
    rescue ArgumentError
      nil
    end

  signame ? "#{label} signame=#{signame}" : label
end
287
+
288
# Renders a "setid_change" payload: a little-endian u32 flags word plus the
# names of every SETID_FLAG_NAMES bit set in it ("UNKNOWN" when none match).
# Returns "" for payloads under 4 bytes.
def self.decode_setid_change_payload(raw_payload)
  data = raw_payload.to_s.b
  return "" unless data.bytesize >= 4

  flags = data.unpack1("L<")
  kinds = SETID_FLAG_NAMES.select { |bit, _name| (flags & bit).nonzero? }.values
  kinds = ["UNKNOWN"] if kinds.empty?
  format("flags=0x%x kinds=[%s]", flags, kinds.join(", "))
end
299
+
300
# Renders a "capable_check" payload: two little-endian u32 values, the
# capability number (with its CAPABILITY_NAMES entry, or "UNKNOWN") and the
# check options in hex. Returns "" for payloads under 8 bytes.
def self.decode_capable_check_payload(raw_payload)
  data = raw_payload.to_s.b
  return "" unless data.bytesize >= 8

  cap, opts = data.unpack("L<L<")
  cap_name = CAPABILITY_NAMES.fetch(cap) { "UNKNOWN" }
  format("cap=%d(%s) opts=0x%x", cap, cap_name, opts)
end
309
+
310
# Renders a "bprm_creds" payload: one byte telling whether a file was present,
# followed by a C-string path filling the rest of the payload.
# Returns "" for payloads under 2 bytes.
def self.decode_bprm_creds_payload(raw_payload)
  data = raw_payload.to_s.b
  return "" unless data.bytesize >= 2

  has_file = data.unpack1("C")
  file = c_string(data[1, EVENT_PAYLOAD_SIZE - 1])
  format("has_file=%d file=%s", has_file, file.inspect)
end
318
+
319
# Renders a "proc_fork" payload: two little-endian u32 values labelled as the
# child's pid and tid. Returns "" for payloads under 8 bytes.
def self.decode_proc_fork_payload(raw_payload)
  data = raw_payload.to_s.b
  return "" unless data.bytesize >= 8

  child_pid, child_tid = data.unpack("L<2")
  "child_pid=#{child_pid} child_tid=#{child_tid}"
end
327
+
328
# Renders a "span_start"/"span_stop" payload made of little-endian signed
# 64-bit values: method id, then (when at least 24 bytes are present) file id
# and line number. Ids are printed as unsigned 16-digit hex. A file id of -1
# and a non-positive line number are omitted. Returns "" under 8 bytes.
def self.decode_span_payload(raw_payload)
  data = raw_payload.to_s.b
  return "" if data.bytesize < 8

  u64_mask = 0xFFFF_FFFF_FFFF_FFFF
  parts = [format("method_id=0x%016X", data.unpack1("q<") & u64_mask)]

  if data.bytesize >= 24
    file_id, lineno = data[8, 16].unpack("q<q<")
    parts << format("file_id=0x%016X", file_id & u64_mask) unless file_id == -1
    parts << "lineno=#{lineno}" if lineno.positive?
  end

  parts.join(" ")
end
344
+
345
# Renders a "span_raise" payload made of up to four little-endian signed
# 64-bit values, in order: error id, message id, file id, line number.
# Each field is rendered only when the payload is long enough to hold it.
# A file id of -1 and a non-positive line number are omitted.
# Returns "" under 8 bytes.
def self.decode_span_raise_payload(raw_payload)
  data = raw_payload.to_s.b
  return "" if data.bytesize < 8

  u64_mask = 0xFFFF_FFFF_FFFF_FFFF
  # unpack yields nil for every value the payload is too short to contain,
  # which lines up with the 16/24/32-byte thresholds of the wire layout.
  error_id, message_id, file_id, lineno = data.unpack("q<4")

  parts = [format("error_id=0x%016X", error_id & u64_mask)]
  parts << format("message_id=0x%016X", message_id & u64_mask) if message_id
  parts << format("file_id=0x%016X", file_id & u64_mask) if file_id && file_id != -1
  parts << "lineno=#{lineno}" if lineno&.positive?
  parts.join(" ")
end
369
+
146
370
  def self.render_event_payload(event)
147
371
  case event.event_name
148
372
  when "dns_req"
@@ -154,11 +378,66 @@ module Vivarium
154
378
  when "odd_socket"
155
379
  decoded = decode_odd_socket_payload(event.payload)
156
380
  decoded.empty? ? event.payload.inspect : decoded
381
+ when "proc_exec"
382
+ decoded = decode_proc_exec_payload(event.payload)
383
+ decoded.empty? ? event.payload.inspect : decoded
384
+ when "ptrace_check"
385
+ decoded = decode_ptrace_check_payload(event.payload)
386
+ decoded.empty? ? event.payload.inspect : decoded
387
+ when "sb_mount"
388
+ decoded = decode_sb_mount_payload(event.payload)
389
+ decoded.empty? ? event.payload.inspect : decoded
390
+ when "kernel_read_file"
391
+ decoded = decode_kernel_read_file_payload(event.payload)
392
+ decoded.empty? ? event.payload.inspect : decoded
393
+ when "task_kill"
394
+ decoded = decode_task_kill_payload(event.payload)
395
+ decoded.empty? ? event.payload.inspect : decoded
396
+ when "setid_change"
397
+ decoded = decode_setid_change_payload(event.payload)
398
+ decoded.empty? ? event.payload.inspect : decoded
399
+ when "capable_check"
400
+ decoded = decode_capable_check_payload(event.payload)
401
+ decoded.empty? ? event.payload.inspect : decoded
402
+ when "bprm_creds"
403
+ decoded = decode_bprm_creds_payload(event.payload)
404
+ decoded.empty? ? event.payload.inspect : decoded
405
+ when "proc_fork"
406
+ decoded = decode_proc_fork_payload(event.payload)
407
+ decoded.empty? ? event.payload.inspect : decoded
408
+ when "span_start", "span_stop"
409
+ decoded = decode_span_payload(event.payload)
410
+ decoded.empty? ? event.payload.inspect : decoded
411
+ when "span_raise"
412
+ decoded = decode_span_raise_payload(event.payload)
413
+ decoded.empty? ? event.payload.inspect : decoded
414
+ when "file_symlink"
415
+ decoded = decode_file_symlink_payload(event.payload)
416
+ decoded.empty? ? event.payload.inspect : decoded
417
+ when "file_hardlink"
418
+ decoded = decode_file_hardlink_payload(event.payload)
419
+ decoded.empty? ? event.payload.inspect : decoded
420
+ when "file_rename"
421
+ decoded = decode_file_rename_payload(event.payload)
422
+ decoded.empty? ? event.payload.inspect : decoded
423
+ when "file_chmod"
424
+ decoded = decode_file_chmod_payload(event.payload)
425
+ decoded.empty? ? event.payload.inspect : decoded
426
+ when "file_getdents"
427
+ decoded = decode_file_getdents_payload(event.payload)
428
+ decoded.empty? ? event.payload.inspect : decoded
157
429
  else
158
- event.payload.inspect
430
+ strip_to_first_null(event.payload).inspect
159
431
  end
160
432
  end
161
433
 
434
# Returns the part of +bytes+ before its first NUL byte, or +bytes+ itself
# when it contains no NUL.
#
# nil is passed through unchanged, so the generic fallback in
# render_event_payload can still call .inspect on the result and print "nil"
# for an event without a payload, rather than raising NoMethodError.
def self.strip_to_first_null(bytes)
  return bytes if bytes.nil?

  nul = bytes.index("\x00")
  return bytes if nul.nil?

  bytes[0, nul]
end
440
+
162
441
  class MapStore
163
442
  def initialize(pin_dir: Vivarium.bpf_pin_dir)
164
443
  @pin_dir = pin_dir
@@ -176,20 +455,6 @@ module Vivarium
176
455
  keysize: 4,
177
456
  leafsize: 1
178
457
  )
179
- @event_invoked = RbBCC::ArrayTable.from_pin(
180
- File.join(@pin_dir, "event_invoked"),
181
- "unsigned int",
182
- "char[#{EVENT_STRUCT_SIZE}]",
183
- keysize: 4,
184
- leafsize: EVENT_STRUCT_SIZE
185
- )
186
- @event_write_pos = RbBCC::ArrayTable.from_pin(
187
- File.join(@pin_dir, "event_write_pos"),
188
- "unsigned int",
189
- "unsigned int",
190
- keysize: 4,
191
- leafsize: 4
192
- )
193
458
  rescue StandardError => e
194
459
  raise Error, "failed to open pinned maps under #{@pin_dir}: #{e.class}: #{e.message}"
195
460
  end
@@ -204,31 +469,6 @@ module Vivarium
204
469
  rescue KeyError
205
470
  nil
206
471
  end
207
-
208
- def drain_events
209
- events = []
210
- EVENT_CAPACITY.times do |idx|
211
- ptr = @event_invoked[idx]
212
- next unless ptr
213
-
214
- event = Event.from_binary(ptr[0, EVENT_STRUCT_SIZE])
215
- next if event.empty?
216
-
217
- events << event
218
- @event_invoked[idx] = zeroed_event_ptr
219
- end
220
-
221
- @event_write_pos[0] = 0
222
- events
223
- end
224
-
225
- private
226
-
227
- def zeroed_event_ptr
228
- ptr = Fiddle::Pointer.malloc(EVENT_STRUCT_SIZE)
229
- ptr[0, EVENT_STRUCT_SIZE] = "\x00" * EVENT_STRUCT_SIZE
230
- ptr
231
- end
232
472
  end
233
473
 
234
474
  class Daemon
@@ -249,6 +489,11 @@ module Vivarium
249
489
  struct net;
250
490
  struct sock;
251
491
  struct sk_buff;
492
+ struct task_struct;
493
+ struct kernel_siginfo;
494
+ struct cred;
495
+ struct user_namespace;
496
+ struct linux_binprm;
252
497
 
253
498
  struct path {
254
499
  void *mnt;
@@ -259,6 +504,24 @@ module Vivarium
259
504
  struct path f_path;
260
505
  };
261
506
 
507
+ struct qstr {
508
+ union {
509
+ struct {
510
+ u64 hash_len;
511
+ };
512
+ struct {
513
+ u32 hash;
514
+ u32 len;
515
+ };
516
+ };
517
+ const unsigned char *name;
518
+ };
519
+
520
+ struct dentry_base {
521
+ char __pad[__VIVARIUM_DENTRY_D_NAME_OFFSET__];
522
+ struct qstr d_name;
523
+ };
524
+
262
525
  struct sockaddr_t {
263
526
  u16 sa_family;
264
527
  unsigned char sa_data[14];
@@ -316,6 +579,7 @@ module Vivarium
316
579
  struct event_t {
317
580
  u64 ktime_ns;
318
581
  u32 pid;
582
+ u32 tid;
319
583
  char event_name[16];
320
584
  char payload[#{EVENT_PAYLOAD_SIZE}];
321
585
  };
@@ -323,8 +587,7 @@ module Vivarium
323
587
  BPF_HASH(config_root_targets, u32, u8, 1024);
324
588
  BPF_HASH(config_spawned_targets, u32, u8, 8192);
325
589
  BPF_HASH(dns_connected_tids, u32, u8, 8192);
326
- BPF_ARRAY(event_invoked, struct event_t, #{EVENT_CAPACITY});
327
- BPF_ARRAY(event_write_pos, u32, 1);
590
+ BPF_RINGBUF_OUTPUT(events, #{EVENTS_RINGBUF_PAGES});
328
591
 
329
592
  static __always_inline int target_enabled(u32 pid, u32 tid)
330
593
  {
@@ -341,19 +604,41 @@ module Vivarium
341
604
  return 0;
342
605
  }
343
606
 
344
- static __always_inline void submit_event(struct event_t *ev)
607
+ static __always_inline int monitored_capability(int cap)
608
+ {
609
+ switch (cap) {
610
+ case 1: /* CAP_DAC_OVERRIDE */
611
+ case 2: /* CAP_DAC_READ_SEARCH */
612
+ case 6: /* CAP_SETGID */
613
+ case 7: /* CAP_SETUID */
614
+ case 12: /* CAP_NET_ADMIN */
615
+ case 16: /* CAP_SYS_MODULE */
616
+ case 17: /* CAP_SYS_RAWIO */
617
+ case 19: /* CAP_SYS_PTRACE */
618
+ case 21: /* CAP_SYS_ADMIN */
619
+ case 22: /* CAP_SYS_BOOT */
620
+ case 25: /* CAP_SYS_TIME */
621
+ case 38: /* CAP_PERFMON */
622
+ case 39: /* CAP_BPF */
623
+ case 40: /* CAP_CHECKPOINT_RESTORE */
624
+ return 1;
625
+ default:
626
+ return 0;
627
+ }
628
+ }
629
+
630
+ static __always_inline void submit_event(struct event_t *src)
345
631
  {
346
- u32 zero = 0;
347
- u32 *write_pos = event_write_pos.lookup(&zero);
348
- if (!write_pos) {
632
+ struct event_t *ev = events.ringbuf_reserve(sizeof(struct event_t));
633
+ if (!ev) {
349
634
  return;
350
635
  }
351
636
 
637
+ __builtin_memcpy(ev, src, sizeof(*ev));
352
638
  ev->ktime_ns = bpf_ktime_get_ns();
639
+ ev->tid = (u32)bpf_get_current_pid_tgid();
353
640
 
354
- u32 idx = *write_pos % #{EVENT_CAPACITY};
355
- __sync_fetch_and_add(write_pos, 1);
356
- event_invoked.update(&idx, ev);
641
+ events.ringbuf_submit(ev, 0);
357
642
  }
358
643
 
359
644
  static __always_inline int is_dns_destination(void *addr)
@@ -396,21 +681,56 @@ module Vivarium
396
681
  submit_event(&ev);
397
682
  }
398
683
 
684
+ static __always_inline int read_dentry_name(struct dentry *dentry, char *buffer, size_t max)
685
+ {
686
+ struct dentry_base d = {};
687
+ struct qstr qname = {};
688
+
689
+ if (!dentry || !buffer) {
690
+ return -1;
691
+ }
692
+
693
+ bpf_probe_read_kernel(&d, sizeof(d), (void *)dentry);
694
+ if (!d.d_name.name) {
695
+ return -1;
696
+ }
697
+
698
+ unsigned int len = d.d_name.len;
699
+ if (len > max) {
700
+ len = max;
701
+ }
702
+
703
+ bpf_probe_read_kernel_str(buffer, len + 1, (void *)d.d_name.name);
704
+ return len;
705
+ }
706
+
399
707
  TRACEPOINT_PROBE(sched, sched_process_fork)
400
708
  {
401
709
  u32 parent = args->parent_pid;
402
710
  u32 child = args->child_pid;
403
711
  u8 one = 1;
712
+ int is_target = 0;
404
713
 
405
714
  u8 *enabled_root = config_root_targets.lookup(&parent);
406
715
  if (enabled_root && *enabled_root == 1) {
716
+ is_target = 1;
407
717
  config_spawned_targets.update(&child, &one);
408
- return 0;
718
+ } else {
719
+ u8 *enabled_spawned = config_spawned_targets.lookup(&parent);
720
+ if (enabled_spawned && *enabled_spawned == 1) {
721
+ is_target = 1;
722
+ config_spawned_targets.update(&child, &one);
723
+ }
409
724
  }
410
725
 
411
- u8 *enabled_spawned = config_spawned_targets.lookup(&parent);
412
- if (enabled_spawned && *enabled_spawned == 1) {
413
- config_spawned_targets.update(&child, &one);
726
+ if (is_target) {
727
+ u64 pid_tgid = bpf_get_current_pid_tgid();
728
+ struct event_t ev = {};
729
+ ev.pid = pid_tgid >> 32;
730
+ __builtin_memcpy(ev.event_name, "proc_fork", 10);
731
+ __builtin_memcpy(&ev.payload[0], &child, sizeof(child));
732
+ __builtin_memcpy(&ev.payload[4], &child, sizeof(child));
733
+ submit_event(&ev);
414
734
  }
415
735
 
416
736
  return 0;
@@ -429,7 +749,6 @@ module Vivarium
429
749
  u64 pid_tgid = bpf_get_current_pid_tgid();
430
750
  u32 pid = pid_tgid >> 32;
431
751
  u32 tid = (u32)pid_tgid;
432
- bpf_trace_printk("vivarium: invoked pid=%d\\n", pid);
433
752
  if (!target_enabled(pid, tid)) {
434
753
  return 0;
435
754
  }
@@ -443,11 +762,9 @@ module Vivarium
443
762
  if (path_ret < 0) {
444
763
  if (ev.payload[0] == 0) {
445
764
  __builtin_memcpy(ev.payload, "<path_error>", 13);
446
- bpf_trace_printk("vivarium: failed to obtain full path. pid=%d path=%s\\n", pid, ev.payload);
447
765
  }
448
766
  }
449
767
 
450
- bpf_trace_printk("vivarium: pid=%d path=%s\\n", pid, ev.payload);
451
768
  submit_event(&ev);
452
769
 
453
770
  return 0;
@@ -630,6 +947,427 @@ module Vivarium
630
947
 
631
948
  return 0;
632
949
  }
950
+
951
+ TRACEPOINT_PROBE(syscalls, sys_enter_execve)
952
+ {
953
+ u64 pid_tgid = bpf_get_current_pid_tgid();
954
+ u32 pid = pid_tgid >> 32;
955
+ u32 tid = (u32)pid_tgid;
956
+ const char *argv0 = 0;
957
+ const char *argv1 = 0;
958
+ const char *argv2 = 0;
959
+
960
+ if (!target_enabled(pid, tid)) {
961
+ return 0;
962
+ }
963
+
964
+ if (!args->filename) {
965
+ return 0;
966
+ }
967
+
968
+ struct event_t ev = {};
969
+ ev.pid = pid;
970
+ __builtin_memcpy(ev.event_name, "proc_exec", 10);
971
+ bpf_probe_read_user_str(&ev.payload[0], #{PROC_EXEC_SLOT_SIZE}, args->filename);
972
+
973
+ if (args->argv) {
974
+ bpf_probe_read_user(&argv0, sizeof(argv0), &args->argv[0]);
975
+ bpf_probe_read_user(&argv1, sizeof(argv1), &args->argv[1]);
976
+ bpf_probe_read_user(&argv2, sizeof(argv2), &args->argv[2]);
977
+
978
+ if (argv0) {
979
+ bpf_probe_read_user_str(&ev.payload[#{PROC_EXEC_SLOT_SIZE}], #{PROC_EXEC_SLOT_SIZE}, argv0);
980
+ }
981
+ if (argv1) {
982
+ bpf_probe_read_user_str(&ev.payload[#{PROC_EXEC_SLOT_SIZE * 2}], #{PROC_EXEC_SLOT_SIZE}, argv1);
983
+ }
984
+ if (argv2) {
985
+ bpf_probe_read_user_str(&ev.payload[#{PROC_EXEC_SLOT_SIZE * 3}], #{PROC_EXEC_SLOT_SIZE}, argv2);
986
+ }
987
+ }
988
+
989
+ submit_event(&ev);
990
+ return 0;
991
+ }
992
+
993
+ LSM_PROBE(ptrace_access_check, struct task_struct *child, unsigned int mode)
994
+ {
995
+ u64 pid_tgid = bpf_get_current_pid_tgid();
996
+ u32 pid = pid_tgid >> 32;
997
+ u32 tid = (u32)pid_tgid;
998
+
999
+ if (!target_enabled(pid, tid)) {
1000
+ return 0;
1001
+ }
1002
+
1003
+ struct event_t ev = {};
1004
+ u32 mode32 = mode;
1005
+
1006
+ ev.pid = pid;
1007
+ __builtin_memcpy(ev.event_name, "ptrace_check", 13);
1008
+ __builtin_memcpy(&ev.payload[0], &mode32, sizeof(mode32));
1009
+ submit_event(&ev);
1010
+
1011
+ return 0;
1012
+ }
1013
+
1014
+ LSM_PROBE(sb_mount, const char *dev_name, const struct path *path, const char *type, unsigned long flags, void *data)
1015
+ {
1016
+ u64 pid_tgid = bpf_get_current_pid_tgid();
1017
+ u32 pid = pid_tgid >> 32;
1018
+ u32 tid = (u32)pid_tgid;
1019
+
1020
+ if (!target_enabled(pid, tid)) {
1021
+ return 0;
1022
+ }
1023
+
1024
+ struct event_t ev = {};
1025
+ u64 flags64 = flags;
1026
+
1027
+ ev.pid = pid;
1028
+ __builtin_memcpy(ev.event_name, "sb_mount", 9);
1029
+ __builtin_memcpy(&ev.payload[0], &flags64, sizeof(flags64));
1030
+
1031
+ if (dev_name) {
1032
+ bpf_probe_read_kernel_str(&ev.payload[8], 120, dev_name);
1033
+ }
1034
+ if (type) {
1035
+ bpf_probe_read_kernel_str(&ev.payload[128], 120, type);
1036
+ }
1037
+
1038
+ submit_event(&ev);
1039
+
1040
+ return 0;
1041
+ }
1042
+
1043
+ LSM_PROBE(kernel_read_file, struct file *file, int id, int contents)
1044
+ {
1045
+ u64 pid_tgid = bpf_get_current_pid_tgid();
1046
+ u32 pid = pid_tgid >> 32;
1047
+ u32 tid = (u32)pid_tgid;
1048
+
1049
+ if (!target_enabled(pid, tid)) {
1050
+ return 0;
1051
+ }
1052
+
1053
+ struct event_t ev = {};
1054
+ u32 id32 = id;
1055
+ u32 contents32 = contents;
1056
+
1057
+ ev.pid = pid;
1058
+ __builtin_memcpy(ev.event_name, "kernel_read_file", 16);
1059
+ __builtin_memcpy(&ev.payload[0], &id32, sizeof(id32));
1060
+ __builtin_memcpy(&ev.payload[4], &contents32, sizeof(contents32));
1061
+ submit_event(&ev);
1062
+
1063
+ return 0;
1064
+ }
1065
+
1066
+ LSM_PROBE(task_kill, struct task_struct *p, struct kernel_siginfo *info, int sig, const struct cred *cred)
1067
+ {
1068
+ u64 pid_tgid = bpf_get_current_pid_tgid();
1069
+ u32 pid = pid_tgid >> 32;
1070
+ u32 tid = (u32)pid_tgid;
1071
+
1072
+ if (!target_enabled(pid, tid)) {
1073
+ return 0;
1074
+ }
1075
+
1076
+ struct event_t ev = {};
1077
+
1078
+ ev.pid = pid;
1079
+ __builtin_memcpy(ev.event_name, "task_kill", 10);
1080
+ __builtin_memcpy(&ev.payload[0], &sig, sizeof(sig));
1081
+ submit_event(&ev);
1082
+
1083
+ return 0;
1084
+ }
1085
+
1086
+ LSM_PROBE(task_fix_setuid, struct cred *new, const struct cred *old, int flags)
1087
+ {
1088
+ u64 pid_tgid = bpf_get_current_pid_tgid();
1089
+ u32 pid = pid_tgid >> 32;
1090
+ u32 tid = (u32)pid_tgid;
1091
+
1092
+ if (!target_enabled(pid, tid)) {
1093
+ return 0;
1094
+ }
1095
+
1096
+ struct event_t ev = {};
1097
+ u32 flags32 = flags;
1098
+
1099
+ ev.pid = pid;
1100
+ __builtin_memcpy(ev.event_name, "setid_change", 13);
1101
+ __builtin_memcpy(&ev.payload[0], &flags32, sizeof(flags32));
1102
+ submit_event(&ev);
1103
+
1104
+ return 0;
1105
+ }
1106
+
1107
+ LSM_PROBE(capable, const struct cred *cred, struct user_namespace *targ_ns, int cap, unsigned int opts)
1108
+ {
1109
+ u64 pid_tgid = bpf_get_current_pid_tgid();
1110
+ u32 pid = pid_tgid >> 32;
1111
+ u32 tid = (u32)pid_tgid;
1112
+
1113
+ if (!target_enabled(pid, tid)) {
1114
+ return 0;
1115
+ }
1116
+
1117
+ if (!monitored_capability(cap)) {
1118
+ return 0;
1119
+ }
1120
+
1121
+ struct event_t ev = {};
1122
+ u32 cap32 = cap;
1123
+ u32 opts32 = opts;
1124
+
1125
+ ev.pid = pid;
1126
+ __builtin_memcpy(ev.event_name, "capable_check", 14);
1127
+ __builtin_memcpy(&ev.payload[0], &cap32, sizeof(cap32));
1128
+ __builtin_memcpy(&ev.payload[4], &opts32, sizeof(opts32));
1129
+ submit_event(&ev);
1130
+
1131
+ return 0;
1132
+ }
1133
+
1134
+ LSM_PROBE(bprm_creds_from_file, struct linux_binprm *bprm, struct file *file)
1135
+ {
1136
+ u64 pid_tgid = bpf_get_current_pid_tgid();
1137
+ u32 pid = pid_tgid >> 32;
1138
+ u32 tid = (u32)pid_tgid;
1139
+
1140
+ if (!target_enabled(pid, tid)) {
1141
+ return 0;
1142
+ }
1143
+
1144
+ struct event_t ev = {};
1145
+ u8 has_file = 0;
1146
+
1147
+ ev.pid = pid;
1148
+ __builtin_memcpy(ev.event_name, "bprm_creds", 11);
1149
+
1150
+ if (file) {
1151
+ has_file = 1;
1152
+ bpf_d_path(&file->f_path, &ev.payload[1], sizeof(ev.payload) - 1);
1153
+ }
1154
+
1155
+ __builtin_memcpy(&ev.payload[0], &has_file, sizeof(has_file));
1156
+ submit_event(&ev);
1157
+
1158
+ return 0;
1159
+ }
1160
+
1161
+ LSM_PROBE(inode_symlink, struct inode *dir, struct dentry *dentry, const char *oldname)
1162
+ {
1163
+ u64 pid_tgid = bpf_get_current_pid_tgid();
1164
+ u32 pid = pid_tgid >> 32;
1165
+ u32 tid = (u32)pid_tgid;
1166
+
1167
+ if (!target_enabled(pid, tid)) {
1168
+ return 0;
1169
+ }
1170
+
1171
+ struct event_t ev = {};
1172
+ ev.pid = pid;
1173
+ __builtin_memcpy(ev.event_name, "file_symlink", 13);
1174
+
1175
+ if (oldname) {
1176
+ bpf_probe_read_user_str(&ev.payload[0], 128, oldname);
1177
+ }
1178
+
1179
+ if (dentry) {
1180
+ read_dentry_name(dentry, &ev.payload[128], 128);
1181
+ }
1182
+
1183
+ submit_event(&ev);
1184
+ return 0;
1185
+ }
1186
+
1187
+ LSM_PROBE(inode_link, struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
1188
+ {
1189
+ u64 pid_tgid = bpf_get_current_pid_tgid();
1190
+ u32 pid = pid_tgid >> 32;
1191
+ u32 tid = (u32)pid_tgid;
1192
+
1193
+ if (!target_enabled(pid, tid)) {
1194
+ return 0;
1195
+ }
1196
+
1197
+ struct event_t ev = {};
1198
+ ev.pid = pid;
1199
+ __builtin_memcpy(ev.event_name, "file_hardlink", 14);
1200
+
1201
+ if (old_dentry) {
1202
+ read_dentry_name(old_dentry, &ev.payload[0], 128);
1203
+ }
1204
+
1205
+ if (new_dentry) {
1206
+ read_dentry_name(new_dentry, &ev.payload[128], 128);
1207
+ }
1208
+
1209
+ submit_event(&ev);
1210
+ return 0;
1211
+ }
1212
+
1213
+ LSM_PROBE(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
1214
+ struct inode *new_dir, struct dentry *new_dentry, unsigned int flags)
1215
+ {
1216
+ u64 pid_tgid = bpf_get_current_pid_tgid();
1217
+ u32 pid = pid_tgid >> 32;
1218
+ u32 tid = (u32)pid_tgid;
1219
+
1220
+ if (!target_enabled(pid, tid)) {
1221
+ return 0;
1222
+ }
1223
+
1224
+ struct event_t ev = {};
1225
+ ev.pid = pid;
1226
+ __builtin_memcpy(ev.event_name, "file_rename", 12);
1227
+
1228
+ if (old_dentry) {
1229
+ read_dentry_name(old_dentry, &ev.payload[0], 128);
1230
+ }
1231
+
1232
+ if (new_dentry) {
1233
+ read_dentry_name(new_dentry, &ev.payload[128], 128);
1234
+ }
1235
+
1236
+ submit_event(&ev);
1237
+ return 0;
1238
+ }
1239
+
1240
+ LSM_PROBE(path_chmod, struct path *path, umode_t mode)
1241
+ {
1242
+ u64 pid_tgid = bpf_get_current_pid_tgid();
1243
+ u32 pid = pid_tgid >> 32;
1244
+ u32 tid = (u32)pid_tgid;
1245
+
1246
+ if (!target_enabled(pid, tid)) {
1247
+ return 0;
1248
+ }
1249
+
1250
+ if (!path) {
1251
+ return 0;
1252
+ }
1253
+
1254
+ struct event_t ev = {};
1255
+ u16 mode_short = mode & 0xFFFF;
1256
+ ev.pid = pid;
1257
+ __builtin_memcpy(ev.event_name, "file_chmod", 11);
1258
+ __builtin_memcpy(&ev.payload[0], &mode_short, sizeof(mode_short));
1259
+
1260
+ bpf_d_path(path, &ev.payload[2], sizeof(ev.payload) - 2);
1261
+ submit_event(&ev);
1262
+ return 0;
1263
+ }
1264
+
1265
+ TRACEPOINT_PROBE(syscalls, sys_enter_getdents64)
1266
+ {
1267
+ u64 pid_tgid = bpf_get_current_pid_tgid();
1268
+ u32 pid = pid_tgid >> 32;
1269
+ u32 tid = (u32)pid_tgid;
1270
+
1271
+ if (!target_enabled(pid, tid)) {
1272
+ return 0;
1273
+ }
1274
+
1275
+ struct event_t ev = {};
1276
+ u32 fd = args->fd;
1277
+ u32 count = args->count;
1278
+
1279
+ ev.pid = pid;
1280
+ __builtin_memcpy(ev.event_name, "file_getdents", 14);
1281
+ __builtin_memcpy(&ev.payload[0], &fd, sizeof(fd));
1282
+ __builtin_memcpy(&ev.payload[4], &count, sizeof(count));
1283
+
1284
+ submit_event(&ev);
1285
+ return 0;
1286
+ }
1287
+
1288
+ int on_span_start(struct pt_regs *ctx)
1289
+ {
1290
+ u64 pid_tgid = bpf_get_current_pid_tgid();
1291
+ u32 pid = pid_tgid >> 32;
1292
+ u32 tid = (u32)pid_tgid;
1293
+
1294
+ if (!target_enabled(pid, tid)) {
1295
+ return 0;
1296
+ }
1297
+
1298
+ u64 method_id = 0;
1299
+ u64 file_id = 0;
1300
+ u64 lineno = 0;
1301
+ bpf_usdt_readarg(1, ctx, &method_id);
1302
+ bpf_usdt_readarg(2, ctx, &file_id);
1303
+ bpf_usdt_readarg(3, ctx, &lineno);
1304
+
1305
+ struct event_t ev = {};
1306
+ ev.pid = pid;
1307
+ __builtin_memcpy(ev.event_name, "span_start", 11);
1308
+ __builtin_memcpy(&ev.payload[0], &method_id, sizeof(method_id));
1309
+ __builtin_memcpy(&ev.payload[8], &file_id, sizeof(file_id));
1310
+ __builtin_memcpy(&ev.payload[16], &lineno, sizeof(lineno));
1311
+ submit_event(&ev);
1312
+ return 0;
1313
+ }
1314
+
1315
+ int on_span_stop(struct pt_regs *ctx)
1316
+ {
1317
+ u64 pid_tgid = bpf_get_current_pid_tgid();
1318
+ u32 pid = pid_tgid >> 32;
1319
+ u32 tid = (u32)pid_tgid;
1320
+
1321
+ if (!target_enabled(pid, tid)) {
1322
+ return 0;
1323
+ }
1324
+
1325
+ u64 method_id = 0;
1326
+ u64 file_id = 0;
1327
+ u64 lineno = 0;
1328
+ bpf_usdt_readarg(1, ctx, &method_id);
1329
+ bpf_usdt_readarg(2, ctx, &file_id);
1330
+ bpf_usdt_readarg(3, ctx, &lineno);
1331
+
1332
+ struct event_t ev = {};
1333
+ ev.pid = pid;
1334
+ __builtin_memcpy(ev.event_name, "span_stop", 10);
1335
+ __builtin_memcpy(&ev.payload[0], &method_id, sizeof(method_id));
1336
+ __builtin_memcpy(&ev.payload[8], &file_id, sizeof(file_id));
1337
+ __builtin_memcpy(&ev.payload[16], &lineno, sizeof(lineno));
1338
+ submit_event(&ev);
1339
+ return 0;
1340
+ }
1341
+
1342
+ int on_span_raise(struct pt_regs *ctx)
1343
+ {
1344
+ u64 pid_tgid = bpf_get_current_pid_tgid();
1345
+ u32 pid = pid_tgid >> 32;
1346
+ u32 tid = (u32)pid_tgid;
1347
+
1348
+ if (!target_enabled(pid, tid)) {
1349
+ return 0;
1350
+ }
1351
+
1352
+ u64 error_id = 0;
1353
+ u64 message_id = 0;
1354
+ u64 file_id = 0;
1355
+ u64 lineno = 0;
1356
+ bpf_usdt_readarg(1, ctx, &error_id);
1357
+ bpf_usdt_readarg(2, ctx, &message_id);
1358
+ bpf_usdt_readarg(3, ctx, &file_id);
1359
+ bpf_usdt_readarg(4, ctx, &lineno);
1360
+
1361
+ struct event_t ev = {};
1362
+ ev.pid = pid;
1363
+ __builtin_memcpy(ev.event_name, "span_raise", 11);
1364
+ __builtin_memcpy(&ev.payload[0], &error_id, sizeof(error_id));
1365
+ __builtin_memcpy(&ev.payload[8], &message_id, sizeof(message_id));
1366
+ __builtin_memcpy(&ev.payload[16], &file_id, sizeof(file_id));
1367
+ __builtin_memcpy(&ev.payload[24], &lineno, sizeof(lineno));
1368
+ submit_event(&ev);
1369
+ return 0;
1370
+ }
633
1371
  CLANG
634
1372
 
635
1373
  def initialize(pin_dir: Vivarium.bpf_pin_dir)
@@ -641,40 +1379,39 @@ module Vivarium
641
1379
  FileUtils.mkdir_p(@pin_dir)
642
1380
 
643
1381
  f_path_offset = detect_f_path_offset
644
- program = BPF_PROGRAM_TEMPLATE.gsub("__VIVARIUM_F_PATH_OFFSET__", f_path_offset.to_s)
1382
+ d_name_offset = detect_dentry_d_name_offset
1383
+ program = BPF_PROGRAM_TEMPLATE
1384
+ .gsub("__VIVARIUM_F_PATH_OFFSET__", f_path_offset.to_s)
1385
+ .gsub("__VIVARIUM_DENTRY_D_NAME_OFFSET__", d_name_offset.to_s)
645
1386
 
646
- bpf = RbBCC::BCC.new(text: program)
647
- kprint_thread = start_kprint_logger(bpf)
1387
+ usdt_so_path = ENV.fetch("VIVARIUM_USDT_SO_PATH") { Vivarium.locate_vivarium_usdt_so }
1388
+ usdt = RbBCC::USDT.new(path: usdt_so_path)
1389
+ usdt.enable_probe(probe: "start_probe", fn_name: "on_span_start")
1390
+ usdt.enable_probe(probe: "stop_probe", fn_name: "on_span_stop")
1391
+ usdt.enable_probe(probe: "raise_probe", fn_name: "on_span_raise")
1392
+
1393
+ bpf = RbBCC::BCC.new(text: program, usdt_contexts: [usdt])
648
1394
 
649
1395
  config_root_targets = bpf["config_root_targets"]
650
1396
  config_spawned_targets = bpf["config_spawned_targets"]
651
- event_invoked = bpf["event_invoked"]
652
- event_write_pos = bpf["event_write_pos"]
1397
+ events_ringbuf = bpf["events"]
653
1398
 
654
- clear_event_slots(event_invoked)
655
- event_write_pos[0] = 0
656
1399
  config_spawned_targets.clear
657
1400
 
658
1401
  pin_map(config_root_targets, File.join(@pin_dir, "config_root_targets"))
659
1402
  pin_map(config_spawned_targets, File.join(@pin_dir, "config_spawned_targets"))
660
- pin_map(event_invoked, File.join(@pin_dir, "event_invoked"))
661
- pin_map(event_write_pos, File.join(@pin_dir, "event_write_pos"))
1403
+ pin_map(events_ringbuf, File.join(@pin_dir, "events"))
662
1404
 
663
1405
  puts "[vivariumd] started"
664
1406
  puts "[vivariumd] pinned maps in #{@pin_dir}"
665
1407
  puts "[vivariumd] watching LSM file_open (f_path offset=#{f_path_offset})"
666
- puts "[vivariumd] kprint logger enabled"
1408
+ puts "[vivariumd] USDT attached via #{usdt_so_path}"
667
1409
 
668
1410
  loop do
669
1411
  sleep 1
670
1412
  end
671
1413
  rescue Interrupt
672
1414
  puts "\n[vivariumd] stopping"
673
- ensure
674
- if kprint_thread
675
- kprint_thread.kill
676
- kprint_thread.join(0.2)
677
- end
678
1415
  end
679
1416
 
680
1417
  private
@@ -690,34 +1427,6 @@ module Vivarium
690
1427
  RbBCC::BCC.pin!(table.map_fd, path)
691
1428
  end
692
1429
 
693
- def clear_event_slots(table)
694
- ptr = Fiddle::Pointer.malloc(EVENT_STRUCT_SIZE)
695
- ptr[0, EVENT_STRUCT_SIZE] = "\x00" * EVENT_STRUCT_SIZE
696
- EVENT_CAPACITY.times do |idx|
697
- table[idx] = ptr
698
- end
699
- end
700
-
701
- def start_kprint_logger(bpf)
702
- Thread.new do
703
- begin
704
- bpf.trace_fields do |_task, pid, _cpu, _flags, ts, msg|
705
- line = msg.to_s.strip
706
- next unless line.start_with?("vivarium:")
707
-
708
- puts "[vivariumd:kprint #{ts} pid=#{pid}] #{line}"
709
- end
710
- rescue IOError, Errno::EINTR
711
- nil
712
- rescue StandardError => e
713
- warn "[vivariumd] kprint stream stopped: #{e.class}: #{e.message}"
714
- end
715
- end
716
- rescue StandardError => e
717
- warn "[vivariumd] failed to start kprint logger: #{e.class}: #{e.message}"
718
- nil
719
- end
720
-
721
1430
  def detect_f_path_offset
722
1431
  env_offset = ENV["VIVARIUM_FILE_F_PATH_OFFSET"]
723
1432
  return Integer(env_offset, 10) if env_offset
@@ -777,78 +1486,194 @@ module Vivarium
777
1486
  rescue Errno::ENOENT
778
1487
  raise Error, "bpftool is required to resolve struct file::f_path offset"
779
1488
  end
1489
+
1490
+ def detect_dentry_d_name_offset
1491
+ env_offset = ENV["VIVARIUM_DENTRY_D_NAME_OFFSET"]
1492
+ return Integer(env_offset, 10) if env_offset
1493
+
1494
+ raw = IO.popen(
1495
+ %w[bpftool btf dump file /sys/kernel/btf/vmlinux format raw],
1496
+ err: IO::NULL,
1497
+ &:read
1498
+ )
1499
+
1500
+ in_dentry_struct = false
1501
+ d_name_bits_offset = nil
1502
+
1503
+ raw.each_line do |line|
1504
+ if line =~ /^\[\d+\] STRUCT 'dentry' /
1505
+ in_dentry_struct = true
1506
+ next
1507
+ end
1508
+
1509
+ if in_dentry_struct && line.start_with?("[")
1510
+ break
1511
+ end
1512
+
1513
+ next unless in_dentry_struct
1514
+
1515
+ if (match = line.match(/'d_name'.*bits_offset=(\d+)/))
1516
+ d_name_bits_offset = Integer(match[1], 10)
1517
+ break
1518
+ end
1519
+ end
1520
+
1521
+ if d_name_bits_offset
1522
+ if (d_name_bits_offset % 8).positive?
1523
+ raise Error, "unsupported d_name bits offset=#{d_name_bits_offset}"
1524
+ end
1525
+
1526
+ if d_name_bits_offset >= 1024
1527
+ warn "[vivariumd] suspicious d_name offset=#{d_name_bits_offset / 8}, fallback to offset=32"
1528
+ return 32
1529
+ end
1530
+
1531
+ return d_name_bits_offset / 8
1532
+ end
1533
+
1534
+ warn "[vivariumd] could not find struct dentry::d_name in BTF, fallback to offset=32"
1535
+ 32
1536
+ rescue Errno::ENOENT
1537
+ raise Error, "bpftool is required to resolve struct dentry::d_name offset"
1538
+ end
780
1539
  end
781
1540
 
782
1541
  class ObservationSession
783
- def initialize(store:, pid:, tracer:)
1542
+ def initialize(store:, pid:, tracer:, correlator:)
784
1543
  @store = store
785
1544
  @pid = pid
786
1545
  @tracer = tracer
1546
+ @correlator = correlator
787
1547
  @stopped = false
788
1548
  end
789
1549
 
790
1550
  def stop
791
1551
  return if @stopped
792
1552
 
1553
+ @stopped = true
793
1554
  @tracer.disable
794
1555
  @store.unregister_pid(@pid)
795
- @stopped = true
1556
+ @correlator.stop
796
1557
  end
797
1558
  end
798
1559
 
799
- def self.observe(pin_dir: bpf_pin_dir, logger: nil, dest: $stdout, format: :human)
800
- return scoped_observe(pin_dir: pin_dir, logger: logger, dest: dest, format: format) { yield } if block_given?
1560
+ def self.observe(pin_dir: bpf_pin_dir, dest: $stdout, &block)
1561
+ return scoped_observe(pin_dir: pin_dir, dest: dest, &block) if block_given?
801
1562
 
802
- top_observe(pin_dir: pin_dir, logger: logger, dest: dest, format: format)
1563
+ top_observe(pin_dir: pin_dir, dest: dest)
803
1564
  end
804
1565
 
805
- def self.top_observe(pin_dir: bpf_pin_dir, logger: nil, dest: $stdout, format: :human)
806
- logger ||= Logger.new(dest: dest, format: format)
1566
+ def self.top_observe(pin_dir: bpf_pin_dir, dest: $stdout)
1567
+ require "vivarium_usdt"
1568
+
807
1569
  store = MapStore.new(pin_dir: pin_dir)
808
1570
  pid = Process.pid
809
1571
  store.register_pid(pid)
810
- logger.info("top-level observing with pid=#{pid}")
811
1572
 
812
- tracer = build_observe_tracepoint(store, logger)
1573
+ method_id_queue = Thread::Queue.new
1574
+ main_tid = gettid
1575
+
1576
+ correlator = Correlator.new(
1577
+ pin_dir: pin_dir,
1578
+ observer_pid: pid,
1579
+ main_tid: main_tid,
1580
+ method_id_queue: method_id_queue,
1581
+ dest: dest
1582
+ )
1583
+ correlator.start
1584
+
1585
+ tracer = build_observe_tracepoint(method_id_queue)
813
1586
  tracer.enable
814
1587
 
815
- session = ObservationSession.new(store: store, pid: pid, tracer: tracer)
1588
+ session = ObservationSession.new(
1589
+ store: store, pid: pid, tracer: tracer, correlator: correlator
1590
+ )
816
1591
  at_exit { session.stop }
817
1592
  session
818
1593
  end
819
1594
 
820
- def self.scoped_observe(pin_dir:, logger:, dest:, format:)
821
- logger ||= Logger.new(dest: dest, format: format)
1595
+ def self.scoped_observe(pin_dir:, dest:)
1596
+ require "vivarium_usdt"
1597
+
822
1598
  store = MapStore.new(pin_dir: pin_dir)
823
1599
  pid = Process.pid
824
1600
  store.register_pid(pid)
825
- logger.info("scoped observing with pid=#{pid}")
826
1601
 
827
- tracer = build_observe_tracepoint(store, logger)
1602
+ method_id_queue = Thread::Queue.new
1603
+ main_tid = gettid
1604
+
1605
+ correlator = Correlator.new(
1606
+ pin_dir: pin_dir,
1607
+ observer_pid: pid,
1608
+ main_tid: main_tid,
1609
+ method_id_queue: method_id_queue,
1610
+ dest: dest
1611
+ )
1612
+ correlator.start
1613
+
1614
+ tracer = build_observe_tracepoint(method_id_queue)
828
1615
  tracer.enable
829
1616
 
830
1617
  yield
831
1618
  ensure
832
1619
  tracer&.disable
833
1620
  store&.unregister_pid(pid)
1621
+ correlator&.stop
834
1622
  end
835
1623
 
836
- def self.build_observe_tracepoint(store, logger)
837
- TracePoint.new(:return, :c_return) do |tp|
838
- events = store.drain_events
839
- next if events.empty?
1624
+ def self.build_observe_tracepoint(method_id_queue)
1625
+ allow_classes = SPAN_ALLOWCLASSES
1626
+ allowlist = SPAN_ALLOWLIST
1627
+ TracePoint.new(:call, :c_call, :return, :c_return, :raise) do |tp|
1628
+ if tp.event == :raise
1629
+ Vivarium::Usdt.raise(
1630
+ tp.raised_exception.class.to_s,
1631
+ tp.raised_exception.message.to_s,
1632
+ file: tp.path,
1633
+ lineno: tp.lineno
1634
+ )
1635
+ next
1636
+ end
840
1637
 
841
- stack = caller_locations(2, 16)
842
- stack = stack.reject { |loc| loc.path.to_s.include?("vivarium") } if filter_internal_frames?
843
- logger.log(events, tp, stack)
1638
+ signature = "#{tp.defined_class}##{tp.method_id}"
1639
+ is_target = allowlist.include?(signature) || \
1640
+ allow_classes.any? { |klass| tp.defined_class == klass } || \
1641
+ allow_classes.any? { |klass| tp.defined_class == klass.singleton_class }
1642
+ next unless is_target
1643
+
1644
+ case tp.event
1645
+ when :call, :c_call
1646
+ method_id = Vivarium::Usdt.start(tp.defined_class.to_s, tp.method_id.to_s, file: tp.path, lineno: tp.lineno)
1647
+ method_id_queue << [method_id, signature]
1648
+ when :return, :c_return
1649
+ Vivarium::Usdt.stop(tp.defined_class.to_s, tp.method_id.to_s, file: tp.path, lineno: tp.lineno)
1650
+ end
844
1651
  end
845
1652
  end
846
1653
 
847
- def self.filter_internal_frames?
848
- value = ENV["VIVARIUM_FILTER_INTERNAL_FRAMES"]
849
- return true if value.nil?
1654
+ def self.gettid
1655
+ @gettid_func ||= begin
1656
+ libc = Fiddle.dlopen("libc.so.6")
1657
+ Fiddle::Function.new(libc["gettid"], [], Fiddle::TYPE_INT)
1658
+ rescue Fiddle::DLError
1659
+ libc = Fiddle.dlopen(nil)
1660
+ Fiddle::Function.new(libc["gettid"], [], Fiddle::TYPE_INT)
1661
+ end
1662
+ @gettid_func.call
1663
+ end
850
1664
 
851
- !%w[0 false off no].include?(value.strip.downcase)
1665
+ def self.monotonic_ktime_ns
1666
+ Process.clock_gettime(Process::CLOCK_MONOTONIC, :nanosecond)
1667
+ end
1668
+
1669
+ def self.locate_vivarium_usdt_so
1670
+ require "vivarium_usdt/vivarium_usdt"
1671
+ so = $LOADED_FEATURES.find { |p| p =~ %r{vivarium_usdt/vivarium_usdt\.(so|bundle|dylib)\z} }
1672
+ raise Error, "vivarium_usdt native extension not found in $LOADED_FEATURES" unless so
1673
+
1674
+ File.realpath(so)
1675
+ rescue LoadError => e
1676
+ raise Error, "failed to load vivarium_usdt: #{e.message}"
852
1677
  end
853
1678
 
854
1679
  def self.run_daemon!(argv = ARGV)
@@ -861,3 +1686,6 @@ module Vivarium
861
1686
  Daemon.new(pin_dir: options[:pin_dir]).run
862
1687
  end
863
1688
  end
1689
+
1690
+ require_relative "vivarium/correlator"
1691
+ require_relative "vivarium/tree_renderer"