agentguard-local 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,613 @@
1
+ use crate::pressure::{MemoryReading, PressureHysteresis, classify_pressure_with_previous};
2
+ use crate::process::{Signal, pid_alive, sample_process_group, send_signal_to_group};
3
+ use crate::protocol::{DoctorReport, PROTOCOL_VERSION, Request, Response};
4
+ use crate::scheduler::{AdmissionDecision, decide_admission, ordered_queue, pause_candidates};
5
+ use crate::store::{Store, event_payload};
6
+ use crate::types::{JobRecord, JobState, Policy, PressureState, Priority, StatusSnapshot};
7
+ use serde_json::json;
8
+ use std::fs;
9
+ use std::io::{BufRead, BufReader, Write};
10
+ use std::os::unix::fs::PermissionsExt;
11
+ use std::os::unix::net::{UnixListener, UnixStream};
12
+ use std::path::{Path, PathBuf};
13
+ use std::process::Command;
14
+ use uuid::Uuid;
15
+
16
/// Startup settings consumed by [`run_daemon`].
#[derive(Debug, Clone)]
pub struct DaemonConfig {
    /// Filesystem path of the Unix domain socket the daemon binds and listens on.
    pub socket_path: PathBuf,
    /// Path handed to the store when it is opened.
    pub db_path: PathBuf,
    /// When `true` (and `AGENTGUARD_TEST_MODE` is not set) a startup banner is
    /// written to stderr.
    pub foreground: bool,
}
22
+
23
+ pub fn run_daemon(config: DaemonConfig) -> anyhow::Result<()> {
24
+ if config.foreground && std::env::var_os("AGENTGUARD_TEST_MODE").is_none() {
25
+ eprintln!(
26
+ "agentguardd: foreground mode, socket={}, db={}",
27
+ config.socket_path.display(),
28
+ config.db_path.display()
29
+ );
30
+ }
31
+ if let Some(parent) = config.socket_path.parent() {
32
+ fs::create_dir_all(parent)?;
33
+ fs::set_permissions(parent, fs::Permissions::from_mode(0o700))?;
34
+ }
35
+ if config.socket_path.exists() {
36
+ fs::remove_file(&config.socket_path)?;
37
+ }
38
+ let listener = UnixListener::bind(&config.socket_path)?;
39
+ fs::set_permissions(&config.socket_path, fs::Permissions::from_mode(0o600))?;
40
+
41
+ let mut daemon = Daemon::new(config)?;
42
+ daemon.reconcile_jobs()?;
43
+ for stream in listener.incoming() {
44
+ match stream {
45
+ Ok(stream) => {
46
+ if let Err(err) = daemon.handle_stream(stream) {
47
+ eprintln!("agentguardd: request failed: {err:#}");
48
+ }
49
+ }
50
+ Err(err) => eprintln!("agentguardd: socket error: {err}"),
51
+ }
52
+ }
53
+ Ok(())
54
+ }
55
+
56
+ struct Daemon {
57
+ config: DaemonConfig,
58
+ store: Store,
59
+ forced_pressure: Option<PressureState>,
60
+ pressure_hysteresis: PressureHysteresis,
61
+ }
62
+
63
+ impl Daemon {
64
+ fn new(config: DaemonConfig) -> anyhow::Result<Self> {
65
+ let store = Store::open(&config.db_path)?;
66
+ let policy = store.load_policy()?;
67
+ Ok(Self {
68
+ config,
69
+ store,
70
+ forced_pressure: None,
71
+ pressure_hysteresis: PressureHysteresis::new(policy.recovery_window_seconds as u32),
72
+ })
73
+ }
74
+
75
+ fn handle_stream(&mut self, stream: UnixStream) -> anyhow::Result<()> {
76
+ let mut reader = BufReader::new(stream.try_clone()?);
77
+ let mut line = String::new();
78
+ reader.read_line(&mut line)?;
79
+ let response = match serde_json::from_str::<Request>(&line) {
80
+ Ok(request) => self.handle_request(request),
81
+ Err(err) => Ok(Response::Error {
82
+ protocol_version: PROTOCOL_VERSION,
83
+ message: format!("invalid request: {err}"),
84
+ }),
85
+ }?;
86
+ let mut stream = stream;
87
+ serde_json::to_writer(&mut stream, &response)?;
88
+ stream.write_all(b"\n")?;
89
+ stream.flush()?;
90
+ Ok(())
91
+ }
92
+
93
+ fn handle_request(&mut self, request: Request) -> anyhow::Result<Response> {
94
+ if request.version() != PROTOCOL_VERSION {
95
+ return Ok(Response::Error {
96
+ protocol_version: PROTOCOL_VERSION,
97
+ message: format!("unsupported protocol version {}", request.version()),
98
+ });
99
+ }
100
+
101
+ if let Request::TestSetPressure { pressure, .. } = request {
102
+ if std::env::var_os("AGENTGUARD_TEST_MODE").is_none() {
103
+ return Ok(Response::Error {
104
+ protocol_version: PROTOCOL_VERSION,
105
+ message: "test pressure override requires AGENTGUARD_TEST_MODE=1".to_string(),
106
+ });
107
+ }
108
+ self.forced_pressure = Some(pressure);
109
+ self.store.add_event(
110
+ None,
111
+ "test_pressure_override",
112
+ json!({ "pressure": pressure }),
113
+ )?;
114
+ return Ok(Response::Ack {
115
+ protocol_version: PROTOCOL_VERSION,
116
+ message: format!("pressure set to {pressure}"),
117
+ });
118
+ }
119
+
120
+ let pressure = self.tick()?;
121
+ match request {
122
+ Request::Admission {
123
+ adapter,
124
+ command,
125
+ args,
126
+ cwd,
127
+ priority,
128
+ tty: _,
129
+ ..
130
+ } => self.admission(adapter, command, args, cwd, priority, pressure),
131
+ Request::LeaseCheck { job_id, .. } => self.lease_check(&job_id, pressure),
132
+ Request::Register {
133
+ job_id,
134
+ root_pid,
135
+ pgid,
136
+ ..
137
+ } => {
138
+ self.store.register_job(&job_id, root_pid, pgid)?;
139
+ self.store.add_event(
140
+ Some(&job_id),
141
+ "job_registered",
142
+ json!({ "root_pid": root_pid, "pgid": pgid }),
143
+ )?;
144
+ Ok(Response::Ack {
145
+ protocol_version: PROTOCOL_VERSION,
146
+ message: "registered".to_string(),
147
+ })
148
+ }
149
+ Request::Exit {
150
+ job_id, exit_code, ..
151
+ } => {
152
+ self.store.complete_job(&job_id, exit_code)?;
153
+ self.store.add_event(
154
+ Some(&job_id),
155
+ if exit_code == 0 {
156
+ "job_exited"
157
+ } else {
158
+ "job_failed"
159
+ },
160
+ json!({ "exit_code": exit_code }),
161
+ )?;
162
+ Ok(Response::Ack {
163
+ protocol_version: PROTOCOL_VERSION,
164
+ message: "exit recorded".to_string(),
165
+ })
166
+ }
167
+ Request::Status {
168
+ include_processes, ..
169
+ } => Ok(Response::Status {
170
+ protocol_version: PROTOCOL_VERSION,
171
+ snapshot: self.status_snapshot(pressure, include_processes)?,
172
+ }),
173
+ Request::PolicyGet { .. } => Ok(Response::Policy {
174
+ protocol_version: PROTOCOL_VERSION,
175
+ policy: self.store.load_policy()?,
176
+ }),
177
+ Request::PolicySet { key, value, .. } => {
178
+ let mut policy = self.store.load_policy()?;
179
+ policy.set_key(&key, &value)?;
180
+ self.store.save_policy(&policy)?;
181
+ self.store.add_event(
182
+ None,
183
+ "policy_updated",
184
+ json!({ "key": key, "value": value }),
185
+ )?;
186
+ Ok(Response::Policy {
187
+ protocol_version: PROTOCOL_VERSION,
188
+ policy,
189
+ })
190
+ }
191
+ Request::Pause { job_id, .. } => self.pause_job(&job_id, "user requested pause"),
192
+ Request::Resume { job_id, .. } => self.resume_job(&job_id, "user requested resume"),
193
+ Request::Cancel { job_id, .. } => self.cancel_job(&job_id),
194
+ Request::ToolPathSet {
195
+ tool, real_path, ..
196
+ } => {
197
+ self.store.set_tool_path(&tool, &real_path)?;
198
+ self.store.add_event(
199
+ None,
200
+ "tool_path_updated",
201
+ json!({ "tool": tool, "real_path": real_path }),
202
+ )?;
203
+ Ok(Response::Ack {
204
+ protocol_version: PROTOCOL_VERSION,
205
+ message: "tool path recorded".to_string(),
206
+ })
207
+ }
208
+ Request::Doctor { .. } => Ok(Response::Doctor {
209
+ protocol_version: PROTOCOL_VERSION,
210
+ report: self.doctor_report()?,
211
+ }),
212
+ Request::TestSetPressure { .. } => unreachable!("handled before tick"),
213
+ }
214
+ }
215
+
216
+ fn admission(
217
+ &mut self,
218
+ adapter: String,
219
+ command: String,
220
+ args: Vec<String>,
221
+ cwd: String,
222
+ priority: Priority,
223
+ pressure: PressureState,
224
+ ) -> anyhow::Result<Response> {
225
+ let policy = self.store.load_policy()?;
226
+ let id = Uuid::new_v4().to_string();
227
+ let mut job =
228
+ JobRecord::new_admission(id.clone(), adapter, command, args, cwd, priority, &policy)?;
229
+ let decision = decide_admission(&policy, pressure, self.store.active_count()?);
230
+ match decision {
231
+ AdmissionDecision::Admit => {
232
+ job.status = JobState::Starting;
233
+ job.started_at = Some(chrono::Utc::now());
234
+ self.store.insert_job(&job)?;
235
+ self.store.add_event(
236
+ Some(&id),
237
+ "admission_granted",
238
+ json!({ "pressure": pressure, "priority": priority }),
239
+ )?;
240
+ Ok(Response::Admission {
241
+ protocol_version: PROTOCOL_VERSION,
242
+ job_id: id,
243
+ admitted: true,
244
+ state: JobState::Starting.to_string(),
245
+ reason: None,
246
+ codex_max_threads: policy.default_codex_max_threads,
247
+ })
248
+ }
249
+ AdmissionDecision::Queue { reason } => {
250
+ job.status = JobState::Queued;
251
+ job.queued_reason = Some(reason.clone());
252
+ self.store.insert_job(&job)?;
253
+ self.store.add_event(
254
+ Some(&id),
255
+ "admission_queued",
256
+ json!({ "pressure": pressure, "reason": reason }),
257
+ )?;
258
+ Ok(Response::Admission {
259
+ protocol_version: PROTOCOL_VERSION,
260
+ job_id: id,
261
+ admitted: false,
262
+ state: JobState::Queued.to_string(),
263
+ reason: Some(reason),
264
+ codex_max_threads: policy.default_codex_max_threads,
265
+ })
266
+ }
267
+ }
268
+ }
269
+
270
+ fn lease_check(&mut self, job_id: &str, pressure: PressureState) -> anyhow::Result<Response> {
271
+ let Some(job) = self.store.get_job(job_id)? else {
272
+ return Ok(Response::Error {
273
+ protocol_version: PROTOCOL_VERSION,
274
+ message: format!("unknown job {job_id}"),
275
+ });
276
+ };
277
+ if job.status != JobState::Queued {
278
+ return Ok(Response::Lease {
279
+ protocol_version: PROTOCOL_VERSION,
280
+ job_id: job_id.to_string(),
281
+ admitted: matches!(job.status, JobState::Starting | JobState::Running),
282
+ state: job.status.to_string(),
283
+ reason: job.queued_reason,
284
+ });
285
+ }
286
+
287
+ let policy = self.store.load_policy()?;
288
+ let queued = ordered_queue(
289
+ self.store.list_jobs_by_state(&[JobState::Queued])?,
290
+ chrono::Utc::now(),
291
+ );
292
+ let is_next = queued.first().is_some_and(|next| next.id == job_id);
293
+ let active_count = self.store.active_count()?;
294
+ let decision = if is_next {
295
+ decide_admission(&policy, pressure, active_count)
296
+ } else {
297
+ AdmissionDecision::Queue {
298
+ reason: "waiting behind higher-priority queued job".to_string(),
299
+ }
300
+ };
301
+ match decision {
302
+ AdmissionDecision::Admit => {
303
+ self.store
304
+ .update_job_state(job_id, JobState::Starting, None)?;
305
+ self.store.add_event(
306
+ Some(job_id),
307
+ "queued_job_admitted",
308
+ json!({ "pressure": pressure }),
309
+ )?;
310
+ Ok(Response::Lease {
311
+ protocol_version: PROTOCOL_VERSION,
312
+ job_id: job_id.to_string(),
313
+ admitted: true,
314
+ state: JobState::Starting.to_string(),
315
+ reason: None,
316
+ })
317
+ }
318
+ AdmissionDecision::Queue { reason } => {
319
+ self.store
320
+ .update_job_state(job_id, JobState::Queued, Some(&reason))?;
321
+ Ok(Response::Lease {
322
+ protocol_version: PROTOCOL_VERSION,
323
+ job_id: job_id.to_string(),
324
+ admitted: false,
325
+ state: JobState::Queued.to_string(),
326
+ reason: Some(reason),
327
+ })
328
+ }
329
+ }
330
+ }
331
+
332
+ fn tick(&mut self) -> anyhow::Result<PressureState> {
333
+ let policy = self.store.load_policy()?;
334
+ self.pressure_hysteresis
335
+ .set_required_recovery_ticks(policy.recovery_window_seconds as u32);
336
+ let pressure = self.current_pressure(&policy);
337
+ if pressure == PressureState::Critical && policy.pause_enabled {
338
+ let jobs = self.store.list_jobs()?;
339
+ if let Some(job) = pause_candidates(&jobs).first()
340
+ && let Err(err) = self.pause_job(&job.id, "critical memory pressure")
341
+ {
342
+ self.store.add_event(
343
+ Some(&job.id),
344
+ "pause_failed",
345
+ json!({ "error": err.to_string() }),
346
+ )?;
347
+ }
348
+ } else if pressure == PressureState::Normal {
349
+ let paused = self.store.list_jobs_by_state(&[JobState::PausedByGuard])?;
350
+ if let Some(job) = paused.first()
351
+ && let Err(err) = self.resume_job(&job.id, "memory pressure recovered")
352
+ {
353
+ self.store.add_event(
354
+ Some(&job.id),
355
+ "resume_failed",
356
+ json!({ "error": err.to_string() }),
357
+ )?;
358
+ }
359
+ }
360
+ Ok(pressure)
361
+ }
362
+
363
+ fn current_pressure(&mut self, policy: &Policy) -> PressureState {
364
+ if let Some(pressure) = self.forced_pressure {
365
+ return self.pressure_hysteresis.observe(pressure);
366
+ }
367
+ if let Ok(path) = std::env::var("AGENTGUARD_FAKE_PRESSURE_FILE")
368
+ && let Ok(contents) = fs::read_to_string(path)
369
+ && let Ok(pressure) = contents.parse()
370
+ {
371
+ return self.pressure_hysteresis.observe(pressure);
372
+ }
373
+ if let Ok(value) = std::env::var("AGENTGUARD_FAKE_PRESSURE")
374
+ && let Ok(pressure) = value.parse()
375
+ {
376
+ return self.pressure_hysteresis.observe(pressure);
377
+ }
378
+ let reading = sample_system_memory_reading().unwrap_or_else(|| MemoryReading::quiet(0));
379
+ let classified =
380
+ classify_pressure_with_previous(policy, reading, self.pressure_hysteresis.state());
381
+ self.pressure_hysteresis.observe(classified)
382
+ }
383
+
384
+ fn pause_job(&mut self, job_id: &str, reason: &str) -> anyhow::Result<Response> {
385
+ let Some(job) = self.store.get_job(job_id)? else {
386
+ anyhow::bail!("unknown job {job_id}");
387
+ };
388
+ if let Some(pgid) = job.pgid {
389
+ send_signal_to_group(pgid, Signal::Stop)?;
390
+ }
391
+ self.store
392
+ .update_job_state(job_id, JobState::PausedByGuard, Some(reason))?;
393
+ self.store.add_event(
394
+ Some(job_id),
395
+ "job_paused",
396
+ json!({ "reason": reason, "pgid": job.pgid }),
397
+ )?;
398
+ Ok(Response::Ack {
399
+ protocol_version: PROTOCOL_VERSION,
400
+ message: "paused".to_string(),
401
+ })
402
+ }
403
+
404
+ fn resume_job(&mut self, job_id: &str, reason: &str) -> anyhow::Result<Response> {
405
+ let Some(job) = self.store.get_job(job_id)? else {
406
+ anyhow::bail!("unknown job {job_id}");
407
+ };
408
+ if let Some(pgid) = job.pgid {
409
+ send_signal_to_group(pgid, Signal::Continue)?;
410
+ }
411
+ self.store
412
+ .update_job_state(job_id, JobState::Running, None)?;
413
+ self.store.add_event(
414
+ Some(job_id),
415
+ "job_resumed",
416
+ json!({ "reason": reason, "pgid": job.pgid }),
417
+ )?;
418
+ Ok(Response::Ack {
419
+ protocol_version: PROTOCOL_VERSION,
420
+ message: "resumed".to_string(),
421
+ })
422
+ }
423
+
424
+ fn cancel_job(&mut self, job_id: &str) -> anyhow::Result<Response> {
425
+ let Some(job) = self.store.get_job(job_id)? else {
426
+ anyhow::bail!("unknown job {job_id}");
427
+ };
428
+ if let Some(pgid) = job.pgid
429
+ && matches!(job.status, JobState::Running | JobState::PausedByGuard)
430
+ {
431
+ send_signal_to_group(pgid, Signal::Terminate)?;
432
+ }
433
+ self.store
434
+ .update_job_state(job_id, JobState::Cancelled, Some("user requested cancel"))?;
435
+ self.store
436
+ .add_event(Some(job_id), "job_cancelled", event_payload("cancelled"))?;
437
+ Ok(Response::Ack {
438
+ protocol_version: PROTOCOL_VERSION,
439
+ message: "cancelled".to_string(),
440
+ })
441
+ }
442
+
443
+ fn status_snapshot(
444
+ &mut self,
445
+ pressure: PressureState,
446
+ include_processes: bool,
447
+ ) -> anyhow::Result<StatusSnapshot> {
448
+ if include_processes {
449
+ for job in self
450
+ .store
451
+ .list_jobs_by_state(&[JobState::Running, JobState::PausedByGuard])?
452
+ {
453
+ if let Some(pgid) = job.pgid {
454
+ let sample = sample_process_group(&job.id, pgid, pressure);
455
+ self.store.record_sample(&sample)?;
456
+ }
457
+ }
458
+ }
459
+ Ok(StatusSnapshot {
460
+ pressure,
461
+ policy: self.store.load_policy()?,
462
+ jobs: self.store.list_jobs()?,
463
+ events: self.store.recent_events(25)?,
464
+ tool_paths: self.store.list_tool_paths()?,
465
+ socket_path: self.config.socket_path.display().to_string(),
466
+ db_path: self.config.db_path.display().to_string(),
467
+ })
468
+ }
469
+
470
+ fn reconcile_jobs(&mut self) -> anyhow::Result<()> {
471
+ for job in self.store.list_jobs()? {
472
+ if job.status.is_terminal() || job.status == JobState::Queued {
473
+ continue;
474
+ }
475
+ let reconciled = job.root_pid.is_some_and(pid_alive);
476
+ if reconciled {
477
+ self.store.add_event(
478
+ Some(&job.id),
479
+ "daemon_restart_reconciled",
480
+ json!({ "root_pid": job.root_pid, "pgid": job.pgid }),
481
+ )?;
482
+ } else {
483
+ self.store.update_job_state(
484
+ &job.id,
485
+ JobState::Lost,
486
+ Some("daemon restarted and process could not be reconciled"),
487
+ )?;
488
+ self.store.add_event(
489
+ Some(&job.id),
490
+ "daemon_restart_lost",
491
+ json!({ "root_pid": job.root_pid, "pgid": job.pgid }),
492
+ )?;
493
+ }
494
+ }
495
+ Ok(())
496
+ }
497
+
498
+ fn doctor_report(&self) -> anyhow::Result<DoctorReport> {
499
+ let plist = home_dir()
500
+ .map(|home| home.join("Library/LaunchAgents/dev.agentguard.agentguardd.plist"))
501
+ .unwrap_or_default();
502
+ Ok(DoctorReport {
503
+ os_supported: std::env::consts::OS == "macos",
504
+ os: format!("{} {}", std::env::consts::OS, std::env::consts::ARCH),
505
+ socket_path: self.config.socket_path.display().to_string(),
506
+ socket_exists: self.config.socket_path.exists(),
507
+ db_path: self.config.db_path.display().to_string(),
508
+ db_exists: self.config.db_path.exists(),
509
+ launchd_plist_exists: plist.exists(),
510
+ discovered_tools: self
511
+ .store
512
+ .list_tool_paths()?
513
+ .into_iter()
514
+ .map(|tool| (tool.tool, tool.real_path))
515
+ .collect(),
516
+ })
517
+ }
518
+ }
519
+
520
+ fn sample_system_memory_reading() -> Option<MemoryReading> {
521
+ let total_bytes = sysctl_u64("hw.memsize")?;
522
+ let Ok(output) = Command::new("/usr/bin/vm_stat").output() else {
523
+ return None;
524
+ };
525
+ if !output.status.success() {
526
+ return None;
527
+ }
528
+ let text = String::from_utf8_lossy(&output.stdout);
529
+ let (page_size, available_pages) = parse_vm_stat(&text)?;
530
+ let available_bytes = available_pages.saturating_mul(page_size);
531
+ let used_percent =
532
+ 100_u64.saturating_sub(available_bytes.saturating_mul(100) / total_bytes) as u8;
533
+ Some(MemoryReading::quiet(used_percent))
534
+ }
535
+
536
/// Runs `/usr/sbin/sysctl -n <name>` and parses its trimmed stdout as a `u64`.
///
/// Returns `None` when the command cannot be spawned, exits unsuccessfully, or
/// prints something that is not an unsigned integer.
fn sysctl_u64(name: &str) -> Option<u64> {
    let mut command = Command::new("/usr/sbin/sysctl");
    command.arg("-n").arg(name);

    let Ok(output) = command.output() else {
        return None;
    };
    if !output.status.success() {
        return None;
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    let value = stdout.trim().parse().ok();
    value
}
546
+
547
/// Extracts `(page_size, available_pages)` from `vm_stat` output.
///
/// The page size is the first whitespace-delimited token after
/// `"page size of "` on the first line. Available pages are the sum of the
/// counts on lines starting with `Pages free:`, `Pages speculative:`,
/// `Pages inactive:` or `Pages purgeable:`; a line's count is formed from all
/// ASCII digits found on that line.
///
/// Returns `None` when the first line or page size is missing or unparsable,
/// or when a counted line contains no parsable number.
fn parse_vm_stat(text: &str) -> Option<(u64, u64)> {
    const COUNTED_PREFIXES: [&str; 4] = [
        "Pages free:",
        "Pages speculative:",
        "Pages inactive:",
        "Pages purgeable:",
    ];

    let header = text.lines().next()?;
    let size_token = header
        .split("page size of ")
        .nth(1)?
        .split_whitespace()
        .next()?;
    let page_size: u64 = size_token.parse().ok()?;

    let mut available_pages = 0_u64;
    let counted_lines = text.lines().filter(|line| {
        COUNTED_PREFIXES
            .iter()
            .any(|prefix| line.starts_with(*prefix))
    });
    for line in counted_lines {
        // Keep only the digits; an empty result fails to parse and yields `None`.
        let numerals: String = line.chars().filter(char::is_ascii_digit).collect();
        available_pages += numerals.parse::<u64>().ok()?;
    }

    Some((page_size, available_pages))
}
569
+
570
/// Collects every ASCII digit in `line`, in order, into one string.
///
/// Returns `None` when the line contains no ASCII digit.
fn digits(line: &str) -> Option<String> {
    let kept: String = line.chars().filter(char::is_ascii_digit).collect();
    (!kept.is_empty()).then_some(kept)
}
578
+
579
/// Returns the value of the `HOME` environment variable as a path, or `None`
/// when the variable is not set.
fn home_dir() -> Option<PathBuf> {
    let home = std::env::var_os("HOME")?;
    Some(PathBuf::from(home))
}
582
+
583
+ pub fn default_socket_path() -> PathBuf {
584
+ home_dir()
585
+ .unwrap_or_else(|| Path::new(".").to_path_buf())
586
+ .join(".agentguard/agentguard.sock")
587
+ }
588
+
589
+ pub fn default_db_path() -> PathBuf {
590
+ home_dir()
591
+ .unwrap_or_else(|| Path::new(".").to_path_buf())
592
+ .join(".agentguard/state.db")
593
+ }
594
+
595
#[cfg(test)]
mod tests {
    use super::*;

    /// Free, inactive, speculative and purgeable pages are summed
    /// (10 + 20 + 30 + 5); active, throttled and wired pages are ignored.
    #[test]
    fn vm_stat_parser_counts_reclaimable_pages_as_available() {
        let sample = concat!(
            "Mach Virtual Memory Statistics: (page size of 16384 bytes)\n",
            "Pages free: 10.\n",
            "Pages active: 90.\n",
            "Pages inactive: 20.\n",
            "Pages speculative: 30.\n",
            "Pages throttled: 0.\n",
            "Pages wired down: 40.\n",
            "Pages purgeable: 5.\n",
        );
        let parsed = parse_vm_stat(sample);
        assert_eq!(parsed, Some((16_384, 65)));
    }
}