agentguard-local 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +727 -0
- package/Cargo.toml +22 -0
- package/README.md +140 -0
- package/crates/agentguard/Cargo.toml +23 -0
- package/crates/agentguard/src/bin/agentguard.rs +460 -0
- package/crates/agentguard/src/bin/agentguardd.rs +23 -0
- package/crates/agentguard/tests/integration.rs +384 -0
- package/crates/agentguard-core/Cargo.toml +19 -0
- package/crates/agentguard-core/src/adapter.rs +69 -0
- package/crates/agentguard-core/src/lib.rs +14 -0
- package/crates/agentguard-core/src/lifecycle.rs +41 -0
- package/crates/agentguard-core/src/policy.rs +117 -0
- package/crates/agentguard-core/src/pressure.rs +186 -0
- package/crates/agentguard-core/src/process.rs +100 -0
- package/crates/agentguard-core/src/protocol.rs +186 -0
- package/crates/agentguard-core/src/scheduler.rs +132 -0
- package/crates/agentguard-core/src/server.rs +613 -0
- package/crates/agentguard-core/src/store.rs +434 -0
- package/crates/agentguard-core/src/types.rs +248 -0
- package/docs/DECISIONS.md +37 -0
- package/docs/TESTING.md +75 -0
- package/npm/bin/agentguard +9 -0
- package/npm/bin/agentguardd +9 -0
- package/npm/bin/darwin-arm64/agentguard +0 -0
- package/npm/bin/darwin-arm64/agentguardd +0 -0
- package/package.json +44 -0
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
use crate::types::{Policy, PressureState};
|
|
2
|
+
use serde::{Deserialize, Serialize};
|
|
3
|
+
|
|
4
|
+
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
|
5
|
+
pub struct MemoryReading {
|
|
6
|
+
pub used_percent: u8,
|
|
7
|
+
pub dispatch_warning: bool,
|
|
8
|
+
pub dispatch_critical: bool,
|
|
9
|
+
pub pageout_delta: u64,
|
|
10
|
+
pub swap_used_percent: u8,
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
impl MemoryReading {
|
|
14
|
+
pub fn quiet(used_percent: u8) -> Self {
|
|
15
|
+
Self {
|
|
16
|
+
used_percent,
|
|
17
|
+
dispatch_warning: false,
|
|
18
|
+
dispatch_critical: false,
|
|
19
|
+
pageout_delta: 0,
|
|
20
|
+
swap_used_percent: 0,
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
pub fn classify_pressure(policy: &Policy, reading: MemoryReading) -> PressureState {
|
|
26
|
+
classify_pressure_with_previous(policy, reading, PressureState::Normal)
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
pub fn classify_pressure_with_previous(
|
|
30
|
+
policy: &Policy,
|
|
31
|
+
reading: MemoryReading,
|
|
32
|
+
previous: PressureState,
|
|
33
|
+
) -> PressureState {
|
|
34
|
+
if reading.dispatch_critical
|
|
35
|
+
|| reading.used_percent >= policy.critical_memory_pressure_percent
|
|
36
|
+
|| reading.swap_used_percent >= 80
|
|
37
|
+
|| reading.pageout_delta >= 10_000
|
|
38
|
+
{
|
|
39
|
+
return PressureState::Critical;
|
|
40
|
+
}
|
|
41
|
+
let is_soft_pressure = reading.dispatch_warning
|
|
42
|
+
|| reading.used_percent >= policy.soft_memory_pressure_percent
|
|
43
|
+
|| reading.swap_used_percent >= 50
|
|
44
|
+
|| reading.pageout_delta >= 2_500;
|
|
45
|
+
let is_recovering_above_threshold = previous != PressureState::Normal
|
|
46
|
+
&& reading.used_percent > policy.recovery_memory_pressure_percent;
|
|
47
|
+
if is_soft_pressure || is_recovering_above_threshold {
|
|
48
|
+
PressureState::Soft
|
|
49
|
+
} else {
|
|
50
|
+
PressureState::Normal
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
#[derive(Debug, Clone)]
|
|
55
|
+
pub struct PressureHysteresis {
|
|
56
|
+
state: PressureState,
|
|
57
|
+
required_recovery_ticks: u32,
|
|
58
|
+
recovery_ticks: u32,
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
impl PressureHysteresis {
|
|
62
|
+
pub fn new(required_recovery_ticks: u32) -> Self {
|
|
63
|
+
Self {
|
|
64
|
+
state: PressureState::Normal,
|
|
65
|
+
required_recovery_ticks: required_recovery_ticks.max(1),
|
|
66
|
+
recovery_ticks: 0,
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
|
|
70
|
+
pub fn state(&self) -> PressureState {
|
|
71
|
+
self.state
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
pub fn set_required_recovery_ticks(&mut self, ticks: u32) {
|
|
75
|
+
self.required_recovery_ticks = ticks.max(1);
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
pub fn observe(&mut self, classified: PressureState) -> PressureState {
|
|
79
|
+
match (self.state, classified) {
|
|
80
|
+
(_, PressureState::Critical) => {
|
|
81
|
+
self.state = PressureState::Critical;
|
|
82
|
+
self.recovery_ticks = 0;
|
|
83
|
+
}
|
|
84
|
+
(PressureState::Critical, PressureState::Soft)
|
|
85
|
+
| (PressureState::Soft, PressureState::Soft) => {
|
|
86
|
+
self.state = PressureState::Soft;
|
|
87
|
+
self.recovery_ticks = 0;
|
|
88
|
+
}
|
|
89
|
+
(PressureState::Normal, PressureState::Soft) => {
|
|
90
|
+
self.state = PressureState::Soft;
|
|
91
|
+
self.recovery_ticks = 0;
|
|
92
|
+
}
|
|
93
|
+
(PressureState::Normal, PressureState::Normal) => {
|
|
94
|
+
self.recovery_ticks = 0;
|
|
95
|
+
}
|
|
96
|
+
(_, PressureState::Normal) => {
|
|
97
|
+
self.recovery_ticks += 1;
|
|
98
|
+
if self.recovery_ticks >= self.required_recovery_ticks {
|
|
99
|
+
self.state = PressureState::Normal;
|
|
100
|
+
self.recovery_ticks = 0;
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
self.state
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Usage thresholds and the warning event flag each map to the expected state.
    #[test]
    fn pressure_classifier_uses_thresholds_and_events() {
        let policy = Policy::default();
        let warned = MemoryReading {
            dispatch_warning: true,
            ..MemoryReading::quiet(10)
        };
        let cases = [
            (MemoryReading::quiet(40), PressureState::Normal),
            (
                MemoryReading::quiet(policy.soft_memory_pressure_percent),
                PressureState::Soft,
            ),
            (
                MemoryReading::quiet(policy.critical_memory_pressure_percent),
                PressureState::Critical,
            ),
            (warned, PressureState::Soft),
        ];
        for (reading, expected) in cases {
            assert_eq!(
                classify_pressure(&policy, reading),
                expected,
                "reading: {reading:?}"
            );
        }
    }

    /// Three consecutive Normal observations are needed before Critical is released.
    #[test]
    fn pressure_recovery_hysteresis_requires_stable_recovery() {
        let mut tracker = PressureHysteresis::new(3);
        assert_eq!(
            tracker.observe(PressureState::Critical),
            PressureState::Critical
        );
        for _ in 0..2 {
            assert_eq!(
                tracker.observe(PressureState::Normal),
                PressureState::Critical
            );
        }
        assert_eq!(
            tracker.observe(PressureState::Normal),
            PressureState::Normal
        );
    }

    /// Just above the recovery level keeps Soft; at the recovery level returns to Normal.
    #[test]
    fn recovery_threshold_keeps_previous_pressure_until_safe() {
        let policy = Policy::default();
        let recovery = policy.recovery_memory_pressure_percent;

        let above = classify_pressure_with_previous(
            &policy,
            MemoryReading::quiet(recovery + 1),
            PressureState::Critical,
        );
        assert_eq!(above, PressureState::Soft);

        let at_level = classify_pressure_with_previous(
            &policy,
            MemoryReading::quiet(recovery),
            PressureState::Soft,
        );
        assert_eq!(at_level, PressureState::Normal);
    }
}
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
use crate::types::ProcessSample;
|
|
2
|
+
use chrono::Utc;
|
|
3
|
+
use std::os::unix::process::{CommandExt, ExitStatusExt};
|
|
4
|
+
use std::process::{Child, Command, ExitStatus};
|
|
5
|
+
|
|
6
|
+
/// The job-control signals this module can deliver to a process group.
#[derive(Debug, Clone, Copy)]
pub enum Signal {
    /// Maps to `SIGSTOP`.
    Stop,
    /// Maps to `SIGCONT`.
    Continue,
    /// Maps to `SIGTERM`.
    Terminate,
}
|
|
12
|
+
|
|
13
|
+
impl Signal {
|
|
14
|
+
fn as_raw(self) -> i32 {
|
|
15
|
+
match self {
|
|
16
|
+
Signal::Stop => libc::SIGSTOP,
|
|
17
|
+
Signal::Continue => libc::SIGCONT,
|
|
18
|
+
Signal::Terminate => libc::SIGTERM,
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
pub fn spawn_in_process_group(command: &str, args: &[String]) -> anyhow::Result<Child> {
|
|
24
|
+
let mut cmd = Command::new(command);
|
|
25
|
+
cmd.args(args);
|
|
26
|
+
// SAFETY: pre_exec runs in the child after fork and before exec. setpgid is
|
|
27
|
+
// async-signal-safe and makes the child the leader of a fresh process group.
|
|
28
|
+
unsafe {
|
|
29
|
+
cmd.pre_exec(|| {
|
|
30
|
+
if libc::setpgid(0, 0) == -1 {
|
|
31
|
+
return Err(std::io::Error::last_os_error());
|
|
32
|
+
}
|
|
33
|
+
Ok(())
|
|
34
|
+
});
|
|
35
|
+
}
|
|
36
|
+
Ok(cmd.spawn()?)
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
pub fn send_signal_to_group(pgid: i64, signal: Signal) -> anyhow::Result<()> {
|
|
40
|
+
let rc = unsafe { libc::killpg(pgid as libc::pid_t, signal.as_raw()) };
|
|
41
|
+
if rc == -1 {
|
|
42
|
+
return Err(std::io::Error::last_os_error().into());
|
|
43
|
+
}
|
|
44
|
+
Ok(())
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
pub fn pid_alive(pid: i64) -> bool {
|
|
48
|
+
let rc = unsafe { libc::kill(pid as libc::pid_t, 0) };
|
|
49
|
+
rc == 0
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
/// Converts an [`ExitStatus`] into a shell-style exit code.
///
/// A normal exit yields the process's own code. Otherwise the result is `128 + signal`,
/// using 1 in place of the signal number when none is reported.
pub fn exit_code(status: ExitStatus) -> i32 {
    match status.code() {
        Some(code) => code,
        None => {
            let signal_number = status.signal().unwrap_or(1);
            128 + signal_number
        }
    }
}
|
|
59
|
+
|
|
60
|
+
pub fn sample_process_group(
|
|
61
|
+
job_id: &str,
|
|
62
|
+
pgid: i64,
|
|
63
|
+
pressure_state: crate::types::PressureState,
|
|
64
|
+
) -> ProcessSample {
|
|
65
|
+
let output = Command::new("/bin/ps")
|
|
66
|
+
.args(["-o", "pid=,rss=,pcpu=", "-g", &pgid.to_string()])
|
|
67
|
+
.output();
|
|
68
|
+
let mut pid_count = 0_u32;
|
|
69
|
+
let mut rss_kb = 0_u64;
|
|
70
|
+
let mut cpu_pct = 0.0_f64;
|
|
71
|
+
|
|
72
|
+
if let Ok(output) = output
|
|
73
|
+
&& output.status.success()
|
|
74
|
+
{
|
|
75
|
+
let text = String::from_utf8_lossy(&output.stdout);
|
|
76
|
+
for line in text.lines() {
|
|
77
|
+
let mut parts = line.split_whitespace();
|
|
78
|
+
if parts.next().is_none() {
|
|
79
|
+
continue;
|
|
80
|
+
}
|
|
81
|
+
pid_count += 1;
|
|
82
|
+
if let Some(rss) = parts.next().and_then(|value| value.parse::<u64>().ok()) {
|
|
83
|
+
rss_kb += rss;
|
|
84
|
+
}
|
|
85
|
+
if let Some(cpu) = parts.next().and_then(|value| value.parse::<f64>().ok()) {
|
|
86
|
+
cpu_pct += cpu;
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
ProcessSample {
|
|
92
|
+
job_id: job_id.to_string(),
|
|
93
|
+
sampled_at: Utc::now(),
|
|
94
|
+
pid_count,
|
|
95
|
+
rss_bytes: rss_kb * 1024,
|
|
96
|
+
footprint_bytes: rss_kb * 1024,
|
|
97
|
+
cpu_pct,
|
|
98
|
+
pressure_state,
|
|
99
|
+
}
|
|
100
|
+
}
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
use crate::types::{Policy, PressureState, Priority, StatusSnapshot};
|
|
2
|
+
use serde::{Deserialize, Serialize};
|
|
3
|
+
|
|
4
|
+
/// Version number stamped into the `protocol_version` field of every request and response.
pub const PROTOCOL_VERSION: u16 = 1;
|
|
5
|
+
|
|
6
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
7
|
+
#[serde(tag = "type", rename_all = "snake_case")]
|
|
8
|
+
pub enum Request {
|
|
9
|
+
Admission {
|
|
10
|
+
protocol_version: u16,
|
|
11
|
+
adapter: String,
|
|
12
|
+
command: String,
|
|
13
|
+
args: Vec<String>,
|
|
14
|
+
cwd: String,
|
|
15
|
+
priority: Priority,
|
|
16
|
+
tty: bool,
|
|
17
|
+
},
|
|
18
|
+
LeaseCheck {
|
|
19
|
+
protocol_version: u16,
|
|
20
|
+
job_id: String,
|
|
21
|
+
},
|
|
22
|
+
Register {
|
|
23
|
+
protocol_version: u16,
|
|
24
|
+
job_id: String,
|
|
25
|
+
root_pid: i64,
|
|
26
|
+
pgid: i64,
|
|
27
|
+
},
|
|
28
|
+
Exit {
|
|
29
|
+
protocol_version: u16,
|
|
30
|
+
job_id: String,
|
|
31
|
+
exit_code: i32,
|
|
32
|
+
},
|
|
33
|
+
Status {
|
|
34
|
+
protocol_version: u16,
|
|
35
|
+
include_processes: bool,
|
|
36
|
+
},
|
|
37
|
+
PolicyGet {
|
|
38
|
+
protocol_version: u16,
|
|
39
|
+
},
|
|
40
|
+
PolicySet {
|
|
41
|
+
protocol_version: u16,
|
|
42
|
+
key: String,
|
|
43
|
+
value: String,
|
|
44
|
+
},
|
|
45
|
+
Pause {
|
|
46
|
+
protocol_version: u16,
|
|
47
|
+
job_id: String,
|
|
48
|
+
},
|
|
49
|
+
Resume {
|
|
50
|
+
protocol_version: u16,
|
|
51
|
+
job_id: String,
|
|
52
|
+
},
|
|
53
|
+
Cancel {
|
|
54
|
+
protocol_version: u16,
|
|
55
|
+
job_id: String,
|
|
56
|
+
},
|
|
57
|
+
ToolPathSet {
|
|
58
|
+
protocol_version: u16,
|
|
59
|
+
tool: String,
|
|
60
|
+
real_path: String,
|
|
61
|
+
},
|
|
62
|
+
Doctor {
|
|
63
|
+
protocol_version: u16,
|
|
64
|
+
},
|
|
65
|
+
TestSetPressure {
|
|
66
|
+
protocol_version: u16,
|
|
67
|
+
pressure: PressureState,
|
|
68
|
+
},
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
72
|
+
#[serde(tag = "type", rename_all = "snake_case")]
|
|
73
|
+
pub enum Response {
|
|
74
|
+
Admission {
|
|
75
|
+
protocol_version: u16,
|
|
76
|
+
job_id: String,
|
|
77
|
+
admitted: bool,
|
|
78
|
+
state: String,
|
|
79
|
+
reason: Option<String>,
|
|
80
|
+
codex_max_threads: u32,
|
|
81
|
+
},
|
|
82
|
+
Lease {
|
|
83
|
+
protocol_version: u16,
|
|
84
|
+
job_id: String,
|
|
85
|
+
admitted: bool,
|
|
86
|
+
state: String,
|
|
87
|
+
reason: Option<String>,
|
|
88
|
+
},
|
|
89
|
+
Ack {
|
|
90
|
+
protocol_version: u16,
|
|
91
|
+
message: String,
|
|
92
|
+
},
|
|
93
|
+
Status {
|
|
94
|
+
protocol_version: u16,
|
|
95
|
+
snapshot: StatusSnapshot,
|
|
96
|
+
},
|
|
97
|
+
Policy {
|
|
98
|
+
protocol_version: u16,
|
|
99
|
+
policy: Policy,
|
|
100
|
+
},
|
|
101
|
+
Doctor {
|
|
102
|
+
protocol_version: u16,
|
|
103
|
+
report: DoctorReport,
|
|
104
|
+
},
|
|
105
|
+
Error {
|
|
106
|
+
protocol_version: u16,
|
|
107
|
+
message: String,
|
|
108
|
+
},
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
#[derive(Debug, Clone, Serialize, Deserialize)]
|
|
112
|
+
pub struct DoctorReport {
|
|
113
|
+
pub os_supported: bool,
|
|
114
|
+
pub os: String,
|
|
115
|
+
pub socket_path: String,
|
|
116
|
+
pub socket_exists: bool,
|
|
117
|
+
pub db_path: String,
|
|
118
|
+
pub db_exists: bool,
|
|
119
|
+
pub launchd_plist_exists: bool,
|
|
120
|
+
pub discovered_tools: Vec<(String, String)>,
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
impl Request {
|
|
124
|
+
pub fn version(&self) -> u16 {
|
|
125
|
+
match self {
|
|
126
|
+
Self::Admission {
|
|
127
|
+
protocol_version, ..
|
|
128
|
+
}
|
|
129
|
+
| Self::LeaseCheck {
|
|
130
|
+
protocol_version, ..
|
|
131
|
+
}
|
|
132
|
+
| Self::Register {
|
|
133
|
+
protocol_version, ..
|
|
134
|
+
}
|
|
135
|
+
| Self::Exit {
|
|
136
|
+
protocol_version, ..
|
|
137
|
+
}
|
|
138
|
+
| Self::Status {
|
|
139
|
+
protocol_version, ..
|
|
140
|
+
}
|
|
141
|
+
| Self::PolicyGet { protocol_version }
|
|
142
|
+
| Self::PolicySet {
|
|
143
|
+
protocol_version, ..
|
|
144
|
+
}
|
|
145
|
+
| Self::Pause {
|
|
146
|
+
protocol_version, ..
|
|
147
|
+
}
|
|
148
|
+
| Self::Resume {
|
|
149
|
+
protocol_version, ..
|
|
150
|
+
}
|
|
151
|
+
| Self::Cancel {
|
|
152
|
+
protocol_version, ..
|
|
153
|
+
}
|
|
154
|
+
| Self::ToolPathSet {
|
|
155
|
+
protocol_version, ..
|
|
156
|
+
}
|
|
157
|
+
| Self::Doctor { protocol_version }
|
|
158
|
+
| Self::TestSetPressure {
|
|
159
|
+
protocol_version, ..
|
|
160
|
+
} => *protocol_version,
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// An admission request serializes with its version and type tag and survives a
    /// JSON round trip.
    #[test]
    fn socket_protocol_serialization_is_versioned_and_compatible() {
        let original = Request::Admission {
            protocol_version: PROTOCOL_VERSION,
            adapter: "codex".to_string(),
            command: "codex".to_string(),
            args: vec!["exec".to_string()],
            cwd: "/tmp".to_string(),
            priority: Priority::Normal,
            tty: true,
        };

        let encoded = serde_json::to_string(&original).unwrap();
        for fragment in ["\"protocol_version\":1", "\"type\":\"admission\""] {
            assert!(encoded.contains(fragment));
        }

        let round_tripped: Request = serde_json::from_str(&encoded).unwrap();
        assert_eq!(round_tripped.version(), PROTOCOL_VERSION);
    }
}
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
use crate::types::{JobRecord, JobState, Policy, PressureState, Priority};
|
|
2
|
+
use chrono::{DateTime, Utc};
|
|
3
|
+
|
|
4
|
+
/// The scheduler's verdict on whether a job may start now.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AdmissionDecision {
    /// The job may start immediately.
    Admit,
    /// The job must wait; `reason` is a human-readable explanation.
    Queue { reason: String },
}
|
|
9
|
+
|
|
10
|
+
pub fn decide_admission(
|
|
11
|
+
policy: &Policy,
|
|
12
|
+
pressure: PressureState,
|
|
13
|
+
active_jobs: usize,
|
|
14
|
+
) -> AdmissionDecision {
|
|
15
|
+
if pressure == PressureState::Critical {
|
|
16
|
+
return AdmissionDecision::Queue {
|
|
17
|
+
reason: "critical memory pressure".to_string(),
|
|
18
|
+
};
|
|
19
|
+
}
|
|
20
|
+
if pressure == PressureState::Soft {
|
|
21
|
+
return AdmissionDecision::Queue {
|
|
22
|
+
reason: "soft memory pressure".to_string(),
|
|
23
|
+
};
|
|
24
|
+
}
|
|
25
|
+
if active_jobs >= policy.max_active_jobs as usize {
|
|
26
|
+
return AdmissionDecision::Queue {
|
|
27
|
+
reason: format!(
|
|
28
|
+
"active job limit reached ({}/{})",
|
|
29
|
+
active_jobs, policy.max_active_jobs
|
|
30
|
+
),
|
|
31
|
+
};
|
|
32
|
+
}
|
|
33
|
+
AdmissionDecision::Admit
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
pub fn ordered_queue(mut jobs: Vec<JobRecord>, now: DateTime<Utc>) -> Vec<JobRecord> {
|
|
37
|
+
jobs.sort_by(|a, b| {
|
|
38
|
+
let a_score = queue_score(a.priority, a.created_at, now);
|
|
39
|
+
let b_score = queue_score(b.priority, b.created_at, now);
|
|
40
|
+
b_score
|
|
41
|
+
.cmp(&a_score)
|
|
42
|
+
.then_with(|| a.created_at.cmp(&b.created_at))
|
|
43
|
+
.then_with(|| a.id.cmp(&b.id))
|
|
44
|
+
});
|
|
45
|
+
jobs
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/// Computes a queue score: the priority's scheduler weight plus one point per whole minute
/// waited, where the waiting bonus is limited to the range 0..=30.
///
/// A `queued_at` in the future therefore earns no bonus rather than a penalty.
fn queue_score(priority: Priority, queued_at: DateTime<Utc>, now: DateTime<Utc>) -> i64 {
    let waited = now - queued_at;
    let aging_bonus = waited.num_minutes().clamp(0, 30);
    priority.scheduler_weight() + aging_bonus
}
|
|
52
|
+
|
|
53
|
+
pub fn pause_candidates(jobs: &[JobRecord]) -> Vec<JobRecord> {
|
|
54
|
+
let mut candidates: Vec<_> = jobs
|
|
55
|
+
.iter()
|
|
56
|
+
.filter(|job| job.status == JobState::Running)
|
|
57
|
+
.cloned()
|
|
58
|
+
.collect();
|
|
59
|
+
candidates.sort_by(|a, b| {
|
|
60
|
+
a.priority
|
|
61
|
+
.cmp(&b.priority)
|
|
62
|
+
.then_with(|| a.started_at.cmp(&b.started_at))
|
|
63
|
+
.then_with(|| a.id.cmp(&b.id))
|
|
64
|
+
});
|
|
65
|
+
candidates
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::JobRecord;
    use chrono::Duration;

    /// Builds a queued job with the given id, priority and creation time.
    fn job(id: &str, priority: Priority, created_at: DateTime<Utc>) -> JobRecord {
        let policy = Policy::default();
        let mut record = JobRecord::new_admission(
            id.to_string(),
            "fake".to_string(),
            "fake".to_string(),
            vec![],
            ".".to_string(),
            priority,
            &policy,
        )
        .unwrap();
        record.created_at = created_at;
        record.status = JobState::Queued;
        record
    }

    /// Same as `job`, but already marked as running.
    fn running_job(id: &str, priority: Priority, created_at: DateTime<Utc>) -> JobRecord {
        let mut record = job(id, priority, created_at);
        record.status = JobState::Running;
        record
    }

    #[test]
    fn scheduler_admits_under_normal_pressure() {
        let policy = Policy::default();
        let decision = decide_admission(&policy, PressureState::Normal, 0);
        assert_eq!(decision, AdmissionDecision::Admit);
    }

    #[test]
    fn scheduler_queues_under_soft_pressure() {
        let policy = Policy::default();
        let expected = AdmissionDecision::Queue {
            reason: "soft memory pressure".to_string(),
        };
        assert_eq!(decide_admission(&policy, PressureState::Soft, 0), expected);
    }

    #[test]
    fn scheduler_selects_low_priority_for_critical_pause_first() {
        let now = Utc::now();
        let high = running_job("high", Priority::High, now);
        let low = running_job("low", Priority::Low, now);
        let candidates = pause_candidates(&[high, low]);
        assert_eq!(candidates[0].id, "low");
    }

    /// A low-priority job that has waited 30 minutes outranks fresh higher-priority jobs.
    #[test]
    fn priority_ordering_allows_simple_aging() {
        let now = Utc::now();
        let queue = vec![
            job("fresh-normal", Priority::Normal, now),
            job("old-low", Priority::Low, now - Duration::minutes(30)),
            job("fresh-high", Priority::High, now),
        ];
        let ordered = ordered_queue(queue, now);
        assert_eq!(ordered[0].id, "old-low");
        assert_eq!(ordered[1].id, "fresh-high");
    }
}
|