@team-agent/installer 0.3.9 → 0.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +1 -1
- package/Cargo.toml +1 -1
- package/crates/team-agent/src/coordinator/tests/spine.rs +6 -0
- package/crates/team-agent/src/coordinator/tick.rs +83 -1
- package/crates/team-agent/src/leader/lease.rs +19 -0
- package/crates/team-agent/src/leader/rediscover/tests.rs +12 -0
- package/crates/team-agent/src/leader/rediscover.rs +2 -0
- package/crates/team-agent/src/lifecycle/launch.rs +35 -0
- package/crates/team-agent/src/lifecycle/restart/agent.rs +17 -3
- package/crates/team-agent/src/lifecycle/restart/common.rs +75 -0
- package/crates/team-agent/src/lifecycle/restart/rebuild.rs +201 -3
- package/crates/team-agent/src/lifecycle/restart/selection.rs +51 -14
- package/crates/team-agent/src/lifecycle/restart.rs +1 -1
- package/crates/team-agent/src/lifecycle/tests/core.rs +89 -15
- package/crates/team-agent/src/lifecycle/tests/launch_spawn.rs +68 -3
- package/crates/team-agent/src/lifecycle/tests/main_preserved.rs +3 -1
- package/crates/team-agent/src/messaging/delivery.rs +83 -2
- package/crates/team-agent/src/messaging/tests/runtime.rs +90 -0
- package/crates/team-agent/src/provider/approvals/parsing.rs +43 -14
- package/crates/team-agent/src/provider/approvals/runtime_prompts.rs +12 -9
- package/crates/team-agent/src/transport/test_support.rs +12 -1
- package/package.json +4 -4
|
@@ -4,32 +4,35 @@ use super::common::*;
|
|
|
4
4
|
/// bug-085 四象限 `start_mode` 决策(`start.py:179-188` + `_resume_rollout_missing` `start.py:66-69`),
|
|
5
5
|
/// **从 start_agent 的整条 lock+spawn 路径里分离出的纯函数**(gate gap:porter 需要单元级 RED
|
|
6
6
|
/// for `FreshAfterMissingRollout`,而 start_agent 全路径不可单测)。语义:
|
|
7
|
-
/// -
|
|
7
|
+
/// - resume backing 缺失时不可 resume:codex/claude 用 transcript/rollout 文件,
|
|
8
|
+
/// copilot 用 session-store 行存在性(由调用方折叠进 `rollout_exists`)。
|
|
8
9
|
/// - 初始 `start_mode = if session_id { Resumed } else { Fresh }`(`start.py:179`)。
|
|
9
|
-
/// -
|
|
10
|
-
///
|
|
11
|
-
/// - 非 codex:rollout 永不"缺失",直接看 session_id。
|
|
10
|
+
/// - `missing && allow_fresh` 升级为 `FreshAfterMissingRollout` 并清空 session_id。
|
|
11
|
+
/// - `missing && !allow_fresh` 返回 `Noop`,调用方据此诚实拒绝并提示 `--allow-fresh`。
|
|
12
12
|
pub fn decide_start_mode(
|
|
13
13
|
provider: &str,
|
|
14
14
|
session_id: Option<&SessionId>,
|
|
15
|
-
|
|
15
|
+
_rollout_path: Option<&RolloutPath>,
|
|
16
16
|
rollout_exists: bool,
|
|
17
17
|
allow_fresh: bool,
|
|
18
18
|
) -> StartMode {
|
|
19
19
|
match session_id {
|
|
20
20
|
None => StartMode::Fresh,
|
|
21
21
|
Some(_) => {
|
|
22
|
-
let
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
StartMode::
|
|
26
|
-
|
|
27
|
-
StartMode::Resumed
|
|
22
|
+
let missing_resume_backing = resumable_provider_requires_backing(provider) && !rollout_exists;
|
|
23
|
+
match (missing_resume_backing, allow_fresh) {
|
|
24
|
+
(true, true) => StartMode::FreshAfterMissingRollout,
|
|
25
|
+
(true, false) => StartMode::Noop,
|
|
26
|
+
(false, _) => StartMode::Resumed,
|
|
28
27
|
}
|
|
29
28
|
}
|
|
30
29
|
}
|
|
31
30
|
}
|
|
32
31
|
|
|
32
|
+
/// Returns `true` when `provider` is one of the wire names whose resume decision depends
/// on a backing artifact: `codex`, `claude`, `claude_code` or `copilot`.
///
/// The comparison is exact and case-sensitive; every other string yields `false`.
pub(crate) fn resumable_provider_requires_backing(provider: &str) -> bool {
    const BACKED_PROVIDERS: [&str; 4] = ["codex", "claude", "claude_code", "copilot"];
    BACKED_PROVIDERS.contains(&provider)
}
|
|
35
|
+
|
|
33
36
|
/// `first_send_at` 严格分类(`_classify_first_send_at`,`orchestration.py:399`)。
|
|
34
37
|
/// **绝不靠 truthiness**:`""`/`0`/`False`/`"null"`/非 ISO → `Corrupt`。
|
|
35
38
|
pub fn classify_first_send_at(raw: &serde_json::Value) -> FirstSendAtState {
|
|
@@ -129,6 +132,14 @@ pub fn python_type_name(value: &serde_json::Value) -> &'static str {
|
|
|
129
132
|
pub fn classify_restart_plan(
|
|
130
133
|
state: &serde_json::Value,
|
|
131
134
|
allow_fresh: bool,
|
|
135
|
+
) -> Result<RestartPlan, LifecycleError> {
|
|
136
|
+
classify_restart_plan_with_resume_validation(None, state, allow_fresh)
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
pub(crate) fn classify_restart_plan_with_resume_validation(
|
|
140
|
+
workspace: Option<&Path>,
|
|
141
|
+
state: &serde_json::Value,
|
|
142
|
+
allow_fresh: bool,
|
|
132
143
|
) -> Result<RestartPlan, LifecycleError> {
|
|
133
144
|
let mut decisions = Vec::new();
|
|
134
145
|
let mut corrupt_entries = Vec::new();
|
|
@@ -171,21 +182,47 @@ pub fn classify_restart_plan(
|
|
|
171
182
|
.and_then(|v| v.as_str())
|
|
172
183
|
.filter(|s| !s.is_empty())
|
|
173
184
|
.map(SessionId::new);
|
|
185
|
+
let agent_id = AgentId::new(worker_id.clone());
|
|
174
186
|
// E6 层2 (C2, 用户裁定"绝不静默 fresh"): null session 只有显式 --allow-fresh 才 fresh,
|
|
175
187
|
// 否则 Refuse(→ resume_not_ready + 指引)。删 `!interacted` 短路 —— 自启动 worker
|
|
176
188
|
// (leader 从未发消息 → first_send_at=null → interacted=false)会被它静默 fresh 丢上下文。
|
|
177
|
-
let
|
|
189
|
+
let provider = agent_provider(agent);
|
|
190
|
+
let provider_wire = provider_wire(provider);
|
|
191
|
+
let resume_backing_exists = match (workspace, session_id.as_ref()) {
|
|
192
|
+
(Some(workspace), Some(session)) => resume_backing_exists_for_agent(
|
|
193
|
+
workspace,
|
|
194
|
+
&agent_id,
|
|
195
|
+
agent,
|
|
196
|
+
provider,
|
|
197
|
+
session,
|
|
198
|
+
agent_rollout_path(agent).as_ref(),
|
|
199
|
+
),
|
|
200
|
+
(None, Some(_)) if resumable_provider_requires_backing(provider_wire) => {
|
|
201
|
+
agent_rollout_path(agent)
|
|
202
|
+
.as_ref()
|
|
203
|
+
.is_some_and(|path| path.as_path().exists())
|
|
204
|
+
}
|
|
205
|
+
_ => true,
|
|
206
|
+
};
|
|
207
|
+
let decision = if session_id.is_some() && resume_backing_exists {
|
|
178
208
|
ResumeDecision::Resume
|
|
209
|
+
} else if session_id.is_some() && allow_fresh {
|
|
210
|
+
ResumeDecision::FreshStart
|
|
211
|
+
} else if session_id.is_some() {
|
|
212
|
+
ResumeDecision::Refuse
|
|
179
213
|
} else if allow_fresh {
|
|
180
214
|
ResumeDecision::FreshStart
|
|
181
215
|
} else {
|
|
182
216
|
ResumeDecision::Refuse
|
|
183
217
|
};
|
|
184
|
-
let agent_id = AgentId::new(worker_id.clone());
|
|
185
218
|
if matches!(decision, ResumeDecision::Refuse) {
|
|
186
219
|
unresumable.push(UnresumableWorker {
|
|
187
220
|
agent_id: agent_id.clone(),
|
|
188
|
-
reason:
|
|
221
|
+
reason: if session_id.is_some() {
|
|
222
|
+
"session_unresumable".to_string()
|
|
223
|
+
} else {
|
|
224
|
+
"no_persisted_session_id".to_string()
|
|
225
|
+
},
|
|
189
226
|
session_id: session_id.clone(),
|
|
190
227
|
first_send_at: first_send_at_raw.as_str().map(|s| s.to_string()),
|
|
191
228
|
});
|
|
@@ -37,7 +37,7 @@ pub(crate) use common::refresh_missing_provider_sessions;
|
|
|
37
37
|
pub use orchestrator::{halt_plan, plan_status};
|
|
38
38
|
pub use rebuild::{
|
|
39
39
|
restart, restart_candidates, restart_with_session_convergence_deadline, restart_with_transport,
|
|
40
|
-
select_restart_state,
|
|
40
|
+
restart_with_transport_with_readiness_deadline, select_restart_state,
|
|
41
41
|
};
|
|
42
42
|
pub use remove::{remove_agent, remove_agent_with_transport};
|
|
43
43
|
pub use selection::{classify_first_send_at, classify_restart_plan, decide_start_mode, python_type_name};
|
|
@@ -342,13 +342,12 @@ fn start_mode_serde_names_match_python_start_mode_strings() {
|
|
|
342
342
|
}
|
|
343
343
|
|
|
344
344
|
// ───────────────────────────────────────────────────────────────────────
|
|
345
|
-
// decide_start_mode — bug-085 四象限
|
|
346
|
-
// golden 实跑(PYTHONPATH=… python3 /tmp/x.py,_resume_rollout_missing + start_mode 逻辑):
|
|
345
|
+
// decide_start_mode — bug-085 四象限 + E20 #264 gap closure.
|
|
347
346
|
// codex sess rollout-present any-fresh -> resumed
|
|
348
|
-
// codex sess
|
|
349
|
-
// codex sess
|
|
347
|
+
// codex sess backing-MISSING !allow_fresh -> noop/refuse (绝不静默 resume 死 session)
|
|
348
|
+
// codex sess backing-MISSING allow_fresh -> fresh_after_missing_rollout
|
|
350
349
|
// codex no-sess any -> fresh
|
|
351
|
-
// claude
|
|
350
|
+
// claude/copilot sess backing-missing -> fresh_after_missing_rollout 或 noop/refuse
|
|
352
351
|
// claude no-sess -> fresh
|
|
353
352
|
// 这是 bug-085 把 start_mode 分类从 start_agent 的 lock+spawn 全路径剥离出来的命门。
|
|
354
353
|
// ───────────────────────────────────────────────────────────────────────
|
|
@@ -375,11 +374,11 @@ fn decide_start_mode_codex_missing_rollout_with_allow_fresh_is_fresh_after_missi
|
|
|
375
374
|
}
|
|
376
375
|
|
|
377
376
|
#[test]
fn decide_start_mode_codex_missing_rollout_without_allow_fresh_refuses() {
    // E20 C①: backing is missing and `allow_fresh` is off → refuse explicitly (`Noop`),
    // never resume into a dead session.
    let session = sid("s1");
    let mode = decide_start_mode("codex", Some(&session), None, false, false);
    assert_eq!(mode, StartMode::Noop);
}
|
|
385
384
|
|
|
@@ -408,12 +407,24 @@ fn decide_start_mode_no_session_is_fresh() {
|
|
|
408
407
|
}
|
|
409
408
|
|
|
410
409
|
#[test]
|
|
411
|
-
fn
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
410
|
+
fn decide_start_mode_checks_backing_for_all_resumable_providers() {
|
|
411
|
+
for provider in ["claude", "claude_code", "copilot"] {
|
|
412
|
+
assert_eq!(
|
|
413
|
+
decide_start_mode(provider, Some(&sid("s1")), None, false, true),
|
|
414
|
+
StartMode::FreshAfterMissingRollout,
|
|
415
|
+
"{provider} missing backing + allow_fresh must not resume"
|
|
416
|
+
);
|
|
417
|
+
assert_eq!(
|
|
418
|
+
decide_start_mode(provider, Some(&sid("s1")), None, false, false),
|
|
419
|
+
StartMode::Noop,
|
|
420
|
+
"{provider} missing backing + !allow_fresh must refuse"
|
|
421
|
+
);
|
|
422
|
+
assert_eq!(
|
|
423
|
+
decide_start_mode(provider, Some(&sid("s1")), Some(&rp("/r")), true, false),
|
|
424
|
+
StartMode::Resumed,
|
|
425
|
+
"{provider} existing backing remains resumable"
|
|
426
|
+
);
|
|
427
|
+
}
|
|
417
428
|
assert_eq!(
|
|
418
429
|
decide_start_mode("claude", None, None, false, true),
|
|
419
430
|
StartMode::Fresh
|
|
@@ -533,8 +544,11 @@ fn classify_restart_plan_never_interacted_null_session_with_allow_fresh_marks_fo
|
|
|
533
544
|
fn classify_restart_plan_codex_with_session_still_resumes() {
|
|
534
545
|
// E6 层2 回归锁(不误伤): codex worker first_send_at=null 但 session_id 已捕 →
|
|
535
546
|
// 仍走 Resume(分流轴是 session_id 有无,不是 interacted)。防层2 修法把 has_session 也误判。
|
|
547
|
+
let ws = temp_ws();
|
|
548
|
+
let rollout = ws.join("codex-rollout.jsonl");
|
|
549
|
+
std::fs::write(&rollout, "{}\n").unwrap();
|
|
536
550
|
let state = json!({
|
|
537
|
-
"agents": { "w1": { "provider": "codex", "session_id": "sess-codex-abc" } }
|
|
551
|
+
"agents": { "w1": { "provider": "codex", "session_id": "sess-codex-abc", "rollout_path": rollout.to_string_lossy() } }
|
|
538
552
|
});
|
|
539
553
|
let plan = classify_restart_plan(&state, false).expect("纯验证不应 Err");
|
|
540
554
|
assert_eq!(plan.decisions.len(), 1);
|
|
@@ -978,6 +992,66 @@ fn leader_pane_env_cross_socket_all_probe_errors_stays_unknown() {
|
|
|
978
992
|
assert_eq!(state, LeaderPaneEnvState::Unknown);
|
|
979
993
|
}
|
|
980
994
|
|
|
995
|
+
#[test]
fn mcp_auto_approval_env_marks_leader_bypass_namespace_only() {
    // An enabled, leader-inherited dangerous-approval descriptor for a codex leader.
    let bypass_flag = "--dangerously-bypass-approvals-and-sandbox";
    let safety = DangerousApproval {
        enabled: true,
        source: DangerousApprovalSource::LeaderProcess,
        inherited: true,
        provider: Some("codex".to_string()),
        flag: Some(bypass_flag.to_string()),
        worker_capability_above_leader: false,
        ancestry_binary_name: Some("codex".to_string()),
        unexpected_binary: false,
    };
    let mut env = std::collections::BTreeMap::new();

    apply_mcp_auto_approval_env(&mut env, &safety);

    // Every marker variable must be present with exactly these values.
    let value_of = |key: &str| env.get(key).map(String::as_str);
    assert_eq!(value_of("TEAM_AGENT_LEADER_BYPASS"), Some("1"));
    assert_eq!(value_of("TEAM_AGENT_MCP_AUTO_APPROVE"), Some("team_orchestrator"));
    assert_eq!(value_of("TEAM_AGENT_MCP_AUTO_APPROVE_SOURCE"), Some("leader_bypass"));
    assert_eq!(
        value_of("TEAM_AGENT_LEADER_BYPASS_FLAG"),
        Some("--dangerously-bypass-approvals-and-sandbox")
    );
}
|
|
1025
|
+
|
|
1026
|
+
#[test]
fn mcp_auto_approval_env_clears_when_leader_is_restricted() {
    // Start from an environment that still carries auto-approval markers.
    let mut env = std::collections::BTreeMap::new();
    env.insert(
        "TEAM_AGENT_MCP_AUTO_APPROVE".to_string(),
        "team_orchestrator".to_string(),
    );
    env.insert(
        "TEAM_AGENT_MCP_AUTO_APPROVE_SOURCE".to_string(),
        "leader_bypass".to_string(),
    );
    // A fully disabled dangerous-approval descriptor (restricted leader).
    let safety = DangerousApproval {
        enabled: false,
        source: DangerousApprovalSource::Disabled,
        inherited: false,
        provider: None,
        flag: None,
        worker_capability_above_leader: false,
        ancestry_binary_name: None,
        unexpected_binary: false,
    };

    apply_mcp_auto_approval_env(&mut env, &safety);

    let leader_bypass = env.get("TEAM_AGENT_LEADER_BYPASS").map(String::as_str);
    assert_eq!(leader_bypass, Some("0"));
    let auto_approve_left_behind = env.contains_key("TEAM_AGENT_MCP_AUTO_APPROVE");
    assert!(
        !auto_approve_left_behind,
        "restricted leader must not leave MCP auto-approval env behind: {env:?}"
    );
}
|
|
1054
|
+
|
|
981
1055
|
struct EnvVarGuard {
|
|
982
1056
|
key: &'static str,
|
|
983
1057
|
previous: Option<String>,
|
|
@@ -945,6 +945,10 @@ const DELEG_ROLE_WORKER2: &str = "---\nname: worker2\nrole: Second Worker\nprovi
|
|
|
945
945
|
pub(super) fn restart_ws_two_resumable_workers() -> PathBuf {
|
|
946
946
|
let ws = temp_ws().join("restartteam");
|
|
947
947
|
std::fs::create_dir_all(ws.join("agents")).unwrap();
|
|
948
|
+
let alpha_rollout = ws.join("alpha-rollout.jsonl");
|
|
949
|
+
let bravo_rollout = ws.join("bravo-rollout.jsonl");
|
|
950
|
+
std::fs::write(&alpha_rollout, "{}\n").unwrap();
|
|
951
|
+
std::fs::write(&bravo_rollout, "{}\n").unwrap();
|
|
948
952
|
std::fs::write(ws.join("TEAM.md"), "---\nname: restartteam\nobjective: Restart probe.\nprovider: codex\n---\n\nteam.\n").unwrap();
|
|
949
953
|
std::fs::write(ws.join("agents").join("alpha.md"), DELEG_ROLE_ALPHA).unwrap();
|
|
950
954
|
std::fs::write(ws.join("agents").join("bravo.md"), DELEG_ROLE_BRAVO).unwrap();
|
|
@@ -955,8 +959,8 @@ pub(super) fn restart_ws_two_resumable_workers() -> PathBuf {
|
|
|
955
959
|
&json!({
|
|
956
960
|
"session_name": "team-restartteam",
|
|
957
961
|
"agents": {
|
|
958
|
-
"alpha": {"status": "running", "provider": "codex", "session_id": "sess-a", "first_send_at": "2026-05-27T10:00:00+00:00"},
|
|
959
|
-
"bravo": {"status": "running", "provider": "codex", "session_id": "sess-b", "first_send_at": "2026-05-27T10:00:00+00:00"}
|
|
962
|
+
"alpha": {"status": "running", "provider": "codex", "session_id": "sess-a", "rollout_path": alpha_rollout.to_string_lossy(), "first_send_at": "2026-05-27T10:00:00+00:00"},
|
|
963
|
+
"bravo": {"status": "running", "provider": "codex", "session_id": "sess-b", "rollout_path": bravo_rollout.to_string_lossy(), "first_send_at": "2026-05-27T10:00:00+00:00"}
|
|
960
964
|
}
|
|
961
965
|
}),
|
|
962
966
|
)
|
|
@@ -965,6 +969,33 @@ pub(super) fn restart_ws_two_resumable_workers() -> PathBuf {
|
|
|
965
969
|
ws
|
|
966
970
|
}
|
|
967
971
|
|
|
972
|
+
fn restart_ws_one_resumable_worker() -> PathBuf {
|
|
973
|
+
let ws = temp_ws().join("restartone");
|
|
974
|
+
std::fs::create_dir_all(ws.join("agents")).unwrap();
|
|
975
|
+
let rollout = ws.join("alpha-rollout.jsonl");
|
|
976
|
+
std::fs::write(&rollout, "{}\n").unwrap();
|
|
977
|
+
std::fs::write(
|
|
978
|
+
ws.join("TEAM.md"),
|
|
979
|
+
"---\nname: restartone\nobjective: Restart readiness probe.\nprovider: codex\n---\n\nteam.\n",
|
|
980
|
+
)
|
|
981
|
+
.unwrap();
|
|
982
|
+
std::fs::write(ws.join("agents").join("alpha.md"), DELEG_ROLE_ALPHA).unwrap();
|
|
983
|
+
let spec = crate::compiler::compile_team(&ws).expect("compile 1-agent team");
|
|
984
|
+
std::fs::write(ws.join("team.spec.yaml"), crate::model::yaml::dumps(&spec)).unwrap();
|
|
985
|
+
crate::state::persist::save_runtime_state(
|
|
986
|
+
&ws,
|
|
987
|
+
&json!({
|
|
988
|
+
"session_name": "team-restartone",
|
|
989
|
+
"agents": {
|
|
990
|
+
"alpha": {"status": "running", "provider": "codex", "session_id": "sess-a", "rollout_path": rollout.to_string_lossy(), "first_send_at": "2026-05-27T10:00:00+00:00"}
|
|
991
|
+
}
|
|
992
|
+
}),
|
|
993
|
+
)
|
|
994
|
+
.unwrap();
|
|
995
|
+
seed_healthy_coordinator(&ws);
|
|
996
|
+
ws
|
|
997
|
+
}
|
|
998
|
+
|
|
968
999
|
// 2 [P0] — restart_with_transport must drive the REAL Route-B resume spawn: one spawn per resumable
|
|
969
1000
|
// worker. The first resumed worker recreates the session with spawn_first; later workers may use
|
|
970
1001
|
// spawn_into only after a live-session check proves that recreated session still exists. Each spawn
|
|
@@ -1020,6 +1051,38 @@ fn restart_with_transport_spawns_resumable_workers_not_stub() {
|
|
|
1020
1051
|
);
|
|
1021
1052
|
}
|
|
1022
1053
|
|
|
1054
|
+
#[test]
fn restart_times_out_when_spawned_worker_pane_is_not_addressable() {
    // Offline transport whose spawned panes are configured as not addressable, driven
    // with a readiness deadline argument of `Some(0)`.
    let ws = restart_ws_one_resumable_worker();
    let transport = OfflineTransport::new().with_spawned_panes_addressable(false);

    let result =
        restart_with_transport_with_readiness_deadline(&ws, false, None, &transport, Some(0));

    // The debug rendering of the result must carry every readiness-timeout detail.
    let text = format!("{result:?}");
    let required_fragments = [
        "restart not ready",
        "worker pane addressable: no",
        "Action:",
        "Log:",
    ];
    let has_all_details = required_fragments
        .iter()
        .all(|fragment| text.contains(*fragment));
    assert!(
        has_all_details,
        "restart must refuse with N38 readiness timeout details, not return ok; got {text}"
    );
    let reported_restarted = matches!(result, Ok(RestartReport::Restarted { .. }));
    assert!(
        !reported_restarted,
        "restart readiness timeout must not return Restarted ok"
    );

    // The event log must contain the timeout event with the failing condition recorded.
    let events = crate::event_log::EventLog::new(&ws).tail(20).unwrap();
    let timeout = events
        .iter()
        .find(|event| {
            let name = event.get("event").and_then(|v| v.as_str());
            name == Some("restart.readiness_timeout")
        })
        .expect("restart.readiness_timeout event");
    let pane_addressable = timeout
        .get("worker_pane_addressable")
        .and_then(|v| v.as_bool());
    assert_eq!(
        pane_addressable,
        Some(false),
        "timeout event must carry the failed readiness condition: {timeout}"
    );
}
|
|
1085
|
+
|
|
1023
1086
|
// 3 [P0] — start_agent_with_transport on a non-paused agent with a session_id must spawn EXACTLY ONE
|
|
1024
1087
|
// worker (resume) carrying the provider build_command. Today the stub returns RequirementUnmet with
|
|
1025
1088
|
// ZERO spawns -> RED at recorded.len().
|
|
@@ -1027,11 +1090,13 @@ fn restart_with_transport_spawns_resumable_workers_not_stub() {
|
|
|
1027
1090
|
fn start_agent_with_transport_spawns_resume_not_stub() {
|
|
1028
1091
|
let ws = temp_ws().join("startagentws");
|
|
1029
1092
|
std::fs::create_dir_all(&ws).unwrap();
|
|
1093
|
+
let rollout = ws.join("alpha-rollout.jsonl");
|
|
1094
|
+
std::fs::write(&rollout, "{}\n").unwrap();
|
|
1030
1095
|
crate::state::persist::save_runtime_state(
|
|
1031
1096
|
&ws,
|
|
1032
1097
|
&json!({
|
|
1033
1098
|
"session_name": "team-sa",
|
|
1034
|
-
"agents": {"alpha": {"status": "running", "provider": "codex", "session_id": "sess-a", "first_send_at": "2026-05-27T10:00:00+00:00"}}
|
|
1099
|
+
"agents": {"alpha": {"status": "running", "provider": "codex", "session_id": "sess-a", "rollout_path": rollout.to_string_lossy(), "first_send_at": "2026-05-27T10:00:00+00:00"}}
|
|
1035
1100
|
}),
|
|
1036
1101
|
)
|
|
1037
1102
|
.unwrap();
|
|
@@ -88,11 +88,13 @@ impl crate::transport::Transport for SessionProbeRecordingTransport {
|
|
|
88
88
|
fn respawn_ws_one_resumable_worker() -> PathBuf {
|
|
89
89
|
let ws = temp_ws().join("respawn_dead_session");
|
|
90
90
|
std::fs::create_dir_all(&ws).unwrap();
|
|
91
|
+
let rollout = ws.join("alpha-rollout.jsonl");
|
|
92
|
+
std::fs::write(&rollout, "{}\n").unwrap();
|
|
91
93
|
crate::state::persist::save_runtime_state(
|
|
92
94
|
&ws,
|
|
93
95
|
&json!({
|
|
94
96
|
"session_name": "team-sa",
|
|
95
|
-
"agents": {"alpha": {"status": "running", "provider": "codex", "session_id": "sess-a", "first_send_at": "2026-05-27T10:00:00+00:00"}}
|
|
97
|
+
"agents": {"alpha": {"status": "running", "provider": "codex", "session_id": "sess-a", "rollout_path": rollout.to_string_lossy(), "first_send_at": "2026-05-27T10:00:00+00:00"}}
|
|
96
98
|
}),
|
|
97
99
|
)
|
|
98
100
|
.unwrap();
|
|
@@ -17,7 +17,7 @@ use crate::transport::{
|
|
|
17
17
|
use super::helpers::{message_exists, MessageStatusShadow};
|
|
18
18
|
use super::{
|
|
19
19
|
DeliveryOutcome, DeliveryRefusal, DeliveryStage, DeliveryStatus, MessagingError,
|
|
20
|
-
PaneWidthQuery, TrustRetryPayload,
|
|
20
|
+
PaneWidthQuery, TrustRetryPayload, SEND_RETRY_MAX_ATTEMPTS,
|
|
21
21
|
};
|
|
22
22
|
use crate::state::projection::OwnerTeamResolution;
|
|
23
23
|
|
|
@@ -286,7 +286,6 @@ pub fn deliver_pending_message(
|
|
|
286
286
|
"submit_unverified:{}",
|
|
287
287
|
submit_verification_wire(inject_report.submit_verification)
|
|
288
288
|
);
|
|
289
|
-
store.mark(message_id, "submitted_unverified", Some(&reason))?;
|
|
290
289
|
event_log.write(
|
|
291
290
|
"send.unverified",
|
|
292
291
|
serde_json::json!({
|
|
@@ -296,6 +295,29 @@ pub fn deliver_pending_message(
|
|
|
296
295
|
"attempts": inject_report.attempts,
|
|
297
296
|
}),
|
|
298
297
|
)?;
|
|
298
|
+
if inject_report.attempts >= u32::from(SEND_RETRY_MAX_ATTEMPTS) {
|
|
299
|
+
store.mark(message_id, "failed", Some("send_unverified_exhausted"))?;
|
|
300
|
+
emit_send_failed_exhausted(
|
|
301
|
+
workspace,
|
|
302
|
+
state,
|
|
303
|
+
event_log,
|
|
304
|
+
message_id,
|
|
305
|
+
&message.recipient,
|
|
306
|
+
inject_report.attempts,
|
|
307
|
+
&reason,
|
|
308
|
+
)?;
|
|
309
|
+
return Ok(DeliveryOutcome {
|
|
310
|
+
ok: false,
|
|
311
|
+
status: DeliveryStatus::Failed,
|
|
312
|
+
message_status: MessageStatusShadow("failed".to_string()),
|
|
313
|
+
message_id: Some(message_id.to_string()),
|
|
314
|
+
verification: Some(reason),
|
|
315
|
+
stage: Some(DeliveryStage::Submit),
|
|
316
|
+
reason: None,
|
|
317
|
+
channel: None,
|
|
318
|
+
});
|
|
319
|
+
}
|
|
320
|
+
store.mark(message_id, "submitted_unverified", Some(&reason))?;
|
|
299
321
|
return Ok(DeliveryOutcome {
|
|
300
322
|
ok: false,
|
|
301
323
|
status: DeliveryStatus::Failed,
|
|
@@ -538,6 +560,65 @@ fn leader_receiver_field_in_state<'a>(
|
|
|
538
560
|
.filter(|value| !value.is_empty())
|
|
539
561
|
}
|
|
540
562
|
|
|
563
|
+
fn emit_send_failed_exhausted(
|
|
564
|
+
workspace: &Path,
|
|
565
|
+
state: &serde_json::Value,
|
|
566
|
+
event_log: &EventLog,
|
|
567
|
+
message_id: &str,
|
|
568
|
+
recipient: &str,
|
|
569
|
+
attempts: u32,
|
|
570
|
+
verification: &str,
|
|
571
|
+
) -> Result<(), MessagingError> {
|
|
572
|
+
event_log.write(
|
|
573
|
+
"send.failed",
|
|
574
|
+
serde_json::json!({
|
|
575
|
+
"message_id": message_id,
|
|
576
|
+
"recipient": recipient,
|
|
577
|
+
"attempts": attempts,
|
|
578
|
+
"max_attempts": SEND_RETRY_MAX_ATTEMPTS,
|
|
579
|
+
"reason": "send_unverified_exhausted",
|
|
580
|
+
"verification": verification,
|
|
581
|
+
}),
|
|
582
|
+
)?;
|
|
583
|
+
let content = format!(
|
|
584
|
+
"send.failed\nerror: send to {recipient} remained unverified after {attempts}/{SEND_RETRY_MAX_ATTEMPTS} attempts\naction: inspect the target pane and retry the send\nlog: .team/logs/events.jsonl"
|
|
585
|
+
);
|
|
586
|
+
match crate::messaging::send_to_leader_receiver(
|
|
587
|
+
workspace,
|
|
588
|
+
state,
|
|
589
|
+
"leader",
|
|
590
|
+
&content,
|
|
591
|
+
None,
|
|
592
|
+
"coordinator",
|
|
593
|
+
false,
|
|
594
|
+
Some(&format!("send.failed:{message_id}")),
|
|
595
|
+
event_log,
|
|
596
|
+
) {
|
|
597
|
+
Ok(outcome) => {
|
|
598
|
+
event_log.write(
|
|
599
|
+
"send.failed_notification",
|
|
600
|
+
serde_json::json!({
|
|
601
|
+
"message_id": message_id,
|
|
602
|
+
"recipient": recipient,
|
|
603
|
+
"leader_notification_status": super::helpers::status_wire(outcome.status),
|
|
604
|
+
"leader_message_id": outcome.message_id,
|
|
605
|
+
}),
|
|
606
|
+
)?;
|
|
607
|
+
}
|
|
608
|
+
Err(error) => {
|
|
609
|
+
event_log.write(
|
|
610
|
+
"send.failed_notification_failed",
|
|
611
|
+
serde_json::json!({
|
|
612
|
+
"message_id": message_id,
|
|
613
|
+
"recipient": recipient,
|
|
614
|
+
"error": error.to_string(),
|
|
615
|
+
}),
|
|
616
|
+
)?;
|
|
617
|
+
}
|
|
618
|
+
}
|
|
619
|
+
Ok(())
|
|
620
|
+
}
|
|
621
|
+
|
|
541
622
|
fn active_team_entry(state: &serde_json::Value) -> Option<&serde_json::Value> {
|
|
542
623
|
let team = state.get("active_team_key").and_then(serde_json::Value::as_str)?;
|
|
543
624
|
state
|
|
@@ -1068,6 +1068,96 @@ fn fire_due_scheduled_events_fires_each_scheduled_kind() {
|
|
|
1068
1068
|
assert_eq!(fired.len(), 3, "exactly the three seeded due events fire, no extras");
|
|
1069
1069
|
}
|
|
1070
1070
|
|
|
1071
|
+
struct UnverifiedInjectTransport;
|
|
1072
|
+
impl Transport for UnverifiedInjectTransport {
|
|
1073
|
+
fn kind(&self) -> BackendKind {
|
|
1074
|
+
BackendKind::Tmux
|
|
1075
|
+
}
|
|
1076
|
+
fn spawn_first(&self, _s: &SessionName, _w: &WindowName, _a: &[String], _c: &Path, _e: &BTreeMap<String, String>) -> Result<SpawnResult, TransportError> {
|
|
1077
|
+
unimplemented!("not reached in delivery")
|
|
1078
|
+
}
|
|
1079
|
+
fn spawn_into(&self, _s: &SessionName, _w: &WindowName, _a: &[String], _c: &Path, _e: &BTreeMap<String, String>) -> Result<SpawnResult, TransportError> {
|
|
1080
|
+
unimplemented!("not reached in delivery")
|
|
1081
|
+
}
|
|
1082
|
+
fn inject(&self, _t: &Target, _p: &InjectPayload, _s: Key, _b: bool) -> Result<InjectReport, TransportError> {
|
|
1083
|
+
Ok(InjectReport {
|
|
1084
|
+
stage_reached: crate::transport::InjectStage::Submit,
|
|
1085
|
+
inject_verification: crate::transport::InjectVerification::CaptureContainsToken,
|
|
1086
|
+
submit_verification: crate::transport::SubmitVerification::PastedContentPromptStillPresentAfterSubmit,
|
|
1087
|
+
turn_verification: crate::transport::TurnVerification::NotYetObserved,
|
|
1088
|
+
attempts: u32::from(SEND_RETRY_MAX_ATTEMPTS),
|
|
1089
|
+
})
|
|
1090
|
+
}
|
|
1091
|
+
fn send_keys(&self, _t: &Target, _k: &[Key]) -> Result<(), TransportError> {
|
|
1092
|
+
Ok(())
|
|
1093
|
+
}
|
|
1094
|
+
fn capture(&self, _t: &Target, range: CaptureRange) -> Result<CapturedText, TransportError> {
|
|
1095
|
+
Ok(CapturedText { text: String::new(), range })
|
|
1096
|
+
}
|
|
1097
|
+
fn query(&self, _t: &Target, _f: PaneField) -> Result<Option<String>, TransportError> {
|
|
1098
|
+
Ok(None)
|
|
1099
|
+
}
|
|
1100
|
+
fn liveness(&self, _p: &PaneId) -> Result<PaneLiveness, TransportError> {
|
|
1101
|
+
Ok(PaneLiveness::Unknown)
|
|
1102
|
+
}
|
|
1103
|
+
fn list_targets(&self) -> Result<Vec<PaneInfo>, TransportError> {
|
|
1104
|
+
Ok(Vec::new())
|
|
1105
|
+
}
|
|
1106
|
+
fn has_session(&self, _s: &SessionName) -> Result<bool, TransportError> {
|
|
1107
|
+
Ok(true)
|
|
1108
|
+
}
|
|
1109
|
+
fn list_windows(&self, _s: &SessionName) -> Result<Vec<WindowName>, TransportError> {
|
|
1110
|
+
Ok(Vec::new())
|
|
1111
|
+
}
|
|
1112
|
+
fn set_session_env(&self, _s: &SessionName, _k: &str, _v: &str) -> Result<SetEnvOutcome, TransportError> {
|
|
1113
|
+
Ok(SetEnvOutcome::Applied)
|
|
1114
|
+
}
|
|
1115
|
+
fn kill_session(&self, _s: &SessionName) -> Result<(), TransportError> {
|
|
1116
|
+
Ok(())
|
|
1117
|
+
}
|
|
1118
|
+
fn kill_window(&self, _t: &Target) -> Result<(), TransportError> {
|
|
1119
|
+
Ok(())
|
|
1120
|
+
}
|
|
1121
|
+
fn attach_session(&self, _s: &SessionName) -> Result<AttachOutcome, TransportError> {
|
|
1122
|
+
Ok(AttachOutcome::Attached)
|
|
1123
|
+
}
|
|
1124
|
+
}
|
|
1125
|
+
|
|
1126
|
+
#[test]
fn deliver_pending_exhausted_unverified_send_emits_failed_event() {
    // Workspace with one worker and a leader receiver pane, persisted before delivery.
    let ws = tmp_ws("sendfailed");
    let store = store_for(&ws);
    let log = EventLog::new(&ws);
    let state = serde_json::json!({
        "session_name": "team-sendfailed",
        "leader_receiver": {"pane_id": "%leader"},
        "agents": {"w1": {"provider": "fake", "pane_id": "%1"}}
    });
    crate::state::persist::save_runtime_state(&ws, &state).unwrap();
    let message_id = store
        .create_message(None, "leader", "w1", "ping", None, false, None)
        .unwrap();

    // The transport reports an unverified submit with the attempt counter at the limit.
    let transport = UnverifiedInjectTransport;
    let out = deliver_pending_message(&ws, &store, &transport, &message_id, &log, &state)
        .unwrap();

    assert!(!out.ok);
    assert_eq!(out.message_status.0, "failed");

    let events = log.tail(0).unwrap();
    let has_event = |name: &str| {
        events
            .iter()
            .any(|event| event.get("event").and_then(serde_json::Value::as_str) == Some(name))
    };
    assert!(
        has_event("send.failed"),
        "exhausted unverified send must emit send.failed; got {events:?}"
    );
    assert!(
        has_event("send.failed_notification"),
        "exhausted unverified send must queue a leader-visible notification; got {events:?}"
    );
}
|
|
1160
|
+
|
|
1071
1161
|
// ════════════════════════════════════════════════════════════════════════
|
|
1072
1162
|
// GROUP V — retry_result_deliveries: re-route notify_failed watchers with
|
|
1073
1163
|
// dedupe_reason rebind_retry. result_delivery.py:19-35.
|