@team-agent/installer 0.3.9 → 0.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,32 +4,35 @@ use super::common::*;
4
4
  /// bug-085 四象限 `start_mode` 决策(`start.py:179-188` + `_resume_rollout_missing` `start.py:66-69`),
5
5
  /// **从 start_agent 的整条 lock+spawn 路径里分离出的纯函数**(gate gap:porter 需要单元级 RED
6
6
  /// for `FreshAfterMissingRollout`,而 start_agent 全路径不可单测)。语义:
7
- /// - `_resume_rollout_missing` 仅在 codex 且有 session_id 时可能为 true:`!rollout_path || !exists`。
7
+ /// - resume backing 缺失时不可 resume:codex/claude transcript/rollout 文件,
8
+ /// copilot 用 session-store 行存在性(由调用方折叠进 `rollout_exists`)。
8
9
  /// - 初始 `start_mode = if session_id { Resumed } else { Fresh }`(`start.py:179`)。
9
- /// - **仅当** `missing && allow_fresh` 才升级为 `FreshAfterMissingRollout` 并清空 session_id
10
- /// (`start.py:180-190`)。`missing && !allow_fresh` → `Resumed`(随后真实 resume 会 fail)。
11
- /// - 非 codex:rollout 永不"缺失",直接看 session_id。
10
+ /// - `missing && allow_fresh` 升级为 `FreshAfterMissingRollout` 并清空 session_id
11
+ /// - `missing && !allow_fresh` 返回 `Noop`,调用方据此诚实拒绝并提示 `--allow-fresh`。
12
12
  pub fn decide_start_mode(
13
13
  provider: &str,
14
14
  session_id: Option<&SessionId>,
15
- rollout_path: Option<&RolloutPath>,
15
+ _rollout_path: Option<&RolloutPath>,
16
16
  rollout_exists: bool,
17
17
  allow_fresh: bool,
18
18
  ) -> StartMode {
19
19
  match session_id {
20
20
  None => StartMode::Fresh,
21
21
  Some(_) => {
22
- let missing_codex_rollout =
23
- provider == "codex" && (rollout_path.is_none() || !rollout_exists);
24
- if missing_codex_rollout && allow_fresh {
25
- StartMode::FreshAfterMissingRollout
26
- } else {
27
- StartMode::Resumed
22
+ let missing_resume_backing = resumable_provider_requires_backing(provider) && !rollout_exists;
23
+ match (missing_resume_backing, allow_fresh) {
24
+ (true, true) => StartMode::FreshAfterMissingRollout,
25
+ (true, false) => StartMode::Noop,
26
+ (false, _) => StartMode::Resumed,
28
27
  }
29
28
  }
30
29
  }
31
30
  }
32
31
 
32
+ pub(crate) fn resumable_provider_requires_backing(provider: &str) -> bool {
33
+ matches!(provider, "codex" | "claude" | "claude_code" | "copilot")
34
+ }
35
+
33
36
  /// `first_send_at` 严格分类(`_classify_first_send_at`,`orchestration.py:399`)。
34
37
  /// **绝不靠 truthiness**:`""`/`0`/`False`/`"null"`/非 ISO → `Corrupt`。
35
38
  pub fn classify_first_send_at(raw: &serde_json::Value) -> FirstSendAtState {
@@ -129,6 +132,14 @@ pub fn python_type_name(value: &serde_json::Value) -> &'static str {
129
132
  pub fn classify_restart_plan(
130
133
  state: &serde_json::Value,
131
134
  allow_fresh: bool,
135
+ ) -> Result<RestartPlan, LifecycleError> {
136
+ classify_restart_plan_with_resume_validation(None, state, allow_fresh)
137
+ }
138
+
139
+ pub(crate) fn classify_restart_plan_with_resume_validation(
140
+ workspace: Option<&Path>,
141
+ state: &serde_json::Value,
142
+ allow_fresh: bool,
132
143
  ) -> Result<RestartPlan, LifecycleError> {
133
144
  let mut decisions = Vec::new();
134
145
  let mut corrupt_entries = Vec::new();
@@ -171,21 +182,47 @@ pub fn classify_restart_plan(
171
182
  .and_then(|v| v.as_str())
172
183
  .filter(|s| !s.is_empty())
173
184
  .map(SessionId::new);
185
+ let agent_id = AgentId::new(worker_id.clone());
174
186
  // E6 层2 (C2, 用户裁定"绝不静默 fresh"): null session 只有显式 --allow-fresh 才 fresh,
175
187
  // 否则 Refuse(→ resume_not_ready + 指引)。删 `!interacted` 短路 —— 自启动 worker
176
188
  // (leader 从未发消息 → first_send_at=null → interacted=false)会被它静默 fresh 丢上下文。
177
- let decision = if session_id.is_some() {
189
+ let provider = agent_provider(agent);
190
+ let provider_wire = provider_wire(provider);
191
+ let resume_backing_exists = match (workspace, session_id.as_ref()) {
192
+ (Some(workspace), Some(session)) => resume_backing_exists_for_agent(
193
+ workspace,
194
+ &agent_id,
195
+ agent,
196
+ provider,
197
+ session,
198
+ agent_rollout_path(agent).as_ref(),
199
+ ),
200
+ (None, Some(_)) if resumable_provider_requires_backing(provider_wire) => {
201
+ agent_rollout_path(agent)
202
+ .as_ref()
203
+ .is_some_and(|path| path.as_path().exists())
204
+ }
205
+ _ => true,
206
+ };
207
+ let decision = if session_id.is_some() && resume_backing_exists {
178
208
  ResumeDecision::Resume
209
+ } else if session_id.is_some() && allow_fresh {
210
+ ResumeDecision::FreshStart
211
+ } else if session_id.is_some() {
212
+ ResumeDecision::Refuse
179
213
  } else if allow_fresh {
180
214
  ResumeDecision::FreshStart
181
215
  } else {
182
216
  ResumeDecision::Refuse
183
217
  };
184
- let agent_id = AgentId::new(worker_id.clone());
185
218
  if matches!(decision, ResumeDecision::Refuse) {
186
219
  unresumable.push(UnresumableWorker {
187
220
  agent_id: agent_id.clone(),
188
- reason: "no_persisted_session_id".to_string(),
221
+ reason: if session_id.is_some() {
222
+ "session_unresumable".to_string()
223
+ } else {
224
+ "no_persisted_session_id".to_string()
225
+ },
189
226
  session_id: session_id.clone(),
190
227
  first_send_at: first_send_at_raw.as_str().map(|s| s.to_string()),
191
228
  });
@@ -37,7 +37,7 @@ pub(crate) use common::refresh_missing_provider_sessions;
37
37
  pub use orchestrator::{halt_plan, plan_status};
38
38
  pub use rebuild::{
39
39
  restart, restart_candidates, restart_with_session_convergence_deadline, restart_with_transport,
40
- select_restart_state,
40
+ restart_with_transport_with_readiness_deadline, select_restart_state,
41
41
  };
42
42
  pub use remove::{remove_agent, remove_agent_with_transport};
43
43
  pub use selection::{classify_first_send_at, classify_restart_plan, decide_start_mode, python_type_name};
@@ -342,13 +342,12 @@ fn start_mode_serde_names_match_python_start_mode_strings() {
342
342
  }
343
343
 
344
344
  // ───────────────────────────────────────────────────────────────────────
345
- // decide_start_mode — bug-085 四象限 (start.py:66-69 + 179-190)
346
- // golden 实跑(PYTHONPATH=… python3 /tmp/x.py,_resume_rollout_missing + start_mode 逻辑):
345
+ // decide_start_mode — bug-085 四象限 + E20 #264 gap closure.
347
346
  // codex sess rollout-present any-fresh -> resumed
348
- // codex sess rollout-MISSING !allow_fresh -> resumed (随后真实 resume 失败)
349
- // codex sess rollout-MISSING allow_fresh -> fresh_after_missing_rollout ← bug-085 唯一臂
347
+ // codex sess backing-MISSING !allow_fresh -> noop/refuse (绝不静默 resume 死 session)
348
+ // codex sess backing-MISSING allow_fresh -> fresh_after_missing_rollout
350
349
  // codex no-sess any -> fresh
351
- // claude(非codex) sess rollout-missing fresh -> resumed (非 codex 永不"缺 rollout")
350
+ // claude/copilot sess backing-missing -> fresh_after_missing_rollout (allow_fresh) / noop/refuse (!allow_fresh)
352
351
  // claude no-sess -> fresh
353
352
  // 这是 bug-085 把 start_mode 分类从 start_agent 的 lock+spawn 全路径剥离出来的命门。
354
353
  // ───────────────────────────────────────────────────────────────────────
@@ -375,11 +374,11 @@ fn decide_start_mode_codex_missing_rollout_with_allow_fresh_is_fresh_after_missi
375
374
  }
376
375
 
377
376
  #[test]
378
- fn decide_start_mode_codex_missing_rollout_without_allow_fresh_stays_resumed() {
379
- // 关键陷阱:rollout 缺但 !allow_fresh → Resumed(start.py 不擅自丢 context)
377
+ fn decide_start_mode_codex_missing_rollout_without_allow_fresh_refuses() {
378
+ // E20 C①:backing 缺且 !allow_fresh → 诚实拒绝,绝不 resume 进死 session
380
379
  assert_eq!(
381
380
  decide_start_mode("codex", Some(&sid("s1")), None, false, false),
382
- StartMode::Resumed
381
+ StartMode::Noop
383
382
  );
384
383
  }
385
384
 
@@ -408,12 +407,24 @@ fn decide_start_mode_no_session_is_fresh() {
408
407
  }
409
408
 
410
409
  #[test]
411
- fn decide_start_mode_non_codex_never_fresh_after_missing_rollout() {
412
- // 非 codex provider:rollout 概念不适用,_resume_rollout_missing 恒为 false。
413
- assert_eq!(
414
- decide_start_mode("claude", Some(&sid("s1")), None, false, true),
415
- StartMode::Resumed
416
- );
410
+ fn decide_start_mode_checks_backing_for_all_resumable_providers() {
411
+ for provider in ["claude", "claude_code", "copilot"] {
412
+ assert_eq!(
413
+ decide_start_mode(provider, Some(&sid("s1")), None, false, true),
414
+ StartMode::FreshAfterMissingRollout,
415
+ "{provider} missing backing + allow_fresh must not resume"
416
+ );
417
+ assert_eq!(
418
+ decide_start_mode(provider, Some(&sid("s1")), None, false, false),
419
+ StartMode::Noop,
420
+ "{provider} missing backing + !allow_fresh must refuse"
421
+ );
422
+ assert_eq!(
423
+ decide_start_mode(provider, Some(&sid("s1")), Some(&rp("/r")), true, false),
424
+ StartMode::Resumed,
425
+ "{provider} existing backing remains resumable"
426
+ );
427
+ }
417
428
  assert_eq!(
418
429
  decide_start_mode("claude", None, None, false, true),
419
430
  StartMode::Fresh
@@ -533,8 +544,11 @@ fn classify_restart_plan_never_interacted_null_session_with_allow_fresh_marks_fo
533
544
  fn classify_restart_plan_codex_with_session_still_resumes() {
534
545
  // E6 层2 回归锁(不误伤): codex worker first_send_at=null 但 session_id 已捕 →
535
546
  // 仍走 Resume(分流轴是 session_id 有无,不是 interacted)。防层2 修法把 has_session 也误判。
547
+ let ws = temp_ws();
548
+ let rollout = ws.join("codex-rollout.jsonl");
549
+ std::fs::write(&rollout, "{}\n").unwrap();
536
550
  let state = json!({
537
- "agents": { "w1": { "provider": "codex", "session_id": "sess-codex-abc" } }
551
+ "agents": { "w1": { "provider": "codex", "session_id": "sess-codex-abc", "rollout_path": rollout.to_string_lossy() } }
538
552
  });
539
553
  let plan = classify_restart_plan(&state, false).expect("纯验证不应 Err");
540
554
  assert_eq!(plan.decisions.len(), 1);
@@ -978,6 +992,66 @@ fn leader_pane_env_cross_socket_all_probe_errors_stays_unknown() {
978
992
  assert_eq!(state, LeaderPaneEnvState::Unknown);
979
993
  }
980
994
 
995
+ #[test]
996
+ fn mcp_auto_approval_env_marks_leader_bypass_namespace_only() {
997
+ let mut env = std::collections::BTreeMap::new();
998
+ let safety = DangerousApproval {
999
+ enabled: true,
1000
+ source: DangerousApprovalSource::LeaderProcess,
1001
+ inherited: true,
1002
+ provider: Some("codex".to_string()),
1003
+ flag: Some("--dangerously-bypass-approvals-and-sandbox".to_string()),
1004
+ worker_capability_above_leader: false,
1005
+ ancestry_binary_name: Some("codex".to_string()),
1006
+ unexpected_binary: false,
1007
+ };
1008
+
1009
+ apply_mcp_auto_approval_env(&mut env, &safety);
1010
+
1011
+ assert_eq!(env.get("TEAM_AGENT_LEADER_BYPASS").map(String::as_str), Some("1"));
1012
+ assert_eq!(
1013
+ env.get("TEAM_AGENT_MCP_AUTO_APPROVE").map(String::as_str),
1014
+ Some("team_orchestrator")
1015
+ );
1016
+ assert_eq!(
1017
+ env.get("TEAM_AGENT_MCP_AUTO_APPROVE_SOURCE").map(String::as_str),
1018
+ Some("leader_bypass")
1019
+ );
1020
+ assert_eq!(
1021
+ env.get("TEAM_AGENT_LEADER_BYPASS_FLAG").map(String::as_str),
1022
+ Some("--dangerously-bypass-approvals-and-sandbox")
1023
+ );
1024
+ }
1025
+
1026
+ #[test]
1027
+ fn mcp_auto_approval_env_clears_when_leader_is_restricted() {
1028
+ let mut env = std::collections::BTreeMap::from([
1029
+ (
1030
+ "TEAM_AGENT_MCP_AUTO_APPROVE".to_string(),
1031
+ "team_orchestrator".to_string(),
1032
+ ),
1033
+ ("TEAM_AGENT_MCP_AUTO_APPROVE_SOURCE".to_string(), "leader_bypass".to_string()),
1034
+ ]);
1035
+ let safety = DangerousApproval {
1036
+ enabled: false,
1037
+ source: DangerousApprovalSource::Disabled,
1038
+ inherited: false,
1039
+ provider: None,
1040
+ flag: None,
1041
+ worker_capability_above_leader: false,
1042
+ ancestry_binary_name: None,
1043
+ unexpected_binary: false,
1044
+ };
1045
+
1046
+ apply_mcp_auto_approval_env(&mut env, &safety);
1047
+
1048
+ assert_eq!(env.get("TEAM_AGENT_LEADER_BYPASS").map(String::as_str), Some("0"));
1049
+ assert!(
1050
+ !env.contains_key("TEAM_AGENT_MCP_AUTO_APPROVE"),
1051
+ "restricted leader must not leave MCP auto-approval env behind: {env:?}"
1052
+ );
1053
+ }
1054
+
981
1055
  struct EnvVarGuard {
982
1056
  key: &'static str,
983
1057
  previous: Option<String>,
@@ -945,6 +945,10 @@ const DELEG_ROLE_WORKER2: &str = "---\nname: worker2\nrole: Second Worker\nprovi
945
945
  pub(super) fn restart_ws_two_resumable_workers() -> PathBuf {
946
946
  let ws = temp_ws().join("restartteam");
947
947
  std::fs::create_dir_all(ws.join("agents")).unwrap();
948
+ let alpha_rollout = ws.join("alpha-rollout.jsonl");
949
+ let bravo_rollout = ws.join("bravo-rollout.jsonl");
950
+ std::fs::write(&alpha_rollout, "{}\n").unwrap();
951
+ std::fs::write(&bravo_rollout, "{}\n").unwrap();
948
952
  std::fs::write(ws.join("TEAM.md"), "---\nname: restartteam\nobjective: Restart probe.\nprovider: codex\n---\n\nteam.\n").unwrap();
949
953
  std::fs::write(ws.join("agents").join("alpha.md"), DELEG_ROLE_ALPHA).unwrap();
950
954
  std::fs::write(ws.join("agents").join("bravo.md"), DELEG_ROLE_BRAVO).unwrap();
@@ -955,8 +959,8 @@ pub(super) fn restart_ws_two_resumable_workers() -> PathBuf {
955
959
  &json!({
956
960
  "session_name": "team-restartteam",
957
961
  "agents": {
958
- "alpha": {"status": "running", "provider": "codex", "session_id": "sess-a", "first_send_at": "2026-05-27T10:00:00+00:00"},
959
- "bravo": {"status": "running", "provider": "codex", "session_id": "sess-b", "first_send_at": "2026-05-27T10:00:00+00:00"}
962
+ "alpha": {"status": "running", "provider": "codex", "session_id": "sess-a", "rollout_path": alpha_rollout.to_string_lossy(), "first_send_at": "2026-05-27T10:00:00+00:00"},
963
+ "bravo": {"status": "running", "provider": "codex", "session_id": "sess-b", "rollout_path": bravo_rollout.to_string_lossy(), "first_send_at": "2026-05-27T10:00:00+00:00"}
960
964
  }
961
965
  }),
962
966
  )
@@ -965,6 +969,33 @@ pub(super) fn restart_ws_two_resumable_workers() -> PathBuf {
965
969
  ws
966
970
  }
967
971
 
972
+ fn restart_ws_one_resumable_worker() -> PathBuf {
973
+ let ws = temp_ws().join("restartone");
974
+ std::fs::create_dir_all(ws.join("agents")).unwrap();
975
+ let rollout = ws.join("alpha-rollout.jsonl");
976
+ std::fs::write(&rollout, "{}\n").unwrap();
977
+ std::fs::write(
978
+ ws.join("TEAM.md"),
979
+ "---\nname: restartone\nobjective: Restart readiness probe.\nprovider: codex\n---\n\nteam.\n",
980
+ )
981
+ .unwrap();
982
+ std::fs::write(ws.join("agents").join("alpha.md"), DELEG_ROLE_ALPHA).unwrap();
983
+ let spec = crate::compiler::compile_team(&ws).expect("compile 1-agent team");
984
+ std::fs::write(ws.join("team.spec.yaml"), crate::model::yaml::dumps(&spec)).unwrap();
985
+ crate::state::persist::save_runtime_state(
986
+ &ws,
987
+ &json!({
988
+ "session_name": "team-restartone",
989
+ "agents": {
990
+ "alpha": {"status": "running", "provider": "codex", "session_id": "sess-a", "rollout_path": rollout.to_string_lossy(), "first_send_at": "2026-05-27T10:00:00+00:00"}
991
+ }
992
+ }),
993
+ )
994
+ .unwrap();
995
+ seed_healthy_coordinator(&ws);
996
+ ws
997
+ }
998
+
968
999
  // 2 [P0] — restart_with_transport must drive the REAL Route-B resume spawn: one spawn per resumable
969
1000
  // worker. The first resumed worker recreates the session with spawn_first; later workers may use
970
1001
  // spawn_into only after a live-session check proves that recreated session still exists. Each spawn
@@ -1020,6 +1051,38 @@ fn restart_with_transport_spawns_resumable_workers_not_stub() {
1020
1051
  );
1021
1052
  }
1022
1053
 
1054
+ #[test]
1055
+ fn restart_times_out_when_spawned_worker_pane_is_not_addressable() {
1056
+ let ws = restart_ws_one_resumable_worker();
1057
+ let transport = OfflineTransport::new().with_spawned_panes_addressable(false);
1058
+
1059
+ let result =
1060
+ restart_with_transport_with_readiness_deadline(&ws, false, None, &transport, Some(0));
1061
+
1062
+ let text = format!("{result:?}");
1063
+ assert!(
1064
+ text.contains("restart not ready")
1065
+ && text.contains("worker pane addressable: no")
1066
+ && text.contains("Action:")
1067
+ && text.contains("Log:"),
1068
+ "restart must refuse with N38 readiness timeout details, not return ok; got {text}"
1069
+ );
1070
+ assert!(
1071
+ !matches!(result, Ok(RestartReport::Restarted { .. })),
1072
+ "restart readiness timeout must not return Restarted ok"
1073
+ );
1074
+ let events = crate::event_log::EventLog::new(&ws).tail(20).unwrap();
1075
+ let timeout = events
1076
+ .iter()
1077
+ .find(|event| event.get("event").and_then(|v| v.as_str()) == Some("restart.readiness_timeout"))
1078
+ .expect("restart.readiness_timeout event");
1079
+ assert_eq!(
1080
+ timeout.get("worker_pane_addressable").and_then(|v| v.as_bool()),
1081
+ Some(false),
1082
+ "timeout event must carry the failed readiness condition: {timeout}"
1083
+ );
1084
+ }
1085
+
1023
1086
  // 3 [P0] — start_agent_with_transport on a non-paused agent with a session_id must spawn EXACTLY ONE
1024
1087
  // worker (resume) carrying the provider build_command. Today the stub returns RequirementUnmet with
1025
1088
  // ZERO spawns -> RED at recorded.len().
@@ -1027,11 +1090,13 @@ fn restart_with_transport_spawns_resumable_workers_not_stub() {
1027
1090
  fn start_agent_with_transport_spawns_resume_not_stub() {
1028
1091
  let ws = temp_ws().join("startagentws");
1029
1092
  std::fs::create_dir_all(&ws).unwrap();
1093
+ let rollout = ws.join("alpha-rollout.jsonl");
1094
+ std::fs::write(&rollout, "{}\n").unwrap();
1030
1095
  crate::state::persist::save_runtime_state(
1031
1096
  &ws,
1032
1097
  &json!({
1033
1098
  "session_name": "team-sa",
1034
- "agents": {"alpha": {"status": "running", "provider": "codex", "session_id": "sess-a", "first_send_at": "2026-05-27T10:00:00+00:00"}}
1099
+ "agents": {"alpha": {"status": "running", "provider": "codex", "session_id": "sess-a", "rollout_path": rollout.to_string_lossy(), "first_send_at": "2026-05-27T10:00:00+00:00"}}
1035
1100
  }),
1036
1101
  )
1037
1102
  .unwrap();
@@ -88,11 +88,13 @@ impl crate::transport::Transport for SessionProbeRecordingTransport {
88
88
  fn respawn_ws_one_resumable_worker() -> PathBuf {
89
89
  let ws = temp_ws().join("respawn_dead_session");
90
90
  std::fs::create_dir_all(&ws).unwrap();
91
+ let rollout = ws.join("alpha-rollout.jsonl");
92
+ std::fs::write(&rollout, "{}\n").unwrap();
91
93
  crate::state::persist::save_runtime_state(
92
94
  &ws,
93
95
  &json!({
94
96
  "session_name": "team-sa",
95
- "agents": {"alpha": {"status": "running", "provider": "codex", "session_id": "sess-a", "first_send_at": "2026-05-27T10:00:00+00:00"}}
97
+ "agents": {"alpha": {"status": "running", "provider": "codex", "session_id": "sess-a", "rollout_path": rollout.to_string_lossy(), "first_send_at": "2026-05-27T10:00:00+00:00"}}
96
98
  }),
97
99
  )
98
100
  .unwrap();
@@ -17,7 +17,7 @@ use crate::transport::{
17
17
  use super::helpers::{message_exists, MessageStatusShadow};
18
18
  use super::{
19
19
  DeliveryOutcome, DeliveryRefusal, DeliveryStage, DeliveryStatus, MessagingError,
20
- PaneWidthQuery, TrustRetryPayload,
20
+ PaneWidthQuery, TrustRetryPayload, SEND_RETRY_MAX_ATTEMPTS,
21
21
  };
22
22
  use crate::state::projection::OwnerTeamResolution;
23
23
 
@@ -286,7 +286,6 @@ pub fn deliver_pending_message(
286
286
  "submit_unverified:{}",
287
287
  submit_verification_wire(inject_report.submit_verification)
288
288
  );
289
- store.mark(message_id, "submitted_unverified", Some(&reason))?;
290
289
  event_log.write(
291
290
  "send.unverified",
292
291
  serde_json::json!({
@@ -296,6 +295,29 @@ pub fn deliver_pending_message(
296
295
  "attempts": inject_report.attempts,
297
296
  }),
298
297
  )?;
298
+ if inject_report.attempts >= u32::from(SEND_RETRY_MAX_ATTEMPTS) {
299
+ store.mark(message_id, "failed", Some("send_unverified_exhausted"))?;
300
+ emit_send_failed_exhausted(
301
+ workspace,
302
+ state,
303
+ event_log,
304
+ message_id,
305
+ &message.recipient,
306
+ inject_report.attempts,
307
+ &reason,
308
+ )?;
309
+ return Ok(DeliveryOutcome {
310
+ ok: false,
311
+ status: DeliveryStatus::Failed,
312
+ message_status: MessageStatusShadow("failed".to_string()),
313
+ message_id: Some(message_id.to_string()),
314
+ verification: Some(reason),
315
+ stage: Some(DeliveryStage::Submit),
316
+ reason: None,
317
+ channel: None,
318
+ });
319
+ }
320
+ store.mark(message_id, "submitted_unverified", Some(&reason))?;
299
321
  return Ok(DeliveryOutcome {
300
322
  ok: false,
301
323
  status: DeliveryStatus::Failed,
@@ -538,6 +560,65 @@ fn leader_receiver_field_in_state<'a>(
538
560
  .filter(|value| !value.is_empty())
539
561
  }
540
562
 
563
+ fn emit_send_failed_exhausted(
564
+ workspace: &Path,
565
+ state: &serde_json::Value,
566
+ event_log: &EventLog,
567
+ message_id: &str,
568
+ recipient: &str,
569
+ attempts: u32,
570
+ verification: &str,
571
+ ) -> Result<(), MessagingError> {
572
+ event_log.write(
573
+ "send.failed",
574
+ serde_json::json!({
575
+ "message_id": message_id,
576
+ "recipient": recipient,
577
+ "attempts": attempts,
578
+ "max_attempts": SEND_RETRY_MAX_ATTEMPTS,
579
+ "reason": "send_unverified_exhausted",
580
+ "verification": verification,
581
+ }),
582
+ )?;
583
+ let content = format!(
584
+ "send.failed\nerror: send to {recipient} remained unverified after {attempts}/{SEND_RETRY_MAX_ATTEMPTS} attempts\naction: inspect the target pane and retry the send\nlog: .team/logs/events.jsonl"
585
+ );
586
+ match crate::messaging::send_to_leader_receiver(
587
+ workspace,
588
+ state,
589
+ "leader",
590
+ &content,
591
+ None,
592
+ "coordinator",
593
+ false,
594
+ Some(&format!("send.failed:{message_id}")),
595
+ event_log,
596
+ ) {
597
+ Ok(outcome) => {
598
+ event_log.write(
599
+ "send.failed_notification",
600
+ serde_json::json!({
601
+ "message_id": message_id,
602
+ "recipient": recipient,
603
+ "leader_notification_status": super::helpers::status_wire(outcome.status),
604
+ "leader_message_id": outcome.message_id,
605
+ }),
606
+ )?;
607
+ }
608
+ Err(error) => {
609
+ event_log.write(
610
+ "send.failed_notification_failed",
611
+ serde_json::json!({
612
+ "message_id": message_id,
613
+ "recipient": recipient,
614
+ "error": error.to_string(),
615
+ }),
616
+ )?;
617
+ }
618
+ }
619
+ Ok(())
620
+ }
621
+
541
622
  fn active_team_entry(state: &serde_json::Value) -> Option<&serde_json::Value> {
542
623
  let team = state.get("active_team_key").and_then(serde_json::Value::as_str)?;
543
624
  state
@@ -1068,6 +1068,96 @@ fn fire_due_scheduled_events_fires_each_scheduled_kind() {
1068
1068
  assert_eq!(fired.len(), 3, "exactly the three seeded due events fire, no extras");
1069
1069
  }
1070
1070
 
1071
+ struct UnverifiedInjectTransport;
1072
+ impl Transport for UnverifiedInjectTransport {
1073
+ fn kind(&self) -> BackendKind {
1074
+ BackendKind::Tmux
1075
+ }
1076
+ fn spawn_first(&self, _s: &SessionName, _w: &WindowName, _a: &[String], _c: &Path, _e: &BTreeMap<String, String>) -> Result<SpawnResult, TransportError> {
1077
+ unimplemented!("not reached in delivery")
1078
+ }
1079
+ fn spawn_into(&self, _s: &SessionName, _w: &WindowName, _a: &[String], _c: &Path, _e: &BTreeMap<String, String>) -> Result<SpawnResult, TransportError> {
1080
+ unimplemented!("not reached in delivery")
1081
+ }
1082
+ fn inject(&self, _t: &Target, _p: &InjectPayload, _s: Key, _b: bool) -> Result<InjectReport, TransportError> {
1083
+ Ok(InjectReport {
1084
+ stage_reached: crate::transport::InjectStage::Submit,
1085
+ inject_verification: crate::transport::InjectVerification::CaptureContainsToken,
1086
+ submit_verification: crate::transport::SubmitVerification::PastedContentPromptStillPresentAfterSubmit,
1087
+ turn_verification: crate::transport::TurnVerification::NotYetObserved,
1088
+ attempts: u32::from(SEND_RETRY_MAX_ATTEMPTS),
1089
+ })
1090
+ }
1091
+ fn send_keys(&self, _t: &Target, _k: &[Key]) -> Result<(), TransportError> {
1092
+ Ok(())
1093
+ }
1094
+ fn capture(&self, _t: &Target, range: CaptureRange) -> Result<CapturedText, TransportError> {
1095
+ Ok(CapturedText { text: String::new(), range })
1096
+ }
1097
+ fn query(&self, _t: &Target, _f: PaneField) -> Result<Option<String>, TransportError> {
1098
+ Ok(None)
1099
+ }
1100
+ fn liveness(&self, _p: &PaneId) -> Result<PaneLiveness, TransportError> {
1101
+ Ok(PaneLiveness::Unknown)
1102
+ }
1103
+ fn list_targets(&self) -> Result<Vec<PaneInfo>, TransportError> {
1104
+ Ok(Vec::new())
1105
+ }
1106
+ fn has_session(&self, _s: &SessionName) -> Result<bool, TransportError> {
1107
+ Ok(true)
1108
+ }
1109
+ fn list_windows(&self, _s: &SessionName) -> Result<Vec<WindowName>, TransportError> {
1110
+ Ok(Vec::new())
1111
+ }
1112
+ fn set_session_env(&self, _s: &SessionName, _k: &str, _v: &str) -> Result<SetEnvOutcome, TransportError> {
1113
+ Ok(SetEnvOutcome::Applied)
1114
+ }
1115
+ fn kill_session(&self, _s: &SessionName) -> Result<(), TransportError> {
1116
+ Ok(())
1117
+ }
1118
+ fn kill_window(&self, _t: &Target) -> Result<(), TransportError> {
1119
+ Ok(())
1120
+ }
1121
+ fn attach_session(&self, _s: &SessionName) -> Result<AttachOutcome, TransportError> {
1122
+ Ok(AttachOutcome::Attached)
1123
+ }
1124
+ }
1125
+
1126
+ #[test]
1127
+ fn deliver_pending_exhausted_unverified_send_emits_failed_event() {
1128
+ let ws = tmp_ws("sendfailed");
1129
+ let store = store_for(&ws);
1130
+ let log = EventLog::new(&ws);
1131
+ let state = serde_json::json!({
1132
+ "session_name": "team-sendfailed",
1133
+ "leader_receiver": {"pane_id": "%leader"},
1134
+ "agents": {"w1": {"provider": "fake", "pane_id": "%1"}}
1135
+ });
1136
+ crate::state::persist::save_runtime_state(&ws, &state).unwrap();
1137
+ let message_id = store
1138
+ .create_message(None, "leader", "w1", "ping", None, false, None)
1139
+ .unwrap();
1140
+
1141
+ let out = deliver_pending_message(&ws, &store, &UnverifiedInjectTransport, &message_id, &log, &state)
1142
+ .unwrap();
1143
+
1144
+ assert!(!out.ok);
1145
+ assert_eq!(out.message_status.0, "failed");
1146
+ let events = log.tail(0).unwrap();
1147
+ assert!(
1148
+ events
1149
+ .iter()
1150
+ .any(|event| event.get("event").and_then(serde_json::Value::as_str) == Some("send.failed")),
1151
+ "exhausted unverified send must emit send.failed; got {events:?}"
1152
+ );
1153
+ assert!(
1154
+ events
1155
+ .iter()
1156
+ .any(|event| event.get("event").and_then(serde_json::Value::as_str) == Some("send.failed_notification")),
1157
+ "exhausted unverified send must queue a leader-visible notification; got {events:?}"
1158
+ );
1159
+ }
1160
+
1071
1161
  // ════════════════════════════════════════════════════════════════════════
1072
1162
  // GROUP V — retry_result_deliveries: re-route notify_failed watchers with
1073
1163
  // dedupe_reason rebind_retry. result_delivery.py:19-35.