@team-agent/installer 0.3.9 → 0.3.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. package/Cargo.lock +1 -1
  2. package/Cargo.toml +1 -1
  3. package/crates/team-agent/src/cli/send.rs +9 -2
  4. package/crates/team-agent/src/coordinator/backoff.rs +83 -2
  5. package/crates/team-agent/src/coordinator/tests/spine.rs +6 -0
  6. package/crates/team-agent/src/coordinator/tick.rs +410 -168
  7. package/crates/team-agent/src/leader/lease.rs +19 -0
  8. package/crates/team-agent/src/leader/rediscover/tests.rs +12 -0
  9. package/crates/team-agent/src/leader/rediscover.rs +2 -0
  10. package/crates/team-agent/src/lifecycle/launch.rs +35 -0
  11. package/crates/team-agent/src/lifecycle/restart/agent.rs +17 -3
  12. package/crates/team-agent/src/lifecycle/restart/common.rs +75 -0
  13. package/crates/team-agent/src/lifecycle/restart/rebuild.rs +201 -3
  14. package/crates/team-agent/src/lifecycle/restart/selection.rs +51 -14
  15. package/crates/team-agent/src/lifecycle/restart.rs +1 -1
  16. package/crates/team-agent/src/lifecycle/tests/core.rs +89 -15
  17. package/crates/team-agent/src/lifecycle/tests/launch_spawn.rs +68 -3
  18. package/crates/team-agent/src/lifecycle/tests/main_preserved.rs +3 -1
  19. package/crates/team-agent/src/mcp_server/helpers.rs +24 -5
  20. package/crates/team-agent/src/mcp_server/normalize.rs +13 -6
  21. package/crates/team-agent/src/mcp_server/tests/send.rs +310 -212
  22. package/crates/team-agent/src/messaging/delivery.rs +83 -2
  23. package/crates/team-agent/src/messaging/helpers.rs +30 -10
  24. package/crates/team-agent/src/messaging/send.rs +71 -14
  25. package/crates/team-agent/src/messaging/tests/basic.rs +25 -7
  26. package/crates/team-agent/src/messaging/tests/runtime.rs +565 -111
  27. package/crates/team-agent/src/messaging/types.rs +19 -4
  28. package/crates/team-agent/src/provider/approvals/parsing.rs +43 -14
  29. package/crates/team-agent/src/provider/approvals/runtime_prompts.rs +12 -9
  30. package/crates/team-agent/src/transport/test_support.rs +12 -1
  31. package/package.json +4 -4
@@ -654,6 +654,8 @@ fn leader_command_provider(command: &str) -> Option<Provider> {
654
654
  Some(Provider::ClaudeCode)
655
655
  } else if lower.contains("codex") {
656
656
  Some(Provider::Codex)
657
+ } else if lower.contains("copilot") {
658
+ Some(Provider::Copilot)
657
659
  } else if lower.contains("fake") {
658
660
  Some(Provider::Fake)
659
661
  } else {
@@ -1082,3 +1084,20 @@ pub fn detect_dual_state_divergence(
1082
1084
  "team_owner_epoch": team_epoch,
1083
1085
  })))
1084
1086
  }
1087
+
1088
+ #[cfg(test)]
1089
+ mod tests {
1090
+ use super::*;
1091
+
1092
+ #[test]
1093
+ fn leader_command_provider_recognizes_copilot() {
1094
+ assert_eq!(
1095
+ leader_command_provider("copilot --allow-all-tools"),
1096
+ Some(Provider::Copilot)
1097
+ );
1098
+ assert_eq!(
1099
+ leader_command_provider("/usr/local/bin/copilot"),
1100
+ Some(Provider::Copilot)
1101
+ );
1102
+ }
1103
+ }
@@ -80,6 +80,18 @@ fn event_named(events: &[Value], name: &str) -> Value {
80
80
  .unwrap_or_else(|| panic!("missing event {name}; got {events:?}"))
81
81
  }
82
82
 
83
+ #[test]
84
+ fn leader_command_provider_recognizes_copilot() {
85
+ assert_eq!(
86
+ leader_command_provider("copilot --allow-all-tools"),
87
+ Some(Provider::Copilot)
88
+ );
89
+ assert_eq!(
90
+ leader_command_provider("/usr/local/bin/copilot"),
91
+ Some(Provider::Copilot)
92
+ );
93
+ }
94
+
83
95
  fn last_event_named(events: &[Value], name: &str) -> Value {
84
96
  events
85
97
  .iter()
@@ -590,6 +590,8 @@ fn leader_command_provider(command: &str) -> Option<Provider> {
590
590
  Some(Provider::ClaudeCode)
591
591
  } else if lower.contains("codex") {
592
592
  Some(Provider::Codex)
593
+ } else if lower.contains("copilot") {
594
+ Some(Provider::Copilot)
593
595
  } else if lower.contains("fake") {
594
596
  Some(Provider::Fake)
595
597
  } else {
@@ -288,6 +288,7 @@ fn spawn_agents(
288
288
  let mut env =
289
289
  inherited_env_with_team_overrides(workspace, agent_id_raw, Some(&mcp_team_id));
290
290
  apply_profile_launch_env(&mut env, &profile_launch);
291
+ apply_mcp_auto_approval_env(&mut env, &safety);
291
292
  // Python providers.py:145 + launch/core.py:253 — fresh launch runs the worker
292
293
  // with cwd=workspace, same as the RS fork/add and restart paths.
293
294
  let env_unset: Vec<String> = profile_launch.env_unset.iter().cloned().collect();
@@ -1390,6 +1391,39 @@ pub(crate) fn inherited_env_with_team_overrides(
1390
1391
  env
1391
1392
  }
1392
1393
 
1394
+ pub(crate) fn apply_mcp_auto_approval_env(
1395
+ env: &mut BTreeMap<String, String>,
1396
+ safety: &DangerousApproval,
1397
+ ) {
1398
+ for key in [
1399
+ "TEAM_AGENT_LEADER_BYPASS",
1400
+ "TEAM_AGENT_LEADER_BYPASS_SOURCE",
1401
+ "TEAM_AGENT_LEADER_BYPASS_PROVIDER",
1402
+ "TEAM_AGENT_LEADER_BYPASS_FLAG",
1403
+ "TEAM_AGENT_MCP_AUTO_APPROVE",
1404
+ "TEAM_AGENT_MCP_AUTO_APPROVE_SOURCE",
1405
+ ] {
1406
+ env.remove(key);
1407
+ }
1408
+ if safety.enabled
1409
+ && matches!(safety.source, DangerousApprovalSource::LeaderProcess)
1410
+ && safety.inherited
1411
+ {
1412
+ env.insert("TEAM_AGENT_LEADER_BYPASS".to_string(), "1".to_string());
1413
+ env.insert("TEAM_AGENT_LEADER_BYPASS_SOURCE".to_string(), "leader_process".to_string());
1414
+ if let Some(provider) = safety.provider.as_deref() {
1415
+ env.insert("TEAM_AGENT_LEADER_BYPASS_PROVIDER".to_string(), provider.to_string());
1416
+ }
1417
+ if let Some(flag) = safety.flag.as_deref() {
1418
+ env.insert("TEAM_AGENT_LEADER_BYPASS_FLAG".to_string(), flag.to_string());
1419
+ }
1420
+ env.insert("TEAM_AGENT_MCP_AUTO_APPROVE".to_string(), "team_orchestrator".to_string());
1421
+ env.insert("TEAM_AGENT_MCP_AUTO_APPROVE_SOURCE".to_string(), "leader_bypass".to_string());
1422
+ } else {
1423
+ env.insert("TEAM_AGENT_LEADER_BYPASS".to_string(), "0".to_string());
1424
+ }
1425
+ }
1426
+
1393
1427
  /// BUG / B2 灵魂件 + C-1-2 + C-6-1 cr verdict — Copilot per-worker AGENTS.md
1394
1428
  /// 写入 + `COPILOT_CUSTOM_INSTRUCTIONS_DIRS` 注入。
1395
1429
  ///
@@ -2971,6 +3005,7 @@ pub fn fork_agent_with_transport(
2971
3005
  let mut env =
2972
3006
  inherited_env_with_team_overrides(&workspace, as_agent_id.as_str(), Some(&fork_team));
2973
3007
  apply_profile_launch_env(&mut env, &profile_launch);
3008
+ apply_mcp_auto_approval_env(&mut env, &safety);
2974
3009
  // golden operations.py:336 -> _tmux_start_command_for_agent_window (runtime.py:1017-1020): branch on
2975
3010
  // _tmux_session_exists — an ABSENT session => new-session (spawn_first), present => new-window
2976
3011
  // (spawn_into). The Rust restart seam (restart.rs spawn_agent_window) uses the same branch.
@@ -108,17 +108,31 @@ pub(crate) fn start_agent_at_paths(
108
108
  let provider = agent_provider(&agent);
109
109
  let session_id = agent_session_id(&agent);
110
110
  let rollout_path = agent_rollout_path(&agent);
111
- let rollout_exists = rollout_path
111
+ let resume_backing_exists = session_id
112
112
  .as_ref()
113
- .map(|p| p.as_path().exists())
113
+ .map(|session| {
114
+ resume_backing_exists_for_agent(
115
+ workspace,
116
+ agent_id,
117
+ &agent,
118
+ provider,
119
+ session,
120
+ rollout_path.as_ref(),
121
+ )
122
+ })
114
123
  .unwrap_or(false);
115
124
  let start_mode = decide_start_mode(
116
125
  provider_wire(provider),
117
126
  session_id.as_ref(),
118
127
  rollout_path.as_ref(),
119
- rollout_exists,
128
+ resume_backing_exists,
120
129
  allow_fresh,
121
130
  );
131
+ if matches!(start_mode, StartMode::Noop) {
132
+ return Err(LifecycleError::RequirementUnmet(format!(
133
+ "resume_not_ready: session backing store missing for agent {agent_id}; rerun with --allow-fresh to start fresh"
134
+ )));
135
+ }
122
136
  let spawn_session_id = if matches!(start_mode, StartMode::Resumed) {
123
137
  session_id.as_ref()
124
138
  } else {
@@ -122,6 +122,7 @@ pub(super) fn spawn_agent_window(
122
122
  team_id.as_deref(),
123
123
  );
124
124
  crate::lifecycle::launch::apply_profile_launch_env(&mut env, &profile_launch);
125
+ crate::lifecycle::launch::apply_mcp_auto_approval_env(&mut env, safety);
125
126
  let spawn_cwd = spawn_cwd_override
126
127
  .or_else(|| {
127
128
  agent
@@ -242,6 +243,80 @@ pub(super) fn agent_rollout_path(agent: &serde_json::Value) -> Option<RolloutPat
242
243
  .map(RolloutPath::new)
243
244
  }
244
245
 
246
+ pub(super) fn resume_backing_exists_for_agent(
247
+ workspace: &Path,
248
+ agent_id: &AgentId,
249
+ agent: &serde_json::Value,
250
+ provider: Provider,
251
+ session_id: &SessionId,
252
+ rollout_path: Option<&RolloutPath>,
253
+ ) -> bool {
254
+ match provider {
255
+ Provider::Codex => rollout_path_exists(rollout_path),
256
+ Provider::Claude | Provider::ClaudeCode => {
257
+ rollout_path_exists(rollout_path)
258
+ || event_log_transcript_exists(workspace, agent_id.as_str(), session_id.as_str())
259
+ }
260
+ Provider::Copilot => copilot_session_store_has_session(session_id.as_str()),
261
+ Provider::GeminiCli | Provider::Fake => {
262
+ let _ = agent;
263
+ true
264
+ }
265
+ }
266
+ }
267
+
268
+ fn rollout_path_exists(rollout_path: Option<&RolloutPath>) -> bool {
269
+ rollout_path
270
+ .as_ref()
271
+ .is_some_and(|path| path.as_path().exists())
272
+ }
273
+
274
+ fn event_log_transcript_exists(workspace: &Path, agent_id: &str, session_id: &str) -> bool {
275
+ let Ok(events) = crate::event_log::EventLog::new(workspace).tail(0) else {
276
+ return false;
277
+ };
278
+ events.iter().rev().any(|event| {
279
+ event.get("event").and_then(serde_json::Value::as_str) == Some("session.captured")
280
+ && ["agent_id", "worker_id"]
281
+ .iter()
282
+ .any(|key| event.get(*key).and_then(serde_json::Value::as_str) == Some(agent_id))
283
+ && event.get("session_id").and_then(serde_json::Value::as_str) == Some(session_id)
284
+ && event_transcript_path(event).is_some_and(|path| path.exists())
285
+ })
286
+ }
287
+
288
+ fn event_transcript_path(event: &serde_json::Value) -> Option<PathBuf> {
289
+ event
290
+ .get("rollout_path")
291
+ .or_else(|| event.get("transcript_path"))
292
+ .and_then(serde_json::Value::as_str)
293
+ .filter(|path| !path.is_empty())
294
+ .map(PathBuf::from)
295
+ }
296
+
297
+ fn copilot_session_store_has_session(session_id: &str) -> bool {
298
+ let Some(home) = std::env::var_os("HOME").map(PathBuf::from) else {
299
+ return false;
300
+ };
301
+ let db_path = home.join(".copilot").join("session-store.db");
302
+ if !db_path.exists() {
303
+ return false;
304
+ }
305
+ let Ok(conn) = rusqlite::Connection::open_with_flags(
306
+ db_path,
307
+ rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
308
+ ) else {
309
+ return false;
310
+ };
311
+ conn
312
+ .query_row(
313
+ "select 1 from sessions where id = ?1 limit 1",
314
+ [session_id],
315
+ |_| Ok(()),
316
+ )
317
+ .is_ok()
318
+ }
319
+
245
320
  pub(crate) fn refresh_missing_provider_sessions(
246
321
  state: &mut serde_json::Value,
247
322
  ) -> Result<bool, LifecycleError> {
@@ -1,5 +1,5 @@
1
1
  use super::common::*;
2
- use super::selection::classify_restart_plan;
2
+ use super::selection::classify_restart_plan_with_resume_validation;
3
3
  use super::*;
4
4
 
5
5
  // ── lifecycle::restart —— 整队 Route B resume-or-fresh 重建 ──────────────────
@@ -29,6 +29,7 @@ pub fn restart_with_session_convergence_deadline(
29
29
  team,
30
30
  &crate::tmux_backend::TmuxBackend::for_workspace(&run_ws),
31
31
  session_converge_deadline_ms,
32
+ None,
32
33
  )
33
34
  }
34
35
 
@@ -40,6 +41,16 @@ pub fn restart_with_transport(
40
41
  allow_fresh: bool,
41
42
  team: Option<&str>,
42
43
  transport: &dyn crate::transport::Transport,
44
+ ) -> Result<RestartReport, LifecycleError> {
45
+ restart_with_transport_with_readiness_deadline(workspace, allow_fresh, team, transport, None)
46
+ }
47
+
48
+ pub fn restart_with_transport_with_readiness_deadline(
49
+ workspace: &Path,
50
+ allow_fresh: bool,
51
+ team: Option<&str>,
52
+ transport: &dyn crate::transport::Transport,
53
+ readiness_deadline_ms: Option<u64>,
43
54
  ) -> Result<RestartReport, LifecycleError> {
44
55
  match restart_with_transport_with_session_convergence_deadline(
45
56
  workspace,
@@ -47,6 +58,7 @@ pub fn restart_with_transport(
47
58
  team,
48
59
  transport,
49
60
  None,
61
+ readiness_deadline_ms,
50
62
  )? {
51
63
  RestartReport::RefusedResumeNotReady {
52
64
  missing,
@@ -76,6 +88,7 @@ pub fn restart_with_transport_with_session_convergence_deadline(
76
88
  team: Option<&str>,
77
89
  transport: &dyn crate::transport::Transport,
78
90
  session_converge_deadline_ms: Option<u64>,
91
+ readiness_deadline_ms: Option<u64>,
79
92
  ) -> Result<RestartReport, LifecycleError> {
80
93
  // RED-2-STILL(P0):入口门必须在 canonical_run_workspace 解析后的路径上判,不用 raw workspace。
81
94
  // 根因:quick-start <dir> 把 .team/runtime/spec 落在 team_workspace(dir)=**parent**/.team;
@@ -166,7 +179,7 @@ pub fn restart_with_transport_with_session_convergence_deadline(
166
179
  convergence.missing.iter().cloned().collect()
167
180
  };
168
181
  let forced_fresh_convergence = (!convergence.converged).then_some(convergence.clone());
169
- let plan = classify_restart_plan(&state, allow_fresh)?;
182
+ let plan = classify_restart_plan_with_resume_validation(Some(&selected.run_workspace), &state, allow_fresh)?;
170
183
  write_restart_resume_decision_events(
171
184
  &selected.run_workspace,
172
185
  &state,
@@ -186,7 +199,7 @@ pub fn restart_with_transport_with_session_convergence_deadline(
186
199
  return Ok(RestartReport::RefusedResumeAtomicity {
187
200
  unresumable: plan.unresumable,
188
201
  allow_fresh,
189
- error: "restart requires resumable workers before live spawn".to_string(),
202
+ error: "restart requires resumable workers before live spawn; rerun with --allow-fresh to start fresh".to_string(),
190
203
  });
191
204
  }
192
205
  let session_name = state_session_name(&state);
@@ -253,6 +266,15 @@ pub fn restart_with_transport_with_session_convergence_deadline(
253
266
  crate::state::projection::save_team_scoped_state(&selected.run_workspace, &state)
254
267
  .map_err(|e| LifecycleError::StatePersist(e.to_string()))?;
255
268
  let coordinator_started = start_coordinator_for_workspace(&selected.run_workspace)?;
269
+ wait_restart_readiness_or_timeout(
270
+ &selected.run_workspace,
271
+ &state,
272
+ &session_name,
273
+ &plan.decisions,
274
+ transport,
275
+ restart_readiness_deadline(readiness_deadline_ms),
276
+ restart_readiness_poll_interval(),
277
+ )?;
256
278
  let attach_commands = crate::tmux_backend::attach_commands_for_windows(
257
279
  &selected.run_workspace,
258
280
  &session_name,
@@ -455,6 +477,182 @@ fn parse_duration_value_seconds_ms(value: &str) -> Option<u64> {
455
477
  }
456
478
  }
457
479
 
480
+ fn restart_readiness_deadline(requested_ms: Option<u64>) -> std::time::Duration {
481
+ requested_ms.map(std::time::Duration::from_millis).unwrap_or_else(|| {
482
+ env_duration_ms(&["TEAM_AGENT_RESTART_READINESS_DEADLINE_MS"], 30_000)
483
+ })
484
+ }
485
+
486
+ fn restart_readiness_poll_interval() -> std::time::Duration {
487
+ env_duration_ms(&["TEAM_AGENT_RESTART_READINESS_POLL_MS"], 200)
488
+ }
489
+
490
+ #[derive(Debug, Clone, Copy)]
491
+ struct RestartReadiness {
492
+ session_created: bool,
493
+ worker_pane_addressable: bool,
494
+ coordinator_alive: bool,
495
+ }
496
+
497
+ impl RestartReadiness {
498
+ fn ready(self) -> bool {
499
+ self.session_created && self.worker_pane_addressable && self.coordinator_alive
500
+ }
501
+ }
502
+
503
+ fn wait_restart_readiness_or_timeout(
504
+ workspace: &Path,
505
+ state: &serde_json::Value,
506
+ session_name: &SessionName,
507
+ decisions: &[RestartedAgent],
508
+ transport: &dyn crate::transport::Transport,
509
+ deadline: std::time::Duration,
510
+ poll_interval: std::time::Duration,
511
+ ) -> Result<(), LifecycleError> {
512
+ let started = std::time::Instant::now();
513
+ loop {
514
+ let readiness = restart_readiness(workspace, state, session_name, decisions, transport);
515
+ if readiness.ready() {
516
+ return Ok(());
517
+ }
518
+ let elapsed = started.elapsed();
519
+ if elapsed >= deadline {
520
+ write_restart_readiness_timeout_event(workspace, readiness, deadline, elapsed)?;
521
+ return Err(LifecycleError::RequirementUnmet(restart_readiness_timeout_message(
522
+ workspace, readiness, deadline,
523
+ )));
524
+ }
525
+ std::thread::sleep(std::cmp::min(poll_interval, deadline.saturating_sub(elapsed)));
526
+ }
527
+ }
528
+
529
+ fn restart_readiness(
530
+ workspace: &Path,
531
+ state: &serde_json::Value,
532
+ session_name: &SessionName,
533
+ decisions: &[RestartedAgent],
534
+ transport: &dyn crate::transport::Transport,
535
+ ) -> RestartReadiness {
536
+ let session_created = session_live_or_default(transport, session_name, false);
537
+ let worker_pane_addressable = restart_worker_panes_addressable(state, decisions, transport);
538
+ let coordinator_workspace = crate::coordinator::WorkspacePath::new(workspace.to_path_buf());
539
+ let coordinator_alive =
540
+ crate::coordinator::coordinator_health(&coordinator_workspace).ok && session_created;
541
+ RestartReadiness { session_created, worker_pane_addressable, coordinator_alive }
542
+ }
543
+
544
+ fn restart_worker_panes_addressable(
545
+ state: &serde_json::Value,
546
+ decisions: &[RestartedAgent],
547
+ transport: &dyn crate::transport::Transport,
548
+ ) -> bool {
549
+ if decisions.is_empty() {
550
+ return true;
551
+ }
552
+ decisions.iter().all(|decision| {
553
+ let Some(pane_id) = state
554
+ .get("agents")
555
+ .and_then(|agents| agents.get(decision.agent_id.as_str()))
556
+ .and_then(|agent| agent.get("pane_id"))
557
+ .and_then(serde_json::Value::as_str)
558
+ .filter(|pane| !pane.is_empty())
559
+ .map(crate::transport::PaneId::new)
560
+ else {
561
+ return false;
562
+ };
563
+ pane_addressable(transport, &pane_id)
564
+ })
565
+ }
566
+
567
+ fn pane_addressable(
568
+ transport: &dyn crate::transport::Transport,
569
+ pane_id: &crate::transport::PaneId,
570
+ ) -> bool {
571
+ match transport.has_pane(pane_id) {
572
+ Ok(Some(present)) => present,
573
+ Ok(None) | Err(_) => {
574
+ transport
575
+ .list_targets()
576
+ .map(|targets| targets.iter().any(|pane| pane.pane_id == *pane_id))
577
+ .unwrap_or(false)
578
+ || transport
579
+ .liveness(pane_id)
580
+ .map(|state| state == crate::transport::PaneLiveness::Live)
581
+ .unwrap_or(false)
582
+ }
583
+ }
584
+ }
585
+
586
+ fn write_restart_readiness_timeout_event(
587
+ workspace: &Path,
588
+ readiness: RestartReadiness,
589
+ deadline: std::time::Duration,
590
+ elapsed: std::time::Duration,
591
+ ) -> Result<(), LifecycleError> {
592
+ crate::event_log::EventLog::new(workspace)
593
+ .write(
594
+ "restart.readiness_timeout",
595
+ serde_json::json!({
596
+ "tmux_session_created": readiness.session_created,
597
+ "worker_pane_addressable": readiness.worker_pane_addressable,
598
+ "coordinator_alive": readiness.coordinator_alive,
599
+ "deadline_ms": deadline.as_millis(),
600
+ "elapsed_ms": elapsed.as_millis(),
601
+ "coordinator_log": crate::coordinator::coordinator_log_path(
602
+ &crate::coordinator::WorkspacePath::new(workspace.to_path_buf())
603
+ ).display().to_string(),
604
+ "state_path": crate::state::persist::runtime_state_path(workspace).display().to_string(),
605
+ "pid_path": crate::coordinator::coordinator_pid_path(
606
+ &crate::coordinator::WorkspacePath::new(workspace.to_path_buf())
607
+ ).display().to_string(),
608
+ }),
609
+ )
610
+ .map(|_| ())
611
+ .map_err(|e| LifecycleError::StatePersist(e.to_string()))
612
+ }
613
+
614
+ fn restart_readiness_timeout_message(
615
+ workspace: &Path,
616
+ readiness: RestartReadiness,
617
+ deadline: std::time::Duration,
618
+ ) -> String {
619
+ let coordinator_workspace = crate::coordinator::WorkspacePath::new(workspace.to_path_buf());
620
+ let deadline_s = deadline.as_secs_f64();
621
+ format!(
622
+ "restart not ready within {deadline_s:.1}s: {missing}\n\
623
+ - tmux session created: {session}\n\
624
+ - worker pane addressable: {pane}\n\
625
+ - coordinator alive: {coordinator}\n\
626
+ Action: check coordinator log {log}, then `team-agent restart <agent> --allow-fresh` or `team-agent diagnose`\n\
627
+ Log: coordinator_log={log} state={state} pid_file={pid}",
628
+ missing = restart_readiness_missing_summary(readiness),
629
+ session = yes_no(readiness.session_created),
630
+ pane = yes_no(readiness.worker_pane_addressable),
631
+ coordinator = yes_no(readiness.coordinator_alive),
632
+ log = crate::coordinator::coordinator_log_path(&coordinator_workspace).display(),
633
+ state = crate::state::persist::runtime_state_path(workspace).display(),
634
+ pid = crate::coordinator::coordinator_pid_path(&coordinator_workspace).display(),
635
+ )
636
+ }
637
+
638
+ fn restart_readiness_missing_summary(readiness: RestartReadiness) -> String {
639
+ let mut missing = Vec::new();
640
+ if !readiness.session_created {
641
+ missing.push("tmux session created");
642
+ }
643
+ if !readiness.worker_pane_addressable {
644
+ missing.push("worker pane addressable");
645
+ }
646
+ if !readiness.coordinator_alive {
647
+ missing.push("coordinator alive");
648
+ }
649
+ missing.join(", ")
650
+ }
651
+
652
+ fn yes_no(value: bool) -> &'static str {
653
+ if value { "yes" } else { "no" }
654
+ }
655
+
458
656
  fn verify_spawned_agent_live(
459
657
  _agent_id: &AgentId,
460
658
  _spawn: &SpawnedAgentWindow,
@@ -4,32 +4,35 @@ use super::common::*;
4
4
  /// bug-085 四象限 `start_mode` 决策(`start.py:179-188` + `_resume_rollout_missing` `start.py:66-69`),
5
5
  /// **从 start_agent 的整条 lock+spawn 路径里分离出的纯函数**(gate gap:porter 需要单元级 RED
6
6
  /// for `FreshAfterMissingRollout`,而 start_agent 全路径不可单测)。语义:
7
- /// - `_resume_rollout_missing` codex 且有 session_id 时可能 true:`!rollout_path || !exists`。
7
+ /// - resume backing 缺失时不可 resume:codex/claude transcript/rollout 文件,
8
+ /// copilot 用 session-store 行存在性(由调用方折叠进 `rollout_exists`)。
8
9
  /// - 初始 `start_mode = if session_id { Resumed } else { Fresh }`(`start.py:179`)。
9
- /// - **仅当** `missing && allow_fresh` 才升级为 `FreshAfterMissingRollout` 并清空 session_id
10
- /// (`start.py:180-190`)。`missing && !allow_fresh` `Resumed`(随后真实 resume 会 fail)。
11
- /// - 非 codex:rollout 永不"缺失",直接看 session_id。
10
+ /// - `missing && allow_fresh` 升级为 `FreshAfterMissingRollout` 并清空 session_id
11
+ /// - `missing && !allow_fresh` 返回 `Noop`,调用方据此诚实拒绝并提示 `--allow-fresh`。
12
12
  pub fn decide_start_mode(
13
13
  provider: &str,
14
14
  session_id: Option<&SessionId>,
15
- rollout_path: Option<&RolloutPath>,
15
+ _rollout_path: Option<&RolloutPath>,
16
16
  rollout_exists: bool,
17
17
  allow_fresh: bool,
18
18
  ) -> StartMode {
19
19
  match session_id {
20
20
  None => StartMode::Fresh,
21
21
  Some(_) => {
22
- let missing_codex_rollout =
23
- provider == "codex" && (rollout_path.is_none() || !rollout_exists);
24
- if missing_codex_rollout && allow_fresh {
25
- StartMode::FreshAfterMissingRollout
26
- } else {
27
- StartMode::Resumed
22
+ let missing_resume_backing = resumable_provider_requires_backing(provider) && !rollout_exists;
23
+ match (missing_resume_backing, allow_fresh) {
24
+ (true, true) => StartMode::FreshAfterMissingRollout,
25
+ (true, false) => StartMode::Noop,
26
+ (false, _) => StartMode::Resumed,
28
27
  }
29
28
  }
30
29
  }
31
30
  }
32
31
 
32
+ pub(crate) fn resumable_provider_requires_backing(provider: &str) -> bool {
33
+ matches!(provider, "codex" | "claude" | "claude_code" | "copilot")
34
+ }
35
+
33
36
  /// `first_send_at` 严格分类(`_classify_first_send_at`,`orchestration.py:399`)。
34
37
  /// **绝不靠 truthiness**:`""`/`0`/`False`/`"null"`/非 ISO → `Corrupt`。
35
38
  pub fn classify_first_send_at(raw: &serde_json::Value) -> FirstSendAtState {
@@ -129,6 +132,14 @@ pub fn python_type_name(value: &serde_json::Value) -> &'static str {
129
132
  pub fn classify_restart_plan(
130
133
  state: &serde_json::Value,
131
134
  allow_fresh: bool,
135
+ ) -> Result<RestartPlan, LifecycleError> {
136
+ classify_restart_plan_with_resume_validation(None, state, allow_fresh)
137
+ }
138
+
139
+ pub(crate) fn classify_restart_plan_with_resume_validation(
140
+ workspace: Option<&Path>,
141
+ state: &serde_json::Value,
142
+ allow_fresh: bool,
132
143
  ) -> Result<RestartPlan, LifecycleError> {
133
144
  let mut decisions = Vec::new();
134
145
  let mut corrupt_entries = Vec::new();
@@ -171,21 +182,47 @@ pub fn classify_restart_plan(
171
182
  .and_then(|v| v.as_str())
172
183
  .filter(|s| !s.is_empty())
173
184
  .map(SessionId::new);
185
+ let agent_id = AgentId::new(worker_id.clone());
174
186
  // E6 层2 (C2, 用户裁定"绝不静默 fresh"): null session 只有显式 --allow-fresh 才 fresh,
175
187
  // 否则 Refuse(→ resume_not_ready + 指引)。删 `!interacted` 短路 —— 自启动 worker
176
188
  // (leader 从未发消息 → first_send_at=null → interacted=false)会被它静默 fresh 丢上下文。
177
- let decision = if session_id.is_some() {
189
+ let provider = agent_provider(agent);
190
+ let provider_wire = provider_wire(provider);
191
+ let resume_backing_exists = match (workspace, session_id.as_ref()) {
192
+ (Some(workspace), Some(session)) => resume_backing_exists_for_agent(
193
+ workspace,
194
+ &agent_id,
195
+ agent,
196
+ provider,
197
+ session,
198
+ agent_rollout_path(agent).as_ref(),
199
+ ),
200
+ (None, Some(_)) if resumable_provider_requires_backing(provider_wire) => {
201
+ agent_rollout_path(agent)
202
+ .as_ref()
203
+ .is_some_and(|path| path.as_path().exists())
204
+ }
205
+ _ => true,
206
+ };
207
+ let decision = if session_id.is_some() && resume_backing_exists {
178
208
  ResumeDecision::Resume
209
+ } else if session_id.is_some() && allow_fresh {
210
+ ResumeDecision::FreshStart
211
+ } else if session_id.is_some() {
212
+ ResumeDecision::Refuse
179
213
  } else if allow_fresh {
180
214
  ResumeDecision::FreshStart
181
215
  } else {
182
216
  ResumeDecision::Refuse
183
217
  };
184
- let agent_id = AgentId::new(worker_id.clone());
185
218
  if matches!(decision, ResumeDecision::Refuse) {
186
219
  unresumable.push(UnresumableWorker {
187
220
  agent_id: agent_id.clone(),
188
- reason: "no_persisted_session_id".to_string(),
221
+ reason: if session_id.is_some() {
222
+ "session_unresumable".to_string()
223
+ } else {
224
+ "no_persisted_session_id".to_string()
225
+ },
189
226
  session_id: session_id.clone(),
190
227
  first_send_at: first_send_at_raw.as_str().map(|s| s.to_string()),
191
228
  });
@@ -37,7 +37,7 @@ pub(crate) use common::refresh_missing_provider_sessions;
37
37
  pub use orchestrator::{halt_plan, plan_status};
38
38
  pub use rebuild::{
39
39
  restart, restart_candidates, restart_with_session_convergence_deadline, restart_with_transport,
40
- select_restart_state,
40
+ restart_with_transport_with_readiness_deadline, select_restart_state,
41
41
  };
42
42
  pub use remove::{remove_agent, remove_agent_with_transport};
43
43
  pub use selection::{classify_first_send_at, classify_restart_plan, decide_start_mode, python_type_name};