@team-agent/installer 0.3.8 → 0.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +1 -1
- package/Cargo.toml +1 -1
- package/crates/team-agent/src/cli/emit.rs +20 -4
- package/crates/team-agent/src/cli/leader.rs +12 -8
- package/crates/team-agent/src/cli/tests/base.rs +14 -0
- package/crates/team-agent/src/cli/tests/run_delegation.rs +6 -2
- package/crates/team-agent/src/coordinator/tests/spine.rs +6 -0
- package/crates/team-agent/src/coordinator/tick.rs +83 -1
- package/crates/team-agent/src/leader/lease.rs +19 -0
- package/crates/team-agent/src/leader/rediscover/tests.rs +12 -0
- package/crates/team-agent/src/leader/rediscover.rs +2 -0
- package/crates/team-agent/src/leader/start.rs +34 -23
- package/crates/team-agent/src/leader/tests/identity.rs +22 -0
- package/crates/team-agent/src/leader/tests/wake_start_owner.rs +13 -0
- package/crates/team-agent/src/lifecycle/launch.rs +35 -0
- package/crates/team-agent/src/lifecycle/restart/agent.rs +17 -3
- package/crates/team-agent/src/lifecycle/restart/common.rs +75 -0
- package/crates/team-agent/src/lifecycle/restart/rebuild.rs +211 -6
- package/crates/team-agent/src/lifecycle/restart/selection.rs +51 -14
- package/crates/team-agent/src/lifecycle/restart.rs +8 -4
- package/crates/team-agent/src/lifecycle/tests/core.rs +89 -15
- package/crates/team-agent/src/lifecycle/tests/launch_spawn.rs +144 -3
- package/crates/team-agent/src/lifecycle/tests/main_preserved.rs +3 -1
- package/crates/team-agent/src/messaging/delivery.rs +83 -2
- package/crates/team-agent/src/messaging/results.rs +27 -22
- package/crates/team-agent/src/messaging/tests/runtime.rs +108 -0
- package/crates/team-agent/src/provider/approvals/parsing.rs +43 -14
- package/crates/team-agent/src/provider/approvals/runtime_prompts.rs +12 -9
- package/crates/team-agent/src/transport/test_support.rs +12 -1
- package/package.json +4 -4
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
use super::common::*;
|
|
2
|
-
use super::selection::
|
|
2
|
+
use super::selection::classify_restart_plan_with_resume_validation;
|
|
3
3
|
use super::*;
|
|
4
4
|
|
|
5
5
|
// ── lifecycle::restart —— 整队 Route B resume-or-fresh 重建 ──────────────────
|
|
@@ -29,6 +29,7 @@ pub fn restart_with_session_convergence_deadline(
|
|
|
29
29
|
team,
|
|
30
30
|
&crate::tmux_backend::TmuxBackend::for_workspace(&run_ws),
|
|
31
31
|
session_converge_deadline_ms,
|
|
32
|
+
None,
|
|
32
33
|
)
|
|
33
34
|
}
|
|
34
35
|
|
|
@@ -40,6 +41,16 @@ pub fn restart_with_transport(
|
|
|
40
41
|
allow_fresh: bool,
|
|
41
42
|
team: Option<&str>,
|
|
42
43
|
transport: &dyn crate::transport::Transport,
|
|
44
|
+
) -> Result<RestartReport, LifecycleError> {
|
|
45
|
+
restart_with_transport_with_readiness_deadline(workspace, allow_fresh, team, transport, None)
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
pub fn restart_with_transport_with_readiness_deadline(
|
|
49
|
+
workspace: &Path,
|
|
50
|
+
allow_fresh: bool,
|
|
51
|
+
team: Option<&str>,
|
|
52
|
+
transport: &dyn crate::transport::Transport,
|
|
53
|
+
readiness_deadline_ms: Option<u64>,
|
|
43
54
|
) -> Result<RestartReport, LifecycleError> {
|
|
44
55
|
match restart_with_transport_with_session_convergence_deadline(
|
|
45
56
|
workspace,
|
|
@@ -47,6 +58,7 @@ pub fn restart_with_transport(
|
|
|
47
58
|
team,
|
|
48
59
|
transport,
|
|
49
60
|
None,
|
|
61
|
+
readiness_deadline_ms,
|
|
50
62
|
)? {
|
|
51
63
|
RestartReport::RefusedResumeNotReady {
|
|
52
64
|
missing,
|
|
@@ -76,11 +88,19 @@ pub fn restart_with_transport_with_session_convergence_deadline(
|
|
|
76
88
|
team: Option<&str>,
|
|
77
89
|
transport: &dyn crate::transport::Transport,
|
|
78
90
|
session_converge_deadline_ms: Option<u64>,
|
|
91
|
+
readiness_deadline_ms: Option<u64>,
|
|
79
92
|
) -> Result<RestartReport, LifecycleError> {
|
|
80
|
-
|
|
93
|
+
// RED-2-STILL(P0):入口门必须在 canonical_run_workspace 解析后的路径上判,不用 raw workspace。
|
|
94
|
+
// 根因:quick-start <dir> 把 .team/runtime/spec 落在 team_workspace(dir)=**parent**/.team;
|
|
95
|
+
// 入口门查 raw dir 自身的 .team/state(空,它在 parent)→ 误判"无 team context"早退,到不了
|
|
96
|
+
// 067f78f 下移后的第二道门。canonical_run_workspace 已能正确解析到 parent(走 parent.join(".team")
|
|
97
|
+
// 分支),在它之上判 input_has_no_local_team_context 才对齐 quick-start 落点。
|
|
98
|
+
let resolved_ws = crate::model::paths::canonical_run_workspace(workspace)
|
|
99
|
+
.map_err(|e| LifecycleError::StatePersist(e.to_string()))?;
|
|
100
|
+
if crate::lifecycle::restart::input_has_no_local_team_context(&resolved_ws) {
|
|
81
101
|
return Err(LifecycleError::TeamSelect(format!(
|
|
82
|
-
"missing spec for restart: {}",
|
|
83
|
-
|
|
102
|
+
"missing spec for restart: {} (run `team-agent quick-start <teamdir>` first)",
|
|
103
|
+
crate::model::paths::runtime_dir(&resolved_ws).display()
|
|
84
104
|
)));
|
|
85
105
|
}
|
|
86
106
|
// RED-2(P0)修:存在性门下移到 resolve 之后,用 selected.spec_path(读序 B:runtime 优先、
|
|
@@ -159,7 +179,7 @@ pub fn restart_with_transport_with_session_convergence_deadline(
|
|
|
159
179
|
convergence.missing.iter().cloned().collect()
|
|
160
180
|
};
|
|
161
181
|
let forced_fresh_convergence = (!convergence.converged).then_some(convergence.clone());
|
|
162
|
-
let plan =
|
|
182
|
+
let plan = classify_restart_plan_with_resume_validation(Some(&selected.run_workspace), &state, allow_fresh)?;
|
|
163
183
|
write_restart_resume_decision_events(
|
|
164
184
|
&selected.run_workspace,
|
|
165
185
|
&state,
|
|
@@ -179,7 +199,7 @@ pub fn restart_with_transport_with_session_convergence_deadline(
|
|
|
179
199
|
return Ok(RestartReport::RefusedResumeAtomicity {
|
|
180
200
|
unresumable: plan.unresumable,
|
|
181
201
|
allow_fresh,
|
|
182
|
-
error: "restart requires resumable workers before live spawn".to_string(),
|
|
202
|
+
error: "restart requires resumable workers before live spawn; rerun with --allow-fresh to start fresh".to_string(),
|
|
183
203
|
});
|
|
184
204
|
}
|
|
185
205
|
let session_name = state_session_name(&state);
|
|
@@ -246,6 +266,15 @@ pub fn restart_with_transport_with_session_convergence_deadline(
|
|
|
246
266
|
crate::state::projection::save_team_scoped_state(&selected.run_workspace, &state)
|
|
247
267
|
.map_err(|e| LifecycleError::StatePersist(e.to_string()))?;
|
|
248
268
|
let coordinator_started = start_coordinator_for_workspace(&selected.run_workspace)?;
|
|
269
|
+
wait_restart_readiness_or_timeout(
|
|
270
|
+
&selected.run_workspace,
|
|
271
|
+
&state,
|
|
272
|
+
&session_name,
|
|
273
|
+
&plan.decisions,
|
|
274
|
+
transport,
|
|
275
|
+
restart_readiness_deadline(readiness_deadline_ms),
|
|
276
|
+
restart_readiness_poll_interval(),
|
|
277
|
+
)?;
|
|
249
278
|
let attach_commands = crate::tmux_backend::attach_commands_for_windows(
|
|
250
279
|
&selected.run_workspace,
|
|
251
280
|
&session_name,
|
|
@@ -448,6 +477,182 @@ fn parse_duration_value_seconds_ms(value: &str) -> Option<u64> {
|
|
|
448
477
|
}
|
|
449
478
|
}
|
|
450
479
|
|
|
480
|
+
fn restart_readiness_deadline(requested_ms: Option<u64>) -> std::time::Duration {
|
|
481
|
+
requested_ms.map(std::time::Duration::from_millis).unwrap_or_else(|| {
|
|
482
|
+
env_duration_ms(&["TEAM_AGENT_RESTART_READINESS_DEADLINE_MS"], 30_000)
|
|
483
|
+
})
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
fn restart_readiness_poll_interval() -> std::time::Duration {
|
|
487
|
+
env_duration_ms(&["TEAM_AGENT_RESTART_READINESS_POLL_MS"], 200)
|
|
488
|
+
}
|
|
489
|
+
|
|
490
|
+
/// Result of one readiness probe round after a restart.
///
/// Each flag records whether one prerequisite was observed; the restart counts as ready only
/// when all three hold (see [`RestartReadiness::ready`]).
#[derive(Debug, Clone, Copy)]
struct RestartReadiness {
    /// The tmux session for the team was found.
    session_created: bool,
    /// Every restarted worker has a pane that the transport can address.
    worker_pane_addressable: bool,
    /// The coordinator was reported healthy.
    coordinator_alive: bool,
}

impl RestartReadiness {
    /// Returns `true` only when every probe passed.
    fn ready(self) -> bool {
        let Self {
            session_created,
            worker_pane_addressable,
            coordinator_alive,
        } = self;
        [session_created, worker_pane_addressable, coordinator_alive]
            .iter()
            .all(|&passed| passed)
    }
}
|
|
502
|
+
|
|
503
|
+
fn wait_restart_readiness_or_timeout(
|
|
504
|
+
workspace: &Path,
|
|
505
|
+
state: &serde_json::Value,
|
|
506
|
+
session_name: &SessionName,
|
|
507
|
+
decisions: &[RestartedAgent],
|
|
508
|
+
transport: &dyn crate::transport::Transport,
|
|
509
|
+
deadline: std::time::Duration,
|
|
510
|
+
poll_interval: std::time::Duration,
|
|
511
|
+
) -> Result<(), LifecycleError> {
|
|
512
|
+
let started = std::time::Instant::now();
|
|
513
|
+
loop {
|
|
514
|
+
let readiness = restart_readiness(workspace, state, session_name, decisions, transport);
|
|
515
|
+
if readiness.ready() {
|
|
516
|
+
return Ok(());
|
|
517
|
+
}
|
|
518
|
+
let elapsed = started.elapsed();
|
|
519
|
+
if elapsed >= deadline {
|
|
520
|
+
write_restart_readiness_timeout_event(workspace, readiness, deadline, elapsed)?;
|
|
521
|
+
return Err(LifecycleError::RequirementUnmet(restart_readiness_timeout_message(
|
|
522
|
+
workspace, readiness, deadline,
|
|
523
|
+
)));
|
|
524
|
+
}
|
|
525
|
+
std::thread::sleep(std::cmp::min(poll_interval, deadline.saturating_sub(elapsed)));
|
|
526
|
+
}
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
fn restart_readiness(
|
|
530
|
+
workspace: &Path,
|
|
531
|
+
state: &serde_json::Value,
|
|
532
|
+
session_name: &SessionName,
|
|
533
|
+
decisions: &[RestartedAgent],
|
|
534
|
+
transport: &dyn crate::transport::Transport,
|
|
535
|
+
) -> RestartReadiness {
|
|
536
|
+
let session_created = session_live_or_default(transport, session_name, false);
|
|
537
|
+
let worker_pane_addressable = restart_worker_panes_addressable(state, decisions, transport);
|
|
538
|
+
let coordinator_workspace = crate::coordinator::WorkspacePath::new(workspace.to_path_buf());
|
|
539
|
+
let coordinator_alive =
|
|
540
|
+
crate::coordinator::coordinator_health(&coordinator_workspace).ok && session_created;
|
|
541
|
+
RestartReadiness { session_created, worker_pane_addressable, coordinator_alive }
|
|
542
|
+
}
|
|
543
|
+
|
|
544
|
+
fn restart_worker_panes_addressable(
|
|
545
|
+
state: &serde_json::Value,
|
|
546
|
+
decisions: &[RestartedAgent],
|
|
547
|
+
transport: &dyn crate::transport::Transport,
|
|
548
|
+
) -> bool {
|
|
549
|
+
if decisions.is_empty() {
|
|
550
|
+
return true;
|
|
551
|
+
}
|
|
552
|
+
decisions.iter().all(|decision| {
|
|
553
|
+
let Some(pane_id) = state
|
|
554
|
+
.get("agents")
|
|
555
|
+
.and_then(|agents| agents.get(decision.agent_id.as_str()))
|
|
556
|
+
.and_then(|agent| agent.get("pane_id"))
|
|
557
|
+
.and_then(serde_json::Value::as_str)
|
|
558
|
+
.filter(|pane| !pane.is_empty())
|
|
559
|
+
.map(crate::transport::PaneId::new)
|
|
560
|
+
else {
|
|
561
|
+
return false;
|
|
562
|
+
};
|
|
563
|
+
pane_addressable(transport, &pane_id)
|
|
564
|
+
})
|
|
565
|
+
}
|
|
566
|
+
|
|
567
|
+
fn pane_addressable(
|
|
568
|
+
transport: &dyn crate::transport::Transport,
|
|
569
|
+
pane_id: &crate::transport::PaneId,
|
|
570
|
+
) -> bool {
|
|
571
|
+
match transport.has_pane(pane_id) {
|
|
572
|
+
Ok(Some(present)) => present,
|
|
573
|
+
Ok(None) | Err(_) => {
|
|
574
|
+
transport
|
|
575
|
+
.list_targets()
|
|
576
|
+
.map(|targets| targets.iter().any(|pane| pane.pane_id == *pane_id))
|
|
577
|
+
.unwrap_or(false)
|
|
578
|
+
|| transport
|
|
579
|
+
.liveness(pane_id)
|
|
580
|
+
.map(|state| state == crate::transport::PaneLiveness::Live)
|
|
581
|
+
.unwrap_or(false)
|
|
582
|
+
}
|
|
583
|
+
}
|
|
584
|
+
}
|
|
585
|
+
|
|
586
|
+
fn write_restart_readiness_timeout_event(
|
|
587
|
+
workspace: &Path,
|
|
588
|
+
readiness: RestartReadiness,
|
|
589
|
+
deadline: std::time::Duration,
|
|
590
|
+
elapsed: std::time::Duration,
|
|
591
|
+
) -> Result<(), LifecycleError> {
|
|
592
|
+
crate::event_log::EventLog::new(workspace)
|
|
593
|
+
.write(
|
|
594
|
+
"restart.readiness_timeout",
|
|
595
|
+
serde_json::json!({
|
|
596
|
+
"tmux_session_created": readiness.session_created,
|
|
597
|
+
"worker_pane_addressable": readiness.worker_pane_addressable,
|
|
598
|
+
"coordinator_alive": readiness.coordinator_alive,
|
|
599
|
+
"deadline_ms": deadline.as_millis(),
|
|
600
|
+
"elapsed_ms": elapsed.as_millis(),
|
|
601
|
+
"coordinator_log": crate::coordinator::coordinator_log_path(
|
|
602
|
+
&crate::coordinator::WorkspacePath::new(workspace.to_path_buf())
|
|
603
|
+
).display().to_string(),
|
|
604
|
+
"state_path": crate::state::persist::runtime_state_path(workspace).display().to_string(),
|
|
605
|
+
"pid_path": crate::coordinator::coordinator_pid_path(
|
|
606
|
+
&crate::coordinator::WorkspacePath::new(workspace.to_path_buf())
|
|
607
|
+
).display().to_string(),
|
|
608
|
+
}),
|
|
609
|
+
)
|
|
610
|
+
.map(|_| ())
|
|
611
|
+
.map_err(|e| LifecycleError::StatePersist(e.to_string()))
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
fn restart_readiness_timeout_message(
|
|
615
|
+
workspace: &Path,
|
|
616
|
+
readiness: RestartReadiness,
|
|
617
|
+
deadline: std::time::Duration,
|
|
618
|
+
) -> String {
|
|
619
|
+
let coordinator_workspace = crate::coordinator::WorkspacePath::new(workspace.to_path_buf());
|
|
620
|
+
let deadline_s = deadline.as_secs_f64();
|
|
621
|
+
format!(
|
|
622
|
+
"restart not ready within {deadline_s:.1}s: {missing}\n\
|
|
623
|
+
- tmux session created: {session}\n\
|
|
624
|
+
- worker pane addressable: {pane}\n\
|
|
625
|
+
- coordinator alive: {coordinator}\n\
|
|
626
|
+
Action: check coordinator log {log}, then `team-agent restart <agent> --allow-fresh` or `team-agent diagnose`\n\
|
|
627
|
+
Log: coordinator_log={log} state={state} pid_file={pid}",
|
|
628
|
+
missing = restart_readiness_missing_summary(readiness),
|
|
629
|
+
session = yes_no(readiness.session_created),
|
|
630
|
+
pane = yes_no(readiness.worker_pane_addressable),
|
|
631
|
+
coordinator = yes_no(readiness.coordinator_alive),
|
|
632
|
+
log = crate::coordinator::coordinator_log_path(&coordinator_workspace).display(),
|
|
633
|
+
state = crate::state::persist::runtime_state_path(workspace).display(),
|
|
634
|
+
pid = crate::coordinator::coordinator_pid_path(&coordinator_workspace).display(),
|
|
635
|
+
)
|
|
636
|
+
}
|
|
637
|
+
|
|
638
|
+
fn restart_readiness_missing_summary(readiness: RestartReadiness) -> String {
|
|
639
|
+
let mut missing = Vec::new();
|
|
640
|
+
if !readiness.session_created {
|
|
641
|
+
missing.push("tmux session created");
|
|
642
|
+
}
|
|
643
|
+
if !readiness.worker_pane_addressable {
|
|
644
|
+
missing.push("worker pane addressable");
|
|
645
|
+
}
|
|
646
|
+
if !readiness.coordinator_alive {
|
|
647
|
+
missing.push("coordinator alive");
|
|
648
|
+
}
|
|
649
|
+
missing.join(", ")
|
|
650
|
+
}
|
|
651
|
+
|
|
652
|
+
/// Renders a flag as the literal `"yes"` or `"no"` for human-readable reports.
fn yes_no(value: bool) -> &'static str {
    match value {
        true => "yes",
        false => "no",
    }
}
|
|
655
|
+
|
|
451
656
|
fn verify_spawned_agent_live(
|
|
452
657
|
_agent_id: &AgentId,
|
|
453
658
|
_spawn: &SpawnedAgentWindow,
|
|
@@ -4,32 +4,35 @@ use super::common::*;
|
|
|
4
4
|
/// bug-085 四象限 `start_mode` 决策(`start.py:179-188` + `_resume_rollout_missing` `start.py:66-69`),
|
|
5
5
|
/// **从 start_agent 的整条 lock+spawn 路径里分离出的纯函数**(gate gap:porter 需要单元级 RED
|
|
6
6
|
/// for `FreshAfterMissingRollout`,而 start_agent 全路径不可单测)。语义:
|
|
7
|
-
/// -
|
|
7
|
+
/// - resume backing 缺失时不可 resume:codex/claude 用 transcript/rollout 文件,
|
|
8
|
+
/// copilot 用 session-store 行存在性(由调用方折叠进 `rollout_exists`)。
|
|
8
9
|
/// - 初始 `start_mode = if session_id { Resumed } else { Fresh }`(`start.py:179`)。
|
|
9
|
-
/// -
|
|
10
|
-
///
|
|
11
|
-
/// - 非 codex:rollout 永不"缺失",直接看 session_id。
|
|
10
|
+
/// - `missing && allow_fresh` 升级为 `FreshAfterMissingRollout` 并清空 session_id。
|
|
11
|
+
/// - `missing && !allow_fresh` 返回 `Noop`,调用方据此诚实拒绝并提示 `--allow-fresh`。
|
|
12
12
|
pub fn decide_start_mode(
|
|
13
13
|
provider: &str,
|
|
14
14
|
session_id: Option<&SessionId>,
|
|
15
|
-
|
|
15
|
+
_rollout_path: Option<&RolloutPath>,
|
|
16
16
|
rollout_exists: bool,
|
|
17
17
|
allow_fresh: bool,
|
|
18
18
|
) -> StartMode {
|
|
19
19
|
match session_id {
|
|
20
20
|
None => StartMode::Fresh,
|
|
21
21
|
Some(_) => {
|
|
22
|
-
let
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
StartMode::
|
|
26
|
-
|
|
27
|
-
StartMode::Resumed
|
|
22
|
+
let missing_resume_backing = resumable_provider_requires_backing(provider) && !rollout_exists;
|
|
23
|
+
match (missing_resume_backing, allow_fresh) {
|
|
24
|
+
(true, true) => StartMode::FreshAfterMissingRollout,
|
|
25
|
+
(true, false) => StartMode::Noop,
|
|
26
|
+
(false, _) => StartMode::Resumed,
|
|
28
27
|
}
|
|
29
28
|
}
|
|
30
29
|
}
|
|
31
30
|
}
|
|
32
31
|
|
|
32
|
+
/// Reports whether `provider` is one whose resume depends on backing data being present.
///
/// Recognised wire names are `codex`, `claude`, `claude_code` and `copilot`; the match is
/// exact and case-sensitive, and any other name yields `false`.
pub(crate) fn resumable_provider_requires_backing(provider: &str) -> bool {
    const PROVIDERS_WITH_BACKING: [&str; 4] = ["codex", "claude", "claude_code", "copilot"];
    PROVIDERS_WITH_BACKING.contains(&provider)
}
|
|
35
|
+
|
|
33
36
|
/// `first_send_at` 严格分类(`_classify_first_send_at`,`orchestration.py:399`)。
|
|
34
37
|
/// **绝不靠 truthiness**:`""`/`0`/`False`/`"null"`/非 ISO → `Corrupt`。
|
|
35
38
|
pub fn classify_first_send_at(raw: &serde_json::Value) -> FirstSendAtState {
|
|
@@ -129,6 +132,14 @@ pub fn python_type_name(value: &serde_json::Value) -> &'static str {
|
|
|
129
132
|
pub fn classify_restart_plan(
|
|
130
133
|
state: &serde_json::Value,
|
|
131
134
|
allow_fresh: bool,
|
|
135
|
+
) -> Result<RestartPlan, LifecycleError> {
|
|
136
|
+
classify_restart_plan_with_resume_validation(None, state, allow_fresh)
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
pub(crate) fn classify_restart_plan_with_resume_validation(
|
|
140
|
+
workspace: Option<&Path>,
|
|
141
|
+
state: &serde_json::Value,
|
|
142
|
+
allow_fresh: bool,
|
|
132
143
|
) -> Result<RestartPlan, LifecycleError> {
|
|
133
144
|
let mut decisions = Vec::new();
|
|
134
145
|
let mut corrupt_entries = Vec::new();
|
|
@@ -171,21 +182,47 @@ pub fn classify_restart_plan(
|
|
|
171
182
|
.and_then(|v| v.as_str())
|
|
172
183
|
.filter(|s| !s.is_empty())
|
|
173
184
|
.map(SessionId::new);
|
|
185
|
+
let agent_id = AgentId::new(worker_id.clone());
|
|
174
186
|
// E6 层2 (C2, 用户裁定"绝不静默 fresh"): null session 只有显式 --allow-fresh 才 fresh,
|
|
175
187
|
// 否则 Refuse(→ resume_not_ready + 指引)。删 `!interacted` 短路 —— 自启动 worker
|
|
176
188
|
// (leader 从未发消息 → first_send_at=null → interacted=false)会被它静默 fresh 丢上下文。
|
|
177
|
-
let
|
|
189
|
+
let provider = agent_provider(agent);
|
|
190
|
+
let provider_wire = provider_wire(provider);
|
|
191
|
+
let resume_backing_exists = match (workspace, session_id.as_ref()) {
|
|
192
|
+
(Some(workspace), Some(session)) => resume_backing_exists_for_agent(
|
|
193
|
+
workspace,
|
|
194
|
+
&agent_id,
|
|
195
|
+
agent,
|
|
196
|
+
provider,
|
|
197
|
+
session,
|
|
198
|
+
agent_rollout_path(agent).as_ref(),
|
|
199
|
+
),
|
|
200
|
+
(None, Some(_)) if resumable_provider_requires_backing(provider_wire) => {
|
|
201
|
+
agent_rollout_path(agent)
|
|
202
|
+
.as_ref()
|
|
203
|
+
.is_some_and(|path| path.as_path().exists())
|
|
204
|
+
}
|
|
205
|
+
_ => true,
|
|
206
|
+
};
|
|
207
|
+
let decision = if session_id.is_some() && resume_backing_exists {
|
|
178
208
|
ResumeDecision::Resume
|
|
209
|
+
} else if session_id.is_some() && allow_fresh {
|
|
210
|
+
ResumeDecision::FreshStart
|
|
211
|
+
} else if session_id.is_some() {
|
|
212
|
+
ResumeDecision::Refuse
|
|
179
213
|
} else if allow_fresh {
|
|
180
214
|
ResumeDecision::FreshStart
|
|
181
215
|
} else {
|
|
182
216
|
ResumeDecision::Refuse
|
|
183
217
|
};
|
|
184
|
-
let agent_id = AgentId::new(worker_id.clone());
|
|
185
218
|
if matches!(decision, ResumeDecision::Refuse) {
|
|
186
219
|
unresumable.push(UnresumableWorker {
|
|
187
220
|
agent_id: agent_id.clone(),
|
|
188
|
-
reason:
|
|
221
|
+
reason: if session_id.is_some() {
|
|
222
|
+
"session_unresumable".to_string()
|
|
223
|
+
} else {
|
|
224
|
+
"no_persisted_session_id".to_string()
|
|
225
|
+
},
|
|
189
226
|
session_id: session_id.clone(),
|
|
190
227
|
first_send_at: first_send_at_raw.as_str().map(|s| s.to_string()),
|
|
191
228
|
});
|
|
@@ -37,7 +37,7 @@ pub(crate) use common::refresh_missing_provider_sessions;
|
|
|
37
37
|
pub use orchestrator::{halt_plan, plan_status};
|
|
38
38
|
pub use rebuild::{
|
|
39
39
|
restart, restart_candidates, restart_with_session_convergence_deadline, restart_with_transport,
|
|
40
|
-
select_restart_state,
|
|
40
|
+
restart_with_transport_with_readiness_deadline, select_restart_state,
|
|
41
41
|
};
|
|
42
42
|
pub use remove::{remove_agent, remove_agent_with_transport};
|
|
43
43
|
pub use selection::{classify_first_send_at, classify_restart_plan, decide_start_mode, python_type_name};
|
|
@@ -49,11 +49,15 @@ pub(crate) fn lifecycle_run_workspace(workspace: &Path) -> Result<std::path::Pat
|
|
|
49
49
|
}
|
|
50
50
|
|
|
51
51
|
fn lifecycle_paths(workspace: &Path, team: Option<&str>) -> Result<LifecyclePaths, LifecycleError> {
|
|
52
|
-
|
|
52
|
+
// RED-2-STILL(P0):入口门在 canonical_run_workspace 解析后的路径上判(quick-start 的 .team 落
|
|
53
|
+
// team_dir 父目录,raw team_dir 必 miss)。期望路径报解析后 runtime 落点,不指 raw team_dir。
|
|
54
|
+
let resolved_ws = crate::model::paths::canonical_run_workspace(workspace)
|
|
55
|
+
.map_err(|e| LifecycleError::StatePersist(e.to_string()))?;
|
|
56
|
+
if input_has_no_local_team_context(&resolved_ws) {
|
|
53
57
|
return Err(LifecycleError::TeamSelect(format!(
|
|
54
|
-
"active team spec not found: input_workspace={}
|
|
58
|
+
"active team spec not found: input_workspace={} expected_runtime_dir={}",
|
|
55
59
|
workspace.display(),
|
|
56
|
-
|
|
60
|
+
crate::model::paths::runtime_dir(&resolved_ws).display()
|
|
57
61
|
)));
|
|
58
62
|
}
|
|
59
63
|
let selected = crate::state::selector::resolve_active_team(
|
|
@@ -342,13 +342,12 @@ fn start_mode_serde_names_match_python_start_mode_strings() {
|
|
|
342
342
|
}
|
|
343
343
|
|
|
344
344
|
// ───────────────────────────────────────────────────────────────────────
|
|
345
|
-
// decide_start_mode — bug-085 四象限
|
|
346
|
-
// golden 实跑(PYTHONPATH=… python3 /tmp/x.py,_resume_rollout_missing + start_mode 逻辑):
|
|
345
|
+
// decide_start_mode — bug-085 四象限 + E20 #264 gap closure.
|
|
347
346
|
// codex sess rollout-present any-fresh -> resumed
|
|
348
|
-
// codex sess
|
|
349
|
-
// codex sess
|
|
347
|
+
// codex sess backing-MISSING !allow_fresh -> noop/refuse (绝不静默 resume 死 session)
|
|
348
|
+
// codex sess backing-MISSING allow_fresh -> fresh_after_missing_rollout
|
|
350
349
|
// codex no-sess any -> fresh
|
|
351
|
-
// claude
|
|
350
|
+
// claude/copilot sess backing-missing -> fresh_after_missing_rollout 或 noop/refuse
|
|
352
351
|
// claude no-sess -> fresh
|
|
353
352
|
// 这是 bug-085 把 start_mode 分类从 start_agent 的 lock+spawn 全路径剥离出来的命门。
|
|
354
353
|
// ───────────────────────────────────────────────────────────────────────
|
|
@@ -375,11 +374,11 @@ fn decide_start_mode_codex_missing_rollout_with_allow_fresh_is_fresh_after_missi
|
|
|
375
374
|
}
|
|
376
375
|
|
|
377
376
|
#[test]
fn decide_start_mode_codex_missing_rollout_without_allow_fresh_refuses() {
    // E20 C①: backing is missing and fresh starts are not allowed → refuse honestly, never
    // resume into a dead session.
    let session = sid("s1");
    let mode = decide_start_mode("codex", Some(&session), None, false, false);
    assert_eq!(mode, StartMode::Noop);
}
|
|
385
384
|
|
|
@@ -408,12 +407,24 @@ fn decide_start_mode_no_session_is_fresh() {
|
|
|
408
407
|
}
|
|
409
408
|
|
|
410
409
|
#[test]
|
|
411
|
-
fn
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
410
|
+
fn decide_start_mode_checks_backing_for_all_resumable_providers() {
|
|
411
|
+
for provider in ["claude", "claude_code", "copilot"] {
|
|
412
|
+
assert_eq!(
|
|
413
|
+
decide_start_mode(provider, Some(&sid("s1")), None, false, true),
|
|
414
|
+
StartMode::FreshAfterMissingRollout,
|
|
415
|
+
"{provider} missing backing + allow_fresh must not resume"
|
|
416
|
+
);
|
|
417
|
+
assert_eq!(
|
|
418
|
+
decide_start_mode(provider, Some(&sid("s1")), None, false, false),
|
|
419
|
+
StartMode::Noop,
|
|
420
|
+
"{provider} missing backing + !allow_fresh must refuse"
|
|
421
|
+
);
|
|
422
|
+
assert_eq!(
|
|
423
|
+
decide_start_mode(provider, Some(&sid("s1")), Some(&rp("/r")), true, false),
|
|
424
|
+
StartMode::Resumed,
|
|
425
|
+
"{provider} existing backing remains resumable"
|
|
426
|
+
);
|
|
427
|
+
}
|
|
417
428
|
assert_eq!(
|
|
418
429
|
decide_start_mode("claude", None, None, false, true),
|
|
419
430
|
StartMode::Fresh
|
|
@@ -533,8 +544,11 @@ fn classify_restart_plan_never_interacted_null_session_with_allow_fresh_marks_fo
|
|
|
533
544
|
fn classify_restart_plan_codex_with_session_still_resumes() {
|
|
534
545
|
// E6 层2 回归锁(不误伤): codex worker first_send_at=null 但 session_id 已捕 →
|
|
535
546
|
// 仍走 Resume(分流轴是 session_id 有无,不是 interacted)。防层2 修法把 has_session 也误判。
|
|
547
|
+
let ws = temp_ws();
|
|
548
|
+
let rollout = ws.join("codex-rollout.jsonl");
|
|
549
|
+
std::fs::write(&rollout, "{}\n").unwrap();
|
|
536
550
|
let state = json!({
|
|
537
|
-
"agents": { "w1": { "provider": "codex", "session_id": "sess-codex-abc" } }
|
|
551
|
+
"agents": { "w1": { "provider": "codex", "session_id": "sess-codex-abc", "rollout_path": rollout.to_string_lossy() } }
|
|
538
552
|
});
|
|
539
553
|
let plan = classify_restart_plan(&state, false).expect("纯验证不应 Err");
|
|
540
554
|
assert_eq!(plan.decisions.len(), 1);
|
|
@@ -978,6 +992,66 @@ fn leader_pane_env_cross_socket_all_probe_errors_stays_unknown() {
|
|
|
978
992
|
assert_eq!(state, LeaderPaneEnvState::Unknown);
|
|
979
993
|
}
|
|
980
994
|
|
|
995
|
+
#[test]
fn mcp_auto_approval_env_marks_leader_bypass_namespace_only() {
    // A leader that inherited the codex bypass flag: the worker env must record the bypass,
    // the flag itself, and auto-approval for `team_orchestrator` sourced from `leader_bypass`.
    let bypass_flag = "--dangerously-bypass-approvals-and-sandbox";
    let safety = DangerousApproval {
        enabled: true,
        source: DangerousApprovalSource::LeaderProcess,
        inherited: true,
        provider: Some(String::from("codex")),
        flag: Some(String::from(bypass_flag)),
        worker_capability_above_leader: false,
        ancestry_binary_name: Some(String::from("codex")),
        unexpected_binary: false,
    };
    let mut env = std::collections::BTreeMap::new();

    apply_mcp_auto_approval_env(&mut env, &safety);

    let leader_bypass = env.get("TEAM_AGENT_LEADER_BYPASS").map(String::as_str);
    assert_eq!(leader_bypass, Some("1"));

    let auto_approve = env.get("TEAM_AGENT_MCP_AUTO_APPROVE").map(String::as_str);
    assert_eq!(auto_approve, Some("team_orchestrator"));

    let auto_approve_source = env.get("TEAM_AGENT_MCP_AUTO_APPROVE_SOURCE").map(String::as_str);
    assert_eq!(auto_approve_source, Some("leader_bypass"));

    let recorded_flag = env.get("TEAM_AGENT_LEADER_BYPASS_FLAG").map(String::as_str);
    assert_eq!(recorded_flag, Some(bypass_flag));
}
|
|
1025
|
+
|
|
1026
|
+
#[test]
fn mcp_auto_approval_env_clears_when_leader_is_restricted() {
    // Start from an env that still carries auto-approval entries, then apply a disabled
    // (restricted-leader) approval: the bypass marker must read "0" and the auto-approve key
    // must be gone.
    let mut env = std::collections::BTreeMap::new();
    env.insert(
        "TEAM_AGENT_MCP_AUTO_APPROVE".to_string(),
        "team_orchestrator".to_string(),
    );
    env.insert(
        "TEAM_AGENT_MCP_AUTO_APPROVE_SOURCE".to_string(),
        "leader_bypass".to_string(),
    );
    let safety = DangerousApproval {
        enabled: false,
        source: DangerousApprovalSource::Disabled,
        inherited: false,
        provider: None,
        flag: None,
        worker_capability_above_leader: false,
        ancestry_binary_name: None,
        unexpected_binary: false,
    };

    apply_mcp_auto_approval_env(&mut env, &safety);

    let leader_bypass = env.get("TEAM_AGENT_LEADER_BYPASS").map(String::as_str);
    assert_eq!(leader_bypass, Some("0"));
    assert!(
        env.get("TEAM_AGENT_MCP_AUTO_APPROVE").is_none(),
        "restricted leader must not leave MCP auto-approval env behind: {env:?}"
    );
}
|
|
1054
|
+
|
|
981
1055
|
struct EnvVarGuard {
|
|
982
1056
|
key: &'static str,
|
|
983
1057
|
previous: Option<String>,
|