@team-agent/installer 0.3.9 → 0.3.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +1 -1
- package/Cargo.toml +1 -1
- package/crates/team-agent/src/cli/send.rs +9 -2
- package/crates/team-agent/src/coordinator/backoff.rs +83 -2
- package/crates/team-agent/src/coordinator/tests/spine.rs +6 -0
- package/crates/team-agent/src/coordinator/tick.rs +410 -168
- package/crates/team-agent/src/leader/lease.rs +19 -0
- package/crates/team-agent/src/leader/rediscover/tests.rs +12 -0
- package/crates/team-agent/src/leader/rediscover.rs +2 -0
- package/crates/team-agent/src/lifecycle/launch.rs +35 -0
- package/crates/team-agent/src/lifecycle/restart/agent.rs +17 -3
- package/crates/team-agent/src/lifecycle/restart/common.rs +75 -0
- package/crates/team-agent/src/lifecycle/restart/rebuild.rs +201 -3
- package/crates/team-agent/src/lifecycle/restart/selection.rs +51 -14
- package/crates/team-agent/src/lifecycle/restart.rs +1 -1
- package/crates/team-agent/src/lifecycle/tests/core.rs +89 -15
- package/crates/team-agent/src/lifecycle/tests/launch_spawn.rs +68 -3
- package/crates/team-agent/src/lifecycle/tests/main_preserved.rs +3 -1
- package/crates/team-agent/src/mcp_server/helpers.rs +24 -5
- package/crates/team-agent/src/mcp_server/normalize.rs +13 -6
- package/crates/team-agent/src/mcp_server/tests/send.rs +310 -212
- package/crates/team-agent/src/messaging/delivery.rs +83 -2
- package/crates/team-agent/src/messaging/helpers.rs +30 -10
- package/crates/team-agent/src/messaging/send.rs +71 -14
- package/crates/team-agent/src/messaging/tests/basic.rs +25 -7
- package/crates/team-agent/src/messaging/tests/runtime.rs +565 -111
- package/crates/team-agent/src/messaging/types.rs +19 -4
- package/crates/team-agent/src/provider/approvals/parsing.rs +43 -14
- package/crates/team-agent/src/provider/approvals/runtime_prompts.rs +12 -9
- package/crates/team-agent/src/transport/test_support.rs +12 -1
- package/package.json +4 -4
|
@@ -89,6 +89,9 @@ pub enum TickError {
|
|
|
89
89
|
/// messaging subsystem failure(delivery/scheduler/result watchers).
|
|
90
90
|
#[error("messaging: {0}")]
|
|
91
91
|
Messaging(#[from] crate::messaging::MessagingError),
|
|
92
|
+
/// coordinator.tick panic caught by the daemon loop.
|
|
93
|
+
#[error("panic: {0}")]
|
|
94
|
+
Panic(String),
|
|
92
95
|
}
|
|
93
96
|
|
|
94
97
|
// ===========================================================================
|
|
@@ -98,7 +101,8 @@ pub enum TickError {
|
|
|
98
101
|
/// tick 末原子 save 失败注入钩(bug-084)。生产装配为 `None`(走真实 `save_runtime_state`);
|
|
99
102
|
/// 测试装配一个返回 `Err` 的闭包,在不触碰真实磁盘的前提下强制 save 失败,断言 degraded
|
|
100
103
|
/// `TickReport` 而非 panic/Err。porter 在 `tick` 的「ATOMIC save」包裹点先查它再落真实 save。
|
|
101
|
-
pub type SaveHook =
|
|
104
|
+
pub type SaveHook =
|
|
105
|
+
Box<dyn Fn(&WorkspacePath, &Value) -> Result<(), crate::state::StateError> + Send + Sync>;
|
|
102
106
|
|
|
103
107
|
/// tick 链式副作用 ORDER 记录器(测试探针)。porter 在 `tick` 的每个原子调用点 push 一个
|
|
104
108
|
/// 稳定步骤名;测试断言固定序列。生产装配为 `None`(零开销,porter 用 `if let Some(rec)` 守卫)。
|
|
@@ -198,6 +202,7 @@ impl Coordinator {
|
|
|
198
202
|
"coordinator.session_missing",
|
|
199
203
|
serde_json::json!({"session": session_name}),
|
|
200
204
|
)?;
|
|
205
|
+
notify_session_missing(self.workspace.as_path(), &state, &event_log, session_name)?;
|
|
201
206
|
return Ok(empty_tick_report(
|
|
202
207
|
false,
|
|
203
208
|
true,
|
|
@@ -221,7 +226,9 @@ impl Coordinator {
|
|
|
221
226
|
// become deliverable. Reset them to `accepted` so the existing
|
|
222
227
|
// `deliver_pending` step below picks them up on THIS tick. Reuses the
|
|
223
228
|
// delivery pipeline; no new injector. Best-effort logging on inner errors.
|
|
224
|
-
if let Err(error) =
|
|
229
|
+
if let Err(error) =
|
|
230
|
+
self.requeue_trust_retries_for_handled_agents(&state, &store, &event_log)
|
|
231
|
+
{
|
|
225
232
|
let _ = event_log.write(
|
|
226
233
|
"messaging.trust_retry_requeue_failed",
|
|
227
234
|
serde_json::json!({"error": error.to_string()}),
|
|
@@ -376,7 +383,9 @@ impl Coordinator {
|
|
|
376
383
|
self.record_step("atomic_save");
|
|
377
384
|
let saved = match &self.save_hook {
|
|
378
385
|
Some(hook) => hook(&self.workspace, &state),
|
|
379
|
-
None =>
|
|
386
|
+
None => {
|
|
387
|
+
crate::state::projection::save_team_scoped_state(self.workspace.as_path(), &state)
|
|
388
|
+
}
|
|
380
389
|
};
|
|
381
390
|
if saved.is_err() {
|
|
382
391
|
return Ok(base_tick_report(
|
|
@@ -389,17 +398,13 @@ impl Coordinator {
|
|
|
389
398
|
}
|
|
390
399
|
|
|
391
400
|
self.record_step("collect_results");
|
|
392
|
-
collections.results =
|
|
393
|
-
crate::messaging::collect_results_and_notify_watchers(
|
|
394
|
-
|
|
401
|
+
collections.results =
|
|
402
|
+
collect_results(crate::messaging::collect_results_and_notify_watchers(
|
|
403
|
+
self.workspace.as_path(),
|
|
404
|
+
&event_log,
|
|
405
|
+
)?);
|
|
395
406
|
self.record_step("prune_dedupe_log");
|
|
396
|
-
Ok(base_tick_report(
|
|
397
|
-
true,
|
|
398
|
-
false,
|
|
399
|
-
None,
|
|
400
|
-
Some(true),
|
|
401
|
-
collections,
|
|
402
|
-
))
|
|
407
|
+
Ok(base_tick_report(true, false, None, Some(true), collections))
|
|
403
408
|
}
|
|
404
409
|
|
|
405
410
|
// #236 nag_removal (N35): the framework-synthesized idle/stuck/deadlock nag
|
|
@@ -407,7 +412,11 @@ impl Coordinator {
|
|
|
407
412
|
// were removed by design. Delivery primitives still flow through the rest of
|
|
408
413
|
// the tick body unchanged.
|
|
409
414
|
|
|
410
|
-
fn capture_missing_sessions(
|
|
415
|
+
fn capture_missing_sessions(
|
|
416
|
+
&self,
|
|
417
|
+
state: &mut Value,
|
|
418
|
+
event_log: &EventLog,
|
|
419
|
+
) -> Result<(), TickError> {
|
|
411
420
|
let report = crate::session_capture::capture_missing_provider_sessions_once(
|
|
412
421
|
state,
|
|
413
422
|
&mut |provider| self.provider_registry.adapter_for(provider),
|
|
@@ -437,7 +446,10 @@ impl Coordinator {
|
|
|
437
446
|
let snapshot = state.clone();
|
|
438
447
|
let team = crate::state::projection::team_state_key(&snapshot);
|
|
439
448
|
let team_key = Some(crate::model::ids::TeamKey::new(team.clone()));
|
|
440
|
-
let session_name = state
|
|
449
|
+
let session_name = state
|
|
450
|
+
.get("session_name")
|
|
451
|
+
.and_then(Value::as_str)
|
|
452
|
+
.map(str::to_string);
|
|
441
453
|
// B-4 / 036b N36 三路可用 — sync_health 内 per-agent capture 失败本就降级
|
|
442
454
|
// (写 coordinator.agent_capture_failed 后 continue),不打断 deliver_pending
|
|
443
455
|
// 主干。但 contract 要求一条【tick 级】可观测的 step-failed 信号 —
|
|
@@ -446,13 +458,17 @@ impl Coordinator {
|
|
|
446
458
|
let mut had_capture_failure = false;
|
|
447
459
|
// P5 (C-P5-2): one list-windows per SESSION per tick — memoized across the
|
|
448
460
|
// agent loop instead of one fork per agent.
|
|
449
|
-
let mut windows_by_session: BTreeMap<
|
|
450
|
-
|
|
461
|
+
let mut windows_by_session: BTreeMap<
|
|
462
|
+
String,
|
|
463
|
+
Result<Vec<crate::transport::WindowName>, String>,
|
|
464
|
+
> = BTreeMap::new();
|
|
451
465
|
let Some(agents) = state.get_mut("agents").and_then(Value::as_object_mut) else {
|
|
452
466
|
return Ok(captures);
|
|
453
467
|
};
|
|
454
468
|
for (agent_id, agent) in agents {
|
|
455
|
-
let Some((session, window, target)) =
|
|
469
|
+
let Some((session, window, target)) =
|
|
470
|
+
capture_window_target(agent, session_name.as_deref())
|
|
471
|
+
else {
|
|
456
472
|
continue;
|
|
457
473
|
};
|
|
458
474
|
let windows = match windows_by_session
|
|
@@ -534,7 +550,14 @@ impl Coordinator {
|
|
|
534
550
|
);
|
|
535
551
|
write_activity(agent, &activity, false);
|
|
536
552
|
let last_output_at = last_output_at_now;
|
|
537
|
-
write_agent_health(
|
|
553
|
+
write_agent_health(
|
|
554
|
+
store,
|
|
555
|
+
&team,
|
|
556
|
+
agent_id,
|
|
557
|
+
agent,
|
|
558
|
+
&activity,
|
|
559
|
+
last_output_at.as_deref(),
|
|
560
|
+
)?;
|
|
538
561
|
let pane_info = matching_capture_pane_info(agent, &session, &window, pane_infos);
|
|
539
562
|
let pane_id = pane_info
|
|
540
563
|
.as_ref()
|
|
@@ -546,7 +569,10 @@ impl Coordinator {
|
|
|
546
569
|
CapturedRuntimeFact {
|
|
547
570
|
team_key: team_key.clone(),
|
|
548
571
|
agent_id: AgentId::new(agent_id.clone()),
|
|
549
|
-
provider: agent
|
|
572
|
+
provider: agent
|
|
573
|
+
.get("provider")
|
|
574
|
+
.and_then(Value::as_str)
|
|
575
|
+
.and_then(parse_provider),
|
|
550
576
|
session_name: Some(session),
|
|
551
577
|
window: Some(window),
|
|
552
578
|
pane_id,
|
|
@@ -619,14 +645,22 @@ impl Coordinator {
|
|
|
619
645
|
let team = crate::state::projection::team_state_key(&snapshot);
|
|
620
646
|
let session_name = snapshot.get("session_name").and_then(Value::as_str);
|
|
621
647
|
for agent in abnormal_watch_agents(&snapshot) {
|
|
622
|
-
let rollout_path =
|
|
648
|
+
let rollout_path =
|
|
649
|
+
resolve_agent_rollout_path(self.workspace.as_path(), &agent.rollout_path);
|
|
623
650
|
let metadata = match std::fs::metadata(&rollout_path) {
|
|
624
651
|
Ok(metadata) => metadata,
|
|
625
652
|
Err(error) => {
|
|
626
653
|
upsert_abnormal_watch(
|
|
627
654
|
state,
|
|
628
655
|
&agent.agent_id,
|
|
629
|
-
abnormal_watch_payload(
|
|
656
|
+
abnormal_watch_payload(
|
|
657
|
+
&agent,
|
|
658
|
+
None,
|
|
659
|
+
None,
|
|
660
|
+
"unverifiable",
|
|
661
|
+
None,
|
|
662
|
+
Some(error.to_string()),
|
|
663
|
+
),
|
|
630
664
|
);
|
|
631
665
|
continue;
|
|
632
666
|
}
|
|
@@ -637,9 +671,10 @@ impl Coordinator {
|
|
|
637
671
|
// read at all (live sample: 332MB whole-file read per agent per 2s tick).
|
|
638
672
|
// ANY field change (including a size shrink / truncate) falls through to the
|
|
639
673
|
// re-read below.
|
|
640
|
-
if let (Some(mtime), Some(stored)) =
|
|
641
|
-
|
|
642
|
-
|
|
674
|
+
if let (Some(mtime), Some(stored)) = (
|
|
675
|
+
mtime_ns,
|
|
676
|
+
abnormal_watch_stored_metadata(&snapshot, &agent.agent_id),
|
|
677
|
+
) {
|
|
643
678
|
if stored == (size, mtime) {
|
|
644
679
|
continue;
|
|
645
680
|
}
|
|
@@ -653,17 +688,20 @@ impl Coordinator {
|
|
|
653
688
|
upsert_abnormal_watch(
|
|
654
689
|
state,
|
|
655
690
|
&agent.agent_id,
|
|
656
|
-
abnormal_watch_payload(
|
|
691
|
+
abnormal_watch_payload(
|
|
692
|
+
&agent,
|
|
693
|
+
Some(size),
|
|
694
|
+
mtime_ns,
|
|
695
|
+
"unverifiable",
|
|
696
|
+
None,
|
|
697
|
+
Some(error.to_string()),
|
|
698
|
+
),
|
|
657
699
|
);
|
|
658
700
|
continue;
|
|
659
701
|
}
|
|
660
702
|
};
|
|
661
|
-
let liveness =
|
|
662
|
-
&agent,
|
|
663
|
-
session_name,
|
|
664
|
-
targets,
|
|
665
|
-
self.transport.as_ref(),
|
|
666
|
-
);
|
|
703
|
+
let liveness =
|
|
704
|
+
agent_process_liveness(&agent, session_name, targets, self.transport.as_ref());
|
|
667
705
|
let fact = crate::provider::latest_explicit_error_fact(agent.provider, &text);
|
|
668
706
|
let decision = abnormal_exit_decision(liveness.state, fact.as_ref());
|
|
669
707
|
let check_key = abnormal_check_key(&agent, &liveness, fact.as_ref(), size);
|
|
@@ -679,8 +717,19 @@ impl Coordinator {
|
|
|
679
717
|
None,
|
|
680
718
|
),
|
|
681
719
|
);
|
|
682
|
-
if abnormal_last_check_key(state, &agent.agent_id).as_deref()
|
|
683
|
-
|
|
720
|
+
if abnormal_last_check_key(state, &agent.agent_id).as_deref()
|
|
721
|
+
!= Some(check_key.as_str())
|
|
722
|
+
{
|
|
723
|
+
write_abnormal_check(
|
|
724
|
+
event_log,
|
|
725
|
+
&team,
|
|
726
|
+
&agent,
|
|
727
|
+
&liveness,
|
|
728
|
+
fact.as_ref(),
|
|
729
|
+
decision,
|
|
730
|
+
size,
|
|
731
|
+
mtime_ns,
|
|
732
|
+
)?;
|
|
684
733
|
mark_abnormal_checked(state, &agent.agent_id, &check_key);
|
|
685
734
|
}
|
|
686
735
|
let fact = match (decision, fact) {
|
|
@@ -699,7 +748,9 @@ impl Coordinator {
|
|
|
699
748
|
(AbnormalExitDecision::Notify, None) => continue,
|
|
700
749
|
};
|
|
701
750
|
let dedupe_key = abnormal_dedupe_key(&agent, &fact, size);
|
|
702
|
-
if abnormal_last_notified_key(state, &agent.agent_id).as_deref()
|
|
751
|
+
if abnormal_last_notified_key(state, &agent.agent_id).as_deref()
|
|
752
|
+
== Some(dedupe_key.as_str())
|
|
753
|
+
{
|
|
703
754
|
continue;
|
|
704
755
|
}
|
|
705
756
|
let content = format_abnormal_exit_message(&team, &agent, &fact, &liveness, size);
|
|
@@ -754,7 +805,10 @@ impl Coordinator {
|
|
|
754
805
|
}
|
|
755
806
|
|
|
756
807
|
fn handle_startup_prompts(&self, state: &mut Value, event_log: &EventLog) {
|
|
757
|
-
let session_name = state
|
|
808
|
+
let session_name = state
|
|
809
|
+
.get("session_name")
|
|
810
|
+
.and_then(Value::as_str)
|
|
811
|
+
.map(str::to_string);
|
|
758
812
|
let Some(agents) = state.get_mut("agents").and_then(Value::as_object_mut) else {
|
|
759
813
|
return;
|
|
760
814
|
};
|
|
@@ -826,7 +880,10 @@ impl Coordinator {
|
|
|
826
880
|
continue;
|
|
827
881
|
};
|
|
828
882
|
agent_obj.insert("startup_prompts".to_string(), serde_json::json!("handled"));
|
|
829
|
-
agent_obj.insert(
|
|
883
|
+
agent_obj.insert(
|
|
884
|
+
"startup_prompt_status".to_string(),
|
|
885
|
+
serde_json::json!("handled"),
|
|
886
|
+
);
|
|
830
887
|
agent_obj.insert("startup_prompt_handled".to_string(), handled_payload);
|
|
831
888
|
}
|
|
832
889
|
}
|
|
@@ -890,7 +947,10 @@ impl Coordinator {
|
|
|
890
947
|
) -> Result<(), TickError> {
|
|
891
948
|
let snapshot = state.clone();
|
|
892
949
|
let team = crate::state::projection::team_state_key(&snapshot);
|
|
893
|
-
let session_name = snapshot
|
|
950
|
+
let session_name = snapshot
|
|
951
|
+
.get("session_name")
|
|
952
|
+
.and_then(Value::as_str)
|
|
953
|
+
.map(str::to_string);
|
|
894
954
|
let mut dedup_updates = Vec::new();
|
|
895
955
|
{
|
|
896
956
|
let Some(agents) = state.get_mut("agents").and_then(Value::as_object_mut) else {
|
|
@@ -941,37 +1001,38 @@ impl Coordinator {
|
|
|
941
1001
|
});
|
|
942
1002
|
let choice = choose_internal_mcp_approval_choice(&prompt);
|
|
943
1003
|
let keys = approval_choice_keys(&prompt, &captured.text, &choice)
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
1004
|
+
.into_iter()
|
|
1005
|
+
.filter_map(runtime_approval_key)
|
|
1006
|
+
.collect::<Vec<_>>();
|
|
1007
|
+
// A-6 / Python approvals/runtime_prompts.py:21-43: prompts are handled
|
|
1008
|
+
// per-agent with run_cmd(check=False) — one agent's tmux failure must
|
|
1009
|
+
// not abort the whole tick for the rest.
|
|
1010
|
+
if let Err(error) = self.transport.send_keys(&target, &keys) {
|
|
1011
|
+
event_log.write(
|
|
1012
|
+
"runtime_approval.send_keys_failed",
|
|
1013
|
+
serde_json::json!({
|
|
1014
|
+
"agent_id": agent_id,
|
|
1015
|
+
"target": format!("{target:?}"),
|
|
1016
|
+
"tool": prompt.tool,
|
|
1017
|
+
"error": error.to_string(),
|
|
1018
|
+
}),
|
|
1019
|
+
)?;
|
|
1020
|
+
continue;
|
|
1021
|
+
}
|
|
1022
|
+
let after = self
|
|
1023
|
+
.transport
|
|
1024
|
+
.capture(&target, crate::transport::CaptureRange::Tail(80))
|
|
1025
|
+
.ok()
|
|
1026
|
+
.and_then(|capture| extract_approval_prompt(agent_id, &capture.text));
|
|
1027
|
+
let cleared = after.as_ref().is_none_or(|after| {
|
|
1028
|
+
after.prompt != prompt.prompt || after.tool != prompt.tool
|
|
1029
|
+
});
|
|
970
1030
|
event_log.write(
|
|
971
1031
|
"runtime_approval.auto_approved",
|
|
972
1032
|
serde_json::json!({
|
|
973
1033
|
"agent_id": agent_id,
|
|
974
|
-
"
|
|
1034
|
+
"server": prompt.server.as_deref(),
|
|
1035
|
+
"tool": prompt.tool.as_deref(),
|
|
975
1036
|
"choice": choice,
|
|
976
1037
|
"cleared": cleared,
|
|
977
1038
|
"policy_source": approval_policy.source,
|
|
@@ -980,16 +1041,35 @@ impl Coordinator {
|
|
|
980
1041
|
"worker_capability_above_leader": approval_policy.worker_capability_above_leader,
|
|
981
1042
|
}),
|
|
982
1043
|
)?;
|
|
1044
|
+
event_log.write(
|
|
1045
|
+
"mcp.tool.auto_approved",
|
|
1046
|
+
serde_json::json!({
|
|
1047
|
+
"agent_id": agent_id,
|
|
1048
|
+
"server": prompt.server.as_deref(),
|
|
1049
|
+
"tool": prompt.tool.as_deref(),
|
|
1050
|
+
"choice": choice,
|
|
1051
|
+
"cleared": cleared,
|
|
1052
|
+
"inherit_reason": approval_policy.inherit_reason(),
|
|
1053
|
+
"bypass_source": approval_policy.source,
|
|
1054
|
+
"provider": approval_policy.provider,
|
|
1055
|
+
"flag": approval_policy.flag,
|
|
1056
|
+
"inherited": approval_policy.inherited,
|
|
1057
|
+
"explicit_yes_confirmed": approval_policy.explicit_yes_confirmed,
|
|
1058
|
+
"worker_capability_above_leader": approval_policy.worker_capability_above_leader,
|
|
1059
|
+
}),
|
|
1060
|
+
)?;
|
|
983
1061
|
}
|
|
984
1062
|
RuntimeApprovalDecision::AwaitingHumanConfirm => {
|
|
985
|
-
let Some(reason) =
|
|
1063
|
+
let Some(reason) =
|
|
1064
|
+
awaiting_human_confirm_reason(&prompt, auto_answer_allowed)
|
|
1065
|
+
else {
|
|
986
1066
|
continue;
|
|
987
1067
|
};
|
|
988
1068
|
let fact = awaiting_human_confirm_fact(&team, agent_id, &prompt, reason);
|
|
989
1069
|
let previous = agent
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
1070
|
+
.get("awaiting_human_confirm")
|
|
1071
|
+
.and_then(|v| v.get("fingerprint"))
|
|
1072
|
+
.and_then(Value::as_str);
|
|
993
1073
|
if previous == Some(fact.fingerprint.as_str())
|
|
994
1074
|
|| state_awaiting_human_confirm_fingerprint(&snapshot, &team, agent_id)
|
|
995
1075
|
.as_deref()
|
|
@@ -1001,10 +1081,10 @@ impl Coordinator {
|
|
|
1001
1081
|
let notification = awaiting_human_confirm_payload(agent, &fact);
|
|
1002
1082
|
let content = notification.to_string();
|
|
1003
1083
|
let _ = crate::messaging::send_to_leader_receiver(
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1084
|
+
self.workspace.as_path(),
|
|
1085
|
+
&snapshot,
|
|
1086
|
+
"leader",
|
|
1087
|
+
&content,
|
|
1008
1088
|
None,
|
|
1009
1089
|
agent_id,
|
|
1010
1090
|
false,
|
|
@@ -1015,43 +1095,43 @@ impl Coordinator {
|
|
|
1015
1095
|
remember_awaiting_human_confirm(agent, &fact);
|
|
1016
1096
|
dedup_updates.push(AwaitingDedupUpdate::Remember(fact.clone()));
|
|
1017
1097
|
match reason {
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1098
|
+
"tool_not_allowlisted" => {
|
|
1099
|
+
event_log.write(
|
|
1100
|
+
"runtime_approval.tool_not_allowlisted",
|
|
1101
|
+
serde_json::json!({
|
|
1102
|
+
"agent_id": agent_id,
|
|
1103
|
+
"tool": prompt.tool,
|
|
1104
|
+
"kind": prompt.kind,
|
|
1105
|
+
"prompt": prompt.prompt,
|
|
1106
|
+
}),
|
|
1107
|
+
)?;
|
|
1108
|
+
}
|
|
1109
|
+
"leader_restricted" | "leader_safety_restricted" => {
|
|
1110
|
+
event_log.write(
|
|
1111
|
+
"runtime_approval.blocked_by_leader_safety",
|
|
1112
|
+
serde_json::json!({
|
|
1113
|
+
"agent_id": agent_id,
|
|
1114
|
+
"tool": prompt.tool,
|
|
1115
|
+
"command": prompt.command,
|
|
1116
|
+
"kind": prompt.kind,
|
|
1117
|
+
"prompt": prompt.prompt,
|
|
1118
|
+
}),
|
|
1119
|
+
)?;
|
|
1120
|
+
}
|
|
1121
|
+
"command_approval_requires_human" => {
|
|
1122
|
+
event_log.write(
|
|
1123
|
+
"runtime_approval.command_approval_requires_human",
|
|
1124
|
+
serde_json::json!({
|
|
1125
|
+
"agent_id": agent_id,
|
|
1126
|
+
"tool": prompt.tool,
|
|
1127
|
+
"command": prompt.command,
|
|
1128
|
+
"kind": prompt.kind,
|
|
1129
|
+
"prompt": prompt.prompt,
|
|
1130
|
+
}),
|
|
1131
|
+
)?;
|
|
1132
|
+
}
|
|
1133
|
+
_ => {}
|
|
1040
1134
|
}
|
|
1041
|
-
"command_approval_requires_human" => {
|
|
1042
|
-
event_log.write(
|
|
1043
|
-
"runtime_approval.command_approval_requires_human",
|
|
1044
|
-
serde_json::json!({
|
|
1045
|
-
"agent_id": agent_id,
|
|
1046
|
-
"tool": prompt.tool,
|
|
1047
|
-
"command": prompt.command,
|
|
1048
|
-
"kind": prompt.kind,
|
|
1049
|
-
"prompt": prompt.prompt,
|
|
1050
|
-
}),
|
|
1051
|
-
)?;
|
|
1052
|
-
}
|
|
1053
|
-
_ => {}
|
|
1054
|
-
}
|
|
1055
1135
|
}
|
|
1056
1136
|
RuntimeApprovalDecision::Ignore => {
|
|
1057
1137
|
clear_awaiting_human_confirm(agent);
|
|
@@ -1065,7 +1145,9 @@ impl Coordinator {
|
|
|
1065
1145
|
}
|
|
1066
1146
|
for update in dedup_updates {
|
|
1067
1147
|
match update {
|
|
1068
|
-
AwaitingDedupUpdate::Remember(fact) =>
|
|
1148
|
+
AwaitingDedupUpdate::Remember(fact) => {
|
|
1149
|
+
remember_state_awaiting_human_confirm(state, &fact)
|
|
1150
|
+
}
|
|
1069
1151
|
AwaitingDedupUpdate::Clear { team, agent_id } => {
|
|
1070
1152
|
clear_state_awaiting_human_confirm(state, &team, &agent_id)
|
|
1071
1153
|
}
|
|
@@ -1107,7 +1189,9 @@ impl Coordinator {
|
|
|
1107
1189
|
/// Python 是 `python -m team_agent.coordinator`,`lifecycle.py:108`)。
|
|
1108
1190
|
/// **schema 兼容门**:三元任一不匹配 → restart_incompatible,**不可静默继续**(card §89)。
|
|
1109
1191
|
pub fn start(&self) -> Result<StartReport, StartError> {
|
|
1110
|
-
let health = self
|
|
1192
|
+
let health = self
|
|
1193
|
+
.health()
|
|
1194
|
+
.map_err(|e| std::io::Error::other(e.to_string()))?;
|
|
1111
1195
|
if health.ok {
|
|
1112
1196
|
return Ok(StartReport {
|
|
1113
1197
|
ok: true,
|
|
@@ -1145,14 +1229,26 @@ impl Coordinator {
|
|
|
1145
1229
|
pub fn stop(&self) -> Result<StopReport, StopError> {
|
|
1146
1230
|
let pid_path = coordinator_pid_path(&self.workspace);
|
|
1147
1231
|
if !pid_path.exists() {
|
|
1148
|
-
return Ok(StopReport {
|
|
1232
|
+
return Ok(StopReport {
|
|
1233
|
+
ok: true,
|
|
1234
|
+
status: StopOutcome::Missing,
|
|
1235
|
+
pid: None,
|
|
1236
|
+
});
|
|
1149
1237
|
}
|
|
1150
1238
|
let pid = read_pid_file(&pid_path);
|
|
1151
1239
|
remove_file_if_exists(&pid_path)?;
|
|
1152
1240
|
remove_file_if_exists(&coordinator_meta_path(&self.workspace))?;
|
|
1153
1241
|
match pid {
|
|
1154
|
-
Some(pid) => Ok(StopReport {
|
|
1155
|
-
|
|
1242
|
+
Some(pid) => Ok(StopReport {
|
|
1243
|
+
ok: true,
|
|
1244
|
+
status: StopOutcome::Stopped,
|
|
1245
|
+
pid: Some(pid),
|
|
1246
|
+
}),
|
|
1247
|
+
None => Ok(StopReport {
|
|
1248
|
+
ok: true,
|
|
1249
|
+
status: StopOutcome::InvalidPidRemoved,
|
|
1250
|
+
pid: None,
|
|
1251
|
+
}),
|
|
1156
1252
|
}
|
|
1157
1253
|
}
|
|
1158
1254
|
|
|
@@ -1217,20 +1313,16 @@ fn empty_tick_report(
|
|
|
1217
1313
|
reason: Option<TickStopReason>,
|
|
1218
1314
|
persisted: Option<bool>,
|
|
1219
1315
|
) -> TickReport {
|
|
1220
|
-
base_tick_report(
|
|
1221
|
-
ok,
|
|
1222
|
-
stop,
|
|
1223
|
-
reason,
|
|
1224
|
-
persisted,
|
|
1225
|
-
TickCollections::default(),
|
|
1226
|
-
)
|
|
1316
|
+
base_tick_report(ok, stop, reason, persisted, TickCollections::default())
|
|
1227
1317
|
}
|
|
1228
1318
|
|
|
1229
1319
|
fn collect_results(value: Value) -> Vec<CollectedResult> {
|
|
1230
1320
|
let Some(result_id) = value.get("result_id").and_then(Value::as_str) else {
|
|
1231
1321
|
return Vec::new();
|
|
1232
1322
|
};
|
|
1233
|
-
vec![CollectedResult {
|
|
1323
|
+
vec![CollectedResult {
|
|
1324
|
+
result_id: result_id.to_string(),
|
|
1325
|
+
}]
|
|
1234
1326
|
}
|
|
1235
1327
|
|
|
1236
1328
|
struct ProviderTurnClassifier;
|
|
@@ -1263,8 +1355,7 @@ impl TurnStateClassifier for ProviderTurnClassifier {
|
|
|
1263
1355
|
/// `coordinator.coordinator_tick_iteration_count` load fine (read-compat, C-P3-3) —
|
|
1264
1356
|
/// new versions simply stop writing it.
|
|
1265
1357
|
fn increment_coordinator_tick_iteration_count(workspace: &WorkspacePath) {
|
|
1266
|
-
let path =
|
|
1267
|
-
crate::model::paths::runtime_dir(workspace.as_path()).join("coordinator_tick.json");
|
|
1358
|
+
let path = crate::model::paths::runtime_dir(workspace.as_path()).join("coordinator_tick.json");
|
|
1268
1359
|
let next = std::fs::read_to_string(&path)
|
|
1269
1360
|
.ok()
|
|
1270
1361
|
.and_then(|text| serde_json::from_str::<Value>(&text).ok())
|
|
@@ -1404,13 +1495,13 @@ fn abnormal_watch_agents(state: &Value) -> Vec<AbnormalWatchAgent> {
|
|
|
1404
1495
|
agents
|
|
1405
1496
|
.iter()
|
|
1406
1497
|
.filter_map(|(agent_id, agent)| {
|
|
1407
|
-
if matches!(
|
|
1408
|
-
agent.get("status").and_then(Value::as_str),
|
|
1409
|
-
Some("paused")
|
|
1410
|
-
) {
|
|
1498
|
+
if matches!(agent.get("status").and_then(Value::as_str), Some("paused")) {
|
|
1411
1499
|
return None;
|
|
1412
1500
|
}
|
|
1413
|
-
let provider = agent
|
|
1501
|
+
let provider = agent
|
|
1502
|
+
.get("provider")
|
|
1503
|
+
.and_then(Value::as_str)
|
|
1504
|
+
.and_then(parse_provider)?;
|
|
1414
1505
|
let rollout_path_display = ["rollout_path", "transcript_path", "session_log_path"]
|
|
1415
1506
|
.into_iter()
|
|
1416
1507
|
.find_map(|key| agent.get(key).and_then(Value::as_str))
|
|
@@ -1421,10 +1512,19 @@ fn abnormal_watch_agents(state: &Value) -> Vec<AbnormalWatchAgent> {
|
|
|
1421
1512
|
provider,
|
|
1422
1513
|
rollout_path: PathBuf::from(&rollout_path_display),
|
|
1423
1514
|
rollout_path_display,
|
|
1424
|
-
status: agent
|
|
1515
|
+
status: agent
|
|
1516
|
+
.get("status")
|
|
1517
|
+
.and_then(Value::as_str)
|
|
1518
|
+
.map(str::to_string),
|
|
1425
1519
|
process_liveness: explicit_process_liveness(agent),
|
|
1426
|
-
window: agent
|
|
1427
|
-
|
|
1520
|
+
window: agent
|
|
1521
|
+
.get("window")
|
|
1522
|
+
.and_then(Value::as_str)
|
|
1523
|
+
.map(str::to_string),
|
|
1524
|
+
pane_id: agent
|
|
1525
|
+
.get("pane_id")
|
|
1526
|
+
.and_then(Value::as_str)
|
|
1527
|
+
.map(str::to_string),
|
|
1428
1528
|
pid: agent_pid(agent),
|
|
1429
1529
|
current_command: agent
|
|
1430
1530
|
.get("pane_current_command")
|
|
@@ -1443,12 +1543,19 @@ fn agent_pid(agent: &Value) -> Option<Pid> {
|
|
|
1443
1543
|
}
|
|
1444
1544
|
|
|
1445
1545
|
fn explicit_process_liveness(agent: &Value) -> Option<ProcessLiveness> {
|
|
1446
|
-
if let Some(process) = agent
|
|
1546
|
+
if let Some(process) = agent
|
|
1547
|
+
.get("provider_process")
|
|
1548
|
+
.or_else(|| agent.get("process"))
|
|
1549
|
+
{
|
|
1447
1550
|
if let Some(liveness) = explicit_process_liveness(process) {
|
|
1448
1551
|
return Some(liveness);
|
|
1449
1552
|
}
|
|
1450
1553
|
}
|
|
1451
|
-
for key in [
|
|
1554
|
+
for key in [
|
|
1555
|
+
"provider_process_liveness",
|
|
1556
|
+
"process_liveness",
|
|
1557
|
+
"pane_liveness",
|
|
1558
|
+
] {
|
|
1452
1559
|
match agent.get(key).and_then(Value::as_str) {
|
|
1453
1560
|
Some("dead") => return Some(ProcessLiveness::Dead),
|
|
1454
1561
|
Some("alive" | "live") => return Some(ProcessLiveness::Alive),
|
|
@@ -1456,14 +1563,32 @@ fn explicit_process_liveness(agent: &Value) -> Option<ProcessLiveness> {
|
|
|
1456
1563
|
_ => {}
|
|
1457
1564
|
}
|
|
1458
1565
|
}
|
|
1459
|
-
for key in [
|
|
1566
|
+
for key in [
|
|
1567
|
+
"provider_process_alive",
|
|
1568
|
+
"process_alive",
|
|
1569
|
+
"provider_alive",
|
|
1570
|
+
"alive",
|
|
1571
|
+
] {
|
|
1460
1572
|
if let Some(alive) = agent.get(key).and_then(Value::as_bool) {
|
|
1461
|
-
return Some(if alive {
|
|
1573
|
+
return Some(if alive {
|
|
1574
|
+
ProcessLiveness::Alive
|
|
1575
|
+
} else {
|
|
1576
|
+
ProcessLiveness::Dead
|
|
1577
|
+
});
|
|
1462
1578
|
}
|
|
1463
1579
|
}
|
|
1464
|
-
for key in [
|
|
1580
|
+
for key in [
|
|
1581
|
+
"provider_process_dead",
|
|
1582
|
+
"process_dead",
|
|
1583
|
+
"provider_dead",
|
|
1584
|
+
"dead",
|
|
1585
|
+
] {
|
|
1465
1586
|
if let Some(dead) = agent.get(key).and_then(Value::as_bool) {
|
|
1466
|
-
return Some(if dead {
|
|
1587
|
+
return Some(if dead {
|
|
1588
|
+
ProcessLiveness::Dead
|
|
1589
|
+
} else {
|
|
1590
|
+
ProcessLiveness::Alive
|
|
1591
|
+
});
|
|
1467
1592
|
}
|
|
1468
1593
|
}
|
|
1469
1594
|
for key in ["status", "state", "liveness"] {
|
|
@@ -1481,7 +1606,10 @@ fn explicit_process_liveness(agent: &Value) -> Option<ProcessLiveness> {
|
|
|
1481
1606
|
|
|
1482
1607
|
fn json_u32(value: Option<&Value>) -> Option<u32> {
|
|
1483
1608
|
value
|
|
1484
|
-
.and_then(|v|
|
|
1609
|
+
.and_then(|v| {
|
|
1610
|
+
v.as_u64()
|
|
1611
|
+
.or_else(|| v.as_i64().and_then(|n| u64::try_from(n).ok()))
|
|
1612
|
+
})
|
|
1485
1613
|
.and_then(|n| u32::try_from(n).ok())
|
|
1486
1614
|
}
|
|
1487
1615
|
|
|
@@ -1495,15 +1623,17 @@ fn agent_process_liveness(
|
|
|
1495
1623
|
return pid_process_check("pid", pid);
|
|
1496
1624
|
}
|
|
1497
1625
|
if let Some(liveness) = agent.process_liveness {
|
|
1498
|
-
return process_check(
|
|
1626
|
+
return process_check(
|
|
1627
|
+
liveness,
|
|
1628
|
+
format!("explicit:{}", process_liveness_wire(liveness)),
|
|
1629
|
+
);
|
|
1499
1630
|
}
|
|
1500
1631
|
if agent.status.as_deref().is_some_and(|status| {
|
|
1501
1632
|
matches!(
|
|
1502
1633
|
status,
|
|
1503
1634
|
"stopped" | "missing" | "error" | "dead" | "exited" | "terminated" | "crashed"
|
|
1504
1635
|
)
|
|
1505
|
-
})
|
|
1506
|
-
{
|
|
1636
|
+
}) {
|
|
1507
1637
|
return process_check(
|
|
1508
1638
|
ProcessLiveness::Dead,
|
|
1509
1639
|
format!("status:{}", agent.status.as_deref().unwrap_or("unknown")),
|
|
@@ -1519,7 +1649,10 @@ fn agent_process_liveness(
|
|
|
1519
1649
|
if let Some(pid) = target.pane_pid.map(Pid::new) {
|
|
1520
1650
|
return pid_process_check("pane_pid", pid);
|
|
1521
1651
|
}
|
|
1522
|
-
return process_check(
|
|
1652
|
+
return process_check(
|
|
1653
|
+
ProcessLiveness::Unverifiable,
|
|
1654
|
+
"pane_present_pid_unknown".to_string(),
|
|
1655
|
+
);
|
|
1523
1656
|
}
|
|
1524
1657
|
if let Some(pane_id) = agent.pane_id.as_deref() {
|
|
1525
1658
|
let pane = crate::transport::PaneId::new(pane_id);
|
|
@@ -1527,27 +1660,37 @@ fn agent_process_liveness(
|
|
|
1527
1660
|
Ok(crate::transport::PaneLiveness::Dead) => {
|
|
1528
1661
|
process_check(ProcessLiveness::Dead, format!("pane_dead:{pane_id}"))
|
|
1529
1662
|
}
|
|
1530
|
-
Ok(crate::transport::PaneLiveness::Live) =>
|
|
1531
|
-
|
|
1532
|
-
|
|
1533
|
-
|
|
1534
|
-
|
|
1535
|
-
|
|
1536
|
-
|
|
1537
|
-
|
|
1538
|
-
|
|
1663
|
+
Ok(crate::transport::PaneLiveness::Live) => process_check(
|
|
1664
|
+
ProcessLiveness::Unverifiable,
|
|
1665
|
+
format!("pane_live_pid_unknown:{pane_id}"),
|
|
1666
|
+
),
|
|
1667
|
+
Ok(crate::transport::PaneLiveness::Unknown) => process_check(
|
|
1668
|
+
ProcessLiveness::Unverifiable,
|
|
1669
|
+
format!("pane_unknown:{pane_id}"),
|
|
1670
|
+
),
|
|
1671
|
+
Err(error) => process_check(
|
|
1672
|
+
ProcessLiveness::Unverifiable,
|
|
1673
|
+
format!("pane_unverifiable:{pane_id}:{error}"),
|
|
1674
|
+
),
|
|
1539
1675
|
};
|
|
1540
1676
|
}
|
|
1541
1677
|
let (Some(session), Some(window)) = (session_name, agent.window.as_deref()) else {
|
|
1542
|
-
return process_check(
|
|
1678
|
+
return process_check(
|
|
1679
|
+
ProcessLiveness::Unverifiable,
|
|
1680
|
+
"missing_session_or_window".to_string(),
|
|
1681
|
+
);
|
|
1543
1682
|
};
|
|
1544
1683
|
let session = crate::transport::SessionName::new(session);
|
|
1545
1684
|
match transport.list_windows(&session) {
|
|
1546
|
-
Ok(windows) if windows.iter().any(|known| known.as_str() == window) =>
|
|
1547
|
-
|
|
1548
|
-
|
|
1685
|
+
Ok(windows) if windows.iter().any(|known| known.as_str() == window) => process_check(
|
|
1686
|
+
ProcessLiveness::Unverifiable,
|
|
1687
|
+
"window_present_pid_unknown".to_string(),
|
|
1688
|
+
),
|
|
1549
1689
|
Ok(_) => process_check(ProcessLiveness::Dead, format!("window_missing:{window}")),
|
|
1550
|
-
Err(error) => process_check(
|
|
1690
|
+
Err(error) => process_check(
|
|
1691
|
+
ProcessLiveness::Unverifiable,
|
|
1692
|
+
format!("window_unverifiable:{window}:{error}"),
|
|
1693
|
+
),
|
|
1551
1694
|
}
|
|
1552
1695
|
}
|
|
1553
1696
|
|
|
@@ -1557,7 +1700,10 @@ fn matching_agent_target<'a>(
|
|
|
1557
1700
|
targets: &'a [crate::transport::PaneInfo],
|
|
1558
1701
|
) -> Option<&'a crate::transport::PaneInfo> {
|
|
1559
1702
|
if let Some(pane_id) = agent.pane_id.as_deref() {
|
|
1560
|
-
if let Some(target) = targets
|
|
1703
|
+
if let Some(target) = targets
|
|
1704
|
+
.iter()
|
|
1705
|
+
.find(|target| target.pane_id.as_str() == pane_id)
|
|
1706
|
+
{
|
|
1561
1707
|
return Some(target);
|
|
1562
1708
|
}
|
|
1563
1709
|
}
|
|
@@ -1577,7 +1723,10 @@ fn pid_process_check(label: &str, pid: Pid) -> ProcessCheck {
|
|
|
1577
1723
|
match pid_is_running(pid) {
|
|
1578
1724
|
Ok(true) => process_check(ProcessLiveness::Alive, format!("{label}_running:{pid}")),
|
|
1579
1725
|
Ok(false) => process_check(ProcessLiveness::Dead, format!("{label}_not_running:{pid}")),
|
|
1580
|
-
Err(error) => process_check(
|
|
1726
|
+
Err(error) => process_check(
|
|
1727
|
+
ProcessLiveness::Unverifiable,
|
|
1728
|
+
format!("{label}_unverifiable:{pid}:{error}"),
|
|
1729
|
+
),
|
|
1581
1730
|
}
|
|
1582
1731
|
}
|
|
1583
1732
|
|
|
@@ -1585,7 +1734,10 @@ fn command_process_check(provider: crate::model::enums::Provider, command: &str)
|
|
|
1585
1734
|
if provider_command_matches(provider, command) {
|
|
1586
1735
|
process_check(ProcessLiveness::Alive, format!("current_command:{command}"))
|
|
1587
1736
|
} else {
|
|
1588
|
-
process_check(
|
|
1737
|
+
process_check(
|
|
1738
|
+
ProcessLiveness::Dead,
|
|
1739
|
+
format!("provider_not_foreground:{command}"),
|
|
1740
|
+
)
|
|
1589
1741
|
}
|
|
1590
1742
|
}
|
|
1591
1743
|
|
|
@@ -1784,7 +1936,10 @@ fn mark_abnormal_notified(state: &mut Value, agent_id: &str, key: &str) {
|
|
|
1784
1936
|
}
|
|
1785
1937
|
if let Some(obj) = entry.as_object_mut() {
|
|
1786
1938
|
obj.insert("last_notified_key".to_string(), serde_json::json!(key));
|
|
1787
|
-
obj.insert(
|
|
1939
|
+
obj.insert(
|
|
1940
|
+
"last_notified_at".to_string(),
|
|
1941
|
+
serde_json::json!(chrono::Utc::now().to_rfc3339()),
|
|
1942
|
+
);
|
|
1788
1943
|
}
|
|
1789
1944
|
}
|
|
1790
1945
|
}
|
|
@@ -1799,7 +1954,10 @@ fn mark_abnormal_suppressed(state: &mut Value, agent_id: &str, key: &str) {
|
|
|
1799
1954
|
}
|
|
1800
1955
|
if let Some(obj) = entry.as_object_mut() {
|
|
1801
1956
|
obj.insert("last_suppressed_key".to_string(), serde_json::json!(key));
|
|
1802
|
-
obj.insert(
|
|
1957
|
+
obj.insert(
|
|
1958
|
+
"last_suppressed_at".to_string(),
|
|
1959
|
+
serde_json::json!(chrono::Utc::now().to_rfc3339()),
|
|
1960
|
+
);
|
|
1803
1961
|
}
|
|
1804
1962
|
}
|
|
1805
1963
|
}
|
|
@@ -1836,7 +1994,10 @@ fn mark_abnormal_checked(state: &mut Value, agent_id: &str, key: &str) {
|
|
|
1836
1994
|
}
|
|
1837
1995
|
if let Some(obj) = entry.as_object_mut() {
|
|
1838
1996
|
obj.insert("last_check_key".to_string(), serde_json::json!(key));
|
|
1839
|
-
obj.insert(
|
|
1997
|
+
obj.insert(
|
|
1998
|
+
"last_check_at".to_string(),
|
|
1999
|
+
serde_json::json!(chrono::Utc::now().to_rfc3339()),
|
|
2000
|
+
);
|
|
1840
2001
|
}
|
|
1841
2002
|
}
|
|
1842
2003
|
}
|
|
@@ -2024,7 +2185,10 @@ fn capture_window_target(
|
|
|
2024
2185
|
crate::transport::WindowName,
|
|
2025
2186
|
crate::transport::Target,
|
|
2026
2187
|
)> {
|
|
2027
|
-
let window = agent
|
|
2188
|
+
let window = agent
|
|
2189
|
+
.get("window")
|
|
2190
|
+
.and_then(Value::as_str)
|
|
2191
|
+
.filter(|s| !s.is_empty())?;
|
|
2028
2192
|
let session = session_name.filter(|s| !s.is_empty())?;
|
|
2029
2193
|
let session = crate::transport::SessionName::new(session);
|
|
2030
2194
|
let window = crate::transport::WindowName::new(window);
|
|
@@ -2074,13 +2238,18 @@ fn agent_rollout_path(agent: &Value) -> Option<PathBuf> {
|
|
|
2074
2238
|
.map(PathBuf::from)
|
|
2075
2239
|
}
|
|
2076
2240
|
|
|
2077
|
-
fn runtime_approval_target(
|
|
2241
|
+
fn runtime_approval_target(
|
|
2242
|
+
agent: &Value,
|
|
2243
|
+
session_name: Option<&str>,
|
|
2244
|
+
) -> Option<crate::transport::Target> {
|
|
2078
2245
|
if let Some(pane_id) = agent
|
|
2079
2246
|
.get("pane_id")
|
|
2080
2247
|
.and_then(Value::as_str)
|
|
2081
2248
|
.filter(|pane_id| !pane_id.is_empty())
|
|
2082
2249
|
{
|
|
2083
|
-
return Some(crate::transport::Target::Pane(
|
|
2250
|
+
return Some(crate::transport::Target::Pane(
|
|
2251
|
+
crate::transport::PaneId::new(pane_id),
|
|
2252
|
+
));
|
|
2084
2253
|
}
|
|
2085
2254
|
capture_window_target(agent, session_name).map(|(_, _, target)| target)
|
|
2086
2255
|
}
|
|
@@ -2110,6 +2279,8 @@ struct RuntimeApprovalPolicy {
|
|
|
2110
2279
|
source: String,
|
|
2111
2280
|
inherited: bool,
|
|
2112
2281
|
explicit_yes_confirmed: bool,
|
|
2282
|
+
provider: Option<String>,
|
|
2283
|
+
flag: Option<String>,
|
|
2113
2284
|
worker_capability_above_leader: bool,
|
|
2114
2285
|
}
|
|
2115
2286
|
|
|
@@ -2127,6 +2298,14 @@ impl RuntimeApprovalPolicy {
|
|
|
2127
2298
|
&& (!self.worker_capability_above_leader
|
|
2128
2299
|
|| (self.source == "runtime_config" && self.explicit_yes_confirmed))
|
|
2129
2300
|
}
|
|
2301
|
+
|
|
2302
|
+
fn inherit_reason(&self) -> &'static str {
|
|
2303
|
+
match self.source.as_str() {
|
|
2304
|
+
"leader_process" if self.inherited => "leader_bypass",
|
|
2305
|
+
"runtime_config" if self.explicit_yes_confirmed => "runtime_config_explicit_yes",
|
|
2306
|
+
_ => "none",
|
|
2307
|
+
}
|
|
2308
|
+
}
|
|
2130
2309
|
}
|
|
2131
2310
|
|
|
2132
2311
|
fn runtime_approval_policy_from_agent(agent: &Value) -> RuntimeApprovalPolicy {
|
|
@@ -2151,6 +2330,14 @@ fn runtime_approval_policy_from_agent(agent: &Value) -> RuntimeApprovalPolicy {
|
|
|
2151
2330
|
.and_then(|p| p.get("explicit_yes_confirmed"))
|
|
2152
2331
|
.and_then(Value::as_bool)
|
|
2153
2332
|
.unwrap_or(false),
|
|
2333
|
+
provider: policy
|
|
2334
|
+
.and_then(|p| p.get("provider"))
|
|
2335
|
+
.and_then(Value::as_str)
|
|
2336
|
+
.map(str::to_string),
|
|
2337
|
+
flag: policy
|
|
2338
|
+
.and_then(|p| p.get("flag"))
|
|
2339
|
+
.and_then(Value::as_str)
|
|
2340
|
+
.map(str::to_string),
|
|
2154
2341
|
worker_capability_above_leader: policy
|
|
2155
2342
|
.and_then(|p| p.get("worker_capability_above_leader"))
|
|
2156
2343
|
.and_then(Value::as_bool)
|
|
@@ -2163,7 +2350,14 @@ fn awaiting_human_confirm_payload(
|
|
|
2163
2350
|
fact: &crate::provider::AwaitingHumanConfirmFact,
|
|
2164
2351
|
) -> Value {
|
|
2165
2352
|
let mut payload = fact.to_event_payload();
|
|
2166
|
-
let excerpt = fact
|
|
2353
|
+
let excerpt = fact
|
|
2354
|
+
.prompt
|
|
2355
|
+
.lines()
|
|
2356
|
+
.next()
|
|
2357
|
+
.unwrap_or("")
|
|
2358
|
+
.chars()
|
|
2359
|
+
.take(240)
|
|
2360
|
+
.collect::<String>();
|
|
2167
2361
|
if let Some(obj) = payload.as_object_mut() {
|
|
2168
2362
|
obj.insert("team_id".to_string(), serde_json::json!(fact.team));
|
|
2169
2363
|
obj.insert("owner_team_id".to_string(), serde_json::json!(fact.team));
|
|
@@ -2326,7 +2520,10 @@ fn write_activity(
|
|
|
2326
2520
|
activity: &crate::messaging::AgentActivity,
|
|
2327
2521
|
output_advanced: bool,
|
|
2328
2522
|
) -> Option<String> {
|
|
2329
|
-
let previous_last_output = agent
|
|
2523
|
+
let previous_last_output = agent
|
|
2524
|
+
.get("last_output_at")
|
|
2525
|
+
.and_then(Value::as_str)
|
|
2526
|
+
.map(str::to_string);
|
|
2330
2527
|
let Some(agent_obj) = agent.as_object_mut() else {
|
|
2331
2528
|
return previous_last_output;
|
|
2332
2529
|
};
|
|
@@ -2423,3 +2620,48 @@ fn remove_file_if_exists(path: &Path) -> Result<(), std::io::Error> {
|
|
|
2423
2620
|
Err(e) => Err(e),
|
|
2424
2621
|
}
|
|
2425
2622
|
}
|
|
2623
|
+
|
|
2624
|
+
fn notify_session_missing(
|
|
2625
|
+
workspace: &Path,
|
|
2626
|
+
state: &Value,
|
|
2627
|
+
event_log: &EventLog,
|
|
2628
|
+
session_name: &str,
|
|
2629
|
+
) -> Result<(), TickError> {
|
|
2630
|
+
let content = format!(
|
|
2631
|
+
"coordinator.session_missing\nerror: tmux session {session_name} is missing; coordinator is stopping\naction: restart the team or recover the missing tmux session\nlog: .team/logs/events.jsonl"
|
|
2632
|
+
);
|
|
2633
|
+
let dedupe_key = format!("coordinator.session_missing:{session_name}");
|
|
2634
|
+
match crate::messaging::send_to_leader_receiver(
|
|
2635
|
+
workspace,
|
|
2636
|
+
state,
|
|
2637
|
+
"leader",
|
|
2638
|
+
&content,
|
|
2639
|
+
None,
|
|
2640
|
+
"coordinator",
|
|
2641
|
+
false,
|
|
2642
|
+
Some(&dedupe_key),
|
|
2643
|
+
event_log,
|
|
2644
|
+
) {
|
|
2645
|
+
Ok(outcome) => {
|
|
2646
|
+
event_log.write(
|
|
2647
|
+
"coordinator.session_missing_alert",
|
|
2648
|
+
serde_json::json!({
|
|
2649
|
+
"session": session_name,
|
|
2650
|
+
"leader_notification_status": crate::messaging::helpers::status_wire(outcome.status),
|
|
2651
|
+
"message_id": outcome.message_id,
|
|
2652
|
+
}),
|
|
2653
|
+
)?;
|
|
2654
|
+
}
|
|
2655
|
+
Err(error) => {
|
|
2656
|
+
event_log.write(
|
|
2657
|
+
"coordinator.session_missing_alert_failed",
|
|
2658
|
+
serde_json::json!({
|
|
2659
|
+
"session": session_name,
|
|
2660
|
+
"error": error.to_string(),
|
|
2661
|
+
"action": "inspect .team/logs/events.jsonl and restart the team",
|
|
2662
|
+
}),
|
|
2663
|
+
)?;
|
|
2664
|
+
}
|
|
2665
|
+
}
|
|
2666
|
+
Ok(())
|
|
2667
|
+
}
|