@team-agent/installer 0.3.4 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +1 -1
- package/Cargo.toml +1 -1
- package/crates/team-agent/src/cli/adapters.rs +8 -0
- package/crates/team-agent/src/cli/diagnose.rs +51 -10
- package/crates/team-agent/src/cli/emit.rs +2 -1
- package/crates/team-agent/src/cli/mod.rs +217 -80
- package/crates/team-agent/src/cli/send.rs +1 -0
- package/crates/team-agent/src/cli/status_port.rs +135 -7
- package/crates/team-agent/src/cli/tests/missing_subcommands.rs +8 -1
- package/crates/team-agent/src/cli/tests/mod.rs +1 -0
- package/crates/team-agent/src/cli/tests/shutdown_kill_plan.rs +39 -0
- package/crates/team-agent/src/cli/types.rs +5 -1
- package/crates/team-agent/src/coordinator/backoff.rs +57 -9
- package/crates/team-agent/src/coordinator/health.rs +65 -2
- package/crates/team-agent/src/coordinator/runtime_detectors.rs +28 -16
- package/crates/team-agent/src/coordinator/tests/a0_lostupdate.rs +87 -0
- package/crates/team-agent/src/coordinator/tests/mod.rs +1 -0
- package/crates/team-agent/src/coordinator/tick.rs +195 -43
- package/crates/team-agent/src/leader/helpers.rs +2 -0
- package/crates/team-agent/src/leader/rediscover.rs +1 -0
- package/crates/team-agent/src/leader/start.rs +9 -1
- package/crates/team-agent/src/leader/takeover.rs +18 -1
- package/crates/team-agent/src/lifecycle/launch.rs +434 -29
- package/crates/team-agent/src/lifecycle/profile_launch.rs +110 -4
- package/crates/team-agent/src/lifecycle/profile_smoke.rs +4 -1
- package/crates/team-agent/src/lifecycle/restart/common.rs +19 -2
- package/crates/team-agent/src/lifecycle/tests/agent_ops.rs +2 -2
- package/crates/team-agent/src/lifecycle/tests/core.rs +1 -1
- package/crates/team-agent/src/lifecycle/tests/lane_ops.rs +4 -4
- package/crates/team-agent/src/lifecycle/tests/launch_spawn.rs +3 -1
- package/crates/team-agent/src/lifecycle/worker_command_context.rs +44 -9
- package/crates/team-agent/src/mcp_server/lifecycle_tools/agent_ops.rs +2 -1
- package/crates/team-agent/src/mcp_server/tests/scoped.rs +14 -1
- package/crates/team-agent/src/mcp_server/tests/send.rs +15 -1
- package/crates/team-agent/src/mcp_server/tools.rs +65 -9
- package/crates/team-agent/src/mcp_server/wire.rs +2 -1
- package/crates/team-agent/src/message_store.rs +80 -0
- package/crates/team-agent/src/messaging/results.rs +76 -5
- package/crates/team-agent/src/messaging/send.rs +3 -1
- package/crates/team-agent/src/messaging/types.rs +15 -1
- package/crates/team-agent/src/messaging/watchers.rs +68 -30
- package/crates/team-agent/src/model/enums.rs +7 -1
- package/crates/team-agent/src/model/permissions.rs +7 -0
- package/crates/team-agent/src/model/spec.rs +3 -1
- package/crates/team-agent/src/provider/adapter.rs +472 -7
- package/crates/team-agent/src/provider/classify.rs +6 -2
- package/crates/team-agent/src/provider/faults.rs +3 -2
- package/crates/team-agent/src/provider/startup_prompt.rs +25 -7
- package/crates/team-agent/src/provider/types.rs +11 -0
- package/crates/team-agent/src/session_capture.rs +1 -0
- package/crates/team-agent/src/state/persist.rs +95 -19
- package/crates/team-agent/src/tmux_backend/tests.rs +8 -7
- package/crates/team-agent/src/tmux_backend.rs +80 -6
- package/crates/team-agent/src/transport.rs +32 -0
- package/npm/install.mjs +21 -0
- package/package.json +4 -4
|
@@ -17,7 +17,9 @@ use rusqlite::params;
|
|
|
17
17
|
compact: bool,
|
|
18
18
|
detail: bool,
|
|
19
19
|
) -> Result<Value, CliError> {
|
|
20
|
-
|
|
20
|
+
// commands.py:99 — `--json --detail` maps to compact=False: detail wins and
|
|
21
|
+
// returns the FULL payload.
|
|
22
|
+
let compact = compact && !detail;
|
|
21
23
|
let resolved_owner_team_id = resolve_status_owner_team(workspace, owner_team_id)?;
|
|
22
24
|
let owner_team_id = resolved_owner_team_id.as_deref().or(owner_team_id);
|
|
23
25
|
let health = crate::coordinator::coordinator_health(
|
|
@@ -63,7 +65,7 @@ use rusqlite::params;
|
|
|
63
65
|
"messages": message_counts(&conn, owner_team_id)?,
|
|
64
66
|
"queued_messages": queued_messages(&conn, owner_team_id, 8)?,
|
|
65
67
|
"results": result_counts(&conn, owner_team_id)?,
|
|
66
|
-
"latest_results":
|
|
68
|
+
"latest_results": latest_result_summaries(&store, owner_team_id)?,
|
|
67
69
|
"readiness": readiness,
|
|
68
70
|
"coordinator": coordinator_health_value(health),
|
|
69
71
|
"last_events": Value::Array(
|
|
@@ -90,11 +92,137 @@ use rusqlite::params;
|
|
|
90
92
|
owner_team_id: Option<&str>,
|
|
91
93
|
agent: Option<&str>,
|
|
92
94
|
) -> Result<String, CliError> {
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
95
|
+
match agent {
|
|
96
|
+
// queries.py:130-162 — the agent branch renders the multi-line agent detail
|
|
97
|
+
// from the FULL status payload; an unknown agent id errors.
|
|
98
|
+
Some(agent) => {
|
|
99
|
+
let status = status_scoped(workspace, state, owner_team_id, false, false)?;
|
|
100
|
+
format_agent_status(workspace, &status, agent)
|
|
101
|
+
}
|
|
102
|
+
None => {
|
|
103
|
+
let status = status_scoped(workspace, state, owner_team_id, true, false)?;
|
|
104
|
+
Ok(crate::cli::format_status_summary(&status))
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
/// `format_status` agent 分支(`queries.py:135-162`)。
|
|
110
|
+
fn format_agent_status(
|
|
111
|
+
workspace: &Path,
|
|
112
|
+
status: &Value,
|
|
113
|
+
agent_id: &str,
|
|
114
|
+
) -> Result<String, CliError> {
|
|
115
|
+
let agents = status.get("agents").and_then(Value::as_object);
|
|
116
|
+
let health = status.get("agent_health").and_then(Value::as_object);
|
|
117
|
+
let known = agents.is_some_and(|map| map.contains_key(agent_id))
|
|
118
|
+
|| health.is_some_and(|map| map.contains_key(agent_id));
|
|
119
|
+
if !known {
|
|
120
|
+
return Err(CliError::Runtime(format!("unknown agent id: {agent_id}")));
|
|
121
|
+
}
|
|
122
|
+
let empty = json!({});
|
|
123
|
+
let agent = agents
|
|
124
|
+
.and_then(|map| map.get(agent_id))
|
|
125
|
+
.unwrap_or(&empty);
|
|
126
|
+
let row = health.and_then(|map| map.get(agent_id)).unwrap_or(&empty);
|
|
127
|
+
let status_text = row
|
|
128
|
+
.get("status")
|
|
129
|
+
.and_then(Value::as_str)
|
|
130
|
+
.map(str::to_string)
|
|
131
|
+
.unwrap_or_else(||
|
|
132
|
+
|
|
133
|
+
agent_health_status_text(agent.get("status").and_then(Value::as_str).unwrap_or(""))
|
|
134
|
+
);
|
|
135
|
+
let tasks = status.get("tasks").and_then(Value::as_array).cloned().unwrap_or_default();
|
|
136
|
+
let task_id = current_task_for_agent(&tasks, agent_id).unwrap_or_else(|| "-".to_string());
|
|
137
|
+
let inbox_rows = crate::message_store::MessageStore::open(workspace)
|
|
138
|
+
.map_err(|e| CliError::Runtime(e.to_string()))?
|
|
139
|
+
.inbox(agent_id, 3, None)
|
|
140
|
+
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
141
|
+
let mut lines = vec![
|
|
142
|
+
format!("{agent_id} {status_text}"),
|
|
143
|
+
format!(" provider: {}", py_get(agent, "provider")),
|
|
144
|
+
format!(" model: {}", py_get(agent, "model")),
|
|
145
|
+
format!(" profile: {}", py_get(agent, "profile")),
|
|
146
|
+
format!(" session_id: {}", py_get_or_dash(agent, "session_id")),
|
|
147
|
+
format!(" captured_via: {}", py_get_or_dash(agent, "captured_via")),
|
|
148
|
+
format!(
|
|
149
|
+
" attribution_confidence: {}",
|
|
150
|
+
py_get_or_dash(agent, "attribution_confidence")
|
|
151
|
+
),
|
|
152
|
+
format!(" task: {task_id}"),
|
|
153
|
+
format!(" handoff: {}", py_get(agent, "handoff_path")),
|
|
154
|
+
" recent messages:".to_string(),
|
|
155
|
+
];
|
|
156
|
+
if inbox_rows.is_empty() {
|
|
157
|
+
lines.push(" none".to_string());
|
|
158
|
+
} else {
|
|
159
|
+
for item in &inbox_rows {
|
|
160
|
+
let content = item.get("content").and_then(Value::as_str).unwrap_or("");
|
|
161
|
+
let content: String = content.chars().take(120).collect();
|
|
162
|
+
lines.push(format!(
|
|
163
|
+
" {} {} -> {} {}: {content}",
|
|
164
|
+
py_get_or_dash(item, "created_at"),
|
|
165
|
+
py_get_or_dash(item, "sender"),
|
|
166
|
+
py_get_or_dash(item, "recipient"),
|
|
167
|
+
py_get_or_dash(item, "status"),
|
|
168
|
+
));
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
Ok(lines.join("\n"))
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
/// `current_task_for_agent`(`approvals/status.py:127-132`)。
|
|
175
|
+
fn current_task_for_agent(tasks: &[Value], agent_id: &str) -> Option<String> {
|
|
176
|
+
const ACTIVE: [&str; 5] = ["pending", "ready", "running", "blocked", "needs_retry"];
|
|
177
|
+
for task in tasks.iter().rev() {
|
|
178
|
+
let assignee = task.get("assignee").and_then(Value::as_str);
|
|
179
|
+
let status = task.get("status").and_then(Value::as_str).unwrap_or("pending");
|
|
180
|
+
if assignee == Some(agent_id) && ACTIVE.contains(&status) {
|
|
181
|
+
return task.get("id").and_then(Value::as_str).map(str::to_string);
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
None
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
fn agent_health_status_text(status: &str) -> String {
|
|
188
|
+
serde_json::to_value(crate::provider::agent_health_status(status))
|
|
189
|
+
.ok()
|
|
190
|
+
.and_then(|v| v.as_str().map(str::to_string))
|
|
191
|
+
.unwrap_or_else(|| "-".to_string())
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
/// Python `agent.get(key, '-')`:键缺失 → `-`;键存在但为 null → 打印 `None`。
|
|
195
|
+
fn py_get(agent: &Value, key: &str) -> String {
|
|
196
|
+
match agent.get(key) {
|
|
197
|
+
None => "-".to_string(),
|
|
198
|
+
Some(Value::Null) => "None".to_string(),
|
|
199
|
+
Some(Value::String(s)) => s.clone(),
|
|
200
|
+
Some(other) => other.to_string(),
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
/// Python `agent.get(key) or '-'`:缺失/null/空串都落 `-`。
|
|
205
|
+
fn py_get_or_dash(agent: &Value, key: &str) -> String {
|
|
206
|
+
match agent.get(key) {
|
|
207
|
+
Some(Value::String(s)) if !s.is_empty() => s.clone(),
|
|
208
|
+
Some(Value::Number(n)) => n.to_string(),
|
|
209
|
+
_ => "-".to_string(),
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
/// `latest_result_summaries`(`queries.py:83-89`)。
|
|
214
|
+
fn latest_result_summaries(
|
|
215
|
+
store: &crate::message_store::MessageStore,
|
|
216
|
+
owner_team_id: Option<&str>,
|
|
217
|
+
) -> Result<Value, CliError> {
|
|
218
|
+
let rows = store
|
|
219
|
+
.latest_results(5, owner_team_id)
|
|
220
|
+
.map_err(|e| CliError::Runtime(e.to_string()))?;
|
|
221
|
+
Ok(Value::Array(
|
|
222
|
+
rows.iter()
|
|
223
|
+
.filter_map(crate::message_store::result_summary_from_row)
|
|
224
|
+
.collect(),
|
|
225
|
+
))
|
|
98
226
|
}
|
|
99
227
|
/// `status.approvals(workspace, agent_id)`(JSON)/`format_approvals`(人读)。
|
|
100
228
|
pub fn approvals(workspace: &Path, agent: Option<&str>, as_json: bool) -> Result<Value, CliError> {
|
|
@@ -320,7 +320,12 @@ tasks:
|
|
|
320
320
|
// hinges solely on the cli_prompt_ready derivation.)
|
|
321
321
|
#[test]
|
|
322
322
|
fn contract_alive_worker_running_is_cli_prompt_ready_and_ready() {
|
|
323
|
-
|
|
323
|
+
// A-5: missing leader_receiver no longer counts as attached; this contract's
|
|
324
|
+
// subject is the cli_prompt_ready derivation, so the fixture carries an
|
|
325
|
+
// attached receiver to keep `ready` hinging on it alone.
|
|
326
|
+
let state = serde_json::json!({
|
|
327
|
+
"leader_receiver": {"status": "attached", "pane_id": "%9"},
|
|
328
|
+
"agents": {"w1": {
|
|
324
329
|
"status": "running",
|
|
325
330
|
"pane_id": "%1",
|
|
326
331
|
"mcp_ready": true,
|
|
@@ -361,6 +366,8 @@ tasks:
|
|
|
361
366
|
let state = serde_json::json!({
|
|
362
367
|
"session_name": "ta-fake",
|
|
363
368
|
"tmux_session_present": true, // golden status() top-level signal: the live tmux session exists
|
|
369
|
+
// A-5: missing leader_receiver no longer counts as attached (see above).
|
|
370
|
+
"leader_receiver": {"status": "attached", "pane_id": "%9"},
|
|
364
371
|
"agents": { "w1": {
|
|
365
372
|
"status": "running",
|
|
366
373
|
"mcp_config": mcp.to_string_lossy(),
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
//! B5/F1 · `sessions_to_kill_sparing_leader` 纯函数单测(kill 决策下沉后锁定)。
|
|
2
|
+
//!
|
|
3
|
+
//! 真相源 = `team-agent-leader-` 确定性命名前缀(leader/start.rs LEADER_SESSION_PREFIX);
|
|
4
|
+
//! 集成面由 tests/b5_leader_terminal_kill_red.rs 的真 tmux 契约覆盖,此处锁纯决策。
|
|
5
|
+
|
|
6
|
+
use crate::cli::lifecycle_port::sessions_to_kill_sparing_leader;
|
|
7
|
+
use crate::transport::SessionName;
|
|
8
|
+
|
|
9
|
+
fn names(raw: &[&str]) -> Vec<SessionName> {
|
|
10
|
+
raw.iter().map(|name| SessionName::new(*name)).collect()
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
/// With no leader-prefixed session in the list (including the empty list), the
/// planner returns `None`.
#[test]
fn no_leader_session_means_whole_server_kill() {
    let team_only = names(&["team-x"]);
    assert_eq!(sessions_to_kill_sparing_leader(&team_only), None);

    let nothing: Vec<SessionName> = Vec::new();
    assert_eq!(sessions_to_kill_sparing_leader(&nothing), None);
}
|
|
18
|
+
|
|
19
|
+
/// When a leader-prefixed session is present, the planner returns exactly the
/// remaining sessions, in their original order.
#[test]
fn leader_session_present_kills_only_non_leader_sessions() {
    let live_sessions = names(&[
        "team-agent-leader-claude-myws-deadbeef",
        "team-x",
        "team-y",
    ]);
    let expected = names(&["team-x", "team-y"]);

    let planned = sessions_to_kill_sparing_leader(&live_sessions)
        .expect("leader present must switch to per-session kills");

    assert_eq!(planned, expected);
}
|
|
30
|
+
|
|
31
|
+
/// A list holding only a leader-prefixed session yields an empty kill list
/// (`Some(vec![])`), not `None`.
#[test]
fn only_leader_sessions_left_kills_nothing_but_keeps_server() {
    let leader_only = names(&["team-agent-leader-codex-myws-cafe0123"]);
    let planned = sessions_to_kill_sparing_leader(&leader_only);
    assert_eq!(
        planned,
        Some(Vec::new()),
        "a socket holding only leader sessions must not be torn down"
    );
}
|
|
@@ -513,10 +513,14 @@ pub struct DoctorArgs {
|
|
|
513
513
|
}
|
|
514
514
|
|
|
515
515
|
/// Gate selected with `doctor --gate` (`commands.py:218-236`; clap choices).
/// swallow batch 3: an unrecognized gate is carried verbatim so the doctor exit can
/// refuse with `unknown_gate` (Python commands.py:234-235 raises) instead of silently
/// falling through to the default doctor (empty green).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DoctorGate {
    Orphans,
    Comms,
    /// A gate value that matched no known choice, kept exactly as it was supplied.
    Unknown(String),
}
|
|
521
525
|
|
|
522
526
|
/// `sessions`(`parser.py:230`)。
|
|
@@ -68,8 +68,25 @@ pub fn run_daemon_with_coordinator(
|
|
|
68
68
|
Some(v) if v > 0.0 => v,
|
|
69
69
|
_ => resolve_tick_interval(&args.workspace)?,
|
|
70
70
|
};
|
|
71
|
+
// P7 (Gap 37b, Python __main__.py:44-59): capture the original parent BEFORE the
|
|
72
|
+
// loop; the orphan predicate fires only on the literal triple condition
|
|
73
|
+
// (ppid changed ∧ reparented to pid 1 ∧ workspace gone) — never wider.
|
|
74
|
+
let initial_ppid = current_ppid();
|
|
71
75
|
let mut consecutive_failures = 0_u32;
|
|
76
|
+
let mut last_failure_signature: Option<String> = None;
|
|
72
77
|
loop {
|
|
78
|
+
let ppid_now = current_ppid();
|
|
79
|
+
if super::should_orphan_self_terminate(initial_ppid, ppid_now, &args.workspace) {
|
|
80
|
+
let _ = event_log.write(
|
|
81
|
+
"coordinator.orphan_self_terminate",
|
|
82
|
+
serde_json::json!({
|
|
83
|
+
"initial_ppid": initial_ppid,
|
|
84
|
+
"current_ppid": ppid_now,
|
|
85
|
+
"workspace": args.workspace.as_path().to_string_lossy(),
|
|
86
|
+
}),
|
|
87
|
+
);
|
|
88
|
+
break;
|
|
89
|
+
}
|
|
73
90
|
match coordinator.tick() {
|
|
74
91
|
Ok(report) => {
|
|
75
92
|
if consecutive_failures > 0 {
|
|
@@ -78,6 +95,7 @@ pub fn run_daemon_with_coordinator(
|
|
|
78
95
|
serde_json::json!({"consecutive_failures": consecutive_failures}),
|
|
79
96
|
)?;
|
|
80
97
|
consecutive_failures = 0;
|
|
98
|
+
last_failure_signature = None;
|
|
81
99
|
}
|
|
82
100
|
if report.stop || args.once {
|
|
83
101
|
break;
|
|
@@ -87,15 +105,40 @@ pub fn run_daemon_with_coordinator(
|
|
|
87
105
|
Err(err) => {
|
|
88
106
|
consecutive_failures = consecutive_failures.saturating_add(1);
|
|
89
107
|
let next_sleep_sec = backoff_sleep_sec(tick_interval, consecutive_failures);
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
108
|
+
// P7-F2 (Python __main__.py:66-89): identical-signature failures emit
|
|
109
|
+
// ONE full tick_error; repeats only write `.suppressed` companions,
|
|
110
|
+
// except the Python periodic re-emit tiers (failure #1, every 12th
|
|
111
|
+
// failure, or the 40s/60s backoff steps).
|
|
112
|
+
let signature: String = err.to_string().chars().take(200).collect();
|
|
113
|
+
let signature_changed =
|
|
114
|
+
last_failure_signature.as_deref() != Some(signature.as_str());
|
|
115
|
+
if signature_changed {
|
|
116
|
+
last_failure_signature = Some(signature);
|
|
117
|
+
}
|
|
118
|
+
if signature_changed
|
|
119
|
+
|| consecutive_failures == 1
|
|
120
|
+
|| consecutive_failures % 12 == 0
|
|
121
|
+
|| next_sleep_sec == 40.0
|
|
122
|
+
|| next_sleep_sec == 60.0
|
|
123
|
+
{
|
|
124
|
+
event_log.write(
|
|
125
|
+
"coordinator.tick_error",
|
|
126
|
+
serde_json::json!({
|
|
127
|
+
"error": err.to_string(),
|
|
128
|
+
"exc_type": "TickError",
|
|
129
|
+
"consecutive_failures": consecutive_failures,
|
|
130
|
+
"next_sleep_sec": next_sleep_sec,
|
|
131
|
+
}),
|
|
132
|
+
)?;
|
|
133
|
+
} else {
|
|
134
|
+
event_log.write(
|
|
135
|
+
"coordinator.tick_error.suppressed",
|
|
136
|
+
serde_json::json!({
|
|
137
|
+
"consecutive_failures": consecutive_failures,
|
|
138
|
+
"next_sleep_sec": next_sleep_sec,
|
|
139
|
+
}),
|
|
140
|
+
)?;
|
|
141
|
+
}
|
|
99
142
|
if args.once {
|
|
100
143
|
return Err(DaemonError::Tick(err));
|
|
101
144
|
}
|
|
@@ -107,6 +150,11 @@ pub fn run_daemon_with_coordinator(
|
|
|
107
150
|
Ok(())
|
|
108
151
|
}
|
|
109
152
|
|
|
153
|
+
/// Current parent process id (Python `os.getppid()`); the input to the orphan
/// self-check.
///
/// Uses the standard library's safe `parent_id()` wrapper rather than a raw
/// `libc::getppid()` FFI call, so no `unsafe` block is required here.
fn current_ppid() -> u32 {
    std::os::unix::process::parent_id()
}
|
|
157
|
+
|
|
110
158
|
/// 计算 tick 间隔(`_tick_interval`,`__main__.py:104-115`)。读 spec `runtime.tick_interval_sec`,
|
|
111
159
|
/// 缺失/出错 → `DEFAULT_TICK_INTERVAL_SEC`;并确保 schema 存在(`MessageStore(workspace)`)。
|
|
112
160
|
pub fn resolve_tick_interval(workspace: &WorkspacePath) -> Result<f64, TickError> {
|
|
@@ -425,7 +425,7 @@ pub fn write_coordinator_metadata(
|
|
|
425
425
|
std::fs::write(path, text)
|
|
426
426
|
}
|
|
427
427
|
|
|
428
|
-
fn message_store_schema_health(workspace: &WorkspacePath) -> SchemaHealth {
|
|
428
|
+
pub(crate) fn message_store_schema_health(workspace: &WorkspacePath) -> SchemaHealth {
|
|
429
429
|
match MessageStore::open(workspace.as_path()) {
|
|
430
430
|
Ok(_) => SchemaHealth {
|
|
431
431
|
ok: true,
|
|
@@ -490,7 +490,17 @@ pub fn collect_watch_lines(
|
|
|
490
490
|
store: &MessageStore,
|
|
491
491
|
team: Option<&str>,
|
|
492
492
|
) -> Result<Vec<String>, WatchError> {
|
|
493
|
-
let
|
|
493
|
+
let mut lines = collect_event_lines(workspace, cursor, team)?;
|
|
494
|
+
lines.extend(collect_result_lines(workspace, cursor, store, team)?);
|
|
495
|
+
Ok(lines)
|
|
496
|
+
}
|
|
497
|
+
|
|
498
|
+
/// `_collect_event_lines`(`watch.py:66-97`):tail events.jsonl,按 team 过滤。
|
|
499
|
+
fn collect_event_lines(
|
|
500
|
+
workspace: &WorkspacePath,
|
|
501
|
+
cursor: &mut WatchCursor,
|
|
502
|
+
team: Option<&str>,
|
|
503
|
+
) -> Result<Vec<String>, WatchError> {
|
|
494
504
|
let logs = crate::model::paths::logs_dir(workspace.as_path());
|
|
495
505
|
let events_path = logs.join("events.jsonl");
|
|
496
506
|
let archive_path = logs.join("events.jsonl.1");
|
|
@@ -521,6 +531,10 @@ pub fn collect_watch_lines(
|
|
|
521
531
|
cursor.initialized = true;
|
|
522
532
|
for line in text.lines() {
|
|
523
533
|
if let Ok(event) = serde_json::from_str::<Value>(line) {
|
|
534
|
+
// watch.py:91 — `if team and _event_team_id(event) != team: continue`.
|
|
535
|
+
if team.is_some() && event_team_id(&event).as_deref() != team {
|
|
536
|
+
continue;
|
|
537
|
+
}
|
|
524
538
|
if let Some(rendered) = render_event_line(&event) {
|
|
525
539
|
lines.push(rendered);
|
|
526
540
|
}
|
|
@@ -529,6 +543,55 @@ pub fn collect_watch_lines(
|
|
|
529
543
|
Ok(lines)
|
|
530
544
|
}
|
|
531
545
|
|
|
546
|
+
/// `_event_team_id`(`watch.py:132-134`)。
|
|
547
|
+
fn event_team_id(event: &Value) -> Option<String> {
|
|
548
|
+
["team_id", "owner_team_id", "team"]
|
|
549
|
+
.iter()
|
|
550
|
+
.find_map(|key| event.get(*key))
|
|
551
|
+
.and_then(|value| match value {
|
|
552
|
+
Value::String(s) if !s.is_empty() => Some(s.clone()),
|
|
553
|
+
Value::Number(n) => Some(n.to_string()),
|
|
554
|
+
_ => None,
|
|
555
|
+
})
|
|
556
|
+
}
|
|
557
|
+
|
|
558
|
+
/// `_collect_result_lines`(`watch.py:100-112`):store.latest_results(owner_team_id=team)
|
|
559
|
+
/// 出 `result_received: {agent} -> {summary}` 行;按 cursor.seen_result_ids 去重。
|
|
560
|
+
fn collect_result_lines(
|
|
561
|
+
workspace: &WorkspacePath,
|
|
562
|
+
cursor: &mut WatchCursor,
|
|
563
|
+
store: &MessageStore,
|
|
564
|
+
team: Option<&str>,
|
|
565
|
+
) -> Result<Vec<String>, WatchError> {
|
|
566
|
+
let db_path = crate::model::paths::runtime_dir(workspace.as_path()).join("team.db");
|
|
567
|
+
if !db_path.exists() {
|
|
568
|
+
return Ok(Vec::new());
|
|
569
|
+
}
|
|
570
|
+
let mut lines = Vec::new();
|
|
571
|
+
for row in store.latest_results(20, team)? {
|
|
572
|
+
let Some(result_id) = row
|
|
573
|
+
.get("result_id")
|
|
574
|
+
.and_then(Value::as_str)
|
|
575
|
+
.filter(|id| !id.is_empty())
|
|
576
|
+
.map(str::to_string)
|
|
577
|
+
else {
|
|
578
|
+
continue;
|
|
579
|
+
};
|
|
580
|
+
if !cursor.seen_result_ids.insert(result_id) {
|
|
581
|
+
continue;
|
|
582
|
+
}
|
|
583
|
+
let mut summary = crate::message_store::result_summary_from_row(&row)
|
|
584
|
+
.unwrap_or_else(|| serde_json::json!({}));
|
|
585
|
+
if let Some(obj) = summary.as_object_mut() {
|
|
586
|
+
obj.insert("event".to_string(), Value::String("result_received".to_string()));
|
|
587
|
+
}
|
|
588
|
+
if let Some(rendered) = render_event_line(&summary) {
|
|
589
|
+
lines.push(rendered);
|
|
590
|
+
}
|
|
591
|
+
}
|
|
592
|
+
Ok(lines)
|
|
593
|
+
}
|
|
594
|
+
|
|
532
595
|
/// `render_event_line`(`watch.py:46-63`)。把一条 step 4 事件渲染成人类可读行;非可渲染事件 → `None`。
|
|
533
596
|
/// 消费的事件类型:`result_received` / `leader_receiver.{injected,submitted}` / `send.failed` /
|
|
534
597
|
/// `leader_receiver.rebind_required` / `leader.api_error`(card 表)。
|
|
@@ -54,18 +54,23 @@ fn detect_compaction(
|
|
|
54
54
|
.as_ref()
|
|
55
55
|
.map(|team| team.as_str().to_string())
|
|
56
56
|
.unwrap_or_else(|| crate::state::projection::team_state_key(state));
|
|
57
|
-
let current = update_compaction_count(state, &team, &fact.agent_id, count);
|
|
57
|
+
let (previous, current) = update_compaction_count(state, &team, &fact.agent_id, count);
|
|
58
58
|
let provider = fact.provider;
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
"
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
59
|
+
// P4 (C-P4-1, N35 anti-nag): the event is CHANGE-driven — an unchanged compaction
|
|
60
|
+
// count must not re-emit every tick (live sample: 1037 identical events / 19.5min).
|
|
61
|
+
// A changed value still emits (value-keyed dedup, not blanket suppression).
|
|
62
|
+
if current != previous {
|
|
63
|
+
let _ = event_log.write(
|
|
64
|
+
"coordinator.compaction_observed",
|
|
65
|
+
json!({
|
|
66
|
+
"agent_id": fact.agent_id.as_str(),
|
|
67
|
+
"provider": provider.map(provider_name),
|
|
68
|
+
"team": team,
|
|
69
|
+
"compaction_count": current,
|
|
70
|
+
"stuck_loop": false,
|
|
71
|
+
}),
|
|
72
|
+
);
|
|
73
|
+
}
|
|
69
74
|
let threshold = compaction_reset_threshold(state);
|
|
70
75
|
let recommendation = if provider == Some(Provider::Codex) && current >= threshold {
|
|
71
76
|
let message = format!(
|
|
@@ -231,15 +236,21 @@ fn count_compaction_markers(scrollback: &str) -> i64 {
|
|
|
231
236
|
+ lower.matches("compaction occurred").count() as i64
|
|
232
237
|
}
|
|
233
238
|
|
|
234
|
-
|
|
239
|
+
/// Returns `(previous, current)` so the caller can emit change-driven events (P4).
|
|
240
|
+
fn update_compaction_count(
|
|
241
|
+
state: &mut Value,
|
|
242
|
+
team: &str,
|
|
243
|
+
agent_id: &AgentId,
|
|
244
|
+
count: i64,
|
|
245
|
+
) -> (i64, i64) {
|
|
235
246
|
let Some(coordinator) = coordinator_object_mut(state) else {
|
|
236
|
-
return count;
|
|
247
|
+
return (0, count);
|
|
237
248
|
};
|
|
238
249
|
let Some(counts) = object_field_mut(coordinator, "compaction_counts") else {
|
|
239
|
-
return count;
|
|
250
|
+
return (0, count);
|
|
240
251
|
};
|
|
241
252
|
let Some(team_counts) = object_field_mut(counts, team) else {
|
|
242
|
-
return count;
|
|
253
|
+
return (0, count);
|
|
243
254
|
};
|
|
244
255
|
let previous = team_counts
|
|
245
256
|
.get(agent_id.as_str())
|
|
@@ -247,7 +258,7 @@ fn update_compaction_count(state: &mut Value, team: &str, agent_id: &AgentId, co
|
|
|
247
258
|
.unwrap_or(0);
|
|
248
259
|
let current = previous.max(count);
|
|
249
260
|
team_counts.insert(agent_id.as_str().to_string(), json!(current));
|
|
250
|
-
current
|
|
261
|
+
(previous, current)
|
|
251
262
|
}
|
|
252
263
|
|
|
253
264
|
fn compaction_reset_threshold(state: &Value) -> i64 {
|
|
@@ -448,6 +459,7 @@ fn provider_name(provider: Provider) -> &'static str {
|
|
|
448
459
|
Provider::Claude => "claude",
|
|
449
460
|
Provider::ClaudeCode => "claude_code",
|
|
450
461
|
Provider::Codex => "codex",
|
|
462
|
+
Provider::Copilot => "copilot",
|
|
451
463
|
Provider::GeminiCli => "gemini_cli",
|
|
452
464
|
Provider::Fake => "fake",
|
|
453
465
|
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
use super::*;
|
|
2
|
+
|
|
3
|
+
// ═════════════════════════════════════════════════════════════════════════
|
|
4
|
+
// A0 GREEN regression lock, integration tier (save_hook window injection).
|
|
5
|
+
// Locate doc: .team/artifacts/a0-rs-lostupdate-locate.md §5.4.
|
|
6
|
+
//
|
|
7
|
+
// Python 0.2.11 A0: the coordinator tick loads state (tick window opens), mutates in
|
|
8
|
+
// memory for seconds, then whole-file-saves with no merge — an add-agent registration
|
|
9
|
+
// landing inside that window is permanently overwritten (state.py:493). RS blocks this
|
|
10
|
+
// via the in-lock reload+merge at the save chokepoint (persist.rs:210-221, 272-313).
|
|
11
|
+
//
|
|
12
|
+
// This test pins the END-TO-END guard through the REAL tick: the `save_hook` seam
|
|
13
|
+
// (tick.rs save point) lets us deterministically land a concurrent registration on disk
|
|
14
|
+
// AFTER tick's load and BEFORE tick's save, then delegate to the real
|
|
15
|
+
// `save_team_scoped_state`. Zero sleeps, zero real races — ordering is fixed by the
|
|
16
|
+
// hook call order.
|
|
17
|
+
// ═════════════════════════════════════════════════════════════════════════
|
|
18
|
+
|
|
19
|
+
/// Runs one real `tick()` with a save hook that first writes a `joiner` agent straight
/// into the on-disk state file and then calls `save_team_scoped_state` with the tick's
/// own state. The test passes when the saved state still contains `/agents/joiner` and
/// the tick counter metadata file reports at least one iteration.
#[test]
fn a0_green_lock_tick_save_preserves_registration_landed_after_tick_load() {
    // Unique scratch workspace: process id plus a nanosecond timestamp.
    let dir = std::env::temp_dir().join(format!(
        "team-agent-coord-a0-{}-{}",
        std::process::id(),
        std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_nanos()
    ));
    std::fs::create_dir_all(&dir).unwrap();
    // Seed the state with a single agent, `w1`.
    crate::state::persist::save_runtime_state(
        &dir,
        &serde_json::json!({
            "session_name": "team-a0",
            "active_team_key": "team-a0",
            "agents": { "w1": { "provider": "codex", "status": "running", "agent_id": "w1", "window": "w1" } },
        }),
    )
    .unwrap();

    // The hook runs at tick's atomic-save point: first simulate the concurrent
    // add-agent registration landing on disk (raw write, as another process would),
    // then run the REAL save path with tick's stale in-memory state.
    let hook: SaveHook = Box::new(|ws, tick_state| {
        let path = crate::state::persist::runtime_state_path(ws.as_path());
        let mut latest: serde_json::Value =
            serde_json::from_str(&std::fs::read_to_string(&path).unwrap()).unwrap();
        latest["agents"]["joiner"] = serde_json::json!({
            "provider": "codex", "status": "running", "agent_id": "joiner", "window": "joiner",
        });
        std::fs::write(&path, serde_json::to_string_pretty(&latest).unwrap()).unwrap();
        crate::state::projection::save_team_scoped_state(ws.as_path(), tick_state)
    });

    let ws = WorkspacePath::new(dir.clone());
    let reg: Box<dyn ProviderRegistry> = Box::new(MockRegistry::new(&[], &[]));
    let transport = MockTransport::new(true);
    let coord = Coordinator::for_test(ws, reg, Box::new(transport), Some(hook), None);
    let report = coord.tick().expect("tick should complete");
    assert!(report.ok, "tick must not degrade; report={report:?}");

    // Re-read the state file from disk to see what the tick's save left behind.
    let saved: serde_json::Value = serde_json::from_str(
        &std::fs::read_to_string(crate::state::persist::runtime_state_path(&dir)).unwrap(),
    )
    .unwrap();
    assert!(
        saved
            .pointer("/agents/joiner")
            .is_some_and(serde_json::Value::is_object),
        "A0 GREEN lock: a registration landing between tick load and tick save must \
         survive the tick's save (in-lock reload+merge, persist.rs:272-313); saved={saved}"
    );
    // 0.3.5 integration re-anchor (P3 / perf C-P3-1): the tick iteration counter moved
    // OUT of persistent state into .team/runtime/coordinator_tick.json — state.json is
    // counter-free BY DESIGN (p3_steady_tick_no_state_write). The original proxy
    // ("tick's save really happened") is preserved via the counter metadata file.
    let tick_meta: serde_json::Value = serde_json::from_str(
        &std::fs::read_to_string(dir.join(".team/runtime/coordinator_tick.json")).unwrap(),
    )
    .unwrap();
    assert!(
        tick_meta
            .get("coordinator_tick_iteration_count")
            .and_then(serde_json::Value::as_u64)
            .is_some_and(|count| count >= 1),
        "the tick really ran (its iteration counter metadata landed); tick_meta={tick_meta}"
    );
}
|