@team-agent/installer 0.3.10 → 0.3.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.lock +1 -1
- package/Cargo.toml +1 -1
- package/crates/team-agent/src/cli/send.rs +9 -2
- package/crates/team-agent/src/coordinator/backoff.rs +83 -2
- package/crates/team-agent/src/coordinator/health.rs +63 -3
- package/crates/team-agent/src/coordinator/tick.rs +327 -167
- package/crates/team-agent/src/mcp_server/helpers.rs +24 -5
- package/crates/team-agent/src/mcp_server/normalize.rs +13 -6
- package/crates/team-agent/src/mcp_server/tests/send.rs +310 -212
- package/crates/team-agent/src/messaging/helpers.rs +30 -10
- package/crates/team-agent/src/messaging/send.rs +71 -14
- package/crates/team-agent/src/messaging/tests/basic.rs +25 -7
- package/crates/team-agent/src/messaging/tests/runtime.rs +489 -125
- package/crates/team-agent/src/messaging/types.rs +19 -4
- package/package.json +4 -4
package/Cargo.lock
CHANGED
package/Cargo.toml
CHANGED
|
@@ -22,7 +22,9 @@ pub fn cmd_send(args: &SendArgs) -> Result<CmdResult, CliError> {
|
|
|
22
22
|
// empty message body is a prompt-only invocation (`team-agent send "fix the build"`).
|
|
23
23
|
// The lone positional is CONTENT, not a target — reject with `routing_ambiguous`
|
|
24
24
|
// (NOT `target_not_in_team`, which would lie that the user did pick a target).
|
|
25
|
-
if let Some(amb) =
|
|
25
|
+
if let Some(amb) =
|
|
26
|
+
routing_ambiguous_value(&selected.run_workspace, args, &target, &content, &opts)
|
|
27
|
+
{
|
|
26
28
|
return Ok(CmdResult::from_json(amb, args.json));
|
|
27
29
|
}
|
|
28
30
|
let outcome = messaging::send_message(&selected.run_workspace, &target, &content, &opts)?;
|
|
@@ -117,7 +119,10 @@ fn watch_notice_json(target: &MessageTarget, opts: &SendOptions) -> Value {
|
|
|
117
119
|
let agent_id = match target {
|
|
118
120
|
MessageTarget::Single(agent) => agent.clone(),
|
|
119
121
|
MessageTarget::Broadcast => "*".to_string(),
|
|
120
|
-
MessageTarget::Fanout(recipients) => recipients
|
|
122
|
+
MessageTarget::Fanout(recipients) => recipients
|
|
123
|
+
.first()
|
|
124
|
+
.cloned()
|
|
125
|
+
.unwrap_or_else(|| "-".to_string()),
|
|
121
126
|
};
|
|
122
127
|
json!({
|
|
123
128
|
"status": "registered",
|
|
@@ -174,6 +179,7 @@ fn delivery_status_wire(status: DeliveryStatus) -> &'static str {
|
|
|
174
179
|
DeliveryStatus::Queued => "queued",
|
|
175
180
|
DeliveryStatus::Blocked => "blocked",
|
|
176
181
|
DeliveryStatus::Refused => "refused",
|
|
182
|
+
DeliveryStatus::Degraded => "degraded",
|
|
177
183
|
DeliveryStatus::RetryScheduled => "retry_scheduled",
|
|
178
184
|
DeliveryStatus::TrustAutoAnswerExhausted => "trust_auto_answer_exhausted",
|
|
179
185
|
DeliveryStatus::AlreadyDelivered => "already_delivered",
|
|
@@ -195,6 +201,7 @@ fn delivery_refusal_wire(reason: DeliveryRefusal) -> &'static str {
|
|
|
195
201
|
DeliveryRefusal::TmuxTargetMissing => "tmux_target_missing",
|
|
196
202
|
DeliveryRefusal::MessageAlreadyClaimed => "message_already_claimed",
|
|
197
203
|
DeliveryRefusal::LeaderNotAttached => "leader_not_attached",
|
|
204
|
+
DeliveryRefusal::CoordinatorUnavailable => "coordinator_unavailable",
|
|
198
205
|
DeliveryRefusal::NoCallerPane => "no_caller_pane",
|
|
199
206
|
DeliveryRefusal::TeamOwnerMismatch => "team_owner_mismatch",
|
|
200
207
|
DeliveryRefusal::Ambiguous => "ambiguous",
|
|
@@ -8,7 +8,7 @@ use crate::model::enums::Provider;
|
|
|
8
8
|
use crate::provider::ProviderAdapter;
|
|
9
9
|
|
|
10
10
|
use super::health::{coordinator_pid_path, write_coordinator_metadata};
|
|
11
|
-
use super::tick::TickError;
|
|
11
|
+
use super::tick::{TickError, TickReport};
|
|
12
12
|
use super::types::{
|
|
13
13
|
ErrorLists, MetadataSource, Pid, ProviderRegistry, WorkspacePath, BACKOFF_MAX_SEC,
|
|
14
14
|
DEFAULT_TICK_INTERVAL_SEC,
|
|
@@ -87,7 +87,7 @@ pub fn run_daemon_with_coordinator(
|
|
|
87
87
|
);
|
|
88
88
|
break;
|
|
89
89
|
}
|
|
90
|
-
match coordinator.tick() {
|
|
90
|
+
match run_tick_with_panic_marker(&event_log, || coordinator.tick()) {
|
|
91
91
|
Ok(report) => {
|
|
92
92
|
if consecutive_failures > 0 {
|
|
93
93
|
event_log.write(
|
|
@@ -150,6 +150,36 @@ pub fn run_daemon_with_coordinator(
|
|
|
150
150
|
Ok(())
|
|
151
151
|
}
|
|
152
152
|
|
|
153
|
+
fn run_tick_with_panic_marker<F>(event_log: &EventLog, tick: F) -> Result<TickReport, TickError>
|
|
154
|
+
where
|
|
155
|
+
F: FnOnce() -> Result<TickReport, TickError>,
|
|
156
|
+
{
|
|
157
|
+
match std::panic::catch_unwind(std::panic::AssertUnwindSafe(tick)) {
|
|
158
|
+
Ok(result) => result,
|
|
159
|
+
Err(payload) => {
|
|
160
|
+
let panic_message = panic_payload_message(payload.as_ref());
|
|
161
|
+
event_log.write(
|
|
162
|
+
"coordinator.tick_panic",
|
|
163
|
+
serde_json::json!({
|
|
164
|
+
"panic": panic_message,
|
|
165
|
+
"backtrace": std::backtrace::Backtrace::force_capture().to_string(),
|
|
166
|
+
}),
|
|
167
|
+
)?;
|
|
168
|
+
Err(TickError::Panic(panic_message))
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
/// Extracts a human-readable message from a panic payload.
///
/// A payload of type `&str` or `String` is returned as an owned `String`.
/// Any other payload type yields the fixed text `"non-string panic payload"`.
fn panic_payload_message(payload: &(dyn std::any::Any + Send)) -> String {
    payload
        .downcast_ref::<&str>()
        .map(|text| (*text).to_string())
        // The `&str` check comes first; `String` is only tried when it misses.
        .or_else(|| payload.downcast_ref::<String>().cloned())
        .unwrap_or_else(|| "non-string panic payload".to_string())
}
|
|
182
|
+
|
|
153
183
|
/// 当前 ppid(`os.getppid()`,孤儿自检输入)。
|
|
154
184
|
fn current_ppid() -> u32 {
|
|
155
185
|
u32::try_from(unsafe { libc::getppid() }).unwrap_or(0)
|
|
@@ -199,3 +229,54 @@ pub enum DaemonError {
|
|
|
199
229
|
#[error("tick: {0}")]
|
|
200
230
|
Tick(#[from] TickError),
|
|
201
231
|
}
|
|
232
|
+
|
|
233
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates a fresh scratch directory under the system temp dir and returns its path.
    ///
    /// The directory name combines `tag`, the current process id and a
    /// process-wide counter, so repeated calls never return the same path.
    fn tmp_ws(tag: &str) -> std::path::PathBuf {
        use std::sync::atomic::{AtomicU64, Ordering};

        static N: AtomicU64 = AtomicU64::new(0);

        let n = N.fetch_add(1, Ordering::Relaxed);
        let dir_name = format!("ta-rs-coord-{tag}-{}-{n}", std::process::id());
        let path = std::env::temp_dir().join(dir_name);
        std::fs::create_dir_all(&path).unwrap();
        path
    }

    /// A panicking tick must come back as `TickError::Panic` and leave a
    /// `coordinator.tick_panic` event with the message and a non-empty backtrace.
    #[test]
    fn coordinator_tick_panic_writes_durable_marker() {
        let workspace = tmp_ws("tick-panic");
        let event_log = EventLog::new(&workspace);

        // Silence the default panic report while the synthetic panic fires, and
        // restore the previous hook before any assertion below can fail.
        let previous_hook = std::panic::take_hook();
        std::panic::set_hook(Box::new(|_| {}));
        let failing_tick = || -> Result<TickReport, TickError> { panic!("synthetic tick panic") };
        let result = run_tick_with_panic_marker(&event_log, failing_tick);
        std::panic::set_hook(previous_hook);

        let surfaced_as_panic = match result {
            Err(TickError::Panic(message)) => message == "synthetic tick panic",
            _ => false,
        };
        assert!(surfaced_as_panic);

        let events = event_log.tail(20).unwrap();
        let panic_event = events
            .iter()
            .find(|event| {
                let kind = event.get("event").and_then(serde_json::Value::as_str);
                kind == Some("coordinator.tick_panic")
            })
            .expect("coordinator.tick_panic event");

        let recorded_message = panic_event.get("panic").and_then(serde_json::Value::as_str);
        assert_eq!(recorded_message, Some("synthetic tick panic"));

        let recorded_backtrace = panic_event
            .get("backtrace")
            .and_then(serde_json::Value::as_str);
        assert!(
            recorded_backtrace.is_some_and(|backtrace| !backtrace.is_empty()),
            "panic marker must carry a backtrace; event={panic_event}"
        );
    }
}
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
//! coordinator 健康/身份 & 只读可观测面:metadata 身份原语 + coordinator 路径 + watch 实时流。
|
|
2
2
|
|
|
3
3
|
use std::io::{Read, Seek, SeekFrom};
|
|
4
|
+
#[cfg(unix)]
|
|
5
|
+
use std::os::unix::process::CommandExt;
|
|
4
6
|
use std::path::{Path, PathBuf};
|
|
5
7
|
use std::process::{Command, Stdio};
|
|
6
8
|
use std::time::Duration;
|
|
@@ -96,13 +98,15 @@ pub fn start_coordinator(workspace: &WorkspacePath) -> Result<StartReport, Start
|
|
|
96
98
|
.append(true)
|
|
97
99
|
.open(&log_path)?;
|
|
98
100
|
let log_err = log.try_clone()?;
|
|
99
|
-
let
|
|
101
|
+
let mut command = Command::new(std::env::current_exe()?);
|
|
102
|
+
command
|
|
100
103
|
.args(["coordinator", "--workspace"])
|
|
101
104
|
.arg(workspace.as_path())
|
|
102
105
|
.stdin(Stdio::null())
|
|
103
106
|
.stdout(Stdio::from(log))
|
|
104
|
-
.stderr(Stdio::from(log_err))
|
|
105
|
-
|
|
107
|
+
.stderr(Stdio::from(log_err));
|
|
108
|
+
detach_daemon_child(&mut command);
|
|
109
|
+
let child = command.spawn()?;
|
|
106
110
|
let pid = Pid::new(child.id());
|
|
107
111
|
std::fs::write(coordinator_pid_path(workspace), pid.to_string())?;
|
|
108
112
|
write_coordinator_metadata(workspace, pid, MetadataSource::Start)?;
|
|
@@ -116,6 +120,24 @@ pub fn start_coordinator(workspace: &WorkspacePath) -> Result<StartReport, Start
|
|
|
116
120
|
})
|
|
117
121
|
}
|
|
118
122
|
|
|
123
|
+
#[cfg(unix)]
|
|
124
|
+
fn detach_daemon_child(command: &mut Command) {
|
|
125
|
+
// The coordinator is a daemon: it must not remain in the launcher's process
|
|
126
|
+
// group, otherwise bare SSH command teardown can SIGHUP it after quick-start exits.
|
|
127
|
+
unsafe {
|
|
128
|
+
command.pre_exec(|| {
|
|
129
|
+
if libc::setsid() == -1 {
|
|
130
|
+
Err(std::io::Error::last_os_error())
|
|
131
|
+
} else {
|
|
132
|
+
Ok(())
|
|
133
|
+
}
|
|
134
|
+
});
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
/// Non-Unix counterpart of the session-detaching helper: leaves the command as is.
#[cfg(not(unix))]
fn detach_daemon_child(_command: &mut Command) {
    // Intentionally empty: nothing is changed on the command for non-Unix targets.
}
|
|
140
|
+
|
|
119
141
|
/// `stop_coordinator`(`lifecycle.py:228-247`):SIGTERM pid + 清 pid/meta → typed report。
|
|
120
142
|
pub fn stop_coordinator(workspace: &WorkspacePath) -> Result<StopReport, StopError> {
|
|
121
143
|
let pid_path = coordinator_pid_path(workspace);
|
|
@@ -696,3 +718,41 @@ fn clean_text(text: &str) -> String {
|
|
|
696
718
|
/// Returns the first `max` characters of `text` as an owned `String`.
///
/// Counting is done in `char`s, not bytes, so multi-byte characters are never
/// split. If `text` has `max` characters or fewer, the whole string is returned.
fn prefix_chars(text: &str, max: usize) -> String {
    // The byte offset of the character at index `max` is where the prefix ends.
    // If there is no such character, the text is already short enough.
    match text.char_indices().nth(max) {
        Some((end, _)) => text[..end].to_string(),
        None => text.to_string(),
    }
}
|
|
721
|
+
|
|
722
|
+
#[cfg(all(test, unix))]
mod tests {
    use super::*;

    /// Owns a spawned child and, on drop, sends it SIGTERM and waits for it,
    /// so a test never leaves the process running or unreaped.
    struct ChildGuard(std::process::Child);

    impl Drop for ChildGuard {
        fn drop(&mut self) {
            let Self(child) = self;
            let pid = child.id() as libc::pid_t;
            // SAFETY: `kill` is an FFI call taking two plain integers; no
            // Rust-managed memory is passed to it.
            unsafe {
                libc::kill(pid, libc::SIGTERM);
            }
            // The wait result is deliberately ignored: this is best-effort cleanup.
            let _ = child.wait();
        }
    }

    /// A child spawned through `detach_daemon_child` must report a session id
    /// equal to its own pid, i.e. it leads a session of its own.
    #[test]
    fn coordinator_daemon_spawn_helper_detaches_session() {
        let mut command = Command::new("/bin/sleep");
        command.arg("30");
        command
            .stdin(Stdio::null())
            .stdout(Stdio::null())
            .stderr(Stdio::null());
        detach_daemon_child(&mut command);

        let guard = ChildGuard(command.spawn().expect("spawn detached child"));
        let pid = guard.0.id() as libc::pid_t;
        // SAFETY: `getsid` is an FFI call taking a single integer pid.
        let sid = unsafe { libc::getsid(pid) };

        assert_ne!(sid, -1, "getsid({pid}) failed");
        assert_eq!(
            sid, pid,
            "detached coordinator children must become session leaders so launcher SIGHUP does not reach them"
        );
    }
}
|