@team-agent/installer 0.3.10 → 0.3.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Cargo.lock CHANGED
@@ -566,7 +566,7 @@ dependencies = [
566
566
 
567
567
  [[package]]
568
568
  name = "team-agent"
569
- version = "0.3.10"
569
+ version = "0.3.12"
570
570
  dependencies = [
571
571
  "anyhow",
572
572
  "chrono",
package/Cargo.toml CHANGED
@@ -9,7 +9,7 @@ members = ["crates/team-agent"]
9
9
 
10
10
  [workspace.package]
11
11
  edition = "2021"
12
- version = "0.3.10"
12
+ version = "0.3.12"
13
13
  license = "AGPL-3.0"
14
14
  rust-version = "1.95"
15
15
 
@@ -22,7 +22,9 @@ pub fn cmd_send(args: &SendArgs) -> Result<CmdResult, CliError> {
22
22
  // empty message body is a prompt-only invocation (`team-agent send "fix the build"`).
23
23
  // The lone positional is CONTENT, not a target — reject with `routing_ambiguous`
24
24
  // (NOT `target_not_in_team`, which would lie that the user did pick a target).
25
- if let Some(amb) = routing_ambiguous_value(&selected.run_workspace, args, &target, &content, &opts) {
25
+ if let Some(amb) =
26
+ routing_ambiguous_value(&selected.run_workspace, args, &target, &content, &opts)
27
+ {
26
28
  return Ok(CmdResult::from_json(amb, args.json));
27
29
  }
28
30
  let outcome = messaging::send_message(&selected.run_workspace, &target, &content, &opts)?;
@@ -117,7 +119,10 @@ fn watch_notice_json(target: &MessageTarget, opts: &SendOptions) -> Value {
117
119
  let agent_id = match target {
118
120
  MessageTarget::Single(agent) => agent.clone(),
119
121
  MessageTarget::Broadcast => "*".to_string(),
120
- MessageTarget::Fanout(recipients) => recipients.first().cloned().unwrap_or_else(|| "-".to_string()),
122
+ MessageTarget::Fanout(recipients) => recipients
123
+ .first()
124
+ .cloned()
125
+ .unwrap_or_else(|| "-".to_string()),
121
126
  };
122
127
  json!({
123
128
  "status": "registered",
@@ -174,6 +179,7 @@ fn delivery_status_wire(status: DeliveryStatus) -> &'static str {
174
179
  DeliveryStatus::Queued => "queued",
175
180
  DeliveryStatus::Blocked => "blocked",
176
181
  DeliveryStatus::Refused => "refused",
182
+ DeliveryStatus::Degraded => "degraded",
177
183
  DeliveryStatus::RetryScheduled => "retry_scheduled",
178
184
  DeliveryStatus::TrustAutoAnswerExhausted => "trust_auto_answer_exhausted",
179
185
  DeliveryStatus::AlreadyDelivered => "already_delivered",
@@ -195,6 +201,7 @@ fn delivery_refusal_wire(reason: DeliveryRefusal) -> &'static str {
195
201
  DeliveryRefusal::TmuxTargetMissing => "tmux_target_missing",
196
202
  DeliveryRefusal::MessageAlreadyClaimed => "message_already_claimed",
197
203
  DeliveryRefusal::LeaderNotAttached => "leader_not_attached",
204
+ DeliveryRefusal::CoordinatorUnavailable => "coordinator_unavailable",
198
205
  DeliveryRefusal::NoCallerPane => "no_caller_pane",
199
206
  DeliveryRefusal::TeamOwnerMismatch => "team_owner_mismatch",
200
207
  DeliveryRefusal::Ambiguous => "ambiguous",
@@ -8,7 +8,7 @@ use crate::model::enums::Provider;
8
8
  use crate::provider::ProviderAdapter;
9
9
 
10
10
  use super::health::{coordinator_pid_path, write_coordinator_metadata};
11
- use super::tick::TickError;
11
+ use super::tick::{TickError, TickReport};
12
12
  use super::types::{
13
13
  ErrorLists, MetadataSource, Pid, ProviderRegistry, WorkspacePath, BACKOFF_MAX_SEC,
14
14
  DEFAULT_TICK_INTERVAL_SEC,
@@ -87,7 +87,7 @@ pub fn run_daemon_with_coordinator(
87
87
  );
88
88
  break;
89
89
  }
90
- match coordinator.tick() {
90
+ match run_tick_with_panic_marker(&event_log, || coordinator.tick()) {
91
91
  Ok(report) => {
92
92
  if consecutive_failures > 0 {
93
93
  event_log.write(
@@ -150,6 +150,36 @@ pub fn run_daemon_with_coordinator(
150
150
  Ok(())
151
151
  }
152
152
 
153
+ fn run_tick_with_panic_marker<F>(event_log: &EventLog, tick: F) -> Result<TickReport, TickError>
154
+ where
155
+ F: FnOnce() -> Result<TickReport, TickError>,
156
+ {
157
+ match std::panic::catch_unwind(std::panic::AssertUnwindSafe(tick)) {
158
+ Ok(result) => result,
159
+ Err(payload) => {
160
+ let panic_message = panic_payload_message(payload.as_ref());
161
+ event_log.write(
162
+ "coordinator.tick_panic",
163
+ serde_json::json!({
164
+ "panic": panic_message,
165
+ "backtrace": std::backtrace::Backtrace::force_capture().to_string(),
166
+ }),
167
+ )?;
168
+ Err(TickError::Panic(panic_message))
169
+ }
170
+ }
171
+ }
172
+
173
+ fn panic_payload_message(payload: &(dyn std::any::Any + Send)) -> String {
174
+ if let Some(message) = payload.downcast_ref::<&str>() {
175
+ (*message).to_string()
176
+ } else if let Some(message) = payload.downcast_ref::<String>() {
177
+ message.clone()
178
+ } else {
179
+ "non-string panic payload".to_string()
180
+ }
181
+ }
182
+
153
183
  /// 当前 ppid(`os.getppid()`,孤儿自检输入)。
154
184
  fn current_ppid() -> u32 {
155
185
  u32::try_from(unsafe { libc::getppid() }).unwrap_or(0)
@@ -199,3 +229,54 @@ pub enum DaemonError {
199
229
  #[error("tick: {0}")]
200
230
  Tick(#[from] TickError),
201
231
  }
232
+
233
+ #[cfg(test)]
234
+ mod tests {
235
+ use super::*;
236
+
237
+ fn tmp_ws(tag: &str) -> std::path::PathBuf {
238
+ use std::sync::atomic::{AtomicU64, Ordering};
239
+ static N: AtomicU64 = AtomicU64::new(0);
240
+ let n = N.fetch_add(1, Ordering::Relaxed);
241
+ let path =
242
+ std::env::temp_dir().join(format!("ta-rs-coord-{tag}-{}-{n}", std::process::id()));
243
+ std::fs::create_dir_all(&path).unwrap();
244
+ path
245
+ }
246
+
247
+ #[test]
248
+ fn coordinator_tick_panic_writes_durable_marker() {
249
+ let workspace = tmp_ws("tick-panic");
250
+ let event_log = EventLog::new(&workspace);
251
+
252
+ let old_hook = std::panic::take_hook();
253
+ std::panic::set_hook(Box::new(|_| {}));
254
+ let result = run_tick_with_panic_marker(&event_log, || -> Result<TickReport, TickError> {
255
+ panic!("synthetic tick panic")
256
+ });
257
+ std::panic::set_hook(old_hook);
258
+
259
+ assert!(
260
+ matches!(result, Err(TickError::Panic(message)) if message == "synthetic tick panic")
261
+ );
262
+ let events = event_log.tail(20).unwrap();
263
+ let panic_event = events
264
+ .iter()
265
+ .find(|event| {
266
+ event.get("event").and_then(serde_json::Value::as_str)
267
+ == Some("coordinator.tick_panic")
268
+ })
269
+ .expect("coordinator.tick_panic event");
270
+ assert_eq!(
271
+ panic_event.get("panic").and_then(serde_json::Value::as_str),
272
+ Some("synthetic tick panic")
273
+ );
274
+ assert!(
275
+ panic_event
276
+ .get("backtrace")
277
+ .and_then(serde_json::Value::as_str)
278
+ .is_some_and(|backtrace| !backtrace.is_empty()),
279
+ "panic marker must carry a backtrace; event={panic_event}"
280
+ );
281
+ }
282
+ }
@@ -1,6 +1,8 @@
1
1
  //! coordinator 健康/身份 & 只读可观测面:metadata 身份原语 + coordinator 路径 + watch 实时流。
2
2
 
3
3
  use std::io::{Read, Seek, SeekFrom};
4
+ #[cfg(unix)]
5
+ use std::os::unix::process::CommandExt;
4
6
  use std::path::{Path, PathBuf};
5
7
  use std::process::{Command, Stdio};
6
8
  use std::time::Duration;
@@ -96,13 +98,15 @@ pub fn start_coordinator(workspace: &WorkspacePath) -> Result<StartReport, Start
96
98
  .append(true)
97
99
  .open(&log_path)?;
98
100
  let log_err = log.try_clone()?;
99
- let child = Command::new(std::env::current_exe()?)
101
+ let mut command = Command::new(std::env::current_exe()?);
102
+ command
100
103
  .args(["coordinator", "--workspace"])
101
104
  .arg(workspace.as_path())
102
105
  .stdin(Stdio::null())
103
106
  .stdout(Stdio::from(log))
104
- .stderr(Stdio::from(log_err))
105
- .spawn()?;
107
+ .stderr(Stdio::from(log_err));
108
+ detach_daemon_child(&mut command);
109
+ let child = command.spawn()?;
106
110
  let pid = Pid::new(child.id());
107
111
  std::fs::write(coordinator_pid_path(workspace), pid.to_string())?;
108
112
  write_coordinator_metadata(workspace, pid, MetadataSource::Start)?;
@@ -116,6 +120,24 @@ pub fn start_coordinator(workspace: &WorkspacePath) -> Result<StartReport, Start
116
120
  })
117
121
  }
118
122
 
123
+ #[cfg(unix)]
124
+ fn detach_daemon_child(command: &mut Command) {
125
+ // The coordinator is a daemon: it must not remain in the launcher's process
126
+ // group, otherwise bare SSH command teardown can SIGHUP it after quick-start exits.
127
+ unsafe {
128
+ command.pre_exec(|| {
129
+ if libc::setsid() == -1 {
130
+ Err(std::io::Error::last_os_error())
131
+ } else {
132
+ Ok(())
133
+ }
134
+ });
135
+ }
136
+ }
137
+
138
+ #[cfg(not(unix))]
139
+ fn detach_daemon_child(_command: &mut Command) {}
140
+
119
141
  /// `stop_coordinator`(`lifecycle.py:228-247`):SIGTERM pid + 清 pid/meta → typed report。
120
142
  pub fn stop_coordinator(workspace: &WorkspacePath) -> Result<StopReport, StopError> {
121
143
  let pid_path = coordinator_pid_path(workspace);
@@ -696,3 +718,41 @@ fn clean_text(text: &str) -> String {
696
718
  fn prefix_chars(text: &str, max: usize) -> String {
697
719
  text.chars().take(max).collect()
698
720
  }
721
+
722
+ #[cfg(all(test, unix))]
723
+ mod tests {
724
+ use super::*;
725
+
726
+ struct ChildGuard(std::process::Child);
727
+
728
+ impl Drop for ChildGuard {
729
+ fn drop(&mut self) {
730
+ unsafe {
731
+ libc::kill(self.0.id() as libc::pid_t, libc::SIGTERM);
732
+ }
733
+ let _ = self.0.wait();
734
+ }
735
+ }
736
+
737
+ #[test]
738
+ fn coordinator_daemon_spawn_helper_detaches_session() {
739
+ let mut command = Command::new("/bin/sleep");
740
+ command
741
+ .arg("30")
742
+ .stdin(Stdio::null())
743
+ .stdout(Stdio::null())
744
+ .stderr(Stdio::null());
745
+ detach_daemon_child(&mut command);
746
+
747
+ let child = command.spawn().expect("spawn detached child");
748
+ let guard = ChildGuard(child);
749
+ let pid = guard.0.id() as libc::pid_t;
750
+ let sid = unsafe { libc::getsid(pid) };
751
+
752
+ assert_ne!(sid, -1, "getsid({pid}) failed");
753
+ assert_eq!(
754
+ sid, pid,
755
+ "detached coordinator children must become session leaders so launcher SIGHUP does not reach them"
756
+ );
757
+ }
758
+ }