npm - @team-agent/installer - Versions diffs - 0.2.1 → 0.2.3 - Mend

@team-agent/installer 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

package/package.json +1 -1
package/schemas/team.schema.json +6 -0
package/src/team_agent/approvals/runtime_prompts.py +1 -1
package/src/team_agent/cli/commands.py +122 -6
package/src/team_agent/cli/parser.py +42 -1
package/src/team_agent/coordinator/__main__.py +21 -2
package/src/team_agent/coordinator/lifecycle.py +11 -0
package/src/team_agent/diagnose/orphan_cleanup.py +364 -0
package/src/team_agent/events.py +47 -0
package/src/team_agent/launch/core.py +2 -1
package/src/team_agent/leader/__init__.py +273 -60
package/src/team_agent/lifecycle/agents.py +54 -2
package/src/team_agent/lifecycle/operations.py +87 -9
package/src/team_agent/lifecycle/start.py +1 -1
package/src/team_agent/message_store/core.py +8 -7
package/src/team_agent/message_store/leader_notification_log.py +132 -0
package/src/team_agent/message_store/result_watchers.py +144 -1
package/src/team_agent/message_store/schema.py +31 -2
package/src/team_agent/messaging/delivery.py +293 -1
package/src/team_agent/messaging/idle_alerts.py +109 -9
package/src/team_agent/messaging/leader.py +179 -10
package/src/team_agent/messaging/leader_api_errors.py +216 -0
package/src/team_agent/messaging/leader_panes.py +393 -23
package/src/team_agent/messaging/result_delivery.py +219 -4
package/src/team_agent/messaging/results.py +12 -21
package/src/team_agent/messaging/scheduler.py +24 -2
package/src/team_agent/messaging/send.py +21 -26
package/src/team_agent/messaging/tmux_io.py +153 -23
package/src/team_agent/messaging/tmux_prompt.py +87 -0
package/src/team_agent/messaging/trust_auto_answer.py +44 -0
package/src/team_agent/restart/orchestration.py +207 -4
package/src/team_agent/runtime.py +7 -7
package/src/team_agent/rust_core.py +157 -3
package/src/team_agent/sessions/capture.py +65 -15
package/src/team_agent/spec.py +59 -0
package/src/team_agent/state.py +153 -10
package/src/team_agent/status/inbox.py +33 -3
package/src/team_agent/status/queries.py +32 -1
package/src/team_agent/watch/__init__.py +145 -0

package/src/team_agent/messaging/result_delivery.py CHANGED Viewed

@@ -1,11 +1,14 @@
 from __future__ import annotations
 import json
+from datetime import datetime, timezone
 from pathlib import Path
 from typing import Any
 from team_agent.events import EventLog
 from team_agent.message_store import MessageStore
+from team_agent.message_store.leader_notification_log import peek_leader_notification
+from team_agent.message_store.result_watchers import leader_notified_message_id_for_result
 from team_agent.messaging.deps import send_message
 from team_agent.messaging.internal_delivery import deliver_stored_message
@@ -22,7 +25,13 @@ def retry_result_deliveries(workspace: Path, event_log: EventLog) -> list[dict[s
         row = store.result_by_id(str(watcher["result_id"]))
         if not row:
             continue
-        notified.extend(notify_result_watchers(workspace, _result_entry_from_row(row), event_log, watchers=[watcher]))
+        notified.extend(notify_result_watchers(
+            workspace,
+            _result_entry_from_row(row),
+            event_log,
+            watchers=[watcher],
+            dedupe_reason="rebind_retry",
+        ))
     return notified
@@ -31,6 +40,7 @@ def notify_result_watchers(
     result: dict[str, Any],
     event_log: EventLog,
     watchers: list[dict[str, Any]] | None = None,
+    dedupe_reason: str | None = None,
 ) -> list[dict[str, Any]]:
     store = MessageStore(workspace)
     candidates = [
@@ -67,9 +77,44 @@ def notify_result_watchers(
             }
         )
     attempts = result_delivery_attempts(event_log, primary["watcher_id"], str(result.get("result_id") or ""))
+    # Stage 12 (Gap 26 ∩ Gap 32 roundtable consolidation 2026-05-26): exactly-once dedupe
+    # lives in leader_notification_log keyed by (result_id, leader_session_uuid) and is
+    # consulted atomically at the injection boundary inside _send_to_leader_receiver. Here
+    # we add a read-only fast-path peek so concurrent notify_result_watchers calls for the
+    # same result short-circuit without spinning up a deliver_stored_message round-trip.
+    # The peek is NOT the dedupe primitive — the atomic INSERT OR IGNORE at injection is.
+    result_id_str = str(result.get("result_id") or "") or None
+    if result_id_str:
+        leader_uuid = _resolve_leader_session_uuid(workspace, primary.get("owner_team_id"))
+        if leader_uuid:
+            prior = peek_leader_notification(
+                store, result_id=result_id_str, leader_session_uuid=leader_uuid,
+            )
+            if prior:
+                notified.append(_mark_watcher_dedupe_skip(
+                    store, event_log, primary, result, attempts,
+                    prior["notified_message_id"],
+                    dedupe_reason or "injection_log_already_notified",
+                    notified_at=prior.get("notified_at"),
+                    leader_session_uuid=leader_uuid,
+                ))
+                return notified
+        # Legacy compat: watcher.notified_message_id set by a prior path (Gap 32 reversal of
+        # 78055bc, or any pre-Stage-12 code) also blocks redelivery. This preserves the
+        # Stage 11.9-11.12 era contract while the new gate (leader_notification_log) is the
+        # authoritative dedupe primitive going forward.
+        legacy_canonical = leader_notified_message_id_for_result(
+            store, primary.get("owner_team_id"), result_id_str,
+        )
+        if legacy_canonical:
+            notified.append(_mark_watcher_dedupe_skip(
+                store, event_log, primary, result, attempts,
+                legacy_canonical,
+                dedupe_reason or "rebind_retry",
+            ))
+            return notified
     existing = delivered_result_message(
-        store,
-        str(result.get("result_id") or ""),
+        store, str(result.get("result_id") or ""),
         task_id=result.get("task_id"),
         owner_team_id=primary.get("owner_team_id"),
     )
@@ -83,6 +128,75 @@ def notify_result_watchers(
     return notified
+def _resolve_leader_session_uuid(workspace: Path, owner_team_id: str | None) -> str | None:
+    """Helper: read the team's leader_session_uuid from runtime state for gate lookups.
+
+    Returns ``team_owner["leader_session_uuid"]`` as a string, or ``None`` when the value
+    is missing/empty, when a state without a ``teams`` dict belongs to a different team
+    than ``owner_team_id``, or when anything raises while loading or reading the state.
+    """
+    # Best-effort lookup: any failure below is reported to the caller as "no uuid".
+    try:
+        # Imported inside the function rather than at module top.
+        from team_agent.messaging.deps import load_runtime_state, team_state_key
+        state = load_runtime_state(workspace)
+        if owner_team_id and isinstance(state.get("teams"), dict):
+            # State with a "teams" dict: narrow to this team's sub-state when present.
+            scoped = state["teams"].get(owner_team_id)
+            if isinstance(scoped, dict):
+                state = scoped
+            # NOTE(review): when "teams" has no dict entry for owner_team_id, the
+            # top-level state's team_owner is used below without a team-key check —
+            # confirm this fallback is intended.
+        elif owner_team_id and team_state_key(state) != owner_team_id:
+            # State without a "teams" dict that belongs to a different team.
+            return None
+        owner = state.get("team_owner") or {}
+        # Normalise a missing or empty uuid to None instead of "".
+        return str(owner.get("leader_session_uuid") or "") or None
+    except Exception:
+        return None
+def _infer_dedupe_reason(primary: dict[str, Any], store: MessageStore) -> str:
+    """Pick a dedupe reason label from the primary watcher row.
+
+    Returns ``"rebind_retry"`` when the watcher already carries a truthy
+    ``notified_message_id``, otherwise ``"watcher_duplicate"``.
+    """
+    # NOTE(review): `store` is not used here, and no caller of this helper is visible
+    # in this diff — confirm it is still needed.
+    if primary.get("notified_message_id"):
+        return "rebind_retry"
+    return "watcher_duplicate"
+def _mark_watcher_dedupe_skip(
+    store: MessageStore,
+    event_log: EventLog,
+    watcher: dict[str, Any],
+    result: dict[str, Any],
+    attempts: int,
+    canonical_message_id: str,
+    reason: str,
+    *,
+    notified_at: str | None = None,
+    leader_session_uuid: str | None = None,
+) -> dict[str, Any]:
+    """Record that a watcher notification was suppressed as a duplicate.
+
+    Marks the watcher row ``"notified"`` with ``canonical_message_id``, writes a
+    ``leader_receiver.notification_dedupe_skip`` event, and returns a success-shaped
+    delivery record flagged ``deduped``.
+
+    Args:
+        store: message store whose ``mark_result_watcher`` updates the watcher row.
+        event_log: log that receives the dedupe-skip event.
+        watcher: watcher row; ``watcher_id`` is required, ``notified_message_id`` and
+            ``owner_team_id`` are read when present.
+        result: result entry; ``result_id``, ``task_id`` and ``agent_id`` are read.
+        attempts: number of prior delivery attempts; the event logs ``attempts + 1``.
+        canonical_message_id: message id recorded on the watcher and returned to the caller.
+        reason: dedupe reason written to the event and to the returned record.
+        notified_at: optional timestamp of the earlier notification (event only).
+        leader_session_uuid: optional leader session id (event only).
+
+    Returns:
+        Dict with ``watcher_id``, ``result_id``, ``ok=True``, ``message_id``,
+        ``deduped=True`` and ``dedupe_reason``.
+    """
+    # Capture the watcher's previous message id before the row is overwritten below,
+    # so the event can report both the old and the canonical value.
+    original_message_id = watcher.get("notified_message_id")
+    # Stage 12: the canonical message_id (or sentinel from the gate) is auditing metadata
+    # here. The authoritative dedupe gate is leader_notification_log; this mark just keeps
+    # the watcher row from being re-picked by retry scans.
+    store.mark_result_watcher(
+        watcher["watcher_id"],
+        "notified",
+        result_id=result.get("result_id"),
+        notified_message_id=canonical_message_id,
+    )
+    event_log.write(
+        "leader_receiver.notification_dedupe_skip",
+        result_id=result.get("result_id"),
+        original_message_id=original_message_id,
+        suppressed_message_id=canonical_message_id,
+        reason=reason,
+        team_id=watcher.get("owner_team_id"),
+        watcher_id=watcher["watcher_id"],
+        task_id=result.get("task_id"),
+        agent_id=result.get("agent_id"),
+        attempt=attempts + 1,
+        leader_session_uuid=leader_session_uuid,
+        prior_notified_at=notified_at,
+    )
+    # Reported as ok=True: a suppressed duplicate counts as a successful notification.
+    return {
+        "watcher_id": watcher["watcher_id"],
+        "result_id": result.get("result_id"),
+        "ok": True,
+        "message_id": canonical_message_id,
+        "deduped": True,
+        "dedupe_reason": reason,
+    }
 def _dedupe_watchers_for_result(
     watchers: list[dict[str, Any]],
 ) -> tuple[dict[str, Any], list[dict[str, Any]]]:
@@ -114,11 +228,19 @@ def _deliver_result_to_watcher(
         return _mark_delivery_failed(store, event_log, watcher, result, attempts, str(exc))
     status = "notified" if delivery.get("ok") else "notify_failed"
     error = delivery.get("reason") or delivery.get("error")
+    # Stage 12: notified_message_id is now auditing metadata. The exactly-once contract
+    # lives in the leader_notification_log table consulted by _send_to_leader_receiver;
+    # whatever the gate suppresses comes back as ok=true deduped=true, and the watcher row
+    # records this as a successful notification with the canonical message_id.
+    persisted_message_id = (
+        delivery.get("canonical_message_id") if delivery.get("deduped")
+        else (delivery.get("message_id") if delivery.get("ok") else None)
+    )
     store.mark_result_watcher(
         watcher["watcher_id"],
         status,
         result_id=result.get("result_id"),
-        notified_message_id=delivery.get("message_id"),
+        notified_message_id=persisted_message_id,
         error=error,
     )
     event_log.write(
@@ -279,6 +401,99 @@ def watcher_matches_result(watcher: dict[str, Any], result: dict[str, Any]) -> b
     return (not task_id or task_id == result.get("task_id")) and (not agent_id or agent_id == result.get("agent_id"))
+def requeue_after_claim_leader(
+    workspace: Path,
+    store: MessageStore,
+    event_log: EventLog,
+    owner_team_id: str,
+    claimed_pane_id: str,
+    *,
+    incident_ts: str | None = None,
+) -> list[dict[str, Any]]:
+    """Post-claim hook (Gap 26 / Mac mini Stage 11 Scenarios 3, 11.10): requeue every
+    not-yet-notified result watcher of this team after a leader pane has been claimed,
+    then trigger one delivery retry. Returns the list of requeued watcher records
+    (may be empty).
+
+    Stage 11.10 semantic reframe: claim-leader means "all not-yet-delivered leader-bound
+    notifications for this team_id reroute to the claimed pane". Watcher status is
+    irrelevant for selection; a non-null `notified_message_id` is what excludes a
+    watcher here (Gap 32 once-only).
+
+    Selection rules:
+      - watcher is scoped to this team (owner_team_id match)
+      - watcher has no notified_message_id (Gap 32 once-only)
+      - watcher's latest activity timestamp (completed_at fallback created_at) is
+        at-or-after incident_ts when provided; without an incident_ts (or with one
+        that cannot be parsed) every un-notified watcher is requeued. A watcher with
+        no parseable timestamp is requeued as well.
+      - watcher status is otherwise ignored (pending / delivery_blocked /
+        delivery_exhausted / notify_failed all become candidates).
+
+    Race handling: this function does NOT re-fetch the watcher row before flipping its
+    status and never emits leader_receiver.claim_requeue_already_in_flight; that
+    compare-and-swap step was retired in Stage 11.12 (see the comment at the top of the
+    body). Protection against double injection is left to the dedupe checks reached
+    through retry_result_deliveries.
+
+    Args:
+        workspace: workspace path handed to retry_result_deliveries.
+        store: message store used to list and mark watchers.
+        event_log: receives claim_requeue / claim_requeue_delivery_failed events.
+        owner_team_id: team whose watchers are considered.
+        claimed_pane_id: id of the claimed pane; in this function it is only written
+            to the emitted events.
+        incident_ts: optional ISO-8601 cutoff; watchers older than it are skipped.
+
+    Returns:
+        One dict per requeued watcher with `watcher_id`, `result_id`, `prior_state`.
+        A failure of the follow-up retry is logged, not raised.
+    """
+    # Stage 11.12: CAS re-fetch + claim_requeue_already_in_flight event retired. The atomic
+    # UPSERT in notify_result_watchers (claim_leader_notification) is now the single race
+    # gate. We mark eligible watchers to notify_failed and let retry_result_deliveries route
+    # through the UPSERT — concurrent claim/scheduled-retry paths both pass through the
+    # same atomic claim and only one fires deliver_attempt.
+    # NOTE(review): the Stage 12 comment in notify_result_watchers names
+    # leader_notification_log (consulted inside _send_to_leader_receiver) as the
+    # authoritative gate — the "UPSERT / claim_leader_notification" wording above may
+    # be stale; confirm.
+    incident_dt = _parse_iso(incident_ts)
+    requeued: list[dict[str, Any]] = []
+    for watcher in store.result_watchers(owner_team_id=owner_team_id):
+        # Already notified once: never requeue.
+        if watcher.get("notified_message_id"):
+            continue
+        latest_ts = _parse_iso(watcher.get("completed_at")) or _parse_iso(watcher.get("created_at"))
+        # Skip only when both timestamps are known and the watcher predates the incident.
+        if incident_dt and latest_ts and latest_ts < incident_dt:
+            continue
+        watcher_id = watcher["watcher_id"]
+        prior_state = str(watcher.get("status") or "")
+        # Flip to notify_failed so the retry pass below treats the watcher as retryable.
+        store.mark_result_watcher(
+            watcher_id, "notify_failed",
+            result_id=watcher.get("result_id"),
+        )
+        event_log.write(
+            "leader_receiver.claim_requeue",
+            result_id=watcher.get("result_id"),
+            watcher_id=watcher_id,
+            prior_state=prior_state,
+            requeued_at=datetime.now(timezone.utc).isoformat(),
+            claimed_pane_id=claimed_pane_id,
+            team_id=owner_team_id,
+        )
+        requeued.append({
+            "watcher_id": watcher_id,
+            "result_id": watcher.get("result_id"),
+            "prior_state": prior_state,
+        })
+    if requeued:
+        # One retry pass for the whole batch; the call takes no team argument.
+        # A failure is logged and the requeued list is still returned.
+        try:
+            retry_result_deliveries(workspace, event_log)
+        except Exception as exc:
+            event_log.write(
+                "leader_receiver.claim_requeue_delivery_failed",
+                error=str(exc),
+                watcher_ids=[r["watcher_id"] for r in requeued],
+                team_id=owner_team_id,
+                claimed_pane_id=claimed_pane_id,
+            )
+    return requeued
+def _parse_iso(text: Any) -> datetime | None:
+    """Parse an ISO-8601 timestamp string into a timezone-aware datetime.
+
+    Returns ``None`` for non-string or empty input and for strings that
+    ``datetime.fromisoformat`` rejects. Naive timestamps are treated as UTC.
+    """
+    if not isinstance(text, str) or not text:
+        return None
+    try:
+        # Rewrite "Z" as an explicit "+00:00" offset before parsing.
+        dt = datetime.fromisoformat(text.replace("Z", "+00:00"))
+    except ValueError:
+        return None
+    if dt.tzinfo is None:
+        # Attach UTC so the result can be compared with aware datetimes.
+        dt = dt.replace(tzinfo=timezone.utc)
+    return dt
 def format_result_watcher_notification(result: dict[str, Any]) -> str:
     task_id = result.get("task_id") or "unknown task"
     agent_id = result.get("agent_id") or "unknown agent"

package/src/team_agent/messaging/results.py CHANGED Viewed

@@ -359,36 +359,27 @@ def _refresh_leader_receiver_or_flag_rebind(
     receiver = state.get("leader_receiver") or {}
     if receiver.get("mode") != "direct_tmux":
         return state
-    validation = _validate_leader_receiver(receiver)
+    owner_identity = state.get("team_owner") or None
+    receiver_for_validation = dict(receiver)
+    if owner_identity and owner_identity.get("leader_session_uuid") and not receiver_for_validation.get("leader_session_uuid"):
+        receiver_for_validation["leader_session_uuid"] = owner_identity["leader_session_uuid"]
+    validation = _validate_leader_receiver(receiver_for_validation)
     if validation.get("ok"):
         return state
-    owner_identity = state.get("team_owner") or None
-    rediscovered = _rediscover_leader_receiver(receiver, event_log, owner_identity)
+    rediscovered = _rediscover_leader_receiver(
+        receiver_for_validation,
+        event_log,
+        owner_identity,
+        invalidation_reason=validation.get("reason"),
+        team_id=team_state_key(state),
+    )
     if rediscovered.get("status") == "updated":
         state["leader_receiver"] = rediscovered["receiver"]
         if persist:
             save_runtime_state(workspace, state)
         else:
             save_team_scoped_state(workspace, state)
-        event_log.write(
-            "leader_receiver.rebind_applied",
-            old_pane_id=receiver.get("pane_id"),
-            new_pane_id=rediscovered["receiver"].get("pane_id"),
-            reason=validation.get("reason"),
-            source="report_result_notify",
-            owner_identity=owner_identity,
-        )
         return state
-    event_log.write(
-        "leader_receiver.rebind_required",
-        old_pane_id=receiver.get("pane_id"),
-        reason=validation.get("reason"),
-        validation_error=validation.get("error"),
-        rediscovery_status=rediscovered.get("status"),
-        provider=receiver.get("provider"),
-        source="report_result_notify",
-        owner_identity=owner_identity,
-    )
     return state

package/src/team_agent/messaging/scheduler.py CHANGED Viewed

@@ -84,6 +84,18 @@ def _fire_due_scheduled_events(workspace: Path, store: MessageStore, event_log:
             elif row["kind"] == "health_ping":
                 result = {"ok": True, "status": "logged"}
                 event_log.write("coordinator.health_ping", target=row["target"], payload=payload)
+            elif row["kind"] == "trust_retry":
+                # Spark MEDIUM sweep #3 (2026-05-26) — bounded-backoff consumer
+                # for delivery.py:_handle_trust_retry_needed. payload carries the
+                # message_id and current attempt; _execute_trust_retry resets the
+                # row to 'accepted', re-runs _deliver_pending_message with the
+                # attempt threaded through, and either delivers, reschedules, or
+                # hits the terminal trust_auto_answer_exhausted branch.
+                from team_agent.messaging.delivery import _execute_trust_retry
+                result = _execute_trust_retry(
+                    workspace, store, event_log, payload,
+                    owner_team_id=row.get("owner_team_id"),
+                )
             else:
                 result = {"ok": False, "error": f"unknown scheduled event kind: {row['kind']}"}
             if not result.get("ok") and row["kind"] == "send":
@@ -409,8 +421,18 @@ def _recent_restart_or_reset_event(event_log: EventLog, agent_id: str, since: da
     for event in reversed(event_log.tail(200)):
         if event.get("event") not in _RESTART_RESET_EVENTS:
             continue
-        if event.get("agent_id") != agent_id and agent_id not in set(event.get("agents") or []):
-            continue
+        if event.get("agent_id") != agent_id:
+            agents_field = event.get("agents") or []
+            agent_ids: set[str] = set()
+            for entry in agents_field:
+                if isinstance(entry, str):
+                    agent_ids.add(entry)
+                elif isinstance(entry, dict):
+                    aid = entry.get("agent_id")
+                    if isinstance(aid, str):
+                        agent_ids.add(aid)
+            if agent_id not in agent_ids:
+                continue
         try:
             ts = datetime.fromisoformat(str(event.get("ts")))
         except ValueError:

package/src/team_agent/messaging/send.py CHANGED Viewed

@@ -34,19 +34,10 @@ from pathlib import Path
 from typing import Any
 def send_message(
-    workspace: Path,
-    target: str | list[str] | None,
-    content: str,
-    task_id: str | None = None,
-    sender: str = "leader",
-    requires_ack: bool = True,
-    confirm_human: bool = False,
-    wait_visible: bool = True,
-    timeout: float = 30.0,
-    lock_timeout: float = 5.0,
-    watch_result: bool = False,
-    block_until_delivered: bool = True,
-    team: str | None = None,
+    workspace: Path, target: str | list[str] | None, content: str, task_id: str | None = None,
+    sender: str = "leader", requires_ack: bool = True, confirm_human: bool = False,
+    wait_visible: bool = True, timeout: float = 30.0, lock_timeout: float = 5.0,
+    watch_result: bool = False, block_until_delivered: bool = True, team: str | None = None,
 ) -> dict[str, Any]:
     with _runtime_lock(workspace, "send", timeout=lock_timeout):
         return _send_message_unlocked(
@@ -66,18 +57,10 @@ def send_message(
 def _send_message_unlocked(
-    workspace: Path,
-    target: str | list[str] | None,
-    content: str,
-    task_id: str | None = None,
-    sender: str = "leader",
-    requires_ack: bool = True,
-    confirm_human: bool = False,
-    wait_visible: bool = True,
-    timeout: float = 30.0,
-    watch_result: bool = False,
-    block_until_delivered: bool = True,
-    team: str | None = None,
+    workspace: Path, target: str | list[str] | None, content: str, task_id: str | None = None,
+    sender: str = "leader", requires_ack: bool = True, confirm_human: bool = False,
+    wait_visible: bool = True, timeout: float = 30.0, watch_result: bool = False,
+    block_until_delivered: bool = True, team: str | None = None,
 ) -> dict[str, Any]:
     if team is None:
         ambiguous = ambiguous_team_target_result(load_runtime_state(workspace))
@@ -336,6 +319,8 @@ def _send_single_message_unlocked(
         "submit_verification": delivered_result.get("submit_verification"),
         "turn_verification": delivered_result.get("turn_verification"),
     }
+    result.update({key: delivered_result[key] for key in ("reason", "stage") if delivered_result.get(key)})
+    result.update(_structured_delivery_refusal(delivered_result))
     if delivered_result.get("queued"):
         result["queued"] = True
         result["reason"] = delivered_result.get("reason")
@@ -490,7 +475,7 @@ def _broadcast_targets(state: dict[str, Any], spec: dict[str, Any], sender: str)
 def _compact_broadcast_delivery(result: dict[str, Any]) -> dict[str, Any]:
-    keys = ["ok", "status", "message_id", "to", "reason", "channel"]
+    keys = ["ok", "status", "message_id", "to", "reason", "channel", "detected", "pane_id", "pane_mode", "pane_capture_tail", "stage", "verification"]
     return {key: result[key] for key in keys if key in result}
@@ -498,3 +483,13 @@ def _compact_fanout_delivery(result: dict[str, Any]) -> dict[str, Any]:
     compact = _compact_broadcast_delivery(result)
     compact["delivered"] = bool(result.get("submitted") or result.get("visible") or result.get("status") in {"submitted", "visible", "delivered", "acknowledged"})
     return compact
+def _structured_delivery_refusal(delivered_result: dict[str, Any]) -> dict[str, Any]:
+    """Extract refusal details from a delivery result's paste attempts.
+
+    Looks through ``delivered_result["paste_attempts"]`` for the first dict entry whose
+    ``reason`` is ``"recipient_pane_in_non_input_mode"`` and returns whichever of
+    ``detected``, ``pane_id``, ``pane_mode`` and ``pane_capture_tail`` it contains.
+    Returns ``{}`` when ``paste_attempts`` is not a list or no entry matches.
+    """
+    attempts = delivered_result.get("paste_attempts")
+    if not isinstance(attempts, list):
+        return {}
+    for attempt in attempts:
+        # Non-dict entries are ignored; the first matching attempt wins.
+        if isinstance(attempt, dict) and attempt.get("reason") == "recipient_pane_in_non_input_mode":
+            return {key: attempt[key] for key in ("detected", "pane_id", "pane_mode", "pane_capture_tail") if key in attempt}
+    return {}