@integrity-labs/agt-cli 0.27.124 → 0.27.126

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,7 +17,7 @@ import {
17
17
  provisionStopHook,
18
18
  requireHost,
19
19
  safeWriteJsonAtomic
20
- } from "../chunk-MQHZBG7J.js";
20
+ } from "../chunk-LTZS7SHV.js";
21
21
  import {
22
22
  getProjectDir as getProjectDir2,
23
23
  getReadyTasks,
@@ -56,7 +56,7 @@ import {
56
56
  stopPersistentSession,
57
57
  takeWatchdogGiveUpCount,
58
58
  takeZombieDetection
59
- } from "../chunk-AZEYTJ4L.js";
59
+ } from "../chunk-QON5CU3L.js";
60
60
  import {
61
61
  KANBAN_CHECK_COMMAND,
62
62
  SUPPRESS_SENTINEL,
@@ -83,7 +83,7 @@ import {
83
83
  resolveDmTarget,
84
84
  worseConnectivityOutcome,
85
85
  wrapScheduledTaskPrompt
86
- } from "../chunk-Z6JJRLHH.js";
86
+ } from "../chunk-MH7HA6QV.js";
87
87
  import {
88
88
  parsePsRows,
89
89
  reapOrphanChannelMcps
@@ -127,6 +127,22 @@ function channelSecretValueHash(envEntries, channelSecretKeys) {
127
127
  return createHash("sha256").update(basis).digest("hex").slice(0, 16);
128
128
  }
129
129
 
130
// src/lib/mcp-restart-verify.ts

/** Default length of one watch window, in milliseconds (90 s). */
var RESTART_VERIFY_DEADLINE_MS = 9e4;

/** Default number of expired watch windows after which the verdict is final. */
var RESTART_VERIFY_MAX_ATTEMPTS = 3;

/**
 * Pure decision function: given a pending restart record and a snapshot of
 * the session, decide whether the restart has been verified, is still inside
 * its watch window, or has run out of time.
 *
 * @param {{ firedAt: number, attempts?: number }} pending
 *   `firedAt` is the baseline timestamp compared against `sessionStartedAt`
 *   and `now`; `attempts` is the number of windows that already expired
 *   (a missing / non-numeric value is treated as 0).
 * @param {number|null} sessionStartedAt start time of the current session, or
 *   null when there is none. Compared with `>` against `pending.firedAt`.
 * @param {boolean} sessionHealthy whether the session is currently healthy.
 * @param {*} sessionRespawnId identifier of the current session; only checked
 *   for being non-null.
 * @param {number} now current time, same unit as `pending.firedAt`.
 * @param {{ deadlineMs?: number, maxAttempts?: number }|null} [opts]
 *   overrides for the defaults above; `null`/`undefined` means "no overrides".
 * @returns {{kind: "verified"} | {kind: "waiting"} |
 *           {kind: "unverified", attempt: number, final: boolean}}
 */
function decidePostRestartVerification(pending, sessionStartedAt, sessionHealthy, sessionRespawnId, now, opts = {}) {
  // `opts = {}` only covers `undefined`; optional chaining also tolerates an
  // explicit `null` instead of throwing a TypeError.
  const deadlineMs = opts?.deadlineMs ?? RESTART_VERIFY_DEADLINE_MS;
  const maxAttempts = opts?.maxAttempts ?? RESTART_VERIFY_MAX_ATTEMPTS;

  const startedAfterRestart = sessionStartedAt !== null && sessionStartedAt > pending.firedAt;
  if (startedAfterRestart && sessionHealthy && sessionRespawnId !== null) {
    return { kind: "verified" };
  }

  if (now - pending.firedAt < deadlineMs) {
    return { kind: "waiting" };
  }

  // A record without a numeric `attempts` would otherwise produce NaN, and
  // `NaN >= maxAttempts` is always false, so the verdict could never be final.
  const priorAttempts = Number.isFinite(pending.attempts) ? pending.attempts : 0;
  const attempt = priorAttempts + 1;
  return { kind: "unverified", attempt, final: attempt >= maxAttempts };
}
145
+
130
146
  // src/lib/integration-hash.ts
131
147
  import { createHash as createHash2 } from "crypto";
132
148
  function canonicalize(value) {
@@ -3959,6 +3975,7 @@ function hasRevokedResiduals(state6) {
3959
3975
  return state6.gatewayRunning || state6.portAllocated || state6.provisionDirExists;
3960
3976
  }
3961
3977
  var pendingSessionRestarts = /* @__PURE__ */ new Map();
3978
+ var pendingRestartVerifications = /* @__PURE__ */ new Map();
3962
3979
  var restartBreaker = new RestartBreaker();
3963
3980
  var reportedTrips = /* @__PURE__ */ new Map();
3964
3981
  var mcpFlapDampener = new McpFlapDampener();
@@ -3974,6 +3991,20 @@ function recordConfigChurnEvent(agentId, codeName, channel, signature) {
3974
3991
  );
3975
3992
  });
3976
3993
  }
3994
/**
 * Best-effort report of a reaper action: POSTs to `/host/reaper-event` and,
 * on failure, logs the error instead of propagating it.
 *
 * The returned promise must never reject: callers in this file invoke it as
 * `void reportReaperEvent(...)`, so a rejection would surface as an unhandled
 * promise rejection rather than being handled anywhere.
 *
 * @param {string} agentId sent as `agent_id` in the request body.
 * @param {string} reaper reaper name; sent in the body and used as the log prefix.
 * @param {string} action action taken by the reaper (callers pass e.g. "restart", "quarantine").
 * @param {string} reason human-readable explanation sent in the body.
 * @returns {Promise<void>}
 */
async function reportReaperEvent(agentId, reaper, action, reason) {
  try {
    await api.post("/host/reaper-event", {
      agent_id: agentId,
      reaper,
      action,
      reason
    });
  } catch (err) {
    // `err` is not guaranteed to be an Error: reading `.message` off a
    // null/undefined rejection would throw inside this handler and turn the
    // best-effort report into an unhandled rejection.
    const detail = err?.message ?? String(err);
    log(
      `[${reaper}] failed to report reaper-event (ENG-6184): ${detail} \u2014 local reap still proceeded; the alert may be missed`
    );
  }
}
3977
4008
  function recordRestartForBreaker(codeName, reason) {
3978
4009
  const result = restartBreaker.record(codeName, reason);
3979
4010
  if (!result.tripped || !result.trip) return;
@@ -4087,11 +4118,19 @@ function scheduleSessionRestart(codeName, delayMs, reason, breakerReason = "hot-
4087
4118
  runningMcpHashes.delete(codeName);
4088
4119
  recordRestartForBreaker(codeName, breakerReason);
4089
4120
  log(`[hot-reload] Session stopped for '${codeName}' \u2014 will respawn with ${reason}`);
4121
+ if (breakerReason === "hot-reload-mcp") {
4122
+ const prior = pendingRestartVerifications.get(codeName);
4123
+ pendingRestartVerifications.set(codeName, {
4124
+ firedAt: Date.now(),
4125
+ attempts: prior?.attempts ?? 0
4126
+ });
4127
+ }
4090
4128
  }, delayMs);
4091
4129
  timer.unref?.();
4092
4130
  pendingSessionRestarts.set(codeName, timer);
4093
4131
  }
4094
4132
  function cancelPendingSessionRestart(codeName) {
4133
+ pendingRestartVerifications.delete(codeName);
4095
4134
  const existing = pendingSessionRestarts.get(codeName);
4096
4135
  if (!existing) return;
4097
4136
  clearTimeout(existing);
@@ -4099,6 +4138,37 @@ function cancelPendingSessionRestart(codeName) {
4099
4138
  deferLogThrottle.delete(codeName);
4100
4139
  log(`[hot-reload] Cancelled pending restart timer for '${codeName}' (another teardown path is handling it)`);
4101
4140
  }
4141
/**
 * Walks every entry in `pendingRestartVerifications` and checks whether the
 * session restarted for an MCP change has come back healthy.
 *
 * Per entry the outcome is one of:
 *  - verified   -> entry removed, success logged;
 *  - waiting    -> nothing happens, the current watch window is still open;
 *  - unverified -> the window expired; either a WARN is logged and a new
 *                  window is opened, or (final attempt) an ERROR is logged
 *                  and the entry is removed.
 *
 * Entry shape: `{ firedAt, attempts, restartedAt? }`. `firedAt` is the start
 * of the current watch window. `restartedAt` (written only by this function)
 * preserves the time of the actual restart across window rollovers; when it
 * is absent the entry is fresh and `firedAt` is the restart time.
 *
 * @param {number} now current time in milliseconds.
 */
function verifyPendingRestarts(now) {
  if (pendingRestartVerifications.size === 0) return;
  for (const [codeName, pending] of pendingRestartVerifications) {
    const healthy = isSessionHealthy(codeName);
    const session = getSessionState(codeName);
    const startedAt = session?.startedAt ?? null;
    const respawnId = session?.currentSessionId ?? null;

    // "Did the session respawn?" has to be judged against the time of the
    // restart itself. Rolling a window forward moves `firedAt` to `now`; if
    // that moved value were used as the baseline, a session that respawned
    // during an earlier window but only turned healthy later could never be
    // verified and would end in a false ERROR.
    const restartedAt = pending.restartedAt ?? pending.firedAt;
    const respawnCheck = decidePostRestartVerification(
      { firedAt: restartedAt, attempts: pending.attempts },
      startedAt,
      healthy,
      respawnId,
      now,
      // Infinite deadline: this call only answers verified / not verified.
      { deadlineMs: Number.POSITIVE_INFINITY }
    );
    // Window timing (waiting vs. unverified) still runs off `pending.firedAt`.
    const outcome = respawnCheck.kind === "verified" ? respawnCheck : decidePostRestartVerification(pending, startedAt, healthy, respawnId, now);

    switch (outcome.kind) {
      case "verified":
        pendingRestartVerifications.delete(codeName);
        log(`[restart-verify] '${codeName}' respawned healthy after MCP change \u2014 tools bound (ENG-6174)`);
        break;
      case "waiting":
        break;
      case "unverified":
        if (outcome.final) {
          pendingRestartVerifications.delete(codeName);
          log(
            `[restart-verify] ERROR '${codeName}' did NOT respawn healthy after ${outcome.attempt} attempts following an MCP change \u2014 the live session may be stuck on its pre-restart tools. Check session health / manager.log; a manual session restart may be required (ENG-6174)`
          );
        } else {
          // Open a fresh watch window but keep the original restart time.
          pendingRestartVerifications.set(codeName, { firedAt: now, attempts: outcome.attempt, restartedAt });
          log(
            `[restart-verify] WARN '${codeName}' not yet respawned healthy after MCP change (attempt ${outcome.attempt}) \u2014 still watching (ENG-6174)`
          );
        }
        break;
    }
  }
}
4102
4172
  var RESTART_DEFER_RECHECK_MS = 6e4;
4103
4173
  var DEFER_LOG_THROTTLE_MS = 6e5;
4104
4174
  var deferLogThrottle = /* @__PURE__ */ new Map();
@@ -4424,7 +4494,7 @@ var cachedMaintenanceWindow = null;
4424
4494
  var lastVersionCheckAt = 0;
4425
4495
  var VERSION_CHECK_INTERVAL_MS = 5 * 60 * 1e3;
4426
4496
  var lastResponsivenessProbeAt = 0;
4427
- var agtCliVersion = true ? "0.27.124" : "dev";
4497
+ var agtCliVersion = true ? "0.27.126" : "dev";
4428
4498
  function resolveBrewPath(execFileSync4) {
4429
4499
  try {
4430
4500
  const out = execFileSync4("which", ["brew"], { timeout: 5e3 }).toString().trim();
@@ -5617,7 +5687,7 @@ async function pollCycle() {
5617
5687
  }
5618
5688
  try {
5619
5689
  const { detectHostSecurity } = await import("../host-security-6PDFG7F5.js");
5620
- const { collectDiagnostics } = await import("../persistent-session-YWGDREIZ.js");
5690
+ const { collectDiagnostics } = await import("../persistent-session-PJQZYG2L.js");
5621
5691
  const diagCodeNames = [...agentState.persistentSessionAgents];
5622
5692
  const agentDiagnostics = diagCodeNames.length > 0 ? collectDiagnostics(diagCodeNames) : void 0;
5623
5693
  let tailscaleHostname;
@@ -5704,12 +5774,12 @@ async function pollCycle() {
5704
5774
  const {
5705
5775
  collectResponsivenessProbes,
5706
5776
  getResponsivenessIntervalMs
5707
- } = await import("../responsiveness-probe-6YYCQAFI.js");
5777
+ } = await import("../responsiveness-probe-MGMZQSP7.js");
5708
5778
  const probeIntervalMs = getResponsivenessIntervalMs();
5709
5779
  if (now - lastResponsivenessProbeAt > probeIntervalMs) {
5710
5780
  const probeCodeNames = [...agentState.persistentSessionAgents];
5711
5781
  if (probeCodeNames.length > 0) {
5712
- const { takeAcpxExecFailureCount, creditAcpxExecFailureCount } = await import("../persistent-session-YWGDREIZ.js");
5782
+ const { takeAcpxExecFailureCount, creditAcpxExecFailureCount } = await import("../persistent-session-PJQZYG2L.js");
5713
5783
  const drainedGiveUps = /* @__PURE__ */ new Map();
5714
5784
  const drainedAcpxFailures = /* @__PURE__ */ new Map();
5715
5785
  const probes = collectResponsivenessProbes(probeCodeNames).map((p) => {
@@ -5743,7 +5813,7 @@ async function pollCycle() {
5743
5813
  collectResponsivenessProbes,
5744
5814
  livePendingInboundOldestAgeSeconds,
5745
5815
  deadLetterPendingInbound
5746
- } = await import("../responsiveness-probe-6YYCQAFI.js");
5816
+ } = await import("../responsiveness-probe-MGMZQSP7.js");
5747
5817
  const wedgeNow = /* @__PURE__ */ new Date();
5748
5818
  const liveAgents = agentState.persistentSessionAgents;
5749
5819
  for (const tracked of consecutiveWedgeCycles.keys()) {
@@ -6092,6 +6162,7 @@ async function pollCycle() {
6092
6162
  log(`[poll-backoff] recovered after ${consecutivePollFailures} failure(s), resuming normal interval`);
6093
6163
  consecutivePollFailures = 0;
6094
6164
  }
6165
+ verifyPendingRestarts(Date.now());
6095
6166
  send({ type: "state-update", state: state5 });
6096
6167
  } catch (err) {
6097
6168
  state5.errorCount++;
@@ -7550,6 +7621,12 @@ async function processAgent(agent, agentStates) {
7550
7621
  `[mcp-presence-reaper] failed to record restart event for '${codeName}' (ENG-5286): ${err.message} \u2014 local restart still proceeded; CloudWatch metric will under-count this event`
7551
7622
  );
7552
7623
  });
7624
+ void reportReaperEvent(
7625
+ agent.agent_id,
7626
+ "mcp-presence-reaper",
7627
+ "restart",
7628
+ givenUpKeys.length > 0 ? `restarted session; MCP server(s) still down after restart budget: ${givenUpKeys.join(", ")}` : `restarted session to recover MCP server(s): ${activeKeys.join(", ")}`
7629
+ );
7553
7630
  },
7554
7631
  // ENG-5932: classify each declared key for the quarantine path. Reads a
7555
7632
  // server-declared criticality field off the .mcp.json entry when
@@ -7580,6 +7657,12 @@ async function processAgent(agent, agentStates) {
7580
7657
  log(
7581
7658
  `[channel-quarantine] persisted quarantine marker for '${codeName}:${serverKey}' \u2014 will be dropped from .mcp.json on next provisioning poll (ENG-5932)`
7582
7659
  );
7660
+ void reportReaperEvent(
7661
+ agent.agent_id,
7662
+ "mcp-presence-reaper",
7663
+ "quarantine",
7664
+ `optional channel '${serverKey}' dead past restart budget + dwell \u2014 quarantined`
7665
+ );
7583
7666
  }
7584
7667
  }
7585
7668
  });
@@ -10227,7 +10310,7 @@ async function processClaudePairSessions(agents) {
10227
10310
  killPairSession,
10228
10311
  pairTmuxSession,
10229
10312
  finalizeClaudePairOnboarding
10230
- } = await import("../claude-pair-runtime-ERGB26MZ.js");
10313
+ } = await import("../claude-pair-runtime-QNOWFDJ7.js");
10231
10314
  for (const pairId of pendingResp.cancelled_pair_ids ?? []) {
10232
10315
  log(`[claude-pair] sweeping orphan tmux session for pair ${pairId.slice(0, 8)}`);
10233
10316
  const killed = await killPairSession(pairTmuxSession(pairId));