@integrity-labs/agt-cli 0.27.145 → 0.27.146

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,7 +18,7 @@ import {
18
18
  provisionStopHook,
19
19
  requireHost,
20
20
  safeWriteJsonAtomic
21
- } from "../chunk-4UKWARE3.js";
21
+ } from "../chunk-3LRQ45BZ.js";
22
22
  import {
23
23
  getProjectDir as getProjectDir2,
24
24
  getReadyTasks,
@@ -37,10 +37,13 @@ import {
37
37
  getSessionState,
38
38
  injectMessage,
39
39
  injectMessageWithStatus,
40
+ isAgentIdle,
40
41
  isAgentPromptReady,
41
42
  isSessionHealthy,
43
+ isStaleForToday,
42
44
  paneLogPath,
43
45
  parseEnvIntegrations,
46
+ peekCurrentSession,
44
47
  prepareForRespawn,
45
48
  probeMcpEnvSubstitution,
46
49
  readPaneLogTail,
@@ -49,12 +52,14 @@ import {
49
52
  rotateSessionForWedge,
50
53
  sanitizeMcpJson,
51
54
  sendToAgent,
55
+ sessionTranscriptDir,
52
56
  startPersistentSession,
53
57
  stopAllSessionsAndWait,
54
58
  stopPersistentSession,
55
59
  takeWatchdogGiveUpCount,
56
- takeZombieDetection
57
- } from "../chunk-7GKJZBTB.js";
60
+ takeZombieDetection,
61
+ transcriptActivityAgeSeconds
62
+ } from "../chunk-J2HPXKP5.js";
58
63
  import {
59
64
  KANBAN_CHECK_COMMAND,
60
65
  MAX_AVATAR_ENV_URL_BYTES,
@@ -84,12 +89,6 @@ import {
84
89
  worseConnectivityOutcome,
85
90
  wrapScheduledTaskPrompt
86
91
  } from "../chunk-WOOYOAPG.js";
87
- import {
88
- isAgentIdle,
89
- isStaleForToday,
90
- peekCurrentSession,
91
- sessionTranscriptDir
92
- } from "../chunk-354FAVQR.js";
93
92
  import {
94
93
  parsePsRows,
95
94
  reapOrphanChannelMcps
@@ -1515,6 +1514,7 @@ function isUrgentUpgrade(opts) {
1515
1514
  }
1516
1515
  var RESTART_IDLE_THRESHOLD_SECONDS = 120;
1517
1516
  var RESTART_INBOUND_QUIET_SECONDS = 300;
1517
+ var RESTART_TRANSCRIPT_STALE_SECONDS = 60;
1518
1518
  var GATEABLE_RESTART_REASONS = /* @__PURE__ */ new Set([
1519
1519
  "model-change",
1520
1520
  "channel-set-change",
@@ -1533,9 +1533,11 @@ function decideRestartGate(opts) {
1533
1533
  }
1534
1534
  const paneThreshold = opts.idleThresholdSeconds ?? RESTART_IDLE_THRESHOLD_SECONDS;
1535
1535
  const inboundThreshold = opts.inboundQuietSeconds ?? RESTART_INBOUND_QUIET_SECONDS;
1536
- const paneBusy = opts.paneLogAgeSeconds !== null && opts.paneLogAgeSeconds < paneThreshold;
1536
+ const transcriptThreshold = opts.transcriptStaleSeconds ?? RESTART_TRANSCRIPT_STALE_SECONDS;
1537
+ const transcriptAge = opts.transcriptAgeSeconds ?? null;
1538
+ const progressBusy = transcriptAge !== null ? transcriptAge < transcriptThreshold : opts.paneLogAgeSeconds !== null && opts.paneLogAgeSeconds < paneThreshold;
1537
1539
  const inboundBusy = opts.inboundAgeSeconds !== null && opts.inboundAgeSeconds < inboundThreshold;
1538
- if (paneBusy || inboundBusy) return "defer-idle";
1540
+ if (progressBusy || inboundBusy) return "defer-idle";
1539
1541
  return "proceed";
1540
1542
  }
1541
1543
 
@@ -3377,13 +3379,18 @@ function clearAgentState(agentId, codeName) {
3377
3379
  // src/lib/wedge-detection.ts
3378
3380
  var DEFAULTS = {
3379
3381
  inboundWaitSeconds: 120,
3380
- // ENG-6238: the hard cap is now an ABSOLUTE BACKSTOP, not the primary
3381
- // discriminator — a still-producing session (e.g. a runaway loop) is only
3382
- // reaped here. Raised 300→1200 because the soft path now reliably catches
3383
- // the frozen-turn wedge via the transcript signal, so the hard cap no longer
3384
- // needs to fire early (which is exactly what false-killed kylie's long
3385
- // legitimate turns, ENG-6238).
3386
- inboundHardWaitSeconds: 1200,
3382
+ // ENG-6264: DISABLED by default (0). A session that's actively producing
3383
+ // tokens is never force-respawned — a working agent must not be killed just
3384
+ // because a message has been queued behind its turn, no matter how long.
3385
+ // ENG-6238 made this an absolute backstop (1200s) to still catch a
3386
+ // producing-but-never-draining runaway loop, but that re-introduced the exact
3387
+ // failure we set out to kill: cutting off real work on a long turn. Runaway
3388
+ // token burn is owned by the cost guardrail (ENG-5556); a producing-but-silent
3389
+ // loop still trips the synthetic-probe alarm. So the backstop is now opt-in:
3390
+ // set AGT_WEDGE_INBOUND_HARD_WAIT_SECONDS to a positive value to re-enable it
3391
+ // (floored at inboundWaitSeconds). 0 = the frozen/hung wedge (transcript
3392
+ // static) is still caught by the soft path; only the *producing* path is spared.
3393
+ inboundHardWaitSeconds: 0,
3387
3394
  paneStaleSeconds: 120,
3388
3395
  transcriptStaleSeconds: 60,
3389
3396
  minCycles: 3
@@ -3402,10 +3409,12 @@ function resolveWedgeConfig(env = process.env) {
3402
3409
  DEFAULTS.inboundWaitSeconds,
3403
3410
  30
3404
3411
  );
3405
- const inboundHardWaitSeconds = Math.max(
3406
- inboundWaitSeconds,
3407
- parsePositiveInt(env.AGT_WEDGE_INBOUND_HARD_WAIT_SECONDS, DEFAULTS.inboundHardWaitSeconds, 30)
3412
+ const inboundHardWaitRaw = parsePositiveInt(
3413
+ env.AGT_WEDGE_INBOUND_HARD_WAIT_SECONDS,
3414
+ DEFAULTS.inboundHardWaitSeconds,
3415
+ 0
3408
3416
  );
3417
+ const inboundHardWaitSeconds = inboundHardWaitRaw <= 0 ? 0 : Math.max(inboundWaitSeconds, inboundHardWaitRaw);
3409
3418
  return {
3410
3419
  mode: parseMode(env.AGT_WEDGE_RESTART_MODE),
3411
3420
  inboundWaitSeconds,
@@ -3430,6 +3439,7 @@ function isWedgeCandidateCycle(signals, config2) {
3430
3439
  if (inboundAge === null) return false;
3431
3440
  if (inboundAge < config2.inboundWaitSeconds) return false;
3432
3441
  if (isSessionProducing(signals, config2)) {
3442
+ if (config2.inboundHardWaitSeconds <= 0) return false;
3433
3443
  return inboundAge >= config2.inboundHardWaitSeconds;
3434
3444
  }
3435
3445
  return true;
@@ -4417,11 +4427,16 @@ function paneLogAgeSecondsFor(codeName) {
4417
4427
  return 0;
4418
4428
  }
4419
4429
  }
4420
- function restartGateFor(codeName, breakerReason) {
4421
- if (!isGateableRestartReason(breakerReason)) return "bypass";
4430
+ function transcriptAgeSecondsFor(codeName) {
4431
+ const sessionId = getSessionState(codeName)?.currentSessionId ?? null;
4432
+ return transcriptActivityAgeSeconds(getProjectDir2(codeName), sessionId, /* @__PURE__ */ new Date());
4433
+ }
4434
+ function restartGateFor(codeName, reason) {
4435
+ if (!isGateableRestartReason(reason)) return "bypass";
4422
4436
  return decideRestartGate({
4423
4437
  window: cachedMaintenanceWindow,
4424
4438
  paneLogAgeSeconds: paneLogAgeSecondsFor(codeName),
4439
+ transcriptAgeSeconds: transcriptAgeSecondsFor(codeName),
4425
4440
  inboundAgeSeconds: inboundAgeSecondsFor(codeName),
4426
4441
  now: /* @__PURE__ */ new Date()
4427
4442
  });
@@ -4580,10 +4595,10 @@ async function runAgentConnectivityProbes(agent, integrations, projectDir) {
4580
4595
  );
4581
4596
  }
4582
4597
  }
4583
- function stopPersistentSessionAndForgetMcpBaseline(codeName, breakerReason) {
4584
- const gate = restartGateFor(codeName, breakerReason);
4598
+ function stopPersistentSessionAndForgetMcpBaseline(codeName, breakerReason, gateReason = breakerReason) {
4599
+ const gate = restartGateFor(codeName, gateReason);
4585
4600
  if (gate !== "bypass" && gate !== "proceed") {
4586
- log(`[maintenance-window] Deferring '${breakerReason}' restart for '${codeName}' (${gate})`);
4601
+ log(`[maintenance-window] Deferring '${gateReason}' restart for '${codeName}' (${gate})`);
4587
4602
  return;
4588
4603
  }
4589
4604
  cancelPendingSessionRestart(codeName);
@@ -4760,7 +4775,7 @@ var cachedMaintenanceWindow = null;
4760
4775
  var lastVersionCheckAt = 0;
4761
4776
  var VERSION_CHECK_INTERVAL_MS = 5 * 60 * 1e3;
4762
4777
  var lastResponsivenessProbeAt = 0;
4763
- var agtCliVersion = true ? "0.27.145" : "dev";
4778
+ var agtCliVersion = true ? "0.27.146" : "dev";
4764
4779
  function resolveBrewPath(execFileSync4) {
4765
4780
  try {
4766
4781
  const out = execFileSync4("which", ["brew"], { timeout: 5e3 }).toString().trim();
@@ -5958,7 +5973,7 @@ async function pollCycle() {
5958
5973
  }
5959
5974
  try {
5960
5975
  const { detectHostSecurity } = await import("../host-security-6PDFG7F5.js");
5961
- const { collectDiagnostics } = await import("../persistent-session-35PWSTLO.js");
5976
+ const { collectDiagnostics } = await import("../persistent-session-EUOWPJPS.js");
5962
5977
  const diagCodeNames = [...agentState.persistentSessionAgents];
5963
5978
  const agentDiagnostics = diagCodeNames.length > 0 ? collectDiagnostics(diagCodeNames) : void 0;
5964
5979
  let tailscaleHostname;
@@ -6045,12 +6060,12 @@ async function pollCycle() {
6045
6060
  const {
6046
6061
  collectResponsivenessProbes,
6047
6062
  getResponsivenessIntervalMs
6048
- } = await import("../responsiveness-probe-MA4M2QM4.js");
6063
+ } = await import("../responsiveness-probe-IU3ALQYB.js");
6049
6064
  const probeIntervalMs = getResponsivenessIntervalMs();
6050
6065
  if (now - lastResponsivenessProbeAt > probeIntervalMs) {
6051
6066
  const probeCodeNames = [...agentState.persistentSessionAgents];
6052
6067
  if (probeCodeNames.length > 0) {
6053
- const { takeAcpxExecFailureCount, creditAcpxExecFailureCount } = await import("../persistent-session-35PWSTLO.js");
6068
+ const { takeAcpxExecFailureCount, creditAcpxExecFailureCount } = await import("../persistent-session-EUOWPJPS.js");
6054
6069
  const drainedGiveUps = /* @__PURE__ */ new Map();
6055
6070
  const drainedAcpxFailures = /* @__PURE__ */ new Map();
6056
6071
  const probes = collectResponsivenessProbes(probeCodeNames).map((p) => {
@@ -6084,8 +6099,7 @@ async function pollCycle() {
6084
6099
  collectResponsivenessProbes,
6085
6100
  livePendingInboundOldestAgeSeconds,
6086
6101
  deadLetterPendingInbound
6087
- } = await import("../responsiveness-probe-MA4M2QM4.js");
6088
- const { transcriptActivityAgeSeconds } = await import("../daily-session-PNQX5URX.js");
6102
+ } = await import("../responsiveness-probe-IU3ALQYB.js");
6089
6103
  const { getProjectDir: wedgeProjectDir } = await import("../claude-scheduler-FATCLHDM.js");
6090
6104
  const wedgeNow = /* @__PURE__ */ new Date();
6091
6105
  const liveAgents = agentState.persistentSessionAgents;
@@ -7868,7 +7882,19 @@ async function processAgent(agent, agentStates) {
7868
7882
  // isolated and the agent keeps running degraded instead of being
7869
7883
  // paused wholesale. reaperRestartBreakerReason() encodes that
7870
7884
  // single-vs-multi decision; undefined means "restart, don't count".
7871
- stopSession: (codeName, ctx) => stopPersistentSessionAndForgetMcpBaseline(codeName, reaperRestartBreakerReason(ctx.activeKeys)),
7885
+ //
7886
+ // ENG-6264: the breaker-count reason (above, undefined for a single dead
7887
+ // MCP) is decoupled from the GATE reason. Pre-6264 an undefined breaker
7888
+ // reason also made the restart non-gateable → 'bypass' → the session was
7889
+ // torn down mid-turn (the common single-MCP case interrupted busy
7890
+ // agents). Always pass 'mcp-presence-reaper' as the gate reason so the
7891
+ // restart defers-until-idle, while breakerReason still governs whether it
7892
+ // counts against the breaker.
7893
+ stopSession: (codeName, ctx) => stopPersistentSessionAndForgetMcpBaseline(
7894
+ codeName,
7895
+ reaperRestartBreakerReason(ctx.activeKeys),
7896
+ "mcp-presence-reaper"
7897
+ ),
7872
7898
  // ENG-5292: when the reaper gives up on a managed MCP (cap from
7873
7899
  // ENG-5279 + state-preservation from ENG-5285 both said "this
7874
7900
  // MCP keeps failing after 3 restart cycles"), mark the matching
@@ -10615,7 +10641,7 @@ async function processClaudePairSessions(agents) {
10615
10641
  killPairSession,
10616
10642
  pairTmuxSession,
10617
10643
  finalizeClaudePairOnboarding
10618
- } = await import("../claude-pair-runtime-GIUCD7IG.js");
10644
+ } = await import("../claude-pair-runtime-EZ4HC6D7.js");
10619
10645
  for (const pairId of pendingResp.cancelled_pair_ids ?? []) {
10620
10646
  log(`[claude-pair] sweeping orphan tmux session for pair ${pairId.slice(0, 8)}`);
10621
10647
  const killed = await killPairSession(pairTmuxSession(pairId));