@integrity-labs/agt-cli 0.28.5 → 0.28.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/bin/agt.js CHANGED
@@ -33,7 +33,7 @@ import {
33
33
  success,
34
34
  table,
35
35
  warn
36
- } from "../chunk-4DTVE2UF.js";
36
+ } from "../chunk-BEHCN5RZ.js";
37
37
  import {
38
38
  CHANNEL_REGISTRY,
39
39
  DEPLOYMENT_TEMPLATES,
@@ -4773,7 +4773,7 @@ import { execFileSync, execSync } from "child_process";
4773
4773
  import { existsSync as existsSync10, realpathSync as realpathSync2 } from "fs";
4774
4774
  import chalk18 from "chalk";
4775
4775
  import ora16 from "ora";
4776
- var cliVersion = true ? "0.28.5" : "dev";
4776
+ var cliVersion = true ? "0.28.6" : "dev";
4777
4777
  async function fetchLatestVersion() {
4778
4778
  const host2 = getHost();
4779
4779
  if (!host2) return null;
@@ -5696,7 +5696,7 @@ function handleError(err) {
5696
5696
  }
5697
5697
 
5698
5698
  // src/bin/agt.ts
5699
- var cliVersion2 = true ? "0.28.5" : "dev";
5699
+ var cliVersion2 = true ? "0.28.6" : "dev";
5700
5700
  var program = new Command();
5701
5701
  program.name("agt").description("Augmented CLI \u2014 agent provisioning and management").version(cliVersion2).option("--json", "Emit machine-readable JSON output (suppress spinners and colors)").option("--skip-update-check", "Skip the automatic update check on startup");
5702
5702
  program.hook("preAction", async (thisCommand, actionCommand) => {
@@ -8275,4 +8275,4 @@ export {
8275
8275
  managerInstallSystemUnitCommand,
8276
8276
  managerUninstallSystemUnitCommand
8277
8277
  };
8278
- //# sourceMappingURL=chunk-4DTVE2UF.js.map
8278
+ //# sourceMappingURL=chunk-BEHCN5RZ.js.map
@@ -22,7 +22,7 @@ import {
22
22
  provisionStopHook,
23
23
  requireHost,
24
24
  safeWriteJsonAtomic
25
- } from "../chunk-4DTVE2UF.js";
25
+ } from "../chunk-BEHCN5RZ.js";
26
26
  import {
27
27
  getProjectDir as getProjectDir2,
28
28
  getReadyTasks,
@@ -5079,7 +5079,7 @@ var cachedMaintenanceWindow = null;
5079
5079
  var lastVersionCheckAt = 0;
5080
5080
  var VERSION_CHECK_INTERVAL_MS = 5 * 60 * 1e3;
5081
5081
  var lastResponsivenessProbeAt = 0;
5082
- var agtCliVersion = true ? "0.28.5" : "dev";
5082
+ var agtCliVersion = true ? "0.28.6" : "dev";
5083
5083
  function resolveBrewPath(execFileSync4) {
5084
5084
  try {
5085
5085
  const out = execFileSync4("which", ["brew"], { timeout: 5e3 }).toString().trim();
@@ -6385,7 +6385,7 @@ async function pollCycle() {
6385
6385
  const {
6386
6386
  collectResponsivenessProbes,
6387
6387
  getResponsivenessIntervalMs
6388
- } = await import("../responsiveness-probe-GX5W4ZMI.js");
6388
+ } = await import("../responsiveness-probe-53KDTOUT.js");
6389
6389
  const probeIntervalMs = getResponsivenessIntervalMs();
6390
6390
  if (now - lastResponsivenessProbeAt > probeIntervalMs) {
6391
6391
  const probeCodeNames = [...agentState.persistentSessionAgents];
@@ -6417,7 +6417,7 @@ async function pollCycle() {
6417
6417
  collectResponsivenessProbes,
6418
6418
  livePendingInboundOldestAgeSeconds,
6419
6419
  parkPendingInbound
6420
- } = await import("../responsiveness-probe-GX5W4ZMI.js");
6420
+ } = await import("../responsiveness-probe-53KDTOUT.js");
6421
6421
  const { getProjectDir: wedgeProjectDir } = await import("../claude-scheduler-FATCLHDM.js");
6422
6422
  const wedgeNow = /* @__PURE__ */ new Date();
6423
6423
  const liveAgents = agentState.persistentSessionAgents;
@@ -15810,6 +15810,37 @@ function mergeInboundText(text, blocks) {
15810
15810
  return { content, recoveredTable: rendered.length > 0 && content !== text };
15811
15811
  }
15812
15812
 
15813
+ // src/inbound-dedup.ts
15814
+ var DEFAULT_TTL_MS = 5 * 6e4;
15815
+ var MAX_ENTRIES = 2e3;
15816
+ var delivered = /* @__PURE__ */ new Map();
15817
+ function inboundDedupKey(channel, providerId) {
15818
+ return `${channel}:${providerId}`;
15819
+ }
15820
+ function markInboundDelivered(channel, providerId, opts) {
15821
+ if (!providerId) return false;
15822
+ const now = opts?.now ?? Date.now();
15823
+ const ttl = opts?.ttlMs ?? DEFAULT_TTL_MS;
15824
+ const key2 = inboundDedupKey(channel, providerId);
15825
+ const exp = delivered.get(key2);
15826
+ if (exp !== void 0 && exp > now) {
15827
+ delivered.set(key2, now + ttl);
15828
+ return true;
15829
+ }
15830
+ delivered.set(key2, now + ttl);
15831
+ if (delivered.size > MAX_ENTRIES) {
15832
+ for (const [k, e] of delivered) {
15833
+ if (e <= now) delivered.delete(k);
15834
+ }
15835
+ while (delivered.size > MAX_ENTRIES) {
15836
+ const oldest = delivered.keys().next().value;
15837
+ if (oldest === void 0) break;
15838
+ delivered.delete(oldest);
15839
+ }
15840
+ }
15841
+ return false;
15842
+ }
15843
+
15813
15844
  // src/slack-allowlist-source.ts
15814
15845
  function parseAllowedUsersCsv(raw) {
15815
15846
  return new Set(
@@ -19490,6 +19521,14 @@ async function connectSocketMode() {
19490
19521
  const evt = msg.payload.event;
19491
19522
  if (evt.user === botUserId) return;
19492
19523
  if (evt.type !== "app_mention" && evt.type !== "message") return;
19524
+ if (evt.channel && evt.ts && markInboundDelivered("slack", `${evt.channel}:${evt.ts}`)) {
19525
+ recordChannelDeflection(SLACK_AGENT_DIR, "slack", "duplicate");
19526
+ process.stderr.write(
19527
+ `slack-channel(${AGENT_CODE_NAME}): [channel-duplicate] dropped re-delivery channel=${redactSlackId(evt.channel)} ts=${redactSlackId(evt.ts)} type=${evt.type}
19528
+ `
19529
+ );
19530
+ return;
19531
+ }
19493
19532
  const isBot = Boolean(evt.bot_id);
19494
19533
  const filterDecision = decideSlackMessageForward(evt);
19495
19534
  const senderPolicyDecision = decideSenderPolicyForward(evt, SLACK_SENDER_POLICY);
@@ -16310,6 +16310,37 @@ function recordChannelDeflection(agentDir, channel, cause) {
16310
16310
  }
16311
16311
  }
16312
16312
 
16313
+ // src/inbound-dedup.ts
16314
+ var DEFAULT_TTL_MS = 5 * 6e4;
16315
+ var MAX_ENTRIES = 2e3;
16316
+ var delivered = /* @__PURE__ */ new Map();
16317
+ function inboundDedupKey(channel, providerId) {
16318
+ return `${channel}:${providerId}`;
16319
+ }
16320
+ function markInboundDelivered(channel, providerId, opts) {
16321
+ if (!providerId) return false;
16322
+ const now = opts?.now ?? Date.now();
16323
+ const ttl = opts?.ttlMs ?? DEFAULT_TTL_MS;
16324
+ const key2 = inboundDedupKey(channel, providerId);
16325
+ const exp = delivered.get(key2);
16326
+ if (exp !== void 0 && exp > now) {
16327
+ delivered.set(key2, now + ttl);
16328
+ return true;
16329
+ }
16330
+ delivered.set(key2, now + ttl);
16331
+ if (delivered.size > MAX_ENTRIES) {
16332
+ for (const [k, e] of delivered) {
16333
+ if (e <= now) delivered.delete(k);
16334
+ }
16335
+ while (delivered.size > MAX_ENTRIES) {
16336
+ const oldest = delivered.keys().next().value;
16337
+ if (oldest === void 0) break;
16338
+ delivered.delete(oldest);
16339
+ }
16340
+ }
16341
+ return false;
16342
+ }
16343
+
16313
16344
  // src/telegram-channel.ts
16314
16345
  function redactId(id) {
16315
16346
  return createHash("sha256").update(String(id)).digest("hex").slice(0, 8);
@@ -18441,6 +18472,14 @@ async function pollLoop() {
18441
18472
  const classifiedAttachments = classifyTelegramAttachments(msg);
18442
18473
  const content = typeof msg.text === "string" && msg.text.length > 0 ? msg.text : typeof msg.caption === "string" && msg.caption.length > 0 ? msg.caption : "";
18443
18474
  if (content.length === 0 && classifiedAttachments.length === 0) continue;
18475
+ if (markInboundDelivered("telegram", String(update.update_id))) {
18476
+ recordChannelDeflection(AGENT_DIR, "telegram", "duplicate");
18477
+ process.stderr.write(
18478
+ `telegram-channel(${AGENT_CODE_NAME}): [channel-duplicate] dropped re-delivery update_id=${update.update_id}
18479
+ `
18480
+ );
18481
+ continue;
18482
+ }
18444
18483
  const chatId = String(msg.chat.id);
18445
18484
  const trimmedContent = content.trim();
18446
18485
  const isFromBot = !!msg.from?.is_bot;
@@ -13,6 +13,8 @@ var KNOWN_DEFLECTION_CAUSES = [
13
13
  "session_dead",
14
14
  "wedged",
15
15
  "busy",
16
+ "duplicate",
17
+ // ENG-6270 — a redundant re-delivery dropped at fresh ingress
16
18
  "unknown"
17
19
  ];
18
20
  function readAndResetChannelDeflections(agentHomeDir) {
@@ -246,4 +248,4 @@ export {
246
248
  parkPendingInbound,
247
249
  readAndResetChannelDeflections
248
250
  };
249
- //# sourceMappingURL=responsiveness-probe-GX5W4ZMI.js.map
251
+ //# sourceMappingURL=responsiveness-probe-53KDTOUT.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/lib/responsiveness-probe.ts"],"sourcesContent":["/**\n * ENG-5399 — Tier 1 responsiveness probe (manager-side).\n *\n * Cheap, fast-cadence canary that catches \"agent went silent\" inside\n * minutes, well before the existing synthetic-probe cron's ~35 min\n * staleness window (`SyntheticReplyAgeSeconds`, ENG-5122).\n *\n * Mechanism: for each managed agent, read the mtime of the agent's\n * `pane.log` and report `now - mtime` as `PaneActivityAgeSeconds` via\n * a new `/host/responsiveness-probe` endpoint. `pane.log` is the\n * tmux pipe-pane sink set up by `setupPaneLog()` — any visible\n * activity (assistant turns, tool calls, in-place progress\n * heartbeats) bumps its mtime. A silent agent has a steadily\n * climbing age that lands in CloudWatch and trips a per-agent alarm.\n *\n * ENG-6017 adds a second per-agent signal on the same cadence:\n * `pending_inbound_oldest_age_seconds` — the age of the oldest marker\n * file across the agent's `*-pending-inbound/` directories (written by\n * the channel MCP servers for inbounds awaiting delivery). This is the\n * one artifact of the \"message typed but never submitted\" failure mode\n * that every other canary is blind to: in the koda incident\n * (2026-06-04) an operator Slack DM sat undelivered for 40+ minutes\n * while pane-activity stayed fresh (health checks), synthetic probes\n * were answered by the one-shot fallback, and heartbeat/session-alive\n * only reflect manager health. The field is OMITTED (not zero) when the\n * agent has no pending-inbound markers — the API treats absent as\n * \"no signal\", never as \"healthy\" (absent-vs-zero matters for\n * mixed-version fleets where old CLIs don't report it at all).\n *\n * Run from `pollCycle()` in `manager-worker.ts` on a configurable\n * interval (default 5 min via `AUGMENTED_RESPONSIVENESS_INTERVAL_MS`).\n */\n\nimport { mkdirSync, readdirSync, readFileSync, renameSync, statSync, unlinkSync } from 'node:fs';\nimport { dirname, join } from 'node:path';\nimport { paneLogPath } from './persistent-session.js';\n\nexport interface ResponsivenessProbeResult {\n code_name: string;\n pane_activity_age_seconds: number;\n /**\n * ENG-6017: age (s) of the oldest marker file across the agent's\n * `*-pending-inbound/` directories. Omitted when no markers exist —\n * absent means \"no signal\", NOT \"zero / healthy\".\n */\n pending_inbound_oldest_age_seconds?: number;\n /**\n * ENG-6327: per-cause inbound deflection counts since the last probe (read +\n * RESET from the channel servers' `<channel>-deflections.json` counter files).\n * Omitted when there were none. Keyed by DeflectionCause.\n */\n deflections?: Record<string, number>;\n}\n\n/**\n * ENG-6327 — per-channel deflection counter file-layout contract. The channel\n * MCP servers increment `<channel>-deflections.json` in the agent home (via\n * recordChannelDeflection in packages/mcp/ack-reaction.ts); apps/cli does not\n * depend on @integrity-labs/mcp, so the manager re-implements the read against\n * this contract — the same read-only, no-IPC posture as the `*-pending-inbound`\n * scan above. Body is a JSON object keyed by cause with integer counts.\n */\nconst DEFLECTION_COUNTER_SUFFIX = '-deflections.json';\nconst KNOWN_DEFLECTION_CAUSES = [\n 'integration_down',\n 'session_dead',\n 'wedged',\n 'busy',\n 'duplicate', // ENG-6270 — a redundant re-delivery dropped at fresh ingress\n 'unknown',\n] as const;\n\n/**\n * Read and RESET every `<channel>-deflections.json` in an agent home, returning\n * the summed counts by cause (or null when there are none). Consumes via atomic\n * rename-then-read-then-unlink so an increment racing the reset is carried into\n * the next window rather than lost. Only known causes are accumulated so a\n * corrupt file can't smuggle an unbounded dimension into CloudWatch.\n *\n * Reset-on-read is required: these are deltas-since-last-consume, so the metric\n * would over-count if they weren't cleared each cycle. The trade-off is that a\n * failed POST loses that cycle's counts (under-count) — acceptable for a\n * frequency metric and consistent with the other best-effort probe siblings;\n * unlike the in-memory give-up counter, file-based counts aren't re-credited\n * on POST failure.\n */\nexport function readAndResetChannelDeflections(\n agentHomeDir: string,\n): Record<string, number> | null {\n let names: string[];\n try {\n names = readdirSync(agentHomeDir);\n } catch {\n return null;\n }\n const total: Record<string, number> = {};\n let any = false;\n for (const name of names) {\n if (!name.endsWith(DEFLECTION_COUNTER_SUFFIX)) continue;\n const full = join(agentHomeDir, name);\n const consuming = `${full}.consuming`;\n try {\n renameSync(full, consuming); // atomic; replaces any stale .consuming from a crashed cycle\n } catch {\n continue; // vanished / not a regular file — skip\n }\n try {\n const parsed = JSON.parse(readFileSync(consuming, 'utf8')) as Record<string, unknown>;\n if (parsed && typeof parsed === 'object') {\n for (const cause of KNOWN_DEFLECTION_CAUSES) {\n const n = parsed[cause];\n if (typeof n === 'number' && Number.isFinite(n) && n > 0) {\n total[cause] = (total[cause] ?? 0) + Math.floor(n);\n any = true;\n }\n }\n }\n } catch {\n /* corrupt — drop it */\n }\n try {\n unlinkSync(consuming);\n } catch {\n /* non-fatal */\n }\n }\n return any ? total : null;\n}\n\nconst DEFAULT_INTERVAL_MS = 5 * 60 * 1000;\n\nexport function getResponsivenessIntervalMs(): number {\n const raw = process.env.AUGMENTED_RESPONSIVENESS_INTERVAL_MS;\n if (!raw) return DEFAULT_INTERVAL_MS;\n const parsed = Number.parseInt(raw, 10);\n return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_INTERVAL_MS;\n}\n\n/**\n * ENG-6017: oldest pending-inbound marker mtime (ms epoch) for an agent,\n * or null when the agent has no markers / no pending-inbound dirs.\n *\n * The channel MCP servers (slack-channel, telegram-channel, …) write one\n * marker file per inbound into `~/.augmented/<codeName>/<channel>-pending-\n * inbound/` and clear it when the agent acknowledges the message. The\n * directory layout is the contract here — read-only, no IPC with the MCP\n * (the MCP and CLI release independently; file mtimes need no protocol).\n *\n * ENG-6072: only plain, non-hidden files count as markers. The msteams MCP\n * keeps `.markers/` and `.processed/` housekeeping SUBDIRECTORIES inside its\n * pending-inbound dir; their mtimes never advance, so statting every dirent\n * made the gauge climb forever and fired pending-inbound-stale on agents with\n * zero stranded messages (kylie ~3.4d / scout ~34h false ALARMs the moment\n * ENG-6023 activated the alarm). Dot-entries are skipped wholesale — the\n * hidden namespace is reserved for MCP bookkeeping, never for markers.\n */\nfunction oldestPendingInboundMtimeMs(agentHomeDir: string): number | null {\n let oldest: number | null = null;\n let entries;\n try {\n entries = readdirSync(agentHomeDir, { withFileTypes: true });\n } catch {\n return null; // agent home missing — nothing to report\n }\n for (const entry of entries) {\n if (!entry.isDirectory() || !entry.name.endsWith('-pending-inbound')) continue;\n const dir = join(agentHomeDir, entry.name);\n let files;\n try {\n files = readdirSync(dir, { withFileTypes: true });\n } catch {\n continue;\n }\n for (const file of files) {\n if (!file.isFile() || file.name.startsWith('.')) continue;\n try {\n const mtimeMs = statSync(join(dir, file.name)).mtimeMs;\n if (oldest === null || mtimeMs < oldest) oldest = mtimeMs;\n } catch {\n // Marker drained between readdir and stat — that's the happy path.\n }\n }\n }\n return oldest;\n}\n\n/**\n * ENG-6160 / ENG-6319: read a marker's classification flags.\n * - `{...}` → parsed flags.\n * - `null` → vanished mid-scan (ENOENT) — drained between stat and\n * read, the happy path; callers exclude it.\n * - `'malformed'` → present but unreadable for another reason. The live\n * scan treats this as LIVE (a corrupt marker can never\n * mask a real wedge); park leaves it in place (a corrupt\n * marker must never become a dropped message).\n */\ninterface MarkerFlags {\n /** ENG-5846: dead-lettered by the channel (⏳-noticed, never drainable). */\n undeliverable: boolean;\n /**\n * ENG-6319: auto-followed participant-thread inbound the agent may\n * legitimately skip. Excluded from the wedge live-scan (a deliberate skip\n * must not read as \"failing to drain\" and force-respawn a healthy\n * session) but still parked across respawns — it may be a real\n * operator message awaiting a reply.\n */\n discretionary: boolean;\n}\n\nfunction readMarkerFlags(markerPath: string): MarkerFlags | null | 'malformed' {\n try {\n const parsed = JSON.parse(readFileSync(markerPath, 'utf8')) as {\n undeliverable?: unknown;\n discretionary?: unknown;\n };\n return {\n undeliverable: parsed?.undeliverable === true,\n discretionary: parsed?.discretionary === true,\n };\n } catch (error) {\n return (error as NodeJS.ErrnoException).code === 'ENOENT' ? null : 'malformed';\n }\n}\n\n/**\n * ENG-6160: oldest *LIVE* pending-inbound marker mtime (ms epoch) for an agent,\n * or null when there is no live marker. \"Live\" excludes:\n *\n * - markers older than `sessionStartMs` — a marker written before the current\n * session started is a leftover from a PREVIOUS session and cannot mean\n * *this* session is failing to drain. This is the load-bearing exclusion:\n * without it, an orphan marker survives a fresh respawn and the wedge\n * detector re-fires forever on a healthy idle agent (the sherlock enforce\n * loop, 2026-06-08: `inboundAge=3389s` on a `● Ready.` session).\n * - markers flagged `undeliverable: true` — already dead-lettered by the channel.\n * - markers flagged `discretionary: true` (ENG-6319) — skippable auto-followed\n * participant-thread inbound; a deliberate skip must not force-respawn a\n * healthy session.\n *\n * Distinct from `oldestPendingInboundMtimeMs` (which counts ALL markers and\n * feeds the ENG-6017 `pending-inbound-stale` CloudWatch alarm — that alarm\n * *wants* to fire on a stuck inbound, so its semantics must NOT change). This\n * variant is wedge-detection-only.\n */\nexport function oldestLivePendingInboundMtimeMs(\n agentHomeDir: string,\n opts: { sessionStartMs?: number | null } = {},\n): number | null {\n const sessionStartMs = opts.sessionStartMs ?? null;\n let oldest: number | null = null;\n let entries;\n try {\n entries = readdirSync(agentHomeDir, { withFileTypes: true });\n } catch {\n return null;\n }\n for (const entry of entries) {\n if (!entry.isDirectory() || !entry.name.endsWith('-pending-inbound')) continue;\n const dir = join(agentHomeDir, entry.name);\n let files;\n try {\n files = readdirSync(dir, { withFileTypes: true });\n } catch {\n continue;\n }\n for (const file of files) {\n if (!file.isFile() || file.name.startsWith('.')) continue;\n const full = join(dir, file.name);\n let mtimeMs: number;\n try {\n mtimeMs = statSync(full).mtimeMs;\n } catch {\n continue; // drained between readdir and stat — happy path\n }\n if (sessionStartMs !== null && mtimeMs < sessionStartMs) continue; // pre-session leftover\n const flags = readMarkerFlags(full);\n if (flags === null) continue; // vanished between stat and read — drained, exclude\n if (flags !== 'malformed' && (flags.undeliverable || flags.discretionary)) continue;\n if (oldest === null || mtimeMs < oldest) oldest = mtimeMs;\n }\n }\n return oldest;\n}\n\n/**\n * ENG-6160: age (s) of the oldest LIVE pending-inbound marker for an agent, or\n * null when none. The wedge detector uses this instead of the alarm-facing\n * `pending_inbound_oldest_age_seconds` so a stale/dead-letter marker can't\n * false-fire a respawn.\n */\nexport function livePendingInboundOldestAgeSeconds(\n codeName: string,\n sessionStartMs: number | null,\n now: Date = new Date(),\n): number | null {\n const oldest = oldestLivePendingInboundMtimeMs(dirname(paneLogPath(codeName)), { sessionStartMs });\n if (oldest === null) return null;\n return Math.max(0, Math.floor((now.getTime() - oldest) / 1000));\n}\n\n/**\n * ENG-6160: move every pending-inbound marker for an agent aside into a sibling\n * `<channel>-pending-inbound-stale/` directory (NOT silently deleted — the\n * payload pointer is preserved for forensics), returning the count moved.\n *\n * ENG-6289: no longer called on wedge respawn — the wedge path parks instead\n * (see `parkPendingInbound` above; blanket dead-letter permanently dropped the\n * user's message). Kept as the explicit \"move everything aside\" seam for tests\n * and operator emergencies. The stale dir does not end in `-pending-inbound`,\n * so neither the probe nor this scan re-counts moved markers.\n */\n/**\n * ENG-6289: park-not-drop. On a force-fresh wedge respawn, KEEP undrained\n * pending-inbound markers in their live dirs instead of dead-lettering them —\n * the fresh session's orient hook surfaces them (\"N queued messages\" + details)\n * and ENG-5969 replay (when enabled) re-pushes their payloads. Markers are NOT\n * rewritten (no counter, no mtime bump): the ENG-6160 pre-session exclusion in\n * `oldestLivePendingInboundMtimeMs` already keeps an untouched parked marker\n * out of the fresh session's wedge signal, and re-delivery stays bounded by\n * the existing machinery — the channel-side `replay_count` cap (≤3) and the\n * orphan sweep's received_at TTL. A manager-side rewrite would be the first\n * cross-process writer into marker files, where a torn read in the channel\n * sweep DELETES the marker (`unlinkSync` on parse failure) — the exact drop\n * this function exists to prevent.\n *\n * Only markers already flagged `undeliverable: true` are dead-lettered (moved\n * to `-stale`) — the channel already gave the user the ⏳ notice for those, so\n * nothing can ever drain them. Malformed markers are LEFT IN PLACE, matching\n * the live-scan philosophy above (a corrupt marker must never mask — or\n * become — a dropped message).\n *\n * The msteams dir is skipped wholesale: its top-level files are raw Bot\n * Framework activity payloads (the transport queue, not bookkeeping — real\n * markers live in the hidden `.markers/` subdir this scan never touches), and\n * the teams channel server's boot drain redelivers them to the fresh session\n * on its own. Moving them aside (what the pre-ENG-6289 dead-letter did) was\n * actively defeating the one channel with native respawn recovery.\n */\nexport interface ParkPendingInboundResult {\n parked: number;\n deadLettered: number;\n}\n\n/**\n * Move one marker into the sibling `-stale` dead-letter dir (moved, not\n * deleted — preserved for forensics). Returns true on success; best-effort —\n * a marker that vanished or can't move is left as-is.\n */\nfunction moveMarkerToStale(dir: string, deadDir: string, name: string): boolean {\n try {\n mkdirSync(deadDir, { recursive: true });\n renameSync(join(dir, name), join(deadDir, name));\n return true;\n } catch {\n return false;\n }\n}\n\nexport function parkPendingInbound(codeName: string, _now: Date = new Date()): ParkPendingInboundResult {\n const home = dirname(paneLogPath(codeName));\n const result: ParkPendingInboundResult = { parked: 0, deadLettered: 0 };\n let entries;\n try {\n entries = readdirSync(home, { withFileTypes: true });\n } catch {\n return result;\n }\n for (const entry of entries) {\n if (!entry.isDirectory() || !entry.name.endsWith('-pending-inbound')) continue;\n if (entry.name === 'msteams-pending-inbound') continue; // transport queue — boot drain owns recovery\n const dir = join(home, entry.name);\n const deadDir = join(home, `${entry.name}-stale`);\n let files;\n try {\n files = readdirSync(dir, { withFileTypes: true });\n } catch {\n continue;\n }\n for (const file of files) {\n if (!file.isFile() || file.name.startsWith('.')) continue;\n const flags = readMarkerFlags(join(dir, file.name));\n if (flags === null) continue; // drained mid-scan — already gone\n // Only undeliverable markers dead-letter. Discretionary markers\n // (ENG-6319) PARK like engaged ones — they may be a real operator\n // message awaiting a reply (the live silent-loss class); malformed\n // markers park too (corrupt must never become a drop).\n if (flags !== 'malformed' && flags.undeliverable) {\n if (moveMarkerToStale(dir, deadDir, file.name)) result.deadLettered++;\n } else {\n result.parked++;\n }\n }\n }\n return result;\n}\n\nexport function deadLetterPendingInbound(codeName: string, _now: Date = new Date()): number {\n const home = dirname(paneLogPath(codeName));\n let moved = 0;\n let entries;\n try {\n entries = readdirSync(home, { withFileTypes: true });\n } catch {\n return 0;\n }\n for (const entry of entries) {\n if (!entry.isDirectory() || !entry.name.endsWith('-pending-inbound')) continue;\n const dir = join(home, entry.name);\n const deadDir = join(home, `${entry.name}-stale`);\n let files;\n try {\n files = readdirSync(dir, { withFileTypes: true });\n } catch {\n continue;\n }\n for (const file of files) {\n if (!file.isFile() || file.name.startsWith('.')) continue;\n if (moveMarkerToStale(dir, deadDir, file.name)) moved++;\n }\n }\n return moved;\n}\n\n/**\n * Compute the pane.log age for each agent. Missing or unreadable\n * pane.log returns null — the caller should drop those entries\n * rather than fabricate a \"fresh\" or \"ancient\" value. A missing\n * file means the agent has never spawned in this manager generation,\n * which is a separate problem covered by SessionAliveAgeSeconds.\n */\nexport function collectResponsivenessProbes(\n codeNames: string[],\n now: Date = new Date(),\n): ResponsivenessProbeResult[] {\n const nowMs = now.getTime();\n const results: ResponsivenessProbeResult[] = [];\n for (const codeName of codeNames) {\n try {\n const panePath = paneLogPath(codeName);\n const mtimeMs = statSync(panePath).mtimeMs;\n const ageSeconds = Math.max(0, Math.floor((nowMs - mtimeMs) / 1000));\n const result: ResponsivenessProbeResult = {\n code_name: codeName,\n pane_activity_age_seconds: ageSeconds,\n };\n // ENG-6017: piggyback the pending-inbound drain-age scan on the same\n // cadence. Field omitted (not 0) when there are no markers.\n const oldestMarkerMs = oldestPendingInboundMtimeMs(dirname(panePath));\n if (oldestMarkerMs !== null) {\n result.pending_inbound_oldest_age_seconds = Math.max(\n 0,\n Math.floor((nowMs - oldestMarkerMs) / 1000),\n );\n }\n // ENG-6327: read + RESET the per-channel deflection counters on the same\n // cadence so each cause's frequency reaches CloudWatch (ChannelDeflections,\n // Cause dimension). Omitted when there were none this window.\n const deflections = readAndResetChannelDeflections(dirname(panePath));\n if (deflections) {\n result.deflections = deflections;\n }\n results.push(result);\n } catch {\n // No pane.log yet (fresh agent, never spawned) — skip. The\n // session-alive monitor already covers the \"should be running\n // but isn't\" case.\n }\n }\n return results;\n}\n"],"mappings":";;;;;;;AAiCA,SAAS,WAAW,aAAa,cAAc,YAAY,UAAU,kBAAkB;AACvF,SAAS,SAAS,YAAY;AA4B9B,IAAM,4BAA4B;AAClC,IAAM,0BAA0B;AAAA,EAC9B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA;AAAA,EACA;AACF;AAgBO,SAAS,+BACd,cAC+B;AAC/B,MAAI;AACJ,MAAI;AACF,YAAQ,YAAY,YAAY;AAAA,EAClC,QAAQ;AACN,WAAO;AAAA,EACT;AACA,QAAM,QAAgC,CAAC;AACvC,MAAI,MAAM;AACV,aAAW,QAAQ,OAAO;AACxB,QAAI,CAAC,KAAK,SAAS,yBAAyB,EAAG;AAC/C,UAAM,OAAO,KAAK,cAAc,IAAI;AACpC,UAAM,YAAY,GAAG,IAAI;AACzB,QAAI;AACF,iBAAW,MAAM,SAAS;AAAA,IAC5B,QAAQ;AACN;AAAA,IACF;AACA,QAAI;AACF,YAAM,SAAS,KAAK,MAAM,aAAa,WAAW,MAAM,CAAC;AACzD,UAAI,UAAU,OAAO,WAAW,UAAU;AACxC,mBAAW,SAAS,yBAAyB;AAC3C,gBAAM,IAAI,OAAO,KAAK;AACtB,cAAI,OAAO,MAAM,YAAY,OAAO,SAAS,CAAC,KAAK,IAAI,GAAG;AACxD,kBAAM,KAAK,KAAK,MAAM,KAAK,KAAK,KAAK,KAAK,MAAM,CAAC;AACjD,kBAAM;AAAA,UACR;AAAA,QACF;AAAA,MACF;AAAA,IACF,QAAQ;AAAA,IAER;AACA,QAAI;AACF,iBAAW,SAAS;AAAA,IACtB,QAAQ;AAAA,IAER;AAAA,EACF;AACA,SAAO,MAAM,QAAQ;AACvB;AAEA,IAAM,sBAAsB,IAAI,KAAK;AAE9B,SAAS,8BAAsC;AACpD,QAAM,MAAM,QAAQ,IAAI;AACxB,MAAI,CAAC,IAAK,QAAO;AACjB,QAAM,SAAS,OAAO,SAAS,KAAK,EAAE;AACtC,SAAO,OAAO,SAAS,MAAM,KAAK,SAAS,IAAI,SAAS;AAC1D;AAoBA,SAAS,4BAA4B,cAAqC;AACxE,MAAI,SAAwB;AAC5B,MAAI;AACJ,MAAI;AACF,cAAU,YAAY,cAAc,EAAE,eAAe,KAAK,CAAC;AAAA,EAC7D,QAAQ;AACN,WAAO;AAAA,EACT;AACA,aAAW,SAAS,SAAS;AAC3B,QAAI,CAAC,MAAM,YAAY,KAAK,CAAC,MAAM,KAAK,SAAS,kBAAkB,EAAG;AACtE,UAAM,MAAM,KAAK,cAAc,MAAM,IAAI;AACzC,QAAI;AACJ,QAAI;AACF,cAAQ,YAAY,KAAK,EAAE,eAAe,KAAK,CAAC;AAAA,IAClD,QAAQ;AACN;AAAA,IACF;AACA,eAAW,QAAQ,OAAO;AACxB,UAAI,CAAC,KAAK,OAAO,KAAK,KAAK,KAAK,WAAW,GAAG,EAAG;AACjD,UAAI;AACF,cAAM,UAAU,SAAS,KAAK,KAAK,KAAK,IAAI,CAAC,EAAE;AAC/C,YAAI,WAAW,QAAQ,UAAU,OAAQ,UAAS;AAAA,MACpD,QAAQ;AAAA,MAER;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAyBA,SAAS,gBAAgB,YAAsD;AAC7E,MAAI;AACF,UAAM,SAAS,KAAK,MAAM,aAAa,YAAY,MAAM,CAAC;AAI1D,WAAO;AAAA,MACL,eAAe,QAAQ,kBAAkB;AAAA,MACzC,eAAe,QAAQ,kBAAkB;AAAA,IAC3C;AAAA,EACF,SAAS,OAAO;AACd,WAAQ,MAAgC,SAAS,WAAW,OAAO;AAAA,EACrE;AACF;AAsBO,SAAS,gCACd,cACA,OAA2C,CAAC,GAC7B;AACf,QAAM,iBAAiB,KAAK,kBAAkB;AAC9C,MAAI,SAAwB;AAC5B,MAAI;AACJ,MAAI;AACF,cAAU,YAAY,cAAc,EAAE,eAAe,KAAK,CAAC;AAAA,EAC7D,QAAQ;AACN,WAAO;AAAA,EACT;AACA,aAAW,SAAS,SAAS;AAC3B,QAAI,CAAC,MAAM,YAAY,KAAK,CAAC,MAAM,KAAK,SAAS,kBAAkB,EAAG;AACtE,UAAM,MAAM,KAAK,cAAc,MAAM,IAAI;AACzC,QAAI;AACJ,QAAI;AACF,cAAQ,YAAY,KAAK,EAAE,eAAe,KAAK,CAAC;AAAA,IAClD,QAAQ;AACN;AAAA,IACF;AACA,eAAW,QAAQ,OAAO;AACxB,UAAI,CAAC,KAAK,OAAO,KAAK,KAAK,KAAK,WAAW,GAAG,EAAG;AACjD,YAAM,OAAO,KAAK,KAAK,KAAK,IAAI;AAChC,UAAI;AACJ,UAAI;AACF,kBAAU,SAAS,IAAI,EAAE;AAAA,MAC3B,QAAQ;AACN;AAAA,MACF;AACA,UAAI,mBAAmB,QAAQ,UAAU,eAAgB;AACzD,YAAM,QAAQ,gBAAgB,IAAI;AAClC,UAAI,UAAU,KAAM;AACpB,UAAI,UAAU,gBAAgB,MAAM,iBAAiB,MAAM,eAAgB;AAC3E,UAAI,WAAW,QAAQ,UAAU,OAAQ,UAAS;AAAA,IACpD;AAAA,EACF;AACA,SAAO;AACT;AAQO,SAAS,mCACd,UACA,gBACA,MAAY,oBAAI,KAAK,GACN;AACf,QAAM,SAAS,gCAAgC,QAAQ,YAAY,QAAQ,CAAC,GAAG,EAAE,eAAe,CAAC;AACjG,MAAI,WAAW,KAAM,QAAO;AAC5B,SAAO,KAAK,IAAI,GAAG,KAAK,OAAO,IAAI,QAAQ,IAAI,UAAU,GAAI,CAAC;AAChE;AAkDA,SAAS,kBAAkB,KAAa,SAAiB,MAAuB;AAC9E,MAAI;AACF,cAAU,SAAS,EAAE,WAAW,KAAK,CAAC;AACtC,eAAW,KAAK,KAAK,IAAI,GAAG,KAAK,SAAS,IAAI,CAAC;AAC/C,WAAO;AAAA,EACT,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEO,SAAS,mBAAmB,UAAkB,OAAa,oBAAI,KAAK,GAA6B;AACtG,QAAM,OAAO,QAAQ,YAAY,QAAQ,CAAC;AAC1C,QAAM,SAAmC,EAAE,QAAQ,GAAG,cAAc,EAAE;AACtE,MAAI;AACJ,MAAI;AACF,cAAU,YAAY,MAAM,EAAE,eAAe,KAAK,CAAC;AAAA,EACrD,QAAQ;AACN,WAAO;AAAA,EACT;AACA,aAAW,SAAS,SAAS;AAC3B,QAAI,CAAC,MAAM,YAAY,KAAK,CAAC,MAAM,KAAK,SAAS,kBAAkB,EAAG;AACtE,QAAI,MAAM,SAAS,0BAA2B;AAC9C,UAAM,MAAM,KAAK,MAAM,MAAM,IAAI;AACjC,UAAM,UAAU,KAAK,MAAM,GAAG,MAAM,IAAI,QAAQ;AAChD,QAAI;AACJ,QAAI;AACF,cAAQ,YAAY,KAAK,EAAE,eAAe,KAAK,CAAC;AAAA,IAClD,QAAQ;AACN;AAAA,IACF;AACA,eAAW,QAAQ,OAAO;AACxB,UAAI,CAAC,KAAK,OAAO,KAAK,KAAK,KAAK,WAAW,GAAG,EAAG;AACjD,YAAM,QAAQ,gBAAgB,KAAK,KAAK,KAAK,IAAI,CAAC;AAClD,UAAI,UAAU,KAAM;AAKpB,UAAI,UAAU,eAAe,MAAM,eAAe;AAChD,YAAI,kBAAkB,KAAK,SAAS,KAAK,IAAI,EAAG,QAAO;AAAA,MACzD,OAAO;AACL,eAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAEO,SAAS,yBAAyB,UAAkB,OAAa,oBAAI,KAAK,GAAW;AAC1F,QAAM,OAAO,QAAQ,YAAY,QAAQ,CAAC;AAC1C,MAAI,QAAQ;AACZ,MAAI;AACJ,MAAI;AACF,cAAU,YAAY,MAAM,EAAE,eAAe,KAAK,CAAC;AAAA,EACrD,QAAQ;AACN,WAAO;AAAA,EACT;AACA,aAAW,SAAS,SAAS;AAC3B,QAAI,CAAC,MAAM,YAAY,KAAK,CAAC,MAAM,KAAK,SAAS,kBAAkB,EAAG;AACtE,UAAM,MAAM,KAAK,MAAM,MAAM,IAAI;AACjC,UAAM,UAAU,KAAK,MAAM,GAAG,MAAM,IAAI,QAAQ;AAChD,QAAI;AACJ,QAAI;AACF,cAAQ,YAAY,KAAK,EAAE,eAAe,KAAK,CAAC;AAAA,IAClD,QAAQ;AACN;AAAA,IACF;AACA,eAAW,QAAQ,OAAO;AACxB,UAAI,CAAC,KAAK,OAAO,KAAK,KAAK,KAAK,WAAW,GAAG,EAAG;AACjD,UAAI,kBAAkB,KAAK,SAAS,KAAK,IAAI,EAAG;AAAA,IAClD;AAAA,EACF;AACA,SAAO;AACT;AASO,SAAS,4BACd,WACA,MAAY,oBAAI,KAAK,GACQ;AAC7B,QAAM,QAAQ,IAAI,QAAQ;AAC1B,QAAM,UAAuC,CAAC;AAC9C,aAAW,YAAY,WAAW;AAChC,QAAI;AACF,YAAM,WAAW,YAAY,QAAQ;AACrC,YAAM,UAAU,SAAS,QAAQ,EAAE;AACnC,YAAM,aAAa,KAAK,IAAI,GAAG,KAAK,OAAO,QAAQ,WAAW,GAAI,CAAC;AACnE,YAAM,SAAoC;AAAA,QACxC,WAAW;AAAA,QACX,2BAA2B;AAAA,MAC7B;AAGA,YAAM,iBAAiB,4BAA4B,QAAQ,QAAQ,CAAC;AACpE,UAAI,mBAAmB,MAAM;AAC3B,eAAO,qCAAqC,KAAK;AAAA,UAC/C;AAAA,UACA,KAAK,OAAO,QAAQ,kBAAkB,GAAI;AAAA,QAC5C;AAAA,MACF;AAIA,YAAM,cAAc,+BAA+B,QAAQ,QAAQ,CAAC;AACpE,UAAI,aAAa;AACf,eAAO,cAAc;AAAA,MACvB;AACA,cAAQ,KAAK,MAAM;AAAA,IACrB,QAAQ;AAAA,IAIR;AAAA,EACF;AACA,SAAO;AACT;","names":[]}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@integrity-labs/agt-cli",
3
- "version": "0.28.5",
3
+ "version": "0.28.6",
4
4
  "description": "Augmented Team CLI — agent provisioning and management",
5
5
  "type": "module",
6
6
  "engines": {
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/lib/responsiveness-probe.ts"],"sourcesContent":["/**\n * ENG-5399 — Tier 1 responsiveness probe (manager-side).\n *\n * Cheap, fast-cadence canary that catches \"agent went silent\" inside\n * minutes, well before the existing synthetic-probe cron's ~35 min\n * staleness window (`SyntheticReplyAgeSeconds`, ENG-5122).\n *\n * Mechanism: for each managed agent, read the mtime of the agent's\n * `pane.log` and report `now - mtime` as `PaneActivityAgeSeconds` via\n * a new `/host/responsiveness-probe` endpoint. `pane.log` is the\n * tmux pipe-pane sink set up by `setupPaneLog()` — any visible\n * activity (assistant turns, tool calls, in-place progress\n * heartbeats) bumps its mtime. A silent agent has a steadily\n * climbing age that lands in CloudWatch and trips a per-agent alarm.\n *\n * ENG-6017 adds a second per-agent signal on the same cadence:\n * `pending_inbound_oldest_age_seconds` — the age of the oldest marker\n * file across the agent's `*-pending-inbound/` directories (written by\n * the channel MCP servers for inbounds awaiting delivery). This is the\n * one artifact of the \"message typed but never submitted\" failure mode\n * that every other canary is blind to: in the koda incident\n * (2026-06-04) an operator Slack DM sat undelivered for 40+ minutes\n * while pane-activity stayed fresh (health checks), synthetic probes\n * were answered by the one-shot fallback, and heartbeat/session-alive\n * only reflect manager health. The field is OMITTED (not zero) when the\n * agent has no pending-inbound markers — the API treats absent as\n * \"no signal\", never as \"healthy\" (absent-vs-zero matters for\n * mixed-version fleets where old CLIs don't report it at all).\n *\n * Run from `pollCycle()` in `manager-worker.ts` on a configurable\n * interval (default 5 min via `AUGMENTED_RESPONSIVENESS_INTERVAL_MS`).\n */\n\nimport { mkdirSync, readdirSync, readFileSync, renameSync, statSync, unlinkSync } from 'node:fs';\nimport { dirname, join } from 'node:path';\nimport { paneLogPath } from './persistent-session.js';\n\nexport interface ResponsivenessProbeResult {\n code_name: string;\n pane_activity_age_seconds: number;\n /**\n * ENG-6017: age (s) of the oldest marker file across the agent's\n * `*-pending-inbound/` directories. Omitted when no markers exist —\n * absent means \"no signal\", NOT \"zero / healthy\".\n */\n pending_inbound_oldest_age_seconds?: number;\n /**\n * ENG-6327: per-cause inbound deflection counts since the last probe (read +\n * RESET from the channel servers' `<channel>-deflections.json` counter files).\n * Omitted when there were none. Keyed by DeflectionCause.\n */\n deflections?: Record<string, number>;\n}\n\n/**\n * ENG-6327 — per-channel deflection counter file-layout contract. The channel\n * MCP servers increment `<channel>-deflections.json` in the agent home (via\n * recordChannelDeflection in packages/mcp/ack-reaction.ts); apps/cli does not\n * depend on @integrity-labs/mcp, so the manager re-implements the read against\n * this contract — the same read-only, no-IPC posture as the `*-pending-inbound`\n * scan above. Body is a JSON object keyed by cause with integer counts.\n */\nconst DEFLECTION_COUNTER_SUFFIX = '-deflections.json';\nconst KNOWN_DEFLECTION_CAUSES = [\n 'integration_down',\n 'session_dead',\n 'wedged',\n 'busy',\n 'unknown',\n] as const;\n\n/**\n * Read and RESET every `<channel>-deflections.json` in an agent home, returning\n * the summed counts by cause (or null when there are none). Consumes via atomic\n * rename-then-read-then-unlink so an increment racing the reset is carried into\n * the next window rather than lost. Only known causes are accumulated so a\n * corrupt file can't smuggle an unbounded dimension into CloudWatch.\n *\n * Reset-on-read is required: these are deltas-since-last-consume, so the metric\n * would over-count if they weren't cleared each cycle. The trade-off is that a\n * failed POST loses that cycle's counts (under-count) — acceptable for a\n * frequency metric and consistent with the other best-effort probe siblings;\n * unlike the in-memory give-up counter, file-based counts aren't re-credited\n * on POST failure.\n */\nexport function readAndResetChannelDeflections(\n agentHomeDir: string,\n): Record<string, number> | null {\n let names: string[];\n try {\n names = readdirSync(agentHomeDir);\n } catch {\n return null;\n }\n const total: Record<string, number> = {};\n let any = false;\n for (const name of names) {\n if (!name.endsWith(DEFLECTION_COUNTER_SUFFIX)) continue;\n const full = join(agentHomeDir, name);\n const consuming = `${full}.consuming`;\n try {\n renameSync(full, consuming); // atomic; replaces any stale .consuming from a crashed cycle\n } catch {\n continue; // vanished / not a regular file — skip\n }\n try {\n const parsed = JSON.parse(readFileSync(consuming, 'utf8')) as Record<string, unknown>;\n if (parsed && typeof parsed === 'object') {\n for (const cause of KNOWN_DEFLECTION_CAUSES) {\n const n = parsed[cause];\n if (typeof n === 'number' && Number.isFinite(n) && n > 0) {\n total[cause] = (total[cause] ?? 0) + Math.floor(n);\n any = true;\n }\n }\n }\n } catch {\n /* corrupt — drop it */\n }\n try {\n unlinkSync(consuming);\n } catch {\n /* non-fatal */\n }\n }\n return any ? total : null;\n}\n\nconst DEFAULT_INTERVAL_MS = 5 * 60 * 1000;\n\nexport function getResponsivenessIntervalMs(): number {\n const raw = process.env.AUGMENTED_RESPONSIVENESS_INTERVAL_MS;\n if (!raw) return DEFAULT_INTERVAL_MS;\n const parsed = Number.parseInt(raw, 10);\n return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_INTERVAL_MS;\n}\n\n/**\n * ENG-6017: oldest pending-inbound marker mtime (ms epoch) for an agent,\n * or null when the agent has no markers / no pending-inbound dirs.\n *\n * The channel MCP servers (slack-channel, telegram-channel, …) write one\n * marker file per inbound into `~/.augmented/<codeName>/<channel>-pending-\n * inbound/` and clear it when the agent acknowledges the message. The\n * directory layout is the contract here — read-only, no IPC with the MCP\n * (the MCP and CLI release independently; file mtimes need no protocol).\n *\n * ENG-6072: only plain, non-hidden files count as markers. The msteams MCP\n * keeps `.markers/` and `.processed/` housekeeping SUBDIRECTORIES inside its\n * pending-inbound dir; their mtimes never advance, so statting every dirent\n * made the gauge climb forever and fired pending-inbound-stale on agents with\n * zero stranded messages (kylie ~3.4d / scout ~34h false ALARMs the moment\n * ENG-6023 activated the alarm). Dot-entries are skipped wholesale — the\n * hidden namespace is reserved for MCP bookkeeping, never for markers.\n */\nfunction oldestPendingInboundMtimeMs(agentHomeDir: string): number | null {\n let oldest: number | null = null;\n let entries;\n try {\n entries = readdirSync(agentHomeDir, { withFileTypes: true });\n } catch {\n return null; // agent home missing — nothing to report\n }\n for (const entry of entries) {\n if (!entry.isDirectory() || !entry.name.endsWith('-pending-inbound')) continue;\n const dir = join(agentHomeDir, entry.name);\n let files;\n try {\n files = readdirSync(dir, { withFileTypes: true });\n } catch {\n continue;\n }\n for (const file of files) {\n if (!file.isFile() || file.name.startsWith('.')) continue;\n try {\n const mtimeMs = statSync(join(dir, file.name)).mtimeMs;\n if (oldest === null || mtimeMs < oldest) oldest = mtimeMs;\n } catch {\n // Marker drained between readdir and stat — that's the happy path.\n }\n }\n }\n return oldest;\n}\n\n/**\n * ENG-6160 / ENG-6319: read a marker's classification flags.\n * - `{...}` → parsed flags.\n * - `null` → vanished mid-scan (ENOENT) — drained between stat and\n * read, the happy path; callers exclude it.\n * - `'malformed'` → present but unreadable for another reason. The live\n * scan treats this as LIVE (a corrupt marker can never\n * mask a real wedge); park leaves it in place (a corrupt\n * marker must never become a dropped message).\n */\ninterface MarkerFlags {\n /** ENG-5846: dead-lettered by the channel (⏳-noticed, never drainable). */\n undeliverable: boolean;\n /**\n * ENG-6319: auto-followed participant-thread inbound the agent may\n * legitimately skip. Excluded from the wedge live-scan (a deliberate skip\n * must not read as \"failing to drain\" and force-respawn a healthy\n * session) but still parked across respawns — it may be a real\n * operator message awaiting a reply.\n */\n discretionary: boolean;\n}\n\nfunction readMarkerFlags(markerPath: string): MarkerFlags | null | 'malformed' {\n try {\n const parsed = JSON.parse(readFileSync(markerPath, 'utf8')) as {\n undeliverable?: unknown;\n discretionary?: unknown;\n };\n return {\n undeliverable: parsed?.undeliverable === true,\n discretionary: parsed?.discretionary === true,\n };\n } catch (error) {\n return (error as NodeJS.ErrnoException).code === 'ENOENT' ? null : 'malformed';\n }\n}\n\n/**\n * ENG-6160: oldest *LIVE* pending-inbound marker mtime (ms epoch) for an agent,\n * or null when there is no live marker. \"Live\" excludes:\n *\n * - markers older than `sessionStartMs` — a marker written before the current\n * session started is a leftover from a PREVIOUS session and cannot mean\n * *this* session is failing to drain. This is the load-bearing exclusion:\n * without it, an orphan marker survives a fresh respawn and the wedge\n * detector re-fires forever on a healthy idle agent (the sherlock enforce\n * loop, 2026-06-08: `inboundAge=3389s` on a `● Ready.` session).\n * - markers flagged `undeliverable: true` — already dead-lettered by the channel.\n * - markers flagged `discretionary: true` (ENG-6319) — skippable auto-followed\n * participant-thread inbound; a deliberate skip must not force-respawn a\n * healthy session.\n *\n * Distinct from `oldestPendingInboundMtimeMs` (which counts ALL markers and\n * feeds the ENG-6017 `pending-inbound-stale` CloudWatch alarm — that alarm\n * *wants* to fire on a stuck inbound, so its semantics must NOT change). This\n * variant is wedge-detection-only.\n */\nexport function oldestLivePendingInboundMtimeMs(\n agentHomeDir: string,\n opts: { sessionStartMs?: number | null } = {},\n): number | null {\n const sessionStartMs = opts.sessionStartMs ?? null;\n let oldest: number | null = null;\n let entries;\n try {\n entries = readdirSync(agentHomeDir, { withFileTypes: true });\n } catch {\n return null;\n }\n for (const entry of entries) {\n if (!entry.isDirectory() || !entry.name.endsWith('-pending-inbound')) continue;\n const dir = join(agentHomeDir, entry.name);\n let files;\n try {\n files = readdirSync(dir, { withFileTypes: true });\n } catch {\n continue;\n }\n for (const file of files) {\n if (!file.isFile() || file.name.startsWith('.')) continue;\n const full = join(dir, file.name);\n let mtimeMs: number;\n try {\n mtimeMs = statSync(full).mtimeMs;\n } catch {\n continue; // drained between readdir and stat — happy path\n }\n if (sessionStartMs !== null && mtimeMs < sessionStartMs) continue; // pre-session leftover\n const flags = readMarkerFlags(full);\n if (flags === null) continue; // vanished between stat and read — drained, exclude\n if (flags !== 'malformed' && (flags.undeliverable || flags.discretionary)) continue;\n if (oldest === null || mtimeMs < oldest) oldest = mtimeMs;\n }\n }\n return oldest;\n}\n\n/**\n * ENG-6160: age (s) of the oldest LIVE pending-inbound marker for an agent, or\n * null when none. The wedge detector uses this instead of the alarm-facing\n * `pending_inbound_oldest_age_seconds` so a stale/dead-letter marker can't\n * false-fire a respawn.\n */\nexport function livePendingInboundOldestAgeSeconds(\n codeName: string,\n sessionStartMs: number | null,\n now: Date = new Date(),\n): number | null {\n const oldest = oldestLivePendingInboundMtimeMs(dirname(paneLogPath(codeName)), { sessionStartMs });\n if (oldest === null) return null;\n return Math.max(0, Math.floor((now.getTime() - oldest) / 1000));\n}\n\n/**\n * ENG-6160: move every pending-inbound marker for an agent aside into a sibling\n * `<channel>-pending-inbound-stale/` directory (NOT silently deleted — the\n * payload pointer is preserved for forensics), returning the count moved.\n *\n * ENG-6289: no longer called on wedge respawn — the wedge path parks instead\n * (see `parkPendingInbound` above; blanket dead-letter permanently dropped the\n * user's message). Kept as the explicit \"move everything aside\" seam for tests\n * and operator emergencies. The stale dir does not end in `-pending-inbound`,\n * so neither the probe nor this scan re-counts moved markers.\n */\n/**\n * ENG-6289: park-not-drop. On a force-fresh wedge respawn, KEEP undrained\n * pending-inbound markers in their live dirs instead of dead-lettering them —\n * the fresh session's orient hook surfaces them (\"N queued messages\" + details)\n * and ENG-5969 replay (when enabled) re-pushes their payloads. Markers are NOT\n * rewritten (no counter, no mtime bump): the ENG-6160 pre-session exclusion in\n * `oldestLivePendingInboundMtimeMs` already keeps an untouched parked marker\n * out of the fresh session's wedge signal, and re-delivery stays bounded by\n * the existing machinery — the channel-side `replay_count` cap (≤3) and the\n * orphan sweep's received_at TTL. A manager-side rewrite would be the first\n * cross-process writer into marker files, where a torn read in the channel\n * sweep DELETES the marker (`unlinkSync` on parse failure) — the exact drop\n * this function exists to prevent.\n *\n * Only markers already flagged `undeliverable: true` are dead-lettered (moved\n * to `-stale`) — the channel already gave the user the ⏳ notice for those, so\n * nothing can ever drain them. Malformed markers are LEFT IN PLACE, matching\n * the live-scan philosophy above (a corrupt marker must never mask — or\n * become — a dropped message).\n *\n * The msteams dir is skipped wholesale: its top-level files are raw Bot\n * Framework activity payloads (the transport queue, not bookkeeping — real\n * markers live in the hidden `.markers/` subdir this scan never touches), and\n * the teams channel server's boot drain redelivers them to the fresh session\n * on its own. Moving them aside (what the pre-ENG-6289 dead-letter did) was\n * actively defeating the one channel with native respawn recovery.\n */\nexport interface ParkPendingInboundResult {\n parked: number;\n deadLettered: number;\n}\n\n/**\n * Move one marker into the sibling `-stale` dead-letter dir (moved, not\n * deleted — preserved for forensics). Returns true on success; best-effort —\n * a marker that vanished or can't move is left as-is.\n */\nfunction moveMarkerToStale(dir: string, deadDir: string, name: string): boolean {\n try {\n mkdirSync(deadDir, { recursive: true });\n renameSync(join(dir, name), join(deadDir, name));\n return true;\n } catch {\n return false;\n }\n}\n\nexport function parkPendingInbound(codeName: string, _now: Date = new Date()): ParkPendingInboundResult {\n const home = dirname(paneLogPath(codeName));\n const result: ParkPendingInboundResult = { parked: 0, deadLettered: 0 };\n let entries;\n try {\n entries = readdirSync(home, { withFileTypes: true });\n } catch {\n return result;\n }\n for (const entry of entries) {\n if (!entry.isDirectory() || !entry.name.endsWith('-pending-inbound')) continue;\n if (entry.name === 'msteams-pending-inbound') continue; // transport queue — boot drain owns recovery\n const dir = join(home, entry.name);\n const deadDir = join(home, `${entry.name}-stale`);\n let files;\n try {\n files = readdirSync(dir, { withFileTypes: true });\n } catch {\n continue;\n }\n for (const file of files) {\n if (!file.isFile() || file.name.startsWith('.')) continue;\n const flags = readMarkerFlags(join(dir, file.name));\n if (flags === null) continue; // drained mid-scan — already gone\n // Only undeliverable markers dead-letter. Discretionary markers\n // (ENG-6319) PARK like engaged ones — they may be a real operator\n // message awaiting a reply (the live silent-loss class); malformed\n // markers park too (corrupt must never become a drop).\n if (flags !== 'malformed' && flags.undeliverable) {\n if (moveMarkerToStale(dir, deadDir, file.name)) result.deadLettered++;\n } else {\n result.parked++;\n }\n }\n }\n return result;\n}\n\nexport function deadLetterPendingInbound(codeName: string, _now: Date = new Date()): number {\n const home = dirname(paneLogPath(codeName));\n let moved = 0;\n let entries;\n try {\n entries = readdirSync(home, { withFileTypes: true });\n } catch {\n return 0;\n }\n for (const entry of entries) {\n if (!entry.isDirectory() || !entry.name.endsWith('-pending-inbound')) continue;\n const dir = join(home, entry.name);\n const deadDir = join(home, `${entry.name}-stale`);\n let files;\n try {\n files = readdirSync(dir, { withFileTypes: true });\n } catch {\n continue;\n }\n for (const file of files) {\n if (!file.isFile() || file.name.startsWith('.')) continue;\n if (moveMarkerToStale(dir, deadDir, file.name)) moved++;\n }\n }\n return moved;\n}\n\n/**\n * Compute the pane.log age for each agent. Missing or unreadable\n * pane.log returns null — the caller should drop those entries\n * rather than fabricate a \"fresh\" or \"ancient\" value. A missing\n * file means the agent has never spawned in this manager generation,\n * which is a separate problem covered by SessionAliveAgeSeconds.\n */\nexport function collectResponsivenessProbes(\n codeNames: string[],\n now: Date = new Date(),\n): ResponsivenessProbeResult[] {\n const nowMs = now.getTime();\n const results: ResponsivenessProbeResult[] = [];\n for (const codeName of codeNames) {\n try {\n const panePath = paneLogPath(codeName);\n const mtimeMs = statSync(panePath).mtimeMs;\n const ageSeconds = Math.max(0, Math.floor((nowMs - mtimeMs) / 1000));\n const result: ResponsivenessProbeResult = {\n code_name: codeName,\n pane_activity_age_seconds: ageSeconds,\n };\n // ENG-6017: piggyback the pending-inbound drain-age scan on the same\n // cadence. Field omitted (not 0) when there are no markers.\n const oldestMarkerMs = oldestPendingInboundMtimeMs(dirname(panePath));\n if (oldestMarkerMs !== null) {\n result.pending_inbound_oldest_age_seconds = Math.max(\n 0,\n Math.floor((nowMs - oldestMarkerMs) / 1000),\n );\n }\n // ENG-6327: read + RESET the per-channel deflection counters on the same\n // cadence so each cause's frequency reaches CloudWatch (ChannelDeflections,\n // Cause dimension). Omitted when there were none this window.\n const deflections = readAndResetChannelDeflections(dirname(panePath));\n if (deflections) {\n result.deflections = deflections;\n }\n results.push(result);\n } catch {\n // No pane.log yet (fresh agent, never spawned) — skip. The\n // session-alive monitor already covers the \"should be running\n // but isn't\" case.\n }\n }\n return results;\n}\n"],"mappings":";;;;;;;AAiCA,SAAS,WAAW,aAAa,cAAc,YAAY,UAAU,kBAAkB;AACvF,SAAS,SAAS,YAAY;AA4B9B,IAAM,4BAA4B;AAClC,IAAM,0BAA0B;AAAA,EAC9B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF;AAgBO,SAAS,+BACd,cAC+B;AAC/B,MAAI;AACJ,MAAI;AACF,YAAQ,YAAY,YAAY;AAAA,EAClC,QAAQ;AACN,WAAO;AAAA,EACT;AACA,QAAM,QAAgC,CAAC;AACvC,MAAI,MAAM;AACV,aAAW,QAAQ,OAAO;AACxB,QAAI,CAAC,KAAK,SAAS,yBAAyB,EAAG;AAC/C,UAAM,OAAO,KAAK,cAAc,IAAI;AACpC,UAAM,YAAY,GAAG,IAAI;AACzB,QAAI;AACF,iBAAW,MAAM,SAAS;AAAA,IAC5B,QAAQ;AACN;AAAA,IACF;AACA,QAAI;AACF,YAAM,SAAS,KAAK,MAAM,aAAa,WAAW,MAAM,CAAC;AACzD,UAAI,UAAU,OAAO,WAAW,UAAU;AACxC,mBAAW,SAAS,yBAAyB;AAC3C,gBAAM,IAAI,OAAO,KAAK;AACtB,cAAI,OAAO,MAAM,YAAY,OAAO,SAAS,CAAC,KAAK,IAAI,GAAG;AACxD,kBAAM,KAAK,KAAK,MAAM,KAAK,KAAK,KAAK,KAAK,MAAM,CAAC;AACjD,kBAAM;AAAA,UACR;AAAA,QACF;AAAA,MACF;AAAA,IACF,QAAQ;AAAA,IAER;AACA,QAAI;AACF,iBAAW,SAAS;AAAA,IACtB,QAAQ;AAAA,IAER;AAAA,EACF;AACA,SAAO,MAAM,QAAQ;AACvB;AAEA,IAAM,sBAAsB,IAAI,KAAK;AAE9B,SAAS,8BAAsC;AACpD,QAAM,MAAM,QAAQ,IAAI;AACxB,MAAI,CAAC,IAAK,QAAO;AACjB,QAAM,SAAS,OAAO,SAAS,KAAK,EAAE;AACtC,SAAO,OAAO,SAAS,MAAM,KAAK,SAAS,IAAI,SAAS;AAC1D;AAoBA,SAAS,4BAA4B,cAAqC;AACxE,MAAI,SAAwB;AAC5B,MAAI;AACJ,MAAI;AACF,cAAU,YAAY,cAAc,EAAE,eAAe,KAAK,CAAC;AAAA,EAC7D,QAAQ;AACN,WAAO;AAAA,EACT;AACA,aAAW,SAAS,SAAS;AAC3B,QAAI,CAAC,MAAM,YAAY,KAAK,CAAC,MAAM,KAAK,SAAS,kBAAkB,EAAG;AACtE,UAAM,MAAM,KAAK,cAAc,MAAM,IAAI;AACzC,QAAI;AACJ,QAAI;AACF,cAAQ,YAAY,KAAK,EAAE,eAAe,KAAK,CAAC;AAAA,IAClD,QAAQ;AACN;AAAA,IACF;AACA,eAAW,QAAQ,OAAO;AACxB,UAAI,CAAC,KAAK,OAAO,KAAK,KAAK,KAAK,WAAW,GAAG,EAAG;AACjD,UAAI;AACF,cAAM,UAAU,SAAS,KAAK,KAAK,KAAK,IAAI,CAAC,EAAE;AAC/C,YAAI,WAAW,QAAQ,UAAU,OAAQ,UAAS;AAAA,MACpD,QAAQ;AAAA,MAER;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAyBA,SAAS,gBAAgB,YAAsD;AAC7E,MAAI;AACF,UAAM,SAAS,KAAK,MAAM,aAAa,YAAY,MAAM,CAAC;AAI1D,WAAO;AAAA,MACL,eAAe,QAAQ,kBAAkB;AAAA,MACzC,eAAe,QAAQ,kBAAkB;AAAA,IAC3C;AAAA,EACF,SAAS,OAAO;AACd,WAAQ,MAAgC,SAAS,WAAW,OAAO;AAAA,EACrE;AACF;AAsBO,SAAS,gCACd,cACA,OAA2C,CAAC,GAC7B;AACf,QAAM,iBAAiB,KAAK,kBAAkB;AAC9C,MAAI,SAAwB;AAC5B,MAAI;AACJ,MAAI;AACF,cAAU,YAAY,cAAc,EAAE,eAAe,KAAK,CAAC;AAAA,EAC7D,QAAQ;AACN,WAAO;AAAA,EACT;AACA,aAAW,SAAS,SAAS;AAC3B,QAAI,CAAC,MAAM,YAAY,KAAK,CAAC,MAAM,KAAK,SAAS,kBAAkB,EAAG;AACtE,UAAM,MAAM,KAAK,cAAc,MAAM,IAAI;AACzC,QAAI;AACJ,QAAI;AACF,cAAQ,YAAY,KAAK,EAAE,eAAe,KAAK,CAAC;AAAA,IAClD,QAAQ;AACN;AAAA,IACF;AACA,eAAW,QAAQ,OAAO;AACxB,UAAI,CAAC,KAAK,OAAO,KAAK,KAAK,KAAK,WAAW,GAAG,EAAG;AACjD,YAAM,OAAO,KAAK,KAAK,KAAK,IAAI;AAChC,UAAI;AACJ,UAAI;AACF,kBAAU,SAAS,IAAI,EAAE;AAAA,MAC3B,QAAQ;AACN;AAAA,MACF;AACA,UAAI,mBAAmB,QAAQ,UAAU,eAAgB;AACzD,YAAM,QAAQ,gBAAgB,IAAI;AAClC,UAAI,UAAU,KAAM;AACpB,UAAI,UAAU,gBAAgB,MAAM,iBAAiB,MAAM,eAAgB;AAC3E,UAAI,WAAW,QAAQ,UAAU,OAAQ,UAAS;AAAA,IACpD;AAAA,EACF;AACA,SAAO;AACT;AAQO,SAAS,mCACd,UACA,gBACA,MAAY,oBAAI,KAAK,GACN;AACf,QAAM,SAAS,gCAAgC,QAAQ,YAAY,QAAQ,CAAC,GAAG,EAAE,eAAe,CAAC;AACjG,MAAI,WAAW,KAAM,QAAO;AAC5B,SAAO,KAAK,IAAI,GAAG,KAAK,OAAO,IAAI,QAAQ,IAAI,UAAU,GAAI,CAAC;AAChE;AAkDA,SAAS,kBAAkB,KAAa,SAAiB,MAAuB;AAC9E,MAAI;AACF,cAAU,SAAS,EAAE,WAAW,KAAK,CAAC;AACtC,eAAW,KAAK,KAAK,IAAI,GAAG,KAAK,SAAS,IAAI,CAAC;AAC/C,WAAO;AAAA,EACT,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEO,SAAS,mBAAmB,UAAkB,OAAa,oBAAI,KAAK,GAA6B;AACtG,QAAM,OAAO,QAAQ,YAAY,QAAQ,CAAC;AAC1C,QAAM,SAAmC,EAAE,QAAQ,GAAG,cAAc,EAAE;AACtE,MAAI;AACJ,MAAI;AACF,cAAU,YAAY,MAAM,EAAE,eAAe,KAAK,CAAC;AAAA,EACrD,QAAQ;AACN,WAAO;AAAA,EACT;AACA,aAAW,SAAS,SAAS;AAC3B,QAAI,CAAC,MAAM,YAAY,KAAK,CAAC,MAAM,KAAK,SAAS,kBAAkB,EAAG;AACtE,QAAI,MAAM,SAAS,0BAA2B;AAC9C,UAAM,MAAM,KAAK,MAAM,MAAM,IAAI;AACjC,UAAM,UAAU,KAAK,MAAM,GAAG,MAAM,IAAI,QAAQ;AAChD,QAAI;AACJ,QAAI;AACF,cAAQ,YAAY,KAAK,EAAE,eAAe,KAAK,CAAC;AAAA,IAClD,QAAQ;AACN;AAAA,IACF;AACA,eAAW,QAAQ,OAAO;AACxB,UAAI,CAAC,KAAK,OAAO,KAAK,KAAK,KAAK,WAAW,GAAG,EAAG;AACjD,YAAM,QAAQ,gBAAgB,KAAK,KAAK,KAAK,IAAI,CAAC;AAClD,UAAI,UAAU,KAAM;AAKpB,UAAI,UAAU,eAAe,MAAM,eAAe;AAChD,YAAI,kBAAkB,KAAK,SAAS,KAAK,IAAI,EAAG,QAAO;AAAA,MACzD,OAAO;AACL,eAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAEO,SAAS,yBAAyB,UAAkB,OAAa,oBAAI,KAAK,GAAW;AAC1F,QAAM,OAAO,QAAQ,YAAY,QAAQ,CAAC;AAC1C,MAAI,QAAQ;AACZ,MAAI;AACJ,MAAI;AACF,cAAU,YAAY,MAAM,EAAE,eAAe,KAAK,CAAC;AAAA,EACrD,QAAQ;AACN,WAAO;AAAA,EACT;AACA,aAAW,SAAS,SAAS;AAC3B,QAAI,CAAC,MAAM,YAAY,KAAK,CAAC,MAAM,KAAK,SAAS,kBAAkB,EAAG;AACtE,UAAM,MAAM,KAAK,MAAM,MAAM,IAAI;AACjC,UAAM,UAAU,KAAK,MAAM,GAAG,MAAM,IAAI,QAAQ;AAChD,QAAI;AACJ,QAAI;AACF,cAAQ,YAAY,KAAK,EAAE,eAAe,KAAK,CAAC;AAAA,IAClD,QAAQ;AACN;AAAA,IACF;AACA,eAAW,QAAQ,OAAO;AACxB,UAAI,CAAC,KAAK,OAAO,KAAK,KAAK,KAAK,WAAW,GAAG,EAAG;AACjD,UAAI,kBAAkB,KAAK,SAAS,KAAK,IAAI,EAAG;AAAA,IAClD;AAAA,EACF;AACA,SAAO;AACT;AASO,SAAS,4BACd,WACA,MAAY,oBAAI,KAAK,GACQ;AAC7B,QAAM,QAAQ,IAAI,QAAQ;AAC1B,QAAM,UAAuC,CAAC;AAC9C,aAAW,YAAY,WAAW;AAChC,QAAI;AACF,YAAM,WAAW,YAAY,QAAQ;AACrC,YAAM,UAAU,SAAS,QAAQ,EAAE;AACnC,YAAM,aAAa,KAAK,IAAI,GAAG,KAAK,OAAO,QAAQ,WAAW,GAAI,CAAC;AACnE,YAAM,SAAoC;AAAA,QACxC,WAAW;AAAA,QACX,2BAA2B;AAAA,MAC7B;AAGA,YAAM,iBAAiB,4BAA4B,QAAQ,QAAQ,CAAC;AACpE,UAAI,mBAAmB,MAAM;AAC3B,eAAO,qCAAqC,KAAK;AAAA,UAC/C;AAAA,UACA,KAAK,OAAO,QAAQ,kBAAkB,GAAI;AAAA,QAC5C;AAAA,MACF;AAIA,YAAM,cAAc,+BAA+B,QAAQ,QAAQ,CAAC;AACpE,UAAI,aAAa;AACf,eAAO,cAAc;AAAA,MACvB;AACA,cAAQ,KAAK,MAAM;AAAA,IACrB,QAAQ;AAAA,IAIR;AAAA,EACF;AACA,SAAO;AACT;","names":[]}