@integrity-labs/agt-cli 0.27.165 → 0.27.167
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/agt.js +3 -3
- package/dist/{chunk-76ZM4WNY.js → chunk-IU32SVV7.js} +1 -1
- package/dist/lib/manager-worker.js +33 -13
- package/dist/lib/manager-worker.js.map +1 -1
- package/dist/mcp/slack-channel.js +32 -13
- package/dist/mcp/telegram-channel.js +1 -0
- package/dist/{responsiveness-probe-AL3O7SYZ.js → responsiveness-probe-7NDEHXXZ.js} +13 -10
- package/dist/responsiveness-probe-7NDEHXXZ.js.map +1 -0
- package/package.json +1 -1
- package/dist/responsiveness-probe-AL3O7SYZ.js.map +0 -1
- /package/dist/{chunk-76ZM4WNY.js.map → chunk-IU32SVV7.js.map} +0 -0
|
@@ -14208,6 +14208,9 @@ function decideSlackEngagement(input) {
|
|
|
14208
14208
|
const isExplicitMention = input.type === "app_mention" || !!input.botUserId && input.text.includes(`<@${input.botUserId}>`);
|
|
14209
14209
|
return !(input.isAutoFollowed && !isExplicitMention);
|
|
14210
14210
|
}
|
|
14211
|
+
function decideMarkerArm(input) {
|
|
14212
|
+
return input.shouldEngage || input.isAutoFollowed && input.botPostedInThread;
|
|
14213
|
+
}
|
|
14211
14214
|
function decideSlackAckReaction(input) {
|
|
14212
14215
|
return Boolean(input.channel && input.ts);
|
|
14213
14216
|
}
|
|
@@ -14585,6 +14588,7 @@ function oldestPendingMarkerAgeMs(dir, now = Date.now()) {
|
|
|
14585
14588
|
let receivedAt;
|
|
14586
14589
|
try {
|
|
14587
14590
|
const raw = JSON.parse(readFileSync2(join2(dir, name), "utf-8"));
|
|
14591
|
+
if (raw.discretionary === true) continue;
|
|
14588
14592
|
receivedAt = raw.received_at;
|
|
14589
14593
|
} catch {
|
|
14590
14594
|
continue;
|
|
@@ -15439,7 +15443,11 @@ function loadThreadStore(filePath, opts = {}) {
|
|
|
15439
15443
|
pruned++;
|
|
15440
15444
|
continue;
|
|
15441
15445
|
}
|
|
15442
|
-
threads.set(key2, {
|
|
15446
|
+
threads.set(key2, {
|
|
15447
|
+
involvement: entry.involvement,
|
|
15448
|
+
last_seen_at: entry.last_seen_at,
|
|
15449
|
+
...entry.bot_posted === true ? { bot_posted: true } : {}
|
|
15450
|
+
});
|
|
15443
15451
|
}
|
|
15444
15452
|
return { threads, pruned };
|
|
15445
15453
|
}
|
|
@@ -16600,7 +16608,7 @@ function slackPendingInboundPath(channel, threadTs, messageTs) {
|
|
|
16600
16608
|
if (!SLACK_PENDING_INBOUND_DIR) return null;
|
|
16601
16609
|
return join7(SLACK_PENDING_INBOUND_DIR, safeSlackMarkerName(channel, threadTs, messageTs));
|
|
16602
16610
|
}
|
|
16603
|
-
function writeSlackPendingInboundMarker(channel, threadTs, messageTs, undeliverable = false) {
|
|
16611
|
+
function writeSlackPendingInboundMarker(channel, threadTs, messageTs, undeliverable = false, discretionary = false) {
|
|
16604
16612
|
const path = slackPendingInboundPath(channel, threadTs, messageTs);
|
|
16605
16613
|
if (!path || !SLACK_PENDING_INBOUND_DIR) return;
|
|
16606
16614
|
const marker = {
|
|
@@ -16608,8 +16616,9 @@ function writeSlackPendingInboundMarker(channel, threadTs, messageTs, undelivera
|
|
|
16608
16616
|
thread_ts: threadTs,
|
|
16609
16617
|
message_ts: messageTs,
|
|
16610
16618
|
received_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
16611
|
-
// Only persist the
|
|
16612
|
-
...undeliverable ? { undeliverable: true } : {}
|
|
16619
|
+
// Only persist the flags when set — keeps healthy-path markers byte-identical.
|
|
16620
|
+
...undeliverable ? { undeliverable: true } : {},
|
|
16621
|
+
...discretionary ? { discretionary: true } : {}
|
|
16613
16622
|
};
|
|
16614
16623
|
try {
|
|
16615
16624
|
mkdirSync5(SLACK_PENDING_INBOUND_DIR, { recursive: true, mode: 448 });
|
|
@@ -16957,8 +16966,9 @@ function startSlackRecoveryOutboxWatcher() {
|
|
|
16957
16966
|
}
|
|
16958
16967
|
startSlackRecoveryOutboxWatcher();
|
|
16959
16968
|
var STALE_MARKER_MS = 24 * 60 * 60 * 1e3;
|
|
16960
|
-
|
|
16961
|
-
|
|
16969
|
+
var DISCRETIONARY_MARKER_MS = 10 * 60 * 1e3;
|
|
16970
|
+
function trackPendingMessage(channel, threadTs, messageTs, undeliverable = false, discretionary = false) {
|
|
16971
|
+
writeSlackPendingInboundMarker(channel, threadTs, messageTs, undeliverable, discretionary);
|
|
16962
16972
|
}
|
|
16963
16973
|
function sweepSlackStaleMarkers(thresholdMs) {
|
|
16964
16974
|
if (!SLACK_PENDING_INBOUND_DIR) return;
|
|
@@ -16995,7 +17005,8 @@ function sweepSlackStaleMarkers(thresholdMs) {
|
|
|
16995
17005
|
continue;
|
|
16996
17006
|
}
|
|
16997
17007
|
const { channel, thread_ts, message_ts, received_at } = marker;
|
|
16998
|
-
|
|
17008
|
+
const effectiveThresholdMs = marker.discretionary ? Math.min(thresholdMs, DISCRETIONARY_MARKER_MS) : thresholdMs;
|
|
17009
|
+
if (!channel || !thread_ts || !message_ts || isPendingMarkerStale(received_at, now, effectiveThresholdMs)) {
|
|
16999
17010
|
try {
|
|
17000
17011
|
unlinkSync4(fullPath);
|
|
17001
17012
|
} catch {
|
|
@@ -17031,6 +17042,7 @@ function listPendingSlackConversations() {
|
|
|
17031
17042
|
);
|
|
17032
17043
|
if (typeof marker.channel !== "string" || !marker.channel) continue;
|
|
17033
17044
|
if (typeof marker.thread_ts !== "string" || !marker.thread_ts) continue;
|
|
17045
|
+
if (marker.discretionary === true) continue;
|
|
17034
17046
|
const isThreadReply = typeof marker.message_ts === "string" && marker.message_ts !== marker.thread_ts;
|
|
17035
17047
|
const key2 = isThreadReply ? `${marker.channel}:${marker.thread_ts}` : marker.channel;
|
|
17036
17048
|
if (!byKey.has(key2)) {
|
|
@@ -17910,7 +17922,11 @@ function rememberThread(channel, threadTs, involvement, opts = {}) {
|
|
|
17910
17922
|
const existing = trackedThreads.get(key2);
|
|
17911
17923
|
const next = {
|
|
17912
17924
|
involvement: opts.override || !existing ? involvement : existing.involvement,
|
|
17913
|
-
last_seen_at: (/* @__PURE__ */ new Date()).toISOString()
|
|
17925
|
+
last_seen_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
17926
|
+
// ENG-6319: sticky — once the bot has posted in a thread it stays a
|
|
17927
|
+
// conversational participant for marker-arming (persisted only when true
|
|
17928
|
+
// so pre-flag entries and never-posted threads stay byte-identical).
|
|
17929
|
+
...opts.botPosted || existing?.bot_posted ? { bot_posted: true } : {}
|
|
17914
17930
|
};
|
|
17915
17931
|
trackedThreads.set(key2, next);
|
|
17916
17932
|
if (isShuttingDown) {
|
|
@@ -18378,7 +18394,7 @@ mcp.setRequestHandler(CallToolRequestSchema, async (req) => {
|
|
|
18378
18394
|
recordActivity("reply");
|
|
18379
18395
|
if (THREAD_AUTO_FOLLOW !== "off") {
|
|
18380
18396
|
const trackTs = effectiveThreadTs ?? data.ts ?? void 0;
|
|
18381
|
-
rememberThread(channel, trackTs, thread_ts ? "mentioned" : "started");
|
|
18397
|
+
rememberThread(channel, trackTs, thread_ts ? "mentioned" : "started", { botPosted: true });
|
|
18382
18398
|
}
|
|
18383
18399
|
if (interactiveHostAvailable() && data.ts) {
|
|
18384
18400
|
const cfg = {
|
|
@@ -19502,6 +19518,9 @@ async function connectSocketMode() {
|
|
|
19502
19518
|
isAutoFollowed,
|
|
19503
19519
|
botUserId
|
|
19504
19520
|
});
|
|
19521
|
+
const participantEntry = threadKey ? trackedThreads.get(threadKey) : void 0;
|
|
19522
|
+
const botPostedInThread = participantEntry?.bot_posted === true || participantEntry?.involvement === "started";
|
|
19523
|
+
const armMarker = decideMarkerArm({ shouldEngage, isAutoFollowed, botPostedInThread });
|
|
19505
19524
|
const ackProbe = process.env.TMUX && AGENT_CODE_NAME ? probeAgentSessionCached(AGENT_CODE_NAME) : { tmux: "unknown", claude: "unknown" };
|
|
19506
19525
|
let paneLogFreshAgeMs = null;
|
|
19507
19526
|
if (SLACK_AGENT_DIR) {
|
|
@@ -19531,13 +19550,13 @@ async function connectSocketMode() {
|
|
|
19531
19550
|
body: JSON.stringify({ channel, timestamp: ts, name: reactionName })
|
|
19532
19551
|
}).catch(() => {
|
|
19533
19552
|
});
|
|
19534
|
-
if (ackDecision === "undeliverable" && channel &&
|
|
19553
|
+
if (ackDecision === "undeliverable" && channel && armMarker) {
|
|
19535
19554
|
postUndeliverableNotice(channel, threadTs, isThreadReply);
|
|
19536
19555
|
}
|
|
19537
19556
|
}
|
|
19538
|
-
if (channel && ts &&
|
|
19539
|
-
trackPendingMessage(channel, threadTs, ts, ackDecision === "undeliverable");
|
|
19540
|
-
if (ackDecision === "ack") {
|
|
19557
|
+
if (channel && ts && armMarker) {
|
|
19558
|
+
trackPendingMessage(channel, threadTs, ts, ackDecision === "undeliverable", !shouldEngage);
|
|
19559
|
+
if (ackDecision === "ack" && shouldEngage) {
|
|
19541
19560
|
scheduleBusyAck(channel, threadTs, ts, isThreadReply);
|
|
19542
19561
|
}
|
|
19543
19562
|
}
|
|
@@ -16204,6 +16204,7 @@ function oldestPendingMarkerAgeMs(dir, now = Date.now()) {
|
|
|
16204
16204
|
let receivedAt;
|
|
16205
16205
|
try {
|
|
16206
16206
|
const raw = JSON.parse(readFileSync5(join5(dir, name), "utf-8"));
|
|
16207
|
+
if (raw.discretionary === true) continue;
|
|
16207
16208
|
receivedAt = raw.received_at;
|
|
16208
16209
|
} catch {
|
|
16209
16210
|
continue;
|
|
@@ -42,12 +42,15 @@ function oldestPendingInboundMtimeMs(agentHomeDir) {
|
|
|
42
42
|
}
|
|
43
43
|
return oldest;
|
|
44
44
|
}
|
|
45
|
-
function
|
|
45
|
+
function readMarkerFlags(markerPath) {
|
|
46
46
|
try {
|
|
47
47
|
const parsed = JSON.parse(readFileSync(markerPath, "utf8"));
|
|
48
|
-
return
|
|
48
|
+
return {
|
|
49
|
+
undeliverable: parsed?.undeliverable === true,
|
|
50
|
+
discretionary: parsed?.discretionary === true
|
|
51
|
+
};
|
|
49
52
|
} catch (error) {
|
|
50
|
-
return error.code === "ENOENT" ? null :
|
|
53
|
+
return error.code === "ENOENT" ? null : "malformed";
|
|
51
54
|
}
|
|
52
55
|
}
|
|
53
56
|
function oldestLivePendingInboundMtimeMs(agentHomeDir, opts = {}) {
|
|
@@ -78,9 +81,9 @@ function oldestLivePendingInboundMtimeMs(agentHomeDir, opts = {}) {
|
|
|
78
81
|
continue;
|
|
79
82
|
}
|
|
80
83
|
if (sessionStartMs !== null && mtimeMs < sessionStartMs) continue;
|
|
81
|
-
const
|
|
82
|
-
if (
|
|
83
|
-
if (undeliverable) continue;
|
|
84
|
+
const flags = readMarkerFlags(full);
|
|
85
|
+
if (flags === null) continue;
|
|
86
|
+
if (flags !== "malformed" && (flags.undeliverable || flags.discretionary)) continue;
|
|
84
87
|
if (oldest === null || mtimeMs < oldest) oldest = mtimeMs;
|
|
85
88
|
}
|
|
86
89
|
}
|
|
@@ -122,9 +125,9 @@ function parkPendingInbound(codeName, _now = /* @__PURE__ */ new Date()) {
|
|
|
122
125
|
}
|
|
123
126
|
for (const file of files) {
|
|
124
127
|
if (!file.isFile() || file.name.startsWith(".")) continue;
|
|
125
|
-
const
|
|
126
|
-
if (
|
|
127
|
-
if (undeliverable) {
|
|
128
|
+
const flags = readMarkerFlags(join(dir, file.name));
|
|
129
|
+
if (flags === null) continue;
|
|
130
|
+
if (flags !== "malformed" && flags.undeliverable) {
|
|
128
131
|
if (moveMarkerToStale(dir, deadDir, file.name)) result.deadLettered++;
|
|
129
132
|
} else {
|
|
130
133
|
result.parked++;
|
|
@@ -192,4 +195,4 @@ export {
|
|
|
192
195
|
oldestLivePendingInboundMtimeMs,
|
|
193
196
|
parkPendingInbound
|
|
194
197
|
};
|
|
195
|
-
//# sourceMappingURL=responsiveness-probe-
|
|
198
|
+
//# sourceMappingURL=responsiveness-probe-7NDEHXXZ.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/lib/responsiveness-probe.ts"],"sourcesContent":["/**\n * ENG-5399 — Tier 1 responsiveness probe (manager-side).\n *\n * Cheap, fast-cadence canary that catches \"agent went silent\" inside\n * minutes, well before the existing synthetic-probe cron's ~35 min\n * staleness window (`SyntheticReplyAgeSeconds`, ENG-5122).\n *\n * Mechanism: for each managed agent, read the mtime of the agent's\n * `pane.log` and report `now - mtime` as `PaneActivityAgeSeconds` via\n * a new `/host/responsiveness-probe` endpoint. `pane.log` is the\n * tmux pipe-pane sink set up by `setupPaneLog()` — any visible\n * activity (assistant turns, tool calls, in-place progress\n * heartbeats) bumps its mtime. A silent agent has a steadily\n * climbing age that lands in CloudWatch and trips a per-agent alarm.\n *\n * ENG-6017 adds a second per-agent signal on the same cadence:\n * `pending_inbound_oldest_age_seconds` — the age of the oldest marker\n * file across the agent's `*-pending-inbound/` directories (written by\n * the channel MCP servers for inbounds awaiting delivery). This is the\n * one artifact of the \"message typed but never submitted\" failure mode\n * that every other canary is blind to: in the koda incident\n * (2026-06-04) an operator Slack DM sat undelivered for 40+ minutes\n * while pane-activity stayed fresh (health checks), synthetic probes\n * were answered by the one-shot fallback, and heartbeat/session-alive\n * only reflect manager health. The field is OMITTED (not zero) when the\n * agent has no pending-inbound markers — the API treats absent as\n * \"no signal\", never as \"healthy\" (absent-vs-zero matters for\n * mixed-version fleets where old CLIs don't report it at all).\n *\n * Run from `pollCycle()` in `manager-worker.ts` on a configurable\n * interval (default 5 min via `AUGMENTED_RESPONSIVENESS_INTERVAL_MS`).\n */\n\nimport { mkdirSync, readdirSync, readFileSync, renameSync, statSync } from 'node:fs';\nimport { dirname, join } from 'node:path';\nimport { paneLogPath } from './persistent-session.js';\n\nexport interface ResponsivenessProbeResult {\n code_name: string;\n pane_activity_age_seconds: number;\n /**\n * ENG-6017: age (s) of the oldest marker file across the agent's\n * `*-pending-inbound/` directories. Omitted when no markers exist —\n * absent means \"no signal\", NOT \"zero / healthy\".\n */\n pending_inbound_oldest_age_seconds?: number;\n}\n\nconst DEFAULT_INTERVAL_MS = 5 * 60 * 1000;\n\nexport function getResponsivenessIntervalMs(): number {\n const raw = process.env.AUGMENTED_RESPONSIVENESS_INTERVAL_MS;\n if (!raw) return DEFAULT_INTERVAL_MS;\n const parsed = Number.parseInt(raw, 10);\n return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_INTERVAL_MS;\n}\n\n/**\n * ENG-6017: oldest pending-inbound marker mtime (ms epoch) for an agent,\n * or null when the agent has no markers / no pending-inbound dirs.\n *\n * The channel MCP servers (slack-channel, telegram-channel, …) write one\n * marker file per inbound into `~/.augmented/<codeName>/<channel>-pending-\n * inbound/` and clear it when the agent acknowledges the message. The\n * directory layout is the contract here — read-only, no IPC with the MCP\n * (the MCP and CLI release independently; file mtimes need no protocol).\n *\n * ENG-6072: only plain, non-hidden files count as markers. The msteams MCP\n * keeps `.markers/` and `.processed/` housekeeping SUBDIRECTORIES inside its\n * pending-inbound dir; their mtimes never advance, so statting every dirent\n * made the gauge climb forever and fired pending-inbound-stale on agents with\n * zero stranded messages (kylie ~3.4d / scout ~34h false ALARMs the moment\n * ENG-6023 activated the alarm). Dot-entries are skipped wholesale — the\n * hidden namespace is reserved for MCP bookkeeping, never for markers.\n */\nfunction oldestPendingInboundMtimeMs(agentHomeDir: string): number | null {\n let oldest: number | null = null;\n let entries;\n try {\n entries = readdirSync(agentHomeDir, { withFileTypes: true });\n } catch {\n return null; // agent home missing — nothing to report\n }\n for (const entry of entries) {\n if (!entry.isDirectory() || !entry.name.endsWith('-pending-inbound')) continue;\n const dir = join(agentHomeDir, entry.name);\n let files;\n try {\n files = readdirSync(dir, { withFileTypes: true });\n } catch {\n continue;\n }\n for (const file of files) {\n if (!file.isFile() || file.name.startsWith('.')) continue;\n try {\n const mtimeMs = statSync(join(dir, file.name)).mtimeMs;\n if (oldest === null || mtimeMs < oldest) oldest = mtimeMs;\n } catch {\n // Marker drained between readdir and stat — that's the happy path.\n }\n }\n }\n return oldest;\n}\n\n/**\n * ENG-6160 / ENG-6319: read a marker's classification flags.\n * - `{...}` → parsed flags.\n * - `null` → vanished mid-scan (ENOENT) — drained between stat and\n * read, the happy path; callers exclude it.\n * - `'malformed'` → present but unreadable for another reason. The live\n * scan treats this as LIVE (a corrupt marker can never\n * mask a real wedge); park leaves it in place (a corrupt\n * marker must never become a dropped message).\n */\ninterface MarkerFlags {\n /** ENG-5846: dead-lettered by the channel (⏳-noticed, never drainable). */\n undeliverable: boolean;\n /**\n * ENG-6319: auto-followed participant-thread inbound the agent may\n * legitimately skip. Excluded from the wedge live-scan (a deliberate skip\n * must not read as \"failing to drain\" and force-respawn a healthy\n * session) but still parked across respawns — it may be a real\n * operator message awaiting a reply.\n */\n discretionary: boolean;\n}\n\nfunction readMarkerFlags(markerPath: string): MarkerFlags | null | 'malformed' {\n try {\n const parsed = JSON.parse(readFileSync(markerPath, 'utf8')) as {\n undeliverable?: unknown;\n discretionary?: unknown;\n };\n return {\n undeliverable: parsed?.undeliverable === true,\n discretionary: parsed?.discretionary === true,\n };\n } catch (error) {\n return (error as NodeJS.ErrnoException).code === 'ENOENT' ? null : 'malformed';\n }\n}\n\n/**\n * ENG-6160: oldest *LIVE* pending-inbound marker mtime (ms epoch) for an agent,\n * or null when there is no live marker. \"Live\" excludes:\n *\n * - markers older than `sessionStartMs` — a marker written before the current\n * session started is a leftover from a PREVIOUS session and cannot mean\n * *this* session is failing to drain. This is the load-bearing exclusion:\n * without it, an orphan marker survives a fresh respawn and the wedge\n * detector re-fires forever on a healthy idle agent (the sherlock enforce\n * loop, 2026-06-08: `inboundAge=3389s` on a `● Ready.` session).\n * - markers flagged `undeliverable: true` — already dead-lettered by the channel.\n * - markers flagged `discretionary: true` (ENG-6319) — skippable auto-followed\n * participant-thread inbound; a deliberate skip must not force-respawn a\n * healthy session.\n *\n * Distinct from `oldestPendingInboundMtimeMs` (which counts ALL markers and\n * feeds the ENG-6017 `pending-inbound-stale` CloudWatch alarm — that alarm\n * *wants* to fire on a stuck inbound, so its semantics must NOT change). This\n * variant is wedge-detection-only.\n */\nexport function oldestLivePendingInboundMtimeMs(\n agentHomeDir: string,\n opts: { sessionStartMs?: number | null } = {},\n): number | null {\n const sessionStartMs = opts.sessionStartMs ?? null;\n let oldest: number | null = null;\n let entries;\n try {\n entries = readdirSync(agentHomeDir, { withFileTypes: true });\n } catch {\n return null;\n }\n for (const entry of entries) {\n if (!entry.isDirectory() || !entry.name.endsWith('-pending-inbound')) continue;\n const dir = join(agentHomeDir, entry.name);\n let files;\n try {\n files = readdirSync(dir, { withFileTypes: true });\n } catch {\n continue;\n }\n for (const file of files) {\n if (!file.isFile() || file.name.startsWith('.')) continue;\n const full = join(dir, file.name);\n let mtimeMs: number;\n try {\n mtimeMs = statSync(full).mtimeMs;\n } catch {\n continue; // drained between readdir and stat — happy path\n }\n if (sessionStartMs !== null && mtimeMs < sessionStartMs) continue; // pre-session leftover\n const flags = readMarkerFlags(full);\n if (flags === null) continue; // vanished between stat and read — drained, exclude\n if (flags !== 'malformed' && (flags.undeliverable || flags.discretionary)) continue;\n if (oldest === null || mtimeMs < oldest) oldest = mtimeMs;\n }\n }\n return oldest;\n}\n\n/**\n * ENG-6160: age (s) of the oldest LIVE pending-inbound marker for an agent, or\n * null when none. The wedge detector uses this instead of the alarm-facing\n * `pending_inbound_oldest_age_seconds` so a stale/dead-letter marker can't\n * false-fire a respawn.\n */\nexport function livePendingInboundOldestAgeSeconds(\n codeName: string,\n sessionStartMs: number | null,\n now: Date = new Date(),\n): number | null {\n const oldest = oldestLivePendingInboundMtimeMs(dirname(paneLogPath(codeName)), { sessionStartMs });\n if (oldest === null) return null;\n return Math.max(0, Math.floor((now.getTime() - oldest) / 1000));\n}\n\n/**\n * ENG-6160: move every pending-inbound marker for an agent aside into a sibling\n * `<channel>-pending-inbound-stale/` directory (NOT silently deleted — the\n * payload pointer is preserved for forensics), returning the count moved.\n *\n * ENG-6289: no longer called on wedge respawn — the wedge path parks instead\n * (see `parkPendingInbound` above; blanket dead-letter permanently dropped the\n * user's message). Kept as the explicit \"move everything aside\" seam for tests\n * and operator emergencies. The stale dir does not end in `-pending-inbound`,\n * so neither the probe nor this scan re-counts moved markers.\n */\n/**\n * ENG-6289: park-not-drop. On a force-fresh wedge respawn, KEEP undrained\n * pending-inbound markers in their live dirs instead of dead-lettering them —\n * the fresh session's orient hook surfaces them (\"N queued messages\" + details)\n * and ENG-5969 replay (when enabled) re-pushes their payloads. Markers are NOT\n * rewritten (no counter, no mtime bump): the ENG-6160 pre-session exclusion in\n * `oldestLivePendingInboundMtimeMs` already keeps an untouched parked marker\n * out of the fresh session's wedge signal, and re-delivery stays bounded by\n * the existing machinery — the channel-side `replay_count` cap (≤3) and the\n * orphan sweep's received_at TTL. A manager-side rewrite would be the first\n * cross-process writer into marker files, where a torn read in the channel\n * sweep DELETES the marker (`unlinkSync` on parse failure) — the exact drop\n * this function exists to prevent.\n *\n * Only markers already flagged `undeliverable: true` are dead-lettered (moved\n * to `-stale`) — the channel already gave the user the ⏳ notice for those, so\n * nothing can ever drain them. Malformed markers are LEFT IN PLACE, matching\n * the live-scan philosophy above (a corrupt marker must never mask — or\n * become — a dropped message).\n *\n * The msteams dir is skipped wholesale: its top-level files are raw Bot\n * Framework activity payloads (the transport queue, not bookkeeping — real\n * markers live in the hidden `.markers/` subdir this scan never touches), and\n * the teams channel server's boot drain redelivers them to the fresh session\n * on its own. Moving them aside (what the pre-ENG-6289 dead-letter did) was\n * actively defeating the one channel with native respawn recovery.\n */\nexport interface ParkPendingInboundResult {\n parked: number;\n deadLettered: number;\n}\n\n/**\n * Move one marker into the sibling `-stale` dead-letter dir (moved, not\n * deleted — preserved for forensics). Returns true on success; best-effort —\n * a marker that vanished or can't move is left as-is.\n */\nfunction moveMarkerToStale(dir: string, deadDir: string, name: string): boolean {\n try {\n mkdirSync(deadDir, { recursive: true });\n renameSync(join(dir, name), join(deadDir, name));\n return true;\n } catch {\n return false;\n }\n}\n\nexport function parkPendingInbound(codeName: string, _now: Date = new Date()): ParkPendingInboundResult {\n const home = dirname(paneLogPath(codeName));\n const result: ParkPendingInboundResult = { parked: 0, deadLettered: 0 };\n let entries;\n try {\n entries = readdirSync(home, { withFileTypes: true });\n } catch {\n return result;\n }\n for (const entry of entries) {\n if (!entry.isDirectory() || !entry.name.endsWith('-pending-inbound')) continue;\n if (entry.name === 'msteams-pending-inbound') continue; // transport queue — boot drain owns recovery\n const dir = join(home, entry.name);\n const deadDir = join(home, `${entry.name}-stale`);\n let files;\n try {\n files = readdirSync(dir, { withFileTypes: true });\n } catch {\n continue;\n }\n for (const file of files) {\n if (!file.isFile() || file.name.startsWith('.')) continue;\n const flags = readMarkerFlags(join(dir, file.name));\n if (flags === null) continue; // drained mid-scan — already gone\n // Only undeliverable markers dead-letter. Discretionary markers\n // (ENG-6319) PARK like engaged ones — they may be a real operator\n // message awaiting a reply (the live silent-loss class); malformed\n // markers park too (corrupt must never become a drop).\n if (flags !== 'malformed' && flags.undeliverable) {\n if (moveMarkerToStale(dir, deadDir, file.name)) result.deadLettered++;\n } else {\n result.parked++;\n }\n }\n }\n return result;\n}\n\nexport function deadLetterPendingInbound(codeName: string, _now: Date = new Date()): number {\n const home = dirname(paneLogPath(codeName));\n let moved = 0;\n let entries;\n try {\n entries = readdirSync(home, { withFileTypes: true });\n } catch {\n return 0;\n }\n for (const entry of entries) {\n if (!entry.isDirectory() || !entry.name.endsWith('-pending-inbound')) continue;\n const dir = join(home, entry.name);\n const deadDir = join(home, `${entry.name}-stale`);\n let files;\n try {\n files = readdirSync(dir, { withFileTypes: true });\n } catch {\n continue;\n }\n for (const file of files) {\n if (!file.isFile() || file.name.startsWith('.')) continue;\n if (moveMarkerToStale(dir, deadDir, file.name)) moved++;\n }\n }\n return moved;\n}\n\n/**\n * Compute the pane.log age for each agent. Missing or unreadable\n * pane.log returns null — the caller should drop those entries\n * rather than fabricate a \"fresh\" or \"ancient\" value. A missing\n * file means the agent has never spawned in this manager generation,\n * which is a separate problem covered by SessionAliveAgeSeconds.\n */\nexport function collectResponsivenessProbes(\n codeNames: string[],\n now: Date = new Date(),\n): ResponsivenessProbeResult[] {\n const nowMs = now.getTime();\n const results: ResponsivenessProbeResult[] = [];\n for (const codeName of codeNames) {\n try {\n const panePath = paneLogPath(codeName);\n const mtimeMs = statSync(panePath).mtimeMs;\n const ageSeconds = Math.max(0, Math.floor((nowMs - mtimeMs) / 1000));\n const result: ResponsivenessProbeResult = {\n code_name: codeName,\n pane_activity_age_seconds: ageSeconds,\n };\n // ENG-6017: piggyback the pending-inbound drain-age scan on the same\n // cadence. Field omitted (not 0) when there are no markers.\n const oldestMarkerMs = oldestPendingInboundMtimeMs(dirname(panePath));\n if (oldestMarkerMs !== null) {\n result.pending_inbound_oldest_age_seconds = Math.max(\n 0,\n Math.floor((nowMs - oldestMarkerMs) / 1000),\n );\n }\n results.push(result);\n } catch {\n // No pane.log yet (fresh agent, never spawned) — skip. The\n // session-alive monitor already covers the \"should be running\n // but isn't\" case.\n }\n }\n return results;\n}\n"],"mappings":";;;;;;;AAiCA,SAAS,WAAW,aAAa,cAAc,YAAY,gBAAgB;AAC3E,SAAS,SAAS,YAAY;AAc9B,IAAM,sBAAsB,IAAI,KAAK;AAE9B,SAAS,8BAAsC;AACpD,QAAM,MAAM,QAAQ,IAAI;AACxB,MAAI,CAAC,IAAK,QAAO;AACjB,QAAM,SAAS,OAAO,SAAS,KAAK,EAAE;AACtC,SAAO,OAAO,SAAS,MAAM,KAAK,SAAS,IAAI,SAAS;AAC1D;AAoBA,SAAS,4BAA4B,cAAqC;AACxE,MAAI,SAAwB;AAC5B,MAAI;AACJ,MAAI;AACF,cAAU,YAAY,cAAc,EAAE,eAAe,KAAK,CAAC;AAAA,EAC7D,QAAQ;AACN,WAAO;AAAA,EACT;AACA,aAAW,SAAS,SAAS;AAC3B,QAAI,CAAC,MAAM,YAAY,KAAK,CAAC,MAAM,KAAK,SAAS,kBAAkB,EAAG;AACtE,UAAM,MAAM,KAAK,cAAc,MAAM,IAAI;AACzC,QAAI;AACJ,QAAI;AACF,cAAQ,YAAY,KAAK,EAAE,eAAe,KAAK,CAAC;AAAA,IAClD,QAAQ;AACN;AAAA,IACF;AACA,eAAW,QAAQ,OAAO;AACxB,UAAI,CAAC,KAAK,OAAO,KAAK,KAAK,KAAK,WAAW,GAAG,EAAG;AACjD,UAAI;AACF,cAAM,UAAU,SAAS,KAAK,KAAK,KAAK,IAAI,CAAC,EAAE;AAC/C,YAAI,WAAW,QAAQ,UAAU,OAAQ,UAAS;AAAA,MACpD,QAAQ;AAAA,MAER;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAyBA,SAAS,gBAAgB,YAAsD;AAC7E,MAAI;AACF,UAAM,SAAS,KAAK,MAAM,aAAa,YAAY,MAAM,CAAC;AAI1D,WAAO;AAAA,MACL,eAAe,QAAQ,kBAAkB;AAAA,MACzC,eAAe,QAAQ,kBAAkB;AAAA,IAC3C;AAAA,EACF,SAAS,OAAO;AACd,WAAQ,MAAgC,SAAS,WAAW,OAAO;AAAA,EACrE;AACF;AAsBO,SAAS,gCACd,cACA,OAA2C,CAAC,GAC7B;AACf,QAAM,iBAAiB,KAAK,kBAAkB;AAC9C,MAAI,SAAwB;AAC5B,MAAI;AACJ,MAAI;AACF,cAAU,YAAY,cAAc,EAAE,eAAe,KAAK,CAAC;AAAA,EAC7D,QAAQ;AACN,WAAO;AAAA,EACT;AACA,aAAW,SAAS,SAAS;AAC3B,QAAI,CAAC,MAAM,YAAY,KAAK,CAAC,MAAM,KAAK,SAAS,kBAAkB,EAAG;AACtE,UAAM,MAAM,KAAK,cAAc,MAAM,IAAI;AACzC,QAAI;AACJ,QAAI;AACF,cAAQ,YAAY,KAAK,EAAE,eAAe,KAAK,CAAC;AAAA,IAClD,QAAQ;AACN;AAAA,IACF;AACA,eAAW,QAAQ,OAAO;AACxB,UAAI,CAAC,KAAK,OAAO,KAAK,KAAK,KAAK,WAAW,GAAG,EAAG;AACjD,YAAM,OAAO,KAAK,KAAK,KAAK,IAAI;AAChC,UAAI;AACJ,UAAI;AACF,kBAAU,SAAS,IAAI,EAAE;AAAA,MAC3B,QAAQ;AACN;AAAA,MACF;AACA,UAAI,mBAAmB,QAAQ,UAAU,eAAgB;AACzD,YAAM,QAAQ,gBAAgB,IAAI;AAClC,UAAI,UAAU,KAAM;AACpB,UAAI,UAAU,gBAAgB,MAAM,iBAAiB,MAAM,eAAgB;AAC3E,UAAI,WAAW,QAAQ,UAAU,OAAQ,UAAS;AAAA,IACpD;AAAA,EACF;AACA,SAAO;AACT;AAQO,SAAS,mCACd,UACA,gBACA,MAAY,oBAAI,KAAK,GACN;AACf,QAAM,SAAS,gCAAgC,QAAQ,YAAY,QAAQ,CAAC,GAAG,EAAE,eAAe,CAAC;AACjG,MAAI,WAAW,KAAM,QAAO;AAC5B,SAAO,KAAK,IAAI,GAAG,KAAK,OAAO,IAAI,QAAQ,IAAI,UAAU,GAAI,CAAC;AAChE;AAkDA,SAAS,kBAAkB,KAAa,SAAiB,MAAuB;AAC9E,MAAI;AACF,cAAU,SAAS,EAAE,WAAW,KAAK,CAAC;AACtC,eAAW,KAAK,KAAK,IAAI,GAAG,KAAK,SAAS,IAAI,CAAC;AAC/C,WAAO;AAAA,EACT,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEO,SAAS,mBAAmB,UAAkB,OAAa,oBAAI,KAAK,GAA6B;AACtG,QAAM,OAAO,QAAQ,YAAY,QAAQ,CAAC;AAC1C,QAAM,SAAmC,EAAE,QAAQ,GAAG,cAAc,EAAE;AACtE,MAAI;AACJ,MAAI;AACF,cAAU,YAAY,MAAM,EAAE,eAAe,KAAK,CAAC;AAAA,EACrD,QAAQ;AACN,WAAO;AAAA,EACT;AACA,aAAW,SAAS,SAAS;AAC3B,QAAI,CAAC,MAAM,YAAY,KAAK,CAAC,MAAM,KAAK,SAAS,kBAAkB,EAAG;AACtE,QAAI,MAAM,SAAS,0BAA2B;AAC9C,UAAM,MAAM,KAAK,MAAM,MAAM,IAAI;AACjC,UAAM,UAAU,KAAK,MAAM,GAAG,MAAM,IAAI,QAAQ;AAChD,QAAI;AACJ,QAAI;AACF,cAAQ,YAAY,KAAK,EAAE,eAAe,KAAK,CAAC;AAAA,IAClD,QAAQ;AACN;AAAA,IACF;AACA,eAAW,QAAQ,OAAO;AACxB,UAAI,CAAC,KAAK,OAAO,KAAK,KAAK,KAAK,WAAW,GAAG,EAAG;AACjD,YAAM,QAAQ,gBAAgB,KAAK,KAAK,KAAK,IAAI,CAAC;AAClD,UAAI,UAAU,KAAM;AAKpB,UAAI,UAAU,eAAe,MAAM,eAAe;AAChD,YAAI,kBAAkB,KAAK,SAAS,KAAK,IAAI,EAAG,QAAO;AAAA,MACzD,OAAO;AACL,eAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAEO,SAAS,yBAAyB,UAAkB,OAAa,oBAAI,KAAK,GAAW;AAC1F,QAAM,OAAO,QAAQ,YAAY,QAAQ,CAAC;AAC1C,MAAI,QAAQ;AACZ,MAAI;AACJ,MAAI;AACF,cAAU,YAAY,MAAM,EAAE,eAAe,KAAK,CAAC;AAAA,EACrD,QAAQ;AACN,WAAO;AAAA,EACT;AACA,aAAW,SAAS,SAAS;AAC3B,QAAI,CAAC,MAAM,YAAY,KAAK,CAAC,MAAM,KAAK,SAAS,kBAAkB,EAAG;AACtE,UAAM,MAAM,KAAK,MAAM,MAAM,IAAI;AACjC,UAAM,UAAU,KAAK,MAAM,GAAG,MAAM,IAAI,QAAQ;AAChD,QAAI;AACJ,QAAI;AACF,cAAQ,YAAY,KAAK,EAAE,eAAe,KAAK,CAAC;AAAA,IAClD,QAAQ;AACN;AAAA,IACF;AACA,eAAW,QAAQ,OAAO;AACxB,UAAI,CAAC,KAAK,OAAO,KAAK,KAAK,KAAK,WAAW,GAAG,EAAG;AACjD,UAAI,kBAAkB,KAAK,SAAS,KAAK,IAAI,EAAG;AAAA,IAClD;AAAA,EACF;AACA,SAAO;AACT;AASO,SAAS,4BACd,WACA,MAAY,oBAAI,KAAK,GACQ;AAC7B,QAAM,QAAQ,IAAI,QAAQ;AAC1B,QAAM,UAAuC,CAAC;AAC9C,aAAW,YAAY,WAAW;AAChC,QAAI;AACF,YAAM,WAAW,YAAY,QAAQ;AACrC,YAAM,UAAU,SAAS,QAAQ,EAAE;AACnC,YAAM,aAAa,KAAK,IAAI,GAAG,KAAK,OAAO,QAAQ,WAAW,GAAI,CAAC;AACnE,YAAM,SAAoC;AAAA,QACxC,WAAW;AAAA,QACX,2BAA2B;AAAA,MAC7B;AAGA,YAAM,iBAAiB,4BAA4B,QAAQ,QAAQ,CAAC;AACpE,UAAI,mBAAmB,MAAM;AAC3B,eAAO,qCAAqC,KAAK;AAAA,UAC/C;AAAA,UACA,KAAK,OAAO,QAAQ,kBAAkB,GAAI;AAAA,QAC5C;AAAA,MACF;AACA,cAAQ,KAAK,MAAM;AAAA,IACrB,QAAQ;AAAA,IAIR;AAAA,EACF;AACA,SAAO;AACT;","names":[]}
|
package/package.json
CHANGED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/lib/responsiveness-probe.ts"],"sourcesContent":["/**\n * ENG-5399 — Tier 1 responsiveness probe (manager-side).\n *\n * Cheap, fast-cadence canary that catches \"agent went silent\" inside\n * minutes, well before the existing synthetic-probe cron's ~35 min\n * staleness window (`SyntheticReplyAgeSeconds`, ENG-5122).\n *\n * Mechanism: for each managed agent, read the mtime of the agent's\n * `pane.log` and report `now - mtime` as `PaneActivityAgeSeconds` via\n * a new `/host/responsiveness-probe` endpoint. `pane.log` is the\n * tmux pipe-pane sink set up by `setupPaneLog()` — any visible\n * activity (assistant turns, tool calls, in-place progress\n * heartbeats) bumps its mtime. A silent agent has a steadily\n * climbing age that lands in CloudWatch and trips a per-agent alarm.\n *\n * ENG-6017 adds a second per-agent signal on the same cadence:\n * `pending_inbound_oldest_age_seconds` — the age of the oldest marker\n * file across the agent's `*-pending-inbound/` directories (written by\n * the channel MCP servers for inbounds awaiting delivery). This is the\n * one artifact of the \"message typed but never submitted\" failure mode\n * that every other canary is blind to: in the koda incident\n * (2026-06-04) an operator Slack DM sat undelivered for 40+ minutes\n * while pane-activity stayed fresh (health checks), synthetic probes\n * were answered by the one-shot fallback, and heartbeat/session-alive\n * only reflect manager health. The field is OMITTED (not zero) when the\n * agent has no pending-inbound markers — the API treats absent as\n * \"no signal\", never as \"healthy\" (absent-vs-zero matters for\n * mixed-version fleets where old CLIs don't report it at all).\n *\n * Run from `pollCycle()` in `manager-worker.ts` on a configurable\n * interval (default 5 min via `AUGMENTED_RESPONSIVENESS_INTERVAL_MS`).\n */\n\nimport { mkdirSync, readdirSync, readFileSync, renameSync, statSync } from 'node:fs';\nimport { dirname, join } from 'node:path';\nimport { paneLogPath } from './persistent-session.js';\n\nexport interface ResponsivenessProbeResult {\n code_name: string;\n pane_activity_age_seconds: number;\n /**\n * ENG-6017: age (s) of the oldest marker file across the agent's\n * `*-pending-inbound/` directories. Omitted when no markers exist —\n * absent means \"no signal\", NOT \"zero / healthy\".\n */\n pending_inbound_oldest_age_seconds?: number;\n}\n\nconst DEFAULT_INTERVAL_MS = 5 * 60 * 1000;\n\nexport function getResponsivenessIntervalMs(): number {\n const raw = process.env.AUGMENTED_RESPONSIVENESS_INTERVAL_MS;\n if (!raw) return DEFAULT_INTERVAL_MS;\n const parsed = Number.parseInt(raw, 10);\n return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_INTERVAL_MS;\n}\n\n/**\n * ENG-6017: oldest pending-inbound marker mtime (ms epoch) for an agent,\n * or null when the agent has no markers / no pending-inbound dirs.\n *\n * The channel MCP servers (slack-channel, telegram-channel, …) write one\n * marker file per inbound into `~/.augmented/<codeName>/<channel>-pending-\n * inbound/` and clear it when the agent acknowledges the message. The\n * directory layout is the contract here — read-only, no IPC with the MCP\n * (the MCP and CLI release independently; file mtimes need no protocol).\n *\n * ENG-6072: only plain, non-hidden files count as markers. The msteams MCP\n * keeps `.markers/` and `.processed/` housekeeping SUBDIRECTORIES inside its\n * pending-inbound dir; their mtimes never advance, so statting every dirent\n * made the gauge climb forever and fired pending-inbound-stale on agents with\n * zero stranded messages (kylie ~3.4d / scout ~34h false ALARMs the moment\n * ENG-6023 activated the alarm). Dot-entries are skipped wholesale — the\n * hidden namespace is reserved for MCP bookkeeping, never for markers.\n */\nfunction oldestPendingInboundMtimeMs(agentHomeDir: string): number | null {\n let oldest: number | null = null;\n let entries;\n try {\n entries = readdirSync(agentHomeDir, { withFileTypes: true });\n } catch {\n return null; // agent home missing — nothing to report\n }\n for (const entry of entries) {\n if (!entry.isDirectory() || !entry.name.endsWith('-pending-inbound')) continue;\n const dir = join(agentHomeDir, entry.name);\n let files;\n try {\n files = readdirSync(dir, { withFileTypes: true });\n } catch {\n continue;\n }\n for (const file of files) {\n if (!file.isFile() || file.name.startsWith('.')) continue;\n try {\n const mtimeMs = statSync(join(dir, file.name)).mtimeMs;\n if (oldest === null || mtimeMs < oldest) oldest = mtimeMs;\n } catch {\n // Marker drained between readdir and stat — that's the happy path.\n }\n }\n }\n return oldest;\n}\n\n/**\n * ENG-6160: classify a marker file for the LIVE-inbound scan.\n * - `true` → flagged `\"undeliverable\": true` (dead-letter, exclude).\n * - `null` → vanished mid-scan (ENOENT) — drained between stat and read, the\n * happy path; exclude it rather than count an already-gone file.\n * - `false` → still present but malformed / unreadable for another reason —\n * treated as LIVE so a corrupt marker can never mask a real wedge.\n */\nfunction isUndeliverableMarker(markerPath: string): boolean | null {\n try {\n const parsed = JSON.parse(readFileSync(markerPath, 'utf8')) as { undeliverable?: unknown };\n return parsed?.undeliverable === true;\n } catch (error) {\n return (error as NodeJS.ErrnoException).code === 'ENOENT' ? null : false;\n }\n}\n\n/**\n * ENG-6160: oldest *LIVE* pending-inbound marker mtime (ms epoch) for an agent,\n * or null when there is no live marker. \"Live\" excludes:\n *\n * - markers older than `sessionStartMs` — a marker written before the current\n * session started is a leftover from a PREVIOUS session and cannot mean\n * *this* session is failing to drain. This is the load-bearing exclusion:\n * without it, an orphan marker survives a fresh respawn and the wedge\n * detector re-fires forever on a healthy idle agent (the sherlock enforce\n * loop, 2026-06-08: `inboundAge=3389s` on a `● Ready.` session).\n * - markers flagged `undeliverable: true` — already dead-lettered by the channel.\n *\n * Distinct from `oldestPendingInboundMtimeMs` (which counts ALL markers and\n * feeds the ENG-6017 `pending-inbound-stale` CloudWatch alarm — that alarm\n * *wants* to fire on a stuck inbound, so its semantics must NOT change). This\n * variant is wedge-detection-only.\n */\nexport function oldestLivePendingInboundMtimeMs(\n agentHomeDir: string,\n opts: { sessionStartMs?: number | null } = {},\n): number | null {\n const sessionStartMs = opts.sessionStartMs ?? null;\n let oldest: number | null = null;\n let entries;\n try {\n entries = readdirSync(agentHomeDir, { withFileTypes: true });\n } catch {\n return null;\n }\n for (const entry of entries) {\n if (!entry.isDirectory() || !entry.name.endsWith('-pending-inbound')) continue;\n const dir = join(agentHomeDir, entry.name);\n let files;\n try {\n files = readdirSync(dir, { withFileTypes: true });\n } catch {\n continue;\n }\n for (const file of files) {\n if (!file.isFile() || file.name.startsWith('.')) continue;\n const full = join(dir, file.name);\n let mtimeMs: number;\n try {\n mtimeMs = statSync(full).mtimeMs;\n } catch {\n continue; // drained between readdir and stat — happy path\n }\n if (sessionStartMs !== null && mtimeMs < sessionStartMs) continue; // pre-session leftover\n const undeliverable = isUndeliverableMarker(full);\n if (undeliverable === null) continue; // vanished between stat and read — drained, exclude\n if (undeliverable) continue; // already dead-lettered\n if (oldest === null || mtimeMs < oldest) oldest = mtimeMs;\n }\n }\n return oldest;\n}\n\n/**\n * ENG-6160: age (s) of the oldest LIVE pending-inbound marker for an agent, or\n * null when none. The wedge detector uses this instead of the alarm-facing\n * `pending_inbound_oldest_age_seconds` so a stale/dead-letter marker can't\n * false-fire a respawn.\n */\nexport function livePendingInboundOldestAgeSeconds(\n codeName: string,\n sessionStartMs: number | null,\n now: Date = new Date(),\n): number | null {\n const oldest = oldestLivePendingInboundMtimeMs(dirname(paneLogPath(codeName)), { sessionStartMs });\n if (oldest === null) return null;\n return Math.max(0, Math.floor((now.getTime() - oldest) / 1000));\n}\n\n/**\n * ENG-6160: move every pending-inbound marker for an agent aside into a sibling\n * `<channel>-pending-inbound-stale/` directory (NOT silently deleted — the\n * payload pointer is preserved for forensics), returning the count moved.\n *\n * ENG-6289: no longer called on wedge respawn — the wedge path parks instead\n * (see `parkPendingInbound` above; blanket dead-letter permanently dropped the\n * user's message). Kept as the explicit \"move everything aside\" seam for tests\n * and operator emergencies. The stale dir does not end in `-pending-inbound`,\n * so neither the probe nor this scan re-counts moved markers.\n */\n/**\n * ENG-6289: park-not-drop. On a force-fresh wedge respawn, KEEP undrained\n * pending-inbound markers in their live dirs instead of dead-lettering them —\n * the fresh session's orient hook surfaces them (\"N queued messages\" + details)\n * and ENG-5969 replay (when enabled) re-pushes their payloads. Markers are NOT\n * rewritten (no counter, no mtime bump): the ENG-6160 pre-session exclusion in\n * `oldestLivePendingInboundMtimeMs` already keeps an untouched parked marker\n * out of the fresh session's wedge signal, and re-delivery stays bounded by\n * the existing machinery — the channel-side `replay_count` cap (≤3) and the\n * orphan sweep's received_at TTL. A manager-side rewrite would be the first\n * cross-process writer into marker files, where a torn read in the channel\n * sweep DELETES the marker (`unlinkSync` on parse failure) — the exact drop\n * this function exists to prevent.\n *\n * Only markers already flagged `undeliverable: true` are dead-lettered (moved\n * to `-stale`) — the channel already gave the user the ⏳ notice for those, so\n * nothing can ever drain them. Malformed markers are LEFT IN PLACE, matching\n * the live-scan philosophy above (a corrupt marker must never mask — or\n * become — a dropped message).\n *\n * The msteams dir is skipped wholesale: its top-level files are raw Bot\n * Framework activity payloads (the transport queue, not bookkeeping — real\n * markers live in the hidden `.markers/` subdir this scan never touches), and\n * the teams channel server's boot drain redelivers them to the fresh session\n * on its own. Moving them aside (what the pre-ENG-6289 dead-letter did) was\n * actively defeating the one channel with native respawn recovery.\n */\nexport interface ParkPendingInboundResult {\n parked: number;\n deadLettered: number;\n}\n\n/**\n * Move one marker into the sibling `-stale` dead-letter dir (moved, not\n * deleted — preserved for forensics). Returns true on success; best-effort —\n * a marker that vanished or can't move is left as-is.\n */\nfunction moveMarkerToStale(dir: string, deadDir: string, name: string): boolean {\n try {\n mkdirSync(deadDir, { recursive: true });\n renameSync(join(dir, name), join(deadDir, name));\n return true;\n } catch {\n return false;\n }\n}\n\nexport function parkPendingInbound(codeName: string, _now: Date = new Date()): ParkPendingInboundResult {\n const home = dirname(paneLogPath(codeName));\n const result: ParkPendingInboundResult = { parked: 0, deadLettered: 0 };\n let entries;\n try {\n entries = readdirSync(home, { withFileTypes: true });\n } catch {\n return result;\n }\n for (const entry of entries) {\n if (!entry.isDirectory() || !entry.name.endsWith('-pending-inbound')) continue;\n if (entry.name === 'msteams-pending-inbound') continue; // transport queue — boot drain owns recovery\n const dir = join(home, entry.name);\n const deadDir = join(home, `${entry.name}-stale`);\n let files;\n try {\n files = readdirSync(dir, { withFileTypes: true });\n } catch {\n continue;\n }\n for (const file of files) {\n if (!file.isFile() || file.name.startsWith('.')) continue;\n const undeliverable = isUndeliverableMarker(join(dir, file.name));\n if (undeliverable === null) continue; // drained mid-scan — already gone\n if (undeliverable) {\n if (moveMarkerToStale(dir, deadDir, file.name)) result.deadLettered++;\n } else {\n result.parked++;\n }\n }\n }\n return result;\n}\n\nexport function deadLetterPendingInbound(codeName: string, _now: Date = new Date()): number {\n const home = dirname(paneLogPath(codeName));\n let moved = 0;\n let entries;\n try {\n entries = readdirSync(home, { withFileTypes: true });\n } catch {\n return 0;\n }\n for (const entry of entries) {\n if (!entry.isDirectory() || !entry.name.endsWith('-pending-inbound')) continue;\n const dir = join(home, entry.name);\n const deadDir = join(home, `${entry.name}-stale`);\n let files;\n try {\n files = readdirSync(dir, { withFileTypes: true });\n } catch {\n continue;\n }\n for (const file of files) {\n if (!file.isFile() || file.name.startsWith('.')) continue;\n if (moveMarkerToStale(dir, deadDir, file.name)) moved++;\n }\n }\n return moved;\n}\n\n/**\n * Compute the pane.log age for each agent. Missing or unreadable\n * pane.log returns null — the caller should drop those entries\n * rather than fabricate a \"fresh\" or \"ancient\" value. A missing\n * file means the agent has never spawned in this manager generation,\n * which is a separate problem covered by SessionAliveAgeSeconds.\n */\nexport function collectResponsivenessProbes(\n codeNames: string[],\n now: Date = new Date(),\n): ResponsivenessProbeResult[] {\n const nowMs = now.getTime();\n const results: ResponsivenessProbeResult[] = [];\n for (const codeName of codeNames) {\n try {\n const panePath = paneLogPath(codeName);\n const mtimeMs = statSync(panePath).mtimeMs;\n const ageSeconds = Math.max(0, Math.floor((nowMs - mtimeMs) / 1000));\n const result: ResponsivenessProbeResult = {\n code_name: codeName,\n pane_activity_age_seconds: ageSeconds,\n };\n // ENG-6017: piggyback the pending-inbound drain-age scan on the same\n // cadence. Field omitted (not 0) when there are no markers.\n const oldestMarkerMs = oldestPendingInboundMtimeMs(dirname(panePath));\n if (oldestMarkerMs !== null) {\n result.pending_inbound_oldest_age_seconds = Math.max(\n 0,\n Math.floor((nowMs - oldestMarkerMs) / 1000),\n );\n }\n results.push(result);\n } catch {\n // No pane.log yet (fresh agent, never spawned) — skip. The\n // session-alive monitor already covers the \"should be running\n // but isn't\" case.\n }\n }\n return results;\n}\n"],"mappings":";;;;;;;AAiCA,SAAS,WAAW,aAAa,cAAc,YAAY,gBAAgB;AAC3E,SAAS,SAAS,YAAY;AAc9B,IAAM,sBAAsB,IAAI,KAAK;AAE9B,SAAS,8BAAsC;AACpD,QAAM,MAAM,QAAQ,IAAI;AACxB,MAAI,CAAC,IAAK,QAAO;AACjB,QAAM,SAAS,OAAO,SAAS,KAAK,EAAE;AACtC,SAAO,OAAO,SAAS,MAAM,KAAK,SAAS,IAAI,SAAS;AAC1D;AAoBA,SAAS,4BAA4B,cAAqC;AACxE,MAAI,SAAwB;AAC5B,MAAI;AACJ,MAAI;AACF,cAAU,YAAY,cAAc,EAAE,eAAe,KAAK,CAAC;AAAA,EAC7D,QAAQ;AACN,WAAO;AAAA,EACT;AACA,aAAW,SAAS,SAAS;AAC3B,QAAI,CAAC,MAAM,YAAY,KAAK,CAAC,MAAM,KAAK,SAAS,kBAAkB,EAAG;AACtE,UAAM,MAAM,KAAK,cAAc,MAAM,IAAI;AACzC,QAAI;AACJ,QAAI;AACF,cAAQ,YAAY,KAAK,EAAE,eAAe,KAAK,CAAC;AAAA,IAClD,QAAQ;AACN;AAAA,IACF;AACA,eAAW,QAAQ,OAAO;AACxB,UAAI,CAAC,KAAK,OAAO,KAAK,KAAK,KAAK,WAAW,GAAG,EAAG;AACjD,UAAI;AACF,cAAM,UAAU,SAAS,KAAK,KAAK,KAAK,IAAI,CAAC,EAAE;AAC/C,YAAI,WAAW,QAAQ,UAAU,OAAQ,UAAS;AAAA,MACpD,QAAQ;AAAA,MAER;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAUA,SAAS,sBAAsB,YAAoC;AACjE,MAAI;AACF,UAAM,SAAS,KAAK,MAAM,aAAa,YAAY,MAAM,CAAC;AAC1D,WAAO,QAAQ,kBAAkB;AAAA,EACnC,SAAS,OAAO;AACd,WAAQ,MAAgC,SAAS,WAAW,OAAO;AAAA,EACrE;AACF;AAmBO,SAAS,gCACd,cACA,OAA2C,CAAC,GAC7B;AACf,QAAM,iBAAiB,KAAK,kBAAkB;AAC9C,MAAI,SAAwB;AAC5B,MAAI;AACJ,MAAI;AACF,cAAU,YAAY,cAAc,EAAE,eAAe,KAAK,CAAC;AAAA,EAC7D,QAAQ;AACN,WAAO;AAAA,EACT;AACA,aAAW,SAAS,SAAS;AAC3B,QAAI,CAAC,MAAM,YAAY,KAAK,CAAC,MAAM,KAAK,SAAS,kBAAkB,EAAG;AACtE,UAAM,MAAM,KAAK,cAAc,MAAM,IAAI;AACzC,QAAI;AACJ,QAAI;AACF,cAAQ,YAAY,KAAK,EAAE,eAAe,KAAK,CAAC;AAAA,IAClD,QAAQ;AACN;AAAA,IACF;AACA,eAAW,QAAQ,OAAO;AACxB,UAAI,CAAC,KAAK,OAAO,KAAK,KAAK,KAAK,WAAW,GAAG,EAAG;AACjD,YAAM,OAAO,KAAK,KAAK,KAAK,IAAI;AAChC,UAAI;AACJ,UAAI;AACF,kBAAU,SAAS,IAAI,EAAE;AAAA,MAC3B,QAAQ;AACN;AAAA,MACF;AACA,UAAI,mBAAmB,QAAQ,UAAU,eAAgB;AACzD,YAAM,gBAAgB,sBAAsB,IAAI;AAChD,UAAI,kBAAkB,KAAM;AAC5B,UAAI,cAAe;AACnB,UAAI,WAAW,QAAQ,UAAU,OAAQ,UAAS;AAAA,IACpD;AAAA,EACF;AACA,SAAO;AACT;AAQO,SAAS,mCACd,UACA,gBACA,MAAY,oBAAI,KAAK,GACN;AACf,QAAM,SAAS,gCAAgC,QAAQ,YAAY,QAAQ,CAAC,GAAG,EAAE,eAAe,CAAC;AACjG,MAAI,WAAW,KAAM,QAAO;AAC5B,SAAO,KAAK,IAAI,GAAG,KAAK,OAAO,IAAI,QAAQ,IAAI,UAAU,GAAI,CAAC;AAChE;AAkDA,SAAS,kBAAkB,KAAa,SAAiB,MAAuB;AAC9E,MAAI;AACF,cAAU,SAAS,EAAE,WAAW,KAAK,CAAC;AACtC,eAAW,KAAK,KAAK,IAAI,GAAG,KAAK,SAAS,IAAI,CAAC;AAC/C,WAAO;AAAA,EACT,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEO,SAAS,mBAAmB,UAAkB,OAAa,oBAAI,KAAK,GAA6B;AACtG,QAAM,OAAO,QAAQ,YAAY,QAAQ,CAAC;AAC1C,QAAM,SAAmC,EAAE,QAAQ,GAAG,cAAc,EAAE;AACtE,MAAI;AACJ,MAAI;AACF,cAAU,YAAY,MAAM,EAAE,eAAe,KAAK,CAAC;AAAA,EACrD,QAAQ;AACN,WAAO;AAAA,EACT;AACA,aAAW,SAAS,SAAS;AAC3B,QAAI,CAAC,MAAM,YAAY,KAAK,CAAC,MAAM,KAAK,SAAS,kBAAkB,EAAG;AACtE,QAAI,MAAM,SAAS,0BAA2B;AAC9C,UAAM,MAAM,KAAK,MAAM,MAAM,IAAI;AACjC,UAAM,UAAU,KAAK,MAAM,GAAG,MAAM,IAAI,QAAQ;AAChD,QAAI;AACJ,QAAI;AACF,cAAQ,YAAY,KAAK,EAAE,eAAe,KAAK,CAAC;AAAA,IAClD,QAAQ;AACN;AAAA,IACF;AACA,eAAW,QAAQ,OAAO;AACxB,UAAI,CAAC,KAAK,OAAO,KAAK,KAAK,KAAK,WAAW,GAAG,EAAG;AACjD,YAAM,gBAAgB,sBAAsB,KAAK,KAAK,KAAK,IAAI,CAAC;AAChE,UAAI,kBAAkB,KAAM;AAC5B,UAAI,eAAe;AACjB,YAAI,kBAAkB,KAAK,SAAS,KAAK,IAAI,EAAG,QAAO;AAAA,MACzD,OAAO;AACL,eAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF;AACA,SAAO;AACT;AAEO,SAAS,yBAAyB,UAAkB,OAAa,oBAAI,KAAK,GAAW;AAC1F,QAAM,OAAO,QAAQ,YAAY,QAAQ,CAAC;AAC1C,MAAI,QAAQ;AACZ,MAAI;AACJ,MAAI;AACF,cAAU,YAAY,MAAM,EAAE,eAAe,KAAK,CAAC;AAAA,EACrD,QAAQ;AACN,WAAO;AAAA,EACT;AACA,aAAW,SAAS,SAAS;AAC3B,QAAI,CAAC,MAAM,YAAY,KAAK,CAAC,MAAM,KAAK,SAAS,kBAAkB,EAAG;AACtE,UAAM,MAAM,KAAK,MAAM,MAAM,IAAI;AACjC,UAAM,UAAU,KAAK,MAAM,GAAG,MAAM,IAAI,QAAQ;AAChD,QAAI;AACJ,QAAI;AACF,cAAQ,YAAY,KAAK,EAAE,eAAe,KAAK,CAAC;AAAA,IAClD,QAAQ;AACN;AAAA,IACF;AACA,eAAW,QAAQ,OAAO;AACxB,UAAI,CAAC,KAAK,OAAO,KAAK,KAAK,KAAK,WAAW,GAAG,EAAG;AACjD,UAAI,kBAAkB,KAAK,SAAS,KAAK,IAAI,EAAG;AAAA,IAClD;AAAA,EACF;AACA,SAAO;AACT;AASO,SAAS,4BACd,WACA,MAAY,oBAAI,KAAK,GACQ;AAC7B,QAAM,QAAQ,IAAI,QAAQ;AAC1B,QAAM,UAAuC,CAAC;AAC9C,aAAW,YAAY,WAAW;AAChC,QAAI;AACF,YAAM,WAAW,YAAY,QAAQ;AACrC,YAAM,UAAU,SAAS,QAAQ,EAAE;AACnC,YAAM,aAAa,KAAK,IAAI,GAAG,KAAK,OAAO,QAAQ,WAAW,GAAI,CAAC;AACnE,YAAM,SAAoC;AAAA,QACxC,WAAW;AAAA,QACX,2BAA2B;AAAA,MAC7B;AAGA,YAAM,iBAAiB,4BAA4B,QAAQ,QAAQ,CAAC;AACpE,UAAI,mBAAmB,MAAM;AAC3B,eAAO,qCAAqC,KAAK;AAAA,UAC/C;AAAA,UACA,KAAK,OAAO,QAAQ,kBAAkB,GAAI;AAAA,QAC5C;AAAA,MACF;AACA,cAAQ,KAAK,MAAM;AAAA,IACrB,QAAQ;AAAA,IAIR;AAAA,EACF;AACA,SAAO;AACT;","names":[]}
|
|
File without changes
|