runcap 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -4
- package/package.json +5 -2
- package/scripts/acceptance.mjs +67 -0
- package/scripts/delta-test.mjs +130 -0
- package/scripts/demo-flow.mjs +20 -0
- package/scripts/loop-test.mjs +84 -0
- package/scripts/make-demo-svg.mjs +75 -0
- package/scripts/make-linkedin-delta-video.mjs +412 -0
- package/scripts/validate-demo.mjs +49 -0
- package/src/compressor.mjs +268 -1
- package/src/mission-control.mjs +40 -3
package/src/compressor.mjs
CHANGED
|
@@ -18,17 +18,107 @@
|
|
|
18
18
|
// "X tokens saved by compression". Token counts are an estimate (~4 chars/token),
|
|
19
19
|
// labeled `estimated`, never claimed as provider-exact.
|
|
20
20
|
|
|
21
|
+
import { createHash } from "node:crypto";
|
|
22
|
+
|
|
21
23
|
const CHARS_PER_TOKEN = 4;
|
|
22
24
|
const MIN_FIELD_CHARS = 200; // below this, compression overhead isn't worth it
|
|
25
|
+
const MIN_DEDUP_CHARS = 256; // only dedup blocks big enough to be worth a stub
|
|
23
26
|
const LOG_HEAD_LINES = 12;
|
|
24
27
|
const LOG_TAIL_LINES = 8;
|
|
25
28
|
const LOG_COLLAPSE_THRESHOLD = 40; // collapse runs longer than this
|
|
26
29
|
|
|
30
|
+
// --- delta-encoding of near-duplicate blocks ---
|
|
31
|
+
// When a block is similar (not identical) to one seen earlier in the same
|
|
32
|
+
// request, we replace it with a line-diff against the original. This is the
|
|
33
|
+
// case identical-dedup misses: an agent re-reads a file AFTER editing it.
|
|
34
|
+
// Lossless: the exact text is recoverable from (original block + diff).
|
|
35
|
+
const DELTA_MIN_SIMILARITY = 0.5; // below this a diff isn't smaller than the original
|
|
36
|
+
const DELTA_MAX_LINES = 2500; // LCS is O(n*m); above ~2500 lines a diff can cost >25ms, so skip to protect the hot path
|
|
37
|
+
|
|
27
38
|
/**
 * Rough token estimate for a piece of text, at CHARS_PER_TOKEN characters per
 * token. This is an estimate only, never a provider-exact count.
 *
 * @param {*} text - Value to measure; it is stringified before measuring.
 *   Any falsy value (empty string, null, undefined, 0) counts as zero tokens.
 * @returns {number} Estimated token count, rounded up to a whole token.
 */
export function estimateTokens(text) {
  const charCount = text ? String(text).length : 0;
  return Math.ceil(charCount / CHARS_PER_TOKEN);
}
|
|
31
42
|
|
|
43
|
+
/**
 * Short content fingerprint: the first 8 hex characters of the SHA-1 digest.
 *
 * @param {string|Buffer} text - Content to fingerprint.
 * @returns {string} 8-character lowercase hex prefix of the SHA-1 digest.
 */
function shortHash(text) {
  const hasher = createHash("sha1");
  hasher.update(text);
  const fullHex = hasher.digest("hex");
  return fullHex.slice(0, 8);
}
|
|
46
|
+
|
|
47
|
+
// Cheap line-overlap ratio. Used only to decide whether a full LCS diff is
|
|
48
|
+
// worth computing; the real saving is measured against the emitted delta.
|
|
49
|
+
/**
 * Cheap line-overlap ratio between two line arrays.
 *
 * Lines are matched as a multiset: each line of `aLines` can be matched by at
 * most one line of `bLines`. A plain Set-membership count would score
 * `["a", "b"]` against `["a", "a", "a", "a"]` as 1.0 and would depend on the
 * argument order; multiset matching keeps the ratio symmetric and stops
 * repeated lines (blank lines, closing braces) from inflating it.
 *
 * @param {string[]} aLines - Lines of the first text.
 * @param {string[]} bLines - Lines of the second text.
 * @returns {number} Matched lines divided by the longer length, in [0, 1].
 *   Two empty arrays yield 0.
 */
export function lineSimilarity(aLines, bLines) {
  // Remaining unmatched occurrences of each distinct line in aLines.
  const remaining = new Map();
  for (const line of aLines) {
    remaining.set(line, (remaining.get(line) ?? 0) + 1);
  }

  let shared = 0;
  for (const line of bLines) {
    const left = remaining.get(line);
    if (left) {
      remaining.set(line, left - 1);
      shared += 1;
    }
  }

  // The trailing 1 guards against division by zero when both inputs are empty.
  return shared / Math.max(aLines.length, bLines.length, 1);
}
|
|
55
|
+
|
|
56
|
+
// LCS-based line diff. Emits a compact op list of CHANGES only:
|
|
57
|
+
// { at: <line index in the original>, del: <lines removed>, ins: [<lines added>] }
|
|
58
|
+
// Unchanged ranges are implied. Reconstruction walks the original applying ops.
|
|
59
|
+
/**
 * LCS-based line diff. Emits a compact list of CHANGES only; unchanged ranges
 * are implied.
 *
 * Each op is `{ at, del, ins }`:
 *   - `at`  : index in `aLines` where the change starts,
 *   - `del` : number of consecutive original lines removed starting at `at`,
 *   - `ins` : lines inserted at that position (in order).
 *
 * One op is emitted per contiguous change hunk (a run of deletes and inserts
 * not interrupted by a matching line), so a run of k deleted lines becomes a
 * single op with `del: k` rather than k single-line ops. Ops are emitted in
 * ascending `at` order.
 *
 * Cost is O(n*m) time and memory for the LCS table; callers are expected to
 * cap the input sizes.
 *
 * @param {string[]} aLines - Original lines.
 * @param {string[]} bLines - New lines.
 * @returns {{at: number, del: number, ins: string[]}[]} Change ops.
 */
function lineDiff(aLines, bLines) {
  const n = aLines.length;
  const m = bLines.length;

  // dp[i][j] = length of the LCS of aLines[i..] and bLines[j..].
  const dp = Array.from({ length: n + 1 }, () => new Int32Array(m + 1));
  for (let i = n - 1; i >= 0; i--) {
    for (let j = m - 1; j >= 0; j--) {
      dp[i][j] = aLines[i] === bLines[j]
        ? dp[i + 1][j + 1] + 1
        : Math.max(dp[i + 1][j], dp[i][j + 1]);
    }
  }

  const ops = [];
  let cur = null; // the open change hunk, or null right after a matching line

  // Return the open hunk, opening a new one anchored at `at` if none is open.
  // While a hunk is open, `cur.at + cur.del` always equals the cursor into
  // aLines, so every further delete/insert belongs to the same hunk.
  const hunk = (at) => {
    if (cur === null) {
      cur = { at, del: 0, ins: [] };
      ops.push(cur);
    }
    return cur;
  };

  let i = 0;
  let j = 0;
  while (i < n && j < m) {
    if (aLines[i] === bLines[j]) {
      cur = null; // a matching line closes the current hunk
      i++;
      j++;
    } else if (dp[i + 1][j] >= dp[i][j + 1]) {
      hunk(i).del++;
      i++;
    } else {
      hunk(i).ins.push(bLines[j]);
      j++;
    }
  }
  // At most one of these tails runs: leftover original lines are deletions,
  // leftover new lines are insertions at the end.
  while (i < n) {
    hunk(i).del++;
    i++;
  }
  while (j < m) {
    hunk(i).ins.push(bLines[j]);
    j++;
  }
  return ops;
}
|
|
87
|
+
|
|
88
|
+
// Exact inverse of lineDiff: (original lines + ops) -> reconstructed string.
|
|
89
|
+
// Walks ops in order (they are emitted sorted by `at`), copying untouched
|
|
90
|
+
// original lines up to each op's anchor, then applying the op's deletes/inserts.
|
|
91
|
+
// Order-based, so duplicate `at` values across ops are handled correctly.
|
|
92
|
+
// Kept in-module so tests can prove losslessness against the real code path.
|
|
93
|
+
/**
 * Rebuild the new text from the original lines plus a list of change ops.
 *
 * Ops are consumed in the order given. For each op, untouched original lines
 * are copied up to the op's `at` anchor, the op's `ins` lines are emitted, and
 * `del` original lines are skipped. Whatever remains of the original is copied
 * at the end.
 *
 * @param {string[]} aLines - Original lines.
 * @param {{at: number, del: number, ins: string[]}[]} ops - Change ops.
 * @returns {string} Reconstructed text, lines joined with "\n".
 */
export function applyLineDiff(aLines, ops) {
  const rebuilt = [];
  let cursor = 0; // next unread index in aLines

  // Copy original lines up to (not including) `limit`, never past the end.
  const copyUntil = (limit) => {
    while (cursor < limit && cursor < aLines.length) {
      rebuilt.push(aLines[cursor]);
      cursor += 1;
    }
  };

  for (const op of ops) {
    copyUntil(op.at);
    for (const added of op.ins) {
      rebuilt.push(added);
    }
    cursor += op.del;
  }
  copyUntil(aLines.length);

  return rebuilt.join("\n");
}
|
|
104
|
+
|
|
105
|
+
// Render a delta as a block the MODEL can read and apply in its head. The header
|
|
106
|
+
// names the base (sha + which message it first appeared in) so the model knows
|
|
107
|
+
// what to patch; each op is shown as removed/added lines at a 1-based line number.
|
|
108
|
+
/**
 * Render a delta as a text block the model can read and apply.
 *
 * The two-line header names the base block (short hash plus the 1-based
 * message number it first appeared in). The base is a SIMILAR block, not an
 * identical one, and the header says so, so the reader does not treat the
 * text as unchanged. Each op then gets an `@@ line N: ...` marker (N is the
 * 1-based line number in the base) followed by its inserted lines prefixed
 * with "+ ".
 *
 * @param {string} baseHash - Short hash of the base block.
 * @param {number} firstIndex - 0-based index of the message holding the base.
 * @param {{at: number, del: number, ins: string[]}[]} ops - Change ops.
 * @returns {string} The rendered delta, lines joined with "\n".
 */
function renderDelta(baseHash, firstIndex, ops) {
  const lines = [
    `[runcap delta vs the similar block first seen in message ${firstIndex + 1} (sha:${baseHash}).`,
    ` Reconstruct the current text by applying these line changes to that block; all other lines are unchanged.]`
  ];
  for (const op of ops) {
    const at1 = op.at + 1; // ops are 0-based; the rendered form is 1-based
    if (op.del > 0) {
      lines.push(`@@ line ${at1}: remove ${op.del} line(s)`);
    } else {
      lines.push(`@@ line ${at1}: insert`);
    }
    for (const ins of op.ins) {
      lines.push(`+ ${ins}`);
    }
  }
  return lines.join("\n");
}
|
|
121
|
+
|
|
32
122
|
// Re-serialize an embedded JSON string compactly. Handles two shapes safely:
|
|
33
123
|
// 1. The whole field is JSON ("{...}" or "[...]").
|
|
34
124
|
// 2. A short text prefix followed by a JSON blob ("Here is the data:\n{...}").
|
|
@@ -109,6 +199,120 @@ function compressField(value) {
|
|
|
109
199
|
return out;
|
|
110
200
|
}
|
|
111
201
|
|
|
202
|
+
// Deduplicate identical content blocks within a single request. In a long
|
|
203
|
+
// agentic session the same file dump or tool_result ships as a fresh block on
|
|
204
|
+
// every turn (the agent re-reads auth.ts five times); the model already saw
|
|
205
|
+
// those exact bytes earlier in the SAME request, so replacing the repeats with
|
|
206
|
+
// a deterministic stub is lossless-by-construction. This is where the real
|
|
207
|
+
// savings on agentic traffic live — per-field whitespace/JSON trimming barely
|
|
208
|
+
// moves the needle by comparison.
|
|
209
|
+
//
|
|
210
|
+
// Walks messages in order. The first occurrence of a block is kept verbatim;
|
|
211
|
+
// any later block with the same content hash becomes:
|
|
212
|
+
// [runcap: identical content seen at message N, sha:abcd1234]
|
|
213
|
+
// We only dedup blocks >= MIN_DEDUP_CHARS so a tiny stub never costs more than
|
|
214
|
+
// the original. Builds new message/part objects via spread; the input body is not mutated.
|
|
215
|
+
/**
 * Replace repeated and near-repeated content blocks within one request body.
 *
 * Messages are walked in order. For every string block of at least
 * MIN_DEDUP_CHARS characters:
 *   - an exact repeat of an earlier block becomes a one-line stub naming the
 *     message it was first seen in and its short hash;
 *   - a block similar to an earlier one becomes a rendered line delta against
 *     the most similar earlier block, but only if the delta reconstructs the
 *     text exactly and is shorter than the text;
 *   - anything else is kept verbatim.
 *
 * The short hash is only a lookup key: a repeat is stubbed only when the
 * stored text is strictly equal to the current text, so a hash collision can
 * never replace different content with an "identical" stub.
 *
 * New message/part objects are built with spread; the input body is not
 * mutated.
 *
 * NOTE(review): a block that was itself emitted as a delta is still recorded
 * as a possible base for later stubs/deltas, so a reader may have to follow a
 * chain of deltas. Confirm this is intended.
 *
 * @param {object} body - Request body; only `body.messages` (an array) is processed.
 * @returns {{body: object, saved: number, blocks: number, deltas: number}}
 *   The rewritten body, characters saved, number of blocks replaced (stubs
 *   plus deltas), and how many of those were deltas.
 */
function dedupRepeatedBlocks(body) {
  let saved = 0;
  let blocks = 0;
  let deltas = 0;
  // hash -> { index, hash, text, lines } for the first block seen with that hash.
  const seen = new Map();
  // Every distinct block seen so far, in order, for near-duplicate matching.
  const priors = [];

  const stubFor = (hash, firstIndex) =>
    `[runcap: identical content seen at message ${firstIndex + 1}, sha:${hash}]`;

  // Try to encode `text` (already split into `bLines`) as a delta against the
  // most similar prior block. Returns the delta string if it is smaller than
  // the original, else null.
  const tryDelta = (text, bLines) => {
    if (bLines.length > DELTA_MAX_LINES) return null; // protect the hot path
    let best = null;
    let bestSim = 0;
    for (const p of priors) {
      if (p.lines.length > DELTA_MAX_LINES) continue;
      const sim = lineSimilarity(p.lines, bLines);
      if (sim < DELTA_MIN_SIMILARITY) continue;
      // Strict ">" keeps the earliest prior on ties.
      if (best === null || sim > bestSim) {
        best = p;
        bestSim = sim;
      }
    }
    if (best === null) return null;
    const ops = lineDiff(best.lines, bLines);
    // Safety: only emit if it reconstructs exactly (lossless-by-construction).
    if (applyLineDiff(best.lines, ops) !== text) return null;
    const rendered = renderDelta(best.hash, best.index, ops);
    return rendered.length < text.length ? rendered : null;
  };

  const dedupString = (text, msgIndex) => {
    if (typeof text !== "string" || text.length < MIN_DEDUP_CHARS) return text;
    const hash = shortHash(text);
    const firstSeen = seen.get(hash);

    // Exact repeat: the hash matches AND the stored bytes are the same.
    if (firstSeen !== undefined && firstSeen.text === text) {
      const stub = stubFor(hash, firstSeen.index);
      if (stub.length >= text.length) return text;
      saved += text.length - stub.length;
      blocks += 1;
      return stub;
    }

    // New content (or a hash collision with different content). Try a delta
    // vs an earlier *similar* block before recording it as a fresh original.
    const lines = text.split("\n");
    const delta = tryDelta(text, lines);
    const record = { index: msgIndex, hash, text, lines };
    // On a collision keep the first owner of the hash; this block is still
    // usable as a delta base through `priors`.
    if (firstSeen === undefined) seen.set(hash, record);
    priors.push(record);
    if (delta === null) return text;
    saved += text.length - delta.length;
    blocks += 1;
    deltas += 1;
    return delta;
  };

  const dedupContent = (content, msgIndex) => {
    if (typeof content === "string") return dedupString(content, msgIndex);
    if (Array.isArray(content)) {
      return content.map((part) => {
        if (!part || typeof part !== "object") return part;
        // OpenAI/Anthropic text parts
        if (typeof part.text === "string") {
          return { ...part, text: dedupString(part.text, msgIndex) };
        }
        // Anthropic tool_result blocks: content can be string or array of parts
        if (part.type === "tool_result") {
          if (typeof part.content === "string") {
            return { ...part, content: dedupString(part.content, msgIndex) };
          }
          if (Array.isArray(part.content)) {
            return {
              ...part,
              content: part.content.map((c) =>
                c && typeof c === "object" && typeof c.text === "string"
                  ? { ...c, text: dedupString(c.text, msgIndex) }
                  : c
              )
            };
          }
        }
        return part;
      });
    }
    return content;
  };

  let next = body;
  if (body && typeof body === "object" && Array.isArray(body.messages)) {
    next = {
      ...body,
      messages: body.messages.map((m, i) =>
        m && typeof m === "object" && "content" in m ? { ...m, content: dedupContent(m.content, i) } : m
      )
    };
  }
  return { body: next, saved, blocks, deltas };
}
|
|
315
|
+
|
|
112
316
|
// Walk an OpenAI- or Anthropic-shaped request body and compress message content.
|
|
113
317
|
// Returns { body, before, after, savedChars, savedTokens, touched }.
|
|
114
318
|
export function compressRequestBody(body) {
|
|
@@ -156,6 +360,12 @@ export function compressRequestBody(body) {
|
|
|
156
360
|
next = { ...next, input: compressContent(next.input) };
|
|
157
361
|
}
|
|
158
362
|
|
|
363
|
+
// Cross-message dedup of identical blocks + delta-encoding of near-duplicates
|
|
364
|
+
// (the big win on agentic traffic: re-reads after an edit).
|
|
365
|
+
const deduped = dedupRepeatedBlocks(next);
|
|
366
|
+
next = deduped.body;
|
|
367
|
+
touched += deduped.blocks;
|
|
368
|
+
|
|
159
369
|
const measureAfter = JSON.stringify(next).length;
|
|
160
370
|
const savedChars = Math.max(0, measureBefore - measureAfter);
|
|
161
371
|
return {
|
|
@@ -164,6 +374,63 @@ export function compressRequestBody(body) {
|
|
|
164
374
|
after: measureAfter,
|
|
165
375
|
savedChars,
|
|
166
376
|
savedTokens: Math.round(savedChars / CHARS_PER_TOKEN),
|
|
167
|
-
touched
|
|
377
|
+
touched,
|
|
378
|
+
deltas: deduped.deltas
|
|
379
|
+
};
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
// --- loop / circling detection (the "looks productive but stuck" signal) ---
|
|
383
|
+
// The gateway sees every request the agent sends. An agent that is circling the
|
|
384
|
+
// same failure with reworded attempts sends prompts that are SIMILAR-but-not-
|
|
385
|
+
// identical turn after turn: the conversation tail barely moves while tokens
|
|
386
|
+
// keep burning. Plain hashing misses this (the text differs slightly each loop);
|
|
387
|
+
// this catches it with the same line-similarity primitive the delta-encoder uses.
|
|
388
|
+
const LOOP_SIMILARITY = 0.92; // two consecutive prompts this similar = no real progress made between them
|
|
389
|
+
const LOOP_MIN_REPEATS = 3; // how many near-identical prompts in a row before we warn
|
|
390
|
+
|
|
391
|
+
// Pull the comparable "shape" of a request: the concatenated text the agent is
|
|
392
|
+
// actually sending this turn (messages / input / system), order-preserving.
|
|
393
|
+
/**
 * Extract the comparable text "shape" of a request body.
 *
 * Collects, in this order: the text of every message's `content`, then
 * `system` (when defined), then `input` (only when it is a string). A content
 * value contributes itself when it is a string, or the `text` of each object
 * part when it is an array; anything else contributes nothing.
 *
 * @param {*} body - Parsed request body.
 * @returns {string} Collected text joined with "\n"; "" for non-object bodies.
 */
export function requestShapeText(body) {
  if (body === null || typeof body !== "object") return "";

  // Text fragments carried by one `content` value.
  const textsOf = (content) => {
    if (typeof content === "string") return [content];
    if (!Array.isArray(content)) return [];
    const found = [];
    for (const part of content) {
      const isTextPart = Boolean(part) && typeof part === "object" && typeof part.text === "string";
      if (isTextPart) found.push(part.text);
    }
    return found;
  };

  const chunks = [];
  const collect = (content) => {
    for (const piece of textsOf(content)) chunks.push(piece);
  };

  const messages = Array.isArray(body.messages) ? body.messages : [];
  for (const message of messages) {
    if (message && typeof message === "object") collect(message.content);
  }
  if (body.system !== undefined) collect(body.system);
  if (typeof body.input === "string") collect(body.input);

  return chunks.join("\n");
}
|
|
407
|
+
|
|
408
|
+
// Given the current request and a rolling history of prior request shapes,
|
|
409
|
+
// decide whether the agent is circling. Returns { looping, repeats, similarity }.
|
|
410
|
+
// History is oldest->newest of prior requestShapeText() strings in this session.
|
|
411
|
+
/**
 * Decide whether the agent appears to be circling, given the current request
 * shape and a rolling history of earlier shapes (oldest -> newest).
 *
 * Walks the history from newest to oldest and counts the unbroken run of
 * entries whose line similarity to the CURRENT shape meets the threshold; the
 * walk stops at the first entry that does not.
 *
 * NOTE(review): similarity is computed over the whole shape text. If shapes
 * contain the full, growing conversation, consecutive requests may score high
 * even when the agent is progressing. Confirm against real traffic.
 *
 * @param {string} currentShape - Shape text of the current request.
 * @param {string[]} history - Earlier shape texts, oldest first.
 * @param {object} [options]
 * @param {number} [options.similarityThreshold=LOOP_SIMILARITY] - Minimum
 *   similarity for an entry to count toward the run.
 * @param {number} [options.minRepeats=LOOP_MIN_REPEATS] - Run length at which
 *   `looping` becomes true.
 * @returns {{looping: boolean, repeats: number, similarity: number}}
 *   `repeats` is the run length; `similarity` is the score (3 decimals) of the
 *   oldest entry in the run, or 0 when the run is empty.
 */
export function detectLoop(currentShape, history, {
  similarityThreshold = LOOP_SIMILARITY,
  minRepeats = LOOP_MIN_REPEATS
} = {}) {
  const hasHistory = Array.isArray(history) && history.length > 0;
  if (!currentShape || !hasHistory) {
    return { looping: false, repeats: 0, similarity: 0 };
  }

  const currentLines = String(currentShape).split("\n");
  let streak = 0;
  let streakSimilarity = 0; // score of the most recently counted (oldest) entry

  let idx = history.length;
  while (idx > 0) {
    idx -= 1;
    const earlierLines = String(history[idx]).split("\n");
    const score = lineSimilarity(currentLines, earlierLines);
    // The first entry below the threshold ends the run.
    if (!(score >= similarityThreshold)) break;
    streak += 1;
    streakSimilarity = score;
  }

  return {
    looping: streak >= minRepeats,
    repeats: streak,
    similarity: Number(streakSimilarity.toFixed(3))
  };
}
|
package/src/mission-control.mjs
CHANGED
|
@@ -7,7 +7,7 @@ import path from "node:path";
|
|
|
7
7
|
import process from "node:process";
|
|
8
8
|
import { syncRun } from "./cloud.mjs";
|
|
9
9
|
import { sendAlert } from "./alerts.mjs";
|
|
10
|
-
import { compressRequestBody, estimateTokens } from "./compressor.mjs";
|
|
10
|
+
import { compressRequestBody, estimateTokens, requestShapeText, detectLoop } from "./compressor.mjs";
|
|
11
11
|
|
|
12
12
|
const STORE_DIR = ".runcap";
|
|
13
13
|
const MISSIONS_DIR = path.join(STORE_DIR, "missions");
|
|
@@ -523,6 +523,12 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
|
|
|
523
523
|
if (gatewayMode !== "mock" && !openaiKey && !anthropicKey) {
|
|
524
524
|
throw new Error("Missing upstream key. Set OPENAI_API_KEY (for /v1/chat/completions) and/or ANTHROPIC_API_KEY (for /v1/messages). The gateway cannot proxy without at least one.");
|
|
525
525
|
}
|
|
526
|
+
// Rolling history of recent request shapes (per gateway process) so we can
|
|
527
|
+
// detect an agent circling the same failure with reworded prompts: similar-
|
|
528
|
+
// but-not-identical turns, which plain hashing never catches.
|
|
529
|
+
const loopEnabled = (process.env.AIM_LOOP_DETECT ?? "on").toLowerCase() !== "off";
|
|
530
|
+
const shapeHistory = [];
|
|
531
|
+
const SHAPE_HISTORY_MAX = 12;
|
|
526
532
|
const server = http.createServer(async (request, response) => {
|
|
527
533
|
const started = Date.now();
|
|
528
534
|
try {
|
|
@@ -545,6 +551,17 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
|
|
|
545
551
|
|
|
546
552
|
const bodyText = await readRequestBody(request);
|
|
547
553
|
const requestBody = safeJson(bodyText) ?? {};
|
|
554
|
+
// Loop signal: compare this request's shape against the recent run.
|
|
555
|
+
let loop = null;
|
|
556
|
+
if (loopEnabled) {
|
|
557
|
+
const shape = requestShapeText(requestBody);
|
|
558
|
+
if (shape) {
|
|
559
|
+
const result = detectLoop(shape, shapeHistory);
|
|
560
|
+
loop = { looping: result.looping, repeats: result.repeats, similarity: result.similarity, truth: "calculated" };
|
|
561
|
+
shapeHistory.push(shape);
|
|
562
|
+
if (shapeHistory.length > SHAPE_HISTORY_MAX) shapeHistory.shift();
|
|
563
|
+
}
|
|
564
|
+
}
|
|
548
565
|
const budget = readBudget();
|
|
549
566
|
const summary = await readGatewaySummary({ windowMs: budgetWindowMs() });
|
|
550
567
|
// Compress the request body once (safe, lossless-by-construction). Disable with AIM_COMPRESS=off.
|
|
@@ -561,6 +578,7 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
|
|
|
561
578
|
beforeChars: c.before,
|
|
562
579
|
afterChars: c.after,
|
|
563
580
|
fieldsTouched: c.touched,
|
|
581
|
+
deltas: c.deltas ?? 0,
|
|
564
582
|
truth: "estimated"
|
|
565
583
|
};
|
|
566
584
|
}
|
|
@@ -590,6 +608,7 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
|
|
|
590
608
|
capUsd: budget,
|
|
591
609
|
blockedByThisCall
|
|
592
610
|
},
|
|
611
|
+
loop,
|
|
593
612
|
error: blockedByThisCall
|
|
594
613
|
? `Budget would be exceeded by this call: $${summary.estimatedCostUsd} spent + ~$${callEstimate} this call > cap $${budget}`
|
|
595
614
|
: `Budget exceeded: ${summary.estimatedCostUsd} >= ${budget}`,
|
|
@@ -630,6 +649,7 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
|
|
|
630
649
|
usage: responseBody.usage,
|
|
631
650
|
cost: estimateApiCost(responseBody.usage, requestBody.model ?? responseBody.model),
|
|
632
651
|
compression,
|
|
652
|
+
loop,
|
|
633
653
|
truth: "mock_provider_usage",
|
|
634
654
|
requestHash: createHash("sha1").update(bodyText).digest("hex")
|
|
635
655
|
});
|
|
@@ -681,9 +701,14 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
|
|
|
681
701
|
usage: responseBody.usage ?? null,
|
|
682
702
|
cost: estimateApiCost(responseBody.usage, requestBody.model ?? responseBody.model),
|
|
683
703
|
compression,
|
|
704
|
+
loop,
|
|
684
705
|
truth: responseBody.usage ? "provider_usage" : "unknown",
|
|
685
706
|
requestHash: createHash("sha1").update(bodyText).digest("hex")
|
|
686
707
|
});
|
|
708
|
+
if (loop && loop.looping) {
|
|
709
|
+
sendAlert(`Runcap: possible stuck loop. The agent has sent ${loop.repeats} near-identical prompts in a row (${Math.round(loop.similarity * 100)}% similar) without the conversation moving forward. It may be circling the same failure with reworded attempts.`)
|
|
710
|
+
.catch(() => {});
|
|
711
|
+
}
|
|
687
712
|
if (responseBody.usage) {
|
|
688
713
|
const spent = await readGatewaySummary({ windowMs: budgetWindowMs() });
|
|
689
714
|
syncRun({
|
|
@@ -768,19 +793,23 @@ export async function showStatus(options = {}) {
|
|
|
768
793
|
|
|
769
794
|
const gateway = await readGatewaySummary();
|
|
770
795
|
const gatewayLine = `Gateway: ${gateway.callCount} calls, ${gateway.totalTokens} tokens, $${gateway.estimatedCostUsd} estimated (${gateway.truth})`;
|
|
796
|
+
const loopLine = gateway.loop?.looping
|
|
797
|
+
? `Loop warning: last ${gateway.loop.repeats} prompts were ${Math.round(gateway.loop.similarity * 100)}% identical with no progress. The agent may be circling the same failure (truth: calculated).`
|
|
798
|
+
: null;
|
|
771
799
|
const latest = await latestMissionId();
|
|
772
|
-
if (!latest) return
|
|
800
|
+
if (!latest) return [fuelLine, gatewayLine, loopLine, "No missions recorded yet."].filter(Boolean).join("\n");
|
|
773
801
|
const mission = await readMission(latest);
|
|
774
802
|
return [
|
|
775
803
|
fuelLine,
|
|
776
804
|
gatewayLine,
|
|
805
|
+
loopLine,
|
|
777
806
|
`Latest mission: ${mission.id}`,
|
|
778
807
|
`Status: ${mission.stuck.status}`,
|
|
779
808
|
`Exit code: ${mission.exitCode}`,
|
|
780
809
|
`Changed files: ${mission.diffEvidence.changedFiles.length}`,
|
|
781
810
|
`Errors: ${mission.errors.length}`,
|
|
782
811
|
`Report: ${path.join(MISSIONS_DIR, mission.id, "report.md")}`
|
|
783
|
-
].join("\n");
|
|
812
|
+
].filter(Boolean).join("\n");
|
|
784
813
|
}
|
|
785
814
|
|
|
786
815
|
export async function recordFuel(value) {
|
|
@@ -1418,6 +1447,13 @@ async function readGatewaySummary({ windowMs } = {}) {
|
|
|
1418
1447
|
const inputRate = pricing ? pricing.inputPerMillion : 3; // fall back to a mid Sonnet-ish rate
|
|
1419
1448
|
return sum + (saved * inputRate) / 1_000_000;
|
|
1420
1449
|
}, 0);
|
|
1450
|
+
// Loop signal: the most recent event that carries a loop verdict tells us
|
|
1451
|
+
// whether the agent is currently circling (similar-but-not-identical prompts
|
|
1452
|
+
// repeated without progress). This is the "looks productive but stuck" case.
|
|
1453
|
+
const lastWithLoop = [...events].reverse().find((event) => event.loop);
|
|
1454
|
+
const loop = lastWithLoop
|
|
1455
|
+
? { ...lastWithLoop.loop, at: lastWithLoop.at, model: lastWithLoop.model }
|
|
1456
|
+
: { looping: false, repeats: 0, similarity: 0, truth: "calculated" };
|
|
1421
1457
|
return {
|
|
1422
1458
|
callCount: events.length,
|
|
1423
1459
|
successfulCallCount: successful.length,
|
|
@@ -1426,6 +1462,7 @@ async function readGatewaySummary({ windowMs } = {}) {
|
|
|
1426
1462
|
savedTokens,
|
|
1427
1463
|
savedUsd: Number(savedUsd.toFixed(6)),
|
|
1428
1464
|
wouldHaveSpentUsd: Number((estimatedCost + savedUsd).toFixed(6)),
|
|
1465
|
+
loop,
|
|
1429
1466
|
truth: events.some((event) => event.truth === "provider_usage" || event.truth === "mock_provider_usage")
|
|
1430
1467
|
? "usage_plus_static_price_table"
|
|
1431
1468
|
: "unknown",
|