runcap 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,17 +18,107 @@
18
18
  // "X tokens saved by compression". Token counts are an estimate (~4 chars/token),
19
19
  // labeled `estimated`, never claimed as provider-exact.
20
20
 
21
+ import { createHash } from "node:crypto";
22
+
21
23
  const CHARS_PER_TOKEN = 4;
22
24
  const MIN_FIELD_CHARS = 200; // below this, compression overhead isn't worth it
25
+ const MIN_DEDUP_CHARS = 256; // only dedup blocks big enough to be worth a stub
23
26
  const LOG_HEAD_LINES = 12;
24
27
  const LOG_TAIL_LINES = 8;
25
28
  const LOG_COLLAPSE_THRESHOLD = 40; // collapse runs longer than this
26
29
 
30
+ // --- delta-encoding of near-duplicate blocks ---
31
+ // When a block is similar (not identical) to one seen earlier in the same
32
+ // request, we replace it with a line-diff against the original. This is the
33
+ // case identical-dedup misses: an agent re-reads a file AFTER editing it.
34
+ // Lossless: the exact text is recoverable from (original block + diff).
35
+ const DELTA_MIN_SIMILARITY = 0.5; // below this a diff isn't smaller than the original
36
+ const DELTA_MAX_LINES = 2500; // LCS is O(n*m); above ~2500 lines a diff can cost >25ms, so skip to protect the hot path
37
+
27
38
  export function estimateTokens(text) {
28
39
  if (!text) return 0;
29
40
  return Math.ceil(String(text).length / CHARS_PER_TOKEN);
30
41
  }
31
42
 
43
/**
 * Short content fingerprint: the first 8 hex characters of the SHA-1 digest
 * of `text`.
 *
 * @param {string} text - content to fingerprint
 * @returns {string} 8-character lowercase hex prefix of the digest
 */
function shortHash(text) {
  const hasher = createHash("sha1");
  hasher.update(text);
  const hex = hasher.digest("hex");
  return hex.slice(0, 8);
}
46
+
47
/**
 * Cheap line-overlap ratio between two line arrays.
 *
 * Counts how many entries of `bLines` also occur somewhere in `aLines`
 * (membership only; position and multiplicity in `aLines` are ignored) and
 * divides by the longer of the two lengths. Used only as a gate for deciding
 * whether a full LCS diff is worth computing; the real saving is measured
 * against the emitted delta.
 *
 * @param {string[]} aLines - reference lines
 * @param {string[]} bLines - candidate lines
 * @returns {number} ratio in [0, 1]; 0 when both inputs are empty
 */
export function lineSimilarity(aLines, bLines) {
  const known = new Set(aLines);
  let hits = 0;
  for (const line of bLines) {
    if (known.has(line)) hits += 1;
  }
  // The literal 1 keeps the denominator non-zero for two empty inputs.
  const longest = Math.max(aLines.length, bLines.length, 1);
  return hits / longest;
}
55
+
56
/**
 * LCS-based line diff. Emits a compact op list of CHANGES only:
 *   { at: <line index in the original>, del: <lines removed>, ins: [<lines added>] }
 * Unchanged ranges are implied. Reconstruction walks the original applying ops.
 *
 * Exactly one op is emitted per contiguous changed hunk (the stretch between
 * two matching lines). Ops come out sorted by `at`, and within an op the
 * inserted lines replace the `del` original lines starting at `at`.
 *
 * Cost is O(n*m) time and memory for the LCS table, so callers are expected
 * to bound the input sizes.
 *
 * @param {string[]} aLines - original lines
 * @param {string[]} bLines - new lines
 * @returns {{at: number, del: number, ins: string[]}[]} change ops
 */
function lineDiff(aLines, bLines) {
  const n = aLines.length;
  const m = bLines.length;
  // dp[i][j] = length of the longest common subsequence of aLines[i..] and
  // bLines[j..]; the extra row/column of zeros is the empty-suffix base case.
  const dp = Array.from({ length: n + 1 }, () => new Int32Array(m + 1));
  for (let i = n - 1; i >= 0; i--) {
    for (let j = m - 1; j >= 0; j--) {
      dp[i][j] = aLines[i] === bLines[j]
        ? dp[i + 1][j + 1] + 1
        : Math.max(dp[i + 1][j], dp[i][j + 1]);
    }
  }

  const ops = [];
  let i = 0;
  let j = 0;
  let cur = null;
  // Return the op for the hunk in progress, opening one anchored at the
  // current original index if none is open. While an op is open the cursor
  // always sits at cur.at + cur.del, so consecutive deletions and insertions
  // extend the same op instead of starting a new one per line.
  const hunk = () => {
    if (cur === null) cur = { at: i, del: 0, ins: [] };
    return cur;
  };
  const flush = () => {
    if (cur !== null) {
      ops.push(cur);
      cur = null;
    }
  };

  while (i < n && j < m) {
    if (aLines[i] === bLines[j]) {
      // A matching line ends the current hunk.
      flush();
      i++;
      j++;
    } else if (dp[i + 1][j] >= dp[i][j + 1]) {
      // Dropping the original line loses nothing from the LCS: delete it.
      hunk().del++;
      i++;
    } else {
      hunk().ins.push(bLines[j]);
      j++;
    }
  }
  // At most one side has leftovers once the loop exits.
  if (i < n) {
    hunk().del += n - i;
    i = n;
  }
  if (j < m) {
    const tail = hunk();
    while (j < m) tail.ins.push(bLines[j++]);
  }
  flush();
  return ops;
}
87
+
88
/**
 * Exact inverse of lineDiff: (original lines + ops) -> reconstructed string.
 *
 * Ops are consumed in the order given (lineDiff emits them sorted by `at`).
 * For each op, untouched original lines are copied up to the op's anchor, the
 * op's inserted lines are appended, and `del` original lines are skipped.
 * Because the walk is order-based, several ops may share the same `at`.
 * Kept in-module so tests can prove losslessness against the real code path.
 *
 * @param {string[]} aLines - original lines
 * @param {{at: number, del: number, ins: string[]}[]} ops - change ops
 * @returns {string} reconstructed text, lines joined with "\n"
 */
export function applyLineDiff(aLines, ops) {
  const rebuilt = [];
  let cursor = 0; // next original line not yet copied or skipped
  const copyUntil = (limit) => {
    for (; cursor < limit && cursor < aLines.length; cursor++) {
      rebuilt.push(aLines[cursor]);
    }
  };
  for (const op of ops) {
    copyUntil(op.at);
    for (const added of op.ins) rebuilt.push(added);
    cursor += op.del;
  }
  // Whatever remains after the last op is unchanged.
  copyUntil(aLines.length);
  return rebuilt.join("\n");
}
104
+
105
/**
 * Render a delta as a block the MODEL can read and apply in its head.
 *
 * The header names the base block (short sha + the 1-based message it first
 * appeared in) so the model knows what to patch. The base is a SIMILAR block,
 * not an identical one, so the header must not call it identical. Each op is
 * then shown as a `@@` line giving the 1-based original line number, followed
 * by the added lines prefixed with "+ ".
 *
 * @param {string} baseHash - short hash of the base block
 * @param {number} firstIndex - 0-based index of the message holding the base block
 * @param {{at: number, del: number, ins: string[]}[]} ops - change ops against the base
 * @returns {string} rendered delta, lines joined with "\n"
 */
function renderDelta(baseHash, firstIndex, ops) {
  const lines = [
    `[runcap delta vs the block first seen in message ${firstIndex + 1} (sha:${baseHash}).`,
    ` Reconstruct the current text by applying these line changes to that block; all other lines are unchanged.]`
  ];
  for (const op of ops) {
    const at1 = op.at + 1; // ops are 0-based; the rendering is 1-based
    if (op.del > 0) lines.push(`@@ line ${at1}: remove ${op.del} line(s)`);
    else lines.push(`@@ line ${at1}: insert`);
    for (const ins of op.ins) lines.push(`+ ${ins}`);
  }
  return lines.join("\n");
}
121
+
32
122
  // Re-serialize an embedded JSON string compactly. Handles two shapes safely:
33
123
  // 1. The whole field is JSON ("{...}" or "[...]").
34
124
  // 2. A short text prefix followed by a JSON blob ("Here is the data:\n{...}").
@@ -109,6 +199,120 @@ function compressField(value) {
109
199
  return out;
110
200
  }
111
201
 
202
/**
 * Deduplicate repeated content blocks within a single request, and
 * delta-encode near-duplicates.
 *
 * In a long agentic session the same file dump or tool_result ships as a
 * fresh block on every turn (the agent re-reads auth.ts five times); the model
 * already saw those exact bytes earlier in the SAME request, so replacing the
 * repeats with a deterministic stub is lossless-by-construction. This is where
 * the real savings on agentic traffic live; per-field whitespace/JSON trimming
 * barely moves the needle by comparison.
 *
 * Walks messages in order. The first occurrence of a block is kept verbatim
 * (or, if it is similar enough to an earlier block, replaced by a line delta
 * against that block); any later block with exactly the same text becomes:
 *   [runcap: identical content seen at message N, sha:abcd1234]
 * Only blocks >= MIN_DEDUP_CHARS are considered, and a stub or delta is only
 * emitted when it is shorter than the text it replaces.
 *
 * The input body is not mutated: messages and parts that are rewritten are
 * shallow-copied.
 *
 * NOTE(review): a block that was itself emitted as a delta is still recorded
 * as a possible base for later stubs and deltas, so reconstruction may have to
 * follow a chain of deltas. Confirm this is intended.
 *
 * @param {object} body - request body; only an array-valued `messages` is processed
 * @returns {{body: object, saved: number, blocks: number, deltas: number}}
 *   rewritten body, characters saved, number of blocks replaced (stubs plus
 *   deltas), and how many of those were deltas
 */
function dedupRepeatedBlocks(body) {
  let saved = 0;
  let blocks = 0;
  let deltas = 0;
  // exact text -> { index, hash, text, lines } for the first occurrence of
  // each block. Keyed by the full text, NOT the short hash: the hash is only
  // 32 bits, and a collision between two different blocks would otherwise
  // replace real content with an "identical content" stub. The hash is kept
  // purely as a human-readable label.
  const seen = new Map();
  // Ordered list of prior blocks, for near-duplicate (delta) matching.
  const priors = [];

  const stubFor = (hash, firstIndex) =>
    `[runcap: identical content seen at message ${firstIndex + 1}, sha:${hash}]`;

  // Try to encode `text` (already split into `bLines`) as a delta against the
  // most similar prior block. Returns the delta string if it is smaller than
  // the original, else null.
  const tryDelta = (text, bLines) => {
    if (bLines.length > DELTA_MAX_LINES) return null; // protect the hot path
    let best = null;
    let bestSim = 0;
    for (const p of priors) {
      if (p.lines.length > DELTA_MAX_LINES) continue;
      const sim = lineSimilarity(p.lines, bLines);
      if (sim < DELTA_MIN_SIMILARITY) continue;
      // Strict ">" keeps the earliest prior on ties.
      if (best === null || sim > bestSim) {
        best = p;
        bestSim = sim;
      }
    }
    if (best === null) return null;
    const ops = lineDiff(best.lines, bLines);
    // Safety: only emit if it reconstructs exactly (lossless-by-construction).
    if (applyLineDiff(best.lines, ops) !== text) return null;
    const rendered = renderDelta(best.hash, best.index, ops);
    return rendered.length < text.length ? rendered : null;
  };

  const dedupString = (text, msgIndex) => {
    if (typeof text !== "string" || text.length < MIN_DEDUP_CHARS) return text;
    const firstSeen = seen.get(text);
    if (firstSeen === undefined) {
      // First time we see this exact block. Try a delta vs an earlier
      // *similar* block before recording it as a fresh original.
      const lines = text.split("\n");
      const delta = tryDelta(text, lines);
      const record = { index: msgIndex, hash: shortHash(text), text, lines };
      seen.set(text, record);
      priors.push(record);
      if (delta !== null) {
        saved += text.length - delta.length;
        blocks += 1;
        deltas += 1;
        return delta;
      }
      return text;
    }
    const stub = stubFor(firstSeen.hash, firstSeen.index);
    if (stub.length >= text.length) return text;
    saved += text.length - stub.length;
    blocks += 1;
    return stub;
  };

  // Rewrite the `text` field of a part, leaving anything else untouched.
  const dedupTextPart = (part, msgIndex) =>
    part && typeof part === "object" && typeof part.text === "string"
      ? { ...part, text: dedupString(part.text, msgIndex) }
      : part;

  const dedupContent = (content, msgIndex) => {
    if (typeof content === "string") return dedupString(content, msgIndex);
    if (!Array.isArray(content)) return content;
    return content.map((part) => {
      if (!part || typeof part !== "object") return part;
      // OpenAI/Anthropic text parts
      if (typeof part.text === "string") {
        return { ...part, text: dedupString(part.text, msgIndex) };
      }
      // Anthropic tool_result blocks: content can be string or array of parts
      if (part.type === "tool_result") {
        if (typeof part.content === "string") {
          return { ...part, content: dedupString(part.content, msgIndex) };
        }
        if (Array.isArray(part.content)) {
          return {
            ...part,
            content: part.content.map((c) => dedupTextPart(c, msgIndex))
          };
        }
      }
      return part;
    });
  };

  let next = body;
  if (Array.isArray(body.messages)) {
    next = {
      ...body,
      messages: body.messages.map((m, i) =>
        m && typeof m === "object" && "content" in m
          ? { ...m, content: dedupContent(m.content, i) }
          : m
      )
    };
  }
  return { body: next, saved, blocks, deltas };
}
315
+
112
316
  // Walk an OpenAI- or Anthropic-shaped request body and compress message content.
113
317
  // Returns { body, before, after, savedChars, savedTokens, touched }.
114
318
  export function compressRequestBody(body) {
@@ -156,6 +360,12 @@ export function compressRequestBody(body) {
156
360
  next = { ...next, input: compressContent(next.input) };
157
361
  }
158
362
 
363
+ // Cross-message dedup of identical blocks + delta-encoding of near-duplicates
364
+ // (the big win on agentic traffic: re-reads after an edit).
365
+ const deduped = dedupRepeatedBlocks(next);
366
+ next = deduped.body;
367
+ touched += deduped.blocks;
368
+
159
369
  const measureAfter = JSON.stringify(next).length;
160
370
  const savedChars = Math.max(0, measureBefore - measureAfter);
161
371
  return {
@@ -164,6 +374,63 @@ export function compressRequestBody(body) {
164
374
  after: measureAfter,
165
375
  savedChars,
166
376
  savedTokens: Math.round(savedChars / CHARS_PER_TOKEN),
167
- touched
377
+ touched,
378
+ deltas: deduped.deltas
379
+ };
380
+ }
381
+
382
+ // --- loop / circling detection (the "looks productive but stuck" signal) ---
383
+ // The gateway sees every request the agent sends. An agent that is circling the
384
+ // same failure with reworded attempts sends prompts that are SIMILAR-but-not-
385
+ // identical turn after turn: the conversation tail barely moves while tokens
386
+ // keep burning. Plain hashing misses this (the text differs slightly each loop);
387
+ // this catches it with the same line-similarity primitive the delta-encoder uses.
388
+ const LOOP_SIMILARITY = 0.92; // two consecutive prompts this similar = no real progress made between them
389
+ const LOOP_MIN_REPEATS = 3; // how many near-identical prompts in a row before we warn
390
+
391
/**
 * Pull the comparable "shape" of a request: the text the agent is sending this
 * turn, concatenated with "\n" in a fixed order: every message's content,
 * then `system`, then a string `input`.
 *
 * String content is taken as-is. Array content contributes only the parts
 * that are objects with a string `text` field; every other part is ignored.
 *
 * @param {*} body - parsed request body
 * @returns {string} concatenated text, or "" when body is not an object
 */
export function requestShapeText(body) {
  if (!body || typeof body !== "object") return "";

  const collected = [];
  const collect = (content) => {
    if (typeof content === "string") {
      collected.push(content);
      return;
    }
    if (!Array.isArray(content)) return;
    for (const piece of content) {
      if (piece && typeof piece === "object" && typeof piece.text === "string") {
        collected.push(piece.text);
      }
    }
  };

  if (Array.isArray(body.messages)) {
    for (const message of body.messages) {
      if (message && typeof message === "object") collect(message.content);
    }
  }
  if (body.system !== undefined) collect(body.system);
  // Only a plain-string `input` is included; array-shaped input is skipped.
  if (typeof body.input === "string") collected.push(body.input);

  return collected.join("\n");
}
407
+
408
/**
 * Given the current request shape and a rolling history of prior request
 * shapes, decide whether the agent is circling.
 *
 * Walks `history` from newest to oldest, comparing each entry to the current
 * shape with lineSimilarity, and counts the unbroken run of entries at or
 * above the threshold. The walk stops at the first entry below it.
 *
 * @param {string} currentShape - requestShapeText() of the current request
 * @param {string[]} history - prior shapes in this session, oldest -> newest
 * @param {object} [options]
 * @param {number} [options.similarityThreshold=LOOP_SIMILARITY] - minimum ratio to count as a repeat
 * @param {number} [options.minRepeats=LOOP_MIN_REPEATS] - run length at which `looping` becomes true
 * @returns {{looping: boolean, repeats: number, similarity: number}}
 *   `similarity` is the score of the oldest entry in the counted run, rounded
 *   to 3 decimals (0 when nothing matched)
 */
export function detectLoop(currentShape, history, {
  similarityThreshold = LOOP_SIMILARITY,
  minRepeats = LOOP_MIN_REPEATS
} = {}) {
  const nothingToCompare = !currentShape || !Array.isArray(history) || history.length === 0;
  if (nothingToCompare) {
    return { looping: false, repeats: 0, similarity: 0 };
  }

  const currentLines = String(currentShape).split("\n");
  let repeats = 0;
  let runSimilarity = 0;
  let idx = history.length - 1;
  while (idx >= 0) {
    const score = lineSimilarity(currentLines, String(history[idx]).split("\n"));
    // Written as a negated ">=" so a non-comparable threshold ends the run.
    if (!(score >= similarityThreshold)) break;
    repeats += 1;
    runSimilarity = score;
    idx -= 1;
  }

  const looping = repeats >= minRepeats;
  const similarity = Number(runSimilarity.toFixed(3));
  return { looping, repeats, similarity };
}
@@ -7,7 +7,7 @@ import path from "node:path";
7
7
  import process from "node:process";
8
8
  import { syncRun } from "./cloud.mjs";
9
9
  import { sendAlert } from "./alerts.mjs";
10
- import { compressRequestBody, estimateTokens } from "./compressor.mjs";
10
+ import { compressRequestBody, estimateTokens, requestShapeText, detectLoop } from "./compressor.mjs";
11
11
 
12
12
  const STORE_DIR = ".runcap";
13
13
  const MISSIONS_DIR = path.join(STORE_DIR, "missions");
@@ -523,6 +523,12 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
523
523
  if (gatewayMode !== "mock" && !openaiKey && !anthropicKey) {
524
524
  throw new Error("Missing upstream key. Set OPENAI_API_KEY (for /v1/chat/completions) and/or ANTHROPIC_API_KEY (for /v1/messages). The gateway cannot proxy without at least one.");
525
525
  }
526
+ // Rolling history of recent request shapes (per gateway process) so we can
527
+ // detect an agent circling the same failure with reworded prompts: similar-
528
+ // but-not-identical turns, which plain hashing never catches.
529
+ const loopEnabled = (process.env.AIM_LOOP_DETECT ?? "on").toLowerCase() !== "off";
530
+ const shapeHistory = [];
531
+ const SHAPE_HISTORY_MAX = 12;
526
532
  const server = http.createServer(async (request, response) => {
527
533
  const started = Date.now();
528
534
  try {
@@ -545,6 +551,17 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
545
551
 
546
552
  const bodyText = await readRequestBody(request);
547
553
  const requestBody = safeJson(bodyText) ?? {};
554
+ // Loop signal: compare this request's shape against the recent run.
555
+ let loop = null;
556
+ if (loopEnabled) {
557
+ const shape = requestShapeText(requestBody);
558
+ if (shape) {
559
+ const result = detectLoop(shape, shapeHistory);
560
+ loop = { looping: result.looping, repeats: result.repeats, similarity: result.similarity, truth: "calculated" };
561
+ shapeHistory.push(shape);
562
+ if (shapeHistory.length > SHAPE_HISTORY_MAX) shapeHistory.shift();
563
+ }
564
+ }
548
565
  const budget = readBudget();
549
566
  const summary = await readGatewaySummary({ windowMs: budgetWindowMs() });
550
567
  // Compress the request body once (safe, lossless-by-construction). Disable with AIM_COMPRESS=off.
@@ -561,6 +578,7 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
561
578
  beforeChars: c.before,
562
579
  afterChars: c.after,
563
580
  fieldsTouched: c.touched,
581
+ deltas: c.deltas ?? 0,
564
582
  truth: "estimated"
565
583
  };
566
584
  }
@@ -590,6 +608,7 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
590
608
  capUsd: budget,
591
609
  blockedByThisCall
592
610
  },
611
+ loop,
593
612
  error: blockedByThisCall
594
613
  ? `Budget would be exceeded by this call: $${summary.estimatedCostUsd} spent + ~$${callEstimate} this call > cap $${budget}`
595
614
  : `Budget exceeded: ${summary.estimatedCostUsd} >= ${budget}`,
@@ -630,6 +649,7 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
630
649
  usage: responseBody.usage,
631
650
  cost: estimateApiCost(responseBody.usage, requestBody.model ?? responseBody.model),
632
651
  compression,
652
+ loop,
633
653
  truth: "mock_provider_usage",
634
654
  requestHash: createHash("sha1").update(bodyText).digest("hex")
635
655
  });
@@ -681,9 +701,14 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
681
701
  usage: responseBody.usage ?? null,
682
702
  cost: estimateApiCost(responseBody.usage, requestBody.model ?? responseBody.model),
683
703
  compression,
704
+ loop,
684
705
  truth: responseBody.usage ? "provider_usage" : "unknown",
685
706
  requestHash: createHash("sha1").update(bodyText).digest("hex")
686
707
  });
708
+ if (loop && loop.looping) {
709
+ sendAlert(`Runcap: possible stuck loop. The agent has sent ${loop.repeats} near-identical prompts in a row (${Math.round(loop.similarity * 100)}% similar) without the conversation moving forward. It may be circling the same failure with reworded attempts.`)
710
+ .catch(() => {});
711
+ }
687
712
  if (responseBody.usage) {
688
713
  const spent = await readGatewaySummary({ windowMs: budgetWindowMs() });
689
714
  syncRun({
@@ -768,19 +793,23 @@ export async function showStatus(options = {}) {
768
793
 
769
794
  const gateway = await readGatewaySummary();
770
795
  const gatewayLine = `Gateway: ${gateway.callCount} calls, ${gateway.totalTokens} tokens, $${gateway.estimatedCostUsd} estimated (${gateway.truth})`;
796
+ const loopLine = gateway.loop?.looping
797
+ ? `Loop warning: last ${gateway.loop.repeats} prompts were ${Math.round(gateway.loop.similarity * 100)}% identical with no progress. The agent may be circling the same failure (truth: calculated).`
798
+ : null;
771
799
  const latest = await latestMissionId();
772
- if (!latest) return `${fuelLine}\n${gatewayLine}\nNo missions recorded yet.`;
800
+ if (!latest) return [fuelLine, gatewayLine, loopLine, "No missions recorded yet."].filter(Boolean).join("\n");
773
801
  const mission = await readMission(latest);
774
802
  return [
775
803
  fuelLine,
776
804
  gatewayLine,
805
+ loopLine,
777
806
  `Latest mission: ${mission.id}`,
778
807
  `Status: ${mission.stuck.status}`,
779
808
  `Exit code: ${mission.exitCode}`,
780
809
  `Changed files: ${mission.diffEvidence.changedFiles.length}`,
781
810
  `Errors: ${mission.errors.length}`,
782
811
  `Report: ${path.join(MISSIONS_DIR, mission.id, "report.md")}`
783
- ].join("\n");
812
+ ].filter(Boolean).join("\n");
784
813
  }
785
814
 
786
815
  export async function recordFuel(value) {
@@ -1418,6 +1447,13 @@ async function readGatewaySummary({ windowMs } = {}) {
1418
1447
  const inputRate = pricing ? pricing.inputPerMillion : 3; // fall back to a mid Sonnet-ish rate
1419
1448
  return sum + (saved * inputRate) / 1_000_000;
1420
1449
  }, 0);
1450
+ // Loop signal: the most recent event that carries a loop verdict tells us
1451
+ // whether the agent is currently circling (similar-but-not-identical prompts
1452
+ // repeated without progress). This is the "looks productive but stuck" case.
1453
+ const lastWithLoop = [...events].reverse().find((event) => event.loop);
1454
+ const loop = lastWithLoop
1455
+ ? { ...lastWithLoop.loop, at: lastWithLoop.at, model: lastWithLoop.model }
1456
+ : { looping: false, repeats: 0, similarity: 0, truth: "calculated" };
1421
1457
  return {
1422
1458
  callCount: events.length,
1423
1459
  successfulCallCount: successful.length,
@@ -1426,6 +1462,7 @@ async function readGatewaySummary({ windowMs } = {}) {
1426
1462
  savedTokens,
1427
1463
  savedUsd: Number(savedUsd.toFixed(6)),
1428
1464
  wouldHaveSpentUsd: Number((estimatedCost + savedUsd).toFixed(6)),
1465
+ loop,
1429
1466
  truth: events.some((event) => event.truth === "provider_usage" || event.truth === "mock_provider_usage")
1430
1467
  ? "usage_plus_static_price_table"
1431
1468
  : "unknown",