runcap 0.2.2 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -3
- package/package.json +3 -2
- package/scripts/loop-e2e.mjs +137 -0
- package/scripts/loop-test.mjs +128 -0
- package/scripts/make-demo-svg.mjs +19 -18
- package/scripts/make-linkedin-delta-video.mjs +412 -0
- package/scripts/make-linkedin-loop-video.mjs +338 -0
- package/src/compressor.mjs +125 -1
- package/src/mission-control.mjs +69 -6
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
// Renders a LinkedIn-ready MP4 for the Runcap loop-detection post.
|
|
2
|
+
// Narrative: a circling agent looks busy but burns money -> Runcap catches the
|
|
3
|
+
// loop in real time -> proven 37.9% compression -> hard cap stops the run.
|
|
4
|
+
// Output: docs/assets/media/runcap-linkedin-loop-demo.mp4
|
|
5
|
+
// Requires: playwright + ffmpeg available on the machine.
|
|
6
|
+
import { spawnSync } from "node:child_process";
import { mkdirSync, readdirSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import { dirname, join, resolve } from "node:path";
import { fileURLToPath } from "node:url";
import { chromium } from "playwright";
|
|
11
|
+
|
|
12
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
13
|
+
const root = resolve(__dirname, "..");
|
|
14
|
+
const outDir = resolve(root, "docs/assets/media");
|
|
15
|
+
const framesDir = "/private/tmp/runcap-linkedin-loop-frames";
|
|
16
|
+
const outFile = join(outDir, "runcap-linkedin-loop-demo.mp4");
|
|
17
|
+
|
|
18
|
+
const width = 1080;
|
|
19
|
+
const height = 1080;
|
|
20
|
+
const fps = 30;
|
|
21
|
+
const duration = 13;
|
|
22
|
+
const frameCount = fps * duration;
|
|
23
|
+
|
|
24
|
+
mkdirSync(outDir, { recursive: true });
|
|
25
|
+
mkdirSync(framesDir, { recursive: true });
|
|
26
|
+
for (const file of readdirSync(framesDir)) {
|
|
27
|
+
if (file.startsWith("frame-") && file.endsWith(".png")) {
|
|
28
|
+
rmSync(join(framesDir, file));
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
const html = `<!doctype html>
|
|
33
|
+
<html>
|
|
34
|
+
<head>
|
|
35
|
+
<meta charset="utf-8" />
|
|
36
|
+
<style>
|
|
37
|
+
* { box-sizing: border-box; }
|
|
38
|
+
html, body {
|
|
39
|
+
margin: 0;
|
|
40
|
+
width: ${width}px;
|
|
41
|
+
height: ${height}px;
|
|
42
|
+
overflow: hidden;
|
|
43
|
+
background: #f4f6fb;
|
|
44
|
+
font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
|
|
45
|
+
color: #f8fafc;
|
|
46
|
+
}
|
|
47
|
+
.stage {
|
|
48
|
+
width: ${width}px;
|
|
49
|
+
height: ${height}px;
|
|
50
|
+
padding: 58px;
|
|
51
|
+
display: grid;
|
|
52
|
+
place-items: center;
|
|
53
|
+
background:
|
|
54
|
+
radial-gradient(circle at 15% 10%, rgba(167, 139, 250, .2), transparent 32%),
|
|
55
|
+
radial-gradient(circle at 85% 12%, rgba(34, 211, 238, .16), transparent 34%),
|
|
56
|
+
linear-gradient(135deg, #eef2ff, #f8fafc);
|
|
57
|
+
}
|
|
58
|
+
.card {
|
|
59
|
+
width: 964px;
|
|
60
|
+
height: 964px;
|
|
61
|
+
border-radius: 42px;
|
|
62
|
+
padding: 42px;
|
|
63
|
+
background: #080b12;
|
|
64
|
+
box-shadow: 0 36px 90px rgba(15, 23, 42, .25);
|
|
65
|
+
position: relative;
|
|
66
|
+
overflow: hidden;
|
|
67
|
+
}
|
|
68
|
+
.card::before {
|
|
69
|
+
content: "";
|
|
70
|
+
position: absolute;
|
|
71
|
+
inset: 0;
|
|
72
|
+
background:
|
|
73
|
+
radial-gradient(circle at 50% -10%, rgba(167, 139, 250, .18), transparent 36%),
|
|
74
|
+
linear-gradient(180deg, rgba(255,255,255,.06), transparent 28%);
|
|
75
|
+
pointer-events: none;
|
|
76
|
+
}
|
|
77
|
+
.top {
|
|
78
|
+
position: relative;
|
|
79
|
+
display: flex;
|
|
80
|
+
justify-content: space-between;
|
|
81
|
+
align-items: center;
|
|
82
|
+
color: #94a3b8;
|
|
83
|
+
font-size: 23px;
|
|
84
|
+
letter-spacing: -0.02em;
|
|
85
|
+
}
|
|
86
|
+
.brand {
|
|
87
|
+
display: flex;
|
|
88
|
+
gap: 14px;
|
|
89
|
+
align-items: center;
|
|
90
|
+
font-weight: 800;
|
|
91
|
+
color: #fff;
|
|
92
|
+
font-size: 30px;
|
|
93
|
+
}
|
|
94
|
+
.logo {
|
|
95
|
+
width: 42px;
|
|
96
|
+
height: 42px;
|
|
97
|
+
border-radius: 13px;
|
|
98
|
+
display: grid;
|
|
99
|
+
place-items: center;
|
|
100
|
+
background: linear-gradient(135deg, #22d3ee, #34d399);
|
|
101
|
+
color: #021014;
|
|
102
|
+
font-weight: 900;
|
|
103
|
+
}
|
|
104
|
+
.pill {
|
|
105
|
+
border: 1px solid rgba(148, 163, 184, .28);
|
|
106
|
+
background: rgba(15, 23, 42, .68);
|
|
107
|
+
color: #cbd5e1;
|
|
108
|
+
border-radius: 999px;
|
|
109
|
+
padding: 10px 16px;
|
|
110
|
+
font-size: 18px;
|
|
111
|
+
font-weight: 650;
|
|
112
|
+
}
|
|
113
|
+
.content {
|
|
114
|
+
position: relative;
|
|
115
|
+
height: 818px;
|
|
116
|
+
padding-top: 44px;
|
|
117
|
+
}
|
|
118
|
+
.headline {
|
|
119
|
+
margin: 0;
|
|
120
|
+
color: #f8fafc;
|
|
121
|
+
font-size: 68px;
|
|
122
|
+
line-height: .98;
|
|
123
|
+
letter-spacing: -0.06em;
|
|
124
|
+
max-width: 840px;
|
|
125
|
+
}
|
|
126
|
+
.sub {
|
|
127
|
+
margin-top: 22px;
|
|
128
|
+
color: #cbd5e1;
|
|
129
|
+
font-size: 29px;
|
|
130
|
+
line-height: 1.28;
|
|
131
|
+
letter-spacing: -0.03em;
|
|
132
|
+
max-width: 820px;
|
|
133
|
+
}
|
|
134
|
+
.accent { color: #67e8f9; }
|
|
135
|
+
.green { color: #34d399; }
|
|
136
|
+
.red { color: #fb7185; }
|
|
137
|
+
.violet { color: #a78bfa; }
|
|
138
|
+
.mono {
|
|
139
|
+
font-family: "SF Mono", "JetBrains Mono", Menlo, Consolas, monospace;
|
|
140
|
+
letter-spacing: -0.04em;
|
|
141
|
+
}
|
|
142
|
+
.terminal {
|
|
143
|
+
margin-top: 38px;
|
|
144
|
+
border: 1px solid rgba(148, 163, 184, .22);
|
|
145
|
+
background: rgba(2, 6, 23, .82);
|
|
146
|
+
border-radius: 24px;
|
|
147
|
+
padding: 26px;
|
|
148
|
+
font-size: 24px;
|
|
149
|
+
line-height: 1.5;
|
|
150
|
+
color: #dbeafe;
|
|
151
|
+
box-shadow: inset 0 1px 0 rgba(255,255,255,.05);
|
|
152
|
+
}
|
|
153
|
+
.terminal .line { opacity: 1; }
|
|
154
|
+
.warning {
|
|
155
|
+
margin-top: 28px;
|
|
156
|
+
border: 1px solid rgba(167, 139, 250, .4);
|
|
157
|
+
background: rgba(167, 139, 250, .12);
|
|
158
|
+
color: #ddd6fe;
|
|
159
|
+
border-radius: 22px;
|
|
160
|
+
padding: 22px 26px;
|
|
161
|
+
font-size: 28px;
|
|
162
|
+
font-weight: 850;
|
|
163
|
+
letter-spacing: -0.04em;
|
|
164
|
+
}
|
|
165
|
+
.numbers {
|
|
166
|
+
margin-top: 46px;
|
|
167
|
+
display: grid;
|
|
168
|
+
grid-template-columns: 1fr 1fr;
|
|
169
|
+
gap: 28px;
|
|
170
|
+
align-items: end;
|
|
171
|
+
}
|
|
172
|
+
.number-card {
|
|
173
|
+
border-radius: 26px;
|
|
174
|
+
padding: 28px;
|
|
175
|
+
background: rgba(15, 23, 42, .9);
|
|
176
|
+
border: 1px solid rgba(148, 163, 184, .22);
|
|
177
|
+
}
|
|
178
|
+
.label {
|
|
179
|
+
color: #94a3b8;
|
|
180
|
+
font-size: 22px;
|
|
181
|
+
margin-bottom: 12px;
|
|
182
|
+
letter-spacing: -0.03em;
|
|
183
|
+
}
|
|
184
|
+
.big {
|
|
185
|
+
font-size: 78px;
|
|
186
|
+
line-height: .9;
|
|
187
|
+
font-weight: 900;
|
|
188
|
+
letter-spacing: -0.08em;
|
|
189
|
+
}
|
|
190
|
+
.bar {
|
|
191
|
+
margin-top: 32px;
|
|
192
|
+
height: 34px;
|
|
193
|
+
border-radius: 999px;
|
|
194
|
+
background: rgba(148, 163, 184, .16);
|
|
195
|
+
overflow: hidden;
|
|
196
|
+
border: 1px solid rgba(148, 163, 184, .24);
|
|
197
|
+
}
|
|
198
|
+
.fill {
|
|
199
|
+
height: 100%;
|
|
200
|
+
width: 37.9%;
|
|
201
|
+
border-radius: 999px;
|
|
202
|
+
background: linear-gradient(90deg, #22d3ee, #34d399);
|
|
203
|
+
}
|
|
204
|
+
.footer {
|
|
205
|
+
position: absolute;
|
|
206
|
+
left: 42px;
|
|
207
|
+
right: 42px;
|
|
208
|
+
bottom: 34px;
|
|
209
|
+
display: flex;
|
|
210
|
+
justify-content: space-between;
|
|
211
|
+
align-items: center;
|
|
212
|
+
color: #94a3b8;
|
|
213
|
+
font-size: 20px;
|
|
214
|
+
}
|
|
215
|
+
.scene {
|
|
216
|
+
position: absolute;
|
|
217
|
+
inset: 44px 0 0 0;
|
|
218
|
+
opacity: 0;
|
|
219
|
+
transform: translateY(24px) scale(.985);
|
|
220
|
+
transition: opacity .24s ease, transform .24s ease;
|
|
221
|
+
}
|
|
222
|
+
.scene.active {
|
|
223
|
+
opacity: 1;
|
|
224
|
+
transform: translateY(0) scale(1);
|
|
225
|
+
}
|
|
226
|
+
</style>
|
|
227
|
+
</head>
|
|
228
|
+
<body>
|
|
229
|
+
<div class="stage">
|
|
230
|
+
<div class="card">
|
|
231
|
+
<div class="top">
|
|
232
|
+
<div class="brand"><div class="logo">R</div> Runcap</div>
|
|
233
|
+
<div class="pill">local-first AI cost control</div>
|
|
234
|
+
</div>
|
|
235
|
+
<div class="content">
|
|
236
|
+
<section class="scene active" id="s0">
|
|
237
|
+
<h1 class="headline">Your AI agent looks busy. It is just circling.</h1>
|
|
238
|
+
<p class="sub">Same failure, reworded every turn. It produces output, makes no progress, and keeps spending your tokens.</p>
|
|
239
|
+
<div class="terminal mono">
|
|
240
|
+
<div class="line">attempt 1: guard the undefined with an if check</div>
|
|
241
|
+
<div class="line">attempt 2: add an optional chain before .id</div>
|
|
242
|
+
<div class="line">attempt 3: default the object to {} first</div>
|
|
243
|
+
<div class="line red">test still fails. budget still draining.</div>
|
|
244
|
+
</div>
|
|
245
|
+
</section>
|
|
246
|
+
<section class="scene" id="s1">
|
|
247
|
+
<h1 class="headline">Plain hashing never catches this.</h1>
|
|
248
|
+
<p class="sub">The prompt is similar but never byte-identical between loops, so the hash changes every turn and nothing trips.</p>
|
|
249
|
+
<div class="terminal mono">
|
|
250
|
+
<div class="line">hash(attempt 1) = a91f... hash(attempt 2) = c4d2...</div>
|
|
251
|
+
<div class="line red">different hash every time -> loop invisible</div>
|
|
252
|
+
</div>
|
|
253
|
+
</section>
|
|
254
|
+
<section class="scene" id="s2">
|
|
255
|
+
<h1 class="headline">Runcap measures similarity, not hashes.</h1>
|
|
256
|
+
<p class="sub">A local gateway sees every request in real time and compares each prompt's shape against the recent run.</p>
|
|
257
|
+
<div class="warning">loop: last 3 prompts 97.7% identical, no progress. The agent is circling the same failure.</div>
|
|
258
|
+
<div class="terminal mono">
|
|
259
|
+
<div class="line green">$ runcap status</div>
|
|
260
|
+
<div class="line violet">Loop warning: stepping in before it burns more budget.</div>
|
|
261
|
+
</div>
|
|
262
|
+
</section>
|
|
263
|
+
<section class="scene" id="s3">
|
|
264
|
+
<h1 class="headline">And it compresses every call it lets through.</h1>
|
|
265
|
+
<div class="numbers">
|
|
266
|
+
<div class="number-card">
|
|
267
|
+
<div class="label">baseline prompt</div>
|
|
268
|
+
<div class="big red mono">1,186</div>
|
|
269
|
+
<div class="label">tokens</div>
|
|
270
|
+
</div>
|
|
271
|
+
<div class="number-card">
|
|
272
|
+
<div class="label">with Runcap</div>
|
|
273
|
+
<div class="big green mono">737</div>
|
|
274
|
+
<div class="label">tokens</div>
|
|
275
|
+
</div>
|
|
276
|
+
</div>
|
|
277
|
+
<div class="bar"><div class="fill"></div></div>
|
|
278
|
+
<p class="sub"><span class="green">37.9% saved</span> on a real OpenAI call. The model still answered correctly about the changed line.</p>
|
|
279
|
+
</section>
|
|
280
|
+
<section class="scene" id="s4">
|
|
281
|
+
<h1 class="headline">Estimate. Cap. Compress. Catch the loop.</h1>
|
|
282
|
+
<p class="sub">Point your OpenAI or Anthropic-compatible tools at the local gateway. When the ceiling is crossed, the next call stops.</p>
|
|
283
|
+
<div class="terminal mono">
|
|
284
|
+
<div class="line green">$ AIM_DAILY_BUDGET_USD=10 runcap gateway</div>
|
|
285
|
+
<div class="line">gateway up · compress on · hard cap armed · loop guard on</div>
|
|
286
|
+
<div class="line red">HTTP 429 budget_guard</div>
|
|
287
|
+
<div class="line accent">stopped before money left your account</div>
|
|
288
|
+
</div>
|
|
289
|
+
</section>
|
|
290
|
+
</div>
|
|
291
|
+
<div class="footer">
|
|
292
|
+
<span class="mono">npm install -g runcap</span>
|
|
293
|
+
<span>Free · MIT · 100% local</span>
|
|
294
|
+
</div>
|
|
295
|
+
</div>
|
|
296
|
+
</div>
|
|
297
|
+
<script>
|
|
298
|
+
const scenes = [...document.querySelectorAll(".scene")];
|
|
299
|
+
window.renderFrame = (seconds) => {
|
|
300
|
+
const index =
|
|
301
|
+
seconds < 2.8 ? 0 :
|
|
302
|
+
seconds < 5.2 ? 1 :
|
|
303
|
+
seconds < 8.2 ? 2 :
|
|
304
|
+
seconds < 10.6 ? 3 : 4;
|
|
305
|
+
scenes.forEach((scene, i) => scene.classList.toggle("active", i === index));
|
|
306
|
+
};
|
|
307
|
+
</script>
|
|
308
|
+
</body>
|
|
309
|
+
</html>`;
|
|
310
|
+
|
|
311
|
+
// Step 1: render every frame in headless Chromium. Each frame sets the page to
// its timestamp via window.renderFrame() and is saved as a zero-padded PNG so
// the encoder can read the files as one numbered sequence.
const browser = await chromium.launch({ headless: true });
try {
  const page = await browser.newPage({ viewport: { width, height }, deviceScaleFactor: 1 });
  await page.setContent(html);
  // Short pause before the first capture so the page has settled.
  await page.waitForTimeout(100);

  for (let i = 0; i < frameCount; i += 1) {
    const seconds = i / fps;
    await page.evaluate((t) => window.renderFrame(t), seconds);
    await page.screenshot({ path: join(framesDir, `frame-${String(i).padStart(4, "0")}.png`) });
  }
} finally {
  // Always release the browser, even when a frame fails, so no headless
  // Chromium process is left behind.
  await browser.close();
}

// Step 2: encode the PNG sequence to H.264 MP4 with ffmpeg.
const ffmpeg = spawnSync("ffmpeg", [
  "-y",
  "-framerate", String(fps),
  "-i", join(framesDir, "frame-%04d.png"),
  "-c:v", "libx264",
  "-pix_fmt", "yuv420p",
  "-movflags", "+faststart",
  "-crf", "18",
  outFile
], { stdio: "inherit" });

// spawnSync reports a failure to start the process (for example ffmpeg not
// being installed) through `error`; say so instead of exiting silently.
if (ffmpeg.error) {
  console.error(`could not run ffmpeg: ${ffmpeg.error.message}`);
  process.exit(1);
}

if (ffmpeg.status !== 0) {
  // `status` is null when the process was terminated by a signal.
  process.exit(ffmpeg.status ?? 1);
}

console.log(`wrote ${outFile}`);
|
package/src/compressor.mjs
CHANGED
|
@@ -46,7 +46,7 @@ function shortHash(text) {
|
|
|
46
46
|
|
|
47
47
|
// Cheap line-overlap ratio. Used only to decide whether a full LCS diff is
|
|
48
48
|
// worth computing; the real saving is measured against the emitted delta.
|
|
49
|
-
function lineSimilarity(aLines, bLines) {
|
|
49
|
+
export function lineSimilarity(aLines, bLines) {
|
|
50
50
|
const aSet = new Set(aLines);
|
|
51
51
|
let shared = 0;
|
|
52
52
|
for (const l of bLines) if (aSet.has(l)) shared++;
|
|
@@ -378,3 +378,127 @@ export function compressRequestBody(body) {
|
|
|
378
378
|
deltas: deduped.deltas
|
|
379
379
|
};
|
|
380
380
|
}
|
|
381
|
+
|
|
382
|
+
// --- loop / circling detection (the "looks productive but stuck" signal) ---
|
|
383
|
+
// The gateway sees every request the agent sends. An agent that is circling the
|
|
384
|
+
// same failure with reworded attempts sends prompts that are SIMILAR-but-not-
|
|
385
|
+
// identical turn after turn: the conversation tail barely moves while tokens
|
|
386
|
+
// keep burning. Plain hashing misses this (the text differs slightly each loop);
|
|
387
|
+
// this catches it with the same line-similarity primitive the delta-encoder uses.
|
|
388
|
+
const LOOP_SIMILARITY = 0.92; // line-similarity score at or above which two prompts count as "no real progress"; default for detectLoop's similarityThreshold and responseMovedThreshold
const LOOP_MIN_REPEATS = 3; // default minRepeats for detectLoop: how many trailing near-identical prompts are needed before it reports looping
|
|
390
|
+
|
|
391
|
+
// Pull the comparable "shape" of a request: the text the agent is sending this
// turn, concatenated in a fixed order (messages, then system, then input) and
// joined with newlines. Returns "" when `body` is not an object or carries no
// text.
//
// Content may be a plain string or an array of blocks; only blocks that carry a
// string `text` contribute. `input` may be a string or an array whose items are
// strings, text parts ({ text }) or message-like items ({ content }), so
// array-form requests yield a shape instead of an empty string.
export function requestShapeText(body) {
  if (!body || typeof body !== "object") return "";
  const parts = [];
  const push = (content) => {
    if (typeof content === "string") {
      parts.push(content);
    } else if (Array.isArray(content)) {
      for (const part of content) {
        if (part && typeof part === "object" && typeof part.text === "string") parts.push(part.text);
      }
    }
  };

  if (Array.isArray(body.messages)) {
    for (const message of body.messages) {
      if (message && typeof message === "object") push(message.content);
    }
  }
  if (body.system !== undefined) push(body.system);

  if (typeof body.input === "string") {
    push(body.input);
  } else if (Array.isArray(body.input)) {
    for (const item of body.input) {
      if (typeof item === "string") {
        parts.push(item);
      } else if (item && typeof item === "object") {
        // A bare text part contributes its text; anything else is treated as a
        // message-like item and its `content` is read the usual way.
        if (typeof item.text === "string") parts.push(item.text);
        else push(item.content);
      }
    }
  }
  return parts.join("\n");
}
|
|
407
|
+
|
|
408
|
+
// Pull the "did the work move?" signal out of an upstream RESPONSE. Similar
// prompts alone can't tell circling from convergence: a run closing in on a fix
// also sends near-identical prompts turn after turn. The tell is whether the
// observation changed. A response is reduced to the text it returned plus any
// explicit error message, joined with newlines; "" means no text was found.
//
// Shapes read, in this order:
//   - choices[].message.content    (chat-completions style)
//   - content[]                    (top-level content blocks)
//   - output[].content             (output items holding content blocks), so
//     requests sent with `input` also get a usable signature
//   - error (string) or error.message
export function responseSignature(body) {
  if (!body || typeof body !== "object") return "";
  const parts = [];
  const push = (content) => {
    if (typeof content === "string") {
      parts.push(content);
    } else if (Array.isArray(content)) {
      for (const part of content) {
        if (part && typeof part === "object" && typeof part.text === "string") parts.push(part.text);
      }
    }
  };

  if (Array.isArray(body.choices)) {
    for (const choice of body.choices) {
      if (choice && typeof choice === "object" && choice.message) push(choice.message.content);
    }
  }
  if (Array.isArray(body.content)) push(body.content);
  if (Array.isArray(body.output)) {
    for (const item of body.output) {
      // Items without text content (no `content`, or no text blocks) add nothing.
      if (item && typeof item === "object") push(item.content);
    }
  }
  if (body.error) {
    if (typeof body.error === "string") parts.push(body.error);
    else if (typeof body.error.message === "string") parts.push(body.error.message);
  }
  return parts.join("\n");
}
|
|
438
|
+
|
|
439
|
+
// Decide whether the agent is circling, given the current request shape and a
// rolling history (oldest->newest) of earlier requestShapeText() strings.
// Returns { looping, repeats, similarity, responseMoved }.
//
// The history is walked from newest to oldest and a streak is counted:
//   - an entry whose line similarity to the current prompt is below
//     `similarityThreshold` ends the streak;
//   - when response signatures are supplied (`responseSignatures`, aligned
//     with `history`, plus `currentResponseSignature`), each step also compares
//     the response of that entry with the next-newer one. If both are non-empty
//     and their similarity is below `responseMovedThreshold`, the observation
//     moved: `responseMoved` is set and the streak ends there. A step with a
//     missing or empty signature on either side is not gated.
// Without response data the verdict rests on prompt similarity alone.
//
// `repeats` is the streak length, `looping` is repeats >= minRepeats, and
// `similarity` is the score of the oldest entry in the streak, rounded to three
// decimals.
export function detectLoop(currentShape, history, {
  similarityThreshold = LOOP_SIMILARITY,
  minRepeats = LOOP_MIN_REPEATS,
  responseSignatures = null,
  currentResponseSignature = null,
  responseMovedThreshold = LOOP_SIMILARITY
} = {}) {
  const nothingToCompare = !currentShape || !Array.isArray(history) || history.length === 0;
  if (nothingToCompare) {
    return { looping: false, repeats: 0, similarity: 0, responseMoved: false };
  }

  const toLines = (text) => String(text).split("\n");
  const isUsable = (signature) => signature != null && String(signature).length > 0;

  const currentLines = toLines(currentShape);
  const gateOnResponses = Array.isArray(responseSignatures) && currentResponseSignature != null;

  let streak = 0;
  let oldestMatchSimilarity = 0;
  let responseMoved = false;
  // Response on the newer side of the step being examined; starts at the one
  // the current prompt is reacting to and slides back one turn per iteration.
  let newerSignature = gateOnResponses ? currentResponseSignature : null;

  let index = history.length;
  while (index > 0) {
    index -= 1;
    const promptSimilarity = lineSimilarity(currentLines, toLines(history[index]));
    if (promptSimilarity < similarityThreshold) break;

    if (gateOnResponses) {
      const olderSignature = responseSignatures[index];
      if (isUsable(olderSignature) && isUsable(newerSignature)) {
        const responseSimilarity = lineSimilarity(toLines(newerSignature), toLines(olderSignature));
        if (responseSimilarity < responseMovedThreshold) {
          responseMoved = true;
          break;
        }
      }
      newerSignature = olderSignature;
    }

    streak += 1;
    oldestMatchSimilarity = promptSimilarity;
  }

  return {
    looping: streak >= minRepeats,
    repeats: streak,
    similarity: Number(oldestMatchSimilarity.toFixed(3)),
    responseMoved
  };
}
|
package/src/mission-control.mjs
CHANGED
|
@@ -7,7 +7,7 @@ import path from "node:path";
|
|
|
7
7
|
import process from "node:process";
|
|
8
8
|
import { syncRun } from "./cloud.mjs";
|
|
9
9
|
import { sendAlert } from "./alerts.mjs";
|
|
10
|
-
import { compressRequestBody, estimateTokens } from "./compressor.mjs";
|
|
10
|
+
import { compressRequestBody, estimateTokens, requestShapeText, detectLoop, responseSignature } from "./compressor.mjs";
|
|
11
11
|
|
|
12
12
|
const STORE_DIR = ".runcap";
|
|
13
13
|
const MISSIONS_DIR = path.join(STORE_DIR, "missions");
|
|
@@ -523,6 +523,19 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
|
|
|
523
523
|
if (gatewayMode !== "mock" && !openaiKey && !anthropicKey) {
|
|
524
524
|
throw new Error("Missing upstream key. Set OPENAI_API_KEY (for /v1/chat/completions) and/or ANTHROPIC_API_KEY (for /v1/messages). The gateway cannot proxy without at least one.");
|
|
525
525
|
}
|
|
526
|
+
// Rolling history of recent request shapes (per gateway process) so we can
|
|
527
|
+
// detect an agent circling the same failure with reworded prompts: similar-
|
|
528
|
+
// but-not-identical turns, which plain hashing never catches.
|
|
529
|
+
const loopEnabled = (process.env.AIM_LOOP_DETECT ?? "on").toLowerCase() !== "off";
|
|
530
|
+
const shapeHistory = [];
|
|
531
|
+
// Response signatures aligned with shapeHistory (the observation each prior
|
|
532
|
+
// prompt produced). Lets the loop detector tell circling from convergence:
|
|
533
|
+
// similar prompts only count as a loop when the response did not move either.
|
|
534
|
+
// Each entry is a mutable holder { sig } so the slot for an in-flight turn can
|
|
535
|
+
// be captured by reference and filled once its upstream response returns, even
|
|
536
|
+
// if concurrent turns push new entries or shift() trims the array meanwhile.
|
|
537
|
+
const responseHistory = [];
|
|
538
|
+
const SHAPE_HISTORY_MAX = 12;
|
|
526
539
|
const server = http.createServer(async (request, response) => {
|
|
527
540
|
const started = Date.now();
|
|
528
541
|
try {
|
|
@@ -545,6 +558,34 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
|
|
|
545
558
|
|
|
546
559
|
const bodyText = await readRequestBody(request);
|
|
547
560
|
const requestBody = safeJson(bodyText) ?? {};
|
|
561
|
+
// Loop signal: compare this request's shape against the recent run. The
|
|
562
|
+
// response signatures gate prompt-similarity so a converging run (similar
|
|
563
|
+
// prompts, but the error/output is changing) is not flagged as circling.
|
|
564
|
+
let loop = null;
|
|
565
|
+
let currentShape = null;
|
|
566
|
+
let responseSlot = null; // holder for THIS turn's response signature
|
|
567
|
+
if (loopEnabled) {
|
|
568
|
+
const shape = requestShapeText(requestBody);
|
|
569
|
+
if (shape) {
|
|
570
|
+
currentShape = shape;
|
|
571
|
+
const result = detectLoop(shape, shapeHistory, {
|
|
572
|
+
responseSignatures: responseHistory.map((h) => h.sig),
|
|
573
|
+
currentResponseSignature: responseHistory.length ? responseHistory[responseHistory.length - 1].sig : null
|
|
574
|
+
});
|
|
575
|
+
loop = {
|
|
576
|
+
looping: result.looping,
|
|
577
|
+
repeats: result.repeats,
|
|
578
|
+
similarity: result.similarity,
|
|
579
|
+
responseMoved: result.responseMoved,
|
|
580
|
+
truth: "calculated"
|
|
581
|
+
};
|
|
582
|
+
shapeHistory.push(shape);
|
|
583
|
+
responseSlot = { sig: "" }; // filled by reference once upstream returns
|
|
584
|
+
responseHistory.push(responseSlot);
|
|
585
|
+
if (shapeHistory.length > SHAPE_HISTORY_MAX) shapeHistory.shift();
|
|
586
|
+
if (responseHistory.length > SHAPE_HISTORY_MAX) responseHistory.shift();
|
|
587
|
+
}
|
|
588
|
+
}
|
|
548
589
|
const budget = readBudget();
|
|
549
590
|
const summary = await readGatewaySummary({ windowMs: budgetWindowMs() });
|
|
550
591
|
// Compress the request body once (safe, lossless-by-construction). Disable with AIM_COMPRESS=off.
|
|
@@ -591,6 +632,7 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
|
|
|
591
632
|
capUsd: budget,
|
|
592
633
|
blockedByThisCall
|
|
593
634
|
},
|
|
635
|
+
loop,
|
|
594
636
|
error: blockedByThisCall
|
|
595
637
|
? `Budget would be exceeded by this call: $${summary.estimatedCostUsd} spent + ~$${callEstimate} this call > cap $${budget}`
|
|
596
638
|
: `Budget exceeded: ${summary.estimatedCostUsd} >= ${budget}`,
|
|
@@ -621,6 +663,8 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
|
|
|
621
663
|
if (gatewayMode === "mock") {
|
|
622
664
|
const responseBody = mockCompletion(requestBody, url.pathname);
|
|
623
665
|
const responseText = JSON.stringify(responseBody);
|
|
666
|
+
// Record before unblocking the client so a concurrent next turn sees it.
|
|
667
|
+
if (responseSlot) responseSlot.sig = responseSignature(responseBody);
|
|
624
668
|
send(response, 200, responseText, "application/json; charset=utf-8");
|
|
625
669
|
await appendGatewayEvent({
|
|
626
670
|
at: new Date().toISOString(),
|
|
@@ -631,6 +675,7 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
|
|
|
631
675
|
usage: responseBody.usage,
|
|
632
676
|
cost: estimateApiCost(responseBody.usage, requestBody.model ?? responseBody.model),
|
|
633
677
|
compression,
|
|
678
|
+
loop,
|
|
634
679
|
truth: "mock_provider_usage",
|
|
635
680
|
requestHash: createHash("sha1").update(bodyText).digest("hex")
|
|
636
681
|
});
|
|
@@ -666,13 +711,14 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
|
|
|
666
711
|
body: forwardBody
|
|
667
712
|
});
|
|
668
713
|
const responseText = await upstreamResponse.text();
|
|
714
|
+
const responseBody = safeJson(responseText) ?? {};
|
|
715
|
+
// Record before unblocking the client so a concurrent next turn sees it.
|
|
716
|
+
if (responseSlot) responseSlot.sig = responseSignature(responseBody);
|
|
669
717
|
response.writeHead(upstreamResponse.status, {
|
|
670
718
|
"content-type": upstreamResponse.headers.get("content-type") ?? "application/json",
|
|
671
719
|
"cache-control": "no-store"
|
|
672
720
|
});
|
|
673
721
|
response.end(responseText);
|
|
674
|
-
|
|
675
|
-
const responseBody = safeJson(responseText) ?? {};
|
|
676
722
|
await appendGatewayEvent({
|
|
677
723
|
at: new Date().toISOString(),
|
|
678
724
|
path: url.pathname,
|
|
@@ -682,9 +728,14 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
|
|
|
682
728
|
usage: responseBody.usage ?? null,
|
|
683
729
|
cost: estimateApiCost(responseBody.usage, requestBody.model ?? responseBody.model),
|
|
684
730
|
compression,
|
|
731
|
+
loop,
|
|
685
732
|
truth: responseBody.usage ? "provider_usage" : "unknown",
|
|
686
733
|
requestHash: createHash("sha1").update(bodyText).digest("hex")
|
|
687
734
|
});
|
|
735
|
+
if (loop && loop.looping) {
|
|
736
|
+
sendAlert(`Runcap: possible stuck loop. The agent has sent ${loop.repeats} near-identical prompts in a row (${Math.round(loop.similarity * 100)}% similar) without the conversation moving forward. It may be circling the same failure with reworded attempts.`)
|
|
737
|
+
.catch(() => {});
|
|
738
|
+
}
|
|
688
739
|
if (responseBody.usage) {
|
|
689
740
|
const spent = await readGatewaySummary({ windowMs: budgetWindowMs() });
|
|
690
741
|
syncRun({
|
|
@@ -737,7 +788,7 @@ export async function startGateway({ port = 8792, mock = false } = {}) {
|
|
|
737
788
|
// it down afterward. Upstream is pinned from the CURRENT env before the child's
|
|
738
789
|
// base URLs are rewritten, so the gateway proxies to the real provider, not to
|
|
739
790
|
// itself.
|
|
740
|
-
async function startEphemeralGateway({ mock = false } = {}) {
|
|
791
|
+
export async function startEphemeralGateway({ mock = false } = {}) {
|
|
741
792
|
await ensureStore();
|
|
742
793
|
const upstream = {
|
|
743
794
|
openaiKey: process.env.AIM_UPSTREAM_API_KEY ?? process.env.OPENAI_API_KEY,
|
|
@@ -769,19 +820,23 @@ export async function showStatus(options = {}) {
|
|
|
769
820
|
|
|
770
821
|
const gateway = await readGatewaySummary();
|
|
771
822
|
const gatewayLine = `Gateway: ${gateway.callCount} calls, ${gateway.totalTokens} tokens, $${gateway.estimatedCostUsd} estimated (${gateway.truth})`;
|
|
823
|
+
const loopLine = gateway.loop?.looping
|
|
824
|
+
? `Loop warning: last ${gateway.loop.repeats} prompts were ${Math.round(gateway.loop.similarity * 100)}% identical with no progress. The agent may be circling the same failure (truth: calculated).`
|
|
825
|
+
: null;
|
|
772
826
|
const latest = await latestMissionId();
|
|
773
|
-
if (!latest) return
|
|
827
|
+
if (!latest) return [fuelLine, gatewayLine, loopLine, "No missions recorded yet."].filter(Boolean).join("\n");
|
|
774
828
|
const mission = await readMission(latest);
|
|
775
829
|
return [
|
|
776
830
|
fuelLine,
|
|
777
831
|
gatewayLine,
|
|
832
|
+
loopLine,
|
|
778
833
|
`Latest mission: ${mission.id}`,
|
|
779
834
|
`Status: ${mission.stuck.status}`,
|
|
780
835
|
`Exit code: ${mission.exitCode}`,
|
|
781
836
|
`Changed files: ${mission.diffEvidence.changedFiles.length}`,
|
|
782
837
|
`Errors: ${mission.errors.length}`,
|
|
783
838
|
`Report: ${path.join(MISSIONS_DIR, mission.id, "report.md")}`
|
|
784
|
-
].join("\n");
|
|
839
|
+
].filter(Boolean).join("\n");
|
|
785
840
|
}
|
|
786
841
|
|
|
787
842
|
export async function recordFuel(value) {
|
|
@@ -1419,6 +1474,13 @@ async function readGatewaySummary({ windowMs } = {}) {
|
|
|
1419
1474
|
const inputRate = pricing ? pricing.inputPerMillion : 3; // fall back to a mid Sonnet-ish rate
|
|
1420
1475
|
return sum + (saved * inputRate) / 1_000_000;
|
|
1421
1476
|
}, 0);
|
|
1477
|
+
// Loop signal: the most recent event that carries a loop verdict tells us
|
|
1478
|
+
// whether the agent is currently circling (similar-but-not-identical prompts
|
|
1479
|
+
// repeated without progress). This is the "looks productive but stuck" case.
|
|
1480
|
+
const lastWithLoop = [...events].reverse().find((event) => event.loop);
|
|
1481
|
+
const loop = lastWithLoop
|
|
1482
|
+
? { ...lastWithLoop.loop, at: lastWithLoop.at, model: lastWithLoop.model }
|
|
1483
|
+
: { looping: false, repeats: 0, similarity: 0, truth: "calculated" };
|
|
1422
1484
|
return {
|
|
1423
1485
|
callCount: events.length,
|
|
1424
1486
|
successfulCallCount: successful.length,
|
|
@@ -1427,6 +1489,7 @@ async function readGatewaySummary({ windowMs } = {}) {
|
|
|
1427
1489
|
savedTokens,
|
|
1428
1490
|
savedUsd: Number(savedUsd.toFixed(6)),
|
|
1429
1491
|
wouldHaveSpentUsd: Number((estimatedCost + savedUsd).toFixed(6)),
|
|
1492
|
+
loop,
|
|
1430
1493
|
truth: events.some((event) => event.truth === "provider_usage" || event.truth === "mock_provider_usage")
|
|
1431
1494
|
? "usage_plus_static_price_table"
|
|
1432
1495
|
: "unknown",
|