runcap 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/scripts/loop-e2e.mjs +137 -0
- package/scripts/loop-test.mjs +45 -1
- package/scripts/make-demo-svg.mjs +19 -18
- package/scripts/make-linkedin-loop-video.mjs +338 -0
- package/src/compressor.mjs +77 -9
- package/src/mission-control.mjs +34 -7
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "runcap",
|
|
3
|
-
"version": "0.3.0",
|
|
3
|
+
"version": "0.3.1",
|
|
4
4
|
"description": "Cap every agent run before it starts: estimate cost, set a hard ceiling that stops the run, rescue stuck agents. Local, MIT, nothing uploaded.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"type": "module",
|
|
@@ -45,7 +45,7 @@
|
|
|
45
45
|
"acceptance": "node ./scripts/acceptance.mjs",
|
|
46
46
|
"smoke": "node ./bin/runcap.mjs run --label smoke -- npm --prefix examples/broken-ts-app run build",
|
|
47
47
|
"demo:broken": "node ./bin/runcap.mjs run --label broken-ts-demo -- npm --prefix examples/broken-ts-app run build",
|
|
48
|
-
"test": "node ./scripts/delta-test.mjs && node ./scripts/loop-test.mjs && node ./scripts/validate-demo.mjs",
|
|
48
|
+
"test": "node ./scripts/delta-test.mjs && node ./scripts/loop-test.mjs && node ./scripts/loop-e2e.mjs && node ./scripts/validate-demo.mjs",
|
|
49
49
|
"test:delta": "node ./scripts/delta-test.mjs",
|
|
50
50
|
"test:loop": "node ./scripts/loop-test.mjs",
|
|
51
51
|
"status": "node ./bin/runcap.mjs status",
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
// End-to-end proof that the response-side loop gate works through the REAL
|
|
2
|
+
// gateway over HTTP, not just in unit tests. We stand up a tiny local "upstream"
|
|
3
|
+
// that returns a caller-chosen error string, point the real Runcap gateway at
|
|
4
|
+
// it, and drive near-identical prompts through the wire:
|
|
5
|
+
// A) error CHANGES each turn (convergence) -> gateway must NOT flag a loop
|
|
6
|
+
// B) error STAYS the same each turn (circling) -> gateway MUST flag a loop
|
|
7
|
+
// The gateway records its loop verdict per call in the gateway event log, which
|
|
8
|
+
// we read back to assert the real server behaved correctly.
|
|
9
|
+
//
|
|
10
|
+
// Pure Node, no framework. Exits non-zero on any failure so it can gate CI.
|
|
11
|
+
|
|
12
|
+
import http from "node:http";
|
|
13
|
+
import os from "node:os";
|
|
14
|
+
import path from "node:path";
|
|
15
|
+
import { mkdtempSync, readFileSync, existsSync } from "node:fs";
|
|
16
|
+
|
|
17
|
+
// Isolate all gateway state (the .runcap event log lives under cwd) in a
|
|
18
|
+
// throwaway dir so this never touches real data. The gateway writes its event
|
|
19
|
+
// log to ./.runcap, so we chdir into the temp dir before starting it.
|
|
20
|
+
const tmpHome = mkdtempSync(path.join(os.tmpdir(), "runcap-e2e-"));
|
|
21
|
+
process.chdir(tmpHome);
|
|
22
|
+
process.env.AIM_COMPRESS = "off"; // keep the wire bytes predictable
|
|
23
|
+
process.env.AIM_LOOP_DETECT = "on";
|
|
24
|
+
|
|
25
|
+
// A controllable upstream: returns an OpenAI-shaped completion whose assistant
|
|
26
|
+
// text is whatever error we tell it to via a field in the request body. We use
|
|
27
|
+
// the body (not a header) on purpose: the gateway forwards the request body
|
|
28
|
+
// upstream but rewrites headers, so the body is the channel that actually
|
|
29
|
+
// reaches this stub through the real gateway.
|
|
30
|
+
const upstream = http.createServer((req, res) => {
|
|
31
|
+
let body = "";
|
|
32
|
+
req.on("data", (c) => (body += c));
|
|
33
|
+
req.on("end", () => {
|
|
34
|
+
let err = "default error";
|
|
35
|
+
try { err = JSON.parse(body)?.mock_error ?? err; } catch {}
|
|
36
|
+
const payload = {
|
|
37
|
+
id: "chatcmpl-stub",
|
|
38
|
+
object: "chat.completion",
|
|
39
|
+
created: Math.floor(Date.now() / 1000),
|
|
40
|
+
model: "stub-model",
|
|
41
|
+
choices: [{ index: 0, message: { role: "assistant", content: String(err) }, finish_reason: "stop" }],
|
|
42
|
+
usage: { prompt_tokens: 50, completion_tokens: 10, total_tokens: 60 }
|
|
43
|
+
};
|
|
44
|
+
res.writeHead(200, { "content-type": "application/json" });
|
|
45
|
+
res.end(JSON.stringify(payload));
|
|
46
|
+
});
|
|
47
|
+
});
|
|
48
|
+
|
|
49
|
+
async function listen(server, port = 0) {
|
|
50
|
+
await new Promise((r) => server.listen(port, "127.0.0.1", r));
|
|
51
|
+
return server.address().port;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
let failures = 0;
|
|
55
|
+
function check(name, pass, detail) {
|
|
56
|
+
if (!pass) failures++;
|
|
57
|
+
console.log(`${pass ? "PASS" : "FAIL"} ${name}${detail ? " — " + detail : ""}`);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
const stableTail = [
|
|
61
|
+
"You are a coding agent. Fix the failing build.",
|
|
62
|
+
...Array.from({ length: 40 }, (_, i) => `context line ${i}: prior file content the agent keeps resending`)
|
|
63
|
+
].join("\n");
|
|
64
|
+
|
|
65
|
+
async function send(port, wording, mockError) {
|
|
66
|
+
// mock_error rides in the body so it survives the gateway's header rewrite and
|
|
67
|
+
// reaches the upstream stub, which echoes it back as the assistant response.
|
|
68
|
+
const body = JSON.stringify({
|
|
69
|
+
model: "stub-model",
|
|
70
|
+
mock_error: mockError,
|
|
71
|
+
messages: [{ role: "user", content: stableTail + "\nLet me try this: " + wording }]
|
|
72
|
+
});
|
|
73
|
+
const res = await fetch(`http://127.0.0.1:${port}/v1/chat/completions`, {
|
|
74
|
+
method: "POST",
|
|
75
|
+
headers: { "content-type": "application/json" },
|
|
76
|
+
body
|
|
77
|
+
});
|
|
78
|
+
await res.text();
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
function readEvents() {
|
|
82
|
+
const log = path.join(tmpHome, ".runcap", "gateway-events.jsonl");
|
|
83
|
+
if (!existsSync(log)) return [];
|
|
84
|
+
return readFileSync(log, "utf8").trim().split("\n").filter(Boolean).map((l) => JSON.parse(l));
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
// Loop verdicts accumulate across both scenarios in one shared gateway process
|
|
88
|
+
// (the shape history is per-process), so each scenario asserts against only the
|
|
89
|
+
// events it produced. We snapshot the event count before scenario B.
|
|
90
|
+
|
|
91
|
+
const run = async () => {
|
|
92
|
+
const upstreamPort = await listen(upstream);
|
|
93
|
+
process.env.AIM_UPSTREAM_BASE_URL = `http://127.0.0.1:${upstreamPort}/v1`;
|
|
94
|
+
process.env.AIM_UPSTREAM_API_KEY = "test-key";
|
|
95
|
+
|
|
96
|
+
// Import AFTER env is set so the gateway reads our isolated config.
|
|
97
|
+
const { startEphemeralGateway } = await import("../src/mission-control.mjs");
|
|
98
|
+
const gw = await startEphemeralGateway();
|
|
99
|
+
const gwPort = gw.port;
|
|
100
|
+
|
|
101
|
+
// Scenario A: same prompt framing, but the error MOVES every turn (convergence).
|
|
102
|
+
for (const [w, e] of [
|
|
103
|
+
["guard the undefined", "TypeError: cannot read property 'id' of undefined"],
|
|
104
|
+
["optional chain", "TypeError: cannot read property 'name' of undefined"],
|
|
105
|
+
["default to {}", "ReferenceError: parser is not defined"],
|
|
106
|
+
["try/catch", "AssertionError: expected 200 but got 404"]
|
|
107
|
+
]) {
|
|
108
|
+
await send(gwPort, w, e);
|
|
109
|
+
}
|
|
110
|
+
const afterA = readEvents();
|
|
111
|
+
const aFlagged = afterA.filter((ev) => ev.loop && ev.loop.looping).length;
|
|
112
|
+
check("E2E convergence (moving error) is NOT flagged through real gateway", aFlagged === 0,
|
|
113
|
+
`loops flagged in scenario A=${aFlagged}`);
|
|
114
|
+
|
|
115
|
+
// Scenario B: same prompt framing AND the SAME error every turn (circling).
|
|
116
|
+
const stuck = "TypeError: cannot read property 'id' of undefined";
|
|
117
|
+
for (const w of ["attempt one", "attempt two reworded", "attempt three reworded", "attempt four reworded", "attempt five reworded"]) {
|
|
118
|
+
await send(gwPort, w, stuck);
|
|
119
|
+
}
|
|
120
|
+
const afterB = readEvents().slice(afterA.length); // only scenario-B events
|
|
121
|
+
const bFlagged = afterB.filter((ev) => ev.loop && ev.loop.looping).length;
|
|
122
|
+
check("E2E circling (stuck error) IS flagged through real gateway", bFlagged > 0,
|
|
123
|
+
`loops flagged in scenario B=${bFlagged}`);
|
|
124
|
+
|
|
125
|
+
await gw.close();
|
|
126
|
+
upstream.close();
|
|
127
|
+
};
|
|
128
|
+
|
|
129
|
+
run()
|
|
130
|
+
.then(() => {
|
|
131
|
+
console.log("\n" + (failures === 0 ? "ALL LOOP E2E TESTS PASSED" : `${failures} LOOP E2E TEST(S) FAILED`));
|
|
132
|
+
process.exit(failures === 0 ? 0 : 1);
|
|
133
|
+
})
|
|
134
|
+
.catch((e) => {
|
|
135
|
+
console.error("E2E harness error:", e);
|
|
136
|
+
process.exit(1);
|
|
137
|
+
});
|
package/scripts/loop-test.mjs
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
//
|
|
8
8
|
// Pure Node, no test framework. Exits non-zero on any failure so it can gate CI.
|
|
9
9
|
|
|
10
|
-
import { detectLoop, requestShapeText } from "../src/compressor.mjs";
|
|
10
|
+
import { detectLoop, requestShapeText, responseSignature } from "../src/compressor.mjs";
|
|
11
11
|
|
|
12
12
|
let failures = 0;
|
|
13
13
|
function check(name, pass, detail) {
|
|
@@ -80,5 +80,49 @@ function attempt(wording) {
|
|
|
80
80
|
`openai="${openai}" anthropic="${anthropic}"`);
|
|
81
81
|
}
|
|
82
82
|
|
|
83
|
+
// --- Test 6: response-side gate — similar prompts but a MOVING error is NOT a loop ---
|
|
84
|
+
// The edge case raised on the thread: a converging run also sends near-identical
|
|
85
|
+
// prompts (same files, same framing) while it closes in on the fix. The tell is
|
|
86
|
+
// the observation: if the error/test output changes between turns, that's
|
|
87
|
+
// progress, not circling. Prompts are near-identical here, but each response
|
|
88
|
+
// carries a DIFFERENT error, so the gate must keep it from being flagged.
|
|
89
|
+
{
|
|
90
|
+
const history = [attempt("try A"), attempt("try B"), attempt("try C")];
|
|
91
|
+
const current = attempt("try D");
|
|
92
|
+
const responseSignatures = [
|
|
93
|
+
"TypeError: cannot read property 'id' of undefined",
|
|
94
|
+
"TypeError: cannot read property 'name' of undefined",
|
|
95
|
+
"ReferenceError: parser is not defined"
|
|
96
|
+
];
|
|
97
|
+
const currentResponseSignature = "AssertionError: expected 200 but got 404";
|
|
98
|
+
const r = detectLoop(current, history, { responseSignatures, currentResponseSignature });
|
|
99
|
+
check("similar prompts but MOVING error are NOT flagged (convergence)", !r.looping && r.responseMoved,
|
|
100
|
+
`looping=${r.looping}, repeats=${r.repeats}, responseMoved=${r.responseMoved}`);
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
// --- Test 7: response-side gate — similar prompts AND a STUCK error IS a loop ---
|
|
104
|
+
// Same near-identical prompts, but the identical error keeps coming back. Now
|
|
105
|
+
// both signals agree the run is circling, so it must still be flagged.
|
|
106
|
+
{
|
|
107
|
+
const history = [attempt("try A"), attempt("try B"), attempt("try C")];
|
|
108
|
+
const current = attempt("try D");
|
|
109
|
+
const sameError = "TypeError: cannot read property 'id' of undefined";
|
|
110
|
+
const responseSignatures = [sameError, sameError, sameError];
|
|
111
|
+
const currentResponseSignature = sameError;
|
|
112
|
+
const r = detectLoop(current, history, { responseSignatures, currentResponseSignature });
|
|
113
|
+
check("similar prompts AND stuck error ARE flagged as loop", r.looping && !r.responseMoved && r.repeats >= 3,
|
|
114
|
+
`looping=${r.looping}, repeats=${r.repeats}, responseMoved=${r.responseMoved}`);
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
// --- Test 8: responseSignature extracts the error/text from both provider shapes ---
|
|
118
|
+
{
|
|
119
|
+
const openai = responseSignature({ choices: [{ message: { content: "boom: it failed" } }] });
|
|
120
|
+
const anthropic = responseSignature({ content: [{ type: "text", text: "boom: it failed" }] });
|
|
121
|
+
const errEnvelope = responseSignature({ error: { message: "rate limited" } });
|
|
122
|
+
check("responseSignature reads OpenAI, Anthropic, and error shapes",
|
|
123
|
+
openai === "boom: it failed" && anthropic === "boom: it failed" && errEnvelope === "rate limited",
|
|
124
|
+
`openai="${openai}" anthropic="${anthropic}" err="${errEnvelope}"`);
|
|
125
|
+
}
|
|
126
|
+
|
|
83
127
|
console.log("\n" + (failures === 0 ? "ALL LOOP TESTS PASSED" : `${failures} LOOP TEST(S) FAILED`));
|
|
84
128
|
process.exit(failures === 0 ? 0 : 1);
|
|
@@ -16,27 +16,28 @@ const C = {
|
|
|
16
16
|
|
|
17
17
|
const lines = [
|
|
18
18
|
{ t: "$ runcap plan --fuel 24 -- \"build a small auth feature and verify it\"", c: C.prompt, at: 0.3 },
|
|
19
|
-
{ t: "Estimate: $3 - $7 (range, not an oracle)", c: C.text, at: 1.
|
|
20
|
-
{ t: "Recommended cap: $10", c: C.ok, at: 1.
|
|
21
|
-
{ t: "", c: C.text, at: 1.
|
|
22
|
-
{ t: "$ ANTHROPIC_BASE_URL=http://127.0.0.1:8792/v1 \\", c: C.prompt, at: 2.
|
|
23
|
-
{ t: " AIM_DAILY_BUDGET_USD=10 runcap gateway", c: C.prompt, at: 2.
|
|
24
|
-
{ t: "gateway up ·
|
|
25
|
-
{ t: "", c: C.text, at: 3.
|
|
26
|
-
{ t: "→ request 10,144 tokens", c: C.text, at: 3.
|
|
27
|
-
{ t: "→ compressed 1,260 tokens (
|
|
28
|
-
{ t: "", c: C.text, at: 4.
|
|
29
|
-
{ t: "
|
|
30
|
-
{ t: "", c: C.
|
|
31
|
-
{ t: "
|
|
32
|
-
{ t: "
|
|
19
|
+
{ t: "Estimate: $3 - $7 (range, not an oracle)", c: C.text, at: 1.0 },
|
|
20
|
+
{ t: "Recommended cap: $10", c: C.ok, at: 1.4 },
|
|
21
|
+
{ t: "", c: C.text, at: 1.5 },
|
|
22
|
+
{ t: "$ ANTHROPIC_BASE_URL=http://127.0.0.1:8792/v1 \\", c: C.prompt, at: 2.0 },
|
|
23
|
+
{ t: " AIM_DAILY_BUDGET_USD=10 runcap gateway", c: C.prompt, at: 2.3 },
|
|
24
|
+
{ t: "gateway up · compress on · hard cap armed · loop guard on", c: C.dim, at: 2.9 },
|
|
25
|
+
{ t: "", c: C.text, at: 3.0 },
|
|
26
|
+
{ t: "→ request 10,144 tokens", c: C.text, at: 3.5 },
|
|
27
|
+
{ t: "→ compressed 1,260 tokens (1,186 → 737 on a real call: 37.9% saved)", c: C.ok, at: 4.1 },
|
|
28
|
+
{ t: "", c: C.text, at: 4.2 },
|
|
29
|
+
{ t: "⚠ loop: last 3 prompts 97.7% identical - agent circling the same fail", c: C.violet, at: 5.0 },
|
|
30
|
+
{ t: " (looks busy, makes no progress, keeps spending)", c: C.dim, at: 5.5 },
|
|
31
|
+
{ t: "", c: C.text, at: 5.6 },
|
|
32
|
+
{ t: "→ next call would cross the ceiling", c: C.text, at: 6.2 },
|
|
33
|
+
{ t: "HTTP 429 budget_guard - run stopped before money left your account", c: C.bad, at: 6.9 }
|
|
33
34
|
];
|
|
34
35
|
|
|
35
|
-
const W = 920, H =
|
|
36
|
+
const W = 920, H = 588;
|
|
36
37
|
const padX = 28, top = 78, lh = 27, fs = 16.5;
|
|
37
38
|
// Escape XML-special characters so terminal text can be embedded safely inside SVG <text> nodes. "&" is replaced first so the entities emitted for "<" and ">" are not escaped a second time.
const esc = (s) => s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
|
|
38
39
|
|
|
39
|
-
const total =
|
|
40
|
+
const total = 9.0; // loop length seconds
|
|
40
41
|
const rows = lines.map((ln, i) => {
|
|
41
42
|
const y = top + i * lh;
|
|
42
43
|
// fade+slide in at ln.at, hold, then reset at end of loop
|
|
@@ -46,7 +47,7 @@ const rows = lines.map((ln, i) => {
|
|
|
46
47
|
${esc(ln.t)}</text>`;
|
|
47
48
|
}).join("\n");
|
|
48
49
|
|
|
49
|
-
const svg = `<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 ${W} ${H}" width="${W}" height="${H}" role="img" aria-label="Runcap terminal demo: plan, cap, compress, stop">
|
|
50
|
+
const svg = `<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 ${W} ${H}" width="${W}" height="${H}" role="img" aria-label="Runcap terminal demo: plan, cap, compress, detect loop, stop">
|
|
50
51
|
<defs>
|
|
51
52
|
<linearGradient id="brand" x1="0" y1="0" x2="1" y2="0">
|
|
52
53
|
<stop offset="0" stop-color="#22d3ee"/><stop offset="1" stop-color="#34d399"/>
|
|
@@ -64,7 +65,7 @@ const svg = `<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 ${W} ${H}" wid
|
|
|
64
65
|
<circle cx="26" cy="28" r="6" fill="#f87171"/>
|
|
65
66
|
<circle cx="48" cy="28" r="6" fill="#fbbf24"/>
|
|
66
67
|
<circle cx="70" cy="28" r="6" fill="#34d399"/>
|
|
67
|
-
<text x="100" y="33" fill="#8a8a8a" font-family="'JetBrains Mono',monospace" font-size="14">runcap
|
|
68
|
+
<text x="100" y="33" fill="#8a8a8a" font-family="'JetBrains Mono',monospace" font-size="14">runcap · estimate · cap · compress · loop · rescue</text>
|
|
68
69
|
<text x="${W-150}" y="33" fill="url(#brand)" font-family="'JetBrains Mono',monospace" font-weight="700" font-size="15">run·cap</text>
|
|
69
70
|
</g>
|
|
70
71
|
<line x1="0" y1="50" x2="${W}" y2="50" stroke="#1c1c1f"/>
|
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
// Renders a LinkedIn-ready MP4 for the Runcap loop-detection post.
|
|
2
|
+
// Narrative: a circling agent looks busy but burns money -> Runcap catches the
|
|
3
|
+
// loop in real time -> proven 37.9% compression -> hard cap stops the run.
|
|
4
|
+
// Output: docs/assets/media/runcap-linkedin-loop-demo.mp4
|
|
5
|
+
// Requires: playwright + ffmpeg available on the machine.
|
|
6
|
+
import { spawnSync } from "node:child_process";
|
|
7
|
+
import { mkdirSync, readdirSync, rmSync } from "node:fs";
|
|
8
|
+
import { dirname, join, resolve } from "node:path";
|
|
9
|
+
import { fileURLToPath } from "node:url";
|
|
10
|
+
import { chromium } from "playwright";
|
|
11
|
+
|
|
12
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
13
|
+
const root = resolve(__dirname, "..");
|
|
14
|
+
const outDir = resolve(root, "docs/assets/media");
|
|
15
|
+
// Scratch directory for the rendered PNG frames. Resolved from the platform temp dir rather than the macOS-only /private/tmp so the script can also create it on Linux and Windows.
const framesDir = join((await import("node:os")).tmpdir(), "runcap-linkedin-loop-frames");
|
|
16
|
+
const outFile = join(outDir, "runcap-linkedin-loop-demo.mp4");
|
|
17
|
+
|
|
18
|
+
const width = 1080;
|
|
19
|
+
const height = 1080;
|
|
20
|
+
const fps = 30;
|
|
21
|
+
const duration = 13;
|
|
22
|
+
const frameCount = fps * duration;
|
|
23
|
+
|
|
24
|
+
mkdirSync(outDir, { recursive: true });
|
|
25
|
+
mkdirSync(framesDir, { recursive: true });
|
|
26
|
+
for (const file of readdirSync(framesDir)) {
|
|
27
|
+
if (file.startsWith("frame-") && file.endsWith(".png")) {
|
|
28
|
+
rmSync(join(framesDir, file));
|
|
29
|
+
}
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
const html = `<!doctype html>
|
|
33
|
+
<html>
|
|
34
|
+
<head>
|
|
35
|
+
<meta charset="utf-8" />
|
|
36
|
+
<style>
|
|
37
|
+
* { box-sizing: border-box; }
|
|
38
|
+
html, body {
|
|
39
|
+
margin: 0;
|
|
40
|
+
width: ${width}px;
|
|
41
|
+
height: ${height}px;
|
|
42
|
+
overflow: hidden;
|
|
43
|
+
background: #f4f6fb;
|
|
44
|
+
font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
|
|
45
|
+
color: #f8fafc;
|
|
46
|
+
}
|
|
47
|
+
.stage {
|
|
48
|
+
width: ${width}px;
|
|
49
|
+
height: ${height}px;
|
|
50
|
+
padding: 58px;
|
|
51
|
+
display: grid;
|
|
52
|
+
place-items: center;
|
|
53
|
+
background:
|
|
54
|
+
radial-gradient(circle at 15% 10%, rgba(167, 139, 250, .2), transparent 32%),
|
|
55
|
+
radial-gradient(circle at 85% 12%, rgba(34, 211, 238, .16), transparent 34%),
|
|
56
|
+
linear-gradient(135deg, #eef2ff, #f8fafc);
|
|
57
|
+
}
|
|
58
|
+
.card {
|
|
59
|
+
width: 964px;
|
|
60
|
+
height: 964px;
|
|
61
|
+
border-radius: 42px;
|
|
62
|
+
padding: 42px;
|
|
63
|
+
background: #080b12;
|
|
64
|
+
box-shadow: 0 36px 90px rgba(15, 23, 42, .25);
|
|
65
|
+
position: relative;
|
|
66
|
+
overflow: hidden;
|
|
67
|
+
}
|
|
68
|
+
.card::before {
|
|
69
|
+
content: "";
|
|
70
|
+
position: absolute;
|
|
71
|
+
inset: 0;
|
|
72
|
+
background:
|
|
73
|
+
radial-gradient(circle at 50% -10%, rgba(167, 139, 250, .18), transparent 36%),
|
|
74
|
+
linear-gradient(180deg, rgba(255,255,255,.06), transparent 28%);
|
|
75
|
+
pointer-events: none;
|
|
76
|
+
}
|
|
77
|
+
.top {
|
|
78
|
+
position: relative;
|
|
79
|
+
display: flex;
|
|
80
|
+
justify-content: space-between;
|
|
81
|
+
align-items: center;
|
|
82
|
+
color: #94a3b8;
|
|
83
|
+
font-size: 23px;
|
|
84
|
+
letter-spacing: -0.02em;
|
|
85
|
+
}
|
|
86
|
+
.brand {
|
|
87
|
+
display: flex;
|
|
88
|
+
gap: 14px;
|
|
89
|
+
align-items: center;
|
|
90
|
+
font-weight: 800;
|
|
91
|
+
color: #fff;
|
|
92
|
+
font-size: 30px;
|
|
93
|
+
}
|
|
94
|
+
.logo {
|
|
95
|
+
width: 42px;
|
|
96
|
+
height: 42px;
|
|
97
|
+
border-radius: 13px;
|
|
98
|
+
display: grid;
|
|
99
|
+
place-items: center;
|
|
100
|
+
background: linear-gradient(135deg, #22d3ee, #34d399);
|
|
101
|
+
color: #021014;
|
|
102
|
+
font-weight: 900;
|
|
103
|
+
}
|
|
104
|
+
.pill {
|
|
105
|
+
border: 1px solid rgba(148, 163, 184, .28);
|
|
106
|
+
background: rgba(15, 23, 42, .68);
|
|
107
|
+
color: #cbd5e1;
|
|
108
|
+
border-radius: 999px;
|
|
109
|
+
padding: 10px 16px;
|
|
110
|
+
font-size: 18px;
|
|
111
|
+
font-weight: 650;
|
|
112
|
+
}
|
|
113
|
+
.content {
|
|
114
|
+
position: relative;
|
|
115
|
+
height: 818px;
|
|
116
|
+
padding-top: 44px;
|
|
117
|
+
}
|
|
118
|
+
.headline {
|
|
119
|
+
margin: 0;
|
|
120
|
+
color: #f8fafc;
|
|
121
|
+
font-size: 68px;
|
|
122
|
+
line-height: .98;
|
|
123
|
+
letter-spacing: -0.06em;
|
|
124
|
+
max-width: 840px;
|
|
125
|
+
}
|
|
126
|
+
.sub {
|
|
127
|
+
margin-top: 22px;
|
|
128
|
+
color: #cbd5e1;
|
|
129
|
+
font-size: 29px;
|
|
130
|
+
line-height: 1.28;
|
|
131
|
+
letter-spacing: -0.03em;
|
|
132
|
+
max-width: 820px;
|
|
133
|
+
}
|
|
134
|
+
.accent { color: #67e8f9; }
|
|
135
|
+
.green { color: #34d399; }
|
|
136
|
+
.red { color: #fb7185; }
|
|
137
|
+
.violet { color: #a78bfa; }
|
|
138
|
+
.mono {
|
|
139
|
+
font-family: "SF Mono", "JetBrains Mono", Menlo, Consolas, monospace;
|
|
140
|
+
letter-spacing: -0.04em;
|
|
141
|
+
}
|
|
142
|
+
.terminal {
|
|
143
|
+
margin-top: 38px;
|
|
144
|
+
border: 1px solid rgba(148, 163, 184, .22);
|
|
145
|
+
background: rgba(2, 6, 23, .82);
|
|
146
|
+
border-radius: 24px;
|
|
147
|
+
padding: 26px;
|
|
148
|
+
font-size: 24px;
|
|
149
|
+
line-height: 1.5;
|
|
150
|
+
color: #dbeafe;
|
|
151
|
+
box-shadow: inset 0 1px 0 rgba(255,255,255,.05);
|
|
152
|
+
}
|
|
153
|
+
.terminal .line { opacity: 1; }
|
|
154
|
+
.warning {
|
|
155
|
+
margin-top: 28px;
|
|
156
|
+
border: 1px solid rgba(167, 139, 250, .4);
|
|
157
|
+
background: rgba(167, 139, 250, .12);
|
|
158
|
+
color: #ddd6fe;
|
|
159
|
+
border-radius: 22px;
|
|
160
|
+
padding: 22px 26px;
|
|
161
|
+
font-size: 28px;
|
|
162
|
+
font-weight: 850;
|
|
163
|
+
letter-spacing: -0.04em;
|
|
164
|
+
}
|
|
165
|
+
.numbers {
|
|
166
|
+
margin-top: 46px;
|
|
167
|
+
display: grid;
|
|
168
|
+
grid-template-columns: 1fr 1fr;
|
|
169
|
+
gap: 28px;
|
|
170
|
+
align-items: end;
|
|
171
|
+
}
|
|
172
|
+
.number-card {
|
|
173
|
+
border-radius: 26px;
|
|
174
|
+
padding: 28px;
|
|
175
|
+
background: rgba(15, 23, 42, .9);
|
|
176
|
+
border: 1px solid rgba(148, 163, 184, .22);
|
|
177
|
+
}
|
|
178
|
+
.label {
|
|
179
|
+
color: #94a3b8;
|
|
180
|
+
font-size: 22px;
|
|
181
|
+
margin-bottom: 12px;
|
|
182
|
+
letter-spacing: -0.03em;
|
|
183
|
+
}
|
|
184
|
+
.big {
|
|
185
|
+
font-size: 78px;
|
|
186
|
+
line-height: .9;
|
|
187
|
+
font-weight: 900;
|
|
188
|
+
letter-spacing: -0.08em;
|
|
189
|
+
}
|
|
190
|
+
.bar {
|
|
191
|
+
margin-top: 32px;
|
|
192
|
+
height: 34px;
|
|
193
|
+
border-radius: 999px;
|
|
194
|
+
background: rgba(148, 163, 184, .16);
|
|
195
|
+
overflow: hidden;
|
|
196
|
+
border: 1px solid rgba(148, 163, 184, .24);
|
|
197
|
+
}
|
|
198
|
+
.fill {
|
|
199
|
+
height: 100%;
|
|
200
|
+
width: 37.9%;
|
|
201
|
+
border-radius: 999px;
|
|
202
|
+
background: linear-gradient(90deg, #22d3ee, #34d399);
|
|
203
|
+
}
|
|
204
|
+
.footer {
|
|
205
|
+
position: absolute;
|
|
206
|
+
left: 42px;
|
|
207
|
+
right: 42px;
|
|
208
|
+
bottom: 34px;
|
|
209
|
+
display: flex;
|
|
210
|
+
justify-content: space-between;
|
|
211
|
+
align-items: center;
|
|
212
|
+
color: #94a3b8;
|
|
213
|
+
font-size: 20px;
|
|
214
|
+
}
|
|
215
|
+
.scene {
|
|
216
|
+
position: absolute;
|
|
217
|
+
inset: 44px 0 0 0;
|
|
218
|
+
opacity: 0;
|
|
219
|
+
transform: translateY(24px) scale(.985);
|
|
220
|
+
transition: opacity .24s ease, transform .24s ease;
|
|
221
|
+
}
|
|
222
|
+
.scene.active {
|
|
223
|
+
opacity: 1;
|
|
224
|
+
transform: translateY(0) scale(1);
|
|
225
|
+
}
|
|
226
|
+
</style>
|
|
227
|
+
</head>
|
|
228
|
+
<body>
|
|
229
|
+
<div class="stage">
|
|
230
|
+
<div class="card">
|
|
231
|
+
<div class="top">
|
|
232
|
+
<div class="brand"><div class="logo">R</div> Runcap</div>
|
|
233
|
+
<div class="pill">local-first AI cost control</div>
|
|
234
|
+
</div>
|
|
235
|
+
<div class="content">
|
|
236
|
+
<section class="scene active" id="s0">
|
|
237
|
+
<h1 class="headline">Your AI agent looks busy. It is just circling.</h1>
|
|
238
|
+
<p class="sub">Same failure, reworded every turn. It produces output, makes no progress, and keeps spending your tokens.</p>
|
|
239
|
+
<div class="terminal mono">
|
|
240
|
+
<div class="line">attempt 1: guard the undefined with an if check</div>
|
|
241
|
+
<div class="line">attempt 2: add an optional chain before .id</div>
|
|
242
|
+
<div class="line">attempt 3: default the object to {} first</div>
|
|
243
|
+
<div class="line red">test still fails. budget still draining.</div>
|
|
244
|
+
</div>
|
|
245
|
+
</section>
|
|
246
|
+
<section class="scene" id="s1">
|
|
247
|
+
<h1 class="headline">Plain hashing never catches this.</h1>
|
|
248
|
+
<p class="sub">The prompt is similar but never byte-identical between loops, so the hash changes every turn and nothing trips.</p>
|
|
249
|
+
<div class="terminal mono">
|
|
250
|
+
<div class="line">hash(attempt 1) = a91f... hash(attempt 2) = c4d2...</div>
|
|
251
|
+
<div class="line red">different hash every time -> loop invisible</div>
|
|
252
|
+
</div>
|
|
253
|
+
</section>
|
|
254
|
+
<section class="scene" id="s2">
|
|
255
|
+
<h1 class="headline">Runcap measures similarity, not hashes.</h1>
|
|
256
|
+
<p class="sub">A local gateway sees every request in real time and compares each prompt's shape against the recent run.</p>
|
|
257
|
+
<div class="warning">loop: last 3 prompts 97.7% identical, no progress. The agent is circling the same failure.</div>
|
|
258
|
+
<div class="terminal mono">
|
|
259
|
+
<div class="line green">$ runcap status</div>
|
|
260
|
+
<div class="line violet">Loop warning: stepping in before it burns more budget.</div>
|
|
261
|
+
</div>
|
|
262
|
+
</section>
|
|
263
|
+
<section class="scene" id="s3">
|
|
264
|
+
<h1 class="headline">And it compresses every call it lets through.</h1>
|
|
265
|
+
<div class="numbers">
|
|
266
|
+
<div class="number-card">
|
|
267
|
+
<div class="label">baseline prompt</div>
|
|
268
|
+
<div class="big red mono">1,186</div>
|
|
269
|
+
<div class="label">tokens</div>
|
|
270
|
+
</div>
|
|
271
|
+
<div class="number-card">
|
|
272
|
+
<div class="label">with Runcap</div>
|
|
273
|
+
<div class="big green mono">737</div>
|
|
274
|
+
<div class="label">tokens</div>
|
|
275
|
+
</div>
|
|
276
|
+
</div>
|
|
277
|
+
<div class="bar"><div class="fill"></div></div>
|
|
278
|
+
<p class="sub"><span class="green">37.9% saved</span> on a real OpenAI call. The model still answered correctly about the changed line.</p>
|
|
279
|
+
</section>
|
|
280
|
+
<section class="scene" id="s4">
|
|
281
|
+
<h1 class="headline">Estimate. Cap. Compress. Catch the loop.</h1>
|
|
282
|
+
<p class="sub">Point your OpenAI or Anthropic-compatible tools at the local gateway. When the ceiling is crossed, the next call stops.</p>
|
|
283
|
+
<div class="terminal mono">
|
|
284
|
+
<div class="line green">$ AIM_DAILY_BUDGET_USD=10 runcap gateway</div>
|
|
285
|
+
<div class="line">gateway up · compress on · hard cap armed · loop guard on</div>
|
|
286
|
+
<div class="line red">HTTP 429 budget_guard</div>
|
|
287
|
+
<div class="line accent">stopped before money left your account</div>
|
|
288
|
+
</div>
|
|
289
|
+
</section>
|
|
290
|
+
</div>
|
|
291
|
+
<div class="footer">
|
|
292
|
+
<span class="mono">npm install -g runcap</span>
|
|
293
|
+
<span>Free · MIT · 100% local</span>
|
|
294
|
+
</div>
|
|
295
|
+
</div>
|
|
296
|
+
</div>
|
|
297
|
+
<script>
|
|
298
|
+
const scenes = [...document.querySelectorAll(".scene")];
|
|
299
|
+
window.renderFrame = (seconds) => {
|
|
300
|
+
const index =
|
|
301
|
+
seconds < 2.8 ? 0 :
|
|
302
|
+
seconds < 5.2 ? 1 :
|
|
303
|
+
seconds < 8.2 ? 2 :
|
|
304
|
+
seconds < 10.6 ? 3 : 4;
|
|
305
|
+
scenes.forEach((scene, i) => scene.classList.toggle("active", i === index));
|
|
306
|
+
};
|
|
307
|
+
</script>
|
|
308
|
+
</body>
|
|
309
|
+
</html>`;
|
|
310
|
+
|
|
311
|
+
const browser = await chromium.launch({ headless: true });
|
|
312
|
+
const page = await browser.newPage({ viewport: { width, height }, deviceScaleFactor: 1 });
|
|
313
|
+
await page.setContent(html);
|
|
314
|
+
await page.waitForTimeout(100);
|
|
315
|
+
|
|
316
|
+
for (let i = 0; i < frameCount; i += 1) {
|
|
317
|
+
const seconds = i / fps;
|
|
318
|
+
await page.evaluate((t) => window.renderFrame(t), seconds);
|
|
319
|
+
await page.screenshot({ path: join(framesDir, `frame-${String(i).padStart(4, "0")}.png`) });
|
|
320
|
+
}
|
|
321
|
+
await browser.close();
|
|
322
|
+
|
|
323
|
+
const ffmpeg = spawnSync("ffmpeg", [
|
|
324
|
+
"-y",
|
|
325
|
+
"-framerate", String(fps),
|
|
326
|
+
"-i", join(framesDir, "frame-%04d.png"),
|
|
327
|
+
"-c:v", "libx264",
|
|
328
|
+
"-pix_fmt", "yuv420p",
|
|
329
|
+
"-movflags", "+faststart",
|
|
330
|
+
"-crf", "18",
|
|
331
|
+
outFile
|
|
332
|
+
], { stdio: "inherit" });
|
|
333
|
+
|
|
334
|
+
if (ffmpeg.status !== 0) {
|
|
335
|
+
process.exit(ffmpeg.status ?? 1);
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
console.log(`wrote ${outFile}`);
|
package/src/compressor.mjs
CHANGED
|
@@ -405,32 +405,100 @@ export function requestShapeText(body) {
|
|
|
405
405
|
return parts.join("\n");
|
|
406
406
|
}
|
|
407
407
|
|
|
408
|
+
// Pull the "did the work move?" signal out of an upstream RESPONSE. Similar
|
|
409
|
+
// prompts alone can't tell circling from convergence: a run closing in on a fix
|
|
410
|
+
// also sends near-identical prompts turn after turn. The tell is whether the
|
|
411
|
+
// observation changed - the error/test output coming back. We reduce a response
|
|
412
|
+
// to the assistant's returned text (plus any explicit error), which carries the
|
|
413
|
+
// error/stack/test signature the next prompt is reacting to.
|
|
414
|
+
export function responseSignature(body) {
|
|
415
|
+
if (!body || typeof body !== "object") return "";
|
|
416
|
+
const parts = [];
|
|
417
|
+
const push = (content) => {
|
|
418
|
+
if (typeof content === "string") parts.push(content);
|
|
419
|
+
else if (Array.isArray(content)) {
|
|
420
|
+
for (const p of content) if (p && typeof p === "object" && typeof p.text === "string") parts.push(p.text);
|
|
421
|
+
}
|
|
422
|
+
};
|
|
423
|
+
// OpenAI chat: choices[].message.content
|
|
424
|
+
if (Array.isArray(body.choices)) {
|
|
425
|
+
for (const ch of body.choices) {
|
|
426
|
+
if (ch && typeof ch === "object" && ch.message) push(ch.message.content);
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
// Anthropic messages: content blocks at top level
|
|
430
|
+
if (Array.isArray(body.content)) push(body.content);
|
|
431
|
+
// Provider error envelopes (OpenAI {error:{message}}, Anthropic {error:{message}})
|
|
432
|
+
if (body.error) {
|
|
433
|
+
if (typeof body.error === "string") parts.push(body.error);
|
|
434
|
+
else if (typeof body.error.message === "string") parts.push(body.error.message);
|
|
435
|
+
}
|
|
436
|
+
return parts.join("\n");
|
|
437
|
+
}
|
|
438
|
+
|
|
408
439
|
// Given the current request and a rolling history of prior request shapes,
|
|
409
440
|
// decide whether the agent is circling. Returns { looping, repeats, similarity, responseMoved }.
|
|
410
441
|
// History is oldest->newest of prior requestShapeText() strings in this session.
|
|
442
|
+
//
|
|
443
|
+
// Prompt similarity is the cheap pre-filter. When response signatures are
|
|
444
|
+
// available it becomes a GATE, not the verdict: a run only counts as circling
|
|
445
|
+
// when the prompts are near-identical AND the upstream response did not move
|
|
446
|
+
// (same error/output signature). A converging run sends similar prompts but the
|
|
447
|
+
// observation shifts, so it passes. Pass responseSignatures (oldest->newest,
|
|
448
|
+
// aligned with history) and currentResponseSignature to enable the gate; omit
|
|
449
|
+
// them and detection falls back to prompt-similarity-only (prior behavior).
|
|
411
450
|
/**
 * Count how many trailing turns in `history` look like the current request
 * and, when response signatures are supplied, also produced an unchanged
 * response.
 *
 * @param {string} currentShape - Shape text of the current request.
 * @param {string[]} history - Prior request shape texts, oldest -> newest.
 * @param {object} [options]
 * @param {number} [options.similarityThreshold=LOOP_SIMILARITY] - Minimum
 *   prompt similarity for a prior turn to count as a repeat.
 * @param {number} [options.minRepeats=LOOP_MIN_REPEATS] - Number of counted
 *   repeats at which `looping` becomes true.
 * @param {Array<string|null|undefined>|null} [options.responseSignatures=null] -
 *   Response signatures indexed like `history`.
 * @param {string|null} [options.currentResponseSignature=null] - Signature of
 *   the response the current prompt is reacting to. The response gate is
 *   active only when this is non-null and `responseSignatures` is an array.
 * @param {number} [options.responseMovedThreshold=LOOP_SIMILARITY] - Response
 *   similarity below this value counts as "the response moved".
 * @returns {{looping: boolean, repeats: number, similarity: number, responseMoved: boolean}}
 *   `similarity` is the prompt similarity of the oldest counted turn, rounded
 *   to three decimals (0 when nothing was counted). `responseMoved` is true
 *   when the walk stopped because a response changed.
 */
export function detectLoop(currentShape, history, {
  similarityThreshold = LOOP_SIMILARITY,
  minRepeats = LOOP_MIN_REPEATS,
  responseSignatures = null,
  currentResponseSignature = null,
  responseMovedThreshold = LOOP_SIMILARITY
} = {}) {
  if (!currentShape || !Array.isArray(history) || history.length === 0) {
    return { looping: false, repeats: 0, similarity: 0, responseMoved: false };
  }

  const curLines = String(currentShape).split("\n");
  const haveResponses = Array.isArray(responseSignatures) && currentResponseSignature != null;
  let repeats = 0;
  let lastSimilarity = 0;
  let responseMoved = false;

  // Walk newest -> oldest, counting only trailing turns that are BOTH
  // prompt-similar AND response-stuck. The first turn whose prompt differs or
  // whose response moved ends the run, so a run that progressed and then got
  // stuck still flags once it has circled the same response long enough.
  // Without response data this is prompt-similarity-only.
  let newerResp = haveResponses ? currentResponseSignature : null;
  for (let i = history.length - 1; i >= 0; i--) {
    const sim = lineSimilarity(curLines, String(history[i]).split("\n"));
    if (sim < similarityThreshold) break;

    if (haveResponses) {
      const olderResp = responseSignatures[i];
      // A missing or empty signature on either side cannot prove movement, so
      // that step is treated as stuck.
      const haveBoth = olderResp != null && newerResp != null &&
        String(olderResp).length > 0 && String(newerResp).length > 0;
      if (haveBoth) {
        const respSim = lineSimilarity(String(newerResp).split("\n"), String(olderResp).split("\n"));
        if (respSim < responseMovedThreshold) {
          responseMoved = true;
          break;
        }
      }
      newerResp = olderResp;
    }

    repeats += 1;
    lastSimilarity = sim;
  }

  return {
    looping: repeats >= minRepeats,
    repeats,
    similarity: Number(lastSimilarity.toFixed(3)),
    responseMoved
  };
}
|
package/src/mission-control.mjs
CHANGED
|
@@ -7,7 +7,7 @@ import path from "node:path";
|
|
|
7
7
|
import process from "node:process";
|
|
8
8
|
import { syncRun } from "./cloud.mjs";
|
|
9
9
|
import { sendAlert } from "./alerts.mjs";
|
|
10
|
-
import { compressRequestBody, estimateTokens, requestShapeText, detectLoop } from "./compressor.mjs";
|
|
10
|
+
import { compressRequestBody, estimateTokens, requestShapeText, detectLoop, responseSignature } from "./compressor.mjs";
|
|
11
11
|
|
|
12
12
|
const STORE_DIR = ".runcap";
|
|
13
13
|
const MISSIONS_DIR = path.join(STORE_DIR, "missions");
|
|
@@ -528,6 +528,13 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
|
|
|
528
528
|
// but-not-identical turns, which plain hashing never catches.
|
|
529
529
|
const loopEnabled = (process.env.AIM_LOOP_DETECT ?? "on").toLowerCase() !== "off";
|
|
530
530
|
const shapeHistory = [];
|
|
531
|
+
// Response signatures aligned with shapeHistory (the observation each prior
|
|
532
|
+
// prompt produced). Lets the loop detector tell circling from convergence:
|
|
533
|
+
// similar prompts only count as a loop when the response did not move either.
|
|
534
|
+
// Each entry is a mutable holder { sig } so the slot for an in-flight turn can
|
|
535
|
+
// be captured by reference and filled once its upstream response returns, even
|
|
536
|
+
// if concurrent turns push new entries or shift() trims the array meanwhile.
|
|
537
|
+
const responseHistory = [];
|
|
531
538
|
const SHAPE_HISTORY_MAX = 12;
|
|
532
539
|
const server = http.createServer(async (request, response) => {
|
|
533
540
|
const started = Date.now();
|
|
@@ -551,15 +558,32 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
|
|
|
551
558
|
|
|
552
559
|
const bodyText = await readRequestBody(request);
|
|
553
560
|
const requestBody = safeJson(bodyText) ?? {};
|
|
554
|
-
// Loop signal: compare this request's shape against the recent run.
|
|
561
|
+
// Loop signal: compare this request's shape against the recent run. The
|
|
562
|
+
// response signatures gate prompt-similarity so a converging run (similar
|
|
563
|
+
// prompts, but the error/output is changing) is not flagged as circling.
|
|
555
564
|
let loop = null;
|
|
565
|
+
let currentShape = null;
|
|
566
|
+
let responseSlot = null; // holder for THIS turn's response signature
|
|
556
567
|
if (loopEnabled) {
|
|
557
568
|
const shape = requestShapeText(requestBody);
|
|
558
569
|
if (shape) {
|
|
559
|
-
|
|
560
|
-
|
|
570
|
+
currentShape = shape;
|
|
571
|
+
const result = detectLoop(shape, shapeHistory, {
|
|
572
|
+
responseSignatures: responseHistory.map((h) => h.sig),
|
|
573
|
+
currentResponseSignature: responseHistory.length ? responseHistory[responseHistory.length - 1].sig : null
|
|
574
|
+
});
|
|
575
|
+
loop = {
|
|
576
|
+
looping: result.looping,
|
|
577
|
+
repeats: result.repeats,
|
|
578
|
+
similarity: result.similarity,
|
|
579
|
+
responseMoved: result.responseMoved,
|
|
580
|
+
truth: "calculated"
|
|
581
|
+
};
|
|
561
582
|
shapeHistory.push(shape);
|
|
583
|
+
responseSlot = { sig: "" }; // filled by reference once upstream returns
|
|
584
|
+
responseHistory.push(responseSlot);
|
|
562
585
|
if (shapeHistory.length > SHAPE_HISTORY_MAX) shapeHistory.shift();
|
|
586
|
+
if (responseHistory.length > SHAPE_HISTORY_MAX) responseHistory.shift();
|
|
563
587
|
}
|
|
564
588
|
}
|
|
565
589
|
const budget = readBudget();
|
|
@@ -639,6 +663,8 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
|
|
|
639
663
|
if (gatewayMode === "mock") {
|
|
640
664
|
const responseBody = mockCompletion(requestBody, url.pathname);
|
|
641
665
|
const responseText = JSON.stringify(responseBody);
|
|
666
|
+
// Record before unblocking the client so a concurrent next turn sees it.
|
|
667
|
+
if (responseSlot) responseSlot.sig = responseSignature(responseBody);
|
|
642
668
|
send(response, 200, responseText, "application/json; charset=utf-8");
|
|
643
669
|
await appendGatewayEvent({
|
|
644
670
|
at: new Date().toISOString(),
|
|
@@ -685,13 +711,14 @@ function createGatewayServer({ port = 8792, mock = false, upstream = {} } = {})
|
|
|
685
711
|
body: forwardBody
|
|
686
712
|
});
|
|
687
713
|
const responseText = await upstreamResponse.text();
|
|
714
|
+
const responseBody = safeJson(responseText) ?? {};
|
|
715
|
+
// Record before unblocking the client so a concurrent next turn sees it.
|
|
716
|
+
if (responseSlot) responseSlot.sig = responseSignature(responseBody);
|
|
688
717
|
response.writeHead(upstreamResponse.status, {
|
|
689
718
|
"content-type": upstreamResponse.headers.get("content-type") ?? "application/json",
|
|
690
719
|
"cache-control": "no-store"
|
|
691
720
|
});
|
|
692
721
|
response.end(responseText);
|
|
693
|
-
|
|
694
|
-
const responseBody = safeJson(responseText) ?? {};
|
|
695
722
|
await appendGatewayEvent({
|
|
696
723
|
at: new Date().toISOString(),
|
|
697
724
|
path: url.pathname,
|
|
@@ -761,7 +788,7 @@ export async function startGateway({ port = 8792, mock = false } = {}) {
|
|
|
761
788
|
// it down afterward. Upstream is pinned from the CURRENT env before the child's
|
|
762
789
|
// base URLs are rewritten, so the gateway proxies to the real provider, not to
|
|
763
790
|
// itself.
|
|
764
|
-
async function startEphemeralGateway({ mock = false } = {}) {
|
|
791
|
+
export async function startEphemeralGateway({ mock = false } = {}) {
|
|
765
792
|
await ensureStore();
|
|
766
793
|
const upstream = {
|
|
767
794
|
openaiKey: process.env.AIM_UPSTREAM_API_KEY ?? process.env.OPENAI_API_KEY,
|