runcap 0.3.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +211 -9
- package/bin/runcap.mjs +153 -0
- package/examples/outcome-demo/agent-fixes.mjs +24 -0
- package/examples/outcome-demo/agent-spins.mjs +20 -0
- package/examples/outcome-demo/broken.mjs +5 -0
- package/examples/outcome-demo/verify.mjs +7 -0
- package/package.json +11 -2
- package/scripts/guard-test.mjs +76 -0
- package/scripts/loop-e2e.mjs +137 -0
- package/scripts/loop-test.mjs +45 -1
- package/scripts/make-demo-svg.mjs +20 -19
- package/scripts/make-linkedin-loop-video.mjs +338 -0
- package/scripts/mission-test.mjs +148 -0
- package/scripts/outcome-test.mjs +48 -0
- package/scripts/policy-test.mjs +121 -0
- package/scripts/render-media-screenshots.mjs +37 -0
- package/src/compressor.mjs +77 -9
- package/src/mission-control.mjs +475 -8
- package/src/policy.mjs +208 -0
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { chromium } from "playwright";
|
|
2
|
+
import { pathToFileURL } from "node:url";
|
|
3
|
+
import { resolve } from "node:path";
|
|
4
|
+
|
|
5
|
+
const root = resolve(import.meta.dirname, "..");
|
|
6
|
+
const mediaDir = resolve(root, "docs/assets/media");
|
|
7
|
+
|
|
8
|
+
const shots = [
|
|
9
|
+
{
|
|
10
|
+
html: resolve(mediaDir, "cover.html"),
|
|
11
|
+
png: resolve(mediaDir, "cover.png"),
|
|
12
|
+
width: 1200,
|
|
13
|
+
height: 630
|
|
14
|
+
},
|
|
15
|
+
{
|
|
16
|
+
html: resolve(mediaDir, "demo.html"),
|
|
17
|
+
png: resolve(mediaDir, "demo.png"),
|
|
18
|
+
width: 1200,
|
|
19
|
+
height: 750
|
|
20
|
+
}
|
|
21
|
+
];
|
|
22
|
+
|
|
23
|
+
const browser = await chromium.launch();
|
|
24
|
+
try {
|
|
25
|
+
for (const shot of shots) {
|
|
26
|
+
const page = await browser.newPage({
|
|
27
|
+
viewport: { width: shot.width, height: shot.height },
|
|
28
|
+
deviceScaleFactor: 2
|
|
29
|
+
});
|
|
30
|
+
await page.goto(pathToFileURL(shot.html).href, { waitUntil: "networkidle" });
|
|
31
|
+
await page.screenshot({ path: shot.png, fullPage: false });
|
|
32
|
+
await page.close();
|
|
33
|
+
console.log(`rendered ${shot.png}`);
|
|
34
|
+
}
|
|
35
|
+
} finally {
|
|
36
|
+
await browser.close();
|
|
37
|
+
}
|
package/src/compressor.mjs
CHANGED
|
@@ -405,32 +405,100 @@ export function requestShapeText(body) {
|
|
|
405
405
|
return parts.join("\n");
|
|
406
406
|
}
|
|
407
407
|
|
|
408
|
+
// Pull the "did the work move?" signal out of an upstream RESPONSE. Similar
|
|
409
|
+
// prompts alone can't tell circling from convergence: a run closing in on a fix
|
|
410
|
+
// also sends near-identical prompts turn after turn. The tell is whether the
|
|
411
|
+
// observation changed - the error/test output coming back. We reduce a response
|
|
412
|
+
// to the assistant's returned text (plus any explicit error), which carries the
|
|
413
|
+
// error/stack/test signature the next prompt is reacting to.
|
|
414
|
+
/**
 * Collapse an upstream response body into the text it returned.
 *
 * Text is gathered, in this order, from:
 *   1. `choices[].message.content` (a string, or an array of parts with a string `text`)
 *   2. a top-level `content` array of parts with a string `text`
 *   3. `error` when it is a string, otherwise `error.message` when that is a string
 *
 * @param {unknown} body - Parsed response body.
 * @returns {string} The collected pieces joined with "\n"; "" when `body` is
 *   not an object or nothing matched.
 */
export function responseSignature(body) {
  if (body === null || typeof body !== "object") return "";

  // Normalise a content value to a list of strings; anything else yields [].
  const textsOf = (content) => {
    if (typeof content === "string") return [content];
    if (!Array.isArray(content)) return [];
    return content
      .filter((part) => part && typeof part === "object" && typeof part.text === "string")
      .map((part) => part.text);
  };

  // choices[].message.content
  const fromChoices = Array.isArray(body.choices)
    ? body.choices.flatMap((choice) => (
      choice && typeof choice === "object" && choice.message
        ? textsOf(choice.message.content)
        : []
    ))
    : [];

  // Top-level content blocks; a bare string here is deliberately not accepted.
  const fromContent = Array.isArray(body.content) ? textsOf(body.content) : [];

  // Error envelope: either a plain string or an object carrying `message`.
  const fromError = [];
  const failure = body.error;
  if (failure) {
    if (typeof failure === "string") {
      fromError.push(failure);
    } else if (typeof failure.message === "string") {
      fromError.push(failure.message);
    }
  }

  return [...fromChoices, ...fromContent, ...fromError].join("\n");
}
|
|
438
|
+
|
|
408
439
|
// Given the current request and a rolling history of prior request shapes,
|
|
409
440
|
// decide whether the agent is circling. Returns { looping, repeats, similarity }.
|
|
410
441
|
// History is oldest->newest of prior requestShapeText() strings in this session.
|
|
442
|
+
//
|
|
443
|
+
// Prompt similarity is the cheap pre-filter. When response signatures are
|
|
444
|
+
// available it becomes a GATE, not the verdict: a run only counts as circling
|
|
445
|
+
// when the prompts are near-identical AND the upstream response did not move
|
|
446
|
+
// (same error/output signature). A converging run sends similar prompts but the
|
|
447
|
+
// observation shifts, so it passes. Pass responseSignatures (oldest->newest,
|
|
448
|
+
// aligned with history) and currentResponseSignature to enable the gate; omit
|
|
449
|
+
// them and detection falls back to prompt-similarity-only (prior behavior).
|
|
411
450
|
/**
 * Count how many of the most recent prior turns the current request repeats.
 *
 * Walks `history` from newest to oldest. A turn extends the streak when its
 * line similarity to `currentShape` is at least `similarityThreshold` and,
 * if response data was supplied, the response did not move between that turn
 * and the next-newer one. The walk stops at the first turn that fails either
 * condition.
 *
 * @param {string} currentShape - Shape text of the current request.
 * @param {string[]} history - Prior shape texts, oldest -> newest.
 * @param {object} [options]
 * @param {number} [options.similarityThreshold] - Minimum prompt similarity to count a turn.
 * @param {number} [options.minRepeats] - Streak length at which `looping` becomes true.
 * @param {Array|null} [options.responseSignatures] - Response signatures indexed like `history`.
 * @param {string|null} [options.currentResponseSignature] - Signature the current prompt reacts to.
 * @param {number} [options.responseMovedThreshold] - Response similarity below this counts as "moved".
 * @returns {{looping: boolean, repeats: number, similarity: number, responseMoved: boolean}}
 *   `similarity` is the prompt similarity of the oldest counted turn, rounded
 *   to 3 decimals. `responseMoved` is true only when a changed response ended the walk.
 */
export function detectLoop(currentShape, history, {
  similarityThreshold = LOOP_SIMILARITY,
  minRepeats = LOOP_MIN_REPEATS,
  responseSignatures = null,
  currentResponseSignature = null,
  responseMovedThreshold = LOOP_SIMILARITY
} = {}) {
  const nothingToCompare = !currentShape || !Array.isArray(history) || history.length === 0;
  if (nothingToCompare) {
    return { looping: false, repeats: 0, similarity: 0, responseMoved: false };
  }

  const toLines = (text) => String(text).split("\n");
  // A signature is comparable only when present and non-empty once stringified.
  const hasText = (signature) => signature != null && String(signature).length > 0;

  const promptLines = toLines(currentShape);
  // The response gate needs both the per-turn list and the current signature.
  const gateOnResponses = Array.isArray(responseSignatures) && currentResponseSignature != null;

  let streak = 0;
  let oldestCountedSimilarity = 0;
  let responseMoved = false;
  // Response of the next-newer turn; starts at the current one and slides back.
  let laterResponse = gateOnResponses ? currentResponseSignature : null;

  for (let turn = history.length - 1; turn >= 0; turn -= 1) {
    const promptSimilarity = lineSimilarity(promptLines, toLines(history[turn]));
    if (promptSimilarity < similarityThreshold) {
      break;
    }

    if (gateOnResponses) {
      const earlierResponse = responseSignatures[turn];
      if (hasText(earlierResponse) && hasText(laterResponse)) {
        const responseSimilarity = lineSimilarity(toLines(laterResponse), toLines(earlierResponse));
        if (responseSimilarity < responseMovedThreshold) {
          // The observation changed between these turns: progress, so stop here.
          responseMoved = true;
          break;
        }
      }
      // Slides back even when this turn's signature is missing or empty, in
      // which case the next comparison is skipped as well.
      laterResponse = earlierResponse;
    }

    streak += 1;
    oldestCountedSimilarity = promptSimilarity;
  }

  return {
    looping: streak >= minRepeats,
    repeats: streak,
    similarity: Number(oldestCountedSimilarity.toFixed(3)),
    responseMoved
  };
}
|