cairn-engine 1.3.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/adapters/critics/assertion.d.ts +8 -3
- package/dist/adapters/critics/assertion.js +19 -9
- package/dist/adapters/critics/assertion.js.map +1 -1
- package/dist/adapters/critics/llm.d.ts +1 -1
- package/dist/adapters/critics/llm.js +2 -2
- package/dist/adapters/critics/llm.js.map +1 -1
- package/dist/adapters/drivers/chrome.d.ts +5 -4
- package/dist/adapters/drivers/chrome.js +8 -4
- package/dist/adapters/drivers/chrome.js.map +1 -1
- package/dist/adapters/drivers/self-heal.d.ts +3 -2
- package/dist/adapters/drivers/self-heal.js +10 -7
- package/dist/adapters/drivers/self-heal.js.map +1 -1
- package/dist/adapters/skills/file-store.d.ts +7 -4
- package/dist/adapters/skills/file-store.js +27 -10
- package/dist/adapters/skills/file-store.js.map +1 -1
- package/dist/browser.d.ts +1 -0
- package/dist/browser.js +1 -0
- package/dist/browser.js.map +1 -1
- package/dist/cli.js +11 -6
- package/dist/cli.js.map +1 -1
- package/dist/core/discover.d.ts +6 -2
- package/dist/core/discover.js +39 -6
- package/dist/core/discover.js.map +1 -1
- package/dist/core/pipeline.d.ts +3 -1
- package/dist/core/pipeline.js +30 -2
- package/dist/core/pipeline.js.map +1 -1
- package/dist/core/ports.d.ts +10 -4
- package/dist/core/step-heal.d.ts +14 -0
- package/dist/core/step-heal.js +55 -0
- package/dist/core/step-heal.js.map +1 -0
- package/dist/core/steps.d.ts +11 -1
- package/dist/core/steps.js +4 -2
- package/dist/core/steps.js.map +1 -1
- package/dist/core/types.d.ts +14 -3
- package/dist/index.d.ts +2 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/run.d.ts +12 -4
- package/dist/run.js +42 -13
- package/dist/run.js.map +1 -1
- package/package.json +1 -1
package/dist/run.js
CHANGED
|
@@ -13,6 +13,7 @@ import { ChromeDevToolsDriver } from "./adapters/drivers/chrome.js";
|
|
|
13
13
|
import { SelfHealingDriver } from "./adapters/drivers/self-heal.js";
|
|
14
14
|
import { ConsoleReporter } from "./adapters/reporters/console.js";
|
|
15
15
|
import { createLlmClient } from "./adapters/llm/factory.js";
|
|
16
|
+
import { LlmStepHealer } from "./core/step-heal.js";
|
|
16
17
|
export function needsLlmCritic(scenario) {
|
|
17
18
|
return scenario.assertions.some((a) => a.kind === "expect");
|
|
18
19
|
}
|
|
@@ -21,45 +22,64 @@ function firstGotoUrl(scenario) {
|
|
|
21
22
|
const first = scenario.steps[0];
|
|
22
23
|
return first && first.kind === "goto" ? first.url : undefined;
|
|
23
24
|
}
|
|
24
|
-
/** Rewrite a scenario's targets with the
|
|
25
|
+
/** Rewrite a scenario's targets with the (re-located) targets self-heal substituted, for re-freezing.
|
|
26
|
+
* Keyed by the original target's object identity — which flows unchanged from the step through the
|
|
27
|
+
* driver into the Heal — so two steps sharing a label don't rewrite together (#39). */
|
|
25
28
|
export function applyHeals(scenario, heals) {
|
|
26
29
|
if (!heals.length)
|
|
27
30
|
return scenario;
|
|
28
|
-
const byOriginal = new Map(heals.map((h) => [h.original
|
|
31
|
+
const byOriginal = new Map(heals.map((h) => [h.original, h.healed]));
|
|
29
32
|
return {
|
|
30
33
|
...scenario,
|
|
31
34
|
steps: scenario.steps.map((step) => {
|
|
32
|
-
if ("target" in step
|
|
33
|
-
const healed = byOriginal.get(step.target
|
|
35
|
+
if ("target" in step) {
|
|
36
|
+
const healed = byOriginal.get(step.target);
|
|
34
37
|
if (healed)
|
|
35
|
-
return { ...step, target:
|
|
38
|
+
return { ...step, target: healed };
|
|
36
39
|
}
|
|
37
40
|
return step;
|
|
38
41
|
}),
|
|
39
42
|
};
|
|
40
43
|
}
|
|
44
|
+
/** Replace surgically-healed steps in place (keyed by index, so same-label steps don't collide). */
|
|
45
|
+
export function applyStepHeals(scenario, heals) {
|
|
46
|
+
if (!heals.length)
|
|
47
|
+
return scenario;
|
|
48
|
+
const byIndex = new Map(heals.map((h) => [h.index, h.step]));
|
|
49
|
+
return { ...scenario, steps: scenario.steps.map((step, i) => byIndex.get(i) ?? step) };
|
|
50
|
+
}
|
|
41
51
|
export async function runScenario(scenario, opts = {}) {
|
|
42
52
|
// Build the LLM lazily and once — only if the critic or heal needs it.
|
|
43
53
|
let llmCache = opts.llm;
|
|
44
54
|
const getLlm = () => (llmCache ??= createLlmClient(opts.model ? { model: opts.model } : {}));
|
|
45
55
|
const critic = opts.critic ??
|
|
46
|
-
(needsLlmCritic(scenario)
|
|
56
|
+
(needsLlmCritic(scenario)
|
|
57
|
+
? new LlmCritic(getLlm(), opts.custom, opts.benign)
|
|
58
|
+
: new AssertionCritic(opts.custom, opts.benign));
|
|
47
59
|
const baseDriver = opts.driver ?? new ChromeDevToolsDriver();
|
|
48
60
|
let healer;
|
|
49
61
|
const driver = opts.heal
|
|
50
62
|
? (healer = new SelfHealingDriver(baseDriver, getLlm(), { onHeal: opts.onHeal }))
|
|
51
63
|
: baseDriver;
|
|
64
|
+
const stepHealer = opts.heal ? new LlmStepHealer(getLlm()) : undefined;
|
|
52
65
|
const result = await runHarness({
|
|
53
66
|
context: opts.context ?? new InlineContextProvider(),
|
|
54
67
|
planner: new StaticPlanner(scenario),
|
|
55
68
|
driver,
|
|
56
69
|
critic,
|
|
57
70
|
reporter: opts.reporter ?? new ConsoleReporter(),
|
|
58
|
-
}, scenario.name, {
|
|
71
|
+
}, scenario.name, {
|
|
72
|
+
signal: opts.signal,
|
|
73
|
+
onStep: opts.onStep,
|
|
74
|
+
captureScreenshots: opts.screenshots,
|
|
75
|
+
actions: opts.actions,
|
|
76
|
+
stepHealer,
|
|
77
|
+
});
|
|
59
78
|
const heals = healer?.heals ?? [];
|
|
60
|
-
|
|
61
|
-
//
|
|
62
|
-
//
|
|
79
|
+
const stepHeals = stepHealer?.heals ?? [];
|
|
80
|
+
// Outcome-aware heal: the steps ran (locators/steps may even have healed) but the run still failed
|
|
81
|
+
// its assertions — the frozen path no longer reaches the goal, a break surgical-heal couldn't fix.
|
|
82
|
+
// Re-discover from the start (invariant #4 sanctioned use (b)); only on failure.
|
|
63
83
|
if (opts.heal && !result.verdict.passed) {
|
|
64
84
|
const repaired = await discover(scenario.name, {
|
|
65
85
|
driver: baseDriver,
|
|
@@ -69,13 +89,22 @@ export async function runScenario(scenario, opts = {}) {
|
|
|
69
89
|
});
|
|
70
90
|
const ctx = await (opts.context ?? new InlineContextProvider()).provide(scenario.name);
|
|
71
91
|
const evidence = await baseDriver.observe();
|
|
72
|
-
|
|
92
|
+
// Judge against the ORIGINAL goal assertions, not the ones the re-discovery derived for itself —
|
|
93
|
+
// else a path that reaches a different end-state passes as green (P2 false green).
|
|
94
|
+
const verdict = await critic.judge(evidence, scenario.assertions, ctx);
|
|
73
95
|
return {
|
|
74
96
|
result: { scenario: repaired.name, context: ctx, evidence, verdict },
|
|
75
97
|
heals,
|
|
76
|
-
|
|
98
|
+
stepHeals,
|
|
99
|
+
healedScenario: { ...repaired, assertions: scenario.assertions },
|
|
77
100
|
};
|
|
78
101
|
}
|
|
79
|
-
|
|
102
|
+
const rewritten = applyStepHeals(applyHeals(scenario, heals), stepHeals);
|
|
103
|
+
return {
|
|
104
|
+
result,
|
|
105
|
+
heals,
|
|
106
|
+
stepHeals,
|
|
107
|
+
healedScenario: heals.length || stepHeals.length ? rewritten : undefined,
|
|
108
|
+
};
|
|
80
109
|
}
|
|
81
110
|
//# sourceMappingURL=run.js.map
|
package/dist/run.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"run.js","sourceRoot":"","sources":["../src/run.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAChD,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAE9C,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,aAAa,EAAE,MAAM,+BAA+B,CAAC;AAC9D,OAAO,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAElE,OAAO,EAAE,SAAS,EAAE,MAAM,2BAA2B,CAAC;AACtD,OAAO,EAAE,oBAAoB,EAAE,MAAM,8BAA8B,CAAC;AACpE,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAClE,OAAO,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAC;
|
|
1
|
+
{"version":3,"file":"run.js","sourceRoot":"","sources":["../src/run.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAChD,OAAO,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAC;AAE9C,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,aAAa,EAAE,MAAM,+BAA+B,CAAC;AAC9D,OAAO,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAElE,OAAO,EAAE,SAAS,EAAE,MAAM,2BAA2B,CAAC;AACtD,OAAO,EAAE,oBAAoB,EAAE,MAAM,8BAA8B,CAAC;AACpE,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAClE,OAAO,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAC;AAC5D,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AA8CpD,MAAM,UAAU,cAAc,CAAC,QAAkB;IAC/C,OAAO,QAAQ,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC;AAC9D,CAAC;AAED,wGAAwG;AACxG,SAAS,YAAY,CAAC,QAAkB;IACtC,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAChC,OAAO,KAAK,IAAI,KAAK,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,SAAS,CAAC;AAChE,CAAC;AAED;;uFAEuF;AACvF,MAAM,UAAU,UAAU,CAAC,QAAkB,EAAE,KAAa;IAC1D,IAAI,CAAC,KAAK,CAAC,MAAM;QAAE,OAAO,QAAQ,CAAC;IACnC,MAAM,UAAU,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;IACrE,OAAO;QACL,GAAG,QAAQ;QACX,KAAK,EAAE,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;YACjC,IAAI,QAAQ,IAAI,IAAI,EAAE,CAAC;gBACrB,MAAM,MAAM,GAAG,UAAU,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;gBAC3C,IAAI,MAAM;oBAAE,OAAO,EAAE,GAAG,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC;YACjD,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC,CAAC;KACH,CAAC;AACJ,CAAC;AAED,oGAAoG;AACpG,MAAM,UAAU,cAAc,CAAC,QAAkB,EAAE,KAAiB;IAClE,IAAI,CAAC,KAAK,CAAC,MAAM;QAAE,OAAO,QAAQ,CAAC;IACnC,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC7D,OAAO,EAAE,GAAG,QAAQ,EAAE,KAAK,EAAE,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,EAAE,CAAC;AACzF,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,QAAkB,EAClB,OAA2B,EAAE;IAE7B,uEAAuE;IACvE,IAAI,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC;IACxB,MAAM,MAAM,GAAG,GAAc,EAAE,CAAC,CAAC,QAAQ,KAAK,eAAe,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IAExG,MAAM,MAAM,GACV,IAAI,CAAC,MAAM;QACX,CAAC,cAAc,CAAC,QAAQ,CAAC;YACvB,CAAC,CAAC,IAAI,SAAS,CAAC,MAAM,EAAE,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC;YACnD,CAAC,CAAC,IAAI,eAAe,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IAErD,MAAM,UAAU,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,oBAAoB,EAAE,CAAC;IAC7D,IAAI,MAAqC,CAAC;IAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI;QACtB,CAAC,CAAC,CAAC,MAAM,GAAG,IAAI,iBAAiB,CAAC,UAAU,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC;QACjF,CAAC,CAAC,UAAU,CAAC;IACf,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,aAAa,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;IAEvE,MAAM,MAAM,GAAG,MAAM,UAAU,CAC7B;QACE,OAAO,EAAE,IAAI,CAAC,OAAO,IAAI,IAAI,qBAAqB,EAAE;QACpD,OAAO,EAAE,IAAI,aAAa,CAAC,QAAQ,CAAC;QACpC,MAAM;QACN,MAAM;QACN,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,IAAI,eAAe,EAAE;KACjD,EACD,QAAQ,CAAC,IAAI,EACb;QACE,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,kBAAkB,EAAE,IAAI,CAAC,WAAW;QACpC,OAAO,EAAE,IAAI,CAAC,OAAO;QACrB,UAAU;KACX,CACF,CAAC;IAEF,MAAM,KAAK,GAAG,MAAM,EAAE,KAAK,IAAI,EAAE,CAAC;IAClC,MAAM,SAAS,GAAG,UAAU,EAAE,KAAK,IAAI,EAAE,CAAC;IAE1C,mGAAmG;IACnG,mGAAmG;IACnG,iFAAiF;IACjF,IAAI,IAAI,CAAC,IAAI,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;QACxC,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE;YAC7C,MAAM,EAAE,UAAU;YAClB,GAAG,EAAE,MAAM,EAAE;YACb,OAAO,EAAE,YAAY,CAAC,QAAQ,CAAC;YAC/B,MAAM,EAAE,IAAI,CAAC,MAAM;SACpB,CAAC,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,qBAAqB,EAAE,CAAC,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;QACvF,MAAM,QAAQ,GAAG,MAAM,UAAU,CAAC,OAAO,EAAE,CAAC;QAC5C,iGAAiG;QACjG,mFAAmF;QACnF,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,QAAQ,EAAE,QAAQ,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;QACvE,OAAO;YACL,MAAM,EAAE,EAAE,QAAQ,EAAE,QAAQ,CAAC,IAAI,EAAE,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,OAAO,EAAE;YACpE,KAAK;YACL,SAAS;YACT,cAAc,EAAE,EAAE,GAAG,QAAQ,EAAE,UAAU,EAAE,QAAQ,CAAC,UAAU,EAAE;SACjE,CAAC;IACJ,CAAC;IAED,MAAM,SAAS,GAAG,cAAc,CAAC,UAAU,CAAC,QAAQ,EAAE,KAAK,CAAC,EAAE,SAAS,CAAC,CAAC;IACzE,OAAO;QACL,MAAM;QACN,KAAK;QACL,SAAS;QACT,cAAc,EAAE,KAAK,CAAC,MAAM,IAAI,SAAS,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS;KACzE,CAAC;AACJ,CAAC"}
|