cairn-engine 1.0.0 → 1.1.0
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/README.md +8 -0
- package/dist/adapters/context/inline.d.ts +0 -2
- package/dist/adapters/context/inline.js +1 -5
- package/dist/adapters/context/inline.js.map +1 -1
- package/dist/adapters/critics/assertion.d.ts +19 -5
- package/dist/adapters/critics/assertion.js +42 -14
- package/dist/adapters/critics/assertion.js.map +1 -1
- package/dist/adapters/critics/llm.d.ts +11 -6
- package/dist/adapters/critics/llm.js +33 -14
- package/dist/adapters/critics/llm.js.map +1 -1
- package/dist/adapters/planners/static.d.ts +1 -1
- package/dist/adapters/planners/static.js +4 -2
- package/dist/adapters/planners/static.js.map +1 -1
- package/dist/browser.d.ts +26 -0
- package/dist/browser.js +23 -0
- package/dist/browser.js.map +1 -0
- package/dist/core/pipeline.d.ts +4 -3
- package/dist/core/pipeline.js +10 -40
- package/dist/core/pipeline.js.map +1 -1
- package/dist/core/ports.d.ts +24 -3
- package/dist/core/steps.d.ts +22 -0
- package/dist/core/steps.js +57 -0
- package/dist/core/steps.js.map +1 -0
- package/dist/core/types.d.ts +0 -2
- package/dist/index.d.ts +4 -3
- package/dist/index.js +3 -2
- package/dist/index.js.map +1 -1
- package/dist/run.d.ts +1 -1
- package/package.json +9 -2
package/README.md
CHANGED
|
@@ -55,6 +55,14 @@ Every layer is replaceable: bring your own `Driver` (e.g. Playwright), `Critic`,
|
|
|
55
55
|
assertions / `actions` for what doesn't fit the built-ins. Nothing forces your product
|
|
56
56
|
through only what we decided.
|
|
57
57
|
|
|
58
|
+
**Browser or extension (no Node)?** `runScenario` and the default Chrome DevTools MCP driver
|
|
59
|
+
need Node. Import the browser-safe core from `cairn-engine/browser` and compose `runHarness`
|
|
60
|
+
with your own `Driver` (e.g. one over `chrome.debugger`) plus a fetch-based `LlmClient`:
|
|
61
|
+
|
|
62
|
+
```ts
|
|
63
|
+
import { runHarness, StaticPlanner, AssertionCritic, AnthropicLlmClient } from "cairn-engine/browser";
|
|
64
|
+
```
|
|
65
|
+
|
|
58
66
|
No API key needed if you have **Claude Code** installed (cairn shells out to it); set
|
|
59
67
|
`ANTHROPIC_API_KEY` to use the Anthropic API instead.
|
|
60
68
|
|
|
@@ -2,7 +2,5 @@
|
|
|
2
2
|
import type { ContextProvider } from "../../core/ports.js";
|
|
3
3
|
import type { Context } from "../../core/types.js";
|
|
4
4
|
export declare class InlineContextProvider implements ContextProvider {
|
|
5
|
-
private readonly baseUrl?;
|
|
6
|
-
constructor(baseUrl?: string | undefined);
|
|
7
5
|
provide(task: string): Promise<Context>;
|
|
8
6
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"inline.js","sourceRoot":"","sources":["../../../src/adapters/context/inline.ts"],"names":[],"mappings":"AAIA,MAAM,OAAO,qBAAqB;
|
|
1
|
+
{"version":3,"file":"inline.js","sourceRoot":"","sources":["../../../src/adapters/context/inline.ts"],"names":[],"mappings":"AAIA,MAAM,OAAO,qBAAqB;IAChC,KAAK,CAAC,OAAO,CAAC,IAAY;QACxB,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;IAC1B,CAAC;CACF"}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/** Deterministic Critic for the replay path — checks assertions against evidence, no LLM (invariant #4). */
|
|
2
|
-
import type { Critic } from "../../core/ports.js";
|
|
3
|
-
import type { Assertion, AssertionResult, Evidence, Verdict } from "../../core/types.js";
|
|
2
|
+
import type { AssertionHandler, Critic } from "../../core/ports.js";
|
|
3
|
+
import type { Assertion, AssertionResult, Context, Evidence, Verdict } from "../../core/types.js";
|
|
4
4
|
/** A product-defined check for a `{ kind: "custom", name }` assertion — the host decides what success means. */
|
|
5
5
|
export type CustomCheck = (params: Record<string, unknown>, evidence: Evidence) => boolean | {
|
|
6
6
|
passed: boolean;
|
|
@@ -10,12 +10,26 @@ export type CustomCheck = (params: Record<string, unknown>, evidence: Evidence)
|
|
|
10
10
|
detail?: string;
|
|
11
11
|
}>;
|
|
12
12
|
export type CustomChecks = Record<string, CustomCheck>;
|
|
13
|
-
/** Resolve any assertion — a registered `custom` handler, else the built-in mechanical check. */
|
|
14
|
-
export declare function resolveAssertion(assertion: Assertion, evidence: Evidence, custom?: CustomChecks): Promise<AssertionResult>;
|
|
15
13
|
/** Evaluate one mechanical assertion. `expect` is not mechanical — returns unsupported (LlmCritic handles it). */
|
|
16
14
|
export declare function checkAssertion(assertion: Assertion, evidence: Evidence): AssertionResult;
|
|
17
|
-
|
|
15
|
+
/** Built-in mechanical checks — every kind except product `custom` (`expect` yields its LlmCritic hint). */
|
|
16
|
+
export declare class MechanicalAssertionHandler implements AssertionHandler {
|
|
17
|
+
supports(assertion: Assertion): boolean;
|
|
18
|
+
judge(assertion: Assertion, evidence: Evidence): AssertionResult;
|
|
19
|
+
}
|
|
20
|
+
/** Product-defined `{ kind: "custom", name }` checks via a name→check registry. */
|
|
21
|
+
export declare class CustomAssertionHandler implements AssertionHandler {
|
|
18
22
|
private readonly custom;
|
|
23
|
+
constructor(custom?: CustomChecks);
|
|
24
|
+
supports(assertion: Assertion): boolean;
|
|
25
|
+
judge(assertion: Assertion, evidence: Evidence): Promise<AssertionResult>;
|
|
26
|
+
}
|
|
27
|
+
/** Route one assertion to the first handler that supports it (mirror of the Execute-stage step dispatch). */
|
|
28
|
+
export declare function judgeAssertion(handlers: AssertionHandler[], assertion: Assertion, evidence: Evidence, ctx?: Context): Promise<AssertionResult>;
|
|
29
|
+
/** Resolve any assertion — a registered `custom` handler, else the built-in mechanical check. */
|
|
30
|
+
export declare function resolveAssertion(assertion: Assertion, evidence: Evidence, custom?: CustomChecks): Promise<AssertionResult>;
|
|
31
|
+
export declare class AssertionCritic implements Critic {
|
|
32
|
+
private readonly handlers;
|
|
19
33
|
/** @param custom product-defined checks for `custom` assertions, keyed by name. */
|
|
20
34
|
constructor(custom?: CustomChecks);
|
|
21
35
|
judge(evidence: Evidence, assertions: Assertion[]): Promise<Verdict>;
|
|
@@ -1,14 +1,3 @@
|
|
|
1
|
-
/** Resolve any assertion — a registered `custom` handler, else the built-in mechanical check. */
|
|
2
|
-
export async function resolveAssertion(assertion, evidence, custom = {}) {
|
|
3
|
-
if (assertion.kind === "custom") {
|
|
4
|
-
const handler = custom[assertion.name];
|
|
5
|
-
if (!handler)
|
|
6
|
-
return { assertion, passed: false, detail: `no custom check registered for "${assertion.name}"` };
|
|
7
|
-
const r = await handler(assertion.params ?? {}, evidence);
|
|
8
|
-
return typeof r === "boolean" ? { assertion, passed: r } : { assertion, passed: r.passed, detail: r.detail };
|
|
9
|
-
}
|
|
10
|
-
return checkAssertion(assertion, evidence);
|
|
11
|
-
}
|
|
12
1
|
/** Requests whose failure is noise, not a regression — excluded from `no-failed-requests`. */
|
|
13
2
|
function isBenignRequest(url) {
|
|
14
3
|
return /\/favicon\.ico(\?|$)/i.test(url) || /\/robots\.txt(\?|$)/i.test(url);
|
|
@@ -52,14 +41,53 @@ export function checkAssertion(assertion, evidence) {
|
|
|
52
41
|
return { assertion, passed: false, detail: `custom check "${assertion.name}" needs a registered handler` };
|
|
53
42
|
}
|
|
54
43
|
}
|
|
55
|
-
|
|
44
|
+
/** Built-in mechanical checks — every kind except product `custom` (`expect` yields its LlmCritic hint). */
|
|
45
|
+
export class MechanicalAssertionHandler {
|
|
46
|
+
supports(assertion) {
|
|
47
|
+
return assertion.kind !== "custom";
|
|
48
|
+
}
|
|
49
|
+
judge(assertion, evidence) {
|
|
50
|
+
return checkAssertion(assertion, evidence);
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
/** Product-defined `{ kind: "custom", name }` checks via a name→check registry. */
|
|
54
|
+
export class CustomAssertionHandler {
|
|
56
55
|
custom;
|
|
57
|
-
/** @param custom product-defined checks for `custom` assertions, keyed by name. */
|
|
58
56
|
constructor(custom = {}) {
|
|
59
57
|
this.custom = custom;
|
|
60
58
|
}
|
|
59
|
+
supports(assertion) {
|
|
60
|
+
return assertion.kind === "custom";
|
|
61
|
+
}
|
|
62
|
+
async judge(assertion, evidence) {
|
|
63
|
+
if (assertion.kind !== "custom")
|
|
64
|
+
throw new Error(`custom handler received "${assertion.kind}" assertion`);
|
|
65
|
+
const check = this.custom[assertion.name];
|
|
66
|
+
if (!check)
|
|
67
|
+
return { assertion, passed: false, detail: `no custom check registered for "${assertion.name}"` };
|
|
68
|
+
const r = await check(assertion.params ?? {}, evidence);
|
|
69
|
+
return typeof r === "boolean" ? { assertion, passed: r } : { assertion, passed: r.passed, detail: r.detail };
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
/** Route one assertion to the first handler that supports it (mirror of the Execute-stage step dispatch). */
|
|
73
|
+
export async function judgeAssertion(handlers, assertion, evidence, ctx) {
|
|
74
|
+
const handler = handlers.find((h) => h.supports(assertion));
|
|
75
|
+
if (!handler)
|
|
76
|
+
return { assertion, passed: false, detail: `no critic handles "${assertion.kind}"` };
|
|
77
|
+
return handler.judge(assertion, evidence, ctx);
|
|
78
|
+
}
|
|
79
|
+
/** Resolve any assertion — a registered `custom` handler, else the built-in mechanical check. */
|
|
80
|
+
export function resolveAssertion(assertion, evidence, custom = {}) {
|
|
81
|
+
return judgeAssertion([new MechanicalAssertionHandler(), new CustomAssertionHandler(custom)], assertion, evidence);
|
|
82
|
+
}
|
|
83
|
+
export class AssertionCritic {
|
|
84
|
+
handlers;
|
|
85
|
+
/** @param custom product-defined checks for `custom` assertions, keyed by name. */
|
|
86
|
+
constructor(custom = {}) {
|
|
87
|
+
this.handlers = [new MechanicalAssertionHandler(), new CustomAssertionHandler(custom)];
|
|
88
|
+
}
|
|
61
89
|
async judge(evidence, assertions) {
|
|
62
|
-
const results = await Promise.all(assertions.map((a) =>
|
|
90
|
+
const results = await Promise.all(assertions.map((a) => judgeAssertion(this.handlers, a, evidence)));
|
|
63
91
|
return { passed: results.every((r) => r.passed), results };
|
|
64
92
|
}
|
|
65
93
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"assertion.js","sourceRoot":"","sources":["../../../src/adapters/critics/assertion.ts"],"names":[],"mappings":"AAYA,
|
|
1
|
+
{"version":3,"file":"assertion.js","sourceRoot":"","sources":["../../../src/adapters/critics/assertion.ts"],"names":[],"mappings":"AAYA,8FAA8F;AAC9F,SAAS,eAAe,CAAC,GAAW;IAClC,OAAO,uBAAuB,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,sBAAsB,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/E,CAAC;AAED,kHAAkH;AAClH,MAAM,UAAU,cAAc,CAAC,SAAoB,EAAE,QAAkB;IACrE,QAAQ,SAAS,CAAC,IAAI,EAAE,CAAC;QACvB,KAAK,WAAW,CAAC,CAAC,CAAC;YACjB,MAAM,EAAE,SAAS,EAAE,QAAQ,EAAE,GAAG,QAAQ,CAAC,SAAS,CAAC;YACnD,IAAI,CAAC,SAAS;gBAAE,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,wBAAwB,EAAE,CAAC;YACtF,IAAI,SAAS,CAAC,EAAE,IAAI,CAAC,CAAC,QAAQ,IAAI,EAAE,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC,EAAE,CAAC,EAAE,CAAC;gBAC7D,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,aAAa,QAAQ,qBAAqB,SAAS,CAAC,EAAE,EAAE,EAAE,CAAC;YACxG,CAAC;YACD,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;QACvD,CAAC;QACD,KAAK,mBAAmB,CAAC,CAAC,CAAC;YACzB,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,OAAO,CAAC,CAAC;YACxE,OAAO,MAAM,CAAC,MAAM,KAAK,CAAC;gBACxB,CAAC,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE;gBAC7B,CAAC,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,CAAC,MAAM,sBAAsB,MAAM,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC;QACpG,CAAC;QACD,KAAK,oBAAoB,CAAC,CAAC,CAAC;YAC1B,sFAAsF;YACtF,MAAM,MAAM,GAAG,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,GAAG,IAAI,CAAC,eAAe,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YACjG,OAAO,MAAM,CAAC,MAAM,KAAK,CAAC;gBACxB,CAAC,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE;gBAC7B,CAAC,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,MAAM,CAAC,MAAM,uBAAuB,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,IAAI,MAAM,CAAC,CAAC,CAAC,EAAE,GAAG,EAAE,EAAE,CAAC;QACzH,CAAC;QACD,KAAK,gBAAgB,CAAC,CAAC,CAAC;YACtB,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,WAAW,CAAC,CAAC,CAAC;YACzF,IAAI,CAAC,KAAK;gBAAE,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,uBAAuB,SAAS,CAAC,WAAW,EAAE,EAAE,CAAC;YACx
G,OAAO,KAAK,CAAC,MAAM,KAAK,SAAS,CAAC,MAAM;gBACtC,CAAC,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,GAAG,KAAK,CAAC,MAAM,IAAI,KAAK,CAAC,GAAG,EAAE,EAAE;gBACrE,CAAC,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,YAAY,SAAS,CAAC,MAAM,SAAS,KAAK,CAAC,MAAM,QAAQ,KAAK,CAAC,GAAG,EAAE,EAAE,CAAC;QACjH,CAAC;QACD,KAAK,QAAQ;YACX,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,+DAA+D,EAAE,CAAC;QAC/G,KAAK,QAAQ;YACX,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,iBAAiB,SAAS,CAAC,IAAI,8BAA8B,EAAE,CAAC;IAC/G,CAAC;AACH,CAAC;AAED,4GAA4G;AAC5G,MAAM,OAAO,0BAA0B;IACrC,QAAQ,CAAC,SAAoB;QAC3B,OAAO,SAAS,CAAC,IAAI,KAAK,QAAQ,CAAC;IACrC,CAAC;IAED,KAAK,CAAC,SAAoB,EAAE,QAAkB;QAC5C,OAAO,cAAc,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;IAC7C,CAAC;CACF;AAED,mFAAmF;AACnF,MAAM,OAAO,sBAAsB;IACJ;IAA7B,YAA6B,SAAuB,EAAE;QAAzB,WAAM,GAAN,MAAM,CAAmB;IAAG,CAAC;IAE1D,QAAQ,CAAC,SAAoB;QAC3B,OAAO,SAAS,CAAC,IAAI,KAAK,QAAQ,CAAC;IACrC,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,SAAoB,EAAE,QAAkB;QAClD,IAAI,SAAS,CAAC,IAAI,KAAK,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,4BAA4B,SAAS,CAAC,IAAI,aAAa,CAAC,CAAC;QAC1G,MAAM,KAAK,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QAC1C,IAAI,CAAC,KAAK;YAAE,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,mCAAmC,SAAS,CAAC,IAAI,GAAG,EAAE,CAAC;QAC9G,MAAM,CAAC,GAAG,MAAM,KAAK,CAAC,SAAS,CAAC,MAAM,IAAI,EAAE,EAAE,QAAQ,CAAC,CAAC;QACxD,OAAO,OAAO,CAAC,KAAK,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC;IAC/G,CAAC;CACF;AAED,6GAA6G;AAC7G,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,QAA4B,EAC5B,SAAoB,EACpB,QAAkB,EAClB,GAAa;IAEb,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;IAC5D,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,sBAAsB,SAAS,CAAC,IAAI,GAAG,EAAE,CAAC;IACnG,OAAO,OAAO,CAAC,KAAK,CAAC,SAAS,EAAE,QAAQ,EAAE,GAAG,CAAC,CAAC;AACjD,CAAC;AAED,iGAAiG;AACjG,MAAM,UAAU,gBAAgB,CAC9B,SAAoB,EACpB,QAAkB,EAClB,SAAuB,EAAE;IAEzB,OAAO,cAAc,CAAC,CAAC,IAAI,0BAA0B,EAAE,EAAE,IA
AI,sBAAsB,CAAC,MAAM,CAAC,CAAC,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC;AACrH,CAAC;AAED,MAAM,OAAO,eAAe;IACT,QAAQ,CAAqB;IAE9C,mFAAmF;IACnF,YAAY,SAAuB,EAAE;QACnC,IAAI,CAAC,QAAQ,GAAG,CAAC,IAAI,0BAA0B,EAAE,EAAE,IAAI,sBAAsB,CAAC,MAAM,CAAC,CAAC,CAAC;IACzF,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,QAAkB,EAAE,UAAuB;QACrD,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,EAAE,QAAQ,CAAC,CAAC,CAAC,CAAC;QACrG,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,OAAO,EAAE,CAAC;IAC7D,CAAC;CACF"}
|
|
@@ -1,12 +1,17 @@
|
|
|
1
1
|
import type { CustomChecks } from "./assertion.js";
|
|
2
|
-
import type { Critic, LlmClient } from "../../core/ports.js";
|
|
3
|
-
import type { Assertion, Evidence, Verdict } from "../../core/types.js";
|
|
2
|
+
import type { AssertionHandler, Critic, LlmClient } from "../../core/ports.js";
|
|
3
|
+
import type { Assertion, AssertionResult, Context, Evidence, Verdict } from "../../core/types.js";
|
|
4
4
|
/** Compact, judge-friendly rendering of the three evidence layers. */
|
|
5
5
|
export declare function summarizeEvidence(evidence: Evidence): string;
|
|
6
|
-
|
|
6
|
+
/** Judges natural-language `expect` criteria with an LLM, grounded in the evidence and task intent. */
|
|
7
|
+
export declare class ExpectAssertionHandler implements AssertionHandler {
|
|
7
8
|
private readonly llm;
|
|
8
|
-
|
|
9
|
+
constructor(llm: LlmClient);
|
|
10
|
+
supports(assertion: Assertion): boolean;
|
|
11
|
+
judge(assertion: Assertion, evidence: Evidence, ctx?: Context): Promise<AssertionResult>;
|
|
12
|
+
}
|
|
13
|
+
export declare class LlmCritic implements Critic {
|
|
14
|
+
private readonly handlers;
|
|
9
15
|
constructor(llm: LlmClient, custom?: CustomChecks);
|
|
10
|
-
|
|
11
|
-
judge(evidence: Evidence, assertions: Assertion[]): Promise<Verdict>;
|
|
16
|
+
judge(evidence: Evidence, assertions: Assertion[], ctx?: Context): Promise<Verdict>;
|
|
12
17
|
}
|
|
@@ -2,9 +2,10 @@
|
|
|
2
2
|
* Critic that judges natural-language `expect` criteria with an LLM and delegates mechanical
|
|
3
3
|
* assertions to the deterministic checker. The LLM runs ONLY for `expect`, so a scenario with
|
|
4
4
|
* none makes zero LLM calls and stays deterministic (invariant #4). Judgment is grounded in the
|
|
5
|
-
* three-layer evidence (design §6),
|
|
5
|
+
* three-layer evidence (design §6) and, when a ContextProvider supplied one, the task intent —
|
|
6
|
+
* behind the LlmClient seam (invariant #5).
|
|
6
7
|
*/
|
|
7
|
-
import {
|
|
8
|
+
import { CustomAssertionHandler, MechanicalAssertionHandler, judgeAssertion } from "./assertion.js";
|
|
8
9
|
const SYSTEM = "You are a QA critic. Given observed evidence from a browser run and a success " +
|
|
9
10
|
"criterion, decide whether the criterion is satisfied. Judge only from the evidence; " +
|
|
10
11
|
'do not assume. Respond with strict JSON, no prose, no code fences: {"passed":true|false,"detail":"<short reason>"}.';
|
|
@@ -35,22 +36,30 @@ function parseVerdict(text) {
|
|
|
35
36
|
const obj = JSON.parse(s.slice(start, end + 1));
|
|
36
37
|
return { passed: obj.passed === true, detail: typeof obj.detail === "string" ? obj.detail : undefined };
|
|
37
38
|
}
|
|
38
|
-
|
|
39
|
+
/** Judges natural-language `expect` criteria with an LLM, grounded in the evidence and task intent. */
|
|
40
|
+
export class ExpectAssertionHandler {
|
|
39
41
|
llm;
|
|
40
|
-
|
|
41
|
-
constructor(llm, custom = {}) {
|
|
42
|
+
constructor(llm) {
|
|
42
43
|
this.llm = llm;
|
|
43
|
-
this.custom = custom;
|
|
44
44
|
}
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
45
|
+
supports(assertion) {
|
|
46
|
+
return assertion.kind === "expect";
|
|
47
|
+
}
|
|
48
|
+
async judge(assertion, evidence, ctx) {
|
|
49
|
+
if (assertion.kind !== "expect")
|
|
50
|
+
throw new Error(`expect handler received "${assertion.kind}" assertion`);
|
|
51
|
+
const lines = [
|
|
52
|
+
`Success criterion: ${assertion.criterion}`,
|
|
48
53
|
``,
|
|
49
54
|
`Evidence:`,
|
|
50
55
|
summarizeEvidence(evidence),
|
|
51
56
|
``,
|
|
52
57
|
`Is the criterion satisfied? Respond with JSON only.`,
|
|
53
|
-
]
|
|
58
|
+
];
|
|
59
|
+
// Ground the judgment in the run's intent when a ContextProvider supplied one.
|
|
60
|
+
if (ctx?.intent)
|
|
61
|
+
lines.unshift(`Task intent: ${ctx.intent}`, ``);
|
|
62
|
+
const prompt = lines.join("\n");
|
|
54
63
|
try {
|
|
55
64
|
const reply = await this.llm.complete(prompt, { system: SYSTEM });
|
|
56
65
|
const v = parseVerdict(reply);
|
|
@@ -60,10 +69,20 @@ export class LlmCritic {
|
|
|
60
69
|
return { assertion, passed: false, detail: `LLM judgment failed: ${err instanceof Error ? err.message : String(err)}` };
|
|
61
70
|
}
|
|
62
71
|
}
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
72
|
+
}
|
|
73
|
+
export class LlmCritic {
|
|
74
|
+
handlers;
|
|
75
|
+
constructor(llm, custom = {}) {
|
|
76
|
+
// `expect` → LLM (first, so it wins); everything else falls through to the same
|
|
77
|
+
// mechanical/custom handlers AssertionCritic uses. The two critics differ only here.
|
|
78
|
+
this.handlers = [
|
|
79
|
+
new ExpectAssertionHandler(llm),
|
|
80
|
+
new MechanicalAssertionHandler(),
|
|
81
|
+
new CustomAssertionHandler(custom),
|
|
82
|
+
];
|
|
83
|
+
}
|
|
84
|
+
async judge(evidence, assertions, ctx) {
|
|
85
|
+
const results = await Promise.all(assertions.map((a) => judgeAssertion(this.handlers, a, evidence, ctx)));
|
|
67
86
|
return { passed: results.every((r) => r.passed), results };
|
|
68
87
|
}
|
|
69
88
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"llm.js","sourceRoot":"","sources":["../../../src/adapters/critics/llm.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"llm.js","sourceRoot":"","sources":["../../../src/adapters/critics/llm.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AACH,OAAO,EAAE,sBAAsB,EAAE,0BAA0B,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAKpG,MAAM,MAAM,GACV,gFAAgF;IAChF,sFAAsF;IACtF,qHAAqH,CAAC;AAExH,sEAAsE;AACtE,MAAM,UAAU,iBAAiB,CAAC,QAAkB;IAClD,MAAM,EAAE,SAAS,EAAE,KAAK,EAAE,GAAG,QAAQ,CAAC;IACtC,MAAM,QAAQ,GAAG,KAAK,CAAC,QAAQ;SAC5B,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC;SACZ,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,GAAG,EAAE,CAAC;SAC9C,IAAI,CAAC,IAAI,CAAC,CAAC;IACd,MAAM,MAAM,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,OAAO,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;IAClF,OAAO;QACL,cAAc,SAAS,CAAC,SAAS,EAAE;QACnC,aAAa,SAAS,CAAC,QAAQ,IAAI,QAAQ,EAAE;QAC7C,YAAY,SAAS,CAAC,OAAO,EAAE;QAC/B,aAAa,KAAK,CAAC,QAAQ,CAAC,MAAM,IAAI;QACtC,QAAQ,IAAI,QAAQ;QACpB,mBAAmB,MAAM,CAAC,MAAM,IAAI;QACpC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,QAAQ;KAC9B,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACf,CAAC;AAED,SAAS,YAAY,CAAC,IAAY;IAChC,IAAI,CAAC,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,mBAAmB,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;IAC7E,MAAM,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;IAC7B,MAAM,GAAG,GAAG,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAC/B,IAAI,KAAK,KAAK,CAAC,CAAC,IAAI,GAAG,KAAK,CAAC,CAAC;QAAE,MAAM,IAAI,KAAK,CAAC,4BAA4B,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC;IAClG,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,GAAG,CAAC,CAAC,CAA2C,CAAC;IAC1F,OAAO,EAAE,MAAM,EAAE,GAAG,CAAC,MAAM,KAAK,IAAI,EAAE,MAAM,EAAE,OAAO,GAAG,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC;AAC1G,CAAC;AAED,uGAAuG;AACvG,MAAM,OAAO,sBAAsB;IACJ;IAA7B,YAA6B,GAAc;QAAd,QAAG,GAAH,GAAG,CAAW;IAAG,CAAC;IAE/C,QAAQ,CAAC,SAAoB;QAC3B,OAAO,SAAS,CAAC,IAAI,KAAK,QAAQ,CAAC;IACrC,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,SAAoB,EAAE,QAAkB,EAAE,GAAa;QACjE,IAAI,SAAS,CAAC,IAAI,KAAK,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,4BAA4B,SAAS,CA
AC,IAAI,aAAa,CAAC,CAAC;QAC1G,MAAM,KAAK,GAAG;YACZ,sBAAsB,SAAS,CAAC,SAAS,EAAE;YAC3C,EAAE;YACF,WAAW;YACX,iBAAiB,CAAC,QAAQ,CAAC;YAC3B,EAAE;YACF,qDAAqD;SACtD,CAAC;QACF,+EAA+E;QAC/E,IAAI,GAAG,EAAE,MAAM;YAAE,KAAK,CAAC,OAAO,CAAC,gBAAgB,GAAG,CAAC,MAAM,EAAE,EAAE,EAAE,CAAC,CAAC;QACjE,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAChC,IAAI,CAAC;YACH,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,MAAM,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;YAClE,MAAM,CAAC,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC;YAC9B,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC,MAAM,IAAI,aAAa,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,EAAE,CAAC;QACzF,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,wBAAwB,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;QAC1H,CAAC;IACH,CAAC;CACF;AAED,MAAM,OAAO,SAAS;IACH,QAAQ,CAAqB;IAE9C,YAAY,GAAc,EAAE,SAAuB,EAAE;QACnD,gFAAgF;QAChF,qFAAqF;QACrF,IAAI,CAAC,QAAQ,GAAG;YACd,IAAI,sBAAsB,CAAC,GAAG,CAAC;YAC/B,IAAI,0BAA0B,EAAE;YAChC,IAAI,sBAAsB,CAAC,MAAM,CAAC;SACnC,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,QAAkB,EAAE,UAAuB,EAAE,GAAa;QACpE,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,GAAG,CAC/B,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,EAAE,QAAQ,EAAE,GAAG,CAAC,CAAC,CACvE,CAAC;QACF,OAAO,EAAE,MAAM,EAAE,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,OAAO,EAAE,CAAC;IAC7D,CAAC;CACF"}
|
|
@@ -3,8 +3,10 @@ export class StaticPlanner {
|
|
|
3
3
|
constructor(scenario) {
|
|
4
4
|
this.scenario = scenario;
|
|
5
5
|
}
|
|
6
|
-
async plan(
|
|
7
|
-
|
|
6
|
+
async plan(ctx) {
|
|
7
|
+
// A custom ContextProvider can relabel the run through intent; on the default
|
|
8
|
+
// replay path the task is scenario.name, so intent === name and this is a no-op.
|
|
9
|
+
return { ...this.scenario, name: ctx.intent || this.scenario.name };
|
|
8
10
|
}
|
|
9
11
|
}
|
|
10
12
|
//# sourceMappingURL=static.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"static.js","sourceRoot":"","sources":["../../../src/adapters/planners/static.ts"],"names":[],"mappings":"AAIA,MAAM,OAAO,aAAa;IACK;IAA7B,YAA6B,QAAkB;QAAlB,aAAQ,GAAR,QAAQ,CAAU;IAAG,CAAC;IAEnD,KAAK,CAAC,IAAI,CAAC,
|
|
1
|
+
{"version":3,"file":"static.js","sourceRoot":"","sources":["../../../src/adapters/planners/static.ts"],"names":[],"mappings":"AAIA,MAAM,OAAO,aAAa;IACK;IAA7B,YAA6B,QAAkB;QAAlB,aAAQ,GAAR,QAAQ,CAAU;IAAG,CAAC;IAEnD,KAAK,CAAC,IAAI,CAAC,GAAY;QACrB,8EAA8E;QAC9E,iFAAiF;QACjF,OAAO,EAAE,GAAG,IAAI,CAAC,QAAQ,EAAE,IAAI,EAAE,GAAG,CAAC,MAAM,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC;IACtE,CAAC;CACF"}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Browser-safe surface of cairn-engine — the runtime-agnostic core plus the pure adapters,
|
|
3
|
+
* for environments without Node (a Chrome extension, a web app). Excludes the Node-only
|
|
4
|
+
* adapters (Chrome DevTools MCP driver, Claude CLI client, fs-based reporters / skill store)
|
|
5
|
+
* and `runScenario`, which statically wires them.
|
|
6
|
+
*
|
|
7
|
+
* Compose `runHarness` with your own `Driver` (e.g. a CDP/extension driver) and, for discover or
|
|
8
|
+
* `expect`, a fetch-based `LlmClient` (`AnthropicLlmClient`). Same core, different hands.
|
|
9
|
+
*/
|
|
10
|
+
export * from "./core/types.js";
|
|
11
|
+
export * from "./core/ports.js";
|
|
12
|
+
export { runHarness } from "./core/pipeline.js";
|
|
13
|
+
export type { RunHarnessOptions } from "./core/pipeline.js";
|
|
14
|
+
export { BuiltinStepHandler, CustomStepHandler, defaultStepHandlers } from "./core/steps.js";
|
|
15
|
+
export { InlineContextProvider } from "./adapters/context/inline.js";
|
|
16
|
+
export { StaticPlanner } from "./adapters/planners/static.js";
|
|
17
|
+
export { AssertionCritic, checkAssertion, resolveAssertion, judgeAssertion, MechanicalAssertionHandler, CustomAssertionHandler, } from "./adapters/critics/assertion.js";
|
|
18
|
+
export type { CustomCheck, CustomChecks } from "./adapters/critics/assertion.js";
|
|
19
|
+
export { LlmCritic, ExpectAssertionHandler, summarizeEvidence } from "./adapters/critics/llm.js";
|
|
20
|
+
export { ConsoleReporter } from "./adapters/reporters/console.js";
|
|
21
|
+
export { FakeDriver } from "./adapters/drivers/fake.js";
|
|
22
|
+
export { SelfHealingDriver, parseHealChoice } from "./adapters/drivers/self-heal.js";
|
|
23
|
+
export type { Heal, SelfHealOptions } from "./adapters/drivers/self-heal.js";
|
|
24
|
+
export { AnthropicLlmClient } from "./adapters/llm/anthropic.js";
|
|
25
|
+
export { discover, parseDecision } from "./core/discover.js";
|
|
26
|
+
export type { DiscoverOptions, Decision } from "./core/discover.js";
|
package/dist/browser.js
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Browser-safe surface of cairn-engine — the runtime-agnostic core plus the pure adapters,
|
|
3
|
+
* for environments without Node (a Chrome extension, a web app). Excludes the Node-only
|
|
4
|
+
* adapters (Chrome DevTools MCP driver, Claude CLI client, fs-based reporters / skill store)
|
|
5
|
+
* and `runScenario`, which statically wires them.
|
|
6
|
+
*
|
|
7
|
+
* Compose `runHarness` with your own `Driver` (e.g. a CDP/extension driver) and, for discover or
|
|
8
|
+
* `expect`, a fetch-based `LlmClient` (`AnthropicLlmClient`). Same core, different hands.
|
|
9
|
+
*/
|
|
10
|
+
export * from "./core/types.js";
|
|
11
|
+
export * from "./core/ports.js";
|
|
12
|
+
export { runHarness } from "./core/pipeline.js";
|
|
13
|
+
export { BuiltinStepHandler, CustomStepHandler, defaultStepHandlers } from "./core/steps.js";
|
|
14
|
+
export { InlineContextProvider } from "./adapters/context/inline.js";
|
|
15
|
+
export { StaticPlanner } from "./adapters/planners/static.js";
|
|
16
|
+
export { AssertionCritic, checkAssertion, resolveAssertion, judgeAssertion, MechanicalAssertionHandler, CustomAssertionHandler, } from "./adapters/critics/assertion.js";
|
|
17
|
+
export { LlmCritic, ExpectAssertionHandler, summarizeEvidence } from "./adapters/critics/llm.js";
|
|
18
|
+
export { ConsoleReporter } from "./adapters/reporters/console.js";
|
|
19
|
+
export { FakeDriver } from "./adapters/drivers/fake.js";
|
|
20
|
+
export { SelfHealingDriver, parseHealChoice } from "./adapters/drivers/self-heal.js";
|
|
21
|
+
export { AnthropicLlmClient } from "./adapters/llm/anthropic.js";
|
|
22
|
+
export { discover, parseDecision } from "./core/discover.js";
|
|
23
|
+
//# sourceMappingURL=browser.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"browser.js","sourceRoot":"","sources":["../src/browser.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AACH,cAAc,iBAAiB,CAAC;AAChC,cAAc,iBAAiB,CAAC;AAChC,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAEhD,OAAO,EAAE,kBAAkB,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AAE7F,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,aAAa,EAAE,MAAM,+BAA+B,CAAC;AAC9D,OAAO,EACL,eAAe,EACf,cAAc,EACd,gBAAgB,EAChB,cAAc,EACd,0BAA0B,EAC1B,sBAAsB,GACvB,MAAM,iCAAiC,CAAC;AAEzC,OAAO,EAAE,SAAS,EAAE,sBAAsB,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AACjG,OAAO,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAClE,OAAO,EAAE,UAAU,EAAE,MAAM,4BAA4B,CAAC;AACxD,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAGrF,OAAO,EAAE,kBAAkB,EAAE,MAAM,6BAA6B,CAAC;AAEjE,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC"}
|
package/dist/core/pipeline.d.ts
CHANGED
|
@@ -3,10 +3,8 @@
|
|
|
3
3
|
* is injected (invariant #2); with a fixed-scenario Planner + deterministic Critic, no LLM
|
|
4
4
|
* runs (invariant #4).
|
|
5
5
|
*/
|
|
6
|
-
import type {
|
|
6
|
+
import type { CustomAction, Harness, StepHandler } from "./ports.js";
|
|
7
7
|
import type { Result, StepProgress } from "./types.js";
|
|
8
|
-
/** A product-defined interaction for a `{ kind: "custom", name }` step — composes the Driver. */
|
|
9
|
-
export type CustomAction = (driver: Driver, params: Record<string, unknown>) => Promise<void>;
|
|
10
8
|
/**
|
|
11
9
|
* Seams a host (CLI, desktop app, CI) plugs into — the engine emits/accepts, the host
|
|
12
10
|
* decides what to do. `onStep` for a live timeline, `captureScreenshots` for visual
|
|
@@ -17,6 +15,9 @@ export interface RunHarnessOptions {
|
|
|
17
15
|
signal?: AbortSignal;
|
|
18
16
|
onStep?: (progress: StepProgress) => void;
|
|
19
17
|
captureScreenshots?: boolean;
|
|
18
|
+
/** Product-defined interactions for `{ kind: "custom", name }` steps, registered by name. */
|
|
20
19
|
actions?: Record<string, CustomAction>;
|
|
20
|
+
/** Replace the Execute-stage dispatch chain entirely (advanced); defaults to built-ins + `actions`. */
|
|
21
|
+
stepHandlers?: StepHandler[];
|
|
21
22
|
}
|
|
22
23
|
export declare function runHarness(harness: Harness, task: string, opts?: RunHarnessOptions): Promise<Result>;
|
package/dist/core/pipeline.js
CHANGED
|
@@ -1,42 +1,11 @@
|
|
|
1
|
-
|
|
1
|
+
import { defaultStepHandlers } from "./steps.js";
|
|
2
|
+
/** Route one step to the first handler that supports it; record success/failure either way. */
|
|
3
|
+
async function executeStep(handlers, step, driver) {
|
|
2
4
|
try {
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
case "click":
|
|
8
|
-
await driver.click(step.target);
|
|
9
|
-
break;
|
|
10
|
-
case "doubleClick":
|
|
11
|
-
await driver.doubleClick(step.target);
|
|
12
|
-
break;
|
|
13
|
-
case "hover":
|
|
14
|
-
await driver.hover(step.target);
|
|
15
|
-
break;
|
|
16
|
-
case "type":
|
|
17
|
-
await driver.type(step.target, step.text);
|
|
18
|
-
break;
|
|
19
|
-
case "select":
|
|
20
|
-
await driver.select(step.target, step.value);
|
|
21
|
-
break;
|
|
22
|
-
case "pressKey":
|
|
23
|
-
await driver.pressKey(step.key);
|
|
24
|
-
break;
|
|
25
|
-
case "scroll":
|
|
26
|
-
await driver.scroll(step.direction);
|
|
27
|
-
break;
|
|
28
|
-
case "custom": {
|
|
29
|
-
const handler = actions[step.name];
|
|
30
|
-
if (!handler)
|
|
31
|
-
throw new Error(`no handler registered for custom action "${step.name}"`);
|
|
32
|
-
await handler(driver, step.params ?? {});
|
|
33
|
-
break;
|
|
34
|
-
}
|
|
35
|
-
default: {
|
|
36
|
-
const unhandled = step;
|
|
37
|
-
throw new Error(`unhandled step kind: ${JSON.stringify(unhandled)}`);
|
|
38
|
-
}
|
|
39
|
-
}
|
|
5
|
+
const handler = handlers.find((h) => h.supports(step));
|
|
6
|
+
if (!handler)
|
|
7
|
+
throw new Error(`no step handler for kind "${step.kind}"`);
|
|
8
|
+
await handler.execute(step, driver);
|
|
40
9
|
return { step, ok: true };
|
|
41
10
|
}
|
|
42
11
|
catch (err) {
|
|
@@ -45,6 +14,7 @@ async function executeStep(driver, step, actions) {
|
|
|
45
14
|
}
|
|
46
15
|
export async function runHarness(harness, task, opts = {}) {
|
|
47
16
|
const { context, planner, driver, critic, reporter } = harness;
|
|
17
|
+
const handlers = opts.stepHandlers ?? defaultStepHandlers(opts.actions ?? {});
|
|
48
18
|
const ctx = await context.provide(task);
|
|
49
19
|
const scenario = await planner.plan(ctx);
|
|
50
20
|
// Drive steps; stop on the first failure but still observe the resulting state.
|
|
@@ -52,7 +22,7 @@ export async function runHarness(harness, task, opts = {}) {
|
|
|
52
22
|
try {
|
|
53
23
|
for (const step of scenario.steps) {
|
|
54
24
|
opts.signal?.throwIfAborted(); // cooperative cancellation between steps (host owns Stop)
|
|
55
|
-
const result = await executeStep(
|
|
25
|
+
const result = await executeStep(handlers, step, driver);
|
|
56
26
|
actions.push(result);
|
|
57
27
|
if (opts.onStep) {
|
|
58
28
|
const screenshot = opts.captureScreenshots ? await driver.screenshot().catch(() => undefined) : undefined;
|
|
@@ -68,7 +38,7 @@ export async function runHarness(harness, task, opts = {}) {
|
|
|
68
38
|
...observed,
|
|
69
39
|
execution: { ...observed.execution, actions, blocked: actions.some((a) => !a.ok) },
|
|
70
40
|
};
|
|
71
|
-
const verdict = await critic.judge(evidence, scenario.assertions);
|
|
41
|
+
const verdict = await critic.judge(evidence, scenario.assertions, ctx);
|
|
72
42
|
const out = { scenario: scenario.name, context: ctx, evidence, verdict };
|
|
73
43
|
await reporter.emit(out);
|
|
74
44
|
return out;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/core/pipeline.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/core/pipeline.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,mBAAmB,EAAE,MAAM,YAAY,CAAC;AAkBjD,+FAA+F;AAC/F,KAAK,UAAU,WAAW,CAAC,QAAuB,EAAE,IAAU,EAAE,MAAc;IAC5E,IAAI,CAAC;QACH,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;QACvD,IAAI,CAAC,OAAO;YAAE,MAAM,IAAI,KAAK,CAAC,6BAA6B,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC;QACzE,MAAM,OAAO,CAAC,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;QACpC,OAAO,EAAE,IAAI,EAAE,EAAE,EAAE,IAAI,EAAE,CAAC;IAC5B,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,EAAE,IAAI,EAAE,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;IACtF,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,OAAgB,EAChB,IAAY,EACZ,OAA0B,EAAE;IAE5B,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,GAAG,OAAO,CAAC;IAC/D,MAAM,QAAQ,GAAG,IAAI,CAAC,YAAY,IAAI,mBAAmB,CAAC,IAAI,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC;IAE9E,MAAM,GAAG,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;IACxC,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAEzC,gFAAgF;IAChF,MAAM,OAAO,GAAqB,EAAE,CAAC;IACrC,IAAI,CAAC;QACH,KAAK,MAAM,IAAI,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;YAClC,IAAI,CAAC,MAAM,EAAE,cAAc,EAAE,CAAC,CAAC,0DAA0D;YACzF,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,QAAQ,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;YACzD,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACrB,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;gBAChB,MAAM,UAAU,GAAG,IAAI,CAAC,kBAAkB,CAAC,CAAC,CAAC,MAAM,MAAM,CAAC,UAAU,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;gBAC1G,IAAI,CAAC,MAAM,CAAC,EAAE,KAAK,EAAE,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,IAAI,EAAE,EAAE,EAAE,MAAM,CAAC,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,UAAU,EAAE,CAAC,CAAC;YACnG,CAAC;YACD,IAAI,CAAC,MAAM,CAAC,EAAE;gBAAE,MAAM;QACxB,CAAC;QAED,6FAA6F;QAC7F,MAAM,MAAM,CAAC,MAAM,EAAE,CAAC;QAEtB,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,OAAO,EAAE,CAAC;QACxC,MAAM,QAAQ,GAAa;YACzB,GAAG,QAAQ;YACX,SAAS,EAAE,EAAE,GAAG,QAAQ,CAAC,SAAS,EAAE,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,IAAI,CAAC,CAA
C,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE;SACnF,CAAC;QAEF,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,QAAQ,EAAE,QAAQ,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;QACvE,MAAM,GAAG,GAAW,EAAE,QAAQ,EAAE,QAAQ,CAAC,IAAI,EAAE,OAAO,EAAE,GAAG,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC;QACjF,MAAM,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACzB,OAAO,GAAG,CAAC;IACb,CAAC;YAAS,CAAC;QACT,MAAM,MAAM,CAAC,KAAK,EAAE,CAAC;IACvB,CAAC;AACH,CAAC"}
|
package/dist/core/ports.d.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* The ports of the cairn engine (invariant #2: add behavior by implementing one of these,
|
|
3
3
|
* never by branching inside a stage). Core depends only on these; `../adapters` implement them.
|
|
4
4
|
*/
|
|
5
|
-
import type { Assertion, Context, Evidence, PageElement, Result, Scenario, SettleOptions, Target, Verdict } from "./types.js";
|
|
5
|
+
import type { Assertion, AssertionResult, Context, Evidence, PageElement, Result, Scenario, SettleOptions, Step, Target, Verdict } from "./types.js";
|
|
6
6
|
/** Grounding from any source (NL, git diff, ticket, RAG). */
|
|
7
7
|
export interface ContextProvider {
|
|
8
8
|
provide(task: string): Promise<Context>;
|
|
@@ -34,6 +34,17 @@ export interface Driver {
|
|
|
34
34
|
observe(): Promise<Evidence>;
|
|
35
35
|
close(): Promise<void>;
|
|
36
36
|
}
|
|
37
|
+
/** A product-defined interaction for a `{ kind: "custom", name }` step — composes the Driver. */
|
|
38
|
+
export type CustomAction = (driver: Driver, params: Record<string, unknown>) => Promise<void>;
|
|
39
|
+
/**
|
|
40
|
+
* One link in the Execute stage's dispatch chain (invariant #2): the pipeline routes each Step
|
|
41
|
+
* to the first handler that `supports` it, instead of branching inside the stage. Built-in kinds
|
|
42
|
+
* and product `custom` actions resolve through this one seam (Spring `HandlerAdapter`-style).
|
|
43
|
+
*/
|
|
44
|
+
export interface StepHandler {
|
|
45
|
+
supports(step: Step): boolean;
|
|
46
|
+
execute(step: Step, driver: Driver): Promise<void>;
|
|
47
|
+
}
|
|
37
48
|
export interface Skill {
|
|
38
49
|
name: string;
|
|
39
50
|
scenario: Scenario;
|
|
@@ -41,9 +52,19 @@ export interface Skill {
|
|
|
41
52
|
export interface SkillStore {
|
|
42
53
|
resolve(name: string): Promise<Skill | undefined>;
|
|
43
54
|
}
|
|
44
|
-
/**
|
|
55
|
+
/**
|
|
56
|
+
* One link in the Judge stage's dispatch chain (mirror of StepHandler): a Critic routes each
|
|
57
|
+
* Assertion to the first handler that `supports` it. Mechanical, product `custom`, and LLM
|
|
58
|
+
* `expect` checks compose as separate handlers — critics differ only by which they register.
|
|
59
|
+
* Optional `ctx` grounds LLM judgment (e.g. the task intent); deterministic handlers ignore it.
|
|
60
|
+
*/
|
|
61
|
+
export interface AssertionHandler {
|
|
62
|
+
supports(assertion: Assertion): boolean;
|
|
63
|
+
judge(assertion: Assertion, evidence: Evidence, ctx?: Context): AssertionResult | Promise<AssertionResult>;
|
|
64
|
+
}
|
|
65
|
+
/** Judges evidence against assertions (mechanical, baseline, or LLM). Optional `ctx` grounds LLM judgment (e.g. the task intent); deterministic critics ignore it, so replay stays deterministic (invariant #4). */
|
|
45
66
|
export interface Critic {
|
|
46
|
-
judge(evidence: Evidence, assertions: Assertion[]): Promise<Verdict>;
|
|
67
|
+
judge(evidence: Evidence, assertions: Assertion[], ctx?: Context): Promise<Verdict>;
|
|
47
68
|
}
|
|
48
69
|
/** Emits a result anywhere — console, json, an arbitrary tracker. */
|
|
49
70
|
export interface Reporter {
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Execute-stage step handlers (invariant #2). The pipeline routes each Step to the first
|
|
3
|
+
* handler that `supports` it (DispatcherServlet-style) — built-in kinds and product-defined
|
|
4
|
+
* `custom` actions resolve through the same `StepHandler` seam, so adding an action means
|
|
5
|
+
* registering a handler, never editing a stage. Depends only on core ports/types.
|
|
6
|
+
*/
|
|
7
|
+
import type { CustomAction, Driver, StepHandler } from "./ports.js";
|
|
8
|
+
import type { Step } from "./types.js";
|
|
9
|
+
/** Handles cairn's built-in step vocabulary — every kind except product-defined `custom`. */
|
|
10
|
+
export declare class BuiltinStepHandler implements StepHandler {
|
|
11
|
+
supports(step: Step): boolean;
|
|
12
|
+
execute(step: Step, driver: Driver): Promise<void>;
|
|
13
|
+
}
|
|
14
|
+
/** Handles product-defined `{ kind: "custom", name }` steps via a name→action registry. */
|
|
15
|
+
export declare class CustomStepHandler implements StepHandler {
|
|
16
|
+
private readonly actions;
|
|
17
|
+
constructor(actions?: Record<string, CustomAction>);
|
|
18
|
+
supports(step: Step): boolean;
|
|
19
|
+
execute(step: Step, driver: Driver): Promise<void>;
|
|
20
|
+
}
|
|
21
|
+
/** The engine's default Execute-stage chain: built-ins first, then product `custom` actions. */
|
|
22
|
+
export declare function defaultStepHandlers(actions?: Record<string, CustomAction>): StepHandler[];
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/** Step kinds `BuiltinStepHandler` can execute; keep in sync with the `switch` in `execute`. */
const BUILTIN_STEP_KINDS = new Set([
    "goto",
    "click",
    "doubleClick",
    "hover",
    "type",
    "select",
    "pressKey",
    "scroll",
]);
/**
 * Handles cairn's built-in step vocabulary — every kind except product-defined `custom`.
 *
 * `supports` claims only the kinds `execute` can actually run. An unrecognised kind is therefore
 * left for a later handler in the chain instead of being claimed here and then rejected, which
 * would make the position of a product-supplied handler in the chain matter.
 */
export class BuiltinStepHandler {
    /** True only for the built-in kinds listed in `BUILTIN_STEP_KINDS`. */
    supports(step) {
        return BUILTIN_STEP_KINDS.has(step.kind);
    }
    /**
     * Forwards the step to the matching `Driver` method and resolves with whatever it resolves to.
     *
     * @throws Error if handed a `custom` step or a kind no case handles.
     */
    async execute(step, driver) {
        switch (step.kind) {
            case "goto":
                return driver.goto(step.url);
            case "click":
                return driver.click(step.target);
            case "doubleClick":
                return driver.doubleClick(step.target);
            case "hover":
                return driver.hover(step.target);
            case "type":
                return driver.type(step.target, step.text);
            case "select":
                return driver.select(step.target, step.value);
            case "pressKey":
                return driver.pressKey(step.key);
            case "scroll":
                return driver.scroll(step.direction);
            case "custom":
                // Owned by CustomStepHandler; reaching here means a handler-ordering bug, not bad input.
                throw new Error(`built-in handler received custom step "${step.name}"`);
            default: {
                // Still guards direct callers that bypass `supports` with an unknown kind.
                const unhandled = step;
                throw new Error(`unhandled step kind: ${JSON.stringify(unhandled)}`);
            }
        }
    }
}
|
|
35
|
+
/**
 * Handles product-defined `{ kind: "custom", name }` steps via a name→action registry.
 *
 * Only the registry's own properties count as registered actions. A step named after an inherited
 * `Object.prototype` member (`toString`, `constructor`, …) is reported as unregistered rather than
 * invoking that member and being recorded as a successful step.
 */
export class CustomStepHandler {
    actions;
    /** @param actions registry mapping a custom step's `name` to the action that performs it. */
    constructor(actions = {}) {
        this.actions = actions;
    }
    /** True for every `custom` step, whether or not its name is registered. */
    supports(step) {
        return step.kind === "custom";
    }
    /**
     * Runs the action registered under `step.name`, passing the driver and `step.params`
     * (an empty object when the step carries none).
     *
     * @throws Error if the step is not `custom` or no action is registered under its name.
     */
    async execute(step, driver) {
        if (step.kind !== "custom")
            throw new Error(`custom handler received "${step.kind}" step`);
        // Own-property check: a plain index lookup would also find Object.prototype members.
        const registered = Object.prototype.hasOwnProperty.call(this.actions, step.name);
        const action = registered ? this.actions[step.name] : undefined;
        if (!action)
            throw new Error(`no handler registered for custom action "${step.name}"`);
        await action(driver, step.params ?? {});
    }
}
|
|
53
|
+
/**
 * Builds the engine's default Execute-stage chain.
 *
 * @param actions name→action registry handed to the `custom` step handler (empty by default).
 * @returns a new two-element array: the built-in handler first, then the product `custom` handler.
 */
export function defaultStepHandlers(actions = {}) {
    const builtinHandler = new BuiltinStepHandler();
    const customHandler = new CustomStepHandler(actions);
    return [builtinHandler, customHandler];
}
|
|
57
|
+
//# sourceMappingURL=steps.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"steps.js","sourceRoot":"","sources":["../../src/core/steps.ts"],"names":[],"mappings":"AASA,6FAA6F;AAC7F,MAAM,OAAO,kBAAkB;IAC7B,QAAQ,CAAC,IAAU;QACjB,OAAO,IAAI,CAAC,IAAI,KAAK,QAAQ,CAAC;IAChC,CAAC;IAED,KAAK,CAAC,OAAO,CAAC,IAAU,EAAE,MAAc;QACtC,QAAQ,IAAI,CAAC,IAAI,EAAE,CAAC;YAClB,KAAK,MAAM;gBACT,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YAC/B,KAAK,OAAO;gBACV,OAAO,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACnC,KAAK,aAAa;gBAChB,OAAO,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACzC,KAAK,OAAO;gBACV,OAAO,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACnC,KAAK,MAAM;gBACT,OAAO,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;YAC7C,KAAK,QAAQ;gBACX,OAAO,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;YAChD,KAAK,UAAU;gBACb,OAAO,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACnC,KAAK,QAAQ;gBACX,OAAO,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;YACvC,KAAK,QAAQ;gBACX,yFAAyF;gBACzF,MAAM,IAAI,KAAK,CAAC,0CAA0C,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC;YAC1E,OAAO,CAAC,CAAC,CAAC;gBACR,oFAAoF;gBACpF,MAAM,SAAS,GAAU,IAAI,CAAC;gBAC9B,MAAM,IAAI,KAAK,CAAC,wBAAwB,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;YACvE,CAAC;QACH,CAAC;IACH,CAAC;CACF;AAED,2FAA2F;AAC3F,MAAM,OAAO,iBAAiB;IACC;IAA7B,YAA6B,UAAwC,EAAE;QAA1C,YAAO,GAAP,OAAO,CAAmC;IAAG,CAAC;IAE3E,QAAQ,CAAC,IAAU;QACjB,OAAO,IAAI,CAAC,IAAI,KAAK,QAAQ,CAAC;IAChC,CAAC;IAED,KAAK,CAAC,OAAO,CAAC,IAAU,EAAE,MAAc;QACtC,IAAI,IAAI,CAAC,IAAI,KAAK,QAAQ;YAAE,MAAM,IAAI,KAAK,CAAC,4BAA4B,IAAI,CAAC,IAAI,QAAQ,CAAC,CAAC;QAC3F,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvC,IAAI,CAAC,MAAM;YAAE,MAAM,IAAI,KAAK,CAAC,4CAA4C,IAAI,CAAC,IAAI,GAAG,CAAC,CAAC;QACvF,MAAM,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC;IAC1C,CAAC;CACF;AAED,gGAAgG;AAChG,MAAM,UAAU,mBAAmB,CAAC,UAAwC,EAAE;IAC5E,OAAO,CAAC,IAAI,kBAAkB,EAAE,EAAE,IAAI,iBAAiB,CAAC,OAAO,CAAC,CAAC,CAAC;AACpE,CAAC"}
|
package/dist/core/types.d.ts
CHANGED
package/dist/index.d.ts
CHANGED
|
@@ -2,14 +2,15 @@
|
|
|
2
2
|
export * from "./core/types.js";
|
|
3
3
|
export * from "./core/ports.js";
|
|
4
4
|
export { runHarness } from "./core/pipeline.js";
|
|
5
|
-
export type { RunHarnessOptions
|
|
5
|
+
export type { RunHarnessOptions } from "./core/pipeline.js";
|
|
6
|
+
export { BuiltinStepHandler, CustomStepHandler, defaultStepHandlers } from "./core/steps.js";
|
|
6
7
|
export { runScenario, needsLlmCritic, applyHeals } from "./run.js";
|
|
7
8
|
export type { RunScenarioOptions, RunScenarioResult } from "./run.js";
|
|
8
9
|
export { InlineContextProvider } from "./adapters/context/inline.js";
|
|
9
10
|
export { StaticPlanner } from "./adapters/planners/static.js";
|
|
10
|
-
export { AssertionCritic, checkAssertion, resolveAssertion } from "./adapters/critics/assertion.js";
|
|
11
|
+
export { AssertionCritic, checkAssertion, resolveAssertion, judgeAssertion, MechanicalAssertionHandler, CustomAssertionHandler, } from "./adapters/critics/assertion.js";
|
|
11
12
|
export type { CustomCheck, CustomChecks } from "./adapters/critics/assertion.js";
|
|
12
|
-
export { LlmCritic, summarizeEvidence } from "./adapters/critics/llm.js";
|
|
13
|
+
export { LlmCritic, ExpectAssertionHandler, summarizeEvidence } from "./adapters/critics/llm.js";
|
|
13
14
|
export { ConsoleReporter } from "./adapters/reporters/console.js";
|
|
14
15
|
export { JsonReporter } from "./adapters/reporters/json.js";
|
|
15
16
|
export { FakeDriver } from "./adapters/drivers/fake.js";
|
package/dist/index.js
CHANGED
|
@@ -2,11 +2,12 @@
|
|
|
2
2
|
export * from "./core/types.js";
|
|
3
3
|
export * from "./core/ports.js";
|
|
4
4
|
export { runHarness } from "./core/pipeline.js";
|
|
5
|
+
export { BuiltinStepHandler, CustomStepHandler, defaultStepHandlers } from "./core/steps.js";
|
|
5
6
|
export { runScenario, needsLlmCritic, applyHeals } from "./run.js";
|
|
6
7
|
export { InlineContextProvider } from "./adapters/context/inline.js";
|
|
7
8
|
export { StaticPlanner } from "./adapters/planners/static.js";
|
|
8
|
-
export { AssertionCritic, checkAssertion, resolveAssertion } from "./adapters/critics/assertion.js";
|
|
9
|
-
export { LlmCritic, summarizeEvidence } from "./adapters/critics/llm.js";
|
|
9
|
+
export { AssertionCritic, checkAssertion, resolveAssertion, judgeAssertion, MechanicalAssertionHandler, CustomAssertionHandler, } from "./adapters/critics/assertion.js";
|
|
10
|
+
export { LlmCritic, ExpectAssertionHandler, summarizeEvidence } from "./adapters/critics/llm.js";
|
|
10
11
|
export { ConsoleReporter } from "./adapters/reporters/console.js";
|
|
11
12
|
export { JsonReporter } from "./adapters/reporters/json.js";
|
|
12
13
|
export { FakeDriver } from "./adapters/drivers/fake.js";
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,sCAAsC;AACtC,cAAc,iBAAiB,CAAC;AAChC,cAAc,iBAAiB,CAAC;AAChC,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAEhD,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAGnE,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,aAAa,EAAE,MAAM,+BAA+B,CAAC;AAC9D,OAAO,
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,sCAAsC;AACtC,cAAc,iBAAiB,CAAC;AAChC,cAAc,iBAAiB,CAAC;AAChC,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAEhD,OAAO,EAAE,kBAAkB,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AAC7F,OAAO,EAAE,WAAW,EAAE,cAAc,EAAE,UAAU,EAAE,MAAM,UAAU,CAAC;AAGnE,OAAO,EAAE,qBAAqB,EAAE,MAAM,8BAA8B,CAAC;AACrE,OAAO,EAAE,aAAa,EAAE,MAAM,+BAA+B,CAAC;AAC9D,OAAO,EACL,eAAe,EACf,cAAc,EACd,gBAAgB,EAChB,cAAc,EACd,0BAA0B,EAC1B,sBAAsB,GACvB,MAAM,iCAAiC,CAAC;AAEzC,OAAO,EAAE,SAAS,EAAE,sBAAsB,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AACjG,OAAO,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAClE,OAAO,EAAE,YAAY,EAAE,MAAM,8BAA8B,CAAC;AAC5D,OAAO,EAAE,UAAU,EAAE,MAAM,4BAA4B,CAAC;AACxD,OAAO,EAAE,oBAAoB,EAAE,MAAM,8BAA8B,CAAC;AACpE,OAAO,EAAE,iBAAiB,EAAE,eAAe,EAAE,MAAM,iCAAiC,CAAC;AAGrF,OAAO,EAAE,mBAAmB,EAAE,MAAM,+BAA+B,CAAC;AACpE,OAAO,EAAE,kBAAkB,EAAE,MAAM,6BAA6B,CAAC;AACjE,OAAO,EAAE,eAAe,EAAE,MAAM,2BAA2B,CAAC;AAE5D,OAAO,EAAE,cAAc,EAAE,aAAa,EAAE,MAAM,iCAAiC,CAAC;AAEhF,OAAO,EAAE,QAAQ,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAC"}
|
package/dist/run.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { CustomAction } from "./core/
|
|
1
|
+
import type { CustomAction } from "./core/ports.js";
|
|
2
2
|
import type { CustomChecks } from "./adapters/critics/assertion.js";
|
|
3
3
|
import type { ContextProvider, Critic, Driver, LlmClient, Reporter } from "./core/ports.js";
|
|
4
4
|
import type { Heal } from "./adapters/drivers/self-heal.js";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "cairn-engine",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"description": "An engine for self-healing E2E browser tests — discovered once by an AI, replayed deterministically.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"e2e",
|
|
@@ -29,7 +29,14 @@
|
|
|
29
29
|
"cairn": "./dist/cli.js"
|
|
30
30
|
},
|
|
31
31
|
"exports": {
|
|
32
|
-
".":
|
|
32
|
+
".": {
|
|
33
|
+
"types": "./dist/index.d.ts",
|
|
34
|
+
"default": "./dist/index.js"
|
|
35
|
+
},
|
|
36
|
+
"./browser": {
|
|
37
|
+
"types": "./dist/browser.d.ts",
|
|
38
|
+
"default": "./dist/browser.js"
|
|
39
|
+
}
|
|
33
40
|
},
|
|
34
41
|
"files": [
|
|
35
42
|
"dist",
|