@restormel/testing-runner 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ac-agent-loop.d.ts +10 -0
- package/dist/ac-agent-loop.d.ts.map +1 -1
- package/dist/ac-agent-loop.js +69 -20
- package/dist/ac-agent-loop.js.map +1 -1
- package/dist/ac-judge.d.ts +4 -1
- package/dist/ac-judge.d.ts.map +1 -1
- package/dist/ac-judge.js +69 -74
- package/dist/ac-judge.js.map +1 -1
- package/dist/ac-llm.d.ts +13 -7
- package/dist/ac-llm.d.ts.map +1 -1
- package/dist/ac-llm.js +27 -12
- package/dist/ac-llm.js.map +1 -1
- package/dist/browser-goal.d.ts +5 -0
- package/dist/browser-goal.d.ts.map +1 -1
- package/dist/browser-goal.js +14 -3
- package/dist/browser-goal.js.map +1 -1
- package/dist/egress-browser-context.d.ts +23 -0
- package/dist/egress-browser-context.d.ts.map +1 -0
- package/dist/egress-browser-context.js +46 -0
- package/dist/egress-browser-context.js.map +1 -0
- package/dist/egress-navigation.d.ts +12 -0
- package/dist/egress-navigation.d.ts.map +1 -0
- package/dist/egress-navigation.js +70 -0
- package/dist/egress-navigation.js.map +1 -0
- package/dist/evaluate-criteria.d.ts +5 -0
- package/dist/evaluate-criteria.d.ts.map +1 -1
- package/dist/evaluate-criteria.js +64 -68
- package/dist/evaluate-criteria.js.map +1 -1
- package/dist/llm-usage.d.ts +10 -0
- package/dist/llm-usage.d.ts.map +1 -0
- package/dist/llm-usage.js +22 -0
- package/dist/llm-usage.js.map +1 -0
- package/dist/run-ac-sequence-goal.d.ts.map +1 -1
- package/dist/run-ac-sequence-goal.js +37 -8
- package/dist/run-ac-sequence-goal.js.map +1 -1
- package/dist/run-suite.d.ts.map +1 -1
- package/dist/run-suite.js +140 -65
- package/dist/run-suite.js.map +1 -1
- package/dist/suite-llm-budget.d.ts +50 -0
- package/dist/suite-llm-budget.d.ts.map +1 -0
- package/dist/suite-llm-budget.js +154 -0
- package/dist/suite-llm-budget.js.map +1 -0
- package/package.json +5 -5
package/dist/ac-agent-loop.d.ts
CHANGED
|
@@ -1,15 +1,23 @@
|
|
|
1
1
|
import type { Page } from "playwright";
|
|
2
2
|
import type { AcceptanceCriterionDefinition } from "@restormel/testing-core";
|
|
3
3
|
import type { ResolvedModel } from "@restormel/testing-keys-adapter";
|
|
4
|
+
import type { SuiteLlmBudgetTracker } from "./suite-llm-budget.js";
|
|
5
|
+
/** Sum of provider-reported tokens across AC agent chat completions in this loop (when APIs return `usage`). */
|
|
6
|
+
export type AcAgentAggregatedUsage = {
|
|
7
|
+
promptTokens: number;
|
|
8
|
+
completionTokens: number;
|
|
9
|
+
};
|
|
4
10
|
export type AcAgentLoopResult = {
|
|
5
11
|
ok: true;
|
|
6
12
|
roundsUsed: number;
|
|
7
13
|
finished: "done" | "max_rounds";
|
|
14
|
+
aggregatedTokenUsage?: AcAgentAggregatedUsage;
|
|
8
15
|
} | {
|
|
9
16
|
ok: false;
|
|
10
17
|
roundsUsed: number;
|
|
11
18
|
reasonCode: string;
|
|
12
19
|
summary: string;
|
|
20
|
+
aggregatedTokenUsage?: AcAgentAggregatedUsage;
|
|
13
21
|
};
|
|
14
22
|
/**
|
|
15
23
|
* Multi-turn tool-use style loop: model proposes JSON actions until `done` / `give_up` / max rounds.
|
|
@@ -17,5 +25,7 @@ export type AcAgentLoopResult = {
|
|
|
17
25
|
export declare function runBuiltInAcAgentLoop(page: Page, ac: AcceptanceCriterionDefinition, model: ResolvedModel, baseUrl: string, options?: {
|
|
18
26
|
maxRounds?: number;
|
|
19
27
|
instructions?: string;
|
|
28
|
+
egressAllowHosts?: string[];
|
|
29
|
+
suiteLlmBudget?: SuiteLlmBudgetTracker;
|
|
20
30
|
}): Promise<AcAgentLoopResult>;
|
|
21
31
|
//# sourceMappingURL=ac-agent-loop.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ac-agent-loop.d.ts","sourceRoot":"","sources":["../src/ac-agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AACvC,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,yBAAyB,CAAC;AAC7E,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iCAAiC,CAAC;
|
|
1
|
+
{"version":3,"file":"ac-agent-loop.d.ts","sourceRoot":"","sources":["../src/ac-agent-loop.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AACvC,OAAO,KAAK,EAAE,6BAA6B,EAAE,MAAM,yBAAyB,CAAC;AAC7E,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iCAAiC,CAAC;AAGrE,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAmHnE,gHAAgH;AAChH,MAAM,MAAM,sBAAsB,GAAG;IAAE,YAAY,EAAE,MAAM,CAAC;IAAC,gBAAgB,EAAE,MAAM,CAAA;CAAE,CAAC;AAExF,MAAM,MAAM,iBAAiB,GACzB;IAAE,EAAE,EAAE,IAAI,CAAC;IAAC,UAAU,EAAE,MAAM,CAAC;IAAC,QAAQ,EAAE,MAAM,GAAG,YAAY,CAAC;IAAC,oBAAoB,CAAC,EAAE,sBAAsB,CAAA;CAAE,GAChH;IACE,EAAE,EAAE,KAAK,CAAC;IACV,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,OAAO,EAAE,MAAM,CAAC;IAChB,oBAAoB,CAAC,EAAE,sBAAsB,CAAC;CAC/C,CAAC;AAEN;;GAEG;AACH,wBAAsB,qBAAqB,CACzC,IAAI,EAAE,IAAI,EACV,EAAE,EAAE,6BAA6B,EACjC,KAAK,EAAE,aAAa,EACpB,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE;IACR,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC5B,cAAc,CAAC,EAAE,qBAAqB,CAAC;CACxC,GACA,OAAO,CAAC,iBAAiB,CAAC,CAiI5B"}
|
package/dist/ac-agent-loop.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { postChatCompletions } from "./ac-llm.js";
|
|
2
|
+
import { normalizeEgressAllowHosts, resolveAgentNavigateUrl } from "./egress-navigation.js";
|
|
2
3
|
function truncate(s, n) {
|
|
3
4
|
return s.length <= n ? s : `${s.slice(0, n)}…`;
|
|
4
5
|
}
|
|
@@ -8,18 +9,6 @@ async function pageSnippet(page) {
|
|
|
8
9
|
const body = await page.locator("body").innerText().catch(() => "");
|
|
9
10
|
return `URL: ${url}\nTitle: ${title}\nBody (truncated):\n${truncate(body.trim(), 6000)}`;
|
|
10
11
|
}
|
|
11
|
-
function resolveNavUrl(href, baseUrl) {
|
|
12
|
-
try {
|
|
13
|
-
const b = new URL(baseUrl);
|
|
14
|
-
const t = new URL(href.trim(), b);
|
|
15
|
-
if (t.origin !== b.origin)
|
|
16
|
-
return null;
|
|
17
|
-
return t.href;
|
|
18
|
-
}
|
|
19
|
-
catch {
|
|
20
|
-
return null;
|
|
21
|
-
}
|
|
22
|
-
}
|
|
23
12
|
function parseAgentAction(raw) {
|
|
24
13
|
let o;
|
|
25
14
|
try {
|
|
@@ -52,19 +41,29 @@ function parseAgentAction(raw) {
|
|
|
52
41
|
? a.state
|
|
53
42
|
: undefined,
|
|
54
43
|
};
|
|
44
|
+
if (action === "scroll_into_view" && typeof a.selector === "string")
|
|
45
|
+
return { action: "scroll_into_view", selector: a.selector };
|
|
46
|
+
if (action === "snapshot_a11y")
|
|
47
|
+
return { action: "snapshot_a11y" };
|
|
55
48
|
if (action === "done")
|
|
56
49
|
return { action: "done" };
|
|
57
50
|
if (action === "give_up")
|
|
58
51
|
return { action: "give_up", reason: typeof a.reason === "string" ? a.reason : undefined };
|
|
59
52
|
return undefined;
|
|
60
53
|
}
|
|
61
|
-
async function executeAction(page, act, baseUrl) {
|
|
54
|
+
async function executeAction(page, act, baseUrl, egressAllowHosts) {
|
|
62
55
|
const timeout = 15_000;
|
|
63
56
|
try {
|
|
64
57
|
if (act.action === "navigate") {
|
|
65
|
-
const
|
|
66
|
-
|
|
67
|
-
|
|
58
|
+
const cur = page.url();
|
|
59
|
+
const u = resolveAgentNavigateUrl(act.url, baseUrl, cur, egressAllowHosts) ??
|
|
60
|
+
resolveAgentNavigateUrl(act.url, baseUrl, undefined, egressAllowHosts);
|
|
61
|
+
if (!u) {
|
|
62
|
+
return {
|
|
63
|
+
ok: false,
|
|
64
|
+
err: "navigate: URL not allowed (same-origin as base_url or egress_allow_hosts only)",
|
|
65
|
+
};
|
|
66
|
+
}
|
|
68
67
|
await page.goto(u, { waitUntil: "load", timeout });
|
|
69
68
|
return { ok: true };
|
|
70
69
|
}
|
|
@@ -86,6 +85,10 @@ async function executeAction(page, act, baseUrl) {
|
|
|
86
85
|
await page.waitForLoadState(act.state ?? "networkidle", { timeout });
|
|
87
86
|
return { ok: true };
|
|
88
87
|
}
|
|
88
|
+
if (act.action === "scroll_into_view") {
|
|
89
|
+
await page.locator(act.selector).first().scrollIntoViewIfNeeded({ timeout });
|
|
90
|
+
return { ok: true };
|
|
91
|
+
}
|
|
89
92
|
return { ok: false, err: "unhandled action" };
|
|
90
93
|
}
|
|
91
94
|
catch (e) {
|
|
@@ -101,17 +104,23 @@ function escapeReg(s) {
|
|
|
101
104
|
export async function runBuiltInAcAgentLoop(page, ac, model, baseUrl, options) {
|
|
102
105
|
const maxRounds = options?.maxRounds ?? 12;
|
|
103
106
|
const extra = options?.instructions?.trim() ? `\n${options.instructions.trim()}` : "";
|
|
107
|
+
const egress = normalizeEgressAllowHosts(options?.egressAllowHosts);
|
|
108
|
+
const egressNote = egress.length > 0
|
|
109
|
+
? ` Additional navigation is allowed to these hostnames (https): ${egress.join(", ")}.`
|
|
110
|
+
: "";
|
|
104
111
|
const system = `You are a browser automation agent. You must satisfy ONE acceptance criterion at a time using the page.
|
|
105
112
|
Output a single JSON object per message (no markdown). Allowed actions:
|
|
106
|
-
- {"action":"navigate","url":"<path or absolute
|
|
113
|
+
- {"action":"navigate","url":"<path or absolute URL on allowed host(s)>"}
|
|
107
114
|
- {"action":"click_css","selector":"<CSS selector>"}
|
|
108
115
|
- {"action":"click_role","role":"<aria role>","name":"<optional accessible name substring>"}
|
|
109
116
|
- {"action":"fill","role":"textbox","name":"<optional>","value":"<text>"}
|
|
110
117
|
- {"action":"wait_load","state":"networkidle"|"load"|"domcontentloaded"}
|
|
118
|
+
- {"action":"scroll_into_view","selector":"<CSS selector>"} to bring an element into view before clicking
|
|
119
|
+
- {"action":"snapshot_a11y"} to receive a fresh accessibility tree (ARIA) for the page — use when DOM text is ambiguous
|
|
111
120
|
- {"action":"done"} when the criterion is satisfied on the current page
|
|
112
121
|
- {"action":"give_up","reason":"..."} if blocked
|
|
113
122
|
|
|
114
|
-
Rules: stay on the same origin as the starting base URL
|
|
123
|
+
Rules: stay on the same origin as the starting base URL${egressNote} Prefer role-based actions over fragile CSS. Never output secrets.${extra}`;
|
|
115
124
|
const messages = [
|
|
116
125
|
{ role: "system", content: system },
|
|
117
126
|
{
|
|
@@ -120,15 +129,35 @@ Rules: stay on the same origin as the starting base URL. Prefer role-based actio
|
|
|
120
129
|
},
|
|
121
130
|
];
|
|
122
131
|
let roundsUsed = 0;
|
|
132
|
+
let promptAgg = 0;
|
|
133
|
+
let completionAgg = 0;
|
|
134
|
+
const bumpUsage = (u) => {
|
|
135
|
+
promptAgg += u?.promptTokens ?? 0;
|
|
136
|
+
completionAgg += u?.completionTokens ?? 0;
|
|
137
|
+
};
|
|
138
|
+
const agg = () => promptAgg > 0 || completionAgg > 0 ? { promptTokens: promptAgg, completionTokens: completionAgg } : undefined;
|
|
123
139
|
for (let i = 0; i < maxRounds; i++) {
|
|
140
|
+
const block = options?.suiteLlmBudget?.tryConsumeLlm("ac_round");
|
|
141
|
+
if (block) {
|
|
142
|
+
return {
|
|
143
|
+
ok: false,
|
|
144
|
+
roundsUsed,
|
|
145
|
+
reasonCode: block.code,
|
|
146
|
+
summary: block.summary,
|
|
147
|
+
aggregatedTokenUsage: agg(),
|
|
148
|
+
};
|
|
149
|
+
}
|
|
124
150
|
roundsUsed++;
|
|
125
151
|
const chat = await postChatCompletions(model, messages, { maxTokens: 400, temperature: 0 });
|
|
152
|
+
options?.suiteLlmBudget?.recordLlmCall("ac_round", chat.usage);
|
|
153
|
+
bumpUsage(chat.usage);
|
|
126
154
|
if (!chat.ok) {
|
|
127
155
|
return {
|
|
128
156
|
ok: false,
|
|
129
157
|
roundsUsed,
|
|
130
158
|
reasonCode: "AC_AGENT_LLM_ERROR",
|
|
131
159
|
summary: chat.summary,
|
|
160
|
+
aggregatedTokenUsage: agg(),
|
|
132
161
|
};
|
|
133
162
|
}
|
|
134
163
|
messages.push({ role: "assistant", content: chat.content });
|
|
@@ -141,7 +170,7 @@ Rules: stay on the same origin as the starting base URL. Prefer role-based actio
|
|
|
141
170
|
continue;
|
|
142
171
|
}
|
|
143
172
|
if (act.action === "done") {
|
|
144
|
-
return { ok: true, roundsUsed, finished: "done" };
|
|
173
|
+
return { ok: true, roundsUsed, finished: "done", aggregatedTokenUsage: agg() };
|
|
145
174
|
}
|
|
146
175
|
if (act.action === "give_up") {
|
|
147
176
|
return {
|
|
@@ -149,9 +178,28 @@ Rules: stay on the same origin as the starting base URL. Prefer role-based actio
|
|
|
149
178
|
roundsUsed,
|
|
150
179
|
reasonCode: "AC_AGENT_GAVE_UP",
|
|
151
180
|
summary: act.reason ?? "Agent gave up",
|
|
181
|
+
aggregatedTokenUsage: agg(),
|
|
152
182
|
};
|
|
153
183
|
}
|
|
154
|
-
|
|
184
|
+
if (act.action === "snapshot_a11y") {
|
|
185
|
+
try {
|
|
186
|
+
const snap = await page.locator("body").ariaSnapshot();
|
|
187
|
+
const cap = 14_000;
|
|
188
|
+
const text = snap.length <= cap ? snap : `${snap.slice(0, cap)}…`;
|
|
189
|
+
messages.push({
|
|
190
|
+
role: "user",
|
|
191
|
+
content: `ARIA snapshot (Playwright ariaSnapshot, truncated if long):\n${text}`,
|
|
192
|
+
});
|
|
193
|
+
}
|
|
194
|
+
catch (e) {
|
|
195
|
+
messages.push({
|
|
196
|
+
role: "user",
|
|
197
|
+
content: `ARIA snapshot failed: ${e instanceof Error ? e.message : String(e)}`,
|
|
198
|
+
});
|
|
199
|
+
}
|
|
200
|
+
continue;
|
|
201
|
+
}
|
|
202
|
+
const ex = await executeAction(page, act, baseUrl, egress.length > 0 ? egress : undefined);
|
|
155
203
|
if (!ex.ok) {
|
|
156
204
|
messages.push({
|
|
157
205
|
role: "user",
|
|
@@ -169,6 +217,7 @@ Rules: stay on the same origin as the starting base URL. Prefer role-based actio
|
|
|
169
217
|
roundsUsed,
|
|
170
218
|
reasonCode: "AC_AGENT_MAX_ROUNDS",
|
|
171
219
|
summary: `Exceeded ${maxRounds} rounds without done`,
|
|
220
|
+
aggregatedTokenUsage: agg(),
|
|
172
221
|
};
|
|
173
222
|
}
|
|
174
223
|
//# sourceMappingURL=ac-agent-loop.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ac-agent-loop.js","sourceRoot":"","sources":["../src/ac-agent-loop.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,mBAAmB,EAAoB,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"ac-agent-loop.js","sourceRoot":"","sources":["../src/ac-agent-loop.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,mBAAmB,EAAoB,MAAM,aAAa,CAAC;AACpE,OAAO,EAAE,yBAAyB,EAAE,uBAAuB,EAAE,MAAM,wBAAwB,CAAC;AAG5F,SAAS,QAAQ,CAAC,CAAS,EAAE,CAAS;IACpC,OAAO,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC;AACjD,CAAC;AAED,KAAK,UAAU,WAAW,CAAC,IAAU;IACnC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IACvB,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;IACjD,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,SAAS,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;IACpE,OAAO,QAAQ,GAAG,YAAY,KAAK,wBAAwB,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,EAAE,IAAI,CAAC,EAAE,CAAC;AAC3F,CAAC;AAaD,SAAS,gBAAgB,CAAC,GAAW;IACnC,IAAI,CAAU,CAAC;IACf,IAAI,CAAC;QACH,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;IACtB,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,SAAS,CAAC;IACnB,CAAC;IACD,IAAI,CAAC,KAAK,IAAI,IAAI,OAAO,CAAC,KAAK,QAAQ;QAAE,OAAO,SAAS,CAAC;IAC1D,MAAM,CAAC,GAAG,CAA4B,CAAC;IACvC,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IAC1E,IAAI,MAAM,KAAK,UAAU,IAAI,OAAO,CAAC,CAAC,GAAG,KAAK,QAAQ;QAAE,OAAO,EAAE,MAAM,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC,CAAC,GAAG,EAAE,CAAC;IAClG,IAAI,MAAM,KAAK,WAAW,IAAI,OAAO,CAAC,CAAC,QAAQ,KAAK,QAAQ;QAAE,OAAO,EAAE,MAAM,EAAE,WAAW,EAAE,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC;IACnH,IAAI,MAAM,KAAK,YAAY,IAAI,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ;QACvD,OAAO,EAAE,MAAM,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC;IACvG,IAAI,MAAM,KAAK,MAAM,IAAI,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ,IAAI,OAAO,CAAC,CAAC,KAAK,KAAK,QAAQ;QAChF,OAAO;YACL,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,CAAC,CAAC,IAAI;YACZ,IAAI,EAAE,OAAO,CAAC,CAAC,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,SAAS;YACrD,KAAK,EAAE,CAAC,CAAC,KAAK;SACf,CAAC;IACJ,IAAI,MAAM,KAAK,WAAW;QACxB,OAAO;YACL,MAAM,EAAE,WAAW;YACnB,KAAK,EACH,CAAC,CAAC,KAAK,KAAK,kBAAkB,IAAI,CAAC,CAAC,KAAK,KAAK,aAAa,IAAI,CAAC,CAAC,KAAK,KAAK,MAAM;gBAC/E,CAAC,CAAC,CAAC,CAAC,KAAK;gBACT,CAAC,CAAC,SAAS;SAChB,CAAC;IACJ,IAAI,MAAM,KAAK,kBAAkB,IAAI,OAAO,CAAC,CAAC,QAAQ,KAAK,QAAQ;QACjE,OAAO,EAAE,MAAM,EAAE,kBAAkB,EAAE,QAAQ,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC;IAC9D,IAAI,MAAM,KAAK,eAAe;QAAE,OAAO,EAAE,MAAM,EAAE,eAAe,EAAE,CAAC;IACnE,IAAI,MAAM,KAAK,MAAM;QAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,CAAC;IACjD,IAAI,MAAM,KAAK,SAAS;QAAE,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC;IACpH,OAAO,SAAS,CAAC;AACnB,CAAC;AAED,KAAK,UAAU,aAAa,CAC1B,IAAU,EACV,GAAgB,EAChB,OAAe,EACf,gBAAsC;IAEtC,MAAM,OAAO,GAAG,MAAM,CAAC;IACvB,IAAI,CAAC;QACH,IAAI,GAAG,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;YAC9B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YACvB,MAAM,CAAC,GACL,uBAAuB,CAAC,GAAG,CAAC,GAAG,EAAE,OAAO,EAAE,GAAG,EAAE,gBAAgB,CAAC;gBAChE,uBAAuB,CAAC,GAAG,CAAC,GAAG,EAAE,OAAO,EAAE,SAAS,EAAE,gBAAgB,CAAC,CAAC;YACzE,IAAI,CAAC,CAAC,EAAE,CAAC;gBACP,OAAO;oBACL,EAAE,EAAE,KAAK;oBACT,GAAG,EAAE,gFAAgF;iBACtF,CAAC;YACJ,CAAC;YACD,MAAM,IAAI,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,SAAS,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC,CAAC;YACnD,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,CAAC;QACtB,CAAC;QACD,IAAI,GAAG,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;YAC/B,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC;YAC5D,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,CAAC;QACtB,CAAC;QACD,IAAI,GAAG,CAAC,MAAM,KAAK,YAAY,EAAE,CAAC;YAChC,MAAM,GAAG,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAgB,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,MAAM,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;YACxH,MAAM,GAAG,CAAC,KAAK,EAAE,CAAC,KAAK,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC;YACrC,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,CAAC;QACtB,CAAC;QACD,IAAI,GAAG,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YAC1B,MAAM,GAAG,GAAG,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,IAAiB,EAAE,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,MAAM,CAAC,SAAS,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;YACzH,MAAM,GAAG,CAAC,KAAK,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;YAC/C,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,CAAC;QACtB,CAAC;QACD,IAAI,GAAG,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;YAC/B,MAAM,IAAI,CAAC,gBAAgB,CAAC,GAAG,CAAC,KAAK,IAAI,aAAa,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC;YACrE,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,CAAC;QACtB,CAAC;QACD,IAAI,GAAG,CAAC,MAAM,KAAK,kBAAkB,EAAE,CAAC;YACtC,MAAM,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC,KAAK,EAAE,CAAC,sBAAsB,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC;YAC7E,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,CAAC;QACtB,CAAC;QACD,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,kBAAkB,EAAE,CAAC;IAChD,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;IACxE,CAAC;AACH,CAAC;AAED,SAAS,SAAS,CAAC,CAAS;IAC1B,OAAO,CAAC,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC,CAAC;AAClD,CAAC;AAeD;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,IAAU,EACV,EAAiC,EACjC,KAAoB,EACpB,OAAe,EACf,OAKC;IAED,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS,IAAI,EAAE,CAAC;IAC3C,MAAM,KAAK,GAAG,OAAO,EAAE,YAAY,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,KAAK,OAAO,CAAC,YAAY,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IACtF,MAAM,MAAM,GAAG,yBAAyB,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC;IACpE,MAAM,UAAU,GACd,MAAM,CAAC,MAAM,GAAG,CAAC;QACf,CAAC,CAAC,iEAAiE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG;QACvF,CAAC,CAAC,EAAE,CAAC;IAET,MAAM,MAAM,GAAG;;;;;;;;;;;;yDAYwC,UAAU,qEAAqE,KAAK,EAAE,CAAC;IAE9I,MAAM,QAAQ,GAAkB;QAC9B,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE;QACnC;YACE,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE,iBAAiB,EAAE,CAAC,EAAE,gBAAgB,EAAE,CAAC,IAAI,OAAO,MAAM,WAAW,CAAC,IAAI,CAAC,EAAE;SACvF;KACF,CAAC;IAEF,IAAI,UAAU,GAAG,CAAC,CAAC;IACnB,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,aAAa,GAAG,CAAC,CAAC;IACtB,MAAM,SAAS,GAAG,CAAC,CAAmE,EAAE,EAAE;QACxF,SAAS,IAAI,CAAC,EAAE,YAAY,IAAI,CAAC,CAAC;QAClC,aAAa,IAAI,CAAC,EAAE,gBAAgB,IAAI,CAAC,CAAC;IAC5C,CAAC,CAAC;IACF,MAAM,GAAG,GAAG,GAAuC,EAAE,CACnD,SAAS,GAAG,CAAC,IAAI,aAAa,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,YAAY,EAAE,SAAS,EAAE,gBAAgB,EAAE,aAAa,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;IAEhH,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,OAAO,EAAE,cAAc,EAAE,aAAa,CAAC,UAAU,CAAC,CAAC;QACjE,IAAI,KAAK,EAAE,CAAC;YACV,OAAO;gBACL,EAAE,EAAE,KAAK;gBACT,UAAU;gBACV,UAAU,EAAE,KAAK,CAAC,IAAI;gBACtB,OAAO,EAAE,KAAK,CAAC,OAAO;gBACtB,oBAAoB,EAAE,GAAG,EAAE;aAC5B,CAAC;QACJ,CAAC;QACD,UAAU,EAAE,CAAC;QACb,MAAM,IAAI,GAAG,MAAM,mBAAmB,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE,SAAS,EAAE,GAAG,EAAE,WAAW,EAAE,CAAC,EAAE,CAAC,CAAC;QAC5F,OAAO,EAAE,cAAc,EAAE,aAAa,CAAC,UAAU,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;QAC/D,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACtB,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC;YACb,OAAO;gBACL,EAAE,EAAE,KAAK;gBACT,UAAU;gBACV,UAAU,EAAE,oBAAoB;gBAChC,OAAO,EAAE,IAAI,CAAC,OAAO;gBACrB,oBAAoB,EAAE,GAAG,EAAE;aAC5B,CAAC;QACJ,CAAC;QAED,QAAQ,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;QAC5D,MAAM,GAAG,GAAG,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC3C,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,QAAQ,CAAC,IAAI,CAAC;gBACZ,IAAI,EAAE,MAAM;gBACZ,OAAO,EAAE,kEAAkE;aAC5E,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,IAAI,GAAG,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;YAC1B,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,QAAQ,EAAE,MAAM,EAAE,oBAAoB,EAAE,GAAG,EAAE,EAAE,CAAC;QACjF,CAAC;QACD,IAAI,GAAG,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;YAC7B,OAAO;gBACL,EAAE,EAAE,KAAK;gBACT,UAAU;gBACV,UAAU,EAAE,kBAAkB;gBAC9B,OAAO,EAAE,GAAG,CAAC,MAAM,IAAI,eAAe;gBACtC,oBAAoB,EAAE,GAAG,EAAE;aAC5B,CAAC;QACJ,CAAC;QAED,IAAI,GAAG,CAAC,MAAM,KAAK,eAAe,EAAE,CAAC;YACnC,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,YAAY,EAAE,CAAC;gBACvD,MAAM,GAAG,GAAG,MAAM,CAAC;gBACnB,MAAM,IAAI,GAAG,IAAI,CAAC,MAAM,IAAI,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC;gBAClE,QAAQ,CAAC,IAAI,CAAC;oBACZ,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE,gEAAgE,IAAI,EAAE;iBAChF,CAAC,CAAC;YACL,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,QAAQ,CAAC,IAAI,CAAC;oBACZ,IAAI,EAAE,MAAM;oBACZ,OAAO,EAAE,yBAAyB,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE;iBAC/E,CAAC,CAAC;YACL,CAAC;YACD,SAAS;QACX,CAAC;QAED,MAAM,EAAE,GAAG,MAAM,aAAa,CAAC,IAAI,EAAE,GAAG,EAAE,OAAO,EAAE,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;QAC3F,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACX,QAAQ,CAAC,IAAI,CAAC;gBACZ,IAAI,EAAE,MAAM;gBACZ,OAAO,EAAE,kBAAkB,EAAE,CAAC,GAAG,KAAK,MAAM,WAAW,CAAC,IAAI,CAAC,EAAE;aAChE,CAAC,CAAC;YACH,SAAS;QACX,CAAC;QAED,QAAQ,CAAC,IAAI,CAAC;YACZ,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE,eAAe,MAAM,WAAW,CAAC,IAAI,CAAC,EAAE;SAClD,CAAC,CAAC;IACL,CAAC;IAED,OAAO;QACL,EAAE,EAAE,KAAK;QACT,UAAU;QACV,UAAU,EAAE,qBAAqB;QACjC,OAAO,EAAE,YAAY,SAAS,sBAAsB;QACpD,oBAAoB,EAAE,GAAG,EAAE;KAC5B,CAAC;AACJ,CAAC"}
|
package/dist/ac-judge.d.ts
CHANGED
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
import type { Page } from "playwright";
|
|
2
|
-
import type { CriteriaEvaluation } from "./evaluate-criteria.js";
|
|
3
2
|
import type { JudgeRubric } from "@restormel/testing-core";
|
|
4
3
|
import type { ResolvedModel } from "@restormel/testing-keys-adapter";
|
|
4
|
+
import type { CriteriaEvaluation } from "./evaluate-criteria.js";
|
|
5
|
+
import type { SuiteLlmBudgetTracker } from "./suite-llm-budget.js";
|
|
5
6
|
/**
|
|
6
7
|
* Per-AC judge: response JSON must include matching `ac_id` (R-BA-5).
|
|
7
8
|
*/
|
|
8
9
|
export declare function runAcShapedJudgeRubric(page: Page, rubric: JudgeRubric, model: ResolvedModel, ac: {
|
|
9
10
|
id: string;
|
|
10
11
|
text: string;
|
|
12
|
+
}, options?: {
|
|
13
|
+
suiteLlmBudget?: SuiteLlmBudgetTracker;
|
|
11
14
|
}): Promise<CriteriaEvaluation>;
|
|
12
15
|
//# sourceMappingURL=ac-judge.d.ts.map
|
package/dist/ac-judge.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ac-judge.d.ts","sourceRoot":"","sources":["../src/ac-judge.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AACvC,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"ac-judge.d.ts","sourceRoot":"","sources":["../src/ac-judge.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,YAAY,CAAC;AACvC,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AAC3D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iCAAiC,CAAC;AAErE,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,wBAAwB,CAAC;AACjE,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAmBnE;;GAEG;AACH,wBAAsB,sBAAsB,CAC1C,IAAI,EAAE,IAAI,EACV,MAAM,EAAE,WAAW,EACnB,KAAK,EAAE,aAAa,EACpB,EAAE,EAAE;IAAE,EAAE,EAAE,MAAM,CAAC;IAAC,IAAI,EAAE,MAAM,CAAA;CAAE,EAChC,OAAO,CAAC,EAAE;IAAE,cAAc,CAAC,EAAE,qBAAqB,CAAA;CAAE,GACnD,OAAO,CAAC,kBAAkB,CAAC,CAoG7B"}
|
package/dist/ac-judge.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { postChatCompletions } from "./ac-llm.js";
|
|
1
2
|
function truncate(s, n) {
|
|
2
3
|
return s.length <= n ? s : `${s.slice(0, n)}…`;
|
|
3
4
|
}
|
|
@@ -16,10 +17,8 @@ async function sampleTextForAcJudge(page, rubric) {
|
|
|
16
17
|
/**
|
|
17
18
|
* Per-AC judge: response JSON must include matching `ac_id` (R-BA-5).
|
|
18
19
|
*/
|
|
19
|
-
export async function runAcShapedJudgeRubric(page, rubric, model, ac) {
|
|
20
|
+
export async function runAcShapedJudgeRubric(page, rubric, model, ac, options) {
|
|
20
21
|
const sample = await sampleTextForAcJudge(page, rubric);
|
|
21
|
-
const base = model.providerBaseUrl?.replace(/\/?$/, "") ?? "https://api.openai.com/v1";
|
|
22
|
-
const url = `${base}/chat/completions`;
|
|
23
22
|
const system = `You are a test oracle for one acceptance criterion. Reply with a single JSON object only:
|
|
24
23
|
{"verdict":"pass"|"fail"|"uncertain","ac_id":"<string>","reason":"<short>"}
|
|
25
24
|
The ac_id field MUST exactly equal the acceptance criterion id provided in the user message.`;
|
|
@@ -31,86 +30,82 @@ Rubric summary: ${rubric.summary ?? "(none)"}
|
|
|
31
30
|
|
|
32
31
|
Page text:
|
|
33
32
|
${sample}`;
|
|
33
|
+
const block = options?.suiteLlmBudget?.tryConsumeLlm("chat");
|
|
34
|
+
if (block) {
|
|
35
|
+
return {
|
|
36
|
+
verdict: "failed",
|
|
37
|
+
reasonCode: block.code,
|
|
38
|
+
summary: block.summary,
|
|
39
|
+
judgeModelInvocations: 0,
|
|
40
|
+
};
|
|
41
|
+
}
|
|
42
|
+
const chat = await postChatCompletions(model, [
|
|
43
|
+
{ role: "system", content: system },
|
|
44
|
+
{ role: "user", content: user },
|
|
45
|
+
], { maxTokens: 120, temperature: 0, responseFormat: "json_object" });
|
|
46
|
+
options?.suiteLlmBudget?.recordLlmCall("chat", chat.usage);
|
|
47
|
+
const usageFields = {};
|
|
48
|
+
if (chat.usage?.promptTokens !== undefined)
|
|
49
|
+
usageFields.judgePromptTokens = chat.usage.promptTokens;
|
|
50
|
+
if (chat.usage?.completionTokens !== undefined)
|
|
51
|
+
usageFields.judgeCompletionTokens = chat.usage.completionTokens;
|
|
52
|
+
if (!chat.ok) {
|
|
53
|
+
return {
|
|
54
|
+
verdict: "indeterminate",
|
|
55
|
+
reasonCode: "JUDGE_HTTP_ERROR",
|
|
56
|
+
summary: `AC judge HTTP error: ${chat.summary}`,
|
|
57
|
+
judgeModelInvocations: 1,
|
|
58
|
+
...usageFields,
|
|
59
|
+
};
|
|
60
|
+
}
|
|
61
|
+
let parsed;
|
|
34
62
|
try {
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
"content-type": "application/json",
|
|
39
|
-
authorization: `Bearer ${model.credentials.apiKey}`,
|
|
40
|
-
},
|
|
41
|
-
body: JSON.stringify({
|
|
42
|
-
model: model.modelId,
|
|
43
|
-
messages: [
|
|
44
|
-
{ role: "system", content: system },
|
|
45
|
-
{ role: "user", content: user },
|
|
46
|
-
],
|
|
47
|
-
max_tokens: 120,
|
|
48
|
-
temperature: 0,
|
|
49
|
-
response_format: { type: "json_object" },
|
|
50
|
-
}),
|
|
51
|
-
});
|
|
52
|
-
if (!res.ok) {
|
|
53
|
-
const t = await res.text().catch(() => "");
|
|
54
|
-
return {
|
|
55
|
-
verdict: "indeterminate",
|
|
56
|
-
reasonCode: "JUDGE_HTTP_ERROR",
|
|
57
|
-
summary: `AC judge HTTP ${res.status}: ${truncate(t, 80)}`,
|
|
58
|
-
judgeModelInvocations: 1,
|
|
59
|
-
};
|
|
60
|
-
}
|
|
61
|
-
const data = (await res.json());
|
|
62
|
-
const raw = data.choices?.[0]?.message?.content ?? "{}";
|
|
63
|
-
let parsed;
|
|
64
|
-
try {
|
|
65
|
-
parsed = JSON.parse(raw);
|
|
66
|
-
}
|
|
67
|
-
catch {
|
|
68
|
-
return {
|
|
69
|
-
verdict: "indeterminate",
|
|
70
|
-
reasonCode: "JUDGE_PARSE_ERROR",
|
|
71
|
-
summary: "AC judge response was not valid JSON",
|
|
72
|
-
judgeModelInvocations: 1,
|
|
73
|
-
};
|
|
74
|
-
}
|
|
75
|
-
if (parsed.ac_id !== ac.id) {
|
|
76
|
-
return {
|
|
77
|
-
verdict: "indeterminate",
|
|
78
|
-
reasonCode: "JUDGE_AC_ID_MISMATCH",
|
|
79
|
-
summary: `Model ac_id ${JSON.stringify(parsed.ac_id)} did not match expected ${JSON.stringify(ac.id)}`,
|
|
80
|
-
judgeModelInvocations: 1,
|
|
81
|
-
};
|
|
82
|
-
}
|
|
83
|
-
const v = (parsed.verdict ?? "").toLowerCase();
|
|
84
|
-
const reason = parsed.reason ? `: ${parsed.reason}` : "";
|
|
85
|
-
if (v === "pass") {
|
|
86
|
-
return {
|
|
87
|
-
verdict: "passed",
|
|
88
|
-
reasonCode: "JUDGE_AC_PASS",
|
|
89
|
-
summary: `AC judge passed${reason}`,
|
|
90
|
-
judgeModelInvocations: 1,
|
|
91
|
-
};
|
|
92
|
-
}
|
|
93
|
-
if (v === "fail") {
|
|
94
|
-
return {
|
|
95
|
-
verdict: "failed",
|
|
96
|
-
reasonCode: "JUDGE_AC_FAIL",
|
|
97
|
-
summary: `AC judge failed${reason}`,
|
|
98
|
-
judgeModelInvocations: 1,
|
|
99
|
-
};
|
|
100
|
-
}
|
|
63
|
+
parsed = JSON.parse(chat.content);
|
|
64
|
+
}
|
|
65
|
+
catch {
|
|
101
66
|
return {
|
|
102
67
|
verdict: "indeterminate",
|
|
103
|
-
reasonCode: "
|
|
104
|
-
summary:
|
|
68
|
+
reasonCode: "JUDGE_PARSE_ERROR",
|
|
69
|
+
summary: "AC judge response was not valid JSON",
|
|
105
70
|
judgeModelInvocations: 1,
|
|
71
|
+
...usageFields,
|
|
106
72
|
};
|
|
107
73
|
}
|
|
108
|
-
|
|
74
|
+
if (parsed.ac_id !== ac.id) {
|
|
109
75
|
return {
|
|
110
76
|
verdict: "indeterminate",
|
|
111
|
-
reasonCode: "
|
|
112
|
-
summary: `
|
|
77
|
+
reasonCode: "JUDGE_AC_ID_MISMATCH",
|
|
78
|
+
summary: `Model ac_id ${JSON.stringify(parsed.ac_id)} did not match expected ${JSON.stringify(ac.id)}`,
|
|
79
|
+
judgeModelInvocations: 1,
|
|
80
|
+
...usageFields,
|
|
81
|
+
};
|
|
82
|
+
}
|
|
83
|
+
const v = (parsed.verdict ?? "").toLowerCase();
|
|
84
|
+
const reason = parsed.reason ? `: ${parsed.reason}` : "";
|
|
85
|
+
if (v === "pass") {
|
|
86
|
+
return {
|
|
87
|
+
verdict: "passed",
|
|
88
|
+
reasonCode: "JUDGE_AC_PASS",
|
|
89
|
+
summary: `AC judge passed${reason}`,
|
|
90
|
+
judgeModelInvocations: 1,
|
|
91
|
+
...usageFields,
|
|
92
|
+
};
|
|
93
|
+
}
|
|
94
|
+
if (v === "fail") {
|
|
95
|
+
return {
|
|
96
|
+
verdict: "failed",
|
|
97
|
+
reasonCode: "JUDGE_AC_FAIL",
|
|
98
|
+
summary: `AC judge failed${reason}`,
|
|
99
|
+
judgeModelInvocations: 1,
|
|
100
|
+
...usageFields,
|
|
113
101
|
};
|
|
114
102
|
}
|
|
103
|
+
return {
|
|
104
|
+
verdict: "indeterminate",
|
|
105
|
+
reasonCode: "JUDGE_AC_UNCERTAIN",
|
|
106
|
+
summary: `AC judge uncertain${reason}`,
|
|
107
|
+
judgeModelInvocations: 1,
|
|
108
|
+
...usageFields,
|
|
109
|
+
};
|
|
115
110
|
}
|
|
116
111
|
//# sourceMappingURL=ac-judge.js.map
|
package/dist/ac-judge.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ac-judge.js","sourceRoot":"","sources":["../src/ac-judge.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"ac-judge.js","sourceRoot":"","sources":["../src/ac-judge.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAC;AAIlD,SAAS,QAAQ,CAAC,CAAS,EAAE,CAAS;IACpC,OAAO,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC;AACjD,CAAC;AAED,MAAM,UAAU,GAAG,IAAI,CAAC;AAExB,KAAK,UAAU,oBAAoB,CAAC,IAAU,EAAE,MAAmB;IACjE,IAAI,MAAM,CAAC,eAAe,KAAK,SAAS,IAAI,MAAM,CAAC,eAAe,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;QACjF,MAAM,CAAC,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,eAAe,CAAC,CAAC,KAAK,EAAE,CAAC,SAAS,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;QACzF,OAAO,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,UAAU,CAAC,CAAC;IACxC,CAAC;IACD,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,CAAC,SAAS,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;IAChF,IAAI,QAAQ,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,UAAU,CAAC,CAAC;IAC7E,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,SAAS,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;IACxE,OAAO,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,UAAU,CAAC,CAAC;AAC/C,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,KAAK,UAAU,sBAAsB,CAC1C,IAAU,EACV,MAAmB,EACnB,KAAoB,EACpB,EAAgC,EAChC,OAAoD;IAEpD,MAAM,MAAM,GAAG,MAAM,oBAAoB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAExD,MAAM,MAAM,GAAG;;6FAE4E,CAAC;IAE5F,MAAM,IAAI,GAAG,4BAA4B,EAAE,CAAC,EAAE;kBAC9B,EAAE,CAAC,IAAI;;aAEZ,MAAM,CAAC,EAAE;kBACJ,MAAM,CAAC,OAAO,IAAI,QAAQ;;;EAG1C,MAAM,EAAE,CAAC;IAET,MAAM,KAAK,GAAG,OAAO,EAAE,cAAc,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;IAC7D,IAAI,KAAK,EAAE,CAAC;QACV,OAAO;YACL,OAAO,EAAE,QAAQ;YACjB,UAAU,EAAE,KAAK,CAAC,IAAI;YACtB,OAAO,EAAE,KAAK,CAAC,OAAO;YACtB,qBAAqB,EAAE,CAAC;SACzB,CAAC;IACJ,CAAC;IAED,MAAM,IAAI,GAAG,MAAM,mBAAmB,CACpC,KAAK,EACL;QACE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE;QACnC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE;KAChC,EACD,EAAE,SAAS,EAAE,GAAG,EAAE,WAAW,EAAE,CAAC,EAAE,cAAc,EAAE,aAAa,EAAE,CAClE,CAAC;IACF,OAAO,EAAE,cAAc,EAAE,aAAa,CAAC,MAAM,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC;IAE3D,MAAM,WAAW,GAA4E,EAAE,CAAC;IAChG,IAAI,IAAI,CAAC,KAAK,EAAE,YAAY,KAAK,SAAS;QAAE,WAAW,CAAC,iBAAiB,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC;IACpG,IAAI,IAAI,CAAC,KAAK,EAAE,gBAAgB,KAAK,SAAS;QAAE,WAAW,CAAC,qBAAqB,GAAG,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC;IAEhH,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC;QACb,OAAO;YACL,OAAO,EAAE,eAAe;YACxB,UAAU,EAAE,kBAAkB;YAC9B,OAAO,EAAE,wBAAwB,IAAI,CAAC,OAAO,EAAE;YAC/C,qBAAqB,EAAE,CAAC;YACxB,GAAG,WAAW;SACf,CAAC;IACJ,CAAC;IAED,IAAI,MAA6D,CAAC;IAClE,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAA0D,CAAC;IAC7F,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,OAAO,EAAE,eAAe;YACxB,UAAU,EAAE,mBAAmB;YAC/B,OAAO,EAAE,sCAAsC;YAC/C,qBAAqB,EAAE,CAAC;YACxB,GAAG,WAAW;SACf,CAAC;IACJ,CAAC;IAED,IAAI,MAAM,CAAC,KAAK,KAAK,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3B,OAAO;YACL,OAAO,EAAE,eAAe;YACxB,UAAU,EAAE,sBAAsB;YAClC,OAAO,EAAE,eAAe,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,KAAK,CAAC,2BAA2B,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE;YACtG,qBAAqB,EAAE,CAAC;YACxB,GAAG,WAAW;SACf,CAAC;IACJ,CAAC;IAED,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAC;IAC/C,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;IACzD,IAAI,CAAC,KAAK,MAAM,EAAE,CAAC;QACjB,OAAO;YACL,OAAO,EAAE,QAAQ;YACjB,UAAU,EAAE,eAAe;YAC3B,OAAO,EAAE,kBAAkB,MAAM,EAAE;YACnC,qBAAqB,EAAE,CAAC;YACxB,GAAG,WAAW;SACf,CAAC;IACJ,CAAC;IACD,IAAI,CAAC,KAAK,MAAM,EAAE,CAAC;QACjB,OAAO;YACL,OAAO,EAAE,QAAQ;YACjB,UAAU,EAAE,eAAe;YAC3B,OAAO,EAAE,kBAAkB,MAAM,EAAE;YACnC,qBAAqB,EAAE,CAAC;YACxB,GAAG,WAAW;SACf,CAAC;IACJ,CAAC;IACD,OAAO;QACL,OAAO,EAAE,eAAe;QACxB,UAAU,EAAE,oBAAoB;QAChC,OAAO,EAAE,qBAAqB,MAAM,EAAE;QACtC,qBAAqB,EAAE,CAAC;QACxB,GAAG,WAAW;KACf,CAAC;AACJ,CAAC"}
|
package/dist/ac-llm.d.ts
CHANGED
|
@@ -1,19 +1,25 @@
|
|
|
1
1
|
import type { ResolvedModel } from "@restormel/testing-keys-adapter";
|
|
2
|
+
import { type LlmTokenUsage } from "./llm-usage.js";
|
|
2
3
|
export type ChatMessage = {
|
|
3
4
|
role: "system" | "user" | "assistant";
|
|
4
5
|
content: string;
|
|
5
6
|
};
|
|
7
|
+
export type PostChatResult = {
|
|
8
|
+
ok: true;
|
|
9
|
+
content: string;
|
|
10
|
+
usage?: LlmTokenUsage;
|
|
11
|
+
} | {
|
|
12
|
+
ok: false;
|
|
13
|
+
summary: string;
|
|
14
|
+
usage?: LlmTokenUsage;
|
|
15
|
+
};
|
|
6
16
|
/**
|
|
7
17
|
* OpenAI-compatible chat/completions call (same transport as judge rubric).
|
|
18
|
+
* Parses `usage` when the provider includes it (prompt_tokens / completion_tokens / total_tokens).
|
|
8
19
|
*/
|
|
9
20
|
export declare function postChatCompletions(model: ResolvedModel, messages: ChatMessage[], options?: {
|
|
10
21
|
maxTokens?: number;
|
|
11
22
|
temperature?: number;
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
content: string;
|
|
15
|
-
} | {
|
|
16
|
-
ok: false;
|
|
17
|
-
summary: string;
|
|
18
|
-
}>;
|
|
23
|
+
responseFormat?: "json_object";
|
|
24
|
+
}): Promise<PostChatResult>;
|
|
19
25
|
//# sourceMappingURL=ac-llm.d.ts.map
|
package/dist/ac-llm.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ac-llm.d.ts","sourceRoot":"","sources":["../src/ac-llm.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iCAAiC,CAAC;
|
|
1
|
+
{"version":3,"file":"ac-llm.d.ts","sourceRoot":"","sources":["../src/ac-llm.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,iCAAiC,CAAC;AACrE,OAAO,EAA8B,KAAK,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAEhF,MAAM,MAAM,WAAW,GAAG;IAAE,IAAI,EAAE,QAAQ,GAAG,MAAM,GAAG,WAAW,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAE,CAAC;AAMrF,MAAM,MAAM,cAAc,GACtB;IAAE,EAAE,EAAE,IAAI,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,aAAa,CAAA;CAAE,GACpD;IAAE,EAAE,EAAE,KAAK,CAAC;IAAC,OAAO,EAAE,MAAM,CAAC;IAAC,KAAK,CAAC,EAAE,aAAa,CAAA;CAAE,CAAC;AAE1D;;;GAGG;AACH,wBAAsB,mBAAmB,CACvC,KAAK,EAAE,aAAa,EACpB,QAAQ,EAAE,WAAW,EAAE,EACvB,OAAO,CAAC,EAAE;IAAE,SAAS,CAAC,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAAC,cAAc,CAAC,EAAE,aAAa,CAAA;CAAE,GACrF,OAAO,CAAC,cAAc,CAAC,CAuDzB"}
|
package/dist/ac-llm.js
CHANGED
|
@@ -1,14 +1,25 @@
|
|
|
1
|
+
import { extractChatCompletionUsage } from "./llm-usage.js";
|
|
1
2
|
function redactForLog(s) {
|
|
2
3
|
return s.replace(/\bBearer\s+[\w-_.]+\b/gi, "Bearer [redacted]").replace(/\bsk-[a-zA-Z0-9]{10,}\b/g, "sk-[redacted]");
|
|
3
4
|
}
|
|
4
5
|
/**
|
|
5
6
|
* OpenAI-compatible chat/completions call (same transport as judge rubric).
|
|
7
|
+
* Parses `usage` when the provider includes it (prompt_tokens / completion_tokens / total_tokens).
|
|
6
8
|
*/
|
|
7
9
|
export async function postChatCompletions(model, messages, options) {
|
|
8
10
|
const base = model.providerBaseUrl?.replace(/\/?$/, "") ?? "https://api.openai.com/v1";
|
|
9
11
|
const url = `${base}/chat/completions`;
|
|
10
12
|
const maxTokens = options?.maxTokens ?? 400;
|
|
11
13
|
const temperature = options?.temperature ?? 0;
|
|
14
|
+
const body = {
|
|
15
|
+
model: model.modelId,
|
|
16
|
+
messages,
|
|
17
|
+
max_tokens: maxTokens,
|
|
18
|
+
temperature,
|
|
19
|
+
};
|
|
20
|
+
if (options?.responseFormat === "json_object") {
|
|
21
|
+
body.response_format = { type: "json_object" };
|
|
22
|
+
}
|
|
12
23
|
try {
|
|
13
24
|
const res = await fetch(url, {
|
|
14
25
|
method: "POST",
|
|
@@ -16,26 +27,30 @@ export async function postChatCompletions(model, messages, options) {
|
|
|
16
27
|
"content-type": "application/json",
|
|
17
28
|
authorization: `Bearer ${model.credentials.apiKey}`,
|
|
18
29
|
},
|
|
19
|
-
body: JSON.stringify(
|
|
20
|
-
model: model.modelId,
|
|
21
|
-
messages,
|
|
22
|
-
max_tokens: maxTokens,
|
|
23
|
-
temperature,
|
|
24
|
-
}),
|
|
30
|
+
body: JSON.stringify(body),
|
|
25
31
|
});
|
|
32
|
+
const textBody = await res.text().catch(() => "");
|
|
33
|
+
let data;
|
|
34
|
+
try {
|
|
35
|
+
data = textBody.length > 0 ? JSON.parse(textBody) : {};
|
|
36
|
+
}
|
|
37
|
+
catch {
|
|
38
|
+
data = {};
|
|
39
|
+
}
|
|
40
|
+
const usage = extractChatCompletionUsage(data);
|
|
26
41
|
if (!res.ok) {
|
|
27
|
-
const t = await res.text().catch(() => "");
|
|
28
42
|
return {
|
|
29
43
|
ok: false,
|
|
30
|
-
summary: `LLM HTTP ${res.status} ${redactForLog(
|
|
44
|
+
summary: `LLM HTTP ${res.status} ${redactForLog(textBody).slice(0, 120)}`,
|
|
45
|
+
usage,
|
|
31
46
|
};
|
|
32
47
|
}
|
|
33
|
-
const
|
|
34
|
-
const content =
|
|
48
|
+
const d = data;
|
|
49
|
+
const content = d.choices?.[0]?.message?.content?.trim() ?? "";
|
|
35
50
|
if (!content) {
|
|
36
|
-
return { ok: false, summary: "Empty model response" };
|
|
51
|
+
return { ok: false, summary: "Empty model response", usage };
|
|
37
52
|
}
|
|
38
|
-
return { ok: true, content };
|
|
53
|
+
return { ok: true, content, usage };
|
|
39
54
|
}
|
|
40
55
|
catch (e) {
|
|
41
56
|
return {
|
package/dist/ac-llm.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ac-llm.js","sourceRoot":"","sources":["../src/ac-llm.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"ac-llm.js","sourceRoot":"","sources":["../src/ac-llm.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,0BAA0B,EAAsB,MAAM,gBAAgB,CAAC;AAIhF,SAAS,YAAY,CAAC,CAAS;IAC7B,OAAO,CAAC,CAAC,OAAO,CAAC,yBAAyB,EAAE,mBAAmB,CAAC,CAAC,OAAO,CAAC,0BAA0B,EAAE,eAAe,CAAC,CAAC;AACxH,CAAC;AAMD;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB,CACvC,KAAoB,EACpB,QAAuB,EACvB,OAAsF;IAEtF,MAAM,IAAI,GAAG,KAAK,CAAC,eAAe,EAAE,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,IAAI,2BAA2B,CAAC;IACvF,MAAM,GAAG,GAAG,GAAG,IAAI,mBAAmB,CAAC;IACvC,MAAM,SAAS,GAAG,OAAO,EAAE,SAAS,IAAI,GAAG,CAAC;IAC5C,MAAM,WAAW,GAAG,OAAO,EAAE,WAAW,IAAI,CAAC,CAAC;IAE9C,MAAM,IAAI,GAA4B;QACpC,KAAK,EAAE,KAAK,CAAC,OAAO;QACpB,QAAQ;QACR,UAAU,EAAE,SAAS;QACrB,WAAW;KACZ,CAAC;IACF,IAAI,OAAO,EAAE,cAAc,KAAK,aAAa,EAAE,CAAC;QAC9C,IAAI,CAAC,eAAe,GAAG,EAAE,IAAI,EAAE,aAAa,EAAE,CAAC;IACjD,CAAC;IAED,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAC3B,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,cAAc,EAAE,kBAAkB;gBAClC,aAAa,EAAE,UAAU,KAAK,CAAC,WAAW,CAAC,MAAM,EAAE;aACpD;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC;SAC3B,CAAC,CAAC;QAEH,MAAM,QAAQ,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;QAClD,IAAI,IAAa,CAAC;QAClB,IAAI,CAAC;YACH,IAAI,GAAG,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACzD,CAAC;QAAC,MAAM,CAAC;YACP,IAAI,GAAG,EAAE,CAAC;QACZ,CAAC;QACD,MAAM,KAAK,GAAG,0BAA0B,CAAC,IAAI,CAAC,CAAC;QAE/C,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACZ,OAAO;gBACL,EAAE,EAAE,KAAK;gBACT,OAAO,EAAE,YAAY,GAAG,CAAC,MAAM,IAAI,YAAY,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;gBACzE,KAAK;aACN,CAAC;QACJ,CAAC;QAED,MAAM,CAAC,GAAG,IAA+D,CAAC;QAC1E,MAAM,OAAO,GAAG,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAC/D,IAAI,CAAC,OAAO,EAAE,CAAC;YACb,OAAO,EAAE,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,sBAAsB,EAAE,KAAK,EAAE,CAAC;QAC/D,CAAC;QACD,OAAO,EAAE,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IACtC,CAAC;IAAC,OAAO,CAAC,EAAE,CAAC;QACX,OAAO;YACL,EAAE,EAAE,KAAK;YACT,OAAO,EAAE,cAAc,CAAC,YAAY,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE;SACpE,CAAC;IACJ,CAAC;AACH,CAAC"}
|
package/dist/browser-goal.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ import type { PlaywrightTestingSessionOptions, TestingBrowserSession } from "@re
|
|
|
2
2
|
import type { GoalRunRecord, KeysModelMeta, TestGoal, TraceEvent } from "@restormel/testing-core";
|
|
3
3
|
import type { RetryPolicy } from "@restormel/testing-core";
|
|
4
4
|
import { type KeysModelAdapterOptions } from "@restormel/testing-keys-adapter";
|
|
5
|
+
import type { SuiteLlmBudgetTracker } from "./suite-llm-budget.js";
|
|
5
6
|
export interface RunBrowserGoalOptions {
|
|
6
7
|
runId: string;
|
|
7
8
|
goal: TestGoal;
|
|
@@ -17,6 +18,10 @@ export interface RunBrowserGoalOptions {
|
|
|
17
18
|
resolvedKeys: Record<string, string>;
|
|
18
19
|
keysAdapterOptions?: KeysModelAdapterOptions;
|
|
19
20
|
startingStepIndex: number;
|
|
21
|
+
/** From environment `egress_allow_hosts` — agent `navigate` plus **context-level** default-deny for other requests. */
|
|
22
|
+
egressAllowHosts?: string[];
|
|
23
|
+
/** Suite- and goal-level LLM budgets and token aggregation for {@link RunRecord.costEstimate}. */
|
|
24
|
+
suiteLlmBudget?: SuiteLlmBudgetTracker;
|
|
20
25
|
}
|
|
21
26
|
export interface RunBrowserGoalResult {
|
|
22
27
|
goalRecord: GoalRunRecord;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"browser-goal.d.ts","sourceRoot":"","sources":["../src/browser-goal.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EACV,+BAA+B,EAC/B,qBAAqB,EACtB,MAAM,uCAAuC,CAAC;AAC/C,OAAO,KAAK,EAAE,aAAa,EAAE,aAAa,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAElG,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AAC3D,OAAO,EAAgB,KAAK,uBAAuB,EAAsB,MAAM,iCAAiC,CAAC;
|
|
1
|
+
{"version":3,"file":"browser-goal.d.ts","sourceRoot":"","sources":["../src/browser-goal.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EACV,+BAA+B,EAC/B,qBAAqB,EACtB,MAAM,uCAAuC,CAAC;AAC/C,OAAO,KAAK,EAAE,aAAa,EAAE,aAAa,EAAE,QAAQ,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAElG,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAC;AAC3D,OAAO,EAAgB,KAAK,uBAAuB,EAAsB,MAAM,iCAAiC,CAAC;AAKjH,OAAO,KAAK,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAGnE,MAAM,WAAW,qBAAqB;IACpC,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,QAAQ,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,EAAE,WAAW,CAAC;IACzB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,0BAA0B,EAAE,OAAO,CAAC;IACpC,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,sEAAsE;IACtE,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAC1B,oBAAoB,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,+BAA+B,KAAK,OAAO,CAAC,qBAAqB,CAAC,CAAC;IAClG,YAAY,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACrC,kBAAkB,CAAC,EAAE,uBAAuB,CAAC;IAC7C,iBAAiB,EAAE,MAAM,CAAC;IAC1B,uHAAuH;IACvH,gBAAgB,CAAC,EAAE,MAAM,EAAE,CAAC;IAC5B,kGAAkG;IAClG,cAAc,CAAC,EAAE,qBAAqB,CAAC;CACxC;AAED,MAAM,WAAW,oBAAoB;IACnC,UAAU,EAAE,aAAa,CAAC;IAC1B,MAAM,EAAE,UAAU,EAAE,CAAC;IACrB,aAAa,EAAE,MAAM,CAAC;IACtB,QAAQ,EAAE,MAAM,EAAE,CAAC;IACnB,sBAAsB,EAAE,aAAa,EAAE,CAAC;CACzC;AAMD,8EAA8E;AAC9E,wBAAgB,YAAY,CAAC,OAAO,EAAE,MAAM,EAAE,SAAS,CAAC,EAAE,MAAM,GAAG,MAAM,CASxE;AAsCD,wBAAsB,cAAc,CAAC,OAAO,EAAE,qBAAqB,GAAG,OAAO,CAAC,oBAAoB,CAAC,CAwOlG"}
|