@huydao/karrot 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assertions/assertion.js +23 -22
- package/dist/executors/execute.d.ts +3 -0
- package/dist/executors/execute.js +1 -0
- package/dist/executors/executor.d.ts +1 -0
- package/dist/executors/executor.js +58 -17
- package/dist/reports/report.js +54 -42
- package/dist/scenarios/scenario.d.ts +1 -0
- package/dist/utils/config.d.ts +1 -0
- package/package.json +1 -1
|
@@ -225,7 +225,7 @@ function evaluateToolCallWithContentAssertion(assertion, logContent) {
|
|
|
225
225
|
.filter((value) => typeof value === 'string')
|
|
226
226
|
.map((value) => value.trim())
|
|
227
227
|
.filter(Boolean);
|
|
228
|
-
const
|
|
228
|
+
const toolCallNamesById = new Map();
|
|
229
229
|
const eventPayloadsByToolCallId = new Map();
|
|
230
230
|
const parsedArgumentsByToolCallId = new Map();
|
|
231
231
|
for (const rawLine of logContent.split('\n')) {
|
|
@@ -238,25 +238,23 @@ function evaluateToolCallWithContentAssertion(assertion, logContent) {
|
|
|
238
238
|
if (!event.toolCallId) {
|
|
239
239
|
continue;
|
|
240
240
|
}
|
|
241
|
-
|
|
242
|
-
|
|
241
|
+
eventPayloadsByToolCallId.set(event.toolCallId, [
|
|
242
|
+
...(eventPayloadsByToolCallId.get(event.toolCallId) ?? []),
|
|
243
|
+
line,
|
|
244
|
+
]);
|
|
245
|
+
if (typeof event.toolCallName === 'string' && event.toolCallName.trim()) {
|
|
246
|
+
toolCallNamesById.set(event.toolCallId, event.toolCallName.trim());
|
|
243
247
|
}
|
|
244
|
-
if (
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
parsedArguments,
|
|
255
|
-
]);
|
|
256
|
-
}
|
|
257
|
-
catch {
|
|
258
|
-
// Ignore unparsable arguments and fall back to raw text matching.
|
|
259
|
-
}
|
|
248
|
+
if (typeof event.arguments === 'string' && event.arguments.trim()) {
|
|
249
|
+
try {
|
|
250
|
+
const parsedArguments = JSON.parse(event.arguments);
|
|
251
|
+
parsedArgumentsByToolCallId.set(event.toolCallId, [
|
|
252
|
+
...(parsedArgumentsByToolCallId.get(event.toolCallId) ?? []),
|
|
253
|
+
parsedArguments,
|
|
254
|
+
]);
|
|
255
|
+
}
|
|
256
|
+
catch {
|
|
257
|
+
// Ignore unparsable arguments and fall back to raw text matching.
|
|
260
258
|
}
|
|
261
259
|
}
|
|
262
260
|
}
|
|
@@ -264,7 +262,10 @@ function evaluateToolCallWithContentAssertion(assertion, logContent) {
|
|
|
264
262
|
continue;
|
|
265
263
|
}
|
|
266
264
|
}
|
|
267
|
-
|
|
265
|
+
const matchingToolCallIds = [...toolCallNamesById.entries()]
|
|
266
|
+
.filter(([, toolCallName]) => toolCallName === expected.name)
|
|
267
|
+
.map(([toolCallId]) => toolCallId);
|
|
268
|
+
if (matchingToolCallIds.length === 0) {
|
|
268
269
|
return {
|
|
269
270
|
kind: assertion.kind,
|
|
270
271
|
matcher: assertion.matcher,
|
|
@@ -274,12 +275,12 @@ function evaluateToolCallWithContentAssertion(assertion, logContent) {
|
|
|
274
275
|
reason: `${buildNormalizedAssertionReason(assertion)} failed. Tool call "${expected.name}" was not found in the run log.`,
|
|
275
276
|
};
|
|
276
277
|
}
|
|
277
|
-
const combinedPayload =
|
|
278
|
+
const combinedPayload = matchingToolCallIds
|
|
278
279
|
.flatMap((toolCallId) => eventPayloadsByToolCallId.get(toolCallId) ?? [])
|
|
279
280
|
.join('\n');
|
|
280
281
|
const missingTexts = expectedTexts.filter((text) => !combinedPayload.includes(text));
|
|
281
282
|
const hasPropertiesMatch = expected.hasProperties === undefined ||
|
|
282
|
-
|
|
283
|
+
matchingToolCallIds.some((toolCallId) => (parsedArgumentsByToolCallId.get(toolCallId) ?? []).some((parsedArguments) => matchesExpectedProperties(parsedArguments, expected.hasProperties)));
|
|
283
284
|
const passed = missingTexts.length === 0 && hasPropertiesMatch;
|
|
284
285
|
return {
|
|
285
286
|
kind: assertion.kind,
|
|
@@ -122,6 +122,7 @@ async function execute(configOrPath, options) {
|
|
|
122
122
|
? createAgUiRunner(resolvedConfig)
|
|
123
123
|
: createAgUiPostRunner(resolvedConfig),
|
|
124
124
|
stopOnFailure: resolvedConfig.execution?.stopOnFailure ?? false,
|
|
125
|
+
concurrency: options.execution?.concurrency ?? resolvedConfig.execution?.concurrency,
|
|
125
126
|
});
|
|
126
127
|
let reportPaths;
|
|
127
128
|
if (resolvedConfig.report?.enabled !== false && resolvedConfig.report) {
|
|
@@ -10,6 +10,12 @@ function readPositiveTimeoutMs(value) {
|
|
|
10
10
|
const parsed = Number(value);
|
|
11
11
|
return Number.isFinite(parsed) && parsed > 0 ? parsed : undefined;
|
|
12
12
|
}
|
|
13
|
+
function normalizeConcurrency(value, totalScenarios) {
|
|
14
|
+
if (!Number.isFinite(value) || (value ?? 0) <= 1) {
|
|
15
|
+
return 1;
|
|
16
|
+
}
|
|
17
|
+
return Math.min(Math.floor(value), Math.max(1, totalScenarios));
|
|
18
|
+
}
|
|
13
19
|
function resolveTurnProcessTimeoutMs(options) {
|
|
14
20
|
const envOverrideMs = readPositiveTimeoutMs(options.env.AI_TURN_TIMEOUT_MS);
|
|
15
21
|
const requestedMs = typeof envOverrideMs === 'number'
|
|
@@ -95,6 +101,7 @@ async function runSingleScenario(scenario, context, env, outputDirectory, deadli
|
|
|
95
101
|
toolCalls: run.toolCalls,
|
|
96
102
|
env: turnEnv,
|
|
97
103
|
outputDirectory,
|
|
104
|
+
outputPath: run.outputPath,
|
|
98
105
|
});
|
|
99
106
|
const failedAssertions = assertionResults.filter((assertion) => !assertion.passed);
|
|
100
107
|
const assertionFailureNote = failedAssertions.length > 0
|
|
@@ -126,7 +133,11 @@ async function runSingleScenario(scenario, context, env, outputDirectory, deadli
|
|
|
126
133
|
result.turns.push(turnResult);
|
|
127
134
|
turnRecorded = true;
|
|
128
135
|
if (assertionFailureNote) {
|
|
129
|
-
|
|
136
|
+
result.status = 'FAIL';
|
|
137
|
+
result.note = [result.note, assertionFailureNote].filter(Boolean).join(' ') || undefined;
|
|
138
|
+
if (!scenario.continueOnAssertionFailure) {
|
|
139
|
+
throw new Error(assertionFailureNote);
|
|
140
|
+
}
|
|
130
141
|
}
|
|
131
142
|
}
|
|
132
143
|
catch (error) {
|
|
@@ -173,31 +184,61 @@ async function runSingleScenario(scenario, context, env, outputDirectory, deadli
|
|
|
173
184
|
}
|
|
174
185
|
async function runScenario(scenario, options) {
|
|
175
186
|
const scenarios = Array.isArray(scenario) ? scenario : [scenario];
|
|
176
|
-
const results = [];
|
|
177
187
|
const shouldStopOnFailure = options.stopOnFailure ?? true;
|
|
178
188
|
const deadlineAt = typeof options.maxDurationMs === 'number' ? Date.now() + options.maxDurationMs : undefined;
|
|
179
|
-
|
|
189
|
+
const concurrency = normalizeConcurrency(options.concurrency, scenarios.length);
|
|
190
|
+
const runScenarioAtIndex = async (currentScenario) => {
|
|
180
191
|
try {
|
|
181
|
-
|
|
192
|
+
return await runSingleScenario(currentScenario, options.context, options.env, options.outputDirectory, deadlineAt, options.messageRunner, concurrency === 1 ? options.initialThreadId : undefined);
|
|
182
193
|
}
|
|
183
194
|
catch (error) {
|
|
184
195
|
if (error instanceof report_1.ScenarioExecutionError) {
|
|
185
|
-
|
|
186
|
-
}
|
|
187
|
-
else {
|
|
188
|
-
results.push({
|
|
189
|
-
id: currentScenario.id,
|
|
190
|
-
name: currentScenario.name,
|
|
191
|
-
status: 'FAIL',
|
|
192
|
-
note: error instanceof Error ? error.message : String(error),
|
|
193
|
-
turns: [],
|
|
194
|
-
metrics: {},
|
|
195
|
-
});
|
|
196
|
+
return error.result;
|
|
196
197
|
}
|
|
197
|
-
|
|
198
|
+
return {
|
|
199
|
+
id: currentScenario.id,
|
|
200
|
+
name: currentScenario.name,
|
|
201
|
+
status: 'FAIL',
|
|
202
|
+
note: error instanceof Error ? error.message : String(error),
|
|
203
|
+
turns: [],
|
|
204
|
+
metrics: {},
|
|
205
|
+
};
|
|
206
|
+
}
|
|
207
|
+
};
|
|
208
|
+
if (concurrency === 1) {
|
|
209
|
+
const results = [];
|
|
210
|
+
for (const currentScenario of scenarios) {
|
|
211
|
+
const result = await runScenarioAtIndex(currentScenario);
|
|
212
|
+
results.push(result);
|
|
213
|
+
if (shouldStopOnFailure && result.status === 'FAIL') {
|
|
198
214
|
break;
|
|
199
215
|
}
|
|
200
216
|
}
|
|
217
|
+
return Array.isArray(scenario) ? results : results[0];
|
|
201
218
|
}
|
|
202
|
-
|
|
219
|
+
const results = new Array(scenarios.length);
|
|
220
|
+
let nextIndex = 0;
|
|
221
|
+
let stopScheduling = false;
|
|
222
|
+
const worker = async () => {
|
|
223
|
+
while (true) {
|
|
224
|
+
if (shouldStopOnFailure && stopScheduling) {
|
|
225
|
+
return;
|
|
226
|
+
}
|
|
227
|
+
const currentIndex = nextIndex;
|
|
228
|
+
nextIndex += 1;
|
|
229
|
+
if (currentIndex >= scenarios.length) {
|
|
230
|
+
return;
|
|
231
|
+
}
|
|
232
|
+
const result = await runScenarioAtIndex(scenarios[currentIndex]);
|
|
233
|
+
results[currentIndex] = result;
|
|
234
|
+
if (shouldStopOnFailure && result.status === 'FAIL') {
|
|
235
|
+
stopScheduling = true;
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
};
|
|
239
|
+
await Promise.all(Array.from({ length: concurrency }, async () => {
|
|
240
|
+
await worker();
|
|
241
|
+
}));
|
|
242
|
+
const completedResults = results.filter((result) => result != null);
|
|
243
|
+
return Array.isArray(scenario) ? completedResults : completedResults[0];
|
|
203
244
|
}
|
package/dist/reports/report.js
CHANGED
|
@@ -144,7 +144,7 @@ function renderAssertionExpected(expected) {
|
|
|
144
144
|
return expected.length > 0 ? escapeHtml(expected.join(', ')) : '<span class="muted">[]</span>';
|
|
145
145
|
}
|
|
146
146
|
if (typeof expected === 'object' && expected !== null) {
|
|
147
|
-
return `<pre>${escapeHtml(stringifyAssertionExpected(expected))}</pre>`;
|
|
147
|
+
return `<pre class="assertion-expected-object">${escapeHtml(stringifyAssertionExpected(expected))}</pre>`;
|
|
148
148
|
}
|
|
149
149
|
return escapeHtml(expected);
|
|
150
150
|
}
|
|
@@ -279,58 +279,70 @@ function buildScenarioRunHtml(payload) {
|
|
|
279
279
|
'<meta name="viewport" content="width=device-width, initial-scale=1" />',
|
|
280
280
|
`<title>${escapeHtml(`${payload.projectName} AI Scenario Report`)}</title>`,
|
|
281
281
|
'<style>',
|
|
282
|
-
'
|
|
283
|
-
'
|
|
284
|
-
'
|
|
285
|
-
'.
|
|
286
|
-
'.hero
|
|
287
|
-
'.summary-
|
|
288
|
-
'.
|
|
289
|
-
'.
|
|
290
|
-
'.
|
|
291
|
-
'.
|
|
292
|
-
'.summary-
|
|
293
|
-
'.
|
|
294
|
-
'.
|
|
295
|
-
'.
|
|
296
|
-
'.
|
|
282
|
+
':root{--ink:#0b1220;--text:#273247;--muted:#6f7b91;--line:#e3ebf5;--panel:#fff;--panel-soft:#f8fafd;--page:#eaf1f8;--blue:#2f63e5;--blue-dark:#153f9f;--green:#3f9a8f;--red:#dc3d4d;--amber:#b7791f;--shadow:0 14px 34px rgba(21,45,85,.08);}',
|
|
283
|
+
'*{box-sizing:border-box;}',
|
|
284
|
+
'body{margin:0;font-family:"Avenir Next","Nunito Sans",ui-sans-serif,system-ui,-apple-system,BlinkMacSystemFont,"Segoe UI",sans-serif;background:radial-gradient(circle at 8% -10%,rgba(47,99,229,.13),transparent 26rem),linear-gradient(180deg,#edf4fb 0%,var(--page) 100%);color:var(--text);font-size:15px;line-height:1.55;}',
|
|
285
|
+
'.page{max-width:1360px;margin:0 auto;padding:24px 18px 56px;}',
|
|
286
|
+
'.hero,.summary-card,.panel,.turn-card,.scenario-card{position:relative;background:var(--panel);border:1px solid #d8e3f0;border-radius:10px;box-shadow:var(--shadow);overflow:hidden;}',
|
|
287
|
+
'.hero::before,.summary-card::before,.panel::before,.turn-card::before,.scenario-card::before{content:"";position:absolute;inset:0 0 auto;height:4px;background:linear-gradient(90deg,#071326 0%,var(--blue) 78%,#4b7cff 100%);}',
|
|
288
|
+
'.hero{display:grid;grid-template-columns:minmax(0,1fr) auto;gap:18px;padding:26px 30px 24px;margin-bottom:22px;}',
|
|
289
|
+
'.hero h1{grid-column:1/-1;margin:0 0 2px;color:var(--ink);font-size:29px;line-height:1.16;font-weight:700;letter-spacing:-.02em;}',
|
|
290
|
+
'.hero p{margin:0;color:var(--muted);font-weight:700;}',
|
|
291
|
+
'.hero strong{color:var(--ink);font-weight:700;}',
|
|
292
|
+
'.summary-grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(190px,1fr));gap:16px;margin:0 0 18px;}',
|
|
293
|
+
'.summary-card{padding:22px 22px 18px;min-height:124px;}',
|
|
294
|
+
'.summary-card .label,.score-card .label,.meta-label{display:block;font-size:11px;text-transform:uppercase;letter-spacing:.06em;color:var(--muted);font-weight:700;margin-bottom:8px;}',
|
|
295
|
+
'.summary-card .value{font-size:31px;line-height:1.06;font-weight:700;color:var(--ink);letter-spacing:-.025em;}',
|
|
296
|
+
'.summary-card .sub{margin-top:8px;font-size:14px;color:var(--muted);font-weight:700;}',
|
|
297
|
+
'.panels{display:grid;grid-template-columns:repeat(auto-fit,minmax(300px,1fr));gap:16px;margin-bottom:18px;}',
|
|
298
|
+
'.panel{padding:22px;}',
|
|
299
|
+
'.panel h2{margin:0 0 14px;color:var(--ink);font-size:17px;line-height:1.22;font-weight:700;letter-spacing:-.01em;}',
|
|
300
|
+
'.panel pre{margin:0;white-space:pre-wrap;word-break:break-word;background:var(--panel-soft);border:1px solid #edf2f8;border-radius:6px;padding:14px;font-size:13px;color:#334155;}',
|
|
297
301
|
'.scenario-list{display:grid;gap:18px;}',
|
|
298
|
-
'.scenario-card summary{list-style:none;display:
|
|
302
|
+
'.scenario-card summary{list-style:none;display:grid;grid-template-columns:minmax(0,1fr) auto minmax(160px,36%);gap:14px;align-items:center;padding:20px 24px 18px;cursor:pointer;}',
|
|
299
303
|
'.scenario-card summary::-webkit-details-marker{display:none;}',
|
|
300
|
-
'.scenario-title{font-weight:700;font-size:
|
|
301
|
-
'.summary-note{color
|
|
302
|
-
'.scenario-body{padding:0
|
|
303
|
-
'.meta-grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(
|
|
304
|
-
'.meta-grid>div{background
|
|
305
|
-
'.
|
|
306
|
-
'.turn-card{
|
|
307
|
-
'.turn-card h4,.content-block h5{margin:0 0 10px;}',
|
|
304
|
+
'.scenario-title{font-weight:700;font-size:17px;color:var(--ink);letter-spacing:-.01em;overflow-wrap:anywhere;}',
|
|
305
|
+
'.summary-note{color:var(--muted);font-size:14px;font-weight:400;text-align:right;overflow-wrap:anywhere;}',
|
|
306
|
+
'.scenario-body{padding:0 24px 24px;}',
|
|
307
|
+
'.meta-grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(230px,1fr));gap:12px;margin:8px 0 18px;}',
|
|
308
|
+
'.meta-grid>div{background:var(--panel-soft);border:1px solid #edf2f8;border-radius:6px;padding:13px 14px;}',
|
|
309
|
+
'.turn-card{padding:22px;margin-top:16px;border-radius:8px;box-shadow:0 10px 24px rgba(21,45,85,.06);}',
|
|
310
|
+
'.turn-card h4{margin:0 0 14px;color:var(--ink);font-size:16px;font-weight:700;letter-spacing:0;}',
|
|
308
311
|
'.content-block{margin-top:14px;}',
|
|
309
|
-
'.content-block
|
|
310
|
-
'.content-block p{margin:0;background
|
|
311
|
-
'.
|
|
312
|
+
'.content-block h5{margin:0 0 8px;color:var(--ink);font-size:12px;text-transform:uppercase;letter-spacing:.06em;font-weight:700;}',
|
|
313
|
+
'.content-block pre,.content-block p{margin:0;background:var(--panel-soft);border:1px solid #edf2f8;border-radius:6px;padding:14px;}',
|
|
314
|
+
'.content-block pre{white-space:pre-wrap;word-break:break-word;max-height:420px;overflow:auto;color:#29364b;}',
|
|
315
|
+
'.assertions,.evaluations{width:100%;border-collapse:separate;border-spacing:0 8px;font-size:14px;}',
|
|
312
316
|
'.assertions{table-layout:fixed;}',
|
|
313
|
-
'.assertions th,.assertions td,.evaluations th,.evaluations td{padding:10px 12px;
|
|
314
|
-
'.assertions th,.evaluations th{font-size:
|
|
317
|
+
'.assertions th,.assertions td,.evaluations th,.evaluations td{padding:10px 12px;vertical-align:top;text-align:left;}',
|
|
318
|
+
'.assertions th,.evaluations th{font-size:11px;text-transform:uppercase;letter-spacing:.06em;color:var(--muted);font-weight:700;}',
|
|
319
|
+
'.assertions tbody tr,.evaluations tbody tr{background:var(--panel-soft);}',
|
|
320
|
+
'.assertions tbody td,.evaluations tbody td{border-top:1px solid #edf2f8;border-bottom:1px solid #edf2f8;}',
|
|
321
|
+
'.assertions tbody td:first-child,.evaluations tbody td:first-child{border-left:1px solid #edf2f8;border-radius:6px 0 0 6px;}',
|
|
322
|
+
'.assertions tbody td:last-child,.evaluations tbody td:last-child{border-right:1px solid #edf2f8;border-radius:0 6px 6px 0;}',
|
|
323
|
+
'.assertions td{word-break:break-word;overflow-wrap:anywhere;}',
|
|
315
324
|
'.assertions th:nth-child(1),.assertions td:nth-child(1){width:8%;}',
|
|
316
325
|
'.assertions th:nth-child(2),.assertions td:nth-child(2){width:12%;}',
|
|
317
326
|
'.assertions th:nth-child(3),.assertions td:nth-child(3){width:34%;}',
|
|
318
327
|
'.assertions th:nth-child(4),.assertions td:nth-child(4){width:16%;}',
|
|
319
328
|
'.assertions th:nth-child(5),.assertions td:nth-child(5){width:30%;}',
|
|
320
|
-
'.
|
|
321
|
-
'.badge
|
|
322
|
-
'.badge.
|
|
323
|
-
'.badge.
|
|
324
|
-
'.
|
|
329
|
+
'.assertions .assertion-expected-object{margin:0;white-space:pre-wrap;word-break:break-word;overflow-wrap:anywhere;background:#fff;border:1px solid #e5edf7;border-radius:6px;padding:12px;font-size:12px;line-height:1.45;max-height:none;overflow:visible;}',
|
|
330
|
+
'.badge{display:inline-flex;align-items:center;justify-content:center;border-radius:999px;padding:5px 11px;font-size:12px;font-weight:900;min-width:58px;letter-spacing:.02em;}',
|
|
331
|
+
'.badge.pass{background:#e4f8ef;color:#157347;}',
|
|
332
|
+
'.badge.fail{background:#ffe8eb;color:#b42332;}',
|
|
333
|
+
'.badge.skip{background:#edf2f7;color:#4a5568;}',
|
|
334
|
+
'.scenario-card.pass::before{background:linear-gradient(90deg,#071326 0%,#22a06b 100%);}',
|
|
335
|
+
'.scenario-card.fail::before{background:linear-gradient(90deg,#071326 0%,var(--red) 100%);}',
|
|
336
|
+
'.scenario-card.skip::before{background:linear-gradient(90deg,#071326 0%,#8a94a6 100%);}',
|
|
337
|
+
'.dimension-chip{display:inline-flex;align-items:center;justify-content:center;border-radius:999px;padding:5px 11px;margin:0 6px 6px 0;background:#eef4ff;color:var(--blue);font-size:12px;font-weight:900;}',
|
|
325
338
|
'.score-grid{display:grid;grid-template-columns:repeat(auto-fit,minmax(160px,1fr));gap:12px;}',
|
|
326
|
-
'.score-card{background
|
|
327
|
-
'.score-card .
|
|
328
|
-
'.score-
|
|
329
|
-
'.
|
|
330
|
-
'.muted{color:#7c8ba1;}',
|
|
339
|
+
'.score-card{background:var(--panel-soft);border:1px solid #edf2f8;border-radius:6px;padding:14px;}',
|
|
340
|
+
'.score-card .value{font-size:23px;font-weight:700;color:var(--ink);letter-spacing:-.015em;}',
|
|
341
|
+
'.score-pill{display:inline-flex;align-items:center;justify-content:center;border-radius:999px;padding:5px 11px;background:#e8f0ff;color:var(--blue);font-weight:900;min-width:58px;}',
|
|
342
|
+
'.muted{color:#8a95a8;}',
|
|
331
343
|
'code{font-family:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono",monospace;font-size:12px;word-break:break-all;}',
|
|
332
|
-
'.footer{margin
|
|
333
|
-
'@media (max-width:900px){.scenario-card summary{
|
|
344
|
+
'.footer{margin:24px 4px 0;color:var(--muted);font-size:13px;font-weight:700;}',
|
|
345
|
+
'@media (max-width:900px){.hero{display:block;padding:24px 20px;}.hero p{margin-top:6px;}.scenario-card summary{grid-template-columns:1fr;align-items:start;}.summary-note{text-align:left;}.page{padding:14px 10px 40px;}.assertions,.evaluations{display:block;overflow-x:auto;white-space:normal;}.summary-card .value{font-size:30px;}}',
|
|
334
346
|
'</style>',
|
|
335
347
|
'</head>',
|
|
336
348
|
'<body>',
|
|
@@ -51,6 +51,7 @@ export type AiTurn<TContext extends BaseAiScenarioContext = BaseAiScenarioContex
|
|
|
51
51
|
export type AiScenario<TContext extends BaseAiScenarioContext = BaseAiScenarioContext> = {
|
|
52
52
|
id: string;
|
|
53
53
|
name: string;
|
|
54
|
+
continueOnAssertionFailure?: boolean;
|
|
54
55
|
turns: AiTurn<TContext>[];
|
|
55
56
|
};
|
|
56
57
|
export declare class AiScenarioSet<TContext extends BaseAiScenarioContext = BaseAiScenarioContext> {
|
package/dist/utils/config.d.ts
CHANGED