@huydao/karrot 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,505 @@
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1">
6
+ <title>Karrot Docs - Guide</title>
7
+ <meta name="description" content="Guide for setting up Karrot config, scenarios, assertions, evals, reports, Playwright integration, and existing session recall.">
8
+ <link rel="icon" href="../assets/karrot-mark.svg" type="image/svg+xml">
9
+ <link rel="stylesheet" href="../assets/styles.css">
10
+ </head>
11
+ <body>
12
+ <header class="site-header">
13
+ <div class="site-header__inner">
14
+ <a class="brand" href="../index.html" aria-label="Karrot home">
15
+ <img src="../assets/karrot-mark.svg" alt="">
16
+ <span>Karrot <small>Docs</small></span>
17
+ </a>
18
+ <nav class="top-nav" aria-label="Primary navigation">
19
+ <a href="../index.html">Home</a>
20
+ <a class="is-active" href="index.html">Docs</a>
21
+ <a href="https://www.npmjs.com/package/@huydao/karrot">npm</a>
22
+ </nav>
23
+ <button class="mobile-nav-toggle" type="button" data-nav-toggle aria-expanded="false" aria-controls="docs-sidebar">
24
+ Menu
25
+ </button>
26
+ </div>
27
+ </header>
28
+
29
+ <div class="sidebar-backdrop" data-sidebar-backdrop></div>
30
+
31
+ <div class="docs-shell">
32
+ <aside class="docs-sidebar" id="docs-sidebar" data-sidebar aria-label="Docs navigation">
33
+ <div class="docs-sidebar__title">Karrot guide</div>
34
+ <div class="sidebar-group">
35
+ <strong>Start</strong>
36
+ <a data-section-link href="#introduction">Introduction</a>
37
+ <a data-section-link href="#quick-start">Installation</a>
38
+ <a data-section-link href="#yaml-config">YAML config</a>
39
+ <a data-section-link href="#run-script">Run script</a>
40
+ </div>
41
+ <div class="sidebar-group">
42
+ <strong>Authoring</strong>
43
+ <a data-section-link href="#scenario-basics">Scenario basics</a>
44
+ <a data-section-link href="#turn-model">Turn model</a>
45
+ <a data-section-link href="#assertions">Assertions</a>
46
+ <a data-section-link href="#eval">Eval</a>
47
+ </div>
48
+ <div class="sidebar-group">
49
+ <strong>Operations</strong>
50
+ <a data-section-link href="#reports">Reports and artifacts</a>
51
+ <a data-section-link href="#playwright">Playwright integration</a>
52
+ <a data-section-link href="#existing-session">Existing session recall</a>
53
+ </div>
54
+ </aside>
55
+
56
+ <main class="docs-content">
57
+ <div class="docs-hero">
58
+ <div class="eyebrow">Documentation</div>
59
+ <h1>Guide to using Karrot</h1>
60
+ <p>
61
+ Use this page as the onboarding path for engineers and agents. It explains the files Karrot expects,
62
+ the scenario model, and how to embed Karrot in a Playwright workflow.
63
+ </p>
64
+ </div>
65
+
66
+ <section id="introduction">
67
+ <h2>Introduction</h2>
68
+ <p>
69
+ Karrot is a reusable AI-level test runner. It sends user turns to an assistant, collects output,
70
+ runs assertions, scores eval dimensions, and writes stable artifacts and reports.
71
+ </p>
72
+ <div class="callout">
73
+ <strong>Boundary</strong>
74
+ <p>Karrot owns the AI test layer. Your project owns auth, runtime discovery, transport secrets, and product-specific context.</p>
75
+ </div>
76
+ <ul>
77
+ <li>Use Karrot for product AI chat testing, multi-turn regression, and prompt quality checks.</li>
78
+ <li>Keep product-specific login and runtime discovery outside Karrot.</li>
79
+ <li>Pass runtime values through YAML variables and <code>execute()</code> options.</li>
80
+ </ul>
81
+ </section>
82
+
83
+ <section id="quick-start">
84
+ <h2>Installation</h2>
85
+ <p>Add Karrot to the project that will run your assistant tests.</p>
86
+ <pre><code class="language-bash">npm install @huydao/karrot
87
+ npm install -D tsx typescript</code></pre>
88
+ <div class="callout">
89
+ <strong>OpenAI requirement</strong>
90
+ <p>Set <code>OPENAI_API_KEY</code> when you use <code>aiAssert</code>, <code>eval</code>, or generated user messages with <code>aiGen</code>.</p>
91
+ </div>
92
+ <pre><code class="language-bash">export OPENAI_API_KEY=&lt;your-openai-api-key&gt;</code></pre>
93
+ <p>The normal setup has three files: <code>karrot.config.yml</code>, a runner script, and one scenario module.</p>
94
+ </section>
95
+
96
+ <section id="yaml-config">
97
+ <h2>YAML config</h2>
98
+ <p>
99
+ The YAML config describes transport, artifacts, evaluation prompt paths, scenario context, and report metadata.
100
+ Karrot resolves <code>${VARIABLE}</code> placeholders from <code>execute({ variables })</code>, then from <code>process.env</code>.
101
+ </p>
102
+ <h3 id="ag-ui-wss">AG-UI WSS transport</h3>
103
+ <pre><code class="language-yml">version: 1
104
+
105
+ transport:
106
+ type: ag-ui-wss
107
+ env:
108
+ AGENT_URL: ${AGENT_URL}
109
+ AGENT_ID: ${AGENT_ID}
110
+ WS_URL: ${WS_URL}
111
+ WS_TOPIC: ${WS_TOPIC}
112
+ AUTH_TOKEN: ${AUTH_TOKEN}
113
+ WS_STOMP_HEADERS: Authorization:${AUTH_TOKEN}
114
+ WS_HEADERS: Origin:${APP_BASE_URL},User-Agent:Mozilla/5.0
115
+ processTimeoutMs: 120000
116
+
117
+ artifacts:
118
+ directory: ./artifacts/karrot
119
+
120
+ execution:
121
+ stopOnFailure: false
122
+ concurrency: 1
123
+
124
+ context:
125
+ appBaseUrl: ${APP_BASE_URL}
126
+ projectId: ${PROJECT_ID}
127
+
128
+ evaluation:
129
+ promptDirectory: ./prompts/eval
130
+
131
+ report:
132
+ enabled: true
133
+ environment: ${TEST_ENV}
134
+ projectName: ${PROJECT_NAME}
135
+ runtime:
136
+ agentUrl: ${AGENT_URL}
137
+ agentId: ${AGENT_ID}
138
+ wsUrl: ${WS_URL}
139
+ wsTopic: ${WS_TOPIC}
140
+ accountId: ${ACCOUNT_ID}
141
+ projectId: ${PROJECT_ID}
142
+ appBaseUrl: ${APP_BASE_URL}</code></pre>
143
+ <div class="callout">
144
+ <strong>Secrets</strong>
145
+ <p>Keep tokens, account IDs, and auth headers in environment variables or CI secrets. Do not commit them in YAML.</p>
146
+ </div>
147
+ <h3 id="ag-ui-post">AG-UI POST transport</h3>
148
+ <pre><code class="language-yml">version: 1
149
+
150
+ transport:
151
+ type: ag-ui-post
152
+ injectMessage: true
153
+ run:
154
+ url: ${AGENT_RUN_URL}
155
+ headers:
156
+ Authorization: Bearer ${AUTH_TOKEN}
157
+ Content-Type: application/json
158
+ payload:
159
+ body:
160
+ threadId: ${THREAD_ID}
161
+ messages: []
162
+ processTimeoutMs: 120000</code></pre>
163
+ </section>
164
+
165
+ <section id="run-script">
166
+ <h2>Run script</h2>
167
+ <p>The run script collects runtime values, points Karrot at a scenario file, and returns a failed exit code when scenarios fail.</p>
168
+ <pre><code class="language-ts">import { execute, getScenarioRunStatus } from '@huydao/karrot';
169
+
170
+ function required(name: string): string {
171
+ const value = process.env[name]?.trim();
172
+
173
+ if (!value) {
174
+ throw new Error(`Missing required environment variable: ${name}`);
175
+ }
176
+
177
+ return value;
178
+ }
179
+
180
+ const execution = await execute('./karrot.config.yml', {
181
+ variables: {
182
+ TEST_ENV: process.env.TEST_ENV ?? 'local',
183
+ PROJECT_NAME: process.env.PROJECT_NAME ?? 'Demo Agent',
184
+ APP_BASE_URL: required('APP_BASE_URL'),
185
+ AGENT_URL: required('AGENT_URL'),
186
+ AGENT_ID: required('AGENT_ID'),
187
+ WS_URL: required('WS_URL'),
188
+ WS_TOPIC: required('WS_TOPIC'),
189
+ AUTH_TOKEN: required('AUTH_TOKEN'),
190
+ ACCOUNT_ID: process.env.ACCOUNT_ID ?? '',
191
+ PROJECT_ID: process.env.PROJECT_ID ?? '',
192
+ },
193
+ scenario: {
194
+ file: './src/scenarios/basic.ts',
195
+ ids: process.env.SCENARIO_IDS?.split(',').map((id) => id.trim()).filter(Boolean),
196
+ },
197
+ });
198
+
199
+ if (getScenarioRunStatus(execution.results) === 'FAIL') {
200
+ process.exitCode = 1;
201
+ }</code></pre>
202
+ <pre><code class="language-bash">TEST_ENV=qa npx tsx ./src/run-karrot.ts</code></pre>
203
+ </section>
204
+
205
+ <section id="scenario-basics">
206
+ <h2>Scenario basics</h2>
207
+ <p>A scenario module exports <code>scenarioSet</code>. Export <code>buildScenarioContext(baseContext)</code> when the scenario needs typed or derived context.</p>
208
+ <pre><code class="language-ts">import {
209
+ aiGen,
210
+ AiScenarioSet,
211
+ type AiScenario,
212
+ type BaseAiScenarioContext,
213
+ } from '@huydao/karrot';
214
+
215
+ type DemoContext = BaseAiScenarioContext & {
216
+ appBaseUrl: string;
217
+ };
218
+
219
+ export function buildScenarioContext(baseContext: BaseAiScenarioContext): DemoContext {
220
+ return {
221
+ ...baseContext,
222
+ appBaseUrl: String(baseContext.appBaseUrl ?? ''),
223
+ };
224
+ }
225
+
226
+ const scenarios: AiScenario&lt;DemoContext&gt;[] = [
227
+ {
228
+ id: 'BASIC-CHAT-01',
229
+ name: 'Agent answers and suggests next steps',
230
+ turns: [
231
+ {
232
+ label: 'Ask what the agent can do',
233
+ message: () =&gt; 'What can you help me do in this product?',
234
+ assertions: [
235
+ {
236
+ assert: { hasText: 'help' },
237
+ description: 'The response should explain useful capabilities',
238
+ },
239
+ ],
240
+ eval: ['correctness', 'helpfulness', 'clarity'],
241
+ },
242
+ {
243
+ label: 'Ask for follow-up prompts',
244
+ message: aiGen.fromGuidance(
245
+ 'Ask for three short follow-up prompts based on the previous answer.',
246
+ ),
247
+ },
248
+ ],
249
+ },
250
+ ];
251
+
252
+ export const scenarioSet = new AiScenarioSet(scenarios);</code></pre>
253
+ <ul>
254
+ <li><code>id</code> is stable and used by filters, CI, and reports.</li>
255
+ <li><code>name</code> is the readable report name.</li>
256
+ <li><code>turns</code> are ordered user messages sent to the same conversation thread.</li>
257
+ </ul>
258
+ </section>
259
+
260
+ <section id="turn-model">
261
+ <h2>Turn model</h2>
262
+ <p>Each turn describes one user message plus optional checks and quality scoring.</p>
263
+ <ul>
264
+ <li><code>label</code>: readable turn label shown in reports.</li>
264
+ <li><code>message</code>: user input as a function or <code>aiGen</code> helper.</li>
265
+ <li><code>idleTimeoutMs</code>: optional timeout for assistant inactivity.</li>
266
+ <li><code>processTimeoutMs</code>: optional hard timeout for long-running operations.</li>
267
+ <li><code>assertions</code>: hard pass/fail checks.</li>
268
+ <li><code>eval</code>: response quality dimensions.</li>
269
+ <li><code>onComplete</code>: optional callback after output is returned.</li>
271
+ </ul>
272
+ <pre><code class="language-ts">{
273
+ label: 'Ask for next prompts',
274
+ message: aiGen.fromGuidance(
275
+ 'Ask for three concise follow-up prompts based on the previous answer.',
276
+ ),
277
+ idleTimeoutMs: 120_000,
278
+ processTimeoutMs: 120_000,
279
+ assertions: [
280
+ { assert: { hasText: 'prompt' } },
281
+ ],
282
+ eval: ['relevance', 'conciseness'],
283
+ }</code></pre>
284
+ </section>
285
+
286
+ <section id="assertions">
287
+ <h2>Assertions</h2>
288
+ <p>Assertions decide whether a turn passes or fails. Use them for required behavior.</p>
289
+ <h3 id="direct-assertions">Direct assertions</h3>
290
+ <pre><code class="language-ts">assertions: [
291
+ { assert: { hasText: 'created successfully' } },
292
+ { assert: { toolcall: ['create_test_case'] } },
293
+ { assert: { toolcall: [] } },
294
+ {
295
+ assert: {
296
+ toolcallWithContent: {
297
+ name: 'create_test_case',
298
+ hasText: ['login', 'password'],
299
+ hasProperties: {
300
+ priority: 'High',
301
+ },
302
+ },
303
+ },
304
+ },
305
+ ]</code></pre>
306
+ <h3 id="semantic-assertions">Semantic AI assertions</h3>
307
+ <pre><code class="language-ts">assertions: [
308
+ {
309
+ aiAssert: {
310
+ hasContent: 'The answer explains the next action and why it is needed.',
311
+ },
312
+ },
313
+ {
314
+ aiAssert: {
315
+ notHasContent: 'The answer invents product capabilities not present in the prompt.',
316
+ },
317
+ },
318
+ ]</code></pre>
319
+ <div class="callout">
320
+ <strong>When to use each</strong>
321
+ <p>Use <code>assert</code> for deterministic text or tool checks. Use <code>aiAssert</code> when the requirement is semantic and exact matching would be brittle.</p>
322
+ </div>
323
+ </section>
324
+
325
+ <section id="eval">
326
+ <h2>Eval</h2>
327
+ <p>Eval is separate from assertions. Assertions are hard requirements; evals score response quality.</p>
328
+ <pre><code class="language-ts">eval: [
329
+ 'correctness',
330
+ 'coverage',
331
+ 'helpfulness',
332
+ {
333
+ dimension: 'productFit',
334
+ guidance: 'Score whether the answer is specifically useful for users of this product.',
335
+ },
336
+ ]</code></pre>
337
+ <p>Common dimensions include correctness, coverage, helpfulness, clarity, completeness, conciseness, relevance, actionability, structure, consistency, and safety.</p>
338
+ <pre><code class="language-yml">evaluation:
339
+ promptDirectory: ./prompts/eval</code></pre>
340
+ </section>
341
+
342
+ <section id="reports">
343
+ <h2>Reports and artifacts</h2>
344
+ <p>Each run creates an artifact directory under <code>artifacts/&lt;timestamp&gt;</code> or the configured <code>artifacts.directory</code>.</p>
345
+ <ul>
346
+ <li>Raw transport logs such as <code>.jsonl</code> or <code>.sse</code>.</li>
347
+ <li>Generated-message traces.</li>
348
+ <li>AI assertion traces.</li>
349
+ <li>JSON run report.</li>
350
+ <li>HTML run report.</li>
351
+ </ul>
352
+ <pre><code class="language-ts">console.log([
353
+ `Status: ${getScenarioRunStatus(execution.results)}`,
354
+ `Artifacts: ${execution.outputDirectory}`,
355
+ `JSON report: ${execution.reportPaths?.jsonPath ?? '-'}`,
356
+ `HTML report: ${execution.reportPaths?.htmlPath ?? '-'}`,
357
+ ].join('\n'));</code></pre>
358
+ </section>
359
+
360
+ <section id="playwright">
361
+ <h2>Playwright integration</h2>
362
+ <p>Use <code>execute()</code> when the scenario file owns the full conversation. Karrot starts a thread on the first turn and reuses it for later turns in the scenario.</p>
363
+ <pre><code class="language-ts">import path from 'node:path';
364
+ import { expect, test } from '@playwright/test';
365
+ import { execute, getScenarioRunStatus } from '@huydao/karrot';
366
+
367
+ test('agent completes the basic flow', async () =&gt; {
368
+ const execution = await execute(path.resolve(__dirname, '../karrot.config.yml'), {
369
+ variables: {
370
+ TEST_ENV: process.env.TEST_ENV ?? 'qa',
371
+ PROJECT_NAME: 'Demo Agent',
372
+ APP_BASE_URL: process.env.APP_BASE_URL,
373
+ AGENT_URL: process.env.AGENT_URL,
374
+ AGENT_ID: process.env.AGENT_ID,
375
+ WS_URL: process.env.WS_URL,
376
+ WS_TOPIC: process.env.WS_TOPIC,
377
+ AUTH_TOKEN: process.env.AUTH_TOKEN,
378
+ ACCOUNT_ID: process.env.ACCOUNT_ID,
379
+ PROJECT_ID: process.env.PROJECT_ID,
380
+ },
381
+ scenario: {
382
+ file: path.resolve(__dirname, '../src/scenarios/basic.ts'),
383
+ ids: ['BASIC-CHAT-01'],
384
+ },
385
+ });
386
+
387
+ expect(getScenarioRunStatus(execution.results)).toBe('PASS');
388
+ });</code></pre>
389
+ </section>
390
+
391
+ <section id="existing-session">
392
+ <h2>Existing session recall</h2>
393
+ <p>Use <code>runScenario()</code> when Playwright needs to create or recall an existing agent session itself. Pass the known thread ID as <code>initialThreadId</code>.</p>
394
+ <div class="callout">
395
+ <strong>Concurrency rule</strong>
396
+ <p>Keep <code>concurrency: 1</code> when using <code>initialThreadId</code>. A single existing session should not be shared by parallel scenarios.</p>
397
+ </div>
398
+ <pre><code class="language-ts">import { expect, test } from '@playwright/test';
399
+ import {
400
+ AiScenarioSet,
401
+ createRunArtifactDirectory,
402
+ runScenario,
403
+ type AiScenario,
404
+ type BaseAiScenarioContext,
405
+ } from '@huydao/karrot';
406
+ import { runAgUiMessage } from '@huydao/karrot/adapters/ag-ui';
407
+
408
+ type SessionContext = BaseAiScenarioContext & {
409
+ projectId: string;
410
+ };
411
+
412
+ const scenarios: AiScenario&lt;SessionContext&gt;[] = [
413
+ {
414
+ id: 'RESUME-SESSION-01',
415
+ name: 'Continue an existing assistant session',
416
+ turns: [
417
+ {
418
+ label: 'Recall current context',
419
+ message: ({ projectId }) =&gt;
420
+ `Continue in project ${projectId}. Summarize what we have already discussed.`,
421
+ assertions: [
422
+ {
423
+ aiAssert: {
424
+ hasContent: 'The answer uses the existing conversation context.',
425
+ },
426
+ },
427
+ ],
428
+ eval: ['relevance', 'helpfulness'],
429
+ },
430
+ ],
431
+ },
432
+ ];
433
+
434
+ test('continues an existing agent session', async () =&gt; {
435
+ const initialThreadId = process.env.KARROT_THREAD_ID;
436
+
437
+ if (!initialThreadId) {
438
+ throw new Error('KARROT_THREAD_ID is required to resume an existing session.');
439
+ }
440
+
441
+ const outputDirectory = await createRunArtifactDirectory('./artifacts/karrot-playwright');
442
+ const env = {
443
+ ...process.env,
444
+ WS_STOMP_HEADERS: `Authorization:${process.env.AUTH_TOKEN ?? ''}`,
445
+ WS_HEADERS: `Origin:${process.env.APP_BASE_URL ?? ''},User-Agent:Mozilla/5.0`,
446
+ };
447
+
448
+ const scenarioSet = new AiScenarioSet(scenarios);
449
+ const [result] = await runScenario(scenarioSet.select(['RESUME-SESSION-01']), {
450
+ context: {
451
+ projectId: process.env.PROJECT_ID ?? '',
452
+ },
453
+ env,
454
+ outputDirectory,
455
+ initialThreadId,
456
+ concurrency: 1,
457
+ messageRunner: async ({ message, outputDirectory, threadId, processTimeoutMs }) =&gt;
458
+ await runAgUiMessage({
459
+ message,
460
+ env,
461
+ outputDirectory,
462
+ threadId,
463
+ processTimeoutMs,
464
+ }),
465
+ });
466
+
467
+ expect(result.status).toBe('PASS');
468
+ expect(result.threadId).toBe(initialThreadId);
469
+ });</code></pre>
470
+ <h3 id="init-then-recall">Init then recall</h3>
471
+ <p>To start a new session and continue it later in the same script, run once without <code>initialThreadId</code>, then pass the returned <code>threadId</code> into the next call.</p>
472
+ <pre><code class="language-ts">const [createdSession] = await runScenario(initScenarioSet.select(['INIT-SESSION-01']), {
473
+ context,
474
+ env,
475
+ outputDirectory,
476
+ concurrency: 1,
477
+ messageRunner,
478
+ });
479
+
480
+ const threadId = createdSession.threadId;
481
+
482
+ if (!threadId) {
483
+ throw new Error('Initial scenario did not return a threadId.');
484
+ }
485
+
486
+ const [continuedSession] = await runScenario(recallScenarioSet.select(['RECALL-SESSION-01']), {
487
+ context,
488
+ env,
489
+ outputDirectory,
490
+ initialThreadId: threadId,
491
+ concurrency: 1,
492
+ messageRunner,
493
+ });</code></pre>
494
+ </section>
495
+ </main>
496
+
497
+ <aside class="docs-toc" aria-label="On this page">
498
+ <div class="toc-title">On this page</div>
499
+ <nav data-toc></nav>
500
+ </aside>
501
+ </div>
502
+
503
+ <script src="../assets/app.js"></script>
504
+ </body>
505
+ </html>
@@ -0,0 +1,162 @@
1
+ <!doctype html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1">
6
+ <title>Karrot - AI scenario testing for multi-turn agents</title>
7
+ <meta name="description" content="Karrot is a reusable AI scenario runner for testing multi-turn agents with assertions, evals, and reports.">
8
+ <link rel="icon" href="assets/karrot-mark.svg" type="image/svg+xml">
9
+ <link rel="stylesheet" href="assets/styles.css">
10
+ </head>
11
+ <body>
12
+ <header class="site-header">
13
+ <div class="site-header__inner">
14
+ <a class="brand" href="index.html" aria-label="Karrot home">
15
+ <img src="assets/karrot-mark.svg" alt="">
16
+ <span>Karrot <small>AI test runner</small></span>
17
+ </a>
18
+ <nav class="top-nav" aria-label="Primary navigation">
19
+ <a href="docs/index.html">Docs</a>
20
+ <a href="docs/index.html#quick-start">Quick start</a>
21
+ <a href="https://www.npmjs.com/package/@huydao/karrot">npm</a>
22
+ </nav>
23
+ </div>
24
+ </header>
25
+
26
+ <main>
27
+ <section class="hero">
28
+ <div class="hero-grid">
29
+ <div>
30
+ <div class="eyebrow">Reusable assistant testing</div>
31
+ <h1>AI scenario testing for multi-turn agents</h1>
32
+ <p class="lead">
33
+ Karrot runs agent conversations as repeatable scenarios, checks required behavior with assertions,
34
+ scores response quality with evals, and writes reports that humans and CI can read.
35
+ </p>
36
+ <div class="hero-actions">
37
+ <a class="button button--primary" href="docs/index.html#quick-start">Get started</a>
38
+ <a class="button" href="docs/index.html">Read docs</a>
39
+ </div>
40
+ </div>
41
+ <div class="hero-panel" aria-label="Karrot command preview">
42
+ <div class="panel-bar" aria-hidden="true">
43
+ <span class="dot"></span>
44
+ <span class="dot"></span>
45
+ <span class="dot"></span>
46
+ </div>
47
+ <pre class="terminal"><code><span class="muted">$</span> npm install @huydao/karrot
48
+ <span class="muted">$</span> TEST_ENV=qa npx tsx ./src/run-karrot.ts
49
+
50
+ Status: PASS
51
+ Artifacts: artifacts/karrot/2026-05-18T09-12-44
52
+ JSON report: scenario-run-report.json
53
+ HTML report: scenario-run-report.html</code></pre>
54
+ </div>
55
+ </div>
56
+ </section>
57
+
58
+ <section class="section">
59
+ <div class="section-header">
60
+ <div>
61
+ <div class="eyebrow">What it handles</div>
62
+ <h2>Purpose-built for agent regression</h2>
63
+ </div>
64
+ <p>Karrot owns the AI test layer. Your product code owns login, runtime discovery, and product-specific context.</p>
65
+ </div>
66
+ <div class="feature-grid">
67
+ <article class="feature-card">
68
+ <span>1</span>
69
+ <h3>Multi-turn scenarios</h3>
70
+ <p>Model user turns as ordered conversations and keep the same agent thread across each scenario.</p>
71
+ </article>
72
+ <article class="feature-card">
73
+ <span>2</span>
74
+ <h3>AG-UI transports</h3>
75
+ <p>Run agents over AG-UI WebSocket/STOMP or HTTP POST patterns without baking product auth into Karrot.</p>
76
+ </article>
77
+ <article class="feature-card">
78
+ <span>3</span>
79
+ <h3>Assertions</h3>
80
+ <p>Use deterministic checks for text/tool calls and semantic AI assertions for behavior that is hard to match literally.</p>
81
+ </article>
82
+ <article class="feature-card">
83
+ <span>4</span>
84
+ <h3>Eval dimensions</h3>
85
+ <p>Score response quality with built-in or project-specific dimensions such as correctness, coverage, and helpfulness.</p>
86
+ </article>
87
+ <article class="feature-card">
88
+ <span>5</span>
89
+ <h3>Reports</h3>
90
+ <p>Produce JSON and HTML reports with turn output, assertion results, eval scores, artifacts, and thread IDs.</p>
91
+ </article>
92
+ <article class="feature-card">
93
+ <span>6</span>
94
+ <h3>Playwright friendly</h3>
95
+ <p>Call Karrot from Playwright tests, attach artifacts, and resume an existing agent session with <code>initialThreadId</code>.</p>
96
+ </article>
97
+ </div>
98
+ </section>
99
+
100
+ <section class="section" id="quick-start">
101
+ <div class="section-header">
102
+ <div>
103
+ <div class="eyebrow">Quick start</div>
104
+ <h2>Three files to a runnable scenario</h2>
105
+ </div>
106
+ <p>Start with a YAML config, a small runner script, and a scenario module that exports <code>scenarioSet</code>.</p>
107
+ </div>
108
+ <div class="quick-grid">
109
+ <article class="quick-card">
110
+ <h3>Install</h3>
111
+ <p>Add Karrot to the project that already knows how to authenticate and discover your agent runtime.</p>
112
+ <pre><code class="language-bash">npm install @huydao/karrot
113
+ npm install -D tsx typescript
114
+ export OPENAI_API_KEY=&lt;your-openai-api-key&gt;</code></pre>
115
+ </article>
116
+ <article class="quick-card">
117
+ <h3>Run</h3>
118
+ <p>Keep runtime values in environment variables or CI secrets, then execute the scenario file.</p>
119
+ <pre><code class="language-bash">TEST_ENV=qa npx tsx ./src/run-karrot.ts</code></pre>
120
+ </article>
121
+ </div>
122
+ </section>
123
+
124
+ <section class="section">
125
+ <div class="section-header">
126
+ <div>
127
+ <div class="eyebrow">Minimal example</div>
128
+ <h2>Config plus scenario plus runner</h2>
129
+ </div>
130
+ <p>The docs page expands each part, including Playwright integration and existing-session recall.</p>
131
+ </div>
132
+ <pre><code class="language-ts">import { execute, getScenarioRunStatus } from '@huydao/karrot';
133
+
134
+ const execution = await execute('./karrot.config.yml', {
135
+ variables: {
136
+ AGENT_URL: process.env.AGENT_URL,
137
+ AGENT_ID: process.env.AGENT_ID,
138
+ WS_URL: process.env.WS_URL,
139
+ WS_TOPIC: process.env.WS_TOPIC,
140
+ AUTH_TOKEN: process.env.AUTH_TOKEN,
141
+ },
142
+ scenario: {
143
+ file: './src/scenarios/basic.ts',
144
+ ids: ['BASIC-CHAT-01'],
145
+ },
146
+ });
147
+
148
+ if (getScenarioRunStatus(execution.results) === 'FAIL') {
149
+ process.exitCode = 1;
150
+ }</code></pre>
151
+ </section>
152
+ </main>
153
+
154
+ <footer class="footer">
155
+ <div class="footer__inner">
156
+ <span>Karrot 0.1.8 - reusable AI scenario execution, assertion, evaluation, and reporting.</span>
157
+ <span><a href="docs/index.html">Docs</a> · <a href="https://www.npmjs.com/package/@huydao/karrot">npm package</a></span>
158
+ </div>
159
+ </footer>
160
+ <script src="assets/app.js"></script>
161
+ </body>
162
+ </html>