@huydao/karrot 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,7 @@ export type RunAgUiPostMessageOptions = {
6
6
  processTimeoutMs?: number;
7
7
  injectMessage?: boolean;
8
8
  injectRunMetadata?: boolean;
9
+ textEvents?: AgUiPostTextEventConfig[];
9
10
  run?: {
10
11
  url: string;
11
12
  headers?: Record<string, string>;
@@ -52,4 +53,13 @@ export type RunAgUiPostMessageOptions = {
52
53
  timeoutMs?: number;
53
54
  };
54
55
  };
56
+ export type AgUiPostTextEventConfig = {
57
+ type: string;
58
+ name?: string;
59
+ role?: string;
60
+ textPath?: string;
61
+ contentPath?: string;
62
+ deltaPath?: string;
63
+ mode?: 'content' | 'delta';
64
+ };
55
65
  export declare function runAgUiPostMessage(options: RunAgUiPostMessageOptions): Promise<MessageRunResult>;
@@ -56,6 +56,13 @@ function getStringAtPath(payload, pathExpression) {
56
56
  const rawValue = getValueByPath(payload, pathExpression);
57
57
  return typeof rawValue === 'string' ? rawValue.trim() : '';
58
58
  }
59
+ function getRawStringAtPath(payload, pathExpression) {
60
+ if (!pathExpression) {
61
+ return '';
62
+ }
63
+ const rawValue = getValueByPath(payload, pathExpression);
64
+ return typeof rawValue === 'string' ? rawValue : '';
65
+ }
59
66
  function getArrayAtPath(payload, pathExpression) {
60
67
  const rawValue = getValueByPath(payload, pathExpression);
61
68
  return Array.isArray(rawValue) ? rawValue : [];
@@ -258,6 +265,100 @@ function parseSseBlock(block) {
258
265
  data: dataLines.join('\n'),
259
266
  };
260
267
  }
268
+ function normalizeEventTimestamp(value) {
269
+ if (typeof value === 'number' && Number.isFinite(value)) {
270
+ return value > 10_000_000_000 ? value : value * 1000;
271
+ }
272
+ if (typeof value === 'string' && value.trim()) {
273
+ const numeric = Number(value);
274
+ if (Number.isFinite(numeric)) {
275
+ return numeric > 10_000_000_000 ? numeric : numeric * 1000;
276
+ }
277
+ const parsed = Date.parse(value);
278
+ return Number.isFinite(parsed) ? parsed : undefined;
279
+ }
280
+ return undefined;
281
+ }
282
+ function roundSeconds(startTimeMs, endTimeMs) {
283
+ if (typeof startTimeMs !== 'number' || typeof endTimeMs !== 'number' || endTimeMs < startTimeMs) {
284
+ return undefined;
285
+ }
286
+ return Number(((endTimeMs - startTimeMs) / 1000).toFixed(1));
287
+ }
288
+ function isStandardAssistantTextEvent(event) {
289
+ return (event.type === 'TEXT_MESSAGE_CONTENT' ||
290
+ event.type === 'TEXT_MESSAGE_CHUNK');
291
+ }
292
+ function isToolStartEvent(event) {
293
+ return (event.type === 'TOOL_CALL_START' ||
294
+ (event.type === 'CUSTOM' && event.name === 'super-testing-agent.tool_started'));
295
+ }
296
+ function matchesConfiguredTextEvent(event, config) {
297
+ if (event.type !== config.type) {
298
+ return false;
299
+ }
300
+ if (config.name != null && event.name !== config.name) {
301
+ return false;
302
+ }
303
+ if (config.role != null && event.role !== config.role) {
304
+ return false;
305
+ }
306
+ return true;
307
+ }
308
+ function getConfiguredTextEventValue(event, textEvents) {
309
+ for (const config of textEvents ?? []) {
310
+ if (!matchesConfiguredTextEvent(event, config)) {
311
+ continue;
312
+ }
313
+ const content = getRawStringAtPath(event, config.contentPath);
314
+ if (content.trim()) {
315
+ return { content: content.trim() };
316
+ }
317
+ const delta = getRawStringAtPath(event, config.deltaPath);
318
+ if (delta) {
319
+ return { delta };
320
+ }
321
+ const text = getRawStringAtPath(event, config.textPath);
322
+ if (!text) {
323
+ return {};
324
+ }
325
+ return config.mode === 'content' ? { content: text.trim() } : { delta: text };
326
+ }
327
+ return {};
328
+ }
329
+ function isAssistantTextEvent(event, textEvents) {
330
+ return (isStandardAssistantTextEvent(event) ||
331
+ Boolean((textEvents ?? []).find((config) => matchesConfiguredTextEvent(event, config))));
332
+ }
333
+ function getAssistantContent(event, textEvents) {
334
+ const configured = getConfiguredTextEventValue(event, textEvents);
335
+ if (configured.content) {
336
+ return configured.content;
337
+ }
338
+ if (typeof event.content === 'string' && event.content.trim()) {
339
+ return event.content.trim();
340
+ }
341
+ if (typeof event.value?.content === 'string' && event.value.content.trim()) {
342
+ return event.value.content.trim();
343
+ }
344
+ if (typeof event.value?.output === 'string' && event.value.output.trim()) {
345
+ return event.value.output.trim();
346
+ }
347
+ return undefined;
348
+ }
349
+ function getAssistantDelta(event, textEvents) {
350
+ const configured = getConfiguredTextEventValue(event, textEvents);
351
+ if (configured.delta) {
352
+ return configured.delta;
353
+ }
354
+ if (typeof event.delta === 'string' && event.delta) {
355
+ return event.delta;
356
+ }
357
+ if (typeof event.value?.delta === 'string' && event.value.delta) {
358
+ return event.value.delta;
359
+ }
360
+ return undefined;
361
+ }
261
362
  function createConnectCollector(options) {
262
363
  const assistantFragments = [];
263
364
  const toolCalls = [];
@@ -266,6 +367,11 @@ function createConnectCollector(options) {
266
367
  let started = false;
267
368
  let finished = false;
268
369
  let sawAnyEvent = false;
370
+ let runStartedAt;
371
+ let firstTextAt;
372
+ let firstToolAt;
373
+ let runFinishedAt;
374
+ const eventTime = (event) => normalizeEventTimestamp(event.timestamp) ?? Date.now();
269
375
  const consume = (sseEvent) => {
270
376
  if (!sseEvent.data) {
271
377
  return false;
@@ -286,6 +392,7 @@ function createConnectCollector(options) {
286
392
  }
287
393
  if (parsed.type === 'RUN_STARTED' && parsed.runId === options.targetRunId) {
288
394
  started = true;
395
+ runStartedAt = eventTime(parsed);
289
396
  return false;
290
397
  }
291
398
  if (!started) {
@@ -300,19 +407,26 @@ function createConnectCollector(options) {
300
407
  });
301
408
  }
302
409
  const isAssistantMessage = parsed.role === 'assistant' || typeof parsed.role !== 'string';
303
- if ((parsed.type === 'TEXT_MESSAGE_CHUNK' || parsed.type === 'TEXT_MESSAGE_CONTENT') && isAssistantMessage) {
304
- if (typeof parsed.content === 'string' && parsed.content.trim()) {
305
- latestAssistantContent = parsed.content.trim();
410
+ if (isAssistantTextEvent(parsed, options.textEvents) && isAssistantMessage) {
411
+ firstTextAt ??= eventTime(parsed);
412
+ const content = getAssistantContent(parsed, options.textEvents);
413
+ const delta = getAssistantDelta(parsed, options.textEvents);
414
+ if (content) {
415
+ latestAssistantContent = content;
306
416
  }
307
- else if (typeof parsed.delta === 'string' && parsed.delta) {
308
- assistantFragments.push(parsed.delta);
417
+ else if (delta) {
418
+ assistantFragments.push(delta);
309
419
  }
310
420
  }
311
- if (parsed.type === 'TOOL_CALL_START' && typeof parsed.toolCallName === 'string' && parsed.toolCallName.trim()) {
312
- toolCalls.push(parsed.toolCallName.trim());
421
+ if (isToolStartEvent(parsed)) {
422
+ firstToolAt ??= eventTime(parsed);
423
+ if (typeof parsed.toolCallName === 'string' && parsed.toolCallName.trim()) {
424
+ toolCalls.push(parsed.toolCallName.trim());
425
+ }
313
426
  }
314
427
  if (parsed.type === 'RUN_FINISHED' && (!parsed.runId || parsed.runId === options.targetRunId)) {
315
428
  finished = true;
429
+ runFinishedAt = eventTime(parsed);
316
430
  return true;
317
431
  }
318
432
  return false;
@@ -320,12 +434,19 @@ function createConnectCollector(options) {
320
434
  return {
321
435
  consume,
322
436
  getResult() {
437
+ const turnCompleteSeconds = roundSeconds(runStartedAt, runFinishedAt);
323
438
  return {
324
439
  output: latestAssistantContent?.trim() || assistantFragments.join('').trim(),
325
440
  toolCalls: [...toolCalls],
326
441
  threadId: resolvedThreadId,
327
442
  finished,
328
443
  sawAnyEvent,
444
+ metrics: {
445
+ ttfTextSeconds: roundSeconds(runStartedAt, firstTextAt),
446
+ ttfToolSeconds: roundSeconds(runStartedAt, firstToolAt),
447
+ turnCompleteSeconds,
448
+ totalSeconds: turnCompleteSeconds,
449
+ },
329
450
  };
330
451
  },
331
452
  };
@@ -337,6 +458,7 @@ async function postAndCaptureResponse(options) {
337
458
  : undefined;
338
459
  try {
339
460
  await promises_1.default.writeFile(options.outputPath, '', 'utf8');
461
+ const startedAtMs = Date.now();
340
462
  const response = await fetch(options.url, {
341
463
  method: 'POST',
342
464
  headers: {
@@ -369,6 +491,8 @@ async function postAndCaptureResponse(options) {
369
491
  return {
370
492
  status: response.status,
371
493
  rawContent,
494
+ startedAtMs,
495
+ finishedAtMs: Date.now(),
372
496
  };
373
497
  }
374
498
  catch (error) {
@@ -419,17 +543,19 @@ function startConnectStream(options) {
419
543
  const collector = createConnectCollector({
420
544
  targetRunId: options.targetRunId,
421
545
  fallbackThreadId: options.fallbackThreadId,
546
+ textEvents: options.textEvents,
422
547
  });
423
548
  if (!response.body) {
424
549
  const rawContent = await response.text();
425
550
  await promises_1.default.writeFile(options.outputPath, rawContent, 'utf8');
426
- const parsed = extractResultFromSse(rawContent, options.fallbackThreadId);
551
+ const parsed = extractResultFromSse(rawContent, options.fallbackThreadId, {}, options.textEvents);
427
552
  return {
428
553
  output: parsed.output,
429
554
  toolCalls: parsed.toolCalls,
430
555
  threadId: parsed.threadId,
431
556
  finished: parsed.finished,
432
557
  sawAnyEvent: parsed.sawAnyEvent,
558
+ metrics: parsed.metrics,
433
559
  };
434
560
  }
435
561
  const reader = response.body.getReader();
@@ -495,13 +621,17 @@ function startConnectStream(options) {
495
621
  result,
496
622
  };
497
623
  }
498
- function extractResultFromSse(rawContent, fallbackThreadId) {
624
+ function extractResultFromSse(rawContent, fallbackThreadId, fallbackMetrics = {}, textEvents) {
499
625
  const fragments = [];
500
626
  let latestContent;
501
627
  let resolvedThreadId = fallbackThreadId;
502
628
  const toolCalls = [];
503
629
  let finished = false;
504
630
  let sawAnyEvent = false;
631
+ let runStartedAt;
632
+ let firstTextAt;
633
+ let firstToolAt;
634
+ let runFinishedAt;
505
635
  for (const sseEvent of parseSseEvents(rawContent)) {
506
636
  if (!sseEvent.data) {
507
637
  continue;
@@ -515,19 +645,29 @@ function extractResultFromSse(rawContent, fallbackThreadId) {
515
645
  else if (typeof parsed.conversationId === 'string' && parsed.conversationId.trim()) {
516
646
  resolvedThreadId = parsed.conversationId.trim();
517
647
  }
518
- if (parsed.type === 'TEXT_MESSAGE_CONTENT') {
519
- if (typeof parsed.content === 'string' && parsed.content.trim()) {
520
- latestContent = parsed.content.trim();
648
+ if (parsed.type === 'RUN_STARTED' && typeof runStartedAt !== 'number') {
649
+ runStartedAt = normalizeEventTimestamp(parsed.timestamp);
650
+ }
651
+ if (isAssistantTextEvent(parsed, textEvents)) {
652
+ firstTextAt ??= normalizeEventTimestamp(parsed.timestamp);
653
+ const content = getAssistantContent(parsed, textEvents);
654
+ const delta = getAssistantDelta(parsed, textEvents);
655
+ if (content) {
656
+ latestContent = content;
521
657
  }
522
- else if (typeof parsed.delta === 'string' && parsed.delta) {
523
- fragments.push(parsed.delta);
658
+ else if (delta) {
659
+ fragments.push(delta);
524
660
  }
525
661
  }
526
- if (parsed.type === 'TOOL_CALL_START' && typeof parsed.toolCallName === 'string' && parsed.toolCallName.trim()) {
527
- toolCalls.push(parsed.toolCallName.trim());
662
+ if (isToolStartEvent(parsed)) {
663
+ firstToolAt ??= normalizeEventTimestamp(parsed.timestamp);
664
+ if (typeof parsed.toolCallName === 'string' && parsed.toolCallName.trim()) {
665
+ toolCalls.push(parsed.toolCallName.trim());
666
+ }
528
667
  }
529
668
  if (parsed.type === 'RUN_FINISHED') {
530
669
  finished = true;
670
+ runFinishedAt = normalizeEventTimestamp(parsed.timestamp);
531
671
  }
532
672
  if (parsed.type === 'RUN_ERROR') {
533
673
  throw new run_result_1.MessageRunError(parsed.error?.trim() || 'Agent run failed.', {
@@ -544,12 +684,21 @@ function extractResultFromSse(rawContent, fallbackThreadId) {
544
684
  }
545
685
  }
546
686
  }
687
+ const effectiveStartedAt = runStartedAt ?? fallbackMetrics.startTimeMs;
688
+ const effectiveFinishedAt = runFinishedAt ?? fallbackMetrics.finishedTimeMs;
689
+ const turnCompleteSeconds = roundSeconds(effectiveStartedAt, effectiveFinishedAt);
547
690
  return {
548
691
  output: latestContent?.trim() || fragments.join('').trim(),
549
692
  toolCalls,
550
693
  threadId: resolvedThreadId,
551
694
  finished,
552
695
  sawAnyEvent,
696
+ metrics: {
697
+ ttfTextSeconds: roundSeconds(runStartedAt, firstTextAt),
698
+ ttfToolSeconds: roundSeconds(runStartedAt, firstToolAt),
699
+ turnCompleteSeconds,
700
+ totalSeconds: turnCompleteSeconds,
701
+ },
553
702
  };
554
703
  }
555
704
  async function runAgUiPostMessage(options) {
@@ -591,6 +740,7 @@ async function runAgUiPostMessage(options) {
591
740
  targetRunId: runId,
592
741
  fallbackThreadId: resolvedThreadId,
593
742
  processTimeoutMs: options.connect.processTimeoutMs ?? options.processTimeoutMs,
743
+ textEvents: options.textEvents,
594
744
  });
595
745
  await connectStream.ready;
596
746
  const runResponse = await postAndCaptureResponse({
@@ -625,7 +775,7 @@ async function runAgUiPostMessage(options) {
625
775
  note: `Run log: ${node_path_1.default.basename(runOutputPath)}`,
626
776
  toolCallCount: connected.toolCalls.length,
627
777
  toolCalls: connected.toolCalls,
628
- metrics: {},
778
+ metrics: connected.metrics,
629
779
  };
630
780
  }
631
781
  const runResponse = await postAndCaptureResponse({
@@ -642,7 +792,10 @@ async function runAgUiPostMessage(options) {
642
792
  output: runResponse.rawContent,
643
793
  });
644
794
  }
645
- const parsed = extractResultFromSse(runResponse.rawContent, resolvedThreadId);
795
+ const parsed = extractResultFromSse(runResponse.rawContent, resolvedThreadId, {
796
+ startTimeMs: runResponse.startedAtMs,
797
+ finishedTimeMs: runResponse.finishedAtMs,
798
+ }, options.textEvents);
646
799
  if (options.observe) {
647
800
  await promises_1.default.writeFile(observePath, '', 'utf8');
648
801
  const observed = await waitForObservedCompletion({
@@ -659,7 +812,7 @@ async function runAgUiPostMessage(options) {
659
812
  note: `Observe status: ${observed.status}. Observe log: ${node_path_1.default.basename(observePath)}`,
660
813
  toolCallCount: parsed.toolCalls.length,
661
814
  toolCalls: parsed.toolCalls,
662
- metrics: {},
815
+ metrics: parsed.metrics,
663
816
  };
664
817
  }
665
818
  if (options.completionCheck) {
@@ -671,7 +824,7 @@ async function runAgUiPostMessage(options) {
671
824
  note: `Completion status: ${completion.status}`,
672
825
  toolCallCount: parsed.toolCalls.length,
673
826
  toolCalls: parsed.toolCalls,
674
- metrics: {},
827
+ metrics: parsed.metrics,
675
828
  };
676
829
  }
677
830
  if (!parsed.finished && !parsed.output) {
@@ -691,7 +844,7 @@ async function runAgUiPostMessage(options) {
691
844
  outputPath,
692
845
  toolCallCount: parsed.toolCalls.length,
693
846
  toolCalls: parsed.toolCalls,
694
- metrics: {},
847
+ metrics: parsed.metrics,
695
848
  };
696
849
  }
697
850
  catch (error) {
@@ -9,8 +9,8 @@ exports.extractAppendedLog = extractAppendedLog;
9
9
  exports.runAgUiMessage = runAgUiMessage;
10
10
  const promises_1 = __importDefault(require("node:fs/promises"));
11
11
  const node_path_1 = __importDefault(require("node:path"));
12
+ const node_crypto_1 = require("node:crypto");
12
13
  const stompjs_1 = require("@stomp/stompjs");
13
- const uuid_1 = require("uuid");
14
14
  const ws_1 = __importDefault(require("ws"));
15
15
  const run_result_1 = require("../run-result");
16
16
  Object.assign(globalThis, { WebSocket: ws_1.default });
@@ -169,6 +169,7 @@ function computeMetrics(state) {
169
169
  return {
170
170
  ttfToolSeconds,
171
171
  ttfTextSeconds,
172
+ turnCompleteSeconds: totalSeconds,
172
173
  totalSeconds,
173
174
  protocolUsedKb,
174
175
  protocolTotalKb,
@@ -179,6 +180,7 @@ function writeMetricsToStdout(metrics) {
179
180
  const parts = [
180
181
  metrics.ttfToolSeconds != null ? `TTF-Tool: ${metrics.ttfToolSeconds.toFixed(1)}s` : undefined,
181
182
  metrics.ttfTextSeconds != null ? `TTF-Text: ${metrics.ttfTextSeconds.toFixed(1)}s` : undefined,
183
+ metrics.turnCompleteSeconds != null ? `Turn complete: ${metrics.turnCompleteSeconds.toFixed(1)}s` : undefined,
182
184
  metrics.totalSeconds != null ? `Total: ${metrics.totalSeconds.toFixed(1)}s` : undefined,
183
185
  metrics.protocolUsedKb != null && metrics.protocolTotalKb != null && metrics.efficiencyPercent != null
184
186
  ? `Protocol efficiency: ${metrics.protocolUsedKb.toFixed(1)}KB/${metrics.protocolTotalKb.toFixed(1)}KB (${metrics.efficiencyPercent}%)`
@@ -430,8 +432,8 @@ async function connectAndRun(options) {
430
432
  async function runAgUiMessage(options) {
431
433
  await promises_1.default.mkdir(options.outputDirectory, { recursive: true });
432
434
  const config = parseAgUiEnv(options.env);
433
- const threadId = options.threadId ?? options.threadIdFallback ?? (0, uuid_1.v7)();
434
- const runId = (0, uuid_1.v7)();
435
+ const threadId = options.threadId ?? options.threadIdFallback ?? (0, node_crypto_1.randomUUID)();
436
+ const runId = (0, node_crypto_1.randomUUID)();
435
437
  const logPath = node_path_1.default.join(options.outputDirectory, `${threadId}.jsonl`);
436
438
  const previousLogContent = await readJsonl(logPath);
437
439
  const state = await connectAndRun({
@@ -48,6 +48,7 @@ function createAgUiPostRunner(config) {
48
48
  processTimeoutMs: processTimeoutMs ?? transport.processTimeoutMs,
49
49
  injectMessage: transport.injectMessage,
50
50
  injectRunMetadata: transport.injectRunMetadata,
51
+ textEvents: transport.textEvents,
51
52
  run: transport.run ?? transport.request,
52
53
  connect: transport.connect,
53
54
  observe: transport.observe,
@@ -135,7 +135,8 @@ async function runSingleScenario(scenario, context, env, outputDirectory, deadli
135
135
  if (assertionFailureNote) {
136
136
  result.status = 'FAIL';
137
137
  result.note = [result.note, assertionFailureNote].filter(Boolean).join(' ') || undefined;
138
- if (!scenario.continueOnAssertionFailure) {
138
+ const shouldContinueOnAssertionFailure = turn.continueOnAssertionFailure ?? scenario.continueOnAssertionFailure ?? false;
139
+ if (!shouldContinueOnAssertionFailure) {
139
140
  throw new Error(assertionFailureNote);
140
141
  }
141
142
  }
@@ -1,7 +1,10 @@
1
1
  export type TimingMetrics = {
2
2
  ttfToolSeconds?: number;
3
3
  ttfTextSeconds?: number;
4
+ turnCompleteSeconds?: number;
4
5
  totalSeconds?: number;
6
+ averageTtfTextSeconds?: number;
7
+ averageTurnCompleteSeconds?: number;
5
8
  protocolUsedKb?: number;
6
9
  protocolTotalKb?: number;
7
10
  efficiencyPercent?: number;
@@ -26,6 +26,16 @@ function sumNumbers(values) {
26
26
  }
27
27
  return Number(definedValues.reduce((total, value) => total + value, 0).toFixed(1));
28
28
  }
29
+ function averageNumbers(values) {
30
+ const definedValues = values.filter((value) => typeof value === 'number');
31
+ if (definedValues.length === 0) {
32
+ return undefined;
33
+ }
34
+ return Number((definedValues.reduce((total, value) => total + value, 0) / definedValues.length).toFixed(1));
35
+ }
36
+ function turnCompleteSeconds(metrics) {
37
+ return metrics.turnCompleteSeconds ?? metrics.totalSeconds;
38
+ }
29
39
  function summarizeScenarioMetrics(turns) {
30
40
  const protocolUsedKb = sumNumbers(turns.map((turn) => turn.metrics.protocolUsedKb));
31
41
  const protocolTotalKb = sumNumbers(turns.map((turn) => turn.metrics.protocolTotalKb));
@@ -35,7 +45,10 @@ function summarizeScenarioMetrics(turns) {
35
45
  return {
36
46
  ttfToolSeconds: sumNumbers(turns.map((turn) => turn.metrics.ttfToolSeconds)),
37
47
  ttfTextSeconds: sumNumbers(turns.map((turn) => turn.metrics.ttfTextSeconds)),
48
+ turnCompleteSeconds: sumNumbers(turns.map((turn) => turnCompleteSeconds(turn.metrics))),
38
49
  totalSeconds: sumNumbers(turns.map((turn) => turn.metrics.totalSeconds)),
50
+ averageTtfTextSeconds: averageNumbers(turns.map((turn) => turn.metrics.ttfTextSeconds)),
51
+ averageTurnCompleteSeconds: averageNumbers(turns.map((turn) => turnCompleteSeconds(turn.metrics))),
39
52
  protocolUsedKb,
40
53
  protocolTotalKb,
41
54
  efficiencyPercent,
@@ -112,6 +125,8 @@ function buildScenarioRunSummary(results) {
112
125
  failedAssertions: assertions.filter((assertion) => !assertion.passed).length,
113
126
  totalToolCalls: turns.reduce((total, turn) => total + turn.toolCallCount, 0),
114
127
  totalEvaluations: evaluations.length,
128
+ averageTtfTextSeconds: averageNumbers(turns.map((turn) => turn.metrics.ttfTextSeconds)),
129
+ averageTurnCompleteSeconds: averageNumbers(turns.map((turn) => turnCompleteSeconds(turn.metrics))),
115
130
  averageScoresByDimension,
116
131
  requestedEvalDimensions,
117
132
  };
@@ -155,7 +170,10 @@ function renderMetrics(metrics) {
155
170
  return [
156
171
  `TTF Tool: ${formatSeconds(metrics.ttfToolSeconds)}`,
157
172
  `TTF Text: ${formatSeconds(metrics.ttfTextSeconds)}`,
173
+ `Turn Complete: ${formatSeconds(turnCompleteSeconds(metrics))}`,
158
174
  `Total: ${formatSeconds(metrics.totalSeconds)}`,
175
+ `Avg TTF Text: ${formatSeconds(metrics.averageTtfTextSeconds)}`,
176
+ `Avg Complete: ${formatSeconds(metrics.averageTurnCompleteSeconds)}`,
159
177
  `Efficiency: ${formatPercent(metrics.efficiencyPercent)}`,
160
178
  ].join(' | ');
161
179
  }
@@ -356,6 +374,8 @@ function buildScenarioRunHtml(payload) {
356
374
  '<section class="summary-grid">',
357
375
  `<article class="summary-card"><span class="label">Scenarios</span><div class="value">${payload.summary.totalScenarios}</div><div class="sub">${payload.summary.passedScenarios} pass / ${payload.summary.failedScenarios} fail / ${payload.summary.skippedScenarios} skip</div></article>`,
358
376
  `<article class="summary-card"><span class="label">Turns</span><div class="value">${payload.summary.totalTurns}</div><div class="sub">${payload.summary.totalToolCalls} tool calls total</div></article>`,
377
+ `<article class="summary-card"><span class="label">Avg TTF Text</span><div class="value">${formatSeconds(payload.summary.averageTtfTextSeconds)}</div><div class="sub">First assistant text per turn</div></article>`,
378
+ `<article class="summary-card"><span class="label">Avg Turn Complete</span><div class="value">${formatSeconds(payload.summary.averageTurnCompleteSeconds)}</div><div class="sub">Run started to run finished</div></article>`,
359
379
  `<article class="summary-card"><span class="label">Assertions</span><div class="value">${payload.summary.totalAssertions}</div><div class="sub">${payload.summary.passedAssertions} pass / ${payload.summary.failedAssertions} fail</div></article>`,
360
380
  `<article class="summary-card"><span class="label">Evaluations</span><div class="value">${payload.summary.totalEvaluations}</div><div class="sub">LLM-scored dimensions</div></article>`,
361
381
  '</section>',
@@ -44,6 +44,7 @@ export type AiTurn<TContext extends BaseAiScenarioContext = BaseAiScenarioContex
44
44
  message: AiTurnMessage<TContext>;
45
45
  idleTimeoutMs?: number;
46
46
  processTimeoutMs?: number;
47
+ continueOnAssertionFailure?: boolean;
47
48
  assertions?: AiTurnAssertion[];
48
49
  eval?: AiTurnEvalDefinition[];
49
50
  onComplete?: (args: AiTurnCompletionArgs<TContext>) => void | Promise<void>;
@@ -22,6 +22,15 @@ export type KarrotConfig = {
22
22
  type: 'ag-ui-post';
23
23
  injectMessage?: boolean;
24
24
  injectRunMetadata?: boolean;
25
+ textEvents?: Array<{
26
+ type: string;
27
+ name?: string;
28
+ role?: string;
29
+ textPath?: string;
30
+ contentPath?: string;
31
+ deltaPath?: string;
32
+ mode?: 'content' | 'delta';
33
+ }>;
25
34
  run?: {
26
35
  url: string;
27
36
  headers?: Record<string, string>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@huydao/karrot",
3
- "version": "0.1.6",
3
+ "version": "0.1.8",
4
4
  "description": "Reusable AI scenario execution, assertion, evaluation, and reporting toolkit",
5
5
  "license": "ISC",
6
6
  "type": "commonjs",
@@ -126,12 +126,15 @@
126
126
  },
127
127
  "files": [
128
128
  "dist",
129
+ "site",
129
130
  "README.md",
130
131
  "GUIDE.md"
131
132
  ],
132
133
  "scripts": {
133
134
  "build": "rm -rf dist && tsc -p tsconfig.json && mkdir -p dist/prompts && cp prompts/*.md dist/prompts/",
134
- "prepack": "npm run build"
135
+ "prepack": "npm run build",
136
+ "site:serve": "node site/serve.js",
137
+ "site:check": "node site/check.js"
135
138
  },
136
139
  "dependencies": {
137
140
  "@stomp/stompjs": "^7.3.0",