veryfront 0.1.522 → 0.1.524

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/esm/deno.d.ts +0 -5
  2. package/esm/deno.js +1 -13
  3. package/esm/src/agent/testing/durable-run-canaries/index.d.ts +2 -0
  4. package/esm/src/agent/testing/durable-run-canaries/index.d.ts.map +1 -0
  5. package/esm/src/agent/testing/durable-run-canaries/index.js +1 -0
  6. package/esm/src/agent/testing/durable-run-canaries/runner.d.ts +102 -0
  7. package/esm/src/agent/testing/durable-run-canaries/runner.d.ts.map +1 -0
  8. package/esm/src/agent/testing/durable-run-canaries/runner.js +372 -0
  9. package/esm/src/agent/testing/index.d.ts +2 -1
  10. package/esm/src/agent/testing/index.d.ts.map +1 -1
  11. package/esm/src/agent/testing/index.js +2 -1
  12. package/esm/src/agent/testing/live-evals/index.d.ts +2 -1
  13. package/esm/src/agent/testing/live-evals/index.d.ts.map +1 -1
  14. package/esm/src/agent/testing/live-evals/index.js +2 -1
  15. package/esm/src/agent/testing/live-evals/request.d.ts +16 -17
  16. package/esm/src/agent/testing/live-evals/request.d.ts.map +1 -1
  17. package/esm/src/agent/testing/live-evals/runner.d.ts +124 -0
  18. package/esm/src/agent/testing/live-evals/runner.d.ts.map +1 -0
  19. package/esm/src/agent/testing/live-evals/runner.js +391 -0
  20. package/esm/src/server/handlers/request/agent-stream.handler.d.ts.map +1 -1
  21. package/esm/src/server/handlers/request/agent-stream.handler.js +10 -1
  22. package/esm/src/utils/version-constant.d.ts +1 -1
  23. package/esm/src/utils/version-constant.js +1 -1
  24. package/package.json +1 -1
  25. package/src/deno.js +1 -13
  26. package/src/src/agent/testing/durable-run-canaries/index.ts +18 -0
  27. package/src/src/agent/testing/durable-run-canaries/runner.ts +582 -0
  28. package/src/src/agent/testing/index.ts +31 -0
  29. package/src/src/agent/testing/live-evals/index.ts +18 -1
  30. package/src/src/agent/testing/live-evals/request.ts +19 -1
  31. package/src/src/agent/testing/live-evals/runner.ts +629 -0
  32. package/src/src/server/handlers/request/agent-stream.handler.ts +18 -1
  33. package/src/src/utils/version-constant.ts +1 -1
@@ -0,0 +1,582 @@
1
+ import * as dntShim from "../../../../_dnt.shims.js";
2
+ import { defineSchema } from "../../../schemas/index.js";
3
+ import type { InferSchema } from "../../../extensions/schema/index.js";
4
+
5
/** Connection and targeting settings used by every durable-run canary API call. */
export interface DurableRunCanaryApiConfig {
  /** Base URL that request paths are resolved against. */
  apiUrl: string;
  /** Token sent as `Authorization: Bearer <token>` on every request. */
  authToken: string;
  /** Sent as `agent_id` in run request bodies. */
  agentId: string;
  /**
   * Sent as `project_id` in the run context. A truthy value also adds the
   * project/production target fields when a root run is created.
   */
  projectId: string | null;
  /** Branch forwarded with run requests; `null` is sent when it is omitted. */
  branchId?: string | null;
  /**
   * Abort timeout (ms) applied to each HTTP request. The runner in this file
   * also uses it as the overall deadline while polling for a terminal run.
   */
  requestTimeoutMs: number;
  /** Optional replacement for the global `fetch`. */
  fetch?: (input: string | URL | Request, init?: RequestInit) => Promise<Response>;
}
14
+
15
/** Identifies a run by its owning conversation and its public run id. */
export interface DurableRunCanaryCreateRootRunInput {
  /** Conversation that owns the run. */
  conversationId: string;
  /** Public id of the run (sent as `public_id` when the run is created). */
  runId: string;
}
19
+
20
/** Input for posting the canary's user message to a conversation. */
export interface DurableRunCanarySendUserMessageInput {
  /** Conversation that receives the message. */
  conversationId: string;
  /** Text sent as the single text part of the user message. */
  prompt: string;
}
24
+
25
/** Everything needed to start an already-created durable root run. */
export interface DurableRunCanaryStartRunInput extends DurableRunCanaryCreateRootRunInput {
  /** Message id sent as `durable_root_run.message_id`. */
  messageId: string;
  /** Prompt text replayed as the user message of the run input. */
  prompt: string;
  /** Id of the previously stored user message. */
  userMessageId: string;
}
30
+
31
/**
 * Schema factory for one conversation message.
 *
 * A message needs an `id` and one of the four listed roles. `status` is
 * optional and `parts` defaults to an empty array. Unknown keys are kept on
 * both the message and each part (`passthrough`).
 */
export const getDurableRunCanaryMessageSchema = defineSchema((v) => {
  const roles = ["user", "assistant", "system", "tool"] as const;
  const messagePart = v.object({ type: v.string() }).passthrough();

  return v.object({
    id: v.string(),
    role: v.enum(roles),
    status: v.string().optional(),
    parts: v.array(messagePart).default([]),
  }).passthrough();
});
39
+
40
/** Parsed message shape, inferred from the schema built by `getDurableRunCanaryMessageSchema`. */
export type DurableRunCanaryMessage = InferSchema<ReturnType<typeof getDurableRunCanaryMessageSchema>>;
43
+
44
/**
 * Normalized (camelCase) view of a run summary.
 *
 * Optional fields of the wire payload are represented here as `null` when the
 * payload omits them or sets them to `null`.
 */
export interface DurableRunCanaryRunSummary {
  /** Public id of the run. */
  runId: string;
  /** Conversation that owns the run. */
  conversationId: string;
  /** Message id associated with the run. */
  messageId: string;
  /** Agent the run belongs to. */
  agentId: string;
  /** Run status string; the runner treats "completed", "failed" and "cancelled" as terminal. */
  status: string;
  /** Latest event id reported for the run. */
  latestEventId: number;
  /** Latest external event sequence, or `null` when not reported. */
  latestExternalEventSequence: number | null;
  /** Tool call id reported as waiting, or `null`. */
  waitingToolCallId: string | null;
  /** Tool name reported as waiting, or `null`. */
  waitingToolName: string | null;
  /** Terminal error code, or `null`. */
  terminalErrorCode: string | null;
  /** Terminal error message, or `null`. */
  terminalErrorMessage: string | null;
  /** Start timestamp string as reported, or `null`. */
  startedAt: string | null;
  /** Finish timestamp string as reported, or `null`. */
  finishedAt: string | null;
}
59
+
60
/**
 * Schema factory for a run summary payload that uses snake_case keys.
 *
 * Unknown keys are kept (`passthrough`). Every field that the normalized
 * summary exposes as `T | null` accepts both an absent key and an explicit
 * `null`.
 */
const getSnakeRunSummarySchema = defineSchema((v) =>
  v.object({
    run_id: v.string(),
    conversation_id: v.string().uuid(),
    message_id: v.string().uuid(),
    agent_id: v.string(),
    status: v.string(),
    latest_event_id: v.number().int().nonnegative(),
    // Accept an explicit `null` like the other nullable fields do; the parser
    // already maps both `null` and a missing key to `null`.
    latest_external_event_sequence: v.number().int().nonnegative().nullable().optional(),
    waiting_tool_call_id: v.string().nullable().optional(),
    waiting_tool_name: v.string().nullable().optional(),
    terminal_error_code: v.string().nullable().optional(),
    terminal_error_message: v.string().nullable().optional(),
    started_at: v.string().nullable().optional(),
    finished_at: v.string().nullable().optional(),
  }).passthrough()
);
77
+
78
/**
 * Schema factory for a run summary payload that uses camelCase keys.
 *
 * Unknown keys are kept (`passthrough`). Every field that the normalized
 * summary exposes as `T | null` accepts both an absent key and an explicit
 * `null`.
 */
const getCamelRunSummarySchema = defineSchema((v) =>
  v.object({
    runId: v.string(),
    conversationId: v.string().uuid(),
    messageId: v.string().uuid(),
    agentId: v.string(),
    status: v.string(),
    latestEventId: v.number().int().nonnegative(),
    // Accept an explicit `null` like the other nullable fields do; the parser
    // already maps both `null` and a missing key to `null`.
    latestExternalEventSequence: v.number().int().nonnegative().nullable().optional(),
    waitingToolCallId: v.string().nullable().optional(),
    waitingToolName: v.string().nullable().optional(),
    terminalErrorCode: v.string().nullable().optional(),
    terminalErrorMessage: v.string().nullable().optional(),
    startedAt: v.string().nullable().optional(),
    finishedAt: v.string().nullable().optional(),
  }).passthrough()
);
95
+
96
/** Schema factory for the message-list response: an object whose `data` is an array of messages. */
const getDurableRunCanaryMessageListSchema = defineSchema((v) => {
  const message = getDurableRunCanaryMessageSchema();
  return v.object({ data: v.array(message) });
});
101
+
102
/**
 * Validates a run summary payload and normalizes it to camelCase.
 *
 * The snake_case schema is tried first. If it does not match, the payload is
 * parsed with the camelCase schema, whose validation error propagates when
 * that fails too. Optional fields that are missing or `null` become `null`.
 *
 * @param value Untrusted payload, typically a decoded JSON response.
 * @returns The normalized run summary.
 * @throws The camelCase schema's validation error when neither shape matches.
 */
export function parseDurableRunCanaryRunSummary(value: unknown): DurableRunCanaryRunSummary {
  const snakeResult = getSnakeRunSummarySchema().safeParse(value);

  if (!snakeResult.success) {
    const c = getCamelRunSummarySchema().parse(value);
    return {
      runId: c.runId,
      conversationId: c.conversationId,
      messageId: c.messageId,
      agentId: c.agentId,
      status: c.status,
      latestEventId: c.latestEventId,
      latestExternalEventSequence: c.latestExternalEventSequence ?? null,
      waitingToolCallId: c.waitingToolCallId ?? null,
      waitingToolName: c.waitingToolName ?? null,
      terminalErrorCode: c.terminalErrorCode ?? null,
      terminalErrorMessage: c.terminalErrorMessage ?? null,
      startedAt: c.startedAt ?? null,
      finishedAt: c.finishedAt ?? null,
    };
  }

  const s = snakeResult.data;
  return {
    runId: s.run_id,
    conversationId: s.conversation_id,
    messageId: s.message_id,
    agentId: s.agent_id,
    status: s.status,
    latestEventId: s.latest_event_id,
    latestExternalEventSequence: s.latest_external_event_sequence ?? null,
    waitingToolCallId: s.waiting_tool_call_id ?? null,
    waitingToolName: s.waiting_tool_name ?? null,
    terminalErrorCode: s.terminal_error_code ?? null,
    terminalErrorMessage: s.terminal_error_message ?? null,
    startedAt: s.started_at ?? null,
    finishedAt: s.finished_at ?? null,
  };
}
139
+
140
/**
 * Builds the headers for an API request.
 *
 * Caller-supplied headers are copied. `Content-Type` is set to
 * `application/json` only when the caller did not provide one, while
 * `Authorization` is always overwritten with the configured bearer token.
 */
function createJsonHeaders(config: DurableRunCanaryApiConfig, headers?: HeadersInit): Headers {
  const merged = new Headers(headers);

  if (merged.get("Content-Type") === null) {
    merged.set("Content-Type", "application/json");
  }

  merged.set("Authorization", `Bearer ${config.authToken}`);
  return merged;
}
148
+
149
/** Returns the configured `fetch` override, or the global `fetch` when none is set. */
function createFetch(config: DurableRunCanaryApiConfig) {
  const override = config.fetch;
  return override == null ? fetch : override;
}
152
+
153
/**
 * Resolves an API path against the configured base URL.
 *
 * The base is given a trailing slash and one leading slash is stripped from
 * `path`, so the base URL's own path prefix is kept rather than replaced.
 */
function createApiUrl(config: DurableRunCanaryApiConfig, path: string): URL {
  const { apiUrl } = config;
  const base = apiUrl.endsWith("/") ? apiUrl : apiUrl + "/";
  const relative = path.replace(/^\//, "");
  return new URL(relative, base);
}
158
+
159
/**
 * Builds the JSON body that creates a pending root run owned by a conversation.
 *
 * The project/production target fields are included only when
 * `config.projectId` is truthy.
 */
function buildCreateRootRunBody(
  config: DurableRunCanaryApiConfig,
  input: DurableRunCanaryCreateRootRunInput,
) {
  const projectTarget = config.projectId
    ? {
      source_target_kind: "project",
      runtime_target_kind: "production",
      runtime_target_branch_id: config.branchId ?? null,
    }
    : {};

  const owner = { kind: "conversation", id: input.conversationId };

  return {
    kind: "agent",
    owner,
    public_id: input.runId,
    request: {
      mode: "default_chat",
      agent_id: config.agentId,
      initial_status: "pending",
      ...projectTarget,
    },
  };
}
184
+
185
/**
 * Builds the JSON body that starts a durable root run.
 *
 * The run input carries the user message (one text part holding the prompt),
 * the conversation/project/branch context, and the `durable_root_run`
 * reference to the existing run and message.
 */
function buildStartRunBody(
  config: DurableRunCanaryApiConfig,
  input: DurableRunCanaryStartRunInput,
) {
  const userMessage = {
    id: input.userMessageId,
    role: "user",
    parts: [{ type: "text", text: input.prompt }],
  };

  const context = {
    conversation_id: input.conversationId,
    project_id: config.projectId,
    branch_id: config.branchId ?? null,
  };

  const durableRootRun = {
    run_id: input.runId,
    message_id: input.messageId,
  };

  return {
    kind: "agent",
    owner: { kind: "conversation", id: input.conversationId },
    public_id: input.runId,
    request: {
      mode: "default_chat",
      agent_id: config.agentId,
      input: {
        messages: [userMessage],
        context,
        durable_root_run: durableRootRun,
      },
    },
  };
}
220
+
221
/** API operations the canary runner needs; a custom implementation may be passed to the runner. */
export interface DurableRunCanaryApiClient {
  /** Creates the root run for a conversation. */
  createDurableRootRun: (input: DurableRunCanaryCreateRootRunInput) => Promise<void>;
  /** Fetches and normalizes the summary of one run. */
  getRunSummary: (input: DurableRunCanaryCreateRootRunInput) => Promise<DurableRunCanaryRunSummary>;
  /** Lists the messages of a conversation. */
  listMessagesForCanary: (input: { conversationId: string }) => Promise<DurableRunCanaryMessage[]>;
  /** Posts the canary's user message and resolves with the stored message. */
  sendUserMessageForCanary: (
    input: DurableRunCanarySendUserMessageInput,
  ) => Promise<DurableRunCanaryMessage>;
  /** Starts a previously created root run. */
  startDurableRun: (input: DurableRunCanaryStartRunInput) => Promise<void>;
}
230
+
231
/**
 * Creates the default HTTP-backed canary API client.
 *
 * Every request goes through `apiFetch`, which resolves the path against
 * `config.apiUrl`, adds the JSON/bearer headers, applies a
 * `config.requestTimeoutMs` abort timeout, throws on non-OK responses and
 * decodes the response body as JSON.
 *
 * @param config API connection settings.
 * @returns A client exposing the five canary operations.
 */
export function createDurableRunCanaryApiClient(
  config: DurableRunCanaryApiConfig,
): DurableRunCanaryApiClient {
  const doFetch = createFetch(config);

  async function apiFetch<T>(
    path: string,
    init: RequestInit | undefined,
    parse: (value: unknown) => T,
  ): Promise<T>;
  async function apiFetch(path: string, init?: RequestInit): Promise<unknown>;
  async function apiFetch<T>(
    path: string,
    init?: RequestInit,
    parse?: (value: unknown) => T,
  ): Promise<T | unknown> {
    const url = createApiUrl(config, path);
    const headers = createJsonHeaders(config, init?.headers);
    // The timeout signal is applied last, so it replaces any signal in `init`.
    const signal = AbortSignal.timeout(config.requestTimeoutMs);
    const response = await doFetch(url, { ...init, headers, signal });

    if (response.ok) {
      const payload: unknown = await response.json();
      return parse ? parse(payload) : payload;
    }

    const method = init?.method ?? "GET";
    const responseText = await response.text();
    throw new Error(`API ${method} ${path} failed: ${response.status} ${responseText}`);
  }

  // Builds the RequestInit for a JSON POST.
  const postJson = (body: unknown): RequestInit => ({
    method: "POST",
    body: JSON.stringify(body),
  });

  return {
    createDurableRootRun: async (input) => {
      await apiFetch("/runs", postJson(buildCreateRootRunBody(config, input)));
    },

    getRunSummary: async (input) => {
      const path = `/conversations/${input.conversationId}/runs/${input.runId}`;
      return parseDurableRunCanaryRunSummary(await apiFetch(path));
    },

    listMessagesForCanary: async (input) => {
      const { data } = await apiFetch(
        `/conversations/${input.conversationId}/messages?limit=100`,
        undefined,
        (value) => getDurableRunCanaryMessageListSchema().parse(value),
      );
      return data;
    },

    sendUserMessageForCanary: (input) =>
      apiFetch(
        `/conversations/${input.conversationId}/messages`,
        postJson({
          role: "user",
          parts: [{ type: "text", text: input.prompt }],
        }),
        (value) => getDurableRunCanaryMessageSchema().parse(value),
      ),

    startDurableRun: async (input) => {
      await apiFetch("/runs", postJson(buildStartRunBody(config, input)));
    },
  };
}
314
+
315
/** Outcome of running one canary case. */
export interface DurableRunCanaryResult {
  /** Id copied from the case. */
  id: string;
  /** Label copied from the case. */
  label: string;
  /** "pass" when the case ran and validated without throwing, otherwise "fail". */
  status: "pass" | "fail";
  /** "OK" on success; the error message (or stringified error) on failure. */
  details: string;
  /** Wall-clock duration of the case in milliseconds. */
  durationMs: number;
  /** Conversation the case ran against. */
  conversationId: string;
  /** Generated run id, or "unknown" when the case failed before one was generated. */
  runId: string;
  /** Artifact paths reported by the case; omitted when there are none. */
  artifactPaths?: string[];
}
325
+
326
/** A canary case after its `prepare` step: the fixtures and hooks the runner drives. */
export interface DurableRunCanaryPreparedCase {
  /** Artifact paths for the result, either fixed or derived from the run id. */
  artifactPaths?: string[] | ((runId: string) => string[]);
  /** Removes the case's fixtures; the runner calls it with the run id after a passing case unless evidence is kept. */
  cleanup: (input?: { runId: string }) => Promise<void>;
  /** Conversation the user message is posted to and the run is created in. */
  conversationId: string;
  /** Prompt sent as the user message. */
  prompt: string;
  /** Optional hook started before the run; it may resolve to a stop function that the runner awaits when the case ends. */
  startSidecar?: () => Promise<(() => Promise<void>) | void>;
  /** Human-readable title of the case. */
  title: string;
  /** Checks the terminal run and the collected messages; throwing marks the case as failed. */
  validate: (input: {
    messages: DurableRunCanaryMessage[];
    run: DurableRunCanaryRunSummary;
  }) => Promise<void> | void;
}
338
+
339
/** Definition of one canary case. */
export interface DurableRunCanaryCase {
  /** Identifier copied into the result. */
  id: string;
  /** Display label copied into the result. */
  label: string;
  /** Sets up the case and resolves with its prepared fixtures and hooks. */
  prepare: () => Promise<DurableRunCanaryPreparedCase>;
}
344
+
345
/** API settings plus runner-specific options. */
export interface DurableRunCanaryRunnerConfig extends DurableRunCanaryApiConfig {
  /** When true, the runner skips the case's `cleanup` after a passing run. */
  keepSuccessfulEvidence: boolean;
}
348
+
349
/** Conversation id and run id that together locate a run summary. */
interface RunSummaryLocator {
  /** Conversation that owns the run. */
  conversationId: string;
  /** Id of the run to look up. */
  runId: string;
}
353
+
354
/** Input of the polling helpers: the run to look up and the function used to fetch its summary. */
interface WaitForRunInput extends RunSummaryLocator {
  /** Fetches the current summary of the located run. */
  getRunSummary: (input: RunSummaryLocator) => Promise<DurableRunCanaryRunSummary>;
}
357
+
358
/**
 * Matches a canonical hyphenated UUID, case-insensitively. The version digit
 * must be 1-8 and the variant digit one of 8, 9, a or b.
 */
const UUID_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-8][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i;
359
+
360
/** Type guard: true for non-null objects that are not arrays. */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (typeof value !== "object" || value === null) {
    return false;
  }
  return !Array.isArray(value);
}
363
+
364
/**
 * Recursively searches `value` for child conversation ids and adds them to
 * `childConversationIds`.
 *
 * - Strings are treated as possibly JSON-encoded: they are parsed and the
 *   result is searched; strings that are not valid JSON are ignored.
 * - Arrays and plain objects are searched element by element.
 * - On an object, a `childConversationId` or `child_conversation_id` string
 *   that matches `UUID_PATTERN` is collected.
 *
 * Recursion stops once `depth` exceeds 8.
 */
function collectChildConversationIdsFromValue(
  value: unknown,
  childConversationIds: Set<string>,
  depth = 0,
): void {
  if (depth > 8) {
    return;
  }

  const nextDepth = depth + 1;

  if (typeof value === "string") {
    try {
      collectChildConversationIdsFromValue(JSON.parse(value), childConversationIds, nextDepth);
    } catch {
      // Not JSON: an ordinary string carries no ids.
    }
    return;
  }

  if (Array.isArray(value)) {
    value.forEach((entry) => {
      collectChildConversationIdsFromValue(entry, childConversationIds, nextDepth);
    });
    return;
  }

  if (!isRecord(value)) {
    return;
  }

  // Direct hits on this object are recorded before descending into its values.
  for (const key of ["childConversationId", "child_conversation_id"]) {
    const candidate = value[key];
    const isUuidString = typeof candidate === "string" && UUID_PATTERN.test(candidate);
    if (isUuidString) {
      childConversationIds.add(candidate as string);
    }
  }

  Object.values(value).forEach((nested) => {
    collectChildConversationIdsFromValue(nested, childConversationIds, nextDepth);
  });
}
404
+
405
/**
 * Returns the distinct child conversation ids referenced by the `output` of
 * tool-result parts (`type` of "tool_result" or "tool-result") in `messages`,
 * in first-seen order.
 */
function collectReferencedChildConversationIds(messages: DurableRunCanaryMessage[]): string[] {
  const found = new Set<string>();

  for (const { parts } of messages) {
    for (const part of parts) {
      if (isRecord(part) && (part.type === "tool_result" || part.type === "tool-result")) {
        collectChildConversationIdsFromValue(part.output, found);
      }
    }
  }

  return Array.from(found);
}
420
+
421
/** Resolves after `ms` milliseconds, using the shimmed `setTimeout`. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    dntShim.setTimeout(() => done(), ms);
  });
}
426
+
427
/** True when `status` is exactly "completed", "failed" or "cancelled". */
function isTerminalRunStatus(status: string): boolean {
  switch (status) {
    case "completed":
    case "failed":
    case "cancelled":
      return true;
    default:
      return false;
  }
}
430
+
431
/** Generates a fresh run id: "run_" followed by a random UUID. */
function createDurableRunCanaryRunId(): string {
  const uuid = dntShim.crypto.randomUUID();
  return "run_" + uuid;
}
434
+
435
/**
 * Polls until the run summary can be fetched.
 *
 * An error whose message contains " 404 " means "not visible yet" and is
 * retried every 500 ms for up to 30 seconds. Any other error is rethrown
 * immediately.
 *
 * @returns The first summary that is fetched successfully.
 * @throws When the run is still not visible after the 30 second window.
 */
async function waitForRunSummaryVisibility(
  input: WaitForRunInput,
): Promise<DurableRunCanaryRunSummary> {
  const giveUpAt = Date.now() + 30_000;
  const isNotFound = (error: unknown): boolean =>
    error instanceof Error && error.message.includes(" 404 ");

  while (Date.now() < giveUpAt) {
    try {
      return await input.getRunSummary(input);
    } catch (error) {
      if (!isNotFound(error)) {
        throw error;
      }
    }

    await sleep(500);
  }

  throw new Error(`Run ${input.runId} did not become visible in time`);
}
454
+
455
/**
 * Polls the run summary every 1.5 seconds until its status is terminal.
 *
 * `input.requestTimeoutMs` is the total time budget for the polling loop.
 * Errors from `getRunSummary` are not retried and propagate to the caller.
 *
 * @returns The first summary whose status is terminal.
 * @throws When the budget runs out before a terminal status is seen.
 */
async function waitForTerminalRun(
  input: WaitForRunInput & { requestTimeoutMs: number },
): Promise<DurableRunCanaryRunSummary> {
  const giveUpAt = Date.now() + input.requestTimeoutMs;

  while (Date.now() < giveUpAt) {
    const summary = await input.getRunSummary(input);
    if (!isTerminalRunStatus(summary.status)) {
      await sleep(1_500);
      continue;
    }

    return summary;
  }

  throw new Error(`Timed out waiting for run ${input.runId} to reach a terminal state`);
}
471
+
472
/**
 * Creates a runner that executes durable-run canary cases against the API.
 *
 * @param config Runner and API settings.
 * @param apiClient Client used for every API call; defaults to the HTTP client
 *   built from `config`.
 * @returns An object exposing `runCase`.
 */
export function createDurableRunCanaryRunner(
  config: DurableRunCanaryRunnerConfig,
  apiClient: DurableRunCanaryApiClient = createDurableRunCanaryApiClient(config),
) {
  // Call through `apiClient` instead of detaching the method: a caller-supplied
  // client whose `getRunSummary` relies on `this` would otherwise lose its
  // receiver when the polling helpers invoke it.
  const getRunSummary = (
    locator: DurableRunCanaryCreateRootRunInput,
  ): Promise<DurableRunCanaryRunSummary> => apiClient.getRunSummary(locator);

  /**
   * Lists the conversation's messages plus the messages of every child
   * conversation referenced by its tool results (one level deep).
   */
  async function listMessagesWithReferencedChildren(
    conversationId: string,
  ): Promise<DurableRunCanaryMessage[]> {
    const messages = await apiClient.listMessagesForCanary({ conversationId });
    const childConversationIds = collectReferencedChildConversationIds(messages);
    const childMessages = await Promise.all(
      childConversationIds.map((childConversationId) =>
        apiClient.listMessagesForCanary({ conversationId: childConversationId })
      ),
    );

    return [...messages, ...childMessages.flat()];
  }

  /**
   * Runs one case: send the user message, create the root run, wait for it to
   * become visible, start it, wait for a terminal status, then validate.
   *
   * Failures inside that sequence are reported as a "fail" result rather than
   * thrown. Errors from `prepare`, `startSidecar` or the sidecar's stop
   * function are not caught here and propagate to the caller.
   */
  async function runCase(testCase: DurableRunCanaryCase): Promise<DurableRunCanaryResult> {
    const startedAt = Date.now();
    const prepared = await testCase.prepare();
    // Stays "unknown" in the result if the case fails before an id is generated.
    let runId = "unknown";
    const stopSidecar = await prepared.startSidecar?.();
    const resolveArtifactPaths = (currentRunId: string): string[] | undefined =>
      typeof prepared.artifactPaths === "function"
        ? prepared.artifactPaths(currentRunId)
        : prepared.artifactPaths;

    try {
      const userMessage = await apiClient.sendUserMessageForCanary({
        conversationId: prepared.conversationId,
        prompt: prepared.prompt,
      });
      runId = createDurableRunCanaryRunId();

      await apiClient.createDurableRootRun({
        conversationId: prepared.conversationId,
        runId,
      });
      const visibleRun = await waitForRunSummaryVisibility({
        conversationId: prepared.conversationId,
        getRunSummary,
        runId,
      });

      await apiClient.startDurableRun({
        conversationId: prepared.conversationId,
        messageId: visibleRun.messageId,
        prompt: prepared.prompt,
        runId,
        userMessageId: userMessage.id,
      });

      const terminalRun = await waitForTerminalRun({
        conversationId: prepared.conversationId,
        getRunSummary,
        requestTimeoutMs: config.requestTimeoutMs,
        runId,
      });
      const messages = await listMessagesWithReferencedChildren(prepared.conversationId);

      await prepared.validate({
        messages,
        run: terminalRun,
      });

      // Resolve artifact paths before cleanup runs.
      const artifactPaths = resolveArtifactPaths(runId);

      if (!config.keepSuccessfulEvidence) {
        await prepared.cleanup({ runId });
      }

      return {
        id: testCase.id,
        label: testCase.label,
        status: "pass",
        details: "OK",
        durationMs: Date.now() - startedAt,
        conversationId: prepared.conversationId,
        runId,
        ...(artifactPaths?.length ? { artifactPaths } : {}),
      };
    } catch (error) {
      // Cleanup is not called on failure.
      const artifactPaths = resolveArtifactPaths(runId);

      return {
        id: testCase.id,
        label: testCase.label,
        status: "fail",
        details: error instanceof Error ? error.message : String(error),
        durationMs: Date.now() - startedAt,
        conversationId: prepared.conversationId,
        runId,
        ...(artifactPaths?.length ? { artifactPaths } : {}),
      };
    } finally {
      await stopSidecar?.();
    }
  }

  return {
    runCase,
  };
}
578
+
579
/** Exposes two otherwise module-private helpers. */
export const durableRunCanaryRunnerInternals = {
  collectReferencedChildConversationIds: collectReferencedChildConversationIds,
  isTerminalRunStatus: isTerminalRunStatus,
};
@@ -17,6 +17,25 @@ export {
17
17
  type TestSuite,
18
18
  } from "./agent-tester.js";
19
19
 
20
+ export {
21
+ createDurableRunCanaryApiClient,
22
+ createDurableRunCanaryRunner,
23
+ type DurableRunCanaryApiClient,
24
+ type DurableRunCanaryApiConfig,
25
+ type DurableRunCanaryCase,
26
+ type DurableRunCanaryCreateRootRunInput,
27
+ type DurableRunCanaryMessage,
28
+ type DurableRunCanaryPreparedCase,
29
+ type DurableRunCanaryResult,
30
+ type DurableRunCanaryRunnerConfig,
31
+ durableRunCanaryRunnerInternals,
32
+ type DurableRunCanaryRunSummary,
33
+ type DurableRunCanarySendUserMessageInput,
34
+ type DurableRunCanaryStartRunInput,
35
+ getDurableRunCanaryMessageSchema,
36
+ parseDurableRunCanaryRunSummary,
37
+ } from "./durable-run-canaries/index.js";
38
+
20
39
  export {
21
40
  buildFailureSuffix,
22
41
  buildLiveEvalCaseTagSummary,
@@ -27,17 +46,29 @@ export {
27
46
  buildProgressLine,
28
47
  buildRuntimePerformanceSummary,
29
48
  containsOrderedSubsequence,
49
+ containsSkillLoad,
50
+ countStepStartedEvents,
30
51
  createFailedEvalResult,
52
+ createLiveEvalCaseSupport,
31
53
  createPassedEvalResult,
32
54
  createPlainTextPdf,
33
55
  createSkippedEvalResult,
34
56
  hasEveryLiveEvalTag,
57
+ hasFinished,
58
+ type LiveEvalCase,
35
59
  type LiveEvalCaseMetadata,
36
60
  type LiveEvalCaseSelectionInput,
61
+ type LiveEvalContext,
62
+ type LiveEvalProjectFile,
63
+ type LiveEvalProjectFileReaderInput,
64
+ type LiveEvalRequestBody,
37
65
  type LiveEvalResultForPerformance,
38
66
  type LiveEvalResultForReport,
39
67
  type LiveEvalResultRecord,
68
+ type LiveEvalRunnerConfig,
69
+ liveEvalRunnerInternals,
40
70
  type LiveEvalRuntime,
71
+ type PreparedLiveEvalInput,
41
72
  resolveLiveEvalRequestedCaseIds,
42
73
  type RuntimePerformanceSummary,
43
74
  selectLiveEvalCases,
@@ -10,7 +10,11 @@ export {
10
10
  type LiveEvalRuntime,
11
11
  type RuntimePerformanceSummary,
12
12
  } from "./performance.js";
13
- export { buildLiveEvalRequestBody, type BuildLiveEvalRequestBodyInput } from "./request.js";
13
+ export {
14
+ buildLiveEvalRequestBody,
15
+ type BuildLiveEvalRequestBodyInput,
16
+ type LiveEvalRequestBody,
17
+ } from "./request.js";
14
18
  export {
15
19
  buildLiveEvalCaseTagSummary,
16
20
  buildLiveEvalRuntimeSummary,
@@ -28,3 +32,16 @@ export {
28
32
  createSkippedEvalResult,
29
33
  type LiveEvalResultRecord,
30
34
  } from "./result.js";
35
+ export {
36
+ containsSkillLoad,
37
+ countStepStartedEvents,
38
+ createLiveEvalCaseSupport,
39
+ hasFinished,
40
+ type LiveEvalCase,
41
+ type LiveEvalContext,
42
+ type LiveEvalProjectFile,
43
+ type LiveEvalProjectFileReaderInput,
44
+ type LiveEvalRunnerConfig,
45
+ liveEvalRunnerInternals,
46
+ type PreparedLiveEvalInput,
47
+ } from "./runner.js";
@@ -1,4 +1,20 @@
1
1
  import * as dntShim from "../../../../_dnt.shims.js";
2
/** Shape of the request body returned by `buildLiveEvalRequestBody`. */
export interface LiveEvalRequestBody {
  /** Thread identifier of the request. */
  threadId: string;
  /** Run identifier of the request. */
  runId: string;
  /** String-valued state map. */
  state: Record<string, string>;
  /** Tool entries; their shape is not constrained by this type. */
  tools: unknown[];
  /** Context entries; their shape is not constrained by this type. */
  context: unknown[];
  /** Optional forwarded properties, namespaced under `veryfront`. */
  forwardedProps?: {
    veryfront: Record<string, unknown>;
  };
  /** Messages of the request; only user-role messages with string content are allowed. */
  messages: Array<{
    id: string;
    role: "user";
    content: string;
  }>;
}
17
+
2
18
  export interface BuildLiveEvalRequestBodyInput {
3
19
  testCaseId: string;
4
20
  prompt: string;
@@ -12,7 +28,9 @@ export interface BuildLiveEvalRequestBodyInput {
12
28
  maxSteps?: number;
13
29
  }
14
30
 
15
- export function buildLiveEvalRequestBody(input: BuildLiveEvalRequestBodyInput) {
31
+ export function buildLiveEvalRequestBody(
32
+ input: BuildLiveEvalRequestBodyInput,
33
+ ): LiveEvalRequestBody {
16
34
  const veryfront: Record<string, unknown> = {};
17
35
  if (input.projectId) {
18
36
  veryfront.projectId = input.projectId;