@wix/evalforge-types 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. package/build/index.js +1047 -0
  2. package/build/index.js.map +7 -0
  3. package/build/index.mjs +928 -0
  4. package/build/index.mjs.map +7 -0
  5. package/build/types/common/base-entity.d.ts +26 -0
  6. package/build/types/common/index.d.ts +3 -0
  7. package/build/types/common/mcp.d.ts +17 -0
  8. package/build/types/common/models.d.ts +55 -0
  9. package/build/types/evaluation/eval-result.d.ts +239 -0
  10. package/build/types/evaluation/eval-run.d.ts +581 -0
  11. package/build/types/evaluation/index.d.ts +4 -0
  12. package/build/types/evaluation/live-trace.d.ts +47 -0
  13. package/build/types/evaluation/metrics.d.ts +157 -0
  14. package/build/types/improvement/improvement.d.ts +140 -0
  15. package/build/types/improvement/index.d.ts +1 -0
  16. package/build/types/index.d.ts +24 -0
  17. package/build/types/project/index.d.ts +1 -0
  18. package/build/types/project/project.d.ts +41 -0
  19. package/build/types/scenario/environment.d.ts +58 -0
  20. package/build/types/scenario/index.d.ts +2 -0
  21. package/build/types/scenario/test-scenario.d.ts +50 -0
  22. package/build/types/suite/index.d.ts +1 -0
  23. package/build/types/suite/test-suite.d.ts +37 -0
  24. package/build/types/target/agent.d.ts +53 -0
  25. package/build/types/target/index.d.ts +4 -0
  26. package/build/types/target/skill.d.ts +78 -0
  27. package/build/types/target/skills-group.d.ts +37 -0
  28. package/build/types/target/target.d.ts +17 -0
  29. package/build/types/template/index.d.ts +1 -0
  30. package/build/types/template/template.d.ts +38 -0
  31. package/build/types/test/base.d.ts +43 -0
  32. package/build/types/test/build-check.d.ts +29 -0
  33. package/build/types/test/command-execution.d.ts +31 -0
  34. package/build/types/test/file-content.d.ts +52 -0
  35. package/build/types/test/file-presence.d.ts +24 -0
  36. package/build/types/test/index.d.ts +124 -0
  37. package/build/types/test/llm.d.ts +36 -0
  38. package/build/types/test/playwright-nl.d.ts +28 -0
  39. package/build/types/test/site-config.d.ts +32 -0
  40. package/build/types/test/tool.d.ts +26 -0
  41. package/build/types/test/vitest.d.ts +30 -0
  42. package/package.json +50 -0
@@ -0,0 +1,581 @@
1
+ import { z } from 'zod';
2
+ /**
3
+ * Trigger types for evaluations. Each member's string value is identical to its name.
4
+ */
5
+ export declare enum TriggerType {
6
+ RESOURCES_UPDATED = "RESOURCES_UPDATED",
7
+ MCP_VERSION_RELEASE = "MCP_VERSION_RELEASE",
8
+ MCP_PREVIEW_CREATED = "MCP_PREVIEW_CREATED",
9
+ MANUAL = "MANUAL"
10
+ }
11
+ /**
12
+ * Trigger metadata schema. Both fields are optional: `version` (string) and `resourceUpdated` (array of strings).
13
+ */
14
+ export declare const TriggerMetadataSchema: z.ZodObject<{
15
+ version: z.ZodOptional<z.ZodString>;
16
+ resourceUpdated: z.ZodOptional<z.ZodArray<z.ZodString>>;
17
+ }, z.core.$strip>;
18
+ export type TriggerMetadata = z.infer<typeof TriggerMetadataSchema>;
19
+ /**
20
+ * Trigger schema. Requires `id` (string) and `type` (a TriggerType member); `metadata` is optional and has the same fields as TriggerMetadataSchema.
21
+ */
22
+ export declare const TriggerSchema: z.ZodObject<{
23
+ id: z.ZodString;
24
+ metadata: z.ZodOptional<z.ZodObject<{
25
+ version: z.ZodOptional<z.ZodString>;
26
+ resourceUpdated: z.ZodOptional<z.ZodArray<z.ZodString>>;
27
+ }, z.core.$strip>>;
28
+ type: z.ZodEnum<typeof TriggerType>;
29
+ }, z.core.$strip>;
30
+ export type Trigger = z.infer<typeof TriggerSchema>;
31
+ /**
32
+ * Failure category enum. Member values are the lower snake_case form of the member name (e.g. MISSING_FILE is "missing_file").
33
+ */
34
+ export declare enum FailureCategory {
35
+ MISSING_FILE = "missing_file",
36
+ WRONG_CONTENT = "wrong_content",
37
+ BUILD_ERROR = "build_error",
38
+ TEST_FAILURE = "test_failure",
39
+ RUNTIME_ERROR = "runtime_error",
40
+ PERFORMANCE = "performance"
41
+ }
42
+ /**
43
+ * Failure severity enum with the string values "critical", "high", "medium" and "low".
44
+ */
45
+ export declare enum FailureSeverity {
46
+ CRITICAL = "critical",
47
+ HIGH = "high",
48
+ MEDIUM = "medium",
49
+ LOW = "low"
50
+ }
51
+ /**
52
+ * Diff line type schema: one of the string literals "added", "removed" or "unchanged".
53
+ */
54
+ export declare const DiffLineTypeSchema: z.ZodEnum<{
55
+ added: "added";
56
+ removed: "removed";
57
+ unchanged: "unchanged";
58
+ }>;
59
+ export type DiffLineType = z.infer<typeof DiffLineTypeSchema>;
60
+ /**
61
+ * Diff line schema - represents a single line in a diff: its `type`, string `content` and numeric `lineNumber`. NOTE(review): whether `lineNumber` is 0- or 1-based is not stated here - confirm.
62
+ */
63
+ export declare const DiffLineSchema: z.ZodObject<{
64
+ type: z.ZodEnum<{
65
+ added: "added";
66
+ removed: "removed";
67
+ unchanged: "unchanged";
68
+ }>;
69
+ content: z.ZodString;
70
+ lineNumber: z.ZodNumber;
71
+ }, z.core.$strip>;
72
+ export type DiffLine = z.infer<typeof DiffLineSchema>;
73
+ /**
74
+ * Diff content schema - represents a file diff: the `path`, the `expected` and `actual` strings, and the per-line `diffLines` (same fields as DiffLineSchema).
75
+ */
76
+ export declare const DiffContentSchema: z.ZodObject<{
77
+ path: z.ZodString;
78
+ expected: z.ZodString;
79
+ actual: z.ZodString;
80
+ diffLines: z.ZodArray<z.ZodObject<{
81
+ type: z.ZodEnum<{
82
+ added: "added";
83
+ removed: "removed";
84
+ unchanged: "unchanged";
85
+ }>;
86
+ content: z.ZodString;
87
+ lineNumber: z.ZodNumber;
88
+ }, z.core.$strip>>;
89
+ }, z.core.$strip>;
90
+ export type DiffContent = z.infer<typeof DiffContentSchema>;
91
+ /**
92
+ * Command execution schema: `command` (string), numeric `exitCode` and `duration`, and an optional `output` string. NOTE(review): the unit of `duration` is not stated here - confirm.
93
+ */
94
+ export declare const CommandExecutionSchema: z.ZodObject<{
95
+ command: z.ZodString;
96
+ exitCode: z.ZodNumber;
97
+ output: z.ZodOptional<z.ZodString>;
98
+ duration: z.ZodNumber;
99
+ }, z.core.$strip>;
100
+ export type CommandExecution = z.infer<typeof CommandExecutionSchema>;
101
+ /**
102
+ * File modification schema: a `path` string and an `action` that is one of "created", "modified" or "deleted".
103
+ */
104
+ export declare const FileModificationSchema: z.ZodObject<{
105
+ path: z.ZodString;
106
+ action: z.ZodEnum<{
107
+ deleted: "deleted";
108
+ created: "created";
109
+ modified: "modified";
110
+ }>;
111
+ }, z.core.$strip>;
112
+ export type FileModification = z.infer<typeof FileModificationSchema>;
113
+ /**
114
+ * API call schema: `endpoint` (string) plus numeric `tokensUsed` and `duration`. NOTE(review): the unit of `duration` is not stated here - confirm.
115
+ */
116
+ export declare const ApiCallSchema: z.ZodObject<{
117
+ endpoint: z.ZodString;
118
+ tokensUsed: z.ZodNumber;
119
+ duration: z.ZodNumber;
120
+ }, z.core.$strip>;
121
+ export type ApiCall = z.infer<typeof ApiCallSchema>;
122
+ /**
123
+ * Execution trace schema - represents detailed execution information: arrays of `commands`, `filesModified` and `apiCalls` (same fields as CommandExecutionSchema, FileModificationSchema and ApiCallSchema) plus a numeric `totalDuration`.
124
+ */
125
+ export declare const ExecutionTraceSchema: z.ZodObject<{
126
+ commands: z.ZodArray<z.ZodObject<{
127
+ command: z.ZodString;
128
+ exitCode: z.ZodNumber;
129
+ output: z.ZodOptional<z.ZodString>;
130
+ duration: z.ZodNumber;
131
+ }, z.core.$strip>>;
132
+ filesModified: z.ZodArray<z.ZodObject<{
133
+ path: z.ZodString;
134
+ action: z.ZodEnum<{
135
+ deleted: "deleted";
136
+ created: "created";
137
+ modified: "modified";
138
+ }>;
139
+ }, z.core.$strip>>;
140
+ apiCalls: z.ZodArray<z.ZodObject<{
141
+ endpoint: z.ZodString;
142
+ tokensUsed: z.ZodNumber;
143
+ duration: z.ZodNumber;
144
+ }, z.core.$strip>>;
145
+ totalDuration: z.ZodNumber;
146
+ }, z.core.$strip>;
147
+ export type ExecutionTrace = z.infer<typeof ExecutionTraceSchema>;
148
+ /**
149
+ * Failure analysis schema. Required: `category`, `severity`, `summary`, `details`, `rootCause`, `suggestedFix`, `relatedAssertions`. Optional: `codeSnippet`, `similarIssues`, `patternId`, `diff`, `executionTrace`.
150
+ */
151
+ export declare const FailureAnalysisSchema: z.ZodObject<{
152
+ category: z.ZodEnum<typeof FailureCategory>;
153
+ severity: z.ZodEnum<typeof FailureSeverity>;
154
+ summary: z.ZodString;
155
+ details: z.ZodString;
156
+ rootCause: z.ZodString;
157
+ suggestedFix: z.ZodString;
158
+ relatedAssertions: z.ZodArray<z.ZodString>;
159
+ codeSnippet: z.ZodOptional<z.ZodString>;
160
+ similarIssues: z.ZodOptional<z.ZodArray<z.ZodString>>;
161
+ patternId: z.ZodOptional<z.ZodString>;
162
+ diff: z.ZodOptional<z.ZodObject<{
163
+ path: z.ZodString;
164
+ expected: z.ZodString;
165
+ actual: z.ZodString;
166
+ diffLines: z.ZodArray<z.ZodObject<{
167
+ type: z.ZodEnum<{
168
+ added: "added";
169
+ removed: "removed";
170
+ unchanged: "unchanged";
171
+ }>;
172
+ content: z.ZodString;
173
+ lineNumber: z.ZodNumber;
174
+ }, z.core.$strip>>;
175
+ }, z.core.$strip>>;
176
+ executionTrace: z.ZodOptional<z.ZodObject<{
177
+ commands: z.ZodArray<z.ZodObject<{
178
+ command: z.ZodString;
179
+ exitCode: z.ZodNumber;
180
+ output: z.ZodOptional<z.ZodString>;
181
+ duration: z.ZodNumber;
182
+ }, z.core.$strip>>;
183
+ filesModified: z.ZodArray<z.ZodObject<{
184
+ path: z.ZodString;
185
+ action: z.ZodEnum<{
186
+ deleted: "deleted";
187
+ created: "created";
188
+ modified: "modified";
189
+ }>;
190
+ }, z.core.$strip>>;
191
+ apiCalls: z.ZodArray<z.ZodObject<{
192
+ endpoint: z.ZodString;
193
+ tokensUsed: z.ZodNumber;
194
+ duration: z.ZodNumber;
195
+ }, z.core.$strip>>;
196
+ totalDuration: z.ZodNumber;
197
+ }, z.core.$strip>>;
198
+ }, z.core.$strip>;
199
+ export type FailureAnalysis = z.infer<typeof FailureAnalysisSchema>;
200
+ /**
201
+ * Evaluation run schema.
202
+ *
203
+ * Represents a complete evaluation run with configuration, results, and metrics. Top-level optional fields: `deleted`, `agentId`, `skillsGroupId`, `failureAnalyses`, `llmTraceSummary`, `trigger`, `startedAt`, `completedAt`, `liveTraceEvents`; all other top-level fields are required. The timestamps `createdAt`, `updatedAt`, `startedAt` and `completedAt` are typed as strings.
204
+ */
205
+ export declare const EvalRunSchema: z.ZodObject<{
206
+ id: z.ZodString;
207
+ name: z.ZodString;
208
+ description: z.ZodString;
209
+ createdAt: z.ZodString;
210
+ updatedAt: z.ZodString;
211
+ deleted: z.ZodOptional<z.ZodBoolean>;
212
+ projectId: z.ZodString;
213
+ agentId: z.ZodOptional<z.ZodString>;
214
+ skillsGroupId: z.ZodOptional<z.ZodString>;
215
+ scenarioIds: z.ZodArray<z.ZodString>;
216
+ status: z.ZodEnum<typeof import("./metrics.js").EvalStatus>;
217
+ progress: z.ZodNumber;
218
+ results: z.ZodArray<z.ZodObject<{
219
+ id: z.ZodString;
220
+ targetId: z.ZodString;
221
+ targetName: z.ZodOptional<z.ZodString>;
222
+ scenarioId: z.ZodString;
223
+ scenarioName: z.ZodString;
224
+ modelConfig: z.ZodOptional<z.ZodObject<{
225
+ model: z.ZodEnum<typeof import("../index.js").ModelIds>;
226
+ temperature: z.ZodOptional<z.ZodNumber>;
227
+ maxTokens: z.ZodOptional<z.ZodNumber>;
228
+ }, z.core.$strip>>;
229
+ assertionResults: z.ZodArray<z.ZodObject<{
230
+ id: z.ZodString;
231
+ assertionId: z.ZodString;
232
+ assertionType: z.ZodString;
233
+ assertionName: z.ZodString;
234
+ status: z.ZodEnum<typeof import("./eval-result.js").AssertionResultStatus>;
235
+ message: z.ZodOptional<z.ZodString>;
236
+ expected: z.ZodOptional<z.ZodString>;
237
+ actual: z.ZodOptional<z.ZodString>;
238
+ duration: z.ZodOptional<z.ZodNumber>;
239
+ details: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodUnknown>>;
240
+ llmTraceSteps: z.ZodOptional<z.ZodArray<z.ZodObject<{
241
+ id: z.ZodString;
242
+ stepNumber: z.ZodNumber;
243
+ type: z.ZodEnum<typeof import("./metrics.js").LLMStepType>;
244
+ model: z.ZodString;
245
+ provider: z.ZodString;
246
+ startedAt: z.ZodString;
247
+ durationMs: z.ZodNumber;
248
+ tokenUsage: z.ZodObject<{
249
+ prompt: z.ZodNumber;
250
+ completion: z.ZodNumber;
251
+ total: z.ZodNumber;
252
+ }, z.core.$strip>;
253
+ costUsd: z.ZodNumber;
254
+ toolName: z.ZodOptional<z.ZodString>;
255
+ toolArguments: z.ZodOptional<z.ZodString>;
256
+ inputPreview: z.ZodOptional<z.ZodString>;
257
+ outputPreview: z.ZodOptional<z.ZodString>;
258
+ success: z.ZodBoolean;
259
+ error: z.ZodOptional<z.ZodString>;
260
+ }, z.core.$strip>>>;
261
+ }, z.core.$strip>>;
262
+ metrics: z.ZodOptional<z.ZodObject<{
263
+ totalAssertions: z.ZodNumber;
264
+ passed: z.ZodNumber;
265
+ failed: z.ZodNumber;
266
+ skipped: z.ZodNumber;
267
+ errors: z.ZodNumber;
268
+ passRate: z.ZodNumber;
269
+ avgDuration: z.ZodNumber;
270
+ totalDuration: z.ZodNumber;
271
+ }, z.core.$strip>>;
272
+ passed: z.ZodNumber;
273
+ failed: z.ZodNumber;
274
+ passRate: z.ZodNumber;
275
+ duration: z.ZodNumber;
276
+ outputText: z.ZodOptional<z.ZodString>;
277
+ files: z.ZodOptional<z.ZodArray<z.ZodObject<{
278
+ path: z.ZodString;
279
+ content: z.ZodOptional<z.ZodString>;
280
+ }, z.core.$strip>>>;
281
+ startedAt: z.ZodOptional<z.ZodString>;
282
+ completedAt: z.ZodOptional<z.ZodString>;
283
+ llmTrace: z.ZodOptional<z.ZodObject<{
284
+ id: z.ZodString;
285
+ steps: z.ZodArray<z.ZodObject<{
286
+ id: z.ZodString;
287
+ stepNumber: z.ZodNumber;
288
+ type: z.ZodEnum<typeof import("./metrics.js").LLMStepType>;
289
+ model: z.ZodString;
290
+ provider: z.ZodString;
291
+ startedAt: z.ZodString;
292
+ durationMs: z.ZodNumber;
293
+ tokenUsage: z.ZodObject<{
294
+ prompt: z.ZodNumber;
295
+ completion: z.ZodNumber;
296
+ total: z.ZodNumber;
297
+ }, z.core.$strip>;
298
+ costUsd: z.ZodNumber;
299
+ toolName: z.ZodOptional<z.ZodString>;
300
+ toolArguments: z.ZodOptional<z.ZodString>;
301
+ inputPreview: z.ZodOptional<z.ZodString>;
302
+ outputPreview: z.ZodOptional<z.ZodString>;
303
+ success: z.ZodBoolean;
304
+ error: z.ZodOptional<z.ZodString>;
305
+ }, z.core.$strip>>;
306
+ summary: z.ZodObject<{
307
+ totalSteps: z.ZodNumber;
308
+ totalDurationMs: z.ZodNumber;
309
+ totalTokens: z.ZodObject<{
310
+ prompt: z.ZodNumber;
311
+ completion: z.ZodNumber;
312
+ total: z.ZodNumber;
313
+ }, z.core.$strip>;
314
+ totalCostUsd: z.ZodNumber;
315
+ stepTypeBreakdown: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodObject<{
316
+ count: z.ZodNumber;
317
+ durationMs: z.ZodNumber;
318
+ tokens: z.ZodNumber;
319
+ costUsd: z.ZodNumber;
320
+ }, z.core.$strip>>>;
321
+ modelBreakdown: z.ZodRecord<z.ZodString, z.ZodObject<{
322
+ count: z.ZodNumber;
323
+ durationMs: z.ZodNumber;
324
+ tokens: z.ZodNumber;
325
+ costUsd: z.ZodNumber;
326
+ }, z.core.$strip>>;
327
+ modelsUsed: z.ZodArray<z.ZodString>;
328
+ }, z.core.$strip>;
329
+ }, z.core.$strip>>;
330
+ }, z.core.$strip>>;
331
+ aggregateMetrics: z.ZodObject<{
332
+ totalAssertions: z.ZodNumber;
333
+ passed: z.ZodNumber;
334
+ failed: z.ZodNumber;
335
+ skipped: z.ZodNumber;
336
+ errors: z.ZodNumber;
337
+ passRate: z.ZodNumber;
338
+ avgDuration: z.ZodNumber;
339
+ totalDuration: z.ZodNumber;
340
+ }, z.core.$strip>;
341
+ failureAnalyses: z.ZodOptional<z.ZodArray<z.ZodObject<{
342
+ category: z.ZodEnum<typeof FailureCategory>;
343
+ severity: z.ZodEnum<typeof FailureSeverity>;
344
+ summary: z.ZodString;
345
+ details: z.ZodString;
346
+ rootCause: z.ZodString;
347
+ suggestedFix: z.ZodString;
348
+ relatedAssertions: z.ZodArray<z.ZodString>;
349
+ codeSnippet: z.ZodOptional<z.ZodString>;
350
+ similarIssues: z.ZodOptional<z.ZodArray<z.ZodString>>;
351
+ patternId: z.ZodOptional<z.ZodString>;
352
+ diff: z.ZodOptional<z.ZodObject<{
353
+ path: z.ZodString;
354
+ expected: z.ZodString;
355
+ actual: z.ZodString;
356
+ diffLines: z.ZodArray<z.ZodObject<{
357
+ type: z.ZodEnum<{
358
+ added: "added";
359
+ removed: "removed";
360
+ unchanged: "unchanged";
361
+ }>;
362
+ content: z.ZodString;
363
+ lineNumber: z.ZodNumber;
364
+ }, z.core.$strip>>;
365
+ }, z.core.$strip>>;
366
+ executionTrace: z.ZodOptional<z.ZodObject<{
367
+ commands: z.ZodArray<z.ZodObject<{
368
+ command: z.ZodString;
369
+ exitCode: z.ZodNumber;
370
+ output: z.ZodOptional<z.ZodString>;
371
+ duration: z.ZodNumber;
372
+ }, z.core.$strip>>;
373
+ filesModified: z.ZodArray<z.ZodObject<{
374
+ path: z.ZodString;
375
+ action: z.ZodEnum<{
376
+ deleted: "deleted";
377
+ created: "created";
378
+ modified: "modified";
379
+ }>;
380
+ }, z.core.$strip>>;
381
+ apiCalls: z.ZodArray<z.ZodObject<{
382
+ endpoint: z.ZodString;
383
+ tokensUsed: z.ZodNumber;
384
+ duration: z.ZodNumber;
385
+ }, z.core.$strip>>;
386
+ totalDuration: z.ZodNumber;
387
+ }, z.core.$strip>>;
388
+ }, z.core.$strip>>>;
389
+ llmTraceSummary: z.ZodOptional<z.ZodObject<{
390
+ totalSteps: z.ZodNumber;
391
+ totalDurationMs: z.ZodNumber;
392
+ totalTokens: z.ZodObject<{
393
+ prompt: z.ZodNumber;
394
+ completion: z.ZodNumber;
395
+ total: z.ZodNumber;
396
+ }, z.core.$strip>;
397
+ totalCostUsd: z.ZodNumber;
398
+ stepTypeBreakdown: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodObject<{
399
+ count: z.ZodNumber;
400
+ durationMs: z.ZodNumber;
401
+ tokens: z.ZodNumber;
402
+ costUsd: z.ZodNumber;
403
+ }, z.core.$strip>>>;
404
+ modelBreakdown: z.ZodRecord<z.ZodString, z.ZodObject<{
405
+ count: z.ZodNumber;
406
+ durationMs: z.ZodNumber;
407
+ tokens: z.ZodNumber;
408
+ costUsd: z.ZodNumber;
409
+ }, z.core.$strip>>;
410
+ modelsUsed: z.ZodArray<z.ZodString>;
411
+ }, z.core.$strip>>;
412
+ trigger: z.ZodOptional<z.ZodObject<{
413
+ id: z.ZodString;
414
+ metadata: z.ZodOptional<z.ZodObject<{
415
+ version: z.ZodOptional<z.ZodString>;
416
+ resourceUpdated: z.ZodOptional<z.ZodArray<z.ZodString>>;
417
+ }, z.core.$strip>>;
418
+ type: z.ZodEnum<typeof TriggerType>;
419
+ }, z.core.$strip>>;
420
+ startedAt: z.ZodOptional<z.ZodString>;
421
+ completedAt: z.ZodOptional<z.ZodString>;
422
+ liveTraceEvents: z.ZodOptional<z.ZodArray<z.ZodObject<{
423
+ evalRunId: z.ZodString;
424
+ scenarioId: z.ZodString;
425
+ scenarioName: z.ZodString;
426
+ targetId: z.ZodString;
427
+ targetName: z.ZodString;
428
+ stepNumber: z.ZodNumber;
429
+ type: z.ZodEnum<typeof import("./live-trace.js").LiveTraceEventType>;
430
+ toolName: z.ZodOptional<z.ZodString>;
431
+ toolArgs: z.ZodOptional<z.ZodString>;
432
+ outputPreview: z.ZodOptional<z.ZodString>;
433
+ timestamp: z.ZodString;
434
+ isComplete: z.ZodBoolean;
435
+ }, z.core.$strip>>>;
436
+ }, z.core.$strip>;
437
+ export type EvalRun = z.infer<typeof EvalRunSchema>;
438
+ /**
439
+ * Input schema for creating a new EvalRun. Compared with EvalRunSchema it omits `id`, `createdAt`, `updatedAt`, `status`, `progress`, `results`, `aggregateMetrics`, `startedAt` and `completedAt`; the remaining fields keep the same types and optionality.
440
+ */
441
+ export declare const CreateEvalRunInputSchema: z.ZodObject<{
442
+ name: z.ZodString;
443
+ description: z.ZodString;
444
+ deleted: z.ZodOptional<z.ZodBoolean>;
445
+ projectId: z.ZodString;
446
+ scenarioIds: z.ZodArray<z.ZodString>;
447
+ agentId: z.ZodOptional<z.ZodString>;
448
+ skillsGroupId: z.ZodOptional<z.ZodString>;
449
+ failureAnalyses: z.ZodOptional<z.ZodArray<z.ZodObject<{
450
+ category: z.ZodEnum<typeof FailureCategory>;
451
+ severity: z.ZodEnum<typeof FailureSeverity>;
452
+ summary: z.ZodString;
453
+ details: z.ZodString;
454
+ rootCause: z.ZodString;
455
+ suggestedFix: z.ZodString;
456
+ relatedAssertions: z.ZodArray<z.ZodString>;
457
+ codeSnippet: z.ZodOptional<z.ZodString>;
458
+ similarIssues: z.ZodOptional<z.ZodArray<z.ZodString>>;
459
+ patternId: z.ZodOptional<z.ZodString>;
460
+ diff: z.ZodOptional<z.ZodObject<{
461
+ path: z.ZodString;
462
+ expected: z.ZodString;
463
+ actual: z.ZodString;
464
+ diffLines: z.ZodArray<z.ZodObject<{
465
+ type: z.ZodEnum<{
466
+ added: "added";
467
+ removed: "removed";
468
+ unchanged: "unchanged";
469
+ }>;
470
+ content: z.ZodString;
471
+ lineNumber: z.ZodNumber;
472
+ }, z.core.$strip>>;
473
+ }, z.core.$strip>>;
474
+ executionTrace: z.ZodOptional<z.ZodObject<{
475
+ commands: z.ZodArray<z.ZodObject<{
476
+ command: z.ZodString;
477
+ exitCode: z.ZodNumber;
478
+ output: z.ZodOptional<z.ZodString>;
479
+ duration: z.ZodNumber;
480
+ }, z.core.$strip>>;
481
+ filesModified: z.ZodArray<z.ZodObject<{
482
+ path: z.ZodString;
483
+ action: z.ZodEnum<{
484
+ deleted: "deleted";
485
+ created: "created";
486
+ modified: "modified";
487
+ }>;
488
+ }, z.core.$strip>>;
489
+ apiCalls: z.ZodArray<z.ZodObject<{
490
+ endpoint: z.ZodString;
491
+ tokensUsed: z.ZodNumber;
492
+ duration: z.ZodNumber;
493
+ }, z.core.$strip>>;
494
+ totalDuration: z.ZodNumber;
495
+ }, z.core.$strip>>;
496
+ }, z.core.$strip>>>;
497
+ llmTraceSummary: z.ZodOptional<z.ZodObject<{
498
+ totalSteps: z.ZodNumber;
499
+ totalDurationMs: z.ZodNumber;
500
+ totalTokens: z.ZodObject<{
501
+ prompt: z.ZodNumber;
502
+ completion: z.ZodNumber;
503
+ total: z.ZodNumber;
504
+ }, z.core.$strip>;
505
+ totalCostUsd: z.ZodNumber;
506
+ stepTypeBreakdown: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodObject<{
507
+ count: z.ZodNumber;
508
+ durationMs: z.ZodNumber;
509
+ tokens: z.ZodNumber;
510
+ costUsd: z.ZodNumber;
511
+ }, z.core.$strip>>>;
512
+ modelBreakdown: z.ZodRecord<z.ZodString, z.ZodObject<{
513
+ count: z.ZodNumber;
514
+ durationMs: z.ZodNumber;
515
+ tokens: z.ZodNumber;
516
+ costUsd: z.ZodNumber;
517
+ }, z.core.$strip>>;
518
+ modelsUsed: z.ZodArray<z.ZodString>;
519
+ }, z.core.$strip>>;
520
+ trigger: z.ZodOptional<z.ZodObject<{
521
+ id: z.ZodString;
522
+ metadata: z.ZodOptional<z.ZodObject<{
523
+ version: z.ZodOptional<z.ZodString>;
524
+ resourceUpdated: z.ZodOptional<z.ZodArray<z.ZodString>>;
525
+ }, z.core.$strip>>;
526
+ type: z.ZodEnum<typeof TriggerType>;
527
+ }, z.core.$strip>>;
528
+ liveTraceEvents: z.ZodOptional<z.ZodArray<z.ZodObject<{
529
+ evalRunId: z.ZodString;
530
+ scenarioId: z.ZodString;
531
+ scenarioName: z.ZodString;
532
+ targetId: z.ZodString;
533
+ targetName: z.ZodString;
534
+ stepNumber: z.ZodNumber;
535
+ type: z.ZodEnum<typeof import("./live-trace.js").LiveTraceEventType>;
536
+ toolName: z.ZodOptional<z.ZodString>;
537
+ toolArgs: z.ZodOptional<z.ZodString>;
538
+ outputPreview: z.ZodOptional<z.ZodString>;
539
+ timestamp: z.ZodString;
540
+ isComplete: z.ZodBoolean;
541
+ }, z.core.$strip>>>;
542
+ }, z.core.$strip>;
543
+ export type CreateEvalRunInput = z.infer<typeof CreateEvalRunInputSchema>;
544
+ /**
545
+ * Evaluation progress schema: `runId`, `targetId`, the numeric `totalScenarios` and `completedScenarios` counters, and a per-scenario `scenarioProgress` list (`scenarioId`, `currentStep`, optional `error`). NOTE(review): `createdAt` is a number here, whereas EvalRunSchema types `createdAt` as a string - confirm the intended format.
546
+ */
547
+ export declare const EvaluationProgressSchema: z.ZodObject<{
548
+ runId: z.ZodString;
549
+ targetId: z.ZodString;
550
+ totalScenarios: z.ZodNumber;
551
+ completedScenarios: z.ZodNumber;
552
+ scenarioProgress: z.ZodArray<z.ZodObject<{
553
+ scenarioId: z.ZodString;
554
+ currentStep: z.ZodString;
555
+ error: z.ZodOptional<z.ZodString>;
556
+ }, z.core.$strip>>;
557
+ createdAt: z.ZodNumber;
558
+ }, z.core.$strip>;
559
+ export type EvaluationProgress = z.infer<typeof EvaluationProgressSchema>;
560
+ /**
561
+ * Evaluation log schema: a `log` entry tagged with `runId` and `scenarioId`. The entry has a required `level` ("error", "info" or "debug") and optional `message`, `args` (array of any values) and `error`.
562
+ */
563
+ export declare const EvaluationLogSchema: z.ZodObject<{
564
+ runId: z.ZodString;
565
+ scenarioId: z.ZodString;
566
+ log: z.ZodObject<{
567
+ level: z.ZodEnum<{
568
+ error: "error";
569
+ info: "info";
570
+ debug: "debug";
571
+ }>;
572
+ message: z.ZodOptional<z.ZodString>;
573
+ args: z.ZodOptional<z.ZodArray<z.ZodAny>>;
574
+ error: z.ZodOptional<z.ZodString>;
575
+ }, z.core.$strip>;
576
+ }, z.core.$strip>;
577
+ export type EvaluationLog = z.infer<typeof EvaluationLogSchema>;
578
+ /**
579
+ * LLM timeout constant, in milliseconds (120000 ms = 2 minutes).
580
+ */
581
+ export declare const LLM_TIMEOUT = 120000;
@@ -0,0 +1,4 @@
1
+ export * from './metrics.js';
2
+ export * from './eval-result.js';
3
+ export * from './eval-run.js';
4
+ export * from './live-trace.js';
@@ -0,0 +1,47 @@
1
+ import { z } from 'zod';
2
+ /**
3
+ * Live trace event type enum.
4
+ * Maps to the step types but includes streaming states.
5
+ */
6
+ export declare enum LiveTraceEventType {
7
+ THINKING = "thinking",
8
+ TOOL_USE = "tool_use",
9
+ COMPLETION = "completion",
10
+ TOOL_RESULT = "tool_result"
11
+ }
12
+ /**
13
+ * Live trace event schema.
14
+ * Represents a single trace event emitted during agent execution. `toolName`, `toolArgs` and `outputPreview` are optional; all other fields are required, and `timestamp` is typed as a string.
15
+ */
16
+ export declare const LiveTraceEventSchema: z.ZodObject<{
17
+ evalRunId: z.ZodString;
18
+ scenarioId: z.ZodString;
19
+ scenarioName: z.ZodString;
20
+ targetId: z.ZodString;
21
+ targetName: z.ZodString;
22
+ stepNumber: z.ZodNumber;
23
+ type: z.ZodEnum<typeof LiveTraceEventType>;
24
+ toolName: z.ZodOptional<z.ZodString>;
25
+ toolArgs: z.ZodOptional<z.ZodString>;
26
+ outputPreview: z.ZodOptional<z.ZodString>;
27
+ timestamp: z.ZodString;
28
+ isComplete: z.ZodBoolean;
29
+ }, z.core.$strip>;
30
+ export type LiveTraceEvent = z.infer<typeof LiveTraceEventSchema>;
31
+ /**
32
+ * Prefix used in stdout to identify trace events; its value is the literal string "TRACE_EVENT:".
33
+ * Line format: TRACE_EVENT:{json} (the prefix followed by a JSON payload).
34
+ */
35
+ export declare const TRACE_EVENT_PREFIX = "TRACE_EVENT:";
36
+ /**
37
+ * Parse a line from stdout to extract a trace event if present. NOTE(review): how a trace-event line with a malformed or invalid payload is handled (null vs. throw) is not visible in this declaration - confirm against the implementation.
38
+ * @param line - A single line of stdout text
39
+ * @returns The parsed LiveTraceEvent, or null if the line is not a trace event line
40
+ */
41
+ export declare function parseTraceEventLine(line: string): LiveTraceEvent | null;
42
+ /**
43
+ * Format a trace event as a stdout line.
44
+ * @param event - The trace event to format
45
+ * @returns The formatted line with prefix (presumably TRACE_EVENT_PREFIX - confirm against the implementation)
46
+ */
47
+ export declare function formatTraceEventLine(event: LiveTraceEvent): string;