@wix/evalforge-types 0.20.0 → 0.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -14,18 +14,21 @@ var TenantEntitySchema = BaseEntitySchema.extend({
14
14
 
15
15
  // src/common/mcp.ts
16
16
  import { z as z2 } from "zod";
17
- var MCPServerConfigSchema = z2.object({
18
- /** Unique name for this MCP server */
19
- name: z2.string(),
20
- /** Command to start the MCP server */
21
- command: z2.string(),
22
- /** Command line arguments */
23
- args: z2.array(z2.string()).optional(),
24
- /** Environment variables for the server process */
25
- envVars: z2.record(z2.string(), z2.string()).optional(),
26
- /** Tools to disable for this MCP server */
27
- disabledTools: z2.array(z2.string()).optional()
17
+ var MCP_SERVERS_JSON_KEY = "mcpServers";
18
+ var MCPEntitySchema = TenantEntitySchema.extend({
19
+ /** Display name and key in mcp.json mcpServers object */
20
+ name: z2.string().min(1),
21
+ /** MCP server config (command/args, url/headers, etc.) - stored as-is for mcp.json */
22
+ config: z2.record(z2.string(), z2.unknown())
23
+ });
24
+ var CreateMcpInputSchema = MCPEntitySchema.omit({
25
+ id: true,
26
+ createdAt: true,
27
+ updatedAt: true,
28
+ deleted: true
28
29
  });
30
+ var UpdateMcpInputSchema = CreateMcpInputSchema.partial();
31
+ var MCPServerConfigSchema = z2.record(z2.string(), z2.unknown());
29
32
 
30
33
  // src/common/models.ts
31
34
  import { z as z3 } from "zod";
@@ -205,11 +208,26 @@ var CreateSkillsGroupInputSchema = SkillsGroupSchema.omit({
205
208
  });
206
209
  var UpdateSkillsGroupInputSchema = CreateSkillsGroupInputSchema.partial();
207
210
 
211
+ // src/target/sub-agent.ts
212
+ import { z as z7 } from "zod";
213
+ var SubAgentSchema = TargetSchema.extend({
214
+ /** The full sub-agent markdown content (YAML frontmatter + body) */
215
+ subAgentMd: z7.string()
216
+ });
217
+ var SubAgentInputBaseSchema = SubAgentSchema.omit({
218
+ id: true,
219
+ createdAt: true,
220
+ updatedAt: true,
221
+ deleted: true
222
+ });
223
+ var CreateSubAgentInputSchema = SubAgentInputBaseSchema;
224
+ var UpdateSubAgentInputSchema = SubAgentInputBaseSchema.partial();
225
+
208
226
  // src/test/index.ts
209
- import { z as z17 } from "zod";
227
+ import { z as z18 } from "zod";
210
228
 
211
229
  // src/test/base.ts
212
- import { z as z7 } from "zod";
230
+ import { z as z8 } from "zod";
213
231
  var TestType = /* @__PURE__ */ ((TestType2) => {
214
232
  TestType2["LLM"] = "LLM";
215
233
  TestType2["TOOL"] = "TOOL";
@@ -222,7 +240,7 @@ var TestType = /* @__PURE__ */ ((TestType2) => {
222
240
  TestType2["PLAYWRIGHT_NL"] = "PLAYWRIGHT_NL";
223
241
  return TestType2;
224
242
  })(TestType || {});
225
- var TestTypeSchema = z7.enum(TestType);
243
+ var TestTypeSchema = z8.enum(TestType);
226
244
  var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
227
245
  TestImportance2["LOW"] = "low";
228
246
  TestImportance2["MEDIUM"] = "medium";
@@ -230,153 +248,153 @@ var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
230
248
  TestImportance2["CRITICAL"] = "critical";
231
249
  return TestImportance2;
232
250
  })(TestImportance || {});
233
- var TestImportanceSchema = z7.enum(TestImportance);
234
- var BaseTestSchema = z7.object({
235
- id: z7.string(),
251
+ var TestImportanceSchema = z8.enum(TestImportance);
252
+ var BaseTestSchema = z8.object({
253
+ id: z8.string(),
236
254
  type: TestTypeSchema,
237
- name: z7.string().min(3),
238
- description: z7.string().optional(),
255
+ name: z8.string().min(3),
256
+ description: z8.string().optional(),
239
257
  importance: TestImportanceSchema.optional()
240
258
  });
241
259
 
242
260
  // src/test/llm.ts
243
- import { z as z8 } from "zod";
261
+ import { z as z9 } from "zod";
244
262
  var LLMTestSchema = BaseTestSchema.extend({
245
- type: z8.literal("LLM" /* LLM */),
263
+ type: z9.literal("LLM" /* LLM */),
246
264
  /** Maximum steps for the LLM to take */
247
- maxSteps: z8.number().min(1).max(100),
265
+ maxSteps: z9.number().min(1).max(100),
248
266
  /** Prompt to send to the evaluator */
249
- prompt: z8.string().min(1),
267
+ prompt: z9.string().min(1),
250
268
  /** ID of the evaluator agent to use */
251
- evaluatorId: z8.string()
269
+ evaluatorId: z9.string()
252
270
  });
253
271
 
254
272
  // src/test/tool.ts
255
- import { z as z9 } from "zod";
273
+ import { z as z10 } from "zod";
256
274
  var ToolTestSchema = BaseTestSchema.extend({
257
- type: z9.literal("TOOL" /* TOOL */),
275
+ type: z10.literal("TOOL" /* TOOL */),
258
276
  /** Name of the tool that should be called */
259
- toolName: z9.string().min(3),
277
+ toolName: z10.string().min(3),
260
278
  /** Expected arguments for the tool call */
261
- args: z9.record(z9.string(), z9.any()),
279
+ args: z10.record(z10.string(), z10.any()),
262
280
  /** Expected content in the tool results */
263
- resultsContent: z9.string()
281
+ resultsContent: z10.string()
264
282
  });
265
283
 
266
284
  // src/test/site-config.ts
267
- import { z as z10 } from "zod";
285
+ import { z as z11 } from "zod";
268
286
  var SiteConfigTestSchema = BaseTestSchema.extend({
269
- type: z10.literal("SITE_CONFIG" /* SITE_CONFIG */),
287
+ type: z11.literal("SITE_CONFIG" /* SITE_CONFIG */),
270
288
  /** URL to call */
271
- url: z10.string().url(),
289
+ url: z11.string().url(),
272
290
  /** HTTP method */
273
- method: z10.enum(["GET", "POST"]),
291
+ method: z11.enum(["GET", "POST"]),
274
292
  /** Request body (for POST) */
275
- body: z10.string().optional(),
293
+ body: z11.string().optional(),
276
294
  /** Expected HTTP status code */
277
- expectedStatusCode: z10.number().int().min(100).max(599),
295
+ expectedStatusCode: z11.number().int().min(100).max(599),
278
296
  /** Expected response content */
279
- expectedResponse: z10.string().optional(),
297
+ expectedResponse: z11.string().optional(),
280
298
  /** JMESPath expression to extract from response */
281
- expectedResponseJMESPath: z10.string().optional()
299
+ expectedResponseJMESPath: z11.string().optional()
282
300
  });
283
301
 
284
302
  // src/test/command-execution.ts
285
- import { z as z11 } from "zod";
303
+ import { z as z12 } from "zod";
286
304
  var AllowedCommands = [
287
305
  "yarn install --no-immutable && yarn build",
288
306
  "npm run build",
289
307
  "yarn typecheck"
290
308
  ];
291
309
  var CommandExecutionTestSchema = BaseTestSchema.extend({
292
- type: z11.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
310
+ type: z12.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
293
311
  /** Command to execute (must be in AllowedCommands) */
294
- command: z11.string().refine((value) => AllowedCommands.includes(value), {
312
+ command: z12.string().refine((value) => AllowedCommands.includes(value), {
295
313
  message: `Command must be one of: ${AllowedCommands.join(", ")}`
296
314
  }),
297
315
  /** Expected exit code (default: 0) */
298
- expectedExitCode: z11.number().default(0).optional()
316
+ expectedExitCode: z12.number().default(0).optional()
299
317
  });
300
318
 
301
319
  // src/test/file-presence.ts
302
- import { z as z12 } from "zod";
320
+ import { z as z13 } from "zod";
303
321
  var FilePresenceTestSchema = BaseTestSchema.extend({
304
- type: z12.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
322
+ type: z13.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
305
323
  /** Paths to check */
306
- paths: z12.array(z12.string()),
324
+ paths: z13.array(z13.string()),
307
325
  /** Whether files should exist (true) or not exist (false) */
308
- shouldExist: z12.boolean()
326
+ shouldExist: z13.boolean()
309
327
  });
310
328
 
311
329
  // src/test/file-content.ts
312
- import { z as z13 } from "zod";
313
- var FileContentCheckSchema = z13.object({
330
+ import { z as z14 } from "zod";
331
+ var FileContentCheckSchema = z14.object({
314
332
  /** Strings that must be present in the file */
315
- contains: z13.array(z13.string()).optional(),
333
+ contains: z14.array(z14.string()).optional(),
316
334
  /** Strings that must NOT be present in the file */
317
- notContains: z13.array(z13.string()).optional(),
335
+ notContains: z14.array(z14.string()).optional(),
318
336
  /** Regex pattern the content must match */
319
- matches: z13.string().optional(),
337
+ matches: z14.string().optional(),
320
338
  /** JSON path checks for structured content */
321
- jsonPath: z13.array(
322
- z13.object({
323
- path: z13.string(),
324
- value: z13.unknown()
339
+ jsonPath: z14.array(
340
+ z14.object({
341
+ path: z14.string(),
342
+ value: z14.unknown()
325
343
  })
326
344
  ).optional(),
327
345
  /** Lines that should be added (for diff checking) */
328
- added: z13.array(z13.string()).optional(),
346
+ added: z14.array(z14.string()).optional(),
329
347
  /** Lines that should be removed (for diff checking) */
330
- removed: z13.array(z13.string()).optional()
348
+ removed: z14.array(z14.string()).optional()
331
349
  });
332
350
  var FileContentTestSchema = BaseTestSchema.extend({
333
- type: z13.literal("FILE_CONTENT" /* FILE_CONTENT */),
351
+ type: z14.literal("FILE_CONTENT" /* FILE_CONTENT */),
334
352
  /** Path to the file to check */
335
- path: z13.string(),
353
+ path: z14.string(),
336
354
  /** Content checks to perform */
337
355
  checks: FileContentCheckSchema
338
356
  });
339
357
 
340
358
  // src/test/build-check.ts
341
- import { z as z14 } from "zod";
359
+ import { z as z15 } from "zod";
342
360
  var BuildCheckTestSchema = BaseTestSchema.extend({
343
- type: z14.literal("BUILD_CHECK" /* BUILD_CHECK */),
361
+ type: z15.literal("BUILD_CHECK" /* BUILD_CHECK */),
344
362
  /** Build command to execute */
345
- command: z14.string(),
363
+ command: z15.string(),
346
364
  /** Whether the build should succeed */
347
- expectSuccess: z14.boolean(),
365
+ expectSuccess: z15.boolean(),
348
366
  /** Maximum allowed warnings (optional) */
349
- allowedWarnings: z14.number().optional(),
367
+ allowedWarnings: z15.number().optional(),
350
368
  /** Timeout in milliseconds */
351
- timeout: z14.number().optional()
369
+ timeout: z15.number().optional()
352
370
  });
353
371
 
354
372
  // src/test/vitest.ts
355
- import { z as z15 } from "zod";
373
+ import { z as z16 } from "zod";
356
374
  var VitestTestSchema = BaseTestSchema.extend({
357
- type: z15.literal("VITEST" /* VITEST */),
375
+ type: z16.literal("VITEST" /* VITEST */),
358
376
  /** Test file content */
359
- testFile: z15.string(),
377
+ testFile: z16.string(),
360
378
  /** Name of the test file */
361
- testFileName: z15.string(),
379
+ testFileName: z16.string(),
362
380
  /** Minimum pass rate required (0-100) */
363
- minPassRate: z15.number().min(0).max(100)
381
+ minPassRate: z16.number().min(0).max(100)
364
382
  });
365
383
 
366
384
  // src/test/playwright-nl.ts
367
- import { z as z16 } from "zod";
385
+ import { z as z17 } from "zod";
368
386
  var PlaywrightNLTestSchema = BaseTestSchema.extend({
369
- type: z16.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
387
+ type: z17.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
370
388
  /** Natural language steps to execute */
371
- steps: z16.array(z16.string()),
389
+ steps: z17.array(z17.string()),
372
390
  /** Expected outcome description */
373
- expectedOutcome: z16.string(),
391
+ expectedOutcome: z17.string(),
374
392
  /** Timeout in milliseconds */
375
- timeout: z16.number().optional()
393
+ timeout: z17.number().optional()
376
394
  });
377
395
 
378
396
  // src/test/index.ts
379
- var TestSchema = z17.discriminatedUnion("type", [
397
+ var TestSchema = z18.discriminatedUnion("type", [
380
398
  LLMTestSchema,
381
399
  ToolTestSchema,
382
400
  SiteConfigTestSchema,
@@ -389,66 +407,66 @@ var TestSchema = z17.discriminatedUnion("type", [
389
407
  ]);
390
408
 
391
409
  // src/scenario/assertions.ts
392
- import { z as z18 } from "zod";
393
- var SkillWasCalledAssertionSchema = z18.object({
394
- type: z18.literal("skill_was_called"),
410
+ import { z as z19 } from "zod";
411
+ var SkillWasCalledAssertionSchema = z19.object({
412
+ type: z19.literal("skill_was_called"),
395
413
  /** Name of the skill that must have been called (matched against trace Skill tool args) */
396
- skillName: z18.string()
414
+ skillName: z19.string()
397
415
  });
398
- var BuildPassedAssertionSchema = z18.object({
399
- type: z18.literal("build_passed"),
416
+ var BuildPassedAssertionSchema = z19.object({
417
+ type: z19.literal("build_passed"),
400
418
  /** Command to run (default: "yarn build") */
401
- command: z18.string().optional(),
419
+ command: z19.string().optional(),
402
420
  /** Expected exit code (default: 0) */
403
- expectedExitCode: z18.number().int().optional()
421
+ expectedExitCode: z19.number().int().optional()
404
422
  });
405
- var LlmJudgeAssertionSchema = z18.object({
406
- type: z18.literal("llm_judge"),
423
+ var LlmJudgeAssertionSchema = z19.object({
424
+ type: z19.literal("llm_judge"),
407
425
  /** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
408
- prompt: z18.string(),
426
+ prompt: z19.string(),
409
427
  /** Optional system prompt for the judge (default asks for JSON with score) */
410
- systemPrompt: z18.string().optional(),
428
+ systemPrompt: z19.string().optional(),
411
429
  /** Minimum score to pass (0-100, default 70) */
412
- minScore: z18.number().int().min(0).max(100).optional(),
430
+ minScore: z19.number().int().min(0).max(100).optional(),
413
431
  /** Model for the judge (e.g. claude-3-5-haiku) */
414
- model: z18.string().optional(),
415
- maxTokens: z18.number().int().optional(),
416
- temperature: z18.number().min(0).max(1).optional()
432
+ model: z19.string().optional(),
433
+ maxTokens: z19.number().int().optional(),
434
+ temperature: z19.number().min(0).max(1).optional()
417
435
  });
418
- var AssertionSchema = z18.union([
436
+ var AssertionSchema = z19.union([
419
437
  SkillWasCalledAssertionSchema,
420
438
  BuildPassedAssertionSchema,
421
439
  LlmJudgeAssertionSchema
422
440
  ]);
423
441
 
424
442
  // src/scenario/environment.ts
425
- import { z as z19 } from "zod";
426
- var LocalProjectConfigSchema = z19.object({
443
+ import { z as z20 } from "zod";
444
+ var LocalProjectConfigSchema = z20.object({
427
445
  /** Template ID to use for the local project */
428
- templateId: z19.string().optional(),
446
+ templateId: z20.string().optional(),
429
447
  /** Files to create in the project */
430
- files: z19.array(
431
- z19.object({
432
- path: z19.string().min(1),
433
- content: z19.string().min(1)
448
+ files: z20.array(
449
+ z20.object({
450
+ path: z20.string().min(1),
451
+ content: z20.string().min(1)
434
452
  })
435
453
  ).optional()
436
454
  });
437
- var MetaSiteConfigSchema = z19.object({
438
- configurations: z19.array(
439
- z19.object({
440
- name: z19.string().min(1),
441
- apiCalls: z19.array(
442
- z19.object({
443
- url: z19.string().url(),
444
- method: z19.enum(["POST", "PUT"]),
445
- body: z19.string()
455
+ var MetaSiteConfigSchema = z20.object({
456
+ configurations: z20.array(
457
+ z20.object({
458
+ name: z20.string().min(1),
459
+ apiCalls: z20.array(
460
+ z20.object({
461
+ url: z20.string().url(),
462
+ method: z20.enum(["POST", "PUT"]),
463
+ body: z20.string()
446
464
  })
447
465
  )
448
466
  })
449
467
  ).optional()
450
468
  });
451
- var EnvironmentSchema = z19.object({
469
+ var EnvironmentSchema = z20.object({
452
470
  /** Local project configuration */
453
471
  localProject: LocalProjectConfigSchema.optional(),
454
472
  /** Meta site configuration */
@@ -456,54 +474,54 @@ var EnvironmentSchema = z19.object({
456
474
  });
457
475
 
458
476
  // src/scenario/test-scenario.ts
459
- import { z as z21 } from "zod";
477
+ import { z as z22 } from "zod";
460
478
 
461
479
  // src/assertion/assertion.ts
462
- import { z as z20 } from "zod";
463
- var AssertionTypeSchema = z20.enum([
480
+ import { z as z21 } from "zod";
481
+ var AssertionTypeSchema = z21.enum([
464
482
  "skill_was_called",
465
483
  "build_passed",
466
484
  "llm_judge"
467
485
  ]);
468
- var AssertionParameterTypeSchema = z20.enum([
486
+ var AssertionParameterTypeSchema = z21.enum([
469
487
  "string",
470
488
  "number",
471
489
  "boolean"
472
490
  ]);
473
- var AssertionParameterSchema = z20.object({
491
+ var AssertionParameterSchema = z21.object({
474
492
  /** Parameter name (used as key in params object) */
475
- name: z20.string().min(1),
493
+ name: z21.string().min(1),
476
494
  /** Display label for the parameter */
477
- label: z20.string().min(1),
495
+ label: z21.string().min(1),
478
496
  /** Parameter type */
479
497
  type: AssertionParameterTypeSchema,
480
498
  /** Whether this parameter is required */
481
- required: z20.boolean(),
499
+ required: z21.boolean(),
482
500
  /** Default value (optional, used when not provided) */
483
- defaultValue: z20.union([z20.string(), z20.number(), z20.boolean()]).optional(),
501
+ defaultValue: z21.union([z21.string(), z21.number(), z21.boolean()]).optional(),
484
502
  /** If true, parameter is hidden by default behind "Show advanced options" */
485
- advanced: z20.boolean().optional()
503
+ advanced: z21.boolean().optional()
486
504
  });
487
- var ScenarioAssertionLinkSchema = z20.object({
505
+ var ScenarioAssertionLinkSchema = z21.object({
488
506
  /** ID of the assertion (can be system assertion like 'system:skill_was_called' or custom assertion UUID) */
489
- assertionId: z20.string(),
507
+ assertionId: z21.string(),
490
508
  /** Parameter values for this assertion in this scenario */
491
- params: z20.record(
492
- z20.string(),
493
- z20.union([z20.string(), z20.number(), z20.boolean(), z20.null()])
509
+ params: z21.record(
510
+ z21.string(),
511
+ z21.union([z21.string(), z21.number(), z21.boolean(), z21.null()])
494
512
  ).optional()
495
513
  });
496
- var SkillWasCalledConfigSchema = z20.object({
514
+ var SkillWasCalledConfigSchema = z21.object({
497
515
  /** Name of the skill that must have been called */
498
- skillName: z20.string().min(1)
516
+ skillName: z21.string().min(1)
499
517
  });
500
- var BuildPassedConfigSchema = z20.strictObject({
518
+ var BuildPassedConfigSchema = z21.strictObject({
501
519
  /** Command to run (default: "yarn build") */
502
- command: z20.string().optional(),
520
+ command: z21.string().optional(),
503
521
  /** Expected exit code (default: 0) */
504
- expectedExitCode: z20.number().int().optional()
522
+ expectedExitCode: z21.number().int().optional()
505
523
  });
506
- var LlmJudgeConfigSchema = z20.object({
524
+ var LlmJudgeConfigSchema = z21.object({
507
525
  /**
508
526
  * Prompt template with placeholders:
509
527
  * - {{output}}: agent's final output
@@ -514,28 +532,28 @@ var LlmJudgeConfigSchema = z20.object({
514
532
  * - {{trace}}: step-by-step trace of tool calls
515
533
  * - Custom parameters defined in the parameters array
516
534
  */
517
- prompt: z20.string().min(1),
535
+ prompt: z21.string().min(1),
518
536
  /** Optional system prompt for the judge */
519
- systemPrompt: z20.string().optional(),
537
+ systemPrompt: z21.string().optional(),
520
538
  /** Minimum score to pass (0-100, default 70) */
521
- minScore: z20.number().int().min(0).max(100).optional(),
539
+ minScore: z21.number().int().min(0).max(100).optional(),
522
540
  /** Model for the judge (e.g. claude-3-5-haiku-20241022) */
523
- model: z20.string().optional(),
541
+ model: z21.string().optional(),
524
542
  /** Max output tokens */
525
- maxTokens: z20.number().int().optional(),
543
+ maxTokens: z21.number().int().optional(),
526
544
  /** Temperature (0-1) */
527
- temperature: z20.number().min(0).max(1).optional(),
545
+ temperature: z21.number().min(0).max(1).optional(),
528
546
  /** User-defined parameters for this assertion */
529
- parameters: z20.array(AssertionParameterSchema).optional()
547
+ parameters: z21.array(AssertionParameterSchema).optional()
530
548
  });
531
- var AssertionConfigSchema = z20.union([
549
+ var AssertionConfigSchema = z21.union([
532
550
  LlmJudgeConfigSchema,
533
551
  // requires prompt - check first
534
552
  SkillWasCalledConfigSchema,
535
553
  // requires skillName
536
554
  BuildPassedConfigSchema,
537
555
  // all optional, uses strictObject to reject unknown keys
538
- z20.object({})
556
+ z21.object({})
539
557
  // fallback empty config
540
558
  ]);
541
559
  var CustomAssertionSchema = TenantEntitySchema.extend({
@@ -580,23 +598,23 @@ function getLlmJudgeConfig(assertion) {
580
598
  }
581
599
 
582
600
  // src/scenario/test-scenario.ts
583
- var ExpectedFileSchema = z21.object({
601
+ var ExpectedFileSchema = z22.object({
584
602
  /** Relative path where the file should be created */
585
- path: z21.string(),
603
+ path: z22.string(),
586
604
  /** Optional expected content */
587
- content: z21.string().optional()
605
+ content: z22.string().optional()
588
606
  });
589
607
  var TestScenarioSchema = TenantEntitySchema.extend({
590
608
  /** The prompt sent to the agent to trigger the task */
591
- triggerPrompt: z21.string().min(10),
609
+ triggerPrompt: z22.string().min(10),
592
610
  /** ID of the template to use for this scenario (null = no template) */
593
- templateId: z21.string().nullish(),
611
+ templateId: z22.string().nullish(),
594
612
  /** Inline assertions to evaluate for this scenario (legacy) */
595
- assertions: z21.array(AssertionSchema).optional(),
613
+ assertions: z22.array(AssertionSchema).optional(),
596
614
  /** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
597
- assertionIds: z21.array(z21.string()).optional(),
615
+ assertionIds: z22.array(z22.string()).optional(),
598
616
  /** Linked assertions with per-scenario parameter values */
599
- assertionLinks: z21.array(ScenarioAssertionLinkSchema).optional()
617
+ assertionLinks: z22.array(ScenarioAssertionLinkSchema).optional()
600
618
  });
601
619
  var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
602
620
  id: true,
@@ -607,10 +625,10 @@ var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
607
625
  var UpdateTestScenarioInputSchema = CreateTestScenarioInputSchema.partial();
608
626
 
609
627
  // src/suite/test-suite.ts
610
- import { z as z22 } from "zod";
628
+ import { z as z23 } from "zod";
611
629
  var TestSuiteSchema = TenantEntitySchema.extend({
612
630
  /** IDs of test scenarios in this suite */
613
- scenarioIds: z22.array(z22.string())
631
+ scenarioIds: z23.array(z23.string())
614
632
  });
615
633
  var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
616
634
  id: true,
@@ -621,21 +639,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
621
639
  var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
622
640
 
623
641
  // src/evaluation/metrics.ts
624
- import { z as z23 } from "zod";
625
- var TokenUsageSchema = z23.object({
626
- prompt: z23.number(),
627
- completion: z23.number(),
628
- total: z23.number()
629
- });
630
- var EvalMetricsSchema = z23.object({
631
- totalAssertions: z23.number(),
632
- passed: z23.number(),
633
- failed: z23.number(),
634
- skipped: z23.number(),
635
- errors: z23.number(),
636
- passRate: z23.number(),
637
- avgDuration: z23.number(),
638
- totalDuration: z23.number()
642
+ import { z as z24 } from "zod";
643
+ var TokenUsageSchema = z24.object({
644
+ prompt: z24.number(),
645
+ completion: z24.number(),
646
+ total: z24.number()
647
+ });
648
+ var EvalMetricsSchema = z24.object({
649
+ totalAssertions: z24.number(),
650
+ passed: z24.number(),
651
+ failed: z24.number(),
652
+ skipped: z24.number(),
653
+ errors: z24.number(),
654
+ passRate: z24.number(),
655
+ avgDuration: z24.number(),
656
+ totalDuration: z24.number()
639
657
  });
640
658
  var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
641
659
  EvalStatus2["PENDING"] = "pending";
@@ -645,7 +663,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
645
663
  EvalStatus2["CANCELLED"] = "cancelled";
646
664
  return EvalStatus2;
647
665
  })(EvalStatus || {});
648
- var EvalStatusSchema = z23.enum(EvalStatus);
666
+ var EvalStatusSchema = z24.enum(EvalStatus);
649
667
  var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
650
668
  LLMStepType2["COMPLETION"] = "completion";
651
669
  LLMStepType2["TOOL_USE"] = "tool_use";
@@ -653,52 +671,52 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
653
671
  LLMStepType2["THINKING"] = "thinking";
654
672
  return LLMStepType2;
655
673
  })(LLMStepType || {});
656
- var LLMTraceStepSchema = z23.object({
657
- id: z23.string(),
658
- stepNumber: z23.number(),
659
- type: z23.enum(LLMStepType),
660
- model: z23.string(),
661
- provider: z23.string(),
662
- startedAt: z23.string(),
663
- durationMs: z23.number(),
674
+ var LLMTraceStepSchema = z24.object({
675
+ id: z24.string(),
676
+ stepNumber: z24.number(),
677
+ type: z24.enum(LLMStepType),
678
+ model: z24.string(),
679
+ provider: z24.string(),
680
+ startedAt: z24.string(),
681
+ durationMs: z24.number(),
664
682
  tokenUsage: TokenUsageSchema,
665
- costUsd: z23.number(),
666
- toolName: z23.string().optional(),
667
- toolArguments: z23.string().optional(),
668
- inputPreview: z23.string().optional(),
669
- outputPreview: z23.string().optional(),
670
- success: z23.boolean(),
671
- error: z23.string().optional()
672
- });
673
- var LLMBreakdownStatsSchema = z23.object({
674
- count: z23.number(),
675
- durationMs: z23.number(),
676
- tokens: z23.number(),
677
- costUsd: z23.number()
678
- });
679
- var LLMTraceSummarySchema = z23.object({
680
- totalSteps: z23.number(),
681
- totalDurationMs: z23.number(),
683
+ costUsd: z24.number(),
684
+ toolName: z24.string().optional(),
685
+ toolArguments: z24.string().optional(),
686
+ inputPreview: z24.string().optional(),
687
+ outputPreview: z24.string().optional(),
688
+ success: z24.boolean(),
689
+ error: z24.string().optional()
690
+ });
691
+ var LLMBreakdownStatsSchema = z24.object({
692
+ count: z24.number(),
693
+ durationMs: z24.number(),
694
+ tokens: z24.number(),
695
+ costUsd: z24.number()
696
+ });
697
+ var LLMTraceSummarySchema = z24.object({
698
+ totalSteps: z24.number(),
699
+ totalDurationMs: z24.number(),
682
700
  totalTokens: TokenUsageSchema,
683
- totalCostUsd: z23.number(),
684
- stepTypeBreakdown: z23.record(z23.string(), LLMBreakdownStatsSchema).optional(),
685
- modelBreakdown: z23.record(z23.string(), LLMBreakdownStatsSchema),
686
- modelsUsed: z23.array(z23.string())
687
- });
688
- var LLMTraceSchema = z23.object({
689
- id: z23.string(),
690
- steps: z23.array(LLMTraceStepSchema),
701
+ totalCostUsd: z24.number(),
702
+ stepTypeBreakdown: z24.record(z24.string(), LLMBreakdownStatsSchema).optional(),
703
+ modelBreakdown: z24.record(z24.string(), LLMBreakdownStatsSchema),
704
+ modelsUsed: z24.array(z24.string())
705
+ });
706
+ var LLMTraceSchema = z24.object({
707
+ id: z24.string(),
708
+ steps: z24.array(LLMTraceStepSchema),
691
709
  summary: LLMTraceSummarySchema
692
710
  });
693
711
 
694
712
  // src/evaluation/eval-result.ts
695
- import { z as z26 } from "zod";
713
+ import { z as z27 } from "zod";
696
714
 
697
715
  // src/evaluation/eval-run.ts
698
- import { z as z25 } from "zod";
716
+ import { z as z26 } from "zod";
699
717
 
700
718
  // src/evaluation/live-trace.ts
701
- import { z as z24 } from "zod";
719
+ import { z as z25 } from "zod";
702
720
  var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
703
721
  LiveTraceEventType2["THINKING"] = "thinking";
704
722
  LiveTraceEventType2["TOOL_USE"] = "tool_use";
@@ -712,37 +730,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
712
730
  LiveTraceEventType2["USER"] = "user";
713
731
  return LiveTraceEventType2;
714
732
  })(LiveTraceEventType || {});
715
- var LiveTraceEventSchema = z24.object({
733
+ var LiveTraceEventSchema = z25.object({
716
734
  /** The evaluation run ID */
717
- evalRunId: z24.string(),
735
+ evalRunId: z25.string(),
718
736
  /** The scenario ID being executed */
719
- scenarioId: z24.string(),
737
+ scenarioId: z25.string(),
720
738
  /** The scenario name for display */
721
- scenarioName: z24.string(),
739
+ scenarioName: z25.string(),
722
740
  /** The target ID (skill, agent, etc.) */
723
- targetId: z24.string(),
741
+ targetId: z25.string(),
724
742
  /** The target name for display */
725
- targetName: z24.string(),
743
+ targetName: z25.string(),
726
744
  /** Step number in the current scenario execution */
727
- stepNumber: z24.number(),
745
+ stepNumber: z25.number(),
728
746
  /** Type of trace event */
729
- type: z24.enum(LiveTraceEventType),
747
+ type: z25.enum(LiveTraceEventType),
730
748
  /** Tool name if this is a tool_use event */
731
- toolName: z24.string().optional(),
749
+ toolName: z25.string().optional(),
732
750
  /** Tool arguments preview (truncated JSON) */
733
- toolArgs: z24.string().optional(),
751
+ toolArgs: z25.string().optional(),
734
752
  /** Output preview (truncated text) */
735
- outputPreview: z24.string().optional(),
753
+ outputPreview: z25.string().optional(),
736
754
  /** File path for file operations */
737
- filePath: z24.string().optional(),
755
+ filePath: z25.string().optional(),
738
756
  /** Elapsed time in milliseconds for progress events */
739
- elapsedMs: z24.number().optional(),
757
+ elapsedMs: z25.number().optional(),
740
758
  /** Thinking/reasoning text from Claude */
741
- thinking: z24.string().optional(),
759
+ thinking: z25.string().optional(),
742
760
  /** Timestamp when this event occurred */
743
- timestamp: z24.string(),
761
+ timestamp: z25.string(),
744
762
  /** Whether this is the final event for this scenario */
745
- isComplete: z24.boolean()
763
+ isComplete: z25.boolean()
746
764
  });
747
765
  var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
748
766
  function parseTraceEventLine(line) {
@@ -770,14 +788,14 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
770
788
  TriggerType2["MANUAL"] = "MANUAL";
771
789
  return TriggerType2;
772
790
  })(TriggerType || {});
773
- var TriggerMetadataSchema = z25.object({
774
- version: z25.string().optional(),
775
- resourceUpdated: z25.array(z25.string()).optional()
791
+ var TriggerMetadataSchema = z26.object({
792
+ version: z26.string().optional(),
793
+ resourceUpdated: z26.array(z26.string()).optional()
776
794
  });
777
- var TriggerSchema = z25.object({
778
- id: z25.string(),
795
+ var TriggerSchema = z26.object({
796
+ id: z26.string(),
779
797
  metadata: TriggerMetadataSchema.optional(),
780
- type: z25.enum(TriggerType)
798
+ type: z26.enum(TriggerType)
781
799
  });
782
800
  var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
783
801
  FailureCategory2["MISSING_FILE"] = "missing_file";
@@ -795,28 +813,28 @@ var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
795
813
  FailureSeverity2["LOW"] = "low";
796
814
  return FailureSeverity2;
797
815
  })(FailureSeverity || {});
798
- var DiffLineTypeSchema = z25.enum(["added", "removed", "unchanged"]);
799
- var DiffLineSchema = z25.object({
816
+ var DiffLineTypeSchema = z26.enum(["added", "removed", "unchanged"]);
817
+ var DiffLineSchema = z26.object({
800
818
  type: DiffLineTypeSchema,
801
- content: z25.string(),
802
- lineNumber: z25.number()
803
- });
804
- var DiffContentSchema = z25.object({
805
- path: z25.string(),
806
- expected: z25.string(),
807
- actual: z25.string(),
808
- diffLines: z25.array(DiffLineSchema),
809
- renamedFrom: z25.string().optional()
810
- });
811
- var CommandExecutionSchema = z25.object({
812
- command: z25.string(),
813
- exitCode: z25.number(),
814
- output: z25.string().optional(),
815
- duration: z25.number()
816
- });
817
- var FileModificationSchema = z25.object({
818
- path: z25.string(),
819
- action: z25.enum(["created", "modified", "deleted"])
819
+ content: z26.string(),
820
+ lineNumber: z26.number()
821
+ });
822
+ var DiffContentSchema = z26.object({
823
+ path: z26.string(),
824
+ expected: z26.string(),
825
+ actual: z26.string(),
826
+ diffLines: z26.array(DiffLineSchema),
827
+ renamedFrom: z26.string().optional()
828
+ });
829
+ var CommandExecutionSchema = z26.object({
830
+ command: z26.string(),
831
+ exitCode: z26.number(),
832
+ output: z26.string().optional(),
833
+ duration: z26.number()
834
+ });
835
+ var FileModificationSchema = z26.object({
836
+ path: z26.string(),
837
+ action: z26.enum(["created", "modified", "deleted"])
820
838
  });
821
839
  var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
822
840
  TemplateFileStatus2["NEW"] = "new";
@@ -824,75 +842,79 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
824
842
  TemplateFileStatus2["UNCHANGED"] = "unchanged";
825
843
  return TemplateFileStatus2;
826
844
  })(TemplateFileStatus || {});
827
- var TemplateFileSchema = z25.object({
845
+ var TemplateFileSchema = z26.object({
828
846
  /** Relative path within the template */
829
- path: z25.string(),
847
+ path: z26.string(),
830
848
  /** Full file content after execution */
831
- content: z25.string(),
849
+ content: z26.string(),
832
850
  /** File status (new, modified, unchanged) */
833
- status: z25.enum(["new", "modified", "unchanged"])
834
- });
835
- var ApiCallSchema = z25.object({
836
- endpoint: z25.string(),
837
- tokensUsed: z25.number(),
838
- duration: z25.number()
839
- });
840
- var ExecutionTraceSchema = z25.object({
841
- commands: z25.array(CommandExecutionSchema),
842
- filesModified: z25.array(FileModificationSchema),
843
- apiCalls: z25.array(ApiCallSchema),
844
- totalDuration: z25.number()
845
- });
846
- var FailureAnalysisSchema = z25.object({
847
- category: z25.enum(FailureCategory),
848
- severity: z25.enum(FailureSeverity),
849
- summary: z25.string(),
850
- details: z25.string(),
851
- rootCause: z25.string(),
852
- suggestedFix: z25.string(),
853
- relatedAssertions: z25.array(z25.string()),
854
- codeSnippet: z25.string().optional(),
855
- similarIssues: z25.array(z25.string()).optional(),
856
- patternId: z25.string().optional(),
851
+ status: z26.enum(["new", "modified", "unchanged"])
852
+ });
853
+ var ApiCallSchema = z26.object({
854
+ endpoint: z26.string(),
855
+ tokensUsed: z26.number(),
856
+ duration: z26.number()
857
+ });
858
+ var ExecutionTraceSchema = z26.object({
859
+ commands: z26.array(CommandExecutionSchema),
860
+ filesModified: z26.array(FileModificationSchema),
861
+ apiCalls: z26.array(ApiCallSchema),
862
+ totalDuration: z26.number()
863
+ });
864
+ var FailureAnalysisSchema = z26.object({
865
+ category: z26.enum(FailureCategory),
866
+ severity: z26.enum(FailureSeverity),
867
+ summary: z26.string(),
868
+ details: z26.string(),
869
+ rootCause: z26.string(),
870
+ suggestedFix: z26.string(),
871
+ relatedAssertions: z26.array(z26.string()),
872
+ codeSnippet: z26.string().optional(),
873
+ similarIssues: z26.array(z26.string()).optional(),
874
+ patternId: z26.string().optional(),
857
875
  // Extended fields for detailed debugging
858
876
  diff: DiffContentSchema.optional(),
859
877
  executionTrace: ExecutionTraceSchema.optional()
860
878
  });
861
879
  var EvalRunSchema = TenantEntitySchema.extend({
862
880
  /** Agent ID for this run */
863
- agentId: z25.string().optional(),
881
+ agentId: z26.string().optional(),
864
882
  /** Skills group ID for this run */
865
- skillsGroupId: z25.string().optional(),
883
+ skillsGroupId: z26.string().optional(),
866
884
  /** Scenario IDs to run */
867
- scenarioIds: z25.array(z25.string()),
885
+ scenarioIds: z26.array(z26.string()),
868
886
  /** Current status */
869
887
  status: EvalStatusSchema,
870
888
  /** Progress percentage (0-100) */
871
- progress: z25.number(),
889
+ progress: z26.number(),
872
890
  /** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
873
- results: z25.array(z25.lazy(() => EvalRunResultSchema)),
891
+ results: z26.array(z26.lazy(() => EvalRunResultSchema)),
874
892
  /** Aggregated metrics across all results */
875
893
  aggregateMetrics: EvalMetricsSchema,
876
894
  /** Failure analyses */
877
- failureAnalyses: z25.array(FailureAnalysisSchema).optional(),
895
+ failureAnalyses: z26.array(FailureAnalysisSchema).optional(),
878
896
  /** Aggregated LLM trace summary */
879
897
  llmTraceSummary: LLMTraceSummarySchema.optional(),
880
898
  /** What triggered this run */
881
899
  trigger: TriggerSchema.optional(),
882
900
  /** When the run started (set when evaluation is triggered) */
883
- startedAt: z25.string().optional(),
901
+ startedAt: z26.string().optional(),
884
902
  /** When the run completed */
885
- completedAt: z25.string().optional(),
903
+ completedAt: z26.string().optional(),
886
904
  /** Live trace events captured during execution (for playback on results page) */
887
- liveTraceEvents: z25.array(LiveTraceEventSchema).optional(),
905
+ liveTraceEvents: z26.array(LiveTraceEventSchema).optional(),
888
906
  /** Remote job ID for tracking execution in Dev Machines */
889
- jobId: z25.string().optional(),
907
+ jobId: z26.string().optional(),
890
908
  /** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
891
- jobStatus: z25.string().optional(),
909
+ jobStatus: z26.string().optional(),
892
910
  /** Remote job error message if the job failed */
893
- jobError: z25.string().optional(),
911
+ jobError: z26.string().optional(),
894
912
  /** Timestamp of the last job status check */
895
- jobStatusCheckedAt: z25.string().optional()
913
+ jobStatusCheckedAt: z26.string().optional(),
914
+ /** MCP server IDs to enable for this run (optional) */
915
+ mcpIds: z26.array(z26.string()).optional(),
916
+ /** Sub-agent IDs to enable for this run (optional) */
917
+ subAgentIds: z26.array(z26.string()).optional()
896
918
  });
897
919
  var CreateEvalRunInputSchema = EvalRunSchema.omit({
898
920
  id: true,
@@ -905,28 +927,28 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
905
927
  startedAt: true,
906
928
  completedAt: true
907
929
  });
908
- var EvaluationProgressSchema = z25.object({
909
- runId: z25.string(),
910
- targetId: z25.string(),
911
- totalScenarios: z25.number(),
912
- completedScenarios: z25.number(),
913
- scenarioProgress: z25.array(
914
- z25.object({
915
- scenarioId: z25.string(),
916
- currentStep: z25.string(),
917
- error: z25.string().optional()
930
+ var EvaluationProgressSchema = z26.object({
931
+ runId: z26.string(),
932
+ targetId: z26.string(),
933
+ totalScenarios: z26.number(),
934
+ completedScenarios: z26.number(),
935
+ scenarioProgress: z26.array(
936
+ z26.object({
937
+ scenarioId: z26.string(),
938
+ currentStep: z26.string(),
939
+ error: z26.string().optional()
918
940
  })
919
941
  ),
920
- createdAt: z25.number()
942
+ createdAt: z26.number()
921
943
  });
922
- var EvaluationLogSchema = z25.object({
923
- runId: z25.string(),
924
- scenarioId: z25.string(),
925
- log: z25.object({
926
- level: z25.enum(["info", "error", "debug"]),
927
- message: z25.string().optional(),
928
- args: z25.array(z25.any()).optional(),
929
- error: z25.string().optional()
944
+ var EvaluationLogSchema = z26.object({
945
+ runId: z26.string(),
946
+ scenarioId: z26.string(),
947
+ log: z26.object({
948
+ level: z26.enum(["info", "error", "debug"]),
949
+ message: z26.string().optional(),
950
+ args: z26.array(z26.any()).optional(),
951
+ error: z26.string().optional()
930
952
  })
931
953
  });
932
954
  var LLM_TIMEOUT = 12e4;
@@ -939,91 +961,91 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
939
961
  AssertionResultStatus2["ERROR"] = "error";
940
962
  return AssertionResultStatus2;
941
963
  })(AssertionResultStatus || {});
942
- var AssertionResultSchema = z26.object({
943
- id: z26.string(),
944
- assertionId: z26.string(),
945
- assertionType: z26.string(),
946
- assertionName: z26.string(),
947
- status: z26.enum(AssertionResultStatus),
948
- message: z26.string().optional(),
949
- expected: z26.string().optional(),
950
- actual: z26.string().optional(),
951
- duration: z26.number().optional(),
952
- details: z26.record(z26.string(), z26.unknown()).optional(),
953
- llmTraceSteps: z26.array(LLMTraceStepSchema).optional()
954
- });
955
- var EvalRunResultSchema = z26.object({
956
- id: z26.string(),
957
- targetId: z26.string(),
958
- targetName: z26.string().optional(),
959
- scenarioId: z26.string(),
960
- scenarioName: z26.string(),
964
+ var AssertionResultSchema = z27.object({
965
+ id: z27.string(),
966
+ assertionId: z27.string(),
967
+ assertionType: z27.string(),
968
+ assertionName: z27.string(),
969
+ status: z27.enum(AssertionResultStatus),
970
+ message: z27.string().optional(),
971
+ expected: z27.string().optional(),
972
+ actual: z27.string().optional(),
973
+ duration: z27.number().optional(),
974
+ details: z27.record(z27.string(), z27.unknown()).optional(),
975
+ llmTraceSteps: z27.array(LLMTraceStepSchema).optional()
976
+ });
977
+ var EvalRunResultSchema = z27.object({
978
+ id: z27.string(),
979
+ targetId: z27.string(),
980
+ targetName: z27.string().optional(),
981
+ scenarioId: z27.string(),
982
+ scenarioName: z27.string(),
961
983
  modelConfig: ModelConfigSchema.optional(),
962
- assertionResults: z26.array(AssertionResultSchema),
984
+ assertionResults: z27.array(AssertionResultSchema),
963
985
  metrics: EvalMetricsSchema.optional(),
964
- passed: z26.number(),
965
- failed: z26.number(),
966
- passRate: z26.number(),
967
- duration: z26.number(),
968
- outputText: z26.string().optional(),
969
- files: z26.array(ExpectedFileSchema).optional(),
970
- fileDiffs: z26.array(DiffContentSchema).optional(),
986
+ passed: z27.number(),
987
+ failed: z27.number(),
988
+ passRate: z27.number(),
989
+ duration: z27.number(),
990
+ outputText: z27.string().optional(),
991
+ files: z27.array(ExpectedFileSchema).optional(),
992
+ fileDiffs: z27.array(DiffContentSchema).optional(),
971
993
  /** Full template files after execution with status indicators */
972
- templateFiles: z26.array(TemplateFileSchema).optional(),
973
- startedAt: z26.string().optional(),
974
- completedAt: z26.string().optional(),
994
+ templateFiles: z27.array(TemplateFileSchema).optional(),
995
+ startedAt: z27.string().optional(),
996
+ completedAt: z27.string().optional(),
975
997
  llmTrace: LLMTraceSchema.optional()
976
998
  });
977
- var PromptResultSchema = z26.object({
978
- text: z26.string(),
979
- files: z26.array(z26.unknown()).optional(),
980
- finishReason: z26.string().optional(),
981
- reasoning: z26.string().optional(),
982
- reasoningDetails: z26.unknown().optional(),
983
- toolCalls: z26.array(z26.unknown()).optional(),
984
- toolResults: z26.array(z26.unknown()).optional(),
985
- warnings: z26.array(z26.unknown()).optional(),
986
- sources: z26.array(z26.unknown()).optional(),
987
- steps: z26.array(z26.unknown()),
988
- generationTimeMs: z26.number(),
989
- prompt: z26.string(),
990
- systemPrompt: z26.string(),
991
- usage: z26.object({
992
- totalTokens: z26.number().optional(),
993
- totalMicrocentsSpent: z26.number().optional()
999
+ var PromptResultSchema = z27.object({
1000
+ text: z27.string(),
1001
+ files: z27.array(z27.unknown()).optional(),
1002
+ finishReason: z27.string().optional(),
1003
+ reasoning: z27.string().optional(),
1004
+ reasoningDetails: z27.unknown().optional(),
1005
+ toolCalls: z27.array(z27.unknown()).optional(),
1006
+ toolResults: z27.array(z27.unknown()).optional(),
1007
+ warnings: z27.array(z27.unknown()).optional(),
1008
+ sources: z27.array(z27.unknown()).optional(),
1009
+ steps: z27.array(z27.unknown()),
1010
+ generationTimeMs: z27.number(),
1011
+ prompt: z27.string(),
1012
+ systemPrompt: z27.string(),
1013
+ usage: z27.object({
1014
+ totalTokens: z27.number().optional(),
1015
+ totalMicrocentsSpent: z27.number().optional()
994
1016
  })
995
1017
  });
996
- var EvaluationResultSchema = z26.object({
997
- id: z26.string(),
998
- runId: z26.string(),
999
- timestamp: z26.number(),
1018
+ var EvaluationResultSchema = z27.object({
1019
+ id: z27.string(),
1020
+ runId: z27.string(),
1021
+ timestamp: z27.number(),
1000
1022
  promptResult: PromptResultSchema,
1001
- testResults: z26.array(z26.unknown()),
1002
- tags: z26.array(z26.string()).optional(),
1003
- feedback: z26.string().optional(),
1004
- score: z26.number(),
1005
- suiteId: z26.string().optional()
1006
- });
1007
- var LeanEvaluationResultSchema = z26.object({
1008
- id: z26.string(),
1009
- runId: z26.string(),
1010
- timestamp: z26.number(),
1011
- tags: z26.array(z26.string()).optional(),
1012
- scenarioId: z26.string(),
1013
- scenarioVersion: z26.number().optional(),
1014
- targetId: z26.string(),
1015
- targetVersion: z26.number().optional(),
1016
- suiteId: z26.string().optional(),
1017
- score: z26.number(),
1018
- time: z26.number().optional(),
1019
- microcentsSpent: z26.number().optional()
1023
+ testResults: z27.array(z27.unknown()),
1024
+ tags: z27.array(z27.string()).optional(),
1025
+ feedback: z27.string().optional(),
1026
+ score: z27.number(),
1027
+ suiteId: z27.string().optional()
1028
+ });
1029
+ var LeanEvaluationResultSchema = z27.object({
1030
+ id: z27.string(),
1031
+ runId: z27.string(),
1032
+ timestamp: z27.number(),
1033
+ tags: z27.array(z27.string()).optional(),
1034
+ scenarioId: z27.string(),
1035
+ scenarioVersion: z27.number().optional(),
1036
+ targetId: z27.string(),
1037
+ targetVersion: z27.number().optional(),
1038
+ suiteId: z27.string().optional(),
1039
+ score: z27.number(),
1040
+ time: z27.number().optional(),
1041
+ microcentsSpent: z27.number().optional()
1020
1042
  });
1021
1043
 
1022
1044
  // src/project/project.ts
1023
- import { z as z27 } from "zod";
1045
+ import { z as z28 } from "zod";
1024
1046
  var ProjectSchema = BaseEntitySchema.extend({
1025
- appId: z27.string().optional().describe("The ID of the app in Dev Center"),
1026
- appSecret: z27.string().optional().describe("The secret of the app in Dev Center")
1047
+ appId: z28.string().optional().describe("The ID of the app in Dev Center"),
1048
+ appSecret: z28.string().optional().describe("The secret of the app in Dev Center")
1027
1049
  });
1028
1050
  var CreateProjectInputSchema = ProjectSchema.omit({
1029
1051
  id: true,
@@ -1034,10 +1056,10 @@ var CreateProjectInputSchema = ProjectSchema.omit({
1034
1056
  var UpdateProjectInputSchema = CreateProjectInputSchema.partial();
1035
1057
 
1036
1058
  // src/template/template.ts
1037
- import { z as z28 } from "zod";
1059
+ import { z as z29 } from "zod";
1038
1060
  var TemplateSchema = TenantEntitySchema.extend({
1039
1061
  /** URL to download the template from */
1040
- downloadUrl: z28.url()
1062
+ downloadUrl: z29.url()
1041
1063
  });
1042
1064
  var CreateTemplateInputSchema = TemplateSchema.omit({
1043
1065
  id: true,
@@ -1172,9 +1194,11 @@ export {
1172
1194
  CreateAgentInputSchema,
1173
1195
  CreateCustomAssertionInputSchema,
1174
1196
  CreateEvalRunInputSchema,
1197
+ CreateMcpInputSchema,
1175
1198
  CreateProjectInputSchema,
1176
1199
  CreateSkillInputSchema,
1177
1200
  CreateSkillsGroupInputSchema,
1201
+ CreateSubAgentInputSchema,
1178
1202
  CreateTemplateInputSchema,
1179
1203
  CreateTestScenarioInputSchema,
1180
1204
  CreateTestSuiteInputSchema,
@@ -1213,7 +1237,9 @@ export {
1213
1237
  LlmJudgeAssertionSchema,
1214
1238
  LlmJudgeConfigSchema,
1215
1239
  LocalProjectConfigSchema,
1240
+ MCPEntitySchema,
1216
1241
  MCPServerConfigSchema,
1242
+ MCP_SERVERS_JSON_KEY,
1217
1243
  MetaSiteConfigSchema,
1218
1244
  ModelConfigSchema,
1219
1245
  ModelIds,
@@ -1234,6 +1260,7 @@ export {
1234
1260
  SkillWasCalledAssertionSchema,
1235
1261
  SkillWasCalledConfigSchema,
1236
1262
  SkillsGroupSchema,
1263
+ SubAgentSchema,
1237
1264
  TRACE_EVENT_PREFIX,
1238
1265
  TargetSchema,
1239
1266
  TemplateFileSchema,
@@ -1254,9 +1281,11 @@ export {
1254
1281
  TriggerType,
1255
1282
  UpdateAgentInputSchema,
1256
1283
  UpdateCustomAssertionInputSchema,
1284
+ UpdateMcpInputSchema,
1257
1285
  UpdateProjectInputSchema,
1258
1286
  UpdateSkillInputSchema,
1259
1287
  UpdateSkillsGroupInputSchema,
1288
+ UpdateSubAgentInputSchema,
1260
1289
  UpdateTemplateInputSchema,
1261
1290
  UpdateTestScenarioInputSchema,
1262
1291
  UpdateTestSuiteInputSchema,