@wix/evalforge-types 0.20.0 → 0.21.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -42,9 +42,11 @@ __export(index_exports, {
42
42
  CreateAgentInputSchema: () => CreateAgentInputSchema,
43
43
  CreateCustomAssertionInputSchema: () => CreateCustomAssertionInputSchema,
44
44
  CreateEvalRunInputSchema: () => CreateEvalRunInputSchema,
45
+ CreateMcpInputSchema: () => CreateMcpInputSchema,
45
46
  CreateProjectInputSchema: () => CreateProjectInputSchema,
46
47
  CreateSkillInputSchema: () => CreateSkillInputSchema,
47
48
  CreateSkillsGroupInputSchema: () => CreateSkillsGroupInputSchema,
49
+ CreateSubAgentInputSchema: () => CreateSubAgentInputSchema,
48
50
  CreateTemplateInputSchema: () => CreateTemplateInputSchema,
49
51
  CreateTestScenarioInputSchema: () => CreateTestScenarioInputSchema,
50
52
  CreateTestSuiteInputSchema: () => CreateTestSuiteInputSchema,
@@ -83,7 +85,9 @@ __export(index_exports, {
83
85
  LlmJudgeAssertionSchema: () => LlmJudgeAssertionSchema,
84
86
  LlmJudgeConfigSchema: () => LlmJudgeConfigSchema,
85
87
  LocalProjectConfigSchema: () => LocalProjectConfigSchema,
88
+ MCPEntitySchema: () => MCPEntitySchema,
86
89
  MCPServerConfigSchema: () => MCPServerConfigSchema,
90
+ MCP_SERVERS_JSON_KEY: () => MCP_SERVERS_JSON_KEY,
87
91
  MetaSiteConfigSchema: () => MetaSiteConfigSchema,
88
92
  ModelConfigSchema: () => ModelConfigSchema,
89
93
  ModelIds: () => ModelIds,
@@ -104,6 +108,7 @@ __export(index_exports, {
104
108
  SkillWasCalledAssertionSchema: () => SkillWasCalledAssertionSchema,
105
109
  SkillWasCalledConfigSchema: () => SkillWasCalledConfigSchema,
106
110
  SkillsGroupSchema: () => SkillsGroupSchema,
111
+ SubAgentSchema: () => SubAgentSchema,
107
112
  TRACE_EVENT_PREFIX: () => TRACE_EVENT_PREFIX,
108
113
  TargetSchema: () => TargetSchema,
109
114
  TemplateFileSchema: () => TemplateFileSchema,
@@ -124,9 +129,11 @@ __export(index_exports, {
124
129
  TriggerType: () => TriggerType,
125
130
  UpdateAgentInputSchema: () => UpdateAgentInputSchema,
126
131
  UpdateCustomAssertionInputSchema: () => UpdateCustomAssertionInputSchema,
132
+ UpdateMcpInputSchema: () => UpdateMcpInputSchema,
127
133
  UpdateProjectInputSchema: () => UpdateProjectInputSchema,
128
134
  UpdateSkillInputSchema: () => UpdateSkillInputSchema,
129
135
  UpdateSkillsGroupInputSchema: () => UpdateSkillsGroupInputSchema,
136
+ UpdateSubAgentInputSchema: () => UpdateSubAgentInputSchema,
130
137
  UpdateTemplateInputSchema: () => UpdateTemplateInputSchema,
131
138
  UpdateTestScenarioInputSchema: () => UpdateTestScenarioInputSchema,
132
139
  UpdateTestSuiteInputSchema: () => UpdateTestSuiteInputSchema,
@@ -160,18 +167,21 @@ var TenantEntitySchema = BaseEntitySchema.extend({
160
167
 
161
168
  // src/common/mcp.ts
162
169
  var import_zod2 = require("zod");
163
- var MCPServerConfigSchema = import_zod2.z.object({
164
- /** Unique name for this MCP server */
165
- name: import_zod2.z.string(),
166
- /** Command to start the MCP server */
167
- command: import_zod2.z.string(),
168
- /** Command line arguments */
169
- args: import_zod2.z.array(import_zod2.z.string()).optional(),
170
- /** Environment variables for the server process */
171
- envVars: import_zod2.z.record(import_zod2.z.string(), import_zod2.z.string()).optional(),
172
- /** Tools to disable for this MCP server */
173
- disabledTools: import_zod2.z.array(import_zod2.z.string()).optional()
170
+ var MCP_SERVERS_JSON_KEY = "mcpServers";
171
+ var MCPEntitySchema = TenantEntitySchema.extend({
172
+ /** Display name and key in mcp.json mcpServers object */
173
+ name: import_zod2.z.string().min(1),
174
+ /** MCP server config (command/args, url/headers, etc.) - stored as-is for mcp.json */
175
+ config: import_zod2.z.record(import_zod2.z.string(), import_zod2.z.unknown())
176
+ });
177
+ var CreateMcpInputSchema = MCPEntitySchema.omit({
178
+ id: true,
179
+ createdAt: true,
180
+ updatedAt: true,
181
+ deleted: true
174
182
  });
183
+ var UpdateMcpInputSchema = CreateMcpInputSchema.partial();
184
+ var MCPServerConfigSchema = import_zod2.z.record(import_zod2.z.string(), import_zod2.z.unknown());
175
185
 
176
186
  // src/common/models.ts
177
187
  var import_zod3 = require("zod");
@@ -351,11 +361,26 @@ var CreateSkillsGroupInputSchema = SkillsGroupSchema.omit({
351
361
  });
352
362
  var UpdateSkillsGroupInputSchema = CreateSkillsGroupInputSchema.partial();
353
363
 
364
+ // src/target/sub-agent.ts
365
+ var import_zod7 = require("zod");
366
+ var SubAgentSchema = TargetSchema.extend({
367
+ /** The full sub-agent markdown content (YAML frontmatter + body) */
368
+ subAgentMd: import_zod7.z.string()
369
+ });
370
+ var SubAgentInputBaseSchema = SubAgentSchema.omit({
371
+ id: true,
372
+ createdAt: true,
373
+ updatedAt: true,
374
+ deleted: true
375
+ });
376
+ var CreateSubAgentInputSchema = SubAgentInputBaseSchema;
377
+ var UpdateSubAgentInputSchema = SubAgentInputBaseSchema.partial();
378
+
354
379
  // src/test/index.ts
355
- var import_zod17 = require("zod");
380
+ var import_zod18 = require("zod");
356
381
 
357
382
  // src/test/base.ts
358
- var import_zod7 = require("zod");
383
+ var import_zod8 = require("zod");
359
384
  var TestType = /* @__PURE__ */ ((TestType2) => {
360
385
  TestType2["LLM"] = "LLM";
361
386
  TestType2["TOOL"] = "TOOL";
@@ -368,7 +393,7 @@ var TestType = /* @__PURE__ */ ((TestType2) => {
368
393
  TestType2["PLAYWRIGHT_NL"] = "PLAYWRIGHT_NL";
369
394
  return TestType2;
370
395
  })(TestType || {});
371
- var TestTypeSchema = import_zod7.z.enum(TestType);
396
+ var TestTypeSchema = import_zod8.z.enum(TestType);
372
397
  var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
373
398
  TestImportance2["LOW"] = "low";
374
399
  TestImportance2["MEDIUM"] = "medium";
@@ -376,153 +401,153 @@ var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
376
401
  TestImportance2["CRITICAL"] = "critical";
377
402
  return TestImportance2;
378
403
  })(TestImportance || {});
379
- var TestImportanceSchema = import_zod7.z.enum(TestImportance);
380
- var BaseTestSchema = import_zod7.z.object({
381
- id: import_zod7.z.string(),
404
+ var TestImportanceSchema = import_zod8.z.enum(TestImportance);
405
+ var BaseTestSchema = import_zod8.z.object({
406
+ id: import_zod8.z.string(),
382
407
  type: TestTypeSchema,
383
- name: import_zod7.z.string().min(3),
384
- description: import_zod7.z.string().optional(),
408
+ name: import_zod8.z.string().min(3),
409
+ description: import_zod8.z.string().optional(),
385
410
  importance: TestImportanceSchema.optional()
386
411
  });
387
412
 
388
413
  // src/test/llm.ts
389
- var import_zod8 = require("zod");
414
+ var import_zod9 = require("zod");
390
415
  var LLMTestSchema = BaseTestSchema.extend({
391
- type: import_zod8.z.literal("LLM" /* LLM */),
416
+ type: import_zod9.z.literal("LLM" /* LLM */),
392
417
  /** Maximum steps for the LLM to take */
393
- maxSteps: import_zod8.z.number().min(1).max(100),
418
+ maxSteps: import_zod9.z.number().min(1).max(100),
394
419
  /** Prompt to send to the evaluator */
395
- prompt: import_zod8.z.string().min(1),
420
+ prompt: import_zod9.z.string().min(1),
396
421
  /** ID of the evaluator agent to use */
397
- evaluatorId: import_zod8.z.string()
422
+ evaluatorId: import_zod9.z.string()
398
423
  });
399
424
 
400
425
  // src/test/tool.ts
401
- var import_zod9 = require("zod");
426
+ var import_zod10 = require("zod");
402
427
  var ToolTestSchema = BaseTestSchema.extend({
403
- type: import_zod9.z.literal("TOOL" /* TOOL */),
428
+ type: import_zod10.z.literal("TOOL" /* TOOL */),
404
429
  /** Name of the tool that should be called */
405
- toolName: import_zod9.z.string().min(3),
430
+ toolName: import_zod10.z.string().min(3),
406
431
  /** Expected arguments for the tool call */
407
- args: import_zod9.z.record(import_zod9.z.string(), import_zod9.z.any()),
432
+ args: import_zod10.z.record(import_zod10.z.string(), import_zod10.z.any()),
408
433
  /** Expected content in the tool results */
409
- resultsContent: import_zod9.z.string()
434
+ resultsContent: import_zod10.z.string()
410
435
  });
411
436
 
412
437
  // src/test/site-config.ts
413
- var import_zod10 = require("zod");
438
+ var import_zod11 = require("zod");
414
439
  var SiteConfigTestSchema = BaseTestSchema.extend({
415
- type: import_zod10.z.literal("SITE_CONFIG" /* SITE_CONFIG */),
440
+ type: import_zod11.z.literal("SITE_CONFIG" /* SITE_CONFIG */),
416
441
  /** URL to call */
417
- url: import_zod10.z.string().url(),
442
+ url: import_zod11.z.string().url(),
418
443
  /** HTTP method */
419
- method: import_zod10.z.enum(["GET", "POST"]),
444
+ method: import_zod11.z.enum(["GET", "POST"]),
420
445
  /** Request body (for POST) */
421
- body: import_zod10.z.string().optional(),
446
+ body: import_zod11.z.string().optional(),
422
447
  /** Expected HTTP status code */
423
- expectedStatusCode: import_zod10.z.number().int().min(100).max(599),
448
+ expectedStatusCode: import_zod11.z.number().int().min(100).max(599),
424
449
  /** Expected response content */
425
- expectedResponse: import_zod10.z.string().optional(),
450
+ expectedResponse: import_zod11.z.string().optional(),
426
451
  /** JMESPath expression to extract from response */
427
- expectedResponseJMESPath: import_zod10.z.string().optional()
452
+ expectedResponseJMESPath: import_zod11.z.string().optional()
428
453
  });
429
454
 
430
455
  // src/test/command-execution.ts
431
- var import_zod11 = require("zod");
456
+ var import_zod12 = require("zod");
432
457
  var AllowedCommands = [
433
458
  "yarn install --no-immutable && yarn build",
434
459
  "npm run build",
435
460
  "yarn typecheck"
436
461
  ];
437
462
  var CommandExecutionTestSchema = BaseTestSchema.extend({
438
- type: import_zod11.z.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
463
+ type: import_zod12.z.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
439
464
  /** Command to execute (must be in AllowedCommands) */
440
- command: import_zod11.z.string().refine((value) => AllowedCommands.includes(value), {
465
+ command: import_zod12.z.string().refine((value) => AllowedCommands.includes(value), {
441
466
  message: `Command must be one of: ${AllowedCommands.join(", ")}`
442
467
  }),
443
468
  /** Expected exit code (default: 0) */
444
- expectedExitCode: import_zod11.z.number().default(0).optional()
469
+ expectedExitCode: import_zod12.z.number().default(0).optional()
445
470
  });
446
471
 
447
472
  // src/test/file-presence.ts
448
- var import_zod12 = require("zod");
473
+ var import_zod13 = require("zod");
449
474
  var FilePresenceTestSchema = BaseTestSchema.extend({
450
- type: import_zod12.z.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
475
+ type: import_zod13.z.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
451
476
  /** Paths to check */
452
- paths: import_zod12.z.array(import_zod12.z.string()),
477
+ paths: import_zod13.z.array(import_zod13.z.string()),
453
478
  /** Whether files should exist (true) or not exist (false) */
454
- shouldExist: import_zod12.z.boolean()
479
+ shouldExist: import_zod13.z.boolean()
455
480
  });
456
481
 
457
482
  // src/test/file-content.ts
458
- var import_zod13 = require("zod");
459
- var FileContentCheckSchema = import_zod13.z.object({
483
+ var import_zod14 = require("zod");
484
+ var FileContentCheckSchema = import_zod14.z.object({
460
485
  /** Strings that must be present in the file */
461
- contains: import_zod13.z.array(import_zod13.z.string()).optional(),
486
+ contains: import_zod14.z.array(import_zod14.z.string()).optional(),
462
487
  /** Strings that must NOT be present in the file */
463
- notContains: import_zod13.z.array(import_zod13.z.string()).optional(),
488
+ notContains: import_zod14.z.array(import_zod14.z.string()).optional(),
464
489
  /** Regex pattern the content must match */
465
- matches: import_zod13.z.string().optional(),
490
+ matches: import_zod14.z.string().optional(),
466
491
  /** JSON path checks for structured content */
467
- jsonPath: import_zod13.z.array(
468
- import_zod13.z.object({
469
- path: import_zod13.z.string(),
470
- value: import_zod13.z.unknown()
492
+ jsonPath: import_zod14.z.array(
493
+ import_zod14.z.object({
494
+ path: import_zod14.z.string(),
495
+ value: import_zod14.z.unknown()
471
496
  })
472
497
  ).optional(),
473
498
  /** Lines that should be added (for diff checking) */
474
- added: import_zod13.z.array(import_zod13.z.string()).optional(),
499
+ added: import_zod14.z.array(import_zod14.z.string()).optional(),
475
500
  /** Lines that should be removed (for diff checking) */
476
- removed: import_zod13.z.array(import_zod13.z.string()).optional()
501
+ removed: import_zod14.z.array(import_zod14.z.string()).optional()
477
502
  });
478
503
  var FileContentTestSchema = BaseTestSchema.extend({
479
- type: import_zod13.z.literal("FILE_CONTENT" /* FILE_CONTENT */),
504
+ type: import_zod14.z.literal("FILE_CONTENT" /* FILE_CONTENT */),
480
505
  /** Path to the file to check */
481
- path: import_zod13.z.string(),
506
+ path: import_zod14.z.string(),
482
507
  /** Content checks to perform */
483
508
  checks: FileContentCheckSchema
484
509
  });
485
510
 
486
511
  // src/test/build-check.ts
487
- var import_zod14 = require("zod");
512
+ var import_zod15 = require("zod");
488
513
  var BuildCheckTestSchema = BaseTestSchema.extend({
489
- type: import_zod14.z.literal("BUILD_CHECK" /* BUILD_CHECK */),
514
+ type: import_zod15.z.literal("BUILD_CHECK" /* BUILD_CHECK */),
490
515
  /** Build command to execute */
491
- command: import_zod14.z.string(),
516
+ command: import_zod15.z.string(),
492
517
  /** Whether the build should succeed */
493
- expectSuccess: import_zod14.z.boolean(),
518
+ expectSuccess: import_zod15.z.boolean(),
494
519
  /** Maximum allowed warnings (optional) */
495
- allowedWarnings: import_zod14.z.number().optional(),
520
+ allowedWarnings: import_zod15.z.number().optional(),
496
521
  /** Timeout in milliseconds */
497
- timeout: import_zod14.z.number().optional()
522
+ timeout: import_zod15.z.number().optional()
498
523
  });
499
524
 
500
525
  // src/test/vitest.ts
501
- var import_zod15 = require("zod");
526
+ var import_zod16 = require("zod");
502
527
  var VitestTestSchema = BaseTestSchema.extend({
503
- type: import_zod15.z.literal("VITEST" /* VITEST */),
528
+ type: import_zod16.z.literal("VITEST" /* VITEST */),
504
529
  /** Test file content */
505
- testFile: import_zod15.z.string(),
530
+ testFile: import_zod16.z.string(),
506
531
  /** Name of the test file */
507
- testFileName: import_zod15.z.string(),
532
+ testFileName: import_zod16.z.string(),
508
533
  /** Minimum pass rate required (0-100) */
509
- minPassRate: import_zod15.z.number().min(0).max(100)
534
+ minPassRate: import_zod16.z.number().min(0).max(100)
510
535
  });
511
536
 
512
537
  // src/test/playwright-nl.ts
513
- var import_zod16 = require("zod");
538
+ var import_zod17 = require("zod");
514
539
  var PlaywrightNLTestSchema = BaseTestSchema.extend({
515
- type: import_zod16.z.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
540
+ type: import_zod17.z.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
516
541
  /** Natural language steps to execute */
517
- steps: import_zod16.z.array(import_zod16.z.string()),
542
+ steps: import_zod17.z.array(import_zod17.z.string()),
518
543
  /** Expected outcome description */
519
- expectedOutcome: import_zod16.z.string(),
544
+ expectedOutcome: import_zod17.z.string(),
520
545
  /** Timeout in milliseconds */
521
- timeout: import_zod16.z.number().optional()
546
+ timeout: import_zod17.z.number().optional()
522
547
  });
523
548
 
524
549
  // src/test/index.ts
525
- var TestSchema = import_zod17.z.discriminatedUnion("type", [
550
+ var TestSchema = import_zod18.z.discriminatedUnion("type", [
526
551
  LLMTestSchema,
527
552
  ToolTestSchema,
528
553
  SiteConfigTestSchema,
@@ -535,66 +560,66 @@ var TestSchema = import_zod17.z.discriminatedUnion("type", [
535
560
  ]);
536
561
 
537
562
  // src/scenario/assertions.ts
538
- var import_zod18 = require("zod");
539
- var SkillWasCalledAssertionSchema = import_zod18.z.object({
540
- type: import_zod18.z.literal("skill_was_called"),
563
+ var import_zod19 = require("zod");
564
+ var SkillWasCalledAssertionSchema = import_zod19.z.object({
565
+ type: import_zod19.z.literal("skill_was_called"),
541
566
  /** Name of the skill that must have been called (matched against trace Skill tool args) */
542
- skillName: import_zod18.z.string()
567
+ skillName: import_zod19.z.string()
543
568
  });
544
- var BuildPassedAssertionSchema = import_zod18.z.object({
545
- type: import_zod18.z.literal("build_passed"),
569
+ var BuildPassedAssertionSchema = import_zod19.z.object({
570
+ type: import_zod19.z.literal("build_passed"),
546
571
  /** Command to run (default: "yarn build") */
547
- command: import_zod18.z.string().optional(),
572
+ command: import_zod19.z.string().optional(),
548
573
  /** Expected exit code (default: 0) */
549
- expectedExitCode: import_zod18.z.number().int().optional()
574
+ expectedExitCode: import_zod19.z.number().int().optional()
550
575
  });
551
- var LlmJudgeAssertionSchema = import_zod18.z.object({
552
- type: import_zod18.z.literal("llm_judge"),
576
+ var LlmJudgeAssertionSchema = import_zod19.z.object({
577
+ type: import_zod19.z.literal("llm_judge"),
553
578
  /** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
554
- prompt: import_zod18.z.string(),
579
+ prompt: import_zod19.z.string(),
555
580
  /** Optional system prompt for the judge (default asks for JSON with score) */
556
- systemPrompt: import_zod18.z.string().optional(),
581
+ systemPrompt: import_zod19.z.string().optional(),
557
582
  /** Minimum score to pass (0-100, default 70) */
558
- minScore: import_zod18.z.number().int().min(0).max(100).optional(),
583
+ minScore: import_zod19.z.number().int().min(0).max(100).optional(),
559
584
  /** Model for the judge (e.g. claude-3-5-haiku) */
560
- model: import_zod18.z.string().optional(),
561
- maxTokens: import_zod18.z.number().int().optional(),
562
- temperature: import_zod18.z.number().min(0).max(1).optional()
585
+ model: import_zod19.z.string().optional(),
586
+ maxTokens: import_zod19.z.number().int().optional(),
587
+ temperature: import_zod19.z.number().min(0).max(1).optional()
563
588
  });
564
- var AssertionSchema = import_zod18.z.union([
589
+ var AssertionSchema = import_zod19.z.union([
565
590
  SkillWasCalledAssertionSchema,
566
591
  BuildPassedAssertionSchema,
567
592
  LlmJudgeAssertionSchema
568
593
  ]);
569
594
 
570
595
  // src/scenario/environment.ts
571
- var import_zod19 = require("zod");
572
- var LocalProjectConfigSchema = import_zod19.z.object({
596
+ var import_zod20 = require("zod");
597
+ var LocalProjectConfigSchema = import_zod20.z.object({
573
598
  /** Template ID to use for the local project */
574
- templateId: import_zod19.z.string().optional(),
599
+ templateId: import_zod20.z.string().optional(),
575
600
  /** Files to create in the project */
576
- files: import_zod19.z.array(
577
- import_zod19.z.object({
578
- path: import_zod19.z.string().min(1),
579
- content: import_zod19.z.string().min(1)
601
+ files: import_zod20.z.array(
602
+ import_zod20.z.object({
603
+ path: import_zod20.z.string().min(1),
604
+ content: import_zod20.z.string().min(1)
580
605
  })
581
606
  ).optional()
582
607
  });
583
- var MetaSiteConfigSchema = import_zod19.z.object({
584
- configurations: import_zod19.z.array(
585
- import_zod19.z.object({
586
- name: import_zod19.z.string().min(1),
587
- apiCalls: import_zod19.z.array(
588
- import_zod19.z.object({
589
- url: import_zod19.z.string().url(),
590
- method: import_zod19.z.enum(["POST", "PUT"]),
591
- body: import_zod19.z.string()
608
+ var MetaSiteConfigSchema = import_zod20.z.object({
609
+ configurations: import_zod20.z.array(
610
+ import_zod20.z.object({
611
+ name: import_zod20.z.string().min(1),
612
+ apiCalls: import_zod20.z.array(
613
+ import_zod20.z.object({
614
+ url: import_zod20.z.string().url(),
615
+ method: import_zod20.z.enum(["POST", "PUT"]),
616
+ body: import_zod20.z.string()
592
617
  })
593
618
  )
594
619
  })
595
620
  ).optional()
596
621
  });
597
- var EnvironmentSchema = import_zod19.z.object({
622
+ var EnvironmentSchema = import_zod20.z.object({
598
623
  /** Local project configuration */
599
624
  localProject: LocalProjectConfigSchema.optional(),
600
625
  /** Meta site configuration */
@@ -602,54 +627,54 @@ var EnvironmentSchema = import_zod19.z.object({
602
627
  });
603
628
 
604
629
  // src/scenario/test-scenario.ts
605
- var import_zod21 = require("zod");
630
+ var import_zod22 = require("zod");
606
631
 
607
632
  // src/assertion/assertion.ts
608
- var import_zod20 = require("zod");
609
- var AssertionTypeSchema = import_zod20.z.enum([
633
+ var import_zod21 = require("zod");
634
+ var AssertionTypeSchema = import_zod21.z.enum([
610
635
  "skill_was_called",
611
636
  "build_passed",
612
637
  "llm_judge"
613
638
  ]);
614
- var AssertionParameterTypeSchema = import_zod20.z.enum([
639
+ var AssertionParameterTypeSchema = import_zod21.z.enum([
615
640
  "string",
616
641
  "number",
617
642
  "boolean"
618
643
  ]);
619
- var AssertionParameterSchema = import_zod20.z.object({
644
+ var AssertionParameterSchema = import_zod21.z.object({
620
645
  /** Parameter name (used as key in params object) */
621
- name: import_zod20.z.string().min(1),
646
+ name: import_zod21.z.string().min(1),
622
647
  /** Display label for the parameter */
623
- label: import_zod20.z.string().min(1),
648
+ label: import_zod21.z.string().min(1),
624
649
  /** Parameter type */
625
650
  type: AssertionParameterTypeSchema,
626
651
  /** Whether this parameter is required */
627
- required: import_zod20.z.boolean(),
652
+ required: import_zod21.z.boolean(),
628
653
  /** Default value (optional, used when not provided) */
629
- defaultValue: import_zod20.z.union([import_zod20.z.string(), import_zod20.z.number(), import_zod20.z.boolean()]).optional(),
654
+ defaultValue: import_zod21.z.union([import_zod21.z.string(), import_zod21.z.number(), import_zod21.z.boolean()]).optional(),
630
655
  /** If true, parameter is hidden by default behind "Show advanced options" */
631
- advanced: import_zod20.z.boolean().optional()
656
+ advanced: import_zod21.z.boolean().optional()
632
657
  });
633
- var ScenarioAssertionLinkSchema = import_zod20.z.object({
658
+ var ScenarioAssertionLinkSchema = import_zod21.z.object({
634
659
  /** ID of the assertion (can be system assertion like 'system:skill_was_called' or custom assertion UUID) */
635
- assertionId: import_zod20.z.string(),
660
+ assertionId: import_zod21.z.string(),
636
661
  /** Parameter values for this assertion in this scenario */
637
- params: import_zod20.z.record(
638
- import_zod20.z.string(),
639
- import_zod20.z.union([import_zod20.z.string(), import_zod20.z.number(), import_zod20.z.boolean(), import_zod20.z.null()])
662
+ params: import_zod21.z.record(
663
+ import_zod21.z.string(),
664
+ import_zod21.z.union([import_zod21.z.string(), import_zod21.z.number(), import_zod21.z.boolean(), import_zod21.z.null()])
640
665
  ).optional()
641
666
  });
642
- var SkillWasCalledConfigSchema = import_zod20.z.object({
667
+ var SkillWasCalledConfigSchema = import_zod21.z.object({
643
668
  /** Name of the skill that must have been called */
644
- skillName: import_zod20.z.string().min(1)
669
+ skillName: import_zod21.z.string().min(1)
645
670
  });
646
- var BuildPassedConfigSchema = import_zod20.z.strictObject({
671
+ var BuildPassedConfigSchema = import_zod21.z.strictObject({
647
672
  /** Command to run (default: "yarn build") */
648
- command: import_zod20.z.string().optional(),
673
+ command: import_zod21.z.string().optional(),
649
674
  /** Expected exit code (default: 0) */
650
- expectedExitCode: import_zod20.z.number().int().optional()
675
+ expectedExitCode: import_zod21.z.number().int().optional()
651
676
  });
652
- var LlmJudgeConfigSchema = import_zod20.z.object({
677
+ var LlmJudgeConfigSchema = import_zod21.z.object({
653
678
  /**
654
679
  * Prompt template with placeholders:
655
680
  * - {{output}}: agent's final output
@@ -660,28 +685,28 @@ var LlmJudgeConfigSchema = import_zod20.z.object({
660
685
  * - {{trace}}: step-by-step trace of tool calls
661
686
  * - Custom parameters defined in the parameters array
662
687
  */
663
- prompt: import_zod20.z.string().min(1),
688
+ prompt: import_zod21.z.string().min(1),
664
689
  /** Optional system prompt for the judge */
665
- systemPrompt: import_zod20.z.string().optional(),
690
+ systemPrompt: import_zod21.z.string().optional(),
666
691
  /** Minimum score to pass (0-100, default 70) */
667
- minScore: import_zod20.z.number().int().min(0).max(100).optional(),
692
+ minScore: import_zod21.z.number().int().min(0).max(100).optional(),
668
693
  /** Model for the judge (e.g. claude-3-5-haiku-20241022) */
669
- model: import_zod20.z.string().optional(),
694
+ model: import_zod21.z.string().optional(),
670
695
  /** Max output tokens */
671
- maxTokens: import_zod20.z.number().int().optional(),
696
+ maxTokens: import_zod21.z.number().int().optional(),
672
697
  /** Temperature (0-1) */
673
- temperature: import_zod20.z.number().min(0).max(1).optional(),
698
+ temperature: import_zod21.z.number().min(0).max(1).optional(),
674
699
  /** User-defined parameters for this assertion */
675
- parameters: import_zod20.z.array(AssertionParameterSchema).optional()
700
+ parameters: import_zod21.z.array(AssertionParameterSchema).optional()
676
701
  });
677
- var AssertionConfigSchema = import_zod20.z.union([
702
+ var AssertionConfigSchema = import_zod21.z.union([
678
703
  LlmJudgeConfigSchema,
679
704
  // requires prompt - check first
680
705
  SkillWasCalledConfigSchema,
681
706
  // requires skillName
682
707
  BuildPassedConfigSchema,
683
708
  // all optional, uses strictObject to reject unknown keys
684
- import_zod20.z.object({})
709
+ import_zod21.z.object({})
685
710
  // fallback empty config
686
711
  ]);
687
712
  var CustomAssertionSchema = TenantEntitySchema.extend({
@@ -726,23 +751,23 @@ function getLlmJudgeConfig(assertion) {
726
751
  }
727
752
 
728
753
  // src/scenario/test-scenario.ts
729
- var ExpectedFileSchema = import_zod21.z.object({
754
+ var ExpectedFileSchema = import_zod22.z.object({
730
755
  /** Relative path where the file should be created */
731
- path: import_zod21.z.string(),
756
+ path: import_zod22.z.string(),
732
757
  /** Optional expected content */
733
- content: import_zod21.z.string().optional()
758
+ content: import_zod22.z.string().optional()
734
759
  });
735
760
  var TestScenarioSchema = TenantEntitySchema.extend({
736
761
  /** The prompt sent to the agent to trigger the task */
737
- triggerPrompt: import_zod21.z.string().min(10),
762
+ triggerPrompt: import_zod22.z.string().min(10),
738
763
  /** ID of the template to use for this scenario (null = no template) */
739
- templateId: import_zod21.z.string().nullish(),
764
+ templateId: import_zod22.z.string().nullish(),
740
765
  /** Inline assertions to evaluate for this scenario (legacy) */
741
- assertions: import_zod21.z.array(AssertionSchema).optional(),
766
+ assertions: import_zod22.z.array(AssertionSchema).optional(),
742
767
  /** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
743
- assertionIds: import_zod21.z.array(import_zod21.z.string()).optional(),
768
+ assertionIds: import_zod22.z.array(import_zod22.z.string()).optional(),
744
769
  /** Linked assertions with per-scenario parameter values */
745
- assertionLinks: import_zod21.z.array(ScenarioAssertionLinkSchema).optional()
770
+ assertionLinks: import_zod22.z.array(ScenarioAssertionLinkSchema).optional()
746
771
  });
747
772
  var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
748
773
  id: true,
@@ -753,10 +778,10 @@ var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
753
778
  var UpdateTestScenarioInputSchema = CreateTestScenarioInputSchema.partial();
754
779
 
755
780
  // src/suite/test-suite.ts
756
- var import_zod22 = require("zod");
781
+ var import_zod23 = require("zod");
757
782
  var TestSuiteSchema = TenantEntitySchema.extend({
758
783
  /** IDs of test scenarios in this suite */
759
- scenarioIds: import_zod22.z.array(import_zod22.z.string())
784
+ scenarioIds: import_zod23.z.array(import_zod23.z.string())
760
785
  });
761
786
  var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
762
787
  id: true,
@@ -767,21 +792,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
767
792
  var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
768
793
 
769
794
  // src/evaluation/metrics.ts
770
- var import_zod23 = require("zod");
771
- var TokenUsageSchema = import_zod23.z.object({
772
- prompt: import_zod23.z.number(),
773
- completion: import_zod23.z.number(),
774
- total: import_zod23.z.number()
775
- });
776
- var EvalMetricsSchema = import_zod23.z.object({
777
- totalAssertions: import_zod23.z.number(),
778
- passed: import_zod23.z.number(),
779
- failed: import_zod23.z.number(),
780
- skipped: import_zod23.z.number(),
781
- errors: import_zod23.z.number(),
782
- passRate: import_zod23.z.number(),
783
- avgDuration: import_zod23.z.number(),
784
- totalDuration: import_zod23.z.number()
795
+ var import_zod24 = require("zod");
796
+ var TokenUsageSchema = import_zod24.z.object({
797
+ prompt: import_zod24.z.number(),
798
+ completion: import_zod24.z.number(),
799
+ total: import_zod24.z.number()
800
+ });
801
+ var EvalMetricsSchema = import_zod24.z.object({
802
+ totalAssertions: import_zod24.z.number(),
803
+ passed: import_zod24.z.number(),
804
+ failed: import_zod24.z.number(),
805
+ skipped: import_zod24.z.number(),
806
+ errors: import_zod24.z.number(),
807
+ passRate: import_zod24.z.number(),
808
+ avgDuration: import_zod24.z.number(),
809
+ totalDuration: import_zod24.z.number()
785
810
  });
786
811
  var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
787
812
  EvalStatus2["PENDING"] = "pending";
@@ -791,7 +816,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
791
816
  EvalStatus2["CANCELLED"] = "cancelled";
792
817
  return EvalStatus2;
793
818
  })(EvalStatus || {});
794
- var EvalStatusSchema = import_zod23.z.enum(EvalStatus);
819
+ var EvalStatusSchema = import_zod24.z.enum(EvalStatus);
795
820
  var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
796
821
  LLMStepType2["COMPLETION"] = "completion";
797
822
  LLMStepType2["TOOL_USE"] = "tool_use";
@@ -799,52 +824,52 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
799
824
  LLMStepType2["THINKING"] = "thinking";
800
825
  return LLMStepType2;
801
826
  })(LLMStepType || {});
802
- var LLMTraceStepSchema = import_zod23.z.object({
803
- id: import_zod23.z.string(),
804
- stepNumber: import_zod23.z.number(),
805
- type: import_zod23.z.enum(LLMStepType),
806
- model: import_zod23.z.string(),
807
- provider: import_zod23.z.string(),
808
- startedAt: import_zod23.z.string(),
809
- durationMs: import_zod23.z.number(),
827
+ var LLMTraceStepSchema = import_zod24.z.object({
828
+ id: import_zod24.z.string(),
829
+ stepNumber: import_zod24.z.number(),
830
+ type: import_zod24.z.enum(LLMStepType),
831
+ model: import_zod24.z.string(),
832
+ provider: import_zod24.z.string(),
833
+ startedAt: import_zod24.z.string(),
834
+ durationMs: import_zod24.z.number(),
810
835
  tokenUsage: TokenUsageSchema,
811
- costUsd: import_zod23.z.number(),
812
- toolName: import_zod23.z.string().optional(),
813
- toolArguments: import_zod23.z.string().optional(),
814
- inputPreview: import_zod23.z.string().optional(),
815
- outputPreview: import_zod23.z.string().optional(),
816
- success: import_zod23.z.boolean(),
817
- error: import_zod23.z.string().optional()
818
- });
819
- var LLMBreakdownStatsSchema = import_zod23.z.object({
820
- count: import_zod23.z.number(),
821
- durationMs: import_zod23.z.number(),
822
- tokens: import_zod23.z.number(),
823
- costUsd: import_zod23.z.number()
824
- });
825
- var LLMTraceSummarySchema = import_zod23.z.object({
826
- totalSteps: import_zod23.z.number(),
827
- totalDurationMs: import_zod23.z.number(),
836
+ costUsd: import_zod24.z.number(),
837
+ toolName: import_zod24.z.string().optional(),
838
+ toolArguments: import_zod24.z.string().optional(),
839
+ inputPreview: import_zod24.z.string().optional(),
840
+ outputPreview: import_zod24.z.string().optional(),
841
+ success: import_zod24.z.boolean(),
842
+ error: import_zod24.z.string().optional()
843
+ });
844
+ var LLMBreakdownStatsSchema = import_zod24.z.object({
845
+ count: import_zod24.z.number(),
846
+ durationMs: import_zod24.z.number(),
847
+ tokens: import_zod24.z.number(),
848
+ costUsd: import_zod24.z.number()
849
+ });
850
+ var LLMTraceSummarySchema = import_zod24.z.object({
851
+ totalSteps: import_zod24.z.number(),
852
+ totalDurationMs: import_zod24.z.number(),
828
853
  totalTokens: TokenUsageSchema,
829
- totalCostUsd: import_zod23.z.number(),
830
- stepTypeBreakdown: import_zod23.z.record(import_zod23.z.string(), LLMBreakdownStatsSchema).optional(),
831
- modelBreakdown: import_zod23.z.record(import_zod23.z.string(), LLMBreakdownStatsSchema),
832
- modelsUsed: import_zod23.z.array(import_zod23.z.string())
833
- });
834
- var LLMTraceSchema = import_zod23.z.object({
835
- id: import_zod23.z.string(),
836
- steps: import_zod23.z.array(LLMTraceStepSchema),
854
+ totalCostUsd: import_zod24.z.number(),
855
+ stepTypeBreakdown: import_zod24.z.record(import_zod24.z.string(), LLMBreakdownStatsSchema).optional(),
856
+ modelBreakdown: import_zod24.z.record(import_zod24.z.string(), LLMBreakdownStatsSchema),
857
+ modelsUsed: import_zod24.z.array(import_zod24.z.string())
858
+ });
859
+ var LLMTraceSchema = import_zod24.z.object({
860
+ id: import_zod24.z.string(),
861
+ steps: import_zod24.z.array(LLMTraceStepSchema),
837
862
  summary: LLMTraceSummarySchema
838
863
  });
839
864
 
840
865
  // src/evaluation/eval-result.ts
841
- var import_zod26 = require("zod");
866
+ var import_zod27 = require("zod");
842
867
 
843
868
  // src/evaluation/eval-run.ts
844
- var import_zod25 = require("zod");
869
+ var import_zod26 = require("zod");
845
870
 
846
871
  // src/evaluation/live-trace.ts
847
- var import_zod24 = require("zod");
872
+ var import_zod25 = require("zod");
848
873
  var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
849
874
  LiveTraceEventType2["THINKING"] = "thinking";
850
875
  LiveTraceEventType2["TOOL_USE"] = "tool_use";
@@ -858,37 +883,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
858
883
  LiveTraceEventType2["USER"] = "user";
859
884
  return LiveTraceEventType2;
860
885
  })(LiveTraceEventType || {});
861
- var LiveTraceEventSchema = import_zod24.z.object({
886
+ var LiveTraceEventSchema = import_zod25.z.object({
862
887
  /** The evaluation run ID */
863
- evalRunId: import_zod24.z.string(),
888
+ evalRunId: import_zod25.z.string(),
864
889
  /** The scenario ID being executed */
865
- scenarioId: import_zod24.z.string(),
890
+ scenarioId: import_zod25.z.string(),
866
891
  /** The scenario name for display */
867
- scenarioName: import_zod24.z.string(),
892
+ scenarioName: import_zod25.z.string(),
868
893
  /** The target ID (skill, agent, etc.) */
869
- targetId: import_zod24.z.string(),
894
+ targetId: import_zod25.z.string(),
870
895
  /** The target name for display */
871
- targetName: import_zod24.z.string(),
896
+ targetName: import_zod25.z.string(),
872
897
  /** Step number in the current scenario execution */
873
- stepNumber: import_zod24.z.number(),
898
+ stepNumber: import_zod25.z.number(),
874
899
  /** Type of trace event */
875
- type: import_zod24.z.enum(LiveTraceEventType),
900
+ type: import_zod25.z.enum(LiveTraceEventType),
876
901
  /** Tool name if this is a tool_use event */
877
- toolName: import_zod24.z.string().optional(),
902
+ toolName: import_zod25.z.string().optional(),
878
903
  /** Tool arguments preview (truncated JSON) */
879
- toolArgs: import_zod24.z.string().optional(),
904
+ toolArgs: import_zod25.z.string().optional(),
880
905
  /** Output preview (truncated text) */
881
- outputPreview: import_zod24.z.string().optional(),
906
+ outputPreview: import_zod25.z.string().optional(),
882
907
  /** File path for file operations */
883
- filePath: import_zod24.z.string().optional(),
908
+ filePath: import_zod25.z.string().optional(),
884
909
  /** Elapsed time in milliseconds for progress events */
885
- elapsedMs: import_zod24.z.number().optional(),
910
+ elapsedMs: import_zod25.z.number().optional(),
886
911
  /** Thinking/reasoning text from Claude */
887
- thinking: import_zod24.z.string().optional(),
912
+ thinking: import_zod25.z.string().optional(),
888
913
  /** Timestamp when this event occurred */
889
- timestamp: import_zod24.z.string(),
914
+ timestamp: import_zod25.z.string(),
890
915
  /** Whether this is the final event for this scenario */
891
- isComplete: import_zod24.z.boolean()
916
+ isComplete: import_zod25.z.boolean()
892
917
  });
893
918
  var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
894
919
  function parseTraceEventLine(line) {
@@ -916,14 +941,14 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
916
941
  TriggerType2["MANUAL"] = "MANUAL";
917
942
  return TriggerType2;
918
943
  })(TriggerType || {});
919
- var TriggerMetadataSchema = import_zod25.z.object({
920
- version: import_zod25.z.string().optional(),
921
- resourceUpdated: import_zod25.z.array(import_zod25.z.string()).optional()
944
+ var TriggerMetadataSchema = import_zod26.z.object({
945
+ version: import_zod26.z.string().optional(),
946
+ resourceUpdated: import_zod26.z.array(import_zod26.z.string()).optional()
922
947
  });
923
- var TriggerSchema = import_zod25.z.object({
924
- id: import_zod25.z.string(),
948
+ var TriggerSchema = import_zod26.z.object({
949
+ id: import_zod26.z.string(),
925
950
  metadata: TriggerMetadataSchema.optional(),
926
- type: import_zod25.z.enum(TriggerType)
951
+ type: import_zod26.z.enum(TriggerType)
927
952
  });
928
953
  var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
929
954
  FailureCategory2["MISSING_FILE"] = "missing_file";
@@ -941,28 +966,28 @@ var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
941
966
  FailureSeverity2["LOW"] = "low";
942
967
  return FailureSeverity2;
943
968
  })(FailureSeverity || {});
944
- var DiffLineTypeSchema = import_zod25.z.enum(["added", "removed", "unchanged"]);
945
- var DiffLineSchema = import_zod25.z.object({
969
+ var DiffLineTypeSchema = import_zod26.z.enum(["added", "removed", "unchanged"]);
970
+ var DiffLineSchema = import_zod26.z.object({
946
971
  type: DiffLineTypeSchema,
947
- content: import_zod25.z.string(),
948
- lineNumber: import_zod25.z.number()
949
- });
950
- var DiffContentSchema = import_zod25.z.object({
951
- path: import_zod25.z.string(),
952
- expected: import_zod25.z.string(),
953
- actual: import_zod25.z.string(),
954
- diffLines: import_zod25.z.array(DiffLineSchema),
955
- renamedFrom: import_zod25.z.string().optional()
956
- });
957
- var CommandExecutionSchema = import_zod25.z.object({
958
- command: import_zod25.z.string(),
959
- exitCode: import_zod25.z.number(),
960
- output: import_zod25.z.string().optional(),
961
- duration: import_zod25.z.number()
962
- });
963
- var FileModificationSchema = import_zod25.z.object({
964
- path: import_zod25.z.string(),
965
- action: import_zod25.z.enum(["created", "modified", "deleted"])
972
+ content: import_zod26.z.string(),
973
+ lineNumber: import_zod26.z.number()
974
+ });
975
+ var DiffContentSchema = import_zod26.z.object({
976
+ path: import_zod26.z.string(),
977
+ expected: import_zod26.z.string(),
978
+ actual: import_zod26.z.string(),
979
+ diffLines: import_zod26.z.array(DiffLineSchema),
980
+ renamedFrom: import_zod26.z.string().optional()
981
+ });
982
+ var CommandExecutionSchema = import_zod26.z.object({
983
+ command: import_zod26.z.string(),
984
+ exitCode: import_zod26.z.number(),
985
+ output: import_zod26.z.string().optional(),
986
+ duration: import_zod26.z.number()
987
+ });
988
+ var FileModificationSchema = import_zod26.z.object({
989
+ path: import_zod26.z.string(),
990
+ action: import_zod26.z.enum(["created", "modified", "deleted"])
966
991
  });
967
992
  var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
968
993
  TemplateFileStatus2["NEW"] = "new";
@@ -970,75 +995,79 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
970
995
  TemplateFileStatus2["UNCHANGED"] = "unchanged";
971
996
  return TemplateFileStatus2;
972
997
  })(TemplateFileStatus || {});
973
- var TemplateFileSchema = import_zod25.z.object({
998
+ var TemplateFileSchema = import_zod26.z.object({
974
999
  /** Relative path within the template */
975
- path: import_zod25.z.string(),
1000
+ path: import_zod26.z.string(),
976
1001
  /** Full file content after execution */
977
- content: import_zod25.z.string(),
1002
+ content: import_zod26.z.string(),
978
1003
  /** File status (new, modified, unchanged) */
979
- status: import_zod25.z.enum(["new", "modified", "unchanged"])
980
- });
981
- var ApiCallSchema = import_zod25.z.object({
982
- endpoint: import_zod25.z.string(),
983
- tokensUsed: import_zod25.z.number(),
984
- duration: import_zod25.z.number()
985
- });
986
- var ExecutionTraceSchema = import_zod25.z.object({
987
- commands: import_zod25.z.array(CommandExecutionSchema),
988
- filesModified: import_zod25.z.array(FileModificationSchema),
989
- apiCalls: import_zod25.z.array(ApiCallSchema),
990
- totalDuration: import_zod25.z.number()
991
- });
992
- var FailureAnalysisSchema = import_zod25.z.object({
993
- category: import_zod25.z.enum(FailureCategory),
994
- severity: import_zod25.z.enum(FailureSeverity),
995
- summary: import_zod25.z.string(),
996
- details: import_zod25.z.string(),
997
- rootCause: import_zod25.z.string(),
998
- suggestedFix: import_zod25.z.string(),
999
- relatedAssertions: import_zod25.z.array(import_zod25.z.string()),
1000
- codeSnippet: import_zod25.z.string().optional(),
1001
- similarIssues: import_zod25.z.array(import_zod25.z.string()).optional(),
1002
- patternId: import_zod25.z.string().optional(),
1004
+ status: import_zod26.z.enum(["new", "modified", "unchanged"])
1005
+ });
1006
+ var ApiCallSchema = import_zod26.z.object({
1007
+ endpoint: import_zod26.z.string(),
1008
+ tokensUsed: import_zod26.z.number(),
1009
+ duration: import_zod26.z.number()
1010
+ });
1011
+ var ExecutionTraceSchema = import_zod26.z.object({
1012
+ commands: import_zod26.z.array(CommandExecutionSchema),
1013
+ filesModified: import_zod26.z.array(FileModificationSchema),
1014
+ apiCalls: import_zod26.z.array(ApiCallSchema),
1015
+ totalDuration: import_zod26.z.number()
1016
+ });
1017
+ var FailureAnalysisSchema = import_zod26.z.object({
1018
+ category: import_zod26.z.enum(FailureCategory),
1019
+ severity: import_zod26.z.enum(FailureSeverity),
1020
+ summary: import_zod26.z.string(),
1021
+ details: import_zod26.z.string(),
1022
+ rootCause: import_zod26.z.string(),
1023
+ suggestedFix: import_zod26.z.string(),
1024
+ relatedAssertions: import_zod26.z.array(import_zod26.z.string()),
1025
+ codeSnippet: import_zod26.z.string().optional(),
1026
+ similarIssues: import_zod26.z.array(import_zod26.z.string()).optional(),
1027
+ patternId: import_zod26.z.string().optional(),
1003
1028
  // Extended fields for detailed debugging
1004
1029
  diff: DiffContentSchema.optional(),
1005
1030
  executionTrace: ExecutionTraceSchema.optional()
1006
1031
  });
1007
1032
  var EvalRunSchema = TenantEntitySchema.extend({
1008
1033
  /** Agent ID for this run */
1009
- agentId: import_zod25.z.string().optional(),
1034
+ agentId: import_zod26.z.string().optional(),
1010
1035
  /** Skills group ID for this run */
1011
- skillsGroupId: import_zod25.z.string().optional(),
1036
+ skillsGroupId: import_zod26.z.string().optional(),
1012
1037
  /** Scenario IDs to run */
1013
- scenarioIds: import_zod25.z.array(import_zod25.z.string()),
1038
+ scenarioIds: import_zod26.z.array(import_zod26.z.string()),
1014
1039
  /** Current status */
1015
1040
  status: EvalStatusSchema,
1016
1041
  /** Progress percentage (0-100) */
1017
- progress: import_zod25.z.number(),
1042
+ progress: import_zod26.z.number(),
1018
1043
  /** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
1019
- results: import_zod25.z.array(import_zod25.z.lazy(() => EvalRunResultSchema)),
1044
+ results: import_zod26.z.array(import_zod26.z.lazy(() => EvalRunResultSchema)),
1020
1045
  /** Aggregated metrics across all results */
1021
1046
  aggregateMetrics: EvalMetricsSchema,
1022
1047
  /** Failure analyses */
1023
- failureAnalyses: import_zod25.z.array(FailureAnalysisSchema).optional(),
1048
+ failureAnalyses: import_zod26.z.array(FailureAnalysisSchema).optional(),
1024
1049
  /** Aggregated LLM trace summary */
1025
1050
  llmTraceSummary: LLMTraceSummarySchema.optional(),
1026
1051
  /** What triggered this run */
1027
1052
  trigger: TriggerSchema.optional(),
1028
1053
  /** When the run started (set when evaluation is triggered) */
1029
- startedAt: import_zod25.z.string().optional(),
1054
+ startedAt: import_zod26.z.string().optional(),
1030
1055
  /** When the run completed */
1031
- completedAt: import_zod25.z.string().optional(),
1056
+ completedAt: import_zod26.z.string().optional(),
1032
1057
  /** Live trace events captured during execution (for playback on results page) */
1033
- liveTraceEvents: import_zod25.z.array(LiveTraceEventSchema).optional(),
1058
+ liveTraceEvents: import_zod26.z.array(LiveTraceEventSchema).optional(),
1034
1059
  /** Remote job ID for tracking execution in Dev Machines */
1035
- jobId: import_zod25.z.string().optional(),
1060
+ jobId: import_zod26.z.string().optional(),
1036
1061
  /** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
1037
- jobStatus: import_zod25.z.string().optional(),
1062
+ jobStatus: import_zod26.z.string().optional(),
1038
1063
  /** Remote job error message if the job failed */
1039
- jobError: import_zod25.z.string().optional(),
1064
+ jobError: import_zod26.z.string().optional(),
1040
1065
  /** Timestamp of the last job status check */
1041
- jobStatusCheckedAt: import_zod25.z.string().optional()
1066
+ jobStatusCheckedAt: import_zod26.z.string().optional(),
1067
+ /** MCP server IDs to enable for this run (optional) */
1068
+ mcpIds: import_zod26.z.array(import_zod26.z.string()).optional(),
1069
+ /** Sub-agent IDs to enable for this run (optional) */
1070
+ subAgentIds: import_zod26.z.array(import_zod26.z.string()).optional()
1042
1071
  });
1043
1072
  var CreateEvalRunInputSchema = EvalRunSchema.omit({
1044
1073
  id: true,
@@ -1051,28 +1080,28 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
1051
1080
  startedAt: true,
1052
1081
  completedAt: true
1053
1082
  });
1054
- var EvaluationProgressSchema = import_zod25.z.object({
1055
- runId: import_zod25.z.string(),
1056
- targetId: import_zod25.z.string(),
1057
- totalScenarios: import_zod25.z.number(),
1058
- completedScenarios: import_zod25.z.number(),
1059
- scenarioProgress: import_zod25.z.array(
1060
- import_zod25.z.object({
1061
- scenarioId: import_zod25.z.string(),
1062
- currentStep: import_zod25.z.string(),
1063
- error: import_zod25.z.string().optional()
1083
+ var EvaluationProgressSchema = import_zod26.z.object({
1084
+ runId: import_zod26.z.string(),
1085
+ targetId: import_zod26.z.string(),
1086
+ totalScenarios: import_zod26.z.number(),
1087
+ completedScenarios: import_zod26.z.number(),
1088
+ scenarioProgress: import_zod26.z.array(
1089
+ import_zod26.z.object({
1090
+ scenarioId: import_zod26.z.string(),
1091
+ currentStep: import_zod26.z.string(),
1092
+ error: import_zod26.z.string().optional()
1064
1093
  })
1065
1094
  ),
1066
- createdAt: import_zod25.z.number()
1095
+ createdAt: import_zod26.z.number()
1067
1096
  });
1068
- var EvaluationLogSchema = import_zod25.z.object({
1069
- runId: import_zod25.z.string(),
1070
- scenarioId: import_zod25.z.string(),
1071
- log: import_zod25.z.object({
1072
- level: import_zod25.z.enum(["info", "error", "debug"]),
1073
- message: import_zod25.z.string().optional(),
1074
- args: import_zod25.z.array(import_zod25.z.any()).optional(),
1075
- error: import_zod25.z.string().optional()
1097
+ var EvaluationLogSchema = import_zod26.z.object({
1098
+ runId: import_zod26.z.string(),
1099
+ scenarioId: import_zod26.z.string(),
1100
+ log: import_zod26.z.object({
1101
+ level: import_zod26.z.enum(["info", "error", "debug"]),
1102
+ message: import_zod26.z.string().optional(),
1103
+ args: import_zod26.z.array(import_zod26.z.any()).optional(),
1104
+ error: import_zod26.z.string().optional()
1076
1105
  })
1077
1106
  });
1078
1107
  var LLM_TIMEOUT = 12e4;
@@ -1085,91 +1114,91 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
1085
1114
  AssertionResultStatus2["ERROR"] = "error";
1086
1115
  return AssertionResultStatus2;
1087
1116
  })(AssertionResultStatus || {});
1088
- var AssertionResultSchema = import_zod26.z.object({
1089
- id: import_zod26.z.string(),
1090
- assertionId: import_zod26.z.string(),
1091
- assertionType: import_zod26.z.string(),
1092
- assertionName: import_zod26.z.string(),
1093
- status: import_zod26.z.enum(AssertionResultStatus),
1094
- message: import_zod26.z.string().optional(),
1095
- expected: import_zod26.z.string().optional(),
1096
- actual: import_zod26.z.string().optional(),
1097
- duration: import_zod26.z.number().optional(),
1098
- details: import_zod26.z.record(import_zod26.z.string(), import_zod26.z.unknown()).optional(),
1099
- llmTraceSteps: import_zod26.z.array(LLMTraceStepSchema).optional()
1100
- });
1101
- var EvalRunResultSchema = import_zod26.z.object({
1102
- id: import_zod26.z.string(),
1103
- targetId: import_zod26.z.string(),
1104
- targetName: import_zod26.z.string().optional(),
1105
- scenarioId: import_zod26.z.string(),
1106
- scenarioName: import_zod26.z.string(),
1117
+ var AssertionResultSchema = import_zod27.z.object({
1118
+ id: import_zod27.z.string(),
1119
+ assertionId: import_zod27.z.string(),
1120
+ assertionType: import_zod27.z.string(),
1121
+ assertionName: import_zod27.z.string(),
1122
+ status: import_zod27.z.enum(AssertionResultStatus),
1123
+ message: import_zod27.z.string().optional(),
1124
+ expected: import_zod27.z.string().optional(),
1125
+ actual: import_zod27.z.string().optional(),
1126
+ duration: import_zod27.z.number().optional(),
1127
+ details: import_zod27.z.record(import_zod27.z.string(), import_zod27.z.unknown()).optional(),
1128
+ llmTraceSteps: import_zod27.z.array(LLMTraceStepSchema).optional()
1129
+ });
1130
+ var EvalRunResultSchema = import_zod27.z.object({
1131
+ id: import_zod27.z.string(),
1132
+ targetId: import_zod27.z.string(),
1133
+ targetName: import_zod27.z.string().optional(),
1134
+ scenarioId: import_zod27.z.string(),
1135
+ scenarioName: import_zod27.z.string(),
1107
1136
  modelConfig: ModelConfigSchema.optional(),
1108
- assertionResults: import_zod26.z.array(AssertionResultSchema),
1137
+ assertionResults: import_zod27.z.array(AssertionResultSchema),
1109
1138
  metrics: EvalMetricsSchema.optional(),
1110
- passed: import_zod26.z.number(),
1111
- failed: import_zod26.z.number(),
1112
- passRate: import_zod26.z.number(),
1113
- duration: import_zod26.z.number(),
1114
- outputText: import_zod26.z.string().optional(),
1115
- files: import_zod26.z.array(ExpectedFileSchema).optional(),
1116
- fileDiffs: import_zod26.z.array(DiffContentSchema).optional(),
1139
+ passed: import_zod27.z.number(),
1140
+ failed: import_zod27.z.number(),
1141
+ passRate: import_zod27.z.number(),
1142
+ duration: import_zod27.z.number(),
1143
+ outputText: import_zod27.z.string().optional(),
1144
+ files: import_zod27.z.array(ExpectedFileSchema).optional(),
1145
+ fileDiffs: import_zod27.z.array(DiffContentSchema).optional(),
1117
1146
  /** Full template files after execution with status indicators */
1118
- templateFiles: import_zod26.z.array(TemplateFileSchema).optional(),
1119
- startedAt: import_zod26.z.string().optional(),
1120
- completedAt: import_zod26.z.string().optional(),
1147
+ templateFiles: import_zod27.z.array(TemplateFileSchema).optional(),
1148
+ startedAt: import_zod27.z.string().optional(),
1149
+ completedAt: import_zod27.z.string().optional(),
1121
1150
  llmTrace: LLMTraceSchema.optional()
1122
1151
  });
1123
- var PromptResultSchema = import_zod26.z.object({
1124
- text: import_zod26.z.string(),
1125
- files: import_zod26.z.array(import_zod26.z.unknown()).optional(),
1126
- finishReason: import_zod26.z.string().optional(),
1127
- reasoning: import_zod26.z.string().optional(),
1128
- reasoningDetails: import_zod26.z.unknown().optional(),
1129
- toolCalls: import_zod26.z.array(import_zod26.z.unknown()).optional(),
1130
- toolResults: import_zod26.z.array(import_zod26.z.unknown()).optional(),
1131
- warnings: import_zod26.z.array(import_zod26.z.unknown()).optional(),
1132
- sources: import_zod26.z.array(import_zod26.z.unknown()).optional(),
1133
- steps: import_zod26.z.array(import_zod26.z.unknown()),
1134
- generationTimeMs: import_zod26.z.number(),
1135
- prompt: import_zod26.z.string(),
1136
- systemPrompt: import_zod26.z.string(),
1137
- usage: import_zod26.z.object({
1138
- totalTokens: import_zod26.z.number().optional(),
1139
- totalMicrocentsSpent: import_zod26.z.number().optional()
1152
+ var PromptResultSchema = import_zod27.z.object({
1153
+ text: import_zod27.z.string(),
1154
+ files: import_zod27.z.array(import_zod27.z.unknown()).optional(),
1155
+ finishReason: import_zod27.z.string().optional(),
1156
+ reasoning: import_zod27.z.string().optional(),
1157
+ reasoningDetails: import_zod27.z.unknown().optional(),
1158
+ toolCalls: import_zod27.z.array(import_zod27.z.unknown()).optional(),
1159
+ toolResults: import_zod27.z.array(import_zod27.z.unknown()).optional(),
1160
+ warnings: import_zod27.z.array(import_zod27.z.unknown()).optional(),
1161
+ sources: import_zod27.z.array(import_zod27.z.unknown()).optional(),
1162
+ steps: import_zod27.z.array(import_zod27.z.unknown()),
1163
+ generationTimeMs: import_zod27.z.number(),
1164
+ prompt: import_zod27.z.string(),
1165
+ systemPrompt: import_zod27.z.string(),
1166
+ usage: import_zod27.z.object({
1167
+ totalTokens: import_zod27.z.number().optional(),
1168
+ totalMicrocentsSpent: import_zod27.z.number().optional()
1140
1169
  })
1141
1170
  });
1142
- var EvaluationResultSchema = import_zod26.z.object({
1143
- id: import_zod26.z.string(),
1144
- runId: import_zod26.z.string(),
1145
- timestamp: import_zod26.z.number(),
1171
+ var EvaluationResultSchema = import_zod27.z.object({
1172
+ id: import_zod27.z.string(),
1173
+ runId: import_zod27.z.string(),
1174
+ timestamp: import_zod27.z.number(),
1146
1175
  promptResult: PromptResultSchema,
1147
- testResults: import_zod26.z.array(import_zod26.z.unknown()),
1148
- tags: import_zod26.z.array(import_zod26.z.string()).optional(),
1149
- feedback: import_zod26.z.string().optional(),
1150
- score: import_zod26.z.number(),
1151
- suiteId: import_zod26.z.string().optional()
1152
- });
1153
- var LeanEvaluationResultSchema = import_zod26.z.object({
1154
- id: import_zod26.z.string(),
1155
- runId: import_zod26.z.string(),
1156
- timestamp: import_zod26.z.number(),
1157
- tags: import_zod26.z.array(import_zod26.z.string()).optional(),
1158
- scenarioId: import_zod26.z.string(),
1159
- scenarioVersion: import_zod26.z.number().optional(),
1160
- targetId: import_zod26.z.string(),
1161
- targetVersion: import_zod26.z.number().optional(),
1162
- suiteId: import_zod26.z.string().optional(),
1163
- score: import_zod26.z.number(),
1164
- time: import_zod26.z.number().optional(),
1165
- microcentsSpent: import_zod26.z.number().optional()
1176
+ testResults: import_zod27.z.array(import_zod27.z.unknown()),
1177
+ tags: import_zod27.z.array(import_zod27.z.string()).optional(),
1178
+ feedback: import_zod27.z.string().optional(),
1179
+ score: import_zod27.z.number(),
1180
+ suiteId: import_zod27.z.string().optional()
1181
+ });
1182
+ var LeanEvaluationResultSchema = import_zod27.z.object({
1183
+ id: import_zod27.z.string(),
1184
+ runId: import_zod27.z.string(),
1185
+ timestamp: import_zod27.z.number(),
1186
+ tags: import_zod27.z.array(import_zod27.z.string()).optional(),
1187
+ scenarioId: import_zod27.z.string(),
1188
+ scenarioVersion: import_zod27.z.number().optional(),
1189
+ targetId: import_zod27.z.string(),
1190
+ targetVersion: import_zod27.z.number().optional(),
1191
+ suiteId: import_zod27.z.string().optional(),
1192
+ score: import_zod27.z.number(),
1193
+ time: import_zod27.z.number().optional(),
1194
+ microcentsSpent: import_zod27.z.number().optional()
1166
1195
  });
1167
1196
 
1168
1197
  // src/project/project.ts
1169
- var import_zod27 = require("zod");
1198
+ var import_zod28 = require("zod");
1170
1199
  var ProjectSchema = BaseEntitySchema.extend({
1171
- appId: import_zod27.z.string().optional().describe("The ID of the app in Dev Center"),
1172
- appSecret: import_zod27.z.string().optional().describe("The secret of the app in Dev Center")
1200
+ appId: import_zod28.z.string().optional().describe("The ID of the app in Dev Center"),
1201
+ appSecret: import_zod28.z.string().optional().describe("The secret of the app in Dev Center")
1173
1202
  });
1174
1203
  var CreateProjectInputSchema = ProjectSchema.omit({
1175
1204
  id: true,
@@ -1180,10 +1209,10 @@ var CreateProjectInputSchema = ProjectSchema.omit({
1180
1209
  var UpdateProjectInputSchema = CreateProjectInputSchema.partial();
1181
1210
 
1182
1211
  // src/template/template.ts
1183
- var import_zod28 = require("zod");
1212
+ var import_zod29 = require("zod");
1184
1213
  var TemplateSchema = TenantEntitySchema.extend({
1185
1214
  /** URL to download the template from */
1186
- downloadUrl: import_zod28.z.url()
1215
+ downloadUrl: import_zod29.z.url()
1187
1216
  });
1188
1217
  var CreateTemplateInputSchema = TemplateSchema.omit({
1189
1218
  id: true,
@@ -1319,9 +1348,11 @@ function getSystemAssertion(id) {
1319
1348
  CreateAgentInputSchema,
1320
1349
  CreateCustomAssertionInputSchema,
1321
1350
  CreateEvalRunInputSchema,
1351
+ CreateMcpInputSchema,
1322
1352
  CreateProjectInputSchema,
1323
1353
  CreateSkillInputSchema,
1324
1354
  CreateSkillsGroupInputSchema,
1355
+ CreateSubAgentInputSchema,
1325
1356
  CreateTemplateInputSchema,
1326
1357
  CreateTestScenarioInputSchema,
1327
1358
  CreateTestSuiteInputSchema,
@@ -1360,7 +1391,9 @@ function getSystemAssertion(id) {
1360
1391
  LlmJudgeAssertionSchema,
1361
1392
  LlmJudgeConfigSchema,
1362
1393
  LocalProjectConfigSchema,
1394
+ MCPEntitySchema,
1363
1395
  MCPServerConfigSchema,
1396
+ MCP_SERVERS_JSON_KEY,
1364
1397
  MetaSiteConfigSchema,
1365
1398
  ModelConfigSchema,
1366
1399
  ModelIds,
@@ -1381,6 +1414,7 @@ function getSystemAssertion(id) {
1381
1414
  SkillWasCalledAssertionSchema,
1382
1415
  SkillWasCalledConfigSchema,
1383
1416
  SkillsGroupSchema,
1417
+ SubAgentSchema,
1384
1418
  TRACE_EVENT_PREFIX,
1385
1419
  TargetSchema,
1386
1420
  TemplateFileSchema,
@@ -1401,9 +1435,11 @@ function getSystemAssertion(id) {
1401
1435
  TriggerType,
1402
1436
  UpdateAgentInputSchema,
1403
1437
  UpdateCustomAssertionInputSchema,
1438
+ UpdateMcpInputSchema,
1404
1439
  UpdateProjectInputSchema,
1405
1440
  UpdateSkillInputSchema,
1406
1441
  UpdateSkillsGroupInputSchema,
1442
+ UpdateSubAgentInputSchema,
1407
1443
  UpdateTemplateInputSchema,
1408
1444
  UpdateTestScenarioInputSchema,
1409
1445
  UpdateTestSuiteInputSchema,