@wix/evalforge-types 0.19.0 → 0.21.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -42,9 +42,11 @@ __export(index_exports, {
42
42
  CreateAgentInputSchema: () => CreateAgentInputSchema,
43
43
  CreateCustomAssertionInputSchema: () => CreateCustomAssertionInputSchema,
44
44
  CreateEvalRunInputSchema: () => CreateEvalRunInputSchema,
45
+ CreateMcpInputSchema: () => CreateMcpInputSchema,
45
46
  CreateProjectInputSchema: () => CreateProjectInputSchema,
46
47
  CreateSkillInputSchema: () => CreateSkillInputSchema,
47
48
  CreateSkillsGroupInputSchema: () => CreateSkillsGroupInputSchema,
49
+ CreateSubAgentInputSchema: () => CreateSubAgentInputSchema,
48
50
  CreateTemplateInputSchema: () => CreateTemplateInputSchema,
49
51
  CreateTestScenarioInputSchema: () => CreateTestScenarioInputSchema,
50
52
  CreateTestSuiteInputSchema: () => CreateTestSuiteInputSchema,
@@ -83,7 +85,9 @@ __export(index_exports, {
83
85
  LlmJudgeAssertionSchema: () => LlmJudgeAssertionSchema,
84
86
  LlmJudgeConfigSchema: () => LlmJudgeConfigSchema,
85
87
  LocalProjectConfigSchema: () => LocalProjectConfigSchema,
88
+ MCPEntitySchema: () => MCPEntitySchema,
86
89
  MCPServerConfigSchema: () => MCPServerConfigSchema,
90
+ MCP_SERVERS_JSON_KEY: () => MCP_SERVERS_JSON_KEY,
87
91
  MetaSiteConfigSchema: () => MetaSiteConfigSchema,
88
92
  ModelConfigSchema: () => ModelConfigSchema,
89
93
  ModelIds: () => ModelIds,
@@ -104,6 +108,7 @@ __export(index_exports, {
104
108
  SkillWasCalledAssertionSchema: () => SkillWasCalledAssertionSchema,
105
109
  SkillWasCalledConfigSchema: () => SkillWasCalledConfigSchema,
106
110
  SkillsGroupSchema: () => SkillsGroupSchema,
111
+ SubAgentSchema: () => SubAgentSchema,
107
112
  TRACE_EVENT_PREFIX: () => TRACE_EVENT_PREFIX,
108
113
  TargetSchema: () => TargetSchema,
109
114
  TemplateFileSchema: () => TemplateFileSchema,
@@ -124,9 +129,11 @@ __export(index_exports, {
124
129
  TriggerType: () => TriggerType,
125
130
  UpdateAgentInputSchema: () => UpdateAgentInputSchema,
126
131
  UpdateCustomAssertionInputSchema: () => UpdateCustomAssertionInputSchema,
132
+ UpdateMcpInputSchema: () => UpdateMcpInputSchema,
127
133
  UpdateProjectInputSchema: () => UpdateProjectInputSchema,
128
134
  UpdateSkillInputSchema: () => UpdateSkillInputSchema,
129
135
  UpdateSkillsGroupInputSchema: () => UpdateSkillsGroupInputSchema,
136
+ UpdateSubAgentInputSchema: () => UpdateSubAgentInputSchema,
130
137
  UpdateTemplateInputSchema: () => UpdateTemplateInputSchema,
131
138
  UpdateTestScenarioInputSchema: () => UpdateTestScenarioInputSchema,
132
139
  UpdateTestSuiteInputSchema: () => UpdateTestSuiteInputSchema,
@@ -160,18 +167,21 @@ var TenantEntitySchema = BaseEntitySchema.extend({
160
167
 
161
168
  // src/common/mcp.ts
162
169
  var import_zod2 = require("zod");
163
- var MCPServerConfigSchema = import_zod2.z.object({
164
- /** Unique name for this MCP server */
165
- name: import_zod2.z.string(),
166
- /** Command to start the MCP server */
167
- command: import_zod2.z.string(),
168
- /** Command line arguments */
169
- args: import_zod2.z.array(import_zod2.z.string()).optional(),
170
- /** Environment variables for the server process */
171
- envVars: import_zod2.z.record(import_zod2.z.string(), import_zod2.z.string()).optional(),
172
- /** Tools to disable for this MCP server */
173
- disabledTools: import_zod2.z.array(import_zod2.z.string()).optional()
170
+ var MCP_SERVERS_JSON_KEY = "mcpServers";
171
+ var MCPEntitySchema = TenantEntitySchema.extend({
172
+ /** Display name and key in mcp.json mcpServers object */
173
+ name: import_zod2.z.string().min(1),
174
+ /** MCP server config (command/args, url/headers, etc.) - stored as-is for mcp.json */
175
+ config: import_zod2.z.record(import_zod2.z.string(), import_zod2.z.unknown())
176
+ });
177
+ var CreateMcpInputSchema = MCPEntitySchema.omit({
178
+ id: true,
179
+ createdAt: true,
180
+ updatedAt: true,
181
+ deleted: true
174
182
  });
183
+ var UpdateMcpInputSchema = CreateMcpInputSchema.partial();
184
+ var MCPServerConfigSchema = import_zod2.z.record(import_zod2.z.string(), import_zod2.z.unknown());
175
185
 
176
186
  // src/common/models.ts
177
187
  var import_zod3 = require("zod");
@@ -290,7 +300,9 @@ var CreateAgentInputSchema = AgentSchema.omit({
290
300
  updatedAt: true,
291
301
  deleted: true
292
302
  });
293
- var UpdateAgentInputSchema = CreateAgentInputSchema.partial();
303
+ var UpdateAgentInputSchema = CreateAgentInputSchema.partial().extend({
304
+ modelConfig: ModelConfigSchema.optional().nullable()
305
+ });
294
306
 
295
307
  // src/target/skill.ts
296
308
  var import_zod5 = require("zod");
@@ -349,11 +361,26 @@ var CreateSkillsGroupInputSchema = SkillsGroupSchema.omit({
349
361
  });
350
362
  var UpdateSkillsGroupInputSchema = CreateSkillsGroupInputSchema.partial();
351
363
 
364
+ // src/target/sub-agent.ts
365
+ var import_zod7 = require("zod");
366
+ var SubAgentSchema = TargetSchema.extend({
367
+ /** The full sub-agent markdown content (YAML frontmatter + body) */
368
+ subAgentMd: import_zod7.z.string()
369
+ });
370
+ var SubAgentInputBaseSchema = SubAgentSchema.omit({
371
+ id: true,
372
+ createdAt: true,
373
+ updatedAt: true,
374
+ deleted: true
375
+ });
376
+ var CreateSubAgentInputSchema = SubAgentInputBaseSchema;
377
+ var UpdateSubAgentInputSchema = SubAgentInputBaseSchema.partial();
378
+
352
379
  // src/test/index.ts
353
- var import_zod17 = require("zod");
380
+ var import_zod18 = require("zod");
354
381
 
355
382
  // src/test/base.ts
356
- var import_zod7 = require("zod");
383
+ var import_zod8 = require("zod");
357
384
  var TestType = /* @__PURE__ */ ((TestType2) => {
358
385
  TestType2["LLM"] = "LLM";
359
386
  TestType2["TOOL"] = "TOOL";
@@ -366,7 +393,7 @@ var TestType = /* @__PURE__ */ ((TestType2) => {
366
393
  TestType2["PLAYWRIGHT_NL"] = "PLAYWRIGHT_NL";
367
394
  return TestType2;
368
395
  })(TestType || {});
369
- var TestTypeSchema = import_zod7.z.enum(TestType);
396
+ var TestTypeSchema = import_zod8.z.enum(TestType);
370
397
  var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
371
398
  TestImportance2["LOW"] = "low";
372
399
  TestImportance2["MEDIUM"] = "medium";
@@ -374,153 +401,153 @@ var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
374
401
  TestImportance2["CRITICAL"] = "critical";
375
402
  return TestImportance2;
376
403
  })(TestImportance || {});
377
- var TestImportanceSchema = import_zod7.z.enum(TestImportance);
378
- var BaseTestSchema = import_zod7.z.object({
379
- id: import_zod7.z.string(),
404
+ var TestImportanceSchema = import_zod8.z.enum(TestImportance);
405
+ var BaseTestSchema = import_zod8.z.object({
406
+ id: import_zod8.z.string(),
380
407
  type: TestTypeSchema,
381
- name: import_zod7.z.string().min(3),
382
- description: import_zod7.z.string().optional(),
408
+ name: import_zod8.z.string().min(3),
409
+ description: import_zod8.z.string().optional(),
383
410
  importance: TestImportanceSchema.optional()
384
411
  });
385
412
 
386
413
  // src/test/llm.ts
387
- var import_zod8 = require("zod");
414
+ var import_zod9 = require("zod");
388
415
  var LLMTestSchema = BaseTestSchema.extend({
389
- type: import_zod8.z.literal("LLM" /* LLM */),
416
+ type: import_zod9.z.literal("LLM" /* LLM */),
390
417
  /** Maximum steps for the LLM to take */
391
- maxSteps: import_zod8.z.number().min(1).max(100),
418
+ maxSteps: import_zod9.z.number().min(1).max(100),
392
419
  /** Prompt to send to the evaluator */
393
- prompt: import_zod8.z.string().min(1),
420
+ prompt: import_zod9.z.string().min(1),
394
421
  /** ID of the evaluator agent to use */
395
- evaluatorId: import_zod8.z.string()
422
+ evaluatorId: import_zod9.z.string()
396
423
  });
397
424
 
398
425
  // src/test/tool.ts
399
- var import_zod9 = require("zod");
426
+ var import_zod10 = require("zod");
400
427
  var ToolTestSchema = BaseTestSchema.extend({
401
- type: import_zod9.z.literal("TOOL" /* TOOL */),
428
+ type: import_zod10.z.literal("TOOL" /* TOOL */),
402
429
  /** Name of the tool that should be called */
403
- toolName: import_zod9.z.string().min(3),
430
+ toolName: import_zod10.z.string().min(3),
404
431
  /** Expected arguments for the tool call */
405
- args: import_zod9.z.record(import_zod9.z.string(), import_zod9.z.any()),
432
+ args: import_zod10.z.record(import_zod10.z.string(), import_zod10.z.any()),
406
433
  /** Expected content in the tool results */
407
- resultsContent: import_zod9.z.string()
434
+ resultsContent: import_zod10.z.string()
408
435
  });
409
436
 
410
437
  // src/test/site-config.ts
411
- var import_zod10 = require("zod");
438
+ var import_zod11 = require("zod");
412
439
  var SiteConfigTestSchema = BaseTestSchema.extend({
413
- type: import_zod10.z.literal("SITE_CONFIG" /* SITE_CONFIG */),
440
+ type: import_zod11.z.literal("SITE_CONFIG" /* SITE_CONFIG */),
414
441
  /** URL to call */
415
- url: import_zod10.z.string().url(),
442
+ url: import_zod11.z.string().url(),
416
443
  /** HTTP method */
417
- method: import_zod10.z.enum(["GET", "POST"]),
444
+ method: import_zod11.z.enum(["GET", "POST"]),
418
445
  /** Request body (for POST) */
419
- body: import_zod10.z.string().optional(),
446
+ body: import_zod11.z.string().optional(),
420
447
  /** Expected HTTP status code */
421
- expectedStatusCode: import_zod10.z.number().int().min(100).max(599),
448
+ expectedStatusCode: import_zod11.z.number().int().min(100).max(599),
422
449
  /** Expected response content */
423
- expectedResponse: import_zod10.z.string().optional(),
450
+ expectedResponse: import_zod11.z.string().optional(),
424
451
  /** JMESPath expression to extract from response */
425
- expectedResponseJMESPath: import_zod10.z.string().optional()
452
+ expectedResponseJMESPath: import_zod11.z.string().optional()
426
453
  });
427
454
 
428
455
  // src/test/command-execution.ts
429
- var import_zod11 = require("zod");
456
+ var import_zod12 = require("zod");
430
457
  var AllowedCommands = [
431
458
  "yarn install --no-immutable && yarn build",
432
459
  "npm run build",
433
460
  "yarn typecheck"
434
461
  ];
435
462
  var CommandExecutionTestSchema = BaseTestSchema.extend({
436
- type: import_zod11.z.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
463
+ type: import_zod12.z.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
437
464
  /** Command to execute (must be in AllowedCommands) */
438
- command: import_zod11.z.string().refine((value) => AllowedCommands.includes(value), {
465
+ command: import_zod12.z.string().refine((value) => AllowedCommands.includes(value), {
439
466
  message: `Command must be one of: ${AllowedCommands.join(", ")}`
440
467
  }),
441
468
  /** Expected exit code (default: 0) */
442
- expectedExitCode: import_zod11.z.number().default(0).optional()
469
+ expectedExitCode: import_zod12.z.number().default(0).optional()
443
470
  });
444
471
 
445
472
  // src/test/file-presence.ts
446
- var import_zod12 = require("zod");
473
+ var import_zod13 = require("zod");
447
474
  var FilePresenceTestSchema = BaseTestSchema.extend({
448
- type: import_zod12.z.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
475
+ type: import_zod13.z.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
449
476
  /** Paths to check */
450
- paths: import_zod12.z.array(import_zod12.z.string()),
477
+ paths: import_zod13.z.array(import_zod13.z.string()),
451
478
  /** Whether files should exist (true) or not exist (false) */
452
- shouldExist: import_zod12.z.boolean()
479
+ shouldExist: import_zod13.z.boolean()
453
480
  });
454
481
 
455
482
  // src/test/file-content.ts
456
- var import_zod13 = require("zod");
457
- var FileContentCheckSchema = import_zod13.z.object({
483
+ var import_zod14 = require("zod");
484
+ var FileContentCheckSchema = import_zod14.z.object({
458
485
  /** Strings that must be present in the file */
459
- contains: import_zod13.z.array(import_zod13.z.string()).optional(),
486
+ contains: import_zod14.z.array(import_zod14.z.string()).optional(),
460
487
  /** Strings that must NOT be present in the file */
461
- notContains: import_zod13.z.array(import_zod13.z.string()).optional(),
488
+ notContains: import_zod14.z.array(import_zod14.z.string()).optional(),
462
489
  /** Regex pattern the content must match */
463
- matches: import_zod13.z.string().optional(),
490
+ matches: import_zod14.z.string().optional(),
464
491
  /** JSON path checks for structured content */
465
- jsonPath: import_zod13.z.array(
466
- import_zod13.z.object({
467
- path: import_zod13.z.string(),
468
- value: import_zod13.z.unknown()
492
+ jsonPath: import_zod14.z.array(
493
+ import_zod14.z.object({
494
+ path: import_zod14.z.string(),
495
+ value: import_zod14.z.unknown()
469
496
  })
470
497
  ).optional(),
471
498
  /** Lines that should be added (for diff checking) */
472
- added: import_zod13.z.array(import_zod13.z.string()).optional(),
499
+ added: import_zod14.z.array(import_zod14.z.string()).optional(),
473
500
  /** Lines that should be removed (for diff checking) */
474
- removed: import_zod13.z.array(import_zod13.z.string()).optional()
501
+ removed: import_zod14.z.array(import_zod14.z.string()).optional()
475
502
  });
476
503
  var FileContentTestSchema = BaseTestSchema.extend({
477
- type: import_zod13.z.literal("FILE_CONTENT" /* FILE_CONTENT */),
504
+ type: import_zod14.z.literal("FILE_CONTENT" /* FILE_CONTENT */),
478
505
  /** Path to the file to check */
479
- path: import_zod13.z.string(),
506
+ path: import_zod14.z.string(),
480
507
  /** Content checks to perform */
481
508
  checks: FileContentCheckSchema
482
509
  });
483
510
 
484
511
  // src/test/build-check.ts
485
- var import_zod14 = require("zod");
512
+ var import_zod15 = require("zod");
486
513
  var BuildCheckTestSchema = BaseTestSchema.extend({
487
- type: import_zod14.z.literal("BUILD_CHECK" /* BUILD_CHECK */),
514
+ type: import_zod15.z.literal("BUILD_CHECK" /* BUILD_CHECK */),
488
515
  /** Build command to execute */
489
- command: import_zod14.z.string(),
516
+ command: import_zod15.z.string(),
490
517
  /** Whether the build should succeed */
491
- expectSuccess: import_zod14.z.boolean(),
518
+ expectSuccess: import_zod15.z.boolean(),
492
519
  /** Maximum allowed warnings (optional) */
493
- allowedWarnings: import_zod14.z.number().optional(),
520
+ allowedWarnings: import_zod15.z.number().optional(),
494
521
  /** Timeout in milliseconds */
495
- timeout: import_zod14.z.number().optional()
522
+ timeout: import_zod15.z.number().optional()
496
523
  });
497
524
 
498
525
  // src/test/vitest.ts
499
- var import_zod15 = require("zod");
526
+ var import_zod16 = require("zod");
500
527
  var VitestTestSchema = BaseTestSchema.extend({
501
- type: import_zod15.z.literal("VITEST" /* VITEST */),
528
+ type: import_zod16.z.literal("VITEST" /* VITEST */),
502
529
  /** Test file content */
503
- testFile: import_zod15.z.string(),
530
+ testFile: import_zod16.z.string(),
504
531
  /** Name of the test file */
505
- testFileName: import_zod15.z.string(),
532
+ testFileName: import_zod16.z.string(),
506
533
  /** Minimum pass rate required (0-100) */
507
- minPassRate: import_zod15.z.number().min(0).max(100)
534
+ minPassRate: import_zod16.z.number().min(0).max(100)
508
535
  });
509
536
 
510
537
  // src/test/playwright-nl.ts
511
- var import_zod16 = require("zod");
538
+ var import_zod17 = require("zod");
512
539
  var PlaywrightNLTestSchema = BaseTestSchema.extend({
513
- type: import_zod16.z.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
540
+ type: import_zod17.z.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
514
541
  /** Natural language steps to execute */
515
- steps: import_zod16.z.array(import_zod16.z.string()),
542
+ steps: import_zod17.z.array(import_zod17.z.string()),
516
543
  /** Expected outcome description */
517
- expectedOutcome: import_zod16.z.string(),
544
+ expectedOutcome: import_zod17.z.string(),
518
545
  /** Timeout in milliseconds */
519
- timeout: import_zod16.z.number().optional()
546
+ timeout: import_zod17.z.number().optional()
520
547
  });
521
548
 
522
549
  // src/test/index.ts
523
- var TestSchema = import_zod17.z.discriminatedUnion("type", [
550
+ var TestSchema = import_zod18.z.discriminatedUnion("type", [
524
551
  LLMTestSchema,
525
552
  ToolTestSchema,
526
553
  SiteConfigTestSchema,
@@ -533,66 +560,66 @@ var TestSchema = import_zod17.z.discriminatedUnion("type", [
533
560
  ]);
534
561
 
535
562
  // src/scenario/assertions.ts
536
- var import_zod18 = require("zod");
537
- var SkillWasCalledAssertionSchema = import_zod18.z.object({
538
- type: import_zod18.z.literal("skill_was_called"),
563
+ var import_zod19 = require("zod");
564
+ var SkillWasCalledAssertionSchema = import_zod19.z.object({
565
+ type: import_zod19.z.literal("skill_was_called"),
539
566
  /** Name of the skill that must have been called (matched against trace Skill tool args) */
540
- skillName: import_zod18.z.string()
567
+ skillName: import_zod19.z.string()
541
568
  });
542
- var BuildPassedAssertionSchema = import_zod18.z.object({
543
- type: import_zod18.z.literal("build_passed"),
569
+ var BuildPassedAssertionSchema = import_zod19.z.object({
570
+ type: import_zod19.z.literal("build_passed"),
544
571
  /** Command to run (default: "yarn build") */
545
- command: import_zod18.z.string().optional(),
572
+ command: import_zod19.z.string().optional(),
546
573
  /** Expected exit code (default: 0) */
547
- expectedExitCode: import_zod18.z.number().int().optional()
574
+ expectedExitCode: import_zod19.z.number().int().optional()
548
575
  });
549
- var LlmJudgeAssertionSchema = import_zod18.z.object({
550
- type: import_zod18.z.literal("llm_judge"),
576
+ var LlmJudgeAssertionSchema = import_zod19.z.object({
577
+ type: import_zod19.z.literal("llm_judge"),
551
578
  /** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
552
- prompt: import_zod18.z.string(),
579
+ prompt: import_zod19.z.string(),
553
580
  /** Optional system prompt for the judge (default asks for JSON with score) */
554
- systemPrompt: import_zod18.z.string().optional(),
581
+ systemPrompt: import_zod19.z.string().optional(),
555
582
  /** Minimum score to pass (0-100, default 70) */
556
- minScore: import_zod18.z.number().int().min(0).max(100).optional(),
583
+ minScore: import_zod19.z.number().int().min(0).max(100).optional(),
557
584
  /** Model for the judge (e.g. claude-3-5-haiku) */
558
- model: import_zod18.z.string().optional(),
559
- maxTokens: import_zod18.z.number().int().optional(),
560
- temperature: import_zod18.z.number().min(0).max(1).optional()
585
+ model: import_zod19.z.string().optional(),
586
+ maxTokens: import_zod19.z.number().int().optional(),
587
+ temperature: import_zod19.z.number().min(0).max(1).optional()
561
588
  });
562
- var AssertionSchema = import_zod18.z.union([
589
+ var AssertionSchema = import_zod19.z.union([
563
590
  SkillWasCalledAssertionSchema,
564
591
  BuildPassedAssertionSchema,
565
592
  LlmJudgeAssertionSchema
566
593
  ]);
567
594
 
568
595
  // src/scenario/environment.ts
569
- var import_zod19 = require("zod");
570
- var LocalProjectConfigSchema = import_zod19.z.object({
596
+ var import_zod20 = require("zod");
597
+ var LocalProjectConfigSchema = import_zod20.z.object({
571
598
  /** Template ID to use for the local project */
572
- templateId: import_zod19.z.string().optional(),
599
+ templateId: import_zod20.z.string().optional(),
573
600
  /** Files to create in the project */
574
- files: import_zod19.z.array(
575
- import_zod19.z.object({
576
- path: import_zod19.z.string().min(1),
577
- content: import_zod19.z.string().min(1)
601
+ files: import_zod20.z.array(
602
+ import_zod20.z.object({
603
+ path: import_zod20.z.string().min(1),
604
+ content: import_zod20.z.string().min(1)
578
605
  })
579
606
  ).optional()
580
607
  });
581
- var MetaSiteConfigSchema = import_zod19.z.object({
582
- configurations: import_zod19.z.array(
583
- import_zod19.z.object({
584
- name: import_zod19.z.string().min(1),
585
- apiCalls: import_zod19.z.array(
586
- import_zod19.z.object({
587
- url: import_zod19.z.string().url(),
588
- method: import_zod19.z.enum(["POST", "PUT"]),
589
- body: import_zod19.z.string()
608
+ var MetaSiteConfigSchema = import_zod20.z.object({
609
+ configurations: import_zod20.z.array(
610
+ import_zod20.z.object({
611
+ name: import_zod20.z.string().min(1),
612
+ apiCalls: import_zod20.z.array(
613
+ import_zod20.z.object({
614
+ url: import_zod20.z.string().url(),
615
+ method: import_zod20.z.enum(["POST", "PUT"]),
616
+ body: import_zod20.z.string()
590
617
  })
591
618
  )
592
619
  })
593
620
  ).optional()
594
621
  });
595
- var EnvironmentSchema = import_zod19.z.object({
622
+ var EnvironmentSchema = import_zod20.z.object({
596
623
  /** Local project configuration */
597
624
  localProject: LocalProjectConfigSchema.optional(),
598
625
  /** Meta site configuration */
@@ -600,54 +627,54 @@ var EnvironmentSchema = import_zod19.z.object({
600
627
  });
601
628
 
602
629
  // src/scenario/test-scenario.ts
603
- var import_zod21 = require("zod");
630
+ var import_zod22 = require("zod");
604
631
 
605
632
  // src/assertion/assertion.ts
606
- var import_zod20 = require("zod");
607
- var AssertionTypeSchema = import_zod20.z.enum([
633
+ var import_zod21 = require("zod");
634
+ var AssertionTypeSchema = import_zod21.z.enum([
608
635
  "skill_was_called",
609
636
  "build_passed",
610
637
  "llm_judge"
611
638
  ]);
612
- var AssertionParameterTypeSchema = import_zod20.z.enum([
639
+ var AssertionParameterTypeSchema = import_zod21.z.enum([
613
640
  "string",
614
641
  "number",
615
642
  "boolean"
616
643
  ]);
617
- var AssertionParameterSchema = import_zod20.z.object({
644
+ var AssertionParameterSchema = import_zod21.z.object({
618
645
  /** Parameter name (used as key in params object) */
619
- name: import_zod20.z.string().min(1),
646
+ name: import_zod21.z.string().min(1),
620
647
  /** Display label for the parameter */
621
- label: import_zod20.z.string().min(1),
648
+ label: import_zod21.z.string().min(1),
622
649
  /** Parameter type */
623
650
  type: AssertionParameterTypeSchema,
624
651
  /** Whether this parameter is required */
625
- required: import_zod20.z.boolean(),
652
+ required: import_zod21.z.boolean(),
626
653
  /** Default value (optional, used when not provided) */
627
- defaultValue: import_zod20.z.union([import_zod20.z.string(), import_zod20.z.number(), import_zod20.z.boolean()]).optional(),
654
+ defaultValue: import_zod21.z.union([import_zod21.z.string(), import_zod21.z.number(), import_zod21.z.boolean()]).optional(),
628
655
  /** If true, parameter is hidden by default behind "Show advanced options" */
629
- advanced: import_zod20.z.boolean().optional()
656
+ advanced: import_zod21.z.boolean().optional()
630
657
  });
631
- var ScenarioAssertionLinkSchema = import_zod20.z.object({
658
+ var ScenarioAssertionLinkSchema = import_zod21.z.object({
632
659
  /** ID of the assertion (can be system assertion like 'system:skill_was_called' or custom assertion UUID) */
633
- assertionId: import_zod20.z.string(),
660
+ assertionId: import_zod21.z.string(),
634
661
  /** Parameter values for this assertion in this scenario */
635
- params: import_zod20.z.record(
636
- import_zod20.z.string(),
637
- import_zod20.z.union([import_zod20.z.string(), import_zod20.z.number(), import_zod20.z.boolean(), import_zod20.z.null()])
662
+ params: import_zod21.z.record(
663
+ import_zod21.z.string(),
664
+ import_zod21.z.union([import_zod21.z.string(), import_zod21.z.number(), import_zod21.z.boolean(), import_zod21.z.null()])
638
665
  ).optional()
639
666
  });
640
- var SkillWasCalledConfigSchema = import_zod20.z.object({
667
+ var SkillWasCalledConfigSchema = import_zod21.z.object({
641
668
  /** Name of the skill that must have been called */
642
- skillName: import_zod20.z.string().min(1)
669
+ skillName: import_zod21.z.string().min(1)
643
670
  });
644
- var BuildPassedConfigSchema = import_zod20.z.strictObject({
671
+ var BuildPassedConfigSchema = import_zod21.z.strictObject({
645
672
  /** Command to run (default: "yarn build") */
646
- command: import_zod20.z.string().optional(),
673
+ command: import_zod21.z.string().optional(),
647
674
  /** Expected exit code (default: 0) */
648
- expectedExitCode: import_zod20.z.number().int().optional()
675
+ expectedExitCode: import_zod21.z.number().int().optional()
649
676
  });
650
- var LlmJudgeConfigSchema = import_zod20.z.object({
677
+ var LlmJudgeConfigSchema = import_zod21.z.object({
651
678
  /**
652
679
  * Prompt template with placeholders:
653
680
  * - {{output}}: agent's final output
@@ -658,28 +685,28 @@ var LlmJudgeConfigSchema = import_zod20.z.object({
658
685
  * - {{trace}}: step-by-step trace of tool calls
659
686
  * - Custom parameters defined in the parameters array
660
687
  */
661
- prompt: import_zod20.z.string().min(1),
688
+ prompt: import_zod21.z.string().min(1),
662
689
  /** Optional system prompt for the judge */
663
- systemPrompt: import_zod20.z.string().optional(),
690
+ systemPrompt: import_zod21.z.string().optional(),
664
691
  /** Minimum score to pass (0-100, default 70) */
665
- minScore: import_zod20.z.number().int().min(0).max(100).optional(),
692
+ minScore: import_zod21.z.number().int().min(0).max(100).optional(),
666
693
  /** Model for the judge (e.g. claude-3-5-haiku-20241022) */
667
- model: import_zod20.z.string().optional(),
694
+ model: import_zod21.z.string().optional(),
668
695
  /** Max output tokens */
669
- maxTokens: import_zod20.z.number().int().optional(),
696
+ maxTokens: import_zod21.z.number().int().optional(),
670
697
  /** Temperature (0-1) */
671
- temperature: import_zod20.z.number().min(0).max(1).optional(),
698
+ temperature: import_zod21.z.number().min(0).max(1).optional(),
672
699
  /** User-defined parameters for this assertion */
673
- parameters: import_zod20.z.array(AssertionParameterSchema).optional()
700
+ parameters: import_zod21.z.array(AssertionParameterSchema).optional()
674
701
  });
675
- var AssertionConfigSchema = import_zod20.z.union([
702
+ var AssertionConfigSchema = import_zod21.z.union([
676
703
  LlmJudgeConfigSchema,
677
704
  // requires prompt - check first
678
705
  SkillWasCalledConfigSchema,
679
706
  // requires skillName
680
707
  BuildPassedConfigSchema,
681
708
  // all optional, uses strictObject to reject unknown keys
682
- import_zod20.z.object({})
709
+ import_zod21.z.object({})
683
710
  // fallback empty config
684
711
  ]);
685
712
  var CustomAssertionSchema = TenantEntitySchema.extend({
@@ -724,23 +751,23 @@ function getLlmJudgeConfig(assertion) {
724
751
  }
725
752
 
726
753
  // src/scenario/test-scenario.ts
727
- var ExpectedFileSchema = import_zod21.z.object({
754
+ var ExpectedFileSchema = import_zod22.z.object({
728
755
  /** Relative path where the file should be created */
729
- path: import_zod21.z.string(),
756
+ path: import_zod22.z.string(),
730
757
  /** Optional expected content */
731
- content: import_zod21.z.string().optional()
758
+ content: import_zod22.z.string().optional()
732
759
  });
733
760
  var TestScenarioSchema = TenantEntitySchema.extend({
734
761
  /** The prompt sent to the agent to trigger the task */
735
- triggerPrompt: import_zod21.z.string().min(10),
762
+ triggerPrompt: import_zod22.z.string().min(10),
736
763
  /** ID of the template to use for this scenario (null = no template) */
737
- templateId: import_zod21.z.string().nullish(),
764
+ templateId: import_zod22.z.string().nullish(),
738
765
  /** Inline assertions to evaluate for this scenario (legacy) */
739
- assertions: import_zod21.z.array(AssertionSchema).optional(),
766
+ assertions: import_zod22.z.array(AssertionSchema).optional(),
740
767
  /** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
741
- assertionIds: import_zod21.z.array(import_zod21.z.string()).optional(),
768
+ assertionIds: import_zod22.z.array(import_zod22.z.string()).optional(),
742
769
  /** Linked assertions with per-scenario parameter values */
743
- assertionLinks: import_zod21.z.array(ScenarioAssertionLinkSchema).optional()
770
+ assertionLinks: import_zod22.z.array(ScenarioAssertionLinkSchema).optional()
744
771
  });
745
772
  var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
746
773
  id: true,
@@ -751,10 +778,10 @@ var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
751
778
  var UpdateTestScenarioInputSchema = CreateTestScenarioInputSchema.partial();
752
779
 
753
780
  // src/suite/test-suite.ts
754
- var import_zod22 = require("zod");
781
+ var import_zod23 = require("zod");
755
782
  var TestSuiteSchema = TenantEntitySchema.extend({
756
783
  /** IDs of test scenarios in this suite */
757
- scenarioIds: import_zod22.z.array(import_zod22.z.string())
784
+ scenarioIds: import_zod23.z.array(import_zod23.z.string())
758
785
  });
759
786
  var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
760
787
  id: true,
@@ -765,21 +792,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
765
792
  var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
766
793
 
767
794
  // src/evaluation/metrics.ts
768
- var import_zod23 = require("zod");
769
- var TokenUsageSchema = import_zod23.z.object({
770
- prompt: import_zod23.z.number(),
771
- completion: import_zod23.z.number(),
772
- total: import_zod23.z.number()
773
- });
774
- var EvalMetricsSchema = import_zod23.z.object({
775
- totalAssertions: import_zod23.z.number(),
776
- passed: import_zod23.z.number(),
777
- failed: import_zod23.z.number(),
778
- skipped: import_zod23.z.number(),
779
- errors: import_zod23.z.number(),
780
- passRate: import_zod23.z.number(),
781
- avgDuration: import_zod23.z.number(),
782
- totalDuration: import_zod23.z.number()
795
+ var import_zod24 = require("zod");
796
+ var TokenUsageSchema = import_zod24.z.object({
797
+ prompt: import_zod24.z.number(),
798
+ completion: import_zod24.z.number(),
799
+ total: import_zod24.z.number()
800
+ });
801
+ var EvalMetricsSchema = import_zod24.z.object({
802
+ totalAssertions: import_zod24.z.number(),
803
+ passed: import_zod24.z.number(),
804
+ failed: import_zod24.z.number(),
805
+ skipped: import_zod24.z.number(),
806
+ errors: import_zod24.z.number(),
807
+ passRate: import_zod24.z.number(),
808
+ avgDuration: import_zod24.z.number(),
809
+ totalDuration: import_zod24.z.number()
783
810
  });
784
811
  var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
785
812
  EvalStatus2["PENDING"] = "pending";
@@ -789,7 +816,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
789
816
  EvalStatus2["CANCELLED"] = "cancelled";
790
817
  return EvalStatus2;
791
818
  })(EvalStatus || {});
792
- var EvalStatusSchema = import_zod23.z.enum(EvalStatus);
819
+ var EvalStatusSchema = import_zod24.z.enum(EvalStatus);
793
820
  var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
794
821
  LLMStepType2["COMPLETION"] = "completion";
795
822
  LLMStepType2["TOOL_USE"] = "tool_use";
@@ -797,52 +824,52 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
797
824
  LLMStepType2["THINKING"] = "thinking";
798
825
  return LLMStepType2;
799
826
  })(LLMStepType || {});
800
- var LLMTraceStepSchema = import_zod23.z.object({
801
- id: import_zod23.z.string(),
802
- stepNumber: import_zod23.z.number(),
803
- type: import_zod23.z.enum(LLMStepType),
804
- model: import_zod23.z.string(),
805
- provider: import_zod23.z.string(),
806
- startedAt: import_zod23.z.string(),
807
- durationMs: import_zod23.z.number(),
827
+ var LLMTraceStepSchema = import_zod24.z.object({
828
+ id: import_zod24.z.string(),
829
+ stepNumber: import_zod24.z.number(),
830
+ type: import_zod24.z.enum(LLMStepType),
831
+ model: import_zod24.z.string(),
832
+ provider: import_zod24.z.string(),
833
+ startedAt: import_zod24.z.string(),
834
+ durationMs: import_zod24.z.number(),
808
835
  tokenUsage: TokenUsageSchema,
809
- costUsd: import_zod23.z.number(),
810
- toolName: import_zod23.z.string().optional(),
811
- toolArguments: import_zod23.z.string().optional(),
812
- inputPreview: import_zod23.z.string().optional(),
813
- outputPreview: import_zod23.z.string().optional(),
814
- success: import_zod23.z.boolean(),
815
- error: import_zod23.z.string().optional()
816
- });
817
- var LLMBreakdownStatsSchema = import_zod23.z.object({
818
- count: import_zod23.z.number(),
819
- durationMs: import_zod23.z.number(),
820
- tokens: import_zod23.z.number(),
821
- costUsd: import_zod23.z.number()
822
- });
823
- var LLMTraceSummarySchema = import_zod23.z.object({
824
- totalSteps: import_zod23.z.number(),
825
- totalDurationMs: import_zod23.z.number(),
836
+ costUsd: import_zod24.z.number(),
837
+ toolName: import_zod24.z.string().optional(),
838
+ toolArguments: import_zod24.z.string().optional(),
839
+ inputPreview: import_zod24.z.string().optional(),
840
+ outputPreview: import_zod24.z.string().optional(),
841
+ success: import_zod24.z.boolean(),
842
+ error: import_zod24.z.string().optional()
843
+ });
844
+ var LLMBreakdownStatsSchema = import_zod24.z.object({
845
+ count: import_zod24.z.number(),
846
+ durationMs: import_zod24.z.number(),
847
+ tokens: import_zod24.z.number(),
848
+ costUsd: import_zod24.z.number()
849
+ });
850
+ var LLMTraceSummarySchema = import_zod24.z.object({
851
+ totalSteps: import_zod24.z.number(),
852
+ totalDurationMs: import_zod24.z.number(),
826
853
  totalTokens: TokenUsageSchema,
827
- totalCostUsd: import_zod23.z.number(),
828
- stepTypeBreakdown: import_zod23.z.record(import_zod23.z.string(), LLMBreakdownStatsSchema).optional(),
829
- modelBreakdown: import_zod23.z.record(import_zod23.z.string(), LLMBreakdownStatsSchema),
830
- modelsUsed: import_zod23.z.array(import_zod23.z.string())
831
- });
832
- var LLMTraceSchema = import_zod23.z.object({
833
- id: import_zod23.z.string(),
834
- steps: import_zod23.z.array(LLMTraceStepSchema),
854
+ totalCostUsd: import_zod24.z.number(),
855
+ stepTypeBreakdown: import_zod24.z.record(import_zod24.z.string(), LLMBreakdownStatsSchema).optional(),
856
+ modelBreakdown: import_zod24.z.record(import_zod24.z.string(), LLMBreakdownStatsSchema),
857
+ modelsUsed: import_zod24.z.array(import_zod24.z.string())
858
+ });
859
+ var LLMTraceSchema = import_zod24.z.object({
860
+ id: import_zod24.z.string(),
861
+ steps: import_zod24.z.array(LLMTraceStepSchema),
835
862
  summary: LLMTraceSummarySchema
836
863
  });
837
864
 
838
865
  // src/evaluation/eval-result.ts
839
- var import_zod26 = require("zod");
866
+ var import_zod27 = require("zod");
840
867
 
841
868
  // src/evaluation/eval-run.ts
842
- var import_zod25 = require("zod");
869
+ var import_zod26 = require("zod");
843
870
 
844
871
  // src/evaluation/live-trace.ts
845
- var import_zod24 = require("zod");
872
+ var import_zod25 = require("zod");
846
873
  var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
847
874
  LiveTraceEventType2["THINKING"] = "thinking";
848
875
  LiveTraceEventType2["TOOL_USE"] = "tool_use";
@@ -856,37 +883,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
856
883
  LiveTraceEventType2["USER"] = "user";
857
884
  return LiveTraceEventType2;
858
885
  })(LiveTraceEventType || {});
859
- var LiveTraceEventSchema = import_zod24.z.object({
886
+ var LiveTraceEventSchema = import_zod25.z.object({
860
887
  /** The evaluation run ID */
861
- evalRunId: import_zod24.z.string(),
888
+ evalRunId: import_zod25.z.string(),
862
889
  /** The scenario ID being executed */
863
- scenarioId: import_zod24.z.string(),
890
+ scenarioId: import_zod25.z.string(),
864
891
  /** The scenario name for display */
865
- scenarioName: import_zod24.z.string(),
892
+ scenarioName: import_zod25.z.string(),
866
893
  /** The target ID (skill, agent, etc.) */
867
- targetId: import_zod24.z.string(),
894
+ targetId: import_zod25.z.string(),
868
895
  /** The target name for display */
869
- targetName: import_zod24.z.string(),
896
+ targetName: import_zod25.z.string(),
870
897
  /** Step number in the current scenario execution */
871
- stepNumber: import_zod24.z.number(),
898
+ stepNumber: import_zod25.z.number(),
872
899
  /** Type of trace event */
873
- type: import_zod24.z.enum(LiveTraceEventType),
900
+ type: import_zod25.z.enum(LiveTraceEventType),
874
901
  /** Tool name if this is a tool_use event */
875
- toolName: import_zod24.z.string().optional(),
902
+ toolName: import_zod25.z.string().optional(),
876
903
  /** Tool arguments preview (truncated JSON) */
877
- toolArgs: import_zod24.z.string().optional(),
904
+ toolArgs: import_zod25.z.string().optional(),
878
905
  /** Output preview (truncated text) */
879
- outputPreview: import_zod24.z.string().optional(),
906
+ outputPreview: import_zod25.z.string().optional(),
880
907
  /** File path for file operations */
881
- filePath: import_zod24.z.string().optional(),
908
+ filePath: import_zod25.z.string().optional(),
882
909
  /** Elapsed time in milliseconds for progress events */
883
- elapsedMs: import_zod24.z.number().optional(),
910
+ elapsedMs: import_zod25.z.number().optional(),
884
911
  /** Thinking/reasoning text from Claude */
885
- thinking: import_zod24.z.string().optional(),
912
+ thinking: import_zod25.z.string().optional(),
886
913
  /** Timestamp when this event occurred */
887
- timestamp: import_zod24.z.string(),
914
+ timestamp: import_zod25.z.string(),
888
915
  /** Whether this is the final event for this scenario */
889
- isComplete: import_zod24.z.boolean()
916
+ isComplete: import_zod25.z.boolean()
890
917
  });
891
918
  var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
892
919
  function parseTraceEventLine(line) {
@@ -914,14 +941,14 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
914
941
  TriggerType2["MANUAL"] = "MANUAL";
915
942
  return TriggerType2;
916
943
  })(TriggerType || {});
917
- var TriggerMetadataSchema = import_zod25.z.object({
918
- version: import_zod25.z.string().optional(),
919
- resourceUpdated: import_zod25.z.array(import_zod25.z.string()).optional()
944
+ var TriggerMetadataSchema = import_zod26.z.object({
945
+ version: import_zod26.z.string().optional(),
946
+ resourceUpdated: import_zod26.z.array(import_zod26.z.string()).optional()
920
947
  });
921
- var TriggerSchema = import_zod25.z.object({
922
- id: import_zod25.z.string(),
948
+ var TriggerSchema = import_zod26.z.object({
949
+ id: import_zod26.z.string(),
923
950
  metadata: TriggerMetadataSchema.optional(),
924
- type: import_zod25.z.enum(TriggerType)
951
+ type: import_zod26.z.enum(TriggerType)
925
952
  });
926
953
  var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
927
954
  FailureCategory2["MISSING_FILE"] = "missing_file";
@@ -939,28 +966,28 @@ var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
939
966
  FailureSeverity2["LOW"] = "low";
940
967
  return FailureSeverity2;
941
968
  })(FailureSeverity || {});
942
- var DiffLineTypeSchema = import_zod25.z.enum(["added", "removed", "unchanged"]);
943
- var DiffLineSchema = import_zod25.z.object({
969
+ var DiffLineTypeSchema = import_zod26.z.enum(["added", "removed", "unchanged"]);
970
+ var DiffLineSchema = import_zod26.z.object({
944
971
  type: DiffLineTypeSchema,
945
- content: import_zod25.z.string(),
946
- lineNumber: import_zod25.z.number()
947
- });
948
- var DiffContentSchema = import_zod25.z.object({
949
- path: import_zod25.z.string(),
950
- expected: import_zod25.z.string(),
951
- actual: import_zod25.z.string(),
952
- diffLines: import_zod25.z.array(DiffLineSchema),
953
- renamedFrom: import_zod25.z.string().optional()
954
- });
955
- var CommandExecutionSchema = import_zod25.z.object({
956
- command: import_zod25.z.string(),
957
- exitCode: import_zod25.z.number(),
958
- output: import_zod25.z.string().optional(),
959
- duration: import_zod25.z.number()
960
- });
961
- var FileModificationSchema = import_zod25.z.object({
962
- path: import_zod25.z.string(),
963
- action: import_zod25.z.enum(["created", "modified", "deleted"])
972
+ content: import_zod26.z.string(),
973
+ lineNumber: import_zod26.z.number()
974
+ });
975
+ var DiffContentSchema = import_zod26.z.object({
976
+ path: import_zod26.z.string(),
977
+ expected: import_zod26.z.string(),
978
+ actual: import_zod26.z.string(),
979
+ diffLines: import_zod26.z.array(DiffLineSchema),
980
+ renamedFrom: import_zod26.z.string().optional()
981
+ });
982
+ var CommandExecutionSchema = import_zod26.z.object({
983
+ command: import_zod26.z.string(),
984
+ exitCode: import_zod26.z.number(),
985
+ output: import_zod26.z.string().optional(),
986
+ duration: import_zod26.z.number()
987
+ });
988
+ var FileModificationSchema = import_zod26.z.object({
989
+ path: import_zod26.z.string(),
990
+ action: import_zod26.z.enum(["created", "modified", "deleted"])
964
991
  });
965
992
  var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
966
993
  TemplateFileStatus2["NEW"] = "new";
@@ -968,75 +995,79 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
968
995
  TemplateFileStatus2["UNCHANGED"] = "unchanged";
969
996
  return TemplateFileStatus2;
970
997
  })(TemplateFileStatus || {});
971
- var TemplateFileSchema = import_zod25.z.object({
998
+ var TemplateFileSchema = import_zod26.z.object({
972
999
  /** Relative path within the template */
973
- path: import_zod25.z.string(),
1000
+ path: import_zod26.z.string(),
974
1001
  /** Full file content after execution */
975
- content: import_zod25.z.string(),
1002
+ content: import_zod26.z.string(),
976
1003
  /** File status (new, modified, unchanged) */
977
- status: import_zod25.z.enum(["new", "modified", "unchanged"])
978
- });
979
- var ApiCallSchema = import_zod25.z.object({
980
- endpoint: import_zod25.z.string(),
981
- tokensUsed: import_zod25.z.number(),
982
- duration: import_zod25.z.number()
983
- });
984
- var ExecutionTraceSchema = import_zod25.z.object({
985
- commands: import_zod25.z.array(CommandExecutionSchema),
986
- filesModified: import_zod25.z.array(FileModificationSchema),
987
- apiCalls: import_zod25.z.array(ApiCallSchema),
988
- totalDuration: import_zod25.z.number()
989
- });
990
- var FailureAnalysisSchema = import_zod25.z.object({
991
- category: import_zod25.z.enum(FailureCategory),
992
- severity: import_zod25.z.enum(FailureSeverity),
993
- summary: import_zod25.z.string(),
994
- details: import_zod25.z.string(),
995
- rootCause: import_zod25.z.string(),
996
- suggestedFix: import_zod25.z.string(),
997
- relatedAssertions: import_zod25.z.array(import_zod25.z.string()),
998
- codeSnippet: import_zod25.z.string().optional(),
999
- similarIssues: import_zod25.z.array(import_zod25.z.string()).optional(),
1000
- patternId: import_zod25.z.string().optional(),
1004
+ status: import_zod26.z.enum(["new", "modified", "unchanged"])
1005
+ });
1006
+ var ApiCallSchema = import_zod26.z.object({
1007
+ endpoint: import_zod26.z.string(),
1008
+ tokensUsed: import_zod26.z.number(),
1009
+ duration: import_zod26.z.number()
1010
+ });
1011
+ var ExecutionTraceSchema = import_zod26.z.object({
1012
+ commands: import_zod26.z.array(CommandExecutionSchema),
1013
+ filesModified: import_zod26.z.array(FileModificationSchema),
1014
+ apiCalls: import_zod26.z.array(ApiCallSchema),
1015
+ totalDuration: import_zod26.z.number()
1016
+ });
1017
+ var FailureAnalysisSchema = import_zod26.z.object({
1018
+ category: import_zod26.z.enum(FailureCategory),
1019
+ severity: import_zod26.z.enum(FailureSeverity),
1020
+ summary: import_zod26.z.string(),
1021
+ details: import_zod26.z.string(),
1022
+ rootCause: import_zod26.z.string(),
1023
+ suggestedFix: import_zod26.z.string(),
1024
+ relatedAssertions: import_zod26.z.array(import_zod26.z.string()),
1025
+ codeSnippet: import_zod26.z.string().optional(),
1026
+ similarIssues: import_zod26.z.array(import_zod26.z.string()).optional(),
1027
+ patternId: import_zod26.z.string().optional(),
1001
1028
  // Extended fields for detailed debugging
1002
1029
  diff: DiffContentSchema.optional(),
1003
1030
  executionTrace: ExecutionTraceSchema.optional()
1004
1031
  });
1005
1032
  var EvalRunSchema = TenantEntitySchema.extend({
1006
1033
  /** Agent ID for this run */
1007
- agentId: import_zod25.z.string().optional(),
1034
+ agentId: import_zod26.z.string().optional(),
1008
1035
  /** Skills group ID for this run */
1009
- skillsGroupId: import_zod25.z.string().optional(),
1036
+ skillsGroupId: import_zod26.z.string().optional(),
1010
1037
  /** Scenario IDs to run */
1011
- scenarioIds: import_zod25.z.array(import_zod25.z.string()),
1038
+ scenarioIds: import_zod26.z.array(import_zod26.z.string()),
1012
1039
  /** Current status */
1013
1040
  status: EvalStatusSchema,
1014
1041
  /** Progress percentage (0-100) */
1015
- progress: import_zod25.z.number(),
1042
+ progress: import_zod26.z.number(),
1016
1043
  /** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
1017
- results: import_zod25.z.array(import_zod25.z.lazy(() => EvalRunResultSchema)),
1044
+ results: import_zod26.z.array(import_zod26.z.lazy(() => EvalRunResultSchema)),
1018
1045
  /** Aggregated metrics across all results */
1019
1046
  aggregateMetrics: EvalMetricsSchema,
1020
1047
  /** Failure analyses */
1021
- failureAnalyses: import_zod25.z.array(FailureAnalysisSchema).optional(),
1048
+ failureAnalyses: import_zod26.z.array(FailureAnalysisSchema).optional(),
1022
1049
  /** Aggregated LLM trace summary */
1023
1050
  llmTraceSummary: LLMTraceSummarySchema.optional(),
1024
1051
  /** What triggered this run */
1025
1052
  trigger: TriggerSchema.optional(),
1026
1053
  /** When the run started (set when evaluation is triggered) */
1027
- startedAt: import_zod25.z.string().optional(),
1054
+ startedAt: import_zod26.z.string().optional(),
1028
1055
  /** When the run completed */
1029
- completedAt: import_zod25.z.string().optional(),
1056
+ completedAt: import_zod26.z.string().optional(),
1030
1057
  /** Live trace events captured during execution (for playback on results page) */
1031
- liveTraceEvents: import_zod25.z.array(LiveTraceEventSchema).optional(),
1058
+ liveTraceEvents: import_zod26.z.array(LiveTraceEventSchema).optional(),
1032
1059
  /** Remote job ID for tracking execution in Dev Machines */
1033
- jobId: import_zod25.z.string().optional(),
1060
+ jobId: import_zod26.z.string().optional(),
1034
1061
  /** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
1035
- jobStatus: import_zod25.z.string().optional(),
1062
+ jobStatus: import_zod26.z.string().optional(),
1036
1063
  /** Remote job error message if the job failed */
1037
- jobError: import_zod25.z.string().optional(),
1064
+ jobError: import_zod26.z.string().optional(),
1038
1065
  /** Timestamp of the last job status check */
1039
- jobStatusCheckedAt: import_zod25.z.string().optional()
1066
+ jobStatusCheckedAt: import_zod26.z.string().optional(),
1067
+ /** MCP server IDs to enable for this run (optional) */
1068
+ mcpIds: import_zod26.z.array(import_zod26.z.string()).optional(),
1069
+ /** Sub-agent IDs to enable for this run (optional) */
1070
+ subAgentIds: import_zod26.z.array(import_zod26.z.string()).optional()
1040
1071
  });
1041
1072
  var CreateEvalRunInputSchema = EvalRunSchema.omit({
1042
1073
  id: true,
@@ -1049,28 +1080,28 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
1049
1080
  startedAt: true,
1050
1081
  completedAt: true
1051
1082
  });
1052
- var EvaluationProgressSchema = import_zod25.z.object({
1053
- runId: import_zod25.z.string(),
1054
- targetId: import_zod25.z.string(),
1055
- totalScenarios: import_zod25.z.number(),
1056
- completedScenarios: import_zod25.z.number(),
1057
- scenarioProgress: import_zod25.z.array(
1058
- import_zod25.z.object({
1059
- scenarioId: import_zod25.z.string(),
1060
- currentStep: import_zod25.z.string(),
1061
- error: import_zod25.z.string().optional()
1083
+ var EvaluationProgressSchema = import_zod26.z.object({
1084
+ runId: import_zod26.z.string(),
1085
+ targetId: import_zod26.z.string(),
1086
+ totalScenarios: import_zod26.z.number(),
1087
+ completedScenarios: import_zod26.z.number(),
1088
+ scenarioProgress: import_zod26.z.array(
1089
+ import_zod26.z.object({
1090
+ scenarioId: import_zod26.z.string(),
1091
+ currentStep: import_zod26.z.string(),
1092
+ error: import_zod26.z.string().optional()
1062
1093
  })
1063
1094
  ),
1064
- createdAt: import_zod25.z.number()
1095
+ createdAt: import_zod26.z.number()
1065
1096
  });
1066
- var EvaluationLogSchema = import_zod25.z.object({
1067
- runId: import_zod25.z.string(),
1068
- scenarioId: import_zod25.z.string(),
1069
- log: import_zod25.z.object({
1070
- level: import_zod25.z.enum(["info", "error", "debug"]),
1071
- message: import_zod25.z.string().optional(),
1072
- args: import_zod25.z.array(import_zod25.z.any()).optional(),
1073
- error: import_zod25.z.string().optional()
1097
+ var EvaluationLogSchema = import_zod26.z.object({
1098
+ runId: import_zod26.z.string(),
1099
+ scenarioId: import_zod26.z.string(),
1100
+ log: import_zod26.z.object({
1101
+ level: import_zod26.z.enum(["info", "error", "debug"]),
1102
+ message: import_zod26.z.string().optional(),
1103
+ args: import_zod26.z.array(import_zod26.z.any()).optional(),
1104
+ error: import_zod26.z.string().optional()
1074
1105
  })
1075
1106
  });
1076
1107
  var LLM_TIMEOUT = 12e4;
@@ -1083,91 +1114,91 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
1083
1114
  AssertionResultStatus2["ERROR"] = "error";
1084
1115
  return AssertionResultStatus2;
1085
1116
  })(AssertionResultStatus || {});
1086
- var AssertionResultSchema = import_zod26.z.object({
1087
- id: import_zod26.z.string(),
1088
- assertionId: import_zod26.z.string(),
1089
- assertionType: import_zod26.z.string(),
1090
- assertionName: import_zod26.z.string(),
1091
- status: import_zod26.z.enum(AssertionResultStatus),
1092
- message: import_zod26.z.string().optional(),
1093
- expected: import_zod26.z.string().optional(),
1094
- actual: import_zod26.z.string().optional(),
1095
- duration: import_zod26.z.number().optional(),
1096
- details: import_zod26.z.record(import_zod26.z.string(), import_zod26.z.unknown()).optional(),
1097
- llmTraceSteps: import_zod26.z.array(LLMTraceStepSchema).optional()
1098
- });
1099
- var EvalRunResultSchema = import_zod26.z.object({
1100
- id: import_zod26.z.string(),
1101
- targetId: import_zod26.z.string(),
1102
- targetName: import_zod26.z.string().optional(),
1103
- scenarioId: import_zod26.z.string(),
1104
- scenarioName: import_zod26.z.string(),
1117
+ var AssertionResultSchema = import_zod27.z.object({
1118
+ id: import_zod27.z.string(),
1119
+ assertionId: import_zod27.z.string(),
1120
+ assertionType: import_zod27.z.string(),
1121
+ assertionName: import_zod27.z.string(),
1122
+ status: import_zod27.z.enum(AssertionResultStatus),
1123
+ message: import_zod27.z.string().optional(),
1124
+ expected: import_zod27.z.string().optional(),
1125
+ actual: import_zod27.z.string().optional(),
1126
+ duration: import_zod27.z.number().optional(),
1127
+ details: import_zod27.z.record(import_zod27.z.string(), import_zod27.z.unknown()).optional(),
1128
+ llmTraceSteps: import_zod27.z.array(LLMTraceStepSchema).optional()
1129
+ });
1130
+ var EvalRunResultSchema = import_zod27.z.object({
1131
+ id: import_zod27.z.string(),
1132
+ targetId: import_zod27.z.string(),
1133
+ targetName: import_zod27.z.string().optional(),
1134
+ scenarioId: import_zod27.z.string(),
1135
+ scenarioName: import_zod27.z.string(),
1105
1136
  modelConfig: ModelConfigSchema.optional(),
1106
- assertionResults: import_zod26.z.array(AssertionResultSchema),
1137
+ assertionResults: import_zod27.z.array(AssertionResultSchema),
1107
1138
  metrics: EvalMetricsSchema.optional(),
1108
- passed: import_zod26.z.number(),
1109
- failed: import_zod26.z.number(),
1110
- passRate: import_zod26.z.number(),
1111
- duration: import_zod26.z.number(),
1112
- outputText: import_zod26.z.string().optional(),
1113
- files: import_zod26.z.array(ExpectedFileSchema).optional(),
1114
- fileDiffs: import_zod26.z.array(DiffContentSchema).optional(),
1139
+ passed: import_zod27.z.number(),
1140
+ failed: import_zod27.z.number(),
1141
+ passRate: import_zod27.z.number(),
1142
+ duration: import_zod27.z.number(),
1143
+ outputText: import_zod27.z.string().optional(),
1144
+ files: import_zod27.z.array(ExpectedFileSchema).optional(),
1145
+ fileDiffs: import_zod27.z.array(DiffContentSchema).optional(),
1115
1146
  /** Full template files after execution with status indicators */
1116
- templateFiles: import_zod26.z.array(TemplateFileSchema).optional(),
1117
- startedAt: import_zod26.z.string().optional(),
1118
- completedAt: import_zod26.z.string().optional(),
1147
+ templateFiles: import_zod27.z.array(TemplateFileSchema).optional(),
1148
+ startedAt: import_zod27.z.string().optional(),
1149
+ completedAt: import_zod27.z.string().optional(),
1119
1150
  llmTrace: LLMTraceSchema.optional()
1120
1151
  });
1121
- var PromptResultSchema = import_zod26.z.object({
1122
- text: import_zod26.z.string(),
1123
- files: import_zod26.z.array(import_zod26.z.unknown()).optional(),
1124
- finishReason: import_zod26.z.string().optional(),
1125
- reasoning: import_zod26.z.string().optional(),
1126
- reasoningDetails: import_zod26.z.unknown().optional(),
1127
- toolCalls: import_zod26.z.array(import_zod26.z.unknown()).optional(),
1128
- toolResults: import_zod26.z.array(import_zod26.z.unknown()).optional(),
1129
- warnings: import_zod26.z.array(import_zod26.z.unknown()).optional(),
1130
- sources: import_zod26.z.array(import_zod26.z.unknown()).optional(),
1131
- steps: import_zod26.z.array(import_zod26.z.unknown()),
1132
- generationTimeMs: import_zod26.z.number(),
1133
- prompt: import_zod26.z.string(),
1134
- systemPrompt: import_zod26.z.string(),
1135
- usage: import_zod26.z.object({
1136
- totalTokens: import_zod26.z.number().optional(),
1137
- totalMicrocentsSpent: import_zod26.z.number().optional()
1152
+ var PromptResultSchema = import_zod27.z.object({
1153
+ text: import_zod27.z.string(),
1154
+ files: import_zod27.z.array(import_zod27.z.unknown()).optional(),
1155
+ finishReason: import_zod27.z.string().optional(),
1156
+ reasoning: import_zod27.z.string().optional(),
1157
+ reasoningDetails: import_zod27.z.unknown().optional(),
1158
+ toolCalls: import_zod27.z.array(import_zod27.z.unknown()).optional(),
1159
+ toolResults: import_zod27.z.array(import_zod27.z.unknown()).optional(),
1160
+ warnings: import_zod27.z.array(import_zod27.z.unknown()).optional(),
1161
+ sources: import_zod27.z.array(import_zod27.z.unknown()).optional(),
1162
+ steps: import_zod27.z.array(import_zod27.z.unknown()),
1163
+ generationTimeMs: import_zod27.z.number(),
1164
+ prompt: import_zod27.z.string(),
1165
+ systemPrompt: import_zod27.z.string(),
1166
+ usage: import_zod27.z.object({
1167
+ totalTokens: import_zod27.z.number().optional(),
1168
+ totalMicrocentsSpent: import_zod27.z.number().optional()
1138
1169
  })
1139
1170
  });
1140
- var EvaluationResultSchema = import_zod26.z.object({
1141
- id: import_zod26.z.string(),
1142
- runId: import_zod26.z.string(),
1143
- timestamp: import_zod26.z.number(),
1171
+ var EvaluationResultSchema = import_zod27.z.object({
1172
+ id: import_zod27.z.string(),
1173
+ runId: import_zod27.z.string(),
1174
+ timestamp: import_zod27.z.number(),
1144
1175
  promptResult: PromptResultSchema,
1145
- testResults: import_zod26.z.array(import_zod26.z.unknown()),
1146
- tags: import_zod26.z.array(import_zod26.z.string()).optional(),
1147
- feedback: import_zod26.z.string().optional(),
1148
- score: import_zod26.z.number(),
1149
- suiteId: import_zod26.z.string().optional()
1150
- });
1151
- var LeanEvaluationResultSchema = import_zod26.z.object({
1152
- id: import_zod26.z.string(),
1153
- runId: import_zod26.z.string(),
1154
- timestamp: import_zod26.z.number(),
1155
- tags: import_zod26.z.array(import_zod26.z.string()).optional(),
1156
- scenarioId: import_zod26.z.string(),
1157
- scenarioVersion: import_zod26.z.number().optional(),
1158
- targetId: import_zod26.z.string(),
1159
- targetVersion: import_zod26.z.number().optional(),
1160
- suiteId: import_zod26.z.string().optional(),
1161
- score: import_zod26.z.number(),
1162
- time: import_zod26.z.number().optional(),
1163
- microcentsSpent: import_zod26.z.number().optional()
1176
+ testResults: import_zod27.z.array(import_zod27.z.unknown()),
1177
+ tags: import_zod27.z.array(import_zod27.z.string()).optional(),
1178
+ feedback: import_zod27.z.string().optional(),
1179
+ score: import_zod27.z.number(),
1180
+ suiteId: import_zod27.z.string().optional()
1181
+ });
1182
+ var LeanEvaluationResultSchema = import_zod27.z.object({
1183
+ id: import_zod27.z.string(),
1184
+ runId: import_zod27.z.string(),
1185
+ timestamp: import_zod27.z.number(),
1186
+ tags: import_zod27.z.array(import_zod27.z.string()).optional(),
1187
+ scenarioId: import_zod27.z.string(),
1188
+ scenarioVersion: import_zod27.z.number().optional(),
1189
+ targetId: import_zod27.z.string(),
1190
+ targetVersion: import_zod27.z.number().optional(),
1191
+ suiteId: import_zod27.z.string().optional(),
1192
+ score: import_zod27.z.number(),
1193
+ time: import_zod27.z.number().optional(),
1194
+ microcentsSpent: import_zod27.z.number().optional()
1164
1195
  });
1165
1196
 
1166
1197
  // src/project/project.ts
1167
- var import_zod27 = require("zod");
1198
+ var import_zod28 = require("zod");
1168
1199
  var ProjectSchema = BaseEntitySchema.extend({
1169
- appId: import_zod27.z.string().optional().describe("The ID of the app in Dev Center"),
1170
- appSecret: import_zod27.z.string().optional().describe("The secret of the app in Dev Center")
1200
+ appId: import_zod28.z.string().optional().describe("The ID of the app in Dev Center"),
1201
+ appSecret: import_zod28.z.string().optional().describe("The secret of the app in Dev Center")
1171
1202
  });
1172
1203
  var CreateProjectInputSchema = ProjectSchema.omit({
1173
1204
  id: true,
@@ -1178,10 +1209,10 @@ var CreateProjectInputSchema = ProjectSchema.omit({
1178
1209
  var UpdateProjectInputSchema = CreateProjectInputSchema.partial();
1179
1210
 
1180
1211
  // src/template/template.ts
1181
- var import_zod28 = require("zod");
1212
+ var import_zod29 = require("zod");
1182
1213
  var TemplateSchema = TenantEntitySchema.extend({
1183
1214
  /** URL to download the template from */
1184
- downloadUrl: import_zod28.z.url()
1215
+ downloadUrl: import_zod29.z.url()
1185
1216
  });
1186
1217
  var CreateTemplateInputSchema = TemplateSchema.omit({
1187
1218
  id: true,
@@ -1317,9 +1348,11 @@ function getSystemAssertion(id) {
1317
1348
  CreateAgentInputSchema,
1318
1349
  CreateCustomAssertionInputSchema,
1319
1350
  CreateEvalRunInputSchema,
1351
+ CreateMcpInputSchema,
1320
1352
  CreateProjectInputSchema,
1321
1353
  CreateSkillInputSchema,
1322
1354
  CreateSkillsGroupInputSchema,
1355
+ CreateSubAgentInputSchema,
1323
1356
  CreateTemplateInputSchema,
1324
1357
  CreateTestScenarioInputSchema,
1325
1358
  CreateTestSuiteInputSchema,
@@ -1358,7 +1391,9 @@ function getSystemAssertion(id) {
1358
1391
  LlmJudgeAssertionSchema,
1359
1392
  LlmJudgeConfigSchema,
1360
1393
  LocalProjectConfigSchema,
1394
+ MCPEntitySchema,
1361
1395
  MCPServerConfigSchema,
1396
+ MCP_SERVERS_JSON_KEY,
1362
1397
  MetaSiteConfigSchema,
1363
1398
  ModelConfigSchema,
1364
1399
  ModelIds,
@@ -1379,6 +1414,7 @@ function getSystemAssertion(id) {
1379
1414
  SkillWasCalledAssertionSchema,
1380
1415
  SkillWasCalledConfigSchema,
1381
1416
  SkillsGroupSchema,
1417
+ SubAgentSchema,
1382
1418
  TRACE_EVENT_PREFIX,
1383
1419
  TargetSchema,
1384
1420
  TemplateFileSchema,
@@ -1399,9 +1435,11 @@ function getSystemAssertion(id) {
1399
1435
  TriggerType,
1400
1436
  UpdateAgentInputSchema,
1401
1437
  UpdateCustomAssertionInputSchema,
1438
+ UpdateMcpInputSchema,
1402
1439
  UpdateProjectInputSchema,
1403
1440
  UpdateSkillInputSchema,
1404
1441
  UpdateSkillsGroupInputSchema,
1442
+ UpdateSubAgentInputSchema,
1405
1443
  UpdateTemplateInputSchema,
1406
1444
  UpdateTestScenarioInputSchema,
1407
1445
  UpdateTestSuiteInputSchema,