@wix/evalforge-types 0.74.0 → 0.75.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -349,10 +349,14 @@ var PresetSchema = TenantEntitySchema.extend({
349
349
  /** Sub-agent IDs included in this preset */
350
350
  subAgentIds: z9.array(z9.string()).default([]),
351
351
  /** Rule IDs included in this preset */
352
- ruleIds: z9.array(z9.string()).default([])
353
- });
354
- var atLeastOneEntity = (data) => (data.skillIds?.length ?? 0) > 0 || (data.mcpIds?.length ?? 0) > 0 || (data.subAgentIds?.length ?? 0) > 0 || (data.ruleIds?.length ?? 0) > 0;
355
- var AT_LEAST_ONE_ENTITY_MESSAGE = "At least one of skillIds, mcpIds, subAgentIds, or ruleIds must be non-empty";
352
+ ruleIds: z9.array(z9.string()).default([]),
353
+ /** Unified capability IDs (replaces skill/mcp/subAgent/ruleIds) */
354
+ capabilityIds: z9.array(z9.string()).optional(),
355
+ /** Map of capabilityId to capabilityVersionId for version pinning */
356
+ capabilityVersions: z9.record(z9.string(), z9.string()).optional()
357
+ });
358
+ var atLeastOneEntity = (data) => (data.capabilityIds?.length ?? 0) > 0 || (data.skillIds?.length ?? 0) > 0 || (data.mcpIds?.length ?? 0) > 0 || (data.subAgentIds?.length ?? 0) > 0 || (data.ruleIds?.length ?? 0) > 0;
359
+ var AT_LEAST_ONE_ENTITY_MESSAGE = "At least one of capabilityIds, skillIds, mcpIds, subAgentIds, or ruleIds must be non-empty";
356
360
  var CreatePresetInputSchema = PresetSchema.omit({
357
361
  id: true,
358
362
  createdAt: true,
@@ -366,11 +370,179 @@ var UpdatePresetInputSchema = PresetSchema.omit({
366
370
  deleted: true
367
371
  }).partial();
368
372
 
373
+ // src/target/capability.ts
374
+ import { z as z10 } from "zod";
375
+ var CapabilityTypeSchema = z10.enum([
376
+ "SKILL",
377
+ "SUB_AGENT",
378
+ "RULE",
379
+ "MCP"
380
+ ]);
381
+ var CAPABILITY_NAME_REGEX = /^[a-z0-9]+(-[a-z0-9]+)*$/;
382
+ function isValidCapabilityName(name) {
383
+ return typeof name === "string" && name.length > 0 && CAPABILITY_NAME_REGEX.test(name);
384
+ }
385
+ var KEBAB_CASE_MESSAGE2 = "Name must be in kebab-case (lowercase letters, numbers, hyphens only, e.g. my-capability)";
386
+ var CapabilityContentSchema = z10.record(z10.string(), z10.unknown());
387
+ var CapabilityVersionOriginSchema = z10.enum(["manual", "pr", "master"]);
388
+ var CapabilitySchema = TenantEntitySchema.extend({
389
+ capabilityType: CapabilityTypeSchema,
390
+ source: GitHubSourceSchema.optional()
391
+ });
392
+ var CapabilityVersionSchema = z10.object({
393
+ id: z10.string(),
394
+ projectId: z10.string(),
395
+ capabilityId: z10.string(),
396
+ version: z10.string(),
397
+ origin: CapabilityVersionOriginSchema,
398
+ source: GitHubSourceSchema.optional(),
399
+ content: CapabilityContentSchema.optional(),
400
+ notes: z10.string().optional(),
401
+ createdAt: z10.string()
402
+ });
403
+ var CapabilityWithLatestVersionSchema = CapabilitySchema.extend({
404
+ latestVersion: CapabilityVersionSchema.optional()
405
+ });
406
+ var CapabilityInputBaseSchema = CapabilitySchema.omit({
407
+ id: true,
408
+ createdAt: true,
409
+ updatedAt: true,
410
+ deleted: true,
411
+ description: true,
412
+ source: true
413
+ }).extend({
414
+ description: z10.string().optional(),
415
+ source: GitHubSourceSchema.optional()
416
+ });
417
+ var InitialCapabilityVersionInputSchema = z10.object({
418
+ content: CapabilityContentSchema.optional(),
419
+ notes: z10.string().optional(),
420
+ source: GitHubSourceSchema.optional(),
421
+ version: z10.string().optional(),
422
+ origin: CapabilityVersionOriginSchema.optional()
423
+ });
424
+ var CreateCapabilityInputSchema = CapabilityInputBaseSchema.extend({
425
+ initialVersion: InitialCapabilityVersionInputSchema.optional()
426
+ }).refine((data) => isValidCapabilityName(data.name), {
427
+ message: KEBAB_CASE_MESSAGE2,
428
+ path: ["name"]
429
+ });
430
+ var UpdateCapabilityInputSchema = CapabilityInputBaseSchema.omit({
431
+ capabilityType: true
432
+ }).partial().refine(
433
+ (data) => data.name === void 0 || isValidCapabilityName(data.name),
434
+ { message: KEBAB_CASE_MESSAGE2, path: ["name"] }
435
+ );
436
+ var CreateCapabilityVersionInputSchema = z10.object({
437
+ source: GitHubSourceSchema.optional(),
438
+ version: z10.string().min(1),
439
+ notes: z10.string().optional(),
440
+ origin: CapabilityVersionOriginSchema.optional(),
441
+ content: CapabilityContentSchema.optional()
442
+ });
443
+
444
+ // src/target/capability-converters.ts
445
+ function capabilityToSkill(cap) {
446
+ return {
447
+ id: cap.id,
448
+ projectId: cap.projectId,
449
+ name: cap.name,
450
+ description: cap.description,
451
+ source: cap.source,
452
+ createdAt: cap.createdAt,
453
+ updatedAt: cap.updatedAt,
454
+ deleted: cap.deleted
455
+ };
456
+ }
457
+ function capabilityVersionToSkillVersion(cv) {
458
+ const content = cv.content;
459
+ return {
460
+ id: cv.id,
461
+ projectId: cv.projectId,
462
+ skillId: cv.capabilityId,
463
+ version: cv.version,
464
+ origin: cv.origin,
465
+ source: cv.source,
466
+ files: content?.files,
467
+ notes: cv.notes,
468
+ createdAt: cv.createdAt
469
+ };
470
+ }
471
+ function capabilityToSkillWithLatestVersion(cap) {
472
+ const skill = capabilityToSkill(cap);
473
+ const latestVersion = cap.latestVersion ? capabilityVersionToSkillVersion(cap.latestVersion) : void 0;
474
+ return { ...skill, latestVersion };
475
+ }
476
+ function capabilityToSubAgent(cap) {
477
+ const content = cap.latestVersion?.content;
478
+ return {
479
+ id: cap.id,
480
+ projectId: cap.projectId,
481
+ name: cap.name,
482
+ description: cap.description,
483
+ subAgentMd: content?.subAgentMd ?? "",
484
+ source: cap.source,
485
+ createdAt: cap.createdAt,
486
+ updatedAt: cap.updatedAt,
487
+ deleted: cap.deleted
488
+ };
489
+ }
490
+ function capabilityToRule(cap) {
491
+ const content = cap.latestVersion?.content;
492
+ return {
493
+ id: cap.id,
494
+ projectId: cap.projectId,
495
+ name: cap.name,
496
+ description: cap.description,
497
+ ruleType: content?.ruleType ?? "claude-md",
498
+ content: content?.content ?? "",
499
+ createdAt: cap.createdAt,
500
+ updatedAt: cap.updatedAt,
501
+ deleted: cap.deleted
502
+ };
503
+ }
504
+ function capabilityToMcp(cap) {
505
+ const content = cap.latestVersion?.content;
506
+ return {
507
+ id: cap.id,
508
+ projectId: cap.projectId,
509
+ name: cap.name,
510
+ description: cap.description,
511
+ config: content?.config ?? {},
512
+ createdAt: cap.createdAt,
513
+ updatedAt: cap.updatedAt,
514
+ deleted: cap.deleted
515
+ };
516
+ }
517
+ function groupCapabilitiesByType(capabilities) {
518
+ const skills = [];
519
+ const subAgents = [];
520
+ const rules = [];
521
+ const mcps = [];
522
+ for (const cap of capabilities) {
523
+ switch (cap.capabilityType) {
524
+ case "SKILL":
525
+ skills.push(capabilityToSkillWithLatestVersion(cap));
526
+ break;
527
+ case "SUB_AGENT":
528
+ subAgents.push(capabilityToSubAgent(cap));
529
+ break;
530
+ case "RULE":
531
+ rules.push(capabilityToRule(cap));
532
+ break;
533
+ case "MCP":
534
+ mcps.push(capabilityToMcp(cap));
535
+ break;
536
+ }
537
+ }
538
+ return { skills, subAgents, rules, mcps };
539
+ }
540
+
369
541
  // src/test/index.ts
370
- import { z as z20 } from "zod";
542
+ import { z as z21 } from "zod";
371
543
 
372
544
  // src/test/base.ts
373
- import { z as z10 } from "zod";
545
+ import { z as z11 } from "zod";
374
546
  var TestType = /* @__PURE__ */ ((TestType2) => {
375
547
  TestType2["LLM"] = "LLM";
376
548
  TestType2["TOOL"] = "TOOL";
@@ -383,7 +555,7 @@ var TestType = /* @__PURE__ */ ((TestType2) => {
383
555
  TestType2["PLAYWRIGHT_NL"] = "PLAYWRIGHT_NL";
384
556
  return TestType2;
385
557
  })(TestType || {});
386
- var TestTypeSchema = z10.enum(TestType);
558
+ var TestTypeSchema = z11.enum(TestType);
387
559
  var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
388
560
  TestImportance2["LOW"] = "low";
389
561
  TestImportance2["MEDIUM"] = "medium";
@@ -391,153 +563,153 @@ var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
391
563
  TestImportance2["CRITICAL"] = "critical";
392
564
  return TestImportance2;
393
565
  })(TestImportance || {});
394
- var TestImportanceSchema = z10.enum(TestImportance);
395
- var BaseTestSchema = z10.object({
396
- id: z10.string(),
566
+ var TestImportanceSchema = z11.enum(TestImportance);
567
+ var BaseTestSchema = z11.object({
568
+ id: z11.string(),
397
569
  type: TestTypeSchema,
398
- name: z10.string().min(3),
399
- description: z10.string().optional(),
570
+ name: z11.string().min(3),
571
+ description: z11.string().optional(),
400
572
  importance: TestImportanceSchema.optional()
401
573
  });
402
574
 
403
575
  // src/test/llm.ts
404
- import { z as z11 } from "zod";
576
+ import { z as z12 } from "zod";
405
577
  var LLMTestSchema = BaseTestSchema.extend({
406
- type: z11.literal("LLM" /* LLM */),
578
+ type: z12.literal("LLM" /* LLM */),
407
579
  /** Maximum steps for the LLM to take */
408
- maxSteps: z11.number().min(1).max(100),
580
+ maxSteps: z12.number().min(1).max(100),
409
581
  /** Prompt to send to the evaluator */
410
- prompt: z11.string().min(1),
582
+ prompt: z12.string().min(1),
411
583
  /** ID of the evaluator agent to use */
412
- evaluatorId: z11.string()
584
+ evaluatorId: z12.string()
413
585
  });
414
586
 
415
587
  // src/test/tool.ts
416
- import { z as z12 } from "zod";
588
+ import { z as z13 } from "zod";
417
589
  var ToolTestSchema = BaseTestSchema.extend({
418
- type: z12.literal("TOOL" /* TOOL */),
590
+ type: z13.literal("TOOL" /* TOOL */),
419
591
  /** Name of the tool that should be called */
420
- toolName: z12.string().min(3),
592
+ toolName: z13.string().min(3),
421
593
  /** Expected arguments for the tool call */
422
- args: z12.record(z12.string(), z12.any()),
594
+ args: z13.record(z13.string(), z13.any()),
423
595
  /** Expected content in the tool results */
424
- resultsContent: z12.string()
596
+ resultsContent: z13.string()
425
597
  });
426
598
 
427
599
  // src/test/site-config.ts
428
- import { z as z13 } from "zod";
600
+ import { z as z14 } from "zod";
429
601
  var SiteConfigTestSchema = BaseTestSchema.extend({
430
- type: z13.literal("SITE_CONFIG" /* SITE_CONFIG */),
602
+ type: z14.literal("SITE_CONFIG" /* SITE_CONFIG */),
431
603
  /** URL to call */
432
- url: z13.string().url(),
604
+ url: z14.string().url(),
433
605
  /** HTTP method */
434
- method: z13.enum(["GET", "POST"]),
606
+ method: z14.enum(["GET", "POST"]),
435
607
  /** Request body (for POST) */
436
- body: z13.string().optional(),
608
+ body: z14.string().optional(),
437
609
  /** Expected HTTP status code */
438
- expectedStatusCode: z13.number().int().min(100).max(599),
610
+ expectedStatusCode: z14.number().int().min(100).max(599),
439
611
  /** Expected response content */
440
- expectedResponse: z13.string().optional(),
612
+ expectedResponse: z14.string().optional(),
441
613
  /** JMESPath expression to extract from response */
442
- expectedResponseJMESPath: z13.string().optional()
614
+ expectedResponseJMESPath: z14.string().optional()
443
615
  });
444
616
 
445
617
  // src/test/command-execution.ts
446
- import { z as z14 } from "zod";
618
+ import { z as z15 } from "zod";
447
619
  var AllowedCommands = [
448
620
  "yarn install --no-immutable && yarn build",
449
621
  "npm run build",
450
622
  "yarn typecheck"
451
623
  ];
452
624
  var CommandExecutionTestSchema = BaseTestSchema.extend({
453
- type: z14.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
625
+ type: z15.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
454
626
  /** Command to execute (must be in AllowedCommands) */
455
- command: z14.string().refine((value) => AllowedCommands.includes(value), {
627
+ command: z15.string().refine((value) => AllowedCommands.includes(value), {
456
628
  message: `Command must be one of: ${AllowedCommands.join(", ")}`
457
629
  }),
458
630
  /** Expected exit code (default: 0) */
459
- expectedExitCode: z14.number().default(0).optional()
631
+ expectedExitCode: z15.number().default(0).optional()
460
632
  });
461
633
 
462
634
  // src/test/file-presence.ts
463
- import { z as z15 } from "zod";
635
+ import { z as z16 } from "zod";
464
636
  var FilePresenceTestSchema = BaseTestSchema.extend({
465
- type: z15.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
637
+ type: z16.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
466
638
  /** Paths to check */
467
- paths: z15.array(z15.string()),
639
+ paths: z16.array(z16.string()),
468
640
  /** Whether files should exist (true) or not exist (false) */
469
- shouldExist: z15.boolean()
641
+ shouldExist: z16.boolean()
470
642
  });
471
643
 
472
644
  // src/test/file-content.ts
473
- import { z as z16 } from "zod";
474
- var FileContentCheckSchema = z16.object({
645
+ import { z as z17 } from "zod";
646
+ var FileContentCheckSchema = z17.object({
475
647
  /** Strings that must be present in the file */
476
- contains: z16.array(z16.string()).optional(),
648
+ contains: z17.array(z17.string()).optional(),
477
649
  /** Strings that must NOT be present in the file */
478
- notContains: z16.array(z16.string()).optional(),
650
+ notContains: z17.array(z17.string()).optional(),
479
651
  /** Regex pattern the content must match */
480
- matches: z16.string().optional(),
652
+ matches: z17.string().optional(),
481
653
  /** JSON path checks for structured content */
482
- jsonPath: z16.array(
483
- z16.object({
484
- path: z16.string(),
485
- value: z16.unknown()
654
+ jsonPath: z17.array(
655
+ z17.object({
656
+ path: z17.string(),
657
+ value: z17.unknown()
486
658
  })
487
659
  ).optional(),
488
660
  /** Lines that should be added (for diff checking) */
489
- added: z16.array(z16.string()).optional(),
661
+ added: z17.array(z17.string()).optional(),
490
662
  /** Lines that should be removed (for diff checking) */
491
- removed: z16.array(z16.string()).optional()
663
+ removed: z17.array(z17.string()).optional()
492
664
  });
493
665
  var FileContentTestSchema = BaseTestSchema.extend({
494
- type: z16.literal("FILE_CONTENT" /* FILE_CONTENT */),
666
+ type: z17.literal("FILE_CONTENT" /* FILE_CONTENT */),
495
667
  /** Path to the file to check */
496
- path: z16.string(),
668
+ path: z17.string(),
497
669
  /** Content checks to perform */
498
670
  checks: FileContentCheckSchema
499
671
  });
500
672
 
501
673
  // src/test/build-check.ts
502
- import { z as z17 } from "zod";
674
+ import { z as z18 } from "zod";
503
675
  var BuildCheckTestSchema = BaseTestSchema.extend({
504
- type: z17.literal("BUILD_CHECK" /* BUILD_CHECK */),
676
+ type: z18.literal("BUILD_CHECK" /* BUILD_CHECK */),
505
677
  /** Build command to execute */
506
- command: z17.string(),
678
+ command: z18.string(),
507
679
  /** Whether the build should succeed */
508
- expectSuccess: z17.boolean(),
680
+ expectSuccess: z18.boolean(),
509
681
  /** Maximum allowed warnings (optional) */
510
- allowedWarnings: z17.number().optional(),
682
+ allowedWarnings: z18.number().optional(),
511
683
  /** Timeout in milliseconds */
512
- timeout: z17.number().optional()
684
+ timeout: z18.number().optional()
513
685
  });
514
686
 
515
687
  // src/test/vitest.ts
516
- import { z as z18 } from "zod";
688
+ import { z as z19 } from "zod";
517
689
  var VitestTestSchema = BaseTestSchema.extend({
518
- type: z18.literal("VITEST" /* VITEST */),
690
+ type: z19.literal("VITEST" /* VITEST */),
519
691
  /** Test file content */
520
- testFile: z18.string(),
692
+ testFile: z19.string(),
521
693
  /** Name of the test file */
522
- testFileName: z18.string(),
694
+ testFileName: z19.string(),
523
695
  /** Minimum pass rate required (0-100) */
524
- minPassRate: z18.number().min(0).max(100)
696
+ minPassRate: z19.number().min(0).max(100)
525
697
  });
526
698
 
527
699
  // src/test/playwright-nl.ts
528
- import { z as z19 } from "zod";
700
+ import { z as z20 } from "zod";
529
701
  var PlaywrightNLTestSchema = BaseTestSchema.extend({
530
- type: z19.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
702
+ type: z20.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
531
703
  /** Natural language steps to execute */
532
- steps: z19.array(z19.string()),
704
+ steps: z20.array(z20.string()),
533
705
  /** Expected outcome description */
534
- expectedOutcome: z19.string(),
706
+ expectedOutcome: z20.string(),
535
707
  /** Timeout in milliseconds */
536
- timeout: z19.number().optional()
708
+ timeout: z20.number().optional()
537
709
  });
538
710
 
539
711
  // src/test/index.ts
540
- var TestSchema = z20.discriminatedUnion("type", [
712
+ var TestSchema = z21.discriminatedUnion("type", [
541
713
  LLMTestSchema,
542
714
  ToolTestSchema,
543
715
  SiteConfigTestSchema,
@@ -550,33 +722,33 @@ var TestSchema = z20.discriminatedUnion("type", [
550
722
  ]);
551
723
 
552
724
  // src/scenario/environment.ts
553
- import { z as z21 } from "zod";
554
- var LocalProjectConfigSchema = z21.object({
725
+ import { z as z22 } from "zod";
726
+ var LocalProjectConfigSchema = z22.object({
555
727
  /** Template ID to use for the local project */
556
- templateId: z21.string().optional(),
728
+ templateId: z22.string().optional(),
557
729
  /** Files to create in the project */
558
- files: z21.array(
559
- z21.object({
560
- path: z21.string().min(1),
561
- content: z21.string().min(1)
730
+ files: z22.array(
731
+ z22.object({
732
+ path: z22.string().min(1),
733
+ content: z22.string().min(1)
562
734
  })
563
735
  ).optional()
564
736
  });
565
- var MetaSiteConfigSchema = z21.object({
566
- configurations: z21.array(
567
- z21.object({
568
- name: z21.string().min(1),
569
- apiCalls: z21.array(
570
- z21.object({
571
- url: z21.string().url(),
572
- method: z21.enum(["POST", "PUT"]),
573
- body: z21.string()
737
+ var MetaSiteConfigSchema = z22.object({
738
+ configurations: z22.array(
739
+ z22.object({
740
+ name: z22.string().min(1),
741
+ apiCalls: z22.array(
742
+ z22.object({
743
+ url: z22.string().url(),
744
+ method: z22.enum(["POST", "PUT"]),
745
+ body: z22.string()
574
746
  })
575
747
  )
576
748
  })
577
749
  ).optional()
578
750
  });
579
- var EnvironmentSchema = z21.object({
751
+ var EnvironmentSchema = z22.object({
580
752
  /** Local project configuration */
581
753
  localProject: LocalProjectConfigSchema.optional(),
582
754
  /** Meta site configuration */
@@ -584,13 +756,13 @@ var EnvironmentSchema = z21.object({
584
756
  });
585
757
 
586
758
  // src/scenario/test-scenario.ts
587
- import { z as z24 } from "zod";
759
+ import { z as z25 } from "zod";
588
760
 
589
761
  // src/assertion/assertion.ts
590
- import { z as z23 } from "zod";
762
+ import { z as z24 } from "zod";
591
763
 
592
764
  // src/assertion/build-passed-command.ts
593
- import { z as z22 } from "zod";
765
+ import { z as z23 } from "zod";
594
766
  var ALLOWED_BUILD_COMMANDS = [
595
767
  "yarn build",
596
768
  "npm run build",
@@ -616,10 +788,10 @@ function parseBuildCommandToArgv(command) {
616
788
  return BUILD_COMMAND_ARGV[trimmed];
617
789
  }
618
790
  var enumTuple = ALLOWED_BUILD_COMMANDS;
619
- var BuildPassedCommandStringSchema = z22.enum(enumTuple);
791
+ var BuildPassedCommandStringSchema = z23.enum(enumTuple);
620
792
 
621
793
  // src/assertion/assertion.ts
622
- var AssertionTypeSchema = z23.enum([
794
+ var AssertionTypeSchema = z24.enum([
623
795
  "skill_was_called",
624
796
  "tool_called_with_param",
625
797
  "build_passed",
@@ -628,61 +800,61 @@ var AssertionTypeSchema = z23.enum([
628
800
  "llm_judge",
629
801
  "api_call"
630
802
  ]);
631
- var AssertionParameterTypeSchema = z23.enum([
803
+ var AssertionParameterTypeSchema = z24.enum([
632
804
  "string",
633
805
  "number",
634
806
  "boolean"
635
807
  ]);
636
- var AssertionParameterSchema = z23.object({
808
+ var AssertionParameterSchema = z24.object({
637
809
  /** Parameter name (used as key in params object) */
638
- name: z23.string().min(1),
810
+ name: z24.string().min(1),
639
811
  /** Display label for the parameter */
640
- label: z23.string().min(1),
812
+ label: z24.string().min(1),
641
813
  /** Parameter type */
642
814
  type: AssertionParameterTypeSchema,
643
815
  /** Whether this parameter is required */
644
- required: z23.boolean(),
816
+ required: z24.boolean(),
645
817
  /** Default value (optional, used when not provided) */
646
- defaultValue: z23.union([z23.string(), z23.number(), z23.boolean()]).optional(),
818
+ defaultValue: z24.union([z24.string(), z24.number(), z24.boolean()]).optional(),
647
819
  /** If true, parameter is hidden by default behind "Show advanced options" */
648
- advanced: z23.boolean().optional()
820
+ advanced: z24.boolean().optional()
649
821
  });
650
- var ScenarioAssertionLinkSchema = z23.object({
822
+ var ScenarioAssertionLinkSchema = z24.object({
651
823
  /** ID of the system assertion (e.g., 'system:skill_was_called') */
652
- assertionId: z23.string(),
824
+ assertionId: z24.string(),
653
825
  /** Parameter values for this assertion in this scenario */
654
- params: z23.record(
655
- z23.string(),
656
- z23.union([z23.string(), z23.number(), z23.boolean(), z23.null()])
826
+ params: z24.record(
827
+ z24.string(),
828
+ z24.union([z24.string(), z24.number(), z24.boolean(), z24.null()])
657
829
  ).optional()
658
830
  });
659
- var SkillWasCalledConfigSchema = z23.object({
831
+ var SkillWasCalledConfigSchema = z24.object({
660
832
  /** Names of the skills that must have been called */
661
- skillNames: z23.array(z23.string().min(1)).min(1)
833
+ skillNames: z24.array(z24.string().min(1)).min(1)
662
834
  });
663
- var CostConfigSchema = z23.strictObject({
835
+ var CostConfigSchema = z24.strictObject({
664
836
  /** Maximum allowed cost in USD */
665
- maxCostUsd: z23.number().positive()
837
+ maxCostUsd: z24.number().positive()
666
838
  });
667
- var ToolCalledWithParamConfigSchema = z23.strictObject({
839
+ var ToolCalledWithParamConfigSchema = z24.strictObject({
668
840
  /** Name of the tool that must have been called */
669
- toolName: z23.string().min(1),
841
+ toolName: z24.string().min(1),
670
842
  /** JSON string of key-value pairs for expected parameters (substring match). Optional — when omitted, only checks tool presence. */
671
- expectedParams: z23.string().min(1).optional(),
843
+ expectedParams: z24.string().min(1).optional(),
672
844
  /** If true, the matching tool call must also have succeeded (step.success === true) */
673
- requireSuccess: z23.boolean().optional()
845
+ requireSuccess: z24.boolean().optional()
674
846
  });
675
- var BuildPassedConfigSchema = z23.strictObject({
847
+ var BuildPassedConfigSchema = z24.strictObject({
676
848
  /** Allowlisted command only (default at runtime: "yarn build") */
677
849
  command: BuildPassedCommandStringSchema.optional(),
678
850
  /** Expected exit code (default: 0) */
679
- expectedExitCode: z23.number().int().optional()
851
+ expectedExitCode: z24.number().int().optional()
680
852
  });
681
- var TimeConfigSchema = z23.strictObject({
853
+ var TimeConfigSchema = z24.strictObject({
682
854
  /** Maximum allowed duration in milliseconds */
683
- maxDurationMs: z23.number().int().positive()
855
+ maxDurationMs: z24.number().int().positive()
684
856
  });
685
- var LlmJudgeConfigSchema = z23.object({
857
+ var LlmJudgeConfigSchema = z24.object({
686
858
  /**
687
859
  * Prompt template with placeholders:
688
860
  * - {{output}}: agent's final output
@@ -693,65 +865,65 @@ var LlmJudgeConfigSchema = z23.object({
693
865
  * - {{trace}}: step-by-step trace of tool calls
694
866
  * - Custom parameters defined in the parameters array
695
867
  */
696
- prompt: z23.string().min(1),
868
+ prompt: z24.string().min(1),
697
869
  /** Minimum score to pass (0-10, default 7) */
698
- minScore: z23.number().int().min(0).max(10).optional(),
870
+ minScore: z24.number().int().min(0).max(10).optional(),
699
871
  /** Model for the judge (e.g. claude-3-5-haiku-20241022) */
700
- model: z23.string().optional(),
872
+ model: z24.string().optional(),
701
873
  /** Max output tokens */
702
- maxTokens: z23.number().int().optional(),
874
+ maxTokens: z24.number().int().optional(),
703
875
  /** Temperature (0-1) */
704
- temperature: z23.number().min(0).max(1).optional(),
876
+ temperature: z24.number().min(0).max(1).optional(),
705
877
  /** User-defined parameters for this assertion */
706
- parameters: z23.array(AssertionParameterSchema).optional()
878
+ parameters: z24.array(AssertionParameterSchema).optional()
707
879
  });
708
- var ApiCallConfigSchema = z23.strictObject({
880
+ var ApiCallConfigSchema = z24.strictObject({
709
881
  /** URL to call */
710
- url: z23.string().min(1),
882
+ url: z24.string().min(1),
711
883
  /** HTTP method (default GET) */
712
- method: z23.enum(["GET", "POST"]).optional(),
884
+ method: z24.enum(["GET", "POST"]).optional(),
713
885
  /** Request body (JSON string, for POST requests) */
714
- requestBody: z23.string().optional(),
886
+ requestBody: z24.string().optional(),
715
887
  /** Expected JSON response to validate against (subset match — extra fields in actual are OK) */
716
- expectedResponse: z23.string().min(1),
888
+ expectedResponse: z24.string().min(1),
717
889
  /** Request headers as JSON string of key-value pairs */
718
- requestHeaders: z23.string().optional(),
890
+ requestHeaders: z24.string().optional(),
719
891
  /** Request timeout in milliseconds (default 30000) */
720
- timeoutMs: z23.number().int().positive().optional()
892
+ timeoutMs: z24.number().int().positive().optional()
721
893
  });
722
894
  var AssertionBaseFields = {
723
895
  /** When true, the assertion's pass/fail logic is inverted (NOT operator). */
724
- negate: z23.boolean().optional()
896
+ negate: z24.boolean().optional()
725
897
  };
726
898
  var SkillWasCalledAssertionSchema = SkillWasCalledConfigSchema.extend({
727
- type: z23.literal("skill_was_called"),
899
+ type: z24.literal("skill_was_called"),
728
900
  ...AssertionBaseFields
729
901
  });
730
902
  var ToolCalledWithParamAssertionSchema = ToolCalledWithParamConfigSchema.extend({
731
- type: z23.literal("tool_called_with_param"),
903
+ type: z24.literal("tool_called_with_param"),
732
904
  ...AssertionBaseFields
733
905
  });
734
906
  var BuildPassedAssertionSchema = BuildPassedConfigSchema.extend({
735
- type: z23.literal("build_passed"),
907
+ type: z24.literal("build_passed"),
736
908
  ...AssertionBaseFields
737
909
  });
738
910
  var CostAssertionSchema = CostConfigSchema.extend({
739
- type: z23.literal("cost"),
911
+ type: z24.literal("cost"),
740
912
  ...AssertionBaseFields
741
913
  });
742
914
  var LlmJudgeAssertionSchema = LlmJudgeConfigSchema.extend({
743
- type: z23.literal("llm_judge"),
915
+ type: z24.literal("llm_judge"),
744
916
  ...AssertionBaseFields
745
917
  });
746
918
  var ApiCallAssertionSchema = ApiCallConfigSchema.extend({
747
- type: z23.literal("api_call"),
919
+ type: z24.literal("api_call"),
748
920
  ...AssertionBaseFields
749
921
  });
750
922
  var TimeAssertionSchema = TimeConfigSchema.extend({
751
- type: z23.literal("time_limit"),
923
+ type: z24.literal("time_limit"),
752
924
  ...AssertionBaseFields
753
925
  });
754
- var AssertionSchema = z23.union([
926
+ var AssertionSchema = z24.union([
755
927
  SkillWasCalledAssertionSchema,
756
928
  ToolCalledWithParamAssertionSchema,
757
929
  BuildPassedAssertionSchema,
@@ -760,7 +932,7 @@ var AssertionSchema = z23.union([
760
932
  LlmJudgeAssertionSchema,
761
933
  ApiCallAssertionSchema
762
934
  ]);
763
- var AssertionConfigSchema = z23.union([
935
+ var AssertionConfigSchema = z24.union([
764
936
  LlmJudgeConfigSchema,
765
937
  // requires prompt - check first
766
938
  SkillWasCalledConfigSchema,
@@ -775,7 +947,7 @@ var AssertionConfigSchema = z23.union([
775
947
  // requires maxCostUsd, uses strictObject
776
948
  BuildPassedConfigSchema,
777
949
  // all optional, uses strictObject to reject unknown keys
778
- z23.object({})
950
+ z24.object({})
779
951
  // fallback empty config
780
952
  ]);
781
953
  function validateAssertionConfig(type, config) {
@@ -1021,35 +1193,35 @@ function getSystemAssertion(id) {
1021
1193
 
1022
1194
  // src/scenario/test-scenario.ts
1023
1195
  var MAX_IMAGE_BASE64_LENGTH = 4 * Math.ceil(2 * 1024 * 1024 / 3);
1024
- var TriggerPromptImageSchema = z24.object({
1196
+ var TriggerPromptImageSchema = z25.object({
1025
1197
  /** Base64-encoded image data (no data URL prefix) */
1026
- base64: z24.string().max(MAX_IMAGE_BASE64_LENGTH, "Image exceeds 2 MB size limit"),
1198
+ base64: z25.string().max(MAX_IMAGE_BASE64_LENGTH, "Image exceeds 2 MB size limit"),
1027
1199
  /** MIME type of the image */
1028
- mediaType: z24.enum(["image/jpeg", "image/png", "image/gif", "image/webp"]),
1200
+ mediaType: z25.enum(["image/jpeg", "image/png", "image/gif", "image/webp"]),
1029
1201
  /** Original filename of the image */
1030
- name: z24.string()
1202
+ name: z25.string()
1031
1203
  });
1032
- var ExpectedFileSchema = z24.object({
1204
+ var ExpectedFileSchema = z25.object({
1033
1205
  /** Relative path where the file should be created */
1034
- path: z24.string(),
1206
+ path: z25.string(),
1035
1207
  /** Optional expected content */
1036
- content: z24.string().optional()
1208
+ content: z25.string().optional()
1037
1209
  });
1038
1210
  var TestScenarioSchema = TenantEntitySchema.extend({
1039
1211
  /** The prompt sent to the agent to trigger the task */
1040
- triggerPrompt: z24.string().min(10),
1212
+ triggerPrompt: z25.string().min(10),
1041
1213
  /** ID of the template to use for this scenario (null = no template) */
1042
- templateId: z24.string().nullish(),
1214
+ templateId: z25.string().nullish(),
1043
1215
  /** Inline assertions to evaluate for this scenario (legacy) */
1044
- assertions: z24.array(AssertionSchema).optional(),
1216
+ assertions: z25.array(AssertionSchema).optional(),
1045
1217
  /** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
1046
- assertionIds: z24.array(z24.string()).optional(),
1218
+ assertionIds: z25.array(z25.string()).optional(),
1047
1219
  /** Linked assertions with per-scenario parameter values */
1048
- assertionLinks: z24.array(ScenarioAssertionLinkSchema).optional(),
1220
+ assertionLinks: z25.array(ScenarioAssertionLinkSchema).optional(),
1049
1221
  /** Tags for categorisation and filtering */
1050
- tags: z24.array(z24.string()).optional(),
1222
+ tags: z25.array(z25.string()).optional(),
1051
1223
  /** Base64-encoded images attached to the trigger prompt (max 3) */
1052
- triggerPromptImages: z24.array(TriggerPromptImageSchema).max(3).optional()
1224
+ triggerPromptImages: z25.array(TriggerPromptImageSchema).max(3).optional()
1053
1225
  });
1054
1226
  function validateBuildPassedParamsInAssertionLinks(links, ctx) {
1055
1227
  if (!links) return;
@@ -1060,7 +1232,7 @@ function validateBuildPassedParamsInAssertionLinks(links, ctx) {
1060
1232
  if (cmd === void 0 || cmd === null) continue;
1061
1233
  if (typeof cmd !== "string") {
1062
1234
  ctx.addIssue({
1063
- code: z24.ZodIssueCode.custom,
1235
+ code: z25.ZodIssueCode.custom,
1064
1236
  message: "build_passed command must be a string",
1065
1237
  path: ["assertionLinks", i, "params", "command"]
1066
1238
  });
@@ -1068,7 +1240,7 @@ function validateBuildPassedParamsInAssertionLinks(links, ctx) {
1068
1240
  }
1069
1241
  if (!isAllowedBuildCommandString(cmd)) {
1070
1242
  ctx.addIssue({
1071
- code: z24.ZodIssueCode.custom,
1243
+ code: z25.ZodIssueCode.custom,
1072
1244
  message: "Invalid build_passed command. Allowed: yarn build, npm run build, pnpm run build, pnpm build",
1073
1245
  path: ["assertionLinks", i, "params", "command"]
1074
1246
  });
@@ -1091,19 +1263,19 @@ var UpdateTestScenarioInputSchema = TestScenarioCreateBaseSchema.partial().super
1091
1263
  });
1092
1264
 
1093
1265
  // src/scenario/batch-import.ts
1094
- import { z as z25 } from "zod";
1266
+ import { z as z26 } from "zod";
1095
1267
  var UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
1096
- var BatchAssertionLinkSchema = z25.union([
1097
- z25.string().min(1),
1268
+ var BatchAssertionLinkSchema = z26.union([
1269
+ z26.string().min(1),
1098
1270
  ScenarioAssertionLinkSchema
1099
1271
  ]);
1100
- var BatchScenarioEntrySchema = z25.object({
1101
- name: z25.string().min(1, "name: Required"),
1102
- description: z25.string().optional().default(""),
1103
- triggerPrompt: z25.string().min(10, "triggerPrompt: Must be at least 10 characters"),
1104
- templateId: z25.string().nullish(),
1105
- tags: z25.array(z25.string()).optional(),
1106
- assertionLinks: z25.array(BatchAssertionLinkSchema).optional()
1272
+ var BatchScenarioEntrySchema = z26.object({
1273
+ name: z26.string().min(1, "name: Required"),
1274
+ description: z26.string().optional().default(""),
1275
+ triggerPrompt: z26.string().min(10, "triggerPrompt: Must be at least 10 characters"),
1276
+ templateId: z26.string().nullish(),
1277
+ tags: z26.array(z26.string()).optional(),
1278
+ assertionLinks: z26.array(BatchAssertionLinkSchema).optional()
1107
1279
  }).superRefine((data, ctx) => {
1108
1280
  if (!data.assertionLinks) return;
1109
1281
  const objectLinks = data.assertionLinks.filter(
@@ -1113,8 +1285,8 @@ var BatchScenarioEntrySchema = z25.object({
1113
1285
  validateBuildPassedParamsInAssertionLinks(objectLinks, ctx);
1114
1286
  }
1115
1287
  });
1116
- var BatchImportPayloadSchema = z25.object({
1117
- scenarios: z25.array(BatchScenarioEntrySchema).min(1, "scenarios array must contain at least one entry").max(100, "Maximum 100 scenarios per upload")
1288
+ var BatchImportPayloadSchema = z26.object({
1289
+ scenarios: z26.array(BatchScenarioEntrySchema).min(1, "scenarios array must contain at least one entry").max(100, "Maximum 100 scenarios per upload")
1118
1290
  });
1119
1291
  var BATCH_IMPORT_LIMITS = {
1120
1292
  MAX_SCENARIOS: 100,
@@ -1136,29 +1308,29 @@ function normalizeBatchAssertionLink(link) {
1136
1308
  }
1137
1309
  return link;
1138
1310
  }
1139
- var BatchResultItemSchema = z25.object({
1140
- index: z25.number(),
1141
- name: z25.string(),
1142
- status: z25.enum(["valid", "invalid"]),
1143
- id: z25.string().nullable().optional(),
1144
- errors: z25.array(z25.string()).optional()
1145
- });
1146
- var BatchSummarySchema = z25.object({
1147
- total: z25.number(),
1148
- valid: z25.number(),
1149
- invalid: z25.number(),
1150
- created: z25.number()
1151
- });
1152
- var BatchImportResponseSchema = z25.object({
1311
+ var BatchResultItemSchema = z26.object({
1312
+ index: z26.number(),
1313
+ name: z26.string(),
1314
+ status: z26.enum(["valid", "invalid"]),
1315
+ id: z26.string().nullable().optional(),
1316
+ errors: z26.array(z26.string()).optional()
1317
+ });
1318
+ var BatchSummarySchema = z26.object({
1319
+ total: z26.number(),
1320
+ valid: z26.number(),
1321
+ invalid: z26.number(),
1322
+ created: z26.number()
1323
+ });
1324
+ var BatchImportResponseSchema = z26.object({
1153
1325
  summary: BatchSummarySchema,
1154
- results: z25.array(BatchResultItemSchema)
1326
+ results: z26.array(BatchResultItemSchema)
1155
1327
  });
1156
1328
 
1157
1329
  // src/suite/test-suite.ts
1158
- import { z as z26 } from "zod";
1330
+ import { z as z27 } from "zod";
1159
1331
  var TestSuiteSchema = TenantEntitySchema.extend({
1160
1332
  /** IDs of test scenarios in this suite */
1161
- scenarioIds: z26.array(z26.string())
1333
+ scenarioIds: z27.array(z27.string())
1162
1334
  });
1163
1335
  var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
1164
1336
  id: true,
@@ -1169,21 +1341,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
1169
1341
  var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
1170
1342
 
1171
1343
  // src/evaluation/metrics.ts
1172
- import { z as z27 } from "zod";
1173
- var TokenUsageSchema = z27.object({
1174
- prompt: z27.number(),
1175
- completion: z27.number(),
1176
- total: z27.number()
1177
- });
1178
- var EvalMetricsSchema = z27.object({
1179
- totalAssertions: z27.number(),
1180
- passed: z27.number(),
1181
- failed: z27.number(),
1182
- skipped: z27.number(),
1183
- errors: z27.number(),
1184
- passRate: z27.number(),
1185
- avgDuration: z27.number(),
1186
- totalDuration: z27.number()
1344
+ import { z as z28 } from "zod";
1345
+ var TokenUsageSchema = z28.object({
1346
+ prompt: z28.number(),
1347
+ completion: z28.number(),
1348
+ total: z28.number()
1349
+ });
1350
+ var EvalMetricsSchema = z28.object({
1351
+ totalAssertions: z28.number(),
1352
+ passed: z28.number(),
1353
+ failed: z28.number(),
1354
+ skipped: z28.number(),
1355
+ errors: z28.number(),
1356
+ passRate: z28.number(),
1357
+ avgDuration: z28.number(),
1358
+ totalDuration: z28.number()
1187
1359
  });
1188
1360
  var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
1189
1361
  EvalStatus2["PENDING"] = "pending";
@@ -1193,7 +1365,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
1193
1365
  EvalStatus2["CANCELLED"] = "cancelled";
1194
1366
  return EvalStatus2;
1195
1367
  })(EvalStatus || {});
1196
- var EvalStatusSchema = z27.enum(EvalStatus);
1368
+ var EvalStatusSchema = z28.enum(EvalStatus);
1197
1369
  var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
1198
1370
  LLMStepType2["COMPLETION"] = "completion";
1199
1371
  LLMStepType2["TOOL_USE"] = "tool_use";
@@ -1201,54 +1373,54 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
1201
1373
  LLMStepType2["THINKING"] = "thinking";
1202
1374
  return LLMStepType2;
1203
1375
  })(LLMStepType || {});
1204
- var LLMTraceStepSchema = z27.object({
1205
- id: z27.string(),
1206
- stepNumber: z27.number(),
1207
- type: z27.enum(LLMStepType),
1208
- model: z27.string(),
1209
- provider: z27.string(),
1210
- startedAt: z27.string(),
1211
- durationMs: z27.number(),
1376
+ var LLMTraceStepSchema = z28.object({
1377
+ id: z28.string(),
1378
+ stepNumber: z28.number(),
1379
+ type: z28.enum(LLMStepType),
1380
+ model: z28.string(),
1381
+ provider: z28.string(),
1382
+ startedAt: z28.string(),
1383
+ durationMs: z28.number(),
1212
1384
  tokenUsage: TokenUsageSchema,
1213
- costUsd: z27.number(),
1214
- toolName: z27.string().optional(),
1215
- toolArguments: z27.string().optional(),
1216
- inputPreview: z27.string().optional(),
1217
- outputPreview: z27.string().optional(),
1218
- success: z27.boolean(),
1219
- error: z27.string().optional(),
1220
- turnIndex: z27.number().optional()
1221
- });
1222
- var LLMBreakdownStatsSchema = z27.object({
1223
- count: z27.number(),
1224
- durationMs: z27.number(),
1225
- tokens: z27.number(),
1226
- costUsd: z27.number()
1227
- });
1228
- var LLMTraceSummarySchema = z27.object({
1229
- totalSteps: z27.number(),
1230
- totalTurns: z27.number().optional(),
1231
- totalDurationMs: z27.number(),
1385
+ costUsd: z28.number(),
1386
+ toolName: z28.string().optional(),
1387
+ toolArguments: z28.string().optional(),
1388
+ inputPreview: z28.string().optional(),
1389
+ outputPreview: z28.string().optional(),
1390
+ success: z28.boolean(),
1391
+ error: z28.string().optional(),
1392
+ turnIndex: z28.number().optional()
1393
+ });
1394
+ var LLMBreakdownStatsSchema = z28.object({
1395
+ count: z28.number(),
1396
+ durationMs: z28.number(),
1397
+ tokens: z28.number(),
1398
+ costUsd: z28.number()
1399
+ });
1400
+ var LLMTraceSummarySchema = z28.object({
1401
+ totalSteps: z28.number(),
1402
+ totalTurns: z28.number().optional(),
1403
+ totalDurationMs: z28.number(),
1232
1404
  totalTokens: TokenUsageSchema,
1233
- totalCostUsd: z27.number(),
1234
- stepTypeBreakdown: z27.record(z27.string(), LLMBreakdownStatsSchema).optional(),
1235
- modelBreakdown: z27.record(z27.string(), LLMBreakdownStatsSchema),
1236
- modelsUsed: z27.array(z27.string())
1237
- });
1238
- var LLMTraceSchema = z27.object({
1239
- id: z27.string(),
1240
- steps: z27.array(LLMTraceStepSchema),
1405
+ totalCostUsd: z28.number(),
1406
+ stepTypeBreakdown: z28.record(z28.string(), LLMBreakdownStatsSchema).optional(),
1407
+ modelBreakdown: z28.record(z28.string(), LLMBreakdownStatsSchema),
1408
+ modelsUsed: z28.array(z28.string())
1409
+ });
1410
+ var LLMTraceSchema = z28.object({
1411
+ id: z28.string(),
1412
+ steps: z28.array(LLMTraceStepSchema),
1241
1413
  summary: LLMTraceSummarySchema
1242
1414
  });
1243
1415
 
1244
1416
  // src/evaluation/eval-result.ts
1245
- import { z as z31 } from "zod";
1417
+ import { z as z32 } from "zod";
1246
1418
 
1247
1419
  // src/evaluation/eval-run.ts
1248
- import { z as z29 } from "zod";
1420
+ import { z as z30 } from "zod";
1249
1421
 
1250
1422
  // src/evaluation/live-trace.ts
1251
- import { z as z28 } from "zod";
1423
+ import { z as z29 } from "zod";
1252
1424
  var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
1253
1425
  LiveTraceEventType2["THINKING"] = "thinking";
1254
1426
  LiveTraceEventType2["TOOL_USE"] = "tool_use";
@@ -1262,37 +1434,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
1262
1434
  LiveTraceEventType2["USER"] = "user";
1263
1435
  return LiveTraceEventType2;
1264
1436
  })(LiveTraceEventType || {});
1265
- var LiveTraceEventSchema = z28.object({
1437
+ var LiveTraceEventSchema = z29.object({
1266
1438
  /** The evaluation run ID */
1267
- evalRunId: z28.string(),
1439
+ evalRunId: z29.string(),
1268
1440
  /** The scenario ID being executed */
1269
- scenarioId: z28.string(),
1441
+ scenarioId: z29.string(),
1270
1442
  /** The scenario name for display */
1271
- scenarioName: z28.string(),
1443
+ scenarioName: z29.string(),
1272
1444
  /** The target ID (skill, agent, etc.) */
1273
- targetId: z28.string(),
1445
+ targetId: z29.string(),
1274
1446
  /** The target name for display */
1275
- targetName: z28.string(),
1447
+ targetName: z29.string(),
1276
1448
  /** Step number in the current scenario execution */
1277
- stepNumber: z28.number(),
1449
+ stepNumber: z29.number(),
1278
1450
  /** Type of trace event */
1279
- type: z28.enum(LiveTraceEventType),
1451
+ type: z29.enum(LiveTraceEventType),
1280
1452
  /** Tool name if this is a tool_use event */
1281
- toolName: z28.string().optional(),
1453
+ toolName: z29.string().optional(),
1282
1454
  /** Tool arguments preview (truncated JSON) */
1283
- toolArgs: z28.string().optional(),
1455
+ toolArgs: z29.string().optional(),
1284
1456
  /** Output preview (truncated text) */
1285
- outputPreview: z28.string().optional(),
1457
+ outputPreview: z29.string().optional(),
1286
1458
  /** File path for file operations */
1287
- filePath: z28.string().optional(),
1459
+ filePath: z29.string().optional(),
1288
1460
  /** Elapsed time in milliseconds for progress events */
1289
- elapsedMs: z28.number().optional(),
1461
+ elapsedMs: z29.number().optional(),
1290
1462
  /** Thinking/reasoning text from Claude */
1291
- thinking: z28.string().optional(),
1463
+ thinking: z29.string().optional(),
1292
1464
  /** Timestamp when this event occurred */
1293
- timestamp: z28.string(),
1465
+ timestamp: z29.string(),
1294
1466
  /** Whether this is the final event for this scenario */
1295
- isComplete: z28.boolean()
1467
+ isComplete: z29.boolean()
1296
1468
  });
1297
1469
  var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
1298
1470
  function parseTraceEventLine(line) {
@@ -1321,40 +1493,40 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
1321
1493
  TriggerType2["SCHEDULED"] = "SCHEDULED";
1322
1494
  return TriggerType2;
1323
1495
  })(TriggerType || {});
1324
- var TriggerMetadataSchema = z29.object({
1325
- version: z29.string().optional(),
1326
- resourceUpdated: z29.array(z29.string()).optional(),
1327
- scheduleId: z29.string().optional()
1496
+ var TriggerMetadataSchema = z30.object({
1497
+ version: z30.string().optional(),
1498
+ resourceUpdated: z30.array(z30.string()).optional(),
1499
+ scheduleId: z30.string().optional()
1328
1500
  });
1329
- var TriggerSchema = z29.object({
1330
- id: z29.string(),
1501
+ var TriggerSchema = z30.object({
1502
+ id: z30.string(),
1331
1503
  metadata: TriggerMetadataSchema.optional(),
1332
- type: z29.nativeEnum(TriggerType)
1504
+ type: z30.nativeEnum(TriggerType)
1333
1505
  });
1334
- var DiffLineTypeSchema = z29.enum(["added", "removed", "unchanged"]);
1335
- var DiffLineSchema = z29.object({
1506
+ var DiffLineTypeSchema = z30.enum(["added", "removed", "unchanged"]);
1507
+ var DiffLineSchema = z30.object({
1336
1508
  type: DiffLineTypeSchema,
1337
- content: z29.string(),
1338
- lineNumber: z29.number()
1339
- });
1340
- var DiffContentSchema = z29.object({
1341
- path: z29.string(),
1342
- expected: z29.string(),
1343
- actual: z29.string(),
1344
- diffLines: z29.array(DiffLineSchema),
1345
- renamedFrom: z29.string().optional(),
1509
+ content: z30.string(),
1510
+ lineNumber: z30.number()
1511
+ });
1512
+ var DiffContentSchema = z30.object({
1513
+ path: z30.string(),
1514
+ expected: z30.string(),
1515
+ actual: z30.string(),
1516
+ diffLines: z30.array(DiffLineSchema),
1517
+ renamedFrom: z30.string().optional(),
1346
1518
  /** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
1347
- isInfrastructure: z29.boolean().optional()
1519
+ isInfrastructure: z30.boolean().optional()
1348
1520
  });
1349
- var CommandExecutionSchema = z29.object({
1350
- command: z29.string(),
1351
- exitCode: z29.number(),
1352
- output: z29.string().optional(),
1353
- duration: z29.number()
1521
+ var CommandExecutionSchema = z30.object({
1522
+ command: z30.string(),
1523
+ exitCode: z30.number(),
1524
+ output: z30.string().optional(),
1525
+ duration: z30.number()
1354
1526
  });
1355
- var FileModificationSchema = z29.object({
1356
- path: z29.string(),
1357
- action: z29.enum(["created", "modified", "deleted"])
1527
+ var FileModificationSchema = z30.object({
1528
+ path: z30.string(),
1529
+ action: z30.enum(["created", "modified", "deleted"])
1358
1530
  });
1359
1531
  var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
1360
1532
  TemplateFileStatus2["NEW"] = "new";
@@ -1362,62 +1534,62 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
1362
1534
  TemplateFileStatus2["UNCHANGED"] = "unchanged";
1363
1535
  return TemplateFileStatus2;
1364
1536
  })(TemplateFileStatus || {});
1365
- var TemplateFileSchema = z29.object({
1537
+ var TemplateFileSchema = z30.object({
1366
1538
  /** Relative path within the template */
1367
- path: z29.string(),
1539
+ path: z30.string(),
1368
1540
  /** Full file content after execution */
1369
- content: z29.string(),
1541
+ content: z30.string(),
1370
1542
  /** File status (new, modified, unchanged) */
1371
- status: z29.enum(["new", "modified", "unchanged"]),
1543
+ status: z30.enum(["new", "modified", "unchanged"]),
1372
1544
  /** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
1373
- isInfrastructure: z29.boolean().optional()
1545
+ isInfrastructure: z30.boolean().optional()
1374
1546
  });
1375
- var ApiCallSchema = z29.object({
1376
- endpoint: z29.string(),
1377
- tokensUsed: z29.number(),
1378
- duration: z29.number()
1547
+ var ApiCallSchema = z30.object({
1548
+ endpoint: z30.string(),
1549
+ tokensUsed: z30.number(),
1550
+ duration: z30.number()
1379
1551
  });
1380
- var ExecutionTraceSchema = z29.object({
1381
- commands: z29.array(CommandExecutionSchema),
1382
- filesModified: z29.array(FileModificationSchema),
1383
- apiCalls: z29.array(ApiCallSchema),
1384
- totalDuration: z29.number()
1552
+ var ExecutionTraceSchema = z30.object({
1553
+ commands: z30.array(CommandExecutionSchema),
1554
+ filesModified: z30.array(FileModificationSchema),
1555
+ apiCalls: z30.array(ApiCallSchema),
1556
+ totalDuration: z30.number()
1385
1557
  });
1386
- var RunAnalysisFindingSchema = z29.object({
1387
- category: z29.enum([
1558
+ var RunAnalysisFindingSchema = z30.object({
1559
+ category: z30.enum([
1388
1560
  "failure_pattern",
1389
1561
  "cost_waste",
1390
1562
  "flakiness",
1391
1563
  "inefficiency",
1392
1564
  "positive"
1393
1565
  ]),
1394
- severity: z29.enum(["high", "medium", "low"]),
1395
- description: z29.string(),
1396
- affectedScenarios: z29.array(z29.string()),
1397
- recommendation: z29.string().optional()
1566
+ severity: z30.enum(["high", "medium", "low"]),
1567
+ description: z30.string(),
1568
+ affectedScenarios: z30.array(z30.string()),
1569
+ recommendation: z30.string().optional()
1398
1570
  });
1399
- var RunAnalysisSchema = z29.object({
1400
- generatedAt: z29.string(),
1401
- summary: z29.string(),
1402
- findings: z29.array(RunAnalysisFindingSchema)
1571
+ var RunAnalysisSchema = z30.object({
1572
+ generatedAt: z30.string(),
1573
+ summary: z30.string(),
1574
+ findings: z30.array(RunAnalysisFindingSchema)
1403
1575
  });
1404
1576
  var EvalRunSchema = TenantEntitySchema.extend({
1405
1577
  /** Agent ID for this run */
1406
- agentId: z29.string().optional(),
1578
+ agentId: z30.string().optional(),
1407
1579
  /** Preset ID that originated this run (optional) */
1408
- presetId: z29.string().optional(),
1580
+ presetId: z30.string().optional(),
1409
1581
  /** Skill IDs for this run */
1410
- skillIds: z29.array(z29.string()).optional(),
1582
+ skillIds: z30.array(z30.string()).optional(),
1411
1583
  /** Map of skillId to skillVersionId for this run */
1412
- skillVersions: z29.record(z29.string(), z29.string()).optional(),
1584
+ skillVersions: z30.record(z30.string(), z30.string()).optional(),
1413
1585
  /** Scenario IDs to run (always present — resolved server-side from tags when needed) */
1414
- scenarioIds: z29.array(z29.string()),
1586
+ scenarioIds: z30.array(z30.string()),
1415
1587
  /** Current status */
1416
1588
  status: EvalStatusSchema,
1417
1589
  /** Progress percentage (0-100) */
1418
- progress: z29.number(),
1590
+ progress: z30.number(),
1419
1591
  /** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
1420
- results: z29.array(z29.lazy(() => EvalRunResultSchema)),
1592
+ results: z30.array(z30.lazy(() => EvalRunResultSchema)),
1421
1593
  /** Aggregated metrics across all results */
1422
1594
  aggregateMetrics: EvalMetricsSchema,
1423
1595
  /** Aggregated LLM trace summary */
@@ -1425,41 +1597,45 @@ var EvalRunSchema = TenantEntitySchema.extend({
1425
1597
  /** What triggered this run */
1426
1598
  trigger: TriggerSchema.optional(),
1427
1599
  /** When the run started (set when evaluation is triggered) */
1428
- startedAt: z29.string().optional(),
1600
+ startedAt: z30.string().optional(),
1429
1601
  /** When the run completed */
1430
- completedAt: z29.string().optional(),
1602
+ completedAt: z30.string().optional(),
1431
1603
  /** Live trace events captured during execution (for playback on results page) */
1432
- liveTraceEvents: z29.array(LiveTraceEventSchema).optional(),
1604
+ liveTraceEvents: z30.array(LiveTraceEventSchema).optional(),
1433
1605
  /** Remote job ID for tracking execution in Dev Machines */
1434
- jobId: z29.string().optional(),
1606
+ jobId: z30.string().optional(),
1435
1607
  /** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
1436
- jobStatus: z29.string().optional(),
1608
+ jobStatus: z30.string().optional(),
1437
1609
  /** Remote job error message if the job failed */
1438
- jobError: z29.string().optional(),
1610
+ jobError: z30.string().optional(),
1439
1611
  /** Timestamp of the last job status check */
1440
- jobStatusCheckedAt: z29.string().optional(),
1612
+ jobStatusCheckedAt: z30.string().optional(),
1441
1613
  /** MCP server IDs to enable for this run (optional) */
1442
- mcpIds: z29.array(z29.string()).optional(),
1614
+ mcpIds: z30.array(z30.string()).optional(),
1443
1615
  /** Sub-agent IDs to enable for this run (optional) */
1444
- subAgentIds: z29.array(z29.string()).optional(),
1616
+ subAgentIds: z30.array(z30.string()).optional(),
1445
1617
  /** Rule IDs to enable for this run (optional) */
1446
- ruleIds: z29.array(z29.string()).optional(),
1618
+ ruleIds: z30.array(z30.string()).optional(),
1619
+ /** Unified capability IDs (replaces skill/mcp/subAgent/ruleIds) */
1620
+ capabilityIds: z30.array(z30.string()).optional(),
1621
+ /** Map of capabilityId to capabilityVersionId for version pinning */
1622
+ capabilityVersions: z30.record(z30.string(), z30.string()).optional(),
1447
1623
  /** Tags used to select scenarios for this run (for traceability) */
1448
- tags: z29.array(z29.string()).optional(),
1624
+ tags: z30.array(z30.string()).optional(),
1449
1625
  /** How many times each scenario is executed within this eval run. Default: 1. Max: 20. */
1450
- runsPerScenario: z29.number().int().min(1).max(20).optional(),
1626
+ runsPerScenario: z30.number().int().min(1).max(20).optional(),
1451
1627
  /** Snapshot of agent configuration captured at run creation time */
1452
- agentSnapshot: z29.object({
1453
- name: z29.string().optional(),
1628
+ agentSnapshot: z30.object({
1629
+ name: z30.string().optional(),
1454
1630
  agentType: AgentTypeSchema.optional(),
1455
1631
  runCommand: AgentRunCommandSchema.optional(),
1456
- systemPrompt: z29.string().nullable().optional(),
1632
+ systemPrompt: z30.string().nullable().optional(),
1457
1633
  modelConfig: ModelConfigSchema.optional()
1458
1634
  }).optional(),
1459
1635
  /** UUID linking all runs in a comparison group */
1460
- comparisonGroupId: z29.string().optional(),
1636
+ comparisonGroupId: z30.string().optional(),
1461
1637
  /** Human-readable label for this variant (e.g., "MCP: Wix Stores") */
1462
- comparisonLabel: z29.string().optional(),
1638
+ comparisonLabel: z30.string().optional(),
1463
1639
  /** LLM-generated analysis of the completed run */
1464
1640
  runAnalysis: RunAnalysisSchema.optional()
1465
1641
  });
@@ -1477,60 +1653,60 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
1477
1653
  agentSnapshot: true
1478
1654
  }).extend({
1479
1655
  /** Optional on input — backend resolves from tags when not provided */
1480
- scenarioIds: z29.array(z29.string()).optional()
1656
+ scenarioIds: z30.array(z30.string()).optional()
1481
1657
  }).refine(
1482
1658
  (data) => data.scenarioIds && data.scenarioIds.length > 0 || data.tags && data.tags.length > 0,
1483
1659
  { message: "Either scenarioIds or tags must be provided" }
1484
1660
  );
1485
- var EvaluationProgressSchema = z29.object({
1486
- runId: z29.string(),
1487
- targetId: z29.string(),
1488
- totalScenarios: z29.number(),
1489
- completedScenarios: z29.number(),
1490
- scenarioProgress: z29.array(
1491
- z29.object({
1492
- scenarioId: z29.string(),
1493
- currentStep: z29.string(),
1494
- error: z29.string().optional()
1661
+ var EvaluationProgressSchema = z30.object({
1662
+ runId: z30.string(),
1663
+ targetId: z30.string(),
1664
+ totalScenarios: z30.number(),
1665
+ completedScenarios: z30.number(),
1666
+ scenarioProgress: z30.array(
1667
+ z30.object({
1668
+ scenarioId: z30.string(),
1669
+ currentStep: z30.string(),
1670
+ error: z30.string().optional()
1495
1671
  })
1496
1672
  ),
1497
- createdAt: z29.number()
1498
- });
1499
- var EvaluationLogSchema = z29.object({
1500
- runId: z29.string(),
1501
- scenarioId: z29.string(),
1502
- log: z29.object({
1503
- level: z29.enum(["info", "error", "debug"]),
1504
- message: z29.string().optional(),
1505
- args: z29.array(z29.any()).optional(),
1506
- error: z29.string().optional()
1673
+ createdAt: z30.number()
1674
+ });
1675
+ var EvaluationLogSchema = z30.object({
1676
+ runId: z30.string(),
1677
+ scenarioId: z30.string(),
1678
+ log: z30.object({
1679
+ level: z30.enum(["info", "error", "debug"]),
1680
+ message: z30.string().optional(),
1681
+ args: z30.array(z30.any()).optional(),
1682
+ error: z30.string().optional()
1507
1683
  })
1508
1684
  });
1509
1685
  var LLM_TIMEOUT = 12e4;
1510
1686
 
1511
1687
  // src/evaluation/conversation.ts
1512
- import { z as z30 } from "zod";
1513
- var TextBlockSchema = z30.object({
1514
- type: z30.literal("text"),
1515
- text: z30.string()
1516
- });
1517
- var ThinkingBlockSchema = z30.object({
1518
- type: z30.literal("thinking"),
1519
- thinking: z30.string()
1520
- });
1521
- var ToolUseBlockSchema = z30.object({
1522
- type: z30.literal("tool_use"),
1523
- toolName: z30.string(),
1524
- toolId: z30.string(),
1525
- input: z30.unknown()
1526
- });
1527
- var ToolResultBlockSchema = z30.object({
1528
- type: z30.literal("tool_result"),
1529
- toolUseId: z30.string(),
1530
- content: z30.string(),
1531
- isError: z30.boolean().optional()
1532
- });
1533
- var ConversationBlockSchema = z30.discriminatedUnion("type", [
1688
+ import { z as z31 } from "zod";
1689
+ var TextBlockSchema = z31.object({
1690
+ type: z31.literal("text"),
1691
+ text: z31.string()
1692
+ });
1693
+ var ThinkingBlockSchema = z31.object({
1694
+ type: z31.literal("thinking"),
1695
+ thinking: z31.string()
1696
+ });
1697
+ var ToolUseBlockSchema = z31.object({
1698
+ type: z31.literal("tool_use"),
1699
+ toolName: z31.string(),
1700
+ toolId: z31.string(),
1701
+ input: z31.unknown()
1702
+ });
1703
+ var ToolResultBlockSchema = z31.object({
1704
+ type: z31.literal("tool_result"),
1705
+ toolUseId: z31.string(),
1706
+ content: z31.string(),
1707
+ isError: z31.boolean().optional()
1708
+ });
1709
+ var ConversationBlockSchema = z31.discriminatedUnion("type", [
1534
1710
  TextBlockSchema,
1535
1711
  ThinkingBlockSchema,
1536
1712
  ToolUseBlockSchema,
@@ -1541,18 +1717,18 @@ var ConversationMessageRoles = [
1541
1717
  "user",
1542
1718
  "system"
1543
1719
  ];
1544
- var ConversationMessageSchema = z30.object({
1545
- role: z30.enum(ConversationMessageRoles),
1546
- content: z30.array(ConversationBlockSchema),
1547
- timestamp: z30.string()
1720
+ var ConversationMessageSchema = z31.object({
1721
+ role: z31.enum(ConversationMessageRoles),
1722
+ content: z31.array(ConversationBlockSchema),
1723
+ timestamp: z31.string()
1548
1724
  });
1549
- var ScenarioConversationSchema = z30.object({
1550
- id: z30.string(),
1551
- projectId: z30.string(),
1552
- evalRunId: z30.string(),
1553
- resultId: z30.string(),
1554
- messages: z30.array(ConversationMessageSchema),
1555
- createdAt: z30.string()
1725
+ var ScenarioConversationSchema = z31.object({
1726
+ id: z31.string(),
1727
+ projectId: z31.string(),
1728
+ evalRunId: z31.string(),
1729
+ resultId: z31.string(),
1730
+ messages: z31.array(ConversationMessageSchema),
1731
+ createdAt: z31.string()
1556
1732
  });
1557
1733
 
1558
1734
  // src/evaluation/eval-result.ts
@@ -1563,98 +1739,98 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
1563
1739
  AssertionResultStatus2["ERROR"] = "error";
1564
1740
  return AssertionResultStatus2;
1565
1741
  })(AssertionResultStatus || {});
1566
- var AssertionResultSchema = z31.object({
1567
- id: z31.string(),
1568
- assertionId: z31.string(),
1569
- assertionType: z31.string(),
1570
- assertionName: z31.string(),
1571
- status: z31.enum(AssertionResultStatus),
1572
- message: z31.string().optional(),
1573
- expected: z31.string().optional(),
1574
- actual: z31.string().optional(),
1575
- duration: z31.number().optional(),
1576
- details: z31.record(z31.string(), z31.unknown()).optional(),
1577
- llmTraceSteps: z31.array(LLMTraceStepSchema).optional()
1578
- });
1579
- var EvalRunResultSchema = z31.object({
1580
- id: z31.string(),
1581
- targetId: z31.string(),
1582
- targetName: z31.string().optional(),
1742
+ var AssertionResultSchema = z32.object({
1743
+ id: z32.string(),
1744
+ assertionId: z32.string(),
1745
+ assertionType: z32.string(),
1746
+ assertionName: z32.string(),
1747
+ status: z32.enum(AssertionResultStatus),
1748
+ message: z32.string().optional(),
1749
+ expected: z32.string().optional(),
1750
+ actual: z32.string().optional(),
1751
+ duration: z32.number().optional(),
1752
+ details: z32.record(z32.string(), z32.unknown()).optional(),
1753
+ llmTraceSteps: z32.array(LLMTraceStepSchema).optional()
1754
+ });
1755
+ var EvalRunResultSchema = z32.object({
1756
+ id: z32.string(),
1757
+ targetId: z32.string(),
1758
+ targetName: z32.string().optional(),
1583
1759
  /** SkillVersion ID used for this evaluation (for version tracking) */
1584
- skillVersionId: z31.string().optional(),
1760
+ skillVersionId: z32.string().optional(),
1585
1761
  /** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
1586
- skillVersion: z31.string().optional(),
1587
- scenarioId: z31.string(),
1588
- scenarioName: z31.string(),
1762
+ skillVersion: z32.string().optional(),
1763
+ scenarioId: z32.string(),
1764
+ scenarioName: z32.string(),
1589
1765
  /** Snapshot of the trigger prompt used during the run (prevents stale display after edits) */
1590
- triggerPrompt: z31.string().optional(),
1766
+ triggerPrompt: z32.string().optional(),
1591
1767
  modelConfig: ModelConfigSchema.optional(),
1592
- assertionResults: z31.array(AssertionResultSchema),
1768
+ assertionResults: z32.array(AssertionResultSchema),
1593
1769
  metrics: EvalMetricsSchema.optional(),
1594
- passed: z31.number(),
1595
- failed: z31.number(),
1596
- passRate: z31.number(),
1597
- duration: z31.number(),
1598
- outputText: z31.string().optional(),
1599
- files: z31.array(ExpectedFileSchema).optional(),
1600
- fileDiffs: z31.array(DiffContentSchema).optional(),
1770
+ passed: z32.number(),
1771
+ failed: z32.number(),
1772
+ passRate: z32.number(),
1773
+ duration: z32.number(),
1774
+ outputText: z32.string().optional(),
1775
+ files: z32.array(ExpectedFileSchema).optional(),
1776
+ fileDiffs: z32.array(DiffContentSchema).optional(),
1601
1777
  /** Full template files after execution with status indicators */
1602
- templateFiles: z31.array(TemplateFileSchema).optional(),
1603
- startedAt: z31.string().optional(),
1604
- completedAt: z31.string().optional(),
1778
+ templateFiles: z32.array(TemplateFileSchema).optional(),
1779
+ startedAt: z32.string().optional(),
1780
+ completedAt: z32.string().optional(),
1605
1781
  llmTrace: LLMTraceSchema.optional(),
1606
1782
  /** Full conversation messages (only present in transit; stripped before DB storage) */
1607
- conversation: z31.array(ConversationMessageSchema).optional(),
1783
+ conversation: z32.array(ConversationMessageSchema).optional(),
1608
1784
  /** 0-based iteration index when a scenario is run multiple times within a single eval run */
1609
- iterationIndex: z31.number().int().min(0).optional()
1610
- });
1611
- var PromptResultSchema = z31.object({
1612
- text: z31.string(),
1613
- files: z31.array(z31.unknown()).optional(),
1614
- finishReason: z31.string().optional(),
1615
- reasoning: z31.string().optional(),
1616
- reasoningDetails: z31.unknown().optional(),
1617
- toolCalls: z31.array(z31.unknown()).optional(),
1618
- toolResults: z31.array(z31.unknown()).optional(),
1619
- warnings: z31.array(z31.unknown()).optional(),
1620
- sources: z31.array(z31.unknown()).optional(),
1621
- steps: z31.array(z31.unknown()),
1622
- generationTimeMs: z31.number(),
1623
- prompt: z31.string(),
1624
- systemPrompt: z31.string(),
1625
- usage: z31.object({
1626
- totalTokens: z31.number().optional(),
1627
- totalMicrocentsSpent: z31.number().optional()
1785
+ iterationIndex: z32.number().int().min(0).optional()
1786
+ });
1787
+ var PromptResultSchema = z32.object({
1788
+ text: z32.string(),
1789
+ files: z32.array(z32.unknown()).optional(),
1790
+ finishReason: z32.string().optional(),
1791
+ reasoning: z32.string().optional(),
1792
+ reasoningDetails: z32.unknown().optional(),
1793
+ toolCalls: z32.array(z32.unknown()).optional(),
1794
+ toolResults: z32.array(z32.unknown()).optional(),
1795
+ warnings: z32.array(z32.unknown()).optional(),
1796
+ sources: z32.array(z32.unknown()).optional(),
1797
+ steps: z32.array(z32.unknown()),
1798
+ generationTimeMs: z32.number(),
1799
+ prompt: z32.string(),
1800
+ systemPrompt: z32.string(),
1801
+ usage: z32.object({
1802
+ totalTokens: z32.number().optional(),
1803
+ totalMicrocentsSpent: z32.number().optional()
1628
1804
  })
1629
1805
  });
1630
- var EvaluationResultSchema = z31.object({
1631
- id: z31.string(),
1632
- runId: z31.string(),
1633
- timestamp: z31.number(),
1806
+ var EvaluationResultSchema = z32.object({
1807
+ id: z32.string(),
1808
+ runId: z32.string(),
1809
+ timestamp: z32.number(),
1634
1810
  promptResult: PromptResultSchema,
1635
- testResults: z31.array(z31.unknown()),
1636
- tags: z31.array(z31.string()).optional(),
1637
- feedback: z31.string().optional(),
1638
- score: z31.number(),
1639
- suiteId: z31.string().optional()
1640
- });
1641
- var LeanEvaluationResultSchema = z31.object({
1642
- id: z31.string(),
1643
- runId: z31.string(),
1644
- timestamp: z31.number(),
1645
- tags: z31.array(z31.string()).optional(),
1646
- scenarioId: z31.string(),
1647
- scenarioVersion: z31.number().optional(),
1648
- targetId: z31.string(),
1649
- targetVersion: z31.number().optional(),
1650
- suiteId: z31.string().optional(),
1651
- score: z31.number(),
1652
- time: z31.number().optional(),
1653
- microcentsSpent: z31.number().optional()
1811
+ testResults: z32.array(z32.unknown()),
1812
+ tags: z32.array(z32.string()).optional(),
1813
+ feedback: z32.string().optional(),
1814
+ score: z32.number(),
1815
+ suiteId: z32.string().optional()
1816
+ });
1817
+ var LeanEvaluationResultSchema = z32.object({
1818
+ id: z32.string(),
1819
+ runId: z32.string(),
1820
+ timestamp: z32.number(),
1821
+ tags: z32.array(z32.string()).optional(),
1822
+ scenarioId: z32.string(),
1823
+ scenarioVersion: z32.number().optional(),
1824
+ targetId: z32.string(),
1825
+ targetVersion: z32.number().optional(),
1826
+ suiteId: z32.string().optional(),
1827
+ score: z32.number(),
1828
+ time: z32.number().optional(),
1829
+ microcentsSpent: z32.number().optional()
1654
1830
  });
1655
1831
 
1656
1832
  // src/evaluation/eval-run-folder.ts
1657
- import { z as z32 } from "zod";
1833
+ import { z as z33 } from "zod";
1658
1834
  var EvalRunFolderSchema = TenantEntitySchema.extend({});
1659
1835
  var CreateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
1660
1836
  id: true,
@@ -1668,26 +1844,26 @@ var UpdateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
1668
1844
  updatedAt: true,
1669
1845
  deleted: true
1670
1846
  }).partial();
1671
- var EvalRunFolderMembershipSchema = z32.object({
1672
- folderId: z32.string(),
1673
- evalRunId: z32.string(),
1674
- projectId: z32.string(),
1675
- createdAt: z32.string()
1847
+ var EvalRunFolderMembershipSchema = z33.object({
1848
+ folderId: z33.string(),
1849
+ evalRunId: z33.string(),
1850
+ projectId: z33.string(),
1851
+ createdAt: z33.string()
1676
1852
  });
1677
1853
 
1678
1854
  // src/project/project.ts
1679
- import { z as z33 } from "zod";
1855
+ import { z as z34 } from "zod";
1680
1856
  var ProjectSchema = BaseEntitySchema.extend({
1681
- appId: z33.string().optional().describe("The ID of the app in Dev Center"),
1682
- scenarioTags: z33.array(z33.string()).optional().describe("Project-level tag vocabulary for scenarios"),
1857
+ appId: z34.string().optional().describe("The ID of the app in Dev Center"),
1858
+ scenarioTags: z34.array(z34.string()).optional().describe("Project-level tag vocabulary for scenarios"),
1683
1859
  /** Per-project Wix auth token (write-only — never returned in GET responses). null = clear. */
1684
- wixAuthToken: z33.string().nullable().optional().describe("Wix auth token for CLI/MCP authentication (encrypted at rest)"),
1860
+ wixAuthToken: z34.string().nullable().optional().describe("Wix auth token for CLI/MCP authentication (encrypted at rest)"),
1685
1861
  /** Per-project Base44 auth file content (write-only — never returned in GET responses). null = clear. */
1686
- base44AuthFile: z33.string().nullable().optional().describe("Base64-encoded Base44 auth file content (encrypted at rest)"),
1862
+ base44AuthFile: z34.string().nullable().optional().describe("Base64-encoded Base44 auth file content (encrypted at rest)"),
1687
1863
  /** Resolved at runtime from the encrypted Wix auth token */
1688
- wixAuthEmail: z33.string().optional().describe("Email associated with the Wix auth token (resolved at runtime)"),
1864
+ wixAuthEmail: z34.string().optional().describe("Email associated with the Wix auth token (resolved at runtime)"),
1689
1865
  /** Resolved at runtime from the encrypted Base44 auth file */
1690
- base44AuthEmail: z33.string().optional().describe("Email from the Base44 auth file (resolved at runtime)")
1866
+ base44AuthEmail: z34.string().optional().describe("Email from the Base44 auth file (resolved at runtime)")
1691
1867
  });
1692
1868
  var CreateProjectInputSchema = ProjectSchema.omit({
1693
1869
  id: true,
@@ -1697,7 +1873,7 @@ var CreateProjectInputSchema = ProjectSchema.omit({
1697
1873
  wixAuthEmail: true,
1698
1874
  base44AuthEmail: true
1699
1875
  }).extend({
1700
- appId: z33.string().describe(
1876
+ appId: z34.string().describe(
1701
1877
  "Required: The ID of the app in Dev Center for credential scoping"
1702
1878
  )
1703
1879
  });
@@ -1717,7 +1893,7 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
1717
1893
  var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
1718
1894
 
1719
1895
  // src/schedule/eval-schedule.ts
1720
- import { z as z34 } from "zod";
1896
+ import { z as z35 } from "zod";
1721
1897
  var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
1722
1898
  FrequencyType2["DAILY"] = "daily";
1723
1899
  FrequencyType2["WEEKDAY"] = "weekday";
@@ -1727,29 +1903,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
1727
1903
  })(FrequencyType || {});
1728
1904
  var EvalScheduleSchema = TenantEntitySchema.extend({
1729
1905
  /** Whether the schedule is active */
1730
- enabled: z34.boolean(),
1906
+ enabled: z35.boolean(),
1731
1907
  /** Test suite to run */
1732
- suiteId: z34.string(),
1908
+ suiteId: z35.string(),
1733
1909
  /** Preset that provides agent + entities for this schedule */
1734
- presetId: z34.string(),
1910
+ presetId: z35.string(),
1735
1911
  /** How often to run */
1736
- frequencyType: z34.nativeEnum(FrequencyType),
1912
+ frequencyType: z35.nativeEnum(FrequencyType),
1737
1913
  /** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
1738
- timeOfDay: z34.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
1914
+ timeOfDay: z35.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
1739
1915
  /** Day of week (0=Sun, 6=Sat) for weekly schedules */
1740
- dayOfWeek: z34.number().min(0).max(6).optional(),
1916
+ dayOfWeek: z35.number().min(0).max(6).optional(),
1741
1917
  /** Day of month (1-31) for monthly schedules */
1742
- dayOfMonth: z34.number().min(1).max(31).optional(),
1918
+ dayOfMonth: z35.number().min(1).max(31).optional(),
1743
1919
  /** IANA timezone (e.g., 'America/New_York') */
1744
- timezone: z34.string(),
1920
+ timezone: z35.string(),
1745
1921
  /** ID of the last eval run created by this schedule */
1746
- lastRunId: z34.string().optional(),
1922
+ lastRunId: z35.string().optional(),
1747
1923
  /** Denormalized status of the last run */
1748
- lastRunStatus: z34.string().optional(),
1924
+ lastRunStatus: z35.string().optional(),
1749
1925
  /** ISO timestamp of the last run */
1750
- lastRunAt: z34.string().optional(),
1926
+ lastRunAt: z35.string().optional(),
1751
1927
  /** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
1752
- nextRunAt: z34.string().optional()
1928
+ nextRunAt: z35.string().optional()
1753
1929
  });
1754
1930
  function isValidTimezone(tz) {
1755
1931
  try {
@@ -1762,14 +1938,14 @@ function isValidTimezone(tz) {
1762
1938
  function validateScheduleFields(data, ctx, options) {
1763
1939
  if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
1764
1940
  ctx.addIssue({
1765
- code: z34.ZodIssueCode.custom,
1941
+ code: z35.ZodIssueCode.custom,
1766
1942
  message: "dayOfWeek is required for weekly schedules",
1767
1943
  path: ["dayOfWeek"]
1768
1944
  });
1769
1945
  }
1770
1946
  if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
1771
1947
  ctx.addIssue({
1772
- code: z34.ZodIssueCode.custom,
1948
+ code: z35.ZodIssueCode.custom,
1773
1949
  message: "dayOfMonth is required for monthly schedules",
1774
1950
  path: ["dayOfMonth"]
1775
1951
  });
@@ -1777,7 +1953,7 @@ function validateScheduleFields(data, ctx, options) {
1777
1953
  const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
1778
1954
  if (shouldValidateTz && !isValidTimezone(data.timezone)) {
1779
1955
  ctx.addIssue({
1780
- code: z34.ZodIssueCode.custom,
1956
+ code: z35.ZodIssueCode.custom,
1781
1957
  message: "Invalid IANA timezone",
1782
1958
  path: ["timezone"]
1783
1959
  });
@@ -1841,6 +2017,13 @@ export {
1841
2017
  BulkImportResultItemSchema,
1842
2018
  BulkImportResultSchema,
1843
2019
  BulkImportSkillsInputSchema,
2020
+ CAPABILITY_NAME_REGEX,
2021
+ CapabilityContentSchema,
2022
+ CapabilitySchema,
2023
+ CapabilityTypeSchema,
2024
+ CapabilityVersionOriginSchema,
2025
+ CapabilityVersionSchema,
2026
+ CapabilityWithLatestVersionSchema,
1844
2027
  ClaudeModel,
1845
2028
  ClaudeModelSchema,
1846
2029
  CommandExecutionSchema,
@@ -1851,6 +2034,8 @@ export {
1851
2034
  CostAssertionSchema,
1852
2035
  CostConfigSchema,
1853
2036
  CreateAgentInputSchema,
2037
+ CreateCapabilityInputSchema,
2038
+ CreateCapabilityVersionInputSchema,
1854
2039
  CreateEvalRunFolderInputSchema,
1855
2040
  CreateEvalRunInputSchema,
1856
2041
  CreateEvalScheduleInputSchema,
@@ -1890,6 +2075,7 @@ export {
1890
2075
  FilePresenceTestSchema,
1891
2076
  FrequencyType,
1892
2077
  GitHubSourceSchema,
2078
+ InitialCapabilityVersionInputSchema,
1893
2079
  InitialVersionInputSchema,
1894
2080
  LEGACY_MODEL_ID_MAP,
1895
2081
  LLMBreakdownStatsSchema,
@@ -1966,6 +2152,7 @@ export {
1966
2152
  TriggerSchema,
1967
2153
  TriggerType,
1968
2154
  UpdateAgentInputSchema,
2155
+ UpdateCapabilityInputSchema,
1969
2156
  UpdateEvalRunFolderInputSchema,
1970
2157
  UpdateEvalScheduleInputSchema,
1971
2158
  UpdateMcpInputSchema,
@@ -1978,12 +2165,20 @@ export {
1978
2165
  UpdateTestScenarioInputSchema,
1979
2166
  UpdateTestSuiteInputSchema,
1980
2167
  VitestTestSchema,
2168
+ capabilityToMcp,
2169
+ capabilityToRule,
2170
+ capabilityToSkill,
2171
+ capabilityToSkillWithLatestVersion,
2172
+ capabilityToSubAgent,
2173
+ capabilityVersionToSkillVersion,
1981
2174
  classifyAssertionRef,
1982
2175
  formatTraceEventLine,
1983
2176
  getSystemAssertion,
1984
2177
  getSystemAssertions,
2178
+ groupCapabilitiesByType,
1985
2179
  isAllowedBuildCommandString,
1986
2180
  isSystemAssertionId,
2181
+ isValidCapabilityName,
1987
2182
  isValidSkillFolderName,
1988
2183
  normalizeBatchAssertionLink,
1989
2184
  normalizeModelId,