@wix/evalforge-types 0.74.0 → 0.76.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -340,25 +340,19 @@ import { z as z9 } from "zod";
340
340
  var PresetSchema = TenantEntitySchema.extend({
341
341
  /** Agent ID for this preset */
342
342
  agentId: z9.string(),
343
- /** Skill IDs included in this preset */
344
- skillIds: z9.array(z9.string()).default([]),
345
- /** Optional map of skillId to skillVersionId for version pinning */
346
- skillVersions: z9.record(z9.string(), z9.string()).optional(),
347
- /** MCP server IDs included in this preset */
348
- mcpIds: z9.array(z9.string()).default([]),
349
- /** Sub-agent IDs included in this preset */
350
- subAgentIds: z9.array(z9.string()).default([]),
351
- /** Rule IDs included in this preset */
352
- ruleIds: z9.array(z9.string()).default([])
353
- });
354
- var atLeastOneEntity = (data) => (data.skillIds?.length ?? 0) > 0 || (data.mcpIds?.length ?? 0) > 0 || (data.subAgentIds?.length ?? 0) > 0 || (data.ruleIds?.length ?? 0) > 0;
355
- var AT_LEAST_ONE_ENTITY_MESSAGE = "At least one of skillIds, mcpIds, subAgentIds, or ruleIds must be non-empty";
343
+ /** Unified capability IDs */
344
+ capabilityIds: z9.array(z9.string()).optional(),
345
+ /** Map of capabilityId to capabilityVersionId for version pinning */
346
+ capabilityVersions: z9.record(z9.string(), z9.string()).optional()
347
+ });
348
+ var hasCapabilities = (data) => (data.capabilityIds?.length ?? 0) > 0;
349
+ var CAPABILITY_IDS_REQUIRED_MESSAGE = "capabilityIds must be non-empty";
356
350
  var CreatePresetInputSchema = PresetSchema.omit({
357
351
  id: true,
358
352
  createdAt: true,
359
353
  updatedAt: true,
360
354
  deleted: true
361
- }).refine(atLeastOneEntity, { message: AT_LEAST_ONE_ENTITY_MESSAGE });
355
+ }).refine(hasCapabilities, { message: CAPABILITY_IDS_REQUIRED_MESSAGE });
362
356
  var UpdatePresetInputSchema = PresetSchema.omit({
363
357
  id: true,
364
358
  createdAt: true,
@@ -366,11 +360,179 @@ var UpdatePresetInputSchema = PresetSchema.omit({
366
360
  deleted: true
367
361
  }).partial();
368
362
 
363
+ // src/target/capability.ts
364
+ import { z as z10 } from "zod";
365
+ var CapabilityTypeSchema = z10.enum([
366
+ "SKILL",
367
+ "SUB_AGENT",
368
+ "RULE",
369
+ "MCP"
370
+ ]);
371
+ var CAPABILITY_NAME_REGEX = /^[a-z0-9]+(-[a-z0-9]+)*$/;
372
+ function isValidCapabilityName(name) {
373
+ return typeof name === "string" && name.length > 0 && CAPABILITY_NAME_REGEX.test(name);
374
+ }
375
+ var KEBAB_CASE_MESSAGE2 = "Name must be in kebab-case (lowercase letters, numbers, hyphens only, e.g. my-capability)";
376
+ var CapabilityContentSchema = z10.record(z10.string(), z10.unknown());
377
+ var CapabilityVersionOriginSchema = z10.enum(["manual", "pr", "master"]);
378
+ var CapabilitySchema = TenantEntitySchema.extend({
379
+ capabilityType: CapabilityTypeSchema,
380
+ source: GitHubSourceSchema.optional()
381
+ });
382
+ var CapabilityVersionSchema = z10.object({
383
+ id: z10.string(),
384
+ projectId: z10.string(),
385
+ capabilityId: z10.string(),
386
+ version: z10.string(),
387
+ origin: CapabilityVersionOriginSchema,
388
+ source: GitHubSourceSchema.optional(),
389
+ content: CapabilityContentSchema.optional(),
390
+ notes: z10.string().optional(),
391
+ createdAt: z10.string()
392
+ });
393
+ var CapabilityWithLatestVersionSchema = CapabilitySchema.extend({
394
+ latestVersion: CapabilityVersionSchema.optional()
395
+ });
396
+ var CapabilityInputBaseSchema = CapabilitySchema.omit({
397
+ id: true,
398
+ createdAt: true,
399
+ updatedAt: true,
400
+ deleted: true,
401
+ description: true,
402
+ source: true
403
+ }).extend({
404
+ description: z10.string().optional(),
405
+ source: GitHubSourceSchema.optional()
406
+ });
407
+ var InitialCapabilityVersionInputSchema = z10.object({
408
+ content: CapabilityContentSchema.optional(),
409
+ notes: z10.string().optional(),
410
+ source: GitHubSourceSchema.optional(),
411
+ version: z10.string().optional(),
412
+ origin: CapabilityVersionOriginSchema.optional()
413
+ });
414
+ var CreateCapabilityInputSchema = CapabilityInputBaseSchema.extend({
415
+ initialVersion: InitialCapabilityVersionInputSchema.optional()
416
+ }).refine((data) => isValidCapabilityName(data.name), {
417
+ message: KEBAB_CASE_MESSAGE2,
418
+ path: ["name"]
419
+ });
420
+ var UpdateCapabilityInputSchema = CapabilityInputBaseSchema.omit({
421
+ capabilityType: true
422
+ }).partial().refine(
423
+ (data) => data.name === void 0 || isValidCapabilityName(data.name),
424
+ { message: KEBAB_CASE_MESSAGE2, path: ["name"] }
425
+ );
426
+ var CreateCapabilityVersionInputSchema = z10.object({
427
+ source: GitHubSourceSchema.optional(),
428
+ version: z10.string().min(1),
429
+ notes: z10.string().optional(),
430
+ origin: CapabilityVersionOriginSchema.optional(),
431
+ content: CapabilityContentSchema.optional()
432
+ });
433
+
434
+ // src/target/capability-converters.ts
435
+ function capabilityToSkill(cap) {
436
+ return {
437
+ id: cap.id,
438
+ projectId: cap.projectId,
439
+ name: cap.name,
440
+ description: cap.description,
441
+ source: cap.source,
442
+ createdAt: cap.createdAt,
443
+ updatedAt: cap.updatedAt,
444
+ deleted: cap.deleted
445
+ };
446
+ }
447
+ function capabilityVersionToSkillVersion(cv) {
448
+ const content = cv.content;
449
+ return {
450
+ id: cv.id,
451
+ projectId: cv.projectId,
452
+ skillId: cv.capabilityId,
453
+ version: cv.version,
454
+ origin: cv.origin,
455
+ source: cv.source,
456
+ files: content?.files,
457
+ notes: cv.notes,
458
+ createdAt: cv.createdAt
459
+ };
460
+ }
461
+ function capabilityToSkillWithLatestVersion(cap) {
462
+ const skill = capabilityToSkill(cap);
463
+ const latestVersion = cap.latestVersion ? capabilityVersionToSkillVersion(cap.latestVersion) : void 0;
464
+ return { ...skill, latestVersion };
465
+ }
466
+ function capabilityToSubAgent(cap) {
467
+ const content = cap.latestVersion?.content;
468
+ return {
469
+ id: cap.id,
470
+ projectId: cap.projectId,
471
+ name: cap.name,
472
+ description: cap.description,
473
+ subAgentMd: content?.subAgentMd ?? "",
474
+ source: cap.source,
475
+ createdAt: cap.createdAt,
476
+ updatedAt: cap.updatedAt,
477
+ deleted: cap.deleted
478
+ };
479
+ }
480
+ function capabilityToRule(cap) {
481
+ const content = cap.latestVersion?.content;
482
+ return {
483
+ id: cap.id,
484
+ projectId: cap.projectId,
485
+ name: cap.name,
486
+ description: cap.description,
487
+ ruleType: content?.ruleType ?? "claude-md",
488
+ content: content?.content ?? "",
489
+ createdAt: cap.createdAt,
490
+ updatedAt: cap.updatedAt,
491
+ deleted: cap.deleted
492
+ };
493
+ }
494
+ function capabilityToMcp(cap) {
495
+ const content = cap.latestVersion?.content;
496
+ return {
497
+ id: cap.id,
498
+ projectId: cap.projectId,
499
+ name: cap.name,
500
+ description: cap.description,
501
+ config: content?.config ?? {},
502
+ createdAt: cap.createdAt,
503
+ updatedAt: cap.updatedAt,
504
+ deleted: cap.deleted
505
+ };
506
+ }
507
+ function groupCapabilitiesByType(capabilities) {
508
+ const skills = [];
509
+ const subAgents = [];
510
+ const rules = [];
511
+ const mcps = [];
512
+ for (const cap of capabilities) {
513
+ switch (cap.capabilityType) {
514
+ case "SKILL":
515
+ skills.push(capabilityToSkillWithLatestVersion(cap));
516
+ break;
517
+ case "SUB_AGENT":
518
+ subAgents.push(capabilityToSubAgent(cap));
519
+ break;
520
+ case "RULE":
521
+ rules.push(capabilityToRule(cap));
522
+ break;
523
+ case "MCP":
524
+ mcps.push(capabilityToMcp(cap));
525
+ break;
526
+ }
527
+ }
528
+ return { skills, subAgents, rules, mcps };
529
+ }
530
+
369
531
  // src/test/index.ts
370
- import { z as z20 } from "zod";
532
+ import { z as z21 } from "zod";
371
533
 
372
534
  // src/test/base.ts
373
- import { z as z10 } from "zod";
535
+ import { z as z11 } from "zod";
374
536
  var TestType = /* @__PURE__ */ ((TestType2) => {
375
537
  TestType2["LLM"] = "LLM";
376
538
  TestType2["TOOL"] = "TOOL";
@@ -383,7 +545,7 @@ var TestType = /* @__PURE__ */ ((TestType2) => {
383
545
  TestType2["PLAYWRIGHT_NL"] = "PLAYWRIGHT_NL";
384
546
  return TestType2;
385
547
  })(TestType || {});
386
- var TestTypeSchema = z10.enum(TestType);
548
+ var TestTypeSchema = z11.enum(TestType);
387
549
  var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
388
550
  TestImportance2["LOW"] = "low";
389
551
  TestImportance2["MEDIUM"] = "medium";
@@ -391,153 +553,153 @@ var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
391
553
  TestImportance2["CRITICAL"] = "critical";
392
554
  return TestImportance2;
393
555
  })(TestImportance || {});
394
- var TestImportanceSchema = z10.enum(TestImportance);
395
- var BaseTestSchema = z10.object({
396
- id: z10.string(),
556
+ var TestImportanceSchema = z11.enum(TestImportance);
557
+ var BaseTestSchema = z11.object({
558
+ id: z11.string(),
397
559
  type: TestTypeSchema,
398
- name: z10.string().min(3),
399
- description: z10.string().optional(),
560
+ name: z11.string().min(3),
561
+ description: z11.string().optional(),
400
562
  importance: TestImportanceSchema.optional()
401
563
  });
402
564
 
403
565
  // src/test/llm.ts
404
- import { z as z11 } from "zod";
566
+ import { z as z12 } from "zod";
405
567
  var LLMTestSchema = BaseTestSchema.extend({
406
- type: z11.literal("LLM" /* LLM */),
568
+ type: z12.literal("LLM" /* LLM */),
407
569
  /** Maximum steps for the LLM to take */
408
- maxSteps: z11.number().min(1).max(100),
570
+ maxSteps: z12.number().min(1).max(100),
409
571
  /** Prompt to send to the evaluator */
410
- prompt: z11.string().min(1),
572
+ prompt: z12.string().min(1),
411
573
  /** ID of the evaluator agent to use */
412
- evaluatorId: z11.string()
574
+ evaluatorId: z12.string()
413
575
  });
414
576
 
415
577
  // src/test/tool.ts
416
- import { z as z12 } from "zod";
578
+ import { z as z13 } from "zod";
417
579
  var ToolTestSchema = BaseTestSchema.extend({
418
- type: z12.literal("TOOL" /* TOOL */),
580
+ type: z13.literal("TOOL" /* TOOL */),
419
581
  /** Name of the tool that should be called */
420
- toolName: z12.string().min(3),
582
+ toolName: z13.string().min(3),
421
583
  /** Expected arguments for the tool call */
422
- args: z12.record(z12.string(), z12.any()),
584
+ args: z13.record(z13.string(), z13.any()),
423
585
  /** Expected content in the tool results */
424
- resultsContent: z12.string()
586
+ resultsContent: z13.string()
425
587
  });
426
588
 
427
589
  // src/test/site-config.ts
428
- import { z as z13 } from "zod";
590
+ import { z as z14 } from "zod";
429
591
  var SiteConfigTestSchema = BaseTestSchema.extend({
430
- type: z13.literal("SITE_CONFIG" /* SITE_CONFIG */),
592
+ type: z14.literal("SITE_CONFIG" /* SITE_CONFIG */),
431
593
  /** URL to call */
432
- url: z13.string().url(),
594
+ url: z14.string().url(),
433
595
  /** HTTP method */
434
- method: z13.enum(["GET", "POST"]),
596
+ method: z14.enum(["GET", "POST"]),
435
597
  /** Request body (for POST) */
436
- body: z13.string().optional(),
598
+ body: z14.string().optional(),
437
599
  /** Expected HTTP status code */
438
- expectedStatusCode: z13.number().int().min(100).max(599),
600
+ expectedStatusCode: z14.number().int().min(100).max(599),
439
601
  /** Expected response content */
440
- expectedResponse: z13.string().optional(),
602
+ expectedResponse: z14.string().optional(),
441
603
  /** JMESPath expression to extract from response */
442
- expectedResponseJMESPath: z13.string().optional()
604
+ expectedResponseJMESPath: z14.string().optional()
443
605
  });
444
606
 
445
607
  // src/test/command-execution.ts
446
- import { z as z14 } from "zod";
608
+ import { z as z15 } from "zod";
447
609
  var AllowedCommands = [
448
610
  "yarn install --no-immutable && yarn build",
449
611
  "npm run build",
450
612
  "yarn typecheck"
451
613
  ];
452
614
  var CommandExecutionTestSchema = BaseTestSchema.extend({
453
- type: z14.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
615
+ type: z15.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
454
616
  /** Command to execute (must be in AllowedCommands) */
455
- command: z14.string().refine((value) => AllowedCommands.includes(value), {
617
+ command: z15.string().refine((value) => AllowedCommands.includes(value), {
456
618
  message: `Command must be one of: ${AllowedCommands.join(", ")}`
457
619
  }),
458
620
  /** Expected exit code (default: 0) */
459
- expectedExitCode: z14.number().default(0).optional()
621
+ expectedExitCode: z15.number().default(0).optional()
460
622
  });
461
623
 
462
624
  // src/test/file-presence.ts
463
- import { z as z15 } from "zod";
625
+ import { z as z16 } from "zod";
464
626
  var FilePresenceTestSchema = BaseTestSchema.extend({
465
- type: z15.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
627
+ type: z16.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
466
628
  /** Paths to check */
467
- paths: z15.array(z15.string()),
629
+ paths: z16.array(z16.string()),
468
630
  /** Whether files should exist (true) or not exist (false) */
469
- shouldExist: z15.boolean()
631
+ shouldExist: z16.boolean()
470
632
  });
471
633
 
472
634
  // src/test/file-content.ts
473
- import { z as z16 } from "zod";
474
- var FileContentCheckSchema = z16.object({
635
+ import { z as z17 } from "zod";
636
+ var FileContentCheckSchema = z17.object({
475
637
  /** Strings that must be present in the file */
476
- contains: z16.array(z16.string()).optional(),
638
+ contains: z17.array(z17.string()).optional(),
477
639
  /** Strings that must NOT be present in the file */
478
- notContains: z16.array(z16.string()).optional(),
640
+ notContains: z17.array(z17.string()).optional(),
479
641
  /** Regex pattern the content must match */
480
- matches: z16.string().optional(),
642
+ matches: z17.string().optional(),
481
643
  /** JSON path checks for structured content */
482
- jsonPath: z16.array(
483
- z16.object({
484
- path: z16.string(),
485
- value: z16.unknown()
644
+ jsonPath: z17.array(
645
+ z17.object({
646
+ path: z17.string(),
647
+ value: z17.unknown()
486
648
  })
487
649
  ).optional(),
488
650
  /** Lines that should be added (for diff checking) */
489
- added: z16.array(z16.string()).optional(),
651
+ added: z17.array(z17.string()).optional(),
490
652
  /** Lines that should be removed (for diff checking) */
491
- removed: z16.array(z16.string()).optional()
653
+ removed: z17.array(z17.string()).optional()
492
654
  });
493
655
  var FileContentTestSchema = BaseTestSchema.extend({
494
- type: z16.literal("FILE_CONTENT" /* FILE_CONTENT */),
656
+ type: z17.literal("FILE_CONTENT" /* FILE_CONTENT */),
495
657
  /** Path to the file to check */
496
- path: z16.string(),
658
+ path: z17.string(),
497
659
  /** Content checks to perform */
498
660
  checks: FileContentCheckSchema
499
661
  });
500
662
 
501
663
  // src/test/build-check.ts
502
- import { z as z17 } from "zod";
664
+ import { z as z18 } from "zod";
503
665
  var BuildCheckTestSchema = BaseTestSchema.extend({
504
- type: z17.literal("BUILD_CHECK" /* BUILD_CHECK */),
666
+ type: z18.literal("BUILD_CHECK" /* BUILD_CHECK */),
505
667
  /** Build command to execute */
506
- command: z17.string(),
668
+ command: z18.string(),
507
669
  /** Whether the build should succeed */
508
- expectSuccess: z17.boolean(),
670
+ expectSuccess: z18.boolean(),
509
671
  /** Maximum allowed warnings (optional) */
510
- allowedWarnings: z17.number().optional(),
672
+ allowedWarnings: z18.number().optional(),
511
673
  /** Timeout in milliseconds */
512
- timeout: z17.number().optional()
674
+ timeout: z18.number().optional()
513
675
  });
514
676
 
515
677
  // src/test/vitest.ts
516
- import { z as z18 } from "zod";
678
+ import { z as z19 } from "zod";
517
679
  var VitestTestSchema = BaseTestSchema.extend({
518
- type: z18.literal("VITEST" /* VITEST */),
680
+ type: z19.literal("VITEST" /* VITEST */),
519
681
  /** Test file content */
520
- testFile: z18.string(),
682
+ testFile: z19.string(),
521
683
  /** Name of the test file */
522
- testFileName: z18.string(),
684
+ testFileName: z19.string(),
523
685
  /** Minimum pass rate required (0-100) */
524
- minPassRate: z18.number().min(0).max(100)
686
+ minPassRate: z19.number().min(0).max(100)
525
687
  });
526
688
 
527
689
  // src/test/playwright-nl.ts
528
- import { z as z19 } from "zod";
690
+ import { z as z20 } from "zod";
529
691
  var PlaywrightNLTestSchema = BaseTestSchema.extend({
530
- type: z19.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
692
+ type: z20.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
531
693
  /** Natural language steps to execute */
532
- steps: z19.array(z19.string()),
694
+ steps: z20.array(z20.string()),
533
695
  /** Expected outcome description */
534
- expectedOutcome: z19.string(),
696
+ expectedOutcome: z20.string(),
535
697
  /** Timeout in milliseconds */
536
- timeout: z19.number().optional()
698
+ timeout: z20.number().optional()
537
699
  });
538
700
 
539
701
  // src/test/index.ts
540
- var TestSchema = z20.discriminatedUnion("type", [
702
+ var TestSchema = z21.discriminatedUnion("type", [
541
703
  LLMTestSchema,
542
704
  ToolTestSchema,
543
705
  SiteConfigTestSchema,
@@ -550,33 +712,33 @@ var TestSchema = z20.discriminatedUnion("type", [
550
712
  ]);
551
713
 
552
714
  // src/scenario/environment.ts
553
- import { z as z21 } from "zod";
554
- var LocalProjectConfigSchema = z21.object({
715
+ import { z as z22 } from "zod";
716
+ var LocalProjectConfigSchema = z22.object({
555
717
  /** Template ID to use for the local project */
556
- templateId: z21.string().optional(),
718
+ templateId: z22.string().optional(),
557
719
  /** Files to create in the project */
558
- files: z21.array(
559
- z21.object({
560
- path: z21.string().min(1),
561
- content: z21.string().min(1)
720
+ files: z22.array(
721
+ z22.object({
722
+ path: z22.string().min(1),
723
+ content: z22.string().min(1)
562
724
  })
563
725
  ).optional()
564
726
  });
565
- var MetaSiteConfigSchema = z21.object({
566
- configurations: z21.array(
567
- z21.object({
568
- name: z21.string().min(1),
569
- apiCalls: z21.array(
570
- z21.object({
571
- url: z21.string().url(),
572
- method: z21.enum(["POST", "PUT"]),
573
- body: z21.string()
727
+ var MetaSiteConfigSchema = z22.object({
728
+ configurations: z22.array(
729
+ z22.object({
730
+ name: z22.string().min(1),
731
+ apiCalls: z22.array(
732
+ z22.object({
733
+ url: z22.string().url(),
734
+ method: z22.enum(["POST", "PUT"]),
735
+ body: z22.string()
574
736
  })
575
737
  )
576
738
  })
577
739
  ).optional()
578
740
  });
579
- var EnvironmentSchema = z21.object({
741
+ var EnvironmentSchema = z22.object({
580
742
  /** Local project configuration */
581
743
  localProject: LocalProjectConfigSchema.optional(),
582
744
  /** Meta site configuration */
@@ -584,13 +746,13 @@ var EnvironmentSchema = z21.object({
584
746
  });
585
747
 
586
748
  // src/scenario/test-scenario.ts
587
- import { z as z24 } from "zod";
749
+ import { z as z25 } from "zod";
588
750
 
589
751
  // src/assertion/assertion.ts
590
- import { z as z23 } from "zod";
752
+ import { z as z24 } from "zod";
591
753
 
592
754
  // src/assertion/build-passed-command.ts
593
- import { z as z22 } from "zod";
755
+ import { z as z23 } from "zod";
594
756
  var ALLOWED_BUILD_COMMANDS = [
595
757
  "yarn build",
596
758
  "npm run build",
@@ -616,10 +778,10 @@ function parseBuildCommandToArgv(command) {
616
778
  return BUILD_COMMAND_ARGV[trimmed];
617
779
  }
618
780
  var enumTuple = ALLOWED_BUILD_COMMANDS;
619
- var BuildPassedCommandStringSchema = z22.enum(enumTuple);
781
+ var BuildPassedCommandStringSchema = z23.enum(enumTuple);
620
782
 
621
783
  // src/assertion/assertion.ts
622
- var AssertionTypeSchema = z23.enum([
784
+ var AssertionTypeSchema = z24.enum([
623
785
  "skill_was_called",
624
786
  "tool_called_with_param",
625
787
  "build_passed",
@@ -628,61 +790,61 @@ var AssertionTypeSchema = z23.enum([
628
790
  "llm_judge",
629
791
  "api_call"
630
792
  ]);
631
- var AssertionParameterTypeSchema = z23.enum([
793
+ var AssertionParameterTypeSchema = z24.enum([
632
794
  "string",
633
795
  "number",
634
796
  "boolean"
635
797
  ]);
636
- var AssertionParameterSchema = z23.object({
798
+ var AssertionParameterSchema = z24.object({
637
799
  /** Parameter name (used as key in params object) */
638
- name: z23.string().min(1),
800
+ name: z24.string().min(1),
639
801
  /** Display label for the parameter */
640
- label: z23.string().min(1),
802
+ label: z24.string().min(1),
641
803
  /** Parameter type */
642
804
  type: AssertionParameterTypeSchema,
643
805
  /** Whether this parameter is required */
644
- required: z23.boolean(),
806
+ required: z24.boolean(),
645
807
  /** Default value (optional, used when not provided) */
646
- defaultValue: z23.union([z23.string(), z23.number(), z23.boolean()]).optional(),
808
+ defaultValue: z24.union([z24.string(), z24.number(), z24.boolean()]).optional(),
647
809
  /** If true, parameter is hidden by default behind "Show advanced options" */
648
- advanced: z23.boolean().optional()
810
+ advanced: z24.boolean().optional()
649
811
  });
650
- var ScenarioAssertionLinkSchema = z23.object({
812
+ var ScenarioAssertionLinkSchema = z24.object({
651
813
  /** ID of the system assertion (e.g., 'system:skill_was_called') */
652
- assertionId: z23.string(),
814
+ assertionId: z24.string(),
653
815
  /** Parameter values for this assertion in this scenario */
654
- params: z23.record(
655
- z23.string(),
656
- z23.union([z23.string(), z23.number(), z23.boolean(), z23.null()])
816
+ params: z24.record(
817
+ z24.string(),
818
+ z24.union([z24.string(), z24.number(), z24.boolean(), z24.null()])
657
819
  ).optional()
658
820
  });
659
- var SkillWasCalledConfigSchema = z23.object({
821
+ var SkillWasCalledConfigSchema = z24.object({
660
822
  /** Names of the skills that must have been called */
661
- skillNames: z23.array(z23.string().min(1)).min(1)
823
+ skillNames: z24.array(z24.string().min(1)).min(1)
662
824
  });
663
- var CostConfigSchema = z23.strictObject({
825
+ var CostConfigSchema = z24.strictObject({
664
826
  /** Maximum allowed cost in USD */
665
- maxCostUsd: z23.number().positive()
827
+ maxCostUsd: z24.number().positive()
666
828
  });
667
- var ToolCalledWithParamConfigSchema = z23.strictObject({
829
+ var ToolCalledWithParamConfigSchema = z24.strictObject({
668
830
  /** Name of the tool that must have been called */
669
- toolName: z23.string().min(1),
831
+ toolName: z24.string().min(1),
670
832
  /** JSON string of key-value pairs for expected parameters (substring match). Optional — when omitted, only checks tool presence. */
671
- expectedParams: z23.string().min(1).optional(),
833
+ expectedParams: z24.string().min(1).optional(),
672
834
  /** If true, the matching tool call must also have succeeded (step.success === true) */
673
- requireSuccess: z23.boolean().optional()
835
+ requireSuccess: z24.boolean().optional()
674
836
  });
675
- var BuildPassedConfigSchema = z23.strictObject({
837
+ var BuildPassedConfigSchema = z24.strictObject({
676
838
  /** Allowlisted command only (default at runtime: "yarn build") */
677
839
  command: BuildPassedCommandStringSchema.optional(),
678
840
  /** Expected exit code (default: 0) */
679
- expectedExitCode: z23.number().int().optional()
841
+ expectedExitCode: z24.number().int().optional()
680
842
  });
681
- var TimeConfigSchema = z23.strictObject({
843
+ var TimeConfigSchema = z24.strictObject({
682
844
  /** Maximum allowed duration in milliseconds */
683
- maxDurationMs: z23.number().int().positive()
845
+ maxDurationMs: z24.number().int().positive()
684
846
  });
685
- var LlmJudgeConfigSchema = z23.object({
847
+ var LlmJudgeConfigSchema = z24.object({
686
848
  /**
687
849
  * Prompt template with placeholders:
688
850
  * - {{output}}: agent's final output
@@ -693,65 +855,65 @@ var LlmJudgeConfigSchema = z23.object({
693
855
  * - {{trace}}: step-by-step trace of tool calls
694
856
  * - Custom parameters defined in the parameters array
695
857
  */
696
- prompt: z23.string().min(1),
858
+ prompt: z24.string().min(1),
697
859
  /** Minimum score to pass (0-10, default 7) */
698
- minScore: z23.number().int().min(0).max(10).optional(),
860
+ minScore: z24.number().int().min(0).max(10).optional(),
699
861
  /** Model for the judge (e.g. claude-3-5-haiku-20241022) */
700
- model: z23.string().optional(),
862
+ model: z24.string().optional(),
701
863
  /** Max output tokens */
702
- maxTokens: z23.number().int().optional(),
864
+ maxTokens: z24.number().int().optional(),
703
865
  /** Temperature (0-1) */
704
- temperature: z23.number().min(0).max(1).optional(),
866
+ temperature: z24.number().min(0).max(1).optional(),
705
867
  /** User-defined parameters for this assertion */
706
- parameters: z23.array(AssertionParameterSchema).optional()
868
+ parameters: z24.array(AssertionParameterSchema).optional()
707
869
  });
708
- var ApiCallConfigSchema = z23.strictObject({
870
+ var ApiCallConfigSchema = z24.strictObject({
709
871
  /** URL to call */
710
- url: z23.string().min(1),
872
+ url: z24.string().min(1),
711
873
  /** HTTP method (default GET) */
712
- method: z23.enum(["GET", "POST"]).optional(),
874
+ method: z24.enum(["GET", "POST"]).optional(),
713
875
  /** Request body (JSON string, for POST requests) */
714
- requestBody: z23.string().optional(),
876
+ requestBody: z24.string().optional(),
715
877
  /** Expected JSON response to validate against (subset match — extra fields in actual are OK) */
716
- expectedResponse: z23.string().min(1),
878
+ expectedResponse: z24.string().min(1),
717
879
  /** Request headers as JSON string of key-value pairs */
718
- requestHeaders: z23.string().optional(),
880
+ requestHeaders: z24.string().optional(),
719
881
  /** Request timeout in milliseconds (default 30000) */
720
- timeoutMs: z23.number().int().positive().optional()
882
+ timeoutMs: z24.number().int().positive().optional()
721
883
  });
722
884
  var AssertionBaseFields = {
723
885
  /** When true, the assertion's pass/fail logic is inverted (NOT operator). */
724
- negate: z23.boolean().optional()
886
+ negate: z24.boolean().optional()
725
887
  };
726
888
  var SkillWasCalledAssertionSchema = SkillWasCalledConfigSchema.extend({
727
- type: z23.literal("skill_was_called"),
889
+ type: z24.literal("skill_was_called"),
728
890
  ...AssertionBaseFields
729
891
  });
730
892
  var ToolCalledWithParamAssertionSchema = ToolCalledWithParamConfigSchema.extend({
731
- type: z23.literal("tool_called_with_param"),
893
+ type: z24.literal("tool_called_with_param"),
732
894
  ...AssertionBaseFields
733
895
  });
734
896
  var BuildPassedAssertionSchema = BuildPassedConfigSchema.extend({
735
- type: z23.literal("build_passed"),
897
+ type: z24.literal("build_passed"),
736
898
  ...AssertionBaseFields
737
899
  });
738
900
  var CostAssertionSchema = CostConfigSchema.extend({
739
- type: z23.literal("cost"),
901
+ type: z24.literal("cost"),
740
902
  ...AssertionBaseFields
741
903
  });
742
904
  var LlmJudgeAssertionSchema = LlmJudgeConfigSchema.extend({
743
- type: z23.literal("llm_judge"),
905
+ type: z24.literal("llm_judge"),
744
906
  ...AssertionBaseFields
745
907
  });
746
908
  var ApiCallAssertionSchema = ApiCallConfigSchema.extend({
747
- type: z23.literal("api_call"),
909
+ type: z24.literal("api_call"),
748
910
  ...AssertionBaseFields
749
911
  });
750
912
  var TimeAssertionSchema = TimeConfigSchema.extend({
751
- type: z23.literal("time_limit"),
913
+ type: z24.literal("time_limit"),
752
914
  ...AssertionBaseFields
753
915
  });
754
- var AssertionSchema = z23.union([
916
+ var AssertionSchema = z24.union([
755
917
  SkillWasCalledAssertionSchema,
756
918
  ToolCalledWithParamAssertionSchema,
757
919
  BuildPassedAssertionSchema,
@@ -760,7 +922,7 @@ var AssertionSchema = z23.union([
760
922
  LlmJudgeAssertionSchema,
761
923
  ApiCallAssertionSchema
762
924
  ]);
763
- var AssertionConfigSchema = z23.union([
925
+ var AssertionConfigSchema = z24.union([
764
926
  LlmJudgeConfigSchema,
765
927
  // requires prompt - check first
766
928
  SkillWasCalledConfigSchema,
@@ -775,7 +937,7 @@ var AssertionConfigSchema = z23.union([
775
937
  // requires maxCostUsd, uses strictObject
776
938
  BuildPassedConfigSchema,
777
939
  // all optional, uses strictObject to reject unknown keys
778
- z23.object({})
940
+ z24.object({})
779
941
  // fallback empty config
780
942
  ]);
781
943
  function validateAssertionConfig(type, config) {
@@ -1021,35 +1183,35 @@ function getSystemAssertion(id) {
1021
1183
 
1022
1184
  // src/scenario/test-scenario.ts
1023
1185
  var MAX_IMAGE_BASE64_LENGTH = 4 * Math.ceil(2 * 1024 * 1024 / 3);
1024
- var TriggerPromptImageSchema = z24.object({
1186
+ var TriggerPromptImageSchema = z25.object({
1025
1187
  /** Base64-encoded image data (no data URL prefix) */
1026
- base64: z24.string().max(MAX_IMAGE_BASE64_LENGTH, "Image exceeds 2 MB size limit"),
1188
+ base64: z25.string().max(MAX_IMAGE_BASE64_LENGTH, "Image exceeds 2 MB size limit"),
1027
1189
  /** MIME type of the image */
1028
- mediaType: z24.enum(["image/jpeg", "image/png", "image/gif", "image/webp"]),
1190
+ mediaType: z25.enum(["image/jpeg", "image/png", "image/gif", "image/webp"]),
1029
1191
  /** Original filename of the image */
1030
- name: z24.string()
1192
+ name: z25.string()
1031
1193
  });
1032
- var ExpectedFileSchema = z24.object({
1194
+ var ExpectedFileSchema = z25.object({
1033
1195
  /** Relative path where the file should be created */
1034
- path: z24.string(),
1196
+ path: z25.string(),
1035
1197
  /** Optional expected content */
1036
- content: z24.string().optional()
1198
+ content: z25.string().optional()
1037
1199
  });
1038
1200
  var TestScenarioSchema = TenantEntitySchema.extend({
1039
1201
  /** The prompt sent to the agent to trigger the task */
1040
- triggerPrompt: z24.string().min(10),
1202
+ triggerPrompt: z25.string().min(10),
1041
1203
  /** ID of the template to use for this scenario (null = no template) */
1042
- templateId: z24.string().nullish(),
1204
+ templateId: z25.string().nullish(),
1043
1205
  /** Inline assertions to evaluate for this scenario (legacy) */
1044
- assertions: z24.array(AssertionSchema).optional(),
1206
+ assertions: z25.array(AssertionSchema).optional(),
1045
1207
  /** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
1046
- assertionIds: z24.array(z24.string()).optional(),
1208
+ assertionIds: z25.array(z25.string()).optional(),
1047
1209
  /** Linked assertions with per-scenario parameter values */
1048
- assertionLinks: z24.array(ScenarioAssertionLinkSchema).optional(),
1210
+ assertionLinks: z25.array(ScenarioAssertionLinkSchema).optional(),
1049
1211
  /** Tags for categorisation and filtering */
1050
- tags: z24.array(z24.string()).optional(),
1212
+ tags: z25.array(z25.string()).optional(),
1051
1213
  /** Base64-encoded images attached to the trigger prompt (max 3) */
1052
- triggerPromptImages: z24.array(TriggerPromptImageSchema).max(3).optional()
1214
+ triggerPromptImages: z25.array(TriggerPromptImageSchema).max(3).optional()
1053
1215
  });
1054
1216
  function validateBuildPassedParamsInAssertionLinks(links, ctx) {
1055
1217
  if (!links) return;
@@ -1060,7 +1222,7 @@ function validateBuildPassedParamsInAssertionLinks(links, ctx) {
1060
1222
  if (cmd === void 0 || cmd === null) continue;
1061
1223
  if (typeof cmd !== "string") {
1062
1224
  ctx.addIssue({
1063
- code: z24.ZodIssueCode.custom,
1225
+ code: z25.ZodIssueCode.custom,
1064
1226
  message: "build_passed command must be a string",
1065
1227
  path: ["assertionLinks", i, "params", "command"]
1066
1228
  });
@@ -1068,7 +1230,7 @@ function validateBuildPassedParamsInAssertionLinks(links, ctx) {
1068
1230
  }
1069
1231
  if (!isAllowedBuildCommandString(cmd)) {
1070
1232
  ctx.addIssue({
1071
- code: z24.ZodIssueCode.custom,
1233
+ code: z25.ZodIssueCode.custom,
1072
1234
  message: "Invalid build_passed command. Allowed: yarn build, npm run build, pnpm run build, pnpm build",
1073
1235
  path: ["assertionLinks", i, "params", "command"]
1074
1236
  });
@@ -1091,19 +1253,19 @@ var UpdateTestScenarioInputSchema = TestScenarioCreateBaseSchema.partial().super
1091
1253
  });
1092
1254
 
1093
1255
  // src/scenario/batch-import.ts
1094
- import { z as z25 } from "zod";
1256
+ import { z as z26 } from "zod";
1095
1257
  var UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
1096
- var BatchAssertionLinkSchema = z25.union([
1097
- z25.string().min(1),
1258
+ var BatchAssertionLinkSchema = z26.union([
1259
+ z26.string().min(1),
1098
1260
  ScenarioAssertionLinkSchema
1099
1261
  ]);
1100
- var BatchScenarioEntrySchema = z25.object({
1101
- name: z25.string().min(1, "name: Required"),
1102
- description: z25.string().optional().default(""),
1103
- triggerPrompt: z25.string().min(10, "triggerPrompt: Must be at least 10 characters"),
1104
- templateId: z25.string().nullish(),
1105
- tags: z25.array(z25.string()).optional(),
1106
- assertionLinks: z25.array(BatchAssertionLinkSchema).optional()
1262
+ var BatchScenarioEntrySchema = z26.object({
1263
+ name: z26.string().min(1, "name: Required"),
1264
+ description: z26.string().optional().default(""),
1265
+ triggerPrompt: z26.string().min(10, "triggerPrompt: Must be at least 10 characters"),
1266
+ templateId: z26.string().nullish(),
1267
+ tags: z26.array(z26.string()).optional(),
1268
+ assertionLinks: z26.array(BatchAssertionLinkSchema).optional()
1107
1269
  }).superRefine((data, ctx) => {
1108
1270
  if (!data.assertionLinks) return;
1109
1271
  const objectLinks = data.assertionLinks.filter(
@@ -1113,8 +1275,8 @@ var BatchScenarioEntrySchema = z25.object({
1113
1275
  validateBuildPassedParamsInAssertionLinks(objectLinks, ctx);
1114
1276
  }
1115
1277
  });
1116
- var BatchImportPayloadSchema = z25.object({
1117
- scenarios: z25.array(BatchScenarioEntrySchema).min(1, "scenarios array must contain at least one entry").max(100, "Maximum 100 scenarios per upload")
1278
+ var BatchImportPayloadSchema = z26.object({
1279
+ scenarios: z26.array(BatchScenarioEntrySchema).min(1, "scenarios array must contain at least one entry").max(100, "Maximum 100 scenarios per upload")
1118
1280
  });
1119
1281
  var BATCH_IMPORT_LIMITS = {
1120
1282
  MAX_SCENARIOS: 100,
@@ -1136,29 +1298,29 @@ function normalizeBatchAssertionLink(link) {
1136
1298
  }
1137
1299
  return link;
1138
1300
  }
1139
- var BatchResultItemSchema = z25.object({
1140
- index: z25.number(),
1141
- name: z25.string(),
1142
- status: z25.enum(["valid", "invalid"]),
1143
- id: z25.string().nullable().optional(),
1144
- errors: z25.array(z25.string()).optional()
1145
- });
1146
- var BatchSummarySchema = z25.object({
1147
- total: z25.number(),
1148
- valid: z25.number(),
1149
- invalid: z25.number(),
1150
- created: z25.number()
1151
- });
1152
- var BatchImportResponseSchema = z25.object({
1301
+ var BatchResultItemSchema = z26.object({
1302
+ index: z26.number(),
1303
+ name: z26.string(),
1304
+ status: z26.enum(["valid", "invalid"]),
1305
+ id: z26.string().nullable().optional(),
1306
+ errors: z26.array(z26.string()).optional()
1307
+ });
1308
+ var BatchSummarySchema = z26.object({
1309
+ total: z26.number(),
1310
+ valid: z26.number(),
1311
+ invalid: z26.number(),
1312
+ created: z26.number()
1313
+ });
1314
+ var BatchImportResponseSchema = z26.object({
1153
1315
  summary: BatchSummarySchema,
1154
- results: z25.array(BatchResultItemSchema)
1316
+ results: z26.array(BatchResultItemSchema)
1155
1317
  });
1156
1318
 
1157
1319
  // src/suite/test-suite.ts
1158
- import { z as z26 } from "zod";
1320
+ import { z as z27 } from "zod";
1159
1321
  var TestSuiteSchema = TenantEntitySchema.extend({
1160
1322
  /** IDs of test scenarios in this suite */
1161
- scenarioIds: z26.array(z26.string())
1323
+ scenarioIds: z27.array(z27.string())
1162
1324
  });
1163
1325
  var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
1164
1326
  id: true,
@@ -1169,21 +1331,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
1169
1331
  var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
1170
1332
 
1171
1333
  // src/evaluation/metrics.ts
1172
- import { z as z27 } from "zod";
1173
- var TokenUsageSchema = z27.object({
1174
- prompt: z27.number(),
1175
- completion: z27.number(),
1176
- total: z27.number()
1177
- });
1178
- var EvalMetricsSchema = z27.object({
1179
- totalAssertions: z27.number(),
1180
- passed: z27.number(),
1181
- failed: z27.number(),
1182
- skipped: z27.number(),
1183
- errors: z27.number(),
1184
- passRate: z27.number(),
1185
- avgDuration: z27.number(),
1186
- totalDuration: z27.number()
1334
+ import { z as z28 } from "zod";
1335
+ var TokenUsageSchema = z28.object({
1336
+ prompt: z28.number(),
1337
+ completion: z28.number(),
1338
+ total: z28.number()
1339
+ });
1340
+ var EvalMetricsSchema = z28.object({
1341
+ totalAssertions: z28.number(),
1342
+ passed: z28.number(),
1343
+ failed: z28.number(),
1344
+ skipped: z28.number(),
1345
+ errors: z28.number(),
1346
+ passRate: z28.number(),
1347
+ avgDuration: z28.number(),
1348
+ totalDuration: z28.number()
1187
1349
  });
1188
1350
  var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
1189
1351
  EvalStatus2["PENDING"] = "pending";
@@ -1193,7 +1355,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
1193
1355
  EvalStatus2["CANCELLED"] = "cancelled";
1194
1356
  return EvalStatus2;
1195
1357
  })(EvalStatus || {});
1196
- var EvalStatusSchema = z27.enum(EvalStatus);
1358
+ var EvalStatusSchema = z28.enum(EvalStatus);
1197
1359
  var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
1198
1360
  LLMStepType2["COMPLETION"] = "completion";
1199
1361
  LLMStepType2["TOOL_USE"] = "tool_use";
@@ -1201,54 +1363,54 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
1201
1363
  LLMStepType2["THINKING"] = "thinking";
1202
1364
  return LLMStepType2;
1203
1365
  })(LLMStepType || {});
1204
- var LLMTraceStepSchema = z27.object({
1205
- id: z27.string(),
1206
- stepNumber: z27.number(),
1207
- type: z27.enum(LLMStepType),
1208
- model: z27.string(),
1209
- provider: z27.string(),
1210
- startedAt: z27.string(),
1211
- durationMs: z27.number(),
1366
+ var LLMTraceStepSchema = z28.object({
1367
+ id: z28.string(),
1368
+ stepNumber: z28.number(),
1369
+ type: z28.enum(LLMStepType),
1370
+ model: z28.string(),
1371
+ provider: z28.string(),
1372
+ startedAt: z28.string(),
1373
+ durationMs: z28.number(),
1212
1374
  tokenUsage: TokenUsageSchema,
1213
- costUsd: z27.number(),
1214
- toolName: z27.string().optional(),
1215
- toolArguments: z27.string().optional(),
1216
- inputPreview: z27.string().optional(),
1217
- outputPreview: z27.string().optional(),
1218
- success: z27.boolean(),
1219
- error: z27.string().optional(),
1220
- turnIndex: z27.number().optional()
1221
- });
1222
- var LLMBreakdownStatsSchema = z27.object({
1223
- count: z27.number(),
1224
- durationMs: z27.number(),
1225
- tokens: z27.number(),
1226
- costUsd: z27.number()
1227
- });
1228
- var LLMTraceSummarySchema = z27.object({
1229
- totalSteps: z27.number(),
1230
- totalTurns: z27.number().optional(),
1231
- totalDurationMs: z27.number(),
1375
+ costUsd: z28.number(),
1376
+ toolName: z28.string().optional(),
1377
+ toolArguments: z28.string().optional(),
1378
+ inputPreview: z28.string().optional(),
1379
+ outputPreview: z28.string().optional(),
1380
+ success: z28.boolean(),
1381
+ error: z28.string().optional(),
1382
+ turnIndex: z28.number().optional()
1383
+ });
1384
+ var LLMBreakdownStatsSchema = z28.object({
1385
+ count: z28.number(),
1386
+ durationMs: z28.number(),
1387
+ tokens: z28.number(),
1388
+ costUsd: z28.number()
1389
+ });
1390
+ var LLMTraceSummarySchema = z28.object({
1391
+ totalSteps: z28.number(),
1392
+ totalTurns: z28.number().optional(),
1393
+ totalDurationMs: z28.number(),
1232
1394
  totalTokens: TokenUsageSchema,
1233
- totalCostUsd: z27.number(),
1234
- stepTypeBreakdown: z27.record(z27.string(), LLMBreakdownStatsSchema).optional(),
1235
- modelBreakdown: z27.record(z27.string(), LLMBreakdownStatsSchema),
1236
- modelsUsed: z27.array(z27.string())
1237
- });
1238
- var LLMTraceSchema = z27.object({
1239
- id: z27.string(),
1240
- steps: z27.array(LLMTraceStepSchema),
1395
+ totalCostUsd: z28.number(),
1396
+ stepTypeBreakdown: z28.record(z28.string(), LLMBreakdownStatsSchema).optional(),
1397
+ modelBreakdown: z28.record(z28.string(), LLMBreakdownStatsSchema),
1398
+ modelsUsed: z28.array(z28.string())
1399
+ });
1400
+ var LLMTraceSchema = z28.object({
1401
+ id: z28.string(),
1402
+ steps: z28.array(LLMTraceStepSchema),
1241
1403
  summary: LLMTraceSummarySchema
1242
1404
  });
1243
1405
 
1244
1406
  // src/evaluation/eval-result.ts
1245
- import { z as z31 } from "zod";
1407
+ import { z as z32 } from "zod";
1246
1408
 
1247
1409
  // src/evaluation/eval-run.ts
1248
- import { z as z29 } from "zod";
1410
+ import { z as z30 } from "zod";
1249
1411
 
1250
1412
  // src/evaluation/live-trace.ts
1251
- import { z as z28 } from "zod";
1413
+ import { z as z29 } from "zod";
1252
1414
  var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
1253
1415
  LiveTraceEventType2["THINKING"] = "thinking";
1254
1416
  LiveTraceEventType2["TOOL_USE"] = "tool_use";
@@ -1262,37 +1424,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
1262
1424
  LiveTraceEventType2["USER"] = "user";
1263
1425
  return LiveTraceEventType2;
1264
1426
  })(LiveTraceEventType || {});
1265
- var LiveTraceEventSchema = z28.object({
1427
+ var LiveTraceEventSchema = z29.object({
1266
1428
  /** The evaluation run ID */
1267
- evalRunId: z28.string(),
1429
+ evalRunId: z29.string(),
1268
1430
  /** The scenario ID being executed */
1269
- scenarioId: z28.string(),
1431
+ scenarioId: z29.string(),
1270
1432
  /** The scenario name for display */
1271
- scenarioName: z28.string(),
1433
+ scenarioName: z29.string(),
1272
1434
  /** The target ID (skill, agent, etc.) */
1273
- targetId: z28.string(),
1435
+ targetId: z29.string(),
1274
1436
  /** The target name for display */
1275
- targetName: z28.string(),
1437
+ targetName: z29.string(),
1276
1438
  /** Step number in the current scenario execution */
1277
- stepNumber: z28.number(),
1439
+ stepNumber: z29.number(),
1278
1440
  /** Type of trace event */
1279
- type: z28.enum(LiveTraceEventType),
1441
+ type: z29.enum(LiveTraceEventType),
1280
1442
  /** Tool name if this is a tool_use event */
1281
- toolName: z28.string().optional(),
1443
+ toolName: z29.string().optional(),
1282
1444
  /** Tool arguments preview (truncated JSON) */
1283
- toolArgs: z28.string().optional(),
1445
+ toolArgs: z29.string().optional(),
1284
1446
  /** Output preview (truncated text) */
1285
- outputPreview: z28.string().optional(),
1447
+ outputPreview: z29.string().optional(),
1286
1448
  /** File path for file operations */
1287
- filePath: z28.string().optional(),
1449
+ filePath: z29.string().optional(),
1288
1450
  /** Elapsed time in milliseconds for progress events */
1289
- elapsedMs: z28.number().optional(),
1451
+ elapsedMs: z29.number().optional(),
1290
1452
  /** Thinking/reasoning text from Claude */
1291
- thinking: z28.string().optional(),
1453
+ thinking: z29.string().optional(),
1292
1454
  /** Timestamp when this event occurred */
1293
- timestamp: z28.string(),
1455
+ timestamp: z29.string(),
1294
1456
  /** Whether this is the final event for this scenario */
1295
- isComplete: z28.boolean()
1457
+ isComplete: z29.boolean()
1296
1458
  });
1297
1459
  var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
1298
1460
  function parseTraceEventLine(line) {
@@ -1321,40 +1483,40 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
1321
1483
  TriggerType2["SCHEDULED"] = "SCHEDULED";
1322
1484
  return TriggerType2;
1323
1485
  })(TriggerType || {});
1324
- var TriggerMetadataSchema = z29.object({
1325
- version: z29.string().optional(),
1326
- resourceUpdated: z29.array(z29.string()).optional(),
1327
- scheduleId: z29.string().optional()
1486
+ var TriggerMetadataSchema = z30.object({
1487
+ version: z30.string().optional(),
1488
+ resourceUpdated: z30.array(z30.string()).optional(),
1489
+ scheduleId: z30.string().optional()
1328
1490
  });
1329
- var TriggerSchema = z29.object({
1330
- id: z29.string(),
1491
+ var TriggerSchema = z30.object({
1492
+ id: z30.string(),
1331
1493
  metadata: TriggerMetadataSchema.optional(),
1332
- type: z29.nativeEnum(TriggerType)
1494
+ type: z30.nativeEnum(TriggerType)
1333
1495
  });
1334
- var DiffLineTypeSchema = z29.enum(["added", "removed", "unchanged"]);
1335
- var DiffLineSchema = z29.object({
1496
+ var DiffLineTypeSchema = z30.enum(["added", "removed", "unchanged"]);
1497
+ var DiffLineSchema = z30.object({
1336
1498
  type: DiffLineTypeSchema,
1337
- content: z29.string(),
1338
- lineNumber: z29.number()
1339
- });
1340
- var DiffContentSchema = z29.object({
1341
- path: z29.string(),
1342
- expected: z29.string(),
1343
- actual: z29.string(),
1344
- diffLines: z29.array(DiffLineSchema),
1345
- renamedFrom: z29.string().optional(),
1499
+ content: z30.string(),
1500
+ lineNumber: z30.number()
1501
+ });
1502
+ var DiffContentSchema = z30.object({
1503
+ path: z30.string(),
1504
+ expected: z30.string(),
1505
+ actual: z30.string(),
1506
+ diffLines: z30.array(DiffLineSchema),
1507
+ renamedFrom: z30.string().optional(),
1346
1508
  /** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
1347
- isInfrastructure: z29.boolean().optional()
1509
+ isInfrastructure: z30.boolean().optional()
1348
1510
  });
1349
- var CommandExecutionSchema = z29.object({
1350
- command: z29.string(),
1351
- exitCode: z29.number(),
1352
- output: z29.string().optional(),
1353
- duration: z29.number()
1511
+ var CommandExecutionSchema = z30.object({
1512
+ command: z30.string(),
1513
+ exitCode: z30.number(),
1514
+ output: z30.string().optional(),
1515
+ duration: z30.number()
1354
1516
  });
1355
- var FileModificationSchema = z29.object({
1356
- path: z29.string(),
1357
- action: z29.enum(["created", "modified", "deleted"])
1517
+ var FileModificationSchema = z30.object({
1518
+ path: z30.string(),
1519
+ action: z30.enum(["created", "modified", "deleted"])
1358
1520
  });
1359
1521
  var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
1360
1522
  TemplateFileStatus2["NEW"] = "new";
@@ -1362,62 +1524,58 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
1362
1524
  TemplateFileStatus2["UNCHANGED"] = "unchanged";
1363
1525
  return TemplateFileStatus2;
1364
1526
  })(TemplateFileStatus || {});
1365
- var TemplateFileSchema = z29.object({
1527
+ var TemplateFileSchema = z30.object({
1366
1528
  /** Relative path within the template */
1367
- path: z29.string(),
1529
+ path: z30.string(),
1368
1530
  /** Full file content after execution */
1369
- content: z29.string(),
1531
+ content: z30.string(),
1370
1532
  /** File status (new, modified, unchanged) */
1371
- status: z29.enum(["new", "modified", "unchanged"]),
1533
+ status: z30.enum(["new", "modified", "unchanged"]),
1372
1534
  /** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
1373
- isInfrastructure: z29.boolean().optional()
1535
+ isInfrastructure: z30.boolean().optional()
1374
1536
  });
1375
- var ApiCallSchema = z29.object({
1376
- endpoint: z29.string(),
1377
- tokensUsed: z29.number(),
1378
- duration: z29.number()
1537
+ var ApiCallSchema = z30.object({
1538
+ endpoint: z30.string(),
1539
+ tokensUsed: z30.number(),
1540
+ duration: z30.number()
1379
1541
  });
1380
- var ExecutionTraceSchema = z29.object({
1381
- commands: z29.array(CommandExecutionSchema),
1382
- filesModified: z29.array(FileModificationSchema),
1383
- apiCalls: z29.array(ApiCallSchema),
1384
- totalDuration: z29.number()
1542
+ var ExecutionTraceSchema = z30.object({
1543
+ commands: z30.array(CommandExecutionSchema),
1544
+ filesModified: z30.array(FileModificationSchema),
1545
+ apiCalls: z30.array(ApiCallSchema),
1546
+ totalDuration: z30.number()
1385
1547
  });
1386
- var RunAnalysisFindingSchema = z29.object({
1387
- category: z29.enum([
1548
+ var RunAnalysisFindingSchema = z30.object({
1549
+ category: z30.enum([
1388
1550
  "failure_pattern",
1389
1551
  "cost_waste",
1390
1552
  "flakiness",
1391
1553
  "inefficiency",
1392
1554
  "positive"
1393
1555
  ]),
1394
- severity: z29.enum(["high", "medium", "low"]),
1395
- description: z29.string(),
1396
- affectedScenarios: z29.array(z29.string()),
1397
- recommendation: z29.string().optional()
1556
+ severity: z30.enum(["high", "medium", "low"]),
1557
+ description: z30.string(),
1558
+ affectedScenarios: z30.array(z30.string()),
1559
+ recommendation: z30.string().optional()
1398
1560
  });
1399
- var RunAnalysisSchema = z29.object({
1400
- generatedAt: z29.string(),
1401
- summary: z29.string(),
1402
- findings: z29.array(RunAnalysisFindingSchema)
1561
+ var RunAnalysisSchema = z30.object({
1562
+ generatedAt: z30.string(),
1563
+ summary: z30.string(),
1564
+ findings: z30.array(RunAnalysisFindingSchema)
1403
1565
  });
1404
1566
  var EvalRunSchema = TenantEntitySchema.extend({
1405
1567
  /** Agent ID for this run */
1406
- agentId: z29.string().optional(),
1568
+ agentId: z30.string().optional(),
1407
1569
  /** Preset ID that originated this run (optional) */
1408
- presetId: z29.string().optional(),
1409
- /** Skill IDs for this run */
1410
- skillIds: z29.array(z29.string()).optional(),
1411
- /** Map of skillId to skillVersionId for this run */
1412
- skillVersions: z29.record(z29.string(), z29.string()).optional(),
1570
+ presetId: z30.string().optional(),
1413
1571
  /** Scenario IDs to run (always present — resolved server-side from tags when needed) */
1414
- scenarioIds: z29.array(z29.string()),
1572
+ scenarioIds: z30.array(z30.string()),
1415
1573
  /** Current status */
1416
1574
  status: EvalStatusSchema,
1417
1575
  /** Progress percentage (0-100) */
1418
- progress: z29.number(),
1576
+ progress: z30.number(),
1419
1577
  /** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
1420
- results: z29.array(z29.lazy(() => EvalRunResultSchema)),
1578
+ results: z30.array(z30.lazy(() => EvalRunResultSchema)),
1421
1579
  /** Aggregated metrics across all results */
1422
1580
  aggregateMetrics: EvalMetricsSchema,
1423
1581
  /** Aggregated LLM trace summary */
@@ -1425,41 +1583,39 @@ var EvalRunSchema = TenantEntitySchema.extend({
1425
1583
  /** What triggered this run */
1426
1584
  trigger: TriggerSchema.optional(),
1427
1585
  /** When the run started (set when evaluation is triggered) */
1428
- startedAt: z29.string().optional(),
1586
+ startedAt: z30.string().optional(),
1429
1587
  /** When the run completed */
1430
- completedAt: z29.string().optional(),
1588
+ completedAt: z30.string().optional(),
1431
1589
  /** Live trace events captured during execution (for playback on results page) */
1432
- liveTraceEvents: z29.array(LiveTraceEventSchema).optional(),
1590
+ liveTraceEvents: z30.array(LiveTraceEventSchema).optional(),
1433
1591
  /** Remote job ID for tracking execution in Dev Machines */
1434
- jobId: z29.string().optional(),
1592
+ jobId: z30.string().optional(),
1435
1593
  /** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
1436
- jobStatus: z29.string().optional(),
1594
+ jobStatus: z30.string().optional(),
1437
1595
  /** Remote job error message if the job failed */
1438
- jobError: z29.string().optional(),
1596
+ jobError: z30.string().optional(),
1439
1597
  /** Timestamp of the last job status check */
1440
- jobStatusCheckedAt: z29.string().optional(),
1441
- /** MCP server IDs to enable for this run (optional) */
1442
- mcpIds: z29.array(z29.string()).optional(),
1443
- /** Sub-agent IDs to enable for this run (optional) */
1444
- subAgentIds: z29.array(z29.string()).optional(),
1445
- /** Rule IDs to enable for this run (optional) */
1446
- ruleIds: z29.array(z29.string()).optional(),
1598
+ jobStatusCheckedAt: z30.string().optional(),
1599
+ /** Unified capability IDs */
1600
+ capabilityIds: z30.array(z30.string()).optional(),
1601
+ /** Map of capabilityId to capabilityVersionId for version pinning */
1602
+ capabilityVersions: z30.record(z30.string(), z30.string()).optional(),
1447
1603
  /** Tags used to select scenarios for this run (for traceability) */
1448
- tags: z29.array(z29.string()).optional(),
1604
+ tags: z30.array(z30.string()).optional(),
1449
1605
  /** How many times each scenario is executed within this eval run. Default: 1. Max: 20. */
1450
- runsPerScenario: z29.number().int().min(1).max(20).optional(),
1606
+ runsPerScenario: z30.number().int().min(1).max(20).optional(),
1451
1607
  /** Snapshot of agent configuration captured at run creation time */
1452
- agentSnapshot: z29.object({
1453
- name: z29.string().optional(),
1608
+ agentSnapshot: z30.object({
1609
+ name: z30.string().optional(),
1454
1610
  agentType: AgentTypeSchema.optional(),
1455
1611
  runCommand: AgentRunCommandSchema.optional(),
1456
- systemPrompt: z29.string().nullable().optional(),
1612
+ systemPrompt: z30.string().nullable().optional(),
1457
1613
  modelConfig: ModelConfigSchema.optional()
1458
1614
  }).optional(),
1459
1615
  /** UUID linking all runs in a comparison group */
1460
- comparisonGroupId: z29.string().optional(),
1616
+ comparisonGroupId: z30.string().optional(),
1461
1617
  /** Human-readable label for this variant (e.g., "MCP: Wix Stores") */
1462
- comparisonLabel: z29.string().optional(),
1618
+ comparisonLabel: z30.string().optional(),
1463
1619
  /** LLM-generated analysis of the completed run */
1464
1620
  runAnalysis: RunAnalysisSchema.optional()
1465
1621
  });
@@ -1477,60 +1633,60 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
1477
1633
  agentSnapshot: true
1478
1634
  }).extend({
1479
1635
  /** Optional on input — backend resolves from tags when not provided */
1480
- scenarioIds: z29.array(z29.string()).optional()
1636
+ scenarioIds: z30.array(z30.string()).optional()
1481
1637
  }).refine(
1482
1638
  (data) => data.scenarioIds && data.scenarioIds.length > 0 || data.tags && data.tags.length > 0,
1483
1639
  { message: "Either scenarioIds or tags must be provided" }
1484
1640
  );
1485
- var EvaluationProgressSchema = z29.object({
1486
- runId: z29.string(),
1487
- targetId: z29.string(),
1488
- totalScenarios: z29.number(),
1489
- completedScenarios: z29.number(),
1490
- scenarioProgress: z29.array(
1491
- z29.object({
1492
- scenarioId: z29.string(),
1493
- currentStep: z29.string(),
1494
- error: z29.string().optional()
1641
+ var EvaluationProgressSchema = z30.object({
1642
+ runId: z30.string(),
1643
+ targetId: z30.string(),
1644
+ totalScenarios: z30.number(),
1645
+ completedScenarios: z30.number(),
1646
+ scenarioProgress: z30.array(
1647
+ z30.object({
1648
+ scenarioId: z30.string(),
1649
+ currentStep: z30.string(),
1650
+ error: z30.string().optional()
1495
1651
  })
1496
1652
  ),
1497
- createdAt: z29.number()
1498
- });
1499
- var EvaluationLogSchema = z29.object({
1500
- runId: z29.string(),
1501
- scenarioId: z29.string(),
1502
- log: z29.object({
1503
- level: z29.enum(["info", "error", "debug"]),
1504
- message: z29.string().optional(),
1505
- args: z29.array(z29.any()).optional(),
1506
- error: z29.string().optional()
1653
+ createdAt: z30.number()
1654
+ });
1655
+ var EvaluationLogSchema = z30.object({
1656
+ runId: z30.string(),
1657
+ scenarioId: z30.string(),
1658
+ log: z30.object({
1659
+ level: z30.enum(["info", "error", "debug"]),
1660
+ message: z30.string().optional(),
1661
+ args: z30.array(z30.any()).optional(),
1662
+ error: z30.string().optional()
1507
1663
  })
1508
1664
  });
1509
1665
  var LLM_TIMEOUT = 12e4;
1510
1666
 
1511
1667
  // src/evaluation/conversation.ts
1512
- import { z as z30 } from "zod";
1513
- var TextBlockSchema = z30.object({
1514
- type: z30.literal("text"),
1515
- text: z30.string()
1516
- });
1517
- var ThinkingBlockSchema = z30.object({
1518
- type: z30.literal("thinking"),
1519
- thinking: z30.string()
1520
- });
1521
- var ToolUseBlockSchema = z30.object({
1522
- type: z30.literal("tool_use"),
1523
- toolName: z30.string(),
1524
- toolId: z30.string(),
1525
- input: z30.unknown()
1526
- });
1527
- var ToolResultBlockSchema = z30.object({
1528
- type: z30.literal("tool_result"),
1529
- toolUseId: z30.string(),
1530
- content: z30.string(),
1531
- isError: z30.boolean().optional()
1532
- });
1533
- var ConversationBlockSchema = z30.discriminatedUnion("type", [
1668
+ import { z as z31 } from "zod";
1669
+ var TextBlockSchema = z31.object({
1670
+ type: z31.literal("text"),
1671
+ text: z31.string()
1672
+ });
1673
+ var ThinkingBlockSchema = z31.object({
1674
+ type: z31.literal("thinking"),
1675
+ thinking: z31.string()
1676
+ });
1677
+ var ToolUseBlockSchema = z31.object({
1678
+ type: z31.literal("tool_use"),
1679
+ toolName: z31.string(),
1680
+ toolId: z31.string(),
1681
+ input: z31.unknown()
1682
+ });
1683
+ var ToolResultBlockSchema = z31.object({
1684
+ type: z31.literal("tool_result"),
1685
+ toolUseId: z31.string(),
1686
+ content: z31.string(),
1687
+ isError: z31.boolean().optional()
1688
+ });
1689
+ var ConversationBlockSchema = z31.discriminatedUnion("type", [
1534
1690
  TextBlockSchema,
1535
1691
  ThinkingBlockSchema,
1536
1692
  ToolUseBlockSchema,
@@ -1541,18 +1697,18 @@ var ConversationMessageRoles = [
1541
1697
  "user",
1542
1698
  "system"
1543
1699
  ];
1544
- var ConversationMessageSchema = z30.object({
1545
- role: z30.enum(ConversationMessageRoles),
1546
- content: z30.array(ConversationBlockSchema),
1547
- timestamp: z30.string()
1700
+ var ConversationMessageSchema = z31.object({
1701
+ role: z31.enum(ConversationMessageRoles),
1702
+ content: z31.array(ConversationBlockSchema),
1703
+ timestamp: z31.string()
1548
1704
  });
1549
- var ScenarioConversationSchema = z30.object({
1550
- id: z30.string(),
1551
- projectId: z30.string(),
1552
- evalRunId: z30.string(),
1553
- resultId: z30.string(),
1554
- messages: z30.array(ConversationMessageSchema),
1555
- createdAt: z30.string()
1705
+ var ScenarioConversationSchema = z31.object({
1706
+ id: z31.string(),
1707
+ projectId: z31.string(),
1708
+ evalRunId: z31.string(),
1709
+ resultId: z31.string(),
1710
+ messages: z31.array(ConversationMessageSchema),
1711
+ createdAt: z31.string()
1556
1712
  });
1557
1713
 
1558
1714
  // src/evaluation/eval-result.ts
@@ -1563,98 +1719,98 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
1563
1719
  AssertionResultStatus2["ERROR"] = "error";
1564
1720
  return AssertionResultStatus2;
1565
1721
  })(AssertionResultStatus || {});
1566
- var AssertionResultSchema = z31.object({
1567
- id: z31.string(),
1568
- assertionId: z31.string(),
1569
- assertionType: z31.string(),
1570
- assertionName: z31.string(),
1571
- status: z31.enum(AssertionResultStatus),
1572
- message: z31.string().optional(),
1573
- expected: z31.string().optional(),
1574
- actual: z31.string().optional(),
1575
- duration: z31.number().optional(),
1576
- details: z31.record(z31.string(), z31.unknown()).optional(),
1577
- llmTraceSteps: z31.array(LLMTraceStepSchema).optional()
1578
- });
1579
- var EvalRunResultSchema = z31.object({
1580
- id: z31.string(),
1581
- targetId: z31.string(),
1582
- targetName: z31.string().optional(),
1722
+ var AssertionResultSchema = z32.object({
1723
+ id: z32.string(),
1724
+ assertionId: z32.string(),
1725
+ assertionType: z32.string(),
1726
+ assertionName: z32.string(),
1727
+ status: z32.enum(AssertionResultStatus),
1728
+ message: z32.string().optional(),
1729
+ expected: z32.string().optional(),
1730
+ actual: z32.string().optional(),
1731
+ duration: z32.number().optional(),
1732
+ details: z32.record(z32.string(), z32.unknown()).optional(),
1733
+ llmTraceSteps: z32.array(LLMTraceStepSchema).optional()
1734
+ });
1735
+ var EvalRunResultSchema = z32.object({
1736
+ id: z32.string(),
1737
+ targetId: z32.string(),
1738
+ targetName: z32.string().optional(),
1583
1739
  /** SkillVersion ID used for this evaluation (for version tracking) */
1584
- skillVersionId: z31.string().optional(),
1740
+ skillVersionId: z32.string().optional(),
1585
1741
  /** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
1586
- skillVersion: z31.string().optional(),
1587
- scenarioId: z31.string(),
1588
- scenarioName: z31.string(),
1742
+ skillVersion: z32.string().optional(),
1743
+ scenarioId: z32.string(),
1744
+ scenarioName: z32.string(),
1589
1745
  /** Snapshot of the trigger prompt used during the run (prevents stale display after edits) */
1590
- triggerPrompt: z31.string().optional(),
1746
+ triggerPrompt: z32.string().optional(),
1591
1747
  modelConfig: ModelConfigSchema.optional(),
1592
- assertionResults: z31.array(AssertionResultSchema),
1748
+ assertionResults: z32.array(AssertionResultSchema),
1593
1749
  metrics: EvalMetricsSchema.optional(),
1594
- passed: z31.number(),
1595
- failed: z31.number(),
1596
- passRate: z31.number(),
1597
- duration: z31.number(),
1598
- outputText: z31.string().optional(),
1599
- files: z31.array(ExpectedFileSchema).optional(),
1600
- fileDiffs: z31.array(DiffContentSchema).optional(),
1750
+ passed: z32.number(),
1751
+ failed: z32.number(),
1752
+ passRate: z32.number(),
1753
+ duration: z32.number(),
1754
+ outputText: z32.string().optional(),
1755
+ files: z32.array(ExpectedFileSchema).optional(),
1756
+ fileDiffs: z32.array(DiffContentSchema).optional(),
1601
1757
  /** Full template files after execution with status indicators */
1602
- templateFiles: z31.array(TemplateFileSchema).optional(),
1603
- startedAt: z31.string().optional(),
1604
- completedAt: z31.string().optional(),
1758
+ templateFiles: z32.array(TemplateFileSchema).optional(),
1759
+ startedAt: z32.string().optional(),
1760
+ completedAt: z32.string().optional(),
1605
1761
  llmTrace: LLMTraceSchema.optional(),
1606
1762
  /** Full conversation messages (only present in transit; stripped before DB storage) */
1607
- conversation: z31.array(ConversationMessageSchema).optional(),
1763
+ conversation: z32.array(ConversationMessageSchema).optional(),
1608
1764
  /** 0-based iteration index when a scenario is run multiple times within a single eval run */
1609
- iterationIndex: z31.number().int().min(0).optional()
1610
- });
1611
- var PromptResultSchema = z31.object({
1612
- text: z31.string(),
1613
- files: z31.array(z31.unknown()).optional(),
1614
- finishReason: z31.string().optional(),
1615
- reasoning: z31.string().optional(),
1616
- reasoningDetails: z31.unknown().optional(),
1617
- toolCalls: z31.array(z31.unknown()).optional(),
1618
- toolResults: z31.array(z31.unknown()).optional(),
1619
- warnings: z31.array(z31.unknown()).optional(),
1620
- sources: z31.array(z31.unknown()).optional(),
1621
- steps: z31.array(z31.unknown()),
1622
- generationTimeMs: z31.number(),
1623
- prompt: z31.string(),
1624
- systemPrompt: z31.string(),
1625
- usage: z31.object({
1626
- totalTokens: z31.number().optional(),
1627
- totalMicrocentsSpent: z31.number().optional()
1765
+ iterationIndex: z32.number().int().min(0).optional()
1766
+ });
1767
+ var PromptResultSchema = z32.object({
1768
+ text: z32.string(),
1769
+ files: z32.array(z32.unknown()).optional(),
1770
+ finishReason: z32.string().optional(),
1771
+ reasoning: z32.string().optional(),
1772
+ reasoningDetails: z32.unknown().optional(),
1773
+ toolCalls: z32.array(z32.unknown()).optional(),
1774
+ toolResults: z32.array(z32.unknown()).optional(),
1775
+ warnings: z32.array(z32.unknown()).optional(),
1776
+ sources: z32.array(z32.unknown()).optional(),
1777
+ steps: z32.array(z32.unknown()),
1778
+ generationTimeMs: z32.number(),
1779
+ prompt: z32.string(),
1780
+ systemPrompt: z32.string(),
1781
+ usage: z32.object({
1782
+ totalTokens: z32.number().optional(),
1783
+ totalMicrocentsSpent: z32.number().optional()
1628
1784
  })
1629
1785
  });
1630
- var EvaluationResultSchema = z31.object({
1631
- id: z31.string(),
1632
- runId: z31.string(),
1633
- timestamp: z31.number(),
1786
+ var EvaluationResultSchema = z32.object({
1787
+ id: z32.string(),
1788
+ runId: z32.string(),
1789
+ timestamp: z32.number(),
1634
1790
  promptResult: PromptResultSchema,
1635
- testResults: z31.array(z31.unknown()),
1636
- tags: z31.array(z31.string()).optional(),
1637
- feedback: z31.string().optional(),
1638
- score: z31.number(),
1639
- suiteId: z31.string().optional()
1640
- });
1641
- var LeanEvaluationResultSchema = z31.object({
1642
- id: z31.string(),
1643
- runId: z31.string(),
1644
- timestamp: z31.number(),
1645
- tags: z31.array(z31.string()).optional(),
1646
- scenarioId: z31.string(),
1647
- scenarioVersion: z31.number().optional(),
1648
- targetId: z31.string(),
1649
- targetVersion: z31.number().optional(),
1650
- suiteId: z31.string().optional(),
1651
- score: z31.number(),
1652
- time: z31.number().optional(),
1653
- microcentsSpent: z31.number().optional()
1791
+ testResults: z32.array(z32.unknown()),
1792
+ tags: z32.array(z32.string()).optional(),
1793
+ feedback: z32.string().optional(),
1794
+ score: z32.number(),
1795
+ suiteId: z32.string().optional()
1796
+ });
1797
+ var LeanEvaluationResultSchema = z32.object({
1798
+ id: z32.string(),
1799
+ runId: z32.string(),
1800
+ timestamp: z32.number(),
1801
+ tags: z32.array(z32.string()).optional(),
1802
+ scenarioId: z32.string(),
1803
+ scenarioVersion: z32.number().optional(),
1804
+ targetId: z32.string(),
1805
+ targetVersion: z32.number().optional(),
1806
+ suiteId: z32.string().optional(),
1807
+ score: z32.number(),
1808
+ time: z32.number().optional(),
1809
+ microcentsSpent: z32.number().optional()
1654
1810
  });
1655
1811
 
1656
1812
  // src/evaluation/eval-run-folder.ts
1657
- import { z as z32 } from "zod";
1813
+ import { z as z33 } from "zod";
1658
1814
  var EvalRunFolderSchema = TenantEntitySchema.extend({});
1659
1815
  var CreateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
1660
1816
  id: true,
@@ -1668,26 +1824,26 @@ var UpdateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
1668
1824
  updatedAt: true,
1669
1825
  deleted: true
1670
1826
  }).partial();
1671
- var EvalRunFolderMembershipSchema = z32.object({
1672
- folderId: z32.string(),
1673
- evalRunId: z32.string(),
1674
- projectId: z32.string(),
1675
- createdAt: z32.string()
1827
+ var EvalRunFolderMembershipSchema = z33.object({
1828
+ folderId: z33.string(),
1829
+ evalRunId: z33.string(),
1830
+ projectId: z33.string(),
1831
+ createdAt: z33.string()
1676
1832
  });
1677
1833
 
1678
1834
  // src/project/project.ts
1679
- import { z as z33 } from "zod";
1835
+ import { z as z34 } from "zod";
1680
1836
  var ProjectSchema = BaseEntitySchema.extend({
1681
- appId: z33.string().optional().describe("The ID of the app in Dev Center"),
1682
- scenarioTags: z33.array(z33.string()).optional().describe("Project-level tag vocabulary for scenarios"),
1837
+ appId: z34.string().optional().describe("The ID of the app in Dev Center"),
1838
+ scenarioTags: z34.array(z34.string()).optional().describe("Project-level tag vocabulary for scenarios"),
1683
1839
  /** Per-project Wix auth token (write-only — never returned in GET responses). null = clear. */
1684
- wixAuthToken: z33.string().nullable().optional().describe("Wix auth token for CLI/MCP authentication (encrypted at rest)"),
1840
+ wixAuthToken: z34.string().nullable().optional().describe("Wix auth token for CLI/MCP authentication (encrypted at rest)"),
1685
1841
  /** Per-project Base44 auth file content (write-only — never returned in GET responses). null = clear. */
1686
- base44AuthFile: z33.string().nullable().optional().describe("Base64-encoded Base44 auth file content (encrypted at rest)"),
1842
+ base44AuthFile: z34.string().nullable().optional().describe("Base64-encoded Base44 auth file content (encrypted at rest)"),
1687
1843
  /** Resolved at runtime from the encrypted Wix auth token */
1688
- wixAuthEmail: z33.string().optional().describe("Email associated with the Wix auth token (resolved at runtime)"),
1844
+ wixAuthEmail: z34.string().optional().describe("Email associated with the Wix auth token (resolved at runtime)"),
1689
1845
  /** Resolved at runtime from the encrypted Base44 auth file */
1690
- base44AuthEmail: z33.string().optional().describe("Email from the Base44 auth file (resolved at runtime)")
1846
+ base44AuthEmail: z34.string().optional().describe("Email from the Base44 auth file (resolved at runtime)")
1691
1847
  });
1692
1848
  var CreateProjectInputSchema = ProjectSchema.omit({
1693
1849
  id: true,
@@ -1697,7 +1853,7 @@ var CreateProjectInputSchema = ProjectSchema.omit({
1697
1853
  wixAuthEmail: true,
1698
1854
  base44AuthEmail: true
1699
1855
  }).extend({
1700
- appId: z33.string().describe(
1856
+ appId: z34.string().describe(
1701
1857
  "Required: The ID of the app in Dev Center for credential scoping"
1702
1858
  )
1703
1859
  });
@@ -1717,7 +1873,7 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
1717
1873
  var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
1718
1874
 
1719
1875
  // src/schedule/eval-schedule.ts
1720
- import { z as z34 } from "zod";
1876
+ import { z as z35 } from "zod";
1721
1877
  var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
1722
1878
  FrequencyType2["DAILY"] = "daily";
1723
1879
  FrequencyType2["WEEKDAY"] = "weekday";
@@ -1727,29 +1883,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
1727
1883
  })(FrequencyType || {});
1728
1884
  var EvalScheduleSchema = TenantEntitySchema.extend({
1729
1885
  /** Whether the schedule is active */
1730
- enabled: z34.boolean(),
1886
+ enabled: z35.boolean(),
1731
1887
  /** Test suite to run */
1732
- suiteId: z34.string(),
1888
+ suiteId: z35.string(),
1733
1889
  /** Preset that provides agent + entities for this schedule */
1734
- presetId: z34.string(),
1890
+ presetId: z35.string(),
1735
1891
  /** How often to run */
1736
- frequencyType: z34.nativeEnum(FrequencyType),
1892
+ frequencyType: z35.nativeEnum(FrequencyType),
1737
1893
  /** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
1738
- timeOfDay: z34.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
1894
+ timeOfDay: z35.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
1739
1895
  /** Day of week (0=Sun, 6=Sat) for weekly schedules */
1740
- dayOfWeek: z34.number().min(0).max(6).optional(),
1896
+ dayOfWeek: z35.number().min(0).max(6).optional(),
1741
1897
  /** Day of month (1-31) for monthly schedules */
1742
- dayOfMonth: z34.number().min(1).max(31).optional(),
1898
+ dayOfMonth: z35.number().min(1).max(31).optional(),
1743
1899
  /** IANA timezone (e.g., 'America/New_York') */
1744
- timezone: z34.string(),
1900
+ timezone: z35.string(),
1745
1901
  /** ID of the last eval run created by this schedule */
1746
- lastRunId: z34.string().optional(),
1902
+ lastRunId: z35.string().optional(),
1747
1903
  /** Denormalized status of the last run */
1748
- lastRunStatus: z34.string().optional(),
1904
+ lastRunStatus: z35.string().optional(),
1749
1905
  /** ISO timestamp of the last run */
1750
- lastRunAt: z34.string().optional(),
1906
+ lastRunAt: z35.string().optional(),
1751
1907
  /** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
1752
- nextRunAt: z34.string().optional()
1908
+ nextRunAt: z35.string().optional()
1753
1909
  });
1754
1910
  function isValidTimezone(tz) {
1755
1911
  try {
@@ -1762,14 +1918,14 @@ function isValidTimezone(tz) {
1762
1918
  function validateScheduleFields(data, ctx, options) {
1763
1919
  if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
1764
1920
  ctx.addIssue({
1765
- code: z34.ZodIssueCode.custom,
1921
+ code: z35.ZodIssueCode.custom,
1766
1922
  message: "dayOfWeek is required for weekly schedules",
1767
1923
  path: ["dayOfWeek"]
1768
1924
  });
1769
1925
  }
1770
1926
  if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
1771
1927
  ctx.addIssue({
1772
- code: z34.ZodIssueCode.custom,
1928
+ code: z35.ZodIssueCode.custom,
1773
1929
  message: "dayOfMonth is required for monthly schedules",
1774
1930
  path: ["dayOfMonth"]
1775
1931
  });
@@ -1777,7 +1933,7 @@ function validateScheduleFields(data, ctx, options) {
1777
1933
  const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
1778
1934
  if (shouldValidateTz && !isValidTimezone(data.timezone)) {
1779
1935
  ctx.addIssue({
1780
- code: z34.ZodIssueCode.custom,
1936
+ code: z35.ZodIssueCode.custom,
1781
1937
  message: "Invalid IANA timezone",
1782
1938
  path: ["timezone"]
1783
1939
  });
@@ -1841,6 +1997,13 @@ export {
1841
1997
  BulkImportResultItemSchema,
1842
1998
  BulkImportResultSchema,
1843
1999
  BulkImportSkillsInputSchema,
2000
+ CAPABILITY_NAME_REGEX,
2001
+ CapabilityContentSchema,
2002
+ CapabilitySchema,
2003
+ CapabilityTypeSchema,
2004
+ CapabilityVersionOriginSchema,
2005
+ CapabilityVersionSchema,
2006
+ CapabilityWithLatestVersionSchema,
1844
2007
  ClaudeModel,
1845
2008
  ClaudeModelSchema,
1846
2009
  CommandExecutionSchema,
@@ -1851,6 +2014,8 @@ export {
1851
2014
  CostAssertionSchema,
1852
2015
  CostConfigSchema,
1853
2016
  CreateAgentInputSchema,
2017
+ CreateCapabilityInputSchema,
2018
+ CreateCapabilityVersionInputSchema,
1854
2019
  CreateEvalRunFolderInputSchema,
1855
2020
  CreateEvalRunInputSchema,
1856
2021
  CreateEvalScheduleInputSchema,
@@ -1890,6 +2055,7 @@ export {
1890
2055
  FilePresenceTestSchema,
1891
2056
  FrequencyType,
1892
2057
  GitHubSourceSchema,
2058
+ InitialCapabilityVersionInputSchema,
1893
2059
  InitialVersionInputSchema,
1894
2060
  LEGACY_MODEL_ID_MAP,
1895
2061
  LLMBreakdownStatsSchema,
@@ -1966,6 +2132,7 @@ export {
1966
2132
  TriggerSchema,
1967
2133
  TriggerType,
1968
2134
  UpdateAgentInputSchema,
2135
+ UpdateCapabilityInputSchema,
1969
2136
  UpdateEvalRunFolderInputSchema,
1970
2137
  UpdateEvalScheduleInputSchema,
1971
2138
  UpdateMcpInputSchema,
@@ -1978,12 +2145,20 @@ export {
1978
2145
  UpdateTestScenarioInputSchema,
1979
2146
  UpdateTestSuiteInputSchema,
1980
2147
  VitestTestSchema,
2148
+ capabilityToMcp,
2149
+ capabilityToRule,
2150
+ capabilityToSkill,
2151
+ capabilityToSkillWithLatestVersion,
2152
+ capabilityToSubAgent,
2153
+ capabilityVersionToSkillVersion,
1981
2154
  classifyAssertionRef,
1982
2155
  formatTraceEventLine,
1983
2156
  getSystemAssertion,
1984
2157
  getSystemAssertions,
2158
+ groupCapabilitiesByType,
1985
2159
  isAllowedBuildCommandString,
1986
2160
  isSystemAssertionId,
2161
+ isValidCapabilityName,
1987
2162
  isValidSkillFolderName,
1988
2163
  normalizeBatchAssertionLink,
1989
2164
  normalizeModelId,