@wix/evalforge-types 0.74.0 → 0.76.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/build/index.js +698 -504
- package/build/index.js.map +4 -4
- package/build/index.mjs +679 -504
- package/build/index.mjs.map +4 -4
- package/build/types/evaluation/eval-run.d.ts +4 -10
- package/build/types/target/capability-converters.d.ts +25 -0
- package/build/types/target/capability.d.ts +254 -0
- package/build/types/target/index.d.ts +2 -0
- package/build/types/target/preset.d.ts +6 -15
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -60,6 +60,13 @@ __export(index_exports, {
|
|
|
60
60
|
BulkImportResultItemSchema: () => BulkImportResultItemSchema,
|
|
61
61
|
BulkImportResultSchema: () => BulkImportResultSchema,
|
|
62
62
|
BulkImportSkillsInputSchema: () => BulkImportSkillsInputSchema,
|
|
63
|
+
CAPABILITY_NAME_REGEX: () => CAPABILITY_NAME_REGEX,
|
|
64
|
+
CapabilityContentSchema: () => CapabilityContentSchema,
|
|
65
|
+
CapabilitySchema: () => CapabilitySchema,
|
|
66
|
+
CapabilityTypeSchema: () => CapabilityTypeSchema,
|
|
67
|
+
CapabilityVersionOriginSchema: () => CapabilityVersionOriginSchema,
|
|
68
|
+
CapabilityVersionSchema: () => CapabilityVersionSchema,
|
|
69
|
+
CapabilityWithLatestVersionSchema: () => CapabilityWithLatestVersionSchema,
|
|
63
70
|
ClaudeModel: () => ClaudeModel,
|
|
64
71
|
ClaudeModelSchema: () => ClaudeModelSchema,
|
|
65
72
|
CommandExecutionSchema: () => CommandExecutionSchema,
|
|
@@ -70,6 +77,8 @@ __export(index_exports, {
|
|
|
70
77
|
CostAssertionSchema: () => CostAssertionSchema,
|
|
71
78
|
CostConfigSchema: () => CostConfigSchema,
|
|
72
79
|
CreateAgentInputSchema: () => CreateAgentInputSchema,
|
|
80
|
+
CreateCapabilityInputSchema: () => CreateCapabilityInputSchema,
|
|
81
|
+
CreateCapabilityVersionInputSchema: () => CreateCapabilityVersionInputSchema,
|
|
73
82
|
CreateEvalRunFolderInputSchema: () => CreateEvalRunFolderInputSchema,
|
|
74
83
|
CreateEvalRunInputSchema: () => CreateEvalRunInputSchema,
|
|
75
84
|
CreateEvalScheduleInputSchema: () => CreateEvalScheduleInputSchema,
|
|
@@ -109,6 +118,7 @@ __export(index_exports, {
|
|
|
109
118
|
FilePresenceTestSchema: () => FilePresenceTestSchema,
|
|
110
119
|
FrequencyType: () => FrequencyType,
|
|
111
120
|
GitHubSourceSchema: () => GitHubSourceSchema,
|
|
121
|
+
InitialCapabilityVersionInputSchema: () => InitialCapabilityVersionInputSchema,
|
|
112
122
|
InitialVersionInputSchema: () => InitialVersionInputSchema,
|
|
113
123
|
LEGACY_MODEL_ID_MAP: () => LEGACY_MODEL_ID_MAP,
|
|
114
124
|
LLMBreakdownStatsSchema: () => LLMBreakdownStatsSchema,
|
|
@@ -185,6 +195,7 @@ __export(index_exports, {
|
|
|
185
195
|
TriggerSchema: () => TriggerSchema,
|
|
186
196
|
TriggerType: () => TriggerType,
|
|
187
197
|
UpdateAgentInputSchema: () => UpdateAgentInputSchema,
|
|
198
|
+
UpdateCapabilityInputSchema: () => UpdateCapabilityInputSchema,
|
|
188
199
|
UpdateEvalRunFolderInputSchema: () => UpdateEvalRunFolderInputSchema,
|
|
189
200
|
UpdateEvalScheduleInputSchema: () => UpdateEvalScheduleInputSchema,
|
|
190
201
|
UpdateMcpInputSchema: () => UpdateMcpInputSchema,
|
|
@@ -197,12 +208,20 @@ __export(index_exports, {
|
|
|
197
208
|
UpdateTestScenarioInputSchema: () => UpdateTestScenarioInputSchema,
|
|
198
209
|
UpdateTestSuiteInputSchema: () => UpdateTestSuiteInputSchema,
|
|
199
210
|
VitestTestSchema: () => VitestTestSchema,
|
|
211
|
+
capabilityToMcp: () => capabilityToMcp,
|
|
212
|
+
capabilityToRule: () => capabilityToRule,
|
|
213
|
+
capabilityToSkill: () => capabilityToSkill,
|
|
214
|
+
capabilityToSkillWithLatestVersion: () => capabilityToSkillWithLatestVersion,
|
|
215
|
+
capabilityToSubAgent: () => capabilityToSubAgent,
|
|
216
|
+
capabilityVersionToSkillVersion: () => capabilityVersionToSkillVersion,
|
|
200
217
|
classifyAssertionRef: () => classifyAssertionRef,
|
|
201
218
|
formatTraceEventLine: () => formatTraceEventLine,
|
|
202
219
|
getSystemAssertion: () => getSystemAssertion,
|
|
203
220
|
getSystemAssertions: () => getSystemAssertions,
|
|
221
|
+
groupCapabilitiesByType: () => groupCapabilitiesByType,
|
|
204
222
|
isAllowedBuildCommandString: () => isAllowedBuildCommandString,
|
|
205
223
|
isSystemAssertionId: () => isSystemAssertionId,
|
|
224
|
+
isValidCapabilityName: () => isValidCapabilityName,
|
|
206
225
|
isValidSkillFolderName: () => isValidSkillFolderName,
|
|
207
226
|
normalizeBatchAssertionLink: () => normalizeBatchAssertionLink,
|
|
208
227
|
normalizeModelId: () => normalizeModelId,
|
|
@@ -555,25 +574,19 @@ var import_zod9 = require("zod");
|
|
|
555
574
|
var PresetSchema = TenantEntitySchema.extend({
|
|
556
575
|
/** Agent ID for this preset */
|
|
557
576
|
agentId: import_zod9.z.string(),
|
|
558
|
-
/**
|
|
559
|
-
|
|
560
|
-
/**
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
subAgentIds: import_zod9.z.array(import_zod9.z.string()).default([]),
|
|
566
|
-
/** Rule IDs included in this preset */
|
|
567
|
-
ruleIds: import_zod9.z.array(import_zod9.z.string()).default([])
|
|
568
|
-
});
|
|
569
|
-
var atLeastOneEntity = (data) => (data.skillIds?.length ?? 0) > 0 || (data.mcpIds?.length ?? 0) > 0 || (data.subAgentIds?.length ?? 0) > 0 || (data.ruleIds?.length ?? 0) > 0;
|
|
570
|
-
var AT_LEAST_ONE_ENTITY_MESSAGE = "At least one of skillIds, mcpIds, subAgentIds, or ruleIds must be non-empty";
|
|
577
|
+
/** Unified capability IDs */
|
|
578
|
+
capabilityIds: import_zod9.z.array(import_zod9.z.string()).optional(),
|
|
579
|
+
/** Map of capabilityId to capabilityVersionId for version pinning */
|
|
580
|
+
capabilityVersions: import_zod9.z.record(import_zod9.z.string(), import_zod9.z.string()).optional()
|
|
581
|
+
});
|
|
582
|
+
var hasCapabilities = (data) => (data.capabilityIds?.length ?? 0) > 0;
|
|
583
|
+
var CAPABILITY_IDS_REQUIRED_MESSAGE = "capabilityIds must be non-empty";
|
|
571
584
|
var CreatePresetInputSchema = PresetSchema.omit({
|
|
572
585
|
id: true,
|
|
573
586
|
createdAt: true,
|
|
574
587
|
updatedAt: true,
|
|
575
588
|
deleted: true
|
|
576
|
-
}).refine(
|
|
589
|
+
}).refine(hasCapabilities, { message: CAPABILITY_IDS_REQUIRED_MESSAGE });
|
|
577
590
|
var UpdatePresetInputSchema = PresetSchema.omit({
|
|
578
591
|
id: true,
|
|
579
592
|
createdAt: true,
|
|
@@ -581,11 +594,179 @@ var UpdatePresetInputSchema = PresetSchema.omit({
|
|
|
581
594
|
deleted: true
|
|
582
595
|
}).partial();
|
|
583
596
|
|
|
597
|
+
// src/target/capability.ts
|
|
598
|
+
var import_zod10 = require("zod");
|
|
599
|
+
var CapabilityTypeSchema = import_zod10.z.enum([
|
|
600
|
+
"SKILL",
|
|
601
|
+
"SUB_AGENT",
|
|
602
|
+
"RULE",
|
|
603
|
+
"MCP"
|
|
604
|
+
]);
|
|
605
|
+
var CAPABILITY_NAME_REGEX = /^[a-z0-9]+(-[a-z0-9]+)*$/;
|
|
606
|
+
function isValidCapabilityName(name) {
|
|
607
|
+
return typeof name === "string" && name.length > 0 && CAPABILITY_NAME_REGEX.test(name);
|
|
608
|
+
}
|
|
609
|
+
var KEBAB_CASE_MESSAGE2 = "Name must be in kebab-case (lowercase letters, numbers, hyphens only, e.g. my-capability)";
|
|
610
|
+
var CapabilityContentSchema = import_zod10.z.record(import_zod10.z.string(), import_zod10.z.unknown());
|
|
611
|
+
var CapabilityVersionOriginSchema = import_zod10.z.enum(["manual", "pr", "master"]);
|
|
612
|
+
var CapabilitySchema = TenantEntitySchema.extend({
|
|
613
|
+
capabilityType: CapabilityTypeSchema,
|
|
614
|
+
source: GitHubSourceSchema.optional()
|
|
615
|
+
});
|
|
616
|
+
var CapabilityVersionSchema = import_zod10.z.object({
|
|
617
|
+
id: import_zod10.z.string(),
|
|
618
|
+
projectId: import_zod10.z.string(),
|
|
619
|
+
capabilityId: import_zod10.z.string(),
|
|
620
|
+
version: import_zod10.z.string(),
|
|
621
|
+
origin: CapabilityVersionOriginSchema,
|
|
622
|
+
source: GitHubSourceSchema.optional(),
|
|
623
|
+
content: CapabilityContentSchema.optional(),
|
|
624
|
+
notes: import_zod10.z.string().optional(),
|
|
625
|
+
createdAt: import_zod10.z.string()
|
|
626
|
+
});
|
|
627
|
+
var CapabilityWithLatestVersionSchema = CapabilitySchema.extend({
|
|
628
|
+
latestVersion: CapabilityVersionSchema.optional()
|
|
629
|
+
});
|
|
630
|
+
var CapabilityInputBaseSchema = CapabilitySchema.omit({
|
|
631
|
+
id: true,
|
|
632
|
+
createdAt: true,
|
|
633
|
+
updatedAt: true,
|
|
634
|
+
deleted: true,
|
|
635
|
+
description: true,
|
|
636
|
+
source: true
|
|
637
|
+
}).extend({
|
|
638
|
+
description: import_zod10.z.string().optional(),
|
|
639
|
+
source: GitHubSourceSchema.optional()
|
|
640
|
+
});
|
|
641
|
+
var InitialCapabilityVersionInputSchema = import_zod10.z.object({
|
|
642
|
+
content: CapabilityContentSchema.optional(),
|
|
643
|
+
notes: import_zod10.z.string().optional(),
|
|
644
|
+
source: GitHubSourceSchema.optional(),
|
|
645
|
+
version: import_zod10.z.string().optional(),
|
|
646
|
+
origin: CapabilityVersionOriginSchema.optional()
|
|
647
|
+
});
|
|
648
|
+
var CreateCapabilityInputSchema = CapabilityInputBaseSchema.extend({
|
|
649
|
+
initialVersion: InitialCapabilityVersionInputSchema.optional()
|
|
650
|
+
}).refine((data) => isValidCapabilityName(data.name), {
|
|
651
|
+
message: KEBAB_CASE_MESSAGE2,
|
|
652
|
+
path: ["name"]
|
|
653
|
+
});
|
|
654
|
+
var UpdateCapabilityInputSchema = CapabilityInputBaseSchema.omit({
|
|
655
|
+
capabilityType: true
|
|
656
|
+
}).partial().refine(
|
|
657
|
+
(data) => data.name === void 0 || isValidCapabilityName(data.name),
|
|
658
|
+
{ message: KEBAB_CASE_MESSAGE2, path: ["name"] }
|
|
659
|
+
);
|
|
660
|
+
var CreateCapabilityVersionInputSchema = import_zod10.z.object({
|
|
661
|
+
source: GitHubSourceSchema.optional(),
|
|
662
|
+
version: import_zod10.z.string().min(1),
|
|
663
|
+
notes: import_zod10.z.string().optional(),
|
|
664
|
+
origin: CapabilityVersionOriginSchema.optional(),
|
|
665
|
+
content: CapabilityContentSchema.optional()
|
|
666
|
+
});
|
|
667
|
+
|
|
668
|
+
// src/target/capability-converters.ts
|
|
669
|
+
function capabilityToSkill(cap) {
|
|
670
|
+
return {
|
|
671
|
+
id: cap.id,
|
|
672
|
+
projectId: cap.projectId,
|
|
673
|
+
name: cap.name,
|
|
674
|
+
description: cap.description,
|
|
675
|
+
source: cap.source,
|
|
676
|
+
createdAt: cap.createdAt,
|
|
677
|
+
updatedAt: cap.updatedAt,
|
|
678
|
+
deleted: cap.deleted
|
|
679
|
+
};
|
|
680
|
+
}
|
|
681
|
+
function capabilityVersionToSkillVersion(cv) {
|
|
682
|
+
const content = cv.content;
|
|
683
|
+
return {
|
|
684
|
+
id: cv.id,
|
|
685
|
+
projectId: cv.projectId,
|
|
686
|
+
skillId: cv.capabilityId,
|
|
687
|
+
version: cv.version,
|
|
688
|
+
origin: cv.origin,
|
|
689
|
+
source: cv.source,
|
|
690
|
+
files: content?.files,
|
|
691
|
+
notes: cv.notes,
|
|
692
|
+
createdAt: cv.createdAt
|
|
693
|
+
};
|
|
694
|
+
}
|
|
695
|
+
function capabilityToSkillWithLatestVersion(cap) {
|
|
696
|
+
const skill = capabilityToSkill(cap);
|
|
697
|
+
const latestVersion = cap.latestVersion ? capabilityVersionToSkillVersion(cap.latestVersion) : void 0;
|
|
698
|
+
return { ...skill, latestVersion };
|
|
699
|
+
}
|
|
700
|
+
function capabilityToSubAgent(cap) {
|
|
701
|
+
const content = cap.latestVersion?.content;
|
|
702
|
+
return {
|
|
703
|
+
id: cap.id,
|
|
704
|
+
projectId: cap.projectId,
|
|
705
|
+
name: cap.name,
|
|
706
|
+
description: cap.description,
|
|
707
|
+
subAgentMd: content?.subAgentMd ?? "",
|
|
708
|
+
source: cap.source,
|
|
709
|
+
createdAt: cap.createdAt,
|
|
710
|
+
updatedAt: cap.updatedAt,
|
|
711
|
+
deleted: cap.deleted
|
|
712
|
+
};
|
|
713
|
+
}
|
|
714
|
+
function capabilityToRule(cap) {
|
|
715
|
+
const content = cap.latestVersion?.content;
|
|
716
|
+
return {
|
|
717
|
+
id: cap.id,
|
|
718
|
+
projectId: cap.projectId,
|
|
719
|
+
name: cap.name,
|
|
720
|
+
description: cap.description,
|
|
721
|
+
ruleType: content?.ruleType ?? "claude-md",
|
|
722
|
+
content: content?.content ?? "",
|
|
723
|
+
createdAt: cap.createdAt,
|
|
724
|
+
updatedAt: cap.updatedAt,
|
|
725
|
+
deleted: cap.deleted
|
|
726
|
+
};
|
|
727
|
+
}
|
|
728
|
+
function capabilityToMcp(cap) {
|
|
729
|
+
const content = cap.latestVersion?.content;
|
|
730
|
+
return {
|
|
731
|
+
id: cap.id,
|
|
732
|
+
projectId: cap.projectId,
|
|
733
|
+
name: cap.name,
|
|
734
|
+
description: cap.description,
|
|
735
|
+
config: content?.config ?? {},
|
|
736
|
+
createdAt: cap.createdAt,
|
|
737
|
+
updatedAt: cap.updatedAt,
|
|
738
|
+
deleted: cap.deleted
|
|
739
|
+
};
|
|
740
|
+
}
|
|
741
|
+
function groupCapabilitiesByType(capabilities) {
|
|
742
|
+
const skills = [];
|
|
743
|
+
const subAgents = [];
|
|
744
|
+
const rules = [];
|
|
745
|
+
const mcps = [];
|
|
746
|
+
for (const cap of capabilities) {
|
|
747
|
+
switch (cap.capabilityType) {
|
|
748
|
+
case "SKILL":
|
|
749
|
+
skills.push(capabilityToSkillWithLatestVersion(cap));
|
|
750
|
+
break;
|
|
751
|
+
case "SUB_AGENT":
|
|
752
|
+
subAgents.push(capabilityToSubAgent(cap));
|
|
753
|
+
break;
|
|
754
|
+
case "RULE":
|
|
755
|
+
rules.push(capabilityToRule(cap));
|
|
756
|
+
break;
|
|
757
|
+
case "MCP":
|
|
758
|
+
mcps.push(capabilityToMcp(cap));
|
|
759
|
+
break;
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
return { skills, subAgents, rules, mcps };
|
|
763
|
+
}
|
|
764
|
+
|
|
584
765
|
// src/test/index.ts
|
|
585
|
-
var
|
|
766
|
+
var import_zod21 = require("zod");
|
|
586
767
|
|
|
587
768
|
// src/test/base.ts
|
|
588
|
-
var
|
|
769
|
+
var import_zod11 = require("zod");
|
|
589
770
|
var TestType = /* @__PURE__ */ ((TestType2) => {
|
|
590
771
|
TestType2["LLM"] = "LLM";
|
|
591
772
|
TestType2["TOOL"] = "TOOL";
|
|
@@ -598,7 +779,7 @@ var TestType = /* @__PURE__ */ ((TestType2) => {
|
|
|
598
779
|
TestType2["PLAYWRIGHT_NL"] = "PLAYWRIGHT_NL";
|
|
599
780
|
return TestType2;
|
|
600
781
|
})(TestType || {});
|
|
601
|
-
var TestTypeSchema =
|
|
782
|
+
var TestTypeSchema = import_zod11.z.enum(TestType);
|
|
602
783
|
var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
|
|
603
784
|
TestImportance2["LOW"] = "low";
|
|
604
785
|
TestImportance2["MEDIUM"] = "medium";
|
|
@@ -606,153 +787,153 @@ var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
|
|
|
606
787
|
TestImportance2["CRITICAL"] = "critical";
|
|
607
788
|
return TestImportance2;
|
|
608
789
|
})(TestImportance || {});
|
|
609
|
-
var TestImportanceSchema =
|
|
610
|
-
var BaseTestSchema =
|
|
611
|
-
id:
|
|
790
|
+
var TestImportanceSchema = import_zod11.z.enum(TestImportance);
|
|
791
|
+
var BaseTestSchema = import_zod11.z.object({
|
|
792
|
+
id: import_zod11.z.string(),
|
|
612
793
|
type: TestTypeSchema,
|
|
613
|
-
name:
|
|
614
|
-
description:
|
|
794
|
+
name: import_zod11.z.string().min(3),
|
|
795
|
+
description: import_zod11.z.string().optional(),
|
|
615
796
|
importance: TestImportanceSchema.optional()
|
|
616
797
|
});
|
|
617
798
|
|
|
618
799
|
// src/test/llm.ts
|
|
619
|
-
var
|
|
800
|
+
var import_zod12 = require("zod");
|
|
620
801
|
var LLMTestSchema = BaseTestSchema.extend({
|
|
621
|
-
type:
|
|
802
|
+
type: import_zod12.z.literal("LLM" /* LLM */),
|
|
622
803
|
/** Maximum steps for the LLM to take */
|
|
623
|
-
maxSteps:
|
|
804
|
+
maxSteps: import_zod12.z.number().min(1).max(100),
|
|
624
805
|
/** Prompt to send to the evaluator */
|
|
625
|
-
prompt:
|
|
806
|
+
prompt: import_zod12.z.string().min(1),
|
|
626
807
|
/** ID of the evaluator agent to use */
|
|
627
|
-
evaluatorId:
|
|
808
|
+
evaluatorId: import_zod12.z.string()
|
|
628
809
|
});
|
|
629
810
|
|
|
630
811
|
// src/test/tool.ts
|
|
631
|
-
var
|
|
812
|
+
var import_zod13 = require("zod");
|
|
632
813
|
var ToolTestSchema = BaseTestSchema.extend({
|
|
633
|
-
type:
|
|
814
|
+
type: import_zod13.z.literal("TOOL" /* TOOL */),
|
|
634
815
|
/** Name of the tool that should be called */
|
|
635
|
-
toolName:
|
|
816
|
+
toolName: import_zod13.z.string().min(3),
|
|
636
817
|
/** Expected arguments for the tool call */
|
|
637
|
-
args:
|
|
818
|
+
args: import_zod13.z.record(import_zod13.z.string(), import_zod13.z.any()),
|
|
638
819
|
/** Expected content in the tool results */
|
|
639
|
-
resultsContent:
|
|
820
|
+
resultsContent: import_zod13.z.string()
|
|
640
821
|
});
|
|
641
822
|
|
|
642
823
|
// src/test/site-config.ts
|
|
643
|
-
var
|
|
824
|
+
var import_zod14 = require("zod");
|
|
644
825
|
var SiteConfigTestSchema = BaseTestSchema.extend({
|
|
645
|
-
type:
|
|
826
|
+
type: import_zod14.z.literal("SITE_CONFIG" /* SITE_CONFIG */),
|
|
646
827
|
/** URL to call */
|
|
647
|
-
url:
|
|
828
|
+
url: import_zod14.z.string().url(),
|
|
648
829
|
/** HTTP method */
|
|
649
|
-
method:
|
|
830
|
+
method: import_zod14.z.enum(["GET", "POST"]),
|
|
650
831
|
/** Request body (for POST) */
|
|
651
|
-
body:
|
|
832
|
+
body: import_zod14.z.string().optional(),
|
|
652
833
|
/** Expected HTTP status code */
|
|
653
|
-
expectedStatusCode:
|
|
834
|
+
expectedStatusCode: import_zod14.z.number().int().min(100).max(599),
|
|
654
835
|
/** Expected response content */
|
|
655
|
-
expectedResponse:
|
|
836
|
+
expectedResponse: import_zod14.z.string().optional(),
|
|
656
837
|
/** JMESPath expression to extract from response */
|
|
657
|
-
expectedResponseJMESPath:
|
|
838
|
+
expectedResponseJMESPath: import_zod14.z.string().optional()
|
|
658
839
|
});
|
|
659
840
|
|
|
660
841
|
// src/test/command-execution.ts
|
|
661
|
-
var
|
|
842
|
+
var import_zod15 = require("zod");
|
|
662
843
|
var AllowedCommands = [
|
|
663
844
|
"yarn install --no-immutable && yarn build",
|
|
664
845
|
"npm run build",
|
|
665
846
|
"yarn typecheck"
|
|
666
847
|
];
|
|
667
848
|
var CommandExecutionTestSchema = BaseTestSchema.extend({
|
|
668
|
-
type:
|
|
849
|
+
type: import_zod15.z.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
|
|
669
850
|
/** Command to execute (must be in AllowedCommands) */
|
|
670
|
-
command:
|
|
851
|
+
command: import_zod15.z.string().refine((value) => AllowedCommands.includes(value), {
|
|
671
852
|
message: `Command must be one of: ${AllowedCommands.join(", ")}`
|
|
672
853
|
}),
|
|
673
854
|
/** Expected exit code (default: 0) */
|
|
674
|
-
expectedExitCode:
|
|
855
|
+
expectedExitCode: import_zod15.z.number().default(0).optional()
|
|
675
856
|
});
|
|
676
857
|
|
|
677
858
|
// src/test/file-presence.ts
|
|
678
|
-
var
|
|
859
|
+
var import_zod16 = require("zod");
|
|
679
860
|
var FilePresenceTestSchema = BaseTestSchema.extend({
|
|
680
|
-
type:
|
|
861
|
+
type: import_zod16.z.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
|
|
681
862
|
/** Paths to check */
|
|
682
|
-
paths:
|
|
863
|
+
paths: import_zod16.z.array(import_zod16.z.string()),
|
|
683
864
|
/** Whether files should exist (true) or not exist (false) */
|
|
684
|
-
shouldExist:
|
|
865
|
+
shouldExist: import_zod16.z.boolean()
|
|
685
866
|
});
|
|
686
867
|
|
|
687
868
|
// src/test/file-content.ts
|
|
688
|
-
var
|
|
689
|
-
var FileContentCheckSchema =
|
|
869
|
+
var import_zod17 = require("zod");
|
|
870
|
+
var FileContentCheckSchema = import_zod17.z.object({
|
|
690
871
|
/** Strings that must be present in the file */
|
|
691
|
-
contains:
|
|
872
|
+
contains: import_zod17.z.array(import_zod17.z.string()).optional(),
|
|
692
873
|
/** Strings that must NOT be present in the file */
|
|
693
|
-
notContains:
|
|
874
|
+
notContains: import_zod17.z.array(import_zod17.z.string()).optional(),
|
|
694
875
|
/** Regex pattern the content must match */
|
|
695
|
-
matches:
|
|
876
|
+
matches: import_zod17.z.string().optional(),
|
|
696
877
|
/** JSON path checks for structured content */
|
|
697
|
-
jsonPath:
|
|
698
|
-
|
|
699
|
-
path:
|
|
700
|
-
value:
|
|
878
|
+
jsonPath: import_zod17.z.array(
|
|
879
|
+
import_zod17.z.object({
|
|
880
|
+
path: import_zod17.z.string(),
|
|
881
|
+
value: import_zod17.z.unknown()
|
|
701
882
|
})
|
|
702
883
|
).optional(),
|
|
703
884
|
/** Lines that should be added (for diff checking) */
|
|
704
|
-
added:
|
|
885
|
+
added: import_zod17.z.array(import_zod17.z.string()).optional(),
|
|
705
886
|
/** Lines that should be removed (for diff checking) */
|
|
706
|
-
removed:
|
|
887
|
+
removed: import_zod17.z.array(import_zod17.z.string()).optional()
|
|
707
888
|
});
|
|
708
889
|
var FileContentTestSchema = BaseTestSchema.extend({
|
|
709
|
-
type:
|
|
890
|
+
type: import_zod17.z.literal("FILE_CONTENT" /* FILE_CONTENT */),
|
|
710
891
|
/** Path to the file to check */
|
|
711
|
-
path:
|
|
892
|
+
path: import_zod17.z.string(),
|
|
712
893
|
/** Content checks to perform */
|
|
713
894
|
checks: FileContentCheckSchema
|
|
714
895
|
});
|
|
715
896
|
|
|
716
897
|
// src/test/build-check.ts
|
|
717
|
-
var
|
|
898
|
+
var import_zod18 = require("zod");
|
|
718
899
|
var BuildCheckTestSchema = BaseTestSchema.extend({
|
|
719
|
-
type:
|
|
900
|
+
type: import_zod18.z.literal("BUILD_CHECK" /* BUILD_CHECK */),
|
|
720
901
|
/** Build command to execute */
|
|
721
|
-
command:
|
|
902
|
+
command: import_zod18.z.string(),
|
|
722
903
|
/** Whether the build should succeed */
|
|
723
|
-
expectSuccess:
|
|
904
|
+
expectSuccess: import_zod18.z.boolean(),
|
|
724
905
|
/** Maximum allowed warnings (optional) */
|
|
725
|
-
allowedWarnings:
|
|
906
|
+
allowedWarnings: import_zod18.z.number().optional(),
|
|
726
907
|
/** Timeout in milliseconds */
|
|
727
|
-
timeout:
|
|
908
|
+
timeout: import_zod18.z.number().optional()
|
|
728
909
|
});
|
|
729
910
|
|
|
730
911
|
// src/test/vitest.ts
|
|
731
|
-
var
|
|
912
|
+
var import_zod19 = require("zod");
|
|
732
913
|
var VitestTestSchema = BaseTestSchema.extend({
|
|
733
|
-
type:
|
|
914
|
+
type: import_zod19.z.literal("VITEST" /* VITEST */),
|
|
734
915
|
/** Test file content */
|
|
735
|
-
testFile:
|
|
916
|
+
testFile: import_zod19.z.string(),
|
|
736
917
|
/** Name of the test file */
|
|
737
|
-
testFileName:
|
|
918
|
+
testFileName: import_zod19.z.string(),
|
|
738
919
|
/** Minimum pass rate required (0-100) */
|
|
739
|
-
minPassRate:
|
|
920
|
+
minPassRate: import_zod19.z.number().min(0).max(100)
|
|
740
921
|
});
|
|
741
922
|
|
|
742
923
|
// src/test/playwright-nl.ts
|
|
743
|
-
var
|
|
924
|
+
var import_zod20 = require("zod");
|
|
744
925
|
var PlaywrightNLTestSchema = BaseTestSchema.extend({
|
|
745
|
-
type:
|
|
926
|
+
type: import_zod20.z.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
|
|
746
927
|
/** Natural language steps to execute */
|
|
747
|
-
steps:
|
|
928
|
+
steps: import_zod20.z.array(import_zod20.z.string()),
|
|
748
929
|
/** Expected outcome description */
|
|
749
|
-
expectedOutcome:
|
|
930
|
+
expectedOutcome: import_zod20.z.string(),
|
|
750
931
|
/** Timeout in milliseconds */
|
|
751
|
-
timeout:
|
|
932
|
+
timeout: import_zod20.z.number().optional()
|
|
752
933
|
});
|
|
753
934
|
|
|
754
935
|
// src/test/index.ts
|
|
755
|
-
var TestSchema =
|
|
936
|
+
var TestSchema = import_zod21.z.discriminatedUnion("type", [
|
|
756
937
|
LLMTestSchema,
|
|
757
938
|
ToolTestSchema,
|
|
758
939
|
SiteConfigTestSchema,
|
|
@@ -765,33 +946,33 @@ var TestSchema = import_zod20.z.discriminatedUnion("type", [
|
|
|
765
946
|
]);
|
|
766
947
|
|
|
767
948
|
// src/scenario/environment.ts
|
|
768
|
-
var
|
|
769
|
-
var LocalProjectConfigSchema =
|
|
949
|
+
var import_zod22 = require("zod");
|
|
950
|
+
var LocalProjectConfigSchema = import_zod22.z.object({
|
|
770
951
|
/** Template ID to use for the local project */
|
|
771
|
-
templateId:
|
|
952
|
+
templateId: import_zod22.z.string().optional(),
|
|
772
953
|
/** Files to create in the project */
|
|
773
|
-
files:
|
|
774
|
-
|
|
775
|
-
path:
|
|
776
|
-
content:
|
|
954
|
+
files: import_zod22.z.array(
|
|
955
|
+
import_zod22.z.object({
|
|
956
|
+
path: import_zod22.z.string().min(1),
|
|
957
|
+
content: import_zod22.z.string().min(1)
|
|
777
958
|
})
|
|
778
959
|
).optional()
|
|
779
960
|
});
|
|
780
|
-
var MetaSiteConfigSchema =
|
|
781
|
-
configurations:
|
|
782
|
-
|
|
783
|
-
name:
|
|
784
|
-
apiCalls:
|
|
785
|
-
|
|
786
|
-
url:
|
|
787
|
-
method:
|
|
788
|
-
body:
|
|
961
|
+
var MetaSiteConfigSchema = import_zod22.z.object({
|
|
962
|
+
configurations: import_zod22.z.array(
|
|
963
|
+
import_zod22.z.object({
|
|
964
|
+
name: import_zod22.z.string().min(1),
|
|
965
|
+
apiCalls: import_zod22.z.array(
|
|
966
|
+
import_zod22.z.object({
|
|
967
|
+
url: import_zod22.z.string().url(),
|
|
968
|
+
method: import_zod22.z.enum(["POST", "PUT"]),
|
|
969
|
+
body: import_zod22.z.string()
|
|
789
970
|
})
|
|
790
971
|
)
|
|
791
972
|
})
|
|
792
973
|
).optional()
|
|
793
974
|
});
|
|
794
|
-
var EnvironmentSchema =
|
|
975
|
+
var EnvironmentSchema = import_zod22.z.object({
|
|
795
976
|
/** Local project configuration */
|
|
796
977
|
localProject: LocalProjectConfigSchema.optional(),
|
|
797
978
|
/** Meta site configuration */
|
|
@@ -799,13 +980,13 @@ var EnvironmentSchema = import_zod21.z.object({
|
|
|
799
980
|
});
|
|
800
981
|
|
|
801
982
|
// src/scenario/test-scenario.ts
|
|
802
|
-
var
|
|
983
|
+
var import_zod25 = require("zod");
|
|
803
984
|
|
|
804
985
|
// src/assertion/assertion.ts
|
|
805
|
-
var
|
|
986
|
+
var import_zod24 = require("zod");
|
|
806
987
|
|
|
807
988
|
// src/assertion/build-passed-command.ts
|
|
808
|
-
var
|
|
989
|
+
var import_zod23 = require("zod");
|
|
809
990
|
var ALLOWED_BUILD_COMMANDS = [
|
|
810
991
|
"yarn build",
|
|
811
992
|
"npm run build",
|
|
@@ -831,10 +1012,10 @@ function parseBuildCommandToArgv(command) {
|
|
|
831
1012
|
return BUILD_COMMAND_ARGV[trimmed];
|
|
832
1013
|
}
|
|
833
1014
|
var enumTuple = ALLOWED_BUILD_COMMANDS;
|
|
834
|
-
var BuildPassedCommandStringSchema =
|
|
1015
|
+
var BuildPassedCommandStringSchema = import_zod23.z.enum(enumTuple);
|
|
835
1016
|
|
|
836
1017
|
// src/assertion/assertion.ts
|
|
837
|
-
var AssertionTypeSchema =
|
|
1018
|
+
var AssertionTypeSchema = import_zod24.z.enum([
|
|
838
1019
|
"skill_was_called",
|
|
839
1020
|
"tool_called_with_param",
|
|
840
1021
|
"build_passed",
|
|
@@ -843,61 +1024,61 @@ var AssertionTypeSchema = import_zod23.z.enum([
|
|
|
843
1024
|
"llm_judge",
|
|
844
1025
|
"api_call"
|
|
845
1026
|
]);
|
|
846
|
-
var AssertionParameterTypeSchema =
|
|
1027
|
+
var AssertionParameterTypeSchema = import_zod24.z.enum([
|
|
847
1028
|
"string",
|
|
848
1029
|
"number",
|
|
849
1030
|
"boolean"
|
|
850
1031
|
]);
|
|
851
|
-
var AssertionParameterSchema =
|
|
1032
|
+
var AssertionParameterSchema = import_zod24.z.object({
|
|
852
1033
|
/** Parameter name (used as key in params object) */
|
|
853
|
-
name:
|
|
1034
|
+
name: import_zod24.z.string().min(1),
|
|
854
1035
|
/** Display label for the parameter */
|
|
855
|
-
label:
|
|
1036
|
+
label: import_zod24.z.string().min(1),
|
|
856
1037
|
/** Parameter type */
|
|
857
1038
|
type: AssertionParameterTypeSchema,
|
|
858
1039
|
/** Whether this parameter is required */
|
|
859
|
-
required:
|
|
1040
|
+
required: import_zod24.z.boolean(),
|
|
860
1041
|
/** Default value (optional, used when not provided) */
|
|
861
|
-
defaultValue:
|
|
1042
|
+
defaultValue: import_zod24.z.union([import_zod24.z.string(), import_zod24.z.number(), import_zod24.z.boolean()]).optional(),
|
|
862
1043
|
/** If true, parameter is hidden by default behind "Show advanced options" */
|
|
863
|
-
advanced:
|
|
1044
|
+
advanced: import_zod24.z.boolean().optional()
|
|
864
1045
|
});
|
|
865
|
-
var ScenarioAssertionLinkSchema =
|
|
1046
|
+
var ScenarioAssertionLinkSchema = import_zod24.z.object({
|
|
866
1047
|
/** ID of the system assertion (e.g., 'system:skill_was_called') */
|
|
867
|
-
assertionId:
|
|
1048
|
+
assertionId: import_zod24.z.string(),
|
|
868
1049
|
/** Parameter values for this assertion in this scenario */
|
|
869
|
-
params:
|
|
870
|
-
|
|
871
|
-
|
|
1050
|
+
params: import_zod24.z.record(
|
|
1051
|
+
import_zod24.z.string(),
|
|
1052
|
+
import_zod24.z.union([import_zod24.z.string(), import_zod24.z.number(), import_zod24.z.boolean(), import_zod24.z.null()])
|
|
872
1053
|
).optional()
|
|
873
1054
|
});
|
|
874
|
-
var SkillWasCalledConfigSchema =
|
|
1055
|
+
var SkillWasCalledConfigSchema = import_zod24.z.object({
|
|
875
1056
|
/** Names of the skills that must have been called */
|
|
876
|
-
skillNames:
|
|
1057
|
+
skillNames: import_zod24.z.array(import_zod24.z.string().min(1)).min(1)
|
|
877
1058
|
});
|
|
878
|
-
var CostConfigSchema =
|
|
1059
|
+
var CostConfigSchema = import_zod24.z.strictObject({
|
|
879
1060
|
/** Maximum allowed cost in USD */
|
|
880
|
-
maxCostUsd:
|
|
1061
|
+
maxCostUsd: import_zod24.z.number().positive()
|
|
881
1062
|
});
|
|
882
|
-
var ToolCalledWithParamConfigSchema =
|
|
1063
|
+
var ToolCalledWithParamConfigSchema = import_zod24.z.strictObject({
|
|
883
1064
|
/** Name of the tool that must have been called */
|
|
884
|
-
toolName:
|
|
1065
|
+
toolName: import_zod24.z.string().min(1),
|
|
885
1066
|
/** JSON string of key-value pairs for expected parameters (substring match). Optional — when omitted, only checks tool presence. */
|
|
886
|
-
expectedParams:
|
|
1067
|
+
expectedParams: import_zod24.z.string().min(1).optional(),
|
|
887
1068
|
/** If true, the matching tool call must also have succeeded (step.success === true) */
|
|
888
|
-
requireSuccess:
|
|
1069
|
+
requireSuccess: import_zod24.z.boolean().optional()
|
|
889
1070
|
});
|
|
890
|
-
var BuildPassedConfigSchema =
|
|
1071
|
+
var BuildPassedConfigSchema = import_zod24.z.strictObject({
|
|
891
1072
|
/** Allowlisted command only (default at runtime: "yarn build") */
|
|
892
1073
|
command: BuildPassedCommandStringSchema.optional(),
|
|
893
1074
|
/** Expected exit code (default: 0) */
|
|
894
|
-
expectedExitCode:
|
|
1075
|
+
expectedExitCode: import_zod24.z.number().int().optional()
|
|
895
1076
|
});
|
|
896
|
-
var TimeConfigSchema =
|
|
1077
|
+
var TimeConfigSchema = import_zod24.z.strictObject({
|
|
897
1078
|
/** Maximum allowed duration in milliseconds */
|
|
898
|
-
maxDurationMs:
|
|
1079
|
+
maxDurationMs: import_zod24.z.number().int().positive()
|
|
899
1080
|
});
|
|
900
|
-
var LlmJudgeConfigSchema =
|
|
1081
|
+
var LlmJudgeConfigSchema = import_zod24.z.object({
|
|
901
1082
|
/**
|
|
902
1083
|
* Prompt template with placeholders:
|
|
903
1084
|
* - {{output}}: agent's final output
|
|
@@ -908,65 +1089,65 @@ var LlmJudgeConfigSchema = import_zod23.z.object({
|
|
|
908
1089
|
* - {{trace}}: step-by-step trace of tool calls
|
|
909
1090
|
* - Custom parameters defined in the parameters array
|
|
910
1091
|
*/
|
|
911
|
-
prompt:
|
|
1092
|
+
prompt: import_zod24.z.string().min(1),
|
|
912
1093
|
/** Minimum score to pass (0-10, default 7) */
|
|
913
|
-
minScore:
|
|
1094
|
+
minScore: import_zod24.z.number().int().min(0).max(10).optional(),
|
|
914
1095
|
/** Model for the judge (e.g. claude-3-5-haiku-20241022) */
|
|
915
|
-
model:
|
|
1096
|
+
model: import_zod24.z.string().optional(),
|
|
916
1097
|
/** Max output tokens */
|
|
917
|
-
maxTokens:
|
|
1098
|
+
maxTokens: import_zod24.z.number().int().optional(),
|
|
918
1099
|
/** Temperature (0-1) */
|
|
919
|
-
temperature:
|
|
1100
|
+
temperature: import_zod24.z.number().min(0).max(1).optional(),
|
|
920
1101
|
/** User-defined parameters for this assertion */
|
|
921
|
-
parameters:
|
|
1102
|
+
parameters: import_zod24.z.array(AssertionParameterSchema).optional()
|
|
922
1103
|
});
|
|
923
|
-
var ApiCallConfigSchema =
|
|
1104
|
+
var ApiCallConfigSchema = import_zod24.z.strictObject({
|
|
924
1105
|
/** URL to call */
|
|
925
|
-
url:
|
|
1106
|
+
url: import_zod24.z.string().min(1),
|
|
926
1107
|
/** HTTP method (default GET) */
|
|
927
|
-
method:
|
|
1108
|
+
method: import_zod24.z.enum(["GET", "POST"]).optional(),
|
|
928
1109
|
/** Request body (JSON string, for POST requests) */
|
|
929
|
-
requestBody:
|
|
1110
|
+
requestBody: import_zod24.z.string().optional(),
|
|
930
1111
|
/** Expected JSON response to validate against (subset match — extra fields in actual are OK) */
|
|
931
|
-
expectedResponse:
|
|
1112
|
+
expectedResponse: import_zod24.z.string().min(1),
|
|
932
1113
|
/** Request headers as JSON string of key-value pairs */
|
|
933
|
-
requestHeaders:
|
|
1114
|
+
requestHeaders: import_zod24.z.string().optional(),
|
|
934
1115
|
/** Request timeout in milliseconds (default 30000) */
|
|
935
|
-
timeoutMs:
|
|
1116
|
+
timeoutMs: import_zod24.z.number().int().positive().optional()
|
|
936
1117
|
});
|
|
937
1118
|
var AssertionBaseFields = {
|
|
938
1119
|
/** When true, the assertion's pass/fail logic is inverted (NOT operator). */
|
|
939
|
-
negate:
|
|
1120
|
+
negate: import_zod24.z.boolean().optional()
|
|
940
1121
|
};
|
|
941
1122
|
var SkillWasCalledAssertionSchema = SkillWasCalledConfigSchema.extend({
|
|
942
|
-
type:
|
|
1123
|
+
type: import_zod24.z.literal("skill_was_called"),
|
|
943
1124
|
...AssertionBaseFields
|
|
944
1125
|
});
|
|
945
1126
|
var ToolCalledWithParamAssertionSchema = ToolCalledWithParamConfigSchema.extend({
|
|
946
|
-
type:
|
|
1127
|
+
type: import_zod24.z.literal("tool_called_with_param"),
|
|
947
1128
|
...AssertionBaseFields
|
|
948
1129
|
});
|
|
949
1130
|
var BuildPassedAssertionSchema = BuildPassedConfigSchema.extend({
|
|
950
|
-
type:
|
|
1131
|
+
type: import_zod24.z.literal("build_passed"),
|
|
951
1132
|
...AssertionBaseFields
|
|
952
1133
|
});
|
|
953
1134
|
var CostAssertionSchema = CostConfigSchema.extend({
|
|
954
|
-
type:
|
|
1135
|
+
type: import_zod24.z.literal("cost"),
|
|
955
1136
|
...AssertionBaseFields
|
|
956
1137
|
});
|
|
957
1138
|
var LlmJudgeAssertionSchema = LlmJudgeConfigSchema.extend({
|
|
958
|
-
type:
|
|
1139
|
+
type: import_zod24.z.literal("llm_judge"),
|
|
959
1140
|
...AssertionBaseFields
|
|
960
1141
|
});
|
|
961
1142
|
var ApiCallAssertionSchema = ApiCallConfigSchema.extend({
|
|
962
|
-
type:
|
|
1143
|
+
type: import_zod24.z.literal("api_call"),
|
|
963
1144
|
...AssertionBaseFields
|
|
964
1145
|
});
|
|
965
1146
|
var TimeAssertionSchema = TimeConfigSchema.extend({
|
|
966
|
-
type:
|
|
1147
|
+
type: import_zod24.z.literal("time_limit"),
|
|
967
1148
|
...AssertionBaseFields
|
|
968
1149
|
});
|
|
969
|
-
var AssertionSchema =
|
|
1150
|
+
var AssertionSchema = import_zod24.z.union([
|
|
970
1151
|
SkillWasCalledAssertionSchema,
|
|
971
1152
|
ToolCalledWithParamAssertionSchema,
|
|
972
1153
|
BuildPassedAssertionSchema,
|
|
@@ -975,7 +1156,7 @@ var AssertionSchema = import_zod23.z.union([
|
|
|
975
1156
|
LlmJudgeAssertionSchema,
|
|
976
1157
|
ApiCallAssertionSchema
|
|
977
1158
|
]);
|
|
978
|
-
var AssertionConfigSchema =
|
|
1159
|
+
var AssertionConfigSchema = import_zod24.z.union([
|
|
979
1160
|
LlmJudgeConfigSchema,
|
|
980
1161
|
// requires prompt - check first
|
|
981
1162
|
SkillWasCalledConfigSchema,
|
|
@@ -990,7 +1171,7 @@ var AssertionConfigSchema = import_zod23.z.union([
|
|
|
990
1171
|
// requires maxCostUsd, uses strictObject
|
|
991
1172
|
BuildPassedConfigSchema,
|
|
992
1173
|
// all optional, uses strictObject to reject unknown keys
|
|
993
|
-
|
|
1174
|
+
import_zod24.z.object({})
|
|
994
1175
|
// fallback empty config
|
|
995
1176
|
]);
|
|
996
1177
|
function validateAssertionConfig(type, config) {
|
|
@@ -1236,35 +1417,35 @@ function getSystemAssertion(id) {
|
|
|
1236
1417
|
|
|
1237
1418
|
// src/scenario/test-scenario.ts
|
|
1238
1419
|
var MAX_IMAGE_BASE64_LENGTH = 4 * Math.ceil(2 * 1024 * 1024 / 3);
|
|
1239
|
-
var TriggerPromptImageSchema =
|
|
1420
|
+
var TriggerPromptImageSchema = import_zod25.z.object({
|
|
1240
1421
|
/** Base64-encoded image data (no data URL prefix) */
|
|
1241
|
-
base64:
|
|
1422
|
+
base64: import_zod25.z.string().max(MAX_IMAGE_BASE64_LENGTH, "Image exceeds 2 MB size limit"),
|
|
1242
1423
|
/** MIME type of the image */
|
|
1243
|
-
mediaType:
|
|
1424
|
+
mediaType: import_zod25.z.enum(["image/jpeg", "image/png", "image/gif", "image/webp"]),
|
|
1244
1425
|
/** Original filename of the image */
|
|
1245
|
-
name:
|
|
1426
|
+
name: import_zod25.z.string()
|
|
1246
1427
|
});
|
|
1247
|
-
var ExpectedFileSchema =
|
|
1428
|
+
var ExpectedFileSchema = import_zod25.z.object({
|
|
1248
1429
|
/** Relative path where the file should be created */
|
|
1249
|
-
path:
|
|
1430
|
+
path: import_zod25.z.string(),
|
|
1250
1431
|
/** Optional expected content */
|
|
1251
|
-
content:
|
|
1432
|
+
content: import_zod25.z.string().optional()
|
|
1252
1433
|
});
|
|
1253
1434
|
var TestScenarioSchema = TenantEntitySchema.extend({
|
|
1254
1435
|
/** The prompt sent to the agent to trigger the task */
|
|
1255
|
-
triggerPrompt:
|
|
1436
|
+
triggerPrompt: import_zod25.z.string().min(10),
|
|
1256
1437
|
/** ID of the template to use for this scenario (null = no template) */
|
|
1257
|
-
templateId:
|
|
1438
|
+
templateId: import_zod25.z.string().nullish(),
|
|
1258
1439
|
/** Inline assertions to evaluate for this scenario (legacy) */
|
|
1259
|
-
assertions:
|
|
1440
|
+
assertions: import_zod25.z.array(AssertionSchema).optional(),
|
|
1260
1441
|
/** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
|
|
1261
|
-
assertionIds:
|
|
1442
|
+
assertionIds: import_zod25.z.array(import_zod25.z.string()).optional(),
|
|
1262
1443
|
/** Linked assertions with per-scenario parameter values */
|
|
1263
|
-
assertionLinks:
|
|
1444
|
+
assertionLinks: import_zod25.z.array(ScenarioAssertionLinkSchema).optional(),
|
|
1264
1445
|
/** Tags for categorisation and filtering */
|
|
1265
|
-
tags:
|
|
1446
|
+
tags: import_zod25.z.array(import_zod25.z.string()).optional(),
|
|
1266
1447
|
/** Base64-encoded images attached to the trigger prompt (max 3) */
|
|
1267
|
-
triggerPromptImages:
|
|
1448
|
+
triggerPromptImages: import_zod25.z.array(TriggerPromptImageSchema).max(3).optional()
|
|
1268
1449
|
});
|
|
1269
1450
|
function validateBuildPassedParamsInAssertionLinks(links, ctx) {
|
|
1270
1451
|
if (!links) return;
|
|
@@ -1275,7 +1456,7 @@ function validateBuildPassedParamsInAssertionLinks(links, ctx) {
|
|
|
1275
1456
|
if (cmd === void 0 || cmd === null) continue;
|
|
1276
1457
|
if (typeof cmd !== "string") {
|
|
1277
1458
|
ctx.addIssue({
|
|
1278
|
-
code:
|
|
1459
|
+
code: import_zod25.z.ZodIssueCode.custom,
|
|
1279
1460
|
message: "build_passed command must be a string",
|
|
1280
1461
|
path: ["assertionLinks", i, "params", "command"]
|
|
1281
1462
|
});
|
|
@@ -1283,7 +1464,7 @@ function validateBuildPassedParamsInAssertionLinks(links, ctx) {
|
|
|
1283
1464
|
}
|
|
1284
1465
|
if (!isAllowedBuildCommandString(cmd)) {
|
|
1285
1466
|
ctx.addIssue({
|
|
1286
|
-
code:
|
|
1467
|
+
code: import_zod25.z.ZodIssueCode.custom,
|
|
1287
1468
|
message: "Invalid build_passed command. Allowed: yarn build, npm run build, pnpm run build, pnpm build",
|
|
1288
1469
|
path: ["assertionLinks", i, "params", "command"]
|
|
1289
1470
|
});
|
|
@@ -1306,19 +1487,19 @@ var UpdateTestScenarioInputSchema = TestScenarioCreateBaseSchema.partial().super
|
|
|
1306
1487
|
});
|
|
1307
1488
|
|
|
1308
1489
|
// src/scenario/batch-import.ts
|
|
1309
|
-
var
|
|
1490
|
+
var import_zod26 = require("zod");
|
|
1310
1491
|
var UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
1311
|
-
var BatchAssertionLinkSchema =
|
|
1312
|
-
|
|
1492
|
+
var BatchAssertionLinkSchema = import_zod26.z.union([
|
|
1493
|
+
import_zod26.z.string().min(1),
|
|
1313
1494
|
ScenarioAssertionLinkSchema
|
|
1314
1495
|
]);
|
|
1315
|
-
var BatchScenarioEntrySchema =
|
|
1316
|
-
name:
|
|
1317
|
-
description:
|
|
1318
|
-
triggerPrompt:
|
|
1319
|
-
templateId:
|
|
1320
|
-
tags:
|
|
1321
|
-
assertionLinks:
|
|
1496
|
+
var BatchScenarioEntrySchema = import_zod26.z.object({
|
|
1497
|
+
name: import_zod26.z.string().min(1, "name: Required"),
|
|
1498
|
+
description: import_zod26.z.string().optional().default(""),
|
|
1499
|
+
triggerPrompt: import_zod26.z.string().min(10, "triggerPrompt: Must be at least 10 characters"),
|
|
1500
|
+
templateId: import_zod26.z.string().nullish(),
|
|
1501
|
+
tags: import_zod26.z.array(import_zod26.z.string()).optional(),
|
|
1502
|
+
assertionLinks: import_zod26.z.array(BatchAssertionLinkSchema).optional()
|
|
1322
1503
|
}).superRefine((data, ctx) => {
|
|
1323
1504
|
if (!data.assertionLinks) return;
|
|
1324
1505
|
const objectLinks = data.assertionLinks.filter(
|
|
@@ -1328,8 +1509,8 @@ var BatchScenarioEntrySchema = import_zod25.z.object({
|
|
|
1328
1509
|
validateBuildPassedParamsInAssertionLinks(objectLinks, ctx);
|
|
1329
1510
|
}
|
|
1330
1511
|
});
|
|
1331
|
-
var BatchImportPayloadSchema =
|
|
1332
|
-
scenarios:
|
|
1512
|
+
var BatchImportPayloadSchema = import_zod26.z.object({
|
|
1513
|
+
scenarios: import_zod26.z.array(BatchScenarioEntrySchema).min(1, "scenarios array must contain at least one entry").max(100, "Maximum 100 scenarios per upload")
|
|
1333
1514
|
});
|
|
1334
1515
|
var BATCH_IMPORT_LIMITS = {
|
|
1335
1516
|
MAX_SCENARIOS: 100,
|
|
@@ -1351,29 +1532,29 @@ function normalizeBatchAssertionLink(link) {
|
|
|
1351
1532
|
}
|
|
1352
1533
|
return link;
|
|
1353
1534
|
}
|
|
1354
|
-
var BatchResultItemSchema =
|
|
1355
|
-
index:
|
|
1356
|
-
name:
|
|
1357
|
-
status:
|
|
1358
|
-
id:
|
|
1359
|
-
errors:
|
|
1360
|
-
});
|
|
1361
|
-
var BatchSummarySchema =
|
|
1362
|
-
total:
|
|
1363
|
-
valid:
|
|
1364
|
-
invalid:
|
|
1365
|
-
created:
|
|
1366
|
-
});
|
|
1367
|
-
var BatchImportResponseSchema =
|
|
1535
|
+
var BatchResultItemSchema = import_zod26.z.object({
|
|
1536
|
+
index: import_zod26.z.number(),
|
|
1537
|
+
name: import_zod26.z.string(),
|
|
1538
|
+
status: import_zod26.z.enum(["valid", "invalid"]),
|
|
1539
|
+
id: import_zod26.z.string().nullable().optional(),
|
|
1540
|
+
errors: import_zod26.z.array(import_zod26.z.string()).optional()
|
|
1541
|
+
});
|
|
1542
|
+
var BatchSummarySchema = import_zod26.z.object({
|
|
1543
|
+
total: import_zod26.z.number(),
|
|
1544
|
+
valid: import_zod26.z.number(),
|
|
1545
|
+
invalid: import_zod26.z.number(),
|
|
1546
|
+
created: import_zod26.z.number()
|
|
1547
|
+
});
|
|
1548
|
+
var BatchImportResponseSchema = import_zod26.z.object({
|
|
1368
1549
|
summary: BatchSummarySchema,
|
|
1369
|
-
results:
|
|
1550
|
+
results: import_zod26.z.array(BatchResultItemSchema)
|
|
1370
1551
|
});
|
|
1371
1552
|
|
|
1372
1553
|
// src/suite/test-suite.ts
|
|
1373
|
-
var
|
|
1554
|
+
var import_zod27 = require("zod");
|
|
1374
1555
|
var TestSuiteSchema = TenantEntitySchema.extend({
|
|
1375
1556
|
/** IDs of test scenarios in this suite */
|
|
1376
|
-
scenarioIds:
|
|
1557
|
+
scenarioIds: import_zod27.z.array(import_zod27.z.string())
|
|
1377
1558
|
});
|
|
1378
1559
|
var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
1379
1560
|
id: true,
|
|
@@ -1384,21 +1565,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
|
1384
1565
|
var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
|
|
1385
1566
|
|
|
1386
1567
|
// src/evaluation/metrics.ts
|
|
1387
|
-
var
|
|
1388
|
-
var TokenUsageSchema =
|
|
1389
|
-
prompt:
|
|
1390
|
-
completion:
|
|
1391
|
-
total:
|
|
1392
|
-
});
|
|
1393
|
-
var EvalMetricsSchema =
|
|
1394
|
-
totalAssertions:
|
|
1395
|
-
passed:
|
|
1396
|
-
failed:
|
|
1397
|
-
skipped:
|
|
1398
|
-
errors:
|
|
1399
|
-
passRate:
|
|
1400
|
-
avgDuration:
|
|
1401
|
-
totalDuration:
|
|
1568
|
+
var import_zod28 = require("zod");
|
|
1569
|
+
var TokenUsageSchema = import_zod28.z.object({
|
|
1570
|
+
prompt: import_zod28.z.number(),
|
|
1571
|
+
completion: import_zod28.z.number(),
|
|
1572
|
+
total: import_zod28.z.number()
|
|
1573
|
+
});
|
|
1574
|
+
var EvalMetricsSchema = import_zod28.z.object({
|
|
1575
|
+
totalAssertions: import_zod28.z.number(),
|
|
1576
|
+
passed: import_zod28.z.number(),
|
|
1577
|
+
failed: import_zod28.z.number(),
|
|
1578
|
+
skipped: import_zod28.z.number(),
|
|
1579
|
+
errors: import_zod28.z.number(),
|
|
1580
|
+
passRate: import_zod28.z.number(),
|
|
1581
|
+
avgDuration: import_zod28.z.number(),
|
|
1582
|
+
totalDuration: import_zod28.z.number()
|
|
1402
1583
|
});
|
|
1403
1584
|
var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
1404
1585
|
EvalStatus2["PENDING"] = "pending";
|
|
@@ -1408,7 +1589,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
|
1408
1589
|
EvalStatus2["CANCELLED"] = "cancelled";
|
|
1409
1590
|
return EvalStatus2;
|
|
1410
1591
|
})(EvalStatus || {});
|
|
1411
|
-
var EvalStatusSchema =
|
|
1592
|
+
var EvalStatusSchema = import_zod28.z.enum(EvalStatus);
|
|
1412
1593
|
var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
1413
1594
|
LLMStepType2["COMPLETION"] = "completion";
|
|
1414
1595
|
LLMStepType2["TOOL_USE"] = "tool_use";
|
|
@@ -1416,54 +1597,54 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
|
1416
1597
|
LLMStepType2["THINKING"] = "thinking";
|
|
1417
1598
|
return LLMStepType2;
|
|
1418
1599
|
})(LLMStepType || {});
|
|
1419
|
-
var LLMTraceStepSchema =
|
|
1420
|
-
id:
|
|
1421
|
-
stepNumber:
|
|
1422
|
-
type:
|
|
1423
|
-
model:
|
|
1424
|
-
provider:
|
|
1425
|
-
startedAt:
|
|
1426
|
-
durationMs:
|
|
1600
|
+
var LLMTraceStepSchema = import_zod28.z.object({
|
|
1601
|
+
id: import_zod28.z.string(),
|
|
1602
|
+
stepNumber: import_zod28.z.number(),
|
|
1603
|
+
type: import_zod28.z.enum(LLMStepType),
|
|
1604
|
+
model: import_zod28.z.string(),
|
|
1605
|
+
provider: import_zod28.z.string(),
|
|
1606
|
+
startedAt: import_zod28.z.string(),
|
|
1607
|
+
durationMs: import_zod28.z.number(),
|
|
1427
1608
|
tokenUsage: TokenUsageSchema,
|
|
1428
|
-
costUsd:
|
|
1429
|
-
toolName:
|
|
1430
|
-
toolArguments:
|
|
1431
|
-
inputPreview:
|
|
1432
|
-
outputPreview:
|
|
1433
|
-
success:
|
|
1434
|
-
error:
|
|
1435
|
-
turnIndex:
|
|
1436
|
-
});
|
|
1437
|
-
var LLMBreakdownStatsSchema =
|
|
1438
|
-
count:
|
|
1439
|
-
durationMs:
|
|
1440
|
-
tokens:
|
|
1441
|
-
costUsd:
|
|
1442
|
-
});
|
|
1443
|
-
var LLMTraceSummarySchema =
|
|
1444
|
-
totalSteps:
|
|
1445
|
-
totalTurns:
|
|
1446
|
-
totalDurationMs:
|
|
1609
|
+
costUsd: import_zod28.z.number(),
|
|
1610
|
+
toolName: import_zod28.z.string().optional(),
|
|
1611
|
+
toolArguments: import_zod28.z.string().optional(),
|
|
1612
|
+
inputPreview: import_zod28.z.string().optional(),
|
|
1613
|
+
outputPreview: import_zod28.z.string().optional(),
|
|
1614
|
+
success: import_zod28.z.boolean(),
|
|
1615
|
+
error: import_zod28.z.string().optional(),
|
|
1616
|
+
turnIndex: import_zod28.z.number().optional()
|
|
1617
|
+
});
|
|
1618
|
+
var LLMBreakdownStatsSchema = import_zod28.z.object({
|
|
1619
|
+
count: import_zod28.z.number(),
|
|
1620
|
+
durationMs: import_zod28.z.number(),
|
|
1621
|
+
tokens: import_zod28.z.number(),
|
|
1622
|
+
costUsd: import_zod28.z.number()
|
|
1623
|
+
});
|
|
1624
|
+
var LLMTraceSummarySchema = import_zod28.z.object({
|
|
1625
|
+
totalSteps: import_zod28.z.number(),
|
|
1626
|
+
totalTurns: import_zod28.z.number().optional(),
|
|
1627
|
+
totalDurationMs: import_zod28.z.number(),
|
|
1447
1628
|
totalTokens: TokenUsageSchema,
|
|
1448
|
-
totalCostUsd:
|
|
1449
|
-
stepTypeBreakdown:
|
|
1450
|
-
modelBreakdown:
|
|
1451
|
-
modelsUsed:
|
|
1452
|
-
});
|
|
1453
|
-
var LLMTraceSchema =
|
|
1454
|
-
id:
|
|
1455
|
-
steps:
|
|
1629
|
+
totalCostUsd: import_zod28.z.number(),
|
|
1630
|
+
stepTypeBreakdown: import_zod28.z.record(import_zod28.z.string(), LLMBreakdownStatsSchema).optional(),
|
|
1631
|
+
modelBreakdown: import_zod28.z.record(import_zod28.z.string(), LLMBreakdownStatsSchema),
|
|
1632
|
+
modelsUsed: import_zod28.z.array(import_zod28.z.string())
|
|
1633
|
+
});
|
|
1634
|
+
var LLMTraceSchema = import_zod28.z.object({
|
|
1635
|
+
id: import_zod28.z.string(),
|
|
1636
|
+
steps: import_zod28.z.array(LLMTraceStepSchema),
|
|
1456
1637
|
summary: LLMTraceSummarySchema
|
|
1457
1638
|
});
|
|
1458
1639
|
|
|
1459
1640
|
// src/evaluation/eval-result.ts
|
|
1460
|
-
var
|
|
1641
|
+
var import_zod32 = require("zod");
|
|
1461
1642
|
|
|
1462
1643
|
// src/evaluation/eval-run.ts
|
|
1463
|
-
var
|
|
1644
|
+
var import_zod30 = require("zod");
|
|
1464
1645
|
|
|
1465
1646
|
// src/evaluation/live-trace.ts
|
|
1466
|
-
var
|
|
1647
|
+
var import_zod29 = require("zod");
|
|
1467
1648
|
var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
1468
1649
|
LiveTraceEventType2["THINKING"] = "thinking";
|
|
1469
1650
|
LiveTraceEventType2["TOOL_USE"] = "tool_use";
|
|
@@ -1477,37 +1658,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
|
1477
1658
|
LiveTraceEventType2["USER"] = "user";
|
|
1478
1659
|
return LiveTraceEventType2;
|
|
1479
1660
|
})(LiveTraceEventType || {});
|
|
1480
|
-
var LiveTraceEventSchema =
|
|
1661
|
+
var LiveTraceEventSchema = import_zod29.z.object({
|
|
1481
1662
|
/** The evaluation run ID */
|
|
1482
|
-
evalRunId:
|
|
1663
|
+
evalRunId: import_zod29.z.string(),
|
|
1483
1664
|
/** The scenario ID being executed */
|
|
1484
|
-
scenarioId:
|
|
1665
|
+
scenarioId: import_zod29.z.string(),
|
|
1485
1666
|
/** The scenario name for display */
|
|
1486
|
-
scenarioName:
|
|
1667
|
+
scenarioName: import_zod29.z.string(),
|
|
1487
1668
|
/** The target ID (skill, agent, etc.) */
|
|
1488
|
-
targetId:
|
|
1669
|
+
targetId: import_zod29.z.string(),
|
|
1489
1670
|
/** The target name for display */
|
|
1490
|
-
targetName:
|
|
1671
|
+
targetName: import_zod29.z.string(),
|
|
1491
1672
|
/** Step number in the current scenario execution */
|
|
1492
|
-
stepNumber:
|
|
1673
|
+
stepNumber: import_zod29.z.number(),
|
|
1493
1674
|
/** Type of trace event */
|
|
1494
|
-
type:
|
|
1675
|
+
type: import_zod29.z.enum(LiveTraceEventType),
|
|
1495
1676
|
/** Tool name if this is a tool_use event */
|
|
1496
|
-
toolName:
|
|
1677
|
+
toolName: import_zod29.z.string().optional(),
|
|
1497
1678
|
/** Tool arguments preview (truncated JSON) */
|
|
1498
|
-
toolArgs:
|
|
1679
|
+
toolArgs: import_zod29.z.string().optional(),
|
|
1499
1680
|
/** Output preview (truncated text) */
|
|
1500
|
-
outputPreview:
|
|
1681
|
+
outputPreview: import_zod29.z.string().optional(),
|
|
1501
1682
|
/** File path for file operations */
|
|
1502
|
-
filePath:
|
|
1683
|
+
filePath: import_zod29.z.string().optional(),
|
|
1503
1684
|
/** Elapsed time in milliseconds for progress events */
|
|
1504
|
-
elapsedMs:
|
|
1685
|
+
elapsedMs: import_zod29.z.number().optional(),
|
|
1505
1686
|
/** Thinking/reasoning text from Claude */
|
|
1506
|
-
thinking:
|
|
1687
|
+
thinking: import_zod29.z.string().optional(),
|
|
1507
1688
|
/** Timestamp when this event occurred */
|
|
1508
|
-
timestamp:
|
|
1689
|
+
timestamp: import_zod29.z.string(),
|
|
1509
1690
|
/** Whether this is the final event for this scenario */
|
|
1510
|
-
isComplete:
|
|
1691
|
+
isComplete: import_zod29.z.boolean()
|
|
1511
1692
|
});
|
|
1512
1693
|
var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
|
|
1513
1694
|
function parseTraceEventLine(line) {
|
|
@@ -1536,40 +1717,40 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
|
|
|
1536
1717
|
TriggerType2["SCHEDULED"] = "SCHEDULED";
|
|
1537
1718
|
return TriggerType2;
|
|
1538
1719
|
})(TriggerType || {});
|
|
1539
|
-
var TriggerMetadataSchema =
|
|
1540
|
-
version:
|
|
1541
|
-
resourceUpdated:
|
|
1542
|
-
scheduleId:
|
|
1720
|
+
var TriggerMetadataSchema = import_zod30.z.object({
|
|
1721
|
+
version: import_zod30.z.string().optional(),
|
|
1722
|
+
resourceUpdated: import_zod30.z.array(import_zod30.z.string()).optional(),
|
|
1723
|
+
scheduleId: import_zod30.z.string().optional()
|
|
1543
1724
|
});
|
|
1544
|
-
var TriggerSchema =
|
|
1545
|
-
id:
|
|
1725
|
+
var TriggerSchema = import_zod30.z.object({
|
|
1726
|
+
id: import_zod30.z.string(),
|
|
1546
1727
|
metadata: TriggerMetadataSchema.optional(),
|
|
1547
|
-
type:
|
|
1728
|
+
type: import_zod30.z.nativeEnum(TriggerType)
|
|
1548
1729
|
});
|
|
1549
|
-
var DiffLineTypeSchema =
|
|
1550
|
-
var DiffLineSchema =
|
|
1730
|
+
var DiffLineTypeSchema = import_zod30.z.enum(["added", "removed", "unchanged"]);
|
|
1731
|
+
var DiffLineSchema = import_zod30.z.object({
|
|
1551
1732
|
type: DiffLineTypeSchema,
|
|
1552
|
-
content:
|
|
1553
|
-
lineNumber:
|
|
1554
|
-
});
|
|
1555
|
-
var DiffContentSchema =
|
|
1556
|
-
path:
|
|
1557
|
-
expected:
|
|
1558
|
-
actual:
|
|
1559
|
-
diffLines:
|
|
1560
|
-
renamedFrom:
|
|
1733
|
+
content: import_zod30.z.string(),
|
|
1734
|
+
lineNumber: import_zod30.z.number()
|
|
1735
|
+
});
|
|
1736
|
+
var DiffContentSchema = import_zod30.z.object({
|
|
1737
|
+
path: import_zod30.z.string(),
|
|
1738
|
+
expected: import_zod30.z.string(),
|
|
1739
|
+
actual: import_zod30.z.string(),
|
|
1740
|
+
diffLines: import_zod30.z.array(DiffLineSchema),
|
|
1741
|
+
renamedFrom: import_zod30.z.string().optional(),
|
|
1561
1742
|
/** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
|
|
1562
|
-
isInfrastructure:
|
|
1743
|
+
isInfrastructure: import_zod30.z.boolean().optional()
|
|
1563
1744
|
});
|
|
1564
|
-
var CommandExecutionSchema =
|
|
1565
|
-
command:
|
|
1566
|
-
exitCode:
|
|
1567
|
-
output:
|
|
1568
|
-
duration:
|
|
1745
|
+
var CommandExecutionSchema = import_zod30.z.object({
|
|
1746
|
+
command: import_zod30.z.string(),
|
|
1747
|
+
exitCode: import_zod30.z.number(),
|
|
1748
|
+
output: import_zod30.z.string().optional(),
|
|
1749
|
+
duration: import_zod30.z.number()
|
|
1569
1750
|
});
|
|
1570
|
-
var FileModificationSchema =
|
|
1571
|
-
path:
|
|
1572
|
-
action:
|
|
1751
|
+
var FileModificationSchema = import_zod30.z.object({
|
|
1752
|
+
path: import_zod30.z.string(),
|
|
1753
|
+
action: import_zod30.z.enum(["created", "modified", "deleted"])
|
|
1573
1754
|
});
|
|
1574
1755
|
var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
1575
1756
|
TemplateFileStatus2["NEW"] = "new";
|
|
@@ -1577,62 +1758,58 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
|
1577
1758
|
TemplateFileStatus2["UNCHANGED"] = "unchanged";
|
|
1578
1759
|
return TemplateFileStatus2;
|
|
1579
1760
|
})(TemplateFileStatus || {});
|
|
1580
|
-
var TemplateFileSchema =
|
|
1761
|
+
var TemplateFileSchema = import_zod30.z.object({
|
|
1581
1762
|
/** Relative path within the template */
|
|
1582
|
-
path:
|
|
1763
|
+
path: import_zod30.z.string(),
|
|
1583
1764
|
/** Full file content after execution */
|
|
1584
|
-
content:
|
|
1765
|
+
content: import_zod30.z.string(),
|
|
1585
1766
|
/** File status (new, modified, unchanged) */
|
|
1586
|
-
status:
|
|
1767
|
+
status: import_zod30.z.enum(["new", "modified", "unchanged"]),
|
|
1587
1768
|
/** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
|
|
1588
|
-
isInfrastructure:
|
|
1769
|
+
isInfrastructure: import_zod30.z.boolean().optional()
|
|
1589
1770
|
});
|
|
1590
|
-
var ApiCallSchema =
|
|
1591
|
-
endpoint:
|
|
1592
|
-
tokensUsed:
|
|
1593
|
-
duration:
|
|
1771
|
+
var ApiCallSchema = import_zod30.z.object({
|
|
1772
|
+
endpoint: import_zod30.z.string(),
|
|
1773
|
+
tokensUsed: import_zod30.z.number(),
|
|
1774
|
+
duration: import_zod30.z.number()
|
|
1594
1775
|
});
|
|
1595
|
-
var ExecutionTraceSchema =
|
|
1596
|
-
commands:
|
|
1597
|
-
filesModified:
|
|
1598
|
-
apiCalls:
|
|
1599
|
-
totalDuration:
|
|
1776
|
+
var ExecutionTraceSchema = import_zod30.z.object({
|
|
1777
|
+
commands: import_zod30.z.array(CommandExecutionSchema),
|
|
1778
|
+
filesModified: import_zod30.z.array(FileModificationSchema),
|
|
1779
|
+
apiCalls: import_zod30.z.array(ApiCallSchema),
|
|
1780
|
+
totalDuration: import_zod30.z.number()
|
|
1600
1781
|
});
|
|
1601
|
-
var RunAnalysisFindingSchema =
|
|
1602
|
-
category:
|
|
1782
|
+
var RunAnalysisFindingSchema = import_zod30.z.object({
|
|
1783
|
+
category: import_zod30.z.enum([
|
|
1603
1784
|
"failure_pattern",
|
|
1604
1785
|
"cost_waste",
|
|
1605
1786
|
"flakiness",
|
|
1606
1787
|
"inefficiency",
|
|
1607
1788
|
"positive"
|
|
1608
1789
|
]),
|
|
1609
|
-
severity:
|
|
1610
|
-
description:
|
|
1611
|
-
affectedScenarios:
|
|
1612
|
-
recommendation:
|
|
1790
|
+
severity: import_zod30.z.enum(["high", "medium", "low"]),
|
|
1791
|
+
description: import_zod30.z.string(),
|
|
1792
|
+
affectedScenarios: import_zod30.z.array(import_zod30.z.string()),
|
|
1793
|
+
recommendation: import_zod30.z.string().optional()
|
|
1613
1794
|
});
|
|
1614
|
-
var RunAnalysisSchema =
|
|
1615
|
-
generatedAt:
|
|
1616
|
-
summary:
|
|
1617
|
-
findings:
|
|
1795
|
+
var RunAnalysisSchema = import_zod30.z.object({
|
|
1796
|
+
generatedAt: import_zod30.z.string(),
|
|
1797
|
+
summary: import_zod30.z.string(),
|
|
1798
|
+
findings: import_zod30.z.array(RunAnalysisFindingSchema)
|
|
1618
1799
|
});
|
|
1619
1800
|
var EvalRunSchema = TenantEntitySchema.extend({
|
|
1620
1801
|
/** Agent ID for this run */
|
|
1621
|
-
agentId:
|
|
1802
|
+
agentId: import_zod30.z.string().optional(),
|
|
1622
1803
|
/** Preset ID that originated this run (optional) */
|
|
1623
|
-
presetId:
|
|
1624
|
-
/** Skill IDs for this run */
|
|
1625
|
-
skillIds: import_zod29.z.array(import_zod29.z.string()).optional(),
|
|
1626
|
-
/** Map of skillId to skillVersionId for this run */
|
|
1627
|
-
skillVersions: import_zod29.z.record(import_zod29.z.string(), import_zod29.z.string()).optional(),
|
|
1804
|
+
presetId: import_zod30.z.string().optional(),
|
|
1628
1805
|
/** Scenario IDs to run (always present — resolved server-side from tags when needed) */
|
|
1629
|
-
scenarioIds:
|
|
1806
|
+
scenarioIds: import_zod30.z.array(import_zod30.z.string()),
|
|
1630
1807
|
/** Current status */
|
|
1631
1808
|
status: EvalStatusSchema,
|
|
1632
1809
|
/** Progress percentage (0-100) */
|
|
1633
|
-
progress:
|
|
1810
|
+
progress: import_zod30.z.number(),
|
|
1634
1811
|
/** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
|
|
1635
|
-
results:
|
|
1812
|
+
results: import_zod30.z.array(import_zod30.z.lazy(() => EvalRunResultSchema)),
|
|
1636
1813
|
/** Aggregated metrics across all results */
|
|
1637
1814
|
aggregateMetrics: EvalMetricsSchema,
|
|
1638
1815
|
/** Aggregated LLM trace summary */
|
|
@@ -1640,41 +1817,39 @@ var EvalRunSchema = TenantEntitySchema.extend({
|
|
|
1640
1817
|
/** What triggered this run */
|
|
1641
1818
|
trigger: TriggerSchema.optional(),
|
|
1642
1819
|
/** When the run started (set when evaluation is triggered) */
|
|
1643
|
-
startedAt:
|
|
1820
|
+
startedAt: import_zod30.z.string().optional(),
|
|
1644
1821
|
/** When the run completed */
|
|
1645
|
-
completedAt:
|
|
1822
|
+
completedAt: import_zod30.z.string().optional(),
|
|
1646
1823
|
/** Live trace events captured during execution (for playback on results page) */
|
|
1647
|
-
liveTraceEvents:
|
|
1824
|
+
liveTraceEvents: import_zod30.z.array(LiveTraceEventSchema).optional(),
|
|
1648
1825
|
/** Remote job ID for tracking execution in Dev Machines */
|
|
1649
|
-
jobId:
|
|
1826
|
+
jobId: import_zod30.z.string().optional(),
|
|
1650
1827
|
/** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
|
|
1651
|
-
jobStatus:
|
|
1828
|
+
jobStatus: import_zod30.z.string().optional(),
|
|
1652
1829
|
/** Remote job error message if the job failed */
|
|
1653
|
-
jobError:
|
|
1830
|
+
jobError: import_zod30.z.string().optional(),
|
|
1654
1831
|
/** Timestamp of the last job status check */
|
|
1655
|
-
jobStatusCheckedAt:
|
|
1656
|
-
/**
|
|
1657
|
-
|
|
1658
|
-
/**
|
|
1659
|
-
|
|
1660
|
-
/** Rule IDs to enable for this run (optional) */
|
|
1661
|
-
ruleIds: import_zod29.z.array(import_zod29.z.string()).optional(),
|
|
1832
|
+
jobStatusCheckedAt: import_zod30.z.string().optional(),
|
|
1833
|
+
/** Unified capability IDs */
|
|
1834
|
+
capabilityIds: import_zod30.z.array(import_zod30.z.string()).optional(),
|
|
1835
|
+
/** Map of capabilityId to capabilityVersionId for version pinning */
|
|
1836
|
+
capabilityVersions: import_zod30.z.record(import_zod30.z.string(), import_zod30.z.string()).optional(),
|
|
1662
1837
|
/** Tags used to select scenarios for this run (for traceability) */
|
|
1663
|
-
tags:
|
|
1838
|
+
tags: import_zod30.z.array(import_zod30.z.string()).optional(),
|
|
1664
1839
|
/** How many times each scenario is executed within this eval run. Default: 1. Max: 20. */
|
|
1665
|
-
runsPerScenario:
|
|
1840
|
+
runsPerScenario: import_zod30.z.number().int().min(1).max(20).optional(),
|
|
1666
1841
|
/** Snapshot of agent configuration captured at run creation time */
|
|
1667
|
-
agentSnapshot:
|
|
1668
|
-
name:
|
|
1842
|
+
agentSnapshot: import_zod30.z.object({
|
|
1843
|
+
name: import_zod30.z.string().optional(),
|
|
1669
1844
|
agentType: AgentTypeSchema.optional(),
|
|
1670
1845
|
runCommand: AgentRunCommandSchema.optional(),
|
|
1671
|
-
systemPrompt:
|
|
1846
|
+
systemPrompt: import_zod30.z.string().nullable().optional(),
|
|
1672
1847
|
modelConfig: ModelConfigSchema.optional()
|
|
1673
1848
|
}).optional(),
|
|
1674
1849
|
/** UUID linking all runs in a comparison group */
|
|
1675
|
-
comparisonGroupId:
|
|
1850
|
+
comparisonGroupId: import_zod30.z.string().optional(),
|
|
1676
1851
|
/** Human-readable label for this variant (e.g., "MCP: Wix Stores") */
|
|
1677
|
-
comparisonLabel:
|
|
1852
|
+
comparisonLabel: import_zod30.z.string().optional(),
|
|
1678
1853
|
/** LLM-generated analysis of the completed run */
|
|
1679
1854
|
runAnalysis: RunAnalysisSchema.optional()
|
|
1680
1855
|
});
|
|
@@ -1692,60 +1867,60 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
|
1692
1867
|
agentSnapshot: true
|
|
1693
1868
|
}).extend({
|
|
1694
1869
|
/** Optional on input — backend resolves from tags when not provided */
|
|
1695
|
-
scenarioIds:
|
|
1870
|
+
scenarioIds: import_zod30.z.array(import_zod30.z.string()).optional()
|
|
1696
1871
|
}).refine(
|
|
1697
1872
|
(data) => data.scenarioIds && data.scenarioIds.length > 0 || data.tags && data.tags.length > 0,
|
|
1698
1873
|
{ message: "Either scenarioIds or tags must be provided" }
|
|
1699
1874
|
);
|
|
1700
|
-
var EvaluationProgressSchema =
|
|
1701
|
-
runId:
|
|
1702
|
-
targetId:
|
|
1703
|
-
totalScenarios:
|
|
1704
|
-
completedScenarios:
|
|
1705
|
-
scenarioProgress:
|
|
1706
|
-
|
|
1707
|
-
scenarioId:
|
|
1708
|
-
currentStep:
|
|
1709
|
-
error:
|
|
1875
|
+
var EvaluationProgressSchema = import_zod30.z.object({
|
|
1876
|
+
runId: import_zod30.z.string(),
|
|
1877
|
+
targetId: import_zod30.z.string(),
|
|
1878
|
+
totalScenarios: import_zod30.z.number(),
|
|
1879
|
+
completedScenarios: import_zod30.z.number(),
|
|
1880
|
+
scenarioProgress: import_zod30.z.array(
|
|
1881
|
+
import_zod30.z.object({
|
|
1882
|
+
scenarioId: import_zod30.z.string(),
|
|
1883
|
+
currentStep: import_zod30.z.string(),
|
|
1884
|
+
error: import_zod30.z.string().optional()
|
|
1710
1885
|
})
|
|
1711
1886
|
),
|
|
1712
|
-
createdAt:
|
|
1713
|
-
});
|
|
1714
|
-
var EvaluationLogSchema =
|
|
1715
|
-
runId:
|
|
1716
|
-
scenarioId:
|
|
1717
|
-
log:
|
|
1718
|
-
level:
|
|
1719
|
-
message:
|
|
1720
|
-
args:
|
|
1721
|
-
error:
|
|
1887
|
+
createdAt: import_zod30.z.number()
|
|
1888
|
+
});
|
|
1889
|
+
var EvaluationLogSchema = import_zod30.z.object({
|
|
1890
|
+
runId: import_zod30.z.string(),
|
|
1891
|
+
scenarioId: import_zod30.z.string(),
|
|
1892
|
+
log: import_zod30.z.object({
|
|
1893
|
+
level: import_zod30.z.enum(["info", "error", "debug"]),
|
|
1894
|
+
message: import_zod30.z.string().optional(),
|
|
1895
|
+
args: import_zod30.z.array(import_zod30.z.any()).optional(),
|
|
1896
|
+
error: import_zod30.z.string().optional()
|
|
1722
1897
|
})
|
|
1723
1898
|
});
|
|
1724
1899
|
var LLM_TIMEOUT = 12e4;
|
|
1725
1900
|
|
|
1726
1901
|
// src/evaluation/conversation.ts
|
|
1727
|
-
var
|
|
1728
|
-
var TextBlockSchema =
|
|
1729
|
-
type:
|
|
1730
|
-
text:
|
|
1731
|
-
});
|
|
1732
|
-
var ThinkingBlockSchema =
|
|
1733
|
-
type:
|
|
1734
|
-
thinking:
|
|
1735
|
-
});
|
|
1736
|
-
var ToolUseBlockSchema =
|
|
1737
|
-
type:
|
|
1738
|
-
toolName:
|
|
1739
|
-
toolId:
|
|
1740
|
-
input:
|
|
1741
|
-
});
|
|
1742
|
-
var ToolResultBlockSchema =
|
|
1743
|
-
type:
|
|
1744
|
-
toolUseId:
|
|
1745
|
-
content:
|
|
1746
|
-
isError:
|
|
1747
|
-
});
|
|
1748
|
-
var ConversationBlockSchema =
|
|
1902
|
+
var import_zod31 = require("zod");
|
|
1903
|
+
var TextBlockSchema = import_zod31.z.object({
|
|
1904
|
+
type: import_zod31.z.literal("text"),
|
|
1905
|
+
text: import_zod31.z.string()
|
|
1906
|
+
});
|
|
1907
|
+
var ThinkingBlockSchema = import_zod31.z.object({
|
|
1908
|
+
type: import_zod31.z.literal("thinking"),
|
|
1909
|
+
thinking: import_zod31.z.string()
|
|
1910
|
+
});
|
|
1911
|
+
var ToolUseBlockSchema = import_zod31.z.object({
|
|
1912
|
+
type: import_zod31.z.literal("tool_use"),
|
|
1913
|
+
toolName: import_zod31.z.string(),
|
|
1914
|
+
toolId: import_zod31.z.string(),
|
|
1915
|
+
input: import_zod31.z.unknown()
|
|
1916
|
+
});
|
|
1917
|
+
var ToolResultBlockSchema = import_zod31.z.object({
|
|
1918
|
+
type: import_zod31.z.literal("tool_result"),
|
|
1919
|
+
toolUseId: import_zod31.z.string(),
|
|
1920
|
+
content: import_zod31.z.string(),
|
|
1921
|
+
isError: import_zod31.z.boolean().optional()
|
|
1922
|
+
});
|
|
1923
|
+
var ConversationBlockSchema = import_zod31.z.discriminatedUnion("type", [
|
|
1749
1924
|
TextBlockSchema,
|
|
1750
1925
|
ThinkingBlockSchema,
|
|
1751
1926
|
ToolUseBlockSchema,
|
|
@@ -1756,18 +1931,18 @@ var ConversationMessageRoles = [
|
|
|
1756
1931
|
"user",
|
|
1757
1932
|
"system"
|
|
1758
1933
|
];
|
|
1759
|
-
var ConversationMessageSchema =
|
|
1760
|
-
role:
|
|
1761
|
-
content:
|
|
1762
|
-
timestamp:
|
|
1934
|
+
var ConversationMessageSchema = import_zod31.z.object({
|
|
1935
|
+
role: import_zod31.z.enum(ConversationMessageRoles),
|
|
1936
|
+
content: import_zod31.z.array(ConversationBlockSchema),
|
|
1937
|
+
timestamp: import_zod31.z.string()
|
|
1763
1938
|
});
|
|
1764
|
-
var ScenarioConversationSchema =
|
|
1765
|
-
id:
|
|
1766
|
-
projectId:
|
|
1767
|
-
evalRunId:
|
|
1768
|
-
resultId:
|
|
1769
|
-
messages:
|
|
1770
|
-
createdAt:
|
|
1939
|
+
var ScenarioConversationSchema = import_zod31.z.object({
|
|
1940
|
+
id: import_zod31.z.string(),
|
|
1941
|
+
projectId: import_zod31.z.string(),
|
|
1942
|
+
evalRunId: import_zod31.z.string(),
|
|
1943
|
+
resultId: import_zod31.z.string(),
|
|
1944
|
+
messages: import_zod31.z.array(ConversationMessageSchema),
|
|
1945
|
+
createdAt: import_zod31.z.string()
|
|
1771
1946
|
});
|
|
1772
1947
|
|
|
1773
1948
|
// src/evaluation/eval-result.ts
|
|
@@ -1778,98 +1953,98 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
|
1778
1953
|
AssertionResultStatus2["ERROR"] = "error";
|
|
1779
1954
|
return AssertionResultStatus2;
|
|
1780
1955
|
})(AssertionResultStatus || {});
|
|
1781
|
-
var AssertionResultSchema =
|
|
1782
|
-
id:
|
|
1783
|
-
assertionId:
|
|
1784
|
-
assertionType:
|
|
1785
|
-
assertionName:
|
|
1786
|
-
status:
|
|
1787
|
-
message:
|
|
1788
|
-
expected:
|
|
1789
|
-
actual:
|
|
1790
|
-
duration:
|
|
1791
|
-
details:
|
|
1792
|
-
llmTraceSteps:
|
|
1793
|
-
});
|
|
1794
|
-
var EvalRunResultSchema =
|
|
1795
|
-
id:
|
|
1796
|
-
targetId:
|
|
1797
|
-
targetName:
|
|
1956
|
+
var AssertionResultSchema = import_zod32.z.object({
|
|
1957
|
+
id: import_zod32.z.string(),
|
|
1958
|
+
assertionId: import_zod32.z.string(),
|
|
1959
|
+
assertionType: import_zod32.z.string(),
|
|
1960
|
+
assertionName: import_zod32.z.string(),
|
|
1961
|
+
status: import_zod32.z.enum(AssertionResultStatus),
|
|
1962
|
+
message: import_zod32.z.string().optional(),
|
|
1963
|
+
expected: import_zod32.z.string().optional(),
|
|
1964
|
+
actual: import_zod32.z.string().optional(),
|
|
1965
|
+
duration: import_zod32.z.number().optional(),
|
|
1966
|
+
details: import_zod32.z.record(import_zod32.z.string(), import_zod32.z.unknown()).optional(),
|
|
1967
|
+
llmTraceSteps: import_zod32.z.array(LLMTraceStepSchema).optional()
|
|
1968
|
+
});
|
|
1969
|
+
var EvalRunResultSchema = import_zod32.z.object({
|
|
1970
|
+
id: import_zod32.z.string(),
|
|
1971
|
+
targetId: import_zod32.z.string(),
|
|
1972
|
+
targetName: import_zod32.z.string().optional(),
|
|
1798
1973
|
/** SkillVersion ID used for this evaluation (for version tracking) */
|
|
1799
|
-
skillVersionId:
|
|
1974
|
+
skillVersionId: import_zod32.z.string().optional(),
|
|
1800
1975
|
/** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
|
|
1801
|
-
skillVersion:
|
|
1802
|
-
scenarioId:
|
|
1803
|
-
scenarioName:
|
|
1976
|
+
skillVersion: import_zod32.z.string().optional(),
|
|
1977
|
+
scenarioId: import_zod32.z.string(),
|
|
1978
|
+
scenarioName: import_zod32.z.string(),
|
|
1804
1979
|
/** Snapshot of the trigger prompt used during the run (prevents stale display after edits) */
|
|
1805
|
-
triggerPrompt:
|
|
1980
|
+
triggerPrompt: import_zod32.z.string().optional(),
|
|
1806
1981
|
modelConfig: ModelConfigSchema.optional(),
|
|
1807
|
-
assertionResults:
|
|
1982
|
+
assertionResults: import_zod32.z.array(AssertionResultSchema),
|
|
1808
1983
|
metrics: EvalMetricsSchema.optional(),
|
|
1809
|
-
passed:
|
|
1810
|
-
failed:
|
|
1811
|
-
passRate:
|
|
1812
|
-
duration:
|
|
1813
|
-
outputText:
|
|
1814
|
-
files:
|
|
1815
|
-
fileDiffs:
|
|
1984
|
+
passed: import_zod32.z.number(),
|
|
1985
|
+
failed: import_zod32.z.number(),
|
|
1986
|
+
passRate: import_zod32.z.number(),
|
|
1987
|
+
duration: import_zod32.z.number(),
|
|
1988
|
+
outputText: import_zod32.z.string().optional(),
|
|
1989
|
+
files: import_zod32.z.array(ExpectedFileSchema).optional(),
|
|
1990
|
+
fileDiffs: import_zod32.z.array(DiffContentSchema).optional(),
|
|
1816
1991
|
/** Full template files after execution with status indicators */
|
|
1817
|
-
templateFiles:
|
|
1818
|
-
startedAt:
|
|
1819
|
-
completedAt:
|
|
1992
|
+
templateFiles: import_zod32.z.array(TemplateFileSchema).optional(),
|
|
1993
|
+
startedAt: import_zod32.z.string().optional(),
|
|
1994
|
+
completedAt: import_zod32.z.string().optional(),
|
|
1820
1995
|
llmTrace: LLMTraceSchema.optional(),
|
|
1821
1996
|
/** Full conversation messages (only present in transit; stripped before DB storage) */
|
|
1822
|
-
conversation:
|
|
1997
|
+
conversation: import_zod32.z.array(ConversationMessageSchema).optional(),
|
|
1823
1998
|
/** 0-based iteration index when a scenario is run multiple times within a single eval run */
|
|
1824
|
-
iterationIndex:
|
|
1825
|
-
});
|
|
1826
|
-
var PromptResultSchema =
|
|
1827
|
-
text:
|
|
1828
|
-
files:
|
|
1829
|
-
finishReason:
|
|
1830
|
-
reasoning:
|
|
1831
|
-
reasoningDetails:
|
|
1832
|
-
toolCalls:
|
|
1833
|
-
toolResults:
|
|
1834
|
-
warnings:
|
|
1835
|
-
sources:
|
|
1836
|
-
steps:
|
|
1837
|
-
generationTimeMs:
|
|
1838
|
-
prompt:
|
|
1839
|
-
systemPrompt:
|
|
1840
|
-
usage:
|
|
1841
|
-
totalTokens:
|
|
1842
|
-
totalMicrocentsSpent:
|
|
1999
|
+
iterationIndex: import_zod32.z.number().int().min(0).optional()
|
|
2000
|
+
});
|
|
2001
|
+
var PromptResultSchema = import_zod32.z.object({
|
|
2002
|
+
text: import_zod32.z.string(),
|
|
2003
|
+
files: import_zod32.z.array(import_zod32.z.unknown()).optional(),
|
|
2004
|
+
finishReason: import_zod32.z.string().optional(),
|
|
2005
|
+
reasoning: import_zod32.z.string().optional(),
|
|
2006
|
+
reasoningDetails: import_zod32.z.unknown().optional(),
|
|
2007
|
+
toolCalls: import_zod32.z.array(import_zod32.z.unknown()).optional(),
|
|
2008
|
+
toolResults: import_zod32.z.array(import_zod32.z.unknown()).optional(),
|
|
2009
|
+
warnings: import_zod32.z.array(import_zod32.z.unknown()).optional(),
|
|
2010
|
+
sources: import_zod32.z.array(import_zod32.z.unknown()).optional(),
|
|
2011
|
+
steps: import_zod32.z.array(import_zod32.z.unknown()),
|
|
2012
|
+
generationTimeMs: import_zod32.z.number(),
|
|
2013
|
+
prompt: import_zod32.z.string(),
|
|
2014
|
+
systemPrompt: import_zod32.z.string(),
|
|
2015
|
+
usage: import_zod32.z.object({
|
|
2016
|
+
totalTokens: import_zod32.z.number().optional(),
|
|
2017
|
+
totalMicrocentsSpent: import_zod32.z.number().optional()
|
|
1843
2018
|
})
|
|
1844
2019
|
});
|
|
1845
|
-
var EvaluationResultSchema =
|
|
1846
|
-
id:
|
|
1847
|
-
runId:
|
|
1848
|
-
timestamp:
|
|
2020
|
+
var EvaluationResultSchema = import_zod32.z.object({
|
|
2021
|
+
id: import_zod32.z.string(),
|
|
2022
|
+
runId: import_zod32.z.string(),
|
|
2023
|
+
timestamp: import_zod32.z.number(),
|
|
1849
2024
|
promptResult: PromptResultSchema,
|
|
1850
|
-
testResults:
|
|
1851
|
-
tags:
|
|
1852
|
-
feedback:
|
|
1853
|
-
score:
|
|
1854
|
-
suiteId:
|
|
1855
|
-
});
|
|
1856
|
-
var LeanEvaluationResultSchema =
|
|
1857
|
-
id:
|
|
1858
|
-
runId:
|
|
1859
|
-
timestamp:
|
|
1860
|
-
tags:
|
|
1861
|
-
scenarioId:
|
|
1862
|
-
scenarioVersion:
|
|
1863
|
-
targetId:
|
|
1864
|
-
targetVersion:
|
|
1865
|
-
suiteId:
|
|
1866
|
-
score:
|
|
1867
|
-
time:
|
|
1868
|
-
microcentsSpent:
|
|
2025
|
+
testResults: import_zod32.z.array(import_zod32.z.unknown()),
|
|
2026
|
+
tags: import_zod32.z.array(import_zod32.z.string()).optional(),
|
|
2027
|
+
feedback: import_zod32.z.string().optional(),
|
|
2028
|
+
score: import_zod32.z.number(),
|
|
2029
|
+
suiteId: import_zod32.z.string().optional()
|
|
2030
|
+
});
|
|
2031
|
+
var LeanEvaluationResultSchema = import_zod32.z.object({
|
|
2032
|
+
id: import_zod32.z.string(),
|
|
2033
|
+
runId: import_zod32.z.string(),
|
|
2034
|
+
timestamp: import_zod32.z.number(),
|
|
2035
|
+
tags: import_zod32.z.array(import_zod32.z.string()).optional(),
|
|
2036
|
+
scenarioId: import_zod32.z.string(),
|
|
2037
|
+
scenarioVersion: import_zod32.z.number().optional(),
|
|
2038
|
+
targetId: import_zod32.z.string(),
|
|
2039
|
+
targetVersion: import_zod32.z.number().optional(),
|
|
2040
|
+
suiteId: import_zod32.z.string().optional(),
|
|
2041
|
+
score: import_zod32.z.number(),
|
|
2042
|
+
time: import_zod32.z.number().optional(),
|
|
2043
|
+
microcentsSpent: import_zod32.z.number().optional()
|
|
1869
2044
|
});
|
|
1870
2045
|
|
|
1871
2046
|
// src/evaluation/eval-run-folder.ts
|
|
1872
|
-
var
|
|
2047
|
+
var import_zod33 = require("zod");
|
|
1873
2048
|
var EvalRunFolderSchema = TenantEntitySchema.extend({});
|
|
1874
2049
|
var CreateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
|
|
1875
2050
|
id: true,
|
|
@@ -1883,26 +2058,26 @@ var UpdateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
|
|
|
1883
2058
|
updatedAt: true,
|
|
1884
2059
|
deleted: true
|
|
1885
2060
|
}).partial();
|
|
1886
|
-
var EvalRunFolderMembershipSchema =
|
|
1887
|
-
folderId:
|
|
1888
|
-
evalRunId:
|
|
1889
|
-
projectId:
|
|
1890
|
-
createdAt:
|
|
2061
|
+
var EvalRunFolderMembershipSchema = import_zod33.z.object({
|
|
2062
|
+
folderId: import_zod33.z.string(),
|
|
2063
|
+
evalRunId: import_zod33.z.string(),
|
|
2064
|
+
projectId: import_zod33.z.string(),
|
|
2065
|
+
createdAt: import_zod33.z.string()
|
|
1891
2066
|
});
|
|
1892
2067
|
|
|
1893
2068
|
// src/project/project.ts
|
|
1894
|
-
var
|
|
2069
|
+
var import_zod34 = require("zod");
|
|
1895
2070
|
var ProjectSchema = BaseEntitySchema.extend({
|
|
1896
|
-
appId:
|
|
1897
|
-
scenarioTags:
|
|
2071
|
+
appId: import_zod34.z.string().optional().describe("The ID of the app in Dev Center"),
|
|
2072
|
+
scenarioTags: import_zod34.z.array(import_zod34.z.string()).optional().describe("Project-level tag vocabulary for scenarios"),
|
|
1898
2073
|
/** Per-project Wix auth token (write-only — never returned in GET responses). null = clear. */
|
|
1899
|
-
wixAuthToken:
|
|
2074
|
+
wixAuthToken: import_zod34.z.string().nullable().optional().describe("Wix auth token for CLI/MCP authentication (encrypted at rest)"),
|
|
1900
2075
|
/** Per-project Base44 auth file content (write-only — never returned in GET responses). null = clear. */
|
|
1901
|
-
base44AuthFile:
|
|
2076
|
+
base44AuthFile: import_zod34.z.string().nullable().optional().describe("Base64-encoded Base44 auth file content (encrypted at rest)"),
|
|
1902
2077
|
/** Resolved at runtime from the encrypted Wix auth token */
|
|
1903
|
-
wixAuthEmail:
|
|
2078
|
+
wixAuthEmail: import_zod34.z.string().optional().describe("Email associated with the Wix auth token (resolved at runtime)"),
|
|
1904
2079
|
/** Resolved at runtime from the encrypted Base44 auth file */
|
|
1905
|
-
base44AuthEmail:
|
|
2080
|
+
base44AuthEmail: import_zod34.z.string().optional().describe("Email from the Base44 auth file (resolved at runtime)")
|
|
1906
2081
|
});
|
|
1907
2082
|
var CreateProjectInputSchema = ProjectSchema.omit({
|
|
1908
2083
|
id: true,
|
|
@@ -1912,7 +2087,7 @@ var CreateProjectInputSchema = ProjectSchema.omit({
|
|
|
1912
2087
|
wixAuthEmail: true,
|
|
1913
2088
|
base44AuthEmail: true
|
|
1914
2089
|
}).extend({
|
|
1915
|
-
appId:
|
|
2090
|
+
appId: import_zod34.z.string().describe(
|
|
1916
2091
|
"Required: The ID of the app in Dev Center for credential scoping"
|
|
1917
2092
|
)
|
|
1918
2093
|
});
|
|
@@ -1932,7 +2107,7 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
|
|
|
1932
2107
|
var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
|
|
1933
2108
|
|
|
1934
2109
|
// src/schedule/eval-schedule.ts
|
|
1935
|
-
var
|
|
2110
|
+
var import_zod35 = require("zod");
|
|
1936
2111
|
var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
1937
2112
|
FrequencyType2["DAILY"] = "daily";
|
|
1938
2113
|
FrequencyType2["WEEKDAY"] = "weekday";
|
|
@@ -1942,29 +2117,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
|
1942
2117
|
})(FrequencyType || {});
|
|
1943
2118
|
var EvalScheduleSchema = TenantEntitySchema.extend({
|
|
1944
2119
|
/** Whether the schedule is active */
|
|
1945
|
-
enabled:
|
|
2120
|
+
enabled: import_zod35.z.boolean(),
|
|
1946
2121
|
/** Test suite to run */
|
|
1947
|
-
suiteId:
|
|
2122
|
+
suiteId: import_zod35.z.string(),
|
|
1948
2123
|
/** Preset that provides agent + entities for this schedule */
|
|
1949
|
-
presetId:
|
|
2124
|
+
presetId: import_zod35.z.string(),
|
|
1950
2125
|
/** How often to run */
|
|
1951
|
-
frequencyType:
|
|
2126
|
+
frequencyType: import_zod35.z.nativeEnum(FrequencyType),
|
|
1952
2127
|
/** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
|
|
1953
|
-
timeOfDay:
|
|
2128
|
+
timeOfDay: import_zod35.z.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
|
|
1954
2129
|
/** Day of week (0=Sun, 6=Sat) for weekly schedules */
|
|
1955
|
-
dayOfWeek:
|
|
2130
|
+
dayOfWeek: import_zod35.z.number().min(0).max(6).optional(),
|
|
1956
2131
|
/** Day of month (1-31) for monthly schedules */
|
|
1957
|
-
dayOfMonth:
|
|
2132
|
+
dayOfMonth: import_zod35.z.number().min(1).max(31).optional(),
|
|
1958
2133
|
/** IANA timezone (e.g., 'America/New_York') */
|
|
1959
|
-
timezone:
|
|
2134
|
+
timezone: import_zod35.z.string(),
|
|
1960
2135
|
/** ID of the last eval run created by this schedule */
|
|
1961
|
-
lastRunId:
|
|
2136
|
+
lastRunId: import_zod35.z.string().optional(),
|
|
1962
2137
|
/** Denormalized status of the last run */
|
|
1963
|
-
lastRunStatus:
|
|
2138
|
+
lastRunStatus: import_zod35.z.string().optional(),
|
|
1964
2139
|
/** ISO timestamp of the last run */
|
|
1965
|
-
lastRunAt:
|
|
2140
|
+
lastRunAt: import_zod35.z.string().optional(),
|
|
1966
2141
|
/** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
|
|
1967
|
-
nextRunAt:
|
|
2142
|
+
nextRunAt: import_zod35.z.string().optional()
|
|
1968
2143
|
});
|
|
1969
2144
|
function isValidTimezone(tz) {
|
|
1970
2145
|
try {
|
|
@@ -1977,14 +2152,14 @@ function isValidTimezone(tz) {
|
|
|
1977
2152
|
function validateScheduleFields(data, ctx, options) {
|
|
1978
2153
|
if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
|
|
1979
2154
|
ctx.addIssue({
|
|
1980
|
-
code:
|
|
2155
|
+
code: import_zod35.z.ZodIssueCode.custom,
|
|
1981
2156
|
message: "dayOfWeek is required for weekly schedules",
|
|
1982
2157
|
path: ["dayOfWeek"]
|
|
1983
2158
|
});
|
|
1984
2159
|
}
|
|
1985
2160
|
if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
|
|
1986
2161
|
ctx.addIssue({
|
|
1987
|
-
code:
|
|
2162
|
+
code: import_zod35.z.ZodIssueCode.custom,
|
|
1988
2163
|
message: "dayOfMonth is required for monthly schedules",
|
|
1989
2164
|
path: ["dayOfMonth"]
|
|
1990
2165
|
});
|
|
@@ -1992,7 +2167,7 @@ function validateScheduleFields(data, ctx, options) {
|
|
|
1992
2167
|
const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
|
|
1993
2168
|
if (shouldValidateTz && !isValidTimezone(data.timezone)) {
|
|
1994
2169
|
ctx.addIssue({
|
|
1995
|
-
code:
|
|
2170
|
+
code: import_zod35.z.ZodIssueCode.custom,
|
|
1996
2171
|
message: "Invalid IANA timezone",
|
|
1997
2172
|
path: ["timezone"]
|
|
1998
2173
|
});
|
|
@@ -2057,6 +2232,13 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
|
|
|
2057
2232
|
BulkImportResultItemSchema,
|
|
2058
2233
|
BulkImportResultSchema,
|
|
2059
2234
|
BulkImportSkillsInputSchema,
|
|
2235
|
+
CAPABILITY_NAME_REGEX,
|
|
2236
|
+
CapabilityContentSchema,
|
|
2237
|
+
CapabilitySchema,
|
|
2238
|
+
CapabilityTypeSchema,
|
|
2239
|
+
CapabilityVersionOriginSchema,
|
|
2240
|
+
CapabilityVersionSchema,
|
|
2241
|
+
CapabilityWithLatestVersionSchema,
|
|
2060
2242
|
ClaudeModel,
|
|
2061
2243
|
ClaudeModelSchema,
|
|
2062
2244
|
CommandExecutionSchema,
|
|
@@ -2067,6 +2249,8 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
|
|
|
2067
2249
|
CostAssertionSchema,
|
|
2068
2250
|
CostConfigSchema,
|
|
2069
2251
|
CreateAgentInputSchema,
|
|
2252
|
+
CreateCapabilityInputSchema,
|
|
2253
|
+
CreateCapabilityVersionInputSchema,
|
|
2070
2254
|
CreateEvalRunFolderInputSchema,
|
|
2071
2255
|
CreateEvalRunInputSchema,
|
|
2072
2256
|
CreateEvalScheduleInputSchema,
|
|
@@ -2106,6 +2290,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
|
|
|
2106
2290
|
FilePresenceTestSchema,
|
|
2107
2291
|
FrequencyType,
|
|
2108
2292
|
GitHubSourceSchema,
|
|
2293
|
+
InitialCapabilityVersionInputSchema,
|
|
2109
2294
|
InitialVersionInputSchema,
|
|
2110
2295
|
LEGACY_MODEL_ID_MAP,
|
|
2111
2296
|
LLMBreakdownStatsSchema,
|
|
@@ -2182,6 +2367,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
|
|
|
2182
2367
|
TriggerSchema,
|
|
2183
2368
|
TriggerType,
|
|
2184
2369
|
UpdateAgentInputSchema,
|
|
2370
|
+
UpdateCapabilityInputSchema,
|
|
2185
2371
|
UpdateEvalRunFolderInputSchema,
|
|
2186
2372
|
UpdateEvalScheduleInputSchema,
|
|
2187
2373
|
UpdateMcpInputSchema,
|
|
@@ -2194,12 +2380,20 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
|
|
|
2194
2380
|
UpdateTestScenarioInputSchema,
|
|
2195
2381
|
UpdateTestSuiteInputSchema,
|
|
2196
2382
|
VitestTestSchema,
|
|
2383
|
+
capabilityToMcp,
|
|
2384
|
+
capabilityToRule,
|
|
2385
|
+
capabilityToSkill,
|
|
2386
|
+
capabilityToSkillWithLatestVersion,
|
|
2387
|
+
capabilityToSubAgent,
|
|
2388
|
+
capabilityVersionToSkillVersion,
|
|
2197
2389
|
classifyAssertionRef,
|
|
2198
2390
|
formatTraceEventLine,
|
|
2199
2391
|
getSystemAssertion,
|
|
2200
2392
|
getSystemAssertions,
|
|
2393
|
+
groupCapabilitiesByType,
|
|
2201
2394
|
isAllowedBuildCommandString,
|
|
2202
2395
|
isSystemAssertionId,
|
|
2396
|
+
isValidCapabilityName,
|
|
2203
2397
|
isValidSkillFolderName,
|
|
2204
2398
|
normalizeBatchAssertionLink,
|
|
2205
2399
|
normalizeModelId,
|