@wix/evalforge-types 0.73.0 → 0.75.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/build/index.js +706 -488
- package/build/index.js.map +4 -4
- package/build/index.mjs +687 -488
- package/build/index.mjs.map +4 -4
- package/build/types/evaluation/eval-run.d.ts +4 -0
- package/build/types/project/project.d.ts +5 -2
- package/build/types/target/capability-converters.d.ts +25 -0
- package/build/types/target/capability.d.ts +254 -0
- package/build/types/target/index.d.ts +2 -0
- package/build/types/target/preset.d.ts +6 -0
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -60,6 +60,13 @@ __export(index_exports, {
|
|
|
60
60
|
BulkImportResultItemSchema: () => BulkImportResultItemSchema,
|
|
61
61
|
BulkImportResultSchema: () => BulkImportResultSchema,
|
|
62
62
|
BulkImportSkillsInputSchema: () => BulkImportSkillsInputSchema,
|
|
63
|
+
CAPABILITY_NAME_REGEX: () => CAPABILITY_NAME_REGEX,
|
|
64
|
+
CapabilityContentSchema: () => CapabilityContentSchema,
|
|
65
|
+
CapabilitySchema: () => CapabilitySchema,
|
|
66
|
+
CapabilityTypeSchema: () => CapabilityTypeSchema,
|
|
67
|
+
CapabilityVersionOriginSchema: () => CapabilityVersionOriginSchema,
|
|
68
|
+
CapabilityVersionSchema: () => CapabilityVersionSchema,
|
|
69
|
+
CapabilityWithLatestVersionSchema: () => CapabilityWithLatestVersionSchema,
|
|
63
70
|
ClaudeModel: () => ClaudeModel,
|
|
64
71
|
ClaudeModelSchema: () => ClaudeModelSchema,
|
|
65
72
|
CommandExecutionSchema: () => CommandExecutionSchema,
|
|
@@ -70,6 +77,8 @@ __export(index_exports, {
|
|
|
70
77
|
CostAssertionSchema: () => CostAssertionSchema,
|
|
71
78
|
CostConfigSchema: () => CostConfigSchema,
|
|
72
79
|
CreateAgentInputSchema: () => CreateAgentInputSchema,
|
|
80
|
+
CreateCapabilityInputSchema: () => CreateCapabilityInputSchema,
|
|
81
|
+
CreateCapabilityVersionInputSchema: () => CreateCapabilityVersionInputSchema,
|
|
73
82
|
CreateEvalRunFolderInputSchema: () => CreateEvalRunFolderInputSchema,
|
|
74
83
|
CreateEvalRunInputSchema: () => CreateEvalRunInputSchema,
|
|
75
84
|
CreateEvalScheduleInputSchema: () => CreateEvalScheduleInputSchema,
|
|
@@ -109,6 +118,7 @@ __export(index_exports, {
|
|
|
109
118
|
FilePresenceTestSchema: () => FilePresenceTestSchema,
|
|
110
119
|
FrequencyType: () => FrequencyType,
|
|
111
120
|
GitHubSourceSchema: () => GitHubSourceSchema,
|
|
121
|
+
InitialCapabilityVersionInputSchema: () => InitialCapabilityVersionInputSchema,
|
|
112
122
|
InitialVersionInputSchema: () => InitialVersionInputSchema,
|
|
113
123
|
LEGACY_MODEL_ID_MAP: () => LEGACY_MODEL_ID_MAP,
|
|
114
124
|
LLMBreakdownStatsSchema: () => LLMBreakdownStatsSchema,
|
|
@@ -185,6 +195,7 @@ __export(index_exports, {
|
|
|
185
195
|
TriggerSchema: () => TriggerSchema,
|
|
186
196
|
TriggerType: () => TriggerType,
|
|
187
197
|
UpdateAgentInputSchema: () => UpdateAgentInputSchema,
|
|
198
|
+
UpdateCapabilityInputSchema: () => UpdateCapabilityInputSchema,
|
|
188
199
|
UpdateEvalRunFolderInputSchema: () => UpdateEvalRunFolderInputSchema,
|
|
189
200
|
UpdateEvalScheduleInputSchema: () => UpdateEvalScheduleInputSchema,
|
|
190
201
|
UpdateMcpInputSchema: () => UpdateMcpInputSchema,
|
|
@@ -197,12 +208,20 @@ __export(index_exports, {
|
|
|
197
208
|
UpdateTestScenarioInputSchema: () => UpdateTestScenarioInputSchema,
|
|
198
209
|
UpdateTestSuiteInputSchema: () => UpdateTestSuiteInputSchema,
|
|
199
210
|
VitestTestSchema: () => VitestTestSchema,
|
|
211
|
+
capabilityToMcp: () => capabilityToMcp,
|
|
212
|
+
capabilityToRule: () => capabilityToRule,
|
|
213
|
+
capabilityToSkill: () => capabilityToSkill,
|
|
214
|
+
capabilityToSkillWithLatestVersion: () => capabilityToSkillWithLatestVersion,
|
|
215
|
+
capabilityToSubAgent: () => capabilityToSubAgent,
|
|
216
|
+
capabilityVersionToSkillVersion: () => capabilityVersionToSkillVersion,
|
|
200
217
|
classifyAssertionRef: () => classifyAssertionRef,
|
|
201
218
|
formatTraceEventLine: () => formatTraceEventLine,
|
|
202
219
|
getSystemAssertion: () => getSystemAssertion,
|
|
203
220
|
getSystemAssertions: () => getSystemAssertions,
|
|
221
|
+
groupCapabilitiesByType: () => groupCapabilitiesByType,
|
|
204
222
|
isAllowedBuildCommandString: () => isAllowedBuildCommandString,
|
|
205
223
|
isSystemAssertionId: () => isSystemAssertionId,
|
|
224
|
+
isValidCapabilityName: () => isValidCapabilityName,
|
|
206
225
|
isValidSkillFolderName: () => isValidSkillFolderName,
|
|
207
226
|
normalizeBatchAssertionLink: () => normalizeBatchAssertionLink,
|
|
208
227
|
normalizeModelId: () => normalizeModelId,
|
|
@@ -564,10 +583,14 @@ var PresetSchema = TenantEntitySchema.extend({
|
|
|
564
583
|
/** Sub-agent IDs included in this preset */
|
|
565
584
|
subAgentIds: import_zod9.z.array(import_zod9.z.string()).default([]),
|
|
566
585
|
/** Rule IDs included in this preset */
|
|
567
|
-
ruleIds: import_zod9.z.array(import_zod9.z.string()).default([])
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
586
|
+
ruleIds: import_zod9.z.array(import_zod9.z.string()).default([]),
|
|
587
|
+
/** Unified capability IDs (replaces skill/mcp/subAgent/ruleIds) */
|
|
588
|
+
capabilityIds: import_zod9.z.array(import_zod9.z.string()).optional(),
|
|
589
|
+
/** Map of capabilityId to capabilityVersionId for version pinning */
|
|
590
|
+
capabilityVersions: import_zod9.z.record(import_zod9.z.string(), import_zod9.z.string()).optional()
|
|
591
|
+
});
|
|
592
|
+
var atLeastOneEntity = (data) => (data.capabilityIds?.length ?? 0) > 0 || (data.skillIds?.length ?? 0) > 0 || (data.mcpIds?.length ?? 0) > 0 || (data.subAgentIds?.length ?? 0) > 0 || (data.ruleIds?.length ?? 0) > 0;
|
|
593
|
+
var AT_LEAST_ONE_ENTITY_MESSAGE = "At least one of capabilityIds, skillIds, mcpIds, subAgentIds, or ruleIds must be non-empty";
|
|
571
594
|
var CreatePresetInputSchema = PresetSchema.omit({
|
|
572
595
|
id: true,
|
|
573
596
|
createdAt: true,
|
|
@@ -581,11 +604,179 @@ var UpdatePresetInputSchema = PresetSchema.omit({
|
|
|
581
604
|
deleted: true
|
|
582
605
|
}).partial();
|
|
583
606
|
|
|
607
|
+
// src/target/capability.ts
|
|
608
|
+
var import_zod10 = require("zod");
|
|
609
|
+
var CapabilityTypeSchema = import_zod10.z.enum([
|
|
610
|
+
"SKILL",
|
|
611
|
+
"SUB_AGENT",
|
|
612
|
+
"RULE",
|
|
613
|
+
"MCP"
|
|
614
|
+
]);
|
|
615
|
+
var CAPABILITY_NAME_REGEX = /^[a-z0-9]+(-[a-z0-9]+)*$/;
|
|
616
|
+
function isValidCapabilityName(name) {
|
|
617
|
+
return typeof name === "string" && name.length > 0 && CAPABILITY_NAME_REGEX.test(name);
|
|
618
|
+
}
|
|
619
|
+
var KEBAB_CASE_MESSAGE2 = "Name must be in kebab-case (lowercase letters, numbers, hyphens only, e.g. my-capability)";
|
|
620
|
+
var CapabilityContentSchema = import_zod10.z.record(import_zod10.z.string(), import_zod10.z.unknown());
|
|
621
|
+
var CapabilityVersionOriginSchema = import_zod10.z.enum(["manual", "pr", "master"]);
|
|
622
|
+
var CapabilitySchema = TenantEntitySchema.extend({
|
|
623
|
+
capabilityType: CapabilityTypeSchema,
|
|
624
|
+
source: GitHubSourceSchema.optional()
|
|
625
|
+
});
|
|
626
|
+
var CapabilityVersionSchema = import_zod10.z.object({
|
|
627
|
+
id: import_zod10.z.string(),
|
|
628
|
+
projectId: import_zod10.z.string(),
|
|
629
|
+
capabilityId: import_zod10.z.string(),
|
|
630
|
+
version: import_zod10.z.string(),
|
|
631
|
+
origin: CapabilityVersionOriginSchema,
|
|
632
|
+
source: GitHubSourceSchema.optional(),
|
|
633
|
+
content: CapabilityContentSchema.optional(),
|
|
634
|
+
notes: import_zod10.z.string().optional(),
|
|
635
|
+
createdAt: import_zod10.z.string()
|
|
636
|
+
});
|
|
637
|
+
var CapabilityWithLatestVersionSchema = CapabilitySchema.extend({
|
|
638
|
+
latestVersion: CapabilityVersionSchema.optional()
|
|
639
|
+
});
|
|
640
|
+
var CapabilityInputBaseSchema = CapabilitySchema.omit({
|
|
641
|
+
id: true,
|
|
642
|
+
createdAt: true,
|
|
643
|
+
updatedAt: true,
|
|
644
|
+
deleted: true,
|
|
645
|
+
description: true,
|
|
646
|
+
source: true
|
|
647
|
+
}).extend({
|
|
648
|
+
description: import_zod10.z.string().optional(),
|
|
649
|
+
source: GitHubSourceSchema.optional()
|
|
650
|
+
});
|
|
651
|
+
var InitialCapabilityVersionInputSchema = import_zod10.z.object({
|
|
652
|
+
content: CapabilityContentSchema.optional(),
|
|
653
|
+
notes: import_zod10.z.string().optional(),
|
|
654
|
+
source: GitHubSourceSchema.optional(),
|
|
655
|
+
version: import_zod10.z.string().optional(),
|
|
656
|
+
origin: CapabilityVersionOriginSchema.optional()
|
|
657
|
+
});
|
|
658
|
+
var CreateCapabilityInputSchema = CapabilityInputBaseSchema.extend({
|
|
659
|
+
initialVersion: InitialCapabilityVersionInputSchema.optional()
|
|
660
|
+
}).refine((data) => isValidCapabilityName(data.name), {
|
|
661
|
+
message: KEBAB_CASE_MESSAGE2,
|
|
662
|
+
path: ["name"]
|
|
663
|
+
});
|
|
664
|
+
var UpdateCapabilityInputSchema = CapabilityInputBaseSchema.omit({
|
|
665
|
+
capabilityType: true
|
|
666
|
+
}).partial().refine(
|
|
667
|
+
(data) => data.name === void 0 || isValidCapabilityName(data.name),
|
|
668
|
+
{ message: KEBAB_CASE_MESSAGE2, path: ["name"] }
|
|
669
|
+
);
|
|
670
|
+
var CreateCapabilityVersionInputSchema = import_zod10.z.object({
|
|
671
|
+
source: GitHubSourceSchema.optional(),
|
|
672
|
+
version: import_zod10.z.string().min(1),
|
|
673
|
+
notes: import_zod10.z.string().optional(),
|
|
674
|
+
origin: CapabilityVersionOriginSchema.optional(),
|
|
675
|
+
content: CapabilityContentSchema.optional()
|
|
676
|
+
});
|
|
677
|
+
|
|
678
|
+
// src/target/capability-converters.ts
|
|
679
|
+
function capabilityToSkill(cap) {
|
|
680
|
+
return {
|
|
681
|
+
id: cap.id,
|
|
682
|
+
projectId: cap.projectId,
|
|
683
|
+
name: cap.name,
|
|
684
|
+
description: cap.description,
|
|
685
|
+
source: cap.source,
|
|
686
|
+
createdAt: cap.createdAt,
|
|
687
|
+
updatedAt: cap.updatedAt,
|
|
688
|
+
deleted: cap.deleted
|
|
689
|
+
};
|
|
690
|
+
}
|
|
691
|
+
function capabilityVersionToSkillVersion(cv) {
|
|
692
|
+
const content = cv.content;
|
|
693
|
+
return {
|
|
694
|
+
id: cv.id,
|
|
695
|
+
projectId: cv.projectId,
|
|
696
|
+
skillId: cv.capabilityId,
|
|
697
|
+
version: cv.version,
|
|
698
|
+
origin: cv.origin,
|
|
699
|
+
source: cv.source,
|
|
700
|
+
files: content?.files,
|
|
701
|
+
notes: cv.notes,
|
|
702
|
+
createdAt: cv.createdAt
|
|
703
|
+
};
|
|
704
|
+
}
|
|
705
|
+
function capabilityToSkillWithLatestVersion(cap) {
|
|
706
|
+
const skill = capabilityToSkill(cap);
|
|
707
|
+
const latestVersion = cap.latestVersion ? capabilityVersionToSkillVersion(cap.latestVersion) : void 0;
|
|
708
|
+
return { ...skill, latestVersion };
|
|
709
|
+
}
|
|
710
|
+
function capabilityToSubAgent(cap) {
|
|
711
|
+
const content = cap.latestVersion?.content;
|
|
712
|
+
return {
|
|
713
|
+
id: cap.id,
|
|
714
|
+
projectId: cap.projectId,
|
|
715
|
+
name: cap.name,
|
|
716
|
+
description: cap.description,
|
|
717
|
+
subAgentMd: content?.subAgentMd ?? "",
|
|
718
|
+
source: cap.source,
|
|
719
|
+
createdAt: cap.createdAt,
|
|
720
|
+
updatedAt: cap.updatedAt,
|
|
721
|
+
deleted: cap.deleted
|
|
722
|
+
};
|
|
723
|
+
}
|
|
724
|
+
function capabilityToRule(cap) {
|
|
725
|
+
const content = cap.latestVersion?.content;
|
|
726
|
+
return {
|
|
727
|
+
id: cap.id,
|
|
728
|
+
projectId: cap.projectId,
|
|
729
|
+
name: cap.name,
|
|
730
|
+
description: cap.description,
|
|
731
|
+
ruleType: content?.ruleType ?? "claude-md",
|
|
732
|
+
content: content?.content ?? "",
|
|
733
|
+
createdAt: cap.createdAt,
|
|
734
|
+
updatedAt: cap.updatedAt,
|
|
735
|
+
deleted: cap.deleted
|
|
736
|
+
};
|
|
737
|
+
}
|
|
738
|
+
function capabilityToMcp(cap) {
|
|
739
|
+
const content = cap.latestVersion?.content;
|
|
740
|
+
return {
|
|
741
|
+
id: cap.id,
|
|
742
|
+
projectId: cap.projectId,
|
|
743
|
+
name: cap.name,
|
|
744
|
+
description: cap.description,
|
|
745
|
+
config: content?.config ?? {},
|
|
746
|
+
createdAt: cap.createdAt,
|
|
747
|
+
updatedAt: cap.updatedAt,
|
|
748
|
+
deleted: cap.deleted
|
|
749
|
+
};
|
|
750
|
+
}
|
|
751
|
+
function groupCapabilitiesByType(capabilities) {
|
|
752
|
+
const skills = [];
|
|
753
|
+
const subAgents = [];
|
|
754
|
+
const rules = [];
|
|
755
|
+
const mcps = [];
|
|
756
|
+
for (const cap of capabilities) {
|
|
757
|
+
switch (cap.capabilityType) {
|
|
758
|
+
case "SKILL":
|
|
759
|
+
skills.push(capabilityToSkillWithLatestVersion(cap));
|
|
760
|
+
break;
|
|
761
|
+
case "SUB_AGENT":
|
|
762
|
+
subAgents.push(capabilityToSubAgent(cap));
|
|
763
|
+
break;
|
|
764
|
+
case "RULE":
|
|
765
|
+
rules.push(capabilityToRule(cap));
|
|
766
|
+
break;
|
|
767
|
+
case "MCP":
|
|
768
|
+
mcps.push(capabilityToMcp(cap));
|
|
769
|
+
break;
|
|
770
|
+
}
|
|
771
|
+
}
|
|
772
|
+
return { skills, subAgents, rules, mcps };
|
|
773
|
+
}
|
|
774
|
+
|
|
584
775
|
// src/test/index.ts
|
|
585
|
-
var
|
|
776
|
+
var import_zod21 = require("zod");
|
|
586
777
|
|
|
587
778
|
// src/test/base.ts
|
|
588
|
-
var
|
|
779
|
+
var import_zod11 = require("zod");
|
|
589
780
|
var TestType = /* @__PURE__ */ ((TestType2) => {
|
|
590
781
|
TestType2["LLM"] = "LLM";
|
|
591
782
|
TestType2["TOOL"] = "TOOL";
|
|
@@ -598,7 +789,7 @@ var TestType = /* @__PURE__ */ ((TestType2) => {
|
|
|
598
789
|
TestType2["PLAYWRIGHT_NL"] = "PLAYWRIGHT_NL";
|
|
599
790
|
return TestType2;
|
|
600
791
|
})(TestType || {});
|
|
601
|
-
var TestTypeSchema =
|
|
792
|
+
var TestTypeSchema = import_zod11.z.enum(TestType);
|
|
602
793
|
var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
|
|
603
794
|
TestImportance2["LOW"] = "low";
|
|
604
795
|
TestImportance2["MEDIUM"] = "medium";
|
|
@@ -606,153 +797,153 @@ var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
|
|
|
606
797
|
TestImportance2["CRITICAL"] = "critical";
|
|
607
798
|
return TestImportance2;
|
|
608
799
|
})(TestImportance || {});
|
|
609
|
-
var TestImportanceSchema =
|
|
610
|
-
var BaseTestSchema =
|
|
611
|
-
id:
|
|
800
|
+
var TestImportanceSchema = import_zod11.z.enum(TestImportance);
|
|
801
|
+
var BaseTestSchema = import_zod11.z.object({
|
|
802
|
+
id: import_zod11.z.string(),
|
|
612
803
|
type: TestTypeSchema,
|
|
613
|
-
name:
|
|
614
|
-
description:
|
|
804
|
+
name: import_zod11.z.string().min(3),
|
|
805
|
+
description: import_zod11.z.string().optional(),
|
|
615
806
|
importance: TestImportanceSchema.optional()
|
|
616
807
|
});
|
|
617
808
|
|
|
618
809
|
// src/test/llm.ts
|
|
619
|
-
var
|
|
810
|
+
var import_zod12 = require("zod");
|
|
620
811
|
var LLMTestSchema = BaseTestSchema.extend({
|
|
621
|
-
type:
|
|
812
|
+
type: import_zod12.z.literal("LLM" /* LLM */),
|
|
622
813
|
/** Maximum steps for the LLM to take */
|
|
623
|
-
maxSteps:
|
|
814
|
+
maxSteps: import_zod12.z.number().min(1).max(100),
|
|
624
815
|
/** Prompt to send to the evaluator */
|
|
625
|
-
prompt:
|
|
816
|
+
prompt: import_zod12.z.string().min(1),
|
|
626
817
|
/** ID of the evaluator agent to use */
|
|
627
|
-
evaluatorId:
|
|
818
|
+
evaluatorId: import_zod12.z.string()
|
|
628
819
|
});
|
|
629
820
|
|
|
630
821
|
// src/test/tool.ts
|
|
631
|
-
var
|
|
822
|
+
var import_zod13 = require("zod");
|
|
632
823
|
var ToolTestSchema = BaseTestSchema.extend({
|
|
633
|
-
type:
|
|
824
|
+
type: import_zod13.z.literal("TOOL" /* TOOL */),
|
|
634
825
|
/** Name of the tool that should be called */
|
|
635
|
-
toolName:
|
|
826
|
+
toolName: import_zod13.z.string().min(3),
|
|
636
827
|
/** Expected arguments for the tool call */
|
|
637
|
-
args:
|
|
828
|
+
args: import_zod13.z.record(import_zod13.z.string(), import_zod13.z.any()),
|
|
638
829
|
/** Expected content in the tool results */
|
|
639
|
-
resultsContent:
|
|
830
|
+
resultsContent: import_zod13.z.string()
|
|
640
831
|
});
|
|
641
832
|
|
|
642
833
|
// src/test/site-config.ts
|
|
643
|
-
var
|
|
834
|
+
var import_zod14 = require("zod");
|
|
644
835
|
var SiteConfigTestSchema = BaseTestSchema.extend({
|
|
645
|
-
type:
|
|
836
|
+
type: import_zod14.z.literal("SITE_CONFIG" /* SITE_CONFIG */),
|
|
646
837
|
/** URL to call */
|
|
647
|
-
url:
|
|
838
|
+
url: import_zod14.z.string().url(),
|
|
648
839
|
/** HTTP method */
|
|
649
|
-
method:
|
|
840
|
+
method: import_zod14.z.enum(["GET", "POST"]),
|
|
650
841
|
/** Request body (for POST) */
|
|
651
|
-
body:
|
|
842
|
+
body: import_zod14.z.string().optional(),
|
|
652
843
|
/** Expected HTTP status code */
|
|
653
|
-
expectedStatusCode:
|
|
844
|
+
expectedStatusCode: import_zod14.z.number().int().min(100).max(599),
|
|
654
845
|
/** Expected response content */
|
|
655
|
-
expectedResponse:
|
|
846
|
+
expectedResponse: import_zod14.z.string().optional(),
|
|
656
847
|
/** JMESPath expression to extract from response */
|
|
657
|
-
expectedResponseJMESPath:
|
|
848
|
+
expectedResponseJMESPath: import_zod14.z.string().optional()
|
|
658
849
|
});
|
|
659
850
|
|
|
660
851
|
// src/test/command-execution.ts
|
|
661
|
-
var
|
|
852
|
+
var import_zod15 = require("zod");
|
|
662
853
|
var AllowedCommands = [
|
|
663
854
|
"yarn install --no-immutable && yarn build",
|
|
664
855
|
"npm run build",
|
|
665
856
|
"yarn typecheck"
|
|
666
857
|
];
|
|
667
858
|
var CommandExecutionTestSchema = BaseTestSchema.extend({
|
|
668
|
-
type:
|
|
859
|
+
type: import_zod15.z.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
|
|
669
860
|
/** Command to execute (must be in AllowedCommands) */
|
|
670
|
-
command:
|
|
861
|
+
command: import_zod15.z.string().refine((value) => AllowedCommands.includes(value), {
|
|
671
862
|
message: `Command must be one of: ${AllowedCommands.join(", ")}`
|
|
672
863
|
}),
|
|
673
864
|
/** Expected exit code (default: 0) */
|
|
674
|
-
expectedExitCode:
|
|
865
|
+
expectedExitCode: import_zod15.z.number().default(0).optional()
|
|
675
866
|
});
|
|
676
867
|
|
|
677
868
|
// src/test/file-presence.ts
|
|
678
|
-
var
|
|
869
|
+
var import_zod16 = require("zod");
|
|
679
870
|
var FilePresenceTestSchema = BaseTestSchema.extend({
|
|
680
|
-
type:
|
|
871
|
+
type: import_zod16.z.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
|
|
681
872
|
/** Paths to check */
|
|
682
|
-
paths:
|
|
873
|
+
paths: import_zod16.z.array(import_zod16.z.string()),
|
|
683
874
|
/** Whether files should exist (true) or not exist (false) */
|
|
684
|
-
shouldExist:
|
|
875
|
+
shouldExist: import_zod16.z.boolean()
|
|
685
876
|
});
|
|
686
877
|
|
|
687
878
|
// src/test/file-content.ts
|
|
688
|
-
var
|
|
689
|
-
var FileContentCheckSchema =
|
|
879
|
+
var import_zod17 = require("zod");
|
|
880
|
+
var FileContentCheckSchema = import_zod17.z.object({
|
|
690
881
|
/** Strings that must be present in the file */
|
|
691
|
-
contains:
|
|
882
|
+
contains: import_zod17.z.array(import_zod17.z.string()).optional(),
|
|
692
883
|
/** Strings that must NOT be present in the file */
|
|
693
|
-
notContains:
|
|
884
|
+
notContains: import_zod17.z.array(import_zod17.z.string()).optional(),
|
|
694
885
|
/** Regex pattern the content must match */
|
|
695
|
-
matches:
|
|
886
|
+
matches: import_zod17.z.string().optional(),
|
|
696
887
|
/** JSON path checks for structured content */
|
|
697
|
-
jsonPath:
|
|
698
|
-
|
|
699
|
-
path:
|
|
700
|
-
value:
|
|
888
|
+
jsonPath: import_zod17.z.array(
|
|
889
|
+
import_zod17.z.object({
|
|
890
|
+
path: import_zod17.z.string(),
|
|
891
|
+
value: import_zod17.z.unknown()
|
|
701
892
|
})
|
|
702
893
|
).optional(),
|
|
703
894
|
/** Lines that should be added (for diff checking) */
|
|
704
|
-
added:
|
|
895
|
+
added: import_zod17.z.array(import_zod17.z.string()).optional(),
|
|
705
896
|
/** Lines that should be removed (for diff checking) */
|
|
706
|
-
removed:
|
|
897
|
+
removed: import_zod17.z.array(import_zod17.z.string()).optional()
|
|
707
898
|
});
|
|
708
899
|
var FileContentTestSchema = BaseTestSchema.extend({
|
|
709
|
-
type:
|
|
900
|
+
type: import_zod17.z.literal("FILE_CONTENT" /* FILE_CONTENT */),
|
|
710
901
|
/** Path to the file to check */
|
|
711
|
-
path:
|
|
902
|
+
path: import_zod17.z.string(),
|
|
712
903
|
/** Content checks to perform */
|
|
713
904
|
checks: FileContentCheckSchema
|
|
714
905
|
});
|
|
715
906
|
|
|
716
907
|
// src/test/build-check.ts
|
|
717
|
-
var
|
|
908
|
+
var import_zod18 = require("zod");
|
|
718
909
|
var BuildCheckTestSchema = BaseTestSchema.extend({
|
|
719
|
-
type:
|
|
910
|
+
type: import_zod18.z.literal("BUILD_CHECK" /* BUILD_CHECK */),
|
|
720
911
|
/** Build command to execute */
|
|
721
|
-
command:
|
|
912
|
+
command: import_zod18.z.string(),
|
|
722
913
|
/** Whether the build should succeed */
|
|
723
|
-
expectSuccess:
|
|
914
|
+
expectSuccess: import_zod18.z.boolean(),
|
|
724
915
|
/** Maximum allowed warnings (optional) */
|
|
725
|
-
allowedWarnings:
|
|
916
|
+
allowedWarnings: import_zod18.z.number().optional(),
|
|
726
917
|
/** Timeout in milliseconds */
|
|
727
|
-
timeout:
|
|
918
|
+
timeout: import_zod18.z.number().optional()
|
|
728
919
|
});
|
|
729
920
|
|
|
730
921
|
// src/test/vitest.ts
|
|
731
|
-
var
|
|
922
|
+
var import_zod19 = require("zod");
|
|
732
923
|
var VitestTestSchema = BaseTestSchema.extend({
|
|
733
|
-
type:
|
|
924
|
+
type: import_zod19.z.literal("VITEST" /* VITEST */),
|
|
734
925
|
/** Test file content */
|
|
735
|
-
testFile:
|
|
926
|
+
testFile: import_zod19.z.string(),
|
|
736
927
|
/** Name of the test file */
|
|
737
|
-
testFileName:
|
|
928
|
+
testFileName: import_zod19.z.string(),
|
|
738
929
|
/** Minimum pass rate required (0-100) */
|
|
739
|
-
minPassRate:
|
|
930
|
+
minPassRate: import_zod19.z.number().min(0).max(100)
|
|
740
931
|
});
|
|
741
932
|
|
|
742
933
|
// src/test/playwright-nl.ts
|
|
743
|
-
var
|
|
934
|
+
var import_zod20 = require("zod");
|
|
744
935
|
var PlaywrightNLTestSchema = BaseTestSchema.extend({
|
|
745
|
-
type:
|
|
936
|
+
type: import_zod20.z.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
|
|
746
937
|
/** Natural language steps to execute */
|
|
747
|
-
steps:
|
|
938
|
+
steps: import_zod20.z.array(import_zod20.z.string()),
|
|
748
939
|
/** Expected outcome description */
|
|
749
|
-
expectedOutcome:
|
|
940
|
+
expectedOutcome: import_zod20.z.string(),
|
|
750
941
|
/** Timeout in milliseconds */
|
|
751
|
-
timeout:
|
|
942
|
+
timeout: import_zod20.z.number().optional()
|
|
752
943
|
});
|
|
753
944
|
|
|
754
945
|
// src/test/index.ts
|
|
755
|
-
var TestSchema =
|
|
946
|
+
var TestSchema = import_zod21.z.discriminatedUnion("type", [
|
|
756
947
|
LLMTestSchema,
|
|
757
948
|
ToolTestSchema,
|
|
758
949
|
SiteConfigTestSchema,
|
|
@@ -765,33 +956,33 @@ var TestSchema = import_zod20.z.discriminatedUnion("type", [
|
|
|
765
956
|
]);
|
|
766
957
|
|
|
767
958
|
// src/scenario/environment.ts
|
|
768
|
-
var
|
|
769
|
-
var LocalProjectConfigSchema =
|
|
959
|
+
var import_zod22 = require("zod");
|
|
960
|
+
var LocalProjectConfigSchema = import_zod22.z.object({
|
|
770
961
|
/** Template ID to use for the local project */
|
|
771
|
-
templateId:
|
|
962
|
+
templateId: import_zod22.z.string().optional(),
|
|
772
963
|
/** Files to create in the project */
|
|
773
|
-
files:
|
|
774
|
-
|
|
775
|
-
path:
|
|
776
|
-
content:
|
|
964
|
+
files: import_zod22.z.array(
|
|
965
|
+
import_zod22.z.object({
|
|
966
|
+
path: import_zod22.z.string().min(1),
|
|
967
|
+
content: import_zod22.z.string().min(1)
|
|
777
968
|
})
|
|
778
969
|
).optional()
|
|
779
970
|
});
|
|
780
|
-
var MetaSiteConfigSchema =
|
|
781
|
-
configurations:
|
|
782
|
-
|
|
783
|
-
name:
|
|
784
|
-
apiCalls:
|
|
785
|
-
|
|
786
|
-
url:
|
|
787
|
-
method:
|
|
788
|
-
body:
|
|
971
|
+
var MetaSiteConfigSchema = import_zod22.z.object({
|
|
972
|
+
configurations: import_zod22.z.array(
|
|
973
|
+
import_zod22.z.object({
|
|
974
|
+
name: import_zod22.z.string().min(1),
|
|
975
|
+
apiCalls: import_zod22.z.array(
|
|
976
|
+
import_zod22.z.object({
|
|
977
|
+
url: import_zod22.z.string().url(),
|
|
978
|
+
method: import_zod22.z.enum(["POST", "PUT"]),
|
|
979
|
+
body: import_zod22.z.string()
|
|
789
980
|
})
|
|
790
981
|
)
|
|
791
982
|
})
|
|
792
983
|
).optional()
|
|
793
984
|
});
|
|
794
|
-
var EnvironmentSchema =
|
|
985
|
+
var EnvironmentSchema = import_zod22.z.object({
|
|
795
986
|
/** Local project configuration */
|
|
796
987
|
localProject: LocalProjectConfigSchema.optional(),
|
|
797
988
|
/** Meta site configuration */
|
|
@@ -799,13 +990,13 @@ var EnvironmentSchema = import_zod21.z.object({
|
|
|
799
990
|
});
|
|
800
991
|
|
|
801
992
|
// src/scenario/test-scenario.ts
|
|
802
|
-
var
|
|
993
|
+
var import_zod25 = require("zod");
|
|
803
994
|
|
|
804
995
|
// src/assertion/assertion.ts
|
|
805
|
-
var
|
|
996
|
+
var import_zod24 = require("zod");
|
|
806
997
|
|
|
807
998
|
// src/assertion/build-passed-command.ts
|
|
808
|
-
var
|
|
999
|
+
var import_zod23 = require("zod");
|
|
809
1000
|
var ALLOWED_BUILD_COMMANDS = [
|
|
810
1001
|
"yarn build",
|
|
811
1002
|
"npm run build",
|
|
@@ -831,10 +1022,10 @@ function parseBuildCommandToArgv(command) {
|
|
|
831
1022
|
return BUILD_COMMAND_ARGV[trimmed];
|
|
832
1023
|
}
|
|
833
1024
|
var enumTuple = ALLOWED_BUILD_COMMANDS;
|
|
834
|
-
var BuildPassedCommandStringSchema =
|
|
1025
|
+
var BuildPassedCommandStringSchema = import_zod23.z.enum(enumTuple);
|
|
835
1026
|
|
|
836
1027
|
// src/assertion/assertion.ts
|
|
837
|
-
var AssertionTypeSchema =
|
|
1028
|
+
var AssertionTypeSchema = import_zod24.z.enum([
|
|
838
1029
|
"skill_was_called",
|
|
839
1030
|
"tool_called_with_param",
|
|
840
1031
|
"build_passed",
|
|
@@ -843,61 +1034,61 @@ var AssertionTypeSchema = import_zod23.z.enum([
|
|
|
843
1034
|
"llm_judge",
|
|
844
1035
|
"api_call"
|
|
845
1036
|
]);
|
|
846
|
-
var AssertionParameterTypeSchema =
|
|
1037
|
+
var AssertionParameterTypeSchema = import_zod24.z.enum([
|
|
847
1038
|
"string",
|
|
848
1039
|
"number",
|
|
849
1040
|
"boolean"
|
|
850
1041
|
]);
|
|
851
|
-
var AssertionParameterSchema =
|
|
1042
|
+
var AssertionParameterSchema = import_zod24.z.object({
|
|
852
1043
|
/** Parameter name (used as key in params object) */
|
|
853
|
-
name:
|
|
1044
|
+
name: import_zod24.z.string().min(1),
|
|
854
1045
|
/** Display label for the parameter */
|
|
855
|
-
label:
|
|
1046
|
+
label: import_zod24.z.string().min(1),
|
|
856
1047
|
/** Parameter type */
|
|
857
1048
|
type: AssertionParameterTypeSchema,
|
|
858
1049
|
/** Whether this parameter is required */
|
|
859
|
-
required:
|
|
1050
|
+
required: import_zod24.z.boolean(),
|
|
860
1051
|
/** Default value (optional, used when not provided) */
|
|
861
|
-
defaultValue:
|
|
1052
|
+
defaultValue: import_zod24.z.union([import_zod24.z.string(), import_zod24.z.number(), import_zod24.z.boolean()]).optional(),
|
|
862
1053
|
/** If true, parameter is hidden by default behind "Show advanced options" */
|
|
863
|
-
advanced:
|
|
1054
|
+
advanced: import_zod24.z.boolean().optional()
|
|
864
1055
|
});
|
|
865
|
-
var ScenarioAssertionLinkSchema =
|
|
1056
|
+
var ScenarioAssertionLinkSchema = import_zod24.z.object({
|
|
866
1057
|
/** ID of the system assertion (e.g., 'system:skill_was_called') */
|
|
867
|
-
assertionId:
|
|
1058
|
+
assertionId: import_zod24.z.string(),
|
|
868
1059
|
/** Parameter values for this assertion in this scenario */
|
|
869
|
-
params:
|
|
870
|
-
|
|
871
|
-
|
|
1060
|
+
params: import_zod24.z.record(
|
|
1061
|
+
import_zod24.z.string(),
|
|
1062
|
+
import_zod24.z.union([import_zod24.z.string(), import_zod24.z.number(), import_zod24.z.boolean(), import_zod24.z.null()])
|
|
872
1063
|
).optional()
|
|
873
1064
|
});
|
|
874
|
-
var SkillWasCalledConfigSchema =
|
|
1065
|
+
var SkillWasCalledConfigSchema = import_zod24.z.object({
|
|
875
1066
|
/** Names of the skills that must have been called */
|
|
876
|
-
skillNames:
|
|
1067
|
+
skillNames: import_zod24.z.array(import_zod24.z.string().min(1)).min(1)
|
|
877
1068
|
});
|
|
878
|
-
var CostConfigSchema =
|
|
1069
|
+
var CostConfigSchema = import_zod24.z.strictObject({
|
|
879
1070
|
/** Maximum allowed cost in USD */
|
|
880
|
-
maxCostUsd:
|
|
1071
|
+
maxCostUsd: import_zod24.z.number().positive()
|
|
881
1072
|
});
|
|
882
|
-
var ToolCalledWithParamConfigSchema =
|
|
1073
|
+
var ToolCalledWithParamConfigSchema = import_zod24.z.strictObject({
|
|
883
1074
|
/** Name of the tool that must have been called */
|
|
884
|
-
toolName:
|
|
1075
|
+
toolName: import_zod24.z.string().min(1),
|
|
885
1076
|
/** JSON string of key-value pairs for expected parameters (substring match). Optional — when omitted, only checks tool presence. */
|
|
886
|
-
expectedParams:
|
|
1077
|
+
expectedParams: import_zod24.z.string().min(1).optional(),
|
|
887
1078
|
/** If true, the matching tool call must also have succeeded (step.success === true) */
|
|
888
|
-
requireSuccess:
|
|
1079
|
+
requireSuccess: import_zod24.z.boolean().optional()
|
|
889
1080
|
});
|
|
890
|
-
var BuildPassedConfigSchema =
|
|
1081
|
+
var BuildPassedConfigSchema = import_zod24.z.strictObject({
|
|
891
1082
|
/** Allowlisted command only (default at runtime: "yarn build") */
|
|
892
1083
|
command: BuildPassedCommandStringSchema.optional(),
|
|
893
1084
|
/** Expected exit code (default: 0) */
|
|
894
|
-
expectedExitCode:
|
|
1085
|
+
expectedExitCode: import_zod24.z.number().int().optional()
|
|
895
1086
|
});
|
|
896
|
-
var TimeConfigSchema =
|
|
1087
|
+
var TimeConfigSchema = import_zod24.z.strictObject({
|
|
897
1088
|
/** Maximum allowed duration in milliseconds */
|
|
898
|
-
maxDurationMs:
|
|
1089
|
+
maxDurationMs: import_zod24.z.number().int().positive()
|
|
899
1090
|
});
|
|
900
|
-
var LlmJudgeConfigSchema =
|
|
1091
|
+
var LlmJudgeConfigSchema = import_zod24.z.object({
|
|
901
1092
|
/**
|
|
902
1093
|
* Prompt template with placeholders:
|
|
903
1094
|
* - {{output}}: agent's final output
|
|
@@ -908,65 +1099,65 @@ var LlmJudgeConfigSchema = import_zod23.z.object({
|
|
|
908
1099
|
* - {{trace}}: step-by-step trace of tool calls
|
|
909
1100
|
* - Custom parameters defined in the parameters array
|
|
910
1101
|
*/
|
|
911
|
-
prompt:
|
|
1102
|
+
prompt: import_zod24.z.string().min(1),
|
|
912
1103
|
/** Minimum score to pass (0-10, default 7) */
|
|
913
|
-
minScore:
|
|
1104
|
+
minScore: import_zod24.z.number().int().min(0).max(10).optional(),
|
|
914
1105
|
/** Model for the judge (e.g. claude-3-5-haiku-20241022) */
|
|
915
|
-
model:
|
|
1106
|
+
model: import_zod24.z.string().optional(),
|
|
916
1107
|
/** Max output tokens */
|
|
917
|
-
maxTokens:
|
|
1108
|
+
maxTokens: import_zod24.z.number().int().optional(),
|
|
918
1109
|
/** Temperature (0-1) */
|
|
919
|
-
temperature:
|
|
1110
|
+
temperature: import_zod24.z.number().min(0).max(1).optional(),
|
|
920
1111
|
/** User-defined parameters for this assertion */
|
|
921
|
-
parameters:
|
|
1112
|
+
parameters: import_zod24.z.array(AssertionParameterSchema).optional()
|
|
922
1113
|
});
|
|
923
|
-
var ApiCallConfigSchema =
|
|
1114
|
+
var ApiCallConfigSchema = import_zod24.z.strictObject({
|
|
924
1115
|
/** URL to call */
|
|
925
|
-
url:
|
|
1116
|
+
url: import_zod24.z.string().min(1),
|
|
926
1117
|
/** HTTP method (default GET) */
|
|
927
|
-
method:
|
|
1118
|
+
method: import_zod24.z.enum(["GET", "POST"]).optional(),
|
|
928
1119
|
/** Request body (JSON string, for POST requests) */
|
|
929
|
-
requestBody:
|
|
1120
|
+
requestBody: import_zod24.z.string().optional(),
|
|
930
1121
|
/** Expected JSON response to validate against (subset match — extra fields in actual are OK) */
|
|
931
|
-
expectedResponse:
|
|
1122
|
+
expectedResponse: import_zod24.z.string().min(1),
|
|
932
1123
|
/** Request headers as JSON string of key-value pairs */
|
|
933
|
-
requestHeaders:
|
|
1124
|
+
requestHeaders: import_zod24.z.string().optional(),
|
|
934
1125
|
/** Request timeout in milliseconds (default 30000) */
|
|
935
|
-
timeoutMs:
|
|
1126
|
+
timeoutMs: import_zod24.z.number().int().positive().optional()
|
|
936
1127
|
});
|
|
937
1128
|
var AssertionBaseFields = {
|
|
938
1129
|
/** When true, the assertion's pass/fail logic is inverted (NOT operator). */
|
|
939
|
-
negate:
|
|
1130
|
+
negate: import_zod24.z.boolean().optional()
|
|
940
1131
|
};
|
|
941
1132
|
var SkillWasCalledAssertionSchema = SkillWasCalledConfigSchema.extend({
|
|
942
|
-
type:
|
|
1133
|
+
type: import_zod24.z.literal("skill_was_called"),
|
|
943
1134
|
...AssertionBaseFields
|
|
944
1135
|
});
|
|
945
1136
|
var ToolCalledWithParamAssertionSchema = ToolCalledWithParamConfigSchema.extend({
|
|
946
|
-
type:
|
|
1137
|
+
type: import_zod24.z.literal("tool_called_with_param"),
|
|
947
1138
|
...AssertionBaseFields
|
|
948
1139
|
});
|
|
949
1140
|
var BuildPassedAssertionSchema = BuildPassedConfigSchema.extend({
|
|
950
|
-
type:
|
|
1141
|
+
type: import_zod24.z.literal("build_passed"),
|
|
951
1142
|
...AssertionBaseFields
|
|
952
1143
|
});
|
|
953
1144
|
var CostAssertionSchema = CostConfigSchema.extend({
|
|
954
|
-
type:
|
|
1145
|
+
type: import_zod24.z.literal("cost"),
|
|
955
1146
|
...AssertionBaseFields
|
|
956
1147
|
});
|
|
957
1148
|
var LlmJudgeAssertionSchema = LlmJudgeConfigSchema.extend({
|
|
958
|
-
type:
|
|
1149
|
+
type: import_zod24.z.literal("llm_judge"),
|
|
959
1150
|
...AssertionBaseFields
|
|
960
1151
|
});
|
|
961
1152
|
var ApiCallAssertionSchema = ApiCallConfigSchema.extend({
|
|
962
|
-
type:
|
|
1153
|
+
type: import_zod24.z.literal("api_call"),
|
|
963
1154
|
...AssertionBaseFields
|
|
964
1155
|
});
|
|
965
1156
|
var TimeAssertionSchema = TimeConfigSchema.extend({
|
|
966
|
-
type:
|
|
1157
|
+
type: import_zod24.z.literal("time_limit"),
|
|
967
1158
|
...AssertionBaseFields
|
|
968
1159
|
});
|
|
969
|
-
var AssertionSchema =
|
|
1160
|
+
var AssertionSchema = import_zod24.z.union([
|
|
970
1161
|
SkillWasCalledAssertionSchema,
|
|
971
1162
|
ToolCalledWithParamAssertionSchema,
|
|
972
1163
|
BuildPassedAssertionSchema,
|
|
@@ -975,7 +1166,7 @@ var AssertionSchema = import_zod23.z.union([
|
|
|
975
1166
|
LlmJudgeAssertionSchema,
|
|
976
1167
|
ApiCallAssertionSchema
|
|
977
1168
|
]);
|
|
978
|
-
var AssertionConfigSchema =
|
|
1169
|
+
var AssertionConfigSchema = import_zod24.z.union([
|
|
979
1170
|
LlmJudgeConfigSchema,
|
|
980
1171
|
// requires prompt - check first
|
|
981
1172
|
SkillWasCalledConfigSchema,
|
|
@@ -990,7 +1181,7 @@ var AssertionConfigSchema = import_zod23.z.union([
|
|
|
990
1181
|
// requires maxCostUsd, uses strictObject
|
|
991
1182
|
BuildPassedConfigSchema,
|
|
992
1183
|
// all optional, uses strictObject to reject unknown keys
|
|
993
|
-
|
|
1184
|
+
import_zod24.z.object({})
|
|
994
1185
|
// fallback empty config
|
|
995
1186
|
]);
|
|
996
1187
|
function validateAssertionConfig(type, config) {
|
|
@@ -1236,35 +1427,35 @@ function getSystemAssertion(id) {
|
|
|
1236
1427
|
|
|
1237
1428
|
// src/scenario/test-scenario.ts
|
|
1238
1429
|
var MAX_IMAGE_BASE64_LENGTH = 4 * Math.ceil(2 * 1024 * 1024 / 3);
|
|
1239
|
-
var TriggerPromptImageSchema =
|
|
1430
|
+
var TriggerPromptImageSchema = import_zod25.z.object({
|
|
1240
1431
|
/** Base64-encoded image data (no data URL prefix) */
|
|
1241
|
-
base64:
|
|
1432
|
+
base64: import_zod25.z.string().max(MAX_IMAGE_BASE64_LENGTH, "Image exceeds 2 MB size limit"),
|
|
1242
1433
|
/** MIME type of the image */
|
|
1243
|
-
mediaType:
|
|
1434
|
+
mediaType: import_zod25.z.enum(["image/jpeg", "image/png", "image/gif", "image/webp"]),
|
|
1244
1435
|
/** Original filename of the image */
|
|
1245
|
-
name:
|
|
1436
|
+
name: import_zod25.z.string()
|
|
1246
1437
|
});
|
|
1247
|
-
var ExpectedFileSchema =
|
|
1438
|
+
var ExpectedFileSchema = import_zod25.z.object({
|
|
1248
1439
|
/** Relative path where the file should be created */
|
|
1249
|
-
path:
|
|
1440
|
+
path: import_zod25.z.string(),
|
|
1250
1441
|
/** Optional expected content */
|
|
1251
|
-
content:
|
|
1442
|
+
content: import_zod25.z.string().optional()
|
|
1252
1443
|
});
|
|
1253
1444
|
var TestScenarioSchema = TenantEntitySchema.extend({
|
|
1254
1445
|
/** The prompt sent to the agent to trigger the task */
|
|
1255
|
-
triggerPrompt:
|
|
1446
|
+
triggerPrompt: import_zod25.z.string().min(10),
|
|
1256
1447
|
/** ID of the template to use for this scenario (null = no template) */
|
|
1257
|
-
templateId:
|
|
1448
|
+
templateId: import_zod25.z.string().nullish(),
|
|
1258
1449
|
/** Inline assertions to evaluate for this scenario (legacy) */
|
|
1259
|
-
assertions:
|
|
1450
|
+
assertions: import_zod25.z.array(AssertionSchema).optional(),
|
|
1260
1451
|
/** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
|
|
1261
|
-
assertionIds:
|
|
1452
|
+
assertionIds: import_zod25.z.array(import_zod25.z.string()).optional(),
|
|
1262
1453
|
/** Linked assertions with per-scenario parameter values */
|
|
1263
|
-
assertionLinks:
|
|
1454
|
+
assertionLinks: import_zod25.z.array(ScenarioAssertionLinkSchema).optional(),
|
|
1264
1455
|
/** Tags for categorisation and filtering */
|
|
1265
|
-
tags:
|
|
1456
|
+
tags: import_zod25.z.array(import_zod25.z.string()).optional(),
|
|
1266
1457
|
/** Base64-encoded images attached to the trigger prompt (max 3) */
|
|
1267
|
-
triggerPromptImages:
|
|
1458
|
+
triggerPromptImages: import_zod25.z.array(TriggerPromptImageSchema).max(3).optional()
|
|
1268
1459
|
});
|
|
1269
1460
|
function validateBuildPassedParamsInAssertionLinks(links, ctx) {
|
|
1270
1461
|
if (!links) return;
|
|
@@ -1275,7 +1466,7 @@ function validateBuildPassedParamsInAssertionLinks(links, ctx) {
|
|
|
1275
1466
|
if (cmd === void 0 || cmd === null) continue;
|
|
1276
1467
|
if (typeof cmd !== "string") {
|
|
1277
1468
|
ctx.addIssue({
|
|
1278
|
-
code:
|
|
1469
|
+
code: import_zod25.z.ZodIssueCode.custom,
|
|
1279
1470
|
message: "build_passed command must be a string",
|
|
1280
1471
|
path: ["assertionLinks", i, "params", "command"]
|
|
1281
1472
|
});
|
|
@@ -1283,7 +1474,7 @@ function validateBuildPassedParamsInAssertionLinks(links, ctx) {
|
|
|
1283
1474
|
}
|
|
1284
1475
|
if (!isAllowedBuildCommandString(cmd)) {
|
|
1285
1476
|
ctx.addIssue({
|
|
1286
|
-
code:
|
|
1477
|
+
code: import_zod25.z.ZodIssueCode.custom,
|
|
1287
1478
|
message: "Invalid build_passed command. Allowed: yarn build, npm run build, pnpm run build, pnpm build",
|
|
1288
1479
|
path: ["assertionLinks", i, "params", "command"]
|
|
1289
1480
|
});
|
|
@@ -1306,19 +1497,19 @@ var UpdateTestScenarioInputSchema = TestScenarioCreateBaseSchema.partial().super
|
|
|
1306
1497
|
});
|
|
1307
1498
|
|
|
1308
1499
|
// src/scenario/batch-import.ts
|
|
1309
|
-
var
|
|
1500
|
+
var import_zod26 = require("zod");
|
|
1310
1501
|
var UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
1311
|
-
var BatchAssertionLinkSchema =
|
|
1312
|
-
|
|
1502
|
+
var BatchAssertionLinkSchema = import_zod26.z.union([
|
|
1503
|
+
import_zod26.z.string().min(1),
|
|
1313
1504
|
ScenarioAssertionLinkSchema
|
|
1314
1505
|
]);
|
|
1315
|
-
var BatchScenarioEntrySchema =
|
|
1316
|
-
name:
|
|
1317
|
-
description:
|
|
1318
|
-
triggerPrompt:
|
|
1319
|
-
templateId:
|
|
1320
|
-
tags:
|
|
1321
|
-
assertionLinks:
|
|
1506
|
+
var BatchScenarioEntrySchema = import_zod26.z.object({
|
|
1507
|
+
name: import_zod26.z.string().min(1, "name: Required"),
|
|
1508
|
+
description: import_zod26.z.string().optional().default(""),
|
|
1509
|
+
triggerPrompt: import_zod26.z.string().min(10, "triggerPrompt: Must be at least 10 characters"),
|
|
1510
|
+
templateId: import_zod26.z.string().nullish(),
|
|
1511
|
+
tags: import_zod26.z.array(import_zod26.z.string()).optional(),
|
|
1512
|
+
assertionLinks: import_zod26.z.array(BatchAssertionLinkSchema).optional()
|
|
1322
1513
|
}).superRefine((data, ctx) => {
|
|
1323
1514
|
if (!data.assertionLinks) return;
|
|
1324
1515
|
const objectLinks = data.assertionLinks.filter(
|
|
@@ -1328,8 +1519,8 @@ var BatchScenarioEntrySchema = import_zod25.z.object({
|
|
|
1328
1519
|
validateBuildPassedParamsInAssertionLinks(objectLinks, ctx);
|
|
1329
1520
|
}
|
|
1330
1521
|
});
|
|
1331
|
-
var BatchImportPayloadSchema =
|
|
1332
|
-
scenarios:
|
|
1522
|
+
var BatchImportPayloadSchema = import_zod26.z.object({
|
|
1523
|
+
scenarios: import_zod26.z.array(BatchScenarioEntrySchema).min(1, "scenarios array must contain at least one entry").max(100, "Maximum 100 scenarios per upload")
|
|
1333
1524
|
});
|
|
1334
1525
|
var BATCH_IMPORT_LIMITS = {
|
|
1335
1526
|
MAX_SCENARIOS: 100,
|
|
@@ -1351,29 +1542,29 @@ function normalizeBatchAssertionLink(link) {
|
|
|
1351
1542
|
}
|
|
1352
1543
|
return link;
|
|
1353
1544
|
}
|
|
1354
|
-
var BatchResultItemSchema =
|
|
1355
|
-
index:
|
|
1356
|
-
name:
|
|
1357
|
-
status:
|
|
1358
|
-
id:
|
|
1359
|
-
errors:
|
|
1360
|
-
});
|
|
1361
|
-
var BatchSummarySchema =
|
|
1362
|
-
total:
|
|
1363
|
-
valid:
|
|
1364
|
-
invalid:
|
|
1365
|
-
created:
|
|
1366
|
-
});
|
|
1367
|
-
var BatchImportResponseSchema =
|
|
1545
|
+
var BatchResultItemSchema = import_zod26.z.object({
|
|
1546
|
+
index: import_zod26.z.number(),
|
|
1547
|
+
name: import_zod26.z.string(),
|
|
1548
|
+
status: import_zod26.z.enum(["valid", "invalid"]),
|
|
1549
|
+
id: import_zod26.z.string().nullable().optional(),
|
|
1550
|
+
errors: import_zod26.z.array(import_zod26.z.string()).optional()
|
|
1551
|
+
});
|
|
1552
|
+
var BatchSummarySchema = import_zod26.z.object({
|
|
1553
|
+
total: import_zod26.z.number(),
|
|
1554
|
+
valid: import_zod26.z.number(),
|
|
1555
|
+
invalid: import_zod26.z.number(),
|
|
1556
|
+
created: import_zod26.z.number()
|
|
1557
|
+
});
|
|
1558
|
+
var BatchImportResponseSchema = import_zod26.z.object({
|
|
1368
1559
|
summary: BatchSummarySchema,
|
|
1369
|
-
results:
|
|
1560
|
+
results: import_zod26.z.array(BatchResultItemSchema)
|
|
1370
1561
|
});
|
|
1371
1562
|
|
|
1372
1563
|
// src/suite/test-suite.ts
|
|
1373
|
-
var
|
|
1564
|
+
var import_zod27 = require("zod");
|
|
1374
1565
|
var TestSuiteSchema = TenantEntitySchema.extend({
|
|
1375
1566
|
/** IDs of test scenarios in this suite */
|
|
1376
|
-
scenarioIds:
|
|
1567
|
+
scenarioIds: import_zod27.z.array(import_zod27.z.string())
|
|
1377
1568
|
});
|
|
1378
1569
|
var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
1379
1570
|
id: true,
|
|
@@ -1384,21 +1575,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
|
1384
1575
|
var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
|
|
1385
1576
|
|
|
1386
1577
|
// src/evaluation/metrics.ts
|
|
1387
|
-
var
|
|
1388
|
-
var TokenUsageSchema =
|
|
1389
|
-
prompt:
|
|
1390
|
-
completion:
|
|
1391
|
-
total:
|
|
1392
|
-
});
|
|
1393
|
-
var EvalMetricsSchema =
|
|
1394
|
-
totalAssertions:
|
|
1395
|
-
passed:
|
|
1396
|
-
failed:
|
|
1397
|
-
skipped:
|
|
1398
|
-
errors:
|
|
1399
|
-
passRate:
|
|
1400
|
-
avgDuration:
|
|
1401
|
-
totalDuration:
|
|
1578
|
+
var import_zod28 = require("zod");
|
|
1579
|
+
var TokenUsageSchema = import_zod28.z.object({
|
|
1580
|
+
prompt: import_zod28.z.number(),
|
|
1581
|
+
completion: import_zod28.z.number(),
|
|
1582
|
+
total: import_zod28.z.number()
|
|
1583
|
+
});
|
|
1584
|
+
var EvalMetricsSchema = import_zod28.z.object({
|
|
1585
|
+
totalAssertions: import_zod28.z.number(),
|
|
1586
|
+
passed: import_zod28.z.number(),
|
|
1587
|
+
failed: import_zod28.z.number(),
|
|
1588
|
+
skipped: import_zod28.z.number(),
|
|
1589
|
+
errors: import_zod28.z.number(),
|
|
1590
|
+
passRate: import_zod28.z.number(),
|
|
1591
|
+
avgDuration: import_zod28.z.number(),
|
|
1592
|
+
totalDuration: import_zod28.z.number()
|
|
1402
1593
|
});
|
|
1403
1594
|
var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
1404
1595
|
EvalStatus2["PENDING"] = "pending";
|
|
@@ -1408,7 +1599,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
|
1408
1599
|
EvalStatus2["CANCELLED"] = "cancelled";
|
|
1409
1600
|
return EvalStatus2;
|
|
1410
1601
|
})(EvalStatus || {});
|
|
1411
|
-
var EvalStatusSchema =
|
|
1602
|
+
var EvalStatusSchema = import_zod28.z.enum(EvalStatus);
|
|
1412
1603
|
var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
1413
1604
|
LLMStepType2["COMPLETION"] = "completion";
|
|
1414
1605
|
LLMStepType2["TOOL_USE"] = "tool_use";
|
|
@@ -1416,54 +1607,54 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
|
1416
1607
|
LLMStepType2["THINKING"] = "thinking";
|
|
1417
1608
|
return LLMStepType2;
|
|
1418
1609
|
})(LLMStepType || {});
|
|
1419
|
-
var LLMTraceStepSchema =
|
|
1420
|
-
id:
|
|
1421
|
-
stepNumber:
|
|
1422
|
-
type:
|
|
1423
|
-
model:
|
|
1424
|
-
provider:
|
|
1425
|
-
startedAt:
|
|
1426
|
-
durationMs:
|
|
1610
|
+
var LLMTraceStepSchema = import_zod28.z.object({
|
|
1611
|
+
id: import_zod28.z.string(),
|
|
1612
|
+
stepNumber: import_zod28.z.number(),
|
|
1613
|
+
type: import_zod28.z.enum(LLMStepType),
|
|
1614
|
+
model: import_zod28.z.string(),
|
|
1615
|
+
provider: import_zod28.z.string(),
|
|
1616
|
+
startedAt: import_zod28.z.string(),
|
|
1617
|
+
durationMs: import_zod28.z.number(),
|
|
1427
1618
|
tokenUsage: TokenUsageSchema,
|
|
1428
|
-
costUsd:
|
|
1429
|
-
toolName:
|
|
1430
|
-
toolArguments:
|
|
1431
|
-
inputPreview:
|
|
1432
|
-
outputPreview:
|
|
1433
|
-
success:
|
|
1434
|
-
error:
|
|
1435
|
-
turnIndex:
|
|
1436
|
-
});
|
|
1437
|
-
var LLMBreakdownStatsSchema =
|
|
1438
|
-
count:
|
|
1439
|
-
durationMs:
|
|
1440
|
-
tokens:
|
|
1441
|
-
costUsd:
|
|
1442
|
-
});
|
|
1443
|
-
var LLMTraceSummarySchema =
|
|
1444
|
-
totalSteps:
|
|
1445
|
-
totalTurns:
|
|
1446
|
-
totalDurationMs:
|
|
1619
|
+
costUsd: import_zod28.z.number(),
|
|
1620
|
+
toolName: import_zod28.z.string().optional(),
|
|
1621
|
+
toolArguments: import_zod28.z.string().optional(),
|
|
1622
|
+
inputPreview: import_zod28.z.string().optional(),
|
|
1623
|
+
outputPreview: import_zod28.z.string().optional(),
|
|
1624
|
+
success: import_zod28.z.boolean(),
|
|
1625
|
+
error: import_zod28.z.string().optional(),
|
|
1626
|
+
turnIndex: import_zod28.z.number().optional()
|
|
1627
|
+
});
|
|
1628
|
+
var LLMBreakdownStatsSchema = import_zod28.z.object({
|
|
1629
|
+
count: import_zod28.z.number(),
|
|
1630
|
+
durationMs: import_zod28.z.number(),
|
|
1631
|
+
tokens: import_zod28.z.number(),
|
|
1632
|
+
costUsd: import_zod28.z.number()
|
|
1633
|
+
});
|
|
1634
|
+
var LLMTraceSummarySchema = import_zod28.z.object({
|
|
1635
|
+
totalSteps: import_zod28.z.number(),
|
|
1636
|
+
totalTurns: import_zod28.z.number().optional(),
|
|
1637
|
+
totalDurationMs: import_zod28.z.number(),
|
|
1447
1638
|
totalTokens: TokenUsageSchema,
|
|
1448
|
-
totalCostUsd:
|
|
1449
|
-
stepTypeBreakdown:
|
|
1450
|
-
modelBreakdown:
|
|
1451
|
-
modelsUsed:
|
|
1452
|
-
});
|
|
1453
|
-
var LLMTraceSchema =
|
|
1454
|
-
id:
|
|
1455
|
-
steps:
|
|
1639
|
+
totalCostUsd: import_zod28.z.number(),
|
|
1640
|
+
stepTypeBreakdown: import_zod28.z.record(import_zod28.z.string(), LLMBreakdownStatsSchema).optional(),
|
|
1641
|
+
modelBreakdown: import_zod28.z.record(import_zod28.z.string(), LLMBreakdownStatsSchema),
|
|
1642
|
+
modelsUsed: import_zod28.z.array(import_zod28.z.string())
|
|
1643
|
+
});
|
|
1644
|
+
var LLMTraceSchema = import_zod28.z.object({
|
|
1645
|
+
id: import_zod28.z.string(),
|
|
1646
|
+
steps: import_zod28.z.array(LLMTraceStepSchema),
|
|
1456
1647
|
summary: LLMTraceSummarySchema
|
|
1457
1648
|
});
|
|
1458
1649
|
|
|
1459
1650
|
// src/evaluation/eval-result.ts
|
|
1460
|
-
var
|
|
1651
|
+
var import_zod32 = require("zod");
|
|
1461
1652
|
|
|
1462
1653
|
// src/evaluation/eval-run.ts
|
|
1463
|
-
var
|
|
1654
|
+
var import_zod30 = require("zod");
|
|
1464
1655
|
|
|
1465
1656
|
// src/evaluation/live-trace.ts
|
|
1466
|
-
var
|
|
1657
|
+
var import_zod29 = require("zod");
|
|
1467
1658
|
var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
1468
1659
|
LiveTraceEventType2["THINKING"] = "thinking";
|
|
1469
1660
|
LiveTraceEventType2["TOOL_USE"] = "tool_use";
|
|
@@ -1477,37 +1668,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
|
1477
1668
|
LiveTraceEventType2["USER"] = "user";
|
|
1478
1669
|
return LiveTraceEventType2;
|
|
1479
1670
|
})(LiveTraceEventType || {});
|
|
1480
|
-
var LiveTraceEventSchema =
|
|
1671
|
+
var LiveTraceEventSchema = import_zod29.z.object({
|
|
1481
1672
|
/** The evaluation run ID */
|
|
1482
|
-
evalRunId:
|
|
1673
|
+
evalRunId: import_zod29.z.string(),
|
|
1483
1674
|
/** The scenario ID being executed */
|
|
1484
|
-
scenarioId:
|
|
1675
|
+
scenarioId: import_zod29.z.string(),
|
|
1485
1676
|
/** The scenario name for display */
|
|
1486
|
-
scenarioName:
|
|
1677
|
+
scenarioName: import_zod29.z.string(),
|
|
1487
1678
|
/** The target ID (skill, agent, etc.) */
|
|
1488
|
-
targetId:
|
|
1679
|
+
targetId: import_zod29.z.string(),
|
|
1489
1680
|
/** The target name for display */
|
|
1490
|
-
targetName:
|
|
1681
|
+
targetName: import_zod29.z.string(),
|
|
1491
1682
|
/** Step number in the current scenario execution */
|
|
1492
|
-
stepNumber:
|
|
1683
|
+
stepNumber: import_zod29.z.number(),
|
|
1493
1684
|
/** Type of trace event */
|
|
1494
|
-
type:
|
|
1685
|
+
type: import_zod29.z.enum(LiveTraceEventType),
|
|
1495
1686
|
/** Tool name if this is a tool_use event */
|
|
1496
|
-
toolName:
|
|
1687
|
+
toolName: import_zod29.z.string().optional(),
|
|
1497
1688
|
/** Tool arguments preview (truncated JSON) */
|
|
1498
|
-
toolArgs:
|
|
1689
|
+
toolArgs: import_zod29.z.string().optional(),
|
|
1499
1690
|
/** Output preview (truncated text) */
|
|
1500
|
-
outputPreview:
|
|
1691
|
+
outputPreview: import_zod29.z.string().optional(),
|
|
1501
1692
|
/** File path for file operations */
|
|
1502
|
-
filePath:
|
|
1693
|
+
filePath: import_zod29.z.string().optional(),
|
|
1503
1694
|
/** Elapsed time in milliseconds for progress events */
|
|
1504
|
-
elapsedMs:
|
|
1695
|
+
elapsedMs: import_zod29.z.number().optional(),
|
|
1505
1696
|
/** Thinking/reasoning text from Claude */
|
|
1506
|
-
thinking:
|
|
1697
|
+
thinking: import_zod29.z.string().optional(),
|
|
1507
1698
|
/** Timestamp when this event occurred */
|
|
1508
|
-
timestamp:
|
|
1699
|
+
timestamp: import_zod29.z.string(),
|
|
1509
1700
|
/** Whether this is the final event for this scenario */
|
|
1510
|
-
isComplete:
|
|
1701
|
+
isComplete: import_zod29.z.boolean()
|
|
1511
1702
|
});
|
|
1512
1703
|
var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
|
|
1513
1704
|
function parseTraceEventLine(line) {
|
|
@@ -1536,40 +1727,40 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
|
|
|
1536
1727
|
TriggerType2["SCHEDULED"] = "SCHEDULED";
|
|
1537
1728
|
return TriggerType2;
|
|
1538
1729
|
})(TriggerType || {});
|
|
1539
|
-
var TriggerMetadataSchema =
|
|
1540
|
-
version:
|
|
1541
|
-
resourceUpdated:
|
|
1542
|
-
scheduleId:
|
|
1730
|
+
var TriggerMetadataSchema = import_zod30.z.object({
|
|
1731
|
+
version: import_zod30.z.string().optional(),
|
|
1732
|
+
resourceUpdated: import_zod30.z.array(import_zod30.z.string()).optional(),
|
|
1733
|
+
scheduleId: import_zod30.z.string().optional()
|
|
1543
1734
|
});
|
|
1544
|
-
var TriggerSchema =
|
|
1545
|
-
id:
|
|
1735
|
+
var TriggerSchema = import_zod30.z.object({
|
|
1736
|
+
id: import_zod30.z.string(),
|
|
1546
1737
|
metadata: TriggerMetadataSchema.optional(),
|
|
1547
|
-
type:
|
|
1738
|
+
type: import_zod30.z.nativeEnum(TriggerType)
|
|
1548
1739
|
});
|
|
1549
|
-
var DiffLineTypeSchema =
|
|
1550
|
-
var DiffLineSchema =
|
|
1740
|
+
var DiffLineTypeSchema = import_zod30.z.enum(["added", "removed", "unchanged"]);
|
|
1741
|
+
var DiffLineSchema = import_zod30.z.object({
|
|
1551
1742
|
type: DiffLineTypeSchema,
|
|
1552
|
-
content:
|
|
1553
|
-
lineNumber:
|
|
1554
|
-
});
|
|
1555
|
-
var DiffContentSchema =
|
|
1556
|
-
path:
|
|
1557
|
-
expected:
|
|
1558
|
-
actual:
|
|
1559
|
-
diffLines:
|
|
1560
|
-
renamedFrom:
|
|
1743
|
+
content: import_zod30.z.string(),
|
|
1744
|
+
lineNumber: import_zod30.z.number()
|
|
1745
|
+
});
|
|
1746
|
+
var DiffContentSchema = import_zod30.z.object({
|
|
1747
|
+
path: import_zod30.z.string(),
|
|
1748
|
+
expected: import_zod30.z.string(),
|
|
1749
|
+
actual: import_zod30.z.string(),
|
|
1750
|
+
diffLines: import_zod30.z.array(DiffLineSchema),
|
|
1751
|
+
renamedFrom: import_zod30.z.string().optional(),
|
|
1561
1752
|
/** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
|
|
1562
|
-
isInfrastructure:
|
|
1753
|
+
isInfrastructure: import_zod30.z.boolean().optional()
|
|
1563
1754
|
});
|
|
1564
|
-
var CommandExecutionSchema =
|
|
1565
|
-
command:
|
|
1566
|
-
exitCode:
|
|
1567
|
-
output:
|
|
1568
|
-
duration:
|
|
1755
|
+
var CommandExecutionSchema = import_zod30.z.object({
|
|
1756
|
+
command: import_zod30.z.string(),
|
|
1757
|
+
exitCode: import_zod30.z.number(),
|
|
1758
|
+
output: import_zod30.z.string().optional(),
|
|
1759
|
+
duration: import_zod30.z.number()
|
|
1569
1760
|
});
|
|
1570
|
-
var FileModificationSchema =
|
|
1571
|
-
path:
|
|
1572
|
-
action:
|
|
1761
|
+
var FileModificationSchema = import_zod30.z.object({
|
|
1762
|
+
path: import_zod30.z.string(),
|
|
1763
|
+
action: import_zod30.z.enum(["created", "modified", "deleted"])
|
|
1573
1764
|
});
|
|
1574
1765
|
var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
1575
1766
|
TemplateFileStatus2["NEW"] = "new";
|
|
@@ -1577,62 +1768,62 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
|
1577
1768
|
TemplateFileStatus2["UNCHANGED"] = "unchanged";
|
|
1578
1769
|
return TemplateFileStatus2;
|
|
1579
1770
|
})(TemplateFileStatus || {});
|
|
1580
|
-
var TemplateFileSchema =
|
|
1771
|
+
var TemplateFileSchema = import_zod30.z.object({
|
|
1581
1772
|
/** Relative path within the template */
|
|
1582
|
-
path:
|
|
1773
|
+
path: import_zod30.z.string(),
|
|
1583
1774
|
/** Full file content after execution */
|
|
1584
|
-
content:
|
|
1775
|
+
content: import_zod30.z.string(),
|
|
1585
1776
|
/** File status (new, modified, unchanged) */
|
|
1586
|
-
status:
|
|
1777
|
+
status: import_zod30.z.enum(["new", "modified", "unchanged"]),
|
|
1587
1778
|
/** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
|
|
1588
|
-
isInfrastructure:
|
|
1779
|
+
isInfrastructure: import_zod30.z.boolean().optional()
|
|
1589
1780
|
});
|
|
1590
|
-
var ApiCallSchema =
|
|
1591
|
-
endpoint:
|
|
1592
|
-
tokensUsed:
|
|
1593
|
-
duration:
|
|
1781
|
+
var ApiCallSchema = import_zod30.z.object({
|
|
1782
|
+
endpoint: import_zod30.z.string(),
|
|
1783
|
+
tokensUsed: import_zod30.z.number(),
|
|
1784
|
+
duration: import_zod30.z.number()
|
|
1594
1785
|
});
|
|
1595
|
-
var ExecutionTraceSchema =
|
|
1596
|
-
commands:
|
|
1597
|
-
filesModified:
|
|
1598
|
-
apiCalls:
|
|
1599
|
-
totalDuration:
|
|
1786
|
+
var ExecutionTraceSchema = import_zod30.z.object({
|
|
1787
|
+
commands: import_zod30.z.array(CommandExecutionSchema),
|
|
1788
|
+
filesModified: import_zod30.z.array(FileModificationSchema),
|
|
1789
|
+
apiCalls: import_zod30.z.array(ApiCallSchema),
|
|
1790
|
+
totalDuration: import_zod30.z.number()
|
|
1600
1791
|
});
|
|
1601
|
-
var RunAnalysisFindingSchema =
|
|
1602
|
-
category:
|
|
1792
|
+
var RunAnalysisFindingSchema = import_zod30.z.object({
|
|
1793
|
+
category: import_zod30.z.enum([
|
|
1603
1794
|
"failure_pattern",
|
|
1604
1795
|
"cost_waste",
|
|
1605
1796
|
"flakiness",
|
|
1606
1797
|
"inefficiency",
|
|
1607
1798
|
"positive"
|
|
1608
1799
|
]),
|
|
1609
|
-
severity:
|
|
1610
|
-
description:
|
|
1611
|
-
affectedScenarios:
|
|
1612
|
-
recommendation:
|
|
1800
|
+
severity: import_zod30.z.enum(["high", "medium", "low"]),
|
|
1801
|
+
description: import_zod30.z.string(),
|
|
1802
|
+
affectedScenarios: import_zod30.z.array(import_zod30.z.string()),
|
|
1803
|
+
recommendation: import_zod30.z.string().optional()
|
|
1613
1804
|
});
|
|
1614
|
-
var RunAnalysisSchema =
|
|
1615
|
-
generatedAt:
|
|
1616
|
-
summary:
|
|
1617
|
-
findings:
|
|
1805
|
+
var RunAnalysisSchema = import_zod30.z.object({
|
|
1806
|
+
generatedAt: import_zod30.z.string(),
|
|
1807
|
+
summary: import_zod30.z.string(),
|
|
1808
|
+
findings: import_zod30.z.array(RunAnalysisFindingSchema)
|
|
1618
1809
|
});
|
|
1619
1810
|
var EvalRunSchema = TenantEntitySchema.extend({
|
|
1620
1811
|
/** Agent ID for this run */
|
|
1621
|
-
agentId:
|
|
1812
|
+
agentId: import_zod30.z.string().optional(),
|
|
1622
1813
|
/** Preset ID that originated this run (optional) */
|
|
1623
|
-
presetId:
|
|
1814
|
+
presetId: import_zod30.z.string().optional(),
|
|
1624
1815
|
/** Skill IDs for this run */
|
|
1625
|
-
skillIds:
|
|
1816
|
+
skillIds: import_zod30.z.array(import_zod30.z.string()).optional(),
|
|
1626
1817
|
/** Map of skillId to skillVersionId for this run */
|
|
1627
|
-
skillVersions:
|
|
1818
|
+
skillVersions: import_zod30.z.record(import_zod30.z.string(), import_zod30.z.string()).optional(),
|
|
1628
1819
|
/** Scenario IDs to run (always present — resolved server-side from tags when needed) */
|
|
1629
|
-
scenarioIds:
|
|
1820
|
+
scenarioIds: import_zod30.z.array(import_zod30.z.string()),
|
|
1630
1821
|
/** Current status */
|
|
1631
1822
|
status: EvalStatusSchema,
|
|
1632
1823
|
/** Progress percentage (0-100) */
|
|
1633
|
-
progress:
|
|
1824
|
+
progress: import_zod30.z.number(),
|
|
1634
1825
|
/** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
|
|
1635
|
-
results:
|
|
1826
|
+
results: import_zod30.z.array(import_zod30.z.lazy(() => EvalRunResultSchema)),
|
|
1636
1827
|
/** Aggregated metrics across all results */
|
|
1637
1828
|
aggregateMetrics: EvalMetricsSchema,
|
|
1638
1829
|
/** Aggregated LLM trace summary */
|
|
@@ -1640,41 +1831,45 @@ var EvalRunSchema = TenantEntitySchema.extend({
|
|
|
1640
1831
|
/** What triggered this run */
|
|
1641
1832
|
trigger: TriggerSchema.optional(),
|
|
1642
1833
|
/** When the run started (set when evaluation is triggered) */
|
|
1643
|
-
startedAt:
|
|
1834
|
+
startedAt: import_zod30.z.string().optional(),
|
|
1644
1835
|
/** When the run completed */
|
|
1645
|
-
completedAt:
|
|
1836
|
+
completedAt: import_zod30.z.string().optional(),
|
|
1646
1837
|
/** Live trace events captured during execution (for playback on results page) */
|
|
1647
|
-
liveTraceEvents:
|
|
1838
|
+
liveTraceEvents: import_zod30.z.array(LiveTraceEventSchema).optional(),
|
|
1648
1839
|
/** Remote job ID for tracking execution in Dev Machines */
|
|
1649
|
-
jobId:
|
|
1840
|
+
jobId: import_zod30.z.string().optional(),
|
|
1650
1841
|
/** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
|
|
1651
|
-
jobStatus:
|
|
1842
|
+
jobStatus: import_zod30.z.string().optional(),
|
|
1652
1843
|
/** Remote job error message if the job failed */
|
|
1653
|
-
jobError:
|
|
1844
|
+
jobError: import_zod30.z.string().optional(),
|
|
1654
1845
|
/** Timestamp of the last job status check */
|
|
1655
|
-
jobStatusCheckedAt:
|
|
1846
|
+
jobStatusCheckedAt: import_zod30.z.string().optional(),
|
|
1656
1847
|
/** MCP server IDs to enable for this run (optional) */
|
|
1657
|
-
mcpIds:
|
|
1848
|
+
mcpIds: import_zod30.z.array(import_zod30.z.string()).optional(),
|
|
1658
1849
|
/** Sub-agent IDs to enable for this run (optional) */
|
|
1659
|
-
subAgentIds:
|
|
1850
|
+
subAgentIds: import_zod30.z.array(import_zod30.z.string()).optional(),
|
|
1660
1851
|
/** Rule IDs to enable for this run (optional) */
|
|
1661
|
-
ruleIds:
|
|
1852
|
+
ruleIds: import_zod30.z.array(import_zod30.z.string()).optional(),
|
|
1853
|
+
/** Unified capability IDs (replaces skill/mcp/subAgent/ruleIds) */
|
|
1854
|
+
capabilityIds: import_zod30.z.array(import_zod30.z.string()).optional(),
|
|
1855
|
+
/** Map of capabilityId to capabilityVersionId for version pinning */
|
|
1856
|
+
capabilityVersions: import_zod30.z.record(import_zod30.z.string(), import_zod30.z.string()).optional(),
|
|
1662
1857
|
/** Tags used to select scenarios for this run (for traceability) */
|
|
1663
|
-
tags:
|
|
1858
|
+
tags: import_zod30.z.array(import_zod30.z.string()).optional(),
|
|
1664
1859
|
/** How many times each scenario is executed within this eval run. Default: 1. Max: 20. */
|
|
1665
|
-
runsPerScenario:
|
|
1860
|
+
runsPerScenario: import_zod30.z.number().int().min(1).max(20).optional(),
|
|
1666
1861
|
/** Snapshot of agent configuration captured at run creation time */
|
|
1667
|
-
agentSnapshot:
|
|
1668
|
-
name:
|
|
1862
|
+
agentSnapshot: import_zod30.z.object({
|
|
1863
|
+
name: import_zod30.z.string().optional(),
|
|
1669
1864
|
agentType: AgentTypeSchema.optional(),
|
|
1670
1865
|
runCommand: AgentRunCommandSchema.optional(),
|
|
1671
|
-
systemPrompt:
|
|
1866
|
+
systemPrompt: import_zod30.z.string().nullable().optional(),
|
|
1672
1867
|
modelConfig: ModelConfigSchema.optional()
|
|
1673
1868
|
}).optional(),
|
|
1674
1869
|
/** UUID linking all runs in a comparison group */
|
|
1675
|
-
comparisonGroupId:
|
|
1870
|
+
comparisonGroupId: import_zod30.z.string().optional(),
|
|
1676
1871
|
/** Human-readable label for this variant (e.g., "MCP: Wix Stores") */
|
|
1677
|
-
comparisonLabel:
|
|
1872
|
+
comparisonLabel: import_zod30.z.string().optional(),
|
|
1678
1873
|
/** LLM-generated analysis of the completed run */
|
|
1679
1874
|
runAnalysis: RunAnalysisSchema.optional()
|
|
1680
1875
|
});
|
|
@@ -1692,60 +1887,60 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
|
1692
1887
|
agentSnapshot: true
|
|
1693
1888
|
}).extend({
|
|
1694
1889
|
/** Optional on input — backend resolves from tags when not provided */
|
|
1695
|
-
scenarioIds:
|
|
1890
|
+
scenarioIds: import_zod30.z.array(import_zod30.z.string()).optional()
|
|
1696
1891
|
}).refine(
|
|
1697
1892
|
(data) => data.scenarioIds && data.scenarioIds.length > 0 || data.tags && data.tags.length > 0,
|
|
1698
1893
|
{ message: "Either scenarioIds or tags must be provided" }
|
|
1699
1894
|
);
|
|
1700
|
-
var EvaluationProgressSchema =
|
|
1701
|
-
runId:
|
|
1702
|
-
targetId:
|
|
1703
|
-
totalScenarios:
|
|
1704
|
-
completedScenarios:
|
|
1705
|
-
scenarioProgress:
|
|
1706
|
-
|
|
1707
|
-
scenarioId:
|
|
1708
|
-
currentStep:
|
|
1709
|
-
error:
|
|
1895
|
+
var EvaluationProgressSchema = import_zod30.z.object({
|
|
1896
|
+
runId: import_zod30.z.string(),
|
|
1897
|
+
targetId: import_zod30.z.string(),
|
|
1898
|
+
totalScenarios: import_zod30.z.number(),
|
|
1899
|
+
completedScenarios: import_zod30.z.number(),
|
|
1900
|
+
scenarioProgress: import_zod30.z.array(
|
|
1901
|
+
import_zod30.z.object({
|
|
1902
|
+
scenarioId: import_zod30.z.string(),
|
|
1903
|
+
currentStep: import_zod30.z.string(),
|
|
1904
|
+
error: import_zod30.z.string().optional()
|
|
1710
1905
|
})
|
|
1711
1906
|
),
|
|
1712
|
-
createdAt:
|
|
1713
|
-
});
|
|
1714
|
-
var EvaluationLogSchema =
|
|
1715
|
-
runId:
|
|
1716
|
-
scenarioId:
|
|
1717
|
-
log:
|
|
1718
|
-
level:
|
|
1719
|
-
message:
|
|
1720
|
-
args:
|
|
1721
|
-
error:
|
|
1907
|
+
createdAt: import_zod30.z.number()
|
|
1908
|
+
});
|
|
1909
|
+
var EvaluationLogSchema = import_zod30.z.object({
|
|
1910
|
+
runId: import_zod30.z.string(),
|
|
1911
|
+
scenarioId: import_zod30.z.string(),
|
|
1912
|
+
log: import_zod30.z.object({
|
|
1913
|
+
level: import_zod30.z.enum(["info", "error", "debug"]),
|
|
1914
|
+
message: import_zod30.z.string().optional(),
|
|
1915
|
+
args: import_zod30.z.array(import_zod30.z.any()).optional(),
|
|
1916
|
+
error: import_zod30.z.string().optional()
|
|
1722
1917
|
})
|
|
1723
1918
|
});
|
|
1724
1919
|
var LLM_TIMEOUT = 12e4;
|
|
1725
1920
|
|
|
1726
1921
|
// src/evaluation/conversation.ts
|
|
1727
|
-
var
|
|
1728
|
-
var TextBlockSchema =
|
|
1729
|
-
type:
|
|
1730
|
-
text:
|
|
1731
|
-
});
|
|
1732
|
-
var ThinkingBlockSchema =
|
|
1733
|
-
type:
|
|
1734
|
-
thinking:
|
|
1735
|
-
});
|
|
1736
|
-
var ToolUseBlockSchema =
|
|
1737
|
-
type:
|
|
1738
|
-
toolName:
|
|
1739
|
-
toolId:
|
|
1740
|
-
input:
|
|
1741
|
-
});
|
|
1742
|
-
var ToolResultBlockSchema =
|
|
1743
|
-
type:
|
|
1744
|
-
toolUseId:
|
|
1745
|
-
content:
|
|
1746
|
-
isError:
|
|
1747
|
-
});
|
|
1748
|
-
var ConversationBlockSchema =
|
|
1922
|
+
var import_zod31 = require("zod");
|
|
1923
|
+
var TextBlockSchema = import_zod31.z.object({
|
|
1924
|
+
type: import_zod31.z.literal("text"),
|
|
1925
|
+
text: import_zod31.z.string()
|
|
1926
|
+
});
|
|
1927
|
+
var ThinkingBlockSchema = import_zod31.z.object({
|
|
1928
|
+
type: import_zod31.z.literal("thinking"),
|
|
1929
|
+
thinking: import_zod31.z.string()
|
|
1930
|
+
});
|
|
1931
|
+
var ToolUseBlockSchema = import_zod31.z.object({
|
|
1932
|
+
type: import_zod31.z.literal("tool_use"),
|
|
1933
|
+
toolName: import_zod31.z.string(),
|
|
1934
|
+
toolId: import_zod31.z.string(),
|
|
1935
|
+
input: import_zod31.z.unknown()
|
|
1936
|
+
});
|
|
1937
|
+
var ToolResultBlockSchema = import_zod31.z.object({
|
|
1938
|
+
type: import_zod31.z.literal("tool_result"),
|
|
1939
|
+
toolUseId: import_zod31.z.string(),
|
|
1940
|
+
content: import_zod31.z.string(),
|
|
1941
|
+
isError: import_zod31.z.boolean().optional()
|
|
1942
|
+
});
|
|
1943
|
+
var ConversationBlockSchema = import_zod31.z.discriminatedUnion("type", [
|
|
1749
1944
|
TextBlockSchema,
|
|
1750
1945
|
ThinkingBlockSchema,
|
|
1751
1946
|
ToolUseBlockSchema,
|
|
@@ -1756,18 +1951,18 @@ var ConversationMessageRoles = [
|
|
|
1756
1951
|
"user",
|
|
1757
1952
|
"system"
|
|
1758
1953
|
];
|
|
1759
|
-
var ConversationMessageSchema =
|
|
1760
|
-
role:
|
|
1761
|
-
content:
|
|
1762
|
-
timestamp:
|
|
1954
|
+
var ConversationMessageSchema = import_zod31.z.object({
|
|
1955
|
+
role: import_zod31.z.enum(ConversationMessageRoles),
|
|
1956
|
+
content: import_zod31.z.array(ConversationBlockSchema),
|
|
1957
|
+
timestamp: import_zod31.z.string()
|
|
1763
1958
|
});
|
|
1764
|
-
var ScenarioConversationSchema =
|
|
1765
|
-
id:
|
|
1766
|
-
projectId:
|
|
1767
|
-
evalRunId:
|
|
1768
|
-
resultId:
|
|
1769
|
-
messages:
|
|
1770
|
-
createdAt:
|
|
1959
|
+
var ScenarioConversationSchema = import_zod31.z.object({
|
|
1960
|
+
id: import_zod31.z.string(),
|
|
1961
|
+
projectId: import_zod31.z.string(),
|
|
1962
|
+
evalRunId: import_zod31.z.string(),
|
|
1963
|
+
resultId: import_zod31.z.string(),
|
|
1964
|
+
messages: import_zod31.z.array(ConversationMessageSchema),
|
|
1965
|
+
createdAt: import_zod31.z.string()
|
|
1771
1966
|
});
|
|
1772
1967
|
|
|
1773
1968
|
// src/evaluation/eval-result.ts
|
|
@@ -1778,98 +1973,98 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
|
1778
1973
|
AssertionResultStatus2["ERROR"] = "error";
|
|
1779
1974
|
return AssertionResultStatus2;
|
|
1780
1975
|
})(AssertionResultStatus || {});
|
|
1781
|
-
var AssertionResultSchema =
|
|
1782
|
-
id:
|
|
1783
|
-
assertionId:
|
|
1784
|
-
assertionType:
|
|
1785
|
-
assertionName:
|
|
1786
|
-
status:
|
|
1787
|
-
message:
|
|
1788
|
-
expected:
|
|
1789
|
-
actual:
|
|
1790
|
-
duration:
|
|
1791
|
-
details:
|
|
1792
|
-
llmTraceSteps:
|
|
1793
|
-
});
|
|
1794
|
-
var EvalRunResultSchema =
|
|
1795
|
-
id:
|
|
1796
|
-
targetId:
|
|
1797
|
-
targetName:
|
|
1976
|
+
var AssertionResultSchema = import_zod32.z.object({
|
|
1977
|
+
id: import_zod32.z.string(),
|
|
1978
|
+
assertionId: import_zod32.z.string(),
|
|
1979
|
+
assertionType: import_zod32.z.string(),
|
|
1980
|
+
assertionName: import_zod32.z.string(),
|
|
1981
|
+
status: import_zod32.z.enum(AssertionResultStatus),
|
|
1982
|
+
message: import_zod32.z.string().optional(),
|
|
1983
|
+
expected: import_zod32.z.string().optional(),
|
|
1984
|
+
actual: import_zod32.z.string().optional(),
|
|
1985
|
+
duration: import_zod32.z.number().optional(),
|
|
1986
|
+
details: import_zod32.z.record(import_zod32.z.string(), import_zod32.z.unknown()).optional(),
|
|
1987
|
+
llmTraceSteps: import_zod32.z.array(LLMTraceStepSchema).optional()
|
|
1988
|
+
});
|
|
1989
|
+
var EvalRunResultSchema = import_zod32.z.object({
|
|
1990
|
+
id: import_zod32.z.string(),
|
|
1991
|
+
targetId: import_zod32.z.string(),
|
|
1992
|
+
targetName: import_zod32.z.string().optional(),
|
|
1798
1993
|
/** SkillVersion ID used for this evaluation (for version tracking) */
|
|
1799
|
-
skillVersionId:
|
|
1994
|
+
skillVersionId: import_zod32.z.string().optional(),
|
|
1800
1995
|
/** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
|
|
1801
|
-
skillVersion:
|
|
1802
|
-
scenarioId:
|
|
1803
|
-
scenarioName:
|
|
1996
|
+
skillVersion: import_zod32.z.string().optional(),
|
|
1997
|
+
scenarioId: import_zod32.z.string(),
|
|
1998
|
+
scenarioName: import_zod32.z.string(),
|
|
1804
1999
|
/** Snapshot of the trigger prompt used during the run (prevents stale display after edits) */
|
|
1805
|
-
triggerPrompt:
|
|
2000
|
+
triggerPrompt: import_zod32.z.string().optional(),
|
|
1806
2001
|
modelConfig: ModelConfigSchema.optional(),
|
|
1807
|
-
assertionResults:
|
|
2002
|
+
assertionResults: import_zod32.z.array(AssertionResultSchema),
|
|
1808
2003
|
metrics: EvalMetricsSchema.optional(),
|
|
1809
|
-
passed:
|
|
1810
|
-
failed:
|
|
1811
|
-
passRate:
|
|
1812
|
-
duration:
|
|
1813
|
-
outputText:
|
|
1814
|
-
files:
|
|
1815
|
-
fileDiffs:
|
|
2004
|
+
passed: import_zod32.z.number(),
|
|
2005
|
+
failed: import_zod32.z.number(),
|
|
2006
|
+
passRate: import_zod32.z.number(),
|
|
2007
|
+
duration: import_zod32.z.number(),
|
|
2008
|
+
outputText: import_zod32.z.string().optional(),
|
|
2009
|
+
files: import_zod32.z.array(ExpectedFileSchema).optional(),
|
|
2010
|
+
fileDiffs: import_zod32.z.array(DiffContentSchema).optional(),
|
|
1816
2011
|
/** Full template files after execution with status indicators */
|
|
1817
|
-
templateFiles:
|
|
1818
|
-
startedAt:
|
|
1819
|
-
completedAt:
|
|
2012
|
+
templateFiles: import_zod32.z.array(TemplateFileSchema).optional(),
|
|
2013
|
+
startedAt: import_zod32.z.string().optional(),
|
|
2014
|
+
completedAt: import_zod32.z.string().optional(),
|
|
1820
2015
|
llmTrace: LLMTraceSchema.optional(),
|
|
1821
2016
|
/** Full conversation messages (only present in transit; stripped before DB storage) */
|
|
1822
|
-
conversation:
|
|
2017
|
+
conversation: import_zod32.z.array(ConversationMessageSchema).optional(),
|
|
1823
2018
|
/** 0-based iteration index when a scenario is run multiple times within a single eval run */
|
|
1824
|
-
iterationIndex:
|
|
1825
|
-
});
|
|
1826
|
-
var PromptResultSchema =
|
|
1827
|
-
text:
|
|
1828
|
-
files:
|
|
1829
|
-
finishReason:
|
|
1830
|
-
reasoning:
|
|
1831
|
-
reasoningDetails:
|
|
1832
|
-
toolCalls:
|
|
1833
|
-
toolResults:
|
|
1834
|
-
warnings:
|
|
1835
|
-
sources:
|
|
1836
|
-
steps:
|
|
1837
|
-
generationTimeMs:
|
|
1838
|
-
prompt:
|
|
1839
|
-
systemPrompt:
|
|
1840
|
-
usage:
|
|
1841
|
-
totalTokens:
|
|
1842
|
-
totalMicrocentsSpent:
|
|
2019
|
+
iterationIndex: import_zod32.z.number().int().min(0).optional()
|
|
2020
|
+
});
|
|
2021
|
+
var PromptResultSchema = import_zod32.z.object({
|
|
2022
|
+
text: import_zod32.z.string(),
|
|
2023
|
+
files: import_zod32.z.array(import_zod32.z.unknown()).optional(),
|
|
2024
|
+
finishReason: import_zod32.z.string().optional(),
|
|
2025
|
+
reasoning: import_zod32.z.string().optional(),
|
|
2026
|
+
reasoningDetails: import_zod32.z.unknown().optional(),
|
|
2027
|
+
toolCalls: import_zod32.z.array(import_zod32.z.unknown()).optional(),
|
|
2028
|
+
toolResults: import_zod32.z.array(import_zod32.z.unknown()).optional(),
|
|
2029
|
+
warnings: import_zod32.z.array(import_zod32.z.unknown()).optional(),
|
|
2030
|
+
sources: import_zod32.z.array(import_zod32.z.unknown()).optional(),
|
|
2031
|
+
steps: import_zod32.z.array(import_zod32.z.unknown()),
|
|
2032
|
+
generationTimeMs: import_zod32.z.number(),
|
|
2033
|
+
prompt: import_zod32.z.string(),
|
|
2034
|
+
systemPrompt: import_zod32.z.string(),
|
|
2035
|
+
usage: import_zod32.z.object({
|
|
2036
|
+
totalTokens: import_zod32.z.number().optional(),
|
|
2037
|
+
totalMicrocentsSpent: import_zod32.z.number().optional()
|
|
1843
2038
|
})
|
|
1844
2039
|
});
|
|
1845
|
-
var EvaluationResultSchema =
|
|
1846
|
-
id:
|
|
1847
|
-
runId:
|
|
1848
|
-
timestamp:
|
|
2040
|
+
var EvaluationResultSchema = import_zod32.z.object({
|
|
2041
|
+
id: import_zod32.z.string(),
|
|
2042
|
+
runId: import_zod32.z.string(),
|
|
2043
|
+
timestamp: import_zod32.z.number(),
|
|
1849
2044
|
promptResult: PromptResultSchema,
|
|
1850
|
-
testResults:
|
|
1851
|
-
tags:
|
|
1852
|
-
feedback:
|
|
1853
|
-
score:
|
|
1854
|
-
suiteId:
|
|
1855
|
-
});
|
|
1856
|
-
var LeanEvaluationResultSchema =
|
|
1857
|
-
id:
|
|
1858
|
-
runId:
|
|
1859
|
-
timestamp:
|
|
1860
|
-
tags:
|
|
1861
|
-
scenarioId:
|
|
1862
|
-
scenarioVersion:
|
|
1863
|
-
targetId:
|
|
1864
|
-
targetVersion:
|
|
1865
|
-
suiteId:
|
|
1866
|
-
score:
|
|
1867
|
-
time:
|
|
1868
|
-
microcentsSpent:
|
|
2045
|
+
testResults: import_zod32.z.array(import_zod32.z.unknown()),
|
|
2046
|
+
tags: import_zod32.z.array(import_zod32.z.string()).optional(),
|
|
2047
|
+
feedback: import_zod32.z.string().optional(),
|
|
2048
|
+
score: import_zod32.z.number(),
|
|
2049
|
+
suiteId: import_zod32.z.string().optional()
|
|
2050
|
+
});
|
|
2051
|
+
var LeanEvaluationResultSchema = import_zod32.z.object({
|
|
2052
|
+
id: import_zod32.z.string(),
|
|
2053
|
+
runId: import_zod32.z.string(),
|
|
2054
|
+
timestamp: import_zod32.z.number(),
|
|
2055
|
+
tags: import_zod32.z.array(import_zod32.z.string()).optional(),
|
|
2056
|
+
scenarioId: import_zod32.z.string(),
|
|
2057
|
+
scenarioVersion: import_zod32.z.number().optional(),
|
|
2058
|
+
targetId: import_zod32.z.string(),
|
|
2059
|
+
targetVersion: import_zod32.z.number().optional(),
|
|
2060
|
+
suiteId: import_zod32.z.string().optional(),
|
|
2061
|
+
score: import_zod32.z.number(),
|
|
2062
|
+
time: import_zod32.z.number().optional(),
|
|
2063
|
+
microcentsSpent: import_zod32.z.number().optional()
|
|
1869
2064
|
});
|
|
1870
2065
|
|
|
1871
2066
|
// src/evaluation/eval-run-folder.ts
|
|
1872
|
-
var
|
|
2067
|
+
var import_zod33 = require("zod");
|
|
1873
2068
|
var EvalRunFolderSchema = TenantEntitySchema.extend({});
|
|
1874
2069
|
var CreateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
|
|
1875
2070
|
id: true,
|
|
@@ -1883,26 +2078,26 @@ var UpdateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
|
|
|
1883
2078
|
updatedAt: true,
|
|
1884
2079
|
deleted: true
|
|
1885
2080
|
}).partial();
|
|
1886
|
-
var EvalRunFolderMembershipSchema =
|
|
1887
|
-
folderId:
|
|
1888
|
-
evalRunId:
|
|
1889
|
-
projectId:
|
|
1890
|
-
createdAt:
|
|
2081
|
+
var EvalRunFolderMembershipSchema = import_zod33.z.object({
|
|
2082
|
+
folderId: import_zod33.z.string(),
|
|
2083
|
+
evalRunId: import_zod33.z.string(),
|
|
2084
|
+
projectId: import_zod33.z.string(),
|
|
2085
|
+
createdAt: import_zod33.z.string()
|
|
1891
2086
|
});
|
|
1892
2087
|
|
|
1893
2088
|
// src/project/project.ts
|
|
1894
|
-
var
|
|
2089
|
+
var import_zod34 = require("zod");
|
|
1895
2090
|
var ProjectSchema = BaseEntitySchema.extend({
|
|
1896
|
-
appId:
|
|
1897
|
-
scenarioTags:
|
|
2091
|
+
appId: import_zod34.z.string().optional().describe("The ID of the app in Dev Center"),
|
|
2092
|
+
scenarioTags: import_zod34.z.array(import_zod34.z.string()).optional().describe("Project-level tag vocabulary for scenarios"),
|
|
1898
2093
|
/** Per-project Wix auth token (write-only — never returned in GET responses). null = clear. */
|
|
1899
|
-
wixAuthToken:
|
|
2094
|
+
wixAuthToken: import_zod34.z.string().nullable().optional().describe("Wix auth token for CLI/MCP authentication (encrypted at rest)"),
|
|
1900
2095
|
/** Per-project Base44 auth file content (write-only — never returned in GET responses). null = clear. */
|
|
1901
|
-
base44AuthFile:
|
|
2096
|
+
base44AuthFile: import_zod34.z.string().nullable().optional().describe("Base64-encoded Base44 auth file content (encrypted at rest)"),
|
|
1902
2097
|
/** Resolved at runtime from the encrypted Wix auth token */
|
|
1903
|
-
wixAuthEmail:
|
|
2098
|
+
wixAuthEmail: import_zod34.z.string().optional().describe("Email associated with the Wix auth token (resolved at runtime)"),
|
|
1904
2099
|
/** Resolved at runtime from the encrypted Base44 auth file */
|
|
1905
|
-
base44AuthEmail:
|
|
2100
|
+
base44AuthEmail: import_zod34.z.string().optional().describe("Email from the Base44 auth file (resolved at runtime)")
|
|
1906
2101
|
});
|
|
1907
2102
|
var CreateProjectInputSchema = ProjectSchema.omit({
|
|
1908
2103
|
id: true,
|
|
@@ -1911,6 +2106,10 @@ var CreateProjectInputSchema = ProjectSchema.omit({
|
|
|
1911
2106
|
deleted: true,
|
|
1912
2107
|
wixAuthEmail: true,
|
|
1913
2108
|
base44AuthEmail: true
|
|
2109
|
+
}).extend({
|
|
2110
|
+
appId: import_zod34.z.string().describe(
|
|
2111
|
+
"Required: The ID of the app in Dev Center for credential scoping"
|
|
2112
|
+
)
|
|
1914
2113
|
});
|
|
1915
2114
|
var UpdateProjectInputSchema = CreateProjectInputSchema.partial();
|
|
1916
2115
|
|
|
@@ -1928,7 +2127,7 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
|
|
|
1928
2127
|
var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
|
|
1929
2128
|
|
|
1930
2129
|
// src/schedule/eval-schedule.ts
|
|
1931
|
-
var
|
|
2130
|
+
var import_zod35 = require("zod");
|
|
1932
2131
|
var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
1933
2132
|
FrequencyType2["DAILY"] = "daily";
|
|
1934
2133
|
FrequencyType2["WEEKDAY"] = "weekday";
|
|
@@ -1938,29 +2137,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
|
1938
2137
|
})(FrequencyType || {});
|
|
1939
2138
|
var EvalScheduleSchema = TenantEntitySchema.extend({
|
|
1940
2139
|
/** Whether the schedule is active */
|
|
1941
|
-
enabled:
|
|
2140
|
+
enabled: import_zod35.z.boolean(),
|
|
1942
2141
|
/** Test suite to run */
|
|
1943
|
-
suiteId:
|
|
2142
|
+
suiteId: import_zod35.z.string(),
|
|
1944
2143
|
/** Preset that provides agent + entities for this schedule */
|
|
1945
|
-
presetId:
|
|
2144
|
+
presetId: import_zod35.z.string(),
|
|
1946
2145
|
/** How often to run */
|
|
1947
|
-
frequencyType:
|
|
2146
|
+
frequencyType: import_zod35.z.nativeEnum(FrequencyType),
|
|
1948
2147
|
/** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
|
|
1949
|
-
timeOfDay:
|
|
2148
|
+
timeOfDay: import_zod35.z.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
|
|
1950
2149
|
/** Day of week (0=Sun, 6=Sat) for weekly schedules */
|
|
1951
|
-
dayOfWeek:
|
|
2150
|
+
dayOfWeek: import_zod35.z.number().min(0).max(6).optional(),
|
|
1952
2151
|
/** Day of month (1-31) for monthly schedules */
|
|
1953
|
-
dayOfMonth:
|
|
2152
|
+
dayOfMonth: import_zod35.z.number().min(1).max(31).optional(),
|
|
1954
2153
|
/** IANA timezone (e.g., 'America/New_York') */
|
|
1955
|
-
timezone:
|
|
2154
|
+
timezone: import_zod35.z.string(),
|
|
1956
2155
|
/** ID of the last eval run created by this schedule */
|
|
1957
|
-
lastRunId:
|
|
2156
|
+
lastRunId: import_zod35.z.string().optional(),
|
|
1958
2157
|
/** Denormalized status of the last run */
|
|
1959
|
-
lastRunStatus:
|
|
2158
|
+
lastRunStatus: import_zod35.z.string().optional(),
|
|
1960
2159
|
/** ISO timestamp of the last run */
|
|
1961
|
-
lastRunAt:
|
|
2160
|
+
lastRunAt: import_zod35.z.string().optional(),
|
|
1962
2161
|
/** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
|
|
1963
|
-
nextRunAt:
|
|
2162
|
+
nextRunAt: import_zod35.z.string().optional()
|
|
1964
2163
|
});
|
|
1965
2164
|
function isValidTimezone(tz) {
|
|
1966
2165
|
try {
|
|
@@ -1973,14 +2172,14 @@ function isValidTimezone(tz) {
|
|
|
1973
2172
|
function validateScheduleFields(data, ctx, options) {
|
|
1974
2173
|
if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
|
|
1975
2174
|
ctx.addIssue({
|
|
1976
|
-
code:
|
|
2175
|
+
code: import_zod35.z.ZodIssueCode.custom,
|
|
1977
2176
|
message: "dayOfWeek is required for weekly schedules",
|
|
1978
2177
|
path: ["dayOfWeek"]
|
|
1979
2178
|
});
|
|
1980
2179
|
}
|
|
1981
2180
|
if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
|
|
1982
2181
|
ctx.addIssue({
|
|
1983
|
-
code:
|
|
2182
|
+
code: import_zod35.z.ZodIssueCode.custom,
|
|
1984
2183
|
message: "dayOfMonth is required for monthly schedules",
|
|
1985
2184
|
path: ["dayOfMonth"]
|
|
1986
2185
|
});
|
|
@@ -1988,7 +2187,7 @@ function validateScheduleFields(data, ctx, options) {
|
|
|
1988
2187
|
const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
|
|
1989
2188
|
if (shouldValidateTz && !isValidTimezone(data.timezone)) {
|
|
1990
2189
|
ctx.addIssue({
|
|
1991
|
-
code:
|
|
2190
|
+
code: import_zod35.z.ZodIssueCode.custom,
|
|
1992
2191
|
message: "Invalid IANA timezone",
|
|
1993
2192
|
path: ["timezone"]
|
|
1994
2193
|
});
|
|
@@ -2053,6 +2252,13 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
|
|
|
2053
2252
|
BulkImportResultItemSchema,
|
|
2054
2253
|
BulkImportResultSchema,
|
|
2055
2254
|
BulkImportSkillsInputSchema,
|
|
2255
|
+
CAPABILITY_NAME_REGEX,
|
|
2256
|
+
CapabilityContentSchema,
|
|
2257
|
+
CapabilitySchema,
|
|
2258
|
+
CapabilityTypeSchema,
|
|
2259
|
+
CapabilityVersionOriginSchema,
|
|
2260
|
+
CapabilityVersionSchema,
|
|
2261
|
+
CapabilityWithLatestVersionSchema,
|
|
2056
2262
|
ClaudeModel,
|
|
2057
2263
|
ClaudeModelSchema,
|
|
2058
2264
|
CommandExecutionSchema,
|
|
@@ -2063,6 +2269,8 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
|
|
|
2063
2269
|
CostAssertionSchema,
|
|
2064
2270
|
CostConfigSchema,
|
|
2065
2271
|
CreateAgentInputSchema,
|
|
2272
|
+
CreateCapabilityInputSchema,
|
|
2273
|
+
CreateCapabilityVersionInputSchema,
|
|
2066
2274
|
CreateEvalRunFolderInputSchema,
|
|
2067
2275
|
CreateEvalRunInputSchema,
|
|
2068
2276
|
CreateEvalScheduleInputSchema,
|
|
@@ -2102,6 +2310,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
|
|
|
2102
2310
|
FilePresenceTestSchema,
|
|
2103
2311
|
FrequencyType,
|
|
2104
2312
|
GitHubSourceSchema,
|
|
2313
|
+
InitialCapabilityVersionInputSchema,
|
|
2105
2314
|
InitialVersionInputSchema,
|
|
2106
2315
|
LEGACY_MODEL_ID_MAP,
|
|
2107
2316
|
LLMBreakdownStatsSchema,
|
|
@@ -2178,6 +2387,7 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
|
|
|
2178
2387
|
TriggerSchema,
|
|
2179
2388
|
TriggerType,
|
|
2180
2389
|
UpdateAgentInputSchema,
|
|
2390
|
+
UpdateCapabilityInputSchema,
|
|
2181
2391
|
UpdateEvalRunFolderInputSchema,
|
|
2182
2392
|
UpdateEvalScheduleInputSchema,
|
|
2183
2393
|
UpdateMcpInputSchema,
|
|
@@ -2190,12 +2400,20 @@ var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefi
|
|
|
2190
2400
|
UpdateTestScenarioInputSchema,
|
|
2191
2401
|
UpdateTestSuiteInputSchema,
|
|
2192
2402
|
VitestTestSchema,
|
|
2403
|
+
capabilityToMcp,
|
|
2404
|
+
capabilityToRule,
|
|
2405
|
+
capabilityToSkill,
|
|
2406
|
+
capabilityToSkillWithLatestVersion,
|
|
2407
|
+
capabilityToSubAgent,
|
|
2408
|
+
capabilityVersionToSkillVersion,
|
|
2193
2409
|
classifyAssertionRef,
|
|
2194
2410
|
formatTraceEventLine,
|
|
2195
2411
|
getSystemAssertion,
|
|
2196
2412
|
getSystemAssertions,
|
|
2413
|
+
groupCapabilitiesByType,
|
|
2197
2414
|
isAllowedBuildCommandString,
|
|
2198
2415
|
isSystemAssertionId,
|
|
2416
|
+
isValidCapabilityName,
|
|
2199
2417
|
isValidSkillFolderName,
|
|
2200
2418
|
normalizeBatchAssertionLink,
|
|
2201
2419
|
normalizeModelId,
|