@wix/evalforge-types 0.91.0 → 0.93.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +587 -449
- package/build/index.js.map +4 -4
- package/build/index.mjs +573 -448
- package/build/index.mjs.map +4 -4
- package/build/types/common/tool-names.d.ts +1 -1
- package/build/types/scenario/index.d.ts +2 -0
- package/build/types/scenario/site-setup.d.ts +132 -0
- package/build/types/scenario/test-scenario.d.ts +120 -0
- package/build/types/scenario/wix-origin-template-ids.d.ts +5 -0
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -186,6 +186,7 @@ var AVAILABLE_TOOL_NAMES = [
|
|
|
186
186
|
"Grep",
|
|
187
187
|
"Read",
|
|
188
188
|
"Skill",
|
|
189
|
+
"WebFetch",
|
|
189
190
|
"Write"
|
|
190
191
|
];
|
|
191
192
|
|
|
@@ -820,14 +821,65 @@ var EnvironmentSchema = z22.object({
|
|
|
820
821
|
metaSite: MetaSiteConfigSchema.optional()
|
|
821
822
|
});
|
|
822
823
|
|
|
824
|
+
// src/scenario/site-setup.ts
|
|
825
|
+
import { z as z23 } from "zod";
|
|
826
|
+
var SiteBootstrapHttpMethodSchema = z23.enum([
|
|
827
|
+
"get",
|
|
828
|
+
"post",
|
|
829
|
+
"put",
|
|
830
|
+
"patch",
|
|
831
|
+
"delete"
|
|
832
|
+
]);
|
|
833
|
+
var SiteBootstrapStepSchema = z23.object({
|
|
834
|
+
label: z23.string().optional(),
|
|
835
|
+
method: SiteBootstrapHttpMethodSchema,
|
|
836
|
+
url: z23.string().min(1),
|
|
837
|
+
body: z23.record(z23.string(), z23.unknown()).optional()
|
|
838
|
+
});
|
|
839
|
+
var SiteBootstrapSchema = z23.object({
|
|
840
|
+
steps: z23.array(SiteBootstrapStepSchema).default([])
|
|
841
|
+
});
|
|
842
|
+
var SiteBootstrapStepResultSchema = z23.object({
|
|
843
|
+
label: z23.string().optional(),
|
|
844
|
+
statusCode: z23.number().int(),
|
|
845
|
+
ok: z23.boolean(),
|
|
846
|
+
error: z23.string().optional()
|
|
847
|
+
});
|
|
848
|
+
var SiteBootstrapResultSchema = z23.object({
|
|
849
|
+
steps: z23.array(SiteBootstrapStepResultSchema)
|
|
850
|
+
});
|
|
851
|
+
var SiteSetupConfigSchema = z23.discriminatedUnion("mode", [
|
|
852
|
+
z23.object({ mode: z23.literal("none") }),
|
|
853
|
+
z23.object({
|
|
854
|
+
mode: z23.literal("clone"),
|
|
855
|
+
sourceSiteId: z23.string().min(1),
|
|
856
|
+
bootstrap: SiteBootstrapSchema.optional()
|
|
857
|
+
}),
|
|
858
|
+
z23.object({
|
|
859
|
+
mode: z23.literal("template"),
|
|
860
|
+
templateId: z23.string().min(1),
|
|
861
|
+
bootstrap: SiteBootstrapSchema.optional()
|
|
862
|
+
})
|
|
863
|
+
]);
|
|
864
|
+
var WixSiteSummarySchema = z23.object({
|
|
865
|
+
id: z23.string(),
|
|
866
|
+
displayName: z23.string(),
|
|
867
|
+
url: z23.string().optional()
|
|
868
|
+
});
|
|
869
|
+
var ProvisionedSiteSchema = z23.object({
|
|
870
|
+
id: z23.string(),
|
|
871
|
+
url: z23.string().optional(),
|
|
872
|
+
editorUrl: z23.string().optional()
|
|
873
|
+
});
|
|
874
|
+
|
|
823
875
|
// src/scenario/test-scenario.ts
|
|
824
|
-
import { z as
|
|
876
|
+
import { z as z26 } from "zod";
|
|
825
877
|
|
|
826
878
|
// src/assertion/assertion.ts
|
|
827
|
-
import { z as
|
|
879
|
+
import { z as z25 } from "zod";
|
|
828
880
|
|
|
829
881
|
// src/assertion/build-passed-command.ts
|
|
830
|
-
import { z as
|
|
882
|
+
import { z as z24 } from "zod";
|
|
831
883
|
var ALLOWED_BUILD_COMMANDS = [
|
|
832
884
|
"yarn build",
|
|
833
885
|
"npm run build",
|
|
@@ -853,10 +905,10 @@ function parseBuildCommandToArgv(command) {
|
|
|
853
905
|
return BUILD_COMMAND_ARGV[trimmed];
|
|
854
906
|
}
|
|
855
907
|
var enumTuple = ALLOWED_BUILD_COMMANDS;
|
|
856
|
-
var BuildPassedCommandStringSchema =
|
|
908
|
+
var BuildPassedCommandStringSchema = z24.enum(enumTuple);
|
|
857
909
|
|
|
858
910
|
// src/assertion/assertion.ts
|
|
859
|
-
var AssertionTypeSchema =
|
|
911
|
+
var AssertionTypeSchema = z25.enum([
|
|
860
912
|
"skill_was_called",
|
|
861
913
|
"tool_called_with_param",
|
|
862
914
|
"build_passed",
|
|
@@ -865,61 +917,61 @@ var AssertionTypeSchema = z24.enum([
|
|
|
865
917
|
"llm_judge",
|
|
866
918
|
"api_call"
|
|
867
919
|
]);
|
|
868
|
-
var AssertionParameterTypeSchema =
|
|
920
|
+
var AssertionParameterTypeSchema = z25.enum([
|
|
869
921
|
"string",
|
|
870
922
|
"number",
|
|
871
923
|
"boolean"
|
|
872
924
|
]);
|
|
873
|
-
var AssertionParameterSchema =
|
|
925
|
+
var AssertionParameterSchema = z25.object({
|
|
874
926
|
/** Parameter name (used as key in params object) */
|
|
875
|
-
name:
|
|
927
|
+
name: z25.string().min(1),
|
|
876
928
|
/** Display label for the parameter */
|
|
877
|
-
label:
|
|
929
|
+
label: z25.string().min(1),
|
|
878
930
|
/** Parameter type */
|
|
879
931
|
type: AssertionParameterTypeSchema,
|
|
880
932
|
/** Whether this parameter is required */
|
|
881
|
-
required:
|
|
933
|
+
required: z25.boolean(),
|
|
882
934
|
/** Default value (optional, used when not provided) */
|
|
883
|
-
defaultValue:
|
|
935
|
+
defaultValue: z25.union([z25.string(), z25.number(), z25.boolean()]).optional(),
|
|
884
936
|
/** If true, parameter is hidden by default behind "Show advanced options" */
|
|
885
|
-
advanced:
|
|
937
|
+
advanced: z25.boolean().optional()
|
|
886
938
|
});
|
|
887
|
-
var ScenarioAssertionLinkSchema =
|
|
939
|
+
var ScenarioAssertionLinkSchema = z25.object({
|
|
888
940
|
/** ID of the system assertion (e.g., 'system:skill_was_called') */
|
|
889
|
-
assertionId:
|
|
941
|
+
assertionId: z25.string(),
|
|
890
942
|
/** Parameter values for this assertion in this scenario */
|
|
891
|
-
params:
|
|
892
|
-
|
|
893
|
-
|
|
943
|
+
params: z25.record(
|
|
944
|
+
z25.string(),
|
|
945
|
+
z25.union([z25.string(), z25.number(), z25.boolean(), z25.null()])
|
|
894
946
|
).optional()
|
|
895
947
|
});
|
|
896
|
-
var SkillWasCalledConfigSchema =
|
|
948
|
+
var SkillWasCalledConfigSchema = z25.object({
|
|
897
949
|
/** Names of the skills that must have been called */
|
|
898
|
-
skillNames:
|
|
950
|
+
skillNames: z25.array(z25.string().min(1)).min(1)
|
|
899
951
|
});
|
|
900
|
-
var CostConfigSchema =
|
|
952
|
+
var CostConfigSchema = z25.strictObject({
|
|
901
953
|
/** Maximum allowed cost in USD */
|
|
902
|
-
maxCostUsd:
|
|
954
|
+
maxCostUsd: z25.number().positive()
|
|
903
955
|
});
|
|
904
|
-
var ToolCalledWithParamConfigSchema =
|
|
956
|
+
var ToolCalledWithParamConfigSchema = z25.strictObject({
|
|
905
957
|
/** Name of the tool that must have been called */
|
|
906
|
-
toolName:
|
|
958
|
+
toolName: z25.string().min(1),
|
|
907
959
|
/** JSON string of key-value pairs for expected parameters (substring match). Optional — when omitted, only checks tool presence. */
|
|
908
|
-
expectedParams:
|
|
960
|
+
expectedParams: z25.string().min(1).optional(),
|
|
909
961
|
/** If true, the matching tool call must also have succeeded (step.success === true) */
|
|
910
|
-
requireSuccess:
|
|
962
|
+
requireSuccess: z25.boolean().optional()
|
|
911
963
|
});
|
|
912
|
-
var BuildPassedConfigSchema =
|
|
964
|
+
var BuildPassedConfigSchema = z25.strictObject({
|
|
913
965
|
/** Allowlisted command only (default at runtime: "npm run build") */
|
|
914
966
|
command: BuildPassedCommandStringSchema.optional(),
|
|
915
967
|
/** Expected exit code (default: 0) */
|
|
916
|
-
expectedExitCode:
|
|
968
|
+
expectedExitCode: z25.number().int().optional()
|
|
917
969
|
});
|
|
918
|
-
var TimeConfigSchema =
|
|
970
|
+
var TimeConfigSchema = z25.strictObject({
|
|
919
971
|
/** Maximum allowed duration in milliseconds */
|
|
920
|
-
maxDurationMs:
|
|
972
|
+
maxDurationMs: z25.number().int().positive()
|
|
921
973
|
});
|
|
922
|
-
var LlmJudgeConfigSchema =
|
|
974
|
+
var LlmJudgeConfigSchema = z25.object({
|
|
923
975
|
/**
|
|
924
976
|
* Prompt template with placeholders:
|
|
925
977
|
* - {{output}}: agent's final output
|
|
@@ -930,65 +982,65 @@ var LlmJudgeConfigSchema = z24.object({
|
|
|
930
982
|
* - {{trace}}: step-by-step trace of tool calls
|
|
931
983
|
* - Custom parameters defined in the parameters array
|
|
932
984
|
*/
|
|
933
|
-
prompt:
|
|
985
|
+
prompt: z25.string().min(1),
|
|
934
986
|
/** Minimum score to pass (0-10, default 7) */
|
|
935
|
-
minScore:
|
|
987
|
+
minScore: z25.number().int().min(0).max(10).optional(),
|
|
936
988
|
/** Model for the judge (e.g. claude-3-5-haiku-20241022) */
|
|
937
|
-
model:
|
|
989
|
+
model: z25.string().optional(),
|
|
938
990
|
/** Max output tokens */
|
|
939
|
-
maxTokens:
|
|
991
|
+
maxTokens: z25.number().int().optional(),
|
|
940
992
|
/** Temperature (0-1) */
|
|
941
|
-
temperature:
|
|
993
|
+
temperature: z25.number().min(0).max(1).optional(),
|
|
942
994
|
/** User-defined parameters for this assertion */
|
|
943
|
-
parameters:
|
|
995
|
+
parameters: z25.array(AssertionParameterSchema).optional()
|
|
944
996
|
});
|
|
945
|
-
var ApiCallConfigSchema =
|
|
997
|
+
var ApiCallConfigSchema = z25.strictObject({
|
|
946
998
|
/** URL to call */
|
|
947
|
-
url:
|
|
999
|
+
url: z25.string().min(1),
|
|
948
1000
|
/** HTTP method (default GET) */
|
|
949
|
-
method:
|
|
1001
|
+
method: z25.enum(["GET", "POST"]).optional(),
|
|
950
1002
|
/** Request body (JSON string, for POST requests) */
|
|
951
|
-
requestBody:
|
|
1003
|
+
requestBody: z25.string().optional(),
|
|
952
1004
|
/** Expected JSON response to validate against (subset match — extra fields in actual are OK) */
|
|
953
|
-
expectedResponse:
|
|
1005
|
+
expectedResponse: z25.string().min(1),
|
|
954
1006
|
/** Request headers as JSON string of key-value pairs */
|
|
955
|
-
requestHeaders:
|
|
1007
|
+
requestHeaders: z25.string().optional(),
|
|
956
1008
|
/** Request timeout in milliseconds (default 30000) */
|
|
957
|
-
timeoutMs:
|
|
1009
|
+
timeoutMs: z25.number().int().positive().optional()
|
|
958
1010
|
});
|
|
959
1011
|
var AssertionBaseFields = {
|
|
960
1012
|
/** When true, the assertion's pass/fail logic is inverted (NOT operator). */
|
|
961
|
-
negate:
|
|
1013
|
+
negate: z25.boolean().optional()
|
|
962
1014
|
};
|
|
963
1015
|
var SkillWasCalledAssertionSchema = SkillWasCalledConfigSchema.extend({
|
|
964
|
-
type:
|
|
1016
|
+
type: z25.literal("skill_was_called"),
|
|
965
1017
|
...AssertionBaseFields
|
|
966
1018
|
});
|
|
967
1019
|
var ToolCalledWithParamAssertionSchema = ToolCalledWithParamConfigSchema.extend({
|
|
968
|
-
type:
|
|
1020
|
+
type: z25.literal("tool_called_with_param"),
|
|
969
1021
|
...AssertionBaseFields
|
|
970
1022
|
});
|
|
971
1023
|
var BuildPassedAssertionSchema = BuildPassedConfigSchema.extend({
|
|
972
|
-
type:
|
|
1024
|
+
type: z25.literal("build_passed"),
|
|
973
1025
|
...AssertionBaseFields
|
|
974
1026
|
});
|
|
975
1027
|
var CostAssertionSchema = CostConfigSchema.extend({
|
|
976
|
-
type:
|
|
1028
|
+
type: z25.literal("cost"),
|
|
977
1029
|
...AssertionBaseFields
|
|
978
1030
|
});
|
|
979
1031
|
var LlmJudgeAssertionSchema = LlmJudgeConfigSchema.extend({
|
|
980
|
-
type:
|
|
1032
|
+
type: z25.literal("llm_judge"),
|
|
981
1033
|
...AssertionBaseFields
|
|
982
1034
|
});
|
|
983
1035
|
var ApiCallAssertionSchema = ApiCallConfigSchema.extend({
|
|
984
|
-
type:
|
|
1036
|
+
type: z25.literal("api_call"),
|
|
985
1037
|
...AssertionBaseFields
|
|
986
1038
|
});
|
|
987
1039
|
var TimeAssertionSchema = TimeConfigSchema.extend({
|
|
988
|
-
type:
|
|
1040
|
+
type: z25.literal("time_limit"),
|
|
989
1041
|
...AssertionBaseFields
|
|
990
1042
|
});
|
|
991
|
-
var AssertionSchema =
|
|
1043
|
+
var AssertionSchema = z25.union([
|
|
992
1044
|
SkillWasCalledAssertionSchema,
|
|
993
1045
|
ToolCalledWithParamAssertionSchema,
|
|
994
1046
|
BuildPassedAssertionSchema,
|
|
@@ -997,7 +1049,7 @@ var AssertionSchema = z24.union([
|
|
|
997
1049
|
LlmJudgeAssertionSchema,
|
|
998
1050
|
ApiCallAssertionSchema
|
|
999
1051
|
]);
|
|
1000
|
-
var AssertionConfigSchema =
|
|
1052
|
+
var AssertionConfigSchema = z25.union([
|
|
1001
1053
|
LlmJudgeConfigSchema,
|
|
1002
1054
|
// requires prompt - check first
|
|
1003
1055
|
SkillWasCalledConfigSchema,
|
|
@@ -1012,7 +1064,7 @@ var AssertionConfigSchema = z24.union([
|
|
|
1012
1064
|
// requires maxCostUsd, uses strictObject
|
|
1013
1065
|
BuildPassedConfigSchema,
|
|
1014
1066
|
// all optional, uses strictObject to reject unknown keys
|
|
1015
|
-
|
|
1067
|
+
z25.object({})
|
|
1016
1068
|
// fallback empty config
|
|
1017
1069
|
]);
|
|
1018
1070
|
function validateAssertionConfig(type, config) {
|
|
@@ -1258,36 +1310,67 @@ function getSystemAssertion(id) {
|
|
|
1258
1310
|
|
|
1259
1311
|
// src/scenario/test-scenario.ts
|
|
1260
1312
|
var MAX_IMAGE_BASE64_LENGTH = 4 * Math.ceil(2 * 1024 * 1024 / 3);
|
|
1261
|
-
var TriggerPromptImageSchema =
|
|
1313
|
+
var TriggerPromptImageSchema = z26.object({
|
|
1262
1314
|
/** Base64-encoded image data (no data URL prefix) */
|
|
1263
|
-
base64:
|
|
1315
|
+
base64: z26.string().max(MAX_IMAGE_BASE64_LENGTH, "Image exceeds 2 MB size limit"),
|
|
1264
1316
|
/** MIME type of the image */
|
|
1265
|
-
mediaType:
|
|
1317
|
+
mediaType: z26.enum(["image/jpeg", "image/png", "image/gif", "image/webp"]),
|
|
1266
1318
|
/** Original filename of the image */
|
|
1267
|
-
name:
|
|
1319
|
+
name: z26.string()
|
|
1268
1320
|
});
|
|
1269
|
-
var ExpectedFileSchema =
|
|
1321
|
+
var ExpectedFileSchema = z26.object({
|
|
1270
1322
|
/** Relative path where the file should be created */
|
|
1271
|
-
path:
|
|
1323
|
+
path: z26.string(),
|
|
1272
1324
|
/** Optional expected content */
|
|
1273
|
-
content:
|
|
1325
|
+
content: z26.string().optional()
|
|
1274
1326
|
});
|
|
1275
1327
|
var TestScenarioSchema = TenantEntitySchema.extend({
|
|
1276
1328
|
/** The prompt sent to the agent to trigger the task */
|
|
1277
|
-
triggerPrompt:
|
|
1329
|
+
triggerPrompt: z26.string().min(10),
|
|
1278
1330
|
/** ID of the template to use for this scenario (null = no template) */
|
|
1279
|
-
templateId:
|
|
1331
|
+
templateId: z26.string().nullish(),
|
|
1280
1332
|
/** Inline assertions to evaluate for this scenario (legacy) */
|
|
1281
|
-
assertions:
|
|
1333
|
+
assertions: z26.array(AssertionSchema).optional(),
|
|
1282
1334
|
/** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
|
|
1283
|
-
assertionIds:
|
|
1335
|
+
assertionIds: z26.array(z26.string()).optional(),
|
|
1284
1336
|
/** Linked assertions with per-scenario parameter values */
|
|
1285
|
-
assertionLinks:
|
|
1337
|
+
assertionLinks: z26.array(ScenarioAssertionLinkSchema).optional(),
|
|
1286
1338
|
/** Tags for categorisation and filtering */
|
|
1287
|
-
tags:
|
|
1339
|
+
tags: z26.array(z26.string()).optional(),
|
|
1288
1340
|
/** Base64-encoded images attached to the trigger prompt (max 3) */
|
|
1289
|
-
triggerPromptImages:
|
|
1290
|
-
|
|
1341
|
+
triggerPromptImages: z26.array(TriggerPromptImageSchema).max(3).optional(),
|
|
1342
|
+
/** Optional per-scenario Wix site provisioning instructions. Absent ≡ no site. */
|
|
1343
|
+
siteSetup: SiteSetupConfigSchema.optional()
|
|
1344
|
+
});
|
|
1345
|
+
var SITE_SETUP_EXCLUSIVE_VARIABLES = ["site-id"];
|
|
1346
|
+
function extractVariableNamesFromPrompt(prompt) {
|
|
1347
|
+
const names = /* @__PURE__ */ new Set();
|
|
1348
|
+
for (const match of prompt.matchAll(/\{\{([\w-]+)\}\}/g)) {
|
|
1349
|
+
names.add(match[1]);
|
|
1350
|
+
}
|
|
1351
|
+
return [...names];
|
|
1352
|
+
}
|
|
1353
|
+
function promptUsesSiteSetupExclusiveVariables(prompt) {
|
|
1354
|
+
const names = extractVariableNamesFromPrompt(prompt);
|
|
1355
|
+
return SITE_SETUP_EXCLUSIVE_VARIABLES.some(
|
|
1356
|
+
(exclusive) => names.includes(exclusive)
|
|
1357
|
+
);
|
|
1358
|
+
}
|
|
1359
|
+
function hasActiveSiteSetup(siteSetup) {
|
|
1360
|
+
return siteSetup?.mode === "clone" || siteSetup?.mode === "template";
|
|
1361
|
+
}
|
|
1362
|
+
function validateSiteSetupExclusivity(data, ctx) {
|
|
1363
|
+
if (!hasActiveSiteSetup(data.siteSetup)) return;
|
|
1364
|
+
const prompt = data.triggerPrompt;
|
|
1365
|
+
if (prompt === void 0 || !promptUsesSiteSetupExclusiveVariables(prompt)) {
|
|
1366
|
+
return;
|
|
1367
|
+
}
|
|
1368
|
+
ctx.addIssue({
|
|
1369
|
+
code: z26.ZodIssueCode.custom,
|
|
1370
|
+
message: "Site setup and {{site-id}} run variables cannot be used together. Remove {{site-id}} from the trigger prompt or disable site setup.",
|
|
1371
|
+
path: ["triggerPrompt"]
|
|
1372
|
+
});
|
|
1373
|
+
}
|
|
1291
1374
|
function validateBuildPassedParamsInAssertionLinks(links, ctx) {
|
|
1292
1375
|
if (!links) return;
|
|
1293
1376
|
for (let i = 0; i < links.length; i++) {
|
|
@@ -1297,7 +1380,7 @@ function validateBuildPassedParamsInAssertionLinks(links, ctx) {
|
|
|
1297
1380
|
if (cmd === void 0 || cmd === null) continue;
|
|
1298
1381
|
if (typeof cmd !== "string") {
|
|
1299
1382
|
ctx.addIssue({
|
|
1300
|
-
code:
|
|
1383
|
+
code: z26.ZodIssueCode.custom,
|
|
1301
1384
|
message: "build_passed command must be a string",
|
|
1302
1385
|
path: ["assertionLinks", i, "params", "command"]
|
|
1303
1386
|
});
|
|
@@ -1305,7 +1388,7 @@ function validateBuildPassedParamsInAssertionLinks(links, ctx) {
|
|
|
1305
1388
|
}
|
|
1306
1389
|
if (!isAllowedBuildCommandString(cmd)) {
|
|
1307
1390
|
ctx.addIssue({
|
|
1308
|
-
code:
|
|
1391
|
+
code: z26.ZodIssueCode.custom,
|
|
1309
1392
|
message: "Invalid build_passed command. Allowed: yarn build, npm run build, pnpm run build, pnpm build",
|
|
1310
1393
|
path: ["assertionLinks", i, "params", "command"]
|
|
1311
1394
|
});
|
|
@@ -1320,27 +1403,56 @@ var TestScenarioCreateBaseSchema = TestScenarioSchema.omit({
|
|
|
1320
1403
|
});
|
|
1321
1404
|
var CreateTestScenarioInputSchema = TestScenarioCreateBaseSchema.superRefine((data, ctx) => {
|
|
1322
1405
|
validateBuildPassedParamsInAssertionLinks(data.assertionLinks, ctx);
|
|
1406
|
+
validateSiteSetupExclusivity(data, ctx);
|
|
1323
1407
|
});
|
|
1324
1408
|
var UpdateTestScenarioInputSchema = TestScenarioCreateBaseSchema.partial().superRefine((data, ctx) => {
|
|
1325
1409
|
if (data.assertionLinks !== void 0) {
|
|
1326
1410
|
validateBuildPassedParamsInAssertionLinks(data.assertionLinks, ctx);
|
|
1327
1411
|
}
|
|
1412
|
+
validateSiteSetupExclusivity(data, ctx);
|
|
1328
1413
|
});
|
|
1329
1414
|
|
|
1415
|
+
// src/scenario/wix-origin-template-ids.ts
|
|
1416
|
+
var WIX_ORIGIN_TEMPLATE_ID_BY_ALIAS = {
|
|
1417
|
+
ecommerce: "e5da13f4-c01e-4b61-a9c7-55dacd961d54",
|
|
1418
|
+
default: "212b41cb-0da6-4401-9c72-7c579e6477a2",
|
|
1419
|
+
blog: "68fc7371-365f-44c6-8467-69d88bfc172e",
|
|
1420
|
+
astrowind: "9e9292c1-1a35-4ba0-8986-d06f2ecb5366",
|
|
1421
|
+
scheduler: "72ade0e3-1871-4c04-ac54-419ca874d9d3",
|
|
1422
|
+
registration: "e5d63bf1-cd06-48eb-ad77-0da9235adcf1",
|
|
1423
|
+
"picasso-studio": "61f05de1-b0ce-4873-b9f5-52241a6fd262",
|
|
1424
|
+
"picasso-ecom": "daa9187d-f010-4eb0-bd49-e658b5a5037a",
|
|
1425
|
+
picasso: "99b9a3c7-82ad-4e1b-9066-e490bb9863af",
|
|
1426
|
+
"ecom-editorless": "738c7c0b-046e-4bf0-87dd-9a06ee5a52c4"
|
|
1427
|
+
};
|
|
1428
|
+
var GUID_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
1429
|
+
function resolveWixOriginTemplateId(templateId) {
|
|
1430
|
+
if (GUID_PATTERN.test(templateId)) {
|
|
1431
|
+
return templateId;
|
|
1432
|
+
}
|
|
1433
|
+
const originTemplateId = WIX_ORIGIN_TEMPLATE_ID_BY_ALIAS[templateId];
|
|
1434
|
+
if (!originTemplateId) {
|
|
1435
|
+
throw new Error(
|
|
1436
|
+
`Unknown Wix site template alias "${templateId}". Use a GUID or one of: ${Object.keys(WIX_ORIGIN_TEMPLATE_ID_BY_ALIAS).join(", ")}`
|
|
1437
|
+
);
|
|
1438
|
+
}
|
|
1439
|
+
return originTemplateId;
|
|
1440
|
+
}
|
|
1441
|
+
|
|
1330
1442
|
// src/scenario/batch-import.ts
|
|
1331
|
-
import { z as
|
|
1443
|
+
import { z as z27 } from "zod";
|
|
1332
1444
|
var UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
1333
|
-
var BatchAssertionLinkSchema =
|
|
1334
|
-
|
|
1445
|
+
var BatchAssertionLinkSchema = z27.union([
|
|
1446
|
+
z27.string().min(1),
|
|
1335
1447
|
ScenarioAssertionLinkSchema
|
|
1336
1448
|
]);
|
|
1337
|
-
var BatchScenarioEntrySchema =
|
|
1338
|
-
name:
|
|
1339
|
-
description:
|
|
1340
|
-
triggerPrompt:
|
|
1341
|
-
templateId:
|
|
1342
|
-
tags:
|
|
1343
|
-
assertionLinks:
|
|
1449
|
+
var BatchScenarioEntrySchema = z27.object({
|
|
1450
|
+
name: z27.string().min(1, "name: Required"),
|
|
1451
|
+
description: z27.string().optional().default(""),
|
|
1452
|
+
triggerPrompt: z27.string().min(10, "triggerPrompt: Must be at least 10 characters"),
|
|
1453
|
+
templateId: z27.string().nullish(),
|
|
1454
|
+
tags: z27.array(z27.string()).optional(),
|
|
1455
|
+
assertionLinks: z27.array(BatchAssertionLinkSchema).optional()
|
|
1344
1456
|
}).superRefine((data, ctx) => {
|
|
1345
1457
|
if (!data.assertionLinks) return;
|
|
1346
1458
|
const objectLinks = data.assertionLinks.filter(
|
|
@@ -1350,8 +1462,8 @@ var BatchScenarioEntrySchema = z26.object({
|
|
|
1350
1462
|
validateBuildPassedParamsInAssertionLinks(objectLinks, ctx);
|
|
1351
1463
|
}
|
|
1352
1464
|
});
|
|
1353
|
-
var BatchImportPayloadSchema =
|
|
1354
|
-
scenarios:
|
|
1465
|
+
var BatchImportPayloadSchema = z27.object({
|
|
1466
|
+
scenarios: z27.array(BatchScenarioEntrySchema).min(1, "scenarios array must contain at least one entry").max(100, "Maximum 100 scenarios per upload")
|
|
1355
1467
|
});
|
|
1356
1468
|
var BATCH_IMPORT_LIMITS = {
|
|
1357
1469
|
MAX_SCENARIOS: 100,
|
|
@@ -1373,29 +1485,29 @@ function normalizeBatchAssertionLink(link) {
|
|
|
1373
1485
|
}
|
|
1374
1486
|
return link;
|
|
1375
1487
|
}
|
|
1376
|
-
var BatchResultItemSchema =
|
|
1377
|
-
index:
|
|
1378
|
-
name:
|
|
1379
|
-
status:
|
|
1380
|
-
id:
|
|
1381
|
-
errors:
|
|
1382
|
-
});
|
|
1383
|
-
var BatchSummarySchema =
|
|
1384
|
-
total:
|
|
1385
|
-
valid:
|
|
1386
|
-
invalid:
|
|
1387
|
-
created:
|
|
1388
|
-
});
|
|
1389
|
-
var BatchImportResponseSchema =
|
|
1488
|
+
var BatchResultItemSchema = z27.object({
|
|
1489
|
+
index: z27.number(),
|
|
1490
|
+
name: z27.string(),
|
|
1491
|
+
status: z27.enum(["valid", "invalid"]),
|
|
1492
|
+
id: z27.string().nullable().optional(),
|
|
1493
|
+
errors: z27.array(z27.string()).optional()
|
|
1494
|
+
});
|
|
1495
|
+
var BatchSummarySchema = z27.object({
|
|
1496
|
+
total: z27.number(),
|
|
1497
|
+
valid: z27.number(),
|
|
1498
|
+
invalid: z27.number(),
|
|
1499
|
+
created: z27.number()
|
|
1500
|
+
});
|
|
1501
|
+
var BatchImportResponseSchema = z27.object({
|
|
1390
1502
|
summary: BatchSummarySchema,
|
|
1391
|
-
results:
|
|
1503
|
+
results: z27.array(BatchResultItemSchema)
|
|
1392
1504
|
});
|
|
1393
1505
|
|
|
1394
1506
|
// src/suite/test-suite.ts
|
|
1395
|
-
import { z as
|
|
1507
|
+
import { z as z28 } from "zod";
|
|
1396
1508
|
var TestSuiteSchema = TenantEntitySchema.extend({
|
|
1397
1509
|
/** IDs of test scenarios in this suite */
|
|
1398
|
-
scenarioIds:
|
|
1510
|
+
scenarioIds: z28.array(z28.string())
|
|
1399
1511
|
});
|
|
1400
1512
|
var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
1401
1513
|
id: true,
|
|
@@ -1406,21 +1518,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
|
1406
1518
|
var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
|
|
1407
1519
|
|
|
1408
1520
|
// src/evaluation/metrics.ts
|
|
1409
|
-
import { z as
|
|
1410
|
-
var TokenUsageSchema =
|
|
1411
|
-
prompt:
|
|
1412
|
-
completion:
|
|
1413
|
-
total:
|
|
1414
|
-
});
|
|
1415
|
-
var EvalMetricsSchema =
|
|
1416
|
-
totalAssertions:
|
|
1417
|
-
passed:
|
|
1418
|
-
failed:
|
|
1419
|
-
skipped:
|
|
1420
|
-
errors:
|
|
1421
|
-
passRate:
|
|
1422
|
-
avgDuration:
|
|
1423
|
-
totalDuration:
|
|
1521
|
+
import { z as z29 } from "zod";
|
|
1522
|
+
var TokenUsageSchema = z29.object({
|
|
1523
|
+
prompt: z29.number(),
|
|
1524
|
+
completion: z29.number(),
|
|
1525
|
+
total: z29.number()
|
|
1526
|
+
});
|
|
1527
|
+
var EvalMetricsSchema = z29.object({
|
|
1528
|
+
totalAssertions: z29.number(),
|
|
1529
|
+
passed: z29.number(),
|
|
1530
|
+
failed: z29.number(),
|
|
1531
|
+
skipped: z29.number(),
|
|
1532
|
+
errors: z29.number(),
|
|
1533
|
+
passRate: z29.number(),
|
|
1534
|
+
avgDuration: z29.number(),
|
|
1535
|
+
totalDuration: z29.number()
|
|
1424
1536
|
});
|
|
1425
1537
|
var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
1426
1538
|
EvalStatus2["PENDING"] = "pending";
|
|
@@ -1430,7 +1542,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
|
1430
1542
|
EvalStatus2["CANCELLED"] = "cancelled";
|
|
1431
1543
|
return EvalStatus2;
|
|
1432
1544
|
})(EvalStatus || {});
|
|
1433
|
-
var EvalStatusSchema =
|
|
1545
|
+
var EvalStatusSchema = z29.enum(EvalStatus);
|
|
1434
1546
|
var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
1435
1547
|
LLMStepType2["COMPLETION"] = "completion";
|
|
1436
1548
|
LLMStepType2["TOOL_USE"] = "tool_use";
|
|
@@ -1438,54 +1550,54 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
|
1438
1550
|
LLMStepType2["THINKING"] = "thinking";
|
|
1439
1551
|
return LLMStepType2;
|
|
1440
1552
|
})(LLMStepType || {});
|
|
1441
|
-
var LLMTraceStepSchema =
|
|
1442
|
-
id:
|
|
1443
|
-
stepNumber:
|
|
1444
|
-
type:
|
|
1445
|
-
model:
|
|
1446
|
-
provider:
|
|
1447
|
-
startedAt:
|
|
1448
|
-
durationMs:
|
|
1553
|
+
var LLMTraceStepSchema = z29.object({
|
|
1554
|
+
id: z29.string(),
|
|
1555
|
+
stepNumber: z29.number(),
|
|
1556
|
+
type: z29.enum(LLMStepType),
|
|
1557
|
+
model: z29.string(),
|
|
1558
|
+
provider: z29.string(),
|
|
1559
|
+
startedAt: z29.string(),
|
|
1560
|
+
durationMs: z29.number(),
|
|
1449
1561
|
tokenUsage: TokenUsageSchema,
|
|
1450
|
-
costUsd:
|
|
1451
|
-
toolName:
|
|
1452
|
-
toolArguments:
|
|
1453
|
-
inputPreview:
|
|
1454
|
-
outputPreview:
|
|
1455
|
-
success:
|
|
1456
|
-
error:
|
|
1457
|
-
turnIndex:
|
|
1458
|
-
});
|
|
1459
|
-
var LLMBreakdownStatsSchema =
|
|
1460
|
-
count:
|
|
1461
|
-
durationMs:
|
|
1462
|
-
tokens:
|
|
1463
|
-
costUsd:
|
|
1464
|
-
});
|
|
1465
|
-
var LLMTraceSummarySchema =
|
|
1466
|
-
totalSteps:
|
|
1467
|
-
totalTurns:
|
|
1468
|
-
totalDurationMs:
|
|
1562
|
+
costUsd: z29.number(),
|
|
1563
|
+
toolName: z29.string().optional(),
|
|
1564
|
+
toolArguments: z29.string().optional(),
|
|
1565
|
+
inputPreview: z29.string().optional(),
|
|
1566
|
+
outputPreview: z29.string().optional(),
|
|
1567
|
+
success: z29.boolean(),
|
|
1568
|
+
error: z29.string().optional(),
|
|
1569
|
+
turnIndex: z29.number().optional()
|
|
1570
|
+
});
|
|
1571
|
+
var LLMBreakdownStatsSchema = z29.object({
|
|
1572
|
+
count: z29.number(),
|
|
1573
|
+
durationMs: z29.number(),
|
|
1574
|
+
tokens: z29.number(),
|
|
1575
|
+
costUsd: z29.number()
|
|
1576
|
+
});
|
|
1577
|
+
var LLMTraceSummarySchema = z29.object({
|
|
1578
|
+
totalSteps: z29.number(),
|
|
1579
|
+
totalTurns: z29.number().optional(),
|
|
1580
|
+
totalDurationMs: z29.number(),
|
|
1469
1581
|
totalTokens: TokenUsageSchema,
|
|
1470
|
-
totalCostUsd:
|
|
1471
|
-
stepTypeBreakdown:
|
|
1472
|
-
modelBreakdown:
|
|
1473
|
-
modelsUsed:
|
|
1474
|
-
});
|
|
1475
|
-
var LLMTraceSchema =
|
|
1476
|
-
id:
|
|
1477
|
-
steps:
|
|
1582
|
+
totalCostUsd: z29.number(),
|
|
1583
|
+
stepTypeBreakdown: z29.record(z29.string(), LLMBreakdownStatsSchema).optional(),
|
|
1584
|
+
modelBreakdown: z29.record(z29.string(), LLMBreakdownStatsSchema),
|
|
1585
|
+
modelsUsed: z29.array(z29.string())
|
|
1586
|
+
});
|
|
1587
|
+
var LLMTraceSchema = z29.object({
|
|
1588
|
+
id: z29.string(),
|
|
1589
|
+
steps: z29.array(LLMTraceStepSchema),
|
|
1478
1590
|
summary: LLMTraceSummarySchema
|
|
1479
1591
|
});
|
|
1480
1592
|
|
|
1481
1593
|
// src/evaluation/eval-result.ts
|
|
1482
|
-
import { z as
|
|
1594
|
+
import { z as z33 } from "zod";
|
|
1483
1595
|
|
|
1484
1596
|
// src/evaluation/eval-run.ts
|
|
1485
|
-
import { z as
|
|
1597
|
+
import { z as z31 } from "zod";
|
|
1486
1598
|
|
|
1487
1599
|
// src/evaluation/live-trace.ts
|
|
1488
|
-
import { z as
|
|
1600
|
+
import { z as z30 } from "zod";
|
|
1489
1601
|
var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
1490
1602
|
LiveTraceEventType2["THINKING"] = "thinking";
|
|
1491
1603
|
LiveTraceEventType2["TOOL_USE"] = "tool_use";
|
|
@@ -1499,37 +1611,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
|
1499
1611
|
LiveTraceEventType2["USER"] = "user";
|
|
1500
1612
|
return LiveTraceEventType2;
|
|
1501
1613
|
})(LiveTraceEventType || {});
|
|
1502
|
-
var LiveTraceEventSchema =
|
|
1614
|
+
var LiveTraceEventSchema = z30.object({
|
|
1503
1615
|
/** The evaluation run ID */
|
|
1504
|
-
evalRunId:
|
|
1616
|
+
evalRunId: z30.string(),
|
|
1505
1617
|
/** The scenario ID being executed */
|
|
1506
|
-
scenarioId:
|
|
1618
|
+
scenarioId: z30.string(),
|
|
1507
1619
|
/** The scenario name for display */
|
|
1508
|
-
scenarioName:
|
|
1620
|
+
scenarioName: z30.string(),
|
|
1509
1621
|
/** The target ID (skill, agent, etc.) */
|
|
1510
|
-
targetId:
|
|
1622
|
+
targetId: z30.string(),
|
|
1511
1623
|
/** The target name for display */
|
|
1512
|
-
targetName:
|
|
1624
|
+
targetName: z30.string(),
|
|
1513
1625
|
/** Step number in the current scenario execution */
|
|
1514
|
-
stepNumber:
|
|
1626
|
+
stepNumber: z30.number(),
|
|
1515
1627
|
/** Type of trace event */
|
|
1516
|
-
type:
|
|
1628
|
+
type: z30.enum(LiveTraceEventType),
|
|
1517
1629
|
/** Tool name if this is a tool_use event */
|
|
1518
|
-
toolName:
|
|
1630
|
+
toolName: z30.string().optional(),
|
|
1519
1631
|
/** Tool arguments preview (truncated JSON) */
|
|
1520
|
-
toolArgs:
|
|
1632
|
+
toolArgs: z30.string().optional(),
|
|
1521
1633
|
/** Output preview (truncated text) */
|
|
1522
|
-
outputPreview:
|
|
1634
|
+
outputPreview: z30.string().optional(),
|
|
1523
1635
|
/** File path for file operations */
|
|
1524
|
-
filePath:
|
|
1636
|
+
filePath: z30.string().optional(),
|
|
1525
1637
|
/** Elapsed time in milliseconds for progress events */
|
|
1526
|
-
elapsedMs:
|
|
1638
|
+
elapsedMs: z30.number().optional(),
|
|
1527
1639
|
/** Thinking/reasoning text from Claude */
|
|
1528
|
-
thinking:
|
|
1640
|
+
thinking: z30.string().optional(),
|
|
1529
1641
|
/** Timestamp when this event occurred */
|
|
1530
|
-
timestamp:
|
|
1642
|
+
timestamp: z30.string(),
|
|
1531
1643
|
/** Whether this is the final event for this scenario */
|
|
1532
|
-
isComplete:
|
|
1644
|
+
isComplete: z30.boolean()
|
|
1533
1645
|
});
|
|
1534
1646
|
var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
|
|
1535
1647
|
function parseTraceEventLine(line) {
|
|
@@ -1558,40 +1670,40 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
|
|
|
1558
1670
|
TriggerType2["SCHEDULED"] = "SCHEDULED";
|
|
1559
1671
|
return TriggerType2;
|
|
1560
1672
|
})(TriggerType || {});
|
|
1561
|
-
var TriggerMetadataSchema =
|
|
1562
|
-
version:
|
|
1563
|
-
resourceUpdated:
|
|
1564
|
-
scheduleId:
|
|
1673
|
+
var TriggerMetadataSchema = z31.object({
|
|
1674
|
+
version: z31.string().optional(),
|
|
1675
|
+
resourceUpdated: z31.array(z31.string()).optional(),
|
|
1676
|
+
scheduleId: z31.string().optional()
|
|
1565
1677
|
});
|
|
1566
|
-
var TriggerSchema =
|
|
1567
|
-
id:
|
|
1678
|
+
var TriggerSchema = z31.object({
|
|
1679
|
+
id: z31.string(),
|
|
1568
1680
|
metadata: TriggerMetadataSchema.optional(),
|
|
1569
|
-
type:
|
|
1681
|
+
type: z31.nativeEnum(TriggerType)
|
|
1570
1682
|
});
|
|
1571
|
-
var DiffLineTypeSchema =
|
|
1572
|
-
var DiffLineSchema =
|
|
1683
|
+
var DiffLineTypeSchema = z31.enum(["added", "removed", "unchanged"]);
|
|
1684
|
+
var DiffLineSchema = z31.object({
|
|
1573
1685
|
type: DiffLineTypeSchema,
|
|
1574
|
-
content:
|
|
1575
|
-
lineNumber:
|
|
1576
|
-
});
|
|
1577
|
-
var DiffContentSchema =
|
|
1578
|
-
path:
|
|
1579
|
-
expected:
|
|
1580
|
-
actual:
|
|
1581
|
-
diffLines:
|
|
1582
|
-
renamedFrom:
|
|
1686
|
+
content: z31.string(),
|
|
1687
|
+
lineNumber: z31.number()
|
|
1688
|
+
});
|
|
1689
|
+
var DiffContentSchema = z31.object({
|
|
1690
|
+
path: z31.string(),
|
|
1691
|
+
expected: z31.string(),
|
|
1692
|
+
actual: z31.string(),
|
|
1693
|
+
diffLines: z31.array(DiffLineSchema),
|
|
1694
|
+
renamedFrom: z31.string().optional(),
|
|
1583
1695
|
/** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
|
|
1584
|
-
isInfrastructure:
|
|
1696
|
+
isInfrastructure: z31.boolean().optional()
|
|
1585
1697
|
});
|
|
1586
|
-
var CommandExecutionSchema =
|
|
1587
|
-
command:
|
|
1588
|
-
exitCode:
|
|
1589
|
-
output:
|
|
1590
|
-
duration:
|
|
1698
|
+
var CommandExecutionSchema = z31.object({
|
|
1699
|
+
command: z31.string(),
|
|
1700
|
+
exitCode: z31.number(),
|
|
1701
|
+
output: z31.string().optional(),
|
|
1702
|
+
duration: z31.number()
|
|
1591
1703
|
});
|
|
1592
|
-
var FileModificationSchema =
|
|
1593
|
-
path:
|
|
1594
|
-
action:
|
|
1704
|
+
var FileModificationSchema = z31.object({
|
|
1705
|
+
path: z31.string(),
|
|
1706
|
+
action: z31.enum(["created", "modified", "deleted"])
|
|
1595
1707
|
});
|
|
1596
1708
|
var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
1597
1709
|
TemplateFileStatus2["NEW"] = "new";
|
|
@@ -1599,58 +1711,58 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
|
1599
1711
|
TemplateFileStatus2["UNCHANGED"] = "unchanged";
|
|
1600
1712
|
return TemplateFileStatus2;
|
|
1601
1713
|
})(TemplateFileStatus || {});
|
|
1602
|
-
var TemplateFileSchema =
|
|
1714
|
+
var TemplateFileSchema = z31.object({
|
|
1603
1715
|
/** Relative path within the template */
|
|
1604
|
-
path:
|
|
1716
|
+
path: z31.string(),
|
|
1605
1717
|
/** Full file content after execution */
|
|
1606
|
-
content:
|
|
1718
|
+
content: z31.string(),
|
|
1607
1719
|
/** File status (new, modified, unchanged) */
|
|
1608
|
-
status:
|
|
1720
|
+
status: z31.enum(["new", "modified", "unchanged"]),
|
|
1609
1721
|
/** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
|
|
1610
|
-
isInfrastructure:
|
|
1722
|
+
isInfrastructure: z31.boolean().optional()
|
|
1611
1723
|
});
|
|
1612
|
-
var ApiCallSchema =
|
|
1613
|
-
endpoint:
|
|
1614
|
-
tokensUsed:
|
|
1615
|
-
duration:
|
|
1724
|
+
var ApiCallSchema = z31.object({
|
|
1725
|
+
endpoint: z31.string(),
|
|
1726
|
+
tokensUsed: z31.number(),
|
|
1727
|
+
duration: z31.number()
|
|
1616
1728
|
});
|
|
1617
|
-
var ExecutionTraceSchema =
|
|
1618
|
-
commands:
|
|
1619
|
-
filesModified:
|
|
1620
|
-
apiCalls:
|
|
1621
|
-
totalDuration:
|
|
1729
|
+
var ExecutionTraceSchema = z31.object({
|
|
1730
|
+
commands: z31.array(CommandExecutionSchema),
|
|
1731
|
+
filesModified: z31.array(FileModificationSchema),
|
|
1732
|
+
apiCalls: z31.array(ApiCallSchema),
|
|
1733
|
+
totalDuration: z31.number()
|
|
1622
1734
|
});
|
|
1623
|
-
var RunAnalysisFindingSchema =
|
|
1624
|
-
category:
|
|
1735
|
+
var RunAnalysisFindingSchema = z31.object({
|
|
1736
|
+
category: z31.enum([
|
|
1625
1737
|
"failure_pattern",
|
|
1626
1738
|
"cost_waste",
|
|
1627
1739
|
"flakiness",
|
|
1628
1740
|
"inefficiency",
|
|
1629
1741
|
"positive"
|
|
1630
1742
|
]),
|
|
1631
|
-
severity:
|
|
1632
|
-
description:
|
|
1633
|
-
affectedScenarios:
|
|
1634
|
-
recommendation:
|
|
1743
|
+
severity: z31.enum(["high", "medium", "low"]),
|
|
1744
|
+
description: z31.string(),
|
|
1745
|
+
affectedScenarios: z31.array(z31.string()),
|
|
1746
|
+
recommendation: z31.string().optional()
|
|
1635
1747
|
});
|
|
1636
|
-
var RunAnalysisSchema =
|
|
1637
|
-
generatedAt:
|
|
1638
|
-
summary:
|
|
1639
|
-
findings:
|
|
1748
|
+
var RunAnalysisSchema = z31.object({
|
|
1749
|
+
generatedAt: z31.string(),
|
|
1750
|
+
summary: z31.string(),
|
|
1751
|
+
findings: z31.array(RunAnalysisFindingSchema)
|
|
1640
1752
|
});
|
|
1641
1753
|
var EvalRunSchema = TenantEntitySchema.extend({
|
|
1642
1754
|
/** Agent ID for this run */
|
|
1643
|
-
agentId:
|
|
1755
|
+
agentId: z31.string().optional(),
|
|
1644
1756
|
/** Preset ID that originated this run (optional) */
|
|
1645
|
-
presetId:
|
|
1757
|
+
presetId: z31.string().optional(),
|
|
1646
1758
|
/** Scenario IDs to run (always present — resolved server-side from tags when needed) */
|
|
1647
|
-
scenarioIds:
|
|
1759
|
+
scenarioIds: z31.array(z31.string()),
|
|
1648
1760
|
/** Current status */
|
|
1649
1761
|
status: EvalStatusSchema,
|
|
1650
1762
|
/** Progress percentage (0-100) */
|
|
1651
|
-
progress:
|
|
1763
|
+
progress: z31.number(),
|
|
1652
1764
|
/** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
|
|
1653
|
-
results:
|
|
1765
|
+
results: z31.array(z31.lazy(() => EvalRunResultSchema)),
|
|
1654
1766
|
/** Aggregated metrics across all results */
|
|
1655
1767
|
aggregateMetrics: EvalMetricsSchema,
|
|
1656
1768
|
/** Aggregated LLM trace summary */
|
|
@@ -1658,49 +1770,49 @@ var EvalRunSchema = TenantEntitySchema.extend({
|
|
|
1658
1770
|
/** What triggered this run */
|
|
1659
1771
|
trigger: TriggerSchema.optional(),
|
|
1660
1772
|
/** When the run started (set when evaluation is triggered) */
|
|
1661
|
-
startedAt:
|
|
1773
|
+
startedAt: z31.string().optional(),
|
|
1662
1774
|
/** When the run completed */
|
|
1663
|
-
completedAt:
|
|
1775
|
+
completedAt: z31.string().optional(),
|
|
1664
1776
|
/** Live trace events captured during execution (for playback on results page) */
|
|
1665
|
-
liveTraceEvents:
|
|
1777
|
+
liveTraceEvents: z31.array(LiveTraceEventSchema).optional(),
|
|
1666
1778
|
/** Remote job ID for tracking execution in Dev Machines */
|
|
1667
|
-
jobId:
|
|
1779
|
+
jobId: z31.string().optional(),
|
|
1668
1780
|
/** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
|
|
1669
|
-
jobStatus:
|
|
1781
|
+
jobStatus: z31.string().optional(),
|
|
1670
1782
|
/** Remote job error message if the job failed */
|
|
1671
|
-
jobError:
|
|
1783
|
+
jobError: z31.string().optional(),
|
|
1672
1784
|
/** Timestamp of the last job status check */
|
|
1673
|
-
jobStatusCheckedAt:
|
|
1785
|
+
jobStatusCheckedAt: z31.string().optional(),
|
|
1674
1786
|
/** Unified capability IDs */
|
|
1675
|
-
capabilityIds:
|
|
1787
|
+
capabilityIds: z31.array(z31.string()).optional(),
|
|
1676
1788
|
/** Map of capabilityId to capabilityVersionId for version pinning */
|
|
1677
|
-
capabilityVersions:
|
|
1789
|
+
capabilityVersions: z31.record(z31.string(), z31.string()).optional(),
|
|
1678
1790
|
/** Tags used to select scenarios for this run (for traceability) */
|
|
1679
|
-
tags:
|
|
1791
|
+
tags: z31.array(z31.string()).optional(),
|
|
1680
1792
|
/** How many times each scenario is executed within this eval run. Default: 1. Max: 20. */
|
|
1681
|
-
runsPerScenario:
|
|
1793
|
+
runsPerScenario: z31.number().int().min(1).max(20).optional(),
|
|
1682
1794
|
/** Variable values to substitute in scenario trigger prompts at runtime */
|
|
1683
|
-
variables:
|
|
1795
|
+
variables: z31.record(z31.string(), z31.string()).optional(),
|
|
1684
1796
|
/** Snapshot of agent configuration captured at run creation time */
|
|
1685
|
-
agentSnapshot:
|
|
1686
|
-
name:
|
|
1797
|
+
agentSnapshot: z31.object({
|
|
1798
|
+
name: z31.string().optional(),
|
|
1687
1799
|
agentType: AgentTypeSchema.optional(),
|
|
1688
1800
|
runCommand: AgentRunCommandSchema.optional(),
|
|
1689
|
-
systemPrompt:
|
|
1801
|
+
systemPrompt: z31.string().nullable().optional(),
|
|
1690
1802
|
/** @deprecated retained for backward compat with stored snapshots */
|
|
1691
1803
|
modelConfig: ModelConfigSchema.optional(),
|
|
1692
|
-
config:
|
|
1804
|
+
config: z31.record(z31.string(), z31.unknown()).optional()
|
|
1693
1805
|
}).optional(),
|
|
1694
1806
|
/** UUID linking all runs in a comparison group */
|
|
1695
|
-
comparisonGroupId:
|
|
1807
|
+
comparisonGroupId: z31.string().optional(),
|
|
1696
1808
|
/** Human-readable label for this variant (e.g., "MCP: Wix Stores") */
|
|
1697
|
-
comparisonLabel:
|
|
1809
|
+
comparisonLabel: z31.string().optional(),
|
|
1698
1810
|
/** LLM-generated analysis of the completed run */
|
|
1699
1811
|
runAnalysis: RunAnalysisSchema.optional(),
|
|
1700
1812
|
/** IDs of folders this run belongs to (read-only, managed via AddRunToFolder / RemoveRunFromFolder) */
|
|
1701
|
-
folderIds:
|
|
1813
|
+
folderIds: z31.array(z31.string()).optional(),
|
|
1702
1814
|
/** ID of the schedule that triggered this run, if any (read-only) */
|
|
1703
|
-
scheduleId:
|
|
1815
|
+
scheduleId: z31.string().optional()
|
|
1704
1816
|
});
|
|
1705
1817
|
var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
1706
1818
|
id: true,
|
|
@@ -1716,60 +1828,60 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
|
1716
1828
|
agentSnapshot: true
|
|
1717
1829
|
}).extend({
|
|
1718
1830
|
/** Optional on input — backend resolves from tags when not provided */
|
|
1719
|
-
scenarioIds:
|
|
1831
|
+
scenarioIds: z31.array(z31.string()).optional()
|
|
1720
1832
|
}).refine(
|
|
1721
1833
|
(data) => data.scenarioIds && data.scenarioIds.length > 0 || data.tags && data.tags.length > 0,
|
|
1722
1834
|
{ message: "Either scenarioIds or tags must be provided" }
|
|
1723
1835
|
);
|
|
1724
|
-
var EvaluationProgressSchema =
|
|
1725
|
-
runId:
|
|
1726
|
-
targetId:
|
|
1727
|
-
totalScenarios:
|
|
1728
|
-
completedScenarios:
|
|
1729
|
-
scenarioProgress:
|
|
1730
|
-
|
|
1731
|
-
scenarioId:
|
|
1732
|
-
currentStep:
|
|
1733
|
-
error:
|
|
1836
|
+
var EvaluationProgressSchema = z31.object({
|
|
1837
|
+
runId: z31.string(),
|
|
1838
|
+
targetId: z31.string(),
|
|
1839
|
+
totalScenarios: z31.number(),
|
|
1840
|
+
completedScenarios: z31.number(),
|
|
1841
|
+
scenarioProgress: z31.array(
|
|
1842
|
+
z31.object({
|
|
1843
|
+
scenarioId: z31.string(),
|
|
1844
|
+
currentStep: z31.string(),
|
|
1845
|
+
error: z31.string().optional()
|
|
1734
1846
|
})
|
|
1735
1847
|
),
|
|
1736
|
-
createdAt:
|
|
1737
|
-
});
|
|
1738
|
-
var EvaluationLogSchema =
|
|
1739
|
-
runId:
|
|
1740
|
-
scenarioId:
|
|
1741
|
-
log:
|
|
1742
|
-
level:
|
|
1743
|
-
message:
|
|
1744
|
-
args:
|
|
1745
|
-
error:
|
|
1848
|
+
createdAt: z31.number()
|
|
1849
|
+
});
|
|
1850
|
+
var EvaluationLogSchema = z31.object({
|
|
1851
|
+
runId: z31.string(),
|
|
1852
|
+
scenarioId: z31.string(),
|
|
1853
|
+
log: z31.object({
|
|
1854
|
+
level: z31.enum(["info", "error", "debug"]),
|
|
1855
|
+
message: z31.string().optional(),
|
|
1856
|
+
args: z31.array(z31.any()).optional(),
|
|
1857
|
+
error: z31.string().optional()
|
|
1746
1858
|
})
|
|
1747
1859
|
});
|
|
1748
1860
|
var LLM_TIMEOUT = 12e4;
|
|
1749
1861
|
|
|
1750
1862
|
// src/evaluation/conversation.ts
|
|
1751
|
-
import { z as
|
|
1752
|
-
var TextBlockSchema =
|
|
1753
|
-
type:
|
|
1754
|
-
text:
|
|
1755
|
-
});
|
|
1756
|
-
var ThinkingBlockSchema =
|
|
1757
|
-
type:
|
|
1758
|
-
thinking:
|
|
1759
|
-
});
|
|
1760
|
-
var ToolUseBlockSchema =
|
|
1761
|
-
type:
|
|
1762
|
-
toolName:
|
|
1763
|
-
toolId:
|
|
1764
|
-
input:
|
|
1765
|
-
});
|
|
1766
|
-
var ToolResultBlockSchema =
|
|
1767
|
-
type:
|
|
1768
|
-
toolUseId:
|
|
1769
|
-
content:
|
|
1770
|
-
isError:
|
|
1771
|
-
});
|
|
1772
|
-
var ConversationBlockSchema =
|
|
1863
|
+
import { z as z32 } from "zod";
|
|
1864
|
+
var TextBlockSchema = z32.object({
|
|
1865
|
+
type: z32.literal("text"),
|
|
1866
|
+
text: z32.string()
|
|
1867
|
+
});
|
|
1868
|
+
var ThinkingBlockSchema = z32.object({
|
|
1869
|
+
type: z32.literal("thinking"),
|
|
1870
|
+
thinking: z32.string()
|
|
1871
|
+
});
|
|
1872
|
+
var ToolUseBlockSchema = z32.object({
|
|
1873
|
+
type: z32.literal("tool_use"),
|
|
1874
|
+
toolName: z32.string(),
|
|
1875
|
+
toolId: z32.string(),
|
|
1876
|
+
input: z32.unknown()
|
|
1877
|
+
});
|
|
1878
|
+
var ToolResultBlockSchema = z32.object({
|
|
1879
|
+
type: z32.literal("tool_result"),
|
|
1880
|
+
toolUseId: z32.string(),
|
|
1881
|
+
content: z32.string(),
|
|
1882
|
+
isError: z32.boolean().optional()
|
|
1883
|
+
});
|
|
1884
|
+
var ConversationBlockSchema = z32.discriminatedUnion("type", [
|
|
1773
1885
|
TextBlockSchema,
|
|
1774
1886
|
ThinkingBlockSchema,
|
|
1775
1887
|
ToolUseBlockSchema,
|
|
@@ -1780,22 +1892,22 @@ var ConversationMessageRoles = [
|
|
|
1780
1892
|
"user",
|
|
1781
1893
|
"system"
|
|
1782
1894
|
];
|
|
1783
|
-
var ConversationMessageSchema =
|
|
1784
|
-
role:
|
|
1785
|
-
content:
|
|
1786
|
-
timestamp:
|
|
1895
|
+
var ConversationMessageSchema = z32.object({
|
|
1896
|
+
role: z32.enum(ConversationMessageRoles),
|
|
1897
|
+
content: z32.array(ConversationBlockSchema),
|
|
1898
|
+
timestamp: z32.string()
|
|
1787
1899
|
});
|
|
1788
|
-
var ScenarioConversationSchema =
|
|
1789
|
-
id:
|
|
1790
|
-
projectId:
|
|
1791
|
-
evalRunId:
|
|
1792
|
-
resultId:
|
|
1793
|
-
messages:
|
|
1794
|
-
createdAt:
|
|
1900
|
+
var ScenarioConversationSchema = z32.object({
|
|
1901
|
+
id: z32.string(),
|
|
1902
|
+
projectId: z32.string(),
|
|
1903
|
+
evalRunId: z32.string(),
|
|
1904
|
+
resultId: z32.string(),
|
|
1905
|
+
messages: z32.array(ConversationMessageSchema),
|
|
1906
|
+
createdAt: z32.string()
|
|
1795
1907
|
});
|
|
1796
|
-
var ConversationResponseSchema =
|
|
1797
|
-
messages:
|
|
1798
|
-
isPartial:
|
|
1908
|
+
var ConversationResponseSchema = z32.object({
|
|
1909
|
+
messages: z32.array(ConversationMessageSchema),
|
|
1910
|
+
isPartial: z32.boolean()
|
|
1799
1911
|
});
|
|
1800
1912
|
|
|
1801
1913
|
// src/evaluation/eval-result.ts
|
|
@@ -1806,98 +1918,98 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
|
1806
1918
|
AssertionResultStatus2["ERROR"] = "error";
|
|
1807
1919
|
return AssertionResultStatus2;
|
|
1808
1920
|
})(AssertionResultStatus || {});
|
|
1809
|
-
var AssertionResultSchema =
|
|
1810
|
-
id:
|
|
1811
|
-
assertionId:
|
|
1812
|
-
assertionType:
|
|
1813
|
-
assertionName:
|
|
1814
|
-
status:
|
|
1815
|
-
message:
|
|
1816
|
-
expected:
|
|
1817
|
-
actual:
|
|
1818
|
-
duration:
|
|
1819
|
-
details:
|
|
1820
|
-
llmTraceSteps:
|
|
1821
|
-
});
|
|
1822
|
-
var EvalRunResultSchema =
|
|
1823
|
-
id:
|
|
1824
|
-
targetId:
|
|
1825
|
-
targetName:
|
|
1921
|
+
var AssertionResultSchema = z33.object({
|
|
1922
|
+
id: z33.string(),
|
|
1923
|
+
assertionId: z33.string(),
|
|
1924
|
+
assertionType: z33.string(),
|
|
1925
|
+
assertionName: z33.string(),
|
|
1926
|
+
status: z33.enum(AssertionResultStatus),
|
|
1927
|
+
message: z33.string().optional(),
|
|
1928
|
+
expected: z33.string().optional(),
|
|
1929
|
+
actual: z33.string().optional(),
|
|
1930
|
+
duration: z33.number().optional(),
|
|
1931
|
+
details: z33.record(z33.string(), z33.unknown()).optional(),
|
|
1932
|
+
llmTraceSteps: z33.array(LLMTraceStepSchema).optional()
|
|
1933
|
+
});
|
|
1934
|
+
var EvalRunResultSchema = z33.object({
|
|
1935
|
+
id: z33.string(),
|
|
1936
|
+
targetId: z33.string(),
|
|
1937
|
+
targetName: z33.string().optional(),
|
|
1826
1938
|
/** SkillVersion ID used for this evaluation (for version tracking) */
|
|
1827
|
-
skillVersionId:
|
|
1939
|
+
skillVersionId: z33.string().optional(),
|
|
1828
1940
|
/** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
|
|
1829
|
-
skillVersion:
|
|
1830
|
-
scenarioId:
|
|
1831
|
-
scenarioName:
|
|
1941
|
+
skillVersion: z33.string().optional(),
|
|
1942
|
+
scenarioId: z33.string(),
|
|
1943
|
+
scenarioName: z33.string(),
|
|
1832
1944
|
/** Snapshot of the trigger prompt used during the run (prevents stale display after edits) */
|
|
1833
|
-
triggerPrompt:
|
|
1945
|
+
triggerPrompt: z33.string().optional(),
|
|
1834
1946
|
modelConfig: ModelConfigSchema.optional(),
|
|
1835
|
-
assertionResults:
|
|
1947
|
+
assertionResults: z33.array(AssertionResultSchema),
|
|
1836
1948
|
metrics: EvalMetricsSchema.optional(),
|
|
1837
|
-
passed:
|
|
1838
|
-
failed:
|
|
1839
|
-
passRate:
|
|
1840
|
-
duration:
|
|
1841
|
-
outputText:
|
|
1842
|
-
files:
|
|
1843
|
-
fileDiffs:
|
|
1949
|
+
passed: z33.number(),
|
|
1950
|
+
failed: z33.number(),
|
|
1951
|
+
passRate: z33.number(),
|
|
1952
|
+
duration: z33.number(),
|
|
1953
|
+
outputText: z33.string().optional(),
|
|
1954
|
+
files: z33.array(ExpectedFileSchema).optional(),
|
|
1955
|
+
fileDiffs: z33.array(DiffContentSchema).optional(),
|
|
1844
1956
|
/** Full template files after execution with status indicators */
|
|
1845
|
-
templateFiles:
|
|
1846
|
-
startedAt:
|
|
1847
|
-
completedAt:
|
|
1957
|
+
templateFiles: z33.array(TemplateFileSchema).optional(),
|
|
1958
|
+
startedAt: z33.string().optional(),
|
|
1959
|
+
completedAt: z33.string().optional(),
|
|
1848
1960
|
llmTrace: LLMTraceSchema.optional(),
|
|
1849
1961
|
/** Full conversation messages (only present in transit; stripped before DB storage) */
|
|
1850
|
-
conversation:
|
|
1962
|
+
conversation: z33.array(ConversationMessageSchema).optional(),
|
|
1851
1963
|
/** 0-based iteration index when a scenario is run multiple times within a single eval run */
|
|
1852
|
-
iterationIndex:
|
|
1853
|
-
});
|
|
1854
|
-
var PromptResultSchema =
|
|
1855
|
-
text:
|
|
1856
|
-
files:
|
|
1857
|
-
finishReason:
|
|
1858
|
-
reasoning:
|
|
1859
|
-
reasoningDetails:
|
|
1860
|
-
toolCalls:
|
|
1861
|
-
toolResults:
|
|
1862
|
-
warnings:
|
|
1863
|
-
sources:
|
|
1864
|
-
steps:
|
|
1865
|
-
generationTimeMs:
|
|
1866
|
-
prompt:
|
|
1867
|
-
systemPrompt:
|
|
1868
|
-
usage:
|
|
1869
|
-
totalTokens:
|
|
1870
|
-
totalMicrocentsSpent:
|
|
1964
|
+
iterationIndex: z33.number().int().min(0).optional()
|
|
1965
|
+
});
|
|
1966
|
+
var PromptResultSchema = z33.object({
|
|
1967
|
+
text: z33.string(),
|
|
1968
|
+
files: z33.array(z33.unknown()).optional(),
|
|
1969
|
+
finishReason: z33.string().optional(),
|
|
1970
|
+
reasoning: z33.string().optional(),
|
|
1971
|
+
reasoningDetails: z33.unknown().optional(),
|
|
1972
|
+
toolCalls: z33.array(z33.unknown()).optional(),
|
|
1973
|
+
toolResults: z33.array(z33.unknown()).optional(),
|
|
1974
|
+
warnings: z33.array(z33.unknown()).optional(),
|
|
1975
|
+
sources: z33.array(z33.unknown()).optional(),
|
|
1976
|
+
steps: z33.array(z33.unknown()),
|
|
1977
|
+
generationTimeMs: z33.number(),
|
|
1978
|
+
prompt: z33.string(),
|
|
1979
|
+
systemPrompt: z33.string(),
|
|
1980
|
+
usage: z33.object({
|
|
1981
|
+
totalTokens: z33.number().optional(),
|
|
1982
|
+
totalMicrocentsSpent: z33.number().optional()
|
|
1871
1983
|
})
|
|
1872
1984
|
});
|
|
1873
|
-
var EvaluationResultSchema =
|
|
1874
|
-
id:
|
|
1875
|
-
runId:
|
|
1876
|
-
timestamp:
|
|
1985
|
+
var EvaluationResultSchema = z33.object({
|
|
1986
|
+
id: z33.string(),
|
|
1987
|
+
runId: z33.string(),
|
|
1988
|
+
timestamp: z33.number(),
|
|
1877
1989
|
promptResult: PromptResultSchema,
|
|
1878
|
-
testResults:
|
|
1879
|
-
tags:
|
|
1880
|
-
feedback:
|
|
1881
|
-
score:
|
|
1882
|
-
suiteId:
|
|
1883
|
-
});
|
|
1884
|
-
var LeanEvaluationResultSchema =
|
|
1885
|
-
id:
|
|
1886
|
-
runId:
|
|
1887
|
-
timestamp:
|
|
1888
|
-
tags:
|
|
1889
|
-
scenarioId:
|
|
1890
|
-
scenarioVersion:
|
|
1891
|
-
targetId:
|
|
1892
|
-
targetVersion:
|
|
1893
|
-
suiteId:
|
|
1894
|
-
score:
|
|
1895
|
-
time:
|
|
1896
|
-
microcentsSpent:
|
|
1990
|
+
testResults: z33.array(z33.unknown()),
|
|
1991
|
+
tags: z33.array(z33.string()).optional(),
|
|
1992
|
+
feedback: z33.string().optional(),
|
|
1993
|
+
score: z33.number(),
|
|
1994
|
+
suiteId: z33.string().optional()
|
|
1995
|
+
});
|
|
1996
|
+
var LeanEvaluationResultSchema = z33.object({
|
|
1997
|
+
id: z33.string(),
|
|
1998
|
+
runId: z33.string(),
|
|
1999
|
+
timestamp: z33.number(),
|
|
2000
|
+
tags: z33.array(z33.string()).optional(),
|
|
2001
|
+
scenarioId: z33.string(),
|
|
2002
|
+
scenarioVersion: z33.number().optional(),
|
|
2003
|
+
targetId: z33.string(),
|
|
2004
|
+
targetVersion: z33.number().optional(),
|
|
2005
|
+
suiteId: z33.string().optional(),
|
|
2006
|
+
score: z33.number(),
|
|
2007
|
+
time: z33.number().optional(),
|
|
2008
|
+
microcentsSpent: z33.number().optional()
|
|
1897
2009
|
});
|
|
1898
2010
|
|
|
1899
2011
|
// src/evaluation/eval-run-folder.ts
|
|
1900
|
-
import { z as
|
|
2012
|
+
import { z as z34 } from "zod";
|
|
1901
2013
|
var EvalRunFolderSchema = TenantEntitySchema.extend({});
|
|
1902
2014
|
var CreateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
|
|
1903
2015
|
id: true,
|
|
@@ -1911,26 +2023,26 @@ var UpdateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
|
|
|
1911
2023
|
updatedAt: true,
|
|
1912
2024
|
deleted: true
|
|
1913
2025
|
}).partial();
|
|
1914
|
-
var EvalRunFolderMembershipSchema =
|
|
1915
|
-
folderId:
|
|
1916
|
-
evalRunId:
|
|
1917
|
-
projectId:
|
|
1918
|
-
createdAt:
|
|
2026
|
+
var EvalRunFolderMembershipSchema = z34.object({
|
|
2027
|
+
folderId: z34.string(),
|
|
2028
|
+
evalRunId: z34.string(),
|
|
2029
|
+
projectId: z34.string(),
|
|
2030
|
+
createdAt: z34.string()
|
|
1919
2031
|
});
|
|
1920
2032
|
|
|
1921
2033
|
// src/project/project.ts
|
|
1922
|
-
import { z as
|
|
2034
|
+
import { z as z35 } from "zod";
|
|
1923
2035
|
var ProjectSchema = BaseEntitySchema.extend({
|
|
1924
|
-
appId:
|
|
1925
|
-
scenarioTags:
|
|
2036
|
+
appId: z35.string().optional().describe("The ID of the app in Dev Center"),
|
|
2037
|
+
scenarioTags: z35.array(z35.string()).optional().describe("Project-level tag vocabulary for scenarios"),
|
|
1926
2038
|
/** Per-project Wix auth token (write-only — never returned in GET responses). null = clear. */
|
|
1927
|
-
wixAuthToken:
|
|
2039
|
+
wixAuthToken: z35.string().nullable().optional().describe("Wix auth token for CLI/MCP authentication (encrypted at rest)"),
|
|
1928
2040
|
/** Per-project Base44 auth file content (write-only — never returned in GET responses). null = clear. */
|
|
1929
|
-
base44AuthFile:
|
|
2041
|
+
base44AuthFile: z35.string().nullable().optional().describe("Base64-encoded Base44 auth file content (encrypted at rest)"),
|
|
1930
2042
|
/** Resolved at runtime from the encrypted Wix auth token */
|
|
1931
|
-
wixAuthEmail:
|
|
2043
|
+
wixAuthEmail: z35.string().optional().describe("Email associated with the Wix auth token (resolved at runtime)"),
|
|
1932
2044
|
/** Resolved at runtime from the encrypted Base44 auth file */
|
|
1933
|
-
base44AuthEmail:
|
|
2045
|
+
base44AuthEmail: z35.string().optional().describe("Email from the Base44 auth file (resolved at runtime)")
|
|
1934
2046
|
});
|
|
1935
2047
|
var CreateProjectInputSchema = ProjectSchema.omit({
|
|
1936
2048
|
id: true,
|
|
@@ -1940,34 +2052,34 @@ var CreateProjectInputSchema = ProjectSchema.omit({
|
|
|
1940
2052
|
wixAuthEmail: true,
|
|
1941
2053
|
base44AuthEmail: true
|
|
1942
2054
|
}).extend({
|
|
1943
|
-
appId:
|
|
2055
|
+
appId: z35.string().describe(
|
|
1944
2056
|
"Required: The ID of the app in Dev Center for credential scoping"
|
|
1945
2057
|
)
|
|
1946
2058
|
});
|
|
1947
2059
|
var UpdateProjectInputSchema = CreateProjectInputSchema.partial();
|
|
1948
2060
|
|
|
1949
2061
|
// src/template/template.ts
|
|
1950
|
-
import { z as
|
|
1951
|
-
var SourceFileSchema =
|
|
1952
|
-
path:
|
|
1953
|
-
content:
|
|
2062
|
+
import { z as z36 } from "zod";
|
|
2063
|
+
var SourceFileSchema = z36.object({
|
|
2064
|
+
path: z36.string().min(1),
|
|
2065
|
+
content: z36.string()
|
|
1954
2066
|
});
|
|
1955
|
-
var ExtraFileSchema =
|
|
1956
|
-
path:
|
|
1957
|
-
content:
|
|
2067
|
+
var ExtraFileSchema = z36.object({
|
|
2068
|
+
path: z36.string().min(1),
|
|
2069
|
+
content: z36.string().optional(),
|
|
1958
2070
|
gitSource: GitHubSourceSchema.optional()
|
|
1959
2071
|
}).refine((ef) => ef.content !== void 0 || ef.gitSource !== void 0, {
|
|
1960
2072
|
message: "ExtraFile must have either content or gitSource"
|
|
1961
2073
|
});
|
|
1962
|
-
var TemplateFileEntrySchema =
|
|
1963
|
-
path:
|
|
1964
|
-
content:
|
|
1965
|
-
extra:
|
|
2074
|
+
var TemplateFileEntrySchema = z36.object({
|
|
2075
|
+
path: z36.string().min(1),
|
|
2076
|
+
content: z36.string(),
|
|
2077
|
+
extra: z36.boolean()
|
|
1966
2078
|
});
|
|
1967
2079
|
var TemplateSchema = TenantEntitySchema.extend({
|
|
1968
2080
|
source: GitHubSourceSchema.optional(),
|
|
1969
|
-
sourceFiles:
|
|
1970
|
-
extraFiles:
|
|
2081
|
+
sourceFiles: z36.array(SourceFileSchema).optional(),
|
|
2082
|
+
extraFiles: z36.array(ExtraFileSchema).optional()
|
|
1971
2083
|
});
|
|
1972
2084
|
var singleSourceKind = (t) => !(t.source && t.sourceFiles?.length);
|
|
1973
2085
|
var singleSourceKindError = {
|
|
@@ -1987,66 +2099,66 @@ var UpdateTemplateInputSchema = TemplateSchema.omit({
|
|
|
1987
2099
|
}).partial().refine(singleSourceKind, singleSourceKindError);
|
|
1988
2100
|
|
|
1989
2101
|
// src/agent/agent-config.ts
|
|
1990
|
-
import { z as
|
|
1991
|
-
var BaseAgentConfigSchema =
|
|
2102
|
+
import { z as z37 } from "zod";
|
|
2103
|
+
var BaseAgentConfigSchema = z37.object({
|
|
1992
2104
|
/** Model ID (Claude or OpenAI). */
|
|
1993
2105
|
model: AnyModelSchema.optional(),
|
|
1994
2106
|
/** Sampling temperature (0–1). */
|
|
1995
|
-
temperature:
|
|
2107
|
+
temperature: z37.number().min(0).max(1).optional(),
|
|
1996
2108
|
/** Max output tokens per turn. */
|
|
1997
|
-
maxTokens:
|
|
2109
|
+
maxTokens: z37.number().int().min(1).optional(),
|
|
1998
2110
|
/** Number of agentic turns. 0 = unlimited. */
|
|
1999
|
-
maxTurns:
|
|
2111
|
+
maxTurns: z37.number().int().min(0).optional(),
|
|
2000
2112
|
/** Execution timeout in milliseconds. Overrides the default maxTurns-based calculation. */
|
|
2001
|
-
maxDurationMs:
|
|
2113
|
+
maxDurationMs: z37.number().int().min(0).optional()
|
|
2002
2114
|
});
|
|
2003
|
-
var EffortLevelSchema =
|
|
2115
|
+
var EffortLevelSchema = z37.enum(["low", "medium", "high", "max"]);
|
|
2004
2116
|
var ClaudeCodeConfigSchema = BaseAgentConfigSchema.extend({
|
|
2005
2117
|
/** Extended thinking token budget. */
|
|
2006
|
-
maxThinkingTokens:
|
|
2118
|
+
maxThinkingTokens: z37.number().int().min(0).optional(),
|
|
2007
2119
|
/** Override the default allowedTools list passed to the SDK. */
|
|
2008
|
-
allowedTools:
|
|
2120
|
+
allowedTools: z37.array(z37.string()).optional(),
|
|
2009
2121
|
/** Tools to remove from the model's context entirely. */
|
|
2010
|
-
disallowedTools:
|
|
2122
|
+
disallowedTools: z37.array(z37.string()).optional(),
|
|
2011
2123
|
/** Controls thinking depth: low, medium, high, max. */
|
|
2012
2124
|
effort: EffortLevelSchema.optional(),
|
|
2013
2125
|
/** Maximum USD spend per run. Stops execution when reached. */
|
|
2014
|
-
maxBudgetUsd:
|
|
2126
|
+
maxBudgetUsd: z37.number().min(0).optional()
|
|
2015
2127
|
});
|
|
2016
|
-
var PermissionValueSchema =
|
|
2017
|
-
var OpenCodePermissionSchema =
|
|
2018
|
-
|
|
2019
|
-
|
|
2128
|
+
var PermissionValueSchema = z37.enum(["allow", "deny"]);
|
|
2129
|
+
var OpenCodePermissionSchema = z37.record(
|
|
2130
|
+
z37.string(),
|
|
2131
|
+
z37.union([PermissionValueSchema, z37.record(z37.string(), PermissionValueSchema)])
|
|
2020
2132
|
);
|
|
2021
|
-
var ThinkingVariantSchema =
|
|
2133
|
+
var ThinkingVariantSchema = z37.enum(["high", "low", "none"]);
|
|
2022
2134
|
var OpenCodeConfigSchema = BaseAgentConfigSchema.extend({
|
|
2023
2135
|
/** Permission overrides (defaults: allow-all). */
|
|
2024
2136
|
permission: OpenCodePermissionSchema.optional(),
|
|
2025
2137
|
/** Maps to `--variant` CLI flag. 'none' omits --thinking entirely. Default: 'high'. */
|
|
2026
2138
|
thinkingVariant: ThinkingVariantSchema.optional(),
|
|
2027
2139
|
/** Nucleus sampling (0–1). Alternative to temperature. */
|
|
2028
|
-
topP:
|
|
2140
|
+
topP: z37.number().min(0).max(1).optional()
|
|
2029
2141
|
}).omit({ maxTokens: true });
|
|
2030
|
-
var ReasoningEffortSchema =
|
|
2142
|
+
var ReasoningEffortSchema = z37.enum(["low", "medium", "high"]);
|
|
2031
2143
|
var SimpleAgentConfigSchema = BaseAgentConfigSchema.extend({
|
|
2032
2144
|
/** Anthropic thinking budget in tokens. Default: 10 000. */
|
|
2033
|
-
thinkingBudgetTokens:
|
|
2145
|
+
thinkingBudgetTokens: z37.number().int().min(0).optional(),
|
|
2034
2146
|
/** Nucleus sampling (0–1). Alternative to temperature. */
|
|
2035
|
-
topP:
|
|
2147
|
+
topP: z37.number().min(0).max(1).optional(),
|
|
2036
2148
|
/** Integer seed for deterministic/reproducible results (if model supports it). */
|
|
2037
|
-
seed:
|
|
2149
|
+
seed: z37.number().int().optional(),
|
|
2038
2150
|
/** Stop sequences — model stops when generating any of these strings. */
|
|
2039
|
-
stopSequences:
|
|
2151
|
+
stopSequences: z37.array(z37.string()).optional(),
|
|
2040
2152
|
/** OpenAI reasoning effort level. Default: 'high'. */
|
|
2041
2153
|
reasoningEffort: ReasoningEffortSchema.optional(),
|
|
2042
2154
|
/** Frequency penalty (−2 to 2). Reduces repetition of same tokens. */
|
|
2043
|
-
frequencyPenalty:
|
|
2155
|
+
frequencyPenalty: z37.number().min(-2).max(2).optional(),
|
|
2044
2156
|
/** Presence penalty (−2 to 2). Encourages topic diversity. */
|
|
2045
|
-
presencePenalty:
|
|
2157
|
+
presencePenalty: z37.number().min(-2).max(2).optional()
|
|
2046
2158
|
});
|
|
2047
2159
|
|
|
2048
2160
|
// src/schedule/eval-schedule.ts
|
|
2049
|
-
import { z as
|
|
2161
|
+
import { z as z38 } from "zod";
|
|
2050
2162
|
var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
2051
2163
|
FrequencyType2["DAILY"] = "daily";
|
|
2052
2164
|
FrequencyType2["WEEKDAY"] = "weekday";
|
|
@@ -2056,31 +2168,31 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
|
2056
2168
|
})(FrequencyType || {});
|
|
2057
2169
|
var EvalScheduleSchema = TenantEntitySchema.extend({
|
|
2058
2170
|
/** Whether the schedule is active */
|
|
2059
|
-
enabled:
|
|
2171
|
+
enabled: z38.boolean(),
|
|
2060
2172
|
/** Test suite to run */
|
|
2061
|
-
suiteId:
|
|
2173
|
+
suiteId: z38.string(),
|
|
2062
2174
|
/** Preset that provides agent + entities for this schedule */
|
|
2063
|
-
presetId:
|
|
2175
|
+
presetId: z38.string(),
|
|
2064
2176
|
/** How often to run */
|
|
2065
|
-
frequencyType:
|
|
2177
|
+
frequencyType: z38.nativeEnum(FrequencyType),
|
|
2066
2178
|
/** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
|
|
2067
|
-
timeOfDay:
|
|
2179
|
+
timeOfDay: z38.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
|
|
2068
2180
|
/** Day of week (0=Sun, 6=Sat) for weekly schedules */
|
|
2069
|
-
dayOfWeek:
|
|
2181
|
+
dayOfWeek: z38.number().min(0).max(6).optional(),
|
|
2070
2182
|
/** Day of month (1-31) for monthly schedules */
|
|
2071
|
-
dayOfMonth:
|
|
2183
|
+
dayOfMonth: z38.number().min(1).max(31).optional(),
|
|
2072
2184
|
/** IANA timezone (e.g., 'America/New_York') */
|
|
2073
|
-
timezone:
|
|
2185
|
+
timezone: z38.string(),
|
|
2074
2186
|
/** ID of the last eval run created by this schedule */
|
|
2075
|
-
lastRunId:
|
|
2187
|
+
lastRunId: z38.string().optional(),
|
|
2076
2188
|
/** Denormalized status of the last run */
|
|
2077
|
-
lastRunStatus:
|
|
2189
|
+
lastRunStatus: z38.string().optional(),
|
|
2078
2190
|
/** ISO timestamp of the last run */
|
|
2079
|
-
lastRunAt:
|
|
2191
|
+
lastRunAt: z38.string().optional(),
|
|
2080
2192
|
/** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
|
|
2081
|
-
nextRunAt:
|
|
2193
|
+
nextRunAt: z38.string().optional(),
|
|
2082
2194
|
/** Per-scenario variable values forwarded to runs triggered by this schedule (scenarioId → varName → value) */
|
|
2083
|
-
variables:
|
|
2195
|
+
variables: z38.record(z38.string(), z38.record(z38.string(), z38.string())).optional()
|
|
2084
2196
|
});
|
|
2085
2197
|
function isValidTimezone(tz) {
|
|
2086
2198
|
try {
|
|
@@ -2093,14 +2205,14 @@ function isValidTimezone(tz) {
|
|
|
2093
2205
|
function validateScheduleFields(data, ctx, options) {
|
|
2094
2206
|
if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
|
|
2095
2207
|
ctx.addIssue({
|
|
2096
|
-
code:
|
|
2208
|
+
code: z38.ZodIssueCode.custom,
|
|
2097
2209
|
message: "dayOfWeek is required for weekly schedules",
|
|
2098
2210
|
path: ["dayOfWeek"]
|
|
2099
2211
|
});
|
|
2100
2212
|
}
|
|
2101
2213
|
if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
|
|
2102
2214
|
ctx.addIssue({
|
|
2103
|
-
code:
|
|
2215
|
+
code: z38.ZodIssueCode.custom,
|
|
2104
2216
|
message: "dayOfMonth is required for monthly schedules",
|
|
2105
2217
|
path: ["dayOfMonth"]
|
|
2106
2218
|
});
|
|
@@ -2108,7 +2220,7 @@ function validateScheduleFields(data, ctx, options) {
|
|
|
2108
2220
|
const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
|
|
2109
2221
|
if (shouldValidateTz && !isValidTimezone(data.timezone)) {
|
|
2110
2222
|
ctx.addIssue({
|
|
2111
|
-
code:
|
|
2223
|
+
code: z38.ZodIssueCode.custom,
|
|
2112
2224
|
message: "Invalid IANA timezone",
|
|
2113
2225
|
path: ["timezone"]
|
|
2114
2226
|
});
|
|
@@ -2271,6 +2383,7 @@ export {
|
|
|
2271
2383
|
PresetSchema,
|
|
2272
2384
|
ProjectSchema,
|
|
2273
2385
|
PromptResultSchema,
|
|
2386
|
+
ProvisionedSiteSchema,
|
|
2274
2387
|
RUN_COMMAND_LABELS,
|
|
2275
2388
|
ReasoningEffortSchema,
|
|
2276
2389
|
RuleSchema,
|
|
@@ -2278,13 +2391,20 @@ export {
|
|
|
2278
2391
|
RunAnalysisFindingSchema,
|
|
2279
2392
|
RunAnalysisSchema,
|
|
2280
2393
|
SEMVER_REGEX,
|
|
2394
|
+
SITE_SETUP_EXCLUSIVE_VARIABLES,
|
|
2281
2395
|
SKILL_FOLDER_NAME_REGEX,
|
|
2282
2396
|
SYSTEM_ASSERTIONS,
|
|
2283
2397
|
SYSTEM_ASSERTION_IDS,
|
|
2284
2398
|
ScenarioAssertionLinkSchema,
|
|
2285
2399
|
ScenarioConversationSchema,
|
|
2286
2400
|
SimpleAgentConfigSchema,
|
|
2401
|
+
SiteBootstrapHttpMethodSchema,
|
|
2402
|
+
SiteBootstrapResultSchema,
|
|
2403
|
+
SiteBootstrapSchema,
|
|
2404
|
+
SiteBootstrapStepResultSchema,
|
|
2405
|
+
SiteBootstrapStepSchema,
|
|
2287
2406
|
SiteConfigTestSchema,
|
|
2407
|
+
SiteSetupConfigSchema,
|
|
2288
2408
|
SkillFileSchema,
|
|
2289
2409
|
SkillMetadataSchema,
|
|
2290
2410
|
SkillSchema,
|
|
@@ -2338,6 +2458,7 @@ export {
|
|
|
2338
2458
|
UpdateTestScenarioInputSchema,
|
|
2339
2459
|
UpdateTestSuiteInputSchema,
|
|
2340
2460
|
VitestTestSchema,
|
|
2461
|
+
WixSiteSummarySchema,
|
|
2341
2462
|
capabilityToMcp,
|
|
2342
2463
|
capabilityToRule,
|
|
2343
2464
|
capabilityToSkill,
|
|
@@ -2345,6 +2466,7 @@ export {
|
|
|
2345
2466
|
capabilityToSubAgent,
|
|
2346
2467
|
capabilityVersionToSkillVersion,
|
|
2347
2468
|
classifyAssertionRef,
|
|
2469
|
+
extractVariableNamesFromPrompt,
|
|
2348
2470
|
formatTraceEventLine,
|
|
2349
2471
|
getSystemAssertion,
|
|
2350
2472
|
getSystemAssertions,
|
|
@@ -2357,7 +2479,10 @@ export {
|
|
|
2357
2479
|
normalizeModelId,
|
|
2358
2480
|
parseBuildCommandToArgv,
|
|
2359
2481
|
parseTraceEventLine,
|
|
2482
|
+
promptUsesSiteSetupExclusiveVariables,
|
|
2483
|
+
resolveWixOriginTemplateId,
|
|
2360
2484
|
validateAssertionConfig,
|
|
2361
|
-
validateBuildPassedParamsInAssertionLinks
|
|
2485
|
+
validateBuildPassedParamsInAssertionLinks,
|
|
2486
|
+
validateSiteSetupExclusivity
|
|
2362
2487
|
};
|
|
2363
2488
|
//# sourceMappingURL=index.mjs.map
|