@wix/evalforge-types 0.91.0 → 0.92.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +586 -449
- package/build/index.js.map +4 -4
- package/build/index.mjs +572 -448
- package/build/index.mjs.map +4 -4
- package/build/types/scenario/index.d.ts +2 -0
- package/build/types/scenario/site-setup.d.ts +132 -0
- package/build/types/scenario/test-scenario.d.ts +120 -0
- package/build/types/scenario/wix-origin-template-ids.d.ts +5 -0
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -820,14 +820,65 @@ var EnvironmentSchema = z22.object({
|
|
|
820
820
|
metaSite: MetaSiteConfigSchema.optional()
|
|
821
821
|
});
|
|
822
822
|
|
|
823
|
+
// src/scenario/site-setup.ts
|
|
824
|
+
import { z as z23 } from "zod";
|
|
825
|
+
var SiteBootstrapHttpMethodSchema = z23.enum([
|
|
826
|
+
"get",
|
|
827
|
+
"post",
|
|
828
|
+
"put",
|
|
829
|
+
"patch",
|
|
830
|
+
"delete"
|
|
831
|
+
]);
|
|
832
|
+
var SiteBootstrapStepSchema = z23.object({
|
|
833
|
+
label: z23.string().optional(),
|
|
834
|
+
method: SiteBootstrapHttpMethodSchema,
|
|
835
|
+
url: z23.string().min(1),
|
|
836
|
+
body: z23.record(z23.string(), z23.unknown()).optional()
|
|
837
|
+
});
|
|
838
|
+
var SiteBootstrapSchema = z23.object({
|
|
839
|
+
steps: z23.array(SiteBootstrapStepSchema).default([])
|
|
840
|
+
});
|
|
841
|
+
var SiteBootstrapStepResultSchema = z23.object({
|
|
842
|
+
label: z23.string().optional(),
|
|
843
|
+
statusCode: z23.number().int(),
|
|
844
|
+
ok: z23.boolean(),
|
|
845
|
+
error: z23.string().optional()
|
|
846
|
+
});
|
|
847
|
+
var SiteBootstrapResultSchema = z23.object({
|
|
848
|
+
steps: z23.array(SiteBootstrapStepResultSchema)
|
|
849
|
+
});
|
|
850
|
+
var SiteSetupConfigSchema = z23.discriminatedUnion("mode", [
|
|
851
|
+
z23.object({ mode: z23.literal("none") }),
|
|
852
|
+
z23.object({
|
|
853
|
+
mode: z23.literal("clone"),
|
|
854
|
+
sourceSiteId: z23.string().min(1),
|
|
855
|
+
bootstrap: SiteBootstrapSchema.optional()
|
|
856
|
+
}),
|
|
857
|
+
z23.object({
|
|
858
|
+
mode: z23.literal("template"),
|
|
859
|
+
templateId: z23.string().min(1),
|
|
860
|
+
bootstrap: SiteBootstrapSchema.optional()
|
|
861
|
+
})
|
|
862
|
+
]);
|
|
863
|
+
var WixSiteSummarySchema = z23.object({
|
|
864
|
+
id: z23.string(),
|
|
865
|
+
displayName: z23.string(),
|
|
866
|
+
url: z23.string().optional()
|
|
867
|
+
});
|
|
868
|
+
var ProvisionedSiteSchema = z23.object({
|
|
869
|
+
id: z23.string(),
|
|
870
|
+
url: z23.string().optional(),
|
|
871
|
+
editorUrl: z23.string().optional()
|
|
872
|
+
});
|
|
873
|
+
|
|
823
874
|
// src/scenario/test-scenario.ts
|
|
824
|
-
import { z as
|
|
875
|
+
import { z as z26 } from "zod";
|
|
825
876
|
|
|
826
877
|
// src/assertion/assertion.ts
|
|
827
|
-
import { z as
|
|
878
|
+
import { z as z25 } from "zod";
|
|
828
879
|
|
|
829
880
|
// src/assertion/build-passed-command.ts
|
|
830
|
-
import { z as
|
|
881
|
+
import { z as z24 } from "zod";
|
|
831
882
|
var ALLOWED_BUILD_COMMANDS = [
|
|
832
883
|
"yarn build",
|
|
833
884
|
"npm run build",
|
|
@@ -853,10 +904,10 @@ function parseBuildCommandToArgv(command) {
|
|
|
853
904
|
return BUILD_COMMAND_ARGV[trimmed];
|
|
854
905
|
}
|
|
855
906
|
var enumTuple = ALLOWED_BUILD_COMMANDS;
|
|
856
|
-
var BuildPassedCommandStringSchema =
|
|
907
|
+
var BuildPassedCommandStringSchema = z24.enum(enumTuple);
|
|
857
908
|
|
|
858
909
|
// src/assertion/assertion.ts
|
|
859
|
-
var AssertionTypeSchema =
|
|
910
|
+
var AssertionTypeSchema = z25.enum([
|
|
860
911
|
"skill_was_called",
|
|
861
912
|
"tool_called_with_param",
|
|
862
913
|
"build_passed",
|
|
@@ -865,61 +916,61 @@ var AssertionTypeSchema = z24.enum([
|
|
|
865
916
|
"llm_judge",
|
|
866
917
|
"api_call"
|
|
867
918
|
]);
|
|
868
|
-
var AssertionParameterTypeSchema =
|
|
919
|
+
var AssertionParameterTypeSchema = z25.enum([
|
|
869
920
|
"string",
|
|
870
921
|
"number",
|
|
871
922
|
"boolean"
|
|
872
923
|
]);
|
|
873
|
-
var AssertionParameterSchema =
|
|
924
|
+
var AssertionParameterSchema = z25.object({
|
|
874
925
|
/** Parameter name (used as key in params object) */
|
|
875
|
-
name:
|
|
926
|
+
name: z25.string().min(1),
|
|
876
927
|
/** Display label for the parameter */
|
|
877
|
-
label:
|
|
928
|
+
label: z25.string().min(1),
|
|
878
929
|
/** Parameter type */
|
|
879
930
|
type: AssertionParameterTypeSchema,
|
|
880
931
|
/** Whether this parameter is required */
|
|
881
|
-
required:
|
|
932
|
+
required: z25.boolean(),
|
|
882
933
|
/** Default value (optional, used when not provided) */
|
|
883
|
-
defaultValue:
|
|
934
|
+
defaultValue: z25.union([z25.string(), z25.number(), z25.boolean()]).optional(),
|
|
884
935
|
/** If true, parameter is hidden by default behind "Show advanced options" */
|
|
885
|
-
advanced:
|
|
936
|
+
advanced: z25.boolean().optional()
|
|
886
937
|
});
|
|
887
|
-
var ScenarioAssertionLinkSchema =
|
|
938
|
+
var ScenarioAssertionLinkSchema = z25.object({
|
|
888
939
|
/** ID of the system assertion (e.g., 'system:skill_was_called') */
|
|
889
|
-
assertionId:
|
|
940
|
+
assertionId: z25.string(),
|
|
890
941
|
/** Parameter values for this assertion in this scenario */
|
|
891
|
-
params:
|
|
892
|
-
|
|
893
|
-
|
|
942
|
+
params: z25.record(
|
|
943
|
+
z25.string(),
|
|
944
|
+
z25.union([z25.string(), z25.number(), z25.boolean(), z25.null()])
|
|
894
945
|
).optional()
|
|
895
946
|
});
|
|
896
|
-
var SkillWasCalledConfigSchema =
|
|
947
|
+
var SkillWasCalledConfigSchema = z25.object({
|
|
897
948
|
/** Names of the skills that must have been called */
|
|
898
|
-
skillNames:
|
|
949
|
+
skillNames: z25.array(z25.string().min(1)).min(1)
|
|
899
950
|
});
|
|
900
|
-
var CostConfigSchema =
|
|
951
|
+
var CostConfigSchema = z25.strictObject({
|
|
901
952
|
/** Maximum allowed cost in USD */
|
|
902
|
-
maxCostUsd:
|
|
953
|
+
maxCostUsd: z25.number().positive()
|
|
903
954
|
});
|
|
904
|
-
var ToolCalledWithParamConfigSchema =
|
|
955
|
+
var ToolCalledWithParamConfigSchema = z25.strictObject({
|
|
905
956
|
/** Name of the tool that must have been called */
|
|
906
|
-
toolName:
|
|
957
|
+
toolName: z25.string().min(1),
|
|
907
958
|
/** JSON string of key-value pairs for expected parameters (substring match). Optional — when omitted, only checks tool presence. */
|
|
908
|
-
expectedParams:
|
|
959
|
+
expectedParams: z25.string().min(1).optional(),
|
|
909
960
|
/** If true, the matching tool call must also have succeeded (step.success === true) */
|
|
910
|
-
requireSuccess:
|
|
961
|
+
requireSuccess: z25.boolean().optional()
|
|
911
962
|
});
|
|
912
|
-
var BuildPassedConfigSchema =
|
|
963
|
+
var BuildPassedConfigSchema = z25.strictObject({
|
|
913
964
|
/** Allowlisted command only (default at runtime: "npm run build") */
|
|
914
965
|
command: BuildPassedCommandStringSchema.optional(),
|
|
915
966
|
/** Expected exit code (default: 0) */
|
|
916
|
-
expectedExitCode:
|
|
967
|
+
expectedExitCode: z25.number().int().optional()
|
|
917
968
|
});
|
|
918
|
-
var TimeConfigSchema =
|
|
969
|
+
var TimeConfigSchema = z25.strictObject({
|
|
919
970
|
/** Maximum allowed duration in milliseconds */
|
|
920
|
-
maxDurationMs:
|
|
971
|
+
maxDurationMs: z25.number().int().positive()
|
|
921
972
|
});
|
|
922
|
-
var LlmJudgeConfigSchema =
|
|
973
|
+
var LlmJudgeConfigSchema = z25.object({
|
|
923
974
|
/**
|
|
924
975
|
* Prompt template with placeholders:
|
|
925
976
|
* - {{output}}: agent's final output
|
|
@@ -930,65 +981,65 @@ var LlmJudgeConfigSchema = z24.object({
|
|
|
930
981
|
* - {{trace}}: step-by-step trace of tool calls
|
|
931
982
|
* - Custom parameters defined in the parameters array
|
|
932
983
|
*/
|
|
933
|
-
prompt:
|
|
984
|
+
prompt: z25.string().min(1),
|
|
934
985
|
/** Minimum score to pass (0-10, default 7) */
|
|
935
|
-
minScore:
|
|
986
|
+
minScore: z25.number().int().min(0).max(10).optional(),
|
|
936
987
|
/** Model for the judge (e.g. claude-3-5-haiku-20241022) */
|
|
937
|
-
model:
|
|
988
|
+
model: z25.string().optional(),
|
|
938
989
|
/** Max output tokens */
|
|
939
|
-
maxTokens:
|
|
990
|
+
maxTokens: z25.number().int().optional(),
|
|
940
991
|
/** Temperature (0-1) */
|
|
941
|
-
temperature:
|
|
992
|
+
temperature: z25.number().min(0).max(1).optional(),
|
|
942
993
|
/** User-defined parameters for this assertion */
|
|
943
|
-
parameters:
|
|
994
|
+
parameters: z25.array(AssertionParameterSchema).optional()
|
|
944
995
|
});
|
|
945
|
-
var ApiCallConfigSchema =
|
|
996
|
+
var ApiCallConfigSchema = z25.strictObject({
|
|
946
997
|
/** URL to call */
|
|
947
|
-
url:
|
|
998
|
+
url: z25.string().min(1),
|
|
948
999
|
/** HTTP method (default GET) */
|
|
949
|
-
method:
|
|
1000
|
+
method: z25.enum(["GET", "POST"]).optional(),
|
|
950
1001
|
/** Request body (JSON string, for POST requests) */
|
|
951
|
-
requestBody:
|
|
1002
|
+
requestBody: z25.string().optional(),
|
|
952
1003
|
/** Expected JSON response to validate against (subset match — extra fields in actual are OK) */
|
|
953
|
-
expectedResponse:
|
|
1004
|
+
expectedResponse: z25.string().min(1),
|
|
954
1005
|
/** Request headers as JSON string of key-value pairs */
|
|
955
|
-
requestHeaders:
|
|
1006
|
+
requestHeaders: z25.string().optional(),
|
|
956
1007
|
/** Request timeout in milliseconds (default 30000) */
|
|
957
|
-
timeoutMs:
|
|
1008
|
+
timeoutMs: z25.number().int().positive().optional()
|
|
958
1009
|
});
|
|
959
1010
|
var AssertionBaseFields = {
|
|
960
1011
|
/** When true, the assertion's pass/fail logic is inverted (NOT operator). */
|
|
961
|
-
negate:
|
|
1012
|
+
negate: z25.boolean().optional()
|
|
962
1013
|
};
|
|
963
1014
|
var SkillWasCalledAssertionSchema = SkillWasCalledConfigSchema.extend({
|
|
964
|
-
type:
|
|
1015
|
+
type: z25.literal("skill_was_called"),
|
|
965
1016
|
...AssertionBaseFields
|
|
966
1017
|
});
|
|
967
1018
|
var ToolCalledWithParamAssertionSchema = ToolCalledWithParamConfigSchema.extend({
|
|
968
|
-
type:
|
|
1019
|
+
type: z25.literal("tool_called_with_param"),
|
|
969
1020
|
...AssertionBaseFields
|
|
970
1021
|
});
|
|
971
1022
|
var BuildPassedAssertionSchema = BuildPassedConfigSchema.extend({
|
|
972
|
-
type:
|
|
1023
|
+
type: z25.literal("build_passed"),
|
|
973
1024
|
...AssertionBaseFields
|
|
974
1025
|
});
|
|
975
1026
|
var CostAssertionSchema = CostConfigSchema.extend({
|
|
976
|
-
type:
|
|
1027
|
+
type: z25.literal("cost"),
|
|
977
1028
|
...AssertionBaseFields
|
|
978
1029
|
});
|
|
979
1030
|
var LlmJudgeAssertionSchema = LlmJudgeConfigSchema.extend({
|
|
980
|
-
type:
|
|
1031
|
+
type: z25.literal("llm_judge"),
|
|
981
1032
|
...AssertionBaseFields
|
|
982
1033
|
});
|
|
983
1034
|
var ApiCallAssertionSchema = ApiCallConfigSchema.extend({
|
|
984
|
-
type:
|
|
1035
|
+
type: z25.literal("api_call"),
|
|
985
1036
|
...AssertionBaseFields
|
|
986
1037
|
});
|
|
987
1038
|
var TimeAssertionSchema = TimeConfigSchema.extend({
|
|
988
|
-
type:
|
|
1039
|
+
type: z25.literal("time_limit"),
|
|
989
1040
|
...AssertionBaseFields
|
|
990
1041
|
});
|
|
991
|
-
var AssertionSchema =
|
|
1042
|
+
var AssertionSchema = z25.union([
|
|
992
1043
|
SkillWasCalledAssertionSchema,
|
|
993
1044
|
ToolCalledWithParamAssertionSchema,
|
|
994
1045
|
BuildPassedAssertionSchema,
|
|
@@ -997,7 +1048,7 @@ var AssertionSchema = z24.union([
|
|
|
997
1048
|
LlmJudgeAssertionSchema,
|
|
998
1049
|
ApiCallAssertionSchema
|
|
999
1050
|
]);
|
|
1000
|
-
var AssertionConfigSchema =
|
|
1051
|
+
var AssertionConfigSchema = z25.union([
|
|
1001
1052
|
LlmJudgeConfigSchema,
|
|
1002
1053
|
// requires prompt - check first
|
|
1003
1054
|
SkillWasCalledConfigSchema,
|
|
@@ -1012,7 +1063,7 @@ var AssertionConfigSchema = z24.union([
|
|
|
1012
1063
|
// requires maxCostUsd, uses strictObject
|
|
1013
1064
|
BuildPassedConfigSchema,
|
|
1014
1065
|
// all optional, uses strictObject to reject unknown keys
|
|
1015
|
-
|
|
1066
|
+
z25.object({})
|
|
1016
1067
|
// fallback empty config
|
|
1017
1068
|
]);
|
|
1018
1069
|
function validateAssertionConfig(type, config) {
|
|
@@ -1258,36 +1309,67 @@ function getSystemAssertion(id) {
|
|
|
1258
1309
|
|
|
1259
1310
|
// src/scenario/test-scenario.ts
|
|
1260
1311
|
var MAX_IMAGE_BASE64_LENGTH = 4 * Math.ceil(2 * 1024 * 1024 / 3);
|
|
1261
|
-
var TriggerPromptImageSchema =
|
|
1312
|
+
var TriggerPromptImageSchema = z26.object({
|
|
1262
1313
|
/** Base64-encoded image data (no data URL prefix) */
|
|
1263
|
-
base64:
|
|
1314
|
+
base64: z26.string().max(MAX_IMAGE_BASE64_LENGTH, "Image exceeds 2 MB size limit"),
|
|
1264
1315
|
/** MIME type of the image */
|
|
1265
|
-
mediaType:
|
|
1316
|
+
mediaType: z26.enum(["image/jpeg", "image/png", "image/gif", "image/webp"]),
|
|
1266
1317
|
/** Original filename of the image */
|
|
1267
|
-
name:
|
|
1318
|
+
name: z26.string()
|
|
1268
1319
|
});
|
|
1269
|
-
var ExpectedFileSchema =
|
|
1320
|
+
var ExpectedFileSchema = z26.object({
|
|
1270
1321
|
/** Relative path where the file should be created */
|
|
1271
|
-
path:
|
|
1322
|
+
path: z26.string(),
|
|
1272
1323
|
/** Optional expected content */
|
|
1273
|
-
content:
|
|
1324
|
+
content: z26.string().optional()
|
|
1274
1325
|
});
|
|
1275
1326
|
var TestScenarioSchema = TenantEntitySchema.extend({
|
|
1276
1327
|
/** The prompt sent to the agent to trigger the task */
|
|
1277
|
-
triggerPrompt:
|
|
1328
|
+
triggerPrompt: z26.string().min(10),
|
|
1278
1329
|
/** ID of the template to use for this scenario (null = no template) */
|
|
1279
|
-
templateId:
|
|
1330
|
+
templateId: z26.string().nullish(),
|
|
1280
1331
|
/** Inline assertions to evaluate for this scenario (legacy) */
|
|
1281
|
-
assertions:
|
|
1332
|
+
assertions: z26.array(AssertionSchema).optional(),
|
|
1282
1333
|
/** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
|
|
1283
|
-
assertionIds:
|
|
1334
|
+
assertionIds: z26.array(z26.string()).optional(),
|
|
1284
1335
|
/** Linked assertions with per-scenario parameter values */
|
|
1285
|
-
assertionLinks:
|
|
1336
|
+
assertionLinks: z26.array(ScenarioAssertionLinkSchema).optional(),
|
|
1286
1337
|
/** Tags for categorisation and filtering */
|
|
1287
|
-
tags:
|
|
1338
|
+
tags: z26.array(z26.string()).optional(),
|
|
1288
1339
|
/** Base64-encoded images attached to the trigger prompt (max 3) */
|
|
1289
|
-
triggerPromptImages:
|
|
1290
|
-
|
|
1340
|
+
triggerPromptImages: z26.array(TriggerPromptImageSchema).max(3).optional(),
|
|
1341
|
+
/** Optional per-scenario Wix site provisioning instructions. Absent ≡ no site. */
|
|
1342
|
+
siteSetup: SiteSetupConfigSchema.optional()
|
|
1343
|
+
});
|
|
1344
|
+
var SITE_SETUP_EXCLUSIVE_VARIABLES = ["site-id"];
|
|
1345
|
+
function extractVariableNamesFromPrompt(prompt) {
|
|
1346
|
+
const names = /* @__PURE__ */ new Set();
|
|
1347
|
+
for (const match of prompt.matchAll(/\{\{([\w-]+)\}\}/g)) {
|
|
1348
|
+
names.add(match[1]);
|
|
1349
|
+
}
|
|
1350
|
+
return [...names];
|
|
1351
|
+
}
|
|
1352
|
+
function promptUsesSiteSetupExclusiveVariables(prompt) {
|
|
1353
|
+
const names = extractVariableNamesFromPrompt(prompt);
|
|
1354
|
+
return SITE_SETUP_EXCLUSIVE_VARIABLES.some(
|
|
1355
|
+
(exclusive) => names.includes(exclusive)
|
|
1356
|
+
);
|
|
1357
|
+
}
|
|
1358
|
+
function hasActiveSiteSetup(siteSetup) {
|
|
1359
|
+
return siteSetup?.mode === "clone" || siteSetup?.mode === "template";
|
|
1360
|
+
}
|
|
1361
|
+
function validateSiteSetupExclusivity(data, ctx) {
|
|
1362
|
+
if (!hasActiveSiteSetup(data.siteSetup)) return;
|
|
1363
|
+
const prompt = data.triggerPrompt;
|
|
1364
|
+
if (prompt === void 0 || !promptUsesSiteSetupExclusiveVariables(prompt)) {
|
|
1365
|
+
return;
|
|
1366
|
+
}
|
|
1367
|
+
ctx.addIssue({
|
|
1368
|
+
code: z26.ZodIssueCode.custom,
|
|
1369
|
+
message: "Site setup and {{site-id}} run variables cannot be used together. Remove {{site-id}} from the trigger prompt or disable site setup.",
|
|
1370
|
+
path: ["triggerPrompt"]
|
|
1371
|
+
});
|
|
1372
|
+
}
|
|
1291
1373
|
function validateBuildPassedParamsInAssertionLinks(links, ctx) {
|
|
1292
1374
|
if (!links) return;
|
|
1293
1375
|
for (let i = 0; i < links.length; i++) {
|
|
@@ -1297,7 +1379,7 @@ function validateBuildPassedParamsInAssertionLinks(links, ctx) {
|
|
|
1297
1379
|
if (cmd === void 0 || cmd === null) continue;
|
|
1298
1380
|
if (typeof cmd !== "string") {
|
|
1299
1381
|
ctx.addIssue({
|
|
1300
|
-
code:
|
|
1382
|
+
code: z26.ZodIssueCode.custom,
|
|
1301
1383
|
message: "build_passed command must be a string",
|
|
1302
1384
|
path: ["assertionLinks", i, "params", "command"]
|
|
1303
1385
|
});
|
|
@@ -1305,7 +1387,7 @@ function validateBuildPassedParamsInAssertionLinks(links, ctx) {
|
|
|
1305
1387
|
}
|
|
1306
1388
|
if (!isAllowedBuildCommandString(cmd)) {
|
|
1307
1389
|
ctx.addIssue({
|
|
1308
|
-
code:
|
|
1390
|
+
code: z26.ZodIssueCode.custom,
|
|
1309
1391
|
message: "Invalid build_passed command. Allowed: yarn build, npm run build, pnpm run build, pnpm build",
|
|
1310
1392
|
path: ["assertionLinks", i, "params", "command"]
|
|
1311
1393
|
});
|
|
@@ -1320,27 +1402,56 @@ var TestScenarioCreateBaseSchema = TestScenarioSchema.omit({
|
|
|
1320
1402
|
});
|
|
1321
1403
|
var CreateTestScenarioInputSchema = TestScenarioCreateBaseSchema.superRefine((data, ctx) => {
|
|
1322
1404
|
validateBuildPassedParamsInAssertionLinks(data.assertionLinks, ctx);
|
|
1405
|
+
validateSiteSetupExclusivity(data, ctx);
|
|
1323
1406
|
});
|
|
1324
1407
|
var UpdateTestScenarioInputSchema = TestScenarioCreateBaseSchema.partial().superRefine((data, ctx) => {
|
|
1325
1408
|
if (data.assertionLinks !== void 0) {
|
|
1326
1409
|
validateBuildPassedParamsInAssertionLinks(data.assertionLinks, ctx);
|
|
1327
1410
|
}
|
|
1411
|
+
validateSiteSetupExclusivity(data, ctx);
|
|
1328
1412
|
});
|
|
1329
1413
|
|
|
1414
|
+
// src/scenario/wix-origin-template-ids.ts
|
|
1415
|
+
var WIX_ORIGIN_TEMPLATE_ID_BY_ALIAS = {
|
|
1416
|
+
ecommerce: "e5da13f4-c01e-4b61-a9c7-55dacd961d54",
|
|
1417
|
+
default: "212b41cb-0da6-4401-9c72-7c579e6477a2",
|
|
1418
|
+
blog: "68fc7371-365f-44c6-8467-69d88bfc172e",
|
|
1419
|
+
astrowind: "9e9292c1-1a35-4ba0-8986-d06f2ecb5366",
|
|
1420
|
+
scheduler: "72ade0e3-1871-4c04-ac54-419ca874d9d3",
|
|
1421
|
+
registration: "e5d63bf1-cd06-48eb-ad77-0da9235adcf1",
|
|
1422
|
+
"picasso-studio": "61f05de1-b0ce-4873-b9f5-52241a6fd262",
|
|
1423
|
+
"picasso-ecom": "daa9187d-f010-4eb0-bd49-e658b5a5037a",
|
|
1424
|
+
picasso: "99b9a3c7-82ad-4e1b-9066-e490bb9863af",
|
|
1425
|
+
"ecom-editorless": "738c7c0b-046e-4bf0-87dd-9a06ee5a52c4"
|
|
1426
|
+
};
|
|
1427
|
+
var GUID_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
1428
|
+
function resolveWixOriginTemplateId(templateId) {
|
|
1429
|
+
if (GUID_PATTERN.test(templateId)) {
|
|
1430
|
+
return templateId;
|
|
1431
|
+
}
|
|
1432
|
+
const originTemplateId = WIX_ORIGIN_TEMPLATE_ID_BY_ALIAS[templateId];
|
|
1433
|
+
if (!originTemplateId) {
|
|
1434
|
+
throw new Error(
|
|
1435
|
+
`Unknown Wix site template alias "${templateId}". Use a GUID or one of: ${Object.keys(WIX_ORIGIN_TEMPLATE_ID_BY_ALIAS).join(", ")}`
|
|
1436
|
+
);
|
|
1437
|
+
}
|
|
1438
|
+
return originTemplateId;
|
|
1439
|
+
}
|
|
1440
|
+
|
|
1330
1441
|
// src/scenario/batch-import.ts
|
|
1331
|
-
import { z as
|
|
1442
|
+
import { z as z27 } from "zod";
|
|
1332
1443
|
var UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
1333
|
-
var BatchAssertionLinkSchema =
|
|
1334
|
-
|
|
1444
|
+
var BatchAssertionLinkSchema = z27.union([
|
|
1445
|
+
z27.string().min(1),
|
|
1335
1446
|
ScenarioAssertionLinkSchema
|
|
1336
1447
|
]);
|
|
1337
|
-
var BatchScenarioEntrySchema =
|
|
1338
|
-
name:
|
|
1339
|
-
description:
|
|
1340
|
-
triggerPrompt:
|
|
1341
|
-
templateId:
|
|
1342
|
-
tags:
|
|
1343
|
-
assertionLinks:
|
|
1448
|
+
var BatchScenarioEntrySchema = z27.object({
|
|
1449
|
+
name: z27.string().min(1, "name: Required"),
|
|
1450
|
+
description: z27.string().optional().default(""),
|
|
1451
|
+
triggerPrompt: z27.string().min(10, "triggerPrompt: Must be at least 10 characters"),
|
|
1452
|
+
templateId: z27.string().nullish(),
|
|
1453
|
+
tags: z27.array(z27.string()).optional(),
|
|
1454
|
+
assertionLinks: z27.array(BatchAssertionLinkSchema).optional()
|
|
1344
1455
|
}).superRefine((data, ctx) => {
|
|
1345
1456
|
if (!data.assertionLinks) return;
|
|
1346
1457
|
const objectLinks = data.assertionLinks.filter(
|
|
@@ -1350,8 +1461,8 @@ var BatchScenarioEntrySchema = z26.object({
|
|
|
1350
1461
|
validateBuildPassedParamsInAssertionLinks(objectLinks, ctx);
|
|
1351
1462
|
}
|
|
1352
1463
|
});
|
|
1353
|
-
var BatchImportPayloadSchema =
|
|
1354
|
-
scenarios:
|
|
1464
|
+
var BatchImportPayloadSchema = z27.object({
|
|
1465
|
+
scenarios: z27.array(BatchScenarioEntrySchema).min(1, "scenarios array must contain at least one entry").max(100, "Maximum 100 scenarios per upload")
|
|
1355
1466
|
});
|
|
1356
1467
|
var BATCH_IMPORT_LIMITS = {
|
|
1357
1468
|
MAX_SCENARIOS: 100,
|
|
@@ -1373,29 +1484,29 @@ function normalizeBatchAssertionLink(link) {
|
|
|
1373
1484
|
}
|
|
1374
1485
|
return link;
|
|
1375
1486
|
}
|
|
1376
|
-
var BatchResultItemSchema =
|
|
1377
|
-
index:
|
|
1378
|
-
name:
|
|
1379
|
-
status:
|
|
1380
|
-
id:
|
|
1381
|
-
errors:
|
|
1382
|
-
});
|
|
1383
|
-
var BatchSummarySchema =
|
|
1384
|
-
total:
|
|
1385
|
-
valid:
|
|
1386
|
-
invalid:
|
|
1387
|
-
created:
|
|
1388
|
-
});
|
|
1389
|
-
var BatchImportResponseSchema =
|
|
1487
|
+
var BatchResultItemSchema = z27.object({
|
|
1488
|
+
index: z27.number(),
|
|
1489
|
+
name: z27.string(),
|
|
1490
|
+
status: z27.enum(["valid", "invalid"]),
|
|
1491
|
+
id: z27.string().nullable().optional(),
|
|
1492
|
+
errors: z27.array(z27.string()).optional()
|
|
1493
|
+
});
|
|
1494
|
+
var BatchSummarySchema = z27.object({
|
|
1495
|
+
total: z27.number(),
|
|
1496
|
+
valid: z27.number(),
|
|
1497
|
+
invalid: z27.number(),
|
|
1498
|
+
created: z27.number()
|
|
1499
|
+
});
|
|
1500
|
+
var BatchImportResponseSchema = z27.object({
|
|
1390
1501
|
summary: BatchSummarySchema,
|
|
1391
|
-
results:
|
|
1502
|
+
results: z27.array(BatchResultItemSchema)
|
|
1392
1503
|
});
|
|
1393
1504
|
|
|
1394
1505
|
// src/suite/test-suite.ts
|
|
1395
|
-
import { z as
|
|
1506
|
+
import { z as z28 } from "zod";
|
|
1396
1507
|
var TestSuiteSchema = TenantEntitySchema.extend({
|
|
1397
1508
|
/** IDs of test scenarios in this suite */
|
|
1398
|
-
scenarioIds:
|
|
1509
|
+
scenarioIds: z28.array(z28.string())
|
|
1399
1510
|
});
|
|
1400
1511
|
var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
1401
1512
|
id: true,
|
|
@@ -1406,21 +1517,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
|
1406
1517
|
var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
|
|
1407
1518
|
|
|
1408
1519
|
// src/evaluation/metrics.ts
|
|
1409
|
-
import { z as
|
|
1410
|
-
var TokenUsageSchema =
|
|
1411
|
-
prompt:
|
|
1412
|
-
completion:
|
|
1413
|
-
total:
|
|
1414
|
-
});
|
|
1415
|
-
var EvalMetricsSchema =
|
|
1416
|
-
totalAssertions:
|
|
1417
|
-
passed:
|
|
1418
|
-
failed:
|
|
1419
|
-
skipped:
|
|
1420
|
-
errors:
|
|
1421
|
-
passRate:
|
|
1422
|
-
avgDuration:
|
|
1423
|
-
totalDuration:
|
|
1520
|
+
import { z as z29 } from "zod";
|
|
1521
|
+
var TokenUsageSchema = z29.object({
|
|
1522
|
+
prompt: z29.number(),
|
|
1523
|
+
completion: z29.number(),
|
|
1524
|
+
total: z29.number()
|
|
1525
|
+
});
|
|
1526
|
+
var EvalMetricsSchema = z29.object({
|
|
1527
|
+
totalAssertions: z29.number(),
|
|
1528
|
+
passed: z29.number(),
|
|
1529
|
+
failed: z29.number(),
|
|
1530
|
+
skipped: z29.number(),
|
|
1531
|
+
errors: z29.number(),
|
|
1532
|
+
passRate: z29.number(),
|
|
1533
|
+
avgDuration: z29.number(),
|
|
1534
|
+
totalDuration: z29.number()
|
|
1424
1535
|
});
|
|
1425
1536
|
var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
1426
1537
|
EvalStatus2["PENDING"] = "pending";
|
|
@@ -1430,7 +1541,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
|
1430
1541
|
EvalStatus2["CANCELLED"] = "cancelled";
|
|
1431
1542
|
return EvalStatus2;
|
|
1432
1543
|
})(EvalStatus || {});
|
|
1433
|
-
var EvalStatusSchema =
|
|
1544
|
+
var EvalStatusSchema = z29.enum(EvalStatus);
|
|
1434
1545
|
var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
1435
1546
|
LLMStepType2["COMPLETION"] = "completion";
|
|
1436
1547
|
LLMStepType2["TOOL_USE"] = "tool_use";
|
|
@@ -1438,54 +1549,54 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
|
1438
1549
|
LLMStepType2["THINKING"] = "thinking";
|
|
1439
1550
|
return LLMStepType2;
|
|
1440
1551
|
})(LLMStepType || {});
|
|
1441
|
-
var LLMTraceStepSchema =
|
|
1442
|
-
id:
|
|
1443
|
-
stepNumber:
|
|
1444
|
-
type:
|
|
1445
|
-
model:
|
|
1446
|
-
provider:
|
|
1447
|
-
startedAt:
|
|
1448
|
-
durationMs:
|
|
1552
|
+
var LLMTraceStepSchema = z29.object({
|
|
1553
|
+
id: z29.string(),
|
|
1554
|
+
stepNumber: z29.number(),
|
|
1555
|
+
type: z29.enum(LLMStepType),
|
|
1556
|
+
model: z29.string(),
|
|
1557
|
+
provider: z29.string(),
|
|
1558
|
+
startedAt: z29.string(),
|
|
1559
|
+
durationMs: z29.number(),
|
|
1449
1560
|
tokenUsage: TokenUsageSchema,
|
|
1450
|
-
costUsd:
|
|
1451
|
-
toolName:
|
|
1452
|
-
toolArguments:
|
|
1453
|
-
inputPreview:
|
|
1454
|
-
outputPreview:
|
|
1455
|
-
success:
|
|
1456
|
-
error:
|
|
1457
|
-
turnIndex:
|
|
1458
|
-
});
|
|
1459
|
-
var LLMBreakdownStatsSchema =
|
|
1460
|
-
count:
|
|
1461
|
-
durationMs:
|
|
1462
|
-
tokens:
|
|
1463
|
-
costUsd:
|
|
1464
|
-
});
|
|
1465
|
-
var LLMTraceSummarySchema =
|
|
1466
|
-
totalSteps:
|
|
1467
|
-
totalTurns:
|
|
1468
|
-
totalDurationMs:
|
|
1561
|
+
costUsd: z29.number(),
|
|
1562
|
+
toolName: z29.string().optional(),
|
|
1563
|
+
toolArguments: z29.string().optional(),
|
|
1564
|
+
inputPreview: z29.string().optional(),
|
|
1565
|
+
outputPreview: z29.string().optional(),
|
|
1566
|
+
success: z29.boolean(),
|
|
1567
|
+
error: z29.string().optional(),
|
|
1568
|
+
turnIndex: z29.number().optional()
|
|
1569
|
+
});
|
|
1570
|
+
var LLMBreakdownStatsSchema = z29.object({
|
|
1571
|
+
count: z29.number(),
|
|
1572
|
+
durationMs: z29.number(),
|
|
1573
|
+
tokens: z29.number(),
|
|
1574
|
+
costUsd: z29.number()
|
|
1575
|
+
});
|
|
1576
|
+
var LLMTraceSummarySchema = z29.object({
|
|
1577
|
+
totalSteps: z29.number(),
|
|
1578
|
+
totalTurns: z29.number().optional(),
|
|
1579
|
+
totalDurationMs: z29.number(),
|
|
1469
1580
|
totalTokens: TokenUsageSchema,
|
|
1470
|
-
totalCostUsd:
|
|
1471
|
-
stepTypeBreakdown:
|
|
1472
|
-
modelBreakdown:
|
|
1473
|
-
modelsUsed:
|
|
1474
|
-
});
|
|
1475
|
-
var LLMTraceSchema =
|
|
1476
|
-
id:
|
|
1477
|
-
steps:
|
|
1581
|
+
totalCostUsd: z29.number(),
|
|
1582
|
+
stepTypeBreakdown: z29.record(z29.string(), LLMBreakdownStatsSchema).optional(),
|
|
1583
|
+
modelBreakdown: z29.record(z29.string(), LLMBreakdownStatsSchema),
|
|
1584
|
+
modelsUsed: z29.array(z29.string())
|
|
1585
|
+
});
|
|
1586
|
+
var LLMTraceSchema = z29.object({
|
|
1587
|
+
id: z29.string(),
|
|
1588
|
+
steps: z29.array(LLMTraceStepSchema),
|
|
1478
1589
|
summary: LLMTraceSummarySchema
|
|
1479
1590
|
});
|
|
1480
1591
|
|
|
1481
1592
|
// src/evaluation/eval-result.ts
|
|
1482
|
-
import { z as
|
|
1593
|
+
import { z as z33 } from "zod";
|
|
1483
1594
|
|
|
1484
1595
|
// src/evaluation/eval-run.ts
|
|
1485
|
-
import { z as
|
|
1596
|
+
import { z as z31 } from "zod";
|
|
1486
1597
|
|
|
1487
1598
|
// src/evaluation/live-trace.ts
|
|
1488
|
-
import { z as
|
|
1599
|
+
import { z as z30 } from "zod";
|
|
1489
1600
|
var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
1490
1601
|
LiveTraceEventType2["THINKING"] = "thinking";
|
|
1491
1602
|
LiveTraceEventType2["TOOL_USE"] = "tool_use";
|
|
@@ -1499,37 +1610,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
|
1499
1610
|
LiveTraceEventType2["USER"] = "user";
|
|
1500
1611
|
return LiveTraceEventType2;
|
|
1501
1612
|
})(LiveTraceEventType || {});
|
|
1502
|
-
var LiveTraceEventSchema =
|
|
1613
|
+
var LiveTraceEventSchema = z30.object({
|
|
1503
1614
|
/** The evaluation run ID */
|
|
1504
|
-
evalRunId:
|
|
1615
|
+
evalRunId: z30.string(),
|
|
1505
1616
|
/** The scenario ID being executed */
|
|
1506
|
-
scenarioId:
|
|
1617
|
+
scenarioId: z30.string(),
|
|
1507
1618
|
/** The scenario name for display */
|
|
1508
|
-
scenarioName:
|
|
1619
|
+
scenarioName: z30.string(),
|
|
1509
1620
|
/** The target ID (skill, agent, etc.) */
|
|
1510
|
-
targetId:
|
|
1621
|
+
targetId: z30.string(),
|
|
1511
1622
|
/** The target name for display */
|
|
1512
|
-
targetName:
|
|
1623
|
+
targetName: z30.string(),
|
|
1513
1624
|
/** Step number in the current scenario execution */
|
|
1514
|
-
stepNumber:
|
|
1625
|
+
stepNumber: z30.number(),
|
|
1515
1626
|
/** Type of trace event */
|
|
1516
|
-
type:
|
|
1627
|
+
type: z30.enum(LiveTraceEventType),
|
|
1517
1628
|
/** Tool name if this is a tool_use event */
|
|
1518
|
-
toolName:
|
|
1629
|
+
toolName: z30.string().optional(),
|
|
1519
1630
|
/** Tool arguments preview (truncated JSON) */
|
|
1520
|
-
toolArgs:
|
|
1631
|
+
toolArgs: z30.string().optional(),
|
|
1521
1632
|
/** Output preview (truncated text) */
|
|
1522
|
-
outputPreview:
|
|
1633
|
+
outputPreview: z30.string().optional(),
|
|
1523
1634
|
/** File path for file operations */
|
|
1524
|
-
filePath:
|
|
1635
|
+
filePath: z30.string().optional(),
|
|
1525
1636
|
/** Elapsed time in milliseconds for progress events */
|
|
1526
|
-
elapsedMs:
|
|
1637
|
+
elapsedMs: z30.number().optional(),
|
|
1527
1638
|
/** Thinking/reasoning text from Claude */
|
|
1528
|
-
thinking:
|
|
1639
|
+
thinking: z30.string().optional(),
|
|
1529
1640
|
/** Timestamp when this event occurred */
|
|
1530
|
-
timestamp:
|
|
1641
|
+
timestamp: z30.string(),
|
|
1531
1642
|
/** Whether this is the final event for this scenario */
|
|
1532
|
-
isComplete:
|
|
1643
|
+
isComplete: z30.boolean()
|
|
1533
1644
|
});
|
|
1534
1645
|
var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
|
|
1535
1646
|
function parseTraceEventLine(line) {
|
|
@@ -1558,40 +1669,40 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
|
|
|
1558
1669
|
TriggerType2["SCHEDULED"] = "SCHEDULED";
|
|
1559
1670
|
return TriggerType2;
|
|
1560
1671
|
})(TriggerType || {});
|
|
1561
|
-
var TriggerMetadataSchema =
|
|
1562
|
-
version:
|
|
1563
|
-
resourceUpdated:
|
|
1564
|
-
scheduleId:
|
|
1672
|
+
var TriggerMetadataSchema = z31.object({
|
|
1673
|
+
version: z31.string().optional(),
|
|
1674
|
+
resourceUpdated: z31.array(z31.string()).optional(),
|
|
1675
|
+
scheduleId: z31.string().optional()
|
|
1565
1676
|
});
|
|
1566
|
-
var TriggerSchema =
|
|
1567
|
-
id:
|
|
1677
|
+
var TriggerSchema = z31.object({
|
|
1678
|
+
id: z31.string(),
|
|
1568
1679
|
metadata: TriggerMetadataSchema.optional(),
|
|
1569
|
-
type:
|
|
1680
|
+
type: z31.nativeEnum(TriggerType)
|
|
1570
1681
|
});
|
|
1571
|
-
var DiffLineTypeSchema =
|
|
1572
|
-
var DiffLineSchema =
|
|
1682
|
+
var DiffLineTypeSchema = z31.enum(["added", "removed", "unchanged"]);
|
|
1683
|
+
var DiffLineSchema = z31.object({
|
|
1573
1684
|
type: DiffLineTypeSchema,
|
|
1574
|
-
content:
|
|
1575
|
-
lineNumber:
|
|
1576
|
-
});
|
|
1577
|
-
var DiffContentSchema =
|
|
1578
|
-
path:
|
|
1579
|
-
expected:
|
|
1580
|
-
actual:
|
|
1581
|
-
diffLines:
|
|
1582
|
-
renamedFrom:
|
|
1685
|
+
content: z31.string(),
|
|
1686
|
+
lineNumber: z31.number()
|
|
1687
|
+
});
|
|
1688
|
+
var DiffContentSchema = z31.object({
|
|
1689
|
+
path: z31.string(),
|
|
1690
|
+
expected: z31.string(),
|
|
1691
|
+
actual: z31.string(),
|
|
1692
|
+
diffLines: z31.array(DiffLineSchema),
|
|
1693
|
+
renamedFrom: z31.string().optional(),
|
|
1583
1694
|
/** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
|
|
1584
|
-
isInfrastructure:
|
|
1695
|
+
isInfrastructure: z31.boolean().optional()
|
|
1585
1696
|
});
|
|
1586
|
-
var CommandExecutionSchema =
|
|
1587
|
-
command:
|
|
1588
|
-
exitCode:
|
|
1589
|
-
output:
|
|
1590
|
-
duration:
|
|
1697
|
+
var CommandExecutionSchema = z31.object({
|
|
1698
|
+
command: z31.string(),
|
|
1699
|
+
exitCode: z31.number(),
|
|
1700
|
+
output: z31.string().optional(),
|
|
1701
|
+
duration: z31.number()
|
|
1591
1702
|
});
|
|
1592
|
-
var FileModificationSchema =
|
|
1593
|
-
path:
|
|
1594
|
-
action:
|
|
1703
|
+
var FileModificationSchema = z31.object({
|
|
1704
|
+
path: z31.string(),
|
|
1705
|
+
action: z31.enum(["created", "modified", "deleted"])
|
|
1595
1706
|
});
|
|
1596
1707
|
var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
1597
1708
|
TemplateFileStatus2["NEW"] = "new";
|
|
@@ -1599,58 +1710,58 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
|
1599
1710
|
TemplateFileStatus2["UNCHANGED"] = "unchanged";
|
|
1600
1711
|
return TemplateFileStatus2;
|
|
1601
1712
|
})(TemplateFileStatus || {});
|
|
1602
|
-
var TemplateFileSchema =
|
|
1713
|
+
var TemplateFileSchema = z31.object({
|
|
1603
1714
|
/** Relative path within the template */
|
|
1604
|
-
path:
|
|
1715
|
+
path: z31.string(),
|
|
1605
1716
|
/** Full file content after execution */
|
|
1606
|
-
content:
|
|
1717
|
+
content: z31.string(),
|
|
1607
1718
|
/** File status (new, modified, unchanged) */
|
|
1608
|
-
status:
|
|
1719
|
+
status: z31.enum(["new", "modified", "unchanged"]),
|
|
1609
1720
|
/** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
|
|
1610
|
-
isInfrastructure:
|
|
1721
|
+
isInfrastructure: z31.boolean().optional()
|
|
1611
1722
|
});
|
|
1612
|
-
var ApiCallSchema =
|
|
1613
|
-
endpoint:
|
|
1614
|
-
tokensUsed:
|
|
1615
|
-
duration:
|
|
1723
|
+
var ApiCallSchema = z31.object({
|
|
1724
|
+
endpoint: z31.string(),
|
|
1725
|
+
tokensUsed: z31.number(),
|
|
1726
|
+
duration: z31.number()
|
|
1616
1727
|
});
|
|
1617
|
-
var ExecutionTraceSchema =
|
|
1618
|
-
commands:
|
|
1619
|
-
filesModified:
|
|
1620
|
-
apiCalls:
|
|
1621
|
-
totalDuration:
|
|
1728
|
+
var ExecutionTraceSchema = z31.object({
|
|
1729
|
+
commands: z31.array(CommandExecutionSchema),
|
|
1730
|
+
filesModified: z31.array(FileModificationSchema),
|
|
1731
|
+
apiCalls: z31.array(ApiCallSchema),
|
|
1732
|
+
totalDuration: z31.number()
|
|
1622
1733
|
});
|
|
1623
|
-
var RunAnalysisFindingSchema =
|
|
1624
|
-
category:
|
|
1734
|
+
var RunAnalysisFindingSchema = z31.object({
|
|
1735
|
+
category: z31.enum([
|
|
1625
1736
|
"failure_pattern",
|
|
1626
1737
|
"cost_waste",
|
|
1627
1738
|
"flakiness",
|
|
1628
1739
|
"inefficiency",
|
|
1629
1740
|
"positive"
|
|
1630
1741
|
]),
|
|
1631
|
-
severity:
|
|
1632
|
-
description:
|
|
1633
|
-
affectedScenarios:
|
|
1634
|
-
recommendation:
|
|
1742
|
+
severity: z31.enum(["high", "medium", "low"]),
|
|
1743
|
+
description: z31.string(),
|
|
1744
|
+
affectedScenarios: z31.array(z31.string()),
|
|
1745
|
+
recommendation: z31.string().optional()
|
|
1635
1746
|
});
|
|
1636
|
-
var RunAnalysisSchema =
|
|
1637
|
-
generatedAt:
|
|
1638
|
-
summary:
|
|
1639
|
-
findings:
|
|
1747
|
+
var RunAnalysisSchema = z31.object({
|
|
1748
|
+
generatedAt: z31.string(),
|
|
1749
|
+
summary: z31.string(),
|
|
1750
|
+
findings: z31.array(RunAnalysisFindingSchema)
|
|
1640
1751
|
});
|
|
1641
1752
|
var EvalRunSchema = TenantEntitySchema.extend({
|
|
1642
1753
|
/** Agent ID for this run */
|
|
1643
|
-
agentId:
|
|
1754
|
+
agentId: z31.string().optional(),
|
|
1644
1755
|
/** Preset ID that originated this run (optional) */
|
|
1645
|
-
presetId:
|
|
1756
|
+
presetId: z31.string().optional(),
|
|
1646
1757
|
/** Scenario IDs to run (always present — resolved server-side from tags when needed) */
|
|
1647
|
-
scenarioIds:
|
|
1758
|
+
scenarioIds: z31.array(z31.string()),
|
|
1648
1759
|
/** Current status */
|
|
1649
1760
|
status: EvalStatusSchema,
|
|
1650
1761
|
/** Progress percentage (0-100) */
|
|
1651
|
-
progress:
|
|
1762
|
+
progress: z31.number(),
|
|
1652
1763
|
/** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
|
|
1653
|
-
results:
|
|
1764
|
+
results: z31.array(z31.lazy(() => EvalRunResultSchema)),
|
|
1654
1765
|
/** Aggregated metrics across all results */
|
|
1655
1766
|
aggregateMetrics: EvalMetricsSchema,
|
|
1656
1767
|
/** Aggregated LLM trace summary */
|
|
@@ -1658,49 +1769,49 @@ var EvalRunSchema = TenantEntitySchema.extend({
|
|
|
1658
1769
|
/** What triggered this run */
|
|
1659
1770
|
trigger: TriggerSchema.optional(),
|
|
1660
1771
|
/** When the run started (set when evaluation is triggered) */
|
|
1661
|
-
startedAt:
|
|
1772
|
+
startedAt: z31.string().optional(),
|
|
1662
1773
|
/** When the run completed */
|
|
1663
|
-
completedAt:
|
|
1774
|
+
completedAt: z31.string().optional(),
|
|
1664
1775
|
/** Live trace events captured during execution (for playback on results page) */
|
|
1665
|
-
liveTraceEvents:
|
|
1776
|
+
liveTraceEvents: z31.array(LiveTraceEventSchema).optional(),
|
|
1666
1777
|
/** Remote job ID for tracking execution in Dev Machines */
|
|
1667
|
-
jobId:
|
|
1778
|
+
jobId: z31.string().optional(),
|
|
1668
1779
|
/** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
|
|
1669
|
-
jobStatus:
|
|
1780
|
+
jobStatus: z31.string().optional(),
|
|
1670
1781
|
/** Remote job error message if the job failed */
|
|
1671
|
-
jobError:
|
|
1782
|
+
jobError: z31.string().optional(),
|
|
1672
1783
|
/** Timestamp of the last job status check */
|
|
1673
|
-
jobStatusCheckedAt:
|
|
1784
|
+
jobStatusCheckedAt: z31.string().optional(),
|
|
1674
1785
|
/** Unified capability IDs */
|
|
1675
|
-
capabilityIds:
|
|
1786
|
+
capabilityIds: z31.array(z31.string()).optional(),
|
|
1676
1787
|
/** Map of capabilityId to capabilityVersionId for version pinning */
|
|
1677
|
-
capabilityVersions:
|
|
1788
|
+
capabilityVersions: z31.record(z31.string(), z31.string()).optional(),
|
|
1678
1789
|
/** Tags used to select scenarios for this run (for traceability) */
|
|
1679
|
-
tags:
|
|
1790
|
+
tags: z31.array(z31.string()).optional(),
|
|
1680
1791
|
/** How many times each scenario is executed within this eval run. Default: 1. Max: 20. */
|
|
1681
|
-
runsPerScenario:
|
|
1792
|
+
runsPerScenario: z31.number().int().min(1).max(20).optional(),
|
|
1682
1793
|
/** Variable values to substitute in scenario trigger prompts at runtime */
|
|
1683
|
-
variables:
|
|
1794
|
+
variables: z31.record(z31.string(), z31.string()).optional(),
|
|
1684
1795
|
/** Snapshot of agent configuration captured at run creation time */
|
|
1685
|
-
agentSnapshot:
|
|
1686
|
-
name:
|
|
1796
|
+
agentSnapshot: z31.object({
|
|
1797
|
+
name: z31.string().optional(),
|
|
1687
1798
|
agentType: AgentTypeSchema.optional(),
|
|
1688
1799
|
runCommand: AgentRunCommandSchema.optional(),
|
|
1689
|
-
systemPrompt:
|
|
1800
|
+
systemPrompt: z31.string().nullable().optional(),
|
|
1690
1801
|
/** @deprecated retained for backward compat with stored snapshots */
|
|
1691
1802
|
modelConfig: ModelConfigSchema.optional(),
|
|
1692
|
-
config:
|
|
1803
|
+
config: z31.record(z31.string(), z31.unknown()).optional()
|
|
1693
1804
|
}).optional(),
|
|
1694
1805
|
/** UUID linking all runs in a comparison group */
|
|
1695
|
-
comparisonGroupId:
|
|
1806
|
+
comparisonGroupId: z31.string().optional(),
|
|
1696
1807
|
/** Human-readable label for this variant (e.g., "MCP: Wix Stores") */
|
|
1697
|
-
comparisonLabel:
|
|
1808
|
+
comparisonLabel: z31.string().optional(),
|
|
1698
1809
|
/** LLM-generated analysis of the completed run */
|
|
1699
1810
|
runAnalysis: RunAnalysisSchema.optional(),
|
|
1700
1811
|
/** IDs of folders this run belongs to (read-only, managed via AddRunToFolder / RemoveRunFromFolder) */
|
|
1701
|
-
folderIds:
|
|
1812
|
+
folderIds: z31.array(z31.string()).optional(),
|
|
1702
1813
|
/** ID of the schedule that triggered this run, if any (read-only) */
|
|
1703
|
-
scheduleId:
|
|
1814
|
+
scheduleId: z31.string().optional()
|
|
1704
1815
|
});
|
|
1705
1816
|
var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
1706
1817
|
id: true,
|
|
@@ -1716,60 +1827,60 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
|
1716
1827
|
agentSnapshot: true
|
|
1717
1828
|
}).extend({
|
|
1718
1829
|
/** Optional on input — backend resolves from tags when not provided */
|
|
1719
|
-
scenarioIds:
|
|
1830
|
+
scenarioIds: z31.array(z31.string()).optional()
|
|
1720
1831
|
}).refine(
|
|
1721
1832
|
(data) => data.scenarioIds && data.scenarioIds.length > 0 || data.tags && data.tags.length > 0,
|
|
1722
1833
|
{ message: "Either scenarioIds or tags must be provided" }
|
|
1723
1834
|
);
|
|
1724
|
-
var EvaluationProgressSchema =
|
|
1725
|
-
runId:
|
|
1726
|
-
targetId:
|
|
1727
|
-
totalScenarios:
|
|
1728
|
-
completedScenarios:
|
|
1729
|
-
scenarioProgress:
|
|
1730
|
-
|
|
1731
|
-
scenarioId:
|
|
1732
|
-
currentStep:
|
|
1733
|
-
error:
|
|
1835
|
+
var EvaluationProgressSchema = z31.object({
|
|
1836
|
+
runId: z31.string(),
|
|
1837
|
+
targetId: z31.string(),
|
|
1838
|
+
totalScenarios: z31.number(),
|
|
1839
|
+
completedScenarios: z31.number(),
|
|
1840
|
+
scenarioProgress: z31.array(
|
|
1841
|
+
z31.object({
|
|
1842
|
+
scenarioId: z31.string(),
|
|
1843
|
+
currentStep: z31.string(),
|
|
1844
|
+
error: z31.string().optional()
|
|
1734
1845
|
})
|
|
1735
1846
|
),
|
|
1736
|
-
createdAt:
|
|
1737
|
-
});
|
|
1738
|
-
var EvaluationLogSchema =
|
|
1739
|
-
runId:
|
|
1740
|
-
scenarioId:
|
|
1741
|
-
log:
|
|
1742
|
-
level:
|
|
1743
|
-
message:
|
|
1744
|
-
args:
|
|
1745
|
-
error:
|
|
1847
|
+
createdAt: z31.number()
|
|
1848
|
+
});
|
|
1849
|
+
var EvaluationLogSchema = z31.object({
|
|
1850
|
+
runId: z31.string(),
|
|
1851
|
+
scenarioId: z31.string(),
|
|
1852
|
+
log: z31.object({
|
|
1853
|
+
level: z31.enum(["info", "error", "debug"]),
|
|
1854
|
+
message: z31.string().optional(),
|
|
1855
|
+
args: z31.array(z31.any()).optional(),
|
|
1856
|
+
error: z31.string().optional()
|
|
1746
1857
|
})
|
|
1747
1858
|
});
|
|
1748
1859
|
var LLM_TIMEOUT = 12e4;
|
|
1749
1860
|
|
|
1750
1861
|
// src/evaluation/conversation.ts
|
|
1751
|
-
import { z as
|
|
1752
|
-
var TextBlockSchema =
|
|
1753
|
-
type:
|
|
1754
|
-
text:
|
|
1755
|
-
});
|
|
1756
|
-
var ThinkingBlockSchema =
|
|
1757
|
-
type:
|
|
1758
|
-
thinking:
|
|
1759
|
-
});
|
|
1760
|
-
var ToolUseBlockSchema =
|
|
1761
|
-
type:
|
|
1762
|
-
toolName:
|
|
1763
|
-
toolId:
|
|
1764
|
-
input:
|
|
1765
|
-
});
|
|
1766
|
-
var ToolResultBlockSchema =
|
|
1767
|
-
type:
|
|
1768
|
-
toolUseId:
|
|
1769
|
-
content:
|
|
1770
|
-
isError:
|
|
1771
|
-
});
|
|
1772
|
-
var ConversationBlockSchema =
|
|
1862
|
+
import { z as z32 } from "zod";
|
|
1863
|
+
var TextBlockSchema = z32.object({
|
|
1864
|
+
type: z32.literal("text"),
|
|
1865
|
+
text: z32.string()
|
|
1866
|
+
});
|
|
1867
|
+
var ThinkingBlockSchema = z32.object({
|
|
1868
|
+
type: z32.literal("thinking"),
|
|
1869
|
+
thinking: z32.string()
|
|
1870
|
+
});
|
|
1871
|
+
var ToolUseBlockSchema = z32.object({
|
|
1872
|
+
type: z32.literal("tool_use"),
|
|
1873
|
+
toolName: z32.string(),
|
|
1874
|
+
toolId: z32.string(),
|
|
1875
|
+
input: z32.unknown()
|
|
1876
|
+
});
|
|
1877
|
+
var ToolResultBlockSchema = z32.object({
|
|
1878
|
+
type: z32.literal("tool_result"),
|
|
1879
|
+
toolUseId: z32.string(),
|
|
1880
|
+
content: z32.string(),
|
|
1881
|
+
isError: z32.boolean().optional()
|
|
1882
|
+
});
|
|
1883
|
+
var ConversationBlockSchema = z32.discriminatedUnion("type", [
|
|
1773
1884
|
TextBlockSchema,
|
|
1774
1885
|
ThinkingBlockSchema,
|
|
1775
1886
|
ToolUseBlockSchema,
|
|
@@ -1780,22 +1891,22 @@ var ConversationMessageRoles = [
|
|
|
1780
1891
|
"user",
|
|
1781
1892
|
"system"
|
|
1782
1893
|
];
|
|
1783
|
-
var ConversationMessageSchema =
|
|
1784
|
-
role:
|
|
1785
|
-
content:
|
|
1786
|
-
timestamp:
|
|
1894
|
+
var ConversationMessageSchema = z32.object({
|
|
1895
|
+
role: z32.enum(ConversationMessageRoles),
|
|
1896
|
+
content: z32.array(ConversationBlockSchema),
|
|
1897
|
+
timestamp: z32.string()
|
|
1787
1898
|
});
|
|
1788
|
-
var ScenarioConversationSchema =
|
|
1789
|
-
id:
|
|
1790
|
-
projectId:
|
|
1791
|
-
evalRunId:
|
|
1792
|
-
resultId:
|
|
1793
|
-
messages:
|
|
1794
|
-
createdAt:
|
|
1899
|
+
var ScenarioConversationSchema = z32.object({
|
|
1900
|
+
id: z32.string(),
|
|
1901
|
+
projectId: z32.string(),
|
|
1902
|
+
evalRunId: z32.string(),
|
|
1903
|
+
resultId: z32.string(),
|
|
1904
|
+
messages: z32.array(ConversationMessageSchema),
|
|
1905
|
+
createdAt: z32.string()
|
|
1795
1906
|
});
|
|
1796
|
-
var ConversationResponseSchema =
|
|
1797
|
-
messages:
|
|
1798
|
-
isPartial:
|
|
1907
|
+
var ConversationResponseSchema = z32.object({
|
|
1908
|
+
messages: z32.array(ConversationMessageSchema),
|
|
1909
|
+
isPartial: z32.boolean()
|
|
1799
1910
|
});
|
|
1800
1911
|
|
|
1801
1912
|
// src/evaluation/eval-result.ts
|
|
@@ -1806,98 +1917,98 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
|
1806
1917
|
AssertionResultStatus2["ERROR"] = "error";
|
|
1807
1918
|
return AssertionResultStatus2;
|
|
1808
1919
|
})(AssertionResultStatus || {});
|
|
1809
|
-
var AssertionResultSchema =
|
|
1810
|
-
id:
|
|
1811
|
-
assertionId:
|
|
1812
|
-
assertionType:
|
|
1813
|
-
assertionName:
|
|
1814
|
-
status:
|
|
1815
|
-
message:
|
|
1816
|
-
expected:
|
|
1817
|
-
actual:
|
|
1818
|
-
duration:
|
|
1819
|
-
details:
|
|
1820
|
-
llmTraceSteps:
|
|
1821
|
-
});
|
|
1822
|
-
var EvalRunResultSchema =
|
|
1823
|
-
id:
|
|
1824
|
-
targetId:
|
|
1825
|
-
targetName:
|
|
1920
|
+
var AssertionResultSchema = z33.object({
|
|
1921
|
+
id: z33.string(),
|
|
1922
|
+
assertionId: z33.string(),
|
|
1923
|
+
assertionType: z33.string(),
|
|
1924
|
+
assertionName: z33.string(),
|
|
1925
|
+
status: z33.enum(AssertionResultStatus),
|
|
1926
|
+
message: z33.string().optional(),
|
|
1927
|
+
expected: z33.string().optional(),
|
|
1928
|
+
actual: z33.string().optional(),
|
|
1929
|
+
duration: z33.number().optional(),
|
|
1930
|
+
details: z33.record(z33.string(), z33.unknown()).optional(),
|
|
1931
|
+
llmTraceSteps: z33.array(LLMTraceStepSchema).optional()
|
|
1932
|
+
});
|
|
1933
|
+
var EvalRunResultSchema = z33.object({
|
|
1934
|
+
id: z33.string(),
|
|
1935
|
+
targetId: z33.string(),
|
|
1936
|
+
targetName: z33.string().optional(),
|
|
1826
1937
|
/** SkillVersion ID used for this evaluation (for version tracking) */
|
|
1827
|
-
skillVersionId:
|
|
1938
|
+
skillVersionId: z33.string().optional(),
|
|
1828
1939
|
/** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
|
|
1829
|
-
skillVersion:
|
|
1830
|
-
scenarioId:
|
|
1831
|
-
scenarioName:
|
|
1940
|
+
skillVersion: z33.string().optional(),
|
|
1941
|
+
scenarioId: z33.string(),
|
|
1942
|
+
scenarioName: z33.string(),
|
|
1832
1943
|
/** Snapshot of the trigger prompt used during the run (prevents stale display after edits) */
|
|
1833
|
-
triggerPrompt:
|
|
1944
|
+
triggerPrompt: z33.string().optional(),
|
|
1834
1945
|
modelConfig: ModelConfigSchema.optional(),
|
|
1835
|
-
assertionResults:
|
|
1946
|
+
assertionResults: z33.array(AssertionResultSchema),
|
|
1836
1947
|
metrics: EvalMetricsSchema.optional(),
|
|
1837
|
-
passed:
|
|
1838
|
-
failed:
|
|
1839
|
-
passRate:
|
|
1840
|
-
duration:
|
|
1841
|
-
outputText:
|
|
1842
|
-
files:
|
|
1843
|
-
fileDiffs:
|
|
1948
|
+
passed: z33.number(),
|
|
1949
|
+
failed: z33.number(),
|
|
1950
|
+
passRate: z33.number(),
|
|
1951
|
+
duration: z33.number(),
|
|
1952
|
+
outputText: z33.string().optional(),
|
|
1953
|
+
files: z33.array(ExpectedFileSchema).optional(),
|
|
1954
|
+
fileDiffs: z33.array(DiffContentSchema).optional(),
|
|
1844
1955
|
/** Full template files after execution with status indicators */
|
|
1845
|
-
templateFiles:
|
|
1846
|
-
startedAt:
|
|
1847
|
-
completedAt:
|
|
1956
|
+
templateFiles: z33.array(TemplateFileSchema).optional(),
|
|
1957
|
+
startedAt: z33.string().optional(),
|
|
1958
|
+
completedAt: z33.string().optional(),
|
|
1848
1959
|
llmTrace: LLMTraceSchema.optional(),
|
|
1849
1960
|
/** Full conversation messages (only present in transit; stripped before DB storage) */
|
|
1850
|
-
conversation:
|
|
1961
|
+
conversation: z33.array(ConversationMessageSchema).optional(),
|
|
1851
1962
|
/** 0-based iteration index when a scenario is run multiple times within a single eval run */
|
|
1852
|
-
iterationIndex:
|
|
1853
|
-
});
|
|
1854
|
-
var PromptResultSchema =
|
|
1855
|
-
text:
|
|
1856
|
-
files:
|
|
1857
|
-
finishReason:
|
|
1858
|
-
reasoning:
|
|
1859
|
-
reasoningDetails:
|
|
1860
|
-
toolCalls:
|
|
1861
|
-
toolResults:
|
|
1862
|
-
warnings:
|
|
1863
|
-
sources:
|
|
1864
|
-
steps:
|
|
1865
|
-
generationTimeMs:
|
|
1866
|
-
prompt:
|
|
1867
|
-
systemPrompt:
|
|
1868
|
-
usage:
|
|
1869
|
-
totalTokens:
|
|
1870
|
-
totalMicrocentsSpent:
|
|
1963
|
+
iterationIndex: z33.number().int().min(0).optional()
|
|
1964
|
+
});
|
|
1965
|
+
var PromptResultSchema = z33.object({
|
|
1966
|
+
text: z33.string(),
|
|
1967
|
+
files: z33.array(z33.unknown()).optional(),
|
|
1968
|
+
finishReason: z33.string().optional(),
|
|
1969
|
+
reasoning: z33.string().optional(),
|
|
1970
|
+
reasoningDetails: z33.unknown().optional(),
|
|
1971
|
+
toolCalls: z33.array(z33.unknown()).optional(),
|
|
1972
|
+
toolResults: z33.array(z33.unknown()).optional(),
|
|
1973
|
+
warnings: z33.array(z33.unknown()).optional(),
|
|
1974
|
+
sources: z33.array(z33.unknown()).optional(),
|
|
1975
|
+
steps: z33.array(z33.unknown()),
|
|
1976
|
+
generationTimeMs: z33.number(),
|
|
1977
|
+
prompt: z33.string(),
|
|
1978
|
+
systemPrompt: z33.string(),
|
|
1979
|
+
usage: z33.object({
|
|
1980
|
+
totalTokens: z33.number().optional(),
|
|
1981
|
+
totalMicrocentsSpent: z33.number().optional()
|
|
1871
1982
|
})
|
|
1872
1983
|
});
|
|
1873
|
-
var EvaluationResultSchema =
|
|
1874
|
-
id:
|
|
1875
|
-
runId:
|
|
1876
|
-
timestamp:
|
|
1984
|
+
var EvaluationResultSchema = z33.object({
|
|
1985
|
+
id: z33.string(),
|
|
1986
|
+
runId: z33.string(),
|
|
1987
|
+
timestamp: z33.number(),
|
|
1877
1988
|
promptResult: PromptResultSchema,
|
|
1878
|
-
testResults:
|
|
1879
|
-
tags:
|
|
1880
|
-
feedback:
|
|
1881
|
-
score:
|
|
1882
|
-
suiteId:
|
|
1883
|
-
});
|
|
1884
|
-
var LeanEvaluationResultSchema =
|
|
1885
|
-
id:
|
|
1886
|
-
runId:
|
|
1887
|
-
timestamp:
|
|
1888
|
-
tags:
|
|
1889
|
-
scenarioId:
|
|
1890
|
-
scenarioVersion:
|
|
1891
|
-
targetId:
|
|
1892
|
-
targetVersion:
|
|
1893
|
-
suiteId:
|
|
1894
|
-
score:
|
|
1895
|
-
time:
|
|
1896
|
-
microcentsSpent:
|
|
1989
|
+
testResults: z33.array(z33.unknown()),
|
|
1990
|
+
tags: z33.array(z33.string()).optional(),
|
|
1991
|
+
feedback: z33.string().optional(),
|
|
1992
|
+
score: z33.number(),
|
|
1993
|
+
suiteId: z33.string().optional()
|
|
1994
|
+
});
|
|
1995
|
+
var LeanEvaluationResultSchema = z33.object({
|
|
1996
|
+
id: z33.string(),
|
|
1997
|
+
runId: z33.string(),
|
|
1998
|
+
timestamp: z33.number(),
|
|
1999
|
+
tags: z33.array(z33.string()).optional(),
|
|
2000
|
+
scenarioId: z33.string(),
|
|
2001
|
+
scenarioVersion: z33.number().optional(),
|
|
2002
|
+
targetId: z33.string(),
|
|
2003
|
+
targetVersion: z33.number().optional(),
|
|
2004
|
+
suiteId: z33.string().optional(),
|
|
2005
|
+
score: z33.number(),
|
|
2006
|
+
time: z33.number().optional(),
|
|
2007
|
+
microcentsSpent: z33.number().optional()
|
|
1897
2008
|
});
|
|
1898
2009
|
|
|
1899
2010
|
// src/evaluation/eval-run-folder.ts
|
|
1900
|
-
import { z as
|
|
2011
|
+
import { z as z34 } from "zod";
|
|
1901
2012
|
var EvalRunFolderSchema = TenantEntitySchema.extend({});
|
|
1902
2013
|
var CreateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
|
|
1903
2014
|
id: true,
|
|
@@ -1911,26 +2022,26 @@ var UpdateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
|
|
|
1911
2022
|
updatedAt: true,
|
|
1912
2023
|
deleted: true
|
|
1913
2024
|
}).partial();
|
|
1914
|
-
var EvalRunFolderMembershipSchema =
|
|
1915
|
-
folderId:
|
|
1916
|
-
evalRunId:
|
|
1917
|
-
projectId:
|
|
1918
|
-
createdAt:
|
|
2025
|
+
var EvalRunFolderMembershipSchema = z34.object({
|
|
2026
|
+
folderId: z34.string(),
|
|
2027
|
+
evalRunId: z34.string(),
|
|
2028
|
+
projectId: z34.string(),
|
|
2029
|
+
createdAt: z34.string()
|
|
1919
2030
|
});
|
|
1920
2031
|
|
|
1921
2032
|
// src/project/project.ts
|
|
1922
|
-
import { z as
|
|
2033
|
+
import { z as z35 } from "zod";
|
|
1923
2034
|
var ProjectSchema = BaseEntitySchema.extend({
|
|
1924
|
-
appId:
|
|
1925
|
-
scenarioTags:
|
|
2035
|
+
appId: z35.string().optional().describe("The ID of the app in Dev Center"),
|
|
2036
|
+
scenarioTags: z35.array(z35.string()).optional().describe("Project-level tag vocabulary for scenarios"),
|
|
1926
2037
|
/** Per-project Wix auth token (write-only — never returned in GET responses). null = clear. */
|
|
1927
|
-
wixAuthToken:
|
|
2038
|
+
wixAuthToken: z35.string().nullable().optional().describe("Wix auth token for CLI/MCP authentication (encrypted at rest)"),
|
|
1928
2039
|
/** Per-project Base44 auth file content (write-only — never returned in GET responses). null = clear. */
|
|
1929
|
-
base44AuthFile:
|
|
2040
|
+
base44AuthFile: z35.string().nullable().optional().describe("Base64-encoded Base44 auth file content (encrypted at rest)"),
|
|
1930
2041
|
/** Resolved at runtime from the encrypted Wix auth token */
|
|
1931
|
-
wixAuthEmail:
|
|
2042
|
+
wixAuthEmail: z35.string().optional().describe("Email associated with the Wix auth token (resolved at runtime)"),
|
|
1932
2043
|
/** Resolved at runtime from the encrypted Base44 auth file */
|
|
1933
|
-
base44AuthEmail:
|
|
2044
|
+
base44AuthEmail: z35.string().optional().describe("Email from the Base44 auth file (resolved at runtime)")
|
|
1934
2045
|
});
|
|
1935
2046
|
var CreateProjectInputSchema = ProjectSchema.omit({
|
|
1936
2047
|
id: true,
|
|
@@ -1940,34 +2051,34 @@ var CreateProjectInputSchema = ProjectSchema.omit({
|
|
|
1940
2051
|
wixAuthEmail: true,
|
|
1941
2052
|
base44AuthEmail: true
|
|
1942
2053
|
}).extend({
|
|
1943
|
-
appId:
|
|
2054
|
+
appId: z35.string().describe(
|
|
1944
2055
|
"Required: The ID of the app in Dev Center for credential scoping"
|
|
1945
2056
|
)
|
|
1946
2057
|
});
|
|
1947
2058
|
var UpdateProjectInputSchema = CreateProjectInputSchema.partial();
|
|
1948
2059
|
|
|
1949
2060
|
// src/template/template.ts
|
|
1950
|
-
import { z as
|
|
1951
|
-
var SourceFileSchema =
|
|
1952
|
-
path:
|
|
1953
|
-
content:
|
|
2061
|
+
import { z as z36 } from "zod";
|
|
2062
|
+
var SourceFileSchema = z36.object({
|
|
2063
|
+
path: z36.string().min(1),
|
|
2064
|
+
content: z36.string()
|
|
1954
2065
|
});
|
|
1955
|
-
var ExtraFileSchema =
|
|
1956
|
-
path:
|
|
1957
|
-
content:
|
|
2066
|
+
var ExtraFileSchema = z36.object({
|
|
2067
|
+
path: z36.string().min(1),
|
|
2068
|
+
content: z36.string().optional(),
|
|
1958
2069
|
gitSource: GitHubSourceSchema.optional()
|
|
1959
2070
|
}).refine((ef) => ef.content !== void 0 || ef.gitSource !== void 0, {
|
|
1960
2071
|
message: "ExtraFile must have either content or gitSource"
|
|
1961
2072
|
});
|
|
1962
|
-
var TemplateFileEntrySchema =
|
|
1963
|
-
path:
|
|
1964
|
-
content:
|
|
1965
|
-
extra:
|
|
2073
|
+
var TemplateFileEntrySchema = z36.object({
|
|
2074
|
+
path: z36.string().min(1),
|
|
2075
|
+
content: z36.string(),
|
|
2076
|
+
extra: z36.boolean()
|
|
1966
2077
|
});
|
|
1967
2078
|
var TemplateSchema = TenantEntitySchema.extend({
|
|
1968
2079
|
source: GitHubSourceSchema.optional(),
|
|
1969
|
-
sourceFiles:
|
|
1970
|
-
extraFiles:
|
|
2080
|
+
sourceFiles: z36.array(SourceFileSchema).optional(),
|
|
2081
|
+
extraFiles: z36.array(ExtraFileSchema).optional()
|
|
1971
2082
|
});
|
|
1972
2083
|
var singleSourceKind = (t) => !(t.source && t.sourceFiles?.length);
|
|
1973
2084
|
var singleSourceKindError = {
|
|
@@ -1987,66 +2098,66 @@ var UpdateTemplateInputSchema = TemplateSchema.omit({
|
|
|
1987
2098
|
}).partial().refine(singleSourceKind, singleSourceKindError);
|
|
1988
2099
|
|
|
1989
2100
|
// src/agent/agent-config.ts
|
|
1990
|
-
import { z as
|
|
1991
|
-
var BaseAgentConfigSchema =
|
|
2101
|
+
import { z as z37 } from "zod";
|
|
2102
|
+
var BaseAgentConfigSchema = z37.object({
|
|
1992
2103
|
/** Model ID (Claude or OpenAI). */
|
|
1993
2104
|
model: AnyModelSchema.optional(),
|
|
1994
2105
|
/** Sampling temperature (0–1). */
|
|
1995
|
-
temperature:
|
|
2106
|
+
temperature: z37.number().min(0).max(1).optional(),
|
|
1996
2107
|
/** Max output tokens per turn. */
|
|
1997
|
-
maxTokens:
|
|
2108
|
+
maxTokens: z37.number().int().min(1).optional(),
|
|
1998
2109
|
/** Number of agentic turns. 0 = unlimited. */
|
|
1999
|
-
maxTurns:
|
|
2110
|
+
maxTurns: z37.number().int().min(0).optional(),
|
|
2000
2111
|
/** Execution timeout in milliseconds. Overrides the default maxTurns-based calculation. */
|
|
2001
|
-
maxDurationMs:
|
|
2112
|
+
maxDurationMs: z37.number().int().min(0).optional()
|
|
2002
2113
|
});
|
|
2003
|
-
var EffortLevelSchema =
|
|
2114
|
+
var EffortLevelSchema = z37.enum(["low", "medium", "high", "max"]);
|
|
2004
2115
|
var ClaudeCodeConfigSchema = BaseAgentConfigSchema.extend({
|
|
2005
2116
|
/** Extended thinking token budget. */
|
|
2006
|
-
maxThinkingTokens:
|
|
2117
|
+
maxThinkingTokens: z37.number().int().min(0).optional(),
|
|
2007
2118
|
/** Override the default allowedTools list passed to the SDK. */
|
|
2008
|
-
allowedTools:
|
|
2119
|
+
allowedTools: z37.array(z37.string()).optional(),
|
|
2009
2120
|
/** Tools to remove from the model's context entirely. */
|
|
2010
|
-
disallowedTools:
|
|
2121
|
+
disallowedTools: z37.array(z37.string()).optional(),
|
|
2011
2122
|
/** Controls thinking depth: low, medium, high, max. */
|
|
2012
2123
|
effort: EffortLevelSchema.optional(),
|
|
2013
2124
|
/** Maximum USD spend per run. Stops execution when reached. */
|
|
2014
|
-
maxBudgetUsd:
|
|
2125
|
+
maxBudgetUsd: z37.number().min(0).optional()
|
|
2015
2126
|
});
|
|
2016
|
-
var PermissionValueSchema =
|
|
2017
|
-
var OpenCodePermissionSchema =
|
|
2018
|
-
|
|
2019
|
-
|
|
2127
|
+
var PermissionValueSchema = z37.enum(["allow", "deny"]);
|
|
2128
|
+
var OpenCodePermissionSchema = z37.record(
|
|
2129
|
+
z37.string(),
|
|
2130
|
+
z37.union([PermissionValueSchema, z37.record(z37.string(), PermissionValueSchema)])
|
|
2020
2131
|
);
|
|
2021
|
-
var ThinkingVariantSchema =
|
|
2132
|
+
var ThinkingVariantSchema = z37.enum(["high", "low", "none"]);
|
|
2022
2133
|
var OpenCodeConfigSchema = BaseAgentConfigSchema.extend({
|
|
2023
2134
|
/** Permission overrides (defaults: allow-all). */
|
|
2024
2135
|
permission: OpenCodePermissionSchema.optional(),
|
|
2025
2136
|
/** Maps to `--variant` CLI flag. 'none' omits --thinking entirely. Default: 'high'. */
|
|
2026
2137
|
thinkingVariant: ThinkingVariantSchema.optional(),
|
|
2027
2138
|
/** Nucleus sampling (0–1). Alternative to temperature. */
|
|
2028
|
-
topP:
|
|
2139
|
+
topP: z37.number().min(0).max(1).optional()
|
|
2029
2140
|
}).omit({ maxTokens: true });
|
|
2030
|
-
var ReasoningEffortSchema =
|
|
2141
|
+
var ReasoningEffortSchema = z37.enum(["low", "medium", "high"]);
|
|
2031
2142
|
var SimpleAgentConfigSchema = BaseAgentConfigSchema.extend({
|
|
2032
2143
|
/** Anthropic thinking budget in tokens. Default: 10 000. */
|
|
2033
|
-
thinkingBudgetTokens:
|
|
2144
|
+
thinkingBudgetTokens: z37.number().int().min(0).optional(),
|
|
2034
2145
|
/** Nucleus sampling (0–1). Alternative to temperature. */
|
|
2035
|
-
topP:
|
|
2146
|
+
topP: z37.number().min(0).max(1).optional(),
|
|
2036
2147
|
/** Integer seed for deterministic/reproducible results (if model supports it). */
|
|
2037
|
-
seed:
|
|
2148
|
+
seed: z37.number().int().optional(),
|
|
2038
2149
|
/** Stop sequences — model stops when generating any of these strings. */
|
|
2039
|
-
stopSequences:
|
|
2150
|
+
stopSequences: z37.array(z37.string()).optional(),
|
|
2040
2151
|
/** OpenAI reasoning effort level. Default: 'high'. */
|
|
2041
2152
|
reasoningEffort: ReasoningEffortSchema.optional(),
|
|
2042
2153
|
/** Frequency penalty (−2 to 2). Reduces repetition of same tokens. */
|
|
2043
|
-
frequencyPenalty:
|
|
2154
|
+
frequencyPenalty: z37.number().min(-2).max(2).optional(),
|
|
2044
2155
|
/** Presence penalty (−2 to 2). Encourages topic diversity. */
|
|
2045
|
-
presencePenalty:
|
|
2156
|
+
presencePenalty: z37.number().min(-2).max(2).optional()
|
|
2046
2157
|
});
|
|
2047
2158
|
|
|
2048
2159
|
// src/schedule/eval-schedule.ts
|
|
2049
|
-
import { z as
|
|
2160
|
+
import { z as z38 } from "zod";
|
|
2050
2161
|
var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
2051
2162
|
FrequencyType2["DAILY"] = "daily";
|
|
2052
2163
|
FrequencyType2["WEEKDAY"] = "weekday";
|
|
@@ -2056,31 +2167,31 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
|
|
|
2056
2167
|
})(FrequencyType || {});
|
|
2057
2168
|
var EvalScheduleSchema = TenantEntitySchema.extend({
|
|
2058
2169
|
/** Whether the schedule is active */
|
|
2059
|
-
enabled:
|
|
2170
|
+
enabled: z38.boolean(),
|
|
2060
2171
|
/** Test suite to run */
|
|
2061
|
-
suiteId:
|
|
2172
|
+
suiteId: z38.string(),
|
|
2062
2173
|
/** Preset that provides agent + entities for this schedule */
|
|
2063
|
-
presetId:
|
|
2174
|
+
presetId: z38.string(),
|
|
2064
2175
|
/** How often to run */
|
|
2065
|
-
frequencyType:
|
|
2176
|
+
frequencyType: z38.nativeEnum(FrequencyType),
|
|
2066
2177
|
/** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
|
|
2067
|
-
timeOfDay:
|
|
2178
|
+
timeOfDay: z38.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
|
|
2068
2179
|
/** Day of week (0=Sun, 6=Sat) for weekly schedules */
|
|
2069
|
-
dayOfWeek:
|
|
2180
|
+
dayOfWeek: z38.number().min(0).max(6).optional(),
|
|
2070
2181
|
/** Day of month (1-31) for monthly schedules */
|
|
2071
|
-
dayOfMonth:
|
|
2182
|
+
dayOfMonth: z38.number().min(1).max(31).optional(),
|
|
2072
2183
|
/** IANA timezone (e.g., 'America/New_York') */
|
|
2073
|
-
timezone:
|
|
2184
|
+
timezone: z38.string(),
|
|
2074
2185
|
/** ID of the last eval run created by this schedule */
|
|
2075
|
-
lastRunId:
|
|
2186
|
+
lastRunId: z38.string().optional(),
|
|
2076
2187
|
/** Denormalized status of the last run */
|
|
2077
|
-
lastRunStatus:
|
|
2188
|
+
lastRunStatus: z38.string().optional(),
|
|
2078
2189
|
/** ISO timestamp of the last run */
|
|
2079
|
-
lastRunAt:
|
|
2190
|
+
lastRunAt: z38.string().optional(),
|
|
2080
2191
|
/** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
|
|
2081
|
-
nextRunAt:
|
|
2192
|
+
nextRunAt: z38.string().optional(),
|
|
2082
2193
|
/** Per-scenario variable values forwarded to runs triggered by this schedule (scenarioId → varName → value) */
|
|
2083
|
-
variables:
|
|
2194
|
+
variables: z38.record(z38.string(), z38.record(z38.string(), z38.string())).optional()
|
|
2084
2195
|
});
|
|
2085
2196
|
function isValidTimezone(tz) {
|
|
2086
2197
|
try {
|
|
@@ -2093,14 +2204,14 @@ function isValidTimezone(tz) {
|
|
|
2093
2204
|
function validateScheduleFields(data, ctx, options) {
|
|
2094
2205
|
if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
|
|
2095
2206
|
ctx.addIssue({
|
|
2096
|
-
code:
|
|
2207
|
+
code: z38.ZodIssueCode.custom,
|
|
2097
2208
|
message: "dayOfWeek is required for weekly schedules",
|
|
2098
2209
|
path: ["dayOfWeek"]
|
|
2099
2210
|
});
|
|
2100
2211
|
}
|
|
2101
2212
|
if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
|
|
2102
2213
|
ctx.addIssue({
|
|
2103
|
-
code:
|
|
2214
|
+
code: z38.ZodIssueCode.custom,
|
|
2104
2215
|
message: "dayOfMonth is required for monthly schedules",
|
|
2105
2216
|
path: ["dayOfMonth"]
|
|
2106
2217
|
});
|
|
@@ -2108,7 +2219,7 @@ function validateScheduleFields(data, ctx, options) {
|
|
|
2108
2219
|
const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
|
|
2109
2220
|
if (shouldValidateTz && !isValidTimezone(data.timezone)) {
|
|
2110
2221
|
ctx.addIssue({
|
|
2111
|
-
code:
|
|
2222
|
+
code: z38.ZodIssueCode.custom,
|
|
2112
2223
|
message: "Invalid IANA timezone",
|
|
2113
2224
|
path: ["timezone"]
|
|
2114
2225
|
});
|
|
@@ -2271,6 +2382,7 @@ export {
|
|
|
2271
2382
|
PresetSchema,
|
|
2272
2383
|
ProjectSchema,
|
|
2273
2384
|
PromptResultSchema,
|
|
2385
|
+
ProvisionedSiteSchema,
|
|
2274
2386
|
RUN_COMMAND_LABELS,
|
|
2275
2387
|
ReasoningEffortSchema,
|
|
2276
2388
|
RuleSchema,
|
|
@@ -2278,13 +2390,20 @@ export {
|
|
|
2278
2390
|
RunAnalysisFindingSchema,
|
|
2279
2391
|
RunAnalysisSchema,
|
|
2280
2392
|
SEMVER_REGEX,
|
|
2393
|
+
SITE_SETUP_EXCLUSIVE_VARIABLES,
|
|
2281
2394
|
SKILL_FOLDER_NAME_REGEX,
|
|
2282
2395
|
SYSTEM_ASSERTIONS,
|
|
2283
2396
|
SYSTEM_ASSERTION_IDS,
|
|
2284
2397
|
ScenarioAssertionLinkSchema,
|
|
2285
2398
|
ScenarioConversationSchema,
|
|
2286
2399
|
SimpleAgentConfigSchema,
|
|
2400
|
+
SiteBootstrapHttpMethodSchema,
|
|
2401
|
+
SiteBootstrapResultSchema,
|
|
2402
|
+
SiteBootstrapSchema,
|
|
2403
|
+
SiteBootstrapStepResultSchema,
|
|
2404
|
+
SiteBootstrapStepSchema,
|
|
2287
2405
|
SiteConfigTestSchema,
|
|
2406
|
+
SiteSetupConfigSchema,
|
|
2288
2407
|
SkillFileSchema,
|
|
2289
2408
|
SkillMetadataSchema,
|
|
2290
2409
|
SkillSchema,
|
|
@@ -2338,6 +2457,7 @@ export {
|
|
|
2338
2457
|
UpdateTestScenarioInputSchema,
|
|
2339
2458
|
UpdateTestSuiteInputSchema,
|
|
2340
2459
|
VitestTestSchema,
|
|
2460
|
+
WixSiteSummarySchema,
|
|
2341
2461
|
capabilityToMcp,
|
|
2342
2462
|
capabilityToRule,
|
|
2343
2463
|
capabilityToSkill,
|
|
@@ -2345,6 +2465,7 @@ export {
|
|
|
2345
2465
|
capabilityToSubAgent,
|
|
2346
2466
|
capabilityVersionToSkillVersion,
|
|
2347
2467
|
classifyAssertionRef,
|
|
2468
|
+
extractVariableNamesFromPrompt,
|
|
2348
2469
|
formatTraceEventLine,
|
|
2349
2470
|
getSystemAssertion,
|
|
2350
2471
|
getSystemAssertions,
|
|
@@ -2357,7 +2478,10 @@ export {
|
|
|
2357
2478
|
normalizeModelId,
|
|
2358
2479
|
parseBuildCommandToArgv,
|
|
2359
2480
|
parseTraceEventLine,
|
|
2481
|
+
promptUsesSiteSetupExclusiveVariables,
|
|
2482
|
+
resolveWixOriginTemplateId,
|
|
2360
2483
|
validateAssertionConfig,
|
|
2361
|
-
validateBuildPassedParamsInAssertionLinks
|
|
2484
|
+
validateBuildPassedParamsInAssertionLinks,
|
|
2485
|
+
validateSiteSetupExclusivity
|
|
2362
2486
|
};
|
|
2363
2487
|
//# sourceMappingURL=index.mjs.map
|