@wix/evalforge-types 0.37.0 → 0.38.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/build/index.js +439 -413
- package/build/index.js.map +4 -4
- package/build/index.mjs +435 -413
- package/build/index.mjs.map +4 -4
- package/build/types/agent/adapter.d.ts +3 -0
- package/build/types/common/index.d.ts +1 -0
- package/build/types/common/rule.d.ts +47 -0
- package/build/types/evaluation/eval-run.d.ts +2 -0
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -944,6 +944,7 @@ __export(index_exports, {
|
|
|
944
944
|
CreateEvalRunInputSchema: () => CreateEvalRunInputSchema,
|
|
945
945
|
CreateMcpInputSchema: () => CreateMcpInputSchema,
|
|
946
946
|
CreateProjectInputSchema: () => CreateProjectInputSchema,
|
|
947
|
+
CreateRuleInputSchema: () => CreateRuleInputSchema,
|
|
947
948
|
CreateSkillInputSchema: () => CreateSkillInputSchema,
|
|
948
949
|
CreateSkillVersionInputSchema: () => CreateSkillVersionInputSchema,
|
|
949
950
|
CreateSkillsGroupInputSchema: () => CreateSkillsGroupInputSchema,
|
|
@@ -998,6 +999,8 @@ __export(index_exports, {
|
|
|
998
999
|
ProjectSchema: () => ProjectSchema,
|
|
999
1000
|
PromptResultSchema: () => PromptResultSchema,
|
|
1000
1001
|
RUN_COMMAND_LABELS: () => RUN_COMMAND_LABELS,
|
|
1002
|
+
RuleSchema: () => RuleSchema,
|
|
1003
|
+
RuleTypeSchema: () => RuleTypeSchema,
|
|
1001
1004
|
SEMVER_REGEX: () => SEMVER_REGEX,
|
|
1002
1005
|
SKILL_FOLDER_NAME_REGEX: () => SKILL_FOLDER_NAME_REGEX,
|
|
1003
1006
|
SYSTEM_ASSERTIONS: () => SYSTEM_ASSERTIONS,
|
|
@@ -1038,6 +1041,7 @@ __export(index_exports, {
|
|
|
1038
1041
|
UpdateCustomAssertionInputSchema: () => UpdateCustomAssertionInputSchema,
|
|
1039
1042
|
UpdateMcpInputSchema: () => UpdateMcpInputSchema,
|
|
1040
1043
|
UpdateProjectInputSchema: () => UpdateProjectInputSchema,
|
|
1044
|
+
UpdateRuleInputSchema: () => UpdateRuleInputSchema,
|
|
1041
1045
|
UpdateSkillInputSchema: () => UpdateSkillInputSchema,
|
|
1042
1046
|
UpdateSkillsGroupInputSchema: () => UpdateSkillsGroupInputSchema,
|
|
1043
1047
|
UpdateSubAgentInputSchema: () => UpdateSubAgentInputSchema,
|
|
@@ -1128,6 +1132,22 @@ var ModelConfigSchema = import_zod4.z.object({
|
|
|
1128
1132
|
maxTokens: import_zod4.z.preprocess(nullToUndefined, import_zod4.z.number().min(1).optional())
|
|
1129
1133
|
});
|
|
1130
1134
|
|
|
1135
|
+
// src/common/rule.ts
|
|
1136
|
+
var import_zod5 = require("zod");
|
|
1137
|
+
var RuleTypeSchema = import_zod5.z.enum(["claude-md", "agents-md", "cursor-rule"]);
|
|
1138
|
+
var RuleSchema = TenantEntitySchema.extend({
|
|
1139
|
+
ruleType: RuleTypeSchema,
|
|
1140
|
+
content: import_zod5.z.string()
|
|
1141
|
+
});
|
|
1142
|
+
var RuleInputBaseSchema = RuleSchema.omit({
|
|
1143
|
+
id: true,
|
|
1144
|
+
createdAt: true,
|
|
1145
|
+
updatedAt: true,
|
|
1146
|
+
deleted: true
|
|
1147
|
+
});
|
|
1148
|
+
var CreateRuleInputSchema = RuleInputBaseSchema;
|
|
1149
|
+
var UpdateRuleInputSchema = RuleInputBaseSchema.partial();
|
|
1150
|
+
|
|
1131
1151
|
// src/target/target.ts
|
|
1132
1152
|
var TargetSchema = TenantEntitySchema.extend({
|
|
1133
1153
|
// Base for all testable entities
|
|
@@ -1135,7 +1155,7 @@ var TargetSchema = TenantEntitySchema.extend({
|
|
|
1135
1155
|
});
|
|
1136
1156
|
|
|
1137
1157
|
// src/target/agent.ts
|
|
1138
|
-
var
|
|
1158
|
+
var import_zod6 = require("zod");
|
|
1139
1159
|
var AgentRunCommand = /* @__PURE__ */ ((AgentRunCommand2) => {
|
|
1140
1160
|
AgentRunCommand2["CLAUDE"] = "claude";
|
|
1141
1161
|
return AgentRunCommand2;
|
|
@@ -1144,7 +1164,7 @@ var AVAILABLE_RUN_COMMANDS = Object.values(AgentRunCommand);
|
|
|
1144
1164
|
var RUN_COMMAND_LABELS = {
|
|
1145
1165
|
["claude" /* CLAUDE */]: "Claude Code"
|
|
1146
1166
|
};
|
|
1147
|
-
var AgentRunCommandSchema =
|
|
1167
|
+
var AgentRunCommandSchema = import_zod6.z.nativeEnum(AgentRunCommand);
|
|
1148
1168
|
var AgentSchema = TargetSchema.extend({
|
|
1149
1169
|
/** Command to run the agent */
|
|
1150
1170
|
runCommand: AgentRunCommandSchema,
|
|
@@ -1162,51 +1182,51 @@ var UpdateAgentInputSchema = CreateAgentInputSchema.partial().extend({
|
|
|
1162
1182
|
});
|
|
1163
1183
|
|
|
1164
1184
|
// src/target/skill.ts
|
|
1165
|
-
var
|
|
1185
|
+
var import_zod7 = require("zod");
|
|
1166
1186
|
var SKILL_FOLDER_NAME_REGEX = /^[a-z0-9]+(-[a-z0-9]+)*$/;
|
|
1167
1187
|
var SEMVER_REGEX = /^\d+\.\d+\.\d+$/;
|
|
1168
|
-
var SkillVersionOriginSchema =
|
|
1188
|
+
var SkillVersionOriginSchema = import_zod7.z.enum(["manual", "pr", "master"]);
|
|
1169
1189
|
function isValidSkillFolderName(name) {
|
|
1170
1190
|
return typeof name === "string" && name.length > 0 && SKILL_FOLDER_NAME_REGEX.test(name.trim());
|
|
1171
1191
|
}
|
|
1172
|
-
var SkillMetadataSchema =
|
|
1173
|
-
name:
|
|
1174
|
-
description:
|
|
1175
|
-
allowedTools:
|
|
1176
|
-
skills:
|
|
1192
|
+
var SkillMetadataSchema = import_zod7.z.object({
|
|
1193
|
+
name: import_zod7.z.string(),
|
|
1194
|
+
description: import_zod7.z.string(),
|
|
1195
|
+
allowedTools: import_zod7.z.array(import_zod7.z.string()).optional(),
|
|
1196
|
+
skills: import_zod7.z.array(import_zod7.z.string()).optional()
|
|
1177
1197
|
});
|
|
1178
|
-
var SkillFileSchema =
|
|
1198
|
+
var SkillFileSchema = import_zod7.z.object({
|
|
1179
1199
|
/** Relative path within the skill directory, e.g. "SKILL.md" or "references/API_SPEC.md" */
|
|
1180
|
-
path:
|
|
1200
|
+
path: import_zod7.z.string().min(1),
|
|
1181
1201
|
/** File content (UTF-8 text) */
|
|
1182
|
-
content:
|
|
1202
|
+
content: import_zod7.z.string()
|
|
1183
1203
|
});
|
|
1184
|
-
var SkillVersionSchema =
|
|
1185
|
-
id:
|
|
1186
|
-
projectId:
|
|
1187
|
-
skillId:
|
|
1204
|
+
var SkillVersionSchema = import_zod7.z.object({
|
|
1205
|
+
id: import_zod7.z.string(),
|
|
1206
|
+
projectId: import_zod7.z.string(),
|
|
1207
|
+
skillId: import_zod7.z.string(),
|
|
1188
1208
|
/** Semver string (e.g. "1.2.0") or Falcon fingerprint */
|
|
1189
|
-
version:
|
|
1209
|
+
version: import_zod7.z.string(),
|
|
1190
1210
|
/** How this version was created */
|
|
1191
1211
|
origin: SkillVersionOriginSchema,
|
|
1192
1212
|
/** Where this snapshot was taken from */
|
|
1193
1213
|
source: GitHubSourceSchema.optional(),
|
|
1194
1214
|
/** Frozen snapshot of all files in the skill directory */
|
|
1195
|
-
files:
|
|
1215
|
+
files: import_zod7.z.array(SkillFileSchema).optional(),
|
|
1196
1216
|
/** Optional notes about this version (changelog, reason for change) */
|
|
1197
|
-
notes:
|
|
1198
|
-
createdAt:
|
|
1217
|
+
notes: import_zod7.z.string().optional(),
|
|
1218
|
+
createdAt: import_zod7.z.string()
|
|
1199
1219
|
});
|
|
1200
|
-
var CreateSkillVersionInputSchema =
|
|
1220
|
+
var CreateSkillVersionInputSchema = import_zod7.z.object({
|
|
1201
1221
|
/** GitHub source to snapshot from. If not provided, uses the Skill's source. */
|
|
1202
1222
|
source: GitHubSourceSchema.optional(),
|
|
1203
1223
|
/** Version string for this snapshot (e.g. "1.0.0", "1.0.3"). */
|
|
1204
|
-
version:
|
|
1205
|
-
notes:
|
|
1224
|
+
version: import_zod7.z.string().min(1),
|
|
1225
|
+
notes: import_zod7.z.string().optional(),
|
|
1206
1226
|
/** Origin of this version. Defaults to 'manual' in backend. */
|
|
1207
1227
|
origin: SkillVersionOriginSchema.optional(),
|
|
1208
1228
|
/** Pre-edited files to store directly (bypasses GitHub fetch when provided) */
|
|
1209
|
-
files:
|
|
1229
|
+
files: import_zod7.z.array(SkillFileSchema).optional()
|
|
1210
1230
|
});
|
|
1211
1231
|
var SkillSchema = TargetSchema.extend({
|
|
1212
1232
|
/** GitHub source reference for live content fetching */
|
|
@@ -1222,15 +1242,15 @@ var SkillInputBaseSchema = SkillSchema.omit({
|
|
|
1222
1242
|
source: true
|
|
1223
1243
|
}).extend({
|
|
1224
1244
|
/** Optional - not stored on Skill; content description lives in SkillVersion */
|
|
1225
|
-
description:
|
|
1245
|
+
description: import_zod7.z.string().optional(),
|
|
1226
1246
|
/** GitHub source reference for live content fetching */
|
|
1227
1247
|
source: GitHubSourceSchema.optional()
|
|
1228
1248
|
});
|
|
1229
|
-
var InitialVersionInputSchema =
|
|
1230
|
-
files:
|
|
1231
|
-
notes:
|
|
1249
|
+
var InitialVersionInputSchema = import_zod7.z.object({
|
|
1250
|
+
files: import_zod7.z.array(SkillFileSchema).optional(),
|
|
1251
|
+
notes: import_zod7.z.string().optional(),
|
|
1232
1252
|
source: GitHubSourceSchema.optional(),
|
|
1233
|
-
version:
|
|
1253
|
+
version: import_zod7.z.string().optional(),
|
|
1234
1254
|
origin: SkillVersionOriginSchema.optional()
|
|
1235
1255
|
});
|
|
1236
1256
|
var CreateSkillInputSchema = SkillInputBaseSchema.extend({
|
|
@@ -1248,10 +1268,10 @@ var SkillWithLatestVersionSchema = SkillSchema.extend({
|
|
|
1248
1268
|
});
|
|
1249
1269
|
|
|
1250
1270
|
// src/target/skills-group.ts
|
|
1251
|
-
var
|
|
1271
|
+
var import_zod8 = require("zod");
|
|
1252
1272
|
var SkillsGroupSchema = TenantEntitySchema.extend({
|
|
1253
1273
|
/** IDs of skills in this group */
|
|
1254
|
-
skillIds:
|
|
1274
|
+
skillIds: import_zod8.z.array(import_zod8.z.string())
|
|
1255
1275
|
});
|
|
1256
1276
|
var CreateSkillsGroupInputSchema = SkillsGroupSchema.omit({
|
|
1257
1277
|
id: true,
|
|
@@ -1262,10 +1282,10 @@ var CreateSkillsGroupInputSchema = SkillsGroupSchema.omit({
|
|
|
1262
1282
|
var UpdateSkillsGroupInputSchema = CreateSkillsGroupInputSchema.partial();
|
|
1263
1283
|
|
|
1264
1284
|
// src/target/sub-agent.ts
|
|
1265
|
-
var
|
|
1285
|
+
var import_zod9 = require("zod");
|
|
1266
1286
|
var SubAgentSchema = TargetSchema.extend({
|
|
1267
1287
|
/** The full sub-agent markdown content (YAML frontmatter + body) */
|
|
1268
|
-
subAgentMd:
|
|
1288
|
+
subAgentMd: import_zod9.z.string()
|
|
1269
1289
|
});
|
|
1270
1290
|
var SubAgentInputBaseSchema = SubAgentSchema.omit({
|
|
1271
1291
|
id: true,
|
|
@@ -1277,10 +1297,10 @@ var CreateSubAgentInputSchema = SubAgentInputBaseSchema;
|
|
|
1277
1297
|
var UpdateSubAgentInputSchema = SubAgentInputBaseSchema.partial();
|
|
1278
1298
|
|
|
1279
1299
|
// src/test/index.ts
|
|
1280
|
-
var
|
|
1300
|
+
var import_zod20 = require("zod");
|
|
1281
1301
|
|
|
1282
1302
|
// src/test/base.ts
|
|
1283
|
-
var
|
|
1303
|
+
var import_zod10 = require("zod");
|
|
1284
1304
|
var TestType = /* @__PURE__ */ ((TestType2) => {
|
|
1285
1305
|
TestType2["LLM"] = "LLM";
|
|
1286
1306
|
TestType2["TOOL"] = "TOOL";
|
|
@@ -1293,7 +1313,7 @@ var TestType = /* @__PURE__ */ ((TestType2) => {
|
|
|
1293
1313
|
TestType2["PLAYWRIGHT_NL"] = "PLAYWRIGHT_NL";
|
|
1294
1314
|
return TestType2;
|
|
1295
1315
|
})(TestType || {});
|
|
1296
|
-
var TestTypeSchema =
|
|
1316
|
+
var TestTypeSchema = import_zod10.z.enum(TestType);
|
|
1297
1317
|
var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
|
|
1298
1318
|
TestImportance2["LOW"] = "low";
|
|
1299
1319
|
TestImportance2["MEDIUM"] = "medium";
|
|
@@ -1301,153 +1321,153 @@ var TestImportance = /* @__PURE__ */ ((TestImportance2) => {
|
|
|
1301
1321
|
TestImportance2["CRITICAL"] = "critical";
|
|
1302
1322
|
return TestImportance2;
|
|
1303
1323
|
})(TestImportance || {});
|
|
1304
|
-
var TestImportanceSchema =
|
|
1305
|
-
var BaseTestSchema =
|
|
1306
|
-
id:
|
|
1324
|
+
var TestImportanceSchema = import_zod10.z.enum(TestImportance);
|
|
1325
|
+
var BaseTestSchema = import_zod10.z.object({
|
|
1326
|
+
id: import_zod10.z.string(),
|
|
1307
1327
|
type: TestTypeSchema,
|
|
1308
|
-
name:
|
|
1309
|
-
description:
|
|
1328
|
+
name: import_zod10.z.string().min(3),
|
|
1329
|
+
description: import_zod10.z.string().optional(),
|
|
1310
1330
|
importance: TestImportanceSchema.optional()
|
|
1311
1331
|
});
|
|
1312
1332
|
|
|
1313
1333
|
// src/test/llm.ts
|
|
1314
|
-
var
|
|
1334
|
+
var import_zod11 = require("zod");
|
|
1315
1335
|
var LLMTestSchema = BaseTestSchema.extend({
|
|
1316
|
-
type:
|
|
1336
|
+
type: import_zod11.z.literal("LLM" /* LLM */),
|
|
1317
1337
|
/** Maximum steps for the LLM to take */
|
|
1318
|
-
maxSteps:
|
|
1338
|
+
maxSteps: import_zod11.z.number().min(1).max(100),
|
|
1319
1339
|
/** Prompt to send to the evaluator */
|
|
1320
|
-
prompt:
|
|
1340
|
+
prompt: import_zod11.z.string().min(1),
|
|
1321
1341
|
/** ID of the evaluator agent to use */
|
|
1322
|
-
evaluatorId:
|
|
1342
|
+
evaluatorId: import_zod11.z.string()
|
|
1323
1343
|
});
|
|
1324
1344
|
|
|
1325
1345
|
// src/test/tool.ts
|
|
1326
|
-
var
|
|
1346
|
+
var import_zod12 = require("zod");
|
|
1327
1347
|
var ToolTestSchema = BaseTestSchema.extend({
|
|
1328
|
-
type:
|
|
1348
|
+
type: import_zod12.z.literal("TOOL" /* TOOL */),
|
|
1329
1349
|
/** Name of the tool that should be called */
|
|
1330
|
-
toolName:
|
|
1350
|
+
toolName: import_zod12.z.string().min(3),
|
|
1331
1351
|
/** Expected arguments for the tool call */
|
|
1332
|
-
args:
|
|
1352
|
+
args: import_zod12.z.record(import_zod12.z.string(), import_zod12.z.any()),
|
|
1333
1353
|
/** Expected content in the tool results */
|
|
1334
|
-
resultsContent:
|
|
1354
|
+
resultsContent: import_zod12.z.string()
|
|
1335
1355
|
});
|
|
1336
1356
|
|
|
1337
1357
|
// src/test/site-config.ts
|
|
1338
|
-
var
|
|
1358
|
+
var import_zod13 = require("zod");
|
|
1339
1359
|
var SiteConfigTestSchema = BaseTestSchema.extend({
|
|
1340
|
-
type:
|
|
1360
|
+
type: import_zod13.z.literal("SITE_CONFIG" /* SITE_CONFIG */),
|
|
1341
1361
|
/** URL to call */
|
|
1342
|
-
url:
|
|
1362
|
+
url: import_zod13.z.string().url(),
|
|
1343
1363
|
/** HTTP method */
|
|
1344
|
-
method:
|
|
1364
|
+
method: import_zod13.z.enum(["GET", "POST"]),
|
|
1345
1365
|
/** Request body (for POST) */
|
|
1346
|
-
body:
|
|
1366
|
+
body: import_zod13.z.string().optional(),
|
|
1347
1367
|
/** Expected HTTP status code */
|
|
1348
|
-
expectedStatusCode:
|
|
1368
|
+
expectedStatusCode: import_zod13.z.number().int().min(100).max(599),
|
|
1349
1369
|
/** Expected response content */
|
|
1350
|
-
expectedResponse:
|
|
1370
|
+
expectedResponse: import_zod13.z.string().optional(),
|
|
1351
1371
|
/** JMESPath expression to extract from response */
|
|
1352
|
-
expectedResponseJMESPath:
|
|
1372
|
+
expectedResponseJMESPath: import_zod13.z.string().optional()
|
|
1353
1373
|
});
|
|
1354
1374
|
|
|
1355
1375
|
// src/test/command-execution.ts
|
|
1356
|
-
var
|
|
1376
|
+
var import_zod14 = require("zod");
|
|
1357
1377
|
var AllowedCommands = [
|
|
1358
1378
|
"yarn install --no-immutable && yarn build",
|
|
1359
1379
|
"npm run build",
|
|
1360
1380
|
"yarn typecheck"
|
|
1361
1381
|
];
|
|
1362
1382
|
var CommandExecutionTestSchema = BaseTestSchema.extend({
|
|
1363
|
-
type:
|
|
1383
|
+
type: import_zod14.z.literal("COMMAND_EXECUTION" /* COMMAND_EXECUTION */),
|
|
1364
1384
|
/** Command to execute (must be in AllowedCommands) */
|
|
1365
|
-
command:
|
|
1385
|
+
command: import_zod14.z.string().refine((value) => AllowedCommands.includes(value), {
|
|
1366
1386
|
message: `Command must be one of: ${AllowedCommands.join(", ")}`
|
|
1367
1387
|
}),
|
|
1368
1388
|
/** Expected exit code (default: 0) */
|
|
1369
|
-
expectedExitCode:
|
|
1389
|
+
expectedExitCode: import_zod14.z.number().default(0).optional()
|
|
1370
1390
|
});
|
|
1371
1391
|
|
|
1372
1392
|
// src/test/file-presence.ts
|
|
1373
|
-
var
|
|
1393
|
+
var import_zod15 = require("zod");
|
|
1374
1394
|
var FilePresenceTestSchema = BaseTestSchema.extend({
|
|
1375
|
-
type:
|
|
1395
|
+
type: import_zod15.z.literal("FILE_PRESENCE" /* FILE_PRESENCE */),
|
|
1376
1396
|
/** Paths to check */
|
|
1377
|
-
paths:
|
|
1397
|
+
paths: import_zod15.z.array(import_zod15.z.string()),
|
|
1378
1398
|
/** Whether files should exist (true) or not exist (false) */
|
|
1379
|
-
shouldExist:
|
|
1399
|
+
shouldExist: import_zod15.z.boolean()
|
|
1380
1400
|
});
|
|
1381
1401
|
|
|
1382
1402
|
// src/test/file-content.ts
|
|
1383
|
-
var
|
|
1384
|
-
var FileContentCheckSchema =
|
|
1403
|
+
var import_zod16 = require("zod");
|
|
1404
|
+
var FileContentCheckSchema = import_zod16.z.object({
|
|
1385
1405
|
/** Strings that must be present in the file */
|
|
1386
|
-
contains:
|
|
1406
|
+
contains: import_zod16.z.array(import_zod16.z.string()).optional(),
|
|
1387
1407
|
/** Strings that must NOT be present in the file */
|
|
1388
|
-
notContains:
|
|
1408
|
+
notContains: import_zod16.z.array(import_zod16.z.string()).optional(),
|
|
1389
1409
|
/** Regex pattern the content must match */
|
|
1390
|
-
matches:
|
|
1410
|
+
matches: import_zod16.z.string().optional(),
|
|
1391
1411
|
/** JSON path checks for structured content */
|
|
1392
|
-
jsonPath:
|
|
1393
|
-
|
|
1394
|
-
path:
|
|
1395
|
-
value:
|
|
1412
|
+
jsonPath: import_zod16.z.array(
|
|
1413
|
+
import_zod16.z.object({
|
|
1414
|
+
path: import_zod16.z.string(),
|
|
1415
|
+
value: import_zod16.z.unknown()
|
|
1396
1416
|
})
|
|
1397
1417
|
).optional(),
|
|
1398
1418
|
/** Lines that should be added (for diff checking) */
|
|
1399
|
-
added:
|
|
1419
|
+
added: import_zod16.z.array(import_zod16.z.string()).optional(),
|
|
1400
1420
|
/** Lines that should be removed (for diff checking) */
|
|
1401
|
-
removed:
|
|
1421
|
+
removed: import_zod16.z.array(import_zod16.z.string()).optional()
|
|
1402
1422
|
});
|
|
1403
1423
|
var FileContentTestSchema = BaseTestSchema.extend({
|
|
1404
|
-
type:
|
|
1424
|
+
type: import_zod16.z.literal("FILE_CONTENT" /* FILE_CONTENT */),
|
|
1405
1425
|
/** Path to the file to check */
|
|
1406
|
-
path:
|
|
1426
|
+
path: import_zod16.z.string(),
|
|
1407
1427
|
/** Content checks to perform */
|
|
1408
1428
|
checks: FileContentCheckSchema
|
|
1409
1429
|
});
|
|
1410
1430
|
|
|
1411
1431
|
// src/test/build-check.ts
|
|
1412
|
-
var
|
|
1432
|
+
var import_zod17 = require("zod");
|
|
1413
1433
|
var BuildCheckTestSchema = BaseTestSchema.extend({
|
|
1414
|
-
type:
|
|
1434
|
+
type: import_zod17.z.literal("BUILD_CHECK" /* BUILD_CHECK */),
|
|
1415
1435
|
/** Build command to execute */
|
|
1416
|
-
command:
|
|
1436
|
+
command: import_zod17.z.string(),
|
|
1417
1437
|
/** Whether the build should succeed */
|
|
1418
|
-
expectSuccess:
|
|
1438
|
+
expectSuccess: import_zod17.z.boolean(),
|
|
1419
1439
|
/** Maximum allowed warnings (optional) */
|
|
1420
|
-
allowedWarnings:
|
|
1440
|
+
allowedWarnings: import_zod17.z.number().optional(),
|
|
1421
1441
|
/** Timeout in milliseconds */
|
|
1422
|
-
timeout:
|
|
1442
|
+
timeout: import_zod17.z.number().optional()
|
|
1423
1443
|
});
|
|
1424
1444
|
|
|
1425
1445
|
// src/test/vitest.ts
|
|
1426
|
-
var
|
|
1446
|
+
var import_zod18 = require("zod");
|
|
1427
1447
|
var VitestTestSchema = BaseTestSchema.extend({
|
|
1428
|
-
type:
|
|
1448
|
+
type: import_zod18.z.literal("VITEST" /* VITEST */),
|
|
1429
1449
|
/** Test file content */
|
|
1430
|
-
testFile:
|
|
1450
|
+
testFile: import_zod18.z.string(),
|
|
1431
1451
|
/** Name of the test file */
|
|
1432
|
-
testFileName:
|
|
1452
|
+
testFileName: import_zod18.z.string(),
|
|
1433
1453
|
/** Minimum pass rate required (0-100) */
|
|
1434
|
-
minPassRate:
|
|
1454
|
+
minPassRate: import_zod18.z.number().min(0).max(100)
|
|
1435
1455
|
});
|
|
1436
1456
|
|
|
1437
1457
|
// src/test/playwright-nl.ts
|
|
1438
|
-
var
|
|
1458
|
+
var import_zod19 = require("zod");
|
|
1439
1459
|
var PlaywrightNLTestSchema = BaseTestSchema.extend({
|
|
1440
|
-
type:
|
|
1460
|
+
type: import_zod19.z.literal("PLAYWRIGHT_NL" /* PLAYWRIGHT_NL */),
|
|
1441
1461
|
/** Natural language steps to execute */
|
|
1442
|
-
steps:
|
|
1462
|
+
steps: import_zod19.z.array(import_zod19.z.string()),
|
|
1443
1463
|
/** Expected outcome description */
|
|
1444
|
-
expectedOutcome:
|
|
1464
|
+
expectedOutcome: import_zod19.z.string(),
|
|
1445
1465
|
/** Timeout in milliseconds */
|
|
1446
|
-
timeout:
|
|
1466
|
+
timeout: import_zod19.z.number().optional()
|
|
1447
1467
|
});
|
|
1448
1468
|
|
|
1449
1469
|
// src/test/index.ts
|
|
1450
|
-
var TestSchema =
|
|
1470
|
+
var TestSchema = import_zod20.z.discriminatedUnion("type", [
|
|
1451
1471
|
LLMTestSchema,
|
|
1452
1472
|
ToolTestSchema,
|
|
1453
1473
|
SiteConfigTestSchema,
|
|
@@ -1460,43 +1480,43 @@ var TestSchema = import_zod19.z.discriminatedUnion("type", [
|
|
|
1460
1480
|
]);
|
|
1461
1481
|
|
|
1462
1482
|
// src/scenario/assertions.ts
|
|
1463
|
-
var
|
|
1464
|
-
var SkillWasCalledAssertionSchema =
|
|
1465
|
-
type:
|
|
1483
|
+
var import_zod21 = require("zod");
|
|
1484
|
+
var SkillWasCalledAssertionSchema = import_zod21.z.object({
|
|
1485
|
+
type: import_zod21.z.literal("skill_was_called"),
|
|
1466
1486
|
/** Names of the skills that must have been called (matched against trace Skill tool args) */
|
|
1467
|
-
skillNames:
|
|
1487
|
+
skillNames: import_zod21.z.array(import_zod21.z.string().min(1)).min(1)
|
|
1468
1488
|
});
|
|
1469
|
-
var BuildPassedAssertionSchema =
|
|
1470
|
-
type:
|
|
1489
|
+
var BuildPassedAssertionSchema = import_zod21.z.object({
|
|
1490
|
+
type: import_zod21.z.literal("build_passed"),
|
|
1471
1491
|
/** Command to run (default: "yarn build") */
|
|
1472
|
-
command:
|
|
1492
|
+
command: import_zod21.z.string().optional(),
|
|
1473
1493
|
/** Expected exit code (default: 0) */
|
|
1474
|
-
expectedExitCode:
|
|
1494
|
+
expectedExitCode: import_zod21.z.number().int().optional()
|
|
1475
1495
|
});
|
|
1476
|
-
var CostAssertionSchema =
|
|
1477
|
-
type:
|
|
1496
|
+
var CostAssertionSchema = import_zod21.z.object({
|
|
1497
|
+
type: import_zod21.z.literal("cost"),
|
|
1478
1498
|
/** Maximum allowed cost in USD */
|
|
1479
|
-
maxCostUsd:
|
|
1499
|
+
maxCostUsd: import_zod21.z.number().positive()
|
|
1480
1500
|
});
|
|
1481
|
-
var LlmJudgeAssertionSchema =
|
|
1482
|
-
type:
|
|
1501
|
+
var LlmJudgeAssertionSchema = import_zod21.z.object({
|
|
1502
|
+
type: import_zod21.z.literal("llm_judge"),
|
|
1483
1503
|
/** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
|
|
1484
|
-
prompt:
|
|
1504
|
+
prompt: import_zod21.z.string(),
|
|
1485
1505
|
/** Optional system prompt for the judge (default asks for JSON with score) */
|
|
1486
|
-
systemPrompt:
|
|
1506
|
+
systemPrompt: import_zod21.z.string().optional(),
|
|
1487
1507
|
/** Minimum score to pass (0-100, default 70) */
|
|
1488
|
-
minScore:
|
|
1508
|
+
minScore: import_zod21.z.number().int().min(0).max(100).optional(),
|
|
1489
1509
|
/** Model for the judge (e.g. claude-3-5-haiku) */
|
|
1490
|
-
model:
|
|
1491
|
-
maxTokens:
|
|
1492
|
-
temperature:
|
|
1510
|
+
model: import_zod21.z.string().optional(),
|
|
1511
|
+
maxTokens: import_zod21.z.number().int().optional(),
|
|
1512
|
+
temperature: import_zod21.z.number().min(0).max(1).optional()
|
|
1493
1513
|
});
|
|
1494
|
-
var TimeAssertionSchema =
|
|
1495
|
-
type:
|
|
1514
|
+
var TimeAssertionSchema = import_zod21.z.object({
|
|
1515
|
+
type: import_zod21.z.literal("time_limit"),
|
|
1496
1516
|
/** Maximum allowed duration in milliseconds */
|
|
1497
|
-
maxDurationMs:
|
|
1517
|
+
maxDurationMs: import_zod21.z.number().int().positive()
|
|
1498
1518
|
});
|
|
1499
|
-
var AssertionSchema =
|
|
1519
|
+
var AssertionSchema = import_zod21.z.union([
|
|
1500
1520
|
SkillWasCalledAssertionSchema,
|
|
1501
1521
|
BuildPassedAssertionSchema,
|
|
1502
1522
|
TimeAssertionSchema,
|
|
@@ -1505,33 +1525,33 @@ var AssertionSchema = import_zod20.z.union([
|
|
|
1505
1525
|
]);
|
|
1506
1526
|
|
|
1507
1527
|
// src/scenario/environment.ts
|
|
1508
|
-
var
|
|
1509
|
-
var LocalProjectConfigSchema =
|
|
1528
|
+
var import_zod22 = require("zod");
|
|
1529
|
+
var LocalProjectConfigSchema = import_zod22.z.object({
|
|
1510
1530
|
/** Template ID to use for the local project */
|
|
1511
|
-
templateId:
|
|
1531
|
+
templateId: import_zod22.z.string().optional(),
|
|
1512
1532
|
/** Files to create in the project */
|
|
1513
|
-
files:
|
|
1514
|
-
|
|
1515
|
-
path:
|
|
1516
|
-
content:
|
|
1533
|
+
files: import_zod22.z.array(
|
|
1534
|
+
import_zod22.z.object({
|
|
1535
|
+
path: import_zod22.z.string().min(1),
|
|
1536
|
+
content: import_zod22.z.string().min(1)
|
|
1517
1537
|
})
|
|
1518
1538
|
).optional()
|
|
1519
1539
|
});
|
|
1520
|
-
var MetaSiteConfigSchema =
|
|
1521
|
-
configurations:
|
|
1522
|
-
|
|
1523
|
-
name:
|
|
1524
|
-
apiCalls:
|
|
1525
|
-
|
|
1526
|
-
url:
|
|
1527
|
-
method:
|
|
1528
|
-
body:
|
|
1540
|
+
var MetaSiteConfigSchema = import_zod22.z.object({
|
|
1541
|
+
configurations: import_zod22.z.array(
|
|
1542
|
+
import_zod22.z.object({
|
|
1543
|
+
name: import_zod22.z.string().min(1),
|
|
1544
|
+
apiCalls: import_zod22.z.array(
|
|
1545
|
+
import_zod22.z.object({
|
|
1546
|
+
url: import_zod22.z.string().url(),
|
|
1547
|
+
method: import_zod22.z.enum(["POST", "PUT"]),
|
|
1548
|
+
body: import_zod22.z.string()
|
|
1529
1549
|
})
|
|
1530
1550
|
)
|
|
1531
1551
|
})
|
|
1532
1552
|
).optional()
|
|
1533
1553
|
});
|
|
1534
|
-
var EnvironmentSchema =
|
|
1554
|
+
var EnvironmentSchema = import_zod22.z.object({
|
|
1535
1555
|
/** Local project configuration */
|
|
1536
1556
|
localProject: LocalProjectConfigSchema.optional(),
|
|
1537
1557
|
/** Meta site configuration */
|
|
@@ -1539,64 +1559,64 @@ var EnvironmentSchema = import_zod21.z.object({
|
|
|
1539
1559
|
});
|
|
1540
1560
|
|
|
1541
1561
|
// src/scenario/test-scenario.ts
|
|
1542
|
-
var
|
|
1562
|
+
var import_zod24 = require("zod");
|
|
1543
1563
|
|
|
1544
1564
|
// src/assertion/assertion.ts
|
|
1545
|
-
var
|
|
1546
|
-
var AssertionTypeSchema =
|
|
1565
|
+
var import_zod23 = require("zod");
|
|
1566
|
+
var AssertionTypeSchema = import_zod23.z.enum([
|
|
1547
1567
|
"skill_was_called",
|
|
1548
1568
|
"build_passed",
|
|
1549
1569
|
"time_limit",
|
|
1550
1570
|
"cost",
|
|
1551
1571
|
"llm_judge"
|
|
1552
1572
|
]);
|
|
1553
|
-
var AssertionParameterTypeSchema =
|
|
1573
|
+
var AssertionParameterTypeSchema = import_zod23.z.enum([
|
|
1554
1574
|
"string",
|
|
1555
1575
|
"number",
|
|
1556
1576
|
"boolean"
|
|
1557
1577
|
]);
|
|
1558
|
-
var AssertionParameterSchema =
|
|
1578
|
+
var AssertionParameterSchema = import_zod23.z.object({
|
|
1559
1579
|
/** Parameter name (used as key in params object) */
|
|
1560
|
-
name:
|
|
1580
|
+
name: import_zod23.z.string().min(1),
|
|
1561
1581
|
/** Display label for the parameter */
|
|
1562
|
-
label:
|
|
1582
|
+
label: import_zod23.z.string().min(1),
|
|
1563
1583
|
/** Parameter type */
|
|
1564
1584
|
type: AssertionParameterTypeSchema,
|
|
1565
1585
|
/** Whether this parameter is required */
|
|
1566
|
-
required:
|
|
1586
|
+
required: import_zod23.z.boolean(),
|
|
1567
1587
|
/** Default value (optional, used when not provided) */
|
|
1568
|
-
defaultValue:
|
|
1588
|
+
defaultValue: import_zod23.z.union([import_zod23.z.string(), import_zod23.z.number(), import_zod23.z.boolean()]).optional(),
|
|
1569
1589
|
/** If true, parameter is hidden by default behind "Show advanced options" */
|
|
1570
|
-
advanced:
|
|
1590
|
+
advanced: import_zod23.z.boolean().optional()
|
|
1571
1591
|
});
|
|
1572
|
-
var ScenarioAssertionLinkSchema =
|
|
1592
|
+
var ScenarioAssertionLinkSchema = import_zod23.z.object({
|
|
1573
1593
|
/** ID of the assertion (can be system assertion like 'system:skill_was_called' or custom assertion UUID) */
|
|
1574
|
-
assertionId:
|
|
1594
|
+
assertionId: import_zod23.z.string(),
|
|
1575
1595
|
/** Parameter values for this assertion in this scenario */
|
|
1576
|
-
params:
|
|
1577
|
-
|
|
1578
|
-
|
|
1596
|
+
params: import_zod23.z.record(
|
|
1597
|
+
import_zod23.z.string(),
|
|
1598
|
+
import_zod23.z.union([import_zod23.z.string(), import_zod23.z.number(), import_zod23.z.boolean(), import_zod23.z.null()])
|
|
1579
1599
|
).optional()
|
|
1580
1600
|
});
|
|
1581
|
-
var SkillWasCalledConfigSchema =
|
|
1601
|
+
var SkillWasCalledConfigSchema = import_zod23.z.object({
|
|
1582
1602
|
/** Names of the skills that must have been called */
|
|
1583
|
-
skillNames:
|
|
1603
|
+
skillNames: import_zod23.z.array(import_zod23.z.string().min(1)).min(1)
|
|
1584
1604
|
});
|
|
1585
|
-
var CostConfigSchema =
|
|
1605
|
+
var CostConfigSchema = import_zod23.z.strictObject({
|
|
1586
1606
|
/** Maximum allowed cost in USD */
|
|
1587
|
-
maxCostUsd:
|
|
1607
|
+
maxCostUsd: import_zod23.z.number().positive()
|
|
1588
1608
|
});
|
|
1589
|
-
var BuildPassedConfigSchema =
|
|
1609
|
+
var BuildPassedConfigSchema = import_zod23.z.strictObject({
|
|
1590
1610
|
/** Command to run (default: "yarn build") */
|
|
1591
|
-
command:
|
|
1611
|
+
command: import_zod23.z.string().optional(),
|
|
1592
1612
|
/** Expected exit code (default: 0) */
|
|
1593
|
-
expectedExitCode:
|
|
1613
|
+
expectedExitCode: import_zod23.z.number().int().optional()
|
|
1594
1614
|
});
|
|
1595
|
-
var TimeConfigSchema =
|
|
1615
|
+
var TimeConfigSchema = import_zod23.z.strictObject({
|
|
1596
1616
|
/** Maximum allowed duration in milliseconds */
|
|
1597
|
-
maxDurationMs:
|
|
1617
|
+
maxDurationMs: import_zod23.z.number().int().positive()
|
|
1598
1618
|
});
|
|
1599
|
-
var LlmJudgeConfigSchema =
|
|
1619
|
+
var LlmJudgeConfigSchema = import_zod23.z.object({
|
|
1600
1620
|
/**
|
|
1601
1621
|
* Prompt template with placeholders:
|
|
1602
1622
|
* - {{output}}: agent's final output
|
|
@@ -1607,21 +1627,21 @@ var LlmJudgeConfigSchema = import_zod22.z.object({
|
|
|
1607
1627
|
* - {{trace}}: step-by-step trace of tool calls
|
|
1608
1628
|
* - Custom parameters defined in the parameters array
|
|
1609
1629
|
*/
|
|
1610
|
-
prompt:
|
|
1630
|
+
prompt: import_zod23.z.string().min(1),
|
|
1611
1631
|
/** Optional system prompt for the judge */
|
|
1612
|
-
systemPrompt:
|
|
1632
|
+
systemPrompt: import_zod23.z.string().optional(),
|
|
1613
1633
|
/** Minimum score to pass (0-100, default 70) */
|
|
1614
|
-
minScore:
|
|
1634
|
+
minScore: import_zod23.z.number().int().min(0).max(100).optional(),
|
|
1615
1635
|
/** Model for the judge (e.g. claude-3-5-haiku-20241022) */
|
|
1616
|
-
model:
|
|
1636
|
+
model: import_zod23.z.string().optional(),
|
|
1617
1637
|
/** Max output tokens */
|
|
1618
|
-
maxTokens:
|
|
1638
|
+
maxTokens: import_zod23.z.number().int().optional(),
|
|
1619
1639
|
/** Temperature (0-1) */
|
|
1620
|
-
temperature:
|
|
1640
|
+
temperature: import_zod23.z.number().min(0).max(1).optional(),
|
|
1621
1641
|
/** User-defined parameters for this assertion */
|
|
1622
|
-
parameters:
|
|
1642
|
+
parameters: import_zod23.z.array(AssertionParameterSchema).optional()
|
|
1623
1643
|
});
|
|
1624
|
-
var AssertionConfigSchema =
|
|
1644
|
+
var AssertionConfigSchema = import_zod23.z.union([
|
|
1625
1645
|
LlmJudgeConfigSchema,
|
|
1626
1646
|
// requires prompt - check first
|
|
1627
1647
|
SkillWasCalledConfigSchema,
|
|
@@ -1632,7 +1652,7 @@ var AssertionConfigSchema = import_zod22.z.union([
|
|
|
1632
1652
|
// requires maxCostUsd, uses strictObject
|
|
1633
1653
|
BuildPassedConfigSchema,
|
|
1634
1654
|
// all optional, uses strictObject to reject unknown keys
|
|
1635
|
-
|
|
1655
|
+
import_zod23.z.object({})
|
|
1636
1656
|
// fallback empty config
|
|
1637
1657
|
]);
|
|
1638
1658
|
var CustomAssertionSchema = TenantEntitySchema.extend({
|
|
@@ -1681,23 +1701,23 @@ function getLlmJudgeConfig(assertion) {
|
|
|
1681
1701
|
}
|
|
1682
1702
|
|
|
1683
1703
|
// src/scenario/test-scenario.ts
|
|
1684
|
-
var ExpectedFileSchema =
|
|
1704
|
+
var ExpectedFileSchema = import_zod24.z.object({
|
|
1685
1705
|
/** Relative path where the file should be created */
|
|
1686
|
-
path:
|
|
1706
|
+
path: import_zod24.z.string(),
|
|
1687
1707
|
/** Optional expected content */
|
|
1688
|
-
content:
|
|
1708
|
+
content: import_zod24.z.string().optional()
|
|
1689
1709
|
});
|
|
1690
1710
|
var TestScenarioSchema = TenantEntitySchema.extend({
|
|
1691
1711
|
/** The prompt sent to the agent to trigger the task */
|
|
1692
|
-
triggerPrompt:
|
|
1712
|
+
triggerPrompt: import_zod24.z.string().min(10),
|
|
1693
1713
|
/** ID of the template to use for this scenario (null = no template) */
|
|
1694
|
-
templateId:
|
|
1714
|
+
templateId: import_zod24.z.string().nullish(),
|
|
1695
1715
|
/** Inline assertions to evaluate for this scenario (legacy) */
|
|
1696
|
-
assertions:
|
|
1716
|
+
assertions: import_zod24.z.array(AssertionSchema).optional(),
|
|
1697
1717
|
/** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
|
|
1698
|
-
assertionIds:
|
|
1718
|
+
assertionIds: import_zod24.z.array(import_zod24.z.string()).optional(),
|
|
1699
1719
|
/** Linked assertions with per-scenario parameter values */
|
|
1700
|
-
assertionLinks:
|
|
1720
|
+
assertionLinks: import_zod24.z.array(ScenarioAssertionLinkSchema).optional()
|
|
1701
1721
|
});
|
|
1702
1722
|
var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
|
|
1703
1723
|
id: true,
|
|
@@ -1708,10 +1728,10 @@ var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
|
|
|
1708
1728
|
var UpdateTestScenarioInputSchema = CreateTestScenarioInputSchema.partial();
|
|
1709
1729
|
|
|
1710
1730
|
// src/suite/test-suite.ts
|
|
1711
|
-
var
|
|
1731
|
+
var import_zod25 = require("zod");
|
|
1712
1732
|
var TestSuiteSchema = TenantEntitySchema.extend({
|
|
1713
1733
|
/** IDs of test scenarios in this suite */
|
|
1714
|
-
scenarioIds:
|
|
1734
|
+
scenarioIds: import_zod25.z.array(import_zod25.z.string())
|
|
1715
1735
|
});
|
|
1716
1736
|
var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
1717
1737
|
id: true,
|
|
@@ -1722,21 +1742,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
|
|
|
1722
1742
|
var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
|
|
1723
1743
|
|
|
1724
1744
|
// src/evaluation/metrics.ts
|
|
1725
|
-
var
|
|
1726
|
-
var TokenUsageSchema =
|
|
1727
|
-
prompt:
|
|
1728
|
-
completion:
|
|
1729
|
-
total:
|
|
1730
|
-
});
|
|
1731
|
-
var EvalMetricsSchema =
|
|
1732
|
-
totalAssertions:
|
|
1733
|
-
passed:
|
|
1734
|
-
failed:
|
|
1735
|
-
skipped:
|
|
1736
|
-
errors:
|
|
1737
|
-
passRate:
|
|
1738
|
-
avgDuration:
|
|
1739
|
-
totalDuration:
|
|
1745
|
+
var import_zod26 = require("zod");
|
|
1746
|
+
var TokenUsageSchema = import_zod26.z.object({
|
|
1747
|
+
prompt: import_zod26.z.number(),
|
|
1748
|
+
completion: import_zod26.z.number(),
|
|
1749
|
+
total: import_zod26.z.number()
|
|
1750
|
+
});
|
|
1751
|
+
var EvalMetricsSchema = import_zod26.z.object({
|
|
1752
|
+
totalAssertions: import_zod26.z.number(),
|
|
1753
|
+
passed: import_zod26.z.number(),
|
|
1754
|
+
failed: import_zod26.z.number(),
|
|
1755
|
+
skipped: import_zod26.z.number(),
|
|
1756
|
+
errors: import_zod26.z.number(),
|
|
1757
|
+
passRate: import_zod26.z.number(),
|
|
1758
|
+
avgDuration: import_zod26.z.number(),
|
|
1759
|
+
totalDuration: import_zod26.z.number()
|
|
1740
1760
|
});
|
|
1741
1761
|
var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
1742
1762
|
EvalStatus2["PENDING"] = "pending";
|
|
@@ -1746,7 +1766,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
|
|
|
1746
1766
|
EvalStatus2["CANCELLED"] = "cancelled";
|
|
1747
1767
|
return EvalStatus2;
|
|
1748
1768
|
})(EvalStatus || {});
|
|
1749
|
-
var EvalStatusSchema =
|
|
1769
|
+
var EvalStatusSchema = import_zod26.z.enum(EvalStatus);
|
|
1750
1770
|
var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
1751
1771
|
LLMStepType2["COMPLETION"] = "completion";
|
|
1752
1772
|
LLMStepType2["TOOL_USE"] = "tool_use";
|
|
@@ -1754,52 +1774,52 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
|
|
|
1754
1774
|
LLMStepType2["THINKING"] = "thinking";
|
|
1755
1775
|
return LLMStepType2;
|
|
1756
1776
|
})(LLMStepType || {});
|
|
1757
|
-
var LLMTraceStepSchema =
|
|
1758
|
-
id:
|
|
1759
|
-
stepNumber:
|
|
1760
|
-
type:
|
|
1761
|
-
model:
|
|
1762
|
-
provider:
|
|
1763
|
-
startedAt:
|
|
1764
|
-
durationMs:
|
|
1777
|
+
var LLMTraceStepSchema = import_zod26.z.object({
|
|
1778
|
+
id: import_zod26.z.string(),
|
|
1779
|
+
stepNumber: import_zod26.z.number(),
|
|
1780
|
+
type: import_zod26.z.enum(LLMStepType),
|
|
1781
|
+
model: import_zod26.z.string(),
|
|
1782
|
+
provider: import_zod26.z.string(),
|
|
1783
|
+
startedAt: import_zod26.z.string(),
|
|
1784
|
+
durationMs: import_zod26.z.number(),
|
|
1765
1785
|
tokenUsage: TokenUsageSchema,
|
|
1766
|
-
costUsd:
|
|
1767
|
-
toolName:
|
|
1768
|
-
toolArguments:
|
|
1769
|
-
inputPreview:
|
|
1770
|
-
outputPreview:
|
|
1771
|
-
success:
|
|
1772
|
-
error:
|
|
1773
|
-
});
|
|
1774
|
-
var LLMBreakdownStatsSchema =
|
|
1775
|
-
count:
|
|
1776
|
-
durationMs:
|
|
1777
|
-
tokens:
|
|
1778
|
-
costUsd:
|
|
1779
|
-
});
|
|
1780
|
-
var LLMTraceSummarySchema =
|
|
1781
|
-
totalSteps:
|
|
1782
|
-
totalDurationMs:
|
|
1786
|
+
costUsd: import_zod26.z.number(),
|
|
1787
|
+
toolName: import_zod26.z.string().optional(),
|
|
1788
|
+
toolArguments: import_zod26.z.string().optional(),
|
|
1789
|
+
inputPreview: import_zod26.z.string().optional(),
|
|
1790
|
+
outputPreview: import_zod26.z.string().optional(),
|
|
1791
|
+
success: import_zod26.z.boolean(),
|
|
1792
|
+
error: import_zod26.z.string().optional()
|
|
1793
|
+
});
|
|
1794
|
+
var LLMBreakdownStatsSchema = import_zod26.z.object({
|
|
1795
|
+
count: import_zod26.z.number(),
|
|
1796
|
+
durationMs: import_zod26.z.number(),
|
|
1797
|
+
tokens: import_zod26.z.number(),
|
|
1798
|
+
costUsd: import_zod26.z.number()
|
|
1799
|
+
});
|
|
1800
|
+
var LLMTraceSummarySchema = import_zod26.z.object({
|
|
1801
|
+
totalSteps: import_zod26.z.number(),
|
|
1802
|
+
totalDurationMs: import_zod26.z.number(),
|
|
1783
1803
|
totalTokens: TokenUsageSchema,
|
|
1784
|
-
totalCostUsd:
|
|
1785
|
-
stepTypeBreakdown:
|
|
1786
|
-
modelBreakdown:
|
|
1787
|
-
modelsUsed:
|
|
1788
|
-
});
|
|
1789
|
-
var LLMTraceSchema =
|
|
1790
|
-
id:
|
|
1791
|
-
steps:
|
|
1804
|
+
totalCostUsd: import_zod26.z.number(),
|
|
1805
|
+
stepTypeBreakdown: import_zod26.z.record(import_zod26.z.string(), LLMBreakdownStatsSchema).optional(),
|
|
1806
|
+
modelBreakdown: import_zod26.z.record(import_zod26.z.string(), LLMBreakdownStatsSchema),
|
|
1807
|
+
modelsUsed: import_zod26.z.array(import_zod26.z.string())
|
|
1808
|
+
});
|
|
1809
|
+
var LLMTraceSchema = import_zod26.z.object({
|
|
1810
|
+
id: import_zod26.z.string(),
|
|
1811
|
+
steps: import_zod26.z.array(LLMTraceStepSchema),
|
|
1792
1812
|
summary: LLMTraceSummarySchema
|
|
1793
1813
|
});
|
|
1794
1814
|
|
|
1795
1815
|
// src/evaluation/eval-result.ts
|
|
1796
|
-
var
|
|
1816
|
+
var import_zod29 = require("zod");
|
|
1797
1817
|
|
|
1798
1818
|
// src/evaluation/eval-run.ts
|
|
1799
|
-
var
|
|
1819
|
+
var import_zod28 = require("zod");
|
|
1800
1820
|
|
|
1801
1821
|
// src/evaluation/live-trace.ts
|
|
1802
|
-
var
|
|
1822
|
+
var import_zod27 = require("zod");
|
|
1803
1823
|
var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
1804
1824
|
LiveTraceEventType2["THINKING"] = "thinking";
|
|
1805
1825
|
LiveTraceEventType2["TOOL_USE"] = "tool_use";
|
|
@@ -1813,37 +1833,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
|
|
|
1813
1833
|
LiveTraceEventType2["USER"] = "user";
|
|
1814
1834
|
return LiveTraceEventType2;
|
|
1815
1835
|
})(LiveTraceEventType || {});
|
|
1816
|
-
var LiveTraceEventSchema =
|
|
1836
|
+
var LiveTraceEventSchema = import_zod27.z.object({
|
|
1817
1837
|
/** The evaluation run ID */
|
|
1818
|
-
evalRunId:
|
|
1838
|
+
evalRunId: import_zod27.z.string(),
|
|
1819
1839
|
/** The scenario ID being executed */
|
|
1820
|
-
scenarioId:
|
|
1840
|
+
scenarioId: import_zod27.z.string(),
|
|
1821
1841
|
/** The scenario name for display */
|
|
1822
|
-
scenarioName:
|
|
1842
|
+
scenarioName: import_zod27.z.string(),
|
|
1823
1843
|
/** The target ID (skill, agent, etc.) */
|
|
1824
|
-
targetId:
|
|
1844
|
+
targetId: import_zod27.z.string(),
|
|
1825
1845
|
/** The target name for display */
|
|
1826
|
-
targetName:
|
|
1846
|
+
targetName: import_zod27.z.string(),
|
|
1827
1847
|
/** Step number in the current scenario execution */
|
|
1828
|
-
stepNumber:
|
|
1848
|
+
stepNumber: import_zod27.z.number(),
|
|
1829
1849
|
/** Type of trace event */
|
|
1830
|
-
type:
|
|
1850
|
+
type: import_zod27.z.enum(LiveTraceEventType),
|
|
1831
1851
|
/** Tool name if this is a tool_use event */
|
|
1832
|
-
toolName:
|
|
1852
|
+
toolName: import_zod27.z.string().optional(),
|
|
1833
1853
|
/** Tool arguments preview (truncated JSON) */
|
|
1834
|
-
toolArgs:
|
|
1854
|
+
toolArgs: import_zod27.z.string().optional(),
|
|
1835
1855
|
/** Output preview (truncated text) */
|
|
1836
|
-
outputPreview:
|
|
1856
|
+
outputPreview: import_zod27.z.string().optional(),
|
|
1837
1857
|
/** File path for file operations */
|
|
1838
|
-
filePath:
|
|
1858
|
+
filePath: import_zod27.z.string().optional(),
|
|
1839
1859
|
/** Elapsed time in milliseconds for progress events */
|
|
1840
|
-
elapsedMs:
|
|
1860
|
+
elapsedMs: import_zod27.z.number().optional(),
|
|
1841
1861
|
/** Thinking/reasoning text from Claude */
|
|
1842
|
-
thinking:
|
|
1862
|
+
thinking: import_zod27.z.string().optional(),
|
|
1843
1863
|
/** Timestamp when this event occurred */
|
|
1844
|
-
timestamp:
|
|
1864
|
+
timestamp: import_zod27.z.string(),
|
|
1845
1865
|
/** Whether this is the final event for this scenario */
|
|
1846
|
-
isComplete:
|
|
1866
|
+
isComplete: import_zod27.z.boolean()
|
|
1847
1867
|
});
|
|
1848
1868
|
var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
|
|
1849
1869
|
function parseTraceEventLine(line) {
|
|
@@ -1871,14 +1891,14 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
|
|
|
1871
1891
|
TriggerType2["MANUAL"] = "MANUAL";
|
|
1872
1892
|
return TriggerType2;
|
|
1873
1893
|
})(TriggerType || {});
|
|
1874
|
-
var TriggerMetadataSchema =
|
|
1875
|
-
version:
|
|
1876
|
-
resourceUpdated:
|
|
1894
|
+
var TriggerMetadataSchema = import_zod28.z.object({
|
|
1895
|
+
version: import_zod28.z.string().optional(),
|
|
1896
|
+
resourceUpdated: import_zod28.z.array(import_zod28.z.string()).optional()
|
|
1877
1897
|
});
|
|
1878
|
-
var TriggerSchema =
|
|
1879
|
-
id:
|
|
1898
|
+
var TriggerSchema = import_zod28.z.object({
|
|
1899
|
+
id: import_zod28.z.string(),
|
|
1880
1900
|
metadata: TriggerMetadataSchema.optional(),
|
|
1881
|
-
type:
|
|
1901
|
+
type: import_zod28.z.enum(TriggerType)
|
|
1882
1902
|
});
|
|
1883
1903
|
var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
|
|
1884
1904
|
FailureCategory2["MISSING_FILE"] = "missing_file";
|
|
@@ -1896,28 +1916,28 @@ var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
|
|
|
1896
1916
|
FailureSeverity2["LOW"] = "low";
|
|
1897
1917
|
return FailureSeverity2;
|
|
1898
1918
|
})(FailureSeverity || {});
|
|
1899
|
-
var DiffLineTypeSchema =
|
|
1900
|
-
var DiffLineSchema =
|
|
1919
|
+
var DiffLineTypeSchema = import_zod28.z.enum(["added", "removed", "unchanged"]);
|
|
1920
|
+
var DiffLineSchema = import_zod28.z.object({
|
|
1901
1921
|
type: DiffLineTypeSchema,
|
|
1902
|
-
content:
|
|
1903
|
-
lineNumber:
|
|
1904
|
-
});
|
|
1905
|
-
var DiffContentSchema =
|
|
1906
|
-
path:
|
|
1907
|
-
expected:
|
|
1908
|
-
actual:
|
|
1909
|
-
diffLines:
|
|
1910
|
-
renamedFrom:
|
|
1911
|
-
});
|
|
1912
|
-
var CommandExecutionSchema =
|
|
1913
|
-
command:
|
|
1914
|
-
exitCode:
|
|
1915
|
-
output:
|
|
1916
|
-
duration:
|
|
1917
|
-
});
|
|
1918
|
-
var FileModificationSchema =
|
|
1919
|
-
path:
|
|
1920
|
-
action:
|
|
1922
|
+
content: import_zod28.z.string(),
|
|
1923
|
+
lineNumber: import_zod28.z.number()
|
|
1924
|
+
});
|
|
1925
|
+
var DiffContentSchema = import_zod28.z.object({
|
|
1926
|
+
path: import_zod28.z.string(),
|
|
1927
|
+
expected: import_zod28.z.string(),
|
|
1928
|
+
actual: import_zod28.z.string(),
|
|
1929
|
+
diffLines: import_zod28.z.array(DiffLineSchema),
|
|
1930
|
+
renamedFrom: import_zod28.z.string().optional()
|
|
1931
|
+
});
|
|
1932
|
+
var CommandExecutionSchema = import_zod28.z.object({
|
|
1933
|
+
command: import_zod28.z.string(),
|
|
1934
|
+
exitCode: import_zod28.z.number(),
|
|
1935
|
+
output: import_zod28.z.string().optional(),
|
|
1936
|
+
duration: import_zod28.z.number()
|
|
1937
|
+
});
|
|
1938
|
+
var FileModificationSchema = import_zod28.z.object({
|
|
1939
|
+
path: import_zod28.z.string(),
|
|
1940
|
+
action: import_zod28.z.enum(["created", "modified", "deleted"])
|
|
1921
1941
|
});
|
|
1922
1942
|
var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
1923
1943
|
TemplateFileStatus2["NEW"] = "new";
|
|
@@ -1925,81 +1945,83 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
|
|
|
1925
1945
|
TemplateFileStatus2["UNCHANGED"] = "unchanged";
|
|
1926
1946
|
return TemplateFileStatus2;
|
|
1927
1947
|
})(TemplateFileStatus || {});
|
|
1928
|
-
var TemplateFileSchema =
|
|
1948
|
+
var TemplateFileSchema = import_zod28.z.object({
|
|
1929
1949
|
/** Relative path within the template */
|
|
1930
|
-
path:
|
|
1950
|
+
path: import_zod28.z.string(),
|
|
1931
1951
|
/** Full file content after execution */
|
|
1932
|
-
content:
|
|
1952
|
+
content: import_zod28.z.string(),
|
|
1933
1953
|
/** File status (new, modified, unchanged) */
|
|
1934
|
-
status:
|
|
1935
|
-
});
|
|
1936
|
-
var ApiCallSchema =
|
|
1937
|
-
endpoint:
|
|
1938
|
-
tokensUsed:
|
|
1939
|
-
duration:
|
|
1940
|
-
});
|
|
1941
|
-
var ExecutionTraceSchema =
|
|
1942
|
-
commands:
|
|
1943
|
-
filesModified:
|
|
1944
|
-
apiCalls:
|
|
1945
|
-
totalDuration:
|
|
1946
|
-
});
|
|
1947
|
-
var FailureAnalysisSchema =
|
|
1948
|
-
category:
|
|
1949
|
-
severity:
|
|
1950
|
-
summary:
|
|
1951
|
-
details:
|
|
1952
|
-
rootCause:
|
|
1953
|
-
suggestedFix:
|
|
1954
|
-
relatedAssertions:
|
|
1955
|
-
codeSnippet:
|
|
1956
|
-
similarIssues:
|
|
1957
|
-
patternId:
|
|
1954
|
+
status: import_zod28.z.enum(["new", "modified", "unchanged"])
|
|
1955
|
+
});
|
|
1956
|
+
var ApiCallSchema = import_zod28.z.object({
|
|
1957
|
+
endpoint: import_zod28.z.string(),
|
|
1958
|
+
tokensUsed: import_zod28.z.number(),
|
|
1959
|
+
duration: import_zod28.z.number()
|
|
1960
|
+
});
|
|
1961
|
+
var ExecutionTraceSchema = import_zod28.z.object({
|
|
1962
|
+
commands: import_zod28.z.array(CommandExecutionSchema),
|
|
1963
|
+
filesModified: import_zod28.z.array(FileModificationSchema),
|
|
1964
|
+
apiCalls: import_zod28.z.array(ApiCallSchema),
|
|
1965
|
+
totalDuration: import_zod28.z.number()
|
|
1966
|
+
});
|
|
1967
|
+
var FailureAnalysisSchema = import_zod28.z.object({
|
|
1968
|
+
category: import_zod28.z.enum(FailureCategory),
|
|
1969
|
+
severity: import_zod28.z.enum(FailureSeverity),
|
|
1970
|
+
summary: import_zod28.z.string(),
|
|
1971
|
+
details: import_zod28.z.string(),
|
|
1972
|
+
rootCause: import_zod28.z.string(),
|
|
1973
|
+
suggestedFix: import_zod28.z.string(),
|
|
1974
|
+
relatedAssertions: import_zod28.z.array(import_zod28.z.string()),
|
|
1975
|
+
codeSnippet: import_zod28.z.string().optional(),
|
|
1976
|
+
similarIssues: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
1977
|
+
patternId: import_zod28.z.string().optional(),
|
|
1958
1978
|
// Extended fields for detailed debugging
|
|
1959
1979
|
diff: DiffContentSchema.optional(),
|
|
1960
1980
|
executionTrace: ExecutionTraceSchema.optional()
|
|
1961
1981
|
});
|
|
1962
1982
|
var EvalRunSchema = TenantEntitySchema.extend({
|
|
1963
1983
|
/** Agent ID for this run */
|
|
1964
|
-
agentId:
|
|
1984
|
+
agentId: import_zod28.z.string().optional(),
|
|
1965
1985
|
/** Skills group ID for this run */
|
|
1966
|
-
skillsGroupId:
|
|
1986
|
+
skillsGroupId: import_zod28.z.string().optional(),
|
|
1967
1987
|
/** Map of skillId to skillVersionId for this run */
|
|
1968
|
-
skillVersions:
|
|
1988
|
+
skillVersions: import_zod28.z.record(import_zod28.z.string(), import_zod28.z.string()).optional(),
|
|
1969
1989
|
/** Scenario IDs to run */
|
|
1970
|
-
scenarioIds:
|
|
1990
|
+
scenarioIds: import_zod28.z.array(import_zod28.z.string()),
|
|
1971
1991
|
/** Current status */
|
|
1972
1992
|
status: EvalStatusSchema,
|
|
1973
1993
|
/** Progress percentage (0-100) */
|
|
1974
|
-
progress:
|
|
1994
|
+
progress: import_zod28.z.number(),
|
|
1975
1995
|
/** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
|
|
1976
|
-
results:
|
|
1996
|
+
results: import_zod28.z.array(import_zod28.z.lazy(() => EvalRunResultSchema)),
|
|
1977
1997
|
/** Aggregated metrics across all results */
|
|
1978
1998
|
aggregateMetrics: EvalMetricsSchema,
|
|
1979
1999
|
/** Failure analyses */
|
|
1980
|
-
failureAnalyses:
|
|
2000
|
+
failureAnalyses: import_zod28.z.array(FailureAnalysisSchema).optional(),
|
|
1981
2001
|
/** Aggregated LLM trace summary */
|
|
1982
2002
|
llmTraceSummary: LLMTraceSummarySchema.optional(),
|
|
1983
2003
|
/** What triggered this run */
|
|
1984
2004
|
trigger: TriggerSchema.optional(),
|
|
1985
2005
|
/** When the run started (set when evaluation is triggered) */
|
|
1986
|
-
startedAt:
|
|
2006
|
+
startedAt: import_zod28.z.string().optional(),
|
|
1987
2007
|
/** When the run completed */
|
|
1988
|
-
completedAt:
|
|
2008
|
+
completedAt: import_zod28.z.string().optional(),
|
|
1989
2009
|
/** Live trace events captured during execution (for playback on results page) */
|
|
1990
|
-
liveTraceEvents:
|
|
2010
|
+
liveTraceEvents: import_zod28.z.array(LiveTraceEventSchema).optional(),
|
|
1991
2011
|
/** Remote job ID for tracking execution in Dev Machines */
|
|
1992
|
-
jobId:
|
|
2012
|
+
jobId: import_zod28.z.string().optional(),
|
|
1993
2013
|
/** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
|
|
1994
|
-
jobStatus:
|
|
2014
|
+
jobStatus: import_zod28.z.string().optional(),
|
|
1995
2015
|
/** Remote job error message if the job failed */
|
|
1996
|
-
jobError:
|
|
2016
|
+
jobError: import_zod28.z.string().optional(),
|
|
1997
2017
|
/** Timestamp of the last job status check */
|
|
1998
|
-
jobStatusCheckedAt:
|
|
2018
|
+
jobStatusCheckedAt: import_zod28.z.string().optional(),
|
|
1999
2019
|
/** MCP server IDs to enable for this run (optional) */
|
|
2000
|
-
mcpIds:
|
|
2020
|
+
mcpIds: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
2001
2021
|
/** Sub-agent IDs to enable for this run (optional) */
|
|
2002
|
-
subAgentIds:
|
|
2022
|
+
subAgentIds: import_zod28.z.array(import_zod28.z.string()).optional(),
|
|
2023
|
+
/** Rule IDs to enable for this run (optional) */
|
|
2024
|
+
ruleIds: import_zod28.z.array(import_zod28.z.string()).optional()
|
|
2003
2025
|
});
|
|
2004
2026
|
var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
2005
2027
|
id: true,
|
|
@@ -2012,28 +2034,28 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
|
|
|
2012
2034
|
startedAt: true,
|
|
2013
2035
|
completedAt: true
|
|
2014
2036
|
});
|
|
2015
|
-
var EvaluationProgressSchema =
|
|
2016
|
-
runId:
|
|
2017
|
-
targetId:
|
|
2018
|
-
totalScenarios:
|
|
2019
|
-
completedScenarios:
|
|
2020
|
-
scenarioProgress:
|
|
2021
|
-
|
|
2022
|
-
scenarioId:
|
|
2023
|
-
currentStep:
|
|
2024
|
-
error:
|
|
2037
|
+
var EvaluationProgressSchema = import_zod28.z.object({
|
|
2038
|
+
runId: import_zod28.z.string(),
|
|
2039
|
+
targetId: import_zod28.z.string(),
|
|
2040
|
+
totalScenarios: import_zod28.z.number(),
|
|
2041
|
+
completedScenarios: import_zod28.z.number(),
|
|
2042
|
+
scenarioProgress: import_zod28.z.array(
|
|
2043
|
+
import_zod28.z.object({
|
|
2044
|
+
scenarioId: import_zod28.z.string(),
|
|
2045
|
+
currentStep: import_zod28.z.string(),
|
|
2046
|
+
error: import_zod28.z.string().optional()
|
|
2025
2047
|
})
|
|
2026
2048
|
),
|
|
2027
|
-
createdAt:
|
|
2049
|
+
createdAt: import_zod28.z.number()
|
|
2028
2050
|
});
|
|
2029
|
-
var EvaluationLogSchema =
|
|
2030
|
-
runId:
|
|
2031
|
-
scenarioId:
|
|
2032
|
-
log:
|
|
2033
|
-
level:
|
|
2034
|
-
message:
|
|
2035
|
-
args:
|
|
2036
|
-
error:
|
|
2051
|
+
var EvaluationLogSchema = import_zod28.z.object({
|
|
2052
|
+
runId: import_zod28.z.string(),
|
|
2053
|
+
scenarioId: import_zod28.z.string(),
|
|
2054
|
+
log: import_zod28.z.object({
|
|
2055
|
+
level: import_zod28.z.enum(["info", "error", "debug"]),
|
|
2056
|
+
message: import_zod28.z.string().optional(),
|
|
2057
|
+
args: import_zod28.z.array(import_zod28.z.any()).optional(),
|
|
2058
|
+
error: import_zod28.z.string().optional()
|
|
2037
2059
|
})
|
|
2038
2060
|
});
|
|
2039
2061
|
var LLM_TIMEOUT = 12e4;
|
|
@@ -2046,95 +2068,95 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
|
|
|
2046
2068
|
AssertionResultStatus2["ERROR"] = "error";
|
|
2047
2069
|
return AssertionResultStatus2;
|
|
2048
2070
|
})(AssertionResultStatus || {});
|
|
2049
|
-
var AssertionResultSchema =
|
|
2050
|
-
id:
|
|
2051
|
-
assertionId:
|
|
2052
|
-
assertionType:
|
|
2053
|
-
assertionName:
|
|
2054
|
-
status:
|
|
2055
|
-
message:
|
|
2056
|
-
expected:
|
|
2057
|
-
actual:
|
|
2058
|
-
duration:
|
|
2059
|
-
details:
|
|
2060
|
-
llmTraceSteps:
|
|
2061
|
-
});
|
|
2062
|
-
var EvalRunResultSchema =
|
|
2063
|
-
id:
|
|
2064
|
-
targetId:
|
|
2065
|
-
targetName:
|
|
2071
|
+
var AssertionResultSchema = import_zod29.z.object({
|
|
2072
|
+
id: import_zod29.z.string(),
|
|
2073
|
+
assertionId: import_zod29.z.string(),
|
|
2074
|
+
assertionType: import_zod29.z.string(),
|
|
2075
|
+
assertionName: import_zod29.z.string(),
|
|
2076
|
+
status: import_zod29.z.enum(AssertionResultStatus),
|
|
2077
|
+
message: import_zod29.z.string().optional(),
|
|
2078
|
+
expected: import_zod29.z.string().optional(),
|
|
2079
|
+
actual: import_zod29.z.string().optional(),
|
|
2080
|
+
duration: import_zod29.z.number().optional(),
|
|
2081
|
+
details: import_zod29.z.record(import_zod29.z.string(), import_zod29.z.unknown()).optional(),
|
|
2082
|
+
llmTraceSteps: import_zod29.z.array(LLMTraceStepSchema).optional()
|
|
2083
|
+
});
|
|
2084
|
+
var EvalRunResultSchema = import_zod29.z.object({
|
|
2085
|
+
id: import_zod29.z.string(),
|
|
2086
|
+
targetId: import_zod29.z.string(),
|
|
2087
|
+
targetName: import_zod29.z.string().optional(),
|
|
2066
2088
|
/** SkillVersion ID used for this evaluation (for version tracking) */
|
|
2067
|
-
skillVersionId:
|
|
2089
|
+
skillVersionId: import_zod29.z.string().optional(),
|
|
2068
2090
|
/** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
|
|
2069
|
-
skillVersion:
|
|
2070
|
-
scenarioId:
|
|
2071
|
-
scenarioName:
|
|
2091
|
+
skillVersion: import_zod29.z.string().optional(),
|
|
2092
|
+
scenarioId: import_zod29.z.string(),
|
|
2093
|
+
scenarioName: import_zod29.z.string(),
|
|
2072
2094
|
modelConfig: ModelConfigSchema.optional(),
|
|
2073
|
-
assertionResults:
|
|
2095
|
+
assertionResults: import_zod29.z.array(AssertionResultSchema),
|
|
2074
2096
|
metrics: EvalMetricsSchema.optional(),
|
|
2075
|
-
passed:
|
|
2076
|
-
failed:
|
|
2077
|
-
passRate:
|
|
2078
|
-
duration:
|
|
2079
|
-
outputText:
|
|
2080
|
-
files:
|
|
2081
|
-
fileDiffs:
|
|
2097
|
+
passed: import_zod29.z.number(),
|
|
2098
|
+
failed: import_zod29.z.number(),
|
|
2099
|
+
passRate: import_zod29.z.number(),
|
|
2100
|
+
duration: import_zod29.z.number(),
|
|
2101
|
+
outputText: import_zod29.z.string().optional(),
|
|
2102
|
+
files: import_zod29.z.array(ExpectedFileSchema).optional(),
|
|
2103
|
+
fileDiffs: import_zod29.z.array(DiffContentSchema).optional(),
|
|
2082
2104
|
/** Full template files after execution with status indicators */
|
|
2083
|
-
templateFiles:
|
|
2084
|
-
startedAt:
|
|
2085
|
-
completedAt:
|
|
2105
|
+
templateFiles: import_zod29.z.array(TemplateFileSchema).optional(),
|
|
2106
|
+
startedAt: import_zod29.z.string().optional(),
|
|
2107
|
+
completedAt: import_zod29.z.string().optional(),
|
|
2086
2108
|
llmTrace: LLMTraceSchema.optional()
|
|
2087
2109
|
});
|
|
2088
|
-
var PromptResultSchema =
|
|
2089
|
-
text:
|
|
2090
|
-
files:
|
|
2091
|
-
finishReason:
|
|
2092
|
-
reasoning:
|
|
2093
|
-
reasoningDetails:
|
|
2094
|
-
toolCalls:
|
|
2095
|
-
toolResults:
|
|
2096
|
-
warnings:
|
|
2097
|
-
sources:
|
|
2098
|
-
steps:
|
|
2099
|
-
generationTimeMs:
|
|
2100
|
-
prompt:
|
|
2101
|
-
systemPrompt:
|
|
2102
|
-
usage:
|
|
2103
|
-
totalTokens:
|
|
2104
|
-
totalMicrocentsSpent:
|
|
2110
|
+
var PromptResultSchema = import_zod29.z.object({
|
|
2111
|
+
text: import_zod29.z.string(),
|
|
2112
|
+
files: import_zod29.z.array(import_zod29.z.unknown()).optional(),
|
|
2113
|
+
finishReason: import_zod29.z.string().optional(),
|
|
2114
|
+
reasoning: import_zod29.z.string().optional(),
|
|
2115
|
+
reasoningDetails: import_zod29.z.unknown().optional(),
|
|
2116
|
+
toolCalls: import_zod29.z.array(import_zod29.z.unknown()).optional(),
|
|
2117
|
+
toolResults: import_zod29.z.array(import_zod29.z.unknown()).optional(),
|
|
2118
|
+
warnings: import_zod29.z.array(import_zod29.z.unknown()).optional(),
|
|
2119
|
+
sources: import_zod29.z.array(import_zod29.z.unknown()).optional(),
|
|
2120
|
+
steps: import_zod29.z.array(import_zod29.z.unknown()),
|
|
2121
|
+
generationTimeMs: import_zod29.z.number(),
|
|
2122
|
+
prompt: import_zod29.z.string(),
|
|
2123
|
+
systemPrompt: import_zod29.z.string(),
|
|
2124
|
+
usage: import_zod29.z.object({
|
|
2125
|
+
totalTokens: import_zod29.z.number().optional(),
|
|
2126
|
+
totalMicrocentsSpent: import_zod29.z.number().optional()
|
|
2105
2127
|
})
|
|
2106
2128
|
});
|
|
2107
|
-
var EvaluationResultSchema =
|
|
2108
|
-
id:
|
|
2109
|
-
runId:
|
|
2110
|
-
timestamp:
|
|
2129
|
+
var EvaluationResultSchema = import_zod29.z.object({
|
|
2130
|
+
id: import_zod29.z.string(),
|
|
2131
|
+
runId: import_zod29.z.string(),
|
|
2132
|
+
timestamp: import_zod29.z.number(),
|
|
2111
2133
|
promptResult: PromptResultSchema,
|
|
2112
|
-
testResults:
|
|
2113
|
-
tags:
|
|
2114
|
-
feedback:
|
|
2115
|
-
score:
|
|
2116
|
-
suiteId:
|
|
2117
|
-
});
|
|
2118
|
-
var LeanEvaluationResultSchema =
|
|
2119
|
-
id:
|
|
2120
|
-
runId:
|
|
2121
|
-
timestamp:
|
|
2122
|
-
tags:
|
|
2123
|
-
scenarioId:
|
|
2124
|
-
scenarioVersion:
|
|
2125
|
-
targetId:
|
|
2126
|
-
targetVersion:
|
|
2127
|
-
suiteId:
|
|
2128
|
-
score:
|
|
2129
|
-
time:
|
|
2130
|
-
microcentsSpent:
|
|
2134
|
+
testResults: import_zod29.z.array(import_zod29.z.unknown()),
|
|
2135
|
+
tags: import_zod29.z.array(import_zod29.z.string()).optional(),
|
|
2136
|
+
feedback: import_zod29.z.string().optional(),
|
|
2137
|
+
score: import_zod29.z.number(),
|
|
2138
|
+
suiteId: import_zod29.z.string().optional()
|
|
2139
|
+
});
|
|
2140
|
+
var LeanEvaluationResultSchema = import_zod29.z.object({
|
|
2141
|
+
id: import_zod29.z.string(),
|
|
2142
|
+
runId: import_zod29.z.string(),
|
|
2143
|
+
timestamp: import_zod29.z.number(),
|
|
2144
|
+
tags: import_zod29.z.array(import_zod29.z.string()).optional(),
|
|
2145
|
+
scenarioId: import_zod29.z.string(),
|
|
2146
|
+
scenarioVersion: import_zod29.z.number().optional(),
|
|
2147
|
+
targetId: import_zod29.z.string(),
|
|
2148
|
+
targetVersion: import_zod29.z.number().optional(),
|
|
2149
|
+
suiteId: import_zod29.z.string().optional(),
|
|
2150
|
+
score: import_zod29.z.number(),
|
|
2151
|
+
time: import_zod29.z.number().optional(),
|
|
2152
|
+
microcentsSpent: import_zod29.z.number().optional()
|
|
2131
2153
|
});
|
|
2132
2154
|
|
|
2133
2155
|
// src/project/project.ts
|
|
2134
|
-
var
|
|
2156
|
+
var import_zod30 = require("zod");
|
|
2135
2157
|
var ProjectSchema = BaseEntitySchema.extend({
|
|
2136
|
-
appId:
|
|
2137
|
-
appSecret:
|
|
2158
|
+
appId: import_zod30.z.string().optional().describe("The ID of the app in Dev Center"),
|
|
2159
|
+
appSecret: import_zod30.z.string().optional().describe("The secret of the app in Dev Center")
|
|
2138
2160
|
});
|
|
2139
2161
|
var CreateProjectInputSchema = ProjectSchema.omit({
|
|
2140
2162
|
id: true,
|
|
@@ -2329,6 +2351,7 @@ function getSystemAssertion(id) {
|
|
|
2329
2351
|
CreateEvalRunInputSchema,
|
|
2330
2352
|
CreateMcpInputSchema,
|
|
2331
2353
|
CreateProjectInputSchema,
|
|
2354
|
+
CreateRuleInputSchema,
|
|
2332
2355
|
CreateSkillInputSchema,
|
|
2333
2356
|
CreateSkillVersionInputSchema,
|
|
2334
2357
|
CreateSkillsGroupInputSchema,
|
|
@@ -2383,6 +2406,8 @@ function getSystemAssertion(id) {
|
|
|
2383
2406
|
ProjectSchema,
|
|
2384
2407
|
PromptResultSchema,
|
|
2385
2408
|
RUN_COMMAND_LABELS,
|
|
2409
|
+
RuleSchema,
|
|
2410
|
+
RuleTypeSchema,
|
|
2386
2411
|
SEMVER_REGEX,
|
|
2387
2412
|
SKILL_FOLDER_NAME_REGEX,
|
|
2388
2413
|
SYSTEM_ASSERTIONS,
|
|
@@ -2423,6 +2448,7 @@ function getSystemAssertion(id) {
|
|
|
2423
2448
|
UpdateCustomAssertionInputSchema,
|
|
2424
2449
|
UpdateMcpInputSchema,
|
|
2425
2450
|
UpdateProjectInputSchema,
|
|
2451
|
+
UpdateRuleInputSchema,
|
|
2426
2452
|
UpdateSkillInputSchema,
|
|
2427
2453
|
UpdateSkillsGroupInputSchema,
|
|
2428
2454
|
UpdateSubAgentInputSchema,
|